[med-svn] [htsjdk] 02/06: Imported Upstream version 2.0.1+dfsg.1
Andreas Tille
tille at debian.org
Sat Dec 19 21:26:56 UTC 2015
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository htsjdk.
commit ad6800cf21471c4d01fc8a0f6736dbf0cc16974f
Author: Andreas Tille <tille at debian.org>
Date: Sat Dec 19 21:26:08 2015 +0100
Imported Upstream version 2.0.1+dfsg.1
---
.idea/modules/htsjdk.iml | 3 +-
.travis.yml | 5 +-
README.md | 2 +
build.sbt | 8 +-
build.xml | 9 +-
htsjdk.iml | 9 +
src/java/htsjdk/samtools/AbstractBAMFileIndex.java | 27 +-
.../{SAMFileSpan.java => BAMFileSpan.java} | 59 +-
src/java/htsjdk/samtools/BAMRecord.java | 7 +-
src/java/htsjdk/samtools/BAMRecordCodec.java | 6 +-
src/java/htsjdk/samtools/BinaryTagCodec.java | 11 +-
src/java/htsjdk/samtools/CRAMFileReader.java | 64 +-
src/java/htsjdk/samtools/CRAMFileWriter.java | 99 +-
src/java/htsjdk/samtools/CRAMIndexer.java | 36 +-
src/java/htsjdk/samtools/CRAMIterator.java | 34 +-
.../samtools/ChainedDownsamplingIterator.java | 90 ++
.../ConstantMemoryDownsamplingIterator.java | 88 ++
.../htsjdk/samtools/DefaultSAMRecordFactory.java | 6 +-
src/java/htsjdk/samtools/DownsamplingIterator.java | 164 ++-
.../samtools/DownsamplingIteratorFactory.java | 118 +++
.../htsjdk/samtools/DuplicateScoringStrategy.java | 39 +-
src/java/htsjdk/samtools/DuplicateSet.java | 70 +-
.../samtools/HighAccuracyDownsamplingIterator.java | 196 ++++
.../htsjdk/samtools/MergingSamRecordIterator.java | 12 +-
.../SAMBinaryTagAndUnsignedArrayValue.java | 14 +-
src/java/htsjdk/samtools/SAMBinaryTagAndValue.java | 43 +-
src/java/htsjdk/samtools/SAMFileSpan.java | 244 -----
src/java/htsjdk/samtools/SAMFileWriterFactory.java | 149 ++-
src/java/htsjdk/samtools/SAMFileWriterImpl.java | 20 +-
src/java/htsjdk/samtools/SAMRecord.java | 680 ++++++++++---
.../samtools/SAMRecordCoordinateComparator.java | 6 +
.../samtools/SAMRecordDuplicateComparator.java | 46 +-
.../samtools/SAMRecordQueryHashComparator.java | 68 ++
src/java/htsjdk/samtools/SAMTag.java | 3 +
src/java/htsjdk/samtools/SAMUtils.java | 174 +++-
src/java/htsjdk/samtools/SRAFileReader.java | 306 ++++++
src/java/htsjdk/samtools/SRAIndex.java | 257 +++++
src/java/htsjdk/samtools/SRAIterator.java | 248 +++++
src/java/htsjdk/samtools/SamFileValidator.java | 21 +-
src/java/htsjdk/samtools/SamFiles.java | 23 +-
src/java/htsjdk/samtools/SamIndexes.java | 94 ++
src/java/htsjdk/samtools/SamInputResource.java | 68 +-
src/java/htsjdk/samtools/SamPairUtil.java | 55 +-
src/java/htsjdk/samtools/SamReader.java | 1 +
src/java/htsjdk/samtools/SamReaderFactory.java | 47 +-
src/java/htsjdk/samtools/SamStreams.java | 1 +
src/java/htsjdk/samtools/TextTagCodec.java | 19 +-
src/java/htsjdk/samtools/cram/CRAIEntry.java | 148 +++
src/java/htsjdk/samtools/cram/CRAIIndex.java | 164 +++
src/java/htsjdk/samtools/cram/CRAMException.java | 22 +
.../samtools/cram/build/ContainerParser.java | 13 +-
src/java/htsjdk/samtools/cram/build/CramIO.java | 10 +-
.../samtools/cram/build/Sam2CramRecordFactory.java | 13 +
.../cram/encoding/reader/CramRecordReader.java | 22 +-
.../htsjdk/samtools/cram/ref/ReferenceSource.java | 9 +-
.../cram/structure/CramCompressionRecord.java | 6 +-
.../htsjdk/samtools/cram/structure/ReadTag.java | 30 +-
.../htsjdk/samtools/filter/FilteringIterator.java | 2 +-
.../htsjdk/samtools/filter/IntervalFilter.java | 2 +-
.../samtools/filter/OverclippedReadFilter.java | 76 ++
src/java/htsjdk/samtools/metrics/MetricsFile.java | 32 +-
.../reference/AbstractFastaSequenceFile.java | 74 +-
.../samtools/reference/FastaSequenceFile.java | 18 +-
.../samtools/reference/FastaSequenceIndex.java | 21 +-
.../reference/IndexedFastaSequenceFile.java | 96 +-
.../reference/ReferenceSequenceFileFactory.java | 45 +-
.../seekablestream/SeekableMemoryStream.java | 64 ++
src/java/htsjdk/samtools/sra/ReferenceCache.java | 79 ++
src/java/htsjdk/samtools/sra/SRAAccession.java | 108 ++
.../htsjdk/samtools/sra/SRAAlignmentIterator.java | 194 ++++
.../samtools/sra/SRAIndexedSequenceFile.java | 121 +++
src/java/htsjdk/samtools/sra/SRALazyRecord.java | 1056 ++++++++++++++++++++
.../samtools/sra/SRAUnalignmentIterator.java | 181 ++++
src/java/htsjdk/samtools/sra/SRAUtils.java | 83 ++
.../htsjdk/samtools/util/AbstractAsyncWriter.java | 2 +-
.../samtools/util/AbstractProgressLogger.java | 2 +-
src/java/htsjdk/samtools/util/BinaryCodec.java | 10 +-
src/java/htsjdk/samtools/util/DiskBackedQueue.java | 12 +-
src/java/htsjdk/samtools/util/Histogram.java | 20 +-
src/java/htsjdk/samtools/util/IOUtil.java | 62 +-
src/java/htsjdk/samtools/util/Murmur3.java | 115 +++
src/java/htsjdk/samtools/util/ProgressLogger.java | 2 +-
src/java/htsjdk/samtools/util/SequenceUtil.java | 57 +-
src/java/htsjdk/samtools/util/StringUtil.java | 4 +-
.../variant/variantcontext/VariantContext.java | 8 +-
.../variantcontext/filter/CompoundFilter.java | 74 ++
.../variantcontext/filter/FilteringIterator.java | 127 +++
.../filter/GenotypeQualityFilter.java | 79 ++
.../filter/HeterozygosityFilter.java | 84 ++
.../filter/PassingVariantFilter.java} | 71 +-
.../variantcontext/filter/SnpFilter.java} | 71 +-
.../filter/VariantContextFilter.java} | 70 +-
.../variantcontext/writer/BCF2FieldEncoder.java | 35 +-
.../writer/VariantContextWriterBuilder.java | 25 +-
src/java/htsjdk/variant/vcf/VCFRecordCodec.java | 18 +-
.../java/htsjdk/samtools/BAMFileWriterTest.java | 133 ++-
.../java/htsjdk/samtools/CRAMComplianceTest.java | 7 +-
.../java/htsjdk/samtools/CRAMEdgeCasesTest.java | 33 +-
.../java/htsjdk/samtools/CRAMFileIndexTest.java | 66 ++
.../samtools/CRAMFileWriterWithIndexTest.java | 3 +-
src/tests/java/htsjdk/samtools/CigarTest.java | 81 +-
.../java/htsjdk/samtools/CramFileWriterTest.java | 122 ++-
.../htsjdk/samtools/DownsamplingIteratorTests.java | 82 ++
.../htsjdk/samtools/DuplicateSetIteratorTest.java | 5 +-
.../samtools/MergingSamRecordIteratorTest.java | 41 +
.../java/htsjdk/samtools/SAMFileReaderTest.java | 69 +-
.../htsjdk/samtools/SAMFileWriterFactoryTest.java | 159 ++-
.../java/htsjdk/samtools/SAMIntegerTagTest.java | 167 +++-
.../samtools/SAMRecordDuplicateComparatorTest.java | 15 +
.../java/htsjdk/samtools/SAMRecordUnitTest.java | 797 ++++++++++++++-
.../java/htsjdk/samtools/SAMTextWriterTest.java | 12 +
src/tests/java/htsjdk/samtools/SAMUtilsTest.java | 106 ++
src/tests/java/htsjdk/samtools/SamFilesTest.java | 60 ++
src/tests/java/htsjdk/samtools/SamIndexesTest.java | 192 ++++
.../java/htsjdk/samtools/SamReaderFactoryTest.java | 25 +
src/tests/java/htsjdk/samtools/SamSpecIntTest.java | 4 +-
.../java/htsjdk/samtools/cram/CRAIEntryTest.java | 145 +++
.../java/htsjdk/samtools/cram/CRAIIndexTest.java | 133 +++
.../htsjdk/samtools/cram/build/CramIOTest.java | 82 ++
.../cram/structure/CramCompressionRecordTest.java | 68 ++
.../samtools/cram/structure/ReadTagTest.java | 21 +-
.../samtools/filter/OverclippedReadFilterTest.java | 83 ++
.../htsjdk/samtools/metrics/MetricsFileTest.java | 21 +
.../samtools/reference/FastaSequenceIndexTest.java | 8 +-
.../reference/IndexedFastaSequenceFileTest.java | 10 +-
.../java/htsjdk/samtools/sra/SRAIndexTest.java | 150 +++
.../htsjdk/samtools/sra/SRALazyRecordTest.java | 51 +
.../java/htsjdk/samtools/sra/SRAQueryTest.java | 116 +++
.../java/htsjdk/samtools/sra/SRAReferenceTest.java | 25 +
src/tests/java/htsjdk/samtools/sra/SRATest.java | 464 +++++++++
.../java/htsjdk/samtools/util/CodeUtilTest.java | 2 +-
.../htsjdk/samtools/util/DiskBackedQueueTest.java | 30 +-
.../htsjdk/samtools/util/SequenceUtilTest.java | 22 +
.../samtools/util/SortingCollectionTest.java | 39 +-
.../htsjdk/variant/bcf2/BCF2UtilsUnitTest.java | 25 +-
.../variantcontext/filter/AllFailFilter.java} | 68 +-
.../variantcontext/filter/AllPassFilter.java} | 68 +-
.../variantcontext/filter/CompoundFilterTest.java | 78 ++
.../filter/FilteringIteratorTest.java | 88 ++
.../filter/GenotypeQualityFilterTest.java | 105 ++
.../filter/HeterozygosityFilterTest.java | 128 +++
.../filter/PassingVariantFilterTest.java | 46 +
.../variantcontext/filter/SnpFilterTest.java | 54 +
.../VariantContextWriterBuilderUnitTest.java | 10 +
.../java/htsjdk/variant/vcf/VCFHeaderUnitTest.java | 34 +-
.../cram/CRAMException/testContigNotInRef.cram | Bin 0 -> 3433 bytes
.../cram/CRAMException/testContigNotInRef.cram.bai | Bin 0 -> 96 bytes
.../cram/CRAMException/testContigNotInRef.dict | 2 +
.../cram/CRAMException/testContigNotInRef.fa | 2 +
.../cram/CRAMException/testContigNotInRef.fa.fai | 1 +
.../cram/CRAMException/testContigNotInRef.fasta | 2 +
testdata/htsjdk/samtools/cram_tlen.fasta | 41 +
testdata/htsjdk/samtools/cram_tlen.fasta.fai | 8 +
.../htsjdk/samtools/cram_tlen_reads.sorted.sam | 19 +
testdata/htsjdk/samtools/cram_with_bai_index.cram | Bin 0 -> 4213 bytes
.../htsjdk/samtools/cram_with_bai_index.cram.bai | Bin 0 -> 336 bytes
testdata/htsjdk/samtools/cram_with_crai_index.cram | Bin 0 -> 4213 bytes
.../htsjdk/samtools/cram_with_crai_index.cram.crai | Bin 0 -> 77 bytes
testdata/htsjdk/samtools/hg19mini.fasta | 804 +++++++++++++++
testdata/htsjdk/samtools/hg19mini.fasta.fai | 4 +
.../htsjdk/samtools/metrics/metricsOne.metrics | 13 +
.../htsjdk/samtools/metrics/metricsOneCopy.metrics | 13 +
.../metrics/metricsOneModifiedHistogram.metrics | 14 +
.../metrics/metricsOneModifiedMetrics.metrics | 13 +
testdata/htsjdk/samtools/sra/test_archive.sra | Bin 0 -> 1099831 bytes
165 files changed, 11274 insertions(+), 1401 deletions(-)
diff --git a/.idea/modules/htsjdk.iml b/.idea/modules/htsjdk.iml
index b3d5fa4..57d35da 100644
--- a/.idea/modules/htsjdk.iml
+++ b/.idea/modules/htsjdk.iml
@@ -34,10 +34,11 @@
<root url="jar://$MODULE_DIR$/../../lib/commons-logging-1.1.1.jar!/" />
<root url="jar://$MODULE_DIR$/../../lib/snappy-java-1.0.3-rc3.jar!/" />
<root url="jar://$MODULE_DIR$/../../lib/commons-jexl-2.1.1.jar!/" />
+ <root url="jar://$MODULE_DIR$/../../lib/ngs-java-1.2.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</orderEntry>
</component>
-</module>
\ No newline at end of file
+</module>
diff --git a/.travis.yml b/.travis.yml
index f168684..75df51d 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,16 +1,13 @@
language: java
sudo: true
jdk:
- - oraclejdk7
- oraclejdk8
- - openjdk7
- - openjdk6
install: ant
script: ant all test
after_success:
- echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'";
echo "JAVA_HOME='$JAVA_HOME'";
- if [ "$TRAVIS_BRANCH" == "master" ] && [ "$JAVA_HOME" == "/usr/lib/jvm/java-7-oracle" ]; then
+ if [ "$TRAVIS_BRANCH" == "master" ] && [ "$JAVA_HOME" == "/usr/lib/jvm/java-8-oracle" ]; then
sbt \
'set buildSnapshot := true' \
'set javacOptions in (Compile, doc) ++= Seq("-quiet")' \
diff --git a/README.md b/README.md
index 12b185f..f634cec 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,8 @@ Not all sub-packages of htsjdk are subject to the same license, so a license not
#### Java Minimum Version Support Policy
+> **NOTE: _Effective November 24th 2015, HTSJDK has ended support of Java 7 and previous versions. Java 8 is now required_.**
+
We will support all Java SE versions supported by Oracle until at least six months after Oracle's Public Updates period has ended ([see this link](http://www.oracle.com/technetwork/java/eol-135779.html)).
Java SE Major Release | End of Java SE Oracle Public Updates | Proposed End of Support in HTSJDK | Actual End of Support in HTSJDK
diff --git a/build.sbt b/build.sbt
index 1c6b96a..c437ca0 100644
--- a/build.sbt
+++ b/build.sbt
@@ -4,10 +4,12 @@ import sbt.Package.ManifestAttributes
name := "htsjdk"
-val buildVersion = "1.138"
+val buildVersion = "2.0.1"
organization := "com.github.samtools"
+libraryDependencies += "gov.nih.nlm.ncbi" % "ngs-java" % "1.2.2"
+
libraryDependencies += "org.apache.commons" % "commons-jexl" % "2.1.1"
libraryDependencies += "commons-logging" % "commons-logging" % "1.1.1"
@@ -87,9 +89,7 @@ artifactName := { (sv: ScalaVersion, module: ModuleID, artifact: Artifact) =>
crossPaths := false
-javacOptions in Compile ++= Seq("-source", "1.6")
-
-javacOptions in(Compile, compile) ++= Seq("-target", "1.6")
+javacOptions in (Compile,doc) ++= Seq("-Xdoclint:none")
packageOptions := Seq(ManifestAttributes(
("Implementation-Version", s"${implementationVersion.value}"),
diff --git a/build.xml b/build.xml
index b0980fe..44c2ef4 100755
--- a/build.xml
+++ b/build.xml
@@ -35,13 +35,13 @@
<property name="scripts" value="src/scripts"/>
<property name="test.output" value="dist/test"/>
- <property name="javac.target" value="1.6"/>
+ <property name="javac.target" value="1.8"/>
<property name="javac.debug" value="true"/>
<!-- Get GIT hash, if available, otherwise leave it blank. -->
<property name="repository.revision" value=""/>
- <property name="htsjdk-version" value="1.138"/>
+ <property name="htsjdk-version" value="2.0.1"/>
<property name="htsjdk-version-file" value="htsjdk.version.properties"/>
<property name="testng.verbosity" value="2"/>
<property name="test.debug.port" value="5005" /> <!-- override on the command line if desired -->
@@ -157,7 +157,7 @@
<testng suitename="htsjdk-single-test" classpathref="classpath" outputdir="${test.output}"
verbose="${testng.verbosity}">
- <jvmarg line="-Xmx512M ${debug.jvm.args}"/>
+ <jvmarg line="-Xmx512m ${debug.jvm.args}"/>
<classpath>
<pathelement path="${classes}"/>
<pathelement path="${classes.test}"/>
@@ -177,7 +177,7 @@
<fileset dir="${classes}" includes="htsjdk/tribble/**/*.*"/>
<fileset dir="${classes}" includes="htsjdk/variant/**/*.*"/>
<manifest>
- <attribute name="Implementation-Version" value="${hts-version}(${repository.revision})"/>
+ <attribute name="Implementation-Version" value="${htsjdk-version}(${repository.revision})"/>
<attribute name="Implementation-Vendor" value="Broad Institute"/>
</manifest>
</jar>
@@ -197,6 +197,7 @@
protected="true"
use="true"
version="true"
+ additionalparam="-Xdoclint:none -notimestamp"
failonerror="true">
<classpath>
<pathelement location="${java.home}/../lib/tools.jar" />
diff --git a/htsjdk.iml b/htsjdk.iml
index 59a3114..3c722e4 100644
--- a/htsjdk.iml
+++ b/htsjdk.iml
@@ -52,6 +52,15 @@
<SOURCES />
</library>
</orderEntry>
+ <orderEntry type="module-library">
+ <library>
+ <CLASSES>
+ <root url="jar://$MODULE_DIR$/lib/ngs-java-1.2.2.jar!/" />
+ </CLASSES>
+ <JAVADOC />
+ <SOURCES />
+ </library>
+ </orderEntry>
</component>
</module>
diff --git a/src/java/htsjdk/samtools/AbstractBAMFileIndex.java b/src/java/htsjdk/samtools/AbstractBAMFileIndex.java
index a2a1d03..4475e00 100644
--- a/src/java/htsjdk/samtools/AbstractBAMFileIndex.java
+++ b/src/java/htsjdk/samtools/AbstractBAMFileIndex.java
@@ -64,8 +64,9 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
{
mBamDictionary = dictionary;
mIndexBuffer = new IndexStreamBuffer(stream);
-
- seek(4);
+
+ verifyBAMMagicNumber(stream.getSource());
+
sequenceIndexes = new int[readInteger() + 1];
Arrays.fill(sequenceIndexes, -1);
}
@@ -78,15 +79,8 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
mBamDictionary = dictionary;
mIndexBuffer = (useMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file));
- // Verify the magic number.
- seek(0);
- final byte[] buffer = new byte[4];
- readBytes(buffer);
- if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) {
- throw new RuntimeIOException("Invalid file header in BAM index " + file +
- ": " + new String(buffer));
- }
-
+ verifyBAMMagicNumber(file.getName());
+
sequenceIndexes = new int[readInteger() + 1];
Arrays.fill(sequenceIndexes, -1);
}
@@ -399,6 +393,17 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
return Chunk.optimizeChunkList(chunks, minimumOffset);
}
+ private void verifyBAMMagicNumber(final String sourceName) {
+ // Verify the magic number.
+ seek(0);
+ final byte[] buffer = new byte[4];
+ readBytes(buffer);
+ if (!Arrays.equals(buffer, BAMFileConstants.BAM_INDEX_MAGIC)) {
+ throw new RuntimeIOException("Invalid file header in BAM index " + sourceName +
+ ": " + new String(buffer));
+ }
+ }
+
private void skipToSequence(final int sequenceIndex) {
//Use sequence position cache if available
if(sequenceIndexes[sequenceIndex] != -1){
diff --git a/src/java/htsjdk/samtools/SAMFileSpan.java b/src/java/htsjdk/samtools/BAMFileSpan.java
similarity index 83%
copy from src/java/htsjdk/samtools/SAMFileSpan.java
copy to src/java/htsjdk/samtools/BAMFileSpan.java
index 9da77bc..193e443 100644
--- a/src/java/htsjdk/samtools/SAMFileSpan.java
+++ b/src/java/htsjdk/samtools/BAMFileSpan.java
@@ -31,31 +31,6 @@ import java.util.Collections;
import java.util.List;
/**
- * A interface representing a collection of (possibly) discontinuous segments in the
- * BAM file, possibly representing the results of an index query.
- */
-public interface SAMFileSpan extends Cloneable {
- /**
- * Gets a pointer over the data immediately following this span.
- * @return The a pointer to data immediately following this span.
- */
- public SAMFileSpan getContentsFollowing();
-
- /**
- * Remove all pointers in this file span before the given file span starts.
- * @param fileSpan The filespan before which to eliminate.
- * @return The portion of the chunk list after the given chunk.
- */
- public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan);
-
- /**
- * Does this file span point to any data, or is it completely empty?
- * @return True if the file span is empty, false otherwise.
- */
- public boolean isEmpty();
-}
-
-/**
* An ordered list of chunks, capable of representing a set of discontiguous
* regions in the BAM file. FileSpans are mutable within the package, but perceived
* as immutable outside the package.
@@ -66,8 +41,8 @@ public interface SAMFileSpan extends Cloneable {
* @author mhanna
* @version 0.1
*/
-class BAMFileSpan implements SAMFileSpan, Serializable {
- private static final long serialVersionUID = 1L;
+public class BAMFileSpan implements SAMFileSpan, Serializable {
+ private static final long serialVersionUID = 1L;
/**
* The constituent chunks of this list.
@@ -77,7 +52,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
/**
* Create a new empty list of chunks.
*/
- protected BAMFileSpan() {
+ public BAMFileSpan() {
this.chunks = new ArrayList<Chunk>();
}
@@ -86,7 +61,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* a single chunk.
* @param chunk Chunk to use as the sole region in this span.
*/
- protected BAMFileSpan(final Chunk chunk) {
+ public BAMFileSpan(final Chunk chunk) {
this.chunks = new ArrayList<Chunk>();
chunks.add(chunk);
}
@@ -95,7 +70,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Create a new chunk list from the given list of chunks.
* @param chunks Constituent chunks.
*/
- protected BAMFileSpan(final List<Chunk> chunks) {
+ public BAMFileSpan(final List<Chunk> chunks) {
this.chunks = new ArrayList<Chunk>(chunks);
}
@@ -104,7 +79,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @return True iff the ChunkList points to any data within the BAM.
*/
public boolean isEmpty() {
- return chunks.isEmpty();
+ return chunks.isEmpty();
}
/**
@@ -119,11 +94,11 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
}
/**
- * Remove all chunks in this file span before the given file span starts.
+ * Creates a new file span by removing all chunks before the given file span starts.
* If a chunk in the chunk list starts before and ends after the given
* chunk, the first portion of the chunk will be deleted.
* @param fileSpan The filespan before which to eliminate.
- * @return The portion of the chunk list after the given chunk.
+ * @return A new BAMFileSpan which contains the portion of the chunk list after the given chunk.
*/
public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
if(fileSpan == null)
@@ -147,7 +122,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
trimmedChunkList.add(chunkToTrim.clone());
}
else {
- // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
+ // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
}
}
@@ -171,7 +146,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
*
* @param span - span with chunks to add to this one
*/
- public void add(final BAMFileSpan span) {
+ protected void add(final BAMFileSpan span) {
for (final Chunk c : span.chunks) {
chunks.add(c);
}
@@ -184,12 +159,12 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
protected void add(final Chunk chunk) {
chunks.add(chunk);
}
-
+
/**
* Convert the chunk list to an array of offsets, paired in [start,end) format.
* @return Array of offsets.
*/
- protected long[] toCoordinateArray() {
+ public long[] toCoordinateArray() {
final int count = chunks.size() * 2;
if (count == 0) {
return null;
@@ -207,7 +182,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Find the first offset in the chunk list
* @return The first offset in the span
*/
- protected long getFirstOffset() {
+ public long getFirstOffset() {
final long result = 0;
if (chunks == null){
return result;
@@ -222,7 +197,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* Gets the constituent chunks stored in this span.
* @return An unmodifiable list of chunks.
*/
- protected List<Chunk> getChunks() {
+ public List<Chunk> getChunks() {
return Collections.unmodifiableList(chunks);
}
@@ -284,9 +259,9 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
public static BAMFileSpan merge(final BAMFileSpan[] spans) {
final ArrayList<Chunk> inputChunks = new ArrayList<Chunk>();
for (final BAMFileSpan span : spans) {
- if(span != null){
- inputChunks.addAll(span.chunks);
- }
+ if(span != null){
+ inputChunks.addAll(span.chunks);
+ }
}
return new BAMFileSpan(Chunk.optimizeChunkList(inputChunks, 0));
}
diff --git a/src/java/htsjdk/samtools/BAMRecord.java b/src/java/htsjdk/samtools/BAMRecord.java
index f27fe20..c45566f 100644
--- a/src/java/htsjdk/samtools/BAMRecord.java
+++ b/src/java/htsjdk/samtools/BAMRecord.java
@@ -65,6 +65,11 @@ public class BAMRecord extends SAMRecord {
*/
private boolean mBinaryDataStale;
+ /**
+ * Create a new BAM Record. If the reference sequence index or mate reference sequence index are any value other
+ * than NO_ALIGNMENT_REFERENCE_INDEX (-1), then the specified index values must exist in the sequence dictionary
+ * in the header argument.
+ */
protected BAMRecord(final SAMFileHeader header,
final int referenceID,
final int coordinate,
@@ -242,7 +247,7 @@ public class BAMRecord extends SAMRecord {
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
super.initializeCigar(BinaryCigarCodec.decode(byteBuffer));
mCigarDecoded = true;
- if (getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
+ if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
// Don't know line number, and don't want to force read name to be decoded.
SAMUtils.processValidationErrors(validateCigar(-1L), -1, getValidationStringency());
}
diff --git a/src/java/htsjdk/samtools/BAMRecordCodec.java b/src/java/htsjdk/samtools/BAMRecordCodec.java
index 25c2b27..dc1ca81 100644
--- a/src/java/htsjdk/samtools/BAMRecordCodec.java
+++ b/src/java/htsjdk/samtools/BAMRecordCodec.java
@@ -200,7 +200,11 @@ public class BAMRecordCodec implements SortingCollection.Codec<SAMRecord> {
final BAMRecord ret = this.samRecordFactory.createBAMRecord(
header, referenceID, coordinate, readNameLength, mappingQuality,
bin, cigarLen, flags, readLen, mateReferenceID, mateCoordinate, insertSize, restOfRecord);
- ret.setHeader(header);
+
+ if (null != header) {
+ // don't reset a null header as this will clobber the reference and mate reference indices
+ ret.setHeader(header);
+ }
return ret;
}
}
diff --git a/src/java/htsjdk/samtools/BinaryTagCodec.java b/src/java/htsjdk/samtools/BinaryTagCodec.java
index 902e3ba..5603cfc 100644
--- a/src/java/htsjdk/samtools/BinaryTagCodec.java
+++ b/src/java/htsjdk/samtools/BinaryTagCodec.java
@@ -320,12 +320,15 @@ public class BinaryTagCodec {
return (char)byteBuffer.get();
case 'I':
final long val = byteBuffer.getInt() & 0xffffffffL;
- if (val <= Integer.MAX_VALUE) {
+ if ( val <= Integer.MAX_VALUE ) {
return (int)val;
}
- SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
- "Tag value " + val + " too large to store as signed integer.", null), validationStringency);
- // convert to unsigned int stored in a long
+ // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer,
+ // return it directly as a long
+ if (! SAMUtils.isValidUnsignedIntegerAttribute(val)) {
+ SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
+ "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency);
+ }
return val;
case 'i':
return byteBuffer.getInt();
diff --git a/src/java/htsjdk/samtools/CRAMFileReader.java b/src/java/htsjdk/samtools/CRAMFileReader.java
index 79b1f5f..04521ba 100644
--- a/src/java/htsjdk/samtools/CRAMFileReader.java
+++ b/src/java/htsjdk/samtools/CRAMFileReader.java
@@ -17,6 +17,7 @@ package htsjdk.samtools;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SamReader.Type;
+import htsjdk.samtools.cram.CRAIIndex;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
@@ -126,10 +127,30 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
- iterator = new CRAMIterator(inputStream, referenceSource);
- iterator.setValidationStringency(validationStringency);
- if (indexInputStream != null)
- mIndex = new CachingBAMFileIndex(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ iterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
+ if (indexInputStream != null) {
+ try {
+ mIndex = new CachingBAMFileIndex(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ } catch (Exception e) {
+ // try CRAI instead:
+ indexInputStream.seek(0);
+ final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(indexInputStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ mIndex = new CachingBAMFileIndex(baiStream, iterator.getSAMFileHeader().getSequenceDictionary());
+ }
+ }
+ }
+
+ public CRAMFileReader(final InputStream stream,
+ final File indexFile, final ReferenceSource referenceSource,
+ final ValidationStringency validationStringency) throws IOException {
+ this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency);
+ }
+
+ public CRAMFileReader(final File cramFile,
+ final File indexFile, final ReferenceSource referenceSource,
+ final ValidationStringency validationStringency) throws IOException {
+ this(new FileInputStream(cramFile), indexFile, referenceSource, validationStringency);
+ this.cramFile = cramFile;
}
@Override
@@ -165,10 +186,25 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
if (mIndex == null) {
final SAMSequenceDictionary dictionary = getFileHeader()
.getSequenceDictionary();
- mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile,
- dictionary, mEnableIndexMemoryMapping)
- : new DiskBasedBAMFileIndex(mIndexFile, dictionary,
- mEnableIndexMemoryMapping);
+ if (mIndexFile.getName().endsWith(BAMIndex.BAMIndexSuffix)) {
+ mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile,
+ dictionary, mEnableIndexMemoryMapping)
+ : new DiskBasedBAMFileIndex(mIndexFile, dictionary,
+ mEnableIndexMemoryMapping);
+ return mIndex;
+ }
+
+ if (!mIndexFile.getName().endsWith(CRAIIndex.CRAI_INDEX_SUFFIX)) return null;
+ // convert CRAI into BAI:
+ final SeekableStream baiStream;
+ try {
+ baiStream = CRAIIndex.openCraiFileAsBaiStream(mIndexFile, iterator.getSAMFileHeader().getSequenceDictionary());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary()) :
+ new DiskBasedBAMFileIndex(baiStream, getFileHeader().getSequenceDictionary());
}
return mIndex;
}
@@ -191,7 +227,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
try {
// create an input stream that reads the source cram stream only within the coordinate pairs:
final SeekableStream seekableStream = getSeekableStreamOrFailWithRTE();
- return new CRAMIterator(seekableStream, referenceSource, coordinateArray);
+ return new CRAMIterator(seekableStream, referenceSource, coordinateArray, validationStringency);
} catch (final IOException e) {
throw new RuntimeException(e);
}
@@ -210,11 +246,10 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
final CRAMIterator newIterator;
if (cramFile != null) {
newIterator = new CRAMIterator(new FileInputStream(cramFile),
- referenceSource);
+ referenceSource, validationStringency);
} else
- newIterator = new CRAMIterator(inputStream, referenceSource);
+ newIterator = new CRAMIterator(inputStream, referenceSource, validationStringency);
- newIterator.setValidationStringency(validationStringency);
iterator = newIterator;
return iterator;
} catch (final Exception e) {
@@ -322,7 +357,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
if (filePointers == null || filePointers.length == 0)
return emptyIterator;
- final CRAMIterator newIterator = new CRAMIterator(getSeekableStreamOrFailWithRTE(), referenceSource, filePointers);
+ final CRAMIterator newIterator = new CRAMIterator(getSeekableStreamOrFailWithRTE(), referenceSource, filePointers, validationStringency);
return new IntervalIterator(newIterator, new QueryInterval(referenceIndex, start, end), overlap);
}
@@ -334,8 +369,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
final CRAMIterator newIterator;
try {
seekableStream.seek(0);
- newIterator = new CRAMIterator(seekableStream, referenceSource);
- newIterator.setValidationStringency(validationStringency);
+ newIterator = new CRAMIterator(seekableStream, referenceSource, validationStringency);
seekableStream.seek(startOfLastLinearBin >>> 16);
final Container container = ContainerIO.readContainerHeader(newIterator.getCramHeader().getVersion().major, seekableStream);
seekableStream.seek(seekableStream.position() + container.containerByteSize);
diff --git a/src/java/htsjdk/samtools/CRAMFileWriter.java b/src/java/htsjdk/samtools/CRAMFileWriter.java
index dc83bc3..20347a0 100644
--- a/src/java/htsjdk/samtools/CRAMFileWriter.java
+++ b/src/java/htsjdk/samtools/CRAMFileWriter.java
@@ -32,6 +32,7 @@ import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringLineReader;
import java.io.IOException;
@@ -72,22 +73,75 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
private CRAMIndexer indexer;
private long offset;
- public CRAMFileWriter(final OutputStream outputStream, final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
- this(outputStream, null, source, samFileHeader, fileName);
+ /**
+ * Create a CRAMFileWriter on an output stream. Requires input records to be presorted to match the
+ * sort order defined by the input {@code samFileHeader}.
+ *
+ * @param outputStream where to write the output.
+ * @param source reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param fileName used for display in error messages
+ */
+ public CRAMFileWriter(
+ final OutputStream outputStream,
+ final ReferenceSource source,
+ final SAMFileHeader samFileHeader,
+ final String fileName)
+ {
+ this(outputStream, null, source, samFileHeader, fileName); // defaults to presorted == true
}
- public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
+ /**
+ * Create a CRAMFileWriter and index on output streams. Requires input records to be presorted to match the
+ * sort order defined by the input {@code samFileHeader}.
+ *
+ * @param outputStream where to write the output.
+ * @param indexOS where to write the output index. Can be null if no index is required.
+ * @param source reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param fileName used for display in error messages
+ */
+ public CRAMFileWriter(
+ final OutputStream outputStream,
+ final OutputStream indexOS,
+ final ReferenceSource source,
+ final SAMFileHeader samFileHeader,
+ final String fileName)
+ {
+ this(outputStream, indexOS, true, source, samFileHeader, fileName); // defaults to presorted==true
+ }
+
+ /**
+ * Create a CRAMFileWriter and index on output streams.
+ *
+ * @param outputStream where to write the output.
+ * @param indexOS where to write the output index. Can be null if no index is required.
+ * @param presorted if true records written to this writer must already be sorted in the order specified by the header
+ * @param source reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param fileName used for display in error messages
+ */
+ public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted,
+ final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
this.outputStream = outputStream;
- this.source = source;
this.samFileHeader = samFileHeader;
this.fileName = fileName;
- setSortOrder(samFileHeader.getSortOrder(), true);
+ initCRAMWriter(indexOS, source, samFileHeader, presorted);
+ }
+
+ private void initCRAMWriter(final OutputStream indexOS, final ReferenceSource source, final SAMFileHeader samFileHeader, final boolean preSorted) {
+ this.source = source;
+ setSortOrder(samFileHeader.getSortOrder(), preSorted);
setHeader(samFileHeader);
- if (this.source == null) this.source = new ReferenceSource(Defaults.REFERENCE_FASTA);
+ if (this.source == null) {
+ this.source = new ReferenceSource(Defaults.REFERENCE_FASTA);
+ }
containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
- if (indexOS != null) indexer = new CRAMIndexer(indexOS, samFileHeader);
+ if (indexOS != null) {
+ indexer = new CRAMIndexer(indexOS, samFileHeader);
+ }
}
/**
@@ -99,7 +153,6 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
*/
protected boolean shouldFlushContainer(final SAMRecord nextRecord) {
return samRecords.size() >= containerSize || refSeqIndex != REF_SEQ_INDEX_NOT_INITIALIZED && refSeqIndex != nextRecord.getReferenceIndex();
-
}
private static void updateTracks(final List<SAMRecord> samRecords, final ReferenceTracks tracks) {
@@ -250,7 +303,7 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
last = last.next;
}
if (last.templateSize != -templateLength) detach(cramRecord);
- }
+ }else detach(cramRecord);
} else detach(cramRecord);
}
@@ -331,12 +384,20 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
while ((cramRecord = cramRecord.next) != null);
}
+ /**
+ * Write an alignment record.
+ * @param alignment must not be null and must have a valid SAMFileHeader.
+ */
@Override
protected void writeAlignment(final SAMRecord alignment) {
- if (shouldFlushContainer(alignment)) try {
- flushContainer();
- } catch (final Exception e) {
- throw new RuntimeException(e);
+ if (shouldFlushContainer(alignment)) {
+ try {
+ flushContainer();
+ } catch (IOException e) {
+ throw new RuntimeIOException(e);
+ } catch (IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
}
updateReferenceContext(alignment.getReferenceIndex());
@@ -374,12 +435,18 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
@Override
protected void finish() {
try {
- if (!samRecords.isEmpty()) flushContainer();
+ if (!samRecords.isEmpty()) {
+ flushContainer();
+ }
CramIO.issueEOF(cramVersion, outputStream);
outputStream.flush();
- if (indexer != null)
+ if (indexer != null) {
indexer.finish();
- } catch (final Exception e) {
+ }
+ outputStream.close();
+ } catch (final IOException e) {
+ throw new RuntimeIOException(e);
+ } catch (final IllegalAccessException e) {
throw new RuntimeException(e);
}
}
diff --git a/src/java/htsjdk/samtools/CRAMIndexer.java b/src/java/htsjdk/samtools/CRAMIndexer.java
index 338874f..eec8c31 100755
--- a/src/java/htsjdk/samtools/CRAMIndexer.java
+++ b/src/java/htsjdk/samtools/CRAMIndexer.java
@@ -46,7 +46,6 @@ import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
import htsjdk.samtools.util.Log;
-import org.testng.Assert;
import java.io.File;
import java.io.IOException;
@@ -145,39 +144,6 @@ public class CRAMIndexer {
}
/**
- * Generates a BAM index file, either textual or binary, from an input BAI file.
- * Only used for testing, but located here for visibility into CachingBAMFileIndex.
- *
- * @param output BAM Index (.bai) file (or bai.txt file when text)
- * @param textOutput Whether to create text output or binary
- */
- static public void createAndWriteIndex(final File input, final File output, final boolean textOutput) {
-
- // content is from an existing bai file.
-
- final CachingBAMFileIndex existingIndex = new CachingBAMFileIndex(input, null);
- final int nRef = existingIndex.getNumberOfReferences();
- final BAMIndexWriter outputWriter;
- if (textOutput) {
- outputWriter = new TextualBAMIndexWriter(nRef, output);
- } else {
- outputWriter = new BinaryBAMIndexWriter(nRef, output);
- }
-
- // write the content one reference at a time
- try {
- for (int i = 0; i < nRef; i++) {
- outputWriter.writeReference(existingIndex.getQueryResults(i));
- }
- outputWriter.writeNoCoordinateRecordCount(existingIndex.getNoCoordinateCount());
- outputWriter.close();
-
- } catch (final Exception e) {
- throw new SAMException("Exception creating BAM index", e);
- }
- }
-
- /**
* Class for constructing BAM index files.
* One instance is used to construct an entire index.
* processAlignment is called for each alignment until a new reference is encountered, then
@@ -408,7 +374,7 @@ public class CRAMIndexer {
}
} catch (final IOException e) {
- Assert.fail("Failed to read cram container", e);
+ throw new RuntimeException("Failed to read cram container", e);
}
} while (!container.isEOF());
diff --git a/src/java/htsjdk/samtools/CRAMIterator.java b/src/java/htsjdk/samtools/CRAMIterator.java
index fc8915f..6e08f05 100644
--- a/src/java/htsjdk/samtools/CRAMIterator.java
+++ b/src/java/htsjdk/samtools/CRAMIterator.java
@@ -41,6 +41,8 @@ import java.util.Collections;
import java.util.Iterator;
import java.util.List;
+import htsjdk.samtools.cram.CRAMException;
+
public class CRAMIterator implements SAMRecordIterator {
private static final Log log = Log.getInstance(CRAMIterator.class);
private final CountingInputStream countingInputStream;
@@ -78,10 +80,14 @@ public class CRAMIterator implements SAMRecordIterator {
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;
- public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource)
+ public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource, final ValidationStringency validationStringency)
throws IOException {
+ if (null == referenceSource) {
+ throw new CRAMException("A reference source is required for CRAM files");
+ }
this.countingInputStream = new CountingInputStream(inputStream);
this.referenceSource = referenceSource;
+ this.validationStringency = validationStringency;
final CramContainerIterator containerIterator = new CramContainerIterator(this.countingInputStream);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
@@ -93,10 +99,14 @@ public class CRAMIterator implements SAMRecordIterator {
parser = new ContainerParser(cramHeader.getSamFileHeader());
}
- public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
+ public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates, final ValidationStringency validationStringency)
throws IOException {
+ if (null == referenceSource) {
+ throw new CRAMException("A reference source is required for CRAM files");
+ }
this.countingInputStream = new CountingInputStream(seekableStream);
this.referenceSource = referenceSource;
+ this.validationStringency = validationStringency;
final CramSpanContainerIterator containerIterator = CramSpanContainerIterator.fromFileSpan(seekableStream, coordinates);
cramHeader = containerIterator.getCramHeader();
this.containerIterator = containerIterator;
@@ -108,12 +118,18 @@ public class CRAMIterator implements SAMRecordIterator {
parser = new ContainerParser(cramHeader.getSamFileHeader());
}
+ @Deprecated
+ public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
+ throws IOException {
+ this(seekableStream, referenceSource, coordinates, ValidationStringency.DEFAULT_STRINGENCY);
+ }
+
public CramHeader getCramHeader() {
return cramHeader;
}
private void nextContainer() throws IOException, IllegalArgumentException,
- IllegalAccessException {
+ IllegalAccessException, CRAMException {
if (containerIterator != null) {
if (!containerIterator.hasNext()) {
@@ -145,7 +161,7 @@ public class CRAMIterator implements SAMRecordIterator {
else
cramRecords.clear();
- parser.getRecords(container, cramRecords);
+ parser.getRecords(container, cramRecords, validationStringency);
if (container.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
refs = new byte[]{};
@@ -156,6 +172,9 @@ public class CRAMIterator implements SAMRecordIterator {
final SAMSequenceRecord sequence = cramHeader.getSamFileHeader()
.getSequence(container.sequenceId);
refs = referenceSource.getReferenceBases(sequence, true);
+ if (refs == null) {
+ throw new CRAMException(String.format("Contig %s not found in the reference file.", sequence.getSequenceName()));
+ }
prevSeqId = container.sequenceId;
}
@@ -242,7 +261,12 @@ public class CRAMIterator implements SAMRecordIterator {
if (!iterator.hasNext()) {
try {
nextContainer();
- } catch (final Exception e) {
+ } catch (CRAMException ce) {
+ throw ce;
+ } catch (SAMFormatException se) {
+ throw se;
+ }
+ catch (final Exception e) {
throw new RuntimeEOFException(e);
}
}
diff --git a/src/java/htsjdk/samtools/ChainedDownsamplingIterator.java b/src/java/htsjdk/samtools/ChainedDownsamplingIterator.java
new file mode 100644
index 0000000..4fa3a7d
--- /dev/null
+++ b/src/java/htsjdk/samtools/ChainedDownsamplingIterator.java
@@ -0,0 +1,90 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools;
+
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A DownsamplingIterator that combines the ConstantMemory and HighAccuracy downsampling techniques to provide an
+ * iterator that has accuracy approaching that of HighAccuracy, but with more limited memory usage. Instead of
+ * requiring memory proportional to number of read names in the incoming stream of reads, requires memory
+ * approximately proportional to the number of output reads.
+ *
+ * @author Tim Fennell
+ */
+class ChainedDownsamplingIterator extends HighAccuracyDownsamplingIterator {
+ public static final int MIN_ACCURATE_INPUT_READS = 50000;
+
+ /**
+ * Constructs a chained iterator that will read from the provided iterator and attempt to downsample to the provided proportion.
+ */
+ ChainedDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) {
+ super(new ConstantMemoryDownsamplingIterator(iterator, adjustProportion(proportion), seed), proportion, seed);
+
+ // Deal with the fact that the iterator will advance and discard some reads at construction
+ final long discarded = ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).getDiscardedCount();
+ recordDiscardRecords(discarded);
+ }
+
+ /**
+ * Calculates the upper bound of 99.9% CI given the proportion, that is used to "buffer" the proportion on
+ * the constant memory downsampler, to make sure it leaves enough reads for us to downsample.
+ *
+ * Uses an assumed number of reads tested as this is often not known until after the fact.
+ */
+ private static double adjustProportion(final double p) {
+ final double ciAdjustment99_9 = 3.3 * Math.sqrt(p/MIN_ACCURATE_INPUT_READS);
+ return Math.min(1, p + ciAdjustment99_9);
+ }
+
+
+ /**
+ * Resets statistics before reading from the underlying iterator.
+ */
+ @Override
+ protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) {
+ // Reset the stats on the underlying iterator
+ ((ConstantMemoryDownsamplingIterator) getUnderlyingIterator()).resetStatistics();
+
+ // Read from the underlying iterator
+ super.readFromUnderlyingIterator(recs, names, templatesToRead);
+ }
+
+ @Override
+ protected int calculateTemplatesToKeep(final int templatesRead, final double overallProportion) {
+ // Calculate an adjusted proportion to keep, knowing what proportion the underlying iterator discarded
+ final ConstantMemoryDownsamplingIterator iter = (ConstantMemoryDownsamplingIterator) getUnderlyingIterator();
+ final double priorProportion = iter.getAcceptedFraction();
+ final double p = Math.max(0, Math.min(1, overallProportion / priorProportion));
+ final int retval = super.calculateTemplatesToKeep(templatesRead, p);
+
+ // Record all the discarded records to keep the overall statistics accurate, but do it after
+ // the call to super() so it doesn't affect the proportion calculation.
+ recordDiscardRecords(iter.getDiscardedCount());
+
+ return retval;
+ }
+}
diff --git a/src/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java b/src/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java
new file mode 100644
index 0000000..c6e0de4
--- /dev/null
+++ b/src/java/htsjdk/samtools/ConstantMemoryDownsamplingIterator.java
@@ -0,0 +1,88 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.Murmur3;
+import htsjdk.samtools.util.PeekableIterator;
+
+import java.util.Iterator;
+
+/**
+ * A DownsamplingIterator that runs in constant (and very small) memory. For each read the read name is hashed
+ * using the Murmur3_32 hash algorithm to obtain an integer value that is, enough for our purposes, uniformly
+ * distributed between the min and max int values even for highly similar inputs. The proportion is used to
+ * calculate a maximum acceptable hash value within the range. Records whose hash value is at or below the limit
+ * are emitted, records whose hash value is above the limit are discarded.
+ *
+ * Does not make any attempt to be accurate (have actual proportion == requested proportion) beyond what would
+ * be expected for a random process and so may become quite inaccurate when downsampling to small numbers of
+ * reads.
+ *
+ * @author Tim Fennell
+ */
+class ConstantMemoryDownsamplingIterator extends DownsamplingIterator {
+ private final PeekableIterator<SAMRecord> underlyingIterator;
+ private final int maxHashValue;
+ private final Murmur3 hasher;
+
+
+ /** Constructs a downsampling iterator upon the supplied iterator, using a Murmur3 hash of each read name (seeded with the supplied seed) to decide which records to keep. */
+ ConstantMemoryDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) {
+ super(proportion);
+ this.hasher = new Murmur3(seed);
+ this.underlyingIterator = new PeekableIterator<SAMRecord>(iterator);
+
+ final long range = (long) Integer.MAX_VALUE - (long) Integer.MIN_VALUE;
+ this.maxHashValue = Integer.MIN_VALUE + (int) Math.round(range * proportion);
+
+ advanceToNextAcceptedRead();
+ }
+
+ /** Returns true if there is another record available post-downsampling, false otherwise. */
+ @Override public boolean hasNext() {
+ // The underlying iterator is always left at the next return-able read, so if it has a next read, so do we
+ return this.underlyingIterator.hasNext();
+ }
+
+ /**
+ * Advances the underlying, peekable, iterator until the next record is one that is to be emitted.
+ * @return true if there is at least one emittable record ready for emission, false otherwise
+ */
+ private boolean advanceToNextAcceptedRead() {
+ while (this.underlyingIterator.hasNext() && this.hasher.hashUnencodedChars(this.underlyingIterator.peek().getReadName()) > this.maxHashValue) {
+ this.underlyingIterator.next();
+ recordDiscardedRecord();
+ }
+
+ return this.underlyingIterator.hasNext();
+ }
+
+ /** Returns the next record from the iterator, or throws an exception if there is no next record. */
+ @Override public SAMRecord next() {
+ final SAMRecord rec = this.underlyingIterator.next();
+ recordAcceptedRecord();
+ advanceToNextAcceptedRead();
+ return rec;
+ }
+}
diff --git a/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java b/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java
index 2f23a48..8a6077a 100644
--- a/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java
+++ b/src/java/htsjdk/samtools/DefaultSAMRecordFactory.java
@@ -18,7 +18,11 @@ public class DefaultSAMRecordFactory implements SAMRecordFactory {
return new SAMRecord(header);
}
- /** Create a new BAM Record. */
+ /**
+ * Create a new BAM Record. If the reference sequence index or mate reference sequence index are
+ * any value other than NO_ALIGNMENT_REFERENCE_INDEX, the values must be resolvable against the sequence
+ * dictionary in the header argument.
+ */
public BAMRecord createBAMRecord (final SAMFileHeader header,
final int referenceSequenceIndex,
final int alignmentStart,
diff --git a/src/java/htsjdk/samtools/DownsamplingIterator.java b/src/java/htsjdk/samtools/DownsamplingIterator.java
index 4ae8ffb..8ca0d84 100644
--- a/src/java/htsjdk/samtools/DownsamplingIterator.java
+++ b/src/java/htsjdk/samtools/DownsamplingIterator.java
@@ -25,127 +25,89 @@ package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
-import java.util.Random;
-
/**
- * An iterator of SAMRecords that can downsample on the fly. Allows for inclusion of secondary and/or
- * supplemental records (off by default), though this will cause memory use to increase as the decisions
- * for each read name must be cached permanently.
+ * Abstract base class for all DownsamplingIterators that provides a uniform interface for recording
+ * and reporting statistics about how many records have been kept and discarded.
+ *
+ * A DownsamplingIterator is an iterator that takes another iterator of SAMRecords and filters out a
+ * subset of those records in a random way, while ensuring that all records for a template (i.e. record name)
+ * are either retained or discarded. Strictly speaking the proportion parameter applies to templates,
+ * though in most instances it is safe to think about it being applied to records.
*
* @author Tim Fennell
*/
-public class DownsamplingIterator implements CloseableIterator<SAMRecord>, Iterable<SAMRecord> {
- private final Iterator<SAMRecord> underlyingIterator;
- private final Random random;
- private final double probabilityOfKeeping;
- private SAMRecord nextRecord;
- private long totalReads, keptReads;
- private final Map<String, Boolean> decisions = new HashMap<String, Boolean>();
- private boolean allowSecondaryAlignments = false;
- private boolean allowSupplementalAlignments = false;
- private boolean includeNoRefReads = true;
-
- /** Constructs a downsampling iterator upon the supplied iterator, using the Random as the source of randomness. */
- public DownsamplingIterator(final Iterator<SAMRecord> iterator, final Random random, final double probabilityOfKeeping) {
- this.underlyingIterator = iterator;
- this.random = random;
- this.probabilityOfKeeping = probabilityOfKeeping;
+public abstract class DownsamplingIterator implements CloseableIterator<SAMRecord> {
+ private long recordsSeen;
+ private long recordsAccepted;
+ private double targetProportion;
+
+ /** Constructs a downsampling iterator that aims to retain the targetProportion of reads. */
+ public DownsamplingIterator(final double targetProportion) {
+ if (targetProportion < 0) throw new IllegalArgumentException("targetProportion must be >= 0");
+ if (targetProportion > 1) throw new IllegalArgumentException("targetProportion must be <= 1");
+ this.targetProportion = targetProportion;
}
- /** Sets whether or not secondary alignments are allowed (true) or all discarded (false). */
- public DownsamplingIterator setAllowSecondaryAlignments(final boolean allowSecondaryAlignments) {
- this.allowSecondaryAlignments = allowSecondaryAlignments;
- return this;
- }
+ /** Does nothing. */
+ @Override public void close() { /** No Op. */ }
- /** Sets whether or not supplemental alignments are allowed (true) or all discarded (false). */
- public DownsamplingIterator setAllowSupplementalAlignments(final boolean allowSupplementalAlignments) {
- this.allowSupplementalAlignments = allowSupplementalAlignments;
- return this;
- }
+ /** Returns the number of records seen, including accepted and discarded, since creation or the last call to resetStatistics. */
+ public long getSeenCount() { return this.recordsSeen; }
- /** Sets whether the iterator will stop when no-ref reads are encountered, or keep downsampling through them. */
- public DownsamplingIterator setIncludeNoRefReads(final boolean includeNoRefReads) {
- this.includeNoRefReads = includeNoRefReads;
- return this;
- }
+ /** Returns the number of records returned since creation or the last call to resetStatistics. */
+ public long getAcceptedCount() { return this.recordsAccepted; }
+
+ /** Returns the number of records discarded since creation or the last call to resetStatistics. */
+ public long getDiscardedCount() { return this.recordsSeen - this.recordsAccepted; }
- /** Returns the total number of reads/records considered up to the point when the method is called. */
- public long getTotalReads() { return totalReads; }
+ /** Gets the fraction of records discarded since creation or the last call to resetStatistics(). */
+ public double getDiscardedFraction() { return getDiscardedCount() / (double) getSeenCount(); }
- /** Returns the number of reads/records kept post-downsampling up to the point when the method is called. */
- public long getKeptReads() { return keptReads; }
+ /** Gets the fraction of records accepted since creation or the last call to resetStatistics(). */
+ public double getAcceptedFraction() { return getAcceptedCount() / (double) getSeenCount(); }
- /** Simple implementation of iterable that returns this iterator. */
- @Override public Iterator<SAMRecord> iterator() { return this; }
+ /** Resets the statistics for records seen/accepted/discarded. */
+ public void resetStatistics() {
+ this.recordsSeen = 0;
+ this.recordsAccepted = 0;
+ }
+
+ /** Gets the target proportion of records that should be retained during downsampling. */
+ public double getTargetProportion() {
+ return targetProportion;
+ }
+
+ /** Method for subclasses to record a record as being discarded. */
+ protected final void recordDiscardedRecord() { this.recordsSeen++; }
/**
- * Clears the current record and attempts to advance through the underlying iterator until a
- * record is kept during downsampling. If no more records are kept and the end of the input
- * is reached this.nextRecord will be null.
- *
- * @return true if a record is available after advancing, false otherwise
+ * Method for subclasses to record a single record as being accepted. Increments both the seen
+ * and the accepted counts; takes no arguments.
*/
- private boolean advance() {
- this.nextRecord = null;
- final boolean oneRecPerRead = !allowSecondaryAlignments && !allowSupplementalAlignments;
-
- while (this.nextRecord == null && this.underlyingIterator.hasNext()) {
- final SAMRecord rec = this.underlyingIterator.next();
- if (!this.allowSecondaryAlignments && rec.getNotPrimaryAlignmentFlag()) continue;
- if (!this.allowSupplementalAlignments && rec.getSupplementaryAlignmentFlag()) continue;
- if (!this.includeNoRefReads && rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) break;
-
- ++totalReads;
-
- final String key = rec.getReadName();
- final Boolean previous = oneRecPerRead ? decisions.remove(key) : decisions.get(key);
- final boolean keeper;
-
- if (previous == null) {
- keeper = this.random.nextDouble() <= this.probabilityOfKeeping;
- if (rec.getReadPairedFlag() || this.allowSecondaryAlignments || this.allowSupplementalAlignments) decisions.put(key, keeper);
- }
- else {
- keeper = previous;
- }
-
- if (keeper) {
- this.nextRecord = rec;
- ++keptReads;
- }
- }
-
- return this.nextRecord != null;
- }
+ protected final void recordAcceptedRecord() { this.recordsSeen++; this.recordsAccepted++; }
- /** Returns true if there is another record available post-downsampling, false otherwise. */
- @Override public boolean hasNext() {
- return this.nextRecord != null || advance();
+ /** Record one or more records as having been discarded. */
+ protected final void recordDiscardRecords(final long n) {
+ this.recordsSeen += n;
}
- /** Returns the next record from the iterator, or throws an exception if there is no next record. */
- @Override public SAMRecord next() {
- if (this.nextRecord == null) {
- throw new NoSuchElementException("Call to next() when hasNext() == false");
- }
- else {
- final SAMRecord retval = this.nextRecord;
- advance();
- return retval;
- }
+ /** Record one or more records as having been accepted. */
+ protected final void recordAcceptedRecords(final long n) {
+ this.recordsSeen += n;
+ this.recordsAccepted += n;
}
- /** Unsupported operation. */
- @Override public void remove() {
- throw new UnsupportedOperationException("remove() is not supported.");
+ /**
+ * Indicates whether or not the strategy implemented by this DownsamplingIterator makes any effort to
+ * increase accuracy beyond random sampling (i.e. to reduce the delta between the requested proportion
+ * of reads and the actually emitted proportion of reads).
+ */
+ public boolean isHigherAccuracy() {
+ return false;
}
- @Override public void close() {
- // Do nothing.
+ /** Not supported. */
+ @Override public void remove() {
+ throw new UnsupportedOperationException("remove() not supported in DownsamplingIterators");
}
}
diff --git a/src/java/htsjdk/samtools/DownsamplingIteratorFactory.java b/src/java/htsjdk/samtools/DownsamplingIteratorFactory.java
new file mode 100644
index 0000000..d54e706
--- /dev/null
+++ b/src/java/htsjdk/samtools/DownsamplingIteratorFactory.java
@@ -0,0 +1,118 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.IOUtil;
+
+import java.io.File;
+import java.util.Iterator;
+
+/**
+ * A factory for creating DownsamplingIterators that uses a number of different strategies to achieve downsampling while
+ * meeting various criteria.
+ *
+ * @author Tim Fennell
+ */
+public class DownsamplingIteratorFactory {
+ public static final String HIGH_ACCURACY_DESCRIPTION =
+ "Attempts (but does not guarantee) to provide accuracy up to a specified limit. Accuracy is defined as emitting " +
+ "a proportion of reads as close to the requested proportion as possible. In order to do so this strategy requires " +
+ "memory that is proportional to the number of template names in the incoming stream of reads, and will thus require " +
+ "large amounts of memory when running on large input files.";
+
+ public static final String CONSTANT_MEMORY_DESCRPTION =
+ "Downsamples a stream or file of SAMRecords using a hash-projection strategy such that it can run in constant memory. " +
+ "The downsampling is stochastic, and therefore the actual retained proportion will vary around the requested proportion. Due " +
+ "to working in fixed memory this strategy is good for large inputs, and due to the stochastic nature the accuracy of this strategy " +
+ "is highest with a high number of output records, and diminishes at low output volumes.";
+
+ public static final String CHAINED_DESCRIPTION =
+ "Attempts to provide a compromise strategy that offers some of the advantages of both the ConstantMemory and HighAccuracy strategies. " +
+ "Uses a ConstantMemory strategy to downsample the incoming stream to approximately the desired proportion, and then a HighAccuracy " +
+ "strategy to finish. Works in a single pass, and will provide accuracy close to (but often not as good as) HighAccuracy while requiring " +
+ "memory proportional to the set of reads emitted from the ConstantMemory strategy to the HighAccuracy strategy. Works well when downsampling " +
+ "large inputs to small proportions (e.g. downsampling hundreds of millions of reads and retaining only 2%. Should be accurate 99.9% of the time " +
+ "when the input contains >= 50,000 templates (read names). For smaller inputs, HighAccuracy is recommended instead.";
+
+ /** Describes the available downsampling strategies. */
+ public enum Strategy {
+ HighAccuracy(HIGH_ACCURACY_DESCRIPTION),
+ ConstantMemory(CONSTANT_MEMORY_DESCRPTION),
+ Chained(CHAINED_DESCRIPTION);
+
+ public final String description;
+
+ Strategy(final String description) {
+ this.description = description;
+ }
+
+ /** Gets the description of the strategy. */
+ public String getDescription() {
+ return description;
+ }
+ }
+
+ /**
+ * Creates a new DownsamplingIterator using the supplied Strategy that attempts to read from the provided iterator and return
+ * approximately proportion of the records read.
+ *
+ * @param iterator The iterator from which to consume SAMRecords
+ * @param strategy The downsampling strategy to use
+ * @param proportion The proportion of records the downsampling strategy should attempt to emit
+ * @param accuracy If supported by the downsampling strategy, the accuracy goal for the downsampler. Higher accuracy will generally
+ * require higher memory usage. An accuracy value of 0.0001 tells the strategy to try and ensure the emitted proportion
+ * is within proportion +/- 0.0001.
+ * @param seed The seed value to use for any random process used in down-sampling.
+ */
+ public static DownsamplingIterator make(final Iterator<SAMRecord> iterator, final Strategy strategy, final double proportion, final double accuracy, final int seed) {
+ if (strategy == null) throw new IllegalArgumentException("strategy may not be null");
+ if (iterator == null) throw new IllegalArgumentException("iterator may not be null");
+ if (proportion < 0) throw new IllegalArgumentException("proportion must be greater than 0");
+ if (proportion > 1) throw new IllegalArgumentException("proportion must be less than 1");
+
+ switch (strategy) {
+ case HighAccuracy: return new HighAccuracyDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
+ case ConstantMemory: return new ConstantMemoryDownsamplingIterator(iterator, proportion, seed);
+ case Chained: return new ChainedDownsamplingIterator(iterator, proportion, seed).setTargetAccuracy(accuracy);
+ default: throw new IllegalStateException("Unexpected value for Strategy enum in switch statement. Bug!!");
+ }
+ }
+
+ /**
+ * Convenience method that constructs a downsampling iterator for all the reads in a SAM file.
+ * See {@link DownsamplingIteratorFactory#make(Iterator, Strategy, double, double, int)} for detailed parameter information.
+ */
+ public static DownsamplingIterator make(final File samFile, final Strategy strategy, final double proportion, final double accuracy, final int seed) {
+ IOUtil.assertFileIsReadable(samFile);
+ return make(SamReaderFactory.makeDefault().open(samFile), strategy, proportion, accuracy, seed);
+ }
+
+ /**
+ * Convenience method that constructs a downsampling iterator for all the reads available from a SamReader.
+ * See {@link DownsamplingIteratorFactory#make(Iterator, Strategy, double, double, int)} for detailed parameter information.
+ */
+ public static DownsamplingIterator make(final SamReader reader, final Strategy strategy, final double proportion, final double accuracy, final int seed) {
+ return make(reader.iterator(), strategy, proportion, accuracy, seed);
+ }
+}
diff --git a/src/java/htsjdk/samtools/DuplicateScoringStrategy.java b/src/java/htsjdk/samtools/DuplicateScoringStrategy.java
index c6e0884..9d0bed5 100644
--- a/src/java/htsjdk/samtools/DuplicateScoringStrategy.java
+++ b/src/java/htsjdk/samtools/DuplicateScoringStrategy.java
@@ -36,6 +36,9 @@ public class DuplicateScoringStrategy {
TOTAL_MAPPED_REFERENCE_LENGTH
}
+ /** An enum to use for storing temporary attributes on SAMRecords. */
+ private static enum Attr { DuplicateScore }
+
/** Calculates a score for the read which is the sum of scores over Q15. */
private static short getSumOfBaseQualities(final SAMRecord rec) {
short score = 0;
@@ -60,22 +63,30 @@ public class DuplicateScoringStrategy {
* computed on both ends.
*/
public static short computeDuplicateScore(final SAMRecord record, final ScoringStrategy scoringStrategy, final boolean assumeMateCigar) {
- short score = 0;
+ Short storedScore = (Short) record.getTransientAttribute(Attr.DuplicateScore);
+
+ if (storedScore == null) {
+ short score = 0;
- switch (scoringStrategy) {
- case SUM_OF_BASE_QUALITIES:
- score += getSumOfBaseQualities(record);
- break;
- case TOTAL_MAPPED_REFERENCE_LENGTH:
- if (!record.getReadUnmappedFlag()) {
- score += record.getCigar().getReferenceLength();
- }
- if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
- score += SAMUtils.getMateCigar(record).getReferenceLength();
- }
- break;
+ switch (scoringStrategy) {
+ case SUM_OF_BASE_QUALITIES:
+ score += getSumOfBaseQualities(record);
+ break;
+ case TOTAL_MAPPED_REFERENCE_LENGTH:
+ if (!record.getReadUnmappedFlag()) {
+ score += record.getCigar().getReferenceLength();
+ }
+ if (assumeMateCigar && record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
+ score += SAMUtils.getMateCigar(record).getReferenceLength();
+ }
+ break;
+ }
+
+ storedScore = score;
+ record.setTransientAttribute(Attr.DuplicateScore, storedScore);
}
- return score;
+
+ return storedScore;
}
/**
diff --git a/src/java/htsjdk/samtools/DuplicateSet.java b/src/java/htsjdk/samtools/DuplicateSet.java
index df34526..8333069 100644
--- a/src/java/htsjdk/samtools/DuplicateSet.java
+++ b/src/java/htsjdk/samtools/DuplicateSet.java
@@ -32,21 +32,23 @@ import java.util.List;
* considered the representative of the duplicate, and typically does not have it's duplicate flag set.
* The records' duplicate flag will be set appropriately as records are added. This behavior can be
* turned off.
- *
+ *
* At this time, this set does not track optical duplicates.
*
* @author nhomer
*/
public class DuplicateSet {
-
+
private final List<SAMRecord> records;
private static final SAMRecordDuplicateComparator defaultComparator = new SAMRecordDuplicateComparator();
private final SAMRecordDuplicateComparator comparator;
-
+
+ private SAMRecord representative = null;
+
private boolean needsSorting = false;
-
+
private boolean setDuplicateFlag = false;
/** Sets the duplicate flag by default */
@@ -67,7 +69,7 @@ public class DuplicateSet {
this.setDuplicateFlag = setDuplicateFlag;
this.comparator = comparator;
}
-
+
/**
* Adds a record to the set and returns zero if either the set is empty, or it is a duplicate of the records already in the set. Otherwise,
* it does not add the record and returns non-zero.
@@ -77,24 +79,32 @@ public class DuplicateSet {
public int add(final SAMRecord record) {
if (!this.records.isEmpty()) {
- final int cmp = this.comparator.duplicateSetCompare(this.getRepresentative(), record);
+ final int cmp = this.comparator.duplicateSetCompare(this.representative, record);
if (0 != cmp) {
return cmp;
}
+
+ // update representative
+ if (0 < this.comparator.compare(this.representative, record)) {
+ this.representative = record;
+ }
+ }
+ else {
+ this.representative = record;
}
-
+
this.records.add(record);
needsSorting = true;
-
+
return 0;
}
private void sort() {
if (!records.isEmpty()) {
- Collections.sort(records, this.comparator);
-
- final SAMRecord representative = records.get(0);
-
+ if (1 < records.size()) {
+ Collections.sort(records, this.comparator);
+ }
+
if (setDuplicateFlag) {
// reset duplicate flags
for (final SAMRecord record : records) {
@@ -104,30 +114,42 @@ public class DuplicateSet {
}
records.get(0).setDuplicateReadFlag(false);
}
+
+ if (!records.get(0).equals(this.representative)) {
+ throw new SAMException("BUG: the representative was not the first record after sorting."
+ + "\nFIRST: " + records.get(0).getSAMString() + "\nSECOND: " + this.representative.getSAMString());
+ }
}
needsSorting = false; // this could be in the if above if you think hard about it
}
/**
* Gets the list of records from this set.
+ *
+ * Setting sort to false likely will not yield records in duplicate order within the set.
+ *
+ * @param sort true if we want the records in the duplicate set sorted by duplicate order, false if we do not care about the order.
*/
- public List<SAMRecord> getRecords() {
- if (needsSorting) {
+ public List<SAMRecord> getRecords(final boolean sort) {
+ if (sort && needsSorting) {
sort();
}
-
+
return this.records;
}
/**
+ * Gets the list of records from this set.
+ */
+ public List<SAMRecord> getRecords() {
+ return getRecords(true);
+ }
+
+ /**
* Gets the representative record according to the duplicate comparator.
*/
public SAMRecord getRepresentative() {
- if (needsSorting) {
- sort();
- }
-
- return records.get(0);
+ return this.representative;
}
/**
@@ -149,8 +171,8 @@ public class DuplicateSet {
}
}
return n;
- }
-
+ }
+
public boolean isEmpty() {
return this.records.isEmpty();
}
@@ -158,5 +180,5 @@ public class DuplicateSet {
/**
* Controls if we should update the duplicate flag of the records in this set.
*/
- public void setDuplicateFlag(boolean setDuplicateFlag) { this.setDuplicateFlag = setDuplicateFlag; }
-}
+ public void setDuplicateFlag(final boolean setDuplicateFlag) { this.setDuplicateFlag = setDuplicateFlag; }
+}
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java b/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
new file mode 100644
index 0000000..f8561b6
--- /dev/null
+++ b/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
@@ -0,0 +1,196 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+import java.util.Random;
+import java.util.Set;
+
+/**
+ * A DownsamplingIterator that attempts to provide very high accuracy (minimizing the difference between emitted proportion
+ * and requested proportion) at the expense of using memory proportional to the number of reads in the incoming stream.
+ *
+ * @author Tim Fennell
+ */
+class HighAccuracyDownsamplingIterator extends DownsamplingIterator {
+ private final Iterator<SAMRecord> underlyingIterator;
+ private final Random random;
+ private SAMRecord nextRecord;
+ private final Map<String, Boolean> decisions = new HashMap<String, Boolean>();
+
+ private double targetAccuracy = 0.0001;
+ private long totalTemplates, keptTemplates;
+ private Iterator<SAMRecord> bufferedRecords = new ArrayList<SAMRecord>().iterator();
+ private Set<String> bufferedRecordsToKeep;
+
+ /** Override method to make it clear that this iterator attempts to provide a higher accuracy of downsampling. */
+ @Override public boolean isHigherAccuracy() {
+ return true;
+ }
+
+ /** Constructs a downsampling iterator upon the supplied iterator, using a Random seeded with the supplied seed as the source of randomness. */
+ HighAccuracyDownsamplingIterator(final Iterator<SAMRecord> iterator, final double proportion, final int seed) {
+ super(proportion);
+ this.underlyingIterator = iterator;
+ this.random = new Random(seed);
+ }
+
+ /**
+ * Sets the target accuracy of the downsampling iterator. The value should be thought of as
+ * probability +/- accuracy. So a value of 0.001 would instruct the downsampling iterator to
+ * attempt to guarantee accuracy to within 0.1%. The downsampler will need to buffer reads
+ * for 1/accuracy templates, so setting this to extremely small numbers is not advisable.
+ */
+ public DownsamplingIterator setTargetAccuracy(final double accuracy) {
+ if (accuracy >= 1 || accuracy <= 1d/Integer.MAX_VALUE) throw new IllegalArgumentException("Illegal value. Must be 1/MAX_INT < accuracy < 1");
+ this.targetAccuracy = accuracy;
+ return this;
+ }
+
+ /** Returns true if there is another record available post-downsampling, false otherwise. */
+ @Override public boolean hasNext() {
+ return this.nextRecord != null || advance();
+ }
+
+ /** Returns the next record from the iterator, or throws an exception if there is no next record. */
+ @Override public SAMRecord next() {
+ if (this.nextRecord == null) {
+ throw new NoSuchElementException("Call to next() when hasNext() == false");
+ }
+ else {
+ final SAMRecord retval = this.nextRecord;
+ advance();
+ return retval;
+ }
+ }
+
+ /** Returns the underlying iterator so that subclasses may manipulate it. */
+ protected Iterator<SAMRecord> getUnderlyingIterator() {
+ return this.underlyingIterator;
+ }
+
+ /**
+ * Clears the current record and attempts to advance through the underlying iterator until a
+ * record is kept during downsampling. If no more records are kept and the end of the input
+ * is reached this.nextRecord will be null.
+ *
+ * @return true if a record is available after advancing, false otherwise
+ */
+ protected boolean advance() {
+ this.nextRecord = null;
+
+ while (this.nextRecord == null && (this.bufferedRecords.hasNext() || bufferNextChunkOfRecords(getTargetProportion(), this.targetAccuracy))) {
+ final SAMRecord rec = this.bufferedRecords.next();
+ final String key = rec.getReadName();
+ final Boolean previous = decisions.get(key);
+ final boolean keepThisRecord;
+
+ if (previous == null) {
+ keepThisRecord = this.bufferedRecordsToKeep.contains(rec.getReadName());
+ decisions.put(key, keepThisRecord);
+ }
+ else {
+ keepThisRecord = previous;
+ }
+
+ if (keepThisRecord) {
+ this.nextRecord = rec;
+ recordAcceptedRecord();
+ }
+ else {
+ recordDiscardedRecord();
+ }
+ }
+
+ return this.nextRecord != null;
+ }
+
+ /**
+ * Buffers reads until either the end of the file is reached or enough reads have been buffered such
+ * that downsampling can be performed to the desired target accuracy. Once reads have been buffered,
+ * template names are randomly sampled out for discarding until the desired number of reads have
+ * been discarded.
+ *
+ * @return True if one or more reads have been buffered, false otherwise
+ */
+ protected boolean bufferNextChunkOfRecords(final double proportion, final double accuracy) {
+ final int templatesToRead = (int) Math.ceil(1 / accuracy);
+ final Set<String> names = new HashSet<String>();
+ final List<SAMRecord> recs = new ArrayList<SAMRecord>(templatesToRead);
+
+ readFromUnderlyingIterator(recs, names, templatesToRead);
+
+ // Determine how many templates to keep/discard
+ final int templatesRead = names.size();
+ final int templatesToKeep = calculateTemplatesToKeep(templatesRead, proportion);
+
+ // Randomly shuffle a list of all the template names, and then remove some from the set
+ final int templatesToDiscard = templatesRead - templatesToKeep;
+ final List<String> tmp = new ArrayList<String>(names);
+ Collections.shuffle(tmp, this.random);
+ for (int i = 0; i < templatesToDiscard; ++i) names.remove(tmp.get(i));
+
+ // Set all the instance state so that advance()/next() get what they need
+ this.bufferedRecordsToKeep = names;
+ this.bufferedRecords = recs.iterator();
+ this.totalTemplates += templatesRead;
+ this.keptTemplates += names.size();
+ return recs.size() > 0;
+ }
+
+ /**
+ * Calculates the number of templates to keep in a specific batch of reads having just read templatesRead reads
+ * and wanting to keep proportion of them. Rounds the final number up or down based on whether, to this point,
+ * the iterator is under or over its goal proportion.
+ *
+ * Implemented as second method to allow ChainedDownsamplingIterator to tamper with the strategy!
+ */
+ protected int calculateTemplatesToKeep(final int templatesRead, final double proportion) {
+ final double rawTemplatesToKeep = templatesRead * proportion;
+ return (keptTemplates / (double) totalTemplates < proportion)
+ ? (int) Math.ceil(rawTemplatesToKeep) : (int) Math.floor(rawTemplatesToKeep);
+ }
+
+ /**
+ * Reads from the underlying iterator until it has observed templatesToRead templates (i.e. read names) that it has not yet
+ * observed, so that templatesToRead new keep/reject decisions can be made. The records that are read are placed into recs
+ * and _novel_ template names are placed into names.
+ */
+ protected void readFromUnderlyingIterator(final List<SAMRecord> recs, final Set<String> names, final int templatesToRead) {
+ while (this.underlyingIterator.hasNext() && names.size() < templatesToRead) {
+ final SAMRecord rec = this.underlyingIterator.next();
+ recs.add(rec);
+
+ if (this.decisions.containsKey(rec.getReadName())) continue;
+ names.add(rec.getReadName());
+ }
+ }
+}
diff --git a/src/java/htsjdk/samtools/MergingSamRecordIterator.java b/src/java/htsjdk/samtools/MergingSamRecordIterator.java
index 245ab64..63d0d26 100644
--- a/src/java/htsjdk/samtools/MergingSamRecordIterator.java
+++ b/src/java/htsjdk/samtools/MergingSamRecordIterator.java
@@ -124,6 +124,7 @@ public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
final ComparableSamRecordIterator iterator = this.pq.poll();
final SAMRecord record = iterator.next();
addIfNotEmpty(iterator);
+ // this will resolve the reference indices against the new, merged header
record.setHeader(this.samHeaderMerger.getMergedHeader());
// Fix the read group if needs be
@@ -144,17 +145,6 @@ public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
}
}
- // Fix up the sequence indexes if needs be
- if (this.samHeaderMerger.hasMergedSequenceDictionary()) {
- if (record.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- record.setReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(), record.getReferenceIndex()));
- }
-
- if (record.getReadPairedFlag() && record.getMateReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- record.setMateReferenceIndex(this.samHeaderMerger.getMergedSequenceIndex(iterator.getReader().getFileHeader(), record.getMateReferenceIndex()));
- }
- }
-
return record;
}
diff --git a/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java b/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
index ae53b8c..e74e106 100644
--- a/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
+++ b/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
@@ -34,13 +34,25 @@ public class SAMBinaryTagAndUnsignedArrayValue extends SAMBinaryTagAndValue {
super(tag, value);
}
- /** Creates and returns a deep copy of the list of tag/values. */
+ /** Creates and returns a shallow copy of the list of tag/values. */
+ @Override
public SAMBinaryTagAndValue copy() {
final SAMBinaryTagAndValue retval = new SAMBinaryTagAndUnsignedArrayValue(this.tag, this.value);
if (next != null) retval.next = next.copy();
return retval;
}
+ /** Creates and returns a deep copy of the list of tag/values. */
+ @Override
+ public SAMBinaryTagAndValue deepCopy() {
+ final SAMBinaryTagAndValue retval = new SAMBinaryTagAndUnsignedArrayValue(this.tag, cloneValue());
+ if (next != null) {
+ retval.next = next.deepCopy();
+ }
+ return retval;
+ }
+
+
@Override
public boolean isUnsignedArray() {
return true;
diff --git a/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java b/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
index 7a409e8..70011f9 100644
--- a/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
+++ b/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
@@ -45,8 +45,12 @@ public class SAMBinaryTagAndValue implements Serializable {
/**
* @param tag tagname (in binary form) for this attribute
* @param value value for this attribute (must be of a type that implements {@link Serializable} or else serialization will fail)
+ * Cannot be null.
*/
public SAMBinaryTagAndValue(final short tag, final Object value) {
+ if (null == value) {
+ throw new IllegalArgumentException("SAMBinaryTagAndValue value may not be null");
+ }
this.tag = tag;
this.value = value;
}
@@ -76,13 +80,48 @@ public class SAMBinaryTagAndValue implements Serializable {
return result;
}
- /** Creates and returns a deep copy of the list of tag/values. */
+ /** Creates and returns a shallow copy of the list of tag/values. */
public SAMBinaryTagAndValue copy() {
final SAMBinaryTagAndValue retval = new SAMBinaryTagAndValue(this.tag, this.value);
- if (next != null) retval.next = next.copy();
+ if (next != null) {
+ retval.next = next.copy();
+ }
return retval;
}
+ /** Creates and returns a deep copy of the list of tag/values. */
+ public SAMBinaryTagAndValue deepCopy() {
+ final SAMBinaryTagAndValue retval = new SAMBinaryTagAndValue(this.tag, cloneValue());
+ if (next != null) {
+ retval.next = next.deepCopy();
+ }
+ return retval;
+ }
+
+ /* Create and return a clone of value object */
+ protected Object cloneValue() {
+ Object valueClone;
+
+ if (value instanceof byte[]) {
+ valueClone = ((byte[]) value).clone();
+ }
+ else if (value instanceof short[]) {
+ valueClone = ((short[]) value).clone();
+ }
+ else if (value instanceof int[]) {
+ valueClone = ((int[]) value).clone();
+ }
+ else if (value instanceof float[]) {
+ valueClone = ((float[]) value).clone();
+ }
+ else {
+ // otherwise, the api limits the remaining possible value types to
+ // immutable (String or boxed primitive) types
+ valueClone = value;
+ }
+ return valueClone;
+ }
+
// The methods below are for implementing a light-weight, single-direction linked list
public SAMBinaryTagAndValue getNext() { return this.next; }
diff --git a/src/java/htsjdk/samtools/SAMFileSpan.java b/src/java/htsjdk/samtools/SAMFileSpan.java
index 9da77bc..4122b24 100644
--- a/src/java/htsjdk/samtools/SAMFileSpan.java
+++ b/src/java/htsjdk/samtools/SAMFileSpan.java
@@ -23,13 +23,6 @@
*/
package htsjdk.samtools;
-import htsjdk.samtools.util.StringUtil;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
/**
* A interface representing a collection of (possibly) discontinuous segments in the
* BAM file, possibly representing the results of an index query.
@@ -54,240 +47,3 @@ public interface SAMFileSpan extends Cloneable {
*/
public boolean isEmpty();
}
-
-/**
- * An ordered list of chunks, capable of representing a set of discontiguous
- * regions in the BAM file. FileSpans are mutable within the package, but perceived
- * as immutable outside the package.
- *
- * Some operations on FileSpans assume that the spans are sorted. In these cases,
- * sort order will be validated.
- *
- * @author mhanna
- * @version 0.1
- */
-class BAMFileSpan implements SAMFileSpan, Serializable {
- private static final long serialVersionUID = 1L;
-
- /**
- * The constituent chunks of this list.
- */
- private final List<Chunk> chunks;
-
- /**
- * Create a new empty list of chunks.
- */
- protected BAMFileSpan() {
- this.chunks = new ArrayList<Chunk>();
- }
-
- /**
- * Convenience constructor to construct a BAM file span from
- * a single chunk.
- * @param chunk Chunk to use as the sole region in this span.
- */
- protected BAMFileSpan(final Chunk chunk) {
- this.chunks = new ArrayList<Chunk>();
- chunks.add(chunk);
- }
-
- /**
- * Create a new chunk list from the given list of chunks.
- * @param chunks Constituent chunks.
- */
- protected BAMFileSpan(final List<Chunk> chunks) {
- this.chunks = new ArrayList<Chunk>(chunks);
- }
-
- /**
- * Does this chunk list map to any position within the BAM file?
- * @return True iff the ChunkList points to any data within the BAM.
- */
- public boolean isEmpty() {
- return chunks.isEmpty();
- }
-
- /**
- * Deep clone the given chunk list.
- * @return A copy of the chunk list.
- */
- public BAMFileSpan clone() {
- final BAMFileSpan clone = new BAMFileSpan();
- for(final Chunk chunk: chunks)
- clone.chunks.add(chunk.clone());
- return clone;
- }
-
- /**
- * Remove all chunks in this file span before the given file span starts.
- * If a chunk in the chunk list starts before and ends after the given
- * chunk, the first portion of the chunk will be deleted.
- * @param fileSpan The filespan before which to eliminate.
- * @return The portion of the chunk list after the given chunk.
- */
- public SAMFileSpan removeContentsBefore(final SAMFileSpan fileSpan) {
- if(fileSpan == null)
- return clone();
-
- if(!(fileSpan instanceof BAMFileSpan))
- throw new SAMException("Unable to compare ");
-
- final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;
-
- if(bamFileSpan.isEmpty())
- return clone();
-
- validateSorted();
-
- final BAMFileSpan trimmedChunkList = new BAMFileSpan();
- for(final Chunk chunkToTrim: chunks) {
- if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) {
- if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) {
- // This chunk from the list is completely beyond the start of the filtering chunk.
- trimmedChunkList.add(chunkToTrim.clone());
- }
- else {
- // This chunk from the list partially overlaps the filtering chunk and must be trimmed.
- trimmedChunkList.add(new Chunk(bamFileSpan.chunks.get(0).getChunkStart(),chunkToTrim.getChunkEnd()));
- }
- }
- }
- return trimmedChunkList;
- }
-
- /**
- * Gets a file span over the data immediately following this span.
- * @return The a pointer to data immediately following this span.
- */
- public SAMFileSpan getContentsFollowing() {
- if(chunks.isEmpty())
- throw new SAMException("Unable to get the file pointer following this one: no data present.");
- validateSorted();
- return new BAMFileSpan(new Chunk(chunks.get(chunks.size()-1).getChunkEnd(),Long.MAX_VALUE));
- }
-
- /**
- * Merge one span into another
- *
- * @param span - span with chunks to add to this one
- */
- public void add(final BAMFileSpan span) {
- for (final Chunk c : span.chunks) {
- chunks.add(c);
- }
- }
-
- /**
- * Adds a new chunk to this list. Visible only within the BAm.
- * @param chunk Chunk to add.
- */
- protected void add(final Chunk chunk) {
- chunks.add(chunk);
- }
-
- /**
- * Convert the chunk list to an array of offsets, paired in [start,end) format.
- * @return Array of offsets.
- */
- protected long[] toCoordinateArray() {
- final int count = chunks.size() * 2;
- if (count == 0) {
- return null;
- }
- int index = 0;
- final long[] result = new long[count];
- for (final Chunk chunk : chunks) {
- result[index++] = chunk.getChunkStart();
- result[index++] = chunk.getChunkEnd();
- }
- return result;
- }
-
- /**
- * Find the first offset in the chunk list
- * @return The first offset in the span
- */
- protected long getFirstOffset() {
- final long result = 0;
- if (chunks == null){
- return result;
- }
- for (final Chunk chunk : chunks) {
- return chunk.getChunkStart();
- }
- return result;
- }
-
- /**
- * Gets the constituent chunks stored in this span.
- * @return An unmodifiable list of chunks.
- */
- protected List<Chunk> getChunks() {
- return Collections.unmodifiableList(chunks);
- }
-
- /**
- * Checks that there is only a single chunk for this span and returns it.
- * @return The single chunk stored in this span
- */
- protected Chunk getSingleChunk() {
- if (chunks.size() != 1){
- throw new SAMException("Expecting a single chunk for span. Found " + chunks.size());
- }
- return chunks.get(0);
- }
-
- /**
- * The list of chunks is often represented as an array of
- * longs where every even-numbered index is a start coordinate
- * and every odd-numbered index is a stop coordinate. Convert
- * from that format back to a list of chunks.
- * @param coordinateArray List of chunks to convert.
- * @return A list of chunks.
- */
- protected static SAMFileSpan toChunkList(final long[] coordinateArray) {
- if(coordinateArray.length % 2 != 0)
- throw new SAMException("Data supplied does not appear to be in coordinate array format.");
-
- final BAMFileSpan chunkList = new BAMFileSpan();
- for(int i = 0; i < coordinateArray.length; i += 2)
- chunkList.add(new Chunk(coordinateArray[i],coordinateArray[i+1]));
-
- chunkList.validateSorted();
-
- return chunkList;
- }
-
- /**
- * Validates the list of chunks to ensure that they appear in sorted order.
- */
- private void validateSorted() {
- for(int i = 1; i < chunks.size(); i++) {
- if(chunks.get(i).getChunkStart() < chunks.get(i-1).getChunkEnd())
- throw new SAMException(String.format("Chunk list is unsorted; chunk %s is before chunk %s",chunks.get(i-1),chunks.get(i)));
- }
- }
-
- /**
- * Creates a string representation of this chunk list.
- */
- @Override
- public String toString() {
- return StringUtil.join(";", chunks);
- }
-
- /**
- *
- * @return A single BAMFileSpan that is an intelligent merge of the input spans, i.e. contiguous, overlapping
- * and contained chunks are intelligently merged, and the chunks are sorted.
- */
- public static BAMFileSpan merge(final BAMFileSpan[] spans) {
- final ArrayList<Chunk> inputChunks = new ArrayList<Chunk>();
- for (final BAMFileSpan span : spans) {
- if(span != null){
- inputChunks.addAll(span.chunks);
- }
- }
- return new BAMFileSpan(Chunk.optimizeChunkList(inputChunks, 0));
- }
-}
diff --git a/src/java/htsjdk/samtools/SAMFileWriterFactory.java b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
index c32cf97..0566df1 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterFactory.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
@@ -26,20 +26,20 @@ package htsjdk.samtools;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.BlockCompressedOutputStream;
import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Md5CalculatingOutputStream;
import htsjdk.samtools.util.RuntimeIOException;
-import java.io.BufferedOutputStream;
import java.io.File;
-import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
/**
- * Create a SAMFileWriter for writing SAM or BAM.
+ * Create a writer for writing SAM, BAM, or CRAM files.
*/
public class SAMFileWriterFactory {
+ private final static Log log = Log.getInstance(SAMFileWriterFactory.class);
private static boolean defaultCreateIndexWhileWriting = Defaults.CREATE_INDEX;
private boolean createIndex = defaultCreateIndexWhileWriting;
private static boolean defaultCreateMd5File = Defaults.CREATE_MD5;
@@ -82,7 +82,7 @@ public class SAMFileWriterFactory {
/**
* Convenience method allowing newSAMFileWriterFactory().setCreateIndex(true);
* Equivalent to SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(true); newSAMFileWriterFactory();
- * If a BAM (not SAM) file is created, the setting is true, and the file header specifies coordinate order,
+ * If a BAM or CRAM (not SAM) file is created, the setting is true, and the file header specifies coordinate order,
* then a BAM index file will be written along with the BAM file.
*
* @param setting whether to attempt to create a BAM index while creating the BAM file.
@@ -170,14 +170,14 @@ public class SAMFileWriterFactory {
try {
final boolean createMd5File = this.createMd5File && IOUtil.isRegularPath(outputFile);
if (this.createMd5File && !createMd5File) {
- System.err.println("Cannot create MD5 file for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ log.warn("Cannot create MD5 file for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
}
OutputStream os = IOUtil.maybeBufferOutputStream(new FileOutputStream(outputFile, false), bufferSize);
if (createMd5File) os = new Md5CalculatingOutputStream(os, new File(outputFile.getAbsolutePath() + ".md5"));
final BAMFileWriter ret = new BAMFileWriter(os, outputFile, compressionLevel);
final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
if (this.createIndex && !createIndex) {
- System.err.println("Cannot create index for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ log.warn("Cannot create index for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
}
if (this.tmpDir != null) ret.setTempDirectory(this.tmpDir);
initializeBAMWriter(ret, header, presorted, createIndex);
@@ -293,46 +293,133 @@ public class SAMFileWriterFactory {
return makeBAMWriter(header, presorted, outputFile);
}
+ /**
+ *
+ * Create a SAM, BAM or CRAM writer based on examination of the outputFile extension.
+ *
+ * @param header header. Sort order is determined by the sortOrder property of this arg.
+ * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return SAMFileWriter appropriate for the file type specified in outputFile
+ *
+ */
public SAMFileWriter makeWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) {
- if (outputFile.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension()))
- try {
- return makeCRAMWriter(header, new FileOutputStream(outputFile), referenceFasta);
- } catch (final FileNotFoundException e) {
- throw new RuntimeIOException(e);
- }
- return makeSAMOrBAMWriter(header, presorted, outputFile);
+ if (outputFile.getName().endsWith(SamReader.Type.CRAM_TYPE.fileExtension())) {
+ return makeCRAMWriter(header, presorted, outputFile, referenceFasta);
+ }
+ else {
+ return makeSAMOrBAMWriter(header, presorted, outputFile);
+ }
}
+ /**
+ * Create a CRAMFileWriter on an output stream. Requires the input to be presorted to match the sort order defined
+ * by the input header.
+ *
+ * Note: does not honor factory settings for CREATE_MD5, CREATE_INDEX, USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param stream where to write the output.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ */
public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final OutputStream stream, final File referenceFasta) {
-
+ // create the CRAMFileWriter directly without propagating factory settings
final CRAMFileWriter writer = new CRAMFileWriter(stream, new ReferenceSource(referenceFasta), header, null);
- writer.setPreserveReadNames(true);
- writer.setCaptureAllTags(true);
+ setCRAMWriterDefaults(writer);
return writer;
}
+ /**
+ * Create a CRAMFileWriter on an output file. Requires input records to be presorted to match the
+ * sort order defined by the input header.
+ *
+ * Note: does not honor factory settings for USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ *
+ */
public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final File outputFile, final File referenceFasta) {
+ return createCRAMWriterWithSettings(header, true, outputFile, referenceFasta);
+ }
- final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
- if (this.createIndex && !createIndex) {
- System.err.println("Cannot create index for CAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ /**
+ * Create a CRAMFileWriter on an output file.
+ *
+ * Note: does not honor factory setting for USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ *
+ */
+ public CRAMFileWriter makeCRAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile, final File referenceFasta) {
+ return createCRAMWriterWithSettings(header, presorted, outputFile, referenceFasta);
+ }
+
+ /**
+ * Create a CRAMFileWriter on an output file based on factory settings.
+ *
+ * Note: does not honor the factory setting for USE_ASYNC_IO.
+ *
+ * @param header entire header. Sort order is determined by the sortOrder property of this arg.
+ * @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
+ * @param outputFile where to write the output. Must end with .sam, .bam or .cram.
+ * @param referenceFasta reference sequence file
+ * @return CRAMFileWriter
+ */
+ private CRAMFileWriter createCRAMWriterWithSettings(
+ final SAMFileHeader header,
+ final boolean presorted,
+ final File outputFile,
+ final File referenceFasta) {
+ OutputStream cramOS = null;
+ OutputStream indexOS = null ;
+
+ if (createIndex) {
+ if (!IOUtil.isRegularPath(outputFile)) {
+ log.warn("Cannot create index for CRAM because output file is not a regular file: " + outputFile.getAbsolutePath());
+ }
+ else {
+ try {
+ final File indexFile = new File(outputFile.getAbsolutePath() + BAMIndex.BAMIndexSuffix) ;
+ indexOS = new FileOutputStream(indexFile) ;
+ }
+ catch (final IOException ioe) {
+ throw new RuntimeIOException("Error creating index file for: " + outputFile.getAbsolutePath()+ BAMIndex.BAMIndexSuffix);
+ }
+ }
}
try {
-
- OutputStream indexOS = null ;
- if (createIndex) {
- File indexFile = new File(outputFile.getAbsolutePath() + ".bai") ;
- indexOS = new FileOutputStream(indexFile) ;
- }
- final CRAMFileWriter writer = new CRAMFileWriter(new FileOutputStream(outputFile), indexOS, new ReferenceSource(referenceFasta), header, null);
- writer.setPreserveReadNames(true);
- writer.setCaptureAllTags(true);
- return writer;
- } catch (final IOException ioe) {
- throw new RuntimeIOException("Error opening file: " + outputFile.getAbsolutePath());
+ cramOS = IOUtil.maybeBufferOutputStream(new FileOutputStream(outputFile, false), bufferSize);
+ }
+ catch (final IOException ioe) {
+ throw new RuntimeIOException("Error creating CRAM file: " + outputFile.getAbsolutePath());
}
+
+ CRAMFileWriter writer = new CRAMFileWriter(
+ createMd5File ? new Md5CalculatingOutputStream(cramOS, new File(outputFile.getAbsolutePath() + ".md5")) : cramOS,
+ indexOS,
+ presorted,
+ new ReferenceSource(referenceFasta),
+ header,
+ outputFile.getAbsolutePath());
+ setCRAMWriterDefaults(writer);
+
+ return writer;
}
+ // Set the default CRAM writer preservation parameters
+ private void setCRAMWriterDefaults(CRAMFileWriter writer) {
+ writer.setPreserveReadNames(true);
+ writer.setCaptureAllTags(true);
+ }
}
diff --git a/src/java/htsjdk/samtools/SAMFileWriterImpl.java b/src/java/htsjdk/samtools/SAMFileWriterImpl.java
index 82282fe..219f64c 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterImpl.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterImpl.java
@@ -123,10 +123,13 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
}
/**
- * Must be called before addAlignment.
+ * Must be called before addAlignment. Header cannot be null.
*/
public void setHeader(final SAMFileHeader header)
{
+ if (null == header) {
+ throw new IllegalArgumentException("A non-null SAMFileHeader is required for a writer");
+ }
this.header = header;
if (sortOrder == null) {
sortOrder = SAMFileHeader.SortOrder.unsorted;
@@ -168,12 +171,19 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
throw new IllegalStateException("sortOrder should not be null");
}
+ /**
+ * Add an alignment record to be emitted by the writer.
+ *
+ * @param alignment Must not be null. If the alignment record's SAMFileHeader is null, the record will be
+ * updated to the header used by this writer, which will in turn cause any unresolved reference and
+ * mate reference indices to be resolved against the new header's sequence dictionary.
+ */
public void addAlignment(final SAMRecord alignment)
{
+ if (null == alignment.getHeader()) {
+ alignment.setHeader(header); // re-establish the record header and attempt to resolve reference index values
+ }
if (sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) {
- if (!header.getGroupOrder().equals(SAMFileHeader.GroupOrder.none)) {
- throw new UnsupportedOperationException("GroupOrder " + header.getGroupOrder() + " is not supported");
- }
writeAlignment(alignment);
} else if (presorted) {
assertPresorted(alignment);
@@ -213,7 +223,7 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
/**
* Writes the record to disk. Sort order has been taken care of by the time
- * this method is called.
+ * this method is called. The record must have a non-null SAMFileHeader.
* @param alignment
*/
abstract protected void writeAlignment(SAMRecord alignment);
diff --git a/src/java/htsjdk/samtools/SAMRecord.java b/src/java/htsjdk/samtools/SAMRecord.java
index 8eb9fd3..cfa922f 100644
--- a/src/java/htsjdk/samtools/SAMRecord.java
+++ b/src/java/htsjdk/samtools/SAMRecord.java
@@ -32,54 +32,77 @@ import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Set;
/**
* Java binding for a SAM file record. c.f. http://samtools.sourceforge.net/SAM1.pdf
- *
+ * <p>
* The presence of reference name/reference index and alignment start
* do not necessarily mean that a read is aligned. Those values may merely be set to force a SAMRecord
* to appear in a certain place in the sort order. The readUnmappedFlag must be checked to determine whether
* or not a read is mapped. Only if the readUnmappedFlag is false can the reference name/index and alignment start
* be interpreted as indicating an actual alignment position.
- *
+ * <p>
* Likewise, presence of mate reference name/index and mate alignment start do not necessarily mean that the
* mate is aligned. These may be set for an unaligned mate if the mate has been forced into a particular place
* in the sort order per the above paragraph. Only if the mateUnmappedFlag is false can the mate reference name/index
* and mate alignment start be interpreted as indicating the actual alignment position of the mate.
- *
+ * <p>
* Note also that there are a number of getters & setters that are linked, i.e. they present different representations
* of the same underlying data. In these cases there is typically a representation that is preferred because it
* ought to be faster than some other representation. The following are the preferred representations:
- *
- * getReadNameLength() is preferred to getReadName().length()
- * get/setReadBases() is preferred to get/setReadString()
- * get/setBaseQualities() is preferred to get/setBaseQualityString()
- * get/setReferenceIndex() is preferred to get/setReferenceName()
- * get/setMateReferenceIndex() is preferred to get/setMateReferenceName()
- * getCigarLength() is preferred to getCigar().getNumElements()
- * get/setCigar() is preferred to get/setCigarString()
- *
- * Note that setIndexingBin() need not be called when writing SAMRecords. It will be computed as necessary. It is only
- * present as an optimization in the event that the value is already known and need not be computed.
- *
- * setHeader() need not be called when writing SAMRecords. It may be convenient to call it, however, because
- * get/setReferenceIndex() and get/setMateReferenceIndex() must have access to the SAM header, either as an argument
- * or previously passed to setHeader().
- *
+ * </p><ul>
+ * <li>getReadNameLength() is preferred to getReadName().length()</li>
+ * <li>get/setReadBases() is preferred to get/setReadString()</li>
+ * <li>get/setBaseQualities() is preferred to get/setBaseQualityString()</li>
+ * <li>get/setReferenceIndex() is preferred to get/setReferenceName() for records with valid SAMFileHeaders</li>
+ * <li>get/setMateReferenceIndex() is preferred to get/setMateReferenceName() for records with valid SAMFileHeaders</li>
+ * <li>getCigarLength() is preferred to getCigar().getNumElements()</li>
+ * <li>get/setCigar() is preferred to get/setCigarString()</li>
+ * </ul>
+ * <p>
* setHeader() is called by the SAM reading code, so the get/setReferenceIndex() and get/setMateReferenceIndex()
- * methods will have access to the sequence dictionary.
- *
+ * methods will have access to the sequence dictionary to resolve reference and mate reference names to dictionary
+ * indices.
+ * <p>
+ * setHeader() need not be called explicitly when writing SAMRecords; however, the writers require a record header
+ * in order to call get/setReferenceIndex() and get/setMateReferenceIndex(). Therefore adding records to a writer
+ * has a side effect: any record that does not have an assigned header at the time it is added to a writer will be
+ * updated and assigned the header associated with the writer.
+ * <p>
* Some of the get() methods return values that are mutable, due to the limitations of Java. A caller should
* never change the value returned by a get() method. If you want to change the value of some attribute of a
* SAMRecord, create a new value object and call the appropriate set() method.
- *
+ * </p>
+ * Note that setIndexingBin() need not be called when writing SAMRecords. It will be computed as necessary. It is only
+ * present as an optimization in the event that the value is already known and need not be computed.
+ * <p>
* By default, extensive validation of SAMRecords is done when they are read. Very limited validation is done when
* values are set onto SAMRecords.
- */
-/**
+ * <p>
+ * <h3>Notes on Headerless SAMRecords</h3>
+ * <p>
+ * If the header is null, the following SAMRecord methods may throw exceptions:
+ * <ul>
+ * <li>getReferenceIndex</li>
+ * <li>setReferenceIndex</li>
+ * <li>getMateReferenceIndex</li>
+ * <li>setMateReferenceIndex</li>
+ * </ul><p>
+ * Record comparators (i.e. SAMRecordCoordinateComparator and SAMRecordDuplicateComparator) require records with
+ * non-null header values.
+ * <p>
+ * A record with a null header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ * <p>
+ * Also, SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
+ * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
+ * header associated with the writer.
+ * <p>
* @author alecw at broadinstitute.org
* @author mishali.naik at intel.com
*/
@@ -168,6 +191,9 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
private transient SAMFileSource mFileSource;
private SAMFileHeader mHeader = null;
+ /** Transient Map of attributes for use by anyone. */
+ private transient Map<Object,Object> transientAttributes;
+
public SAMRecord(final SAMFileHeader header) {
mHeader = header;
}
@@ -286,8 +312,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
private static boolean hasReferenceName(final Integer referenceIndex, final String referenceName) {
- return (referenceIndex != null && referenceIndex != NO_ALIGNMENT_REFERENCE_INDEX) ||
- !NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName);
+ return (referenceIndex != null && !referenceIndex.equals(NO_ALIGNMENT_REFERENCE_INDEX)) ||
+ (!NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName));
}
/**
@@ -305,119 +331,208 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
/**
- * @return Reference name, or null if record has no reference.
+ * @return Reference name, or NO_ALIGNMENT_REFERENCE_NAME (*) if the record has no reference name
*/
- public String getReferenceName() {
- return mReferenceName;
- }
+ public String getReferenceName() { return mReferenceName; }
- public void setReferenceName(final String value) {
- /* String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible */
- if (NO_ALIGNMENT_REFERENCE_NAME.equals(value)) {
+ /**
+ * Sets the reference name for this record. If the record has a valid SAMFileHeader and the reference
+ * name is present in the associated sequence dictionary, the record's reference index will also be
+ * updated with the corresponding sequence index. If referenceName is NO_ALIGNMENT_REFERENCE_NAME, sets
+ * the reference index to NO_ALIGNMENT_REFERENCE_INDEX.
+ *
+ * @param referenceName - must not be null
+ */
+ public void setReferenceName(final String referenceName) {
+ if (null == referenceName) {
+ throw new IllegalArgumentException(
+ "Reference name must not be null. Use SAMRecord.NO_ALIGNMENT_REFERENCE_NAME to reset the reference name.");
+ }
+ else if (NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName)) {
mReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- return;
- } else if (mHeader != null) {
- final int referenceIndex = mHeader.getSequenceIndex(value);
- if (referenceIndex != -1) {
- setReferenceIndex(referenceIndex);
- return;
+ }
+ else if (null != mHeader) {
+ // String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible
+ final int referenceIndex = mHeader.getSequenceIndex(referenceName);
+ if (-1 != referenceIndex) {
+ setReferenceIndex(referenceIndex); // sets reference name and index
}
+ else {
+ mReferenceName = referenceName.intern();
+ mReferenceIndex = null;
+ }
+ }
+ else {
+ mReferenceName = referenceName.intern();
+ mReferenceIndex = null;
}
- // Drop through from above if nothing done.
- mReferenceName = value.intern();
- mReferenceIndex = null;
}
/**
- * @return index of the reference sequence for this read in the sequence dictionary, or -1
- * if read has no reference sequence set, or if a String reference name is not found in the sequence index..
+ * Returns the reference index for this record.
+ *
+ * If the reference name for this record has previously been resolved against the sequence dictionary, the corresponding
+ * index is returned directly. Otherwise, the record must have a non-null SAMFileHeader that can be used to
+ * resolve the index for the record's current reference name, unless the reference name is NO_ALIGNMENT_REFERENCE_NAME.
+ * If the record has a header, and the name does not appear in the header's sequence dictionary, the value
+ * NO_ALIGNMENT_REFERENCE_INDEX (-1) will be returned. If the record does not have a header, an IllegalStateException
+ * is thrown.
+ *
+ * @return Index in the sequence dictionary of the reference sequence. If the read has no reference sequence, or if
+ * the reference name is not found in the sequence index, NO_ALIGNMENT_REFERENCE_INDEX (-1) is returned.
+ *
+ * @throws IllegalStateException if the reference index cannot be resolved because the SAMFileHeader for the
+ * record is null.
*/
public Integer getReferenceIndex() {
- if (mReferenceIndex == null) {
- if (mReferenceName == null) {
+ if (null == mReferenceIndex) {
+ // try to resolve the reference index
+ if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) {
mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) {
- mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- } else {
+ }
+ else if (null != mHeader) {
mReferenceIndex = mHeader.getSequenceIndex(mReferenceName);
}
+ else {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index");
+ }
}
return mReferenceIndex;
}
/**
- * @param referenceIndex Must either equal -1 (indicating no reference), or exist in the sequence dictionary
- * in the header associated with this record.
+ * Updates the reference index. The record must have a valid SAMFileHeader unless the referenceIndex parameter equals
+ * NO_ALIGNMENT_REFERENCE_INDEX, and the reference index must appear in the header's sequence dictionary. If the
+ * reference index is valid, the reference name will also be resolved and updated to the name for the sequence
+ * dictionary entry corresponding to the index.
+ *
+ * @param referenceIndex Must either equal NO_ALIGNMENT_REFERENCE_INDEX (-1) indicating no reference, or the
+ * record must have a SAMFileHeader and the index must exist in the associated sequence
+ * dictionary.
+ * @throws IllegalStateException if the SAMFileHeader is null for this record.
+ * @throws IllegalArgumentException if the reference index is not found in the sequence dictionary for this record.
*/
public void setReferenceIndex(final int referenceIndex) {
- mReferenceIndex = referenceIndex;
- if (mReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
+ if (referenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
+ mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
mReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
- } else {
- try {
- mReferenceName = mHeader.getSequence(referenceIndex).getSequenceName();
- } catch (final NullPointerException e) {
- throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.", e);
+ }
+ else if (null == mHeader) {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index");
+ }
+ else {
+ SAMSequenceRecord samSequence = mHeader.getSequence(referenceIndex);
+ if (null != samSequence) {
+ mReferenceIndex = referenceIndex;
+ mReferenceName = samSequence.getSequenceName();
+ }
+ else {
+ throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.");
}
}
}
/**
- * @return Mate reference name, or null if one is not assigned.
+ * @return Mate reference name, or NO_ALIGNMENT_REFERENCE_NAME (*) if the record has no mate reference name
*/
public String getMateReferenceName() {
return mMateReferenceName;
}
+ /**
+ * Sets the mate reference name for this record. If the record has a valid SAMFileHeader and the mate reference
+ * name is present in the associated sequence dictionary, the record's mate reference index will also be
+ * updated with the corresponding sequence index. If mateReferenceName is NO_ALIGNMENT_REFERENCE_NAME, sets the
+ * mate reference index to NO_ALIGNMENT_REFERENCE_INDEX.
+ *
+ * @param mateReferenceName - must not be null
+ */
public void setMateReferenceName(final String mateReferenceName) {
- /* String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible */
- if (NO_ALIGNMENT_REFERENCE_NAME.equals(mateReferenceName)) {
+ if (null == mateReferenceName) {
+ throw new IllegalArgumentException("Mate reference name must not be null");
+ }
+ else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mateReferenceName)) {
mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- return;
- } else if (mHeader != null) {
- final int referenceIndex = mHeader.getSequenceIndex(mateReferenceName);
- if (referenceIndex != -1) {
- setMateReferenceIndex(referenceIndex);
- return;
+ }
+ else if (null != mHeader) {
+ final int mateReferenceIndex = mHeader.getSequenceIndex(mateReferenceName);
+ if (-1 != mateReferenceIndex) {
+ setMateReferenceIndex(mateReferenceIndex); // sets mate reference name and index
+ }
+ else {
+ mMateReferenceName = mateReferenceName.intern();
+ mMateReferenceIndex = null;
}
}
- // Drop through from above if nothing done.
- this.mMateReferenceName = mateReferenceName.intern();
- mMateReferenceIndex = null;
+ else {
+ mMateReferenceName = mateReferenceName.intern();
+ mMateReferenceIndex = null;
+ }
}
/**
- * @return index of the reference sequence for this read's mate in the sequence dictionary, or -1
- * if mate has no reference sequence set.
+ * Returns the mate reference index for this record.
+ *
+ * If the mate reference name for this record has previously been resolved against the sequence dictionary, the
+ * corresponding index is returned directly. Otherwise, the record must have a non-null SAMFileHeader that can be
+ * used to resolve the index for the record's current mate reference name, unless the mate reference name is
+ * NO_ALIGNMENT_REFERENCE_NAME. If the record has a header, and the name does not appear in the header's
+ * sequence dictionary, the value NO_ALIGNMENT_REFERENCE_INDEX (-1) will be returned. If the record does not have
+ * a header, an IllegalStateException is thrown.
+ *
+ * @return Index in the sequence dictionary of the mate reference sequence. If the read has no mate reference
+ * sequence, or if the mate reference name is not found in the sequence index, NO_ALIGNMENT_REFERENCE_INDEX (-1)
+ * is returned.
+ *
+ * @throws IllegalStateException if the mate reference index cannot be resolved because the SAMFileHeader for the
+ * record is null.
*/
public Integer getMateReferenceIndex() {
- if (mMateReferenceIndex == null) {
- if (mMateReferenceName == null) {
+ if (null == mMateReferenceIndex) {
+ // try to resolve the reference index
+ if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)) {
mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- } else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)){
- mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- } else {
+ }
+ else if (null != mHeader) {
mMateReferenceIndex = mHeader.getSequenceIndex(mMateReferenceName);
}
+ else {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the mate reference index");
+ }
}
return mMateReferenceIndex;
}
/**
- * @param referenceIndex Must either equal -1 (indicating no reference), or exist in the sequence dictionary
- * in the header associated with this record.
- */
- public void setMateReferenceIndex(final int referenceIndex) {
- mMateReferenceIndex = referenceIndex;
- if (mMateReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
+ * Updates the mate reference index. The record must have a valid SAMFileHeader, and the mate reference index must appear in
+ * the header's sequence dictionary, unless the mateReferenceIndex parameter equals NO_ALIGNMENT_REFERENCE_INDEX. If the mate
+ * reference index is valid, the mate reference name will also be resolved and updated to the name for the sequence dictionary
+ * entry corresponding to the index.
+ *
+ * @param mateReferenceIndex Must either equal NO_ALIGNMENT_REFERENCE_INDEX (-1) indicating no reference, or the
+ * record must have a SAMFileHeader and the index must exist in the associated sequence
+ * dictionary.
+ * @throws IllegalStateException if the SAMFileHeader is null for this record.
+ * @throws IllegalArgumentException if the mate reference index is not found in the sequence dictionary for this record.
+ */
+ public void setMateReferenceIndex(final int mateReferenceIndex) {
+ if (mateReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
+ mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
- } else {
- try {
- mMateReferenceName = mHeader.getSequence(referenceIndex).getSequenceName();
- } catch (final NullPointerException e) {
- throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.", e);
+ }
+ else if (null == mHeader) {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the mate reference index");
+ }
+ else {
+ SAMSequenceRecord samSequence = mHeader.getSequence(mateReferenceIndex);
+ if (null != samSequence) {
+ mMateReferenceIndex = mateReferenceIndex;
+ mMateReferenceName = samSequence.getSequenceName();
+ }
+ else {
+ throw new IllegalArgumentException("Reference index " + mateReferenceIndex + " not found in sequence dictionary.");
}
}
}
@@ -478,21 +593,32 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
+ * @param offset 1-based location within the unclipped sequence or 0 if there is no position.
+ * <p/>
+ * Non static version of the static function with the same name.
* @return 1-based inclusive reference position of the unclipped sequence at a given offset,
- * or 0 if there is no position.
- * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1,
- * and a (1-based)offset 10 (start of GGG) it returns 7 (1-based offset starting after the soft clip.
- * For example: given the sequence AAACCCGGGTTT, cigar 4M1D6M, an alignment start of 1,
- * an offset of 4 returns reference position 4, an offset of 5 returns reference position 6.
- * Another example: given the sequence AAACCCGGGTTT, cigar 4M1I6M, an alignment start of 1,
- * an offset of 4 returns reference position 4, an offset of 5 returns 0.
- * @offset 1-based location within the unclipped sequence
*/
public int getReferencePositionAtReadPosition(final int offset) {
+ return getReferencePositionAtReadPosition(this, offset);
+ }
+
+ /**
+ * @param rec record to use
+ * @param offset 1-based location within the unclipped sequence
+ * @return 1-based inclusive reference position of the unclipped sequence at a given offset,
+ * or 0 if there is no position.
+ * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1,
+ * and a (1-based)offset 10 (start of GGG) it returns 7 (1-based offset starting after the soft clip).
+ * For example: given the sequence AAACCCGGGTTT, cigar 4M1D6M, an alignment start of 1,
+ * an offset of 4 returns reference position 4, an offset of 5 returns reference position 6.
+ * Another example: given the sequence AAACCCGGGTTT, cigar 4M1I6M, an alignment start of 1,
+ * an offset of 4 returns reference position 4, an offset of 5 returns 0.
+ */
+ public static int getReferencePositionAtReadPosition(final SAMRecord rec, final int offset) {
if (offset == 0) return 0;
- for (final AlignmentBlock alignmentBlock : getAlignmentBlocks()) {
+ for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) {
if (CoordMath.getEnd(alignmentBlock.getReadStart(), alignmentBlock.getLength()) < offset) {
continue;
} else if (offset < alignmentBlock.getReadStart()) {
@@ -504,6 +630,77 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
return 0; // offset not located in an alignment block
}
+
+ /**
+ * Finds the read offset aligned to a reference position; a position inside a deletion yields 0.
+ * @param pos 1-based reference position
+ * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the
+ * unclipped sequence at a given reference position, or 0 if there is no such position.
+ *
+ * See the examples on the static getReadPositionAtReferencePosition(SAMRecord, int, boolean) overload.
+ */
+ public int getReadPositionAtReferencePosition(final int pos) {
+ return getReadPositionAtReferencePosition(this, pos, false);
+ }
+
+ /**
+ * @param pos 1-based reference position
+ * @param returnLastBaseIfDeleted if true, and reference position matches a deleted base in the read, function will
+ * return the offset of the last read base before the deletion instead of 0
+ * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the
+ * unclipped sequence at a given reference position,
+ * or 0 if there is no such position. If returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base
+ * in the preceding block.
+ *
+ * Non-static version of static function with the same name. See examples below.
+ */
+ public int getReadPositionAtReferencePosition(final int pos, final boolean returnLastBaseIfDeleted) {
+ return getReadPositionAtReferencePosition(this, pos, returnLastBaseIfDeleted);
+ }
+
+ /**
+ * @param rec record to use
+ * @param pos 1-based reference position
+ * @param returnLastBaseIfDeleted if true, and reference position matches a deleted base in the read, function will
+ * return the offset of the last read base before the deletion instead of 0
+ * @return 1-based (to match getReferencePositionAtReadPosition behavior) inclusive position into the
+ * unclipped sequence at a given reference position,
+ * or 0 if there is no such position. If returnLastBaseIfDeleted is true deletions are assumed to "live" on the last read base
+ * in the preceding block.
+ * For example, given the sequence NNNAAACCCGGG, cigar 3S9M, and an alignment start of 1,
+ * and a (1-based)pos of 7 (start of GGG) it returns 10 (1-based offset including the soft clip).
+ * For example: given the sequence AAACCCGGGT, cigar 4M1D6M, an alignment start of 1,
+ * a reference position of 4 returns offset of 4, a reference of 5 also returns an offset 4 (using "left aligning") if returnLastBaseIfDeleted
+ * and 0 otherwise.
+ * For example: given the sequence AAACtCGGGTT, cigar 4M1I6M, an alignment start of 1,
+ * a position 4 returns an offset 4, a position of 5 returns 6 (the inserted base is the 5th offset), a position of 11 returns 0 since
+ * that position in the reference doesn't overlap the read at all.
+ *
+ */
+ public static int getReadPositionAtReferencePosition(final SAMRecord rec, final int pos, final boolean returnLastBaseIfDeleted) {
+
+ if (pos <= 0) {
+ return 0;
+ }
+
+ int lastAlignmentOffset = 0;
+ for (final AlignmentBlock alignmentBlock : rec.getAlignmentBlocks()) {
+ if (CoordMath.getEnd(alignmentBlock.getReferenceStart(), alignmentBlock.getLength()) >= pos) {
+ if (pos < alignmentBlock.getReferenceStart()) {
+ //There must have been a deletion block that skipped
+ return returnLastBaseIfDeleted ? lastAlignmentOffset : 0;
+ } else {
+ return pos - alignmentBlock.getReferenceStart() + alignmentBlock.getReadStart() ;
+ }
+ } else {
+ // record the offset to the last base in the current block, in case the next block starts too late
+ lastAlignmentOffset = alignmentBlock.getReadStart() + alignmentBlock.getLength() - 1 ;
+ }
+ }
+ // if we are here, the reference position was not overlapping the read at all
+ return 0;
+ }
+
/**
* @return 1-based inclusive leftmost position of the clipped mate sequence, or 0 if there is no position.
*/
@@ -563,7 +760,9 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
public Cigar getCigar() {
if (mCigar == null && mCigarString != null) {
mCigar = TextCigarCodec.decode(mCigarString);
- if (getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
+ if (null != getHeader() &&
+ getValidationStringency() != ValidationStringency.SILENT &&
+ !this.getReadUnmappedFlag()) {
// Don't know line number, and don't want to force read name to be decoded.
SAMUtils.processValidationErrors(this.validateCigar(-1L), -1L, getValidationStringency());
}
@@ -601,16 +800,16 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Get the SAMReadGroupRecord for this SAMRecord.
* @return The SAMReadGroupRecord from the SAMFileHeader for this SAMRecord, or null if
* 1) this record has no RG tag, or 2) the header doesn't contain the read group with
- * the given ID.
- * @throws NullPointerException if this.getHeader() returns null.
+ * the given ID, or 3) this record has no SAMFileHeader.
* @throws ClassCastException if RG tag does not have a String value.
*/
public SAMReadGroupRecord getReadGroup() {
final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG);
- if (rgId == null) {
+ if (rgId == null || getHeader() == null) {
return null;
+ } else {
+ return getHeader().getReadGroup(rgId);
}
- return getHeader().getReadGroup(rgId);
}
/**
@@ -879,8 +1078,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
* Get the tag value and attempt to coerce it into the requested type.
* @param tag The requested tag.
- * @return The value of a tag, converted into an Integer if possible.
- * @throws RuntimeException If the value is not an integer type, or will not fit in an Integer.
+ * @return The value of a tag, converted into a signed Integer if possible.
+ * @throws RuntimeException If the value is not an integer type, or will not fit in a signed Integer.
*/
public Integer getIntegerAttribute(final String tag) {
final Object val = getAttribute(tag);
@@ -899,6 +1098,46 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
/**
+ * A convenience method that will return a valid unsigned integer as a Long,
+ * or fail with an exception if the tag value is invalid.
+ *
+ * @param tag Two-character tag name.
+ * @return valid unsigned integer associated with the tag, as a Long, or null if the tag is not set
+ * @throws SAMException if the value is out of range for a 32-bit unsigned value, or not a Number
+ */
+ public Long getUnsignedIntegerAttribute(final String tag) throws SAMException {
+ return getUnsignedIntegerAttribute(SAMTagUtil.getSingleton().makeBinaryTag(tag));
+ }
+
+ /**
+ * A convenience method that will return a valid unsigned integer as a Long,
+ * or fail with an exception if the tag value is invalid.
+ *
+ * @param tag Binary representation of a 2-char String tag as created by SAMTagUtil.
+ * @return valid unsigned integer associated with the tag, as a Long, or null if the tag is not set
+ * @throws SAMException if the value is out of range for a 32-bit unsigned value, or not a Number
+ */
+ public Long getUnsignedIntegerAttribute(final short tag) throws SAMException {
+ final Object value = getAttribute(tag);
+ if (value == null) {
+ return null;
+ }
+
+ if (value instanceof Number) {
+ final long lValue = ((Number)value).longValue();
+ if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
+ return lValue;
+ } else {
+ throw new SAMException("Unsigned integer value of tag " +
+ SAMTagUtil.getSingleton().makeStringTag(tag) + " is out of bounds for a 32-bit unsigned integer: " + lValue);
+ }
+ } else {
+ throw new SAMException("Unexpected attribute value data type " + value.getClass() + " for tag " +
+ SAMTagUtil.getSingleton().makeStringTag(tag));
+ }
+ }
+
+ /**
* Get the tag value and attempt to coerce it into the requested type.
* @param tag The requested tag.
* @return The value of a tag, converted into a Short if possible.
@@ -1074,14 +1313,15 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
* Set a named attribute onto the SAMRecord. Passing a null value causes the attribute to be cleared.
* @param tag two-character tag name. See http://samtools.sourceforge.net/SAM1.pdf for standard and user-defined tags.
- * @param value Supported types are String, Char, Integer, Float, byte[], short[]. int[], float[].
+ * @param value Supported types are String, Char, Integer, Float,
+ * Long (for values that fit into a signed or unsigned 32-bit integer only),
+ * byte[], short[], int[], float[].
* If value == null, tag is cleared.
*
* Byte and Short are allowed but discouraged. If written to a SAM file, these will be converted to Integer,
* whereas if written to BAM, getAttribute() will return as Byte or Short, respectively.
*
- * Long with value between 0 and MAX_UINT is allowed for BAM but discouraged. Attempting to write such a value
- * to SAM will cause an exception to be thrown.
+ * Long is allowed for values that fit into a signed or unsigned 32-bit integer only, but discouraged.
*
* To set unsigned byte[], unsigned short[] or unsigned int[] (which is discouraged because of poor Java language
* support), setUnsignedArrayAttribute() must be used instead of this method.
@@ -1119,24 +1359,43 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
setAttribute(tag, value, false);
}
- protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
- if (value != null &&
- !(value instanceof Byte || value instanceof Short || value instanceof Integer ||
- value instanceof String || value instanceof Character || value instanceof Float ||
- value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
- value instanceof float[])) {
- throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
- SAMTagUtil.getSingleton().makeStringTag(tag));
+ /**
+ * Checks if the value is allowed as an attribute value. A Long is accepted only when it fits a signed or unsigned 32-bit integer.
+ *
+ * @param value the value to be checked
+ * @return true if the value is valid and false otherwise (a null value is reported as not allowed)
+ */
+ protected static boolean isAllowedAttributeValue(final Object value) {
+ if (value instanceof Byte || value instanceof Short || value instanceof Integer ||
+ value instanceof String || value instanceof Character || value instanceof Float ||
+ value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
+ value instanceof float[]) {
+ return true;
+ }
+
+ // A special case for Longs: we require Long values to fit into either a uint32_t or an int32_t,
+ // as that is what the BAM spec allows.
+ if (value instanceof Long) {
+ return SAMUtils.isValidUnsignedIntegerAttribute((Long) value)
+ || ((Long) value >= Integer.MIN_VALUE && (Long) value <= Integer.MAX_VALUE);
+ }
+ return false;
+ }
+
+ protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value == null) {
- if (this.mAttributes != null) this.mAttributes = this.mAttributes.remove(tag);
+ // setting a tag value to null removes the tag:
+ if (this.mAttributes != null) {
+ this.mAttributes = this.mAttributes.remove(tag);
+ }
+ return;
}
- else {
+
+ if (isAllowedAttributeValue(value)) {
final SAMBinaryTagAndValue tmp;
- if(!isUnsignedArray) {
+ if (!isUnsignedArray) {
tmp = new SAMBinaryTagAndValue(tag, value);
- }
- else {
+ } else {
if (!value.getClass().isArray() || value instanceof float[]) {
throw new SAMException("Attribute type " + value.getClass() +
" cannot be encoded as an unsigned array. Tag: " +
@@ -1144,8 +1403,15 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, value);
}
- if (this.mAttributes == null) this.mAttributes = tmp;
- else this.mAttributes = this.mAttributes.insert(tmp);
+
+ if (this.mAttributes == null) {
+ this.mAttributes = tmp;
+ } else {
+ this.mAttributes = this.mAttributes.insert(tmp);
+ }
+ } else {
+ throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
+ SAMTagUtil.getSingleton().makeStringTag(tag));
}
}
@@ -1255,16 +1521,70 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
return GenomicIndexUtil.reg2bin(alignmentStart, alignmentEnd);
}
+ /**
+ * @return the SAMFileHeader for this record. If the header is null, the following SAMRecord methods may throw
+ * exceptions:
+ * <p><ul>
+ * <li>getReferenceIndex</li>
+ * <li>setReferenceIndex</li>
+ * <li>getMateReferenceIndex</li>
+ * <li>setMateReferenceIndex</li>
+ * </ul><p>
+ * Record comparators (i.e. SAMRecordCoordinateComparator and SAMRecordDuplicateComparator) require records with
+ * non-null header values.
+ * <p>
+ * A record with a null header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ * <p>
+ * SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
+ * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
+ * header associated with the writer.
+ */
public SAMFileHeader getHeader() {
return mHeader;
}
/**
- * Setting header into SAMRecord facilitates conversion btw reference sequence names and indices
+ * Sets the SAMFileHeader for this record. Setting the header into SAMRecord facilitates conversion between reference
+ * sequence names and indices.
+ * <p>
+ * <b>NOTE:</b> If the record has a reference or mate reference name, the corresponding reference and mate reference
+ * indices are resolved and updated using the sequence dictionary in the new header. setHeader does not throw an
+ * exception if either the reference or mate reference name does not appear in the new header's sequence dictionary.
+ * <p>
+ * When the SAMFileHeader is set to null, the reference and mate reference indices are cleared. Therefore, calls to
+ * the following SAMRecord methods on records with a null header may throw IllegalArgumentExceptions:
+ * <ul>
+ * <li>getReferenceIndex</li>
+ * <li>setReferenceIndex</li>
+ * <li>getMateReferenceIndex</li>
+ * <li>setMateReferenceIndex</li>
+ * </ul><p>
+ * Record comparators (i.e. SAMRecordCoordinateComparator and SAMRecordDuplicateComparator) require records with
+ * non-null header values.
+ * <p>
+ * A record with a null header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ * <p>
+ * SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
+ * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
+ * header associated with the writer.
+ *
* @param header contains sequence dictionary for this SAMRecord
*/
public void setHeader(final SAMFileHeader header) {
this.mHeader = header;
+ if (null == header) {
+ // mark the reference indices as unresolved
+ mReferenceIndex = null;
+ mMateReferenceIndex = null;
+ }
+ else {
+ // attempt to resolve the existing reference names and indices against the new sequence dictionary, but
+ // don't throw if the names don't appear in the dictionary
+ setReferenceName(mReferenceName);
+ setMateReferenceName(mMateReferenceName);
+ }
}
/**
@@ -1391,7 +1711,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
public List<SAMValidationError> validateCigar(final long recordNumber) {
List<SAMValidationError> ret = null;
- if (getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
+ if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
ret = SAMUtils.validateCigar(this, getCigar(), getReferenceIndex(), getAlignmentBlocks(), recordNumber, "Read CIGAR");
}
return ret;
@@ -1460,7 +1780,12 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Perform various validations of SAMRecord.
* Note that this method deliberately returns null rather than Collections.emptyList() if there
* are no validation errors, because callers tend to assume that if a non-null list is returned, it is modifiable.
+ *
+ * A record with a null header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ *
* @return null if valid. If invalid, returns a list of error messages.
+ *
*/
public List<SAMValidationError> isValid() {
return isValid(false);
@@ -1470,6 +1795,10 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* Perform various validations of SAMRecord.
* Note that this method deliberately returns null rather than Collections.emptyList() if there
* are no validation errors, because callers tend to assume that if a non-null list is returned, it is modifiable.
+ *
+ * A record with a null header may be validated by the isValid method, but the reference and mate reference indices,
+ * read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
+ *
* @param firstOnly return only the first error if true, false otherwise
* @return null if valid. If invalid, returns a list of error messages.
*/
@@ -1503,7 +1832,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_FLAG_SECOND_OF_PAIR, "Second of pair flag should not be set for unpaired read.", getReadName()));
if (firstOnly) return ret;
}
- if (getMateReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) {
+ if (null != getHeader() && getMateReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) {
if (ret == null) ret = new ArrayList<SAMValidationError>();
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_MATE_REF_INDEX, "MRNM should not be set for unpaired read.", getReadName()));
if (firstOnly) return ret;
@@ -1583,7 +1912,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*/
if (firstOnly) return ret;
}
- if (getHeader().getSequenceDictionary().size() == 0) {
+ if (getHeader() != null && getHeader().getSequenceDictionary().size() == 0) {
if (ret == null) ret = new ArrayList<SAMValidationError>();
ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_SEQUENCE_DICTIONARY, "Empty sequence dictionary.", getReadName()));
if (firstOnly) return ret;
@@ -1606,11 +1935,11 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
// Validate the RG ID is found in header
final String rgId = (String)getAttribute(SAMTagUtil.getSingleton().RG);
- if (rgId != null && getHeader().getReadGroup(rgId) == null) {
- if (ret == null) ret = new ArrayList<SAMValidationError>();
- ret.add(new SAMValidationError(SAMValidationError.Type.READ_GROUP_NOT_FOUND,
- "RG ID on SAMRecord not found in header: " + rgId, getReadName()));
- if (firstOnly) return ret;
+ if (rgId != null && getHeader() != null && getHeader().getReadGroup(rgId) == null) {
+ if (ret == null) ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.READ_GROUP_NOT_FOUND,
+ "RG ID on SAMRecord not found in header: " + rgId, getReadName()));
+ if (firstOnly) return ret;
}
final List<SAMValidationError> errors = isValidReferenceIndexAndPosition(mReferenceIndex, mReferenceName, getAlignmentStart(), false);
if (errors != null) {
@@ -1710,8 +2039,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should != 0 because reference name != *.", isMate), getReadName()));
if (firstOnly) return ret;
}
-
- if (getHeader().getSequenceDictionary().size() > 0) {
+ if (getHeader() != null && getHeader().getSequenceDictionary().size() > 0) {
final SAMSequenceRecord sequence =
(referenceIndex != null? getHeader().getSequence(referenceIndex): getHeader().getSequence(referenceName));
if (sequence == null) {
@@ -1750,6 +2078,58 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
return newRecord;
}
+ /**
+ * Returns a deep copy of the SAM record, with the following exceptions:
+ *
+ * - The header field, which shares the reference with the original record
+ * - The file source field, which will always be set to null in the copy
+ *
+ * Note that some fields, i.e. the cigar elements, alignment blocks, and
+ * indexing bin, are not explicitly populated in the copy since they are lazily
+ * generated on demand.
+ *
+ * Also note that this fails:
+ *
+ * original.deepCopy().equals(original)
+ *
+ * due to the fact that SAMBinaryTagAndValue.equals winds up calling object.equals on the
+ * value field, which uses reference equality.
+ *
+ */
+ public SAMRecord deepCopy() {
+ final SAMRecord newSAM = new SAMRecord(getHeader());
+
+ newSAM.setReadName(getReadName());
+ newSAM.setReadBases(Arrays.copyOf(getReadBases(), getReadLength()));
+ final byte baseQualities[] = getBaseQualities();
+ newSAM.setBaseQualities(Arrays.copyOf(baseQualities, baseQualities.length));
+ newSAM.setReferenceName(getReferenceName());
+ newSAM.setAlignmentStart(getAlignmentStart()); // clears mAlignmentEnd
+ newSAM.setMappingQuality(getMappingQuality());
+ newSAM.setCigarString(getCigarString()); // clears Cigar element and alignmentBlocks
+ newSAM.setFileSource(null);
+
+ newSAM.setFlags(getFlags());
+ newSAM.setMateReferenceName(getMateReferenceName());
+ newSAM.setMateAlignmentStart(getMateAlignmentStart());
+ newSAM.setInferredInsertSize(getInferredInsertSize());
+ if (null != getHeader()) {
+ newSAM.setReferenceIndex(getReferenceIndex());
+ newSAM.setMateReferenceIndex(getMateReferenceIndex());
+ }
+ else {
+ newSAM.mReferenceIndex = null;
+ newSAM.mMateReferenceIndex = null;
+ }
+ newSAM.setValidationStringency(getValidationStringency());
+ SAMBinaryTagAndValue attributes = getBinaryAttributes();
+ if (null != attributes) {
+ newSAM.setAttributes(attributes.deepCopy());
+ }
+
+ return newSAM;
+ }
+
/** Simple toString() that gives a little bit of useful info about the read. */
@Override
public String toString() {
@@ -1803,7 +2183,39 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* shortcut to <pre>SAMFlag.getFlags( this.getFlags() );</pre>
* @returns a set of SAMFlag associated to this sam record */
public final Set<SAMFlag> getSAMFlags() {
- return SAMFlag.getFlags( this.getFlags() );
+ return SAMFlag.getFlags(this.getFlags());
+ }
+
+ /**
+ * Fetches the value of a transient attribute on the SAMRecord, or null if not set.
+ *
+ * The intended use for transient attributes is to store values that are 1-to-1 with the SAMRecord,
+ * may be needed many times and are expensive to compute. These values can be computed lazily and
+ * then stored as transient attributes to avoid frequent re-computation.
+ */
+ public final Object getTransientAttribute(final Object key) {
+ return (this.transientAttributes == null) ? null : this.transientAttributes.get(key);
+ }
+
+ /**
+ * Sets the value of a transient attribute, and returns the previous value if defined (null otherwise).
+ *
+ * The intended use for transient attributes is to store values that are 1-to-1 with the SAMRecord,
+ * may be needed many times and are expensive to compute. These values can be computed lazily and
+ * then stored as transient attributes to avoid frequent re-computation.
+ */
+ public final Object setTransientAttribute(final Object key, final Object value) {
+ if (this.transientAttributes == null) this.transientAttributes = new HashMap<Object,Object>();
+ return this.transientAttributes.put(key, value);
+ }
+
+ /**
+ * Removes a transient attribute if it is stored, and returns the stored value. If there is not
+ * a stored value, will return null.
+ */
+ public final Object removeTransientAttribute(final Object key) {
+ if (this.transientAttributes != null) return this.transientAttributes.remove(key);
+ else return null;
}
}
diff --git a/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java b/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
index 717609c..24ebb90 100644
--- a/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
@@ -75,10 +75,16 @@ public class SAMRecordCoordinateComparator implements SAMRecordComparator {
* Less stringent compare method than the regular compare. If the two records
* are equal enough that their ordering in a sorted SAM file would be arbitrary,
* this method returns 0. If read is paired and unmapped, use the mate mapping to sort.
+ * Records being compared must have non-null SAMFileHeaders.
*
* @return negative if samRecord1 < samRecord2, 0 if equal, else positive
*/
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+
+ if (null == samRecord1.getHeader() || null == samRecord2.getHeader()) {
+ throw new IllegalArgumentException("Records must have non-null SAMFileHeaders to be compared");
+ }
+
final int refIndex1 = samRecord1.getReferenceIndex();
final int refIndex2 = samRecord2.getReferenceIndex();
if (refIndex1 == -1) {
diff --git a/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java b/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
index 99ae9a5..6de77da 100644
--- a/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
@@ -35,10 +35,15 @@ import java.util.Map;
* There are three orderings provided by this comparator: compare, duplicateSetCompare, and fileOrderCompare.
*
* Specify the headers when constructing this comparator if you would like to consider the library as the major sort key.
+ * The records being compared must also have non-null SAMFileHeaders.
*
* @author nhomer
*/
public class SAMRecordDuplicateComparator implements SAMRecordComparator {
+ /** An enum to provide type-safe keys for transient attributes the comparator puts on SAMRecords. */
+ private static enum Attr {
+ LibraryId, ReadCoordinate, MateCoordinate
+ }
private static final byte FF = 0, FR = 1, F = 2, RF = 3, RR = 4, R = 5;
@@ -67,6 +72,18 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
}
/**
+ * Populates the LibraryId, ReadCoordinate and MateCoordinate transient attributes on each record; a record that already has LibraryId is skipped.
+ */
+ private void populateTransientAttributes(final SAMRecord... recs) {
+ for (final SAMRecord rec : recs) {
+ if (rec.getTransientAttribute(Attr.LibraryId) != null) continue;
+ rec.setTransientAttribute(Attr.LibraryId, getLibraryId(rec));
+ rec.setTransientAttribute(Attr.ReadCoordinate, rec.getReadNegativeStrandFlag() ? rec.getUnclippedEnd() : rec.getUnclippedStart());
+ rec.setTransientAttribute(Attr.MateCoordinate, getMateCoordinate(rec));
+ }
+ }
+
+ /**
* Gets the library name from the header for the record. If the RG tag is not present on
* the record, or the library isn't denoted on the read group, a constant string is
* returned.
@@ -75,10 +92,13 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
final String readGroupId = (String) rec.getAttribute("RG");
if (readGroupId != null) {
- final SAMReadGroupRecord rg = rec.getHeader().getReadGroup(readGroupId);
- if (rg != null) {
- final String libraryName = rg.getLibrary();
- if (null != libraryName) return libraryName;
+ final SAMFileHeader samHeader = rec.getHeader();
+ if (null != samHeader) {
+ final SAMReadGroupRecord rg = samHeader.getReadGroup(readGroupId);
+ if (rg != null) {
+ final String libraryName = rg.getLibrary();
+ if (null != libraryName) return libraryName;
+ }
}
}
@@ -198,6 +218,7 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
* properly choose the first end for optical duplicate identification when both ends are mapped to the same position etc.
*/
public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
+ populateTransientAttributes(samRecord1, samRecord2);
int cmp;
// temporary variables for comparisons
@@ -234,15 +255,20 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
*
*/
private int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2, final boolean collapseOrientation, final boolean considerNumberOfEndsMappedAndPairing) {
+ populateTransientAttributes(samRecord1, samRecord2);
int cmp;
+ if (null == samRecord1.getHeader() || null == samRecord2.getHeader()) {
+ throw new IllegalArgumentException("Records must have non-null SAMFileHeaders to be compared");
+ }
+
// temporary variables for comparisons
int samRecord1Value, samRecord2Value;
// library identifier
{
- samRecord1Value = getLibraryId(samRecord1);
- samRecord2Value = getLibraryId(samRecord2);
+ samRecord1Value = (Short) samRecord1.getTransientAttribute(Attr.LibraryId);
+ samRecord2Value = (Short) samRecord2.getTransientAttribute(Attr.LibraryId);
cmp = samRecord1Value - samRecord2Value;
}
// reference index
@@ -262,8 +288,8 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
}
// read coordinate
if (cmp == 0) {
- samRecord1Value = samRecord1.getReadNegativeStrandFlag() ? samRecord1.getUnclippedEnd() : samRecord1.getUnclippedStart();
- samRecord2Value = samRecord2.getReadNegativeStrandFlag() ? samRecord2.getUnclippedEnd() : samRecord2.getUnclippedStart();
+ samRecord1Value = (Integer) samRecord1.getTransientAttribute(Attr.ReadCoordinate);
+ samRecord2Value = (Integer) samRecord2.getTransientAttribute(Attr.ReadCoordinate);
cmp = samRecord1Value - samRecord2Value;
}
// orientation
@@ -287,8 +313,8 @@ public class SAMRecordDuplicateComparator implements SAMRecordComparator {
}
// mate's coordinate
if (cmp == 0) {
- samRecord1Value = getMateCoordinate(samRecord1);
- samRecord2Value = getMateCoordinate(samRecord2);
+ samRecord1Value = (Integer) samRecord1.getTransientAttribute(Attr.MateCoordinate);
+ samRecord2Value = (Integer) samRecord2.getTransientAttribute(Attr.MateCoordinate);;
cmp = samRecord1Value - samRecord2Value;
}
}
diff --git a/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java b/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
new file mode 100644
index 0000000..fc250e9
--- /dev/null
+++ b/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
@@ -0,0 +1,68 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.Murmur3;
+
+/**
+ * SAMRecord comparator that provides an ordering based on a hash of the queryname. Has
+ * the useful property that reads with the same name will be grouped together, but that
+ * reads appear in an otherwise random order. Useful for when the read names in a BAM
+ * are correlated to something else (e.g. position, read group), making a straight
+ * queryname sort undesirable.
+ *
+ * @author Tim Fennell
+ */
+public class SAMRecordQueryHashComparator extends SAMRecordQueryNameComparator {
+ private final Murmur3 hasher = new Murmur3(42);
+
+ /**
+ * Compares two records based on an integer hash of their read names. If the hash
+ * values are equal, falls back to the behaviour of SAMRecordQueryNameComparator
+ * to break the tie.
+ */
+ @Override
+ public int compare(final SAMRecord lhs, final SAMRecord rhs) {
+ final int retval = compareHashes(lhs, rhs);
+ if (retval == 0) return super.compare(lhs, rhs);
+ else return retval;
+ }
+
+ /**
+ * Compares two records based on an integer hash of their read names. If the hash
+ * values are equal, falls back to the behaviour of SAMRecordQueryNameComparator
+ * to break the tie.
+ */
+ @Override
+ public int fileOrderCompare(final SAMRecord lhs, final SAMRecord rhs) {
+ final int retval = compareHashes(lhs, rhs);
+ if (retval == 0) return super.fileOrderCompare(lhs, rhs);
+ else return retval;
+ }
+
+ /** Compares the read-name hash values for two records; Integer.compare avoids allocating a boxed Integer per call. */
+ private int compareHashes(final SAMRecord lhs, final SAMRecord rhs) {
+ return Integer.compare(this.hasher.hashUnencodedChars(lhs.getReadName()), this.hasher.hashUnencodedChars(rhs.getReadName()));
+ }
+}
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/samtools/SAMTag.java
index 7dac5a2..fa25728 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/samtools/SAMTag.java
@@ -41,6 +41,7 @@ public enum SAMTag {
E2,
FI,
FS,
+ FT,
FZ,
GC, // for backwards compatibility
GS, // for backwards compatibility
@@ -60,6 +61,8 @@ public enum SAMTag {
OQ,
OP,
OC,
+ OF,
+ OR,
PG,
PQ,
PT,
diff --git a/src/java/htsjdk/samtools/SAMUtils.java b/src/java/htsjdk/samtools/SAMUtils.java
index b751ef0..14e2246 100644
--- a/src/java/htsjdk/samtools/SAMUtils.java
+++ b/src/java/htsjdk/samtools/SAMUtils.java
@@ -23,6 +23,8 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.util.BinaryCodec;
+import htsjdk.samtools.util.CigarUtil;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.RuntimeEOFException;
@@ -546,6 +548,14 @@ public final class SAMUtils {
}
}
+ /**
+ * Strip mapping information from a SAMRecord.
+ *
+ * WARNING: by clearing the secondary and supplementary flags,
+ * this may have the effect of producing multiple distinct records with the
+ * same read name and flags, which may lead to invalid SAM/BAM output.
+ * Callers of this method should make sure to deal with this issue.
+ */
public static void makeReadUnmapped(final SAMRecord rec) {
if (rec.getReadNegativeStrandFlag()) {
SAMRecordUtil.reverseComplement(rec);
@@ -558,10 +568,33 @@ public final class SAMUtils {
rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
rec.setInferredInsertSize(0);
rec.setNotPrimaryAlignmentFlag(false);
+ rec.setSupplementaryAlignmentFlag(false);
rec.setProperPairFlag(false);
rec.setReadUnmappedFlag(true);
}
+ /**
+ * Strip mapping information from a SAMRecord, but preserve it in the 'O' tags if it isn't already set.
+ */
+ public static void makeReadUnmappedWithOriginalTags(final SAMRecord rec) {
+ if (!hasOriginalMappingInformation(rec)) {
+ rec.setAttribute(SAMTag.OP.name(), rec.getAlignmentStart());
+ rec.setAttribute(SAMTag.OC.name(), rec.getCigarString());
+ rec.setAttribute(SAMTag.OF.name(), rec.getFlags());
+ rec.setAttribute(SAMTag.OR.name(), rec.getReferenceName());
+ }
+ makeReadUnmapped(rec);
+ }
+
+ /**
+ * See if any tags pertaining to original mapping information have been set.
+ */
+ public static boolean hasOriginalMappingInformation(final SAMRecord rec) {
+ return rec.getAttribute(SAMTag.OP.name()) != null
+ || rec.getAttribute(SAMTag.OC.name()) != null
+ || rec.getAttribute(SAMTag.OF.name()) != null
+ || rec.getAttribute(SAMTag.OR.name()) != null;
+ }
/**
* Determines if a cigar has any element that both consumes read bases and consumes reference bases
@@ -579,9 +612,15 @@ public final class SAMUtils {
/**
* Tests if the provided record is mapped entirely beyond the end of the reference (i.e., the alignment start is greater than the
* length of the sequence to which the record is mapped).
+ * @param record must not have a null SamFileHeader
*/
public static boolean recordMapsEntirelyBeyondEndOfReference(final SAMRecord record) {
- return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart();
+ if (record.getHeader() == null) {
+ throw new SAMException("A non-null SAMHeader is required to resolve the mapping position: " + record.getReadName());
+ }
+ else {
+ return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart();
+ }
}
/**
@@ -865,14 +904,22 @@ public final class SAMUtils {
// Don't know line number, and don't want to force read name to be decoded.
List<SAMValidationError> ret = cigar.isValid(rec.getReadName(), recordNumber);
if (referenceIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- final SAMSequenceRecord sequence = rec.getHeader().getSequence(referenceIndex);
- final int referenceSequenceLength = sequence.getSequenceLength();
- for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
- if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
- if (ret == null) ret = new ArrayList<SAMValidationError>();
- ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
- cigarTypeName + " M operator maps off end of reference", rec.getReadName(), recordNumber));
- break;
+ SAMFileHeader samHeader = rec.getHeader();
+ if (null == samHeader) {
+ if (ret == null) ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_HEADER,
+ cigarTypeName + " A non-null SAMHeader is required to validate cigar elements for: ", rec.getReadName(), recordNumber));
+ }
+ else {
+ final SAMSequenceRecord sequence = samHeader.getSequence(referenceIndex);
+ final int referenceSequenceLength = sequence.getSequenceLength();
+ for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
+ if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
+ if (ret == null) ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
+ cigarTypeName + " M operator maps off end of reference", rec.getReadName(), recordNumber));
+ break;
+ }
}
}
}
@@ -939,4 +986,113 @@ public final class SAMUtils {
else name = name + ":" + record.getReadName();
return name;
}
+
+ /**
+ * Returns the number of bases that need to be clipped due to overlapping pairs. If the record is not paired,
+ * or the given record's start position is greater than its mate's start position, zero is automatically returned.
+ * NB: This method assumes that the record's mate is not contained within the given record's alignment.
+ *
+ * @param rec the record to examine; the mate's position is taken from the mate fields of this record.
+ * @return the number of bases at the end of the read that need to be clipped such that there would be no overlapping bases with its mate.
+ * Read bases include only those from insertion, match, or mismatch Cigar operators.
+ */
+ public static int getNumOverlappingAlignedBasesToClip(final SAMRecord rec) {
+ // NB: ignores how to handle supplemental records when present for both ends by just using the mate information in the record.
+
+ if (!rec.getReadPairedFlag() || rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) return 0;
+
+ // Only clip records that are left-most in genomic order and overlapping.
+ if (rec.getMateAlignmentStart() < rec.getAlignmentStart()) return 0; // right-most, so ignore.
+
+ // Find the number of read bases after the given mate's alignment start.
+ int numBasesToClip = 0;
+ final int refStartPos = rec.getMateAlignmentStart(); // relative reference position after which we should start clipping
+ final Cigar cigar = rec.getCigar();
+ int refPos = rec.getAlignmentStart();
+ for (final CigarElement el : cigar.getCigarElements()) {
+ final CigarOperator operator = el.getOperator();
+ final int refBasesLength = operator.consumesReferenceBases() ? el.getLength() : 0;
+ if (refStartPos <= refPos + refBasesLength - 1) { // add to clipped bases
+ if (operator == CigarOperator.MATCH_OR_MISMATCH) { // M
+ if (refStartPos < refPos) numBasesToClip += refBasesLength; // use all of the bases
+ else numBasesToClip += (refPos + refBasesLength) - refStartPos; // since the mate's alignment start can be in the middle of a cigar element
+ }
+ else if (operator == CigarOperator.SOFT_CLIP || operator == CigarOperator.HARD_CLIP || operator == CigarOperator.PADDING || operator == CigarOperator.SKIPPED_REGION) {
+ // ignore
+ }
+ else { // ID
+ numBasesToClip += operator.consumesReadBases() ? el.getLength() : 0; // clip all the bases in the read from this operator
+ }
+ }
+ refPos += refBasesLength;
+ }
+
+ if (numBasesToClip < 0) return 0; // left-most but not overlapping
+
+ return numBasesToClip;
+ }
+
+ /**
+ * Returns a (possibly new) record that has been clipped if it is a mapped, paired read that has overlapping bases with its mate.
+ * See {@link #getNumOverlappingAlignedBasesToClip(SAMRecord)} for how the number of overlapping bases is computed.
+ * NB: this does not properly consider a cigar like: 100M20S10H.
+ * NB: This method assumes that the record's mate is not contained within the given record's alignment.
+ *
+ * @param record the record from which to clip bases.
+ * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly.
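+ * @return the given record itself, or a modified clone of it when noSideEffects is true and a change was needed.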
+ */
+ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, final boolean noSideEffects) {
+ return clipOverlappingAlignedBases(record, getNumOverlappingAlignedBasesToClip(record), noSideEffects);
+ }
+
+ /**
+ * Returns a (possibly new) SAMRecord with the given number of bases soft-clipped at the end of the read if it is a mapped,
+ * paired read that has overlapping bases with its mate.
+ * NB: this does not properly consider a cigar like: 100M20S10H.
+ * NB: This method assumes that the record's mate is not contained within the given record's alignment.
+ *
+ * @param record the record from which to clip bases.
+ * @param numOverlappingBasesToClip the number of bases to clip at the end of the read.
+ * @param noSideEffects if true a modified clone of the original record is returned, otherwise we modify the record directly.
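+ * @return the given record itself, or a modified clone of it when noSideEffects is true and a change was needed.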
+ */
+ public static SAMRecord clipOverlappingAlignedBases(final SAMRecord record, final int numOverlappingBasesToClip, final boolean noSideEffects) {
+ // NB: ignores how to handle supplemental records when present for both ends by just using the mate information in the record.
+
+ if (numOverlappingBasesToClip <= 0 || record.getReadUnmappedFlag() || record.getMateUnmappedFlag()) return record;
+
+ try {
+ final SAMRecord rec = noSideEffects ? ((SAMRecord)record.clone()) : record;
+
+ // watch out for when the second read overlaps all of the first read
+ if (rec.getMateAlignmentStart() <= rec.getAlignmentStart()) { // make it unmapped
+ rec.setReadUnmappedFlag(true);
+ return rec;
+ }
+
+ // 1-based index of first base in read to clip.
+ int clipFrom = rec.getReadLength() - numOverlappingBasesToClip + 1;
+ // we have to check if the last cigar element is soft-clipping, so we can subtract that from clipFrom
+ final CigarElement cigarElement = rec.getCigar().getCigarElement(rec.getCigarLength()-1);
+ if (CigarOperator.SOFT_CLIP == cigarElement.getOperator()) clipFrom -= cigarElement.getLength();
+ // FIXME: does not properly consider a cigar like: 100M20S10H
+
+ // clip it, clip it good
+ rec.setCigar(new Cigar(CigarUtil.softClipEndOfRead(clipFrom, rec.getCigar().getCigarElements())));
+ return rec;
+ } catch (final CloneNotSupportedException e) {
+ throw new SAMException(e.getMessage(), e);
+ }
+ }
+
+ /**
+ * Checks if a long attribute value is within the allowed range of a 32-bit unsigned integer.
+ *
+ * @param value a long value to check
+ * @return true if value is >= 0 and <= {@link BinaryCodec#MAX_UINT}, and false otherwise
+ */
+ public static boolean isValidUnsignedIntegerAttribute(long value) {
+ return value >= 0 && value <= BinaryCodec.MAX_UINT;
+ }
}
diff --git a/src/java/htsjdk/samtools/SRAFileReader.java b/src/java/htsjdk/samtools/SRAFileReader.java
new file mode 100644
index 0000000..14d7df8
--- /dev/null
+++ b/src/java/htsjdk/samtools/SRAFileReader.java
@@ -0,0 +1,306 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/**
+ * Created by andrii.nikitiuk on 8/11/15.
+ */
+
+package htsjdk.samtools;
+
+import htsjdk.samtools.sra.ReferenceCache;
+import htsjdk.samtools.sra.SRAAccession;
+import htsjdk.samtools.util.CloseableIterator;
+
+import htsjdk.samtools.SamReader.Type;
+
+import htsjdk.samtools.util.Log;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.ReadGroupIterator;
+import ngs.ReferenceIterator;
+import ngs.Reference;
+
+import java.util.ArrayList;
+import java.util.List;
+
+
+public class SRAFileReader extends SamReader.ReaderImplementation implements SamReader.Indexing {
+ private static final Log log = Log.getInstance(SRAFileReader.class);
+ private SRAAccession acc;
+ private SAMFileHeader virtualHeader;
+ private ReadCollection run;
+ private ValidationStringency validationStringency;
+ private SRAIterator.RecordRangeInfo recordRangeInfo;
+ private SRAIndex index;
+ private ReferenceCache cachedReferences;
+
+ public SRAFileReader(final SRAAccession acc) {
+ this.acc = acc;
+
+ if (!acc.isValid()) {
+ throw new IllegalArgumentException("Invalid SRA accession was passed to SRA reader: " + acc);
+ }
+
+ try {
+ run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString());
+ virtualHeader = loadSamHeader();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+
+ cachedReferences = new ReferenceCache(run, virtualHeader);
+ recordRangeInfo = SRAIterator.getRecordsRangeInfo(run);
+ index = new SRAIndex(virtualHeader, recordRangeInfo);
+ }
+
+ @Override
+ public Type type() {
+ return Type.SRA_TYPE;
+ }
+
+ @Override
+ public boolean hasIndex() {
+ return true;
+ }
+
+ @Override
+ public BAMIndex getIndex() {
+ return index;
+ }
+
+ @Override
+ public SAMFileHeader getFileHeader() {
+ return virtualHeader;
+ }
+
+ @Override
+ public CloseableIterator<SAMRecord> getIterator() {
+ return getIterator(getFilePointerSpanningReads());
+ }
+
+ @Override
+ public CloseableIterator<SAMRecord> getIterator(SAMFileSpan chunks) {
+ if (run == null) {
+ throw new RuntimeException("Cannot create iterator - SRA run is uninitialized");
+ }
+
+ if (virtualHeader == null) {
+ throw new RuntimeException("Cannot create iterator - SAM file header is uninitialized");
+ }
+
+ List<Chunk> chunkList = ((BAMFileSpan) chunks).getChunks();
+
+ final SRAIterator newIterator = new SRAIterator(acc, run, virtualHeader, cachedReferences, recordRangeInfo, chunkList);
+ if (validationStringency != null) {
+ newIterator.setValidationStringency(validationStringency);
+ }
+
+ return newIterator;
+ }
+
+ @Override
+ public SAMFileSpan getFilePointerSpanningReads() {
+ if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
+ throw new RuntimeException("Cannot create file span - SRA file is empty");
+ }
+
+ return new BAMFileSpan(new Chunk(0, recordRangeInfo.getTotalRecordRangeLength()));
+ }
+
+ @Override
+ public CloseableIterator<SAMRecord> query(QueryInterval[] intervals, boolean contained) {
+ BAMFileSpan span = new BAMFileSpan();
+ BrowseableBAMIndex index = getBrowseableIndex();
+
+ for (QueryInterval interval : intervals) {
+ BAMFileSpan intervalSpan;
+ if (!contained) {
+ intervalSpan = index.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
+
+ } else {
+ intervalSpan = getSpanContained(interval.referenceIndex, interval.start, interval.end);
+ }
+ span.add(intervalSpan);
+ }
+
+ return getIterator(span);
+ }
+
+ @Override
+ public CloseableIterator<SAMRecord> queryAlignmentStart(String sequence, int start) {
+ int sequenceIndex = virtualHeader.getSequenceIndex(sequence);
+ if (sequenceIndex == -1) {
+ throw new IllegalArgumentException("Unknown sequence '" + sequence + "' was passed to SRAFileReader");
+ }
+
+ return getIterator(getSpanContained(sequenceIndex, start, -1));
+ }
+
+ @Override
+ public CloseableIterator<SAMRecord> queryUnmapped() {
+ if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
+ throw new RuntimeException("Cannot create file span - SRA file is empty");
+ }
+
+ SAMFileSpan span = new BAMFileSpan(new Chunk(recordRangeInfo.getTotalReferencesLength(), recordRangeInfo.getTotalRecordRangeLength()));
+ return getIterator(span);
+ }
+
+ @Override
+ public void close() { }
+
+ @Override
+ public ValidationStringency getValidationStringency() {
+ return validationStringency;
+ }
+
+
+ /** INDEXING */
+
+
+ /**
+ * Returns true if the supported index is browseable, meaning the bins in it can be traversed
+ * and chunk data inspected and retrieved.
+ *
+ * @return True if the index supports the BrowseableBAMIndex interface. False otherwise.
+ */
+ @Override
+ public boolean hasBrowseableIndex() {
+ return true;
+ }
+
+ /**
+ * Gets an index tagged with the BrowseableBAMIndex interface. Throws an exception if no such
+ * index is available.
+ *
+ * @return An index with a browseable interface, if possible.
+ * @throws SAMException if no such index is available.
+ */
+ @Override
+ public BrowseableBAMIndex getBrowseableIndex() {
+ return index;
+ }
+
+ /**
+ * Iterate through the given chunks in the file.
+ *
+ * @param chunks List of chunks for which to retrieve data.
+ * @return An iterator over the given chunks.
+ */
+ @Override
+ public SAMRecordIterator iterator(final SAMFileSpan chunks) {
+ CloseableIterator<SAMRecord> it = getIterator(chunks);
+ if (it == null) {
+ return null;
+ }
+ return (SAMRecordIterator) it;
+ }
+
+ /** ReaderImplementation */
+ @Override
+ void enableFileSource(final SamReader reader, final boolean enabled) {
+ log.info("enableFileSource is not supported");
+ }
+
+ @Override
+ void enableIndexCaching(final boolean enabled) {
+ log.info("enableIndexCaching is not supported");
+ }
+
+ @Override
+ void enableIndexMemoryMapping(final boolean enabled) {
+ log.info("enableIndexMemoryMapping is not supported");
+ }
+
+ @Override
+ void enableCrcChecking(final boolean enabled) {
+ log.info("enableCrcChecking is not supported");
+ }
+
+ @Override
+ void setSAMRecordFactory(final SAMRecordFactory factory) {
+ log.info("setSAMRecordFactory is not supported");
+ }
+
+ @Override
+ void setValidationStringency(final ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
+
+ protected SRAIterator.RecordRangeInfo getRecordsRangeInfo() {
+ return recordRangeInfo;
+ }
+
+ private SAMFileHeader loadSamHeader() throws ErrorMsg {
+ if (run == null) {
+ throw new RuntimeException("Cannot load SAMFileHeader - SRA run is uninitialized");
+ }
+
+ String runName = run.getName();
+
+ SAMFileHeader header = new SAMFileHeader();
+ header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+
+ ReadGroupIterator itRg = run.getReadGroups();
+ while (itRg.nextReadGroup()) {
+ String rgName = itRg.getName();
+ if (rgName.isEmpty())
+ rgName = runName;
+ SAMReadGroupRecord rg = new SAMReadGroupRecord(rgName);
+ rg.setSample(runName);
+ header.addReadGroup(rg);
+ }
+
+ ReferenceIterator itRef = run.getReferences();
+ while (itRef.nextReference()) {
+ header.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength()));
+ }
+
+ return header;
+ }
+
+ private BAMFileSpan getSpanContained(int sequenceIndex, long start, long end) {
+ if (recordRangeInfo.getTotalRecordRangeLength() <= 0) {
+ throw new RuntimeException("Cannot create file span - SRA file is empty");
+ }
+
+ long sequenceOffset = recordRangeInfo.getReferenceOffsets().get(sequenceIndex);
+ long sequenceLength = recordRangeInfo.getReferenceLengthsAligned().get(sequenceIndex);
+ if (end == -1) {
+ end = sequenceLength;
+ }
+
+ if (start > sequenceLength) {
+ throw new IllegalArgumentException("Sequence start position is larger than its length");
+ }
+
+ if (end > sequenceLength) {
+ throw new IllegalArgumentException("Sequence end position is larger than its length");
+ }
+
+ return new BAMFileSpan(new Chunk(sequenceOffset + start, sequenceOffset + end));
+ }
+}
diff --git a/src/java/htsjdk/samtools/SRAIndex.java b/src/java/htsjdk/samtools/SRAIndex.java
new file mode 100644
index 0000000..b74ee63
--- /dev/null
+++ b/src/java/htsjdk/samtools/SRAIndex.java
@@ -0,0 +1,257 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Emulates BAM index so that we can request chunks of records from SRAFileReader
+ *
+ * Here is how it works:
+ * SRA allows reading of alignments by Reference position fast, so we divide our "file" range for alignments as
+ * a length of all references. Reading unaligned reads is then fast if we use read positions for lookup and (internally)
+ * filter out aligned fragments.
+ *
+ * Total SRA "file" range is calculated as sum of all reference lengths plus number of reads (both aligned and unaligned)
+ * in SRA archive.
+ *
+ * Now, we can use Chunks to lookup for aligned and unaligned fragments.
+ *
+ * We emulate BAM index bins by mapping SRA reference positions to bin numbers.
+ * And then we map from bin number to list of chunks, which represent SRA "file" positions (which are simply reference
+ * positions).
+ *
+ * We only emulate last level of BAM index bins (and they refer to a portion of reference SRA_BIN_SIZE bases long).
+ * For all other bins a RuntimeException will be thrown (but since nobody else creates bins, except SRAIndex class
+ * that is fine).
+ *
+ * But since the last level of bins was not meant to refer to fragments that only partially overlap bin reference
+ * positions, we also return chunk that goes 5000 bases left before beginning of the bin to assure fragments that
+ * start before the bin positions but still overlap with it can be retrieved by SRA reader.
+ * Later we will add support to NGS API to get a maximum number of bases that we need to go left to retrieve such fragments.
+ *
+ * Created by andrii.nikitiuk on 9/4/15.
+ */
+public class SRAIndex implements BrowseableBAMIndex {
+ /**
+ * Number of reference bases bins in last level can represent
+ */
+ public static final int SRA_BIN_SIZE = 16 * 1024;
+
+ /**
+ * Chunks of that size will be created when using SRA index
+ */
+ public static final int SRA_CHUNK_SIZE = 50000;
+
+ /**
+ * First bin number in last level
+ */
+ private static final int SRA_BIN_INDEX_OFFSET = GenomicIndexUtil.LEVEL_STARTS[GenomicIndexUtil.LEVEL_STARTS.length - 1];
+
+ /**
+ * How many bases should we go left on the reference to find all fragments that start before requested interval
+ * but overlap with it
+ */
+ private static final int MAX_FRAGMENT_OVERLAP = 5000;
+
+ private SAMFileHeader header;
+ private SRAIterator.RecordRangeInfo recordRangeInfo;
+
+ /**
+ * @param header sam header
+ * @param recordRangeInfo info about record ranges within SRA archive
+ */
+ public SRAIndex(SAMFileHeader header, SRAIterator.RecordRangeInfo recordRangeInfo) {
+ this.header = header;
+ this.recordRangeInfo = recordRangeInfo;
+ }
+
+ /**
+ * Gets the size (number of bins in) a given level of a BAM index.
+ * @param levelNumber Level for which to inspect the size.
+ * @return Size of the given level.
+ */
+ @Override
+ public int getLevelSize(int levelNumber) {
+ if (levelNumber == GenomicIndexUtil.LEVEL_STARTS.length - 1)
+ return GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[levelNumber]-1;
+ else
+ return GenomicIndexUtil.LEVEL_STARTS[levelNumber+1] - GenomicIndexUtil.LEVEL_STARTS[levelNumber];
+ }
+
+ /**
+ * SRA only operates on bins from last level
+ * @param bin The bin for which to determine the level.
+ * @return bin level
+ */
+ @Override
+ public int getLevelForBin(Bin bin) {
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+ return GenomicIndexUtil.LEVEL_STARTS.length - 1;
+ }
+
+ /**
+ * Gets the first locus that this bin can index into.
+ * @param bin The bin to test.
+ * @return first position that is associated with the given bin number
+ */
+ @Override
+ public int getFirstLocusInBin(Bin bin) {
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+
+ return (bin.getBinNumber() - SRA_BIN_INDEX_OFFSET) * SRA_BIN_SIZE + 1;
+ }
+
+ /**
+ * Gets the last locus that this bin can index into.
+ * @param bin The bin to test.
+ * @return last position that is associated with the given bin number
+ */
+ @Override
+ public int getLastLocusInBin(Bin bin) {
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+
+ return (bin.getBinNumber() - SRA_BIN_INDEX_OFFSET + 1) * SRA_BIN_SIZE;
+ }
+
+ /**
+ * Provides a list of last-level bins that contain bases at the requested positions
+ * @param referenceIndex sequence of desired SAMRecords
+ * @param startPos 1-based start of the desired interval, inclusive
+ * @param endPos 1-based end of the desired interval, inclusive
+ * @return a list of bins that contain relevant data
+ */
+ @Override
+ public BinList getBinsOverlapping(int referenceIndex, int startPos, int endPos) {
+ long refLength = recordRangeInfo.getReferenceLengthsAligned().get(referenceIndex);
+
+ // convert to chunk address space within reference (0-based start, exclusive end)
+ long refStartPos = startPos - 1;
+ long refEndPos = endPos;
+ if (refEndPos >= refLength) {
+ throw new RuntimeException("refEndPos is larger than reference length");
+ }
+
+ int firstBinNumber = (int) (refStartPos / SRA_BIN_SIZE);
+ int lastBinNumber = (int) ((refEndPos - 1) / SRA_BIN_SIZE);
+
+ int numberOfBins = (int) (refLength / SRA_BIN_SIZE) + 1;
+
+ BitSet binBitSet = new BitSet();
+ binBitSet.set(0, SRA_BIN_INDEX_OFFSET, false);
+ if (firstBinNumber > 0) {
+ binBitSet.set(SRA_BIN_INDEX_OFFSET, SRA_BIN_INDEX_OFFSET + firstBinNumber, false);
+ }
+ binBitSet.set(SRA_BIN_INDEX_OFFSET + firstBinNumber, SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, true);
+ if (lastBinNumber + 1 < numberOfBins) {
+ binBitSet.set(SRA_BIN_INDEX_OFFSET + lastBinNumber + 1, SRA_BIN_INDEX_OFFSET + numberOfBins, false);
+ }
+
+ return new BinList(referenceIndex, binBitSet);
+ }
+
+ @Override
+ public BAMFileSpan getSpanOverlapping(Bin bin) {
+ return new BAMFileSpan(getBinChunks(bin));
+ }
+
+ @Override
+ public BAMFileSpan getSpanOverlapping(int referenceIndex, int startPos, int endPos) {
+ BinList binList = getBinsOverlapping(referenceIndex, startPos, endPos);
+ BAMFileSpan result = new BAMFileSpan();
+ Set<Chunk> savedChunks = new HashSet<Chunk>();
+ for (Bin bin : binList) {
+ List<Chunk> chunks = getSpanOverlapping(bin).getChunks();
+ for (Chunk chunk : chunks) {
+ if (!savedChunks.contains(chunk)) {
+ savedChunks.add(chunk);
+ result.add(chunk);
+ }
+ }
+ }
+
+ return result;
+ }
+
+ /**
+ * @return a position where aligned fragments end
+ */
+ @Override
+ public long getStartOfLastLinearBin() {
+ int numberOfReferences = recordRangeInfo.getReferenceLengthsAligned().size();
+ long refOffset = recordRangeInfo.getReferenceOffsets().get(numberOfReferences - 1);
+ long lastChunkNumber = recordRangeInfo.getReferenceLengthsAligned().get(numberOfReferences - 1) / SRA_CHUNK_SIZE;
+ return lastChunkNumber * SRA_CHUNK_SIZE + refOffset;
+ }
+
+ @Override
+ public BAMIndexMetaData getMetaData(int reference) {
+ throw new UnsupportedOperationException("Getting of BAM index metadata for SRA is not implemented");
+ }
+
+ @Override
+ public void close() { }
+
+ /**
+ * @param bin Requested bin
+ * @return chunks that represent all bases of requested bin
+ */
+ private List<Chunk> getBinChunks(Bin bin) {
+ if (bin.containsChunks()) {
+ return bin.getChunkList();
+ }
+
+ if (bin.getBinNumber() < SRA_BIN_INDEX_OFFSET) {
+ throw new RuntimeException("SRA only supports bins from the last level");
+ }
+ int binNumber = bin.getBinNumber() - SRA_BIN_INDEX_OFFSET;
+ long refOffset = recordRangeInfo.getReferenceOffsets().get(bin.getReferenceSequence());
+
+ // move requested position MAX_FRAGMENT_OVERLAP bases behind, so that we take all the reads that overlap requested position
+ int firstChunkCorrection = binNumber == 0 ? 0 : -MAX_FRAGMENT_OVERLAP;
+
+ long binGlobalOffset = binNumber * SRA_BIN_SIZE + refOffset;
+ long firstChunkNumber = (binGlobalOffset + firstChunkCorrection) / SRA_CHUNK_SIZE;
+ long lastChunkNumber = (binGlobalOffset + SRA_BIN_SIZE - 1) / SRA_CHUNK_SIZE;
+ List<Chunk> chunks = new ArrayList<Chunk>();
+ for (long chunkNumber = firstChunkNumber; chunkNumber <= lastChunkNumber; chunkNumber++) {
+ chunks.add(new Chunk(chunkNumber * SRA_CHUNK_SIZE, (chunkNumber + 1) * SRA_CHUNK_SIZE));
+ }
+
+ return chunks;
+ }
+}
diff --git a/src/java/htsjdk/samtools/SRAIterator.java b/src/java/htsjdk/samtools/SRAIterator.java
new file mode 100644
index 0000000..1347e1c
--- /dev/null
+++ b/src/java/htsjdk/samtools/SRAIterator.java
@@ -0,0 +1,248 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+/**
+ * Created by andrii.nikitiuk on 8/11/15.
+ */
+
+package htsjdk.samtools;
+
+import htsjdk.samtools.SAMFileHeader.SortOrder;
+
+import htsjdk.samtools.sra.ReferenceCache;
+import htsjdk.samtools.sra.SRAAccession;
+import htsjdk.samtools.sra.SRAAlignmentIterator;
+import htsjdk.samtools.sra.SRAUnalignmentIterator;
+import htsjdk.samtools.sra.SRAUtils;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * SRA iterator which returns SAMRecords for requested list of chunks.
+ * Chunks are served one at a time: a chunk that starts before the total references length is read through an
+ * SRAAlignmentIterator, and a chunk that ends after the total references length is read through an
+ * SRAUnalignmentIterator (see {@link #hasNext()}).
+ */
+public class SRAIterator implements SAMRecordIterator {
+ private ValidationStringency validationStringency; // stays null until setValidationStringency is called
+
+ private SRAAccession accession;
+ private ReadCollection run;
+ private SAMFileHeader header;
+ private ReferenceCache cachedReferences;
+ private RecordRangeInfo recordRangeInfo;
+ private Iterator<Chunk> chunksIterator; // walks the chunks passed to the constructor
+ private Chunk currentChunk; // chunk currently being served; null when there is no chunk left
+
+ private SRAAlignmentIterator alignmentIterator; // created in hasNext() for the current chunk, reset to null per chunk
+ private SRAUnalignmentIterator unalignmentIterator; // created in hasNext() for the current chunk, reset to null per chunk
+
+ /**
+ * Describes record ranges info needed for emulating BAM index
+ */
+ public static class RecordRangeInfo {
+ private List<Long> referenceOffsets; // entry i is the sum of the aligned lengths of all references before i
+ private List<Long> referenceLengthsAligned;
+ private long totalReferencesLength; // sum of all entries of referenceLengthsAligned
+ private long numberOfReads; // is used for unaligned read space
+ private long totalRecordRangeLength; // totalReferencesLength + numberOfReads
+
+ /**
+ * Computes the offset of every reference as the running sum of the preceding reference lengths, and the
+ * total record range length as the total references length plus the number of reads.
+ * The given list is stored as is (not copied).
+ *
+ * @param referenceLengthsAligned a list with lengths of each reference
+ * @param numberOfReads total number of reads within SRA archive
+ */
+ public RecordRangeInfo(List<Long> referenceLengthsAligned, long numberOfReads) {
+ this.numberOfReads = numberOfReads;
+ this.referenceLengthsAligned = referenceLengthsAligned;
+
+ referenceOffsets = new ArrayList<Long>();
+
+ totalReferencesLength = 0;
+ for (Long refLen : referenceLengthsAligned) {
+ referenceOffsets.add(totalReferencesLength);
+ totalReferencesLength += refLen;
+ }
+
+ totalRecordRangeLength = totalReferencesLength + this.numberOfReads;
+ }
+
+ public long getNumberOfReads() {
+ return numberOfReads;
+ }
+
+ public long getTotalReferencesLength() {
+ return totalReferencesLength;
+ }
+
+ public long getTotalRecordRangeLength() {
+ return totalRecordRangeLength;
+ }
+
+ public final List<Long> getReferenceOffsets() {
+ return Collections.unmodifiableList(referenceOffsets); // read-only view, not a copy
+ }
+
+ public final List<Long> getReferenceLengthsAligned() {
+ return Collections.unmodifiableList(referenceLengthsAligned); // read-only view, not a copy
+ }
+ }
+
+ /**
+ * Loads record ranges needed for emulating BAM index
+ * @param run read collection
+ * @return record ranges
+ * @throws RuntimeException wrapping any ErrorMsg raised while querying the read collection
+ */
+ public static RecordRangeInfo getRecordsRangeInfo(ReadCollection run) {
+ try {
+ return new RecordRangeInfo(SRAUtils.getReferencesLengthsAligned(run), SRAUtils.getNumberOfReads(run));
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Stores the arguments, takes the first chunk (if any) and calls {@link #hasNext()} once.
+ *
+ * @param accession SRA accession, handed over to the alignment and unalignment iterators
+ * @param run opened read collection
+ * @param header sam header
+ * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader
+ * @param recordRangeInfo info about record ranges within SRA archive
+ * @param chunks used to determine which records the iterator should return
+ */
+ public SRAIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences,
+ final RecordRangeInfo recordRangeInfo, final List<Chunk> chunks) {
+ this.accession = accession;
+ this.run = run;
+ this.header = header;
+ this.cachedReferences = cachedReferences;
+ this.recordRangeInfo = recordRangeInfo;
+ chunksIterator = chunks.iterator();
+ if (chunksIterator.hasNext()) {
+ currentChunk = chunksIterator.next();
+ }
+
+ hasNext();
+ }
+
+ /**
+ * NGS iterators implement a single method "nextObject" which returns true if the operation was successful or
+ * false if there are no more objects available.
+ * That means that there is no way to check "hasNext" without actually moving the iterator forward.
+ * Because of that, all the logic of moving the iterator forward actually happens in "hasNext".
+ *
+ * Here is an explanation of how it works:
+ * Iterator holds a list of chunks of requested records. Here we have chunksIterator that walks through that list.
+ * We walk through that list using chunksIterator. If current chunk can represent aligned fragments then we create
+ * SRAAlignmentIterator iterator, pass the chunk into it and ask if it can find any record. If record was found,
+ * we say that we have next; otherwise we check if the chunk can represent unaligned fragments and then create
+ * SRAUnalignmentIterator if so and do the same steps as with alignment iterator.
+ *
+ * If record was not found in both SRAAlignmentIterator and SRAUnalignmentIterator (it is possible that reference
+ * range has no alignments or that reads range has only aligned fragments), we try the next chunk.
+ *
+ * When there are no more chunks and both iterators have no more records we return false.
+ *
+ * @return true if there are more records available
+ */
+ @Override
+ public boolean hasNext() {
+ while (currentChunk != null) {
+ if (alignmentIterator == null) {
+ if (currentChunk.getChunkStart() < recordRangeInfo.getTotalReferencesLength()) { // chunk starts inside the references range
+ alignmentIterator = new SRAAlignmentIterator(accession, run, header, cachedReferences, recordRangeInfo, currentChunk);
+ if (validationStringency != null) {
+ alignmentIterator.setValidationStringency(validationStringency);
+ }
+ }
+ }
+
+ if (alignmentIterator != null && alignmentIterator.hasNext()) {
+ return true;
+ }
+
+ if (unalignmentIterator == null) {
+ if (currentChunk.getChunkEnd() > recordRangeInfo.getTotalReferencesLength()) { // chunk ends past the references range
+ unalignmentIterator = new SRAUnalignmentIterator(accession, run, header, recordRangeInfo, currentChunk);
+ if (validationStringency != null) {
+ unalignmentIterator.setValidationStringency(validationStringency);
+ }
+ }
+ }
+ if (unalignmentIterator != null && unalignmentIterator.hasNext()) {
+ return true;
+ }
+
+ alignmentIterator = null; // current chunk is exhausted: drop its iterators and move on
+ unalignmentIterator = null;
+ if (chunksIterator.hasNext()) {
+ currentChunk = chunksIterator.next();
+ } else {
+ currentChunk = null;
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Call hasNext to make sure that one of inner iterators points to the next record, then retrieve the record from
+ * one of them.
+ * @return lazy SRA record
+ * @throws NoSuchElementException if hasNext() reports that no more records are available
+ */
+ @Override
+ public SAMRecord next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more records are available in SRAIterator");
+ }
+
+ if (alignmentIterator != null && alignmentIterator.hasNext()) {
+ return alignmentIterator.next();
+ }
+
+ return unalignmentIterator.next(); // hasNext() returned true, so the record comes from the unaligned side
+ }
+
+ @Override
+ public void remove() { throw new UnsupportedOperationException("Removal of records not implemented."); }
+
+ @Override
+ public void close() { } // no-op
+
+ @Override
+ public SAMRecordIterator assertSorted(final SortOrder sortOrder) { throw new UnsupportedOperationException("assertSorted is not implemented."); }
+
+ public void setValidationStringency(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency; // remembered for inner iterators created later in hasNext()
+
+ if (alignmentIterator != null) { // also apply to inner iterators that already exist
+ alignmentIterator.setValidationStringency(validationStringency);
+ }
+ if (unalignmentIterator != null) {
+ unalignmentIterator.setValidationStringency(validationStringency);
+ }
+ }
+}
diff --git a/src/java/htsjdk/samtools/SamFileValidator.java b/src/java/htsjdk/samtools/SamFileValidator.java
index 5e138d3..42d2580 100644
--- a/src/java/htsjdk/samtools/SamFileValidator.java
+++ b/src/java/htsjdk/samtools/SamFileValidator.java
@@ -274,11 +274,22 @@ public class SamFileValidator {
}
validateMateFields(record, recordNumber);
- validateSortOrder(record, recordNumber);
+ final boolean hasValidSortOrder = validateSortOrder(record, recordNumber);
validateReadGroup(record, header);
final boolean cigarIsValid = validateCigar(record, recordNumber);
if (cigarIsValid) {
- validateNmTag(record, recordNumber);
+ try {
+ validateNmTag(record, recordNumber);
+ }
+ catch (SAMException e) {
+ if (hasValidSortOrder) {
+ // If a CRAM file has an invalid sort order, the ReferenceFileWalker will throw a
+ // SAMException due to an out of order request when retrieving reference bases during NM
+ // tag validation; rethrow the exception only if the sort order is valid, otherwise
+ // swallow the exception and carry on validating
+ throw e;
+ }
+ }
}
validateSecondaryBaseCalls(record, recordNumber);
validateTags(record, recordNumber);
@@ -397,9 +408,10 @@ public class SamFileValidator {
}
- private void validateSortOrder(final SAMRecord record, final long recordNumber) {
+ private boolean validateSortOrder(final SAMRecord record, final long recordNumber) {
final SAMRecord prev = orderChecker.getPreviousRecord();
- if (!orderChecker.isSorted(record)) {
+ boolean isValidSortOrder = orderChecker.isSorted(record);
+ if (!isValidSortOrder) {
addError(new SAMValidationError(
Type.RECORD_OUT_OF_ORDER,
String.format(
@@ -411,6 +423,7 @@ public class SamFileValidator {
record.getReadName(),
recordNumber));
}
+ return isValidSortOrder;
}
private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
diff --git a/src/java/htsjdk/samtools/SamFiles.java b/src/java/htsjdk/samtools/SamFiles.java
index 2160a5e..0112855 100644
--- a/src/java/htsjdk/samtools/SamFiles.java
+++ b/src/java/htsjdk/samtools/SamFiles.java
@@ -1,13 +1,17 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.cram.build.CramIO;
+
import java.io.File;
/**
* @author mccowan
*/
public class SamFiles {
+
/**
- * Finds the index file associated with the provided SAM file. The index file must exist and be reachable to be found.
+ * Finds the index file associated with the provided SAM file. The index file must exist and be reachable to be found.
*
* @return The index for the provided SAM, or null if one was not found.
*/
@@ -21,14 +25,27 @@ public class SamFiles {
if (indexFile.isFile()) {
return indexFile;
}
+
+
+ } else if (fileName.endsWith(CramIO.CRAM_FILE_EXTENSION)) {
+ final String crai = fileName.substring(0, fileName.length() - CramIO.CRAM_FILE_EXTENSION.length()) + CRAIIndex.CRAI_INDEX_SUFFIX;
+ indexFile = new File(samFile.getParent(), crai);
+ if (indexFile.isFile()) {
+ return indexFile;
+ }
+
+ indexFile = new File(samFile.getParent(), samFile.getName() + CRAIIndex.CRAI_INDEX_SUFFIX);
+ if (indexFile.isFile()) {
+ return indexFile;
+ }
}
// If foo.bai doesn't exist look for foo.bam.bai
indexFile = new File(samFile.getParent(), samFile.getName() + BAMIndex.BAMIndexSuffix);
if (indexFile.isFile()) {
return indexFile;
- } else {
- return null;
}
+
+ return null;
}
}
diff --git a/src/java/htsjdk/samtools/SamIndexes.java b/src/java/htsjdk/samtools/SamIndexes.java
new file mode 100644
index 0000000..a888811
--- /dev/null
+++ b/src/java/htsjdk/samtools/SamIndexes.java
@@ -0,0 +1,94 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.seekablestream.SeekableBufferedStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+/**
+ * A helper class to read BAI and CRAI indexes. Main goal is to provide BAI stream as a sort of common API for all index types.
+ * Each constant pairs the file name suffix of an index type with the magic bytes expected at the start of its data.
+ * <p/>
+ * Created by vadim on 14/08/2015.
+ */
+public enum SamIndexes {
+    // 'B', 'A', 'I', 0x01 spelled out as bytes so that the magic does not depend on the platform default charset
+    BAI(BAMIndex.BAMIndexSuffix, new byte[]{'B', 'A', 'I', 1}),
+    // CRAI is gzipped text, so its magic is the same as java.util.zip.GZIPInputStream#GZIP_MAGIC
+    CRAI(CRAIIndex.CRAI_INDEX_SUFFIX, new byte[]{(byte) 0x1f, (byte) 0x8b});
+
+    public final String fileNameSuffix;
+    public final byte[] magic;
+
+    SamIndexes(final String fileNameSuffix, final byte[] magic) {
+        this.fileNameSuffix = fileNameSuffix;
+        this.magic = magic;
+    }
+
+    /**
+     * Same as {@link #openIndexUrlAsBaiOrNull(URL, SAMSequenceDictionary)} for a local file.
+     *
+     * @param file       index file
+     * @param dictionary sequence dictionary, passed on for CRAI conversion
+     * @return a BAI stream, or null if the file name has neither the BAI nor the CRAI suffix
+     * @throws IOException if the file URL cannot be built or opened
+     */
+    public static InputStream openIndexFileAsBaiOrNull(final File file, final SAMSequenceDictionary dictionary) throws IOException {
+        return openIndexUrlAsBaiOrNull(file.toURI().toURL(), dictionary);
+    }
+
+    /**
+     * Opens an index as a BAI stream, choosing the index type by the (case-insensitive) file name suffix of the URL.
+     *
+     * @param url        location of the index
+     * @param dictionary sequence dictionary, passed on for CRAI conversion
+     * @return the URL stream itself for BAI, the converted stream for CRAI, or null for any other suffix
+     * @throws IOException if the URL cannot be opened or the CRAI conversion fails
+     */
+    public static InputStream openIndexUrlAsBaiOrNull(final URL url, final SAMSequenceDictionary dictionary) throws IOException {
+        // Locale.ROOT keeps the comparison independent of the default locale (e.g. Turkish dotless i).
+        final String path = url.getFile().toLowerCase(java.util.Locale.ROOT);
+        if (path.endsWith(BAI.fileNameSuffix.toLowerCase(java.util.Locale.ROOT))) {
+            return url.openStream();
+        }
+        if (path.endsWith(CRAI.fileNameSuffix.toLowerCase(java.util.Locale.ROOT))) {
+            return CRAIIndex.openCraiFileAsBaiStream(url.openStream(), dictionary);
+        }
+
+        return null;
+    }
+
+    /**
+     * Wraps the stream in a BufferedInputStream and detects the index type from its leading magic bytes.
+     *
+     * @param inputStream stream positioned at the start of the index data
+     * @param dictionary  sequence dictionary, passed on for CRAI conversion
+     * @return the buffered stream for BAI, the converted stream for CRAI, or null if neither magic matches
+     * @throws IOException if reading the stream or the CRAI conversion fails
+     */
+    public static InputStream asBaiStreamOrNull(final InputStream inputStream, final SAMSequenceDictionary dictionary) throws IOException {
+        final BufferedInputStream bis = new BufferedInputStream(inputStream);
+
+        // Peek at the magic and always rewind, whatever the outcome of the comparison.
+        bis.mark(BAI.magic.length);
+        final boolean isBai = doesStreamStartWith(bis, BAI.magic);
+        bis.reset();
+        if (isBai) {
+            return bis;
+        }
+
+        bis.mark(CRAI.magic.length);
+        final boolean isCrai = doesStreamStartWith(bis, CRAI.magic);
+        bis.reset();
+        if (isCrai) {
+            return CRAIIndex.openCraiFileAsBaiStream(bis, dictionary);
+        }
+
+        return null;
+    }
+
+    /**
+     * Seekable variant of {@link #asBaiStreamOrNull(InputStream, SAMSequenceDictionary)}: wraps the stream in a
+     * SeekableBufferedStream and detects the index type from the magic bytes at position 0.
+     *
+     * @param inputStream seekable stream with the index data
+     * @param dictionary  sequence dictionary, passed on for CRAI conversion
+     * @return the buffered stream for BAI, the converted stream for CRAI, or null if neither magic matches
+     * @throws IOException if reading or seeking the stream, or the CRAI conversion, fails
+     */
+    public static SeekableStream asBaiSeekableStreamOrNull(final SeekableStream inputStream, final SAMSequenceDictionary dictionary) throws IOException {
+        final SeekableBufferedStream bis = new SeekableBufferedStream(inputStream);
+        bis.seek(0);
+        if (doesStreamStartWith(bis, BAI.magic)) {
+            bis.seek(0);
+            return bis;
+        }
+
+        bis.seek(0);
+        if (doesStreamStartWith(bis, CRAI.magic)) {
+            bis.seek(0);
+            return CRAIIndex.openCraiFileAsBaiStream(bis, dictionary);
+        }
+
+        // Rewind with seek() like everywhere else in this method: no mark() was ever set on this stream,
+        // so reset() must not be used here.
+        bis.seek(0);
+        return null;
+    }
+
+    /**
+     * Reads bytes from the stream while they match the expected ones; stops at the first mismatch (or end of stream).
+     *
+     * @param is    stream to read from; the read bytes are consumed
+     * @param bytes expected leading bytes
+     * @return true if all expected bytes were read in order
+     * @throws IOException if reading fails
+     */
+    private static boolean doesStreamStartWith(final InputStream is, final byte[] bytes) throws IOException {
+        for (final byte expected : bytes) {
+            // read() yields 0..255 (or -1 at end of stream), so compare against the unsigned byte value
+            if (is.read() != (0xFF & expected)) {
+                return false;
+            }
+        }
+        return true;
+    }
+}
diff --git a/src/java/htsjdk/samtools/SamInputResource.java b/src/java/htsjdk/samtools/SamInputResource.java
index 03b1ee3..2692c6e 100644
--- a/src/java/htsjdk/samtools/SamInputResource.java
+++ b/src/java/htsjdk/samtools/SamInputResource.java
@@ -1,10 +1,9 @@
package htsjdk.samtools;
-import htsjdk.samtools.seekablestream.SeekableFTPStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
-import htsjdk.samtools.seekablestream.SeekableHTTPStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.seekablestream.SeekableStreamFactory;
+import htsjdk.samtools.sra.SRAAccession;
import htsjdk.samtools.util.Lazy;
import htsjdk.samtools.util.RuntimeIOException;
@@ -69,6 +68,8 @@ public class SamInputResource {
/** Creates a {@link SamInputResource} reading from the provided resource, with no index. */
public static SamInputResource of(final SeekableStream seekableStream) { return new SamInputResource(new SeekableStreamInputResource(seekableStream)); }
+ public static SamInputResource of(final SRAAccession acc) { return new SamInputResource(new SRAInputResource(acc)); }
+
/** Creates a {@link SamInputResource} from a string specifying *either* a url or a file path */
public static SamInputResource of(final String string) {
try {
@@ -115,7 +116,7 @@ abstract class InputResource {
protected InputResource(final Type type) {this.type = type;}
enum Type {
- FILE, URL, SEEKABLE_STREAM, INPUT_STREAM
+ FILE, URL, SEEKABLE_STREAM, INPUT_STREAM, SRA_ACCESSION
}
private final Type type;
@@ -136,6 +137,9 @@ abstract class InputResource {
/** All resource types support {@link java.io.InputStream} generation. */
abstract InputStream asUnbufferedInputStream();
+ /** SRA archive resource */
+ abstract SRAAccession asSRAAccession();
+
@Override
public String toString() {
final String childToString;
@@ -152,6 +156,9 @@ abstract class InputResource {
case URL:
childToString = asUrl().toString();
break;
+ case SRA_ACCESSION:
+ childToString = asSRAAccession().toString();
+ break;
default:
throw new IllegalStateException();
}
@@ -198,6 +205,11 @@ class FileInputResource extends InputResource {
public InputStream asUnbufferedInputStream() {
return asUnbufferedSeekableStream();
}
+
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
}
class UrlInputResource extends InputResource {
@@ -235,6 +247,11 @@ class UrlInputResource extends InputResource {
public InputStream asUnbufferedInputStream() {
return asUnbufferedSeekableStream();
}
+
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
}
class SeekableStreamInputResource extends InputResource {
@@ -265,6 +282,11 @@ class SeekableStreamInputResource extends InputResource {
InputStream asUnbufferedInputStream() {
return asUnbufferedSeekableStream();
}
+
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
}
class InputStreamInputResource extends InputResource {
@@ -295,4 +317,44 @@ class InputStreamInputResource extends InputResource {
InputStream asUnbufferedInputStream() {
return inputStreamResource;
}
+
+ @Override
+ public SRAAccession asSRAAccession() {
+ return null;
+ }
}
+
+/**
+ * {@link InputResource} of type SRA_ACCESSION. It only carries the accession: the file, URL and stream
+ * views all return null.
+ */
+class SRAInputResource extends InputResource {
+
+    final SRAAccession accession;
+
+    SRAInputResource(final SRAAccession accession) {
+        super(Type.SRA_ACCESSION);
+        this.accession = accession;
+    }
+
+    /** @return the accession given to the constructor */
+    @Override
+    public SRAAccession asSRAAccession() {
+        return this.accession;
+    }
+
+    /** @return always null */
+    @Override
+    File asFile() {
+        return null;
+    }
+
+    /** @return always null */
+    @Override
+    URL asUrl() {
+        return null;
+    }
+
+    /** @return always null */
+    @Override
+    SeekableStream asUnbufferedSeekableStream() {
+        return null;
+    }
+
+    /** @return always null */
+    @Override
+    InputStream asUnbufferedInputStream() {
+        return null;
+    }
+}
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/SamPairUtil.java b/src/java/htsjdk/samtools/SamPairUtil.java
index 5c3ea99..5daf6e6 100644
--- a/src/java/htsjdk/samtools/SamPairUtil.java
+++ b/src/java/htsjdk/samtools/SamPairUtil.java
@@ -25,9 +25,7 @@
package htsjdk.samtools;
import htsjdk.samtools.util.PeekableIterator;
-import htsjdk.samtools.util.ProgressLogger;
-import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
@@ -52,9 +50,9 @@ public class SamPairUtil {
{
FR, // ( 5' --F--> <--R-- 5' ) - aka. innie
RF, // ( <--R-- 5' 5' --F--> ) - aka. outie
- TANDEM; // ( 5' --F--> 5' --F--> or ( <--R-- 5' <--R-- 5' )
+ TANDEM // ( 5' --F--> 5' --F--> or ( <--R-- 5' <--R-- 5' )
- };
+ }
/**
@@ -64,7 +62,7 @@ public class SamPairUtil {
* @throws IllegalArgumentException If the record is not a paired read, or
* one or both reads are unmapped.
*/
- public static PairOrientation getPairOrientation(SAMRecord r)
+ public static PairOrientation getPairOrientation(final SAMRecord r)
{
final boolean readIsOnReverseStrand = r.getReadNegativeStrandFlag();
@@ -185,8 +183,8 @@ public class SamPairUtil {
/**
* Write the mate info for two SAMRecords
- * @param rec1 the first SAM record
- * @param rec2 the second SAM record
+ * @param rec1 the first SAM record. Must have a non-null SAMFileHeader.
+ * @param rec2 the second SAM record. Must have a non-null SAMFileHeader.
* @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
*/
public static void setMateInfo(final SAMRecord rec1, final SAMRecord rec2, final boolean setMateCigar) {
@@ -361,6 +359,7 @@ public class SamPairUtil {
private final Queue<SAMRecord> records = new LinkedList<SAMRecord>();
private final boolean setMateCigar;
+ private final boolean ignoreMissingMates;
private long numMateCigarsAdded = 0;
/**
@@ -376,8 +375,18 @@ public class SamPairUtil {
* @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
*/
public SetMateInfoIterator(final Iterator<SAMRecord> iterator, final boolean setMateCigar) {
+ this(iterator, setMateCigar, false);
+ }
+
+ /**
+ * @param iterator the iterator to wrap
+ * @param setMateCigar true if we are to update/create the Mate CIGAR (MC) optional tag, false if we are to clear any mate cigar tag that is present.
+ * @param ignoreMissingMates set this to true if we are to ignore missing mates, otherwise an exception will be thrown when a missing mate is encountered
+ */
+ public SetMateInfoIterator(final Iterator<SAMRecord> iterator, final boolean setMateCigar, final boolean ignoreMissingMates) {
super(iterator);
this.setMateCigar = setMateCigar;
+ this.ignoreMissingMates = ignoreMissingMates;
}
/**
@@ -414,8 +423,7 @@ public class SamPairUtil {
throw new SAMException("Found two records that are paired, not supplementary, and first of the pair");
}
firstPrimaryRecord = record;
- }
- else if (record.getSecondOfPairFlag()) {
+ } else if (record.getSecondOfPairFlag()) {
if (null != secondPrimaryRecord) {
throw new SAMException("Found two records that are paired, not supplementary, and second of the pair");
}
@@ -426,27 +434,34 @@ public class SamPairUtil {
}
records.add(record);
}
+ // TODO: should we check that we do not have a mix of paired and fragment reads?
+
// we must find both records to update the mate info
if (null != firstPrimaryRecord && null != secondPrimaryRecord) {
// Update mate info
SamPairUtil.setMateInfo(firstPrimaryRecord, secondPrimaryRecord, this.setMateCigar);
if (this.setMateCigar) this.numMateCigarsAdded += 2;
- }
- // Set mate information on supplemental records
- if (containsSupplementalRecord) {
- for (final SAMRecord record : records) {
- if (record.getReadPairedFlag() && record.getSupplementaryAlignmentFlag()) {
- if (record.getFirstOfPairFlag()) {
- SamPairUtil.setMateInformationOnSupplementalAlignment(record, secondPrimaryRecord, this.setMateCigar);
- }
- else {
- SamPairUtil.setMateInformationOnSupplementalAlignment(record, firstPrimaryRecord, this.setMateCigar);
+ // Set mate information on supplemental records
+ if (containsSupplementalRecord) {
+ for (final SAMRecord record : records) {
+ if (record.getReadPairedFlag() && record.getSupplementaryAlignmentFlag()) {
+ if (record.getFirstOfPairFlag()) {
+ SamPairUtil.setMateInformationOnSupplementalAlignment(record, secondPrimaryRecord, this.setMateCigar);
+ } else {
+ SamPairUtil.setMateInformationOnSupplementalAlignment(record, firstPrimaryRecord, this.setMateCigar);
+ }
+ this.numMateCigarsAdded++;
}
- this.numMateCigarsAdded++;
}
}
+ } else if (!this.ignoreMissingMates) {
+ if (null != firstPrimaryRecord && firstPrimaryRecord.getReadPairedFlag()) {
+ throw new SAMException("Missing second read of pair: " + firstPrimaryRecord.getReadName());
+ } else if (null != secondPrimaryRecord && secondPrimaryRecord.getReadPairedFlag()) {
+ throw new SAMException("Missing first read of pair: " + secondPrimaryRecord.getReadName());
+ }
}
}
diff --git a/src/java/htsjdk/samtools/SamReader.java b/src/java/htsjdk/samtools/SamReader.java
index 9ded4a3..9493593 100644
--- a/src/java/htsjdk/samtools/SamReader.java
+++ b/src/java/htsjdk/samtools/SamReader.java
@@ -57,6 +57,7 @@ public interface SamReader extends Iterable<SAMRecord>, Closeable {
}
}
+ public static Type SRA_TYPE = new TypeImpl("SRA", "sra", null);
public static Type CRAM_TYPE = new TypeImpl("CRAM", "cram", "crai");
public static Type BAM_TYPE = new TypeImpl("BAM", "bam", "bai");
public static Type SAM_TYPE = new TypeImpl("SAM", "sam", null);
diff --git a/src/java/htsjdk/samtools/SamReaderFactory.java b/src/java/htsjdk/samtools/SamReaderFactory.java
index 4ab92af..5403379 100644
--- a/src/java/htsjdk/samtools/SamReaderFactory.java
+++ b/src/java/htsjdk/samtools/SamReaderFactory.java
@@ -9,6 +9,7 @@ import java.util.zip.GZIPInputStream;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
+import htsjdk.samtools.sra.SRAAccession;
import htsjdk.samtools.util.BlockCompressedInputStream;
import htsjdk.samtools.util.BlockCompressedStreamConstants;
import htsjdk.samtools.util.CloserUtil;
@@ -241,6 +242,8 @@ public abstract class SamReaderFactory {
} else {
throw new SAMFormatException("Unrecognized file format: " + data.asUnbufferedSeekableStream());
}
+ } else if (type == InputResource.Type.SRA_ACCESSION) {
+ primitiveSamReader = new SRAFileReader(data.asSRAAccession());
} else {
InputStream bufferedStream =
IOUtil.maybeBufferInputStream(
@@ -262,19 +265,18 @@ public abstract class SamReaderFactory {
} else if (SamStreams.isGzippedSAMFile(bufferedStream)) {
primitiveSamReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory);
} else if (SamStreams.isCRAMFile(bufferedStream)) {
+ if (referenceSource == null && Defaults.REFERENCE_FASTA != null) referenceSource = new ReferenceSource(Defaults.REFERENCE_FASTA);
if (sourceFile == null || !sourceFile.isFile()) {
- sourceFile = null;
+ primitiveSamReader = new CRAMFileReader(bufferedStream, indexFile, referenceSource, validationStringency);
} else {
bufferedStream.close();
- bufferedStream = null;
+ primitiveSamReader = new CRAMFileReader(sourceFile, indexFile, referenceSource, validationStringency);
}
-
- // Always attempt to pass in the index. If it is null, that's fine. If the reference isn't supplied, use the default.
- if (referenceSource != null) {
- primitiveSamReader = new CRAMFileReader(sourceFile, indexMaybe == null ? null : indexMaybe.asFile(), referenceSource);
- } else {
- primitiveSamReader = new CRAMFileReader(sourceFile, indexMaybe == null ? null : indexMaybe.asFile(), new ReferenceSource(Defaults.REFERENCE_FASTA));
+ } else if (sourceFile != null && SRAAccession.isValid(sourceFile.getPath())) {
+ if (bufferedStream != null) {
+ bufferedStream.close();
}
+ primitiveSamReader = new SRAFileReader(new SRAAccession(sourceFile.getPath()));
} else {
if (indexDefined) {
bufferedStream.close();
@@ -325,6 +327,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
underlyingReader.enableFileSource(reader, true);
}
+
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ underlyingReader.enableFileSource(reader, true);
+ }
},
/**
@@ -349,6 +356,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
underlyingReader.enableIndexCaching(true);
}
+
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ underlyingReader.enableIndexCaching(true);
+ }
},
/**
@@ -373,6 +385,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
underlyingReader.enableIndexMemoryMapping(false);
}
+
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ underlyingReader.enableIndexMemoryMapping(false);
+ }
},
/**
@@ -394,6 +411,11 @@ public abstract class SamReaderFactory {
void applyTo(final CRAMFileReader underlyingReader, final SamReader reader) {
logDebugIgnoringOption(reader, this);
}
+
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ logDebugIgnoringOption(reader, this);
+ }
},
/**
@@ -416,6 +438,11 @@ public abstract class SamReaderFactory {
logDebugIgnoringOption(reader, this);
}
+ @Override
+ void applyTo(final SRAFileReader underlyingReader, final SamReader reader) {
+ logDebugIgnoringOption(reader, this);
+ }
+
};
public static EnumSet<Option> DEFAULTS = EnumSet.noneOf(Option.class);
@@ -429,6 +456,8 @@ public abstract class SamReaderFactory {
applyTo((SAMTextReader) underlyingReader, reader);
} else if (underlyingReader instanceof CRAMFileReader) {
applyTo((CRAMFileReader) underlyingReader, reader);
+ } else if (underlyingReader instanceof SRAFileReader) {
+ applyTo((SRAFileReader) underlyingReader, reader);
} else {
throw new IllegalArgumentException(String.format("Unrecognized reader type: %s.", underlyingReader.getClass()));
}
@@ -446,5 +475,7 @@ public abstract class SamReaderFactory {
abstract void applyTo(final SAMTextReader underlyingReader, final SamReader reader);
abstract void applyTo(final CRAMFileReader underlyingReader, final SamReader reader);
+
+ abstract void applyTo(final SRAFileReader underlyingReader, final SamReader reader);
}
}
diff --git a/src/java/htsjdk/samtools/SamStreams.java b/src/java/htsjdk/samtools/SamStreams.java
index cea099d..173eb7c 100644
--- a/src/java/htsjdk/samtools/SamStreams.java
+++ b/src/java/htsjdk/samtools/SamStreams.java
@@ -39,6 +39,7 @@ public class SamStreams {
return Arrays.equals(buffer, CramHeader.MAGIC);
}
+
/**
* @param stream stream.markSupported() must be true
* @return true if this looks like a BAM file.
diff --git a/src/java/htsjdk/samtools/TextTagCodec.java b/src/java/htsjdk/samtools/TextTagCodec.java
index 109555d..ceec61a 100644
--- a/src/java/htsjdk/samtools/TextTagCodec.java
+++ b/src/java/htsjdk/samtools/TextTagCodec.java
@@ -23,6 +23,7 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.DateParser;
import htsjdk.samtools.util.Iso8601Date;
import htsjdk.samtools.util.StringUtil;
@@ -72,8 +73,9 @@ public class TextTagCodec {
value = getArrayType(value, false) + "," + encodeArrayValue(value);
} else if (tagType == 'i') {
final long longVal = ((Number) value).longValue();
- if (longVal > Integer.MAX_VALUE || longVal < Integer.MIN_VALUE) {
- throw new SAMFormatException("Value for tag " + tagName + " cannot be stored in an Integer: " + longVal);
+ // as the spec says: [-2^31, 2^32)
+ if (longVal < Integer.MIN_VALUE || longVal > BinaryCodec.MAX_UINT) {
+ throw new IllegalArgumentException("Value for tag " + tagName + " cannot be stored in either a signed or unsigned 32-bit integer: " + longVal);
}
}
sb.append(tagType);
@@ -182,11 +184,22 @@ public class TextTagCodec {
}
return stringVal.charAt(0);
} else if (type.equals("i")) {
+ final long lValue;
try {
- return new Integer(stringVal);
+ lValue = Long.valueOf(stringVal);
} catch (NumberFormatException e) {
throw new SAMFormatException("Tag of type i should have signed decimal value");
}
+
+ if (lValue >= Integer.MIN_VALUE && lValue <= Integer.MAX_VALUE) {
+ return (int) lValue;
+ }
+ else if (SAMUtils.isValidUnsignedIntegerAttribute(lValue)) {
+ return lValue;
+ }
+ else {
+ throw new SAMFormatException("Integer is out of range for both a 32-bit signed and unsigned integer: " + stringVal);
+ }
} else if (type.equals("f")) {
try {
return new Float(stringVal);
diff --git a/src/java/htsjdk/samtools/cram/CRAIEntry.java b/src/java/htsjdk/samtools/cram/CRAIEntry.java
new file mode 100644
index 0000000..0c7da6e
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/CRAIEntry.java
@@ -0,0 +1,148 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.Slice;
+
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+/**
+ * A class representing CRAI index entry: file and alignment offsets for each slice.
+ * Created by vadim on 10/08/2015.
+ */
+public class CRAIEntry implements Comparable<CRAIEntry>, Cloneable {
+ public int sequenceId;
+ public int alignmentStart;
+ public int alignmentSpan;
+ public long containerStartOffset;
+ public int sliceOffset;
+ public int sliceSize;
+ public int sliceIndex;
+
+ public CRAIEntry() {
+ }
+
+ public static List<CRAIEntry> fromContainer(final Container container) {
+ final List<CRAIEntry> entries = new ArrayList<CRAIEntry>(container.slices.length);
+ for (int i = 0; i < container.slices.length; i++) {
+ final Slice s = container.slices[i];
+ final CRAIEntry e = new CRAIEntry();
+ e.sequenceId = s.sequenceId;
+ e.alignmentStart = s.alignmentStart;
+ e.alignmentSpan = s.alignmentSpan;
+ e.containerStartOffset = s.containerOffset;
+ e.sliceOffset = container.landmarks[i];
+ e.sliceSize = s.size;
+
+ e.sliceIndex = i;
+ entries.add(e);
+ }
+ return entries;
+ }
+
+ public static CRAIEntry fromCraiLine(final String line) {
+ return new CRAIEntry(line);
+ }
+
+ public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException {
+ final String[] chunks = line.split("\t");
+ if (chunks.length != 6) {
+ throw new CRAIIndex.CRAIIndexException("Expecting 6 columns but got " + chunks.length);
+ }
+
+ try {
+ sequenceId = Integer.parseInt(chunks[0]);
+ alignmentStart = Integer.parseInt(chunks[1]);
+ alignmentSpan = Integer.parseInt(chunks[2]);
+ containerStartOffset = Long.parseLong(chunks[3]);
+ sliceOffset = Integer.parseInt(chunks[4]);
+ sliceSize = Integer.parseInt(chunks[5]);
+ } catch (final NumberFormatException e) {
+ throw new CRAIIndex.CRAIIndexException(e);
+ }
+ }
+
+    @Override
+    public String toString() {
+        // Tab-separated CRAI line. Plain concatenation, not String.format("%d"), which localises digits by default locale.
+        return sequenceId + "\t" + alignmentStart + "\t" + alignmentSpan + "\t" + containerStartOffset + "\t" + sliceOffset + "\t" + sliceSize;
+    }
+
+    /** Orders by sequence id, then alignment start, then container offset; any entry sorts after null. */
+    @Override
+    public int compareTo(final CRAIEntry o) {
+        if (o == null) {
+            return 1;
+        }
+        // Integer/Long.compare instead of subtraction: an int difference can overflow, and casting the
+        // long offset difference to int can truncate it to 0 or flip its sign.
+        if (sequenceId != o.sequenceId) {
+            return Integer.compare(sequenceId, o.sequenceId);
+        }
+        final int byAlignmentStart = Integer.compare(alignmentStart, o.alignmentStart);
+        return byAlignmentStart != 0 ? byAlignmentStart : Long.compare(containerStartOffset, o.containerStartOffset);
+    }
+
+    /**
+     * Returns an independent copy of this entry with every field preserved, including sliceIndex.
+     *
+     * @return a new entry equal field-for-field to this one
+     */
+    @Override
+    public CRAIEntry clone() throws CloneNotSupportedException {
+        // All fields are primitives, so the shallow copy made by Object.clone() is complete. The earlier
+        // hand-written copy discarded the result of super.clone() and never copied sliceIndex, which
+        // left it at 0 in every clone.
+        return (CRAIEntry) super.clone();
+    }
+
+    /** Orders by sequence id (descending, unlike compareTo), then alignment end (start + span), then container offset. */
+    public static Comparator<CRAIEntry> byEnd = new Comparator<CRAIEntry>() {
+        @Override
+        public int compare(final CRAIEntry o1, final CRAIEntry o2) {
+            if (o1.sequenceId != o2.sequenceId) {
+                return Integer.compare(o2.sequenceId, o1.sequenceId);
+            }
+            // Ends are computed as long because start + span can overflow int; Long.compare replaces the
+            // (int) cast of a long offset difference, which could truncate to 0 or flip its sign.
+            final long end1 = (long) o1.alignmentStart + o1.alignmentSpan;
+            final long end2 = (long) o2.alignmentStart + o2.alignmentSpan;
+            return end1 != end2 ? Long.compare(end1, end2) : Long.compare(o1.containerStartOffset, o2.containerStartOffset);
+        }
+    };
+
+    /** Orders by sequence id (descending, unlike compareTo), then alignment start, then container offset. */
+    public static final Comparator<CRAIEntry> byStart = new Comparator<CRAIEntry>() {
+        @Override
+        public int compare(final CRAIEntry o1, final CRAIEntry o2) {
+            if (o1.sequenceId != o2.sequenceId) {
+                return Integer.compare(o2.sequenceId, o1.sequenceId);
+            }
+            if (o1.alignmentStart != o2.alignmentStart) {
+                return Integer.compare(o1.alignmentStart, o2.alignmentStart);
+            }
+            // Long.compare: casting the long offset difference to int could truncate it to 0 or flip its sign.
+            return Long.compare(o1.containerStartOffset, o2.containerStartOffset);
+        }
+    };
+
+
+    /** Tells whether two entries share a non-negative sequence id and their [start, start + span) ranges overlap. */
+    public static boolean intersect(final CRAIEntry e0, final CRAIEntry e1) {
+        if (e0.sequenceId != e1.sequenceId) {
+            return false;
+        }
+        if (e0.sequenceId < 0) {
+            return false;
+        }
+        // Widened to long: start + span and span + span overflow int for very large spans (e.g. Integer.MAX_VALUE).
+        final long a0 = e0.alignmentStart;
+        final long a1 = e1.alignmentStart;
+
+        final long b0 = a0 + e0.alignmentSpan;
+        final long b1 = a1 + e1.alignmentSpan;
+        // Doubled midpoint-distance test: |(a0 + b0) - (a1 + b1)| < span0 + span1.
+        return Math.abs(a0 + b0 - a1 - b1) < ((long) e0.alignmentSpan + e1.alignmentSpan);
+    }
+}
diff --git a/src/java/htsjdk/samtools/cram/CRAIIndex.java b/src/java/htsjdk/samtools/cram/CRAIIndex.java
new file mode 100644
index 0000000..0a3f567
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/CRAIIndex.java
@@ -0,0 +1,164 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.samtools.CRAMIndexer;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.cram.structure.Slice;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Scanner;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * A collection of static methods to read, write and convert CRAI index.
+ */
+public class CRAIIndex {
+ public static final String CRAI_INDEX_SUFFIX = ".crai";
+
+ public static void writeIndex(final OutputStream os, final List<CRAIEntry> index) throws IOException {
+ for (final CRAIEntry e : index) {
+ os.write(e.toString().getBytes());
+ os.write('\n');
+ }
+ }
+
+    /** Parses one CRAIEntry per text line of the stream; the scanner (and with it the stream) is closed on exit. */
+    public static List<CRAIEntry> readIndex(final InputStream is) throws CRAIIndexException {
+        // ArrayList, not LinkedList: helpers such as findLastAlignedEntry use get(i), which is O(n) on a linked list.
+        final List<CRAIEntry> list = new ArrayList<CRAIEntry>();
+        final Scanner scanner = new Scanner(is);
+        try {
+            while (scanner.hasNextLine()) {
+                final String line = scanner.nextLine();
+                final CRAIEntry entry = CRAIEntry.fromCraiLine(line);
+                list.add(entry);
+            }
+        } finally {
+            scanner.close();
+        }
+        return list;
+    }
+
+ public static List<CRAIEntry> find(final List<CRAIEntry> list, final int seqId, final int start, final int span) {
+ final boolean whole = start < 1 || span < 1;
+ final CRAIEntry query = new CRAIEntry();
+ query.sequenceId = seqId;
+ query.alignmentStart = start < 1 ? 1 : start;
+ query.alignmentSpan = span < 1 ? Integer.MAX_VALUE : span;
+ query.containerStartOffset = Long.MAX_VALUE;
+ query.sliceOffset = Integer.MAX_VALUE;
+ query.sliceSize = Integer.MAX_VALUE;
+
+ final List<CRAIEntry> l = new ArrayList<CRAIEntry>();
+ for (final CRAIEntry e : list) {
+ if (e.sequenceId != seqId) {
+ continue;
+ }
+ if (whole || CRAIEntry.intersect(e, query)) {
+ l.add(e);
+ }
+ }
+ Collections.sort(l, CRAIEntry.byStart);
+ return l;
+ }
+
+ public static CRAIEntry getLeftmost(final List<CRAIEntry> list) {
+ if (list == null || list.isEmpty()) {
+ return null;
+ }
+ CRAIEntry left = list.get(0);
+
+ for (final CRAIEntry e : list) {
+ if (e.alignmentStart < left.alignmentStart) {
+ left = e;
+ }
+ }
+
+ return left;
+ }
+
+ /**
+ * Find index of the last aligned entry in the list. Assumes the index is sorted by coordinate and unmapped entries (with sequence id = -1) follow the mapped entries.
+ *
+ * @param list a list of CRAI entries
+ * @return integer index of the last entry with sequence id not equal to -1
+ */
+ public static int findLastAlignedEntry(final List<CRAIEntry> list) {
+ if (list.isEmpty()) {
+ return -1;
+ }
+
+ int low = 0;
+ int high = list.size() - 1;
+
+ while (low <= high) {
+ final int mid = (low + high) >>> 1;
+ final CRAIEntry midVal = list.get(mid);
+
+ if (midVal.sequenceId >= 0) {
+ low = mid + 1;
+ } else {
+ high = mid - 1;
+ }
+ }
+ if (low >= list.size()) {
+ return list.size() - 1;
+ }
+ for (; low >= 0 && list.get(low).sequenceId == -1; low--) {
+ }
+ return low;
+ }
+
+ public static SeekableStream openCraiFileAsBaiStream(final File cramIndexFile, final SAMSequenceDictionary dictionary) throws IOException {
+ return openCraiFileAsBaiStream(new FileInputStream(cramIndexFile), dictionary);
+ }
+
+ public static SeekableStream openCraiFileAsBaiStream(final InputStream indexStream, final SAMSequenceDictionary dictionary) throws IOException, CRAIIndexException {
+ final List<CRAIEntry> full = CRAIIndex.readIndex(new GZIPInputStream(indexStream));
+ Collections.sort(full);
+
+ final SAMFileHeader header = new SAMFileHeader();
+ header.setSequenceDictionary(dictionary);
+
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final CRAMIndexer indexer = new CRAMIndexer(baos, header);
+
+ for (final CRAIEntry entry : full) {
+ final Slice slice = new Slice();
+ slice.containerOffset = entry.containerStartOffset;
+ slice.alignmentStart = entry.alignmentStart;
+ slice.alignmentSpan = entry.alignmentSpan;
+ slice.sequenceId = entry.sequenceId;
+ slice.nofRecords = entry.sliceSize;
+ slice.index = entry.sliceIndex;
+ slice.offset = entry.sliceOffset;
+
+ indexer.processAlignment(slice);
+ }
+ indexer.finish();
+
+ return new SeekableMemoryStream(baos.toByteArray(), null);
+ }
+
+ public static class CRAIIndexException extends RuntimeException {
+
+ public CRAIIndexException(final String s) {
+ super(s);
+ }
+
+ public CRAIIndexException(final NumberFormatException e) {
+ super(e);
+ }
+ }
+}
diff --git a/src/java/htsjdk/samtools/cram/CRAMException.java b/src/java/htsjdk/samtools/cram/CRAMException.java
new file mode 100644
index 0000000..7d5fb72
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/CRAMException.java
@@ -0,0 +1,22 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.samtools.SAMException;
+
+/**
+ * Created by edwardk on 8/13/15.
+ */
+public class CRAMException extends SAMException {
+ public CRAMException() {}
+
+ public CRAMException(final String s) {
+ super(s);
+ }
+
+ public CRAMException(final String s, final Throwable throwable) {
+ super(s, throwable);
+ }
+
+ public CRAMException(final Throwable throwable) {
+ super(throwable);
+ }
+}
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/cram/build/ContainerParser.java b/src/java/htsjdk/samtools/cram/build/ContainerParser.java
index 5cef35e..002502e 100644
--- a/src/java/htsjdk/samtools/cram/build/ContainerParser.java
+++ b/src/java/htsjdk/samtools/cram/build/ContainerParser.java
@@ -20,6 +20,7 @@ package htsjdk.samtools.cram.build;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.reader.CramRecordReader;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory.DataReaderWithStats;
@@ -50,14 +51,14 @@ public class ContainerParser {
}
public List<CramCompressionRecord> getRecords(final Container container,
- ArrayList<CramCompressionRecord> records) throws IllegalArgumentException,
+ ArrayList<CramCompressionRecord> records, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
final long time1 = System.nanoTime();
if (records == null)
records = new ArrayList<CramCompressionRecord>(container.nofRecords);
for (final Slice slice : container.slices)
- records.addAll(getRecords(slice, container.header));
+ records.addAll(getRecords(slice, container.header, validationStringency));
final long time2 = System.nanoTime();
@@ -73,7 +74,7 @@ public class ContainerParser {
}
ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> records,
- final Slice slice, final CompressionHeader header) throws IllegalArgumentException,
+ final Slice slice, final CompressionHeader header, ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
String seqName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
switch (slice.sequenceId) {
@@ -97,7 +98,7 @@ public class ContainerParser {
}
long time;
- final CramRecordReader reader = new CramRecordReader();
+ final CramRecordReader reader = new CramRecordReader(validationStringency);
dataReaderFactory.buildReader(reader, new DefaultBitInputStream(
new ByteArrayInputStream(slice.coreBlock.getRawContent())),
inputMap, header, slice.sequenceId);
@@ -150,8 +151,8 @@ public class ContainerParser {
return records;
}
- List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header)
+ List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header, ValidationStringency validationStringency)
throws IllegalArgumentException, IllegalAccessException {
- return getRecords(null, slice, header);
+ return getRecords(null, slice, header, validationStringency);
}
}
diff --git a/src/java/htsjdk/samtools/cram/build/CramIO.java b/src/java/htsjdk/samtools/cram/build/CramIO.java
index 6667ce5..4a08016 100644
--- a/src/java/htsjdk/samtools/cram/build/CramIO.java
+++ b/src/java/htsjdk/samtools/cram/build/CramIO.java
@@ -52,6 +52,7 @@ import java.util.Arrays;
* A collection of methods to open and close CRAM files.
*/
public class CramIO {
+ public static final String CRAM_FILE_EXTENSION = ".cram";
/**
* The 'zero-B' EOF marker as per CRAM specs v2.1. This is basically a serialized empty CRAM container with sequence id set to some
* number to spell out 'EOF' in hex.
@@ -101,13 +102,14 @@ public class CramIO {
}
private static boolean streamEndsWith(final SeekableStream seekableStream, final byte[] marker) throws IOException {
- final byte[] tail = new byte[ZERO_B_EOF_MARKER.length];
+ final byte[] tail = new byte[marker.length];
seekableStream.seek(seekableStream.length() - marker.length);
InputStreamUtils.readFully(seekableStream, tail, 0, tail.length);
+ if (Arrays.equals(tail, marker)) return true ;
// relaxing the ITF8 hanging bits:
- tail[8] |= 0xf0;
+ tail[8] = marker[8];
return Arrays.equals(tail, marker);
}
@@ -122,8 +124,8 @@ public class CramIO {
@SuppressWarnings("SimplifiableIfStatement")
private static boolean checkEOF(final Version version, final SeekableStream seekableStream) throws IOException {
- if (version.compatibleWith(CramVersions.CRAM_v3)) return streamEndsWith(seekableStream, ZERO_B_EOF_MARKER);
- if (version.compatibleWith(CramVersions.CRAM_v2_1)) return streamEndsWith(seekableStream, ZERO_F_EOF_MARKER);
+ if (version.compatibleWith(CramVersions.CRAM_v3)) return streamEndsWith(seekableStream, ZERO_F_EOF_MARKER);
+ if (version.compatibleWith(CramVersions.CRAM_v2_1)) return streamEndsWith(seekableStream, ZERO_B_EOF_MARKER);
return false;
}
diff --git a/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java b/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
index 8a0b93d..f840a5f 100644
--- a/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
+++ b/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
@@ -62,6 +62,8 @@ public class Sam2CramRecordFactory {
private final Version version;
private byte[] refSNPs;
+ final private SAMFileHeader header;
+
private static final Log log = Log.getInstance(Sam2CramRecordFactory.class);
private final Map<String, Integer> readGroupMap = new HashMap<String, Integer>();
@@ -88,6 +90,7 @@ public class Sam2CramRecordFactory {
public Sam2CramRecordFactory(final byte[] refBases, final SAMFileHeader samFileHeader, final Version version) {
this.refBases = refBases;
this.version = version;
+ this.header = samFileHeader;
final List<SAMReadGroupRecord> readGroups = samFileHeader.getReadGroups();
for (int i = 0; i < readGroups.size(); i++) {
@@ -96,7 +99,17 @@ public class Sam2CramRecordFactory {
}
}
+ /**
+ * Create a CramCompressionRecord.
+ *
+ * @param record If the input record does not have an associated SAMFileHeader, it will be updated
+ * with the header used for the factory in order to allow reference indices to be resolved.
+ * @return CramCompressionRecord
+ */
public CramCompressionRecord createCramRecord(final SAMRecord record) {
+ if (null == record.getHeader()) {
+ record.setHeader(header);
+ }
final CramCompressionRecord cramRecord = new CramCompressionRecord();
if (record.getReadPairedFlag()) {
cramRecord.mateAlignmentStart = record.getMateAlignmentStart();
diff --git a/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java b/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java
index 01b8df2..7cbd98d 100644
--- a/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java
+++ b/src/java/htsjdk/samtools/cram/encoding/reader/CramRecordReader.java
@@ -17,7 +17,9 @@
*/
package htsjdk.samtools.cram.encoding.reader;
+import htsjdk.samtools.SAMFormatException;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore;
import htsjdk.samtools.cram.encoding.readfeatures.Bases;
import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
@@ -38,6 +40,11 @@ import java.util.LinkedList;
public class CramRecordReader extends AbstractReader {
private CramCompressionRecord prevRecord;
+ private ValidationStringency validationStringency;
+
+ public CramRecordReader(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
@SuppressWarnings("ConstantConditions")
public void read(final CramCompressionRecord cramRecord) {
@@ -87,7 +94,7 @@ public class CramRecordReader extends AbstractReader {
for (int i = 0; i < ids.length; i++) {
final int id = ReadTag.name3BytesToInt(ids[i]);
final DataReader<byte[]> dataReader = tagValueCodecs.get(id);
- final ReadTag tag = new ReadTag(id, dataReader.readData());
+ final ReadTag tag = new ReadTag(id, dataReader.readData(), validationStringency);
cramRecord.tags[i] = tag;
}
}
@@ -186,10 +193,19 @@ public class CramRecordReader extends AbstractReader {
recordCounter++;
prevRecord = cramRecord;
- } catch (final Exception e) {
- if (prevRecord != null)
+ }
+ catch (final SAMFormatException e) {
+ if (prevRecord != null) {
+ System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter,
+ prevRecord.toString());
+ }
+ throw e;
+ }
+ catch (final Exception e) {
+ if (prevRecord != null) {
System.err.printf("Failed at record %d. Here is the previously read record: %s\n", recordCounter,
prevRecord.toString());
+ }
throw new RuntimeException(e);
}
}
diff --git a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
index 4115a75..cf9748c 100644
--- a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
+++ b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
@@ -32,6 +32,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.lang.ref.WeakReference;
import java.net.URL;
+import java.nio.file.Path;
import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -50,8 +51,12 @@ public class ReferenceSource {
}
public ReferenceSource(final File file) {
- if (file != null)
- rsFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(file);
+ this(file == null ? null : file.toPath());
+ }
+
+ public ReferenceSource(final Path path) {
+ if (path != null)
+ rsFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(path);
}
public ReferenceSource(final ReferenceSequenceFile rsFile) {
diff --git a/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java b/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
index 0ba0dc4..b0b95d3 100644
--- a/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
+++ b/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
@@ -17,6 +17,7 @@
*/
package htsjdk.samtools.cram.structure;
+import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.cram.common.MutableInt;
import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
import htsjdk.samtools.cram.encoding.readfeatures.InsertBase;
@@ -157,7 +158,10 @@ public class CramCompressionRecord {
}
void calculateAlignmentBoundaries() {
- if (readFeatures == null || readFeatures.isEmpty()) {
+ if (isSegmentUnmapped()) {
+ alignmentSpan = 0;
+ alignmentEnd = SAMRecord.NO_ALIGNMENT_START;
+ } else if (readFeatures == null || readFeatures.isEmpty()) {
alignmentSpan = readLength;
alignmentEnd = alignmentStart + alignmentSpan - 1;
} else {
diff --git a/src/java/htsjdk/samtools/cram/structure/ReadTag.java b/src/java/htsjdk/samtools/cram/structure/ReadTag.java
index dc5967a..791bf2c 100644
--- a/src/java/htsjdk/samtools/cram/structure/ReadTag.java
+++ b/src/java/htsjdk/samtools/cram/structure/ReadTag.java
@@ -19,9 +19,13 @@ package htsjdk.samtools.cram.structure;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFormatException;
+import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecord.SAMTagAndValue;
import htsjdk.samtools.SAMTagUtil;
+import htsjdk.samtools.SAMUtils;
+import htsjdk.samtools.SAMValidationError;
import htsjdk.samtools.TagValueAndUnsignedArrayFlag;
+import htsjdk.samtools.ValidationStringency;
import htsjdk.samtools.util.StringUtil;
import java.nio.ByteBuffer;
@@ -50,10 +54,10 @@ public class ReadTag implements Comparable<ReadTag> {
private short code;
private byte index;
- public ReadTag(final int id, final byte[] dataAsByteArray) {
+ public ReadTag(final int id, final byte[] dataAsByteArray, ValidationStringency validationStringency) {
this.type = (char) (0xFF & id);
key = new String(new char[]{(char) ((id >> 16) & 0xFF), (char) ((id >> 8) & 0xFF)});
- value = restoreValueFromByteArray(type, dataAsByteArray);
+ value = restoreValueFromByteArray(type, dataAsByteArray, validationStringency);
keyType3Bytes = this.key + this.type;
keyType3BytesAsInt = id;
@@ -179,10 +183,10 @@ public class ReadTag implements Comparable<ReadTag> {
return writeSingleValue((byte) type, value, false);
}
- private static Object restoreValueFromByteArray(final char type, final byte[] array) {
+ private static Object restoreValueFromByteArray(final char type, final byte[] array, ValidationStringency validationStringency) {
final ByteBuffer buffer = ByteBuffer.wrap(array);
buffer.order(ByteOrder.LITTLE_ENDIAN);
- return readSingleValue((byte) type, buffer);
+ return readSingleValue((byte) type, buffer, validationStringency);
}
// copied from net.sf.samtools.BinaryTagCodec 1.62:
@@ -216,8 +220,7 @@ public class ReadTag implements Comparable<ReadTag> {
// copied from net.sf.samtools.BinaryTagCodec:
static private char getIntegerType(final long val) {
if (val > MAX_UINT) {
- throw new IllegalArgumentException(
- "Integer attribute value too large to be encoded in BAM");
+ throw new IllegalArgumentException("Integer attribute value too large: "+val);
}
if (val > MAX_INT) {
return 'I';
@@ -288,7 +291,7 @@ public class ReadTag implements Comparable<ReadTag> {
buffer.position(buffer.position() - 4);
break;
case 'i':
- buffer.putInt((Integer) value);
+ buffer.putInt(((Number) value).intValue());
break;
case 's':
buffer.putShort(((Number) value).shortValue());
@@ -365,7 +368,7 @@ public class ReadTag implements Comparable<ReadTag> {
}
public static Object readSingleValue(final byte tagType,
- final ByteBuffer byteBuffer) {
+ final ByteBuffer byteBuffer, ValidationStringency validationStringency) {
switch (tagType) {
case 'Z':
return readNullTerminatedString(byteBuffer);
@@ -374,10 +377,15 @@ public class ReadTag implements Comparable<ReadTag> {
case 'I':
final long val = byteBuffer.getInt() & 0xffffffffL;
if (val <= Integer.MAX_VALUE) {
- return (int) val;
+ return (int)val;
+ }
+ // If it won't fit into a signed integer, but is within range for an unsigned 32-bit integer,
+ // return it directly as a long
+ if (! SAMUtils.isValidUnsignedIntegerAttribute(val)) {
+ SAMUtils.processValidationError(new SAMValidationError(SAMValidationError.Type.TAG_VALUE_TOO_LARGE,
+ "Unsigned integer is out of range for a 32-bit unsigned value: " + val, null), validationStringency);
}
- throw new RuntimeException(
- "Tag value is too large to store as signed integer.");
+ return val;
case 'i':
return byteBuffer.getInt();
case 's':
diff --git a/src/java/htsjdk/samtools/filter/FilteringIterator.java b/src/java/htsjdk/samtools/filter/FilteringIterator.java
index df7dc35..00e489f 100644
--- a/src/java/htsjdk/samtools/filter/FilteringIterator.java
+++ b/src/java/htsjdk/samtools/filter/FilteringIterator.java
@@ -142,7 +142,7 @@ public class FilteringIterator implements CloseableIterator<SAMRecord> {
}
} else if (filterReadPairs && record.getReadPairedFlag() &&
record.getSecondOfPairFlag()) {
- // assume that we did a filterOut(first, second) and it passed the filter
+ // assume that we did a pass(first, second) and it passed the filter
return record;
} else if (!filter.filterOut(record)) {
return record;
diff --git a/src/java/htsjdk/samtools/filter/IntervalFilter.java b/src/java/htsjdk/samtools/filter/IntervalFilter.java
index e8647f9..ee3de6d 100644
--- a/src/java/htsjdk/samtools/filter/IntervalFilter.java
+++ b/src/java/htsjdk/samtools/filter/IntervalFilter.java
@@ -33,7 +33,7 @@ import java.util.List;
/**
* Filter SAMRecords so that only those that overlap the given list of intervals.
- * It is required that the SAMRecords are passed in coordinate order
+ * It is required that the SAMRecords are passed in coordinate order, and have non-null SAMFileHeaders.
*
* $Id$
*
diff --git a/src/java/htsjdk/samtools/filter/OverclippedReadFilter.java b/src/java/htsjdk/samtools/filter/OverclippedReadFilter.java
new file mode 100644
index 0000000..2e8f43f
--- /dev/null
+++ b/src/java/htsjdk/samtools/filter/OverclippedReadFilter.java
@@ -0,0 +1,76 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.filter;
+
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.CigarOperator;
+import htsjdk.samtools.SAMException;
+import htsjdk.samtools.SAMRecord;
+
+/**
+ * Filters out reads with very few unclipped bases, likely due to the read coming
+ * from a foreign organism, e.g. bacterial contamination.
+ *
+ * Based on GATK's OverclippedReadFilter.
+ */
+public class OverclippedReadFilter implements SamRecordFilter {
+ // if the number of unclipped bases is below this threshold, the read is considered overclipped
+ private final int unclippedBasesThreshold;
+ // if set to true, then reads with at least one clipped end will be filtered; if false, we require both ends to be clipped
+ private final boolean filterSingleEndClips;
+
+ public OverclippedReadFilter(final int unclippedBasesThreshold, final boolean filterSingleEndClips) {
+ if (unclippedBasesThreshold < 0) throw new SAMException("unclippedBasesThreshold must be non-negative");
+ this.unclippedBasesThreshold = unclippedBasesThreshold;
+ this.filterSingleEndClips = filterSingleEndClips;
+ }
+
+ @Override
+ public boolean filterOut(final SAMRecord record) {
+ int alignedLength = 0;
+ int softClipBlocks = 0;
+ int minSoftClipBlocks = filterSingleEndClips ? 1 : 2;
+ CigarOperator lastOperator = null;
+
+ for ( final CigarElement element : record.getCigar().getCigarElements() ) {
+ if ( element.getOperator() == CigarOperator.S ) {
+ //Treat consecutive S blocks as a single one
+ if(lastOperator != CigarOperator.S){
+ softClipBlocks += 1;
+ }
+
+ } else if ( element.getOperator().consumesReadBases() ) { // M, I, X, and EQ (S was already accounted for above)
+ alignedLength += element.getLength();
+ }
+ lastOperator = element.getOperator();
+ }
+
+ return(alignedLength < unclippedBasesThreshold && softClipBlocks >= minSoftClipBlocks);
+ }
+
+ @Override
+ public boolean filterOut(final SAMRecord first, final SAMRecord second) {
+ return filterOut(first) || filterOut(second);
+ }
+}
diff --git a/src/java/htsjdk/samtools/metrics/MetricsFile.java b/src/java/htsjdk/samtools/metrics/MetricsFile.java
index 954aea1..f3f2216 100644
--- a/src/java/htsjdk/samtools/metrics/MetricsFile.java
+++ b/src/java/htsjdk/samtools/metrics/MetricsFile.java
@@ -28,6 +28,7 @@ import htsjdk.samtools.SAMException;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.FormatUtil;
import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.*;
@@ -535,14 +536,12 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
* @param file to be read.
* @return list of beans from the file.
*/
- public static List<? extends MetricBase> readBeans(final File file) {
- try {
- final MetricsFile<MetricBase, Comparable<?>> metricsFile = new MetricsFile<MetricBase, Comparable<?>>();
- metricsFile.read(new FileReader(file));
- return metricsFile.getMetrics();
- } catch (FileNotFoundException e) {
- throw new SAMException(e.getMessage(), e);
- }
+ public static <T extends MetricBase> List<T> readBeans(final File file) {
+ final MetricsFile<T, Comparable<?>> metricsFile = new MetricsFile<T, Comparable<?>>();
+ final Reader in = IOUtil.openFileForBufferedReading(file);
+ metricsFile.read(in);
+ CloserUtil.close(in);
+ return metricsFile.getMetrics();
}
/**
@@ -573,4 +572,21 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
}
}
+
+    /**
+     * Compare the metrics and histograms in two files, ignoring headers.
+     */
+    public static boolean areMetricsAndHistogramsEqual(final File file1, final File file2) {
+        final MetricsFile<MetricBase, Comparable<?>> mf1 = new MetricsFile<MetricBase, Comparable<?>>();
+        final MetricsFile<MetricBase, Comparable<?>> mf2 = new MetricsFile<MetricBase, Comparable<?>>();
+        // try-with-resources so both readers are closed even when a read fails or file2 cannot be opened.
+        try (Reader in1 = new FileReader(file1); Reader in2 = new FileReader(file2)) {
+            mf1.read(in1);
+            mf2.read(in2);
+            return mf1.areMetricsEqual(mf2) && mf1.areHistogramsEqual(mf2);
+        } catch (IOException e) {
+            // Covers FileNotFoundException (as before) plus any failure while closing the readers.
+            throw new SAMException(e.getMessage(), e);
+        }
+    }
}
diff --git a/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
index 0f09ae6..e0c7dca 100644
--- a/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
@@ -32,14 +32,15 @@ import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.IOUtil;
import java.io.File;
-import java.io.FileInputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
/**
* Provide core sequence dictionary functionality required by all fasta file readers.
* @author Matt Hanna
*/
abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
- protected final File file;
+ private final Path path;
protected SAMSequenceDictionary sequenceDictionary;
/**
@@ -47,15 +48,23 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
* @param file Fasta file to read. Also acts as a prefix for supporting files.
*/
AbstractFastaSequenceFile(final File file) {
- this.file = file;
- final File dictionary = findSequenceDictionary(file);
+ this(file == null ? null : file.toPath());
+ }
+
+ /**
+ * Finds and loads the sequence file dictionary.
+ * @param path Fasta file to read. Also acts as a prefix for supporting files.
+ */
+ AbstractFastaSequenceFile(final Path path) {
+ this.path = path;
+ final Path dictionary = findSequenceDictionary(path);
if (dictionary != null) {
IOUtil.assertFileIsReadable(dictionary);
try {
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
- final BufferedLineReader reader = new BufferedLineReader(new FileInputStream(dictionary));
+ final BufferedLineReader reader = new BufferedLineReader(Files.newInputStream(dictionary));
final SAMFileHeader header = codec.decode(reader,
dictionary.toString());
if (header.getSequenceDictionary() != null && header.getSequenceDictionary().size() > 0) {
@@ -70,33 +79,51 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
}
protected static File findSequenceDictionary(final File file) {
+ if (file == null) {
+ return null;
+ }
+ Path dictionary = findSequenceDictionary(file.toPath());
+ if (dictionary == null) {
+ return null;
+ }
+ return dictionary.toFile();
+ }
+
+ protected static Path findSequenceDictionary(final Path path) {
+ if (path == null) {
+ return null;
+ }
// Try and locate the dictionary
- String dictionaryName = file.getAbsolutePath();
- String dictionaryNameExt = file.getAbsolutePath();
+ Path dictionary = path.toAbsolutePath();
+ Path dictionaryExt = path.toAbsolutePath();
boolean fileTypeSupported = false;
for (final String extension : ReferenceSequenceFileFactory.FASTA_EXTENSIONS) {
- if (dictionaryName.endsWith(extension)) {
- dictionaryNameExt = new String(dictionaryName);
- dictionaryNameExt += IOUtil.DICT_FILE_EXTENSION;
- dictionaryName = dictionaryName.substring(0, dictionaryName.lastIndexOf(extension));
- dictionaryName += IOUtil.DICT_FILE_EXTENSION;
- fileTypeSupported = true;
- break;
+ String filename = dictionary.getFileName().toString();
+ if (filename.endsWith(extension)) {
+ dictionaryExt = dictionary.resolveSibling(filename + IOUtil
+ .DICT_FILE_EXTENSION);
+ String filenameNoExt = filename.substring(0, filename.lastIndexOf(extension));
+ dictionary = dictionary.resolveSibling(filenameNoExt+ IOUtil.DICT_FILE_EXTENSION);
+ fileTypeSupported = true;
+ break;
}
}
if (!fileTypeSupported)
- throw new IllegalArgumentException("File is not a supported reference file type: " + file.getAbsolutePath());
+ throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath());
- final File dictionary = new File(dictionaryName);
- if (dictionary.exists())
+ if (Files.exists(dictionary))
return dictionary;
// try without removing the file extension
- final File dictionaryExt = new File(dictionaryNameExt);
- if (dictionaryExt.exists())
+ if (Files.exists(dictionaryExt))
return dictionaryExt;
else return null;
}
+ /** Returns the path to the reference file. */
+ protected Path getPath() {
+ return path;
+ }
+
/**
* Returns the list of sequence records associated with the reference sequence if found
* otherwise null.
@@ -106,8 +133,13 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
}
/** Returns the full path to the reference file. */
+ protected String getAbsolutePath() {
+ return path.toAbsolutePath().toString();
+ }
+
+ /** Returns the full path to the reference file. */
public String toString() {
- return this.file.getAbsolutePath();
+ return getAbsolutePath();
}
/** default implementation -- override if index is supported */
@@ -120,7 +152,7 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
/** default implementation -- override if index is supported */
public ReferenceSequence getSubsequenceAt( String contig, long start, long stop ) {
- throw new UnsupportedOperationException("Index does not appear to exist for" + file.getAbsolutePath() + ". samtools faidx can be used to create an index");
+ throw new UnsupportedOperationException("Index does not appear to exist for " + getAbsolutePath() + ". samtools faidx can be used to create an index");
}
}
diff --git a/src/java/htsjdk/samtools/reference/FastaSequenceFile.java b/src/java/htsjdk/samtools/reference/FastaSequenceFile.java
index f674521..72c0583 100644
--- a/src/java/htsjdk/samtools/reference/FastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/FastaSequenceFile.java
@@ -32,6 +32,7 @@ import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
+import java.nio.file.Path;
/**
* Implementation of ReferenceSequenceFile for reading from FASTA files.
@@ -48,9 +49,14 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
/** Constructs a FastaSequenceFile that reads from the specified file. */
public FastaSequenceFile(final File file, final boolean truncateNamesAtWhitespace) {
- super(file);
+ this(file == null ? null : file.toPath(), truncateNamesAtWhitespace);
+ }
+
+ /** Constructs a FastaSequenceFile that reads from the specified file. */
+ public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) {
+ super(path);
this.truncateNamesAtWhitespace = truncateNamesAtWhitespace;
- this.in = new FastLineReader(IOUtil.openFileForReading(file));
+ this.in = new FastLineReader(IOUtil.openFileForReading(path));
}
/**
@@ -80,7 +86,7 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
public void reset() {
this.sequenceIndex = -1;
this.in.close();
- this.in = new FastLineReader(IOUtil.openFileForReading(file));
+ this.in = new FastLineReader(IOUtil.openFileForReading(getPath()));
}
@@ -91,7 +97,7 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
}
final byte b = in.getByte();
if (b != '>') {
- throw new SAMException("Format exception reading FASTA " + file + ". Expected > but saw chr(" +
+ throw new SAMException("Format exception reading FASTA " + getAbsolutePath() + ". Expected > but saw chr(" +
b + ") at start of sequence with index " + this.sequenceIndex);
}
final byte[] nameBuffer = new byte[4096];
@@ -102,11 +108,11 @@ public class FastaSequenceFile extends AbstractFastaSequenceFile {
}
nameLength += in.readToEndOfOutputBufferOrEoln(nameBuffer, nameLength);
if (nameLength == nameBuffer.length && !in.atEoln()) {
- throw new SAMException("Sequence name too long in FASTA " + file);
+ throw new SAMException("Sequence name too long in FASTA " + getAbsolutePath());
}
} while (!in.atEoln());
if (nameLength == 0) {
- throw new SAMException("Missing sequence name in FASTA " + file);
+ throw new SAMException("Missing sequence name in FASTA " + getAbsolutePath());
}
String name = StringUtil.bytesToString(nameBuffer, 0, nameLength).trim();
if (truncateNamesAtWhitespace) {
diff --git a/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java b/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java
index d75f65e..e314fcc 100644
--- a/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java
+++ b/src/java/htsjdk/samtools/reference/FastaSequenceIndex.java
@@ -30,6 +30,8 @@ import htsjdk.samtools.util.IOUtil;
import java.io.File;
import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.nio.file.Path;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
@@ -51,6 +53,15 @@ public class FastaSequenceIndex implements Iterable<FastaSequenceIndexEntry> {
* @throws FileNotFoundException if the index file cannot be found.
*/
public FastaSequenceIndex( File indexFile ) {
+ this(indexFile == null ? null : indexFile.toPath());
+ }
+
+ /**
+ * Build a sequence index from the specified file.
+ * @param indexFile File to open.
+ * @throws FileNotFoundException if the index file cannot be found.
+ */
+ public FastaSequenceIndex( Path indexFile ) {
IOUtil.assertFileIsReadable(indexFile);
parseIndexFile(indexFile);
}
@@ -111,12 +122,11 @@ public class FastaSequenceIndex implements Iterable<FastaSequenceIndexEntry> {
/**
* Parse the contents of an index file, caching the results internally.
* @param indexFile File to parse.
- * @throws FileNotFoundException Thrown if file could not be opened.
+ * @throws SAMException Thrown if file could not be opened.
*/
- private void parseIndexFile(File indexFile) {
+ private void parseIndexFile(Path indexFile) {
try {
Scanner scanner = new Scanner(indexFile);
-
int sequenceIndex = 0;
while( scanner.hasNext() ) {
// Tokenize and validate the index line.
@@ -142,8 +152,9 @@ public class FastaSequenceIndex implements Iterable<FastaSequenceIndexEntry> {
add(new FastaSequenceIndexEntry(contig,location,size,basesPerLine,bytesPerLine, sequenceIndex++) );
}
scanner.close();
- } catch (FileNotFoundException e) {
- throw new SAMException("Fasta index file should be found but is not: " + indexFile, e);
+ } catch (IOException e) {
+ throw new SAMException("Fasta index file could not be opened: " + indexFile, e);
+
}
}
diff --git a/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
index bb15000..b341d6f 100644
--- a/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
@@ -37,6 +37,9 @@ import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
+import java.nio.channels.SeekableByteChannel;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.Iterator;
/**
@@ -47,7 +50,7 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
/**
* The interface facilitating direct access to the fasta.
*/
- private final FileChannel channel;
+ private final SeekableByteChannel channel;
/**
* A representation of the sequence index, stored alongside the fasta in a .fasta.fai file.
@@ -66,33 +69,48 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
*/
public IndexedFastaSequenceFile(final File file, final FastaSequenceIndex index) {
- super(file);
- if (index == null) throw new IllegalArgumentException("Null index for fasta " + file);
+ this(file == null ? null : file.toPath(), index);
+ }
+
+ /**
+ * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
+ * @param file The file to open.
+ * @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
+ */
+ public IndexedFastaSequenceFile(final File file) throws FileNotFoundException {
+ this(file, new FastaSequenceIndex((findRequiredFastaIndexFile(file))));
+ }
+
+ /**
+ * Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
+ * @param path The file to open.
+ * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
+ */
+ public IndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) {
+ super(path);
+ if (index == null) throw new IllegalArgumentException("Null index for fasta " + path);
this.index = index;
- IOUtil.assertFileIsReadable(file);
- final FileInputStream in;
+ IOUtil.assertFileIsReadable(path);
try {
- in = new FileInputStream(file);
- } catch (FileNotFoundException e) {
- throw new SAMException("Fasta file should be readable but is not: " + file, e);
+ this.channel = Files.newByteChannel(path);
+ } catch (IOException e) {
+ throw new SAMException("Fasta file should be readable but is not: " + path, e);
}
- channel = in.getChannel();
reset();
if(getSequenceDictionary() != null)
- sanityCheckDictionaryAgainstIndex(file.getAbsolutePath(),sequenceDictionary,index);
+ sanityCheckDictionaryAgainstIndex(path.toAbsolutePath().toString(),sequenceDictionary,index);
}
/**
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened.
- * @param file The file to open.
+ * @param path The file to open.
* @throws FileNotFoundException If the fasta or any of its supporting files cannot be found.
*/
- public IndexedFastaSequenceFile(final File file) throws FileNotFoundException {
- this(file, new FastaSequenceIndex((findRequiredFastaIndexFile(file))));
+ public IndexedFastaSequenceFile(final Path path) throws FileNotFoundException {
+ this(path, new FastaSequenceIndex((findRequiredFastaIndexFile(path))));
}
-
public boolean isIndexed() {return true;}
private static File findFastaIndex(File fastaFile) {
@@ -116,6 +134,27 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
findFastaIndex(fastaFile) != null);
}
+ private static Path findFastaIndex(Path fastaFile) {
+ Path indexFile = getFastaIndexFileName(fastaFile);
+ if (!Files.exists(indexFile)) return null;
+ return indexFile;
+ }
+
+ private static Path getFastaIndexFileName(Path fastaFile) {
+ return fastaFile.resolveSibling(fastaFile.getFileName() + ".fai");
+ }
+
+ private static Path findRequiredFastaIndexFile(Path fastaFile) throws FileNotFoundException {
+ Path ret = findFastaIndex(fastaFile);
+ if (ret == null) throw new FileNotFoundException(getFastaIndexFileName(fastaFile) + " not found.");
+ return ret;
+ }
+
+ public static boolean canCreateIndexedFastaReader(final Path fastaFile) {
+ return (Files.exists(fastaFile) &&
+ findFastaIndex(fastaFile) != null);
+ }
+
/**
* Do some basic checking to make sure the dictionary and the index match.
* @param fastaFile Used for error reporting only.
@@ -202,10 +241,10 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
startOffset += Math.max((int)(startOffset%bytesPerLine - basesPerLine + 1),0);
try {
- startOffset += channel.read(channelBuffer,indexEntry.getLocation()+startOffset);
+ startOffset += readFromPosition(channel, channelBuffer, indexEntry.getLocation()+startOffset);
}
catch(IOException ex) {
- throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + file);
+ throw new SAMException("Unable to load " + contig + "(" + start + ", " + stop + ") from " + getAbsolutePath(), ex);
}
// Reset the buffer for outbound transfers.
@@ -235,6 +274,29 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
}
/**
+ * Reads a sequence of bytes from this channel into the given buffer,
+ * starting at the given file position.
+ * @param channel the channel to read from
+ * @param buffer the buffer into which bytes are to be transferred
+ * @param position the position to start reading at
+ * @return the number of bytes read
+ * @throws IOException if an I/O error occurs while reading
+ */
+ private static int readFromPosition(final SeekableByteChannel channel, final ByteBuffer buffer, long position) throws IOException {
+ if (channel instanceof FileChannel) { // special case to take advantage of native code path
+ return ((FileChannel) channel).read(buffer,position);
+ } else {
+ long oldPos = channel.position();
+ try {
+ channel.position(position);
+ return channel.read(buffer);
+ } finally {
+ channel.position(oldPos);
+ }
+ }
+ }
+
+ /**
* Gets the next sequence if available, or null if not present.
* @return next sequence if available, or null if not present.
*/
@@ -256,7 +318,7 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
* @return String representation of the file.
*/
public String toString() {
- return this.file.getAbsolutePath();
+ return getAbsolutePath();
}
@Override
diff --git a/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java b/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
index 6441140..5978072 100644
--- a/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
+++ b/src/java/htsjdk/samtools/reference/ReferenceSequenceFileFactory.java
@@ -26,6 +26,7 @@ package htsjdk.samtools.reference;
import java.io.File;
import java.io.FileNotFoundException;
+import java.nio.file.Path;
import java.util.HashSet;
import java.util.Set;
@@ -78,24 +79,58 @@ public class ReferenceSequenceFileFactory {
* @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader
*/
public static ReferenceSequenceFile getReferenceSequenceFile(final File file, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) {
- final String name = file.getName();
+ return getReferenceSequenceFile(file.toPath(), truncateNamesAtWhitespace, preferIndexed);
+ }
+
+ /**
+ * Attempts to determine the type of the reference file and return an instance
+ * of ReferenceSequenceFile that is appropriate to read it. Sequence names
+ * will be truncated at first whitespace, if any.
+ *
+ * @param path the reference sequence file on disk
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path) {
+ return getReferenceSequenceFile(path, true);
+ }
+
+ /**
+ * Attempts to determine the type of the reference file and return an instance
+ * of ReferenceSequenceFile that is appropriate to read it.
+ *
+ * @param path the reference sequence file on disk
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace) {
+ return getReferenceSequenceFile(path, truncateNamesAtWhitespace, true);
+ }
+
+ /**
+ * Attempts to determine the type of the reference file and return an instance
+ * of ReferenceSequenceFile that is appropriate to read it.
+ *
+ * @param path the reference sequence file path
+ * @param truncateNamesAtWhitespace if true, only include the first word of the sequence name
+ * @param preferIndexed if true attempt to return an indexed reader that supports non-linear traversal, else return the non-indexed reader
+ */
+ public static ReferenceSequenceFile getReferenceSequenceFile(final Path path, final boolean truncateNamesAtWhitespace, final boolean preferIndexed) {
+ final String name = path.getFileName().toString();
for (final String ext : FASTA_EXTENSIONS) {
if (name.endsWith(ext)) {
// Using faidx requires truncateNamesAtWhitespace
- if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(file)) {
+ if (truncateNamesAtWhitespace && preferIndexed && IndexedFastaSequenceFile.canCreateIndexedFastaReader(path)) {
try {
- return new IndexedFastaSequenceFile(file);
+ return new IndexedFastaSequenceFile(path);
}
catch (final FileNotFoundException e) {
throw new IllegalStateException("Should never happen, because existence of files has been checked.", e);
}
}
else {
- return new FastaSequenceFile(file, truncateNamesAtWhitespace);
+ return new FastaSequenceFile(path, truncateNamesAtWhitespace);
}
}
}
- throw new IllegalArgumentException("File is not a supported reference file type: " + file.getAbsolutePath());
+ throw new IllegalArgumentException("File is not a supported reference file type: " + path.toAbsolutePath());
}
}
diff --git a/src/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java b/src/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java
new file mode 100644
index 0000000..a6efc91
--- /dev/null
+++ b/src/java/htsjdk/samtools/seekablestream/SeekableMemoryStream.java
@@ -0,0 +1,64 @@
+package htsjdk.samtools.seekablestream;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+public class SeekableMemoryStream extends SeekableStream {
+ private final ByteBuffer buf;
+ private final String source;
+
+ public SeekableMemoryStream(final byte[] data, final String source) {
+ this.buf = ByteBuffer.wrap(data);
+ this.source = source;
+ }
+
+ @Override
+ public void close() throws IOException {
+ buf.clear();
+ }
+
+ @Override
+ public boolean eof() throws IOException {
+ return buf.position() == buf.limit();
+ }
+
+ @Override
+ public String getSource() {
+ return source;
+ }
+
+ @Override
+ public long length() {
+ return buf.array().length - buf.arrayOffset();
+ }
+
+ @Override
+ public int read(final byte[] buffer, final int offset, final int length) throws IOException {
+ int availableLength = Math.min(length, buf.remaining());
+ if (availableLength < 1) {
+ return -1;
+ }
+ buf.get(buffer, offset, availableLength);
+ return availableLength;
+ }
+
+ @Override
+ public void seek(final long position) throws IOException {
+ buf.position((int) position);
+ }
+
+ @Override
+ public int read() throws IOException {
+ if (buf.position() < buf.limit()) {
+ return buf.get() & 0xFF; // mask to 0..255: an unmasked byte >= 0x80 is negative, and 0xFF would collide with the -1 end-of-data value below
+ } else {
+ return -1;
+ }
+ }
+
+ @Override
+ public long position() throws IOException {
+ return buf.position();
+ }
+
+}
diff --git a/src/java/htsjdk/samtools/sra/ReferenceCache.java b/src/java/htsjdk/samtools/sra/ReferenceCache.java
new file mode 100644
index 0000000..de6e27b
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/ReferenceCache.java
@@ -0,0 +1,79 @@
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This is a thread-safe wrapper for a list of cached Reference objects.
+ * Those objects can be used from different threads without issues, however to load and save a Reference object, we
+ * need to acquire a lock.
+ *
+ * Created by andrii.nikitiuk on 10/28/15.
+ */
+public class ReferenceCache {
+ private ReadCollection run;
+ private SAMFileHeader virtualHeader;
+ private final List<Reference> cachedReferences;
+
+ public ReferenceCache(ReadCollection run, SAMFileHeader virtualHeader) {
+ this.run = run;
+ this.virtualHeader = virtualHeader;
+ cachedReferences = initializeReferenceCache();
+ }
+
+ /**
+ * This method returns Reference objects by reference indexes in SAM header
+ * Those objects can be used from different threads
+ *
+ * This method maintains thread safety, so that if Reference object is set already, it can be easily returned
+ * without locks. However, if Reference object is null, we need to acquire a lock, load the object and save it in
+ * array.
+ *
+ * @param referenceIndex reference index in SAM header
+ * @return a Reference object
+ */
+ public Reference get(int referenceIndex) {
+ Reference reference = cachedReferences.get(referenceIndex);
+
+ if (reference != null) {
+ return reference;
+ }
+
+ // maintain thread safety
+ synchronized (this) {
+ reference = cachedReferences.get(referenceIndex);
+ if (reference == null) {
+ try {
+ reference = run.getReference(virtualHeader.getSequence(referenceIndex).getSequenceName());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ cachedReferences.set(referenceIndex, reference);
+ }
+ }
+
+
+ return reference;
+ }
+
+ private List<Reference> initializeReferenceCache() {
+ if (virtualHeader == null) {
+ throw new RuntimeException("Cannot cache references - header is uninitialized");
+ }
+
+ SAMSequenceDictionary sequenceDictionary = virtualHeader.getSequenceDictionary();
+ List<Reference> references = new ArrayList<Reference>(sequenceDictionary.size());
+ for (SAMSequenceRecord sequence : sequenceDictionary.getSequences()) {
+ references.add(null);
+ }
+
+ return references;
+ }
+}
diff --git a/src/java/htsjdk/samtools/sra/SRAAccession.java b/src/java/htsjdk/samtools/sra/SRAAccession.java
new file mode 100644
index 0000000..6f39eca
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAAccession.java
@@ -0,0 +1,108 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.util.Log;
+import gov.nih.nlm.ncbi.ngs.NGS;
+
+import java.io.Serializable;
+
+/**
+ * Describes a single SRA accession
+ * Also provides app version string functionality and allows checking whether SRA is supported on the running platform
+ */
+public class SRAAccession implements Serializable {
+ private static final Log log = Log.getInstance(SRAAccession.class);
+
+ private static Boolean isSupportedCached = null;
+ private static String appVersionString = null;
+ private final static String defaultAppVersionString = "[unknown software]";
+ private final static String htsJdkVersionString = "HTSJDK-NGS";
+
+ private String acc;
+
+ /**
+ * Sets an app version string which will let SRA know which software uses it.
+ * @param appVersionString a string that describes running application
+ */
+ public static void setAppVersionString(String appVersionString) {
+ SRAAccession.appVersionString = appVersionString;
+ }
+
+ /**
+ * Returns true if SRA is supported on the running platform
+ * @return true if SRA engine was successfully loaded and operational, false otherwise
+ */
+ public static boolean isSupported() {
+ if (isSupportedCached == null) {
+ log.debug("Checking if SRA module is supported in that environment");
+ isSupportedCached = NGS.isSupported();
+ if (!isSupportedCached) {
+ log.info("SRA is not supported. Will not be able to read from SRA");
+ } else {
+ NGS.setAppVersionString(getFullVersionString());
+ }
+ }
+ return isSupportedCached;
+ }
+
+ /**
+ * @param acc accession
+ * @return true if a string is a valid SRA accession
+ */
+ public static boolean isValid(String acc) {
+ if (!isSupported()) {
+ return false;
+ }
+
+ return NGS.isValid(acc);
+ }
+
+ /**
+ * @param acc accession
+ */
+ public SRAAccession(String acc) {
+ this.acc = acc;
+ }
+
+ public String toString() {
+ return acc;
+ }
+
+ /**
+ * @return true if contained string is an SRA accession
+ */
+ public boolean isValid() {
+ return SRAAccession.isValid(acc);
+ }
+
+ private static String getFullVersionString() {
+ String versionString = appVersionString == null ? defaultAppVersionString : appVersionString;
+ versionString += " through " + htsJdkVersionString;
+ return versionString;
+ }
+}
diff --git a/src/java/htsjdk/samtools/sra/SRAAlignmentIterator.java b/src/java/htsjdk/samtools/sra/SRAAlignmentIterator.java
new file mode 100644
index 0000000..2ebade1
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAAlignmentIterator.java
@@ -0,0 +1,194 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+
+import htsjdk.samtools.Chunk;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SRAIterator;
+import htsjdk.samtools.ValidationStringency;
+import ngs.Alignment;
+import ngs.AlignmentIterator;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+import ngs.ReferenceIterator;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+
+/**
+ * Iterator for aligned reads.
+ * Is used from SRAIterator.
+ * Created by andrii.nikitiuk on 9/3/15.
+ */
+public class SRAAlignmentIterator implements Iterator<SAMRecord> {
+ private ValidationStringency validationStringency;
+
+ private SRAAccession accession;
+ private ReadCollection run;
+ private SAMFileHeader header;
+ private ReferenceCache cachedReferences;
+ private List<Long> referencesLengths;
+ private Iterator<Chunk> referencesChunksIterator;
+ private int currentReference = -1;
+
+ private boolean hasMoreReferences = true;
+
+ private AlignmentIterator alignedIterator;
+ private Boolean hasMoreAlignments = false;
+
+ private SRALazyRecord lastRecord;
+
+ /**
+ * @param run opened read collection
+ * @param header sam header
+ * @param cachedReferences list of cached references shared among all iterators from a single SRAFileReader
+ * @param recordRangeInfo info about record ranges within SRA archive
+ * @param chunk used to determine which alignments the iterator should return
+ */
+ public SRAAlignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, ReferenceCache cachedReferences,
+ final SRAIterator.RecordRangeInfo recordRangeInfo, final Chunk chunk) {
+ this.accession = accession;
+ this.run = run;
+ this.header = header;
+ this.cachedReferences = cachedReferences;
+ this.referencesLengths = recordRangeInfo.getReferenceLengthsAligned();
+
+ referencesChunksIterator = getReferenceChunks(chunk).iterator();
+
+ try {
+ nextReference();
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ // check aligned
+ if (hasMoreAlignments == null) {
+ try {
+ lastRecord.detachFromIterator();
+ hasMoreAlignments = alignedIterator.nextAlignment();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ while (!hasMoreAlignments && hasMoreReferences) {
+ nextReference();
+ }
+
+ return hasMoreAlignments;
+ }
+
+ @Override
+ public SAMRecord next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more alignments are available");
+ }
+
+ return nextAlignment();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Removal of records not implemented.");
+ }
+
+ public void setValidationStringency(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
+
+ private SAMRecord nextAlignment() {
+ try {
+ lastRecord = new SRALazyRecord(header, accession, run, alignedIterator, alignedIterator.getReadId(), alignedIterator.getAlignmentId());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ if (validationStringency != null) {
+ lastRecord.setValidationStringency(validationStringency);
+ }
+
+ hasMoreAlignments = null;
+
+ return lastRecord;
+ }
+
+ private void nextReference() {
+ if (!hasMoreReferences) {
+ throw new NoSuchElementException("Cannot get next reference - already at last one");
+ }
+
+ try {
+ hasMoreReferences = referencesChunksIterator.hasNext();
+ if (!hasMoreReferences) {
+ hasMoreAlignments = false;
+ return;
+ }
+
+ currentReference++;
+ Chunk refChunk = referencesChunksIterator.next();
+ if (refChunk == null) {
+ hasMoreAlignments = false;
+ return;
+ }
+
+ Reference reference = cachedReferences.get(currentReference);
+
+ alignedIterator = reference.getFilteredAlignmentSlice(
+ refChunk.getChunkStart(), refChunk.getChunkEnd() - refChunk.getChunkStart(),
+ Alignment.all, Alignment.startWithinSlice | Alignment.passDuplicates | Alignment.passFailed, 0);
+
+ hasMoreAlignments = alignedIterator.nextAlignment();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private List<Chunk> getReferenceChunks(final Chunk chunk) {
+ List<Chunk> referencesChunks = new ArrayList<Chunk>();
+ long refOffset = 0;
+ for (Long refLen : referencesLengths) {
+ if (chunk.getChunkStart() - refOffset >= refLen || chunk.getChunkEnd() - refOffset <= 0) {
+ referencesChunks.add(null);
+ } else {
+ long refChunkStart = Math.max(chunk.getChunkStart() - refOffset, 0);
+ long refChunkEnd = Math.min(chunk.getChunkEnd() - refOffset, refLen);
+ referencesChunks.add(new Chunk(refChunkStart, refChunkEnd));
+ }
+
+ refOffset += refLen;
+ }
+
+ return referencesChunks;
+ }
+}
diff --git a/src/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java b/src/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java
new file mode 100644
index 0000000..567bce0
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAIndexedSequenceFile.java
@@ -0,0 +1,121 @@
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.sra.SRAAccession;
+import ngs.ErrorMsg;
+import ngs.ReadCollection;
+import ngs.Reference;
+import ngs.ReferenceIterator;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Allows reading Reference data from SRA
+ */
+public class SRAIndexedSequenceFile implements ReferenceSequenceFile {
+ private SRAAccession acc;
+ private ReadCollection run;
+ private ThreadLocal<HashMap<Integer, Reference>> cachedReferences = new ThreadLocal<HashMap<Integer, Reference>>();
+
+ private Iterator<SAMSequenceRecord> sequenceRecordIterator;
+
+ protected SAMSequenceDictionary sequenceDictionary;
+
+ /**
+ * @param acc accession
+ */
+ public SRAIndexedSequenceFile(SRAAccession acc) {
+ this.acc = acc;
+
+ if (!acc.isValid()) {
+ throw new RuntimeException("Passed an invalid SRA accession into SRA reader: " + acc);
+ }
+
+ try {
+ run = gov.nih.nlm.ncbi.ngs.NGS.openReadCollection(acc.toString());
+ sequenceDictionary = loadSequenceDictionary();
+ } catch (final ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+
+ reset();
+ }
+
+ @Override
+ public SAMSequenceDictionary getSequenceDictionary() {
+ return sequenceDictionary;
+ }
+
+ @Override
+ public ReferenceSequence nextSequence() {
+ SAMSequenceRecord sequence = sequenceRecordIterator.next();
+ return getSubsequenceAt(sequence.getSequenceName(), 1L, sequence.getSequenceLength());
+ }
+
+ @Override
+ public void reset() {
+ sequenceRecordIterator = sequenceDictionary.getSequences().iterator();
+ }
+
+ @Override
+ public boolean isIndexed() {
+ return true;
+ }
+
+ @Override
+ public ReferenceSequence getSequence(String contig) {
+ return getSubsequenceAt(contig, 1L, sequenceDictionary.getSequence(contig).getSequenceLength());
+ }
+
+ @Override
+ public ReferenceSequence getSubsequenceAt(String contig, long start, long stop) {
+ SAMSequenceRecord sequence = sequenceDictionary.getSequence(contig);
+ int referenceIndex = sequence.getSequenceIndex();
+
+ byte[] bases;
+
+ try {
+ HashMap<Integer, Reference> localRefs = cachedReferences.get();
+ if (localRefs == null) {
+ localRefs = new HashMap<Integer, Reference>();
+ cachedReferences.set(localRefs);
+ }
+ Reference reference = localRefs.get(referenceIndex);
+ if (reference == null) {
+ reference = run.getReference(contig);
+ localRefs.put(referenceIndex, reference);
+ }
+
+ bases = reference.getReferenceBases(start - 1, stop - (start - 1)).getBytes();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+
+ return new ReferenceSequence(contig, referenceIndex, bases);
+ }
+
+ @Override
+ public void close() throws IOException {
+
+ }
+
+ protected SAMSequenceDictionary loadSequenceDictionary() throws ErrorMsg {
+ SAMSequenceDictionary dict = new SAMSequenceDictionary();
+
+ ReferenceIterator itRef = run.getReferences();
+ while (itRef.nextReference()) {
+ dict.addSequence(new SAMSequenceRecord(itRef.getCanonicalName(), (int) itRef.getLength()));
+ }
+
+ return dict;
+ }
+}
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/sra/SRALazyRecord.java b/src/java/htsjdk/samtools/sra/SRALazyRecord.java
new file mode 100644
index 0000000..4391857
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRALazyRecord.java
@@ -0,0 +1,1056 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+import gov.nih.nlm.ncbi.ngs.NGS;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMTagUtil;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.SAMBinaryTagAndValue;
+import htsjdk.samtools.SAMUtils;
+import htsjdk.samtools.SAMValidationError;
+import htsjdk.samtools.util.Log;
+import ngs.ReadCollection;
+import ngs.AlignmentIterator;
+import ngs.Alignment;
+import ngs.ReadIterator;
+import ngs.Read;
+import ngs.Fragment;
+import ngs.ErrorMsg;
+
+import java.io.IOException;
+import java.util.EnumSet;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+/**
+ * Extends SAMRecord so that any of the fields will be loaded only when needed.
+ * Since SRA is a column oriented database, it is very inefficient to load all the fields at once.
+ * However, loading only the set of fields that are actually needed will be even faster than in row-oriented databases.
+ *
+ * Because of that we are providing lazy loading of fields, flags and attributes.
+ *
+ * Created by andrii.nikitiuk on 8/25/15.
+ */
+public class SRALazyRecord extends SAMRecord {
+ private static final Log log = Log.getInstance(SRALazyRecord.class);
+
+ private SRAAccession accession;
+ private boolean isAligned;
+ private transient ReadCollection run;
+ private transient Alignment alignmentIterator;
+ private transient Read unalignmentIterator;
+ private String sraReadId;
+ private String sraAlignmentId;
+ private int unalignedReadFragmentIndex = -1;
+
+
+ private Set<LazyField> initializedFields = EnumSet.noneOf(LazyField.class);
+ private Set<LazyFlag> initializedFlags = EnumSet.noneOf(LazyFlag.class);
+ private Set<LazyAttribute> initializedAttributes = EnumSet.noneOf(LazyAttribute.class);
+
+ private enum LazyField {
+ ALIGNMENT_START {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getAlignmentStart();
+ }
+ },
+ MAPPING_QUALITY {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getMappingQuality();
+ }
+ },
+ REFERENCE_NAME {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getReferenceName();
+ }
+ },
+ CIGAR_STRING {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getCigarString();
+ }
+ },
+ BASES {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getReadBases();
+ }
+ },
+ QUALS {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getBaseQualities();
+ }
+ },
+ MATE_ALIGNMENT_START {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getMateAlignmentStart();
+ }
+ },
+ MATE_REFERENCE_NAME {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getMateReferenceName();
+ }
+ },
+ INFERRED_INSERT_SIZE {
+ @Override
+ public void loadValue(SRALazyRecord self) {
+ self.getInferredInsertSize();
+ }
+ };
+
+ public abstract void loadValue(SRALazyRecord self);
+ }
+
+ private enum LazyFlag {
+ READ_NEGATIVE_STRAND(true) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getReadNegativeStrandFlag();
+ }
+ },
+ READ_PAIRED(true) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getReadPairedFlag();
+ }
+ },
+ PROPER_PAIR(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getProperPairFlag();
+ }
+ },
+ NOT_PRIMARY_ALIGNMENT(true) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getNotPrimaryAlignmentFlag();
+ }
+ },
+ MATE_NEGATIVE_STRAND(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getMateNegativeStrandFlag();
+ }
+ },
+ MATE_UNMAPPED(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getMateUnmappedFlag();
+ }
+ },
+ FIRST_OF_PAIR(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getFirstOfPairFlag();
+ }
+ },
+ SECOND_OF_PAIR(false) {
+ @Override
+ public boolean getFlag(SRALazyRecord self) {
+ return self.getSecondOfPairFlag();
+ }
+ };
+
+ private final boolean canCallOnNotPaired;
+
+ LazyFlag(final boolean canCallOnNotPaired) {
+ this.canCallOnNotPaired = canCallOnNotPaired;
+ }
+
+ public boolean canCallOnNotPaired() { return canCallOnNotPaired; }
+
+ public abstract boolean getFlag(SRALazyRecord self);
+ }
+
+ private enum LazyAttribute {
+ RG {
+ @Override
+ public String getAttribute(SRALazyRecord self) {
+ return self.getAttributeGroupNameImpl();
+ }
+ };
+
+ public abstract String getAttribute(SRALazyRecord self);
+ }
+
+ private static Map<Short, LazyAttribute> lazyAttributeTags;
+ static
+ {
+ lazyAttributeTags = new HashMap<Short, LazyAttribute>();
+ lazyAttributeTags.put(SAMTagUtil.getSingleton().RG, LazyAttribute.RG);
+ }
+
+ public SRALazyRecord(final SAMFileHeader header, SRAAccession accession, ReadCollection run, AlignmentIterator alignmentIterator, String readId, String alignmentId) {
+ this(header, accession, readId, alignmentId);
+
+ this.run = run;
+ this.alignmentIterator = alignmentIterator;
+ }
+
+ public SRALazyRecord(final SAMFileHeader header, SRAAccession accession, ReadCollection run, ReadIterator unalignmentIterator, String readId, int unalignedReadFragmentIndex) {
+ this(header, accession, readId, unalignedReadFragmentIndex);
+
+ this.run = run;
+ this.unalignmentIterator = unalignmentIterator;
+ }
+
+ protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, String alignmentId) {
+ this(header, accession, readId, true);
+
+ this.sraAlignmentId = alignmentId;
+ }
+
+ protected SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, int unalignedReadFragmentIndex) {
+ this(header, accession, readId, false);
+
+ this.unalignedReadFragmentIndex = unalignedReadFragmentIndex;
+ }
+
+ private SRALazyRecord(final SAMFileHeader header, SRAAccession accession, String readId, boolean isAligned) {
+ super(header);
+
+ this.accession = accession;
+ this.isAligned = isAligned;
+ this.sraReadId = readId;
+ setReadName(readId);
+ setReadUnmappedFlag(!isAligned);
+ }
+
+ /**
+ * Drops the references to the NGS alignment and read objects this record was created with.
+ * Is called when the original NGS iterator is being moved to the next object.
+ * Later, if any uninitialized field is requested, either the Read or the Alignment object has to be
+ * retrieved again from the ReadCollection.
+ */
+ public void detachFromIterator() {
+ alignmentIterator = null;
+ unalignmentIterator = null;
+ }
+
+ // ===== fields =====
+
+ @Override
+ public int getAlignmentStart() {
+ if (!initializedFields.contains(LazyField.ALIGNMENT_START)) {
+ setAlignmentStart(getAlignmentStartImpl());
+ }
+ return super.getAlignmentStart();
+ }
+
+ @Override
+ public void setAlignmentStart(final int value) {
+ if (!initializedFields.contains(LazyField.ALIGNMENT_START)) {
+ initializedFields.add(LazyField.ALIGNMENT_START);
+ }
+ super.setAlignmentStart(value);
+ }
+
+ @Override
+ public int getMappingQuality() {
+ if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) {
+ setMappingQuality(getMappingQualityImpl());
+ }
+ return super.getMappingQuality();
+ }
+
+ @Override
+ public void setMappingQuality(final int value) {
+ if (!initializedFields.contains(LazyField.MAPPING_QUALITY)) {
+ initializedFields.add(LazyField.MAPPING_QUALITY);
+ }
+ super.setMappingQuality(value);
+ }
+
+ @Override
+ public String getReferenceName() {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ setReferenceName(getReferenceNameImpl());
+ }
+ return super.getReferenceName();
+ }
+
+ @Override
+ public void setReferenceName(final String value) {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ initializedFields.add(LazyField.REFERENCE_NAME);
+ }
+ super.setReferenceName(value);
+ }
+
+ @Override
+ public Integer getReferenceIndex() {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ setReferenceName(getReferenceNameImpl());
+ }
+ return super.getReferenceIndex();
+ }
+
+ @Override
+ public void setReferenceIndex(final int value) {
+ if (!initializedFields.contains(LazyField.REFERENCE_NAME)) {
+ initializedFields.add(LazyField.REFERENCE_NAME);
+ }
+ super.setReferenceIndex(value);
+ }
+
+ @Override
+ public String getCigarString() {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ setCigarString(getCigarStringImpl());
+ }
+ return super.getCigarString();
+ }
+
+ @Override
+ public void setCigarString(final String value) {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ initializedFields.add(LazyField.CIGAR_STRING);
+ }
+ super.setCigarString(value);
+ }
+
+ @Override
+ public Cigar getCigar() {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ setCigarString(getCigarStringImpl());
+ }
+ return super.getCigar();
+ }
+
+ @Override
+ public void setCigar(final Cigar value) {
+ if (!initializedFields.contains(LazyField.CIGAR_STRING)) {
+ initializedFields.add(LazyField.CIGAR_STRING);
+ }
+ super.setCigar(value);
+ }
+
+ @Override
+ public byte[] getReadBases() {
+ if (!initializedFields.contains(LazyField.BASES)) {
+ setReadBases(getReadBasesImpl());
+ }
+ return super.getReadBases();
+ }
+
+ @Override
+ public void setReadBases(final byte[] value) {
+ if (!initializedFields.contains(LazyField.BASES)) {
+ initializedFields.add(LazyField.BASES);
+ }
+ super.setReadBases(value);
+ }
+
+ @Override
+ public byte[] getBaseQualities() {
+ if (!initializedFields.contains(LazyField.QUALS)) {
+ setBaseQualities(getBaseQualitiesImpl());
+ }
+ return super.getBaseQualities();
+ }
+
+ @Override
+ public void setBaseQualities(final byte[] value) {
+ if (!initializedFields.contains(LazyField.QUALS)) {
+ initializedFields.add(LazyField.QUALS);
+ }
+ super.setBaseQualities(value);
+ }
+
+ @Override
+ public int getMateAlignmentStart() {
+ if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) {
+ setMateAlignmentStart(getMateAlignmentStartImpl());
+ }
+ return super.getMateAlignmentStart();
+ }
+
+ @Override
+ public void setMateAlignmentStart(final int value) {
+ if (!initializedFields.contains(LazyField.MATE_ALIGNMENT_START)) {
+ initializedFields.add(LazyField.MATE_ALIGNMENT_START);
+ }
+ super.setMateAlignmentStart(value);
+ }
+
+ @Override
+ public String getMateReferenceName() {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ setMateReferenceName(getMateReferenceNameImpl());
+ }
+ return super.getMateReferenceName();
+ }
+
+ @Override
+ public void setMateReferenceName(final String value) {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ initializedFields.add(LazyField.MATE_REFERENCE_NAME);
+ }
+ super.setMateReferenceName(value);
+ }
+
+ @Override
+ public Integer getMateReferenceIndex() {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ setMateReferenceName(getMateReferenceNameImpl());
+ }
+ return super.getMateReferenceIndex();
+ }
+
+ @Override
+ public void setMateReferenceIndex(final int value) {
+ if (!initializedFields.contains(LazyField.MATE_REFERENCE_NAME)) {
+ initializedFields.add(LazyField.MATE_REFERENCE_NAME);
+ }
+ super.setMateReferenceIndex(value);
+ }
+
+ @Override
+ public int getInferredInsertSize() {
+ if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) {
+ setInferredInsertSize(getInferredInsertSizeImpl());
+ }
+ return super.getInferredInsertSize();
+ }
+
+ @Override
+ public void setInferredInsertSize(final int value) {
+ if (!initializedFields.contains(LazyField.INFERRED_INSERT_SIZE)) {
+ initializedFields.add(LazyField.INFERRED_INSERT_SIZE);
+ }
+ super.setInferredInsertSize(value);
+ }
+
+ // ===== flags =====
+
+ /**
+  * Loads every flag that has not been initialized yet and returns the combined flags value.
+  * Flags that cannot be called on non-paired reads are only loaded when the read is paired.
+  */
+ @Override
+ public int getFlags() {
+     for (final LazyFlag lazyFlag : LazyFlag.values()) {
+         final boolean pending = !initializedFlags.contains(lazyFlag);
+         if (pending && (lazyFlag.canCallOnNotPaired() || getReadPairedFlag())) {
+             // the getter stores the loaded value as a side effect; its result is not needed here
+             lazyFlag.getFlag(this);
+         }
+     }
+
+     return super.getFlags();
+ }
+
+ /**
+  * Sets all flags at once. Every lazy flag is marked as initialized so that none of them
+  * is loaded from SRA afterwards.
+  */
+ @Override
+ public void setFlags(final int value) {
+     initializedFlags.addAll(EnumSet.allOf(LazyFlag.class));
+     super.setFlags(value);
+ }
+
+ @Override
+ public boolean getReadNegativeStrandFlag() {
+ if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) {
+ setReadNegativeStrandFlag(getReadNegativeStrandFlagImpl());
+ }
+ return super.getReadNegativeStrandFlag();
+ }
+
+ @Override
+ public void setReadNegativeStrandFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.READ_NEGATIVE_STRAND)) {
+ initializedFlags.add(LazyFlag.READ_NEGATIVE_STRAND);
+ }
+ super.setReadNegativeStrandFlag(flag);
+ }
+
+ @Override
+ public boolean getReadPairedFlag() {
+ if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) {
+ setReadPairedFlag(getReadPairedFlagImpl());
+ }
+ return super.getReadPairedFlag();
+ }
+
+ @Override
+ public void setReadPairedFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.READ_PAIRED)) {
+ initializedFlags.add(LazyFlag.READ_PAIRED);
+ }
+ super.setReadPairedFlag(flag);
+ }
+
+ @Override
+ public boolean getProperPairFlag() {
+ if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) {
+ setProperPairFlag(getProperPairFlagImpl());
+ }
+ return super.getProperPairFlag();
+ }
+
+ @Override
+ public void setProperPairFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.PROPER_PAIR)) {
+ initializedFlags.add(LazyFlag.PROPER_PAIR);
+ }
+ super.setProperPairFlag(flag);
+ }
+
+ @Override
+ public boolean getNotPrimaryAlignmentFlag() {
+ if (!initializedFlags.contains(LazyFlag.NOT_PRIMARY_ALIGNMENT)) {
+ setNotPrimaryAlignmentFlag(getNotPrimaryAlignmentFlagImpl());
+ }
+ return super.getNotPrimaryAlignmentFlag();
+ }
+
+ @Override
+ public void setNotPrimaryAlignmentFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.NOT_PRIMARY_ALIGNMENT)) {
+ initializedFlags.add(LazyFlag.NOT_PRIMARY_ALIGNMENT);
+ }
+ super.setNotPrimaryAlignmentFlag(flag);
+ }
+
+ @Override
+ public boolean getMateNegativeStrandFlag() {
+ if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) {
+ setMateNegativeStrandFlag(getMateNegativeStrandFlagImpl());
+ }
+ return super.getMateNegativeStrandFlag();
+ }
+
+ @Override
+ public void setMateNegativeStrandFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.MATE_NEGATIVE_STRAND)) {
+ initializedFlags.add(LazyFlag.MATE_NEGATIVE_STRAND);
+ }
+ super.setMateNegativeStrandFlag(flag);
+ }
+
+ @Override
+ public boolean getMateUnmappedFlag() {
+ if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) {
+ setMateUnmappedFlag(getMateUnmappedFlagImpl());
+ }
+ return super.getMateUnmappedFlag();
+ }
+
+ @Override
+ public void setMateUnmappedFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.MATE_UNMAPPED)) {
+ initializedFlags.add(LazyFlag.MATE_UNMAPPED);
+ }
+ super.setMateUnmappedFlag(flag);
+ }
+
+ @Override
+ public boolean getFirstOfPairFlag() {
+ if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) {
+ setFirstOfPairFlag(getFirstOfPairFlagImpl());
+ }
+ return super.getFirstOfPairFlag();
+ }
+
+ @Override
+ public void setFirstOfPairFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.FIRST_OF_PAIR)) {
+ initializedFlags.add(LazyFlag.FIRST_OF_PAIR);
+ }
+ super.setFirstOfPairFlag(flag);
+ }
+
+ @Override
+ public boolean getSecondOfPairFlag() {
+ if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) {
+ setSecondOfPairFlag(getSecondOfPairFlagImpl());
+ }
+ return super.getSecondOfPairFlag();
+ }
+
+ @Override
+ public void setSecondOfPairFlag(final boolean flag) {
+ if (!initializedFlags.contains(LazyFlag.SECOND_OF_PAIR)) {
+ initializedFlags.add(LazyFlag.SECOND_OF_PAIR);
+ }
+ super.setSecondOfPairFlag(flag);
+ }
+
+
+ // ===== attributes =====
+
+ @Override
+ public Object getAttribute(final short tag) {
+ LazyAttribute attr = lazyAttributeTags.get(tag);
+ if (attr != null) {
+ if (!initializedAttributes.contains(attr)) {
+ setAttribute(tag, attr.getAttribute(this));
+ }
+ }
+ return super.getAttribute(tag);
+ }
+
+ @Override
+ public void setAttribute(final short tag, final Object value) {
+ LazyAttribute attr = lazyAttributeTags.get(tag);
+ if (attr != null && !initializedAttributes.contains(attr)) {
+ initializedAttributes.add(attr);
+ }
+ super.setAttribute(tag, value);
+ }
+
+ @Override
+ protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
+ LazyAttribute attr = lazyAttributeTags.get(tag);
+ if (attr != null && !initializedAttributes.contains(attr)) {
+ initializedAttributes.add(attr);
+ }
+ super.setAttribute(tag, value, isUnsignedArray);
+ }
+
+ @Override
+ public void clearAttributes() {
+ for (LazyAttribute lazyAttribute : LazyAttribute.values()) {
+ if (!initializedAttributes.contains(lazyAttribute)) {
+ initializedAttributes.add(lazyAttribute);
+ }
+ }
+ super.clearAttributes();
+ }
+
+ @Override
+ protected void setAttributes(final SAMBinaryTagAndValue attributes) {
+ for (LazyAttribute lazyAttribute : LazyAttribute.values()) {
+ if (!initializedAttributes.contains(lazyAttribute)) {
+ initializedAttributes.add(lazyAttribute);
+ }
+ }
+ super.setAttributes(attributes);
+ }
+
+ @Override
+ protected SAMBinaryTagAndValue getBinaryAttributes() {
+ for (Map.Entry<Short, LazyAttribute> info : lazyAttributeTags.entrySet()) {
+ if (!initializedAttributes.contains(info.getValue())) {
+ getAttribute(info.getKey());
+ }
+ }
+
+ return super.getBinaryAttributes();
+ }
+
+ /**
+  * Makes sure a lazily loaded attribute is initialized before delegating to the superclass.
+  *
+  * @param tag two-character attribute tag
+  * @return result of the superclass check for this tag
+  */
+ @Override
+ public boolean isUnsignedArrayAttribute(final String tag) {
+     final short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag(tag);
+     final LazyAttribute attr = lazyAttributeTags.get(binaryTag);
+     if (attr != null && !initializedAttributes.contains(attr)) {
+         // loading the attribute stores it in the record as a side effect
+         getAttribute(binaryTag);
+     }
+
+     return super.isUnsignedArrayAttribute(tag);
+ }
+
+ // ===== misc ====
+
+ /**
+ * For records equality, we should only compare read id, reference and position on the reference.
+ * Since read id is a constructor parameter, we only need to make sure that reference info is loaded.
+ * @param o other
+ * @return comparison result
+ */
+ @Override
+ public boolean equals(final Object o) {
+ if (o instanceof SRALazyRecord) {
+ SRALazyRecord otherRecord = (SRALazyRecord)o;
+ otherRecord.getReferenceIndex();
+ otherRecord.getAlignmentStart();
+ }
+
+ getReferenceIndex();
+ getAlignmentStart();
+
+ return super.equals(o);
+ }
+
+ /**
+ * The same approach as with 'equals' method. We only load reference and position.
+ */
+ @Override
+ public int hashCode() {
+ getReferenceIndex();
+ getAlignmentStart();
+
+ return super.hashCode();
+ }
+
+ /**
+ * Performs a deep copy of the SAMRecord and detaches a copy from NGS iterator
+ * @return new object
+ * @throws CloneNotSupportedException
+ */
+ @Override
+ public Object clone() throws CloneNotSupportedException {
+ SRALazyRecord newObject = (SRALazyRecord)super.clone();
+ newObject.initializedFields = EnumSet.copyOf(this.initializedFields);
+ newObject.initializedFlags = EnumSet.copyOf(this.initializedFlags);
+ newObject.initializedAttributes = EnumSet.copyOf(this.initializedAttributes);
+ newObject.detachFromIterator();
+
+ return newObject;
+ }
+
+ @Override
+ public String format() {
+ if (!initializedAttributes.contains(LazyAttribute.RG)) {
+ getAttribute("RG");
+ }
+ return super.format();
+ }
+
+ @Override
+ public List<SAMValidationError> isValid(final boolean firstOnly) {
+ loadFields();
+ getFlags();
+ getBinaryAttributes();
+
+ return super.isValid(firstOnly);
+ }
+
+ // =============================== Implementation ========================================
+
+ /**
+  * Returns the read collection of this record, opening it again by accession
+  * when the record does not hold one.
+  */
+ private ReadCollection getReadCollection() {
+     if (run == null) {
+         log.debug("Recovering SRA read collection. Accession: " + accession);
+         try {
+             run = NGS.openReadCollection(accession.toString());
+         } catch (ErrorMsg e) {
+             throw new RuntimeException(e);
+         }
+     }
+     return run;
+ }
+
+ private Alignment getCurrentAlignment() throws ErrorMsg {
+ if (!isAligned) {
+ throw new RuntimeException("Should be called for aligned records only");
+ }
+
+ if (alignmentIterator == null) {
+ log.debug("Recovering SAM record after detaching from iterator. Alignment id: " + sraAlignmentId);
+ if (sraAlignmentId == null) {
+ throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no alignment id");
+ }
+
+ alignmentIterator = getReadCollection().getAlignment(sraAlignmentId);
+ }
+ return alignmentIterator;
+ }
+
+ /**
+  * Returns the NGS read positioned on the fragment this record represents.
+  * If the record has been detached from its iterator, the read is fetched again by read id
+  * and advanced to the fragment with index {@code unalignedReadFragmentIndex}.
+  *
+  * @throws RuntimeException if called on an aligned record, if there is no read id to recover from,
+  *                          or if the read has fewer fragments than the stored fragment index requires
+  * @throws ErrorMsg if the NGS library reports an error
+  */
+ private Read getCurrentUnalignedRead() throws ErrorMsg {
+     if (isAligned) {
+         throw new RuntimeException("Should be called for unaligned records only");
+     }
+
+     if (unalignmentIterator == null) {
+         log.debug("Recovering SAM record after detaching from iterator. Read id: " + sraReadId + ", fragment index: " + unalignedReadFragmentIndex);
+         if (sraReadId == null) {
+             throw new RuntimeException("Cannot recover SAM object after detaching from iterator: no read id");
+         }
+
+         Read read = getReadCollection().getRead(sraReadId);
+         for (int i = 0; i < unalignedReadFragmentIndex + 1; i++) {
+             // nextFragment() reports whether a fragment is available; do not silently run past the last one
+             if (!read.nextFragment()) {
+                 throw new RuntimeException("Cannot recover SAM object after detaching from iterator: read " + sraReadId +
+                         " has no fragment with index " + unalignedReadFragmentIndex);
+             }
+         }
+
+         unalignmentIterator = read;
+     }
+     return unalignmentIterator;
+ }
+
+ // ===== fields =====
+
+ private void loadFields() {
+ for (LazyField field : LazyField.values()) {
+ if (initializedFields.contains(field)) {
+ continue;
+ }
+
+ field.loadValue(this);
+ }
+ }
+
+ private int getAlignmentStartImpl() {
+ try {
+ if (isAligned) {
+ return (int) getCurrentAlignment().getAlignmentPosition() + 1;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return SAMRecord.NO_ALIGNMENT_START;
+ }
+
+ private int getMappingQualityImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getMappingQuality();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return SAMRecord.NO_MAPPING_QUALITY;
+ }
+
+ private String getReferenceNameImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getReferenceSpec();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
+ }
+
+ private String getCigarStringImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getShortCigar(false);
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return SAMRecord.NO_ALIGNMENT_CIGAR;
+ }
+
+ private byte[] getReadBasesImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getAlignedFragmentBases().getBytes();
+ } else {
+ return getCurrentUnalignedRead().getFragmentBases().getBytes();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Loads the fragment qualities from the current alignment (aligned record) or the current
+ * unaligned read and converts them with SAMUtils.fastqToPhred.
+ * Any ErrorMsg from the NGS library is rethrown wrapped in a RuntimeException.
+ */
+ private byte[] getBaseQualitiesImpl() {
+ try {
+ Fragment fragment;
+ if (isAligned) {
+ fragment = getCurrentAlignment();
+ } else {
+ fragment = getCurrentUnalignedRead();
+ }
+
+ // qualities are taken from the PRIMARY_ALIGNMENT.SAM_QUALITY column, which automatically reverses them if needed
+ return SAMUtils.fastqToPhred(fragment.getFragmentQualities());
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private int getMateAlignmentStartImpl() {
+ try {
+ if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) {
+ Alignment mate = getCurrentAlignment().getMateAlignment();
+ return (int) mate.getAlignmentPosition() + 1;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return SAMRecord.NO_ALIGNMENT_START;
+ }
+
+ private String getMateReferenceNameImpl() {
+ try {
+ if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) {
+ return getCurrentAlignment().getMateReferenceSpec();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
+ }
+
+ private int getInferredInsertSizeImpl() {
+ try {
+ if (isAligned) {
+ return (int) getCurrentAlignment().getTemplateLength();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return 0;
+ }
+
+ // ===== flags =====
+
+ private boolean getReadNegativeStrandFlagImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getIsReversedOrientation();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ return false;
+ }
+
+ private boolean getReadPairedFlagImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().isPaired();
+ } else {
+ return getCurrentUnalignedRead().getNumFragments() > 1;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private boolean getProperPairFlagImpl() {
+ return isAligned && getReadPairedFlag() && !getMateUnmappedFlag();
+ }
+
+ private boolean getNotPrimaryAlignmentFlagImpl() {
+ try {
+ if (isAligned) {
+ return getCurrentAlignment().getAlignmentCategory() == Alignment.secondaryAlignment;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+
+ return false;
+ }
+
+ private boolean getMateNegativeStrandFlagImpl() {
+ try {
+ if (isAligned && getReadPairedFlag() && !getMateUnmappedFlag()) {
+ Alignment mate = getCurrentAlignment().getMateAlignment();
+ return mate.getIsReversedOrientation();
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+
+ return false;
+ }
+
+ private boolean getMateUnmappedFlagImpl() {
+ try {
+ if (isAligned) {
+ return !getCurrentAlignment().hasMate();
+ } else {
+ Read unalignedRead = getCurrentUnalignedRead();
+ int numFragments = unalignedRead.getNumFragments();
+ int nextFragmentIdx = unalignedReadFragmentIndex + 1;
+ if (nextFragmentIdx == numFragments) {
+ nextFragmentIdx = 0;
+ }
+
+ return unalignedRead.fragmentIsAligned(nextFragmentIdx);
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private boolean getFirstOfPairFlagImpl() {
+ if (!getReadPairedFlag()) {
+ return false;
+ }
+ try {
+ if (isAligned) {
+ String fragmentId = getCurrentAlignment().getFragmentId();
+ if (!fragmentId.contains(".FA")) {
+ throw new RuntimeException("Invalid fragment id: " + fragmentId);
+ }
+
+ return fragmentId.contains(".FA0.");
+ } else {
+ return unalignedReadFragmentIndex == 0;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private boolean getSecondOfPairFlagImpl() {
+ if (!getReadPairedFlag()) {
+ return false;
+ }
+ try {
+ if (isAligned) {
+ String fragmentId = getCurrentAlignment().getFragmentId();
+ if (!fragmentId.contains(".FA")) {
+ throw new RuntimeException("Invalid fragment id: " + fragmentId);
+ }
+
+ return !fragmentId.contains(".FA0.");
+ } else {
+ return unalignedReadFragmentIndex != 0;
+ }
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ // ===== attributes =====
+
+ private String getAttributeGroupNameImpl() {
+ try {
+ String readGroupName;
+ if (isAligned) {
+ readGroupName = getCurrentAlignment().getReadGroup();
+ } else {
+ readGroupName = getCurrentUnalignedRead().getReadGroup();
+ }
+
+ if (!readGroupName.isEmpty()) {
+ return readGroupName;
+ }
+ return getReadCollection().getName();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/src/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java b/src/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java
new file mode 100644
index 0000000..f128a2b
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAUnalignmentIterator.java
@@ -0,0 +1,181 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+
+import htsjdk.samtools.Chunk;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SRAIterator;
+import htsjdk.samtools.ValidationStringency;
+import ngs.ErrorMsg;
+import ngs.Read;
+import ngs.ReadCollection;
+import ngs.ReadIterator;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Iterator for unaligned reads.
+ * Is used from SRAIterator.
+ *
+ * Created by andrii.nikitiuk on 9/3/15.
+ */
+public class SRAUnalignmentIterator implements Iterator<SAMRecord> {
+ private ValidationStringency validationStringency;
+
+ private SRAAccession accession;
+ private ReadCollection run;
+ private SAMFileHeader header;
+ private SRAIterator.RecordRangeInfo recordRangeInfo;
+
+ private ReadIterator unalignedIterator;
+ private boolean hasMoreUnalignedReads = true;
+ private Boolean hasMoreUnalignedFragments = false;
+ private int lastUnalignedFragmentIndex;
+
+ private SRALazyRecord lastRecord;
+
+ /**
+ *
+ * @param run opened read collection
+ * @param header sam header
+ * @param recordRangeInfo info about record ranges withing SRA archive
+ * @param chunk used to determine which unaligned reads the iterator should return
+ */
+ public SRAUnalignmentIterator(SRAAccession accession, final ReadCollection run, final SAMFileHeader header, SRAIterator.RecordRangeInfo recordRangeInfo, Chunk chunk) {
+ this.accession = accession;
+ this.run = run;
+ this.header = header;
+ this.recordRangeInfo = recordRangeInfo;
+
+ long readStart = chunk.getChunkStart() - recordRangeInfo.getTotalReferencesLength();
+ if (readStart < 0) {
+ readStart = 0;
+ } else if (readStart >= recordRangeInfo.getNumberOfReads()) {
+ throw new RuntimeException("Invalid chunk provided: chunkStart position is after last read");
+ }
+
+ long readEnd = chunk.getChunkEnd() - recordRangeInfo.getTotalReferencesLength();
+ if (readEnd > recordRangeInfo.getNumberOfReads()) {
+ readEnd = recordRangeInfo.getNumberOfReads();
+ } else if (readEnd <= 0) {
+ throw new RuntimeException("Invalid chunk provided: chunkEnd position is before last read");
+ }
+
+ try {
+ unalignedIterator = run.getReadRange(readStart + 1, readEnd - readStart, Read.partiallyAligned | Read.unaligned);
+ nextUnalignedFragment();
+
+ } catch (final Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ // check unaligned
+ if (hasMoreUnalignedFragments == null) {
+ try {
+ lastRecord.detachFromIterator();
+ nextUnalignedFragment();
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return hasMoreUnalignedFragments;
+ }
+
+ @Override
+ public SAMRecord next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException("No more alignments are available");
+ }
+
+ return nextUnalignment();
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Removal of records not implemented.");
+ }
+
+ public void setValidationStringency(ValidationStringency validationStringency) {
+ this.validationStringency = validationStringency;
+ }
+
+ private SAMRecord nextUnalignment() {
+ try {
+ lastRecord = new SRALazyRecord(header, accession, run, unalignedIterator, unalignedIterator.getReadId(), lastUnalignedFragmentIndex);
+ } catch (ErrorMsg e) {
+ throw new RuntimeException(e);
+ }
+
+ if (validationStringency != null) {
+ lastRecord.setValidationStringency(validationStringency);
+ }
+
+ hasMoreUnalignedFragments = null;
+
+ return lastRecord;
+ }
+
+ private void nextUnalignedFragment() throws ErrorMsg {
+ while (hasMoreUnalignedFragments == null || hasMoreUnalignedFragments) {
+ hasMoreUnalignedFragments = unalignedIterator.nextFragment();
+ lastUnalignedFragmentIndex++;
+
+ if (hasMoreUnalignedFragments && !unalignedIterator.isAligned()) {
+ return;
+ }
+ }
+
+ if (!hasMoreUnalignedReads) {
+ throw new RuntimeException("Cannot get next unaligned read - already at last one");
+ }
+
+ while (true) {
+ hasMoreUnalignedReads = unalignedIterator.nextRead();
+ lastUnalignedFragmentIndex = -1;
+ if (!hasMoreUnalignedReads) {
+ break;
+ }
+
+ // search for unaligned fragment
+ do {
+ hasMoreUnalignedFragments = unalignedIterator.nextFragment();
+ lastUnalignedFragmentIndex++;
+ } while (hasMoreUnalignedFragments && unalignedIterator.isAligned());
+
+ // means that we found fragment
+ if (hasMoreUnalignedFragments) {
+ return;
+ }
+ }
+ }
+}
diff --git a/src/java/htsjdk/samtools/sra/SRAUtils.java b/src/java/htsjdk/samtools/sra/SRAUtils.java
new file mode 100644
index 0000000..e72caa8
--- /dev/null
+++ b/src/java/htsjdk/samtools/sra/SRAUtils.java
@@ -0,0 +1,83 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+import ngs.ErrorMsg;
+import ngs.Read;
+import ngs.ReadCollection;
+import ngs.ReferenceIterator;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Provides some functionality which can be used by other classes
+ *
+ * Created by andrii.nikitiuk on 10/28/15.
+ */
+public class SRAUtils {
+ /**
+ * References are stored in SRA table in chunks of 5k bases per row, while last chunk of a reference is less or
+ * equal than 5k bases in size (even if the next reference follows).
+ * So, it will be optimal if we align reference sizes to 5k bases to read by reference rows.
+ */
+ public static final int REFERENCE_ALIGNMENT = 5000;
+
+ /**
+ * Is used to build RecordRangeInfo
+ * @param run open read collection
+ * @return total number of reads (both aligned and unaligned) in SRA archive
+ * @throws ErrorMsg
+ */
+ public static long getNumberOfReads(ReadCollection run) throws ErrorMsg {
+ return run.getReadCount(Read.all);
+ }
+
+ /**
+ * Loads reference lengths from a read collection.
+ * Aligns reference lengths by REFERENCE_ALIGNMENT bases for optimal loads of alignments
+ * (references are stored in REFERENCE_ALIGNMENT bases chunks in SRA table)
+ *
+ * Is used to build RecordRangeInfo
+ * @param run single opened read collection
+ * @return list with references lengths
+ * @throws ErrorMsg
+ */
+ public static List<Long> getReferencesLengthsAligned(ReadCollection run) throws ErrorMsg {
+ ReferenceIterator refIt = run.getReferences();
+ List<Long> lengths = new ArrayList<Long>();
+ while (refIt.nextReference()) {
+ long refLen = refIt.getLength();
+ // lets optimize references so they always align in 5000 bases positions
+ if (refLen % REFERENCE_ALIGNMENT != 0) {
+ refLen += REFERENCE_ALIGNMENT - (refLen % REFERENCE_ALIGNMENT);
+ }
+ lengths.add(refLen);
+ }
+ return lengths;
+ }
+}
diff --git a/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java b/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
index 5088890..bd2f654 100644
--- a/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
+++ b/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
@@ -64,7 +64,7 @@ public abstract class AbstractAsyncWriter<T> implements Closeable {
if (!this.isClosed.getAndSet(true)) {
try {
- this.writer.interrupt(); // signal to writer clean up
+ if (this.queue.isEmpty()) this.writer.interrupt(); // signal to writer clean up
this.writer.join();
} catch (final InterruptedException ie) {
throw new RuntimeException("Interrupted waiting on writer thread.", ie);
diff --git a/src/java/htsjdk/samtools/util/AbstractProgressLogger.java b/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
index 9bc2dc7..5bd5e92 100644
--- a/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
+++ b/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
@@ -76,7 +76,7 @@ abstract public class AbstractProgressLogger implements ProgressLoggerInterface
*/
@Override
public synchronized boolean record(final SAMRecord rec) {
- if (rec.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+ if (SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(rec.getReferenceName())) {
return record(null, 0);
}
else {
diff --git a/src/java/htsjdk/samtools/util/BinaryCodec.java b/src/java/htsjdk/samtools/util/BinaryCodec.java
index aaf69bf..843c128 100644
--- a/src/java/htsjdk/samtools/util/BinaryCodec.java
+++ b/src/java/htsjdk/samtools/util/BinaryCodec.java
@@ -79,9 +79,9 @@ public class BinaryCodec implements Closeable {
private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN;
private static final byte NULL_BYTE[] = {0};
- private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
- private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
- private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
+ public static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
+ public static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
+ public static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
// We never serialize more than this much at a time (except for Strings)
private static final int MAX_BYTE_BUFFER = 8;
@@ -101,10 +101,10 @@ public class BinaryCodec implements Closeable {
try {
this.isWriting = writing;
if (this.isWriting) {
- this.outputStream = new FileOutputStream(file);
+ this.outputStream = IOUtil.maybeBufferOutputStream(new FileOutputStream(file));
this.outputFileName = file.getName();
} else {
- this.inputStream = new FileInputStream(file);
+ this.inputStream = IOUtil.maybeBufferInputStream(new FileInputStream(file));
this.inputFileName = file.getName();
}
} catch (FileNotFoundException e) {
diff --git a/src/java/htsjdk/samtools/util/DiskBackedQueue.java b/src/java/htsjdk/samtools/util/DiskBackedQueue.java
index 0af6818..fd07f68 100644
--- a/src/java/htsjdk/samtools/util/DiskBackedQueue.java
+++ b/src/java/htsjdk/samtools/util/DiskBackedQueue.java
@@ -25,6 +25,7 @@
package htsjdk.samtools.util;
import htsjdk.samtools.Defaults;
+import htsjdk.samtools.SAMException;
import java.io.File;
import java.io.FileInputStream;
@@ -100,13 +101,13 @@ public class DiskBackedQueue<E> implements Queue<E> {
* Syntactic sugar around the ctor, to save some typing of type parameters
*
* @param codec For writing records to file and reading them back into RAM
- * @param maxRecordsInRAM how many records to accumulate in memory before spilling to disk
+ * @param maxRecordsInRam how many records to accumulate in memory before spilling to disk
* @param tmpDir Where to write files of records that will not fit in RAM
*/
public static <T> DiskBackedQueue<T> newInstance(final SortingCollection.Codec<T> codec,
- final int maxRecordsInRAM,
+ final int maxRecordsInRam,
final List<File> tmpDir) {
- return new DiskBackedQueue<T>(codec, maxRecordsInRAM, tmpDir);
+ return new DiskBackedQueue<T>(codec, maxRecordsInRam, tmpDir);
}
public boolean canAdd() {
@@ -135,12 +136,14 @@ public class DiskBackedQueue<E> implements Queue<E> {
// NB: we add all the records before removing them, so we can never have spilled to disk unless all the space for ram records
// have been exhausted.
if (this.headRecord == null) { // this is the first record in the queue
+ if (0 < this.numRecordsOnDisk) throw new SAMException("Head record was null but we have records on disk. Bug!");
this.headRecord = record;
}
else if (this.ramRecords.size() == this.maxRecordsInRamQueue) {
spillToDisk(record);
}
else {
+ if (0 < this.numRecordsOnDisk) throw new SAMException("Trying to add records to RAM but there were records on disk. Bug!");
this.ramRecords.add(record);
}
return true;
@@ -274,11 +277,14 @@ public class DiskBackedQueue<E> implements Queue<E> {
private void updateQueueHead() {
if (!this.ramRecords.isEmpty()) {
this.headRecord = this.ramRecords.poll();
+ if (0 < numRecordsOnDisk) this.canAdd = false;
}
else if (this.diskRecords != null) {
this.headRecord = this.readFileRecord(this.diskRecords);
+ this.canAdd = false;
}
else {
+ this.canAdd = true;
this.headRecord = null;
}
}
diff --git a/src/java/htsjdk/samtools/util/Histogram.java b/src/java/htsjdk/samtools/util/Histogram.java
index 4ebbdbd..f69408c 100644
--- a/src/java/htsjdk/samtools/util/Histogram.java
+++ b/src/java/htsjdk/samtools/util/Histogram.java
@@ -45,7 +45,6 @@ import static java.lang.Math.*;
public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
private String binLabel = "BIN";
private String valueLabel = "VALUE";
- private Double mean;
/** Constructs a new Histogram with default bin and value labels. */
public Histogram() { }
@@ -73,7 +72,6 @@ public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
super(in);
this.binLabel = in.binLabel;
this.valueLabel = in.valueLabel;
- this.mean = in.mean;
}
/** Represents a bin in the Histogram. */
@@ -146,7 +144,6 @@ public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
}
bin.value += increment;
- mean = null;
}
public String getBinLabel() { return binLabel; }
@@ -164,12 +161,23 @@ public class Histogram<K extends Comparable> extends TreeMap<K, Bin> {
super.equals(o);
}
+ /**
+ * Assuming that the key type for the histogram is a Number type, returns the mean of
+ * all the items added to the histogram.
+ */
public double getMean() {
- if (mean == null) {
- mean = getSum() / getCount();
+ // Could simply use getSum() / getCount(), but that would require iterating over the
+ // values() set twice, which seems inefficient given how simple the computation is.
+ double product=0, totalCount=0;
+ for (final Bin bin : values()) {
+ final double idValue = bin.getIdValue();
+ final double count = bin.getValue();
+
+ product += idValue * count;
+ totalCount += count;
}
- return mean;
+ return product / totalCount;
}
/**
diff --git a/src/java/htsjdk/samtools/util/IOUtil.java b/src/java/htsjdk/samtools/util/IOUtil.java
index 9e7427a..199c6d1 100644
--- a/src/java/htsjdk/samtools/util/IOUtil.java
+++ b/src/java/htsjdk/samtools/util/IOUtil.java
@@ -50,6 +50,8 @@ import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -320,16 +322,26 @@ public class IOUtil {
* @param file the file to check for readability
*/
public static void assertFileIsReadable(final File file) {
- if (file == null) {
- throw new IllegalArgumentException("Cannot check readability of null file.");
- } else if (!file.exists()) {
- throw new SAMException("Cannot read non-existent file: " + file.getAbsolutePath());
+ assertFileIsReadable(file == null ? null : file.toPath());
+ }
+
+ /**
+ * Checks that a file is non-null, exists, is not a directory and is readable. If any
+ * condition is false then a runtime exception is thrown.
+ *
+ * @param path the file to check for readability
+ */
+ public static void assertFileIsReadable(final Path path) {
+ if (path == null) {
+ throw new IllegalArgumentException("Cannot check readability of null file.");
+ } else if (!Files.exists(path)) {
+ throw new SAMException("Cannot read non-existent file: " + path.toAbsolutePath());
}
- else if (file.isDirectory()) {
- throw new SAMException("Cannot read file because it is a directory: " + file.getAbsolutePath());
+ else if (Files.isDirectory(path)) {
+ throw new SAMException("Cannot read file because it is a directory: " + path.toAbsolutePath());
}
- else if (!file.canRead()) {
- throw new SAMException("File exists but is not readable: " + file.getAbsolutePath());
+ else if (!Files.isReadable(path)) {
+ throw new SAMException("File exists but is not readable: " + path.toAbsolutePath());
}
}
@@ -487,18 +499,28 @@ public class IOUtil {
* @return the input stream to read from
*/
public static InputStream openFileForReading(final File file) {
+ return openFileForReading(file.toPath());
+ }
+
+ /**
+ * Opens a file for reading, decompressing it if necessary
+ *
+ * @param path The file to open
+ * @return the input stream to read from
+ */
+ public static InputStream openFileForReading(final Path path) {
try {
- if (file.getName().endsWith(".gz") ||
- file.getName().endsWith(".bfq")) {
- return openGzipFileForReading(file);
+ if (path.getFileName().toString().endsWith(".gz") ||
+ path.getFileName().toString().endsWith(".bfq")) {
+ return openGzipFileForReading(path);
}
else {
- return new FileInputStream(file);
+ return Files.newInputStream(path);
}
}
catch (IOException ioe) {
- throw new SAMException("Error opening file: " + file.getName(), ioe);
+ throw new SAMException("Error opening file: " + path, ioe);
}
}
@@ -510,12 +532,22 @@ public class IOUtil {
* @return the input stream to read from
*/
public static InputStream openGzipFileForReading(final File file) {
+ return openGzipFileForReading(file.toPath());
+ }
+
+ /**
+ * Opens a GZIP-encoded file for reading, decompressing it if necessary
+ *
+ * @param path The file to open
+ * @return the input stream to read from
+ */
+ public static InputStream openGzipFileForReading(final Path path) {
try {
- return new GZIPInputStream(new FileInputStream(file));
+ return new GZIPInputStream(Files.newInputStream(path));
}
catch (IOException ioe) {
- throw new SAMException("Error opening file: " + file.getName(), ioe);
+ throw new SAMException("Error opening file: " + path, ioe);
}
}
diff --git a/src/java/htsjdk/samtools/util/Murmur3.java b/src/java/htsjdk/samtools/util/Murmur3.java
new file mode 100644
index 0000000..9372008
--- /dev/null
+++ b/src/java/htsjdk/samtools/util/Murmur3.java
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2011 The Guava Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+ * in compliance with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed under the License
+ * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing permissions and limitations under
+ * the License.
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public
+ * domain. The author hereby disclaims copyright to this source code.
+ *
+ * Source:
+ * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
+ * (Modified to adapt to Guava coding conventions and to use the HashFunction interface)
+ *
+ * Modified to remove stuff Clojure doesn't need, placed under clojure.lang namespace,
+ * all fns made static, added hashOrdered/Unordered
+ *
+ * Modified again by Tim Fennell to remove code not needed by HTSJDK, to make methods non-static (so that different uses can
+ * supply different seed values without colliding) and to conform to HTSJDK coding conventions where possible.
+ *
+ * @author Austin Appleby
+ * @author Dimitris Andreou
+ * @author Kurt Alfred Kluever
+ */
+package htsjdk.samtools.util;
+
+/**
+ * Provides an implementation of the Murmur3_32 hash algorithm that has desirable properties in terms of randomness
+ * and uniformity of the distribution of output values that make it a useful hashing algorithm for downsampling.
+ */
+public final class Murmur3 {
+ private final int seed ;
+
+ /** Constructs a Murmur3 hash with the given seed. */
+ public Murmur3(final int seed) {
+ this.seed = seed;
+ }
+
+ /** Hashes a character stream to an int using Murmur3. */
+ public int hashUnencodedChars(CharSequence input){
+ int h1 = this.seed;
+
+ // step through the CharSequence 2 chars at a time
+ final int length = input.length();
+ for(int i = 1; i < length; i += 2) {
+ int k1 = input.charAt(i - 1) | (input.charAt(i) << 16);
+ k1 = mixK1(k1);
+ h1 = mixH1(h1, k1);
+ }
+
+ // deal with any remaining characters
+ if((length & 1) == 1) {
+ int k1 = input.charAt(length - 1);
+ k1 = mixK1(k1);
+ h1 ^= k1;
+ }
+
+ return fmix(h1, 2 * length);
+ }
+
+ private int hashInt(int input){
+ if(input == 0) return 0;
+ int k1 = mixK1(input);
+ int h1 = mixH1(this.seed, k1);
+
+ return fmix(h1, 4);
+ }
+
+ private int hashLong(long input){
+ if(input == 0) return 0;
+ int low = (int) input;
+ int high = (int) (input >>> 32);
+
+ int k1 = mixK1(low);
+ int h1 = mixH1(this.seed, k1);
+
+ k1 = mixK1(high);
+ h1 = mixH1(h1, k1);
+
+ return fmix(h1, 8);
+ }
+
+ private static int mixK1(int k1){
+ final int c1 = 0xcc9e2d51;
+ final int c2 = 0x1b873593;
+ k1 *= c1;
+ k1 = Integer.rotateLeft(k1, 15);
+ k1 *= c2;
+ return k1;
+ }
+
+ private static int mixH1(int h1, int k1){
+ h1 ^= k1;
+ h1 = Integer.rotateLeft(h1, 13);
+ h1 = h1 * 5 + 0xe6546b64;
+ return h1;
+ }
+
+ // Finalization mix - force all bits of a hash block to avalanche
+ private static int fmix(int h1, int length){
+ h1 ^= length;
+ h1 ^= h1 >>> 16;
+ h1 *= 0x85ebca6b;
+ h1 ^= h1 >>> 13;
+ h1 *= 0xc2b2ae35;
+ h1 ^= h1 >>> 16;
+ return h1;
+ }
+}
\ No newline at end of file
diff --git a/src/java/htsjdk/samtools/util/ProgressLogger.java b/src/java/htsjdk/samtools/util/ProgressLogger.java
index 8603dd4..6a293d6 100644
--- a/src/java/htsjdk/samtools/util/ProgressLogger.java
+++ b/src/java/htsjdk/samtools/util/ProgressLogger.java
@@ -47,6 +47,6 @@ public class ProgressLogger extends AbstractProgressLogger {
@Override
protected void log(final String... message) {
- log.info(message);
+ log.info((Object[])message);
}
}
diff --git a/src/java/htsjdk/samtools/util/SequenceUtil.java b/src/java/htsjdk/samtools/util/SequenceUtil.java
index 6594880..bd4bfdd 100644
--- a/src/java/htsjdk/samtools/util/SequenceUtil.java
+++ b/src/java/htsjdk/samtools/util/SequenceUtil.java
@@ -103,21 +103,43 @@ public class SequenceUtil {
}
/**
- * Throws an exception only if both parameters are not null
+ * default signature that forces the lists to be the same size
*
* @param s1 a list of sequence headers
* @param s2 a second list of sequence headers
*/
public static void assertSequenceListsEqual(final List<SAMSequenceRecord> s1, final List<SAMSequenceRecord> s2) {
+ assertSequenceListsEqual(s1, s2, false);
+ }
+ /**
+ * Throws an exception only if both (first) parameters are not null
+ * optionally check that one list is a (nonempty) prefix of the other.
+ *
+ * @param s1 a list of sequence headers
+ * @param s2 a second list of sequence headers
+ * @param checkPrefixOnly a flag specifying whether to only look at the first records in the lists. This will then check that the
+ * records of the smaller dictionary are equal to the records of the beginning of the larger dictionary, which can be useful since
+ * sometimes different pipelines choose to use only the first contigs of a standard reference.
+ */
+ public static void assertSequenceListsEqual(final List<SAMSequenceRecord> s1, final List<SAMSequenceRecord> s2, final boolean checkPrefixOnly) {
if (s1 != null && s2 != null) {
- if (s1.size() != s2.size()) {
- throw new SequenceListsDifferException(
- "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() +
- ")");
- }
+ final int sizeToTest;
- for (int i = 0; i < s1.size(); ++i) {
+ if (checkPrefixOnly) {
+ sizeToTest = Math.min(s1.size(), s2.size());
+ if (sizeToTest == 0) {
+ throw new SequenceListsDifferException("Neither of the dictionaries can be empty.");
+ }
+ } else {
+ sizeToTest = s1.size();
+ if (s1.size() != s2.size()) {
+ throw new SequenceListsDifferException(
+ "Sequence dictionaries are not the same size (" + s1.size() + ", " + s2.size() +
+ ")");
+ }
+ }
+ for (int i = 0; i < sizeToTest; ++i) {
if (!s1.get(i).isSameSequence(s2.get(i))) {
String s1Attrs = "";
for (final java.util.Map.Entry<String, String> entry : s1.get(i)
@@ -159,6 +181,9 @@ public class SequenceUtil {
/**
* Returns true if both parameters are null or equal, otherwise returns false
+ *
+ * @param s1 a sequence dictionary
+ * @param s2 a second sequence dictionary
*/
public static boolean areSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2) {
if (s1 == null && s2 == null) return true;
@@ -174,10 +199,26 @@ public class SequenceUtil {
/**
* Throws an exception if both parameters are non-null and unequal.
+ *
+ * @param s1 a sequence dictionary
+ * @param s2 a second sequence dictionary
*/
public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2) {
+ assertSequenceDictionariesEqual(s1, s2, false);
+ }
+
+ /**
+ * Throws an exception if both (first) parameters are non-null and unequal (if checkPrefixOnly, checks prefix of lists only).
+ *
+ * @param s1 a sequence dictionary
+ * @param s2 a second sequence dictionary
+ * @param checkPrefixOnly a flag specifying whether to only look at the first records in the lists. This will then check that the
+ * records of the smaller dictionary are equal to the records of the beginning of the larger dictionary, which can be useful since
+ * sometimes different pipelines choose to use only the first contigs of a standard reference.
+ */
+ public static void assertSequenceDictionariesEqual(final SAMSequenceDictionary s1, final SAMSequenceDictionary s2, final boolean checkPrefixOnly) {
if (s1 == null || s2 == null) return;
- assertSequenceListsEqual(s1.getSequences(), s2.getSequences());
+ assertSequenceListsEqual(s1.getSequences(), s2.getSequences(), checkPrefixOnly);
}
/**
diff --git a/src/java/htsjdk/samtools/util/StringUtil.java b/src/java/htsjdk/samtools/util/StringUtil.java
index 44a6aaf..e205bbf 100644
--- a/src/java/htsjdk/samtools/util/StringUtil.java
+++ b/src/java/htsjdk/samtools/util/StringUtil.java
@@ -84,7 +84,7 @@ public class StringUtil {
tokens[nTokens++] = aString;
return nTokens;
}
- while ((end > 0) && (nTokens < maxTokens))
+ while ((end >= 0) && (nTokens < maxTokens))
{
tokens[nTokens++] = aString.substring(start, end);
start = end + 1;
@@ -125,7 +125,7 @@ public class StringUtil {
tokens[nTokens++] = aString;
return nTokens;
}
- while ((end > 0) && (nTokens < maxTokens - 1))
+ while ((end >= 0) && (nTokens < maxTokens - 1))
{
tokens[nTokens++] = aString.substring(start, end);
start = end + 1;
diff --git a/src/java/htsjdk/variant/variantcontext/VariantContext.java b/src/java/htsjdk/variant/variantcontext/VariantContext.java
index 32db7b5..d2cc5af 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantContext.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantContext.java
@@ -1011,11 +1011,15 @@ public class VariantContext implements Feature, Serializable {
return getGenotypes().containsSample(sample);
}
+ /**
+ * @param ith the sample index
+ *
+ * @return the ith genotype in this context or null if there aren't that many genotypes
+ */
public Genotype getGenotype(int ith) {
- return genotypes.get(ith);
+ return genotypes.size() > ith ? genotypes.get(ith) : null;
}
-
/**
* Returns the number of chromosomes carrying any allele in the genotypes (i.e., excluding NO_CALLS)
*
diff --git a/src/java/htsjdk/variant/variantcontext/filter/CompoundFilter.java b/src/java/htsjdk/variant/variantcontext/filter/CompoundFilter.java
new file mode 100644
index 0000000..9a3724a
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/CompoundFilter.java
@@ -0,0 +1,74 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
+
+import java.util.ArrayList;
+
+/**
+ * A Predicate on VariantContexts that returns true when either all its sub-predicates are true, or none are false.
+ *
+ * @author Yossi Farjoun
+ */
+public class CompoundFilter extends ArrayList<VariantContextFilter> implements VariantContextFilter {
+
+ final boolean requireAll;
+
+ /**
+ * A constructor that will determine if this compound filter will require that *all* the included filters pass
+ * or *some* of them pass (depending on the requireAll parameter in the constructor).
+ *
+ * @param requireAll a boolean parameter determining whether this filter requires all its elements to pass (true) for
+ * it to pass, or only one (false). If there are no variantfilters it will return true.
+ */
+ public CompoundFilter(final boolean requireAll) {
+ super();
+ this.requireAll = requireAll;
+ }
+
+ /**
+ * @param variantContext the record to examine against the sub-filters
+ * @return true if variantContext either passes all the filters (when requireAll==true)
+ * or doesn't fail any of the filters (when requireAll==false)
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+
+ if (requireAll) {
+ for (final VariantContextFilter filter : this) {
+ if (!filter.test(variantContext)) return false;
+ }
+
+ return true;
+ } else {
+ for (final VariantContextFilter filter : this) {
+ if (filter.test(variantContext)) return true;
+ }
+
+ return isEmpty();
+ }
+ }
+}
diff --git a/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java b/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
new file mode 100644
index 0000000..c5b943f
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
@@ -0,0 +1,127 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.variant.variantcontext.VariantContext;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * A filtering iterator for VariantContexts that takes a base iterator and a VariantContextFilter.
+ *
+ * The iterator returns all the variantcontexts for which the filter's function "test" returns true (and only those)
+ *
+ * @author Yossi Farjoun
+ */
+public class FilteringIterator implements CloseableIterator<VariantContext>, Iterable<VariantContext>{
+ private final Iterator<VariantContext> iterator;
+ private final VariantContextFilter filter;
+ private VariantContext next = null;
+
+ /**
+ * Constructor of an iterator based on the provided iterator and predicate. The resulting
+ * records will be all those VariantContexts from iterator for which filter.test( . ) is true
+ *
+ * @param iterator the backing iterator
+ * @param filter the filter
+ */
+ public FilteringIterator(final Iterator<VariantContext> iterator, final VariantContextFilter filter) {
+ this.iterator = iterator;
+ this.filter = filter;
+ next = getNextVC();
+ }
+
+ @Override
+ public void close() {
+ CloserUtil.close(iterator);
+ }
+
+ /**
+ * Returns true if the iteration has more elements.
+ *
+ * @return true if the iteration has more elements. Otherwise returns false.
+ */
+ @Override
+ public boolean hasNext() {
+ return next != null;
+ }
+
+ /**
+ * Returns the next element in the iteration.
+ *
+ * @return the next element in the iteration
+ * @throws NoSuchElementException if there are no more elements to return
+ *
+ */
+ @Override
+ public VariantContext next() throws NoSuchElementException {
+ if (next == null) {
+ throw new NoSuchElementException("Iterator has no more elements.");
+ }
+ final VariantContext result = next;
+ next = getNextVC();
+ return result;
+ }
+
+ /**
+ * Required method for Iterator API.
+ *
+ * @throws UnsupportedOperationException since it is unsupported here.
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
+ }
+
+ /**
+ * Gets the next record from the underlying iterator that passes the filter
+ *
+ * @return VariantContext the next filter-passing record
+ */
+ private VariantContext getNextVC() {
+
+ while (iterator.hasNext()) {
+ final VariantContext record = iterator.next();
+
+ if (filter.test(record)) {
+ return record;
+ }
+ }
+ return null;
+ }
+
+ /**
+ * function to satisfy the Iterable interface
+ *
+ * @return itself since the class inherits from Iterator
+ */
+ @Override
+ public Iterator<VariantContext> iterator() {
+ return this;
+ }
+}
diff --git a/src/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilter.java b/src/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilter.java
new file mode 100644
index 0000000..862dcce
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilter.java
@@ -0,0 +1,79 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.VariantContext;
+
+/**
+ * A Predicate on VariantContexts that returns true at sites where the genotype quality (GQ) of the examined genotype is at least the given threshold.
+ *
+ * @author Yossi Farjoun
+ */
+public class GenotypeQualityFilter implements VariantContextFilter {
+
+ final private String sample;
+ final private int gqThreshold;
+
+ /**
+ * Constructor for a filter that will keep VC for which the
+ * genotype quality (GQ) of sample passes a threshold. If sample is null, the first genotype in the
+ * variant context will be used.
+ *
+ * @param gqThreshold the smallest value of GQ that this filter will pass
+ * @param sample the name of the sample in the variant context whose genotype should be examined.
+ */
+ public GenotypeQualityFilter(final int gqThreshold, final String sample ) {
+ this.sample = sample;
+ this.gqThreshold = gqThreshold;
+ }
+
+ /**
+ * Constructor as above that doesn't take a sample, instead it will look at the first genotype of the variant context.
+ * @param gqThreshold the smallest value of GQ that this filter will pass
+ */
+ public GenotypeQualityFilter(final int gqThreshold) {
+ this( gqThreshold, null);
+ }
+
+ /**
+ * @return true if variantContext is to be kept, otherwise false
+ * Assumes that this.sample is a sample in the variantContext, if not null,
+ * otherwise looks for the first genotype (and assumes it exists).
+ * @param variantContext the record to examine for GQ
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ final Genotype gt = (sample == null) ? variantContext.getGenotype(0) : variantContext.getGenotype(sample);
+
+ if (gt == null) {
+ throw new IllegalArgumentException((sample == null) ?
+ "Cannot find any genotypes in VariantContext: " + variantContext :
+ "Cannot find sample requested: " + sample);
+ }
+
+ return gt.getGQ() >= gqThreshold;
+ }
+}
diff --git a/src/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilter.java b/src/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilter.java
new file mode 100644
index 0000000..0675b25
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilter.java
@@ -0,0 +1,84 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.VariantContext;
+
+/**
+ * A Predicate on VariantContexts that returns true at heterozygous sites (or, when keepHets is false, at non-heterozygous sites).
+ * If the optional "sample" argument to the constructor is given, the genotype of that sample will be examined,
+ * otherwise the first genotype will be used.
+ *
+ * Missing sample, or no genotype will result in an exception being thrown.
+ *
+ * @author Yossi Farjoun
+ */
+public class HeterozygosityFilter implements VariantContextFilter {
+
+ final private String sample;
+ final private boolean keepHets;
+
+ /**
+ * Constructor for a filter that will keep (or remove, if keepHets is false) VC for which the
+ * genotype of sample is heterozygous. If sample is null, the first genotype in the
+ * variant context will be used.
+ *
+ * @param keepHets determine whether to keep the het sites (true) or filter them out (false)
+ * @param sample the name of the sample in the variant context whose genotype should be examined.
+ */
+ public HeterozygosityFilter(final boolean keepHets, final String sample) {
+ this.keepHets = keepHets;
+ this.sample = sample;
+ }
+
+ /**
+ * Constructor as above that doesn't take a sample, instead it will look at the first genotype of the variant context.
+ * @param keepHets if true, the heterozygous variant contexts will pass the filter, otherwise they will fail.
+ */
+ public HeterozygosityFilter(final boolean keepHets) {
+ this(keepHets, null);
+ }
+
+ /**
+ * @return true if variantContext is to be kept, otherwise false
+ * Assumes that this.sample is a sample in the variantContext, if not null,
+ * otherwise looks for the first genotype (and assumes it exists).
+ * @param variantContext the record to examine for heterozygosity
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ final Genotype gt = (sample == null) ? variantContext.getGenotype(0) : variantContext.getGenotype(sample);
+
+ if (gt == null) {
+ throw new IllegalArgumentException((sample == null) ?
+ "Cannot find any genotypes in VariantContext: " + variantContext :
+ "Cannot find sample requested: " + sample);
+ }
+
+ //XOR operator to reverse behaviour if keepHets is false.
+ return gt.isHet() ^ !keepHets;
+ }
+}
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/variant/variantcontext/filter/PassingVariantFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/java/htsjdk/variant/variantcontext/filter/PassingVariantFilter.java
index 7dac5a2..f24678f 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/PassingVariantFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package htsjdk.samtools;
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
/**
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A Predicate on VariantContexts that returns true at sites that are either unfiltered, or passing (as variants).
+ *
+ * @author Yossi Farjoun
*/
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class PassingVariantFilter implements VariantContextFilter {
+
+ /**
+ * @return true if variantContext is not filtered
+ * @param variantContext the record to examine for being not filtered
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ return variantContext.isNotFiltered();
+ }
}
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/variant/variantcontext/filter/SnpFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/java/htsjdk/variant/variantcontext/filter/SnpFilter.java
index 7dac5a2..4d8b17b 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/SnpFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,24 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package htsjdk.samtools;
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
/**
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A Predicate on VariantContexts that returns true at sites that are SNPs
+ *
+ * @author Yossi Farjoun
*/
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class SnpFilter implements VariantContextFilter {
+
+ /**
+ * @return true if variantContext is a SNP
+ * @param variantContext the record to examine for being a SNP
+ */
+ @Override
+ public boolean test(final VariantContext variantContext) {
+ return variantContext.isSNP();
+ }
}
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/java/htsjdk/variant/variantcontext/filter/VariantContextFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/java/htsjdk/variant/variantcontext/filter/VariantContextFilter.java
index 7dac5a2..451dc63 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/VariantContextFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,23 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
/**
- * The standard tags for a SAM record that are defined in the SAM spec.
+ *
+ * API for filtering VariantContexts
+ *
+ * @author Yossi Farjoun
+ *
*/
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public interface VariantContextFilter {
+ /**
+ * Determines whether a VariantContext matches this filter
+ *
+ * @param record the VariantContext to evaluate
+ * @return true if the VariantContext matches the filter, otherwise false
+ */
+ boolean test(VariantContext record);
}
diff --git a/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java b/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
index fab5095..7d1f0de 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/BCF2FieldEncoder.java
@@ -116,65 +116,56 @@ public abstract class BCF2FieldEncoder {
}
/**
- * True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
- *
- * @return
+ * @return True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
*/
public boolean hasConstantNumElements() {
return getCountType() == VCFHeaderLineCount.INTEGER;
}
/**
- * True if the only way to determine how many elements this field contains is by
+ * @return True if the only way to determine how many elements this field contains is by
* inspecting the actual value directly, such as when the number of elements
* is a variable length list per site or per genotype.
- * @return
*/
public boolean hasValueDeterminedNumElements() {
return getCountType() == VCFHeaderLineCount.UNBOUNDED;
}
/**
- * True if this field has a non-fixed number of elements that depends only on the properties
+ * @return True if this field has a non-fixed number of elements that depends only on the properties
* of the current VariantContext, such as one value per Allele or per genotype configuration.
- *
- * @return
*/
public boolean hasContextDeterminedNumElements() {
return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
}
/**
- * Get the number of elements, assuming this field has a constant number of elements.
- * @return
+ * @return the number of elements, assuming this field has a constant number of elements.
*/
public int numElements() {
return headerLine.getCount();
}
/**
- * Get the number of elements by looking at the actual value provided
- * @return
+ * @return the number of elements by looking at the actual value provided
*/
public int numElements(final Object value) {
return numElementsFromValue(value);
}
/**
- * Get the number of elements, assuming this field has context-determined number of elements.
- * @return
+ * @return the number of elements, assuming this field has context-determined number of elements.
*/
public int numElements(final VariantContext vc) {
return headerLine.getCount(vc);
}
/**
- * A convenience access for the number of elements, returning
- * the number of encoded elements, either from the fixed number
- * it has, from the VC, or from the value itself.
+ * A convenience access for the number of elements.
* @param vc
* @param value
- * @return
+ * @return the number of encoded elements, either from the fixed number
+ * it has, from the VC, or from the value itself.
*/
public final int numElements(final VariantContext vc, final Object value) {
if ( hasConstantNumElements() ) return numElements();
@@ -188,7 +179,7 @@ public abstract class BCF2FieldEncoder {
* Assumes the value is encoded as a List
*
* @param value
- * @return
+ * @return the number of elements we will encode for {@code value}.
*/
protected int numElementsFromValue(final Object value) {
if ( value == null ) return 0;
@@ -205,14 +196,14 @@ public abstract class BCF2FieldEncoder {
/**
* Is the BCF2 type of this field static, or does it have to be determine from
* the actual field value itself?
- * @return
+ * @return true if the field is static
*/
public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
/**
* Is the BCF2 type of this field static, or does it have to be determine from
* the actual field value itself?
- * @return
+ * @return true if the field is not static
*/
public final boolean isDynamicallyTyped() { return staticType == null; }
@@ -220,7 +211,7 @@ public abstract class BCF2FieldEncoder {
* Get the BCF2 type for this field, either from the static type of the
* field itself or by inspecting the value itself.
*
- * @return
+ * @return the BCF2 type for this field
*/
public final BCF2Type getType(final Object value) {
return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
diff --git a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
index 5e5eb95..4e95888 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
@@ -125,7 +125,7 @@ public class VariantContextWriterBuilder {
private IndexCreator idxCreator = null;
private int bufferSize = Defaults.BUFFER_SIZE;
private boolean createMD5 = Defaults.CREATE_MD5;
- private EnumSet<Options> options = DEFAULT_OPTIONS.clone();
+ protected EnumSet<Options> options = DEFAULT_OPTIONS.clone();
/**
* Default constructor. Adds <code>USE_ASYNC_IO</code> to the Options if it is present in Defaults.
@@ -338,12 +338,33 @@ public class VariantContextWriterBuilder {
}
/**
+ * Add one option to the set of default <code>Options</code> that will be used as the initial set of options
+ * for all VariantContextWriterBuilders created after this call.
+ *
+ * @param option the option to set
+ */
+ public static void setDefaultOption(final Options option) {
+ VariantContextWriterBuilder.DEFAULT_OPTIONS.add(option);
+ }
+
+ /**
+ * Remove an option from the set of default <code>Options</code> that will be used as the initial set of options
+ * for all VariantContextWriterBuilders created after this call.
+ *
+ * @param option the option to unset
+ * @see #setDefaultOption(Options)
+ */
+ public static void unsetDefaultOption(final Options option) {
+ VariantContextWriterBuilder.DEFAULT_OPTIONS.remove(option);
+ }
+
+ /**
* Remove all options from the set of <code>Options</code> for the <code>VariantContextWriterBuilder</code>.
*
* @return this VariantContextWriterBuilder
*/
public VariantContextWriterBuilder clearOptions() {
- this.options = NO_OPTIONS;
+ this.options = NO_OPTIONS.clone();
return this;
}
diff --git a/src/java/htsjdk/variant/vcf/VCFRecordCodec.java b/src/java/htsjdk/variant/vcf/VCFRecordCodec.java
index cddfa22..8fe9b67 100644
--- a/src/java/htsjdk/variant/vcf/VCFRecordCodec.java
+++ b/src/java/htsjdk/variant/vcf/VCFRecordCodec.java
@@ -16,20 +16,20 @@ import java.io.PrintStream;
* with SortingCollection ONLY.
*/
public class VCFRecordCodec implements SortingCollection.Codec<VariantContext> {
-
- final VCFCodec vcfDecoder = new VCFCodec();
-
- final VCFEncoder vcfEncoder;
-
+ private final VCFCodec vcfDecoder = new VCFCodec();
+ private final VCFEncoder vcfEncoder;
private PrintStream outputStream = null;
-
private BufferedReader inputReader = null;
public VCFRecordCodec(final VCFHeader header) {
- this.vcfEncoder = new VCFEncoder(header, false, false);
+ this(header, false);
+ }
+
+ public VCFRecordCodec(final VCFHeader header, final boolean allowMissingFieldsInHeader) {
+ this.vcfEncoder = new VCFEncoder(header, allowMissingFieldsInHeader, false);
// Explicitly set the version because it's not available in the header itself.
this.vcfDecoder.setVCFHeader(header, VCFHeaderVersion.VCF4_2);
- }
+ }
@Override
public void setOutputStream(final OutputStream stream) {
@@ -58,7 +58,7 @@ public class VCFRecordCodec implements SortingCollection.Codec<VariantContext> {
@Override
public VCFRecordCodec clone() {
- return new VCFRecordCodec(this.vcfEncoder.getVCFHeader());
+ return new VCFRecordCodec(this.vcfEncoder.getVCFHeader(), this.vcfEncoder.getAllowMissingFieldsInHeader());
}
}
diff --git a/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java b/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
index 43fe3a9..4504ddc 100644
--- a/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
@@ -37,7 +37,7 @@ import java.io.File;
*/
public class BAMFileWriterTest {
- private SAMRecordSetBuilder getSAMReader(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
+ private SAMRecordSetBuilder getRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
final SAMRecordSetBuilder ret = new SAMRecordSetBuilder(sortForMe, sortOrder);
ret.addPair("readB", 20, 200, 300);
ret.addPair("readA", 20, 100, 150);
@@ -55,7 +55,7 @@ public class BAMFileWriterTest {
* @param presorted If true, samText is in the order specified by sortOrder
*/
private void testHelper(final SAMRecordSetBuilder samRecordSetBuilder, final SAMFileHeader.SortOrder sortOrder, final boolean presorted) throws Exception {
- SamReader samReader = samRecordSetBuilder.getSamReader();
+ final SamReader samReader = samRecordSetBuilder.getSamReader();
final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
bamFile.deleteOnExit();
samReader.getFileHeader().setSortOrder(sortOrder);
@@ -68,43 +68,47 @@ public class BAMFileWriterTest {
it.close();
samReader.close();
- if (presorted) {
- // If SAM text input was presorted, then we can compare SAM object to BAM object
- final SamReader bamReader = SamReaderFactory.makeDefault().open(bamFile);
- samReader = samRecordSetBuilder.getSamReader();
- samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
- Assert.assertEquals(bamReader.getFileHeader(), samReader.getFileHeader());
- it = samReader.iterator();
- final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
- while (it.hasNext()) {
- Assert.assertTrue(bamIt.hasNext());
- final SAMRecord samRecord = it.next();
- final SAMRecord bamRecord = bamIt.next();
-
- // SAMRecords don't have this set, so stuff it in there
- samRecord.setIndexingBin(bamRecord.getIndexingBin());
-
- // Force reference index attributes to be populated
- samRecord.getReferenceIndex();
- bamRecord.getReferenceIndex();
- samRecord.getMateReferenceIndex();
- bamRecord.getMateReferenceIndex();
-
- Assert.assertEquals(bamRecord, samRecord);
- }
- Assert.assertFalse(bamIt.hasNext());
+ if (presorted) { // If SAM text input was presorted, then we can compare SAM object to BAM object
+ verifyBAMFile(samRecordSetBuilder, bamFile);
+ }
+ }
+
+ private void verifyBAMFile(final SAMRecordSetBuilder samRecordSetBuilder, final File bamFile) {
+
+ final SamReader bamReader = SamReaderFactory.makeDefault().open(bamFile);
+ final SamReader samReader = samRecordSetBuilder.getSamReader();
+ samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
+ Assert.assertEquals(bamReader.getFileHeader(), samReader.getFileHeader());
+ final CloseableIterator<SAMRecord> it = samReader.iterator();
+ final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
+ while (it.hasNext()) {
+ Assert.assertTrue(bamIt.hasNext());
+ final SAMRecord samRecord = it.next();
+ final SAMRecord bamRecord = bamIt.next();
+
+ // SAMRecords don't have this set, so stuff it in there
+ samRecord.setIndexingBin(bamRecord.getIndexingBin());
+
+ // Force reference index attributes to be populated
+ samRecord.getReferenceIndex();
+ bamRecord.getReferenceIndex();
+ samRecord.getMateReferenceIndex();
+ bamRecord.getMateReferenceIndex();
+
+ Assert.assertEquals(bamRecord, samRecord);
}
+ Assert.assertFalse(bamIt.hasNext());
CloserUtil.close(samReader);
}
@DataProvider(name = "test1")
public Object[][] createTestData() {
return new Object[][]{
- {"coordinate sorted", getSAMReader(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.coordinate, false},
- {"query sorted", getSAMReader(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.queryname, false},
- {"unsorted", getSAMReader(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.unsorted, false},
- {"coordinate presorted", getSAMReader(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.coordinate, true},
- {"query presorted", getSAMReader(true, SAMFileHeader.SortOrder.queryname), SAMFileHeader.SortOrder.queryname, true},
+ {"coordinate sorted", getRecordSetBuilder(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.coordinate, false},
+ {"query sorted", getRecordSetBuilder(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.queryname, false},
+ {"unsorted", getRecordSetBuilder(false, SAMFileHeader.SortOrder.unsorted), SAMFileHeader.SortOrder.unsorted, false},
+ {"coordinate presorted", getRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.coordinate, true},
+ {"query presorted", getRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname), SAMFileHeader.SortOrder.queryname, true},
};
}
@@ -114,10 +118,73 @@ public class BAMFileWriterTest {
testHelper(samRecordSetBuilder, order, presorted);
}
+ @Test(dataProvider = "test1")
+ public void testNullRecordHeaders(final String testName, final SAMRecordSetBuilder samRecordSetBuilder, final SAMFileHeader.SortOrder order, final boolean presorted) throws Exception {
+
+ // test that BAMFileWriter can write records that have a null header
+ final SAMFileHeader samHeader = samRecordSetBuilder.getHeader();
+ for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ rec.setHeader(null);
+ }
+
+ // make sure the records can actually be written out
+ final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
+ bamFile.deleteOnExit();
+ samHeader.setSortOrder(order);
+ final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, presorted, bamFile);
+ for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ bamWriter.addAlignment(rec);
+ }
+ bamWriter.close();
+
+ if (presorted) {
+ verifyBAMFile(samRecordSetBuilder, bamFile);
+ }
+ }
+
+ @Test
+ public void testNullRecordsMismatchedHeader() throws Exception {
+
+ final SAMRecordSetBuilder samRecordSetBuilder = getRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
+ for (final SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ rec.setHeader(null);
+ }
+
+ // create a fake header to make sure the records can still be written using an invalid
+ // sequence dictionary and unresolvable references
+ final SAMFileHeader fakeHeader = new SAMFileHeader();
+ fakeHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
+ final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
+ bamFile.deleteOnExit();
+
+ final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fakeHeader, false, bamFile);
+ for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ bamWriter.addAlignment(rec);
+ }
+ bamWriter.close();
+
+ final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile);
+ final SamReader samReader = samRecordSetBuilder.getSamReader();
+ samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
+ final CloseableIterator<SAMRecord> it = samReader.iterator();
+ final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
+ while (it.hasNext()) {
+ Assert.assertTrue(bamIt.hasNext());
+ final SAMRecord samRecord = it.next();
+ final SAMRecord bamRecord = bamIt.next();
+
+ // test only reference names since we'll have lost reference indices due to the fake null header
+ Assert.assertEquals(bamRecord.getReferenceName(), samRecord.getReferenceName());
+ Assert.assertEquals(bamRecord.getAlignmentStart(), samRecord.getAlignmentStart());
+ }
+ Assert.assertFalse(bamIt.hasNext());
+ CloserUtil.close(samReader);
+ }
+
@Test(expectedExceptions = IllegalArgumentException.class)
public void testNegativePresorted() throws Exception {
- testHelper(getSAMReader(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.queryname, true);
+ testHelper(getRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate), SAMFileHeader.SortOrder.queryname, true);
Assert.fail("Exception should be thrown");
}
}
diff --git a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
index bc6e752..ae23787 100644
--- a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
@@ -2,6 +2,7 @@ package htsjdk.samtools;
import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@@ -105,7 +106,7 @@ public class CRAMComplianceTest {
cramFileWriter.close();
- CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, source, ValidationStringency.SILENT);
+ CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream)null, source, ValidationStringency.SILENT);
SAMRecordIterator cramFileReaderIterator = cramFileReader.getIterator();
for (SAMRecord samRecord : samRecords) {
Assert.assertTrue(cramFileReaderIterator.hasNext());
@@ -116,7 +117,7 @@ public class CRAMComplianceTest {
Assert.assertFalse(cramFileReaderIterator.hasNext());
if (t.cramFile_21.exists()) {
- cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), null, source, ValidationStringency.SILENT);
+ cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), (SeekableStream)null, source, ValidationStringency.SILENT);
cramFileReaderIterator = cramFileReader.getIterator();
for (SAMRecord samRecord : samRecords) {
Assert.assertTrue(cramFileReaderIterator.hasNext());
@@ -128,7 +129,7 @@ public class CRAMComplianceTest {
}
if (t.cramFile_30.exists()) {
- cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), null, source, ValidationStringency.SILENT);
+ cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), (SeekableStream)null, source, ValidationStringency.SILENT);
cramFileReaderIterator = cramFileReader.getIterator();
for (SAMRecord samRecord : samRecords) {
Assert.assertTrue(cramFileReaderIterator.hasNext());
diff --git a/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java b/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
index 1098ec4..4d3b0a7 100644
--- a/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMEdgeCasesTest.java
@@ -1,7 +1,9 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
+import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@@ -9,6 +11,8 @@ import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
@@ -33,6 +37,20 @@ public class CRAMEdgeCasesTest {
testRecords(records, records.iterator().next().getReadBases());
}
+ // Test that a CRAMException is raised
+ // when a contig is found in the reads but not in the reference
+ @Test(expectedExceptions = CRAMException.class)
+ public void testContigNotFoundInRef() throws IOException {
+ boolean sawException = false;
+ final File CRAMFile = new File("testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram");
+ final File refFile = new File("testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa");
+ final ReferenceSource refSource = new ReferenceSource(refFile);
+ final CRAMIterator iterator = new CRAMIterator(new FileInputStream(CRAMFile), refSource, ValidationStringency.STRICT);
+ while (iterator.hasNext()) {
+ iterator.next();
+ }
+ }
+
@Test
public void testBizilionTags() throws IOException {
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
@@ -42,7 +60,9 @@ public class CRAMEdgeCasesTest {
char b1 = (char) ('A' + i / 26);
char b2 = (char) ('A' + i % 26);
String tag = new String(new char[]{b1, b2});
- if ("RG".equals(tag)) continue;
+ if ("RG".equals(tag)) {
+ continue;
+ }
record.setAttribute(tag, i);
}
@@ -73,7 +93,7 @@ public class CRAMEdgeCasesTest {
}
cramFileWriter.close();
- CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, source, ValidationStringency.SILENT);
+ CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT);
final SAMRecordIterator iterator = cramFileReader.getIterator();
Assert.assertTrue(iterator.hasNext());
@@ -101,7 +121,7 @@ public class CRAMEdgeCasesTest {
cramFileWriter.addAlignment(record);
cramFileWriter.close();
- CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), null, source, ValidationStringency.SILENT);
+ CRAMFileReader cramFileReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (SeekableStream) null, source, ValidationStringency.SILENT);
final SAMRecordIterator iterator = cramFileReader.getIterator();
Assert.assertTrue(iterator.hasNext());
SAMRecord s2 = iterator.next();
@@ -127,8 +147,11 @@ public class CRAMEdgeCasesTest {
s.setAlignmentStart(1);
s.setReferenceName("chr1");
s.setReadName("1");
- if (bases == SAMRecord.NULL_SEQUENCE) s.setCigarString("10M");
- else s.setCigarString(s.getReadLength() + "M");
+ if (bases == SAMRecord.NULL_SEQUENCE) {
+ s.setCigarString("10M");
+ } else {
+ s.setCigarString(s.getReadLength() + "M");
+ }
testSingleRecord(s, ref);
}
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
index dd50ec3..b1e1f2d 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
@@ -5,6 +5,7 @@ import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.reference.FakeReferenceSequenceFile;
import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
+import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CoordMath;
@@ -43,6 +44,52 @@ public class CRAMFileIndexTest {
@Test
+ public void testConstructors () throws IOException {
+ CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source, ValidationStringency.SILENT);
+ CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart("chrM", 1500);
+ Assert.assertTrue(iterator.hasNext());
+ SAMRecord record = iterator.next();
+
+ Assert.assertEquals(record.getReferenceName(), "chrM");
+ Assert.assertTrue(record.getAlignmentStart() >= 1500);
+ reader.close();
+
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), indexFile, source, ValidationStringency.SILENT);
+ iterator = reader.queryAlignmentStart("chrM", 1500);
+ Assert.assertTrue(iterator.hasNext());
+ record = iterator.next();
+
+ Assert.assertEquals(record.getReferenceName(), "chrM");
+ Assert.assertTrue(record.getAlignmentStart() >= 1500);
+ reader.close();
+
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), new SeekableFileStream(indexFile), source, ValidationStringency.SILENT);
+ iterator = reader.queryAlignmentStart("chrM", 1500);
+ Assert.assertTrue(iterator.hasNext());
+ record = iterator.next();
+
+ Assert.assertEquals(record.getReferenceName(), "chrM");
+ Assert.assertTrue(record.getAlignmentStart() >= 1500);
+ reader.close();
+
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), (File)null, source, ValidationStringency.SILENT);
+ try {
+ reader.queryAlignmentStart("chrM", 1500);
+ Assert.fail("Expecting query to fail when there is no index");
+ } catch (SAMException e) {
+ }
+ reader.close();
+
+ reader = new CRAMFileReader(new SeekableFileStream(cramFile), (SeekableFileStream)null, source, ValidationStringency.SILENT);
+ try {
+ reader.queryAlignmentStart("chrM", 1500);
+ Assert.fail("Expecting query to fail when there is no index");
+ } catch (SAMException e) {
+ }
+ reader.close();
+ }
+
+ @Test
public void test_chrM_1500_location() throws IOException {
CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source);
reader.setValidationStringency(ValidationStringency.SILENT);
@@ -85,6 +132,25 @@ public class CRAMFileIndexTest {
}
@Test
+ public void testNoStringencyConstructor() throws IOException {
+ final File CRAMFile = new File("testdata/htsjdk/samtools/cram/auxf#values.3.0.cram");
+ final File refFile = new File("testdata/htsjdk/samtools/cram/auxf.fa");
+ ReferenceSource refSource = new ReferenceSource(refFile);
+ File indexFile = null;
+
+ long start = 0;
+ long end = CRAMFile.length();
+ long[] boundaries = new long[] {start << 16, (end - 1) << 16};
+ final CRAMIterator iterator = new CRAMIterator(new SeekableFileStream(CRAMFile), refSource, boundaries);
+ long count = 0;
+ while (iterator.hasNext()) {
+ count++;
+ iterator.next();
+ }
+ Assert.assertEquals(count, 2);
+ }
+
+ @Test
public void testIteratorFromFileSpan_WholeFile() throws IOException {
CRAMFileReader reader = new CRAMFileReader(new ByteArraySeekableStream(cramBytes), new ByteArraySeekableStream(baiBytes), source, ValidationStringency.SILENT);
reader.setValidationStringency(ValidationStringency.SILENT);
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
index 1e1838b..1203121 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
@@ -180,9 +180,8 @@ public class CRAMFileWriterWithIndexTest {
Collections.sort(list, new SAMRecordCoordinateComparator());
for (SAMRecord record : list)
- writer.writeAlignment(record);
+ writer.addAlignment(record);
- System.out.println();
list.clear();
writer.finish();
writer.close();
diff --git a/src/tests/java/htsjdk/samtools/CigarTest.java b/src/tests/java/htsjdk/samtools/CigarTest.java
index 9e9858a..1d7d4c6 100644
--- a/src/tests/java/htsjdk/samtools/CigarTest.java
+++ b/src/tests/java/htsjdk/samtools/CigarTest.java
@@ -24,6 +24,7 @@
package htsjdk.samtools;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.List;
@@ -33,49 +34,61 @@ import java.util.List;
*/
public class CigarTest {
- @Test
- public void testPositive() {
- Assert.assertNull(TextCigarCodec.decode("").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("2M1P4M1P2D1P6D").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("10M5N1I12M").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("10M1I5N1I12M").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("9M1D5N1I12M").isValid(null, -1));
+ @DataProvider(name = "positiveTestsData")
+ public Object[][] testPositive() {
+ return new Object[][]{
+ {""},
+ {"2M1P4M1P2D1P6D"},
+ {"10M5N1I12M"},
+ {"10M1I5N1I12M"},
+ {"9M1D5N1I12M"},
- // I followed by D and vice versa is now allowed.
- Assert.assertNull(TextCigarCodec.decode("1M1I1D1M").isValid(null, -1));
- Assert.assertNull(TextCigarCodec.decode("1M1D1I1M").isValid(null, -1));
+ // I followed by D and vice versa is now allowed.
+ {"1M1I1D1M"},
+ {"1M1D1I1M"},
- // Soft-clip inside of hard-clip now allowed.
- Assert.assertNull(TextCigarCodec.decode("29M1S15H").isValid(null, -1));
+ // Soft-clip inside of hard-clip now allowed.
+ {"29M1S15H"},
+ };
}
- @Test
- public void testNegative() {
- // Cannot have two consecutive insertions
- List<SAMValidationError> errors = TextCigarCodec.decode("1M1I1I1M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR);
+ @Test(dataProvider = "positiveTestsData")
+ public void testPositive(final String cigar) {
+ Assert.assertNull(TextCigarCodec.decode(cigar).isValid(null, -1));
+ }
- // Cannot have two consecutive deletions
- errors = TextCigarCodec.decode("1M1D1D1M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR);
+ @DataProvider(name = "negativeTestsData")
+ public Object[][] negativeTestsData() {
- // Soft clip must be at end of read or inside of hard clip
- errors = TextCigarCodec.decode("1M1D1S1M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ return new Object[][]{
+ // Cannot have two consecutive indels of the same type (two deletions or two insertions)
+ {"1M1D1D1M", SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR},
+ {"1M1I1I1M", SAMValidationError.Type.ADJACENT_INDEL_IN_CIGAR},
- // Soft clip must be at end of read or inside of hard clip
- errors = TextCigarCodec.decode("1M1D1S1M1H").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ // Soft clip must be at end of read or inside of hard clip
+ {"1M1D1S1M", SAMValidationError.Type.INVALID_CIGAR},
+ {"1M1D1S1M1H", SAMValidationError.Type.INVALID_CIGAR},
+ {"1M1D1S1S", SAMValidationError.Type.INVALID_CIGAR},
+ {"1M1D1S1S1H", SAMValidationError.Type.INVALID_CIGAR},
+ {"1H1S1S1M1D", SAMValidationError.Type.INVALID_CIGAR},
+ {"1S1S1M1D", SAMValidationError.Type.INVALID_CIGAR},
+ // Soft clip must be at end of read or inside of hard clip, and a CIGAR made up only of clipping operators is invalid
+ {"1S1S", SAMValidationError.Type.INVALID_CIGAR},
+ {"1H1S", SAMValidationError.Type.INVALID_CIGAR},
+ {"1S1H", SAMValidationError.Type.INVALID_CIGAR},
+ {"1H1H", SAMValidationError.Type.INVALID_CIGAR},
+ };
/*
- // Zero length for an element not allowed.
- errors = TextCigarCodec.decode("100M0D10M1D10M").isValid(null, -1);
- Assert.assertEquals(errors.size(), 1);
- Assert.assertEquals(errors.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ // Zero length for an element not allowed. TODO: not sure why this is commented out
+ {"100M0D10M1D10M", SAMValidationError.Type.INVALID_CIGAR}
*/
}
+
+ @Test(dataProvider = "negativeTestsData")
+ public void testNegative(final String cigar, final SAMValidationError.Type type) {
+ final List<SAMValidationError> errors = TextCigarCodec.decode(cigar).isValid(null, -1);
+ Assert.assertEquals(errors.size(), 1, String.format("Got %d error, expected exactly one error.", errors.size()));
+ Assert.assertEquals(errors.get(0).getType(), type);
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java b/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
index 0a42052..89e9a68 100644
--- a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
@@ -33,6 +33,8 @@ import org.testng.annotations.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@@ -46,10 +48,20 @@ public class CramFileWriterTest {
}
@Test(description = "Test for lossy CRAM compression invariants.")
- public void lossyCramInvariantsTest() throws Exception {
+ public void lossyCramInvariantsTest() {
doTest(createRecords(1000));
}
+ @Test(description = "Tests a writing records with null SAMFileHeaders")
+ public void writeRecordsWithNullHeader() throws Exception {
+
+ final List<SAMRecord> samRecs = createRecords(50);
+ for (SAMRecord rec : samRecs) {
+ rec.setHeader(null);
+ }
+ doTest(samRecs);
+ }
+
@Test(description = "Tests a unmapped record with sequence and quality fields")
public void unmappedWithSequenceAndQualityField() throws Exception {
unmappedSequenceAndQualityFieldHelper(true);
@@ -79,17 +91,18 @@ public class CramFileWriterTest {
doTest(list);
}
- private List<SAMRecord> createRecords(int count) throws Exception {
+ private List<SAMRecord> createRecords(int count) {
List<SAMRecord> list = new ArrayList<SAMRecord>(count);
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
if (builder.getHeader().getReadGroups().isEmpty()) {
- throw new Exception("Read group expected in the header");
+ throw new IllegalStateException("Read group expected in the header");
}
int posInRef = 1;
- for (int i = 0; i < count / 2; i++)
+ for (int i = 0; i < count / 2; i++) {
builder.addPair(Integer.toString(i), 0, posInRef += 1,
posInRef += 3);
+ }
list.addAll(builder.getRecords());
Collections.sort(list, new SAMRecordCoordinateComparator());
@@ -97,35 +110,38 @@ public class CramFileWriterTest {
return list;
}
- private void doTest(final List<SAMRecord> samRecords) {
+ private SAMFileHeader createSAMHeader(SAMFileHeader.SortOrder sortOrder) {
final SAMFileHeader header = new SAMFileHeader();
- header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ header.setSortOrder(sortOrder);
header.addSequence(new SAMSequenceRecord("chr1", 123));
SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("1");
header.addReadGroup(readGroupRecord);
+ return header;
+ }
+ private ReferenceSource createReferenceSource() {
byte[] refBases = new byte[1024 * 1024];
Arrays.fill(refBases, (byte) 'A');
InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
rsf.add("chr1", refBases);
- ReferenceSource source = new ReferenceSource(rsf);
+ return new ReferenceSource(rsf);
+ }
- ByteArrayOutputStream os = new ByteArrayOutputStream();
- CRAMFileWriter writer = new CRAMFileWriter(os, source, header, null);
+ private void writeRecordsToCRAM(CRAMFileWriter writer, List<SAMRecord> samRecords) {
for (SAMRecord record : samRecords) {
- writer.writeAlignment(record);
+ writer.addAlignment(record);
}
- writer.finish();
writer.close();
+ }
+
+ private void validateRecords(final List<SAMRecord> expectedRecords, ByteArrayInputStream is, ReferenceSource referenceSource) {
+ CRAMFileReader cReader = new CRAMFileReader(null, is, referenceSource);
- CRAMFileReader cReader = new CRAMFileReader(null,
- new ByteArrayInputStream(os.toByteArray()),
- new ReferenceSource(rsf));
SAMRecordIterator iterator2 = cReader.getIterator();
int index = 0;
while (iterator2.hasNext()) {
- SAMRecord actualRecord= iterator2.next();
- SAMRecord expectedRecord = samRecords.get(index++);
+ SAMRecord actualRecord = iterator2.next();
+ SAMRecord expectedRecord = expectedRecords.get(index++);
Assert.assertEquals(actualRecord.getReadName(), expectedRecord.getReadName());
Assert.assertEquals(actualRecord.getFlags(), expectedRecord.getFlags());
@@ -141,4 +157,78 @@ public class CramFileWriterTest {
}
cReader.close();
}
+
+ private void doTest(final List<SAMRecord> samRecords) {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+ final ByteArrayOutputStream os = new ByteArrayOutputStream();
+
+ CRAMFileWriter writer = new CRAMFileWriter(os, refSource, header, null);
+ writeRecordsToCRAM(writer, samRecords);
+
+ validateRecords(samRecords, new ByteArrayInputStream(os.toByteArray()), refSource);
+ }
+
+ @Test(description = "Test CRAMWriter constructor with index stream")
+ public void testCRAMWriterWithIndex() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
+
+ final List<SAMRecord> samRecords = createRecords(100);
+ CRAMFileWriter writer = new CRAMFileWriter(outStream, indexStream, refSource, header, null);
+
+ writeRecordsToCRAM(writer, samRecords);
+ validateRecords(samRecords, new ByteArrayInputStream(outStream.toByteArray()), refSource);
+ Assert.assertTrue(indexStream.size() != 0);
+ }
+
+ @Test(description = "Test CRAMWriter constructor with presorted==false")
+ public void testCRAMWriterNotPresorted() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
+
+ CRAMFileWriter writer = new CRAMFileWriter(outStream, indexStream, false, refSource, header, null);
+
+ // force records to not be coordinate sorted to ensure we're relying on presorted=false
+ final List<SAMRecord> samRecords = createRecords(100);
+ Collections.sort(samRecords, new SAMRecordCoordinateComparator().reversed());
+
+ writeRecordsToCRAM(writer, samRecords);
+
+ // for validation, restore the sort order of the expected records so they match the order of the written records
+ Collections.sort(samRecords, new SAMRecordCoordinateComparator());
+ validateRecords(samRecords, new ByteArrayInputStream(outStream.toByteArray()), refSource);
+ Assert.assertTrue(indexStream.size() != 0);
+ }
+
+ @Test
+ public void test_roundtrip_tlen_preserved() throws IOException {
+ SamReader reader = SamReaderFactory.make().open(new File("testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam"));
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final ReferenceSource source = new ReferenceSource(new File("testdata/htsjdk/samtools/cram_tlen.fasta"));
+ CRAMFileWriter writer = new CRAMFileWriter(baos, source, reader.getFileHeader(), "test.cram");
+ SAMRecordIterator iterator = reader.iterator();
+ List<SAMRecord> records = new ArrayList<SAMRecord>();
+ while (iterator.hasNext()) {
+ final SAMRecord record = iterator.next();
+ writer.addAlignment(record);
+ records.add(record);
+ }
+ writer.close();
+
+ CRAMFileReader cramReader = new CRAMFileReader(new ByteArrayInputStream(baos.toByteArray()), (File) null, source, ValidationStringency.STRICT);
+ iterator = cramReader.getIterator();
+ int i = 0;
+ while (iterator.hasNext()) {
+ SAMRecord record1 = iterator.next();
+ SAMRecord record2 = records.get(i++);
+ Assert.assertEquals(record1.getInferredInsertSize(), record2.getInferredInsertSize(), record1.getReadName());
+ }
+ Assert.assertEquals(records.size(), i);
+ }
+
}
diff --git a/src/tests/java/htsjdk/samtools/DownsamplingIteratorTests.java b/src/tests/java/htsjdk/samtools/DownsamplingIteratorTests.java
new file mode 100644
index 0000000..d492f11
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/DownsamplingIteratorTests.java
@@ -0,0 +1,82 @@
+package htsjdk.samtools;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.EnumMap;
+import java.util.List;
+import java.util.Random;
+
+import htsjdk.samtools.DownsamplingIteratorFactory.Strategy;
+
+/**
+ * Tests for the downsampling iterator class.
+ * @author Tim Fennell
+ */
+public class DownsamplingIteratorTests {
+ final int NUM_TEMPLATES = 50000;
+ final EnumMap<Strategy, Double> ACCURACY = new EnumMap<Strategy,Double>(Strategy.class){{
+ put(Strategy.HighAccuracy, 0.001);
+ put(Strategy.Chained, 0.005);
+ put(Strategy.ConstantMemory, 0.01);
+ }};
+
+ @Test
+ public void testBasicFunction() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ final Random r = new Random();
+ for (int i=0; i<NUM_TEMPLATES; ++i) {
+ builder.addPair("pair" + r.nextInt(), r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000));
+ }
+ final Collection<SAMRecord> recs = builder.getRecords();
+
+ runTests("testBasicFunction", recs);
+ }
+
+ private void runTests(final String name, final Collection<SAMRecord> recs) {
+ for (final DownsamplingIteratorFactory.Strategy strategy : DownsamplingIteratorFactory.Strategy.values()) {
+ final double accuracy = ACCURACY.get(strategy);
+
+ for (final double p : new double[]{0, 0.01, 0.1, 0.5, 0.9, 1}) {
+ final DownsamplingIterator iterator = DownsamplingIteratorFactory.make(recs.iterator(), strategy, p, accuracy, 42);
+ final List<SAMRecord> out = new ArrayList<SAMRecord>();
+ while (iterator.hasNext()) out.add(iterator.next());
+
+ final String testcase = name + ": strategy=" + strategy.name() + ", p=" + p + ", accuracy=" + accuracy;
+
+ final double readFraction = iterator.getAcceptedFraction();
+ Assert.assertEquals(out.size(), iterator.getAcceptedCount(), "Mismatched sizes with " + testcase);
+ Assert.assertTrue(readFraction > p - accuracy && readFraction < p + accuracy, "Read fraction " + readFraction + " out of bounds in " + testcase);
+ }
+ }
+ }
+
+ @Test
+ public void testMixOfPairsAndFrags() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ final Random r = new Random();
+ for (int i=0; i<NUM_TEMPLATES; ++i) {
+ builder.addFrag("frag" + r.nextInt(), r.nextInt(24), r.nextInt(1000000), false);
+ builder.addPair("pair" + r.nextInt(), r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000));
+ }
+
+ final Collection<SAMRecord> recs = builder.getRecords();
+ runTests("testMixOfPairsAndFrags", recs);
+ }
+
+ @Test
+ public void testSecondaryAlignments() {
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ final Random r = new Random();
+ for (int i=0; i<NUM_TEMPLATES; ++i) {
+ final int x = r.nextInt();
+ builder.addPair("pair" + x, r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000));
+ builder.addPair("pair" + x, r.nextInt(24), r.nextInt(24), r.nextInt(1000000), r.nextInt(1000000), false, false, "50M", "50M", false, true, true, true, 20);
+ }
+
+ final Collection<SAMRecord> recs = builder.getRecords();
+ runTests("testSecondaryAlignments", recs);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java b/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java
index c2a87a4..5952953 100644
--- a/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java
+++ b/src/tests/java/htsjdk/samtools/DuplicateSetIteratorTest.java
@@ -1,5 +1,6 @@
package htsjdk.samtools;
+import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.HashMap;
@@ -56,7 +57,7 @@ public class DuplicateSetIteratorTest {
}
//we expect 15 duplicate sets one for the initial two reads and one for each of the additional 14 reads.
- assert (allSets.size() == 15);
- assert (allSets.get("READ0").size() == 2);
+ Assert.assertEquals(allSets.size(), 15, "Wrong number of duplicate sets.");
+ Assert.assertEquals(allSets.get("READ0").size(), 2, "Should be two reads in the READ0 duplicate set, but there are not.");
}
}
diff --git a/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java b/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java
index 6c17d91..885321b 100644
--- a/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java
+++ b/src/tests/java/htsjdk/samtools/MergingSamRecordIteratorTest.java
@@ -268,4 +268,45 @@ public class MergingSamRecordIteratorTest {
builder1.getSamReader().close();
builder2.getSamReader().close();
}
+
+ @Test
+ public void testReferenceIndexMapping() throws Exception {
+ // Create two SamReaders with sequence dictionaries such that a merging iterator with merged
+ // headers will require remapping a record's reference index to the merged dictionary
+ final SAMRecordSetBuilder builder1 = new SAMRecordSetBuilder();
+ SAMSequenceRecord fakeSequenceRec = new SAMSequenceRecord("FAKE_CONTIG_A", 0);
+ builder1.getHeader().addSequence(fakeSequenceRec);
+
+ final SAMRecordSetBuilder builder2 = new SAMRecordSetBuilder();
+ fakeSequenceRec = new SAMSequenceRecord("FAKE_CONTIG_B", 0);
+ builder2.getHeader().addSequence(fakeSequenceRec);
+
+ // create a record with a reference index that will need to be remapped after merging
+ SAMRecord recRequiresMapping = new SAMRecord(builder2.getHeader());
+ recRequiresMapping.setReadName("fakeread");
+ recRequiresMapping.setReferenceName("FAKE_CONTIG_B");
+ builder2.addRecord(recRequiresMapping);
+ // cache the original reference index
+ int originalRefIndex = recRequiresMapping.getReferenceIndex();
+ Assert.assertTrue(25 == originalRefIndex);
+
+ // get a merging iterator with a merged header
+ final SamReader samReader1 = builder1.getSamReader();
+ final SamReader samReader2 = builder2.getSamReader();
+ final List<SamReader> readerList = new ArrayList<SamReader>();
+ readerList.add(samReader1);
+ readerList.add(samReader2);
+ final List<SAMFileHeader> headerList = new ArrayList<SAMFileHeader>();
+ headerList.add(samReader1.getFileHeader());
+ headerList.add(samReader2.getFileHeader());
+ final SamFileHeaderMerger samFileHeaderMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headerList, true);
+ final MergingSamRecordIterator iterator = new MergingSamRecordIterator(samFileHeaderMerger, readerList, false);
+
+ Assert.assertTrue(iterator.hasNext());
+ final SAMRecord rec = iterator.next();
+ Assert.assertTrue(26 == rec.getReferenceIndex());
+
+ samReader1.close();
+ samReader2.close();
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
index 7ad9143..c8378a6 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
@@ -23,6 +23,8 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
@@ -56,25 +58,36 @@ public class SAMFileReaderTest {
// tests for CRAM indexing
@Test(dataProvider = "SmallCRAMTest")
- public void CRAMIndexTest(final String inputFile) {
+ public void CRAMIndexTest(final String inputFile, final String referenceFile, QueryInterval queryInterval, String expectedReadName) {
final File input = new File(TEST_DATA_DIR, inputFile);
- final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ final File reference = new File(TEST_DATA_DIR, referenceFile);
+ final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
Assert.assertTrue(reader.hasIndex());
+
+ final CloseableIterator<SAMRecord> iterator = reader.query(new QueryInterval[]{queryInterval}, false);
+ Assert.assertTrue(iterator.hasNext());
+ SAMRecord r1 = iterator.next();
+ Assert.assertEquals(r1.getReadName(), expectedReadName);
+
CloserUtil.close(reader);
}
@DataProvider(name = "SmallCRAMTest")
public Object[][] CRAMIndexTestData() {
final Object[][] testFiles = new Object[][]{
- {"cram/test.cram"},
+ {"cram/test.cram", "cram/auxf.fa", new QueryInterval(0, 12, 13), "Jim"},
+ {"cram_with_bai_index.cram", "hg19mini.fasta", new QueryInterval(3, 700, 0), "k"},
+ {"cram_with_crai_index.cram", "hg19mini.fasta", new QueryInterval(2, 350, 0), "i"},
};
return testFiles;
}
+
@Test(dataProvider = "NoIndexCRAMTest")
- public void CRAMNoIndexTest(final String inputFile) {
+ public void CRAMNoIndexTest(final String inputFile, final String referenceFile) {
final File input = new File(TEST_DATA_DIR, inputFile);
- final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ final File reference = new File(TEST_DATA_DIR, referenceFile);
+ final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
Assert.assertFalse(reader.hasIndex());
CloserUtil.close(reader);
}
@@ -82,7 +95,7 @@ public class SAMFileReaderTest {
@DataProvider(name = "NoIndexCRAMTest")
public Object[][] CRAMNoIndexTestData() {
final Object[][] testFiles = new Object[][]{
- {"cram/test2.cram"},
+ {"cram/test2.cram", "cram/auxf.fa"},
};
return testFiles;
}
@@ -120,4 +133,48 @@ public class SAMFileReaderTest {
else if (inputFile.endsWith(".bam")) Assert.assertEquals(factory.bamRecordsCreated, i);
}
+ @DataProvider(name = "cramNegativeTestCases")
+ public Object[][] cramTestNegativeCases() {
+ final Object[][] scenarios = new Object[][]{
+ {"cram_with_bai_index.cram",},
+ {"cram_with_crai_index.cram"},
+ };
+ return scenarios;
+ }
+
+ @Test(dataProvider = "cramNegativeTestCases", expectedExceptions=CRAMException.class)
+ public void testReferenceRequiredForCRAM(final String inputFile) {
+ final File input = new File(TEST_DATA_DIR, inputFile);
+ final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ for (final SAMRecord rec : reader) {
+ }
+ CloserUtil.close(reader);
+ }
+
+ @DataProvider(name = "cramPositiveTestCases")
+ public Object[][] cramTestPositiveCases() {
+ final Object[][] scenarios = new Object[][]{
+ {"cram_with_bai_index.cram", "hg19mini.fasta"},
+ {"cram_with_crai_index.cram", "hg19mini.fasta"},
+ };
+ return scenarios;
+ }
+
+ @Test(dataProvider = "cramPositiveTestCases")
+ public void testIterateCRAMWithIndex(final String inputFile, final String referenceFile) {
+ final File input = new File(TEST_DATA_DIR, inputFile);
+ final File reference = new File(TEST_DATA_DIR, referenceFile);
+ final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
+ for (final SAMRecord rec : reader) {
+ }
+ CloserUtil.close(reader);
+ }
+
+ @Test
+ public void samRecordFactoryNullHeaderTest() {
+ final SAMRecordFactory factory = new DefaultSAMRecordFactory();
+ final SAMRecord samRec = factory.createSAMRecord(null);
+ Assert.assertTrue(samRec.getHeader() == null);
+ }
+
}
diff --git a/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java b/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
index ad940d8..7c6fa56 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
@@ -23,8 +23,11 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.cram.build.CramIO;
+import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.ByteArrayOutputStream;
@@ -32,9 +35,9 @@ import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
+import java.io.IOException;
import java.io.OutputStream;
-
public class SAMFileWriterFactoryTest {
private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools");
@@ -111,6 +114,35 @@ public class SAMFileWriterFactoryTest {
Assert.assertEquals(writtensam, originalsam);
}
+ @Test(description="Write SAM records with null SAMFileHeader")
+ public void samNullHeaderRoundTrip() throws Exception {
+ final File input = new File(TEST_DATA_DIR, "roundtrip.sam");
+
+ final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ final File outputFile = File.createTempFile("nullheader-out", ".sam");
+ outputFile.delete();
+ outputFile.deleteOnExit();
+ FileOutputStream os = new FileOutputStream(outputFile);
+ final SAMFileWriterFactory factory = new SAMFileWriterFactory();
+ final SAMFileWriter writer = factory.makeSAMWriter(reader.getFileHeader(), false, os);
+ for (SAMRecord rec : reader) {
+ rec.setHeader(null);
+ writer.addAlignment(rec);
+ }
+ writer.close();
+ os.close();
+
+ InputStream is = new FileInputStream(input);
+ String originalsam = IOUtil.readFully(is);
+ is.close();
+
+ is = new FileInputStream(outputFile);
+ String writtensam = IOUtil.readFully(is);
+ is.close();
+
+ Assert.assertEquals(writtensam, originalsam);
+ }
+
private void createSmallBam(final File outputFile) {
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(true);
@@ -123,8 +155,8 @@ public class SAMFileWriterFactoryTest {
fillSmallBam(writer);
writer.close();
}
-
-
+
+
private void createSmallBamToOutputStream(final OutputStream outputStream,boolean binary) {
final SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(false);
@@ -141,10 +173,123 @@ public class SAMFileWriterFactoryTest {
writer.close();
}
- private void fillSmallBam(SAMFileWriter writer) {
+ private int fillSmallBam(SAMFileWriter writer) {
final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
builder.addUnmappedFragment("HiMom!");
- for (final SAMRecord rec: builder.getRecords()) writer.addAlignment(rec);
- }
-
+ int numRecs = builder.getRecords().size();
+ for (final SAMRecord rec: builder.getRecords()) {
+ writer.addAlignment(rec);
+ }
+ return numRecs;
+ }
+
+ private File prepareOutputFile(String extension) throws IOException {
+ final File outputFile = File.createTempFile("tmp.", extension);
+ outputFile.delete();
+ outputFile.deleteOnExit();
+ return outputFile;
+ }
+
+ // Create a writer factory that creates an index and md5 file, and set the header to coordinate sorted
+ private SAMFileWriterFactory createWriterFactoryWithOptions(SAMFileHeader header) {
+ final SAMFileWriterFactory factory = new SAMFileWriterFactory();
+ factory.setCreateIndex(true);
+ factory.setCreateMd5File(true);
+ // index only created if coordinate sorted
+ header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+ header.addSequence(new SAMSequenceRecord("chr1", 123));
+ header.addReadGroup(new SAMReadGroupRecord("1"));
+ return factory;
+ }
+
+ /**
+ * Reads {@code outputFile} back and asserts that it holds exactly {@code nRecs} records; optionally
+ * also asserts that non-empty index and md5 companion files exist for it.
+ *
+ * @param outputFile the file produced by the writer under test
+ * @param refSource reference source passed to the reader factory; when null no reference is set
+ * @param nRecs the number of records expected when iterating over {@code outputFile}
+ * @param verifySupplementalFiles when true, also check the index and md5 files
+ * @throws IOException if closing the reader fails
+ */
+ private void verifyWriterOutput(File outputFile, ReferenceSource refSource, int nRecs, boolean verifySupplementalFiles) throws IOException {
+ if (verifySupplementalFiles) {
+ final File indexFile = SamFiles.findIndex(outputFile);
+ // report a missing index as a test failure instead of a NullPointerException on the next line
+ Assert.assertNotNull(indexFile, "No index file found for " + outputFile);
+ indexFile.deleteOnExit();
+ final File md5File = new File(outputFile.getParent(), outputFile.getName() + ".md5");
+ md5File.deleteOnExit();
+ Assert.assertTrue(indexFile.length() > 0);
+ Assert.assertTrue(md5File.length() > 0);
+ }
+
+ SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.LENIENT);
+ if (refSource != null) {
+ factory.referenceSource(refSource);
+ }
+ final SamReader reader = factory.open(outputFile);
+ try {
+ final SAMRecordIterator it = reader.iterator();
+ int count = 0;
+ for (; it.hasNext(); it.next()) {
+ count++;
+ }
+
+ // assertEquals reports both values on failure, unlike assertTrue(count == nRecs)
+ Assert.assertEquals(count, nRecs);
+ } finally {
+ // close the reader even when an assertion above fails
+ reader.close();
+ }
+ }
+
+ @DataProvider(name="bamOrCramWriter")
+ public Object[][] bamOrCramWriter() {
+ return new Object[][] {
+ { BamFileIoUtils.BAM_FILE_EXTENSION, },
+ { CramIO.CRAM_FILE_EXTENSION }
+ };
+ }
+
+ @Test(dataProvider="bamOrCramWriter")
+ public void testMakeWriter(String extension) throws Exception {
+ final File outputFile = prepareOutputFile(extension);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+
+ final SAMFileWriter samWriter = factory.makeWriter(header, false, outputFile, referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
+ }
+
+ @Test
+ public void testMakeCRAMWriterWithOptions() throws Exception {
+ final File outputFile = prepareOutputFile(CramIO.CRAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+
+ final SAMFileWriter samWriter = factory.makeCRAMWriter(header, false, outputFile, referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
+ }
+
+ @Test
+ public void testMakeCRAMWriterIgnoresOptions() throws Exception {
+ final File outputFile = prepareOutputFile(CramIO.CRAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+
+ // Note: does not honor factory settings for CREATE_MD5 or CREATE_INDEX.
+ final SAMFileWriter samWriter = factory.makeCRAMWriter(header, new FileOutputStream(outputFile), referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, false);
+ }
+
+ @Test
+ public void testMakeCRAMWriterPresortedDefault() throws Exception {
+ final File outputFile = prepareOutputFile(CramIO.CRAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+
+ // Defaults to preSorted==true
+ final SAMFileWriter samWriter = factory.makeCRAMWriter(header, outputFile, referenceFile);
+ int nRecs = fillSmallBam(samWriter);
+ samWriter.close();
+
+ verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
+ }
+
}
diff --git a/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java b/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
index 4003435..bc5cc8e 100644
--- a/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
@@ -23,13 +23,21 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.CloserUtil;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
+import java.io.PrintStream;
+import java.util.ArrayList;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
/**
@@ -46,6 +54,10 @@ public class SAMIntegerTagTest {
private static final String UNSIGNED_INTEGER_TAG = "UI";
private static final String STRING_TAG = "ST";
+ private static final long TOO_LARGE_UNSIGNED_INT_VALUE = BinaryCodec.MAX_UINT + 1L;
+
+ enum FORMAT {SAM, BAM, CRAM}
+
@Test
public void testBAM() throws Exception {
final SAMRecord rec = writeAndReadSamRecord("bam");
@@ -68,22 +80,14 @@ public class SAMIntegerTagTest {
Assert.assertEquals(((Number) rec.getAttribute(INTEGER_TAG)).intValue(), 1);
}
- @Test(expectedExceptions = SAMException.class)
- public void testUnsignedIntegerBAM() throws Exception {
- SAMRecord rec = createSamRecord();
- final long val = 1l + Integer.MAX_VALUE;
- rec.setAttribute(UNSIGNED_INTEGER_TAG, val);
- Assert.fail("Exception should have been thrown.");
- }
-
- /**
- * Cannot store unsigned int in SAM text format.
- */
- @Test(expectedExceptions = SAMException.class)
+ @Test
public void testUnsignedIntegerSAM() throws Exception {
final SAMRecord rec = createSamRecord();
final long val = 1l + Integer.MAX_VALUE;
rec.setAttribute(UNSIGNED_INTEGER_TAG, val);
+ final Object roundTripValue = rec.getAttribute(UNSIGNED_INTEGER_TAG);
+ Assert.assertTrue(roundTripValue instanceof Long);
+ Assert.assertEquals(((Long)roundTripValue).longValue(), val);
}
@Test
@@ -174,29 +178,55 @@ public class SAMIntegerTagTest {
return builder.iterator().next();
}
- @Test(expectedExceptions = {SAMFormatException.class})
- public void testBadSamStrict() {
- final SamReader reader = SamReaderFactory.makeDefault().open(new File(TEST_DATA_DIR, "variousAttributes.sam"));
+ private static SamInputResource createSamForIntAttr(long value) {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintStream ps = new PrintStream(baos);
+ ps.println("@HD\tVN:1.0");
+ ps.print("1\t4\t*\t0\t0\t*\t*\t0\t0\tA\t<\tUI:i:");
+ ps.println(value);
+ ps.close();
+
+ return new SamInputResource(new InputStreamInputResource(new ByteArrayInputStream(baos.toByteArray())));
+ }
+
+ @Test
+ public void testGoodSamStrict() throws IOException {
+ final SamReaderFactory factory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.STRICT);
+
+ Assert.assertEquals(0, ((Number) factory.open(createSamForIntAttr(0)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(-1, ((Number) factory.open(createSamForIntAttr(-1)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(Integer.MIN_VALUE, ((Number) factory.open(createSamForIntAttr(Integer.MIN_VALUE)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(Integer.MAX_VALUE, ((Number) factory.open(createSamForIntAttr(Integer.MAX_VALUE)).iterator().next().getAttribute("UI")).intValue());
+ Assert.assertEquals(1L + (long) Integer.MAX_VALUE, ((Number) factory.open(createSamForIntAttr(1L + (long) Integer.MAX_VALUE)).iterator().next().getAttribute("UI")).longValue());
+ Assert.assertEquals(BinaryCodec.MAX_UINT, ((Number) factory.open(createSamForIntAttr(BinaryCodec.MAX_UINT)).iterator().next().getAttribute("UI")).longValue());
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void testBadSamStrict() throws IOException {
+ final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.STRICT).open(createSamForIntAttr(BinaryCodec.MAX_UINT + 1L));
reader.iterator().next();
- Assert.fail("Should not reach.");
}
- @Test(expectedExceptions = {RuntimeException.class})
- public void testBadBamStrict() {
- final SamReader reader = SamReaderFactory.makeDefault()
- .enable(SamReaderFactory.Option.EAGERLY_DECODE)
- .open(new File(TEST_DATA_DIR, "variousAttributes.bam"));
+ @Test
+ public void testBadSamSilent() throws IOException {
+ final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(createSamForIntAttr(BinaryCodec.MAX_UINT + 1L));
reader.iterator().next();
- Assert.fail("Should not reach.");
+ }
+ @DataProvider(name = "legalIntegerAttributesFiles")
+ public Object[][] getLegalIntegerAttributesFiles() {
+ return new Object[][] {
+ { new File(TEST_DATA_DIR, "variousAttributes.sam") },
+ { new File(TEST_DATA_DIR, "variousAttributes.bam") }
+ };
}
- @Test
- public void testBadBamLenient() {
+ @Test(dataProvider = "legalIntegerAttributesFiles")
+ public void testLegalIntegerAttributesFilesStrict( final File inputFile ) {
final SamReader reader = SamReaderFactory.makeDefault()
.enable(SamReaderFactory.Option.EAGERLY_DECODE)
- .validationStringency(ValidationStringency.LENIENT)
- .open(new File(TEST_DATA_DIR, "variousAttributes.bam"));
+ .validationStringency(ValidationStringency.STRICT)
+ .open(inputFile);
final SAMRecord rec = reader.iterator().next();
final Map<String, Number> expectedTags = new HashMap<String, Number>();
@@ -209,8 +239,91 @@ public class SAMIntegerTagTest {
expectedTags.put("UI", 4294967295L);
for (final Map.Entry<String, Number> entry : expectedTags.entrySet()) {
final Object value = rec.getAttribute(entry.getKey());
- Assert.assertEquals(value, entry.getValue());
+ Assert.assertTrue(((Number) value).longValue() == entry.getValue().longValue());
}
CloserUtil.close(reader);
}
+
+ /**
+ * Supplies {value, format, stringency} rows for the round-trip test that is expected to succeed:
+ * every listed Integer and Long value is combined with every FORMAT and every ValidationStringency.
+ *
+ * @return one Object[] row per parameter combination
+ */
+ @DataProvider(name = "valid_set")
+ public static Object[][] valid_set() {
+ List<Object[]> params = new ArrayList<Object[]>();
+ for (FORMAT format:FORMAT.values()) {
+ for (ValidationStringency stringency:ValidationStringency.values()) {
+ params.add(new Object[]{0, format, stringency});
+ params.add(new Object[]{1, format, stringency});
+ params.add(new Object[]{-1, format, stringency});
+ params.add(new Object[]{Integer.MIN_VALUE, format, stringency});
+ params.add(new Object[]{Integer.MAX_VALUE, format, stringency});
+
+ params.add(new Object[]{1L, format, stringency});
+ params.add(new Object[]{-1L, format, stringency});
+ params.add(new Object[]{(long)Integer.MAX_VALUE+1L, format, stringency});
+ params.add(new Object[]{BinaryCodec.MAX_UINT, format, stringency});
+ }
+ }
+
+ // size the outer array from the list; the rows themselves are the Object[] entries already built
+ return params.toArray(new Object[params.size()][]);
+ }
+
+ /**
+ * Supplies {value, format, stringency} rows for the round-trip test that is expected to throw:
+ * one value below Integer.MIN_VALUE and one above BinaryCodec.MAX_UINT, combined with every
+ * FORMAT and every ValidationStringency.
+ *
+ * @return one Object[] row per parameter combination
+ */
+ @DataProvider(name = "invalid_set")
+ public static Object[][] invalid_set() {
+ List<Object[]> params = new ArrayList<Object[]>();
+ for (FORMAT format:FORMAT.values()) {
+ for (ValidationStringency stringency:ValidationStringency.values()) {
+ params.add(new Object[]{(long)Integer.MIN_VALUE -1L, format, stringency});
+ params.add(new Object[]{TOO_LARGE_UNSIGNED_INT_VALUE, format, stringency});
+ }
+ }
+
+ // size the outer array from the list; the rows themselves are the Object[] entries already built
+ return params.toArray(new Object[params.size()][]);
+ }
+
+ @Test(dataProvider = "valid_set")
+ public void testValidIntegerAttributeRoundtrip(final long value, final FORMAT format, ValidationStringency validationStringency) throws IOException {
+ testRoundtripIntegerAttribute(value, format, validationStringency);
+ }
+
+ @Test(dataProvider = "invalid_set", expectedExceptions = RuntimeException.class)
+ public void testInvalidIntegerAttributeRoundtrip(final long value, final FORMAT format, ValidationStringency validationStringency) throws IOException {
+ testRoundtripIntegerAttribute(value, format, validationStringency);
+ }
+
+ private void testRoundtripIntegerAttribute(final Number value, final FORMAT format, ValidationStringency validationStringency) throws IOException {
+ final SAMFileHeader header = new SAMFileHeader();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ final SAMFileWriter w;
+ switch (format) {
+ case SAM:
+ w = new SAMFileWriterFactory().makeSAMWriter(header, false, baos);
+ break;
+ case BAM:
+ w = new SAMFileWriterFactory().makeBAMWriter(header, false, baos);
+ break;
+ case CRAM:
+ w = new SAMFileWriterFactory().makeCRAMWriter(header, baos, null);
+ break;
+ default:
+ throw new RuntimeException("Unknown format: " + format);
+ }
+
+ final SAMRecord record = new SAMRecord(header);
+ record.setAttribute("UI", value);
+ record.setReadName("1");
+ record.setReadUnmappedFlag(true);
+ record.setReadBases("A".getBytes());
+ record.setBaseQualityString("!");
+ Assert.assertEquals(value, record.getAttribute("UI"));
+
+ w.addAlignment(record);
+ w.close();
+
+ final SamReader reader = SamReaderFactory.make().validationStringency(validationStringency).referenceSource(new ReferenceSource()).
+ open(SamInputResource.of(new ByteArrayInputStream(baos.toByteArray())));
+ final SAMRecordIterator iterator = reader.iterator();
+ Assert.assertTrue(iterator.hasNext());
+ final SAMRecord record2 = iterator.next();
+ final Number returnedValue = (Number) record2.getAttribute("UI");
+ Assert.assertEquals(value.longValue(), returnedValue.longValue());
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java b/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java
index f61bd3a..cb50925 100644
--- a/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMRecordDuplicateComparatorTest.java
@@ -27,6 +27,7 @@ import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
@@ -215,4 +216,18 @@ public class SAMRecordDuplicateComparatorTest {
assertEquals(Arrays.asList(-1,-1,-1), records, false);
}
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testNullHeaders() {
+ final SAMRecordSetBuilder records = getSAMRecordSetBuilder();
+
+ records.addPair("READ0", 1, 55, 55);
+ records.addPair("READ1", 2, 55, 55);
+ Collection<SAMRecord> recs = records.getRecords();
+ for (SAMRecord rec : recs) {
+ rec.setHeader(null);
+ }
+
+ assertEquals(Arrays.asList(-1, -1, -1), records, false);
+ }
+
}
diff --git a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
index 557c496..a3c3e68 100644
--- a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
@@ -24,12 +24,15 @@
package htsjdk.samtools;
+import htsjdk.samtools.util.BinaryCodec;
import htsjdk.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
+import java.util.Arrays;
+import java.util.List;
public class SAMRecordUnitTest {
@@ -52,4 +55,796 @@ public class SAMRecordUnitTest {
Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
}
-}
+ @DataProvider
+ public Object [][] offsetAtReferenceData() {
+ return new Object[][]{
+ {"3S9M", 7, 10, false},
+ {"3S9M", 0, 0, false},
+ {"3S9M", -1, 0, false},
+ {"3S9M", 13, 0, false},
+ {"4M1D6M", 4, 4, false},
+ {"4M1D6M", 4, 4, true},
+ {"4M1D6M", 5, 0, false},
+ {"4M1D6M", 5, 4, true},
+ {"4M1I6M", 5, 6, false},
+ {"4M1I6M", 11, 0, false},
+ };
+ }
+
+ @Test(dataProvider = "offsetAtReferenceData")
+ public void testOffsetAtReference(String cigar, int posInReference, int expectedPosInRead, boolean returnLastBaseIfDeleted) {
+
+ SAMRecord sam = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, cigar, null, 2);
+ Assert.assertEquals(SAMRecord.getReadPositionAtReferencePosition(sam, posInReference, returnLastBaseIfDeleted), expectedPosInRead);
+ }
+
+ @DataProvider
+ public Object [][] referenceAtReadData() {
+ return new Object[][]{
+ {"3S9M", 7, 10},
+ {"3S9M", 0, 0},
+ {"3S9M", 0, 13},
+ {"4M1D6M", 4, 4},
+ {"4M1D6M", 6, 5},
+ {"4M1I6M", 0, 5},
+ {"4M1I6M", 5, 6},
+ };
+ }
+
+ @Test(dataProvider = "referenceAtReadData")
+ public void testOffsetAtRead(String cigar, int expectedReferencePos, int posInRead) {
+
+ SAMRecord sam = new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, cigar, null, 2);
+ Assert.assertEquals(sam.getReferencePositionAtReadPosition(posInRead), expectedReferencePos);
+ }
+
+ @DataProvider(name = "deepCopyTestData")
+ public Object [][] deepCopyTestData() {
+ return new Object[][]{
+ { new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "3S9M", null, 2) },
+ { new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "4M1I6M", null, 2) }
+ };
+ }
+
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyRef(final SAMRecord sam) {
+ testDeepCopy(sam);
+ }
+
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyMutate(final SAMRecord sam) {
+ final byte[] initialBaseQualityCopy = Arrays.copyOf(sam.getBaseQualities(), sam.getBaseQualities().length);
+ final int initialStart = sam.getAlignmentStart();
+
+ final SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(Arrays.equals(sam.getBaseQualities(), deepCopy.getBaseQualities()));
+ Assert.assertTrue(sam.getAlignmentStart() == deepCopy.getAlignmentStart());
+
+ // mutate copy and make sure original remains unchanged
+ final byte[] copyBaseQuals = deepCopy.getBaseQualities();
+ for (int i = 0; i < copyBaseQuals.length; i++) {
+ copyBaseQuals[i]++;
+ }
+ deepCopy.setBaseQualities(copyBaseQuals);
+ deepCopy.setAlignmentStart(initialStart + 1);
+ Assert.assertTrue(Arrays.equals(sam.getBaseQualities(), initialBaseQualityCopy));
+ Assert.assertTrue(sam.getAlignmentStart() == initialStart);
+ }
+
+ /**
+ * Checks that deepCopy() gives the copy its own byte-array attribute, for both the signed and the
+ * unsigned flavour: contents match right after the copy, and mutating the original's array does
+ * not change the copy's array.
+ */
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepByteAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+
+ final byte bytes[] = { -2, -1, 0, 1, 2 };
+ sam.setAttribute("BY", bytes);
+ deepCopy = sam.deepCopy();
+
+ // validate reference inequality and content equality
+ final byte samBytes[] = sam.getByteArrayAttribute("BY");
+ final byte copyBytes[] = deepCopy.getByteArrayAttribute("BY");
+ Assert.assertFalse(copyBytes == samBytes);
+ Assert.assertTrue(Arrays.equals(copyBytes, samBytes));
+
+ // validate mutation independence
+ final byte testByte = -1;
+ Assert.assertTrue(samBytes[2] != testByte); // ensure initial test condition
+ Assert.assertTrue(copyBytes[2] != testByte); // ensure initial test condition
+ samBytes[2] = testByte; // mutate original
+ Assert.assertTrue(samBytes[2] == testByte);
+ Assert.assertTrue(copyBytes[2] != testByte);
+ sam.setAttribute("BY", samBytes);
+ Assert.assertTrue(sam.getByteArrayAttribute("BY")[2] != deepCopy.getByteArrayAttribute("BY")[2]);
+
+ // now unsigned...
+ sam.setUnsignedArrayAttribute("BY", bytes);
+ deepCopy = sam.deepCopy();
+ final byte samUBytes[] = sam.getUnsignedByteArrayAttribute("BY");
+ final byte copyUBytes[] = deepCopy.getUnsignedByteArrayAttribute("BY");
+ Assert.assertFalse(copyUBytes == bytes);
+ Assert.assertTrue(Arrays.equals(copyUBytes, samUBytes));
+
+ // validate mutation independence
+ final byte uByte = 1;
+ Assert.assertTrue(samUBytes[2] != uByte); // ensure initial test condition
+ Assert.assertTrue(copyUBytes[2] != uByte); // ensure initial test condition (the copy, not the original again)
+ samUBytes[2] = uByte; // mutate original
+ Assert.assertTrue(samUBytes[2] == uByte);
+ Assert.assertTrue(copyUBytes[2] != uByte);
+ // re-set the array that was just mutated, not the one from the signed half of the test
+ sam.setUnsignedArrayAttribute("BY", samUBytes);
+ Assert.assertTrue(sam.getUnsignedByteArrayAttribute("BY")[2] != deepCopy.getUnsignedByteArrayAttribute("BY")[2]);
+ }
+
+ /**
+ * Checks that deepCopy() gives the copy its own short-array attribute, for both the signed and the
+ * unsigned flavour: contents match right after the copy, and mutating the original's array does
+ * not change the copy's array.
+ */
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepShortAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+
+ final short shorts[] = { -20, -10, 0, 10, 20 };
+ sam.setAttribute("SH", shorts);
+ deepCopy = sam.deepCopy();
+
+ // validate reference inequality, content equality
+ final short samShorts[] = sam.getSignedShortArrayAttribute("SH");
+ final short copyShorts[] = deepCopy.getSignedShortArrayAttribute("SH");
+ Assert.assertFalse(copyShorts == samShorts);
+ Assert.assertTrue(Arrays.equals(copyShorts, samShorts));
+
+ // validate mutation independence
+ final short testShort = -1;
+ Assert.assertTrue(samShorts[2] != testShort); // ensure initial test condition
+ Assert.assertTrue(copyShorts[2] != testShort); // ensure initial test condition (the copy, not the original again)
+ samShorts[2] = testShort; // mutate original
+ Assert.assertTrue(samShorts[2] == testShort);
+ Assert.assertTrue(copyShorts[2] != testShort);
+ sam.setAttribute("SH", samShorts);
+ Assert.assertTrue(sam.getSignedShortArrayAttribute("SH")[2] != deepCopy.getSignedShortArrayAttribute("SH")[2]);
+
+ // now unsigned...
+ sam.setUnsignedArrayAttribute("SH", shorts);
+ deepCopy = sam.deepCopy();
+
+ final short samUShorts[] = sam.getUnsignedShortArrayAttribute("SH");
+ final short copyUShorts[] = deepCopy.getUnsignedShortArrayAttribute("SH");
+ Assert.assertFalse(copyUShorts == shorts);
+ Assert.assertTrue(Arrays.equals(copyUShorts, samUShorts));
+
+ // validate mutation independence
+ final short uShort = 1;
+ Assert.assertTrue(samUShorts[2] != uShort); // ensure initial test condition
+ Assert.assertTrue(copyUShorts[2] != uShort); // ensure initial test condition (the copy, not the original again)
+ samUShorts[2] = uShort; // mutate original
+ Assert.assertTrue(samUShorts[2] == uShort);
+ Assert.assertTrue(copyUShorts[2] != uShort);
+ // re-set the array that was just mutated, not the one from the signed half of the test
+ sam.setUnsignedArrayAttribute("SH", samUShorts);
+ Assert.assertTrue(sam.getUnsignedShortArrayAttribute("SH")[2] != deepCopy.getUnsignedShortArrayAttribute("SH")[2]);
+ }
+
+ /**
+ * Checks that deepCopy() gives the copy its own int-array attribute, for both the signed and the
+ * unsigned flavour: contents match right after the copy, and mutating the original's array does
+ * not change the copy's array.
+ */
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepIntAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+
+ final int ints[] = { -200, -100, 0, 100, 200 };
+ sam.setAttribute("IN", ints);
+ deepCopy = sam.deepCopy();
+
+ // validate reference inequality and content equality
+ final int samInts[] = sam.getSignedIntArrayAttribute("IN");
+ final int copyInts[] = deepCopy.getSignedIntArrayAttribute("IN");
+ Assert.assertFalse(copyInts == ints);
+ Assert.assertTrue(Arrays.equals(copyInts, samInts));
+
+ // validate mutation independence
+ final int testInt = -1;
+ Assert.assertTrue(samInts[2] != testInt); // ensure initial test condition
+ Assert.assertTrue(copyInts[2] != testInt); // ensure initial test condition (the copy, not the original again)
+ samInts[2] = testInt; // mutate original
+ Assert.assertTrue(samInts[2] == testInt);
+ Assert.assertTrue(copyInts[2] != testInt);
+ sam.setAttribute("IN", samInts);
+ Assert.assertTrue(sam.getSignedIntArrayAttribute("IN")[2] != deepCopy.getSignedIntArrayAttribute("IN")[2]);
+
+ // now unsigned...
+ sam.setUnsignedArrayAttribute("IN", ints);
+ deepCopy = sam.deepCopy();
+
+ final int samUInts[] = sam.getUnsignedIntArrayAttribute("IN");
+ final int copyUInts[] = deepCopy.getUnsignedIntArrayAttribute("IN");
+ Assert.assertFalse(copyUInts == ints);
+ Assert.assertTrue(Arrays.equals(copyUInts, samUInts));
+
+ // validate mutation independence
+ final int uInt = 1;
+ Assert.assertTrue(samUInts[2] != uInt); // ensure initial test condition
+ Assert.assertTrue(copyUInts[2] != uInt); // ensure initial test condition (the copy, not the original again)
+ // mutate the unsigned array itself; writing to samInts only works if both getters return the same array
+ samUInts[2] = uInt; // mutate original
+ Assert.assertTrue(samUInts[2] == uInt);
+ Assert.assertTrue(copyUInts[2] != uInt);
+ // re-set the array that was just mutated, not the one from the signed half of the test
+ sam.setUnsignedArrayAttribute("IN", samUInts);
+ Assert.assertTrue(sam.getUnsignedIntArrayAttribute("IN")[2] != deepCopy.getUnsignedIntArrayAttribute("IN")[2]);
+ }
+
+ /**
+ * Checks that deepCopy() gives the copy its own float-array attribute: contents match right after
+ * the copy, and mutating the original's array does not change the copy's array.
+ */
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepFloatAttributes( final SAMRecord sam ) throws Exception {
+ // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
+ // SAMBinaryTagAndValue.equals using reference equality on attribute values.
+ SAMRecord deepCopy = testDeepCopy(sam);
+ Assert.assertTrue(sam.equals(deepCopy));
+
+ final float floats[] = { -2.4f, -1.2f, 0, 2.3f, 4.6f };
+ sam.setAttribute("FL", floats);
+ deepCopy = sam.deepCopy();
+
+ // validate reference inequality and content equality
+ final float samFloats[] = sam.getFloatArrayAttribute("FL");
+ final float copyFloats[] = deepCopy.getFloatArrayAttribute("FL");
+ Assert.assertFalse(copyFloats == floats);
+ Assert.assertFalse(copyFloats == samFloats);
+ Assert.assertTrue(Arrays.equals(copyFloats, samFloats));
+
+ // validate mutation independence
+ final float testFloat = -1.0f;
+ Assert.assertTrue(samFloats[2] != testFloat); // ensure initial test condition
+ Assert.assertTrue(copyFloats[2] != testFloat); // ensure initial test condition (the copy, not the original again)
+ samFloats[2] = testFloat; // mutate original
+ Assert.assertTrue(samFloats[2] == testFloat);
+ Assert.assertTrue(copyFloats[2] != testFloat);
+ sam.setAttribute("FL", samFloats);
+ Assert.assertTrue(sam.getFloatArrayAttribute("FL")[2] != deepCopy.getFloatArrayAttribute("FL")[2]);
+ }
+
+ private SAMRecord testDeepCopy(SAMRecord sam) {
+ final SAMRecord deepCopy = sam.deepCopy();
+
+ // force the indexing bins to be computed in order to satisfy equality test
+ sam.setIndexingBin(sam.computeIndexingBin());
+ deepCopy.setIndexingBin(deepCopy.computeIndexingBin());
+ Assert.assertTrue(sam.equals(deepCopy));
+
+ return deepCopy;
+ }
+
+ @Test
+ public void test_getUnsignedIntegerAttribute_valid() {
+ final String stringTag = "UI";
+ final short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag(stringTag);
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertNull(record.getUnsignedIntegerAttribute(binaryTag));
+
+ record.setAttribute("UI", 0L);
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(binaryTag));
+
+ record.setAttribute("UI", BinaryCodec.MAX_UINT);
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(binaryTag));
+
+ final SAMBinaryTagAndValue tv_zero = new SAMBinaryTagAndUnsignedArrayValue(binaryTag, 0L);
+ record = new SAMRecord(header){
+ {
+ setAttributes(tv_zero);
+ }
+ };
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(binaryTag));
+
+ final SAMBinaryTagAndValue tv_max = new SAMBinaryTagAndUnsignedArrayValue(binaryTag, BinaryCodec.MAX_UINT);
+ record = new SAMRecord(header){
+ {
+ setAttributes(tv_max);
+ }
+ };
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(stringTag));
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(binaryTag));
+ }
+
+ /**
+ * This is an alternative to test_getUnsignedIntegerAttribute_valid().
+ * The purpose is to ensure that the hacky way of setting arbitrary tag values works ok.
+ * This is required for testing invalid (out of range) unsigned integer value.
+ */
+ @Test
+ public void test_getUnsignedIntegerAttribute_valid_alternative() {
+ final short tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record;
+
+ record = new SAMRecord(header) {
+ {
+ setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, 0L));
+ }
+ };
+ Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(tag));
+
+ record = new SAMRecord(header) {
+ {
+ setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT));
+ }
+ };
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(tag));
+
+ // the following works because we bypass value checks implemented in SAMRecord:
+ record = new SAMRecord(header) {
+ {
+ setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT+1L));
+ }
+ };
+ // check that the invalid value is still there:
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT+1L), (Long)record.getBinaryAttributes().value);
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void test_getUnsignedIntegerAttribute_negative() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ final SAMBinaryTagAndValue tv = new SAMBinaryTagAndUnsignedArrayValue(tag, -1L);
+ record = new SAMRecord(header) {
+ {
+ setAttributes(tv);
+ }
+ };
+ } catch (Exception e) {
+ Assert.fail("Unexpected exception", e);
+ }
+ record.getUnsignedIntegerAttribute(tag);
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void test_getUnsignedIntegerAttribute_tooLarge() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ final SAMBinaryTagAndValue tv = new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT + 1);
+ record = new SAMRecord(header) {
+ {
+ setAttributes(tv);
+ }
+ };
+ } catch (Exception e) {
+ Assert.fail("Unexpected exception", e);
+ }
+
+ record.getUnsignedIntegerAttribute(tag);
+ }
+
+ @Test
+ public void test_isAllowedAttributeDataType() {
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Byte((byte) 0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Short((short) 0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Integer(0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue("a string"));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Character('C')));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Float(0.1F)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new byte[]{0}));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new short[]{0}));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new int[]{0}));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new float[]{0.1F}));
+
+ // unsigned integers:
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(0)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(BinaryCodec.MAX_UINT)));
+ Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(-1L)));
+ Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(BinaryCodec.MAX_UINT + 1L)));
+ Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(Integer.MIN_VALUE - 1L)));
+
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void test_setAttribute_unsigned_int_negative() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ } catch (SAMException e) {
+ Assert.fail("Unexpected exception", e);
+ }
+
+ record.setAttribute(tag, (long)Integer.MIN_VALUE-1L);
+ }
+
+ @Test(expectedExceptions = SAMException.class)
+ public void test_setAttribute_unsigned_int_tooLarge() {
+ short tag = 0;
+ SAMRecord record = null;
+ try {
+ tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ } catch (SAMException e) {
+ Assert.fail("Unexpected exception", e);
+ }
+
+ record.setAttribute(tag, BinaryCodec.MAX_UINT + 1L);
+ }
+
+ @Test
+ public void test_setAttribute_null_removes_tag() {
+ final short tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+
+ record.setAttribute(tag, BinaryCodec.MAX_UINT);
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(tag));
+
+ record.setAttribute(tag, null);
+ Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
+ }
+
+ private SAMRecord createTestRecordHelper() {
+ return new SAMRecordSetBuilder().addFrag("test", 0, 1, false, false, "3S9M", null, 2);
+ }
+
+ @Test
+ public void testReferenceName() {
+ SAMRecord sam = createTestRecordHelper();
+
+ // NO_ALIGNMENT_NAME
+ sam.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+
+ // valid reference name
+ sam = createTestRecordHelper();
+ sam.setReferenceName("chr4");
+ Assert.assertTrue(sam.getReferenceName().equals("chr4"));
+ Assert.assertTrue(sam.getReferenceIndex().equals(3));
+
+ // invalid reference name sets name but leaves ref index invalid
+ sam = createTestRecordHelper();
+ sam.setReferenceName("unresolvableName");
+ Assert.assertTrue(sam.getReferenceName().equals("unresolvableName"));
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ }
+
+ @Test
+ public void testReferenceIndex() {
+ // NO_ALIGNMENT_REFERENCE
+ SAMRecord sam = createTestRecordHelper();
+ sam.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+
+ // valid reference
+ sam = createTestRecordHelper();
+ sam.setReferenceIndex(3);
+ Assert.assertTrue(sam.getReferenceIndex().equals(3));
+ Assert.assertTrue(sam.getReferenceName().equals("chr4"));
+ }
+
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testInvalidReferenceIndex() {
+ // unresolvable reference
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setReferenceIndex(9999);
+ }
+
+ @Test
+ public void testMateReferenceName() {
+ // NO_ALIGNMENT_NAME
+ SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+
+ // valid reference
+ sam = createTestRecordHelper();
+ sam.setMateReferenceName("chr4");
+ Assert.assertTrue(sam.getMateReferenceName().equals("chr4"));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(3));
+
+ // unresolvable reference
+ sam = createTestRecordHelper();
+ sam.setMateReferenceName("unresolvableName");
+ Assert.assertTrue(sam.getMateReferenceName().equals("unresolvableName"));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ }
+
+ @Test
+ public void testMateReferenceIndex() {
+ // NO_ALIGNMENT_REFERENCE
+ SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+
+ // valid reference
+ sam = createTestRecordHelper();
+ sam.setMateReferenceIndex(3);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(3));
+ Assert.assertTrue(sam.getMateReferenceName().equals("chr4"));
+ }
+
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testInvalidMateReferenceIndex() {
+ // unresolvable reference
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceIndex(9999);
+ }
+
+ @Test
+ public void testRecordValidation() {
+ final SAMRecord sam = createTestRecordHelper(); // untouched helper record, expected to be valid
+ final List<SAMValidationError> validationErrors = sam.isValid(false); // null is the "no errors" result expected here
+ Assert.assertNull(validationErrors, "Unexpected validation errors: " + validationErrors);
+ }
+
+ @Test
+ public void testInvalidAlignmentStartValidation() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setAlignmentStart(0);
+ List<SAMValidationError> validationErrors = sam.isValid(false);
+ Assert.assertTrue(validationErrors != null && validationErrors.size() == 1);
+ }
+
+ // ----------------- NULL header tests ---------------------
+
+ @Test
+ public void testNullHeaderReferenceName() {
+ final SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ Assert.assertTrue(null != samHeader);
+ final String originalRefName = sam.getReferenceName();
+
+ // setting the header to null retains the previously assigned reference name
+ sam.setHeader(null);
+ Assert.assertTrue(originalRefName.equals(sam.getReferenceName()));
+
+ // with a null header the reference name can still be set to NO_ALIGNMENT_REFERENCE_NAME,
+ sam.setReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)); // ...and the index follows
+
+ // with a null header the reference name can also be reset to a valid name
+ sam.setReferenceName(originalRefName);
+ Assert.assertTrue(sam.getReferenceName().equals(originalRefName));
+ }
+
+ @Test
+ public void testNullHeaderReferenceIndex() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ int originalRefIndex = sam.getReferenceIndex();
+ Assert.assertTrue(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX != originalRefIndex);
+
+ // setting the header to null resets the cached reference index field to null
+ sam.setHeader(null);
+ Assert.assertTrue(null == sam.mReferenceIndex);
+ // restoring the header restores the reference index to its original value
+ sam.setHeader(samHeader);
+ Assert.assertTrue(sam.getReferenceIndex().equals(originalRefIndex));
+
+ // setting the header to null allows setting the reference index to NO_ALIGNMENT_REFERENCE_INDEX
+ sam.setHeader(null);
+ sam.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+
+ // a record constructed with a null header starts with the internal reference index in its (null) initial state
+ sam = new SAMRecord(null);
+ Assert.assertTrue(null == sam.mReferenceIndex);
+ Assert.assertTrue(sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+
+ // setting an unresolvable reference name doesn't throw
+ final String unresolvableRefName = "unresolvable";
+ sam.setReferenceName(unresolvableRefName);
+ // now force the SAMRecord to try to resolve the unresolvable name against a real header
+ sam.setHeader(samHeader);
+ Assert.assertTrue(null == sam.mReferenceIndex);
+ Assert.assertTrue(sam.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderSetReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ // setReferenceIndex with null header throws
+ sam.setReferenceIndex(3);
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderGetReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ // getReferenceIndex with null header throws
+ sam.getReferenceIndex();
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderForceIndexResolutionFailure() {
+ // force the internal SAMRecord reference index value to null initial state
+ final SAMRecord sam = new SAMRecord(null);
+ sam.setReferenceName("unresolvable");
+ sam.getReferenceIndex();
+ }
+
+ @Test
+ public void testNullHeaderMateReferenceName() {
+ final SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ Assert.assertTrue(null != samHeader);
+ final String originalMateRefName = sam.getMateReferenceName();
+
+ // setting the header to null retains the previously assigned mate reference name
+ sam.setHeader(null);
+ Assert.assertTrue(originalMateRefName.equals(sam.getMateReferenceName()));
+
+ // with a null header the mate reference name can still be set to NO_ALIGNMENT_REFERENCE_NAME,
+ sam.setMateReferenceName(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)); // ...and the index follows
+
+ // with a null header the mate reference name can also be reset to a valid name
+ sam.setMateReferenceName(originalMateRefName);
+ Assert.assertTrue(sam.getMateReferenceName().equals(originalMateRefName));
+ }
+
+ @Test
+ public void testNullHeaderMateReferenceIndex() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ sam.setMateReferenceName("chr1");
+ int originalMateRefIndex = sam.getMateReferenceIndex();
+ Assert.assertTrue(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX != originalMateRefIndex);
+
+ // setting the header to null resets the cached mate reference index field to null
+ sam.setHeader(null);
+ Assert.assertTrue(null == sam.mMateReferenceIndex);
+ // restoring the header restores the mate reference index to its original value
+ sam.setHeader(samHeader);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(originalMateRefIndex));
+
+ // setting the header to null allows setting the mate reference index to NO_ALIGNMENT_REFERENCE_INDEX
+ sam.setHeader(null);
+ sam.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ Assert.assertTrue(sam.getMateReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(sam.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME));
+
+ // a record constructed with a null header starts with the internal mate reference index in its (null) initial state
+ sam = new SAMRecord(null);
+ Assert.assertTrue(null == sam.mMateReferenceIndex);
+ Assert.assertTrue(sam.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+
+ // setting an unresolvable mate reference name doesn't throw
+ final String unresolvableRefName = "unresolvable";
+ sam.setMateReferenceName(unresolvableRefName);
+ // now force the SAMRecord to try to resolve the unresolvable mate reference name against a real header
+ sam.setHeader(samHeader);
+ Assert.assertTrue(null == sam.mMateReferenceIndex);
+ Assert.assertTrue(sam.getMateReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderSetMateReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ sam.setMateReferenceIndex(3);
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderGetMateReferenceIndex() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setMateReferenceName("chr1");
+ sam.setHeader(null);
+ // getMateReferenceIndex with null header throws
+ sam.getMateReferenceIndex();
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testNullHeaderForceMateIndexResolutionFailure() {
+ // a record built without a header leaves the internal mate reference index in its null initial state
+ final SAMRecord sam = new SAMRecord(null);
+ sam.setMateReferenceName("unresolvable");
+ sam.getMateReferenceIndex(); // expected to throw: this name cannot be resolved to an index without a header
+ }
+
+ @Test
+ public void testNullHeaderGetReadGroup() {
+ final SAMRecord sam = createTestRecordHelper();
+ Assert.assertTrue(null != sam.getHeader());
+
+ Assert.assertTrue(null != sam.getReadGroup() && sam.getReadGroup().getId().equals("1"));
+ sam.setHeader(null);
+ Assert.assertNull(sam.getReadGroup());
+ }
+
+ @Test(dataProvider = "serializationTestData")
+ public void testNullHeaderSerialization(final File inputFile) throws Exception {
+ final SamReader reader = SamReaderFactory.makeDefault().open(inputFile);
+ final SAMRecord initialSAMRecord = reader.iterator().next();
+ reader.close();
+
+ initialSAMRecord.setHeader(null);
+ final SAMRecord deserializedSAMRecord = TestUtil.serializeAndDeserialize(initialSAMRecord);
+ Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
+ }
+
+ @Test
+ public void testNullHeaderRecordValidation() {
+ final SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null); // validation must also cope with a record that has no header
+ final List<SAMValidationError> validationErrors = sam.isValid(false); // null is the "no errors" result expected here
+ Assert.assertNull(validationErrors, "Unexpected validation errors: " + validationErrors);
+ }
+
+ @Test
+ private SAMRecord testNullHeaderDeepCopy() {
+ SAMRecord sam = createTestRecordHelper();
+ sam.setHeader(null);
+ final SAMRecord deepCopy = sam.deepCopy();
+
+ // force the indexing bins to be computed in order to satisfy equality test
+ sam.setIndexingBin(sam.computeIndexingBin());
+ deepCopy.setIndexingBin(deepCopy.computeIndexingBin());
+ Assert.assertTrue(sam.equals(deepCopy));
+
+ return deepCopy;
+ }
+
+ private void testNullHeaderCigar(SAMRecord rec) {
+ Cigar origCigar = rec.getCigar();
+ Assert.assertNotNull(origCigar);
+ String originalCigarString = rec.getCigarString();
+
+ // set the cigar to null and then reset the cigar string in order to force getCigar to decode it
+ rec.setCigar(null);
+ Assert.assertNull(rec.getCigar());
+ rec.setCigarString(originalCigarString);
+ rec.setValidationStringency(ValidationStringency.STRICT);
+ rec.setHeader(null);
+ Assert.assertTrue(rec.getValidationStringency() == ValidationStringency.STRICT);
+
+ // force getCigar to decode the cigar string, validate that SAMRecord doesn't try to validate the cigar
+ Cigar cig = rec.getCigar();
+ Assert.assertNotNull(cig);
+ String cigString = TextCigarCodec.encode(cig);
+ Assert.assertEquals(cigString, originalCigarString);
+ }
+
+ @Test
+ private void testNullHeadGetCigarSAM() {
+ SAMRecord sam = createTestRecordHelper();
+ testNullHeaderCigar(sam);
+ }
+
+ @Test
+ private void testNullHeadGetCigarBAM() {
+ SAMRecord sam = createTestRecordHelper();
+ SAMRecordFactory factory = new DefaultSAMRecordFactory();
+ BAMRecord bamRec = factory.createBAMRecord(
+ sam.getHeader(),
+ sam.getReferenceIndex(),
+ sam.getAlignmentStart(),
+ (short) sam.getReadNameLength(),
+ (short) sam.getMappingQuality(),
+ 0,
+ sam.getCigarLength(),
+ sam.getFlags(),
+ sam.getReadLength(),
+ sam.getMateReferenceIndex(),
+ sam.getMateAlignmentStart(),
+ 0, null);
+
+ bamRec.setCigarString(sam.getCigarString());
+
+ testNullHeaderCigar(bamRec);
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java b/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java
index 28b0745..46ce5be 100644
--- a/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMTextWriterTest.java
@@ -44,7 +44,19 @@ public class SAMTextWriterTest {
@Test
public void testBasic() throws Exception {
+ doTest(getSAMReader(true, SAMFileHeader.SortOrder.coordinate));
+ }
+
+ @Test
+ public void testNullHeader() throws Exception {
final SAMRecordSetBuilder recordSetBuilder = getSAMReader(true, SAMFileHeader.SortOrder.coordinate);
+ for (final SAMRecord rec : recordSetBuilder.getRecords()) {
+ rec.setHeader(null);
+ }
+ doTest(recordSetBuilder);
+ }
+
+ private void doTest(final SAMRecordSetBuilder recordSetBuilder) throws Exception{
SamReader inputSAM = recordSetBuilder.getSamReader();
final File samFile = File.createTempFile("tmp.", ".sam");
samFile.deleteOnExit();
diff --git a/src/tests/java/htsjdk/samtools/SAMUtilsTest.java b/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
index 8c0096c..441d662 100644
--- a/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
@@ -26,6 +26,8 @@ package htsjdk.samtools;
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.util.Arrays;
+
public class SAMUtilsTest {
@Test
public void testCompareMapqs() {
@@ -42,4 +44,108 @@ public class SAMUtilsTest {
Assert.assertTrue(SAMUtils.compareMapqs(1, 255) > 0);
Assert.assertTrue(SAMUtils.compareMapqs(2, 1) > 0);
}
+
+ @Test
+ public void testSimpleClippingOfRecord() {
+ // setup the record: a 10M read at position 1 whose mate starts at position 6
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("10M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(6); // should overlap 5M
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 5);
+
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+
+ Assert.assertEquals(record.getCigar(), TextCigarCodec.decode("5M5S")); // reports the actual cigar on failure
+ }
+
+ @Test
+ public void testClippingOfRecordWithSoftClipBasesAtTheEnd() {
+ // Tests that if we need to clip a read with soft-clipping at the end, it does the right thing:
+ // only the single aligned base overlapping the mate is clipped, and it is merged into the
+ // soft clip that is already present (5M5S becomes 4M6S).
+
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("5M5S"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(5); // should overlap 1M5S
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 1);
+
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+
+ Assert.assertEquals(record.getCigar(), TextCigarCodec.decode("4M6S")); // reports the actual cigar on failure
+ }
+
+ @Test
+ public void testClippingOfRecordWithInsertion() {
+ // Tests clipping of a read whose overlap with the mate contains an insertion:
+ // the inserted base is counted among the clipped bases (1M + 1I + 5M = 7),
+ // so 5M1I5M becomes 4M7S.
+
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("5M1I5M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(5); // should overlap the 1M1I5M
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAAA".getBytes());
+
+
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 7);
+
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+
+ Assert.assertEquals(record.getCigar(), TextCigarCodec.decode("4M7S")); // reports the actual cigar on failure
+
+ }
+
+ // Deletion case: covered by testClippingOfRecordWithDeletion below.
+
+ @Test
+ public void testClippingOfRecordWithDeletion() {
+ // Tests clipping of a read whose overlap with the mate contains a deletion:
+ // the expected clip count covers only the read bases of the overlap (1M + 5M = 6),
+ // so 5M1D5M becomes 4M6S.
+
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setCigar(TextCigarCodec.decode("5M1D5M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateAlignmentStart(5); // should overlap the 1M1D5M
+ record.setMateReferenceIndex(0);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+
+ final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ Assert.assertEquals(numToClip, 6);
+
+ SAMUtils.clipOverlappingAlignedBases(record, numToClip, false); // Side-effects are OK
+ Assert.assertEquals(record.getCigar(), TextCigarCodec.decode("4M6S")); // reports the actual cigar on failure
+
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/SamFilesTest.java b/src/tests/java/htsjdk/samtools/SamFilesTest.java
new file mode 100644
index 0000000..b37fc3a
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/SamFilesTest.java
@@ -0,0 +1,60 @@
+package htsjdk.samtools;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Test valid combinations of bam/cram vs bai/crai files.
+ * Created by vadim on 10/08/2015.
+ */
+public class SamFilesTest {
+
+ @DataProvider(name = "FindIndexParams")
+ public static Object[][] paramsFindIndexForSuffixes() {
+ return new Object[][]{
+ // no index available sanity checks:
+ {".tmp", null, null},
+ {".bam", null, null},
+ {".cram", null, null},
+
+ // legit cases for BAM files:
+ {".bam", ".bai", ".bai"},
+ {".bam", ".bam.bai", ".bam.bai"},
+
+ // legit cases for CRAM files:
+ {".cram", ".cram.bai", ".cram.bai"},
+ {".cram", ".cram.crai", ".cram.crai"},
+
+ // special prohibited cases:
+ {".bam", ".crai", null},
+ {".tmp", ".crai", null},
+ };
+ }
+
+ @Test(dataProvider = "FindIndexParams")
+ public void testFindIndexForSuffixes(final String dataFileSuffix, final String indexFileSuffix, final String expectIndexSuffix) throws IOException {
+ final File dataFile = File.createTempFile("test", dataFileSuffix);
+ dataFile.deleteOnExit();
+ Assert.assertNull(SamFiles.findIndex(dataFile));
+
+ File indexFile = null;
+ if (indexFileSuffix != null) {
+ indexFile = new File(dataFile.getAbsolutePath().replaceFirst("\\.\\S+$", indexFileSuffix));
+ indexFile.createNewFile();
+ indexFile.deleteOnExit();
+ }
+
+ final File foundIndexFile = SamFiles.findIndex(dataFile);
+ if (expectIndexSuffix == null) {
+ Assert.assertNull(foundIndexFile);
+ return;
+ }
+
+ Assert.assertNotNull(foundIndexFile);
+ Assert.assertTrue(foundIndexFile.getName().endsWith(expectIndexSuffix));
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/SamIndexesTest.java b/src/tests/java/htsjdk/samtools/SamIndexesTest.java
new file mode 100644
index 0000000..7e0bc85
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/SamIndexesTest.java
@@ -0,0 +1,192 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.cram.CRAIEntry;
+import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.seekablestream.SeekableFileStream;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+import htsjdk.samtools.util.IOUtil;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.GZIPOutputStream;
+
+public class SamIndexesTest {
+
+ @Test
+ public void testEmptyBai() throws IOException {
+ // A stream holding nothing but the BAI magic bytes is the input under test; it is built
+ // in memory, so no temporary file is needed.
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ baos.write(SamIndexes.BAI.magic);
+ baos.close();
+
+ // no sequence dictionary is passed (null) for this BAI input
+ final InputStream inputStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), null);
+ Assert.assertNotNull(inputStream);
+
+ // the returned stream must replay the magic bytes unchanged
+ for (final byte b : SamIndexes.BAI.magic) {
+ Assert.assertEquals(inputStream.read(), 0xFF & b);
+ }
+ inputStream.close();
+ }
+
+ @Test(expectedExceptions = NullPointerException.class)
+ public void testCraiRequiresDictionary() throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ gos.close();
+
+ SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), null);
+ }
+
+ @Test
+ public void testEmptyCraiReadAsBai() throws IOException {
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ gos.close();
+
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ final InputStream inputStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), dictionary);
+ for (final byte b : SamIndexes.BAI.magic) {
+ Assert.assertEquals(inputStream.read(), 0xFF & b);
+ }
+ }
+
+ @Test
+ public void testCraiInMemory() throws IOException {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+
+ final InputStream baiStream = SamIndexes.asBaiStreamOrNull(new ByteArrayInputStream(baos.toByteArray()), dictionary);
+ Assert.assertNotNull(baiStream);
+
+ baos = new ByteArrayOutputStream();
+ IOUtil.copyStream(baiStream, baos);
+ final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
+
+ @Test
+ public void testCraiFromFile() throws IOException {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+
+ final File file = File.createTempFile("test", ".crai");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ final GZIPOutputStream gos = new GZIPOutputStream(fos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+
+ final SeekableStream baiStream = SamIndexes.asBaiSeekableStreamOrNull(new SeekableFileStream(file), dictionary);
+ Assert.assertNotNull(baiStream);
+
+ final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(baiStream, dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
+
+ @Test(expectedExceptions = NullPointerException.class)
+ public void testOpenIndexFileAsBaiOrNull_NPE() throws IOException {
+ final SAMSequenceDictionary sequences = new SAMSequenceDictionary();
+ sequences.addSequence(new SAMSequenceRecord("1", 100));
+ SamIndexes.openIndexFileAsBaiOrNull(null, sequences); // a null index file must raise the expected NullPointerException
+ }
+
+ @Test
+ public void testOpenIndexFileAsBaiOrNull_ReturnsNull() throws IOException {
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+ File file = File.createTempFile("test", ".notbai");
+ file.deleteOnExit();
+ Assert.assertNull(SamIndexes.openIndexFileAsBaiOrNull(file, dictionary));
+ file.delete();
+
+ file = File.createTempFile("test", ".notcrai");
+ file.deleteOnExit();
+ Assert.assertNull(SamIndexes.openIndexFileAsBaiOrNull(file, dictionary));
+ file.delete();
+ }
+
+ @Test
+ public void testOpenIndexUrlAsBaiOrNull() throws IOException {
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+
+
+ final File file = File.createTempFile("test", ".crai");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ final GZIPOutputStream gos = new GZIPOutputStream(fos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+
+ final InputStream baiStream = SamIndexes.openIndexUrlAsBaiOrNull(file.toURI().toURL(), dictionary);
+ Assert.assertNotNull(baiStream);
+
+ final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ IOUtil.copyStream(baiStream, baos);
+ final CachingBAMFileIndex bamIndex = new CachingBAMFileIndex(new SeekableMemoryStream(baos.toByteArray(), null), dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java b/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java
index 85d3c53..fc3d37b 100644
--- a/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java
+++ b/src/tests/java/htsjdk/samtools/SamReaderFactoryTest.java
@@ -110,6 +110,25 @@ public class SamReaderFactoryTest {
else if (inputFile.endsWith(".bam")) Assert.assertEquals(recordFactory.bamRecordsCreated, i);
}
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void samRecordFactoryNullHeaderBAMTest() {
+ final SAMRecordFactory recordFactory = new DefaultSAMRecordFactory();
+ recordFactory.createBAMRecord(
+ null, // null header
+ 0,
+ 0,
+ (short) 0,
+ (short) 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ null);
+ }
+
/**
* Unit tests for asserting all permutations of data and index sources read the same records and header.
@@ -132,8 +151,14 @@ public class SamReaderFactoryTest {
public Object[][] composeAllPermutationsOfSamInputResource() {
final List<SamInputResource> sources = new ArrayList<SamInputResource>();
for (final InputResource.Type dataType : InputResource.Type.values()) {
+ if (dataType.equals(InputResource.Type.SRA_ACCESSION))
+ continue;
+
sources.add(new SamInputResource(composeInputResourceForType(dataType, false)));
for (final InputResource.Type indexType : InputResource.Type.values()) {
+ if (indexType.equals(InputResource.Type.SRA_ACCESSION))
+ continue;
+
sources.add(new SamInputResource(
composeInputResourceForType(dataType, false),
composeInputResourceForType(indexType, true)
diff --git a/src/tests/java/htsjdk/samtools/SamSpecIntTest.java b/src/tests/java/htsjdk/samtools/SamSpecIntTest.java
index 154b69d..9be6d6c 100644
--- a/src/tests/java/htsjdk/samtools/SamSpecIntTest.java
+++ b/src/tests/java/htsjdk/samtools/SamSpecIntTest.java
@@ -61,7 +61,7 @@ public class SamSpecIntTest {
CloserUtil.close(samReader);
samWriter.close();
bamWriter.close();
- Assert.assertEquals(errorMessages.size(), 2);
+ Assert.assertEquals(errorMessages.size(), 0);
bamOutput.deleteOnExit();
samOutput.deleteOnExit();
}
@@ -89,7 +89,7 @@ public class SamSpecIntTest {
CloserUtil.close(bamReader);
samWriter.close();
bamWriter.close();
- Assert.assertEquals(errorMessages.size(), 2);
+ Assert.assertEquals(errorMessages.size(), 0);
bamOutput.deleteOnExit();
samOutput.deleteOnExit();
}
diff --git a/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
new file mode 100644
index 0000000..c27f2f2
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
@@ -0,0 +1,145 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.Slice;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Created by vadim on 25/08/2015.
+ */
+public class CRAIEntryTest {
+
+ @Test
+ public void testFromContainer() {
+ final Container container = new Container();
+ final Slice slice = new Slice();
+ slice.sequenceId = 1;
+ slice.alignmentStart = 2;
+ slice.alignmentSpan = 3;
+ slice.containerOffset = 4;
+ container.landmarks = new int[]{5};
+ container.slices = new Slice[]{slice};
+
+ final List<CRAIEntry> entries = CRAIEntry.fromContainer(container);
+ Assert.assertNotNull(entries);
+ Assert.assertEquals(entries.size(), 1);
+ final CRAIEntry entry = entries.get(0);
+
+ Assert.assertEquals(entry.sequenceId, slice.sequenceId);
+ Assert.assertEquals(entry.alignmentStart, slice.alignmentStart);
+ Assert.assertEquals(entry.alignmentSpan, slice.alignmentSpan);
+ Assert.assertEquals(entry.containerStartOffset, slice.containerOffset);
+ }
+
+    /**
+     * Formats six tab-separated fields into a CRAI index line, parses it with
+     * {@code CRAIEntry.fromCraiLine} and checks the parsed fields against the inputs.
+     * The container offset is chosen to be larger than {@code Integer.MAX_VALUE} so that
+     * offsets beyond the int range are exercised.
+     */
+    @Test
+    public void testFromCraiLine() {
+        int counter = 1;
+        final int sequenceId = counter++;
+        final int alignmentStart = counter++;
+        final int alignmentSpan = counter++;
+        // Widen before adding: in int arithmetic this sum would wrap around to a negative value.
+        final long containerOffset = (long) Integer.MAX_VALUE + counter++;
+        final int sliceOffset = counter++;
+        final int sliceSize = counter++;
+
+        final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSize);
+        final CRAIEntry entry = CRAIEntry.fromCraiLine(line);
+        Assert.assertNotNull(entry);
+        Assert.assertEquals(entry.sequenceId, sequenceId);
+        Assert.assertEquals(entry.alignmentStart, alignmentStart);
+        Assert.assertEquals(entry.alignmentSpan, alignmentSpan);
+        Assert.assertEquals(entry.containerStartOffset, containerOffset);
+        // The last two columns are part of the line as well, so verify they round-trip too.
+        Assert.assertEquals(entry.sliceOffset, sliceOffset);
+        Assert.assertEquals(entry.sliceSize, sliceSize);
+    }
+
+ @Test
+ public void testIntersetcsZeroSpan() {
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 0)));
+ }
+
+ @Test
+ public void testIntersetcsSame() {
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 1)));
+ }
+
+ @Test
+ public void testIntersetcsIncluded() {
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(1, 1)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1)));
+
+ // is symmetrical?
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 1), newEntry(1, 2)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(2, 1), newEntry(1, 2)));
+ }
+
+
+ @Test
+ public void testIntersetcsOvertlaping() {
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 1)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 2)));
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(1, 2), newEntry(2, 1)));
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(3, 1)));
+ }
+
+ @Test
+ public void testIntersetcsAnotherSequence() {
+ Assert.assertTrue(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(10, 2, 1)));
+ Assert.assertFalse(CRAIEntry.intersect(newEntry(10, 1, 2), newEntry(11, 2, 1)));
+ }
+
+ @Test
+ public void testCompareTo () {
+ final List<CRAIEntry> list = new ArrayList<CRAIEntry>(2);
+ CRAIEntry e1;
+ CRAIEntry e2;
+
+ e1 = new CRAIEntry();
+ e1.sequenceId = 100;
+ e2 = new CRAIEntry();
+ e2.sequenceId = 200;
+ list.add(e2);
+ list.add(e1);
+ Assert.assertTrue(list.get(1).sequenceId < list.get(0).sequenceId);
+ Collections.sort(list);
+ Assert.assertTrue(list.get(0).sequenceId < list.get(1).sequenceId);
+
+ list.clear();
+ e1 = new CRAIEntry();
+ e1.alignmentStart = 100;
+ e2 = new CRAIEntry();
+ e2.alignmentStart = 200;
+ list.add(e2);
+ list.add(e1);
+ Assert.assertTrue(list.get(1).alignmentStart < list.get(0).alignmentStart);
+ Collections.sort(list);
+ Assert.assertTrue(list.get(0).alignmentStart < list.get(1).alignmentStart);
+
+ list.clear();
+ e1 = new CRAIEntry();
+ e1.containerStartOffset = 100;
+ e2 = new CRAIEntry();
+ e2.containerStartOffset = 200;
+ list.add(e2);
+ list.add(e1);
+ Assert.assertTrue(list.get(1).containerStartOffset < list.get(0).containerStartOffset);
+ Collections.sort(list);
+ Assert.assertTrue(list.get(0).containerStartOffset < list.get(1).containerStartOffset);
+ }
+
+ private static CRAIEntry newEntry(final int start, final int span) {
+ return newEntry(1, start, span);
+ }
+
+ private static CRAIEntry newEntry(final int seqId, final int start, final int span) {
+ final CRAIEntry e1 = new CRAIEntry();
+ e1.sequenceId = seqId;
+ e1.alignmentStart = start;
+ e1.alignmentSpan = span;
+ return e1;
+ }
+
+
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
new file mode 100644
index 0000000..8989963
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
@@ -0,0 +1,133 @@
+package htsjdk.samtools.cram;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by vadim on 25/08/2015.
+ */
+public class CRAIIndexTest {
+
+ @Test
+ public void testFind() throws IOException, CloneNotSupportedException {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+
+ final int sequenceId = 1;
+ CRAIEntry e = new CRAIEntry();
+ e.sequenceId = sequenceId;
+ e.alignmentStart = 1;
+ e.alignmentSpan = 1;
+ e.containerStartOffset = 1;
+ e.sliceOffset = 1;
+ e.sliceSize = 0;
+ index.add(e);
+
+ e = e.clone();
+ e.sequenceId = sequenceId;
+ e.alignmentStart = 2;
+ e.alignmentSpan = 1;
+ e.containerStartOffset = 2;
+ e.sliceOffset = 1;
+ e.sliceSize = 0;
+ index.add(e);
+
+ e = e.clone();
+ e.sequenceId = sequenceId;
+ e.alignmentStart = 3;
+ e.alignmentSpan = 1;
+ e.containerStartOffset = 3;
+ e.sliceOffset = 1;
+ e.sliceSize = 0;
+ index.add(e);
+
+ Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 0));
+
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 1));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 2));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 2, 1));
+ Assert.assertTrue(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 3));
+
+ final int nonExistentSequenceId = 2;
+ Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, nonExistentSequenceId, 2, 1));
+ // a query starting beyond all entries:
+ Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 4, 1));
+ }
+
+ private boolean allFoundEntriesIntersectQueryInFind(final List<CRAIEntry> index, final int sequenceId, final int start, final int span) {
+ int foundCount = 0;
+ for (final CRAIEntry found : CRAIIndex.find(index, sequenceId, start, span)) {
+ foundCount++;
+ Assert.assertEquals(found.sequenceId, sequenceId);
+ boolean intersects = false;
+ for (int pos = Math.min(found.alignmentStart, start); pos <= Math.max(found.alignmentStart + found.alignmentSpan, start + span); pos++) {
+ if (pos >= found.alignmentStart && pos >= start &&
+ pos <= found.alignmentStart + found.alignmentSpan && pos <= start + span) {
+ intersects = true;
+ break;
+ }
+ }
+ if (!intersects) {
+ return false;
+ }
+ }
+ return foundCount > 0;
+ }
+
+ @Test
+ public void testGetLeftmost() {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ Assert.assertNull(CRAIIndex.getLeftmost(index));
+
+ final CRAIEntry e1 = new CRAIEntry();
+ e1.sequenceId = 1;
+ e1.alignmentStart = 2;
+ e1.alignmentSpan = 3;
+ e1.containerStartOffset = 4;
+ e1.sliceOffset = 5;
+ e1.sliceSize = 6;
+ index.add(e1);
+ // trivial case of single entry in index:
+ Assert.assertEquals(e1, CRAIIndex.getLeftmost(index));
+
+ final CRAIEntry e2 = new CRAIEntry();
+ e2.sequenceId = 1;
+ e2.alignmentStart = e1.alignmentStart + 1;
+ e2.alignmentSpan = 3;
+ e2.containerStartOffset = 4;
+ e2.sliceOffset = 5;
+ e2.sliceSize = 6;
+ index.add(e2);
+ Assert.assertEquals(e1, CRAIIndex.getLeftmost(index));
+ }
+
+ /**
+  * Checks {@code CRAIIndex.findLastAlignedEntry}: the test expects -1 for an empty index and, for an
+  * index whose first entries have sequenceId 0 and whose remaining entries have sequenceId -1,
+  * the position of the last entry with sequenceId 0.
+  */
+ @Test
+ public void testFindLastAlignedEntry() {
+ final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ Assert.assertEquals(-1, CRAIIndex.findLastAlignedEntry(index));
+
+ // Scan all allowed combinations of 10 mapped/unmapped entries and assert the found last aligned entry:
+ final int indexSize = 10;
+ for (int lastAligned = 0; lastAligned < indexSize; lastAligned++) {
+ index.clear();
+ for (int i = 0; i < indexSize; i++) {
+ final CRAIEntry e = new CRAIEntry();
+
+ // entries 0..lastAligned get sequenceId 0, all later entries get sequenceId -1:
+ e.sequenceId = (i <= lastAligned ? 0 : -1);
+ e.alignmentStart = i;
+ index.add(e);
+ }
+ // check expectations are correct before calling findLastAlignedEntry method:
+ Assert.assertTrue(index.get(lastAligned).sequenceId != -1);
+ // the entry after lastAligned exists only when lastAligned is not the final position:
+ if (lastAligned < index.size() - 1) {
+ Assert.assertTrue(index.get(lastAligned + 1).sequenceId == -1);
+ }
+ // assert that the found value matches the expectation:
+ Assert.assertEquals(CRAIIndex.findLastAlignedEntry(index), lastAligned);
+ }
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/build/CramIOTest.java b/src/tests/java/htsjdk/samtools/cram/build/CramIOTest.java
new file mode 100644
index 0000000..1035f24
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/build/CramIOTest.java
@@ -0,0 +1,82 @@
+package htsjdk.samtools.cram.build;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.cram.common.CramVersions;
+import htsjdk.samtools.cram.structure.CramHeader;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+/**
+ * Created by vadim on 25/08/2015.
+ */
+public class CramIOTest {
+ @Test
+ public void testCheckHeaderAndEOF_v2() throws IOException {
+ final String id = "testid";
+
+ final CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v2_1, id, new SAMFileHeader());
+ final File file = File.createTempFile("test", ".cram");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ CramIO.writeCramHeader(cramHeader, fos);
+ CramIO.issueEOF(cramHeader.getVersion(), fos);
+ fos.close();
+
+ Assert.assertTrue(CramIO.checkHeaderAndEOF(file));
+ file.delete();
+ }
+
+ @Test
+ public void testCheckHeaderAndEOF_v3() throws IOException {
+ final String id = "testid";
+
+ final CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v3, id, new SAMFileHeader());
+ final File file = File.createTempFile("test", ".cram");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ CramIO.writeCramHeader(cramHeader, fos);
+ CramIO.issueEOF(cramHeader.getVersion(), fos);
+ fos.close();
+
+ Assert.assertTrue(CramIO.checkHeaderAndEOF(file));
+ file.delete();
+ }
+
+    /**
+     * Writes a CRAM v3 header with an empty sequence dictionary followed by an EOF marker, replaces the
+     * header in place with one that carries a single sequence, and checks that the file length is
+     * unchanged, the file still passes {@code CramIO.checkHeaderAndEOF}, and the header read back
+     * contains the new sequence.
+     *
+     * @throws IOException if the temporary file cannot be created, written or read
+     */
+    @Test
+    public void testReplaceCramHeader() throws IOException {
+        final String id = "testid";
+
+        final CramHeader cramHeader = new CramHeader(CramVersions.CRAM_v3, id, new SAMFileHeader());
+        Assert.assertTrue(cramHeader.getSamFileHeader().getSequenceDictionary().isEmpty());
+        final File file = File.createTempFile("test", ".cram");
+        file.deleteOnExit();
+        final FileOutputStream fos = new FileOutputStream(file);
+        try {
+            CramIO.writeCramHeader(cramHeader, fos);
+            CramIO.issueEOF(cramHeader.getVersion(), fos);
+        } finally {
+            // Close even when writing fails so the file handle is not leaked.
+            fos.close();
+        }
+        final long length = file.length();
+
+        final SAMFileHeader samFileHeader = new SAMFileHeader();
+        final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord("1", 123);
+        samFileHeader.addSequence(sequenceRecord);
+        final String id2 = "testid2";
+        final CramHeader cramHeader2 = new CramHeader(CramVersions.CRAM_v3, id2, samFileHeader);
+        final boolean replaced = CramIO.replaceCramHeader(file, cramHeader2);
+        Assert.assertTrue(replaced);
+        // The replacement is expected to happen in place, without growing or shrinking the file.
+        Assert.assertEquals(file.length(), length);
+        Assert.assertTrue(CramIO.checkHeaderAndEOF(file));
+
+        final FileInputStream fis = new FileInputStream(file);
+        final CramHeader cramHeader3;
+        try {
+            cramHeader3 = CramIO.readCramHeader(fis);
+        } finally {
+            // The stream is opened by this test, so it is closed here.
+            fis.close();
+        }
+        Assert.assertEquals(cramHeader3.getVersion(), CramVersions.CRAM_v3);
+        Assert.assertFalse(cramHeader3.getSamFileHeader().getSequenceDictionary().isEmpty());
+        Assert.assertNotNull(cramHeader3.getSamFileHeader().getSequenceDictionary().getSequence(0));
+        Assert.assertEquals(cramHeader3.getSamFileHeader().getSequence(sequenceRecord.getSequenceName()).getSequenceLength(), sequenceRecord.getSequenceLength());
+        file.delete();
+    }
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java b/src/tests/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java
new file mode 100644
index 0000000..03360bd
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/structure/CramCompressionRecordTest.java
@@ -0,0 +1,68 @@
+package htsjdk.samtools.cram.structure;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
+import htsjdk.samtools.cram.encoding.readfeatures.InsertBase;
+import htsjdk.samtools.cram.encoding.readfeatures.Insertion;
+import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
+import htsjdk.samtools.cram.encoding.readfeatures.SoftClip;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+
+/**
+ * Created by vadim on 28/09/2015.
+ */
+public class CramCompressionRecordTest {
+ @Test
+ public void test_getAlignmentEnd() {
+ CramCompressionRecord r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.setSegmentUnmapped(true);
+ Assert.assertEquals(r.getAlignmentEnd(), SAMRecord.NO_ALIGNMENT_START);
+
+ r = new CramCompressionRecord();
+ int readLength = 100;
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1);
+
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ String softClip = "AAA";
+ r.readFeatures.add(new SoftClip(1, softClip.getBytes()));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - softClip.length());
+
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ int deletionLength = 5;
+ r.readFeatures.add(new Deletion(1, deletionLength));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 + deletionLength);
+
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ String insertion = "CCCCCCCCCC";
+ r.readFeatures.add(new Insertion(1, insertion.getBytes()));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - insertion.length());
+
+
+ r = new CramCompressionRecord();
+ r.alignmentStart = 1;
+ r.readLength = readLength;
+ r.setSegmentUnmapped(false);
+ r.readFeatures = new ArrayList<ReadFeature>();
+ r.readFeatures.add(new InsertBase(1, (byte) 'A'));
+ Assert.assertEquals(r.getAlignmentEnd(), r.readLength + r.alignmentStart - 1 - 1);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java b/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java
index 362678f..3ed0b40 100644
--- a/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java
+++ b/src/tests/java/htsjdk/samtools/cram/structure/ReadTagTest.java
@@ -25,6 +25,7 @@ package htsjdk.samtools.cram.structure;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.ValidationStringency;
import org.testng.Assert;
import org.testng.annotations.Test;
@@ -53,25 +54,37 @@ public class ReadTagTest {
byte[] data = ReadTag.writeSingleValue((byte) 'i', intValue, false);
ByteBuffer byteBuffer = ByteBuffer.wrap(data);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
- Object value = ReadTag.readSingleValue((byte) 'i', byteBuffer);
+ Object value = ReadTag.readSingleValue((byte) 'i', byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals (((Integer) value).intValue(), intValue);
String sValue = "value";
data = ReadTag.writeSingleValue((byte) 'Z', sValue, false);
byteBuffer = ByteBuffer.wrap(data);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
- value = ReadTag.readSingleValue((byte) 'Z', byteBuffer);
+ value = ReadTag.readSingleValue((byte) 'Z', byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals(sValue, value);
byte[] baValue = "value".getBytes();
data = ReadTag.writeSingleValue((byte) 'B', baValue, false);
byteBuffer = ByteBuffer.wrap(data);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
- value = ReadTag.readSingleValue((byte) 'B', byteBuffer);
+ value = ReadTag.readSingleValue((byte) 'B', byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals((byte[]) value, baValue);
}
@Test
+    public void testUnsignedInt() {
+        // Round-trips a value just above Integer.MAX_VALUE through the 'I' tag type and checks
+        // that it comes back as a Long holding the same 32-bit value.
+        long intValue = Integer.MAX_VALUE+1L;
+        byte[] data = ReadTag.writeSingleValue((byte) 'I', intValue, false);
+        ByteBuffer byteBuffer = ByteBuffer.wrap(data);
+        byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
+        Object value = ReadTag.readSingleValue((byte) 'I', byteBuffer, ValidationStringency.SILENT);
+        Assert.assertTrue(value instanceof Long);
+        long lValue = (Long)value;
+        // The mask has to be a long literal: the int literal 0xFFFFFFFF is -1, which widens to an
+        // all-ones long and would leave lValue unchanged instead of keeping only the low 32 bits.
+        Assert.assertEquals (lValue & 0xFFFFFFFFL, intValue);
+    }
+
+ @Test
public void testParallelReadTag() throws Exception {
// NOTE: testng 5.5 (circa 2007) doesn't support parallel data providers, but modern versions do.
// For now, roll our own.
@@ -109,7 +122,7 @@ public class ReadTagTest {
final byte[] data = ReadTag.writeSingleValue(tagType, originalValue, false);
final ByteBuffer byteBuffer = ByteBuffer.wrap(data);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
- final Object readValue = ReadTag.readSingleValue(tagType, byteBuffer);
+ final Object readValue = ReadTag.readSingleValue(tagType, byteBuffer, ValidationStringency.DEFAULT_STRINGENCY);
Assert.assertEquals(readValue, originalValue);
}
diff --git a/src/tests/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java b/src/tests/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java
new file mode 100644
index 0000000..bff8491
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/filter/OverclippedReadFilterTest.java
@@ -0,0 +1,83 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.filter;
+
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordSetBuilder;
+import org.testng.Assert;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class OverclippedReadFilterTest {
+ private final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ private final int unclippedBasesThreshold = 30;
+
+ private SAMRecord buildFrag(final String name, final String cigarString) {
+ // for this test, all we care about is the CIGAR
+ return builder.addFrag(name, 0, 1, false, false, cigarString, null, 30);
+ }
+
+ @Test(dataProvider = "data")
+ public void testOverclippedReadFilter(final String name, final String cigar, final boolean filterSingleEndClips, final boolean shouldFail) {
+ final OverclippedReadFilter filter = new OverclippedReadFilter(unclippedBasesThreshold, filterSingleEndClips);
+ final SAMRecord rec = buildFrag(name, cigar);
+ Assert.assertEquals(filter.filterOut(rec), shouldFail);
+ }
+
+ @DataProvider(name = "data")
+ private Object[][] testData() {
+ return new Object[][]{
+ {"foo", "1S10M1S", false, true},
+ {"foo", "1S10X1S", false, true},
+ {"foo", "1H1S10M1S1H", false, true},
+ {"foo", "1S40M1S", false, false},
+ {"foo", "1S40X1S", false, false},
+ {"foo", "1H10M1S", false, false},
+ {"foo", "1S10M1H", false, false},
+ {"foo", "10M1S", false, false},
+ {"foo", "1S10M", false, false},
+ {"foo", "10M1S", true, true},
+ {"foo", "1S10M", true, true},
+ {"foo", "1S10M10D10M1S", false, true},
+ {"foo", "1S1M40I1S", false, false},
+ {"foo", "1S10I1S", false, true},
+ {"foo", "1S40I1S", false, false},
+ {"foo", "1S40I1S", true, false},
+ {"foo", "25S40I25M", true, false},
+ {"foo", "25S25M", true, true},
+ {"foo", "25S25X", true, true},
+ {"foo", "25S25H", true, true},
+ {"foo", "25S25H", false, false},
+ {"foo", "25S25M25S", false, true},
+ {"foo", "25M25S", true, true},
+ {"foo", "25S25M", true, true},
+ {"foo", "25S35S", true, true},
+ {"foo", "25S35M25S", true, false},
+ {"foo", "35M25S", true, false},
+ {"foo", "25S35M", true, false}
+ };
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java b/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
index e3c81b6..2393031 100644
--- a/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
+++ b/src/tests/java/htsjdk/samtools/metrics/MetricsFileTest.java
@@ -97,6 +97,8 @@ public class MetricsFileTest {
MetricsFile<FloatingPointMetric,Integer> file2 = writeThenReadBack(file);
Assert.assertEquals(file, file2);
+
+
}
@Test
@@ -178,6 +180,22 @@ public class MetricsFileTest {
Assert.assertEquals(file, file3);
}
+ @Test
+ public void areMetricsFilesEqualTest(){
+ final File TEST_DIR = new File("testdata/htsjdk/samtools/metrics/");
+ final File file1 = new File(TEST_DIR,"metricsOne.metrics");
+ final File file2 = new File(TEST_DIR,"metricsOneCopy.metrics");
+ final File fileModifiedHist = new File(TEST_DIR,"metricsOneModifiedHistogram.metrics");
+ final File fileModifiedMet = new File(TEST_DIR,"metricsOneModifiedMetrics.metrics");
+
+ Assert.assertTrue(MetricsFile.areMetricsEqual(file1, file2));
+ Assert.assertTrue(MetricsFile.areMetricsEqual(file1, fileModifiedHist));
+
+ Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedHist));
+ Assert.assertFalse(MetricsFile.areMetricsEqual(file1, fileModifiedMet));
+ Assert.assertFalse(MetricsFile.areMetricsAndHistogramsEqual(file1, fileModifiedMet));
+ }
+
/** Helper method to persist metrics to file and read them back again. */
private <METRIC extends MetricBase> MetricsFile<METRIC, Integer> writeThenReadBack(MetricsFile<METRIC,Integer> in) throws IOException {
File f = File.createTempFile("test", ".metrics");
@@ -189,4 +207,7 @@ public class MetricsFileTest {
retval.read(new FileReader(f));
return retval;
}
+
+
+
}
diff --git a/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java b/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java
index c762c2f..511b1ab 100644
--- a/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/reference/FastaSequenceIndexTest.java
@@ -42,13 +42,17 @@ public class FastaSequenceIndexTest {
@DataProvider(name="homosapiens")
public Object[][] provideHomoSapiens() throws FileNotFoundException {
final File sequenceIndexFile = new File(TEST_DATA_DIR,"Homo_sapiens_assembly18.fasta.fai");
- return new Object[][] { new Object[] { new FastaSequenceIndex(sequenceIndexFile) } };
+ return new Object[][] { new Object[]
+ { new FastaSequenceIndex(sequenceIndexFile) },
+ { new FastaSequenceIndex(sequenceIndexFile.toPath()) } };
}
@DataProvider(name="specialcharacters")
public Object[][] provideSpecialCharacters() throws FileNotFoundException {
final File sequenceIndexFile = new File(TEST_DATA_DIR,"testing.fai");
- return new Object[][] { new Object[] { new FastaSequenceIndex(sequenceIndexFile) } };
+ return new Object[][] { new Object[]
+ { new FastaSequenceIndex(sequenceIndexFile) },
+ { new FastaSequenceIndex(sequenceIndexFile.toPath()) } };
}
@Test(dataProvider="homosapiens")
diff --git a/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java b/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java
index 9970bbc..5c1a9ac 100644
--- a/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java
+++ b/src/tests/java/htsjdk/samtools/reference/IndexedFastaSequenceFileTest.java
@@ -53,7 +53,9 @@ public class IndexedFastaSequenceFileTest{
public Object[][] provideSequenceFile() throws FileNotFoundException {
return new Object[][] { new Object[]
{ new IndexedFastaSequenceFile(SEQUENCE_FILE) },
- { new IndexedFastaSequenceFile(SEQUENCE_FILE_NODICT) }};
+ { new IndexedFastaSequenceFile(SEQUENCE_FILE_NODICT) },
+ { new IndexedFastaSequenceFile(SEQUENCE_FILE.toPath()) },
+ { new IndexedFastaSequenceFile(SEQUENCE_FILE_NODICT.toPath()) }};
}
@DataProvider(name="comparative")
@@ -62,7 +64,11 @@ public class IndexedFastaSequenceFileTest{
new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE),
new IndexedFastaSequenceFile(SEQUENCE_FILE) },
new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE, true),
- new IndexedFastaSequenceFile(SEQUENCE_FILE) },};
+ new IndexedFastaSequenceFile(SEQUENCE_FILE) },
+ new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE.toPath()),
+ new IndexedFastaSequenceFile(SEQUENCE_FILE.toPath()) },
+ new Object[] { ReferenceSequenceFileFactory.getReferenceSequenceFile(SEQUENCE_FILE.toPath(), true),
+ new IndexedFastaSequenceFile(SEQUENCE_FILE.toPath()) },};
}
@Test(dataProvider="homosapiens")
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java b/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
new file mode 100644
index 0000000..9cf0c28
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
@@ -0,0 +1,150 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.BAMFileSpan;
+import htsjdk.samtools.Bin;
+import htsjdk.samtools.GenomicIndexUtil;
+import htsjdk.samtools.SRAFileReader;
+import htsjdk.samtools.SRAIndex;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Unit tests for SRAIndex
+ *
+ * Created by andrii.nikitiuk on 10/28/15.
+ */
+public class SRAIndexTest {
+ private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR1298981");
+ private static final int LAST_BIN_LEVEL = GenomicIndexUtil.LEVEL_STARTS.length - 1;
+ private static final int SRA_BIN_OFFSET = GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL];
+
+ @Test
+ public void testLevelSize() {
+ if (!SRAAccession.isSupported()) return;
+
+ SRAIndex index = getIndex(DEFAULT_ACCESSION);
+ Assert.assertEquals(index.getLevelSize(0), GenomicIndexUtil.LEVEL_STARTS[1] - GenomicIndexUtil.LEVEL_STARTS[0]);
+
+ Assert.assertEquals(index.getLevelSize(LAST_BIN_LEVEL), GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL] - 1);
+ }
+
+ @Test
+ public void testLevelForBin() {
+ if (!SRAAccession.isSupported()) return;
+
+ SRAIndex index = getIndex(DEFAULT_ACCESSION);
+ Bin bin = new Bin(0, SRA_BIN_OFFSET);
+ Assert.assertEquals(index.getLevelForBin(bin), LAST_BIN_LEVEL);
+ }
+
+ @DataProvider(name = "testBinLocuses")
+ public Object[][] createDataForBinLocuses() {
+ return new Object[][] {
+ {DEFAULT_ACCESSION, 0, 0, 1, SRAIndex.SRA_BIN_SIZE},
+ {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2}
+ };
+ }
+
+ @Test(dataProvider = "testBinLocuses")
+ public void testBinLocuses(SRAAccession acc, int reference, int binIndex, int firstLocus, int lastLocus) {
+ if (!SRAAccession.isSupported()) return;
+
+ SRAIndex index = getIndex(acc);
+ Bin bin = new Bin(reference, SRA_BIN_OFFSET + binIndex);
+
+ Assert.assertEquals(index.getFirstLocusInBin(bin), firstLocus);
+ Assert.assertEquals(index.getLastLocusInBin(bin), lastLocus);
+ }
+
+ @DataProvider(name = "testBinOverlappings")
+ public Object[][] createDataForBinOverlappings() {
+ return new Object[][] {
+ {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new HashSet<Integer>(Arrays.asList(0))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2, new HashSet<Integer>(Arrays.asList(1))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 3, new HashSet<Integer>(Arrays.asList(1, 2))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new HashSet<Integer>(Arrays.asList(1, 2))}
+ };
+ }
+
+
+ @Test(dataProvider = "testBinOverlappings")
+ public void testBinOverlappings(SRAAccession acc, int reference, int firstLocus, int lastLocus, Set<Integer> binNumbers) {
+ if (!SRAAccession.isSupported()) return;
+
+ SRAIndex index = getIndex(acc);
+ Iterator<Bin> binIterator = index.getBinsOverlapping(reference, firstLocus, lastLocus).iterator();
+ Set<Integer> binNumbersFromIndex = new HashSet<Integer>();
+ while (binIterator.hasNext()) {
+ Bin bin = binIterator.next();
+ binNumbersFromIndex.add(bin.getBinNumber() - SRA_BIN_OFFSET);
+ }
+
+ Assert.assertEquals(binNumbers, binNumbersFromIndex);
+ }
+
+ @DataProvider(name = "testSpanOverlappings")
+ public Object[][] createDataForSpanOverlappings() {
+ return new Object[][] {
+ {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new long[] {0, SRAIndex.SRA_CHUNK_SIZE} },
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new long[]{0, SRAIndex.SRA_CHUNK_SIZE} },
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_CHUNK_SIZE, SRAIndex.SRA_CHUNK_SIZE + 1, new long[]{0, SRAIndex.SRA_CHUNK_SIZE, SRAIndex.SRA_CHUNK_SIZE, SRAIndex.SRA_CHUNK_SIZE * 2} },
+ };
+ }
+
+    /**
+     * Checks that the span returned by {@code SRAIndex.getSpanOverlapping} for the given reference and
+     * locus range has exactly the expected coordinate array. Returns immediately when
+     * {@code SRAAccession.isSupported()} is false.
+     *
+     * @param acc             accession to open the index for
+     * @param reference       reference index passed to the query
+     * @param firstLocus      first locus of the queried range
+     * @param lastLocus       last locus of the queried range
+     * @param spanCoordinates expected result of {@code BAMFileSpan.toCoordinateArray()}
+     */
+    @Test(dataProvider = "testSpanOverlappings")
+    public void testSpanOverlappings(SRAAccession acc, int reference, int firstLocus, int lastLocus, long[] spanCoordinates) {
+        if (!SRAAccession.isSupported()) return;
+
+        SRAIndex index = getIndex(acc);
+        BAMFileSpan span = index.getSpanOverlapping(reference, firstLocus, lastLocus);
+
+        long[] coordinatesFromIndex = span.toCoordinateArray();
+
+        // Compare the arrays element-wise and print both on mismatch.
+        Assert.assertTrue(Arrays.equals(coordinatesFromIndex, spanCoordinates),
+                "Coordinates mismatch. Expected: " + Arrays.toString(spanCoordinates) +
+                " but was : " + Arrays.toString(coordinatesFromIndex));
+    }
+
+ private SRAIndex getIndex(SRAAccession acc) {
+ SRAFileReader reader = new SRAFileReader(acc);
+ return (SRAIndex) reader.getIndex();
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java b/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
new file mode 100644
index 0000000..9b6dccb
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
@@ -0,0 +1,51 @@
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SRAFileReader;
+import htsjdk.samtools.util.TestUtil;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for SRA extension of SAMRecord objects which load fields on demand
+ */
+public class SRALazyRecordTest {
+ private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR1298981");
+
+ @DataProvider(name = "serializationTestData")
+ public Object[][] getSerializationTestData() {
+ return new Object[][] {
+ { DEFAULT_ACCESSION }
+ };
+ }
+
+ @Test(dataProvider = "serializationTestData")
+ public void testSerialization(SRAAccession accession) throws Exception {
+ SRAFileReader reader = new SRAFileReader(accession);
+ final SAMRecord initialSAMRecord = reader.getIterator().next();
+ reader.close();
+
+ final SAMRecord deserializedSAMRecord = TestUtil.serializeAndDeserialize(initialSAMRecord);
+
+ Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
+ }
+
+ @Test
+ public void testCloneAndEquals() throws Exception {
+ SRAFileReader reader = new SRAFileReader(DEFAULT_ACCESSION);
+ final SAMRecord record = reader.getIterator().next();
+ reader.close();
+
+ SAMRecord newRecord = (SAMRecord)record.clone();
+ Assert.assertFalse(record == newRecord);
+ Assert.assertNotSame(record, newRecord);
+ Assert.assertEquals(record, newRecord);
+ Assert.assertEquals(newRecord, record);
+
+ newRecord.setAlignmentStart(record.getAlignmentStart() + 100);
+ Assert.assertFalse(record.equals(newRecord));
+ Assert.assertFalse(newRecord.equals(record));
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java b/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
new file mode 100644
index 0000000..b37c37a
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
@@ -0,0 +1,116 @@
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SamInputResource;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.NoSuchElementException;
+
+public class SRAQueryTest {
+
+ @DataProvider(name = "testUnmappedCounts")
+ public Object[][] createDataForUnmappedCounts() {
+ return new Object[][] {
+ {"SRR2096940", 498}
+ };
+ }
+
+ @Test(dataProvider = "testUnmappedCounts")
+ public void testUnmappedCounts(String acc, int numberUnalignments) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.queryUnmapped();
+
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, 0, numberUnalignments);
+ }
+
+ @DataProvider(name = "testReferenceAlignedCounts")
+ public Object[][] createDataForReferenceAlignedCounts() {
+ return new Object[][] {
+ {"SRR2096940", "CM000681.1", 0, 10591},
+ {"SRR2096940", "CM000681.1", 55627015, 10591},
+ {"SRR2096940", "CM000681.1", 55627016, 0},
+ };
+ }
+
+ @Test(dataProvider = "testReferenceAlignedCounts")
+ public void testReferenceAlignedCounts(String acc, String reference, int refernceStart, int numberAlignments) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.queryAlignmentStart(reference, refernceStart);
+
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, 0);
+ }
+
+ @DataProvider(name = "testQueryCounts")
+ public Object[][] createDataForQueryCounts() {
+ return new Object[][] {
+ {"SRR2096940", "CM000681.1", 0, 59128983, true, 10591, 0},
+ {"SRR2096940", "CM000681.1", 55627015, 59128983, true, 10591, 0},
+ {"SRR2096940", "CM000681.1", 55627016, 59128983, true, 0, 0},
+ {"SRR2096940", "CM000681.1", 55627016, 59128983, false, 10591, -1},
+ };
+ }
+
+ @Test(dataProvider = "testQueryCounts")
+ public void testQueryCounts(String acc, String reference, int refernceStart, int referenceEnd, boolean contained, int numberAlignments, int numberUnalignment) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.query(reference, refernceStart, referenceEnd, contained);
+
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignment);
+ }
+
+ private void checkAlignedUnalignedCountsByIterator(SAMRecordIterator samRecordIterator,
+ int numberAlignments, int numberUnalignments) {
+ int countAlignments = 0, countUnalignments = 0;
+ while (true) {
+ boolean hasRecord = samRecordIterator.hasNext();
+ SAMRecord record = null;
+ try {
+ record = samRecordIterator.next();
+ Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
+ } catch (NoSuchElementException e) {
+ Assert.assertFalse(hasRecord);
+ }
+
+ Assert.assertEquals(hasRecord, record != null);
+
+ if (record == null) {
+ break;
+ }
+
+ if (record.getReadUnmappedFlag()) {
+ countUnalignments++;
+ } else {
+ countAlignments++;
+ }
+ }
+
+ if (numberAlignments != -1) {
+ Assert.assertEquals(numberAlignments, countAlignments);
+ }
+ if (numberUnalignments != -1) {
+ Assert.assertEquals(numberUnalignments, countUnalignments);
+ }
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java b/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
new file mode 100644
index 0000000..1313b4d
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
@@ -0,0 +1,25 @@
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SRAReferenceTest {
+ @DataProvider(name = "testReference")
+ public Object[][] createDataForReference() {
+ return new Object[][] {
+ {"SRR2096940", "CM000681.1", 95001, 95050, "AGATGATTCAGTCTCACCAAGAACACTGAAAGTCACATGGCTACCAGCAT"},
+ };
+ }
+
+ @Test(dataProvider = "testReference")
+ public void testReference(String acc, String refContig, int refStart, int refStop, String refBases) {
+ if (!SRAAccession.isSupported()) return;
+
+ ReferenceSequenceFile refSeqFile = new SRAIndexedSequenceFile(new SRAAccession(acc));
+ ReferenceSequence refSeq = refSeqFile.getSubsequenceAt(refContig, refStart, refStop);
+ Assert.assertEquals(new String(refSeq.getBases()), refBases);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRATest.java b/src/tests/java/htsjdk/samtools/sra/SRATest.java
new file mode 100644
index 0000000..86a5218
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRATest.java
@@ -0,0 +1,464 @@
+/*===========================================================================
+*
+* PUBLIC DOMAIN NOTICE
+* National Center for Biotechnology Information
+*
+* This software/database is a "United States Government Work" under the
+* terms of the United States Copyright Act. It was written as part of
+* the author's official duties as a United States Government employee and
+* thus cannot be copyrighted. This software/database is freely available
+* to the public for use. The National Library of Medicine and the U.S.
+* Government have not placed any restriction on its use or reproduction.
+*
+* Although all reasonable efforts have been taken to ensure the accuracy
+* and reliability of the software and data, the NLM and the U.S.
+* Government do not and cannot warrant the performance or results that
+* may be obtained by using this software or data. The NLM and the U.S.
+* Government disclaim all warranties, express or implied, including
+* warranties of performance, merchantability or fitness for any particular
+* purpose.
+*
+* Please cite the author in any work or product based on this material.
+*
+* ===========================================================================
+*
+*/
+
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.*;
+
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+/**
+ * Integration tests for SRA functionality
+ *
+ * Created by andrii.nikitiuk on 8/24/15.
+ */
+public class SRATest {
+
+ @DataProvider(name = "testCounts")
+ public Object[][] createDataForCounts() {
+ return new Object[][] {
+ {"SRR2096940", 10591, 498}
+ };
+ }
+
+ @Test(dataProvider = "testCounts")
+ public void testCounts(String acc, int numberAlignments, int numberUnalignments) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignments);
+ }
+
+ @DataProvider(name = "testCountsBySpan")
+ public Object[][] createDataForCountsBySpan() {
+ return new Object[][] {
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59128983), new Chunk(59128983, 59141089)), 10591, 498},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 29128983), new Chunk(29128983, 59141089)), 10591, 498},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59134983), new Chunk(59134983, 59141089)), 10591, 498},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59130000)), 10591, 0},
+ {"SRR2096940", Arrays.asList(new Chunk(0, 59140889)), 10591, 298}
+ };
+ }
+
+ @Test(dataProvider = "testCountsBySpan")
+ public void testCountsBySpan(String acc, List<Chunk> chunks, int numberAlignments, int numberUnalignments) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunks));
+
+ checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignments);
+ }
+
+ @DataProvider(name = "testGroups")
+ public Object[][] createDataForGroups() {
+ return new Object[][] {
+ {"SRR822962", new TreeSet<String>(Arrays.asList(
+ "GS54389-FS3-L08", "GS57511-FS3-L08", "GS54387-FS3-L02", "GS54387-FS3-L01",
+ "GS57510-FS3-L01", "GS57510-FS3-L03", "GS54389-FS3-L07", "GS54389-FS3-L05",
+ "GS54389-FS3-L06", "GS57510-FS3-L02", "GS57510-FS3-L04", "GS54387-FS3-L03",
+ "GS46253-FS3-L03"))
+ },
+ {"SRR2096940", new HashSet<String>(Arrays.asList("SRR2096940"))}
+ };
+ }
+
+ @Test(dataProvider = "testGroups")
+ public void testGroups(String acc, Set<String> groups) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+
+ SAMFileHeader header = reader.getFileHeader();
+ Set<String> headerGroups = new TreeSet<String>();
+ for (SAMReadGroupRecord group : header.getReadGroups()) {
+ Assert.assertEquals(group.getReadGroupId(), group.getId());
+ headerGroups.add(group.getReadGroupId());
+ }
+
+ Assert.assertEquals(groups, headerGroups);
+
+ Set<String> foundGroups = new TreeSet<String>();
+
+ for (int i = 0; i < 10000; i++) {
+ if (!samRecordIterator.hasNext()) {
+ break;
+ }
+ SAMRecord record = samRecordIterator.next();
+ String groupName = (String)record.getAttribute("RG");
+
+ foundGroups.add(groupName);
+ }
+
+ // please note that some groups may be introduced after 10k records, which is not an error
+ Assert.assertEquals(groups, foundGroups);
+ }
+
+ @DataProvider(name = "testReferences")
+ public Object[][] createDataForReferences() {
+ return new Object[][] {
+ // primary alignment only
+ {"SRR1063272", 1,
+ Arrays.asList("supercont2.1", "supercont2.2", "supercont2.3", "supercont2.4",
+ "supercont2.5", "supercont2.6", "supercont2.7", "supercont2.8",
+ "supercont2.9", "supercont2.10", "supercont2.11", "supercont2.12",
+ "supercont2.13", "supercont2.14"),
+ Arrays.asList(2291499, 1621675, 1575141, 1084805,
+ 1814975, 1422463, 1399503, 1398693,
+ 1186808, 1059964, 1561994, 774062,
+ 756744, 926563)},
+ };
+ }
+
+ @Test(dataProvider = "testReferences")
+ public void testReferences(String acc, int numberFirstReferenceFound, List<String> references, List<Integer> refLengths) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+
+ SAMFileHeader header = reader.getFileHeader();
+ Set<String> headerRefNames = new TreeSet<String>();
+
+ for (SAMSequenceRecord ref : header.getSequenceDictionary().getSequences()) {
+ String refName = ref.getSequenceName();
+
+ int refIndex = references.indexOf(refName);
+ Assert.assertTrue(refIndex != -1, "Unexpected reference: " + refName);
+
+ Assert.assertEquals(refLengths.get(refIndex), (Integer) ref.getSequenceLength(), "Reference length is incorrect");
+
+ headerRefNames.add(refName);
+ }
+
+ Assert.assertEquals(new TreeSet<String>(references), headerRefNames);
+
+ Set<String> foundRefNames = new TreeSet<String>();
+ for (int i = 0; i < 10000; i++) {
+ if (!samRecordIterator.hasNext()) {
+ break;
+ }
+ SAMRecord record = samRecordIterator.next();
+
+ if (record.getReferenceIndex().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)) {
+ continue;
+ }
+
+ String refName = record.getReferenceName();
+ Assert.assertNotNull(refName);
+
+ foundRefNames.add(refName);
+ }
+
+ Assert.assertEquals(new TreeSet<String>(references.subList(0, numberFirstReferenceFound)), foundRefNames);
+ }
+
+ @DataProvider(name = "testRows")
+ public Object[][] createDataForRowsTest() {
+ return new Object[][] {
+ // primary alignment only
+ {"SRR1063272", 0, 99, "SRR1063272.R.1",
+ "ACTCGACATTCTGCCTTCGACCTATCTTTCTCCTCTCCCAGTCATCGCCCAGTAGAATTACCAGGCAATGAACCAGGGCCTTCCATCCCAACGGCACAGCA",
+ "@@CDDBDFFBFHFIEEFGIGGHIEHIGIGGFGEGAFDHIIIIIGGGDFHII;=BF@FEHGIEEH?AHHFHFFFFDC5'5=?CC?ADCD@AC??9BDDCDB<",
+ 86, "101M", "supercont2.1", 60, true, false},
+
+ // small SRA archive
+ {"SRR2096940", 1, 16, "SRR2096940.R.3",
+ "GTGTGTCACCAGATAAGGAATCTGCCTAACAGGAGGTGTGGGTTAGACCCAATATCAGGAGACCAGGAAGGAGGAGGCCTAAGGATGGGGCTTTTCTGTCACCAATCCTGTCCCTAGTGGCCCCACTGTGGGGTGGAGGGGACAGATAAAAGTACCCAGAACCAGAG",
+ "AAAABFFFFFFFGGGGGGGGIIIIIIIIIIIIIIIIIIIIIIIIIIIIII7IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGGGGGFGFFDFFFFFC",
+ 55627016, "167M", "CM000681.1", 42, false, false},
+
+ {"SRR2096940", 10591, 4, "SRR2096940.R.10592",
+ "CTCTGGTTCTGGGTACTTTTATCTGTCCCCTCCACCCCACAGTGGCGAGCCAGATTCCTTATCTGGTGACACAC",
+ "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII",
+ -1, null, null, -1, false, false},
+
+ // primary and secondary alignments
+ {"SRR833251", 81, 393, "SRR833251.R.51",
+ "ATGCAAATCCGAATGGGCTATTTGTGGGTACTTGGGCAGGTAAGTAGCTGGCAATCTTGGTCGGTAAACCAATACCCAAGTTCACATAGGCACCATCGGGA",
+ "CCCFFFFFHHHHHIJJJIJJJJJIIJJJGIJIJIIJIJJJDGIGIIJIJIHIJJJJJJGIGHIHEDFFFFDDEEEDDDDDCDEEDDDDDDDDDDDDDBBDB",
+ 1787186, "38M63S", "gi|169794206|ref|NC_010410.1|", 11, true, true},
+
+ // local SRA file
+ {"testdata/htsjdk/samtools/sra/test_archive.sra", 1, 99, "test_archive.R.2",
+ "TGTCGATGCTGAAAGTGTCTGCGGTGAACCACTTCATGCACAGCGCACACTGCAGCTCCACTTCACCCAGCTGACGGCCGTTCTCATCGTCTCCAGAGCCCGTCTGAGCGTCCGCTGCTTCAGAACTGTCCCCGGCTGTATCCTGAAGAC",
+ "BBAABBBFAFFFGGGGGGGGGGGGEEFHHHHGHHHHHFHHGHFDGGGGGHHGHHHHHHHHHHHHFHHHGHHHHHHGGGGGGGHGGHHHHHHHHHGHHHHHGGGGHGHHHGGGGGGGGGHHHHEHHHHHHHHHHGCGGGHHHHHHGBFFGF",
+ 2811570, "150M", "NC_007121.5", 60, true, false}
+ };
+ }
+
+ @Test(dataProvider = "testRows")
+ public void testRows(String acc, int recordIndex, int flags, String readName, String bases, String quals, int refStart, String cigar,
+ String refName, int mapQ, boolean hasMate, boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+
+ SAMRecord record = getRecordByIndex(acc, recordIndex, false);
+
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+
+ @Test(dataProvider = "testRows")
+ public void testRowsAfterIteratorDetach(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+
+ SAMRecord record = getRecordByIndex(acc, recordIndex, true);
+
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+
+ @Test(dataProvider = "testRows")
+ public void testRowsOverrideValues(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+
+ SAMRecord record = getRecordByIndex(acc, recordIndex, true);
+ SAMFileHeader header = record.getHeader();
+
+
+ record.setFlags(0);
+ record.setReadUnmappedFlag(refStart == -1);
+ record.setReadBases("C".getBytes());
+ record.setBaseQualities(SAMUtils.fastqToPhred("A"));
+ if (refStart == -1) {
+ checkSAMRecord(record, 4, readName, "C", "A", refStart, "1M", refName, mapQ, false, false);
+ } else {
+ int sequenceIndex = header.getSequenceIndex(refName);
+ Assert.assertFalse(sequenceIndex == -1);
+
+ if (sequenceIndex == 0) {
+ if (header.getSequenceDictionary().getSequences().size() > 1) {
+ sequenceIndex++;
+ }
+ } else {
+ sequenceIndex--;
+ }
+
+ refName = header.getSequence(sequenceIndex).getSequenceName();
+
+ record.setAlignmentStart(refStart - 100);
+ record.setCigarString("1M");
+ record.setMappingQuality(mapQ - 1);
+ record.setReferenceIndex(sequenceIndex);
+
+ checkSAMRecord(record, 0, readName, "C", "A", refStart - 100, "1M", refName, mapQ - 1, false, false);
+ }
+ }
+
+ @Test(dataProvider = "testRows")
+ public void testRowsBySpan(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ SAMFileHeader header = reader.getFileHeader();
+
+ Chunk chunk;
+ if (refStart != -1) {
+ long refOffset = 0;
+ int refIndex = header.getSequenceDictionary().getSequence(refName).getSequenceIndex();
+ for (SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
+ if (sequenceRecord.getSequenceIndex() < refIndex) {
+ refOffset += sequenceRecord.getSequenceLength();
+ }
+ }
+
+ chunk = new Chunk(refOffset + refStart - 1, refOffset + refStart);
+ } else {
+ long totalRefLength = header.getSequenceDictionary().getReferenceLength();
+ long totalRecordRange = ((BAMFileSpan)reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1];
+ chunk = new Chunk(totalRefLength, totalRecordRange);
+ }
+
+ final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunk));
+
+ SAMRecord record = null;
+ while (samRecordIterator.hasNext()) {
+ SAMRecord currentRecord = samRecordIterator.next();
+ if (currentRecord.getReadName().equals(readName)) {
+ record = currentRecord;
+ break;
+ }
+ }
+
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+
+ @Test(dataProvider = "testRows")
+ public void testRowsByIndex(String acc, int recordIndex, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+ if (!SRAAccession.isSupported()) return;
+
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ Assert.assertTrue(reader.hasIndex());
+ Assert.assertTrue(reader.indexing().hasBrowseableIndex());
+
+ SAMFileHeader header = reader.getFileHeader();
+ BrowseableBAMIndex index = reader.indexing().getBrowseableIndex();
+
+ BAMFileSpan span;
+ if (refStart != -1) {
+ int refIndex = header.getSequenceDictionary().getSequence(refName).getSequenceIndex();
+ span = index.getSpanOverlapping(refIndex, refStart, refStart + 1);
+ } else {
+ long chunkStart = index.getStartOfLastLinearBin();
+ long totalRecordRange = ((BAMFileSpan) reader.indexing().getFilePointerSpanningReads()).toCoordinateArray()[1];
+ span = new BAMFileSpan(new Chunk(chunkStart, totalRecordRange));
+ }
+
+ final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(span);
+
+ SAMRecord record = null;
+ while (samRecordIterator.hasNext()) {
+ SAMRecord currentRecord = samRecordIterator.next();
+ if (refStart != -1 && currentRecord.getAlignmentStart() + currentRecord.getReadLength() < refStart) {
+ continue;
+ }
+
+ if (currentRecord.getReadName().equals(readName)) {
+ record = currentRecord;
+ break;
+ }
+ }
+
+ checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
+ }
+
+ private SAMRecord getRecordByIndex(String acc, int recordIndex, boolean detach) {
+ SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
+ SamInputResource.of(new SRAAccession(acc))
+ );
+
+ final SAMRecordIterator samRecordIterator = reader.iterator();
+
+ while (recordIndex != 0) {
+ Assert.assertTrue(samRecordIterator.hasNext(), "Record set is too small");
+
+ samRecordIterator.next();
+ recordIndex--;
+ }
+ Assert.assertTrue(samRecordIterator.hasNext(), "Record set is too small");
+
+ SAMRecord record = samRecordIterator.next();
+
+ if (detach) {
+ samRecordIterator.next();
+ }
+
+ return record;
+ }
+
+ private void checkSAMRecord(SAMRecord record, int flags, String readName, String bases, String quals,
+ int refStart, String cigar, String refName, int mapQ, boolean hasMate,
+ boolean isSecondaryAlignment) {
+
+ Assert.assertNotNull(record, "Record with read id: " + readName + " was not found by span created from index");
+
+ List<SAMValidationError> validationErrors = record.isValid();
+ Assert.assertNull(validationErrors, "SRA Lazy record is invalid. List of errors: " +
+ (validationErrors != null ? validationErrors.toString() : ""));
+
+ Assert.assertEquals(new String(record.getReadBases()), bases);
+ Assert.assertEquals(record.getBaseQualityString(), quals);
+ Assert.assertEquals(record.getReadPairedFlag(), hasMate);
+ Assert.assertEquals(record.getFlags(), flags);
+ Assert.assertEquals(record.getNotPrimaryAlignmentFlag(), isSecondaryAlignment);
+ if (refStart == -1) {
+ Assert.assertEquals(record.getReadUnmappedFlag(), true);
+ Assert.assertEquals(record.getAlignmentStart(), 0);
+ Assert.assertEquals(record.getCigarString(), "*");
+ Assert.assertEquals(record.getReferenceName(), "*");
+ Assert.assertEquals(record.getMappingQuality(), 0);
+ } else {
+ Assert.assertEquals(record.getReadUnmappedFlag(), false);
+ Assert.assertEquals(record.getAlignmentStart(), refStart);
+ Assert.assertEquals(record.getCigarString(), cigar);
+ Assert.assertEquals(record.getReferenceName(), refName);
+ Assert.assertEquals(record.getMappingQuality(), mapQ);
+ }
+ }
+
+ private void checkAlignedUnalignedCountsByIterator(SAMRecordIterator samRecordIterator,
+ int numberAlignments, int numberUnalignments) {
+ int countAlignments = 0, countUnalignments = 0;
+ while (true) {
+ boolean hasRecord = samRecordIterator.hasNext();
+ SAMRecord record = null;
+ try {
+ record = samRecordIterator.next();
+ Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
+ } catch (NoSuchElementException e) {
+ Assert.assertFalse(hasRecord);
+ }
+
+ Assert.assertEquals(hasRecord, record != null);
+
+ if (record == null) {
+ break;
+ }
+
+ if (record.getReadUnmappedFlag()) {
+ countUnalignments++;
+ } else {
+ countAlignments++;
+ }
+ }
+
+ Assert.assertEquals(numberAlignments, countAlignments);
+ Assert.assertEquals(numberUnalignments, countUnalignments);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java b/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java
index ea3435e..e8b9957 100644
--- a/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java
+++ b/src/tests/java/htsjdk/samtools/util/CodeUtilTest.java
@@ -10,6 +10,6 @@ public class CodeUtilTest {
final String notNull = "Not null!";
Assert.assertEquals(CodeUtil.getOrElse(notNull, null), notNull);
Assert.assertEquals(CodeUtil.getOrElse(null, notNull), notNull);
- Assert.assertEquals(CodeUtil.getOrElse(null, null), null);
+ Assert.assertEquals((Object) CodeUtil.getOrElse(null, null), (Object) null);
}
}
diff --git a/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java b/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java
index 7c8b8fe..88b05e2 100644
--- a/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java
+++ b/src/tests/java/htsjdk/samtools/util/DiskBackedQueueTest.java
@@ -25,14 +25,15 @@
package htsjdk.samtools.util;
import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Collections;
-/**
- * Created by bradt on 4/28/14.
- */
public class DiskBackedQueueTest extends SortingCollectionTest {
@DataProvider(name = "diskBackedQueueProvider")
public Object[][] createDBQTestData() {
@@ -49,6 +50,9 @@ public class DiskBackedQueueTest extends SortingCollectionTest {
};
}
+ @BeforeMethod void setup() { resetTmpDir(); }
+ @AfterMethod void tearDown() { resetTmpDir(); }
+
/**
* Generate some strings, put into SortingCollection, confirm that the right number of
* Strings come out, and in the right order.
@@ -85,12 +89,12 @@ public class DiskBackedQueueTest extends SortingCollectionTest {
}
private DiskBackedQueue<String> makeDiskBackedQueue(final int maxRecordsInRam) {
- return DiskBackedQueue.newInstance(new StringCodec(), maxRecordsInRam, Collections.singletonList(tmpDir));
+ return DiskBackedQueue.newInstance(new StringCodec(), maxRecordsInRam, Collections.singletonList(tmpDir()));
}
@Test
public void testReadOnlyQueueJustBeforeReadingFromDisk() {
- DiskBackedQueue<String> queue = makeDiskBackedQueue(2);
+ final DiskBackedQueue<String> queue = makeDiskBackedQueue(2);
queue.add("foo");
queue.add("bar");
queue.add("baz");
@@ -109,4 +113,20 @@ public class DiskBackedQueueTest extends SortingCollectionTest {
Assert.assertTrue(queue.canAdd());
}
+ /** See: https://github.com/broadinstitute/picard/issues/327 */
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testPathologyIssue327() {
+
+ final DiskBackedQueue<String> queue = makeDiskBackedQueue(2);
+
+ // testing a particular order of adding to the queue, setting the result state, and emitting.
+ queue.add("0");
+ queue.add("1");
+ queue.add("2"); // spills to disk
+ Assert.assertEquals(queue.poll(), "0"); // gets from ram, so now there is space in ram, but a record on disk
+ queue.add("3"); // adds, but we assumed we added all records before removing them
+ Assert.assertEquals(queue.poll(), "1");
+ Assert.assertEquals(queue.poll(), "2");
+ Assert.assertEquals(queue.poll(), "3");
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
index e47d866..2ce0c79 100644
--- a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
+++ b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
@@ -26,6 +26,7 @@ package htsjdk.samtools.util;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.TextCigarCodec;
@@ -55,6 +56,27 @@ public class SequenceUtilTest {
SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2);
}
+ @DataProvider
+ public Object[][] compatibleNonEqualLists(){
+ final String s = HEADER +
+ String.format("@SQ\tSN:phix174.seq\tLN:%d\tUR:%s\tAS:PhiX174\tM5:%s\n", 5386, "/seq/references/PhiX174/v0/PhiX174.fasta", "3332ed720ac7eaa9b3655c06f6b9e196")+
+ String.format("@SQ\tSN:phix175.seq\tLN:%d\tUR:%s\tAS:HiMom\tM5:%s\n", 5385, "/seq/references/PhiX174/v0/HiMom.fasta", "deadbeed");
+
+ return new Object[][]{ {makeSequenceDictionary(5386, "/seq/references/PhiX174/v0/PhiX174.fasta",
+ "3332ed720ac7eaa9b3655c06f6b9e196"),
+ new SAMTextHeaderCodec().decode(new StringLineReader(s), null).getSequenceDictionary()}};
+ }
+
+ @Test(dataProvider = "compatibleNonEqualLists")
+ public void testCompatible(SAMSequenceDictionary sd1, SAMSequenceDictionary sd2) {
+ SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2, true);
+ }
+
+ @Test(dataProvider = "compatibleNonEqualLists",expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
+ public void testinCompatible(SAMSequenceDictionary sd1, SAMSequenceDictionary sd2) {
+ SequenceUtil.assertSequenceDictionariesEqual(sd1, sd2, false);
+ }
+
@Test(expectedExceptions = SequenceUtil.SequenceListsDifferException.class)
public void testMismatch() {
final SAMSequenceDictionary sd1 = makeSequenceDictionary(5386, "/seq/references/PhiX174/v0/PhiX174.fasta",
diff --git a/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java b/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java
index 8770938..1ec928d 100644
--- a/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java
+++ b/src/tests/java/htsjdk/samtools/util/SortingCollectionTest.java
@@ -24,7 +24,9 @@
package htsjdk.samtools.util;
import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeMethod;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -41,31 +43,23 @@ import java.util.Random;
public class SortingCollectionTest {
// Create a separate directory for files so it is possible to confirm that the directory is emptied
- protected final File tmpDir = new File(System.getProperty("java.io.tmpdir") + "/" + System.getProperty("user.name"),
- "SortingCollectionTest");
- @BeforeTest void setup() {
- // Clear out any existing files if the directory exists
- if (tmpDir.exists()) {
- for (final File f : tmpDir.listFiles()) {
- f.delete();
- }
- }
- tmpDir.mkdirs();
+ protected File tmpDir() {
+ return new File(System.getProperty("java.io.tmpdir") + "/" + System.getProperty("user.name"), getClass().getSimpleName());
}
+
+ @BeforeMethod void setup() { resetTmpDir(); }
+ @AfterMethod void tearDown() { resetTmpDir(); }
+
+ /** Deletes and re-creates the temporary directory. */
+ void resetTmpDir() {
+ System.err.println("Resetting tmpdir");
+ IOUtil.deleteDirectoryTree(tmpDir());
+ if (!tmpDir().mkdirs()) throw new IllegalStateException("Could not create tmpdir: " + tmpDir().getAbsolutePath());
- @AfterTest void tearDown() {
- System.err.println("In SortingCollectionTest.tearDown. tmpDir: " + tmpDir);
- if (tmpDir.exists()) {
- for (final File f : tmpDir.listFiles()) {
- f.delete();
- }
- tmpDir.delete();
- }
}
protected boolean tmpDirIsEmpty() {
- System.err.println("In SortingCollectionTest.tmpDirIsEmpty. tmpDir: " + tmpDir);
- return tmpDir.listFiles().length == 0;
+ return tmpDir().listFiles().length == 0;
}
@DataProvider(name = "test1")
@@ -105,7 +99,7 @@ public class SortingCollectionTest {
assertIteratorEqualsList(strings, sortingCollection.iterator());
sortingCollection.cleanup();
- Assert.assertEquals(tmpDir.list().length, 0);
+ Assert.assertEquals(tmpDir().list().length, 0);
}
private void assertIteratorEqualsList(final String[] strings, final Iterator<String> sortingCollection) {
@@ -118,8 +112,7 @@ public class SortingCollectionTest {
}
private SortingCollection<String> makeSortingCollection(final int maxRecordsInRam) {
- return SortingCollection.newInstance(String.class, new StringCodec(), new StringComparator(),
- maxRecordsInRam, tmpDir);
+ return SortingCollection.newInstance(String.class, new StringCodec(), new StringComparator(), maxRecordsInRam, tmpDir());
}
/**
diff --git a/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java b/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
index 81fe49a..91804c4 100644
--- a/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
+++ b/src/tests/java/htsjdk/variant/bcf2/BCF2UtilsUnitTest.java
@@ -93,6 +93,21 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
Assert.assertEquals(7,dict_size);
}
+ /**
+ * Wrapper class for HeaderOrderTestProvider test cases to prevent TestNG from calling toString()
+ * on the VCFHeaders and spamming the log output.
+ */
+ private static class HeaderOrderTestCase {
+ public final VCFHeader inputHeader; // passed as the second argument of BCF2Utils.headerLinesAreOrderedConsistently
+ public final VCFHeader testHeader; // passed as the first argument of BCF2Utils.headerLinesAreOrderedConsistently
+ public final boolean expectedConsistent; // the result testHeaderOrder expects from that call
+ // Plain value holder: stores the three values unchanged and declares no toString() of its own.
+ public HeaderOrderTestCase( final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent ) {
+ this.inputHeader = inputHeader;
+ this.testHeader = testHeader;
+ this.expectedConsistent = expectedConsistent;
+ }
+ }
@DataProvider(name = "HeaderOrderTestProvider")
public Object[][] makeHeaderOrderTestProvider() {
@@ -132,7 +147,7 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
allLines.addAll(permutation);
final VCFHeader testHeader = new VCFHeader(new LinkedHashSet<VCFHeaderLine>(allLines));
final boolean expectedConsistent = expectedConsistent(testHeader, inputLineCounter);
- tests.add(new Object[]{inputHeader, testHeader, expectedConsistent});
+ tests.add(new Object[]{new HeaderOrderTestCase(inputHeader, testHeader, expectedConsistent)});
}
}
}
@@ -153,7 +168,7 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
for ( final List<String> testSamplesPermutation : permutations ) {
final VCFHeader testHeaderWithSamples = new VCFHeader(inputHeader.getMetaDataInInputOrder(), testSamplesPermutation);
final boolean expectedConsistent = testSamples.equals(inSamples);
- tests.add(new Object[]{inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent});
+ tests.add(new Object[]{new HeaderOrderTestCase(inputHeaderWithSamples, testHeaderWithSamples, expectedConsistent)});
}
}
}
@@ -182,9 +197,9 @@ public final class BCF2UtilsUnitTest extends VariantBaseTest {
// even when the header file is slightly different
//
@Test(dataProvider = "HeaderOrderTestProvider")
- public void testHeaderOrder(final VCFHeader inputHeader, final VCFHeader testHeader, final boolean expectedConsistent) {
- final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testHeader, inputHeader);
- Assert.assertEquals(actualOrderConsistency, expectedConsistent);
+ public void testHeaderOrder( final HeaderOrderTestCase testCase ) {
+ final boolean actualOrderConsistency = BCF2Utils.headerLinesAreOrderedConsistently(testCase.testHeader, testCase.inputHeader);
+ Assert.assertEquals(actualOrderConsistency, testCase.expectedConsistent);
}
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/tests/java/htsjdk/variant/variantcontext/filter/AllFailFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/tests/java/htsjdk/variant/variantcontext/filter/AllFailFilter.java
index 7dac5a2..d62e146 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/AllFailFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,21 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
/**
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A trivial filter (always false) that can be used in testing
+ *
+ * @author Yossi Farjoun
*/
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class AllFailFilter implements VariantContextFilter {
+
+ /** @return false for every record, so that all VCs are filtered out; the argument is never inspected. */
+ @Override
+ public boolean test(final VariantContext record) {
+ // Constant result, independent of the record passed in.
+ return false;
+ }
}
diff --git a/src/java/htsjdk/samtools/SAMTag.java b/src/tests/java/htsjdk/variant/variantcontext/filter/AllPassFilter.java
similarity index 60%
copy from src/java/htsjdk/samtools/SAMTag.java
copy to src/tests/java/htsjdk/variant/variantcontext/filter/AllPassFilter.java
index 7dac5a2..b29aa51 100644
--- a/src/java/htsjdk/samtools/SAMTag.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/AllPassFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,59 +21,21 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package htsjdk.samtools;
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
/**
- * The standard tags for a SAM record that are defined in the SAM spec.
+ * A trivial filter (always true) that can be used in testing
+ *
+ * @author Yossi Farjoun
*/
-public enum SAMTag {
- AM,
- AS,
- BC,
- BQ,
- CC,
- CM,
- CO,
- CP,
- CQ,
- CS,
- CT,
- E2,
- FI,
- FS,
- FZ,
- GC, // for backwards compatibility
- GS, // for backwards compatibility
- GQ, // for backwards compatibility
- LB,
- H0,
- H1,
- H2,
- HI,
- IH,
- MC,
- MF, // for backwards compatibility
- MD,
- MQ,
- NH,
- NM,
- OQ,
- OP,
- OC,
- PG,
- PQ,
- PT,
- PU,
- QT,
- Q2,
- R2,
- RG,
- RT,
- S2, // for backwards compatibility
- SA,
- SM,
- SQ, // for backwards compatibility
- TC,
- U2,
- UQ
+public class AllPassFilter implements VariantContextFilter {
+
+ /** @return true for every record, so that all VCs are kept; the argument is never inspected. */
+ @Override
+ public boolean test(final VariantContext record) {
+ // Constant result, independent of the record passed in.
+ return true;
+ }
}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java
new file mode 100644
index 0000000..0a49853
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/CompoundFilterTest.java
@@ -0,0 +1,78 @@
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Exercises CompoundFilter in both requireAll modes using always-pass / always-fail members (created by farjoun on 9/9/15).
+ */
+public class CompoundFilterTest {
+ // Shared fixtures; final because nothing reassigns them and every provider row reuses them.
+ static final AllPassFilter pass = new AllPassFilter();
+ static final AllFailFilter fail = new AllFailFilter();
+
+ static final Allele refA = Allele.create("A", true);
+ static final Allele G = Allele.create("G", false);
+ // The one record that every compound filter below is evaluated against.
+ static final VariantContext vc = new VariantContextBuilder("dummy", "chr1", 1, 1, Arrays.asList(refA, G)).make();
+
+ /** @return rows of {compound filter, expected result of filter.test(vc)} for both requireAll settings. */
+ @DataProvider
+ Iterator<Object[]> testCompoundFilterProvider() {
+ final List<Object[]> filters = new ArrayList<Object[]>(10);
+ // requireAll = TRUE
+ { // all members pass
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ compoundFilter.add(pass);
+ compoundFilter.add(pass);
+ compoundFilter.add(pass);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ { // one member fails
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ compoundFilter.add(pass);
+ compoundFilter.add(fail);
+ compoundFilter.add(pass);
+ filters.add(new Object[]{compoundFilter, false});
+ }
+ { // no members
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ // requireAll = FALSE
+ { // all members fail
+ final CompoundFilter compoundFilter = new CompoundFilter(false);
+ compoundFilter.add(fail);
+ compoundFilter.add(fail);
+ compoundFilter.add(fail);
+ filters.add(new Object[]{compoundFilter, false});
+ }
+ { // one member fails, the others pass
+ final CompoundFilter compoundFilter = new CompoundFilter(false);
+ compoundFilter.add(pass);
+ compoundFilter.add(fail);
+ compoundFilter.add(pass);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ { // no members
+ final CompoundFilter compoundFilter = new CompoundFilter(false);
+ filters.add(new Object[]{compoundFilter, true});
+ }
+ return filters.iterator();
+ }
+
+ /** Applies the supplied filter to the shared record and compares the verdict with shouldPass. */
+ @Test(dataProvider = "testCompoundFilterProvider")
+ public void testCompoundFilter(final VariantContextFilter filter, final boolean shouldPass) {
+ Assert.assertEquals(filter.test(vc), shouldPass, filter.toString());
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java
new file mode 100644
index 0000000..0964309
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java
@@ -0,0 +1,88 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFFileReader;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+
+/**
+ * Tests for the (VariantContext)FilteringIterator and the HeterozygosityFilter, reading VCF files from testDir.
+ */
+public class FilteringIteratorTest {
+ final File testDir = new File("testdata/htsjdk/variant");
+
+ @DataProvider
+ public Object [][] filteringIteratorData() {
+ return new Object[][] {
+ {new HeterozygosityFilter(true, "NA00001"), 2},
+ {new HeterozygosityFilter(false, "NA00001"), 3},
+ {new HeterozygosityFilter(true, null), 2},
+ {new HeterozygosityFilter(false, null), 3},
+ {new AllPassFilter(), 5},
+ {new HeterozygosityFilter(true, "NA00002"), 4},
+ {new HeterozygosityFilter(false, "NA00002"), 1},
+ };
+ }
+
+ /** Counts the records of ex2.vcf that get through the filter; the reader is closed even if iteration throws. */
+ @Test(dataProvider = "filteringIteratorData")
+ public void testFilteringIterator(final VariantContextFilter filter, final int expectedCount) {
+ final VCFFileReader vcfReader = new VCFFileReader(new File(testDir, "ex2.vcf"), false);
+ int count = 0;
+ try {
+ for (final VariantContext vc : new FilteringIterator(vcfReader.iterator(), filter)) {
+ count++;
+ }
+ } finally {
+ vcfReader.close();
+ }
+ Assert.assertEquals(count, expectedCount);
+ }
+
+ @DataProvider
+ public Object [][] badSampleData() {
+ return new Object[][] {
+ {"ex2.vcf", "DOES_NOT_EXIST"},
+ {"breakpoint.vcf", null},
+ };
+ }
+
+ /** Expects IllegalArgumentException for each file/sample pair; the reader is closed before the exception propagates. */
+ @Test(dataProvider = "badSampleData", expectedExceptions = IllegalArgumentException.class)
+ public void testMissingSample(final String file, final String sample) {
+ final VCFFileReader vcfReader = new VCFFileReader(new File(testDir, file), false);
+ try {
+ new FilteringIterator(vcfReader.iterator(), new HeterozygosityFilter(true, sample)).next();
+ } finally {
+ vcfReader.close();
+ }
+ }
+}
+
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java
new file mode 100644
index 0000000..809133f
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/GenotypeQualityFilterTest.java
@@ -0,0 +1,105 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+public class GenotypeQualityFilterTest {
+ // Alleles shared by every VariantContext built below.
+ Allele refA = Allele.create("A", true);
+ Allele G = Allele.create("G", false);
+
+ /**
+ * Supplies {VariantContext, sample name or null, expected filter result} rows.
+ * All rows are produced from one shared GenotypeBuilder, in the order listed.
+ */
+ @DataProvider
+ public Iterator<Object[]> genotypeProvider() {
+ final VariantContextBuilder contextBuilder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder genotypeBuilder = new GenotypeBuilder("test").alleles(Arrays.asList(refA, G));
+
+ final List<Object[]> rows = new ArrayList<Object[]>(10);
+ rows.addAll(Arrays.asList(
+ // no GQ set on the genotype
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.make()).make(), null, false},
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.make()).make(), "test", false},
+ // GQ set, filter created without a sample name
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.GQ( 1).make()).make(), null, false},
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.GQ(10).make()).make(), null, true},
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.GQ(20).make()).make(), null, true},
+ // GQ set, filter created for sample "test"
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.GQ( 1).make()).make(), "test", false},
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.GQ(10).make()).make(), "test", true},
+ new Object[]{contextBuilder.genotypes(genotypeBuilder.GQ(20).make()).make(), "test", true}));
+ return rows.iterator();
+ }
+
+ /** Obtains a filter for the given sample from getFilter and compares its verdict on vc with shouldPass. */
+ @Test(dataProvider = "genotypeProvider")
+ public void testHetFilter(final VariantContext vc, final String sample, final boolean shouldPass) {
+ final boolean passed = getFilter(sample).test(vc);
+ Assert.assertEquals(passed, shouldPass, vc.toString());
+ }
+
+ /**
+ * Supplies {VariantContext, sample name or null} rows for which an IllegalArgumentException is expected:
+ * no genotypes with a null sample, genotypes "test1"/"test2" with sample "notNull", genotype "This" with sample "That".
+ */
+ @DataProvider(name = "badSamplesProvider")
+ public Iterator<Object[]> badSamplesProvider() {
+ final VariantContextBuilder contextBuilder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder genotypeBuilder = new GenotypeBuilder();
+
+ final List<Object[]> rows = new ArrayList<Object[]>(10);
+ rows.add(new Object[]{contextBuilder.make(), null});
+ rows.add(new Object[]{contextBuilder.genotypes(Arrays.asList(genotypeBuilder.name("test1").make(), genotypeBuilder.name("test2").make())).make(), "notNull"});
+ rows.add(new Object[]{contextBuilder.genotypes(Collections.singleton(genotypeBuilder.name("This").make())).make(), "That"});
+ return rows.iterator();
+ }
+
+ /** Expects IllegalArgumentException for every row of badSamplesProvider. */
+ @Test(dataProvider = "badSamplesProvider", expectedExceptions = IllegalArgumentException.class)
+ public void testbadSample(final VariantContext vc, final String sample) {
+ getFilter(sample).test(vc);
+ }
+
+ /** Creates a filter with the value 10: the one-argument constructor when sample is null, otherwise the two-argument one. */
+ private GenotypeQualityFilter getFilter(String sample){
+ return sample == null
+ ? new GenotypeQualityFilter(10)
+ : new GenotypeQualityFilter(10, sample);
+ }
+}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
new file mode 100644
index 0000000..5ceed9f
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
@@ -0,0 +1,128 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+public class HeterozygosityFilterTest {
+
+ Allele refA = Allele.create("A", true);
+ Allele G = Allele.create("G", false);
+
+ /** @return rows of {VariantContext, sample name or null, flag handed to the HeterozygosityFilter constructor}. */
+ @DataProvider(name = "Hets")
+ public Iterator<Object[]> hetsProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder("test");
+ final List<Object[]> hets = new ArrayList<Object[]>(10);
+
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make(), null, true});
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make(), "test", true});
+
+ //non-variant
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make(), "test", false});
+ hets.add(new Object[]{vc_builder.genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make(), null, false});
+ return hets.iterator();
+ }
+
+ /** The third column is used as the filter's first constructor argument, so every row is expected to pass. */
+ @Test(dataProvider = "Hets")
+ public void testHetFilter(final VariantContext vc, final String sample, final boolean shouldPass) {
+ final HeterozygosityFilter hf = getFilter(shouldPass, sample);
+ Assert.assertTrue(hf.test(vc));
+ }
+
+ /** @return rows of {VariantContext, sample name or null} for which an IllegalArgumentException is expected. */
+ @DataProvider(name = "badSamplesProvider")
+ public Iterator<Object[]> badSamplesProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder();
+ final List<Object[]> hets = new ArrayList<Object[]>(10);
+
+ hets.add(new Object[]{vc_builder.make(), null});
+ hets.add(new Object[]{vc_builder.genotypes(Arrays.asList(gt_builder.name("test1").make(), gt_builder.name("test2").make())).make(), "notNull"});
+ hets.add(new Object[]{vc_builder.genotypes(Collections.singleton(gt_builder.name("This").make())).make(), "That"});
+ return hets.iterator();
+ }
+
+ /** Expects IllegalArgumentException for every row of badSamplesProvider. */
+ @Test(dataProvider = "badSamplesProvider", expectedExceptions = IllegalArgumentException.class)
+ public void testbadSample(final VariantContext vc, final String sample) {
+ final HeterozygosityFilter hf = getFilter(true, sample);
+ //should fail
+ hf.test(vc);
+ }
+
+ /** @return a single row: an iterator over four contexts (starts 1, 10, 20, 30) and the starts expected to survive. */
+ @DataProvider(name = "variantsProvider")
+ public Object[][] variantsProvider() {
+ final VariantContextBuilder vc_builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Arrays.asList(refA, G));
+ final GenotypeBuilder gt_builder = new GenotypeBuilder("test");
+ final List<VariantContext> vcs = new ArrayList<VariantContext>(10);
+
+ //hets:
+ vcs.add(vc_builder.genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make());
+ vcs.add(vc_builder.loc("chr1", 10, 10).genotypes(gt_builder.alleles(Arrays.asList(refA, G)).make()).make());
+
+ //non-variant:
+ vcs.add(vc_builder.loc("chr1", 20, 20).genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make());
+ vcs.add(vc_builder.loc("chr1", 30, 30).genotypes(gt_builder.alleles(Collections.singletonList(refA)).make()).make());
+ return new Object[][]{new Object[]{vcs.iterator(), new int[]{1, 10}}};
+ }
+
+ /** Checks that the filtering iterator yields exactly the expected start positions, in order. */
+ @Test(dataProvider = "variantsProvider")
+ public void testFilteringIterator(final Iterator<VariantContext> vcs, final int[] passingPositions) {
+ final Iterator<VariantContext> filteringIterator = new FilteringIterator(vcs, new HeterozygosityFilter(true, "test"));
+ int i = 0;
+ while (filteringIterator.hasNext()) {
+ final VariantContext vc = filteringIterator.next();
+ Assert.assertTrue(i < passingPositions.length);
+ Assert.assertEquals(vc.getStart(), passingPositions[i++]);
+ }
+ // Without this check an iterator that stopped early, or returned nothing at all, would pass unnoticed.
+ Assert.assertEquals(i, passingPositions.length);
+ }
+
+ /** Uses the one-argument constructor when sample is null and the two-argument constructor otherwise. */
+ private HeterozygosityFilter getFilter(final boolean shouldPass, String sample) {
+ if (sample == null) {
+ return new HeterozygosityFilter(shouldPass);
+ } else {
+ return new HeterozygosityFilter(shouldPass, sample);
+ }
+ }
+}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java
new file mode 100644
index 0000000..3cbb60c
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/PassingVariantFilterTest.java
@@ -0,0 +1,46 @@
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Created by farjoun on 9/10/15.
+ */
+public class PassingVariantFilterTest {
+ Allele refA = Allele.create("A", true);
+ Allele G = Allele.create("G", false);
+
+ /** Supplies {VariantContext, expected filter result} rows, built in the order listed from one shared builder. */
+ @DataProvider()
+ public Iterator<Object[]> variantProvider() {
+ final VariantContextBuilder builder = new VariantContextBuilder("test", "chr1", 1, 1, Arrays.asList(refA, G));
+ final List<Object[]> rows = new ArrayList<Object[]>(10);
+
+ rows.addAll(Arrays.asList(
+ // unfiltered
+ new Object[]{builder.alleles(Arrays.asList(refA, G)).make(), true},
+ // passing
+ new Object[]{builder.alleles(Arrays.asList(refA, G)).passFilters().make(), true},
+ // failing
+ new Object[]{builder.alleles(Arrays.asList(refA, G)).filters(Collections.singleton("FILTER")).make(), false}));
+
+ return rows.iterator();
+ }
+
+ /** Runs a fresh PassingVariantFilter on vc and compares its verdict with shouldPass. */
+ @Test(dataProvider = "variantProvider")
+ public void testPassingVariantFilter(final VariantContext vc, final boolean shouldPass) {
+ final boolean passed = new PassingVariantFilter().test(vc);
+ Assert.assertEquals(passed, shouldPass, vc.toString());
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java
new file mode 100644
index 0000000..74f1bb5
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/SnpFilterTest.java
@@ -0,0 +1,54 @@
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Created by farjoun on 9/9/15.
+ */
+public class SnpFilterTest {
+ Allele refA = Allele.create("A", true);
+ Allele refAG = Allele.create("AG", true);
+
+ Allele G = Allele.create("G", false);
+ Allele T = Allele.create("T", false);
+ Allele AG = Allele.create("AG", false);
+ Allele AT = Allele.create("AT", false);
+ Allele star = Allele.create("<*>", false);
+
+ /**
+ * Supplies {VariantContext, expected filter result} rows. The rows are built in the order listed
+ * from one shared builder; the last two additionally call stop(2) on it.
+ */
+ @DataProvider()
+ public Iterator<Object[]> variantProvider() {
+ final VariantContextBuilder builder = new VariantContextBuilder("testCode", "chr1", 1, 1, Collections.<Allele>emptyList());
+ final List<Object[]> rows = new ArrayList<Object[]>(10);
+ rows.addAll(Arrays.asList(
+ new Object[]{builder.alleles(Arrays.asList(refA, G)).make(), true}, // SNP
+ new Object[]{builder.alleles(Arrays.asList(refA, G, T)).make(), true}, // SNP
+ new Object[]{builder.alleles(Arrays.asList(refA, AG)).make(), false}, // INDEL
+ new Object[]{builder.alleles(Arrays.asList(refA, G, AG)).make(), false}, // MIXED
+ new Object[]{builder.alleles(Arrays.asList(refA, star)).make(), false}, // SYMBOLIC
+ new Object[]{builder.stop(2).alleles(Arrays.asList(refAG, T)).make(), false}, // INDEL
+ new Object[]{builder.stop(2).alleles(Arrays.asList(refAG, AT)).make(), false})); // MNP
+ return rows.iterator();
+ }
+
+ /** Runs a fresh SnpFilter on vc and compares its verdict with shouldPass. */
+ @Test(dataProvider = "variantProvider")
+ public void testSnpFilter(final VariantContext vc, final boolean shouldPass) {
+ final boolean passed = new SnpFilter().test(vc);
+ Assert.assertEquals(passed, shouldPass, vc.toString());
+ }
+}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
index 5a8705e..9b8f6e8 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
@@ -349,4 +349,14 @@ public class VariantContextWriterBuilderUnitTest extends VariantBaseTest {
.setOption(Options.INDEX_ON_THE_FLY)
.build();
}
+
+ @Test
+ public void testClearOptions() {
+ // Set an option on one cleared builder, then check that a second, freshly cleared builder
+ // has no options: clearOptions must not carry earlier settings over to later builders.
+ new VariantContextWriterBuilder().clearOptions().setOption(Options.INDEX_ON_THE_FLY);
+
+ final VariantContextWriterBuilder second = new VariantContextWriterBuilder().clearOptions();
+ Assert.assertTrue(second.options.isEmpty());
+ }
}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index 0bf5f74..1a53cd6 100644
--- a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -34,7 +34,6 @@ import htsjdk.tribble.readers.LineReaderUtil;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.VariantContext;
import org.testng.Assert;
-import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
@@ -127,20 +126,17 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
final VCFHeader header = (VCFHeader) codec.readHeader(vcfIterator).getHeaderValue();
}
- @DataProvider(name = "HiSeqVCFHeaderDataProvider")
- public Object[][] getHiSeqVCFHeaderData() {
+ private VCFHeader getHiSeqVCFHeader() {
final File vcf = new File("testdata/htsjdk/variant/HiSeq.10000.vcf");
final VCFFileReader reader = new VCFFileReader(vcf, false);
final VCFHeader header = reader.getFileHeader();
reader.close();
-
- return new Object[][] {
- { header }
- };
+ return header;
}
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddInfoLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddInfoLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFInfoHeaderLine infoLine = new VCFInfoHeaderLine("TestInfoLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test info line");
header.addMetaDataLine(infoLine);
@@ -154,8 +150,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(infoLine), "TestInfoLine present in other header lines");
}
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddFormatLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddFormatLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFFormatHeaderLine formatLine = new VCFFormatHeaderLine("TestFormatLine", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "test format line");
header.addMetaDataLine(formatLine);
@@ -169,8 +166,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(formatLine), "TestFormatLine present in other header lines");
}
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddFilterLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddFilterLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFFilterHeaderLine filterLine = new VCFFilterHeaderLine("TestFilterLine");
header.addMetaDataLine(filterLine);
@@ -184,8 +182,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(filterLine), "TestFilterLine present in other header lines");
}
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddContigLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddContigLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFContigHeaderLine contigLine = new VCFContigHeaderLine("<ID=chr1,length=1234567890,assembly=FAKE,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\">", VCFHeaderVersion.VCF4_0, "chr1", 0);
header.addMetaDataLine(contigLine);
@@ -198,8 +197,9 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertFalse(header.getOtherHeaderLines().contains(contigLine), "Test contig line present in other header lines");
}
- @Test(dataProvider = "HiSeqVCFHeaderDataProvider")
- public void testVCFHeaderAddOtherLine( final VCFHeader header ) {
+ @Test
+ public void testVCFHeaderAddOtherLine() {
+ final VCFHeader header = getHiSeqVCFHeader();
final VCFHeaderLine otherLine = new VCFHeaderLine("TestOtherLine", "val");
header.addMetaDataLine(otherLine);
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram
new file mode 100644
index 0000000..59f11d2
Binary files /dev/null and b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram differ
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram.bai b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram.bai
new file mode 100644
index 0000000..fcb31fc
Binary files /dev/null and b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.cram.bai differ
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.dict b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.dict
new file mode 100644
index 0000000..7f41717
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.dict
@@ -0,0 +1,2 @@
+ at HD VN:1.4 SO:unsorted
+ at SQ SN:Shelly LN:20 M5:7ddd8a4b4f2c1dec43476a738b1a9b72 UR:file:/Users/edwardk/Documents/htsjdk/testdata/htsjdk/samtools/cram/auxf.fa
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa
new file mode 100644
index 0000000..63e0c92
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa
@@ -0,0 +1,2 @@
+>Shelly
+GCTAGCTCAGAAAAAAAAAA
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa.fai b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa.fai
new file mode 100644
index 0000000..3deea7f
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fa.fai
@@ -0,0 +1 @@
+Shelly 20 8 20 21
diff --git a/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fasta b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fasta
new file mode 100644
index 0000000..11d25dd
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram/CRAMException/testContigNotInRef.fasta
@@ -0,0 +1,2 @@
+>Sheila
+GCTAGCTCAGAAAAAAAAAA
diff --git a/testdata/htsjdk/samtools/cram_tlen.fasta b/testdata/htsjdk/samtools/cram_tlen.fasta
new file mode 100644
index 0000000..01b8f8a
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_tlen.fasta
@@ -0,0 +1,41 @@
+>chr1
+TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC
+TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA
+A
+>chr2
+CATCTCTACAAGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATAC
+TTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTTGACACCTTT
+T
+>chr3
+CGTATGCGCTTTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAAT
+AAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGGAATGTGCAA
+A
+>chr4
+CGTGATACCAACTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATAT
+TTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGGTTTGCAGCC
+C
+>chr5
+NTCTCATTTAAAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTT
+CATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCAAGACGTTATC
+T
+>chr6
+NAATTGTTCTTAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACA
+ATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACCAGTGTCGAT
+C
+>chr7
+CAACAGAAGGGGGGATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAAGG
+TTTTCGGGTCCCCCCCCCATCCCGATTTCCTTCCGCAGCTTACCTCCCGA
+AACGCGGCATCCCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCA
+GCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCATA
+CACAACAGAAGGGGGGATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAA
+GGTTTTCGGGTCCCCCCCCCATCCCGATTTCCTTCCGCAGCTTACCTCCC
+GAAACGCGGCATCCCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGG
+CAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCA
+CAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCA
+TACA
+>chr8
+CACATCGTGAATCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGA
+GAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCCTAAGATGAC
+CCCAGGTTCAAATGTGCAGCCCCTTTTGAGAGATTTTTTTTTTGGGCTGG
+AAAAAAGACACAGCTATTCCTAAGATGACAAGATCAGAAAAAAAGTCAAG
+CA
\ No newline at end of file
diff --git a/testdata/htsjdk/samtools/cram_tlen.fasta.fai b/testdata/htsjdk/samtools/cram_tlen.fasta.fai
new file mode 100644
index 0000000..4f7bd29
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_tlen.fasta.fai
@@ -0,0 +1,8 @@
+chr1 101 6 50 51
+chr2 101 116 50 51
+chr3 101 226 50 51
+chr4 101 336 50 51
+chr5 101 446 50 51
+chr6 101 556 50 51
+chr7 454 666 50 51
+chr8 202 1136 50 51
diff --git a/testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam b/testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam
new file mode 100644
index 0000000..0d1947e
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam
@@ -0,0 +1,19 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:454
+ at SQ SN:chr8 LN:202
+ at RG ID:0 SM:Hi,Mom! PL:ILLUMINA
+ at PG ID:1 VN:2.0 PN:Hey!
+both_reads_align_clip_marked 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_present_only_first_aligns 89 chr7 1 255 101M * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+read_2_too_many_gaps 83 chr7 1 255 101M = 302 201 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_align_clip_adapter 147 chr7 16 255 101M = 21 -96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_align_clip_adapter 99 chr7 21 255 101M = 16 96 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0
+both_reads_align_clip_marked 163 chr7 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
+read_2_too_many_gaps 163 chr7 302 255 10M1D10M5I76M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
+both_reads_present_only_first_aligns 165 * 0 0 * chr7 1 0 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 RG:Z:0
diff --git a/testdata/htsjdk/samtools/cram_with_bai_index.cram b/testdata/htsjdk/samtools/cram_with_bai_index.cram
new file mode 100644
index 0000000..5609d5e
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_bai_index.cram differ
diff --git a/testdata/htsjdk/samtools/cram_with_bai_index.cram.bai b/testdata/htsjdk/samtools/cram_with_bai_index.cram.bai
new file mode 100644
index 0000000..db53e08
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_bai_index.cram.bai differ
diff --git a/testdata/htsjdk/samtools/cram_with_crai_index.cram b/testdata/htsjdk/samtools/cram_with_crai_index.cram
new file mode 100644
index 0000000..5609d5e
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_crai_index.cram differ
diff --git a/testdata/htsjdk/samtools/cram_with_crai_index.cram.crai b/testdata/htsjdk/samtools/cram_with_crai_index.cram.crai
new file mode 100644
index 0000000..309f06f
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_with_crai_index.cram.crai differ
diff --git a/testdata/htsjdk/samtools/hg19mini.fasta b/testdata/htsjdk/samtools/hg19mini.fasta
new file mode 100644
index 0000000..038dd84
--- /dev/null
+++ b/testdata/htsjdk/samtools/hg19mini.fasta
@@ -0,0 +1,804 @@
+>1 dna:chromosome chromosome:GRCh37:1:1:16000:1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA
+ACCCTAACCCTAACCCTAACCCTAACCCAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC
+CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCC
+TAACCCTAAACCCTAAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCC
+CAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCC
+TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTC
+AGCCGGCCCGCCCGCCCGGGTCTGACCTGAGGAGAACTGTGCTCCGCCTTCAGAGTACCACCGAAATCTGTGCAGAGGAC
+AACGCAGCTCCGCCCTCGCGGTGCTCTCCGGGTCTGTGCTGAGGAGAACGCAACTCCGCCGTTGCAAAGGCGCGCCGCGC
+CGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGC
+GCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGA
+CACATGCTAGCGCGTCGGGGTGGAGGCGTGGCGCAGGCGCAGAGAGGCGCGCCGCGCCGGCGCAGGCGCAGAGACACATG
+CTACCGCGTCCAGGGGTGGAGGCGTGGCGCAGGCGCAGAGAGGCGCACCGCGCCGGCGCAGGCGCAGAGACACATGCTAG
+CGCGTCCAGGGGTGGAGGCGTGGCGCAGGCGCAGAGACGCAAGCCTACGGGCGGGGGTTGGGGGGGCGTGTGTTGCAGGA
+GCAAAGTCGCACGGCGCCGGGCTGGGGCGGGGGGAGGGTGGCGCCGTGCACGCGCAGAAACTCACGTCACGGTGGCGCGG
+CGCAGAGACGGGTAGAACCTCAGTAATCCGAAAAGCCGGGATCGACCGCCCCTTGCTTGCAGCCGGGCACTACAGGACCC
+GCTTGCTCACGGTGCTGTGCCAGGGCGCCCCCTGCTGGCGACTAGGGCAACTGCAGGGCTCTCTTGCTTAGAGTGGTGGC
+CAGCGCCCCCTGCTGGCGCCGGGGCACTGCAGGGCCCTCTTGCTTACTGTATAGTGGTGGCACGCCGCCTGCTGGCAGCT
+AGGGACATTGCAGGGTCCTCTTGCTCAAGGTGTAGTGGCAGCACGCCCACCTGCTGGCAGCTGGGGACACTGCCGGGCCC
+TCTTGCTCCAACAGTACTGGCGGATTATAGGGAAACACCCGGAGCATATGCTGTTTGGTCTCAGTAGACTCCTAAATATG
+GGATTCCTGGGTTTAAAAGTAAAAAATAAATATGTTTAATTTGTGAACTGATTACCATCAGAATTGTACTGTTCTGTATC
+CCACCAGCAATGTCTAGGAATGCCTGTTTCTCCACAAAGTGTTTACTTTTGGATTTTTGCCAGTCTAACAGGTGAAGCCC
+TGGAGATTCTTATTAGTGATTTGGGCTGGGGCCTGGCCATGTGTATTTTTTTAAATTTCCACTGATGATTTTGCTGCATG
+GCCGGTGTTGAGAATGACTGCGCAAATTTGCCGGATTTCCTTTGCTGTTCCTGCATGTAGTTTAAACGAGATTGCCAGCA
+CCGGGTATCATTCACCATTTTTCTTTTCGTTAACTTGCCGTCAGCCTTTTCTTTGACCTCTTCTTTCTGTTCATGTGTAT
+TTGCTGTCTCTTAGCCCAGACTTCCCGTGTCCTTTCCACCGGGCCTTTGAGAGGTCACAGGGTCTTGATGCTGTGGTCTT
+CATCTGCAGGTGTCTGACTTCCAGCAACTGCTGGCCTGTGCCAGGGTGCAAGCTGAGCACTGGAGTGGAGTTTTCCTGTG
+GAGAGGAGCCATGCCTAGAGTGGGATGGGCCATTGTTCATCTTCTGGCCCCTGTTGTCTGCATGTAACTTAATACCACAA
+CCAGGCATAGGGGAAAGATTGGAGGAAAGATGAGTGAGAGCATCAACTTCTCTCACAACCTAGGCCAGTAAGTAGTGCTT
+GTGCTCATCTCCTTGGCTGTGATACGTGGCCGGCCCTCGCTCCAGCAGCTGGACCCCTACCTGCCGTCTGCTGCCATCGG
+AGCCCAAAGCCGGGCTGTGACTGCTCAGACCAGCCGGCTGGAGGGAGGGGCTCAGCAGGTCTGGCTTTGGCCCTGGGAGA
+GCAGGTGGAAGATCAGGCAGGCCATCGCTGCCACAGAACCCAGTGGATTGGCCTAGGTGGGATCTCTGAGCTCAACAAGC
+CCTCTCTGGGTGGTAGGTGCAGAGACGGGAGGGGCAGAGCCGCAGGCACAGCCAAGAGGGCTGAAGAAATGGTAGAACGG
+AGCAGCTGGTGATGTGTGGGCCCACCGGCCCCAGGCTCCTGTCTCCCCCCAGGTGTGTGGTGATGCCAGGCATGCCCTTC
+CCCAGCATCAGGTCTCCAGAGCTGCAGAAGACGACGGCCGACTTGGATCACACTCTTGTGAGTGTCCCCAGTGTTGCAGA
+GGTGAGAGGAGAGTAGACAGTGAGTGGGAGTGGCGTCGCCCCTAGGGCTCTACGGGGCCGGCGTCTCCTGTCTCCTGGAG
+AGGCTTCGATGCCCCTCCACACCCTCTTGATCTTCCCTGTGATGTCATCTGGAGCCCTGCTGCTTGCGGTGGCCTATAAA
+GCCTCCTAGTCTGGCTCCAAGGCCTGGCAGAGTCTTTCCCAGGGAAAGCTACAAGCAGCAAACAGTCTGCATGGGTCATC
+CCCTTCACTCCCAGCTCAGAGCCCAGGCCAGGGGCCCCCAAGAAAGGCTCTGGTGGAGAACCTGTGCATGAAGGCTGTCA
+ACCAGTCCATAGGCAAGCCTGGCTGCCTCCAGCTGGGTCGACAGACAGGGGCTGGAGAAGGGGAGAAGAGGAAAGTGAGG
+TTGCCTGCCCTGTCTCCTACCTGAGGCTGAGGAAGGAGAAGGGGATGCACTGTTGGGGAGGCAGCTGTAACTCAAAGCCT
+TAGCCTCTGTTCCCACGAAGGCAGGGCCATCAGGCACCAAAGGGATTCTGCCAGCATAGTGCTCCTGGACCAGTGATACA
+CCCGGCACCCTGTCCTGGACACGCTGTTGGCCTGGATCTGAGCCCTGGTGGAGGTCAAAGCCACCTTTGGTTCTGCCATT
+GCTGCTGTGTGGAAGTTCACTCCTGCCTTTTCCTTTCCCTAGAGCCTCCACCACCCCGAGATCACATTTCTCACTGCCTT
+TTGTCTGCCCAGTTTCACCAGAAGTAGGCCTCTTCCTGACAGGCAGCTGCACCACTGCCTGGCGCTGTGCCCTTCCTTTG
+CTCTGCCCGCTGGAGACGGTGTTTGTCATGGGCCTGGTCTGCAGGGATCCTGCTACAAAGGTGAAACCCAGGAGAGTGTG
+GAGTCCAGAGTGTTGCCAGGACCCAGGCACAGGCATTAGTGCCCGTTGGAGAAAACAGGGGAATCCCGAAGAAATGGTGG
+GTCCTGGCCATCCGTGAGATCTTCCCAGGGCAGCTCCCCTCTGTGGAATCCAATCTGTCTTCCATCCTGCGTGGCCGAGG
+GCCAGGCTTCTCACTGGGCCTCTGCAGGAGGCTGCCATTTGTCCTGCCCACCTTCTTAGAAGCGAGACGGAGCAGACCCA
+TCTGCTACTGCCCTTTCTATAATAACTAAAGTTAGCTGCCCTGGACTATTCACCCCCTAGTCTCAATTTAAGAAGATCCC
+CATGGCCACAGGGCCCCTGCCTGGGGGCTTGTCACCTCCCCCACCTTCTTCCTGAGTCATTCCTGCAGCCTTGCTCCCTA
+ACCTGCCCCACAGCCTTGCCTGGATTTCTATCTCCCTGGCTTGGTGCCAGTTCCTCCAAGTCGATGGCACCTCCCTCCCT
+CTCAACCACTTGAGCAAACTCCAAGACATCTTCTACCCCAACACCAGCAATTGTGCCAAGGGCCATTAGGCTCTCAGCAT
+GACTATTTTTAGAGACCCCGTGTCTGTCACTGAAACCTTTTTTGTGGGAGACTATTCCTCCCATCTGCAACAGCTGCCCC
+TGCTGACTGCCCTTCTCTCCTCCCTCTCATCCCAGAGAAACAGGTCAGCTGGGAGCTTCTGCCCCCACTGCCTAGGGACC
+AACAGGGGCAGGAGGCAGTCACTGACCCCGAGACGTTTGCATCCTGCACAGCTAGAGATCCTTTATTAAAAGCACACTGT
+TGGTTTCTGCTCAGTTCTTTATTGATTGGTGTGCCGTTTTCTCTGGAAGCCTCTTAAGAACACAGTGGCGCAGGCTGGGT
+GGAGCCGTCCCCCCATGGAGCACAGGCAGACAGAAGTCCCCGCCCCAGCTGTGTGGCCTCAAGCCAGCCTTCCGCTCCTT
+GAAGCTGGTCTCCACACAGTGCTGGTTCCGTCACCCCCTCCCAAGGAAGTAGGTCTGAGCAGCTTGTCCTGGCTGTGTCC
+ATGTCAGAGCAACGGCCCAAGTCTGGGTCTGGGGGGGAAGGTGTCATGGAGCCCCCTACGATTCCCAGTCGTCCTCGTCC
+TCCTCTGCCTGTGGCTGCTGCGGTGGCGGCAGAGGAGGGATGGAGTCTGACACGCGGGCAAAGGCTCCTCCGGGCCCCTC
+ACCAGCCCCAGGTCCTTTCCCAGAGATGCCTGGAGGGAAAAGGCTGAGTGAGGGTGGTTGGTGGGAAACCCTGGTTCCCC
+CAGCCCCCGGAGACTTAAATACAGGAAGAAAAAGGCAGGACAGAATTACAAGGTGCTGGCCCAGGGCGGGCAGCGGCCCT
+GCCTCCTACCCTTGCGCCTCATGACCAGCTTGTTGAAGAGATCCGACATCAAGTGCCCACCTTGGCTCGTGGCTCTCACT
+GCAACGGGAAAGCCACAGACTGGGGTGAAGAGTTCAGTCACATGCGACCGGTGACTCCCTGTCCCCACCCCCATGACACT
+CCCCAGCCCTCCAAGGCCACTGTGTTTCCCAGTTAGCTCAGAGCCTCAGTCGATCCCTGACCCAGCACCGGGCACTGATG
+AGACAGCGGCTGTTTGAGGAGCCACCTCCCAGCCACCTCGGGGCCAGGGCCAGGGTGTGCAGCACCACTGTACAATGGGG
+AAACTGGCCCAGAGAGGTGAGGCAGCTTGCCTGGGGTCACAGAGCAAGGCAAAAGCAGCGCTGGGTACAAGCTCAAAACC
+ATAGTGCCCAGGGCACTGCCGCTGCAGGCGCAGGCATCGCATCACACCAGTGTCTGCGTTCACAGCAGGCATCATCAGTA
+GCCTCCAGAGGCCTCAGGTCCAGTCTCTAAAAATATCTCAGGAGGCTGCAGTGGCTGACCATTGCCTTGGACCGCTCTTG
+GCAGTCGAAGAAGATTCTCCTGTCAGTTTGAGCTGGGTGAGCTTAGAGAGGAAAGCTCCACTATGGCTCCCAAACCAGGA
+AGGAGCCATAGCCCAGGCAGGAGGGCTGAGGACCTCTGGTGGCGGCCCAGGGCTTCCAGCATGTGCCCTAGGGGAAGCAG
+GGGCCAGCTGGCAAGAGCAGGGGGTGGGCAGAAAGCACCCGGTGGACTCAGGGCTGGAGGGGAGGAGGCGATCTTGCCCA
+AGGCCCTCCGACTGCAAGCTCCAGGGCCCGCTCACCTTGCTCCTGCTCCTTCTGCTGCTGCTTCTCCAGCTTTCGCTCCT
+TCATGCTGCGCAGCTTGGCCTTGCCGATGCCCCCAGCTTGGCGGATGGACTCTAGCAGAGTGGCCAGCCACCGGAGGGGT
+CAACCACTTCCCTGGGAGCTCCCTGGACTGGAGCCGGGAGGTGGGGAACAGGGCAAGGAGGAAAGGCTGCTCAGGCAGGG
+>2 dna:chromosome chromosome:GRCh37:2:1:16000:1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+CGTATCCCACACACCACACCCACACACCACACCCACACACACCCACACCCACACCCACACACACCACACCCACACACCAC
+ACCCACACCCACACACCACACCCACACCACACCCACACACCACACACCACACCCACACCCACACACACCACACCCACACA
+CCACACCCACACACACCCTAACCCTAACCCCTAACCCCTAACCCTAACCCTACCCGAACCCTAACCCTAACCCTAACCCC
+TAACCCTAACCCCTAACCCTAACCCTAACCGTAACCCTAACCCTTTACCCTAACCCGAACCCCTAACCCCTAACCCCTAA
+CCCTTAACCCTAACCCTTAACCCTGACCCTGACCCTGACCGTGACCCTGACCCTAACCCGAACCCGAACCCGAACCCCGA
+ACCCCGAACCCCGAACCCCAACCCCAACCCCAACCCCAACCCTAACCCCTCACCCTCACCCTCGACCCCCGACCCCCGAC
+CCCCGACCCCCACCCCGAACCCGACCCCGACCCCGACCCAAACCCTAACCCTAAAACCCTAACCCTAGCCCTAGCCCTAG
+CCCTAGCCCTAACCCCTAACCCCTAACCCTAAGCCGAAGCCTAACTCGTGTCTGACTTTGAGTATTCAGTGCTGCAAACA
+GGAAGTATTTTATTCACCGTCGATGCGGCCCCGAGGGGTCCCAAAGCGAGGCAGTGCCCCCAAACTCTGTCCTGAGGAGA
+ATGCTGCTTCGCCTTTACGGTGTCCACCGGGTGTGTGCTCAGCAAAACGCAGCTCCGCCTTCGCGGTGCCCGTGGCCCAC
+CCGCCCGGGTCTGTGGTGAAGAGAACGCAGCTCCTAGTCGCAAAGGCACCGCGCCCGCGCAGGCGCAGAGAGGCGCACCG
+CGCCCGCGCAGGCGCAGAGAGGCGCACCGCGCCCGCGCAGGCGCAGAGAGGCGCACCGCGCCCGCGCAGGCGCAGAGAGG
+CGCACCGCGCCCGCGCAGGCGCAGAGAGGCGCACCGCGCCCGCGCAGGCGCAGAGAGGCGCACCGCGCCCGCGCAGGCGC
+AGAGAGGCGCACCGCGCCCGCGCAGGCGCAGAGAGGCGCACCGCGCCCGCGCAGGCGCAGAGAGGCGCACCGCGCCCGCG
+CAGGCGCAGAGAGGCGCACCGCGCCCGCGCAGGCGCACACAGGCGCACCGCGCCCGCGCAGGCGCACACAGGCGCACCGC
+GCCCGCGCAGGCGCACACAGGCGCACCGCGCCCGCGCAGGCGCACACAGGCGCACCGCGCCCGCGCAGGCGCACACAGGC
+GCACCGCGCCCGCGCAGGCGCACAGAGGGTCGCTGGGCAGGGGTTGGGGGTCGTACTGCAGGTGCACAGCTGCATAAGCG
+CACAGTCGCAAGCCGCCAGGCGCGGAGCGTGGGGGTGGCGGGGTGCAGGCGCAGAGACGGACGTCCCCGGGGGCGCGGCA
+CAGAGACAGGTGGAACCTCAATAATCCGAAAAGCCGGGCTCGGGAACCCCCTGCTTGCAACCGGGCACTACAGGATCCGC
+TTGCCCACGGTGCTCTGCCAGTGCGCCCCTTGCTGGCCACTAGGGCAACTGCAGGGCTATCTTGCTTACAGTGGTGTCCA
+GCGCCCTCTGCTGGCGTCGGAGCATTGCAGGGCTCTCTTGCTCGCAGTGTAGTGGCGGCACGCCGCCTGCTGGCAGCTAG
+GGACATTGCAGAGCCCTCTTGCTCACAGTGTAGTGGCAGCACGCCCGCCTCCTGGCAGCTAGGGACAGTGCCAGGCCCTC
+TTGCTCCAAGTGTAGTGGCAGCTGGCTCCCCCGCTGGCAGCTGGGGACACTGACGGGCCCTCTTGCTTGCAGTATAGTCG
+TCGCACGCCTTCTGGCCGCTGGCGGCAGTACAGGATCCTCTTGCTCACAGTGTAGGGCCCTCTTGCTCCCGGTGTGACGG
+CTGGCGTCCCCTACTGGCCGCCTCCTGCACCAATTAAAGTCGGAGCACCGGTTACGCCCCATCACTTCTGTAAATTCAAA
+CTGAAACGGAGCTATTAGTGGGGAGAGCTGATGTCCCAGTTCTTGTTTAACTTGGAAGAAAGATTTTCACCAAGAGGCAG
+TACAAAGATGACAGATAACTTCATTGAAAAGAAATACAGTGTAAACACCTTACTGTAGAAAAATAGGGAGGACAGGGCTG
+ATCGTGCATGAAAACAGCCTAAGAGTCTTGTGCAGGGAAGTTACTCTAACTGTAACTTACAACAACTTAGTAGATTATAC
+TTTTGTAAACAGAAGTGAAGCATTTATCTTTTTTCTTGCTTGATTATTTACATAATCAAGCAAAATCTAACAAAACAATA
+ATATTTTAACAATAATATTTTTAAAACAATAATAGTCTTACTTTGATTATGATCAAAAATGATGGTTACTACAGAGAGAA
+TTTTTATGTTTCAATGGAAAAGTATAACATGGCCAGGCATGGTTGCACATGCCTATAATTACAGCACTTTGGGAGGCCAG
+GAGTTCAACATCAGCCTGGGCAACATGGTGAAACCACGTCTCTACCAAAAATACAAAAATTAGATGGGCATGGTGGCATG
+TGCCTGTAGTCCCAGGTAATCAGGAGGCTGAGGAGGGAGGATCGTTTGCACCCGGGAAGTAGAGGTTGCAGTGAGCTGAG
+ATTGCACCTTTGCACTCCAGCCTAGGCGACAGAGCCAGATCCTGTCTCAAAAAAAATTTTTTTAAAGGAAAACTACAGCC
+ATTGTGGGTTATCAGATTCTAGTCTTGTTTCTTGTTTCTGGGCTATTTTTACCTCTTTGTAAACTGCATCCTGCCATCTG
+ATGAATTTTGTCCCACAATGATACTTGGGGAACAAGAAGCCAATTATTGTCTCTCCTACTAATGTATCTATTGTCAGTTA
+ATTTGAAGGTCTCCAACCCTGGAACAAAGTTAGAAGAGGAAGGTTCTGCTCCCCAAAATGCATAACCAAATTGTGGTACA
+TTCATGTAATGGAACACTATTTAGCCATAGAAACGAACAAGCTATCAACTCACACAAAGACATGAGTGAATCTTGCATGC
+ACATTGCTAAGTGGAAGAAGACAGTCTGAGGAGGATACACACAGTGTGACCTCATTTAATGAGACACTGGAGAAGGCAAA
+CTACACAGATGGGAAGCCATTGGCTCCATGGGGTGGGGGTTTGAAGCATTCCATATGATACTTTAATAGTGGGATATCTG
+CCACAATGCATTTGTCAAAATATGCAGAATTTTACAGCCATATGGTTAGAGCAAACTCTATTCAAATTAAATAAAATTAC
+TCAGGATGTGGAGTATCCCAGGACAGAATACATCATGTGAAAAAGCATTTATGCTACAAATTACTATGGTAATTATGCTA
+CAAATTTATGGTACCATAAATTACCATAGTAATTTGTAGCATAAATTTGTACTATGGTACAAATTACATGGGAGAGTGAA
+GGTGGGTTAAAACATTCATATTAAAGAACTTCCACTCAGATTGCAAGAAAAGAGAGAGGAATGGAGATGGTAGCACAAGT
+CCCTACAATAAAAGTAGATGTTTTGAGATCAGTTCTATTTGTTCTGACAAAAATTAAAGACAGAAACCAAAGTTTAGCCT
+GAGGCAACAATTAGTTGGGGAATAAGCCAGAGGCATATATGGCATAGACACATTTAAACATTTCTCTCATATTAATACAA
+ATACTAAAATGACATATCAATTGATTCCAAATAAAACAAATATTTAAAACATTTAATGAATAAACACTGGGGTCTACAGT
+AGTAGTTAAAGGAGATCTCACAAACAGGTTTGGTTTTTGAAGGTTAGAAATGATGGTCTAGAGAATTCATTTCATTCCAG
+AGACAGAAAGAGGAATATCTTGGGTTCCTTCAGGAATACATCTGGCTTTGCCTCATCTTTGTGTGTTTGAACTATGCATA
+CGGCAGAAGAAAACATGGGGGTTTCAGAGTTTTTTTTTTTTTTGAGACAGAGTCTTGCTCTGTTGCCCAGGCTGGAGTGC
+CGTGGTGTTATCTCGCCTCACTGCAACCTCTGCCTTCTGGGTTCACACCATTCTCCTGCCTCAGCCTCCGGAGTAGCTGG
+GACTACAGGTGCACCACACCTGGCTACTTTTTCTGTATTTTTTAGTAGAGATGGGGTTTCACCATGTTAGCCAGAATGGT
+CTAGTTCTCCTGACCTTATGATCCAACCGCCTCAGCCGCCAAGAATGATGGGATTACAGGGGTGAACCACCACGCCCGGC
+TAAGGATTTCACAGATTTAAGGTGCTAAAATCACTGGGTTCTCTAAGAAGCCTGGGATTCTTCTGCTGGAAAAATAAGTT
+TGTTGAGAAAAATGAGTTGGAGGAGGTTGTTATTGAAGTGAAGCAGAATTGTTTTTACTAATCTGCTTATTACCCACTCT
+GAAGTGTGGAAACAAATTTTTCATGCACAAGGTCATCTTACTGTTACTGGAATGCAGTGGAAAGAGAACAGATTAGTTTT
+TCTCTCTCAGAACACAACCACTAGAAACGTCCTATGTCAGATGAGATATTGCCCAGTTATTTTCAAAAGACTGAAAAATC
+CTGGATGTAAATGTTTGCTGCAAAATAAATACATGCTAGAAACAGAAGCATCTGGGTCACAGCTATATTAGAGCTACCTG
+TGTTCCCCTGTTACTGAGATTAAAACAAAAATGTCCAATAAAAATCATTCACAGTGTGGGAGAGGGGAAGTTGAAGGATG
+GAAAGGCCAGGCATAAAAAGATTTCAGAATTTCAGTCCATAAGGAAGTTGCTTTGTGCATTGTCTGTTGCTGTGTGCAAG
+GTGAAGGCTGGGGCAAGAAAACGTGCAGTAACAAGGGCTCCTTTGTCCATCTCACCTCTCTAGATACCAAGTTTCAGACA
+TGTTGCATTTTAATTGAAAAGTTGATATAACTTTTTTTAAAAGAATACTTGCAGTGCTTGAAGTGTAAAAAGCTGCTGTG
+ACAAAAAAAAAAGCAGGGAAAGGGATTTTTTTTAAAAAAGCAAACAGCAACAATAAAAAACCCACAAACAACAAATAACA
+AACAAAAAAACAGAGGAAGAAGTCGAAACACCCTGGGCTGAGACTATTTCCAGGAAGGGGCTACGAAAGGCAGTTGGAAA
+TTCCATTTTCTTTGCAACTGTGGGTTTTCTGGCCTGCTTCCTTTCTAAAGTATATTACTTTCTTTTTGGTTCATGAAGTT
+ATCCCTTTGTGTCTTCTGGAACAGCTATGTATTTTCTTTATCTATCATCTAGCTACCTGCCTATCATCTATCTGTCTTTT
+CTGCCTTTTGCTATCAAAAGCTTGGGTCAAGCAGGATAGAATTCCAGTGTATGTTCACTCTACCATTTAAAACAAGAGCT
+CTTGTAGGCATTCTTCATCACATCACAAACCTGAGCTTTCTAAAACAGGGTGTGGCAAACTACCATGCATGGGCCATGTC
+TGACACAGTCTGCATTTGTAAGTAAAGTTGTATTGGGACAAAGCCACATAGATGTGTTATATAACATCTCTGGCTACTTT
+CATAGTACAATGGAAGAGCTGAGTCATTGAGACAGAGACCACATGGCTTGGAAAACTTAAAATATTTAACATTTTGCCCT
+TTGCAGAAAATACTTGCTAACTCTTGTTTTAAAAGATCTTTGTTTAGAATGCTACCTGTTGCCTTCTGGGTAGAATCACA
+ACTATATACCACAATAGACACAACTTGAACCCTGCTTCTATATCCAGCCTCATCTATTATTTCCTCTCCTTCTTATTTTC
+CTTCTGGCCGTGCTGATGGATTGTCAGCTTCCCAGATGTGCGAGAATCTCTCCTCCCTTCCCCACATTCTCATGCTCTCC
+CTCTGCCTCTGGAGAACTACCTGCCCCATCTCTCATGATAAATCCTTTCTTCATTCTTTATGTTGCAGCCCCTTTGCTCC
+TTCCTTAAGGATGTCTATCTGGCTCTATTTTGGGTGACATGCTCCTTCTGCATCTCCCAGAGCCAGCCTGTGTGTGTCAG
+>3 dna:chromosome chromosome:GRCh37:3:1:16000:1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>4 dna:chromosome chromosome:GRCh37:4:1:16000:1
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+ACCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTTAACCCTTAACCCTAACCCTAACCCTAA
+CCCTAACCCTAACCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCC
+TAACGCTAACCCTAACCCTAACCCTAACCCTAACCCTCATTATTCTCGGCTGCAAAGAGGAAGGATCTTTACCGTGGATG
+TGGCCCCCAGTTGTCCCAAAATGAAGCAGTGCCCCCAACGTCTGTGGAGAGGCATGCGCTGCTCCACCTTCGGGATGTCC
+CCCGCGTCTGTGCTGAGCAGAATGCAGCTCCGTTATCGCGTTCCCCCCGAAGTCTCTGCAGAGGAAAACGGAGCTCCTCC
+TTCGCGATGCTCTCCAGGTCTGCGCTGAGGAGAACGCAGCTCCGCCCTCGCAAAGGCATAGCGCCATCGCAGGCGCAGAA
+AAAAACGTCGGTGCAGCGCAGGCGCAGAGAAAAACGACGGCGCGTCCCTGGGGGGCGCGGCGCAGGCCCAGAGAGGCATG
+CCACCGTTGCGCCGGGGCGTGGGCGCGGCGCAGACGCAGAGACGCACGCCGGCGCAGCGCCGGGATGGGAGCGCGGCCCA
+GGCGCAGACACGGACGGCAGCGTGGCGCCTGGCCGGAGGCGCGACGCAGGCCCAGACACACACGGCGGCGCGGCGCCATG
+ATGGGACCCCGCGCAGGCGCACAGACGGATGGTGGCGCGGCGCAGGCGCAGTAGAAAAACGCCAGCGCGGTGCGGGGGGC
+GCGGCGCAGGCACAGGCGCAGAGACGGACGCCGACGGGGCGCAGGCGCAGAGACGGACGCCGCCGGGGCGCAGGCGCAGA
+GACGGACGCCGCCGCGGCGCAGGCGCAGAGACGGACGCCGCCGCGGCGCAGGCGCAGAGACGGACGCCGCCGCGGCGCAG
+GCGCAGAGACGGACGCCGCCGCGGCGCAGGCGCAGAGACGGACGCCGCCGCGGCGCCGTGGCGGTGGCAAGAGTCACGCG
+GAGAGATGCACGGCTGCGTGGCGCAGGCGCAGAGAAAAACGCCGGCGCGTCCCCTATGGGCGCGGCGGAGGCCCAGAGAC
+GCACGCCGGCGCGGCGCCGGGGCGGGGGTCGGGGCGCAGGCGCAGAGAAAAACGCCGGCGCGGCGCCGGGGCGGGGGTCG
+CGGCGCAGGCGCAGAGACCCACGCCGGGGCGGGGGCGCGGCGCAGGCGCAGACACGCACGCCGGGGCAGGGGCGCGGCGC
+AGGCCCAGAGACGCACGCCGGCGCGGCGCCGGGGCGGGAGCTCCGCGCAGGGGCAGAAAAGGACGCTGGCGCGGCGCAGG
+CGCAGAAAAAAAATGGCGGCGCAGCGCAGGCGCCGAGAAAAGCGCCAGCGCCGGGGGTCGCGGCGCAGGCGCAGAGAAAA
+ACGCCAGCGCGGCGCCGGCGCAAAGACGGGCGCAGGCGCAGAGTCGGGCGCTGGCGCGTCGCCGAGGTGGGGGCGCGATG
+CACGCGCAGAGACGCACGGCTGCGTGGCGCAGACGCAGAGAAGAACGCGAGCGCGGCGCCGAGGACAAGGCGCAGGCGCG
+GAGACGCACGCCAGCGCGGGGGCGAGGCGCAGGCGCGGAGATGCACTCCGCCAGGCGCGGGGAGGGGGGCGCGGCGCAGG
+CGCAGTGACGCACGCCGCCTGGGGCGCAGCGCAGAGACAGGCGGAACCTCAGTAATCTGAAAAGCCAGGTTGCCCCCTCC
+TTGCGGCCGGGCACTAAAGGGCCCACTTGCTGAAGGCGCTGTGCCAGCGTGCCCCCTGCTGGTGACTGGGGCAACTGCAG
+GGTTCTCTTGCTTCCATTAGTGGCCAGCGGCCCCTGCTGGCGGCGGGGCACCGCAGGGTCCTCTTGCACACAGTATAGTG
+GCGGCATGCCGCCTGCTGGCAGCTGGAGACATTGCAGGGCTCTCTTGCTCATAGTATAGTGACAGGACGCCCGCCTGCTG
+GCAGCTGGGGACACTGCCGGCCACTCTTGCTCCAAGTGTAGTGGCTGTTGGCTCCCCTGCTGGCAGCTGGGGACACTGCC
+GGGCCCTCTTGCTTGCAGTTTACTGGGGGCACGCCCCCTTCTGGCCGCTTGGGGCACTACAGGATGCTCTTGCTCACAGT
+GTAGTGGCAGCTCGCCGCCTGCTGGCAACCAGGGTACTGCAGGGTTCTCTTGCTCATGGTGTGGTGCCCGTCCACCACCT
+GCTGGCAGCTAAGGACACTGCAGGGCCCTCTTGCTCAGAGTGTAGTCGTCGTACACCCCCTGCTGGCAGCTGGGGACGCT
+GCCGGGACTTTTCCTGGCACTGTCGTGGCAGCACACTACCTGCAGGCAGATGGGGACTATGCAGGGACCTCTTGTTCAGG
+GTGTGAGGGCTGGCACGCCCTACTGGCCGCCTCCTGCACCACTTAAAGTCGGAGCGCCAGTTAAGCACCATCAGTTCTGG
+AAATTGAAACTGAAATGGAGCTATTACTGAGGAGAGTTGATGTCCCAGTTCTTGTCTAACTTGGAAGAAAGATTTTTCAC
+CAAGAGGCAGTAAAAACATGGCAGATAACTTCATTGAAAACAAATACAGTGTAAAGAGCTTATTGTAGAATAATAGGGAG
+GAGTGGGCTGATTGTGCAGGAAAACAGCCTGAGAGTCCTGTGCAGGGAATTTTATTTTGGACTTCTTCACATTTCTGCCT
+CTGTCTCAAGTCTCCACCTGTTTTCTTTGTCTGGTTTTCCTGCTACTGCCTTAGGTCCCTGAGTTGCCCCACTTAGGCTT
+ATGGGACCTCCTCACTGTTGGTTGAGGCACATGTGTGCTGATCAATCCGAATCCACTCTGGTACCAGGCTCCTTCCCCCC
+ATCCCAGGCAGGCTGACAGCGGTCATGTTTCTGCCTACAGCGCCTGCCTATCTCTTTTGAATGTCCTTCTCTACACTACT
+CTGTACTTATGGTGCCAGGTTTCTCTTAAGAATGTCCCCTTTGTCCTTCTTATCAGCATGTAGCCAGCAATATTGTGACA
+TTTTTACTGCAGAGTGAATGATGACTGGGGCATCTTAAATGGAGTTCTGGGGTGTTTCTTTCTGCATAGGTACCTCTGCA
+GTAGTAGTTTCCAAAATACTTTTGGTAATTTTTAACCTTAAAGTTAACCTTAAAGTTAAGCTAAGTAAAAGATTTGCATT
+AAATATCTAGACCATTTATAAATAAGATACAATACTAAAACATTACTGAAGATAAATAATTCTAGTTTACATACTTTTGG
+CTACTTATTTTTACAGAGAAACTAAAGATATTTTAGCCCATTAATAAACATGTTTTTGTCTACCACACTGAGAAATTGTA
+CTATGAGGAAACACATCCCTCTAGATGTTGGGAGATGGTATACTCATACATTTTCTAACCTACTATAGAATGCTAACATA
+TGACAGTTTATAACTGTCTACTTCCTAGTTTTCTCTGGAAAATAAAAGATTACTAAGTATTAAAATTATAATCAATATGT
+GTAAATAAAACTACTGGAAATAATAGAATAACTAGAAACAACTCTATGCAAAGCATGCAAGAAAAGTAGTGCATGTTTTG
+CAAGTAAAGTAGGACGTATTTTTTATAAGGAAAACCATACAAAAGATACAAATAAAAAGAGATACCTAGCCTTCCCTGTG
+TTATATTTGTATGGGTAAAATGTCATGTTTTCAGAAATTATATAAAATTCCTGGAAATTTGTCAATGTTCTCCTTATCCA
+TGCTATGTGCCAGTATAGAGTTATGAGTCATAATTCCAATTATTATTTTAAATGTTGTGCTGGGTGCAGTGGCTCACGTC
+TGTAATCCCACCACTTTGGGAGGCCTAGGCAGGTGGATCACAAGGTCAGGAGATCGAGAACATCCTGGCTGACATGGTGA
+AACCCCATCTCTACTAAAAATACAAAAAATTAGCCAGGCGTGGTGGTGGGCACCTGTACTCCCAGCTACTCAGGAGGCTG
+AGGCAGGAGAATGGCATGAACCAGGGAGGCAGAGCTTGCAGTGAGCCAAGATAGCGCCACTGCACTCCAGCATGGGCAAC
+AGAGCGAGACTCTGTCTCTAAATAAATAAATAAATAAATAAATAAATGTTTTATCCCACAGAAAAAATCGAATATCCTTG
+TCAGTTGTGGTATAATGAACTCTCATCAGATCTTTCATCACAGCCATTTCATATTCTTTATCATTTAGATATTATTTCCC
+CCTGATGCTTTCCTGAAAGCTCCTGCAATCAGCTACAGGTCAGAATGTTCATCTCCATCACGGGACTCCCTCTGAGACAC
+ACAGAAAAGAGTATGCAAGATAGTCTGGTTATAGGCTTCTGATGATATTGCTTAAATAACTTTAAGACCATACACTTCGC
+TCAGTGAAGATCTCCAGAAGTCTGCTTCAGAAATTGATGGGTTCATGACACTGCTAACCCAAGATGCAACAAGACTGGAA
+TTGATTACATGGTACTGAATGAACTGATGAAAATTGATTATAATTTTATAGCTTTTTGGAGCATTGCTGGTTCTTTAATG
+TTCTAGTTTCTGGACTTAAGAAATCTCTTTCTCTTAACCTAACTGTAACATACAATTTAGTAGATTATACTTTTGAAAAC
+AGAAGTGAAGCATTTATCTTTTTTCCCCTGCCTGATTTTTCCAGAATTTTGAAATCCTTACTGAACACTCTTATTTTCAC
+GATGATATAGTTGTTAGCAAAAGTCCAATAAGAATCTGTTCACCTTGAACAGAGACCTCAGAAATAATGCCGCATATCTA
+CAACCATCTGATCTTTGACAAACCTGACAAAAACAAGCAATGGGGAAAGGATTCCCTATTTAATAAATGGTGCTGGGAAA
+ACTGGCTAGCCATATGTAGAAAGCTGAAACTGGATCCCTTCCTTACACATTATACAAAAATTAATTCAAGATGGATTAAA
+GACTTACATGTTAGACCTAAAACCATAAAAACCCTAGAAGAAAACCTAGGCAATACCATTCAGGACATAGGCATGGGCAG
+GGACTTCATGTTTAAAACACCAAAAACAATGGCAACAAAAGCCAAAATGGACAAATGGGATCTAATTAAACTAAAGAGCT
+TCTGCACAGCAAAAAAAAACCTACTGTCAGAGTGAACAGGCAACCTACAAAATGGGAGAAAATTTTCACAACCTACTCAT
+CTGACAAAGGGCTAATATCCAGAATCTACAATGAACACAAAGAAATTTACAAGAAAAAAACAAACAACCCCATCAAAAAG
+TGGGCGAAGGATGTGAACAGACACTTCTCAAAAGAAGACATTTATGCAGCCAAAAGACATGTGAAGGAATGCTCATCATC
+ATTGGCCATCAGAGAAATGCAAATCAAAATCACAATGAGACACCATCTCACACCAGTTAGAATGGCGATCATTAAAAAGT
+CAGGAAACAACAGGTGCTGGAGAGGATGTGGAGAAATAGGAACACTTTTACACTGTTGGTGGGACTGTAAACTAGTTCAA
+CCATTGTAGAAGATGGTGTGGCGATTCCTCAGGGATCTAGAACTAGAAATACCATTTGACCCAGCCATCCCATTACTTGG
+TATATACCCAAAGGAGTATAAATCATGCTGCTATAAAGACACATGCACACGTATGTTTATTGCGGCACTATTCACAATAG
+CAAAGACTTGGAACCAACCCAAATATCCAACAATGATAGACTGGATTAAGAAAATGTGGCACATATACACCATGGAATAC
+TATGCAGCCATAAAAAATGAAGAGTTCATGTCCTTTGTAGGGACATGGATGAAACTGGAAACCATCATTCTCAGCAAACT
+ATCACAAGGACAAAAAAACCAAACACTGCATGTTCTCACTCATAGGTGGGAATTGAACAATGAGAATACATGGACATGGG
+AAGGGGAACATCACACTCCAGGGACTGTTGTGGGGTGGGGGGGAGGGGGGAGGGATAGCATTAGGAGATATACCTAATGC
diff --git a/testdata/htsjdk/samtools/hg19mini.fasta.fai b/testdata/htsjdk/samtools/hg19mini.fasta.fai
new file mode 100644
index 0000000..2a20cf3
--- /dev/null
+++ b/testdata/htsjdk/samtools/hg19mini.fasta.fai
@@ -0,0 +1,4 @@
+1 16000 48 80 81
+2 16000 16296 80 81
+3 16000 32544 80 81
+4 16000 48792 80 81
diff --git a/testdata/htsjdk/samtools/metrics/metricsOne.metrics b/testdata/htsjdk/samtools/metrics/metricsOne.metrics
new file mode 100644
index 0000000..46c724e
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOne.metrics
@@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
+Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
diff --git a/testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics b/testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics
new file mode 100644
index 0000000..46c724e
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOneCopy.metrics
@@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
+Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
diff --git a/testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics b/testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics
new file mode 100644
index 0000000..3e6f088
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOneModifiedHistogram.metrics
@@ -0,0 +1,14 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
+Hello World 2008-12-31 123 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
+8 1
diff --git a/testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics b/testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics
new file mode 100644
index 0000000..a4d23d8
--- /dev/null
+++ b/testdata/htsjdk/samtools/metrics/metricsOneModifiedMetrics.metrics
@@ -0,0 +1,13 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.illumina.MarkIlluminaAdapters INPUT=testdata/picard/illumina/MarkIlluminaAdaptersTest/unevenReads.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven5946421709712534555.sam METRICS=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/uneven4591996041776878558.metrics MIN_MATCH_BASES_SE=12 MIN_MATCH_BASES_PE=6 MAX_ERROR_RATE_SE=0.1 MAX_ERROR_RATE_PE=0.1 ADAPTERS=[INDEXED, DUAL_INDEXED, PAIRED_END] ADAPTER_TRUNCATION_LENGTH=30 PRUNE_ADAPTER_LIST_AFTER_THIS_MANY_ADAPT [...]
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Mon Aug 24 13:31:51 EDT 2015
+
+## METRICS CLASS htsjdk.samtools.metrics.MetricsFileTest$TestMetric
+STRING_PROP DATE_PROP SHORT_PROP INTEGER_PROP LONG_PROP FLOAT_PROP DOUBLE_PROP ENUM_PROP BOOLEAN_PROP CHARACTER_PROP SHORT_PRIMITIVE INT_PRIMITIVE LONG_PRIMITIVE FLOAT_PRIMITIVE DOUBLE_PRIMITIVE BOOLEAN_PRIMITIVE CHAR_PRIMITIVE
+Hello World 2008-12-31 122 9223372036854775807 456.789001 0.713487 Two N A 123 919834781 9223372034707292160 0.55694 0.229233 Y B
+
+## HISTOGRAM java.lang.Integer
+clipped_bases read_count
+6 1
+7 1
diff --git a/testdata/htsjdk/samtools/sra/test_archive.sra b/testdata/htsjdk/samtools/sra/test_archive.sra
new file mode 100644
index 0000000..a9b6e70
Binary files /dev/null and b/testdata/htsjdk/samtools/sra/test_archive.sra differ
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htsjdk.git
More information about the debian-med-commit
mailing list