[med-svn] [htsjdk] 01/05: Imported Upstream version 2.3.0+dfsg.1
Andreas Tille
tille at debian.org
Fri May 20 09:49:28 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository htsjdk.
commit 759cd028874eda7edb142828b89c9eb3395deb60
Author: Andreas Tille <tille at debian.org>
Date: Fri May 20 11:29:47 2016 +0200
Imported Upstream version 2.3.0+dfsg.1
---
README.md | 8 +-
build.sbt | 9 +-
build.xml | 26 +-
src/c/inteldeflater/IntelDeflater.c | 389 ++++++++++++++-------
src/c/inteldeflater/README | 7 +
src/c/inteldeflater/igzip_lib.h | 50 +++
src/c/inteldeflater/internal_state_size.h | 32 ++
src/c/inteldeflater/types.h | 46 +++
src/java/htsjdk/samtools/BAMFileReader.java | 33 +-
src/java/htsjdk/samtools/BAMIndexMetaData.java | 2 +-
src/java/htsjdk/samtools/BAMIndexer.java | 28 +-
src/java/htsjdk/samtools/BinningIndexBuilder.java | 4 +-
.../{CRAMIndexer.java => CRAMBAIIndexer.java} | 12 +-
.../htsjdk/samtools/CRAMContainerStreamWriter.java | 11 +-
src/java/htsjdk/samtools/CRAMFileReader.java | 38 +-
src/java/htsjdk/samtools/CRAMFileWriter.java | 7 +-
src/java/htsjdk/samtools/CRAMIterator.java | 11 +-
src/java/htsjdk/samtools/Defaults.java | 83 ++++-
src/java/htsjdk/samtools/DuplicateSetIterator.java | 29 +-
src/java/htsjdk/samtools/GenomicIndexUtil.java | 8 +-
.../SAMBinaryTagAndUnsignedArrayValue.java | 7 +-
src/java/htsjdk/samtools/SAMBinaryTagAndValue.java | 90 ++++-
src/java/htsjdk/samtools/SAMFileReader.java | 83 +----
src/java/htsjdk/samtools/SAMFileWriterFactory.java | 5 +-
src/java/htsjdk/samtools/SAMRecord.java | 66 +---
src/java/htsjdk/samtools/SAMUtils.java | 4 +-
src/java/htsjdk/samtools/SamReaderFactory.java | 29 +-
src/java/htsjdk/samtools/SamStreams.java | 34 +-
src/java/htsjdk/samtools/cram/CRAIEntry.java | 83 +++--
src/java/htsjdk/samtools/cram/CRAIIndex.java | 11 +-
.../htsjdk/samtools/cram/build/CramNormalizer.java | 6 +-
.../samtools/cram/ref/CRAMReferenceSource.java | 22 ++
.../htsjdk/samtools/cram/ref/ReferenceSource.java | 19 +-
src/java/htsjdk/samtools/cram/structure/Slice.java | 10 -
.../htsjdk/samtools/example/PrintReadsExample.java | 104 ++++++
.../htsjdk/samtools/fastq/FastqWriterFactory.java | 2 +-
.../htsjdk/samtools/filter/FilteringIterator.java | 207 +++--------
...ringIterator.java => FilteringSamIterator.java} | 307 ++++++++--------
.../samtools/reference/ReferenceSequence.java | 12 +
.../htsjdk/samtools/util/AbstractAsyncWriter.java | 21 +-
.../samtools/util/AsyncBufferedIterator.java | 281 +++++++++++++++
.../samtools/util/BlockCompressedInputStream.java | 8 +
.../htsjdk/samtools/util/CloseableIterator.java | 24 +-
src/java/htsjdk/samtools/util/Log.java | 2 +-
.../htsjdk/samtools/util/SamLocusIterator.java | 4 +-
.../util/SamRecordIntervalIteratorFactory.java | 8 +-
src/java/htsjdk/samtools/util/SequenceUtil.java | 23 ++
src/java/htsjdk/samtools/util/Tuple.java | 3 +-
src/java/htsjdk/tribble/AbstractFeatureReader.java | 10 +
.../tribble/TribbleIndexedFeatureReader.java | 28 +-
src/java/htsjdk/tribble/index/IndexFactory.java | 45 +--
.../htsjdk/tribble/index/tabix/TabixIndex.java | 18 +-
.../htsjdk/tribble/readers/LineReaderUtil.java | 4 +-
.../variant/example/PrintVariantsExample.java | 105 ++++++
.../variantcontext/GenotypeJEXLContext.java | 58 +++
.../htsjdk/variant/variantcontext/JEXLMap.java | 75 +---
.../variant/variantcontext/VariantJEXLContext.java | 36 +-
.../variantcontext/filter/FilteringIterator.java | 94 +----
...r.java => FilteringVariantContextIterator.java} | 6 +-
.../writer/VariantContextWriterBuilder.java | 22 +-
.../writer/VariantContextWriterFactory.java | 2 +-
src/java/htsjdk/variant/vcf/AbstractVCFCodec.java | 7 +-
.../htsjdk/variant/vcf/VCFStandardHeaderLines.java | 2 +-
src/scripts/build_intel_deflater.sh | 29 +-
.../java/htsjdk/samtools/BAMIndexWriterTest.java | 9 +
.../java/htsjdk/samtools/CRAMBAIIndexerTest.java | 2 +-
.../java/htsjdk/samtools/CRAMComplianceTest.java | 67 ++--
...ileIndexTest.java => CRAMFileBAIIndexTest.java} | 14 +-
...FileWriterTest.java => CRAMFileWriterTest.java} | 4 +-
.../java/htsjdk/samtools/GenomicIndexUtilTest.java | 50 +++
.../samtools/SAMBinaryTagAndValueUnitTest.java | 183 ++++++++++
.../java/htsjdk/samtools/SAMFileReaderTest.java | 18 +-
.../htsjdk/samtools/SAMFileWriterFactoryTest.java | 22 ++
.../java/htsjdk/samtools/SAMIntegerTagTest.java | 2 +-
.../java/htsjdk/samtools/SAMRecordUnitTest.java | 169 ++++-----
src/tests/java/htsjdk/samtools/SamStreamsTest.java | 60 ++++
.../java/htsjdk/samtools/cram/CRAIEntryTest.java | 3 +-
.../java/htsjdk/samtools/cram/CRAIIndexTest.java | 110 +++++-
.../samtools/util/AsyncBufferedIteratorTest.java | 115 ++++++
.../util/BlockCompressedOutputStreamTest.java | 10 +
.../samtools/util/CloseableIteratorTest.java | 31 ++
.../htsjdk/samtools/util/IntelDeflaterTest.java | 130 +++++++
.../htsjdk/samtools/util/SequenceUtilTest.java | 20 ++
.../htsjdk/tribble/AbstractFeatureReaderTest.java | 61 ++++
.../tribble/TribbleIndexFeatureReaderTest.java | 86 +++++
.../htsjdk/tribble/index/tabix/TabixIndexTest.java | 79 ++++-
.../htsjdk/variant/PrintVariantsExampleTest.java | 65 ++++
.../variantcontext/VariantJEXLContextUnitTest.java | 46 ++-
...va => FilteringVariantContextIteratorTest.java} | 8 +-
.../filter/HeterozygosityFilterTest.java | 2 +-
.../filter/JavascriptVariantFilterTest.java | 2 +-
.../VariantContextWriterBuilderUnitTest.java | 17 +-
.../htsjdk/variant/vcf/AbstractVCFCodecTest.java | 24 +-
.../tabix/YRI.trio.2010_07.indel.sites.vcf.gz | Bin 0 -> 250626 bytes
.../tabix/YRI.trio.2010_07.indel.sites.vcf.gz.tbi | Bin 0 -> 32877 bytes
testdata/htsjdk/tribble/test.vcf | 24 ++
testdata/htsjdk/tribble/test.vcf.gz | Bin 0 -> 822 bytes
97 files changed, 3107 insertions(+), 1175 deletions(-)
diff --git a/README.md b/README.md
index b04d3ab..7d8f2e3 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,8 @@
-[![Build Status](https://travis-ci.org/samtools/htsjdk.svg?branch=master)](https://travis-ci.org/samtools/htsjdk)
+Status of master branch build: [![Build Status](https://travis-ci.org/samtools/htsjdk.svg?branch=master)](https://travis-ci.org/samtools/htsjdk)
+
+Status of downstream projects automatically built on top of the current htsjdk master branch. See [gatk-jenkins](https://gatk-jenkins.broadinstitute.org/view/HTSJDK%20Release%20Tests/) for detailed logs. Failure may indicate problems in htsjdk, but may also be due to expected incompatibilities between versions, or unrelated failures in downstream projects.
+- [Picard](https://github.com/broadinstitute/picard): [![Build Status](https://gatk-jenkins.broadinstitute.org/buildStatus/icon?job=picard-on-htsjdk-master)](https://gatk-jenkins.broadinstitute.org/job/picard-on-htsjdk-master/)
+- [GATK 4](https://github.com/broadinstitute/gatk): [![Build Status](https://gatk-jenkins.broadinstitute.org/buildStatus/icon?job=gatk-on-htsjdk-master)](https://gatk-jenkins.broadinstitute.org/job/gatk-on-htsjdk-master/)
## A Java API for high-throughput sequencing data (HTS) formats.
@@ -9,6 +13,8 @@ manipulating HTS data.
Please see the [HTSJDK Documentation](http://samtools.github.io/htsjdk) for more information.
+> **NOTE: _HTSJDK does not currently support the latest Variant Call Format Specification (VCFv4.3 and BCFv2.2)._**
+
#### Licensing Information
Not all sub-packages of htsjdk are subject to the same license, so a license notice is included in each source file or sub-package as appropriate. Please check the relevant license notice whenever you start working with a part of htsjdk that you have not previously worked with to avoid any surprises.
diff --git a/build.sbt b/build.sbt
index 645f3fa..ebcce1b 100644
--- a/build.sbt
+++ b/build.sbt
@@ -2,9 +2,12 @@ import com.typesafe.sbt.SbtGit._
import de.johoop.testngplugin.TestNGPlugin._
import sbt.Package.ManifestAttributes
+// added as a workaround for https://github.com/samtools/htsjdk/issues/573
+resolvers += Resolver.sbtPluginRepo("releases")
+
name := "htsjdk"
-val buildVersion = "2.1.1"
+val buildVersion = "2.3.0"
organization := "com.github.samtools"
@@ -26,6 +29,10 @@ libraryDependencies += "org.testng" % "testng" % "6.8.8"
unmanagedBase := baseDirectory.value
+mappings in (Compile, packageBin) ++= Seq(
+ (baseDirectory.value / "lib/jni/libIntelDeflater.so") -> "lib/jni/libIntelDeflater.so"
+)
+
javaSource in Compile := baseDirectory.value / "src/java"
javaSource in Test := baseDirectory.value / "src/tests"
diff --git a/build.xml b/build.xml
index af0d741..9a5fb16 100755
--- a/build.xml
+++ b/build.xml
@@ -34,6 +34,7 @@
<property name="classes.test" value="testclasses"/>
<property name="scripts" value="src/scripts"/>
<property name="test.output" value="dist/test"/>
+ <property name="intelDeflator" value="${lib}/jni/libIntelDeflater.so"/>
<property name="javac.target" value="1.8"/>
<property name="javac.debug" value="true"/>
@@ -41,19 +42,22 @@
<!-- Get GIT hash, if available, otherwise leave it blank. -->
<property name="repository.revision" value=""/>
- <property name="htsjdk-version" value="2.1.1"/>
+ <property name="htsjdk-version" value="2.3.0"/>
<property name="htsjdk-version-file" value="htsjdk.version.properties"/>
<property name="testng.verbosity" value="2"/>
<property name="test.debug.port" value="5005" /> <!-- override on the command line if desired -->
- <condition property="isUnix">
- <os family="unix"/>
+ <condition property="isUnix">
+ <and>
+ <os family="unix"/>
+ <not><os family="mac"/></not>
+ </and>
</condition>
<target name="set_excluded_test_groups_unix" if="isUnix">
<property name="excludedTestGroups" value="slow, broken"/>
</target>
<target name="set_excluded_test_groups_non_unix" unless="isUnix">
- <property name="excludedTestGroups" value="slow, broken, unix"/>
+ <property name="excludedTestGroups" value="slow, broken, unix, intel"/>
</target>
<target name="set_excluded_test_groups" depends="set_excluded_test_groups_unix,set_excluded_test_groups_non_unix"/>
@@ -122,6 +126,8 @@
<macrodef name="run-test">
<attribute name="excludedTestGroups" default=""/>
<attribute name="includedTestGroups" default=""/>
+ <attribute name="additionalJVMarg" default=""/>
+
<sequential>
<taskdef resource="testngtasks" classpathref="classpath"/>
<testng suitename="htsjdk-tests" classpathref="classpath" outputdir="${test.output}"
@@ -137,7 +143,7 @@
<include name="**/Test*.class"/>
<include name="**/*Test.class"/>
</classfileset>
- <jvmarg value="-Xmx2G"/>
+ <jvmarg line="-Xmx2G @{additionalJVMarg}"/>
</testng>
<junitreport todir="${dist}/test" >
<fileset dir="${test.output}">
@@ -150,14 +156,20 @@
</sequential>
</macrodef>
- <target name="test" depends="compile, set_excluded_test_groups" description="Run unit tests">
- <run-test excludedTestGroups="${excludedTestGroups}, sra"/>
+
+ <target name="test" depends="compile, set_excluded_test_groups, intel-test" description="Run unit tests">
+ <run-test excludedTestGroups="${excludedTestGroups}, sra, intel"/>
</target>
<target name="sra-test" depends="compile, set_excluded_test_groups" description="Run SRA unit tests">
<run-test includedTestGroups="sra" excludedTestGroups="${excludedTestGroups}"/>
</target>
+ <!-- needs to be in a separate target since the JVM requires a different argument -->
+ <target name="intel-test" depends="compile, set_excluded_test_groups" description="Run Intel unit tests" if="isUnix">
+ <echo message="intelDeflator = ${intelDeflator}"/>
+ <run-test includedTestGroups="intel" additionalJVMarg="-Dsamjdk.intel_deflater_so_path=${intelDeflator}"/>
+ </target>
<target name="single-test"
depends="compile, compile-tests"
diff --git a/src/c/inteldeflater/IntelDeflater.c b/src/c/inteldeflater/IntelDeflater.c
index de503f1..65f5d1d 100644
--- a/src/c/inteldeflater/IntelDeflater.c
+++ b/src/c/inteldeflater/IntelDeflater.c
@@ -24,20 +24,26 @@
*/
/*
- * Native method support for net.sf.samtools.util.zip.IntelDeflater.
- * This is copied from OpenJDK native support for java.util.zip.Deflater, with only package and class name changed.
+ * Native method support for htsjdk.samtools.util.zip.IntelDeflater.
+ * This is copied from OpenJDK native support for java.util.zip.Deflater, and modified to support igzip.
*/
#include <stdio.h>
#include <stdlib.h>
+#include <immintrin.h>
+#include <emmintrin.h>
+#include <stdbool.h>
+#include <assert.h>
#include "jlong.h"
#include "jni.h"
-#include "jni_util.h"
-#include "zlib.h"
+//#include "jni_util.h"
+#include "zlib.h"
#include "htsjdk_samtools_util_zip_IntelDeflater.h"
-
+#include "igzip_lib.h"
#define DEF_MEM_LEVEL 8
+#define FAST_COMPRESSION 1
+#define IGZIP_TRUE 1
static jfieldID levelID;
static jfieldID strategyID;
@@ -46,6 +52,75 @@ static jfieldID finishID;
static jfieldID finishedID;
static jfieldID bufID, offID, lenID;
+typedef struct {
+ z_stream zStream;
+ LZ_Stream2 lz2Stream;
+ int useIGZIP;
+} Stream;
+
+
+bool is_cpuid_ecx_bit_set(int eax, int bitidx)
+{
+ int ecx = 0, edx = 0, ebx = 0;
+ __asm__ ("cpuid"
+ :"=b" (ebx),
+ "=c" (ecx),
+ "=d" (edx)
+ :"a" (eax)
+ );
+ return (((ecx >> bitidx)&1) == 1);
+}
+
+bool is_sse42_supported()
+{
+#ifdef __INTEL_COMPILER
+ return (_may_i_use_cpu_feature(_FEATURE_SSE4_2) > 0);
+#else
+ // return __builtin_cpu_supports("sse4.2");
+ return is_cpuid_ecx_bit_set(1, 20);
+#endif
+}
+//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+/**
+ * Throw a Java exception by name. Similar to SignalError.
+ */
+JNIEXPORT void JNICALL
+JNU_ThrowByName(JNIEnv *env, const char *name, const char *msg)
+{
+ jclass cls = (*env)->FindClass(env, name);
+
+ if (cls != 0) /* Otherwise an exception has already been thrown */
+ (*env)->ThrowNew(env, cls, msg);
+}
+
+/* JNU_Throw common exceptions */
+
+JNIEXPORT void JNICALL
+JNU_ThrowNullPointerException(JNIEnv *env, const char *msg)
+{
+ JNU_ThrowByName(env, "java/lang/NullPointerException", msg);
+}
+
+
+JNIEXPORT void JNICALL
+JNU_ThrowOutOfMemoryError(JNIEnv *env, const char *msg)
+{
+ JNU_ThrowByName(env, "java/lang/OutOfMemoryError", msg);
+}
+
+JNIEXPORT void JNICALL
+JNU_ThrowIllegalArgumentException(JNIEnv *env, const char *msg)
+{
+ JNU_ThrowByName(env, "java/lang/IllegalArgumentException", msg);
+}
+
+JNIEXPORT void JNICALL
+JNU_ThrowInternalError(JNIEnv *env, const char *msg)
+{
+ JNU_ThrowByName(env, "java/lang/InternalError", msg);
+}
+/////////////////////////////////////////////////////////////////////////////////////////////////////////////////
JNIEXPORT void JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_initIDs(JNIEnv *env, jclass cls)
{
@@ -57,38 +132,52 @@ Java_htsjdk_samtools_util_zip_IntelDeflater_initIDs(JNIEnv *env, jclass cls)
bufID = (*env)->GetFieldID(env, cls, "buf", "[B");
offID = (*env)->GetFieldID(env, cls, "off", "I");
lenID = (*env)->GetFieldID(env, cls, "len", "I");
+
}
JNIEXPORT jlong JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_init(JNIEnv *env, jclass cls, jint level,
jint strategy, jboolean nowrap)
{
- z_stream *strm = calloc(1, sizeof(z_stream));
-
- if (strm == 0) {
- JNU_ThrowOutOfMemoryError(env, 0);
- return jlong_zero;
+ Stream *strm = calloc(1, sizeof(Stream));
+ if (level == FAST_COMPRESSION && is_sse42_supported()) { //Use igzip
+ printf("Using igzip\n");
+ if (strm == 0) {
+ JNU_ThrowOutOfMemoryError(env, 0);
+ return jlong_zero;
+ } else {
+ strm->useIGZIP = IGZIP_TRUE;
+ init_stream(&strm->lz2Stream); //CHECK RETURN VALUE
+ return ptr_to_jlong(strm);
+ }
+
} else {
- char *msg;
- switch (deflateInit2(strm, level, Z_DEFLATED,
- nowrap ? -MAX_WBITS : MAX_WBITS,
- DEF_MEM_LEVEL, strategy)) {
- case Z_OK:
- return ptr_to_jlong(strm);
- case Z_MEM_ERROR:
- free(strm);
- JNU_ThrowOutOfMemoryError(env, 0);
- return jlong_zero;
- case Z_STREAM_ERROR:
- free(strm);
- JNU_ThrowIllegalArgumentException(env, 0);
- return jlong_zero;
- default:
- msg = strm->msg;
- free(strm);
- JNU_ThrowInternalError(env, msg);
- return jlong_zero;
- }
+
+ if (strm == 0) {
+ JNU_ThrowOutOfMemoryError(env, 0);
+ return jlong_zero;
+ } else {
+ char *msg;
+ switch (deflateInit2(&strm->zStream, level, Z_DEFLATED,
+ nowrap ? -MAX_WBITS : MAX_WBITS,
+ DEF_MEM_LEVEL, strategy)) {
+ case Z_OK:
+ return ptr_to_jlong(&strm->zStream);
+ case Z_MEM_ERROR:
+ free(strm);
+ JNU_ThrowOutOfMemoryError(env, 0);
+ return jlong_zero;
+ case Z_STREAM_ERROR:
+ free(strm);
+ JNU_ThrowIllegalArgumentException(env, 0);
+ return jlong_zero;
+ default:
+ msg = strm->zStream.msg;
+ free(strm);
+ JNU_ThrowInternalError(env, msg);
+ return jlong_zero;
+ }
+ }
}
}
@@ -101,7 +190,7 @@ Java_htsjdk_samtools_util_zip_IntelDeflater_setDictionary(JNIEnv *env, jclass cl
if (buf == 0) {/* out of memory */
return;
}
- res = deflateSetDictionary((z_stream *)jlong_to_ptr(addr), buf + off, len);
+ res = deflateSetDictionary(&((Stream *)jlong_to_ptr(addr))->zStream, buf + off, len);
(*env)->ReleasePrimitiveArrayCritical(env, b, buf, 0);
switch (res) {
case Z_OK:
@@ -110,7 +199,7 @@ Java_htsjdk_samtools_util_zip_IntelDeflater_setDictionary(JNIEnv *env, jclass cl
JNU_ThrowIllegalArgumentException(env, 0);
break;
default:
- JNU_ThrowInternalError(env, ((z_stream *)jlong_to_ptr(addr))->msg);
+ JNU_ThrowInternalError(env, ((Stream *)jlong_to_ptr(addr))->zStream.msg);
break;
}
}
@@ -119,129 +208,185 @@ JNIEXPORT jint JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_deflateBytes(JNIEnv *env, jobject this, jlong addr,
jarray b, jint off, jint len, jint flush)
{
- z_stream *strm = jlong_to_ptr(addr);
-
jarray this_buf = (*env)->GetObjectField(env, this, bufID);
jint this_off = (*env)->GetIntField(env, this, offID);
jint this_len = (*env)->GetIntField(env, this, lenID);
jbyte *in_buf;
jbyte *out_buf;
- int res;
- if ((*env)->GetBooleanField(env, this, setParamsID)) {
- int level = (*env)->GetIntField(env, this, levelID);
- int strategy = (*env)->GetIntField(env, this, strategyID);
- in_buf = (*env)->GetPrimitiveArrayCritical(env, this_buf, 0);
- if (in_buf == NULL) {
- // Throw OOME only when length is not zero
- if (this_len != 0)
- JNU_ThrowOutOfMemoryError(env, 0);
- return 0;
- }
- out_buf = (*env)->GetPrimitiveArrayCritical(env, b, 0);
- if (out_buf == NULL) {
- (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
- if (len != 0)
- JNU_ThrowOutOfMemoryError(env, 0);
- return 0;
- }
-
- strm->next_in = (Bytef *) (in_buf + this_off);
- strm->next_out = (Bytef *) (out_buf + off);
- strm->avail_in = this_len;
- strm->avail_out = len;
- res = deflateParams(strm, level, strategy);
- (*env)->ReleasePrimitiveArrayCritical(env, b, out_buf, 0);
- (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
-
- switch (res) {
- case Z_OK:
- (*env)->SetBooleanField(env, this, setParamsID, JNI_FALSE);
- this_off += this_len - strm->avail_in;
- (*env)->SetIntField(env, this, offID, this_off);
- (*env)->SetIntField(env, this, lenID, strm->avail_in);
- return len - strm->avail_out;
- case Z_BUF_ERROR:
- (*env)->SetBooleanField(env, this, setParamsID, JNI_FALSE);
- return 0;
- default:
- JNU_ThrowInternalError(env, strm->msg);
- return 0;
- }
+ Stream *strm = jlong_to_ptr(addr);
+
+ //igzip only supports one compression level so setParamsID should not be set when using igzip
+ //igzip does not support flush
+ if (((Stream *)jlong_to_ptr(addr))->useIGZIP && (((*env)->GetBooleanField(env, this, setParamsID) && strm->lz2Stream.total_in != 0) || flush == 1)) {
+ JNU_ThrowInternalError(env, "igzip doesn't support this");
+ } else if (((Stream *)jlong_to_ptr(addr))->useIGZIP) {
+ in_buf = (*env)->GetPrimitiveArrayCritical(env, this_buf, 0);
+ if (in_buf == NULL) {
+ // Throw OOME only when length is not zero
+ if (this_len != 0) {
+ JNU_ThrowOutOfMemoryError(env, 0);
+ }
+ return 0;
+ }
+ out_buf = (*env)->GetPrimitiveArrayCritical(env, b, 0);
+ if (out_buf == NULL) {
+ (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
+ if (len != 0) {
+ JNU_ThrowOutOfMemoryError(env, 0);
+ }
+ return 0;
+ }
+ strm->lz2Stream.next_in = (Bytef *) (in_buf + this_off);
+ strm->lz2Stream.next_out = (Bytef *) (out_buf + off);
+ strm->lz2Stream.avail_in = this_len;
+ strm->lz2Stream.avail_out = len;
+ assert(strm->lz2Stream.avail_in != 0);
+ assert(strm->lz2Stream.avail_out != 0);
+ jboolean finish = (*env)->GetBooleanField(env, this, finishID);
+ if (finish) {
+ strm->lz2Stream.end_of_stream = 1;
+ } else {
+ strm->lz2Stream.end_of_stream = 0;
+ }
+ fast_lz(&strm->lz2Stream);
+
+ (*env)->ReleasePrimitiveArrayCritical(env, b, out_buf, 0);
+ (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
+ if (finish) {
+ (*env)->SetBooleanField(env, this, finishedID, JNI_TRUE);
+ }
+ this_off += this_len - strm->lz2Stream.avail_in;
+ (*env)->SetIntField(env, this, offID, this_off);
+ (*env)->SetIntField(env, this, lenID, strm->lz2Stream.avail_in);
+ return len - strm->lz2Stream.avail_out;
} else {
- jboolean finish = (*env)->GetBooleanField(env, this, finishID);
- in_buf = (*env)->GetPrimitiveArrayCritical(env, this_buf, 0);
- if (in_buf == NULL) {
- if (this_len != 0)
- JNU_ThrowOutOfMemoryError(env, 0);
- return 0;
- }
- out_buf = (*env)->GetPrimitiveArrayCritical(env, b, 0);
- if (out_buf == NULL) {
- (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
- if (len != 0)
- JNU_ThrowOutOfMemoryError(env, 0);
-
- return 0;
- }
-
- strm->next_in = (Bytef *) (in_buf + this_off);
- strm->next_out = (Bytef *) (out_buf + off);
- strm->avail_in = this_len;
- strm->avail_out = len;
- res = deflate(strm, finish ? Z_FINISH : flush);
- (*env)->ReleasePrimitiveArrayCritical(env, b, out_buf, 0);
- (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
-
- switch (res) {
- case Z_STREAM_END:
- (*env)->SetBooleanField(env, this, finishedID, JNI_TRUE);
- /* fall through */
- case Z_OK:
- this_off += this_len - strm->avail_in;
- (*env)->SetIntField(env, this, offID, this_off);
- (*env)->SetIntField(env, this, lenID, strm->avail_in);
- return len - strm->avail_out;
- case Z_BUF_ERROR:
- return 0;
+
+ int res;
+ if ((*env)->GetBooleanField(env, this, setParamsID)) {
+ int level = (*env)->GetIntField(env, this, levelID);
+ int strategy = (*env)->GetIntField(env, this, strategyID);
+
+ in_buf = (*env)->GetPrimitiveArrayCritical(env, this_buf, 0);
+ if (in_buf == NULL) {
+ // Throw OOME only when length is not zero
+ if (this_len != 0)
+ JNU_ThrowOutOfMemoryError(env, 0);
+ return 0;
+ }
+ out_buf = (*env)->GetPrimitiveArrayCritical(env, b, 0);
+ if (out_buf == NULL) {
+ (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
+ if (len != 0)
+ JNU_ThrowOutOfMemoryError(env, 0);
+ return 0;
+ }
+
+ strm->zStream.next_in = (Bytef *) (in_buf + this_off);
+ strm->zStream.next_out = (Bytef *) (out_buf + off);
+ strm->zStream.avail_in = this_len;
+ strm->zStream.avail_out = len;
+ res = deflateParams(&strm->zStream, level, strategy);
+ (*env)->ReleasePrimitiveArrayCritical(env, b, out_buf, 0);
+ (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
+
+ switch (res) {
+ case Z_OK:
+ (*env)->SetBooleanField(env, this, setParamsID, JNI_FALSE);
+ this_off += this_len - strm->zStream.avail_in;
+ (*env)->SetIntField(env, this, offID, this_off);
+ (*env)->SetIntField(env, this, lenID, strm->zStream.avail_in);
+ return len - strm->zStream.avail_out;
+ case Z_BUF_ERROR:
+ (*env)->SetBooleanField(env, this, setParamsID, JNI_FALSE);
+ return 0;
+ default:
+ JNU_ThrowInternalError(env, strm->zStream.msg);
+ return 0;
+ }
+ } else {
+ jboolean finish = (*env)->GetBooleanField(env, this, finishID);
+ in_buf = (*env)->GetPrimitiveArrayCritical(env, this_buf, 0);
+ if (in_buf == NULL) {
+ if (this_len != 0)
+ JNU_ThrowOutOfMemoryError(env, 0);
+ return 0;
+ }
+ out_buf = (*env)->GetPrimitiveArrayCritical(env, b, 0);
+ if (out_buf == NULL) {
+ (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
+ if (len != 0)
+ JNU_ThrowOutOfMemoryError(env, 0);
+
+ return 0;
+ }
+
+ strm->zStream.next_in = (Bytef *) (in_buf + this_off);
+ strm->zStream.next_out = (Bytef *) (out_buf + off);
+ strm->zStream.avail_in = this_len;
+ strm->zStream.avail_out = len;
+ res = deflate(&strm->zStream, finish ? Z_FINISH : flush);
+ (*env)->ReleasePrimitiveArrayCritical(env, b, out_buf, 0);
+ (*env)->ReleasePrimitiveArrayCritical(env, this_buf, in_buf, 0);
+
+ switch (res) {
+ case Z_STREAM_END:
+ (*env)->SetBooleanField(env, this, finishedID, JNI_TRUE);
+ /* fall through */
+ case Z_OK:
+ this_off += this_len - strm->zStream.avail_in;
+ (*env)->SetIntField(env, this, offID, this_off);
+ (*env)->SetIntField(env, this, lenID, strm->zStream.avail_in);
+ return len - strm->zStream.avail_out;
+ case Z_BUF_ERROR:
+ return 0;
default:
- JNU_ThrowInternalError(env, strm->msg);
- return 0;
- }
+ JNU_ThrowInternalError(env, strm->zStream.msg);
+ return 0;
+ }
+ }
}
}
JNIEXPORT jint JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_getAdler(JNIEnv *env, jclass cls, jlong addr)
{
- return ((z_stream *)jlong_to_ptr(addr))->adler;
+ if (((Stream *)jlong_to_ptr(addr))->useIGZIP)
+ JNU_ThrowInternalError(env, "igzip doesn't support getAdler function");
+ else
+ return ((Stream *)jlong_to_ptr(addr))->zStream.adler;
}
JNIEXPORT jlong JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_getBytesRead(JNIEnv *env, jclass cls, jlong addr)
{
- return ((z_stream *)jlong_to_ptr(addr))->total_in;
+ return ( ((Stream *)jlong_to_ptr(addr))->useIGZIP ? ((Stream *) jlong_to_ptr(addr))->lz2Stream.total_in : ((Stream *)jlong_to_ptr(addr))->zStream.total_in);
}
JNIEXPORT jlong JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_getBytesWritten(JNIEnv *env, jclass cls, jlong addr)
{
- return ((z_stream *)jlong_to_ptr(addr))->total_out;
+ return ( ((Stream *)jlong_to_ptr(addr))->useIGZIP ? ((Stream *) jlong_to_ptr(addr))->lz2Stream.total_out : ((Stream *)jlong_to_ptr(addr))->zStream.total_out);
}
JNIEXPORT void JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_reset(JNIEnv *env, jclass cls, jlong addr)
{
- if (deflateReset((z_stream *)jlong_to_ptr(addr)) != Z_OK) {
- JNU_ThrowInternalError(env, 0);
+ if (((Stream *)jlong_to_ptr(addr))->useIGZIP)
+ init_stream(&(((Stream *)jlong_to_ptr(addr))->lz2Stream));
+ else {
+ if (deflateReset(&(((Stream *)jlong_to_ptr(addr))->zStream)) != Z_OK) {
+ JNU_ThrowInternalError(env, 0);
+ }
}
}
JNIEXPORT void JNICALL
Java_htsjdk_samtools_util_zip_IntelDeflater_end(JNIEnv *env, jclass cls, jlong addr)
{
- if (deflateEnd((z_stream *)jlong_to_ptr(addr)) == Z_STREAM_ERROR) {
- JNU_ThrowInternalError(env, 0);
- } else {
- free((z_stream *)jlong_to_ptr(addr));
+ if (!((Stream *)jlong_to_ptr(addr))->useIGZIP) {
+ if (deflateEnd(&(((Stream *)jlong_to_ptr(addr))->zStream)) == Z_STREAM_ERROR) {
+ JNU_ThrowInternalError(env, 0);
+ }
}
+ free((Stream *)jlong_to_ptr(addr));
}
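The rewritten deflateBytes() above dispatches between igzip (fast_lz) and zlib (deflate) behind the same JNI entry points used by java.util.zip.Deflater, which htsjdk's IntelDeflater extends. The sketch below shows the caller-side setInput/finish/deflate loop that this native code services, written against the standard Deflater; level 1 (BEST_SPEED) corresponds to the FAST_COMPRESSION path that selects igzip when SSE4.2 is available. Class name and buffer size are illustrative.

    import java.io.ByteArrayOutputStream;
    import java.util.zip.Deflater;

    // Sketch of the setInput/finish/deflate loop the native code above services.
    public final class DeflateLoopExample {
        public static byte[] compress(final byte[] input) {
            final Deflater deflater = new Deflater(Deflater.BEST_SPEED, true); // raw deflate, as in BGZF blocks
            deflater.setInput(input);
            deflater.finish();                          // corresponds to the finishID / end_of_stream handling above
            final ByteArrayOutputStream out = new ByteArrayOutputStream();
            final byte[] buffer = new byte[8192];
            while (!deflater.finished()) {
                final int n = deflater.deflate(buffer); // backed by deflateBytes()
                out.write(buffer, 0, n);
            }
            deflater.end();                             // frees the native stream state
            return out.toByteArray();
        }
    }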
diff --git a/src/c/inteldeflater/README b/src/c/inteldeflater/README
new file mode 100644
index 0000000..31170d4
--- /dev/null
+++ b/src/c/inteldeflater/README
@@ -0,0 +1,7 @@
+- IntelDeflater.c implements JNI for the IntelDeflater in htsjdk.samtools.util.zip.IntelDeflater
+- IntelDeflater uses Intel(R) Integrated Performance Primitives (Intel(R) IPP) Samples and igzip to accelerate BAM compression.
+- Steps to build Intel Deflater using src/scripts/build_intel_deflater.sh:
+ - $OPENJDK should point to the OpenJDK directory
+ - $IPP8_INSTALL_DIR should point to the composer_xe_YEAR/ipp directory
+ - $IPP8_CODE_SAMPLES_DIR should point to ipp-samples.8.0.0.x directory
+ - $IGZIP_LIB should point to the igzip_042/igzip directory; igzip should be built with ONLY_DEFLATE and GENOME_BAM defined.
diff --git a/src/c/inteldeflater/igzip_lib.h b/src/c/inteldeflater/igzip_lib.h
new file mode 100644
index 0000000..a27b334
--- /dev/null
+++ b/src/c/inteldeflater/igzip_lib.h
@@ -0,0 +1,50 @@
+/**********************************************************************
+The MIT License
+
+Copyright (c) 2014 Intel Corporation
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the
+ Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+ KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**********************************************************************/
+#include "internal_state_size.h"
+#include "types.h"
+
+typedef struct {
+ UINT8 opaque[INTERNAL_STATE_SIZE];
+} LZ_State2;
+
+typedef struct {
+ UINT8 *next_in; // Next input byte
+ UINT32 avail_in; // number of bytes available at next_in
+ UINT32 total_in; // total number of bytes read so far
+
+ UINT8 *next_out; // Next output byte
+ UINT32 avail_out; // number of bytes available at next_out
+ UINT32 total_out; // total number of bytes written so far
+ UINT32 end_of_stream; // non-zero if this is the last input buffer
+
+ LZ_State2 internal_state;
+} LZ_Stream2;
+
+
+void init_stream(LZ_Stream2 *stream);
+void fast_lz(LZ_Stream2 *stream);
diff --git a/src/c/inteldeflater/internal_state_size.h b/src/c/inteldeflater/internal_state_size.h
new file mode 100644
index 0000000..1823a33
--- /dev/null
+++ b/src/c/inteldeflater/internal_state_size.h
@@ -0,0 +1,32 @@
+/**********************************************************************
+The MIT License
+
+Copyright (c) 2014 Intel Corporation
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the
+ Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+ KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**********************************************************************/
+//// for 8K
+#define INTERNAL_STATE_SIZE 82368+16
+
+// for 32K
+//#define INTERNAL_STATE_SIZE 328128+16
diff --git a/src/c/inteldeflater/types.h b/src/c/inteldeflater/types.h
new file mode 100644
index 0000000..83f7cc2
--- /dev/null
+++ b/src/c/inteldeflater/types.h
@@ -0,0 +1,46 @@
+/**********************************************************************
+The MIT License
+
+Copyright (c) 2014 Intel Corporation
+
+ Permission is hereby granted, free of charge, to any person
+ obtaining a copy of this software and associated documentation
+ files (the "Software"), to deal in the Software without
+ restriction, including without limitation the rights to use,
+ copy, modify, merge, publish, distribute, sublicense, and/or
+ sell copies of the Software, and to permit persons to whom the
+ Software is furnished to do so, subject to the following
+ conditions:
+
+ The above copyright notice and this permission notice shall be
+ included in all copies or substantial portions of the
+ Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY
+ KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
+ WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
+ PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+**********************************************************************/
+
+#ifndef __TYPES_H
+#define __TYPES_H
+
+#include <stdint.h> // For standard integer types
+
+typedef struct {
+ uint64_t low;
+ uint64_t high;
+} uint128_t;
+
+typedef int64_t INT64;
+typedef uint64_t UINT64;
+typedef uint32_t UINT32;
+typedef uint16_t UINT16;
+typedef uint8_t UINT8;
+typedef uint128_t UINT128;
+
+#endif
diff --git a/src/java/htsjdk/samtools/BAMFileReader.java b/src/java/htsjdk/samtools/BAMFileReader.java
index 0c226e9..94673bf 100644
--- a/src/java/htsjdk/samtools/BAMFileReader.java
+++ b/src/java/htsjdk/samtools/BAMFileReader.java
@@ -67,6 +67,10 @@ class BAMFileReader extends SamReader.ReaderImplementation {
// If true, all SAMRecords are fully decoded as they are read.
private boolean eagerDecode;
+ // If true, the BAMFileReader will use asynchronous IO.
+ // Note: this field currently has no effect (is not hooked up anywhere), but will be in the future. See https://github.com/samtools/htsjdk/pull/576
+ private final boolean useAsynchronousIO;
+
// For error-checking.
private ValidationStringency mValidationStringency;
@@ -97,11 +101,13 @@ class BAMFileReader extends SamReader.ReaderImplementation {
BAMFileReader(final InputStream stream,
final File indexFile,
final boolean eagerDecode,
+ final boolean useAsynchronousIO,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
mIndexFile = indexFile;
mIsSeekable = false;
+ this.useAsynchronousIO = useAsynchronousIO;
mCompressedInputStream = new BlockCompressedInputStream(stream);
mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream));
this.eagerDecode = eagerDecode;
@@ -119,10 +125,11 @@ class BAMFileReader extends SamReader.ReaderImplementation {
BAMFileReader(final File file,
final File indexFile,
final boolean eagerDecode,
+ final boolean useAsynchronousIO,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
- this(new BlockCompressedInputStream(file), indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, file.getAbsolutePath(), validationStringency, factory);
+ this(new BlockCompressedInputStream(file), indexFile!=null ? indexFile : SamFiles.findIndex(file), eagerDecode, useAsynchronousIO, file.getAbsolutePath(), validationStringency, factory);
if (mIndexFile != null && mIndexFile.lastModified() < file.lastModified()) {
System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() +
" is older than BAM " + file.getAbsolutePath());
@@ -134,24 +141,27 @@ class BAMFileReader extends SamReader.ReaderImplementation {
BAMFileReader(final SeekableStream strm,
final File indexFile,
final boolean eagerDecode,
+ final boolean useAsynchronousIO,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
- this(new BlockCompressedInputStream(strm), indexFile, eagerDecode, strm.getSource(), validationStringency, factory);
+ this(new BlockCompressedInputStream(strm), indexFile, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, factory);
}
BAMFileReader(final SeekableStream strm,
final SeekableStream indexStream,
final boolean eagerDecode,
+ final boolean useAsynchronousIO,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
- this(new BlockCompressedInputStream(strm), indexStream, eagerDecode, strm.getSource(), validationStringency, factory);
+ this(new BlockCompressedInputStream(strm), indexStream, eagerDecode, useAsynchronousIO, strm.getSource(), validationStringency, factory);
}
private BAMFileReader(final BlockCompressedInputStream compressedInputStream,
final File indexFile,
final boolean eagerDecode,
+ final boolean useAsynchronousIO,
final String source,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
@@ -161,6 +171,7 @@ class BAMFileReader extends SamReader.ReaderImplementation {
mCompressedInputStream = compressedInputStream;
mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream));
this.eagerDecode = eagerDecode;
+ this.useAsynchronousIO = useAsynchronousIO;
this.mValidationStringency = validationStringency;
this.samRecordFactory = factory;
this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, source);
@@ -170,6 +181,7 @@ class BAMFileReader extends SamReader.ReaderImplementation {
private BAMFileReader(final BlockCompressedInputStream compressedInputStream,
final SeekableStream indexStream,
final boolean eagerDecode,
+ final boolean useAsynchronousIO,
final String source,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
@@ -179,6 +191,7 @@ class BAMFileReader extends SamReader.ReaderImplementation {
mCompressedInputStream = compressedInputStream;
mStream = new BinaryCodec(new DataInputStream(mCompressedInputStream));
this.eagerDecode = eagerDecode;
+ this.useAsynchronousIO = useAsynchronousIO;
this.mValidationStringency = validationStringency;
this.samRecordFactory = factory;
this.mFileHeader = readHeader(this.mStream, this.mValidationStringency, source);
@@ -187,7 +200,7 @@ class BAMFileReader extends SamReader.ReaderImplementation {
/** Reads through the header and sequence records to find the virtual file offset of the first record in the BAM file. */
static long findVirtualOffsetOfFirstRecord(final File bam) throws IOException {
- final BAMFileReader reader = new BAMFileReader(bam, null, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
+ final BAMFileReader reader = new BAMFileReader(bam, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
final long offset = reader.mFirstRecordPointer;
reader.close();
return offset;
@@ -203,7 +216,7 @@ class BAMFileReader extends SamReader.ReaderImplementation {
/**
* If true, uses the caching version of the index reader.
- * @param enabled true to write source information into each SAMRecord.
+ * @param enabled true to use the caching version of the reader.
*/
protected void enableIndexCaching(final boolean enabled) {
if(mIndex != null)
@@ -260,8 +273,16 @@ class BAMFileReader extends SamReader.ReaderImplementation {
}
public void setEagerDecode(final boolean desired) { this.eagerDecode = desired; }
-
+
+ @Override
public void close() {
+ if (mCompressedInputStream != null) {
+ try {
+ mCompressedInputStream.close();
+ } catch (IOException e) {
+ throw new RuntimeIOException("Exception closing compressed input stream.", e);
+ }
+ }
if (mStream != null) {
mStream.close();
}
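BAMFileReader is package-private plumbing: the new useAsynchronousIO flag and the close() fix are threaded through its constructors, but applications normally reach it through SamReaderFactory. A minimal usage sketch, with the file name as a placeholder:

    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;
    import htsjdk.samtools.ValidationStringency;
    import java.io.File;

    // Minimal sketch: open a BAM through the public factory rather than BAMFileReader itself.
    public final class ReadBamExample {
        public static void main(final String[] args) throws Exception {
            try (SamReader reader = SamReaderFactory.makeDefault()
                    .validationStringency(ValidationStringency.SILENT)
                    .open(new File("input.bam"))) {
                for (final SAMRecord record : reader) {
                    System.out.println(record.getReadName());
                }
            }
        }
    }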
diff --git a/src/java/htsjdk/samtools/BAMIndexMetaData.java b/src/java/htsjdk/samtools/BAMIndexMetaData.java
index 699aed3..3dceab2 100644
--- a/src/java/htsjdk/samtools/BAMIndexMetaData.java
+++ b/src/java/htsjdk/samtools/BAMIndexMetaData.java
@@ -218,7 +218,7 @@ public class BAMIndexMetaData {
*/
static public void printIndexStats(final File inputBamFile) {
try {
- final BAMFileReader bam = new BAMFileReader(inputBamFile, null, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
+ final BAMFileReader bam = new BAMFileReader(inputBamFile, null, false, false, ValidationStringency.SILENT, new DefaultSAMRecordFactory());
if (!bam.hasIndex()) {
throw new SAMException("No index for bam file " + inputBamFile);
}
diff --git a/src/java/htsjdk/samtools/BAMIndexer.java b/src/java/htsjdk/samtools/BAMIndexer.java
index 17753c3..80b557a 100644
--- a/src/java/htsjdk/samtools/BAMIndexer.java
+++ b/src/java/htsjdk/samtools/BAMIndexer.java
@@ -27,6 +27,7 @@ import htsjdk.samtools.util.Log;
import java.io.File;
import java.io.OutputStream;
+import java.util.function.Function;
/**
* Class for both constructing BAM index content and writing it out.
@@ -49,15 +50,14 @@ public class BAMIndexer {
// content is built up from the input bam file using this
private final BAMIndexBuilder indexBuilder;
+ private static final Log log = Log.getInstance(BAMIndexer.class);
+
/**
* @param output binary BAM Index (.bai) file
* @param fileHeader header for the corresponding bam file
*/
public BAMIndexer(final File output, final SAMFileHeader fileHeader) {
-
- numReferences = fileHeader.getSequenceDictionary().size();
- indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary());
- outputWriter = new BinaryBAMIndexWriter(numReferences, output);
+ this(fileHeader, numRefs -> new BinaryBAMIndexWriter(numRefs, output));
}
/**
@@ -67,10 +67,28 @@ public class BAMIndexer {
* @param fileHeader header for the corresponding bam file.
*/
public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
+ this(fileHeader, numRefs -> new BinaryBAMIndexWriter(numRefs, output));
+ }
+ /*
+ * Prepare to index a BAM.
+ *
+ * @param fileHeader header for the corresponding bam file.
+ * @param createWriter a lambda that, given an Integer numReferences value, will create a BinaryBAMIndexWriter
+ * with that value and an appropriate output.
+ */
+ private BAMIndexer(final SAMFileHeader fileHeader, Function<Integer, BinaryBAMIndexWriter> createWriter) {
+ if (fileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
+ if (fileHeader.getSortOrder() == SAMFileHeader.SortOrder.unsorted) {
+ log.warn("For indexing, the BAM file is required to be coordinate sorted. Attempting to index \"unsorted\" BAM file.");
+ }
+ else {
+ throw new SAMException("Indexing requires a coordinate-sorted input BAM.");
+ }
+ }
numReferences = fileHeader.getSequenceDictionary().size();
indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary());
- outputWriter = new BinaryBAMIndexWriter(numReferences, output);
+ outputWriter = createWriter.apply(numReferences);
}
/**
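With this refactor, both public BAMIndexer constructors funnel through the new private constructor, which now requires a coordinate-sorted header and only warns for "unsorted". A rough sketch of driving the indexer by hand follows; I believe the reader needs to be opened so that records carry their source file pointers (INCLUDE_SOURCE_IN_RECORDS), since the index stores virtual offsets, so treat that detail and the class name as assumptions.

    import htsjdk.samtools.BAMIndexer;
    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;
    import java.io.File;

    // Sketch: index a coordinate-sorted BAM with processAlignment()/finish().
    public final class IndexBamExample {
        public static void main(final String[] args) throws Exception {
            final File bam = new File("input.bam");
            try (SamReader reader = SamReaderFactory.makeDefault()
                    .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) // records keep their file pointers
                    .open(bam)) {
                final BAMIndexer indexer =
                        new BAMIndexer(new File(bam.getPath() + ".bai"), reader.getFileHeader());
                for (final SAMRecord record : reader) {
                    indexer.processAlignment(record); // one call per record, in coordinate order
                }
                indexer.finish();                     // writes the index and closes the output
            }
        }
    }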
diff --git a/src/java/htsjdk/samtools/BinningIndexBuilder.java b/src/java/htsjdk/samtools/BinningIndexBuilder.java
index 60675f3..37933f4 100644
--- a/src/java/htsjdk/samtools/BinningIndexBuilder.java
+++ b/src/java/htsjdk/samtools/BinningIndexBuilder.java
@@ -174,7 +174,7 @@ public class BinningIndexBuilder {
}
private int computeIndexingBin(final FeatureToBeIndexed feature) {
- // reg2bin has zero-based, half-open API
+ // regionToBin has zero-based, half-open API
final int start = feature.getStart()-1;
int end = feature.getEnd();
if (end <= 0) {
@@ -182,6 +182,6 @@ public class BinningIndexBuilder {
// then treat this as a one base feature for indexing purposes.
end = start + 1;
}
- return GenomicIndexUtil.reg2bin(start, end);
+ return GenomicIndexUtil.regionToBin(start, end);
}
}
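The reg2bin to regionToBin rename is cosmetic; the arithmetic is the standard BAI binning function from the SAM specification, operating on zero-based, half-open coordinates, which is why computeIndexingBin() subtracts one from the 1-based start. For reference, the spec's calculation transcribed to Java, on the assumption that GenomicIndexUtil.regionToBin matches it:

    // The SAM/BAI spec's reg2bin arithmetic for a zero-based, half-open [beg, end) interval.
    // Assumed to be what GenomicIndexUtil.regionToBin computes.
    final class Reg2Bin {
        static int regionToBin(final int beg, int end) {
            --end;                                   // convert to an inclusive end position
            if (beg >> 14 == end >> 14) return ((1 << 15) - 1) / 7 + (beg >> 14);
            if (beg >> 17 == end >> 17) return ((1 << 12) - 1) / 7 + (beg >> 17);
            if (beg >> 20 == end >> 20) return ((1 << 9) - 1) / 7 + (beg >> 20);
            if (beg >> 23 == end >> 23) return ((1 << 6) - 1) / 7 + (beg >> 23);
            if (beg >> 26 == end >> 26) return ((1 << 3) - 1) / 7 + (beg >> 26);
            return 0;                                // interval spans more than one 64 Mb window
        }
    }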
diff --git a/src/java/htsjdk/samtools/CRAMIndexer.java b/src/java/htsjdk/samtools/CRAMBAIIndexer.java
similarity index 95%
rename from src/java/htsjdk/samtools/CRAMIndexer.java
rename to src/java/htsjdk/samtools/CRAMBAIIndexer.java
index 2543983..352e9e1 100755
--- a/src/java/htsjdk/samtools/CRAMIndexer.java
+++ b/src/java/htsjdk/samtools/CRAMBAIIndexer.java
@@ -67,7 +67,7 @@ import java.util.TreeSet;
* In both cases, processAlignment is called for each cram slice and
* finish() is called at the end.
*/
-public class CRAMIndexer {
+public class CRAMBAIIndexer {
// The number of references (chromosomes) in the BAM file
private final int numReferences;
@@ -86,7 +86,7 @@ public class CRAMIndexer {
* @param output binary BAM Index (.bai) file
* @param fileHeader header for the corresponding bam file
*/
- public CRAMIndexer(final File output, final SAMFileHeader fileHeader) {
+ public CRAMBAIIndexer(final File output, final SAMFileHeader fileHeader) {
numReferences = fileHeader.getSequenceDictionary().size();
indexBuilder = new BAMIndexBuilder(fileHeader);
@@ -99,7 +99,7 @@ public class CRAMIndexer {
* @param output Index will be written here. output will be closed when finish() method is called.
* @param fileHeader header for the corresponding bam file.
*/
- public CRAMIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
+ public CRAMBAIIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
numReferences = fileHeader.getSequenceDictionary().size();
indexBuilder = new BAMIndexBuilder(fileHeader);
@@ -247,7 +247,7 @@ public class CRAMIndexer {
}
private int computeIndexingBin(final Slice slice) {
- // reg2bin has zero-based, half-open API
+ // regionToBin has zero-based, half-open API
final int alignmentStart = slice.alignmentStart - 1;
int alignmentEnd = slice.alignmentStart + slice.alignmentSpan - 1;
if (alignmentEnd <= alignmentStart) {
@@ -255,7 +255,7 @@ public class CRAMIndexer {
// then treat this as a one base alignment for indexing purposes.
alignmentEnd = alignmentStart + 1;
}
- return GenomicIndexUtil.reg2bin(alignmentStart, alignmentEnd);
+ return GenomicIndexUtil.regionToBin(alignmentStart, alignmentEnd);
}
@@ -430,7 +430,7 @@ public class CRAMIndexer {
if (cramHeader.getSamFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
throw new SAMException("Expecting a coordinate sorted file.");
}
- final CRAMIndexer indexer = new CRAMIndexer(output, cramHeader.getSamFileHeader());
+ final CRAMBAIIndexer indexer = new CRAMBAIIndexer(output, cramHeader.getSamFileHeader());
int totalRecords = 0;
Container container = null;
diff --git a/src/java/htsjdk/samtools/CRAMContainerStreamWriter.java b/src/java/htsjdk/samtools/CRAMContainerStreamWriter.java
index 9a7bec6..4707b7b 100644
--- a/src/java/htsjdk/samtools/CRAMContainerStreamWriter.java
+++ b/src/java/htsjdk/samtools/CRAMContainerStreamWriter.java
@@ -9,7 +9,7 @@ import htsjdk.samtools.cram.common.CramVersions;
import htsjdk.samtools.cram.common.Version;
import htsjdk.samtools.cram.lossy.PreservationPolicy;
import htsjdk.samtools.cram.lossy.QualityScorePreservation;
-import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceTracks;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
@@ -17,7 +17,6 @@ import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
-import htsjdk.samtools.util.StringLineReader;
import java.io.IOException;
import java.io.OutputStream;
@@ -44,7 +43,7 @@ public class CRAMContainerStreamWriter {
private final SAMFileHeader samFileHeader;
private final String cramID;
private final OutputStream outputStream;
- private ReferenceSource source;
+ private CRAMReferenceSource source;
private final List<SAMRecord> samRecords = new ArrayList<SAMRecord>();
private ContainerFactory containerFactory;
@@ -58,7 +57,7 @@ public class CRAMContainerStreamWriter {
private Set<String> captureTags = new TreeSet<String>();
private Set<String> ignoreTags = new TreeSet<String>();
- private CRAMIndexer indexer;
+ private CRAMBAIIndexer indexer;
private long offset;
/**
@@ -74,7 +73,7 @@ public class CRAMContainerStreamWriter {
public CRAMContainerStreamWriter(
final OutputStream outputStream,
final OutputStream indexStream,
- final ReferenceSource source,
+ final CRAMReferenceSource source,
final SAMFileHeader samFileHeader,
final String cramId) {
this.outputStream = outputStream;
@@ -83,7 +82,7 @@ public class CRAMContainerStreamWriter {
this.source = source;
containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
if (indexStream != null) {
- indexer = new CRAMIndexer(indexStream, samFileHeader);
+ indexer = new CRAMBAIIndexer(indexStream, samFileHeader);
}
}
diff --git a/src/java/htsjdk/samtools/CRAMFileReader.java b/src/java/htsjdk/samtools/CRAMFileReader.java
index 7c83c94..71ef0c7 100644
--- a/src/java/htsjdk/samtools/CRAMFileReader.java
+++ b/src/java/htsjdk/samtools/CRAMFileReader.java
@@ -18,10 +18,10 @@ package htsjdk.samtools;
import htsjdk.samtools.SAMFileHeader.SortOrder;
import htsjdk.samtools.SamReader.Type;
import htsjdk.samtools.cram.CRAIIndex;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
-import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.CloseableIterator;
@@ -47,7 +47,7 @@ import java.util.Iterator;
@SuppressWarnings("UnusedDeclaration")
public class CRAMFileReader extends SamReader.ReaderImplementation implements SamReader.Indexing {
private File cramFile;
- private final ReferenceSource referenceSource;
+ private final CRAMReferenceSource referenceSource;
private InputStream inputStream;
private CRAMIterator iterator;
private BAMIndex mIndex;
@@ -82,10 +82,10 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
* reference sequences. May not be null.
*
* @throws IllegalArgumentException if the {@code cramFile} and the {@code inputStream} are both null
- * or if the {@code ReferenceSource} is null
+ * or if the {@code CRAMReferenceSource} is null
*/
public CRAMFileReader(final File cramFile, final InputStream inputStream,
- final ReferenceSource referenceSource) {
+ final CRAMReferenceSource referenceSource) {
if (cramFile == null && inputStream == null) {
throw new IllegalArgumentException("Either file or input stream is required.");
}
@@ -105,12 +105,12 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
*
* @param cramFile CRAM file to read. May not be null.
* @param indexFile index file to be used for random access. May be null.
- * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.CRAMReferenceSource source} of
* reference sequences. May not be null.
- * @throws IllegalArgumentException if the {@code cramFile} or the {@code ReferenceSource} is null
+ * @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null
*/
public CRAMFileReader(final File cramFile, final File indexFile,
- final ReferenceSource referenceSource) {
+ final CRAMReferenceSource referenceSource) {
if (cramFile == null)
throw new IllegalArgumentException("File is required.");
if (referenceSource == null) {
@@ -128,11 +128,11 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
* Create a CRAMFileReader from a file using the supplied reference source.
*
* @param cramFile CRAM file to read. Can not be null.
- * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.CRAMReferenceSource source} of
* reference sequences. May not be null.
- * @throws IllegalArgumentException if the {@code cramFile} or the {@code ReferenceSource} is null
+ * @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null
*/
- public CRAMFileReader(final File cramFile, final ReferenceSource referenceSource) {
+ public CRAMFileReader(final File cramFile, final CRAMReferenceSource referenceSource) {
if (cramFile == null)
throw new IllegalArgumentException("CRAM file cannot be null.");
if (referenceSource == null) {
@@ -151,14 +151,14 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
*
* @param inputStream CRAM stream to read. May not be null.
* @param indexInputStream index stream to be used for random access. May be null.
- * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.CRAMReferenceSource source} of
* reference sequences. May not be null.
* @param validationStringency Validation stringency to be used when reading
*
- * @throws IllegalArgumentException if the {@code inputStream} or the {@code ReferenceSource} is null
+ * @throws IllegalArgumentException if the {@code inputStream} or the {@code CRAMReferenceSource} is null
*/
public CRAMFileReader(final InputStream inputStream, final SeekableStream indexInputStream,
- final ReferenceSource referenceSource, final ValidationStringency validationStringency) throws IOException {
+ final CRAMReferenceSource referenceSource, final ValidationStringency validationStringency) throws IOException {
if (inputStream == null) {
throw new IllegalArgumentException("Input stream can not be null for CRAM reader");
}
@@ -189,14 +189,14 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
*
* @param stream CRAM stream to read. May not be null.
* @param indexFile index file to be used for random access. May be null.
- * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.CRAMReferenceSource source} of
* reference sequences. May not be null.
* @param validationStringency Validation stringency to be used when reading
*
- * @throws IllegalArgumentException if the {@code inputStream} or the {@code ReferenceSource} is null
+ * @throws IllegalArgumentException if the {@code inputStream} or the {@code CRAMReferenceSource} is null
*/
public CRAMFileReader(final InputStream stream,
- final File indexFile, final ReferenceSource referenceSource,
+ final File indexFile, final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency);
}
@@ -207,14 +207,14 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
*
* @param cramFile CRAM stream to read. May not be null.
* @param indexFile index file to be used for random access. May be null.
- * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.CRAMReferenceSource source} of
* reference sequences. May not be null.
* @param validationStringency Validation stringency to be used when reading
*
- * @throws IllegalArgumentException if the {@code cramFile} or the {@code ReferenceSource} is null
+ * @throws IllegalArgumentException if the {@code cramFile} or the {@code CRAMReferenceSource} is null
*/
public CRAMFileReader(final File cramFile,
- final File indexFile, final ReferenceSource referenceSource,
+ final File indexFile, final CRAMReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
this(new FileInputStream(cramFile), indexFile, referenceSource, validationStringency);
this.cramFile = cramFile;
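All of the constructors above now accept the CRAMReferenceSource interface; the existing FASTA-backed ReferenceSource implements it, so existing call sites keep working. A usage sketch based on the file/index/stringency constructor shown in this hunk, with all paths as placeholders:

    import htsjdk.samtools.CRAMFileReader;
    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.ValidationStringency;
    import htsjdk.samtools.cram.ref.ReferenceSource;
    import htsjdk.samtools.util.CloseableIterator;
    import java.io.File;

    // Sketch: read a CRAM with an explicit reference and an optional index.
    public final class ReadCramExample {
        public static void main(final String[] args) throws Exception {
            final CRAMFileReader reader = new CRAMFileReader(
                    new File("sample.cram"),
                    new File("sample.cram.bai"),                  // may be null if random access is not needed
                    new ReferenceSource(new File("ref.fasta")),   // implements CRAMReferenceSource
                    ValidationStringency.SILENT);
            try (CloseableIterator<SAMRecord> it = reader.getIterator()) {
                while (it.hasNext()) {
                    System.out.println(it.next().getReadName());
                }
            }
            reader.close();
        }
    }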
diff --git a/src/java/htsjdk/samtools/CRAMFileWriter.java b/src/java/htsjdk/samtools/CRAMFileWriter.java
index 717a52f..5d3f2e2 100644
--- a/src/java/htsjdk/samtools/CRAMFileWriter.java
+++ b/src/java/htsjdk/samtools/CRAMFileWriter.java
@@ -16,6 +16,7 @@
package htsjdk.samtools;
import htsjdk.samtools.cram.lossy.PreservationPolicy;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.StringLineReader;
@@ -45,7 +46,7 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
*/
public CRAMFileWriter(
final OutputStream outputStream,
- final ReferenceSource referenceSource,
+ final CRAMReferenceSource referenceSource,
final SAMFileHeader samFileHeader,
final String fileName)
{
@@ -67,7 +68,7 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
public CRAMFileWriter(
final OutputStream outputStream,
final OutputStream indexOS,
- final ReferenceSource referenceSource,
+ final CRAMReferenceSource referenceSource,
final SAMFileHeader samFileHeader,
final String fileName)
{
@@ -87,7 +88,7 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
* @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null
*/
public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted,
- final ReferenceSource referenceSource, final SAMFileHeader samFileHeader, final String fileName) {
+ final CRAMReferenceSource referenceSource, final SAMFileHeader samFileHeader, final String fileName) {
if (outputStream == null) {
throw new IllegalArgumentException("CRAMWriter output stream can not be null.");
}
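CRAMFileWriter gets the same CRAMReferenceSource treatment. A sketch of the single-stream constructor from this hunk, converting a BAM to CRAM; paths and class name are placeholders, and the writer otherwise behaves like any SAMFileWriter:

    import htsjdk.samtools.CRAMFileWriter;
    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;
    import htsjdk.samtools.cram.ref.ReferenceSource;
    import java.io.File;
    import java.io.FileOutputStream;

    // Sketch: write CRAM output using a FASTA-backed reference source.
    public final class BamToCramExample {
        public static void main(final String[] args) throws Exception {
            try (SamReader in = SamReaderFactory.makeDefault().open(new File("input.bam"));
                 FileOutputStream out = new FileOutputStream(new File("output.cram"))) {
                final CRAMFileWriter writer = new CRAMFileWriter(
                        out,
                        new ReferenceSource(new File("ref.fasta")),
                        in.getFileHeader(),
                        "output.cram");
                for (final SAMRecord record : in) {
                    writer.addAlignment(record);
                }
                writer.close(); // flushes remaining containers and the CRAM EOF marker
            }
        }
    }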
diff --git a/src/java/htsjdk/samtools/CRAMIterator.java b/src/java/htsjdk/samtools/CRAMIterator.java
index 59d08cd..3504736 100644
--- a/src/java/htsjdk/samtools/CRAMIterator.java
+++ b/src/java/htsjdk/samtools/CRAMIterator.java
@@ -22,7 +22,7 @@ import htsjdk.samtools.cram.build.CramContainerIterator;
import htsjdk.samtools.cram.build.CramNormalizer;
import htsjdk.samtools.cram.build.CramSpanContainerIterator;
import htsjdk.samtools.cram.io.CountingInputStream;
-import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
@@ -30,7 +30,6 @@ import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
-import htsjdk.samtools.util.SequenceUtil;
import java.io.IOException;
import java.io.InputStream;
@@ -57,7 +56,7 @@ public class CRAMIterator implements SAMRecordIterator {
private Iterator<Container> containerIterator;
private ContainerParser parser;
- private final ReferenceSource referenceSource;
+ private final CRAMReferenceSource referenceSource;
private Iterator<SAMRecord> iterator = Collections.<SAMRecord>emptyList().iterator();
@@ -75,7 +74,7 @@ public class CRAMIterator implements SAMRecordIterator {
private long samRecordIndex;
private ArrayList<CramCompressionRecord> cramRecords;
- public CRAMIterator(final InputStream inputStream, final ReferenceSource referenceSource, final ValidationStringency validationStringency)
+ public CRAMIterator(final InputStream inputStream, final CRAMReferenceSource referenceSource, final ValidationStringency validationStringency)
throws IOException {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
@@ -94,7 +93,7 @@ public class CRAMIterator implements SAMRecordIterator {
parser = new ContainerParser(cramHeader.getSamFileHeader());
}
- public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates, final ValidationStringency validationStringency)
+ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSource referenceSource, final long[] coordinates, final ValidationStringency validationStringency)
throws IOException {
if (null == referenceSource) {
throw new CRAMException("A reference source is required for CRAM files");
@@ -114,7 +113,7 @@ public class CRAMIterator implements SAMRecordIterator {
}
@Deprecated
- public CRAMIterator(final SeekableStream seekableStream, final ReferenceSource referenceSource, final long[] coordinates)
+ public CRAMIterator(final SeekableStream seekableStream, final CRAMReferenceSource referenceSource, final long[] coordinates)
throws IOException {
this(seekableStream, referenceSource, coordinates, ValidationStringency.DEFAULT_STRINGENCY);
}
diff --git a/src/java/htsjdk/samtools/Defaults.java b/src/java/htsjdk/samtools/Defaults.java
index 3ffd5fa..e6a2e13 100644
--- a/src/java/htsjdk/samtools/Defaults.java
+++ b/src/java/htsjdk/samtools/Defaults.java
@@ -1,6 +1,11 @@
package htsjdk.samtools;
+import htsjdk.samtools.util.Log;
+
import java.io.File;
+import java.util.Collections;
+import java.util.SortedMap;
+import java.util.TreeMap;
/**
* Embodies defaults for global values that affect how the SAM JDK operates. Defaults are encoded in the class
@@ -9,15 +14,34 @@ import java.io.File;
* @author Tim Fennell
*/
public class Defaults {
+ private static Log log = Log.getInstance(Defaults.class);
+
/** Should BAM index files be created when writing out coordinate sorted BAM files? Default = false. */
public static final boolean CREATE_INDEX;
/** Should MD5 files be created when writing out SAM and BAM files? Default = false. */
public static final boolean CREATE_MD5;
- /** Should asynchronous I/O be used when writing out SAM and BAM files (one thread per file). Default = false. */
+ /** Should asynchronous I/O be used where supported throughout all of htsjdk (one thread per file).
+ * Note: this option takes precedence over {@link #USE_ASYNC_IO_FOR_SAMTOOLS} and {@link #USE_ASYNC_IO_FOR_TRIBBLE}.
+ * Default = false.
+ */
public static final boolean USE_ASYNC_IO;
+ /** Should asynchronous I/O be used where supported by the samtools package (one thread per file).
+ * Note: The {@link #USE_ASYNC_IO} option takes precedence over this option.
+ * Default = false.
+ */
+ public static final boolean USE_ASYNC_IO_FOR_SAMTOOLS;
+
+ /** Should asynchronous I/O be used where supported by the tribble package (one thread per file).
+ * Note: performance may depend on the characteristics of the input file (e.g. the number of samples in the VCF) and should be tested on a case-by-case basis.
+ * In particular, asynchronous reading of VCF files with few samples is known to perform worse than synchronous reading.
+ * Note: The {@link #USE_ASYNC_IO} option takes precedence over this option.
+ * Default = false.
+ */
+ public static final boolean USE_ASYNC_IO_FOR_TRIBBLE;
+
+ /** Compression level to be used for writing BAM and other block-compressed outputs. Default = 5. */
public static final int COMPRESSION_LEVEL;
@@ -69,7 +93,15 @@ public class Defaults {
static {
CREATE_INDEX = getBooleanProperty("create_index", false);
CREATE_MD5 = getBooleanProperty("create_md5", false);
- USE_ASYNC_IO = getBooleanProperty("use_async_io", false);
+ if (hasProperty("use_async_io")){
+ USE_ASYNC_IO = getBooleanProperty("use_async_io", false);
+ USE_ASYNC_IO_FOR_SAMTOOLS = USE_ASYNC_IO;
+ USE_ASYNC_IO_FOR_TRIBBLE = USE_ASYNC_IO;
+ } else {
+ USE_ASYNC_IO = false;
+ USE_ASYNC_IO_FOR_SAMTOOLS = getBooleanProperty("use_async_io_samtools", false);
+ USE_ASYNC_IO_FOR_TRIBBLE = getBooleanProperty("use_async_io_tribble", false);
+ }
COMPRESSION_LEVEL = getIntProperty("compression_level", 5);
BUFFER_SIZE = getIntProperty("buffer_size", 1024 * 128);
TRY_USE_INTEL_DEFLATER = getBooleanProperty("try_use_intel_deflater", true);
@@ -85,9 +117,52 @@ public class Defaults {
CUSTOM_READER_FACTORY = getStringProperty("custom_reader", "");
}
- /** Gets a string system property, prefixed with "samjdk." using the default if the property does not exist. */
+ /**
+ * Returns a map of all default values (keys are names), lexicographically sorted by keys.
+ * The returned map is unmodifiable.
+ * This method is useful, for example, when logging all defaults.
+ */
+ public static SortedMap<String, Object> allDefaults(){
+ final SortedMap<String, Object> result = new TreeMap<>();
+ result.put("CREATE_INDEX", CREATE_INDEX);
+ result.put("CREATE_MD5", CREATE_MD5);
+ result.put("USE_ASYNC_IO", USE_ASYNC_IO);
+ result.put("USE_ASYNC_IO_FOR_SAMTOOLS", USE_ASYNC_IO_FOR_SAMTOOLS);
+ result.put("USE_ASYNC_IO_FOR_TRIBBLE", USE_ASYNC_IO_FOR_TRIBBLE);
+ result.put("COMPRESSION_LEVEL", COMPRESSION_LEVEL);
+ result.put("BUFFER_SIZE", BUFFER_SIZE);
+ result.put("TRY_USE_INTEL_DEFLATER", TRY_USE_INTEL_DEFLATER);
+ result.put("INTEL_DEFLATER_SHARED_LIBRARY_PATH", INTEL_DEFLATER_SHARED_LIBRARY_PATH);
+ result.put("NON_ZERO_BUFFER_SIZE", NON_ZERO_BUFFER_SIZE);
+ result.put("REFERENCE_FASTA", REFERENCE_FASTA);
+ result.put("USE_CRAM_REF_DOWNLOAD", USE_CRAM_REF_DOWNLOAD);
+ result.put("EBI_REFERENCE_SEVICE_URL_MASK", EBI_REFERENCE_SEVICE_URL_MASK);
+ result.put("CUSTOM_READER_FACTORY", CUSTOM_READER_FACTORY);
+ return Collections.unmodifiableSortedMap(result);
+ }
+
+ /** Gets a string system property, prefixed with "samjdk.", using the default
+ * if the property does not exist or if the Java security manager raises an exception for
+ * applications started with -Djava.security.manager. */
private static String getStringProperty(final String name, final String def) {
- return System.getProperty("samjdk." + name, def);
+ try {
+ return System.getProperty("samjdk." + name, def);
+ } catch (final java.security.AccessControlException error) {
+ log.warn(error,"java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning default value: " + def );
+ return def;
+ }
+ }
+
+ /** Checks whether a string system property, prefixed with "samjdk.", exists.
+ * If the property does not exist, or if the Java security manager raises an exception for
+ * applications started with -Djava.security.manager, this method returns false. */
+ private static boolean hasProperty(final String name){
+ try {
+ return null != System.getProperty("samjdk." + name);
+ } catch (final java.security.AccessControlException error) {
+ log.warn(error,"java Security Manager forbids 'System.getProperty(\"" + name + "\")' , returning false");
+ return false;
+ }
}
/** Gets a boolean system property, prefixed with "samjdk." using the default if the property does not exist. */
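The three async-IO switches above are read once from samjdk system properties in the static initializer, so they must be set before the Defaults class is first loaded. A hedged sketch of setting them and of dumping the resolved values via the new allDefaults() map; the class name MyTool is a placeholder.

    // Command line: the global flag takes precedence over the per-package flags.
    //   java -Dsamjdk.use_async_io=true MyTool
    //   java -Dsamjdk.use_async_io_samtools=true -Dsamjdk.use_async_io_tribble=false MyTool

    import htsjdk.samtools.Defaults;
    import java.util.Map;

    // Log every resolved default, e.g. at tool start-up.
    for (final Map.Entry<String, Object> e : Defaults.allDefaults().entrySet()) {
        System.out.println(e.getKey() + " = " + e.getValue());
    }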
diff --git a/src/java/htsjdk/samtools/DuplicateSetIterator.java b/src/java/htsjdk/samtools/DuplicateSetIterator.java
index ffd9523..9a0c6f1 100644
--- a/src/java/htsjdk/samtools/DuplicateSetIterator.java
+++ b/src/java/htsjdk/samtools/DuplicateSetIterator.java
@@ -24,6 +24,8 @@
package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SortingCollection;
import java.io.File;
@@ -54,36 +56,53 @@ public class DuplicateSetIterator implements CloseableIterator<DuplicateSet> {
public DuplicateSetIterator(final CloseableIterator<SAMRecord> iterator,
final SAMFileHeader header,
final boolean preSorted) {
- this(iterator, header, preSorted, new SAMRecordDuplicateComparator(Collections.singletonList(header)));
+ this(iterator, header, preSorted, null);
+ }
+
+ public DuplicateSetIterator(final CloseableIterator<SAMRecord> iterator,
+ final SAMFileHeader header,
+ final boolean preSorted,
+ final SAMRecordDuplicateComparator comparator) {
+ this(iterator, header, preSorted, comparator, null);
}
/**
* Allows the user of this iterator to skip the sorting of the input if the input is already sorted. If the records are said to be
- * sorted but not actually sorted in the correct order, an exception during iteration will be thrown.
+ * sorted but not actually sorted in the correct order, an exception during iteration will be thrown. Progress information will
+ * be printed for sorting of the input if `log` is provided.
*/
public DuplicateSetIterator(final CloseableIterator<SAMRecord> iterator,
final SAMFileHeader header,
final boolean preSorted,
- final SAMRecordDuplicateComparator comparator) {
- this.comparator = comparator;
+ final SAMRecordDuplicateComparator comparator,
+ final Log log) {
+ this.comparator = (comparator == null) ? new SAMRecordDuplicateComparator(Collections.singletonList(header)) : comparator;
if (preSorted) {
this.wrappedIterator = iterator;
} else {
+ ProgressLogger progressLogger = null;
+ if (log != null) {
+ progressLogger = new ProgressLogger(log, 100000);
+ log.info("Duplicate set iterator initializing.");
+ }
+
// Sort it!
final int maxRecordsInRam = SAMFileWriterImpl.getDefaultMaxRecordsInRam();
final File tmpDir = new File(System.getProperty("java.io.tmpdir"));
final SortingCollection<SAMRecord> alignmentSorter = SortingCollection.newInstance(SAMRecord.class,
- new BAMRecordCodec(header), comparator,
+ new BAMRecordCodec(header), this.comparator,
maxRecordsInRam, tmpDir);
while (iterator.hasNext()) {
final SAMRecord record = iterator.next();
alignmentSorter.add(record);
+ if (progressLogger != null) progressLogger.record(record);
}
iterator.close();
this.wrappedIterator = alignmentSorter.iterator();
+ if (log != null) log.info("Duplicate set iterator initialized.");
}
this.duplicateSet = new DuplicateSet(this.comparator);
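A sketch of the new five-argument constructor with progress logging during the sort; reader (a SamReader) and header are assumed to be in scope, and passing null for the comparator falls back to the default SAMRecordDuplicateComparator as shown above.

    import htsjdk.samtools.DuplicateSet;
    import htsjdk.samtools.DuplicateSetIterator;
    import htsjdk.samtools.util.Log;

    final Log dsLog = Log.getInstance(DuplicateSetIterator.class);
    final DuplicateSetIterator duplicateSets =
            new DuplicateSetIterator(reader.iterator(), header, false, null, dsLog);
    while (duplicateSets.hasNext()) {
        final DuplicateSet duplicateSet = duplicateSets.next();
        // ... examine the records in the duplicate set
    }
    duplicateSets.close();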
diff --git a/src/java/htsjdk/samtools/GenomicIndexUtil.java b/src/java/htsjdk/samtools/GenomicIndexUtil.java
index ea834e9..f634932 100644
--- a/src/java/htsjdk/samtools/GenomicIndexUtil.java
+++ b/src/java/htsjdk/samtools/GenomicIndexUtil.java
@@ -54,11 +54,11 @@ public class GenomicIndexUtil {
/**
* calculate the bin given an alignment in [beg,end)
- * Copied from SAM spec.
+ * Described in "The Human Genome Browser at UCSC. Kent & al. doi: 10.1101/gr.229102 "
* @param beg 0-based start of read (inclusive)
* @param end 0-based end of read (exclusive)
*/
- static int reg2bin(final int beg, int end)
+ public static int regionToBin(final int beg, int end)
{
--end;
@@ -70,9 +70,9 @@ public class GenomicIndexUtil {
return 0;
}
- // TODO: It is disturbing that reg2bin is 0-based, but regionToBins is 1-based.
+ // TODO: It is disturbing that regionToBin is 0-based, but regionToBins is 1-based.
// TODO: It is also suspicious that regionToBins decrements endPos. Test it!
- // TODO: However end is decremented in reg2bin so perhaps there is no conflict.
+ // TODO: However end is decremented in regionToBin so perhaps there is no conflict.
/**
* Get candidate bins for the specified region
* @param startPos 1-based start of target region, inclusive.
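Since regionToBin is now public, callers can compute the UCSC/BAI bin for an interval directly; a small sketch using the 0-based, half-open coordinates described in the javadoc above.

    import htsjdk.samtools.GenomicIndexUtil;

    // A read aligned to the 0-based, half-open interval [10000, 10100)
    final int bin = GenomicIndexUtil.regionToBin(10000, 10100);
    System.out.println("indexing bin = " + bin);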
diff --git a/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java b/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
index e74e106..507e319 100644
--- a/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
+++ b/src/java/htsjdk/samtools/SAMBinaryTagAndUnsignedArrayValue.java
@@ -25,13 +25,18 @@ package htsjdk.samtools;
/**
* Simple extension to SAMBinaryTagAndValue in order to distinguish unsigned array values, because
- * signedness cannot be determined by introspection of value.
+ * signedness cannot be determined by introspection of value. Must be array of byte, short, or int.
*
* @author alecw at broadinstitute.org
*/
public class SAMBinaryTagAndUnsignedArrayValue extends SAMBinaryTagAndValue {
public SAMBinaryTagAndUnsignedArrayValue(final short tag, final Object value) {
super(tag, value);
+ if (!value.getClass().isArray() || value instanceof float[]) {
+ throw new IllegalArgumentException("Attribute type " + value.getClass() +
+ " cannot be encoded as an unsigned array. Tag: " +
+ SAMTagUtil.getSingleton().makeStringTag(tag));
+ }
}
/** Creates and returns a shallow copy of the list of tag/values. */
diff --git a/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java b/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
index 70011f9..0778190 100644
--- a/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
+++ b/src/java/htsjdk/samtools/SAMBinaryTagAndValue.java
@@ -24,6 +24,7 @@
package htsjdk.samtools;
import java.io.Serializable;
+import java.util.Arrays;
/**
* Holds a SAMRecord attribute and the tagname (in binary form) for that attribute.
@@ -31,7 +32,12 @@ import java.io.Serializable;
* See SAMTagUtil to convert the tag to String form.
*
* Values associated with attribute tags must be of a type that implements {@link Serializable} or else
- * serialization will fail.
+ * serialization will fail. Accepted types are String, the scalar types Byte, Short, Integer, Character,
+ * Float, and Long (see below), and the array types byte[], short[], int[] and float[]. Cannot be null.
+ *
+ * Long valued attributes are constrained to the range [Integer.MIN_VALUE, BinaryCodec.MAX_UINT],
+ * which includes the entire range of signed ints [Integer.MIN_VALUE, Integer.MAX_VALUE] and
+ * the entire range of unsigned ints that can be stored per the BAM spec [0, (Integer.MAX_VALUE * 2) + 1].
*
* @author alecw at broadinstitute.org
*/
@@ -44,17 +50,46 @@ public class SAMBinaryTagAndValue implements Serializable {
/**
* @param tag tagname (in binary form) for this attribute
- * @param value value for this attribute (must be of a type that implements {@link Serializable} or else serialization will fail)
- * Cannot be null.
+ * @param value value for this attribute (must be of a type that implements {@link Serializable}
+ * or else serialization will fail). Cannot be null.
*/
public SAMBinaryTagAndValue(final short tag, final Object value) {
if (null == value) {
throw new IllegalArgumentException("SAMBinaryTagAndValue value may not be null");
}
+ if (!isAllowedAttributeValue(value)) {
+ throw new IllegalArgumentException("Attribute type " + value.getClass() + " not supported. Tag: " +
+ SAMTagUtil.getSingleton().makeStringTag(tag));
+ }
this.tag = tag;
this.value = value;
}
+ // Inspect the proposed value to determine if it is an allowed value type,
+ // and if the value is in range.
+ protected static boolean isAllowedAttributeValue(final Object value) {
+ if (value instanceof Byte ||
+ value instanceof Short ||
+ value instanceof Integer ||
+ value instanceof String ||
+ value instanceof Character ||
+ value instanceof Float ||
+ value instanceof byte[] ||
+ value instanceof short[] ||
+ value instanceof int[] ||
+ value instanceof float[]) {
+ return true;
+ }
+
+ // A special case for Longs: we require Long values to fit into either a uint32_t or an int32_t,
+ // as that is what the BAM spec allows.
+ if (value instanceof Long) {
+ return SAMUtils.isValidUnsignedIntegerAttribute((Long) value)
+ || ((Long) value >= Integer.MIN_VALUE && (Long) value <= Integer.MAX_VALUE);
+ }
+ return false;
+ }
+
@Override public boolean equals(final Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
@@ -64,7 +99,7 @@ public class SAMBinaryTagAndValue implements Serializable {
/** Type safe equals method that recurses down the list looking for equality. */
private boolean typeSafeEquals(final SAMBinaryTagAndValue that) {
if (this.tag != that.tag) return false;
- if ((this.value == null) ? that.value == null : this.value.equals(that.value)) {
+ if (this.valueEquals(that)) {
if (this.next == null) return that.next == null;
else return this.next.equals(that.next);
}
@@ -73,11 +108,52 @@ public class SAMBinaryTagAndValue implements Serializable {
}
}
+ private boolean valueEquals(SAMBinaryTagAndValue that) {
+ if (this.value instanceof byte[]) {
+ return that.value instanceof byte[] ?
+ Arrays.equals((byte[])this.value, (byte[])that.value) : false;
+ }
+ else if (this.value instanceof short[]) {
+ return that.value instanceof short[] ?
+ Arrays.equals((short[])this.value, (short[])that.value) : false;
+ }
+ else if (this.value instanceof int[]) {
+ return that.value instanceof int[] ?
+ Arrays.equals((int[])this.value, (int[])that.value) : false;
+ }
+ else if (this.value instanceof float[]) {
+ return that.value instanceof float[] ?
+ Arrays.equals((float[])this.value, (float[])that.value) : false;
+ }
+ else {
+ // otherwise, the api limits the remaining possible value types to
+ // immutable (String or boxed primitive) types
+ return this.value.equals(that.value);
+ }
+ }
+
@Override
public int hashCode() {
- int result = (int) tag;
- result = 31 * result + value.hashCode();
- return result;
+ int valueHash;
+ if (this.value instanceof byte[]) {
+ valueHash = Arrays.hashCode((byte[])this.value);
+ }
+ else if (this.value instanceof short[]) {
+ valueHash = Arrays.hashCode((short[])this.value);
+ }
+ else if (this.value instanceof int[]) {
+ valueHash = Arrays.hashCode((int[])this.value);
+ }
+ else if (this.value instanceof float[]) {
+ valueHash = Arrays.hashCode((float[])this.value);
+ }
+ else {
+ // otherwise, the api limits the remaining possible value types to
+ // immutable (String or boxed primitive) types
+ valueHash = value.hashCode();
+ }
+
+ return 31 * tag + valueHash;
}
/** Creates and returns a shallow copy of the list of tag/values. */
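In practice these checks surface through the SAMRecord attribute setters; a hedged sketch of values the new validation accepts or rejects, assuming an existing SAMRecord named samRecord (the tag names are arbitrary user tags chosen for illustration).

    // Accepted scalar and array types:
    samRecord.setAttribute("XI", 42);                            // Integer
    samRecord.setAttribute("XF", 3.14f);                         // Float
    samRecord.setAttribute("XB", new byte[] {1, 2, 3});          // signed byte[]
    samRecord.setUnsignedArrayAttribute("XU", new int[] {7});    // unsigned array; float[] is not allowed here

    // Long values must fit into an int32 or uint32:
    samRecord.setAttribute("XL", 4294967295L);                   // accepted: largest uint32
    // samRecord.setAttribute("XL", 4294967296L);                // rejected: outside the allowed range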
diff --git a/src/java/htsjdk/samtools/SAMFileReader.java b/src/java/htsjdk/samtools/SAMFileReader.java
index 20e753e..6e0e9b8 100644
--- a/src/java/htsjdk/samtools/SAMFileReader.java
+++ b/src/java/htsjdk/samtools/SAMFileReader.java
@@ -27,21 +27,10 @@ package htsjdk.samtools;
import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableHTTPStream;
import htsjdk.samtools.seekablestream.SeekableStream;
-import htsjdk.samtools.util.BlockCompressedInputStream;
-import htsjdk.samtools.util.BlockCompressedStreamConstants;
-import htsjdk.samtools.util.CloseableIterator;
-import htsjdk.samtools.util.CloserUtil;
-import htsjdk.samtools.util.IOUtil;
-import htsjdk.samtools.util.RuntimeIOException;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
+import htsjdk.samtools.util.*;
+
+import java.io.*;
import java.net.URL;
-import java.util.Arrays;
import java.util.NoSuchElementException;
import java.util.zip.GZIPInputStream;
@@ -85,6 +74,7 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
private BAMIndex mIndex = null;
private SAMRecordFactory samRecordFactory = new DefaultSAMRecordFactory();
private ReaderImplementation mReader = null;
+ private boolean useAsyncIO = Defaults.USE_ASYNC_IO_FOR_SAMTOOLS;
private File samFile = null;
@@ -214,6 +204,13 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
}
/**
+ * If true, this reader will use asynchronous IO.
+ */
+ public void setUseAsyncIO(final boolean useAsyncIO) {
+ this.useAsyncIO = useAsyncIO;
+ }
+
+ /**
* If true, writes the source of every read into the source SAMRecords.
*
* @param enabled true to write source information into each SAMRecord.
@@ -225,7 +222,7 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
/**
* If true, uses the caching version of the index reader.
*
- * @param enabled true to write source information into each SAMRecord.
+ * @param enabled true to use the caching version of the reader.
*/
public void enableIndexCaching(final boolean enabled) {
if (mIndex != null)
@@ -604,7 +601,7 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
try {
if (streamLooksLikeBam(strm)) {
mIsBinary = true;
- mReader = new BAMFileReader(strm, indexFile, eagerDecode, validationStringency, this.samRecordFactory);
+ mReader = new BAMFileReader(strm, indexFile, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
} else {
throw new SAMFormatException("Unrecognized file format: " + strm);
}
@@ -620,7 +617,7 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
try {
if (streamLooksLikeBam(strm)) {
mIsBinary = true;
- mReader = new BAMFileReader(strm, indexStream, eagerDecode, validationStringency, this.samRecordFactory);
+ mReader = new BAMFileReader(strm, indexStream, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
} else {
throw new SAMFormatException("Unrecognized file format: " + strm);
}
@@ -652,19 +649,19 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
final int bufferSize = Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file), bufferSize);
else bufferedStream = IOUtil.toBufferedStream(stream);
- if (isBAMFile(bufferedStream)) {
+ if (SamStreams.isBAMFile(bufferedStream)) {
mIsBinary = true;
if (file == null || !file.isFile()) {
// Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo
- mReader = new BAMFileReader(bufferedStream, indexFile, eagerDecode, validationStringency, this.samRecordFactory);
+ mReader = new BAMFileReader(bufferedStream, indexFile, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
} else {
bufferedStream.close();
- mReader = new BAMFileReader(file, indexFile, eagerDecode, validationStringency, this.samRecordFactory);
+ mReader = new BAMFileReader(file, indexFile, eagerDecode, useAsyncIO, validationStringency, this.samRecordFactory);
}
} else if (BlockCompressedInputStream.isValidFile(bufferedStream)) {
mIsBinary = false;
mReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory);
- } else if (isGzippedSAMFile(bufferedStream)) {
+ } else if (SamStreams.isGzippedSAMFile(bufferedStream)) {
mIsBinary = false;
mReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory);
} else if (SamStreams.isCRAMFile(bufferedStream)) {
@@ -694,25 +691,6 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
}
}
- /**
- * @param stream stream.markSupported() must be true
- * @return true if this looks like a BAM file.
- */
- private boolean isBAMFile(final InputStream stream)
- throws IOException {
- if (!BlockCompressedInputStream.isValidFile(stream)) {
- return false;
- }
- final int buffSize = BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE;
- stream.mark(buffSize);
- final byte[] buffer = new byte[buffSize];
- readBytes(stream, buffer, 0, buffSize);
- stream.reset();
- final byte[] magicBuf = new byte[4];
- final int magicLength = readBytes(new BlockCompressedInputStream(new ByteArrayInputStream(buffer)), magicBuf, 0, 4);
- return magicLength == BAMFileConstants.BAM_MAGIC.length && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
- }
-
private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
throws IOException {
int bytesRead = 0;
@@ -726,31 +704,6 @@ public class SAMFileReader implements SamReader, SamReader.Indexing {
return bytesRead;
}
- /**
- * Attempts to check whether the file is a gzipped sam file. Returns true if it
- * is and false otherwise.
- */
- private boolean isGzippedSAMFile(final BufferedInputStream stream) {
- if (!stream.markSupported()) {
- throw new IllegalArgumentException("Cannot test a stream that doesn't support marking.");
- }
- stream.mark(8000);
-
- try {
- final GZIPInputStream gunzip = new GZIPInputStream(stream);
- final int ch = gunzip.read();
- return true;
- } catch (final IOException ioe) {
- return false;
- } finally {
- try {
- stream.reset();
- } catch (final IOException ioe) {
- throw new IllegalStateException("Could not reset stream.");
- }
- }
- }
-
private boolean isSAMFile(final InputStream stream) {
// For now, assume every non-binary file is a SAM text file.
return true;
diff --git a/src/java/htsjdk/samtools/SAMFileWriterFactory.java b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
index 7eb0823..077b490 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterFactory.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
@@ -44,7 +44,7 @@ public class SAMFileWriterFactory implements Cloneable {
private boolean createIndex = defaultCreateIndexWhileWriting;
private static boolean defaultCreateMd5File = Defaults.CREATE_MD5;
private boolean createMd5File = defaultCreateMd5File;
- private boolean useAsyncIo = Defaults.USE_ASYNC_IO;
+ private boolean useAsyncIo = Defaults.USE_ASYNC_IO_FOR_SAMTOOLS;
private int asyncOutputBufferSize = AsyncSAMFileWriter.DEFAULT_QUEUE_SIZE;
private int bufferSize = Defaults.BUFFER_SIZE;
private File tmpDir;
@@ -90,8 +90,9 @@ public class SAMFileWriterFactory implements Cloneable {
}
/** set compression level: 0 = none, 9 = max */
- public void setCompressionLevel(final int compressionLevel) {
+ public SAMFileWriterFactory setCompressionLevel(final int compressionLevel) {
this.compressionLevel = Math.min(9, Math.max(0, compressionLevel));
+ return this;
}
public int getCompressionLevel() {
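With setCompressionLevel now returning the factory, writer construction can be chained fluently; a sketch assuming a SAMFileHeader named header is in scope, a placeholder output path, and an enclosing method that declares throws IOException.

    import htsjdk.samtools.SAMFileWriter;
    import htsjdk.samtools.SAMFileWriterFactory;
    import java.io.File;

    try (final SAMFileWriter bamWriter = new SAMFileWriterFactory()
            .setCompressionLevel(2)                               // clamped to the range [0, 9]
            .makeBAMWriter(header, true, new File("output.bam"))) {
        // bamWriter.addAlignment(samRecord); for each record
    }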
diff --git a/src/java/htsjdk/samtools/SAMRecord.java b/src/java/htsjdk/samtools/SAMRecord.java
index 104e0fd..eb76c28 100644
--- a/src/java/htsjdk/samtools/SAMRecord.java
+++ b/src/java/htsjdk/samtools/SAMRecord.java
@@ -1393,43 +1393,27 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*
* @param value the value to be checked
* @return true if the value is valid and false otherwise
+
+ * @deprecated
+ * The attribute type and value checks have been moved directly into
+ * {@code SAMBinaryTagAndValue}.
*/
+ @Deprecated
protected static boolean isAllowedAttributeValue(final Object value) {
- if (value instanceof Byte || value instanceof Short || value instanceof Integer ||
- value instanceof String || value instanceof Character || value instanceof Float ||
- value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
- value instanceof float[]) {
- return true;
- }
-
- // A special case for Longs: we require Long values to fit into either a uint32_t or an int32_t,
- // as that is what the BAM spec allows.
- if (value instanceof Long) {
- return SAMUtils.isValidUnsignedIntegerAttribute((Long) value)
- || ((Long) value >= Integer.MIN_VALUE && (Long) value <= Integer.MAX_VALUE);
- }
- return false;
+ return SAMBinaryTagAndValue.isAllowedAttributeValue(value);
}
protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value == null) {
- // setting a tag value to null removes the tag:
if (this.mAttributes != null) {
+ // setting a tag value to null removes the tag:
this.mAttributes = this.mAttributes.remove(tag);
}
- return;
- }
-
- if (isAllowedAttributeValue(value)) {
+ } else {
final SAMBinaryTagAndValue tmp;
if (!isUnsignedArray) {
tmp = new SAMBinaryTagAndValue(tag, value);
} else {
- if (!value.getClass().isArray() || value instanceof float[]) {
- throw new SAMException("Attribute type " + value.getClass() +
- " cannot be encoded as an unsigned array. Tag: " +
- SAMTagUtil.getSingleton().makeStringTag(tag));
- }
tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, value);
}
@@ -1438,9 +1422,6 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
} else {
this.mAttributes = this.mAttributes.insert(tmp);
}
- } else {
- throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
- SAMTagUtil.getSingleton().makeStringTag(tag));
}
}
@@ -1539,7 +1520,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* @return indexing bin based on alignment start & end.
*/
int computeIndexingBin() {
- // reg2bin has zero-based, half-open API
+ // regionToBin has zero-based, half-open API
final int alignmentStart = getAlignmentStart()-1;
int alignmentEnd = getAlignmentEnd();
if (alignmentEnd <= 0) {
@@ -1547,7 +1528,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
// then treat this as a one base alignment for indexing purposes.
alignmentEnd = alignmentStart + 1;
}
- return GenomicIndexUtil.reg2bin(alignmentStart, alignmentEnd);
+ return GenomicIndexUtil.regionToBin(alignmentStart, alignmentEnd);
}
/**
@@ -2144,20 +2125,8 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
/**
* Returns a deep copy of the SAM record, with the following exceptions:
*
- * - The header field, which shares the reference with the original record
+ * - The header field, which shares the header reference with the original record
* - The file source field, which will always be set to null in the copy
- *
- * Note that some fields, i.e. the cigar elements, alignment blocks, and
- * indexing bin, are not explicitly populated in the copy since they are lazily
- * generated on demand.
- *
- * Also note that this fails:
- *
- * original.deepCopy().equals(original)
- *
- * due to the fact that SAMBinaryTagAndValue.equals winds up calling object.equals on the
- * value field, which uses reference equality.
- *
*/
public SAMRecord deepCopy() {
final SAMRecord newSAM = new SAMRecord(getHeader());
@@ -2176,19 +2145,16 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
newSAM.setMateReferenceName(getMateReferenceName());
newSAM.setMateAlignmentStart(getMateAlignmentStart());
newSAM.setInferredInsertSize(getInferredInsertSize());
- if (null != getHeader()) {
- newSAM.setReferenceIndex(getReferenceIndex());
- newSAM.setMateReferenceIndex(getMateReferenceIndex());
- }
- else {
- newSAM.mReferenceIndex = null;
- newSAM.mMateReferenceIndex = null;
- }
+ // transfer the reference indices directly to avoid mutating
+ // the source record
+ newSAM.mReferenceIndex = this.mReferenceIndex;
+ newSAM.mMateReferenceIndex = this.mMateReferenceIndex;
newSAM.setValidationStringency(getValidationStringency());
SAMBinaryTagAndValue attributes = getBinaryAttributes();
if (null != attributes) {
newSAM.setAttributes(attributes.deepCopy());
}
+ newSAM.setIndexingBin(getIndexingBin());
return newSAM;
}
diff --git a/src/java/htsjdk/samtools/SAMUtils.java b/src/java/htsjdk/samtools/SAMUtils.java
index 4a77a25..b31b771 100644
--- a/src/java/htsjdk/samtools/SAMUtils.java
+++ b/src/java/htsjdk/samtools/SAMUtils.java
@@ -415,10 +415,10 @@ public final class SAMUtils {
*
* @param beg 0-based start of read (inclusive)
* @param end 0-based end of read (exclusive)
- * @deprecated Use GenomicIndexUtil.reg2bin
+ * @deprecated Use GenomicIndexUtil.regionToBin
*/
static int reg2bin(final int beg, final int end) {
- return GenomicIndexUtil.reg2bin(beg, end);
+ return GenomicIndexUtil.regionToBin(beg, end);
}
/**
diff --git a/src/java/htsjdk/samtools/SamReaderFactory.java b/src/java/htsjdk/samtools/SamReaderFactory.java
index 7be43ab..2e62a53 100644
--- a/src/java/htsjdk/samtools/SamReaderFactory.java
+++ b/src/java/htsjdk/samtools/SamReaderFactory.java
@@ -1,5 +1,6 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.sra.SRAAccession;
@@ -60,7 +61,7 @@ public abstract class SamReaderFactory {
abstract public ValidationStringency validationStringency();
- abstract public ReferenceSource referenceSource();
+ abstract public CRAMReferenceSource referenceSource();
/** Set this factory's {@link htsjdk.samtools.SAMRecordFactory} to the provided one, then returns itself. */
abstract public SamReaderFactory samRecordFactory(final SAMRecordFactory samRecordFactory);
@@ -78,7 +79,7 @@ public abstract class SamReaderFactory {
abstract public SamReaderFactory referenceSequence(File referenceSequence);
/** Sets the specified reference sequence * */
- abstract public SamReaderFactory referenceSource(ReferenceSource referenceSequence);
+ abstract public SamReaderFactory referenceSource(CRAMReferenceSource referenceSequence);
/** Utility method to open the file get the header and close the file */
abstract public SAMFileHeader getFileHeader(File samFile);
@@ -89,6 +90,12 @@ public abstract class SamReaderFactory {
/** Set this factory's {@link ValidationStringency} to the provided one, then returns itself. */
abstract public SamReaderFactory validationStringency(final ValidationStringency validationStringency);
+ /** Set whether readers created by this factory will use asynchronous IO.
+ * If this methods is not called, this flag will default to the value of {@link Defaults#USE_ASYNC_IO_FOR_SAMTOOLS}.
+ * Note that this option may not be applicable to all readers returned from this factory.
+ * Returns the factory itself. */
+ abstract public SamReaderFactory setUseAsyncIo(final boolean asynchronousIO);
+
private static SamReaderFactoryImpl DEFAULT =
new SamReaderFactoryImpl(Option.DEFAULTS, defaultValidationStringency, DefaultSAMRecordFactory.getInstance());
@@ -115,9 +122,10 @@ public abstract class SamReaderFactory {
private final static Log LOG = Log.getInstance(SamReaderFactory.class);
private final EnumSet<Option> enabledOptions;
private ValidationStringency validationStringency;
+ private boolean asynchronousIO = Defaults.USE_ASYNC_IO_FOR_SAMTOOLS;
private SAMRecordFactory samRecordFactory;
private CustomReaderFactory customReaderFactory;
- private ReferenceSource referenceSource;
+ private CRAMReferenceSource referenceSource;
private SamReaderFactoryImpl(final EnumSet<Option> enabledOptions, final ValidationStringency validationStringency, final SAMRecordFactory samRecordFactory) {
this.enabledOptions = EnumSet.copyOf(enabledOptions);
@@ -141,7 +149,7 @@ public abstract class SamReaderFactory {
}
@Override
- public ReferenceSource referenceSource() {
+ public CRAMReferenceSource referenceSource() {
return referenceSource;
}
@@ -181,7 +189,7 @@ public abstract class SamReaderFactory {
}
@Override
- public SamReaderFactory referenceSource(final ReferenceSource referenceSource) {
+ public SamReaderFactory referenceSource(final CRAMReferenceSource referenceSource) {
this.referenceSource = referenceSource;
return this;
}
@@ -208,6 +216,12 @@ public abstract class SamReaderFactory {
}
@Override
+ public SamReaderFactory setUseAsyncIo(final boolean asynchronousIO){
+ this.asynchronousIO = asynchronousIO;
+ return this;
+ }
+
+ @Override
public SamReader open(final SamInputResource resource) {
final SamReader.PrimitiveSamReader primitiveSamReader;
try {
@@ -236,6 +250,7 @@ public abstract class SamReaderFactory {
IOUtil.maybeBufferedSeekableStream(data.asUnbufferedSeekableStream()),
bufferedIndexStream,
false,
+ asynchronousIO,
validationStringency,
this.samRecordFactory
);
@@ -255,10 +270,10 @@ public abstract class SamReaderFactory {
if (SamStreams.isBAMFile(bufferedStream)) {
if (sourceFile == null || !sourceFile.isFile()) {
// Handle case in which file is a named pipe, e.g. /dev/stdin or created by mkfifo
- primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, validationStringency, this.samRecordFactory);
+ primitiveSamReader = new BAMFileReader(bufferedStream, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory);
} else {
bufferedStream.close();
- primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, validationStringency, this.samRecordFactory);
+ primitiveSamReader = new BAMFileReader(sourceFile, indexFile, false, asynchronousIO, validationStringency, this.samRecordFactory);
}
} else if (BlockCompressedInputStream.isValidFile(bufferedStream)) {
primitiveSamReader = new SAMTextReader(new BlockCompressedInputStream(bufferedStream), validationStringency, this.samRecordFactory);
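A sketch of the new factory options used together; paths are placeholders, the reference source is only consulted when the input turns out to be CRAM, and the snippet is assumed to run inside a method that declares throws IOException.

    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;
    import htsjdk.samtools.ValidationStringency;
    import htsjdk.samtools.cram.ref.ReferenceSource;
    import java.io.File;

    try (final SamReader samReader = SamReaderFactory.makeDefault()
            .validationStringency(ValidationStringency.LENIENT)
            .setUseAsyncIo(true)          // per-factory override of USE_ASYNC_IO_FOR_SAMTOOLS
            .referenceSource(new ReferenceSource(new File("reference.fa")))
            .open(new File("input.cram"))) {
        for (final SAMRecord samRecord : samReader) {
            // ... process each record
        }
    }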
diff --git a/src/java/htsjdk/samtools/SamStreams.java b/src/java/htsjdk/samtools/SamStreams.java
index 173eb7c..a6710bc 100644
--- a/src/java/htsjdk/samtools/SamStreams.java
+++ b/src/java/htsjdk/samtools/SamStreams.java
@@ -54,22 +54,17 @@ public class SamStreams {
final byte[] buffer = new byte[buffSize];
readBytes(stream, buffer, 0, buffSize);
stream.reset();
- final byte[] magicBuf = new byte[4];
- final int magicLength = readBytes(new BlockCompressedInputStream(new ByteArrayInputStream(buffer)), magicBuf, 0, 4);
- return magicLength == BAMFileConstants.BAM_MAGIC.length && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
- }
-
- // Its too expensive to examine the remote file to determine type.
- // Rely on file extension.
- public static boolean sourceLikeBam(final SeekableStream strm) {
- String source = strm.getSource();
- if (source == null) return true;
- source = source.toLowerCase();
- //Source will typically be a file path or URL
- //If it's a URL we require one of the query parameters to be bam file
- return source.endsWith(".bam") || source.contains(".bam?") || source.contains(".bam&") || source.contains(".bam%26");
+ try(final BlockCompressedInputStream bcis = new BlockCompressedInputStream(new ByteArrayInputStream(buffer))){
+ final byte[] magicBuf = new byte[4];
+ final int magicLength = readBytes(bcis, magicBuf, 0, 4);
+ return magicLength == BAMFileConstants.BAM_MAGIC.length && Arrays.equals(BAMFileConstants.BAM_MAGIC, magicBuf);
+ }
}
+ /**
+ * Checks whether the file is a gzipped sam file. Returns true if it
+ * is and false otherwise.
+ */
public static boolean isGzippedSAMFile(final InputStream stream) {
if (!stream.markSupported()) {
throw new IllegalArgumentException("Cannot test a stream that doesn't support marking.");
@@ -90,4 +85,15 @@ public class SamStreams {
}
}
}
+
+ // It's too expensive to examine the remote file to determine type.
+ // Rely on file extension.
+ public static boolean sourceLikeBam(final SeekableStream strm) {
+ String source = strm.getSource();
+ if (source == null) return true;
+ source = source.toLowerCase();
+ //Source will typically be a file path or URL
+ //If it's a URL we require one of the query parameters to be bam file
+ return source.endsWith(".bam") || source.contains(".bam?") || source.contains(".bam&") || source.contains(".bam%26");
+ }
}
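The stream-sniffing helpers consolidated here are public and static; a sketch of probing an unknown input, assuming a method that declares throws IOException. The stream must support mark/reset, hence the BufferedInputStream; the path is a placeholder.

    import htsjdk.samtools.SamStreams;
    import java.io.BufferedInputStream;
    import java.io.FileInputStream;
    import java.io.InputStream;

    try (final InputStream in = new BufferedInputStream(new FileInputStream("unknown.input"))) {
        if (SamStreams.isBAMFile(in)) {
            System.out.println("looks like BAM");
        } else if (SamStreams.isGzippedSAMFile(in)) {
            System.out.println("looks like gzipped SAM");
        } else {
            System.out.println("not BAM or gzipped SAM; possibly SAM text");
        }
    }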
diff --git a/src/java/htsjdk/samtools/cram/CRAIEntry.java b/src/java/htsjdk/samtools/cram/CRAIEntry.java
index 0c7da6e..777dd13 100644
--- a/src/java/htsjdk/samtools/cram/CRAIEntry.java
+++ b/src/java/htsjdk/samtools/cram/CRAIEntry.java
@@ -2,7 +2,10 @@ package htsjdk.samtools.cram;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.Slice;
+import htsjdk.samtools.util.RuntimeIOException;
+import java.io.IOException;
+import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
@@ -20,35 +23,23 @@ public class CRAIEntry implements Comparable<CRAIEntry>, Cloneable {
public int sliceSize;
public int sliceIndex;
- public CRAIEntry() {
- }
+ private static int CRAI_INDEX_COLUMNS = 6;
+ private static String entryFormat = "%d\t%d\t%d\t%d\t%d\t%d";
- public static List<CRAIEntry> fromContainer(final Container container) {
- final List<CRAIEntry> entries = new ArrayList<CRAIEntry>(container.slices.length);
- for (int i = 0; i < container.slices.length; i++) {
- final Slice s = container.slices[i];
- final CRAIEntry e = new CRAIEntry();
- e.sequenceId = s.sequenceId;
- e.alignmentStart = s.alignmentStart;
- e.alignmentSpan = s.alignmentSpan;
- e.containerStartOffset = s.containerOffset;
- e.sliceOffset = container.landmarks[i];
- e.sliceSize = s.size;
-
- e.sliceIndex = i;
- entries.add(e);
- }
- return entries;
- }
-
- public static CRAIEntry fromCraiLine(final String line) {
- return new CRAIEntry(line);
+ public CRAIEntry() {
}
+ /**
+ * Create a CRAI Entry from a serialized CRAI index line.
+ *
+ * @param line string formatted as a CRAI index entry
+ * @throws CRAIIndex.CRAIIndexException
+ */
public CRAIEntry(final String line) throws CRAIIndex.CRAIIndexException {
final String[] chunks = line.split("\t");
- if (chunks.length != 6) {
- throw new CRAIIndex.CRAIIndexException("Expecting 6 columns but got " + chunks.length);
+ if (chunks.length != CRAI_INDEX_COLUMNS) {
+ throw new CRAIIndex.CRAIIndexException(
+ "Malformed CRAI index entry: expecting " + CRAI_INDEX_COLUMNS + " columns but got " + chunks.length);
}
try {
@@ -63,13 +54,51 @@ public class CRAIEntry implements Comparable<CRAIEntry>, Cloneable {
}
}
- @Override
- public String toString() {
- return String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan,
+ /**
+ * Serialize the entry to a CRAI index stream.
+ * @param os stream to write to
+ */
+ public void writeToStream(OutputStream os) {
+ try {
+ os.write(serializeToString().getBytes());
+ os.write('\n');
+ }
+ catch (IOException e) {
+ throw new RuntimeIOException(e);
+ }
+ }
+
+ /**
+ * Format the entry as a string suitable for serialization in the CRAI index
+ */
+ private String serializeToString() {
+ return String.format(entryFormat,
+ sequenceId, alignmentStart, alignmentSpan,
containerStartOffset, sliceOffset, sliceSize);
}
@Override
+ public String toString() { return serializeToString(); }
+
+ public static List<CRAIEntry> fromContainer(final Container container) {
+ final List<CRAIEntry> entries = new ArrayList<>(container.slices.length);
+ for (int i = 0; i < container.slices.length; i++) {
+ final Slice s = container.slices[i];
+ final CRAIEntry e = new CRAIEntry();
+ e.sequenceId = s.sequenceId;
+ e.alignmentStart = s.alignmentStart;
+ e.alignmentSpan = s.alignmentSpan;
+ e.containerStartOffset = s.containerOffset;
+ e.sliceOffset = container.landmarks[i];
+ e.sliceSize = s.size;
+
+ e.sliceIndex = i;
+ entries.add(e);
+ }
+ return entries;
+ }
+
+ @Override
public int compareTo(final CRAIEntry o) {
if (o == null) {
return 1;
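A sketch of the reworked entry API: parse one CRAI line and serialize it back out. The tab-separated values are made up purely for illustration, and the catch clause is only there to keep the snippet self-contained.

    import htsjdk.samtools.cram.CRAIEntry;
    import htsjdk.samtools.cram.CRAIIndex;
    import java.io.ByteArrayOutputStream;

    try {
        // columns: sequenceId, alignmentStart, alignmentSpan, containerStartOffset, sliceOffset, sliceSize
        final CRAIEntry entry = new CRAIEntry("0\t100\t500\t2048\t367\t153");
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        entry.writeToStream(out);            // writes serializeToString() plus a trailing newline
        System.out.print(out.toString());
    } catch (final CRAIIndex.CRAIIndexException e) {
        throw new RuntimeException(e);
    }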
diff --git a/src/java/htsjdk/samtools/cram/CRAIIndex.java b/src/java/htsjdk/samtools/cram/CRAIIndex.java
index 76668b0..8d9e28b 100644
--- a/src/java/htsjdk/samtools/cram/CRAIIndex.java
+++ b/src/java/htsjdk/samtools/cram/CRAIIndex.java
@@ -1,6 +1,6 @@
package htsjdk.samtools.cram;
-import htsjdk.samtools.CRAMIndexer;
+import htsjdk.samtools.CRAMBAIIndexer;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.cram.structure.Slice;
@@ -27,10 +27,7 @@ public class CRAIIndex {
public static final String CRAI_INDEX_SUFFIX = ".crai";
public static void writeIndex(final OutputStream os, final List<CRAIEntry> index) throws IOException {
- for (final CRAIEntry e : index) {
- os.write(e.toString().getBytes());
- os.write('\n');
- }
+ index.stream().forEach(e -> e.writeToStream(os));
}
public static List<CRAIEntry> readIndex(final InputStream is) throws CRAIIndexException {
@@ -40,7 +37,7 @@ public class CRAIIndex {
try {
while (scanner.hasNextLine()) {
final String line = scanner.nextLine();
- final CRAIEntry entry = CRAIEntry.fromCraiLine(line);
+ final CRAIEntry entry = new CRAIEntry(line);
list.add(entry);
}
} finally {
@@ -132,7 +129,7 @@ public class CRAIIndex {
header.setSequenceDictionary(dictionary);
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
- final CRAMIndexer indexer = new CRAMIndexer(baos, header);
+ final CRAMBAIIndexer indexer = new CRAMBAIIndexer(baos, header);
for (final CRAIEntry entry : full) {
final Slice slice = new Slice();
diff --git a/src/java/htsjdk/samtools/cram/build/CramNormalizer.java b/src/java/htsjdk/samtools/cram/build/CramNormalizer.java
index 111f271..1be1aa5 100644
--- a/src/java/htsjdk/samtools/cram/build/CramNormalizer.java
+++ b/src/java/htsjdk/samtools/cram/build/CramNormalizer.java
@@ -28,7 +28,7 @@ import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
import htsjdk.samtools.cram.encoding.readfeatures.RefSkip;
import htsjdk.samtools.cram.encoding.readfeatures.SoftClip;
import htsjdk.samtools.cram.encoding.readfeatures.Substitution;
-import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.cram.ref.CRAMReferenceSource;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
import htsjdk.samtools.cram.structure.SubstitutionMatrix;
import htsjdk.samtools.util.Log;
@@ -42,13 +42,13 @@ public class CramNormalizer {
private int readCounter = 0;
private static Log log = Log.getInstance(CramNormalizer.class);
- private ReferenceSource referenceSource;
+ private CRAMReferenceSource referenceSource;
private CramNormalizer(final SAMFileHeader header) {
this.header = header;
}
- public CramNormalizer(final SAMFileHeader header, final ReferenceSource referenceSource) {
+ public CramNormalizer(final SAMFileHeader header, final CRAMReferenceSource referenceSource) {
if (referenceSource == null) {
throw new IllegalArgumentException("A reference is required.");
}
diff --git a/src/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java b/src/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java
new file mode 100644
index 0000000..35a3e79
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/ref/CRAMReferenceSource.java
@@ -0,0 +1,22 @@
+package htsjdk.samtools.cram.ref;
+
+import htsjdk.samtools.SAMSequenceRecord;
+
+/**
+ * Interface used to supply a reference source when reading CRAM files.
+ */
+public interface CRAMReferenceSource {
+
+ /**
+ * getReferenceBases
+ * @param sequenceRecord the SAMSequenceRecord identifying the reference
+ * being requested
+ * @param tryNameVariants if true, attempt to match the requested sequence name
+ * against the reference by using common name variations,
+ * such as adding or removing a leading "chr" prefix
+ * from the requested name. If false, use an exact match
+ * @return the bases representing the requested sequence, or null if the sequence
+ * cannot be found
+ */
+ byte[] getReferenceBases(final SAMSequenceRecord sequenceRecord, final boolean tryNameVariants);
+}
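Because the reference source is now an interface, callers can supply their own lookup; a minimal hypothetical implementation backed by an in-memory map (purely illustrative, not part of htsjdk).

    import htsjdk.samtools.SAMSequenceRecord;
    import htsjdk.samtools.cram.ref.CRAMReferenceSource;
    import java.util.HashMap;
    import java.util.Map;

    /** Hypothetical reference source that serves bases from an in-memory map keyed by sequence name. */
    final class InMemoryReferenceSource implements CRAMReferenceSource {
        private final Map<String, byte[]> basesByName = new HashMap<>();

        void put(final String sequenceName, final byte[] bases) {
            basesByName.put(sequenceName, bases);
        }

        @Override
        public byte[] getReferenceBases(final SAMSequenceRecord sequenceRecord, final boolean tryNameVariants) {
            final String name = sequenceRecord.getSequenceName();
            byte[] bases = basesByName.get(name);
            if (bases == null && tryNameVariants) {
                // a tiny subset of the name variations ReferenceSource itself tries
                bases = name.startsWith("chr")
                        ? basesByName.get(name.substring(3))
                        : basesByName.get("chr" + name);
            }
            return bases; // null if the sequence cannot be found
        }
    }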
diff --git a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
index fd7157d..ab531b9 100644
--- a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
+++ b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
@@ -40,14 +40,23 @@ import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
-public class ReferenceSource {
+/**
+ * Used to represent a CRAM reference, the backing source for which can either be
+ * a file or the EBI ENA reference service.
+ *
+ * NOTE: In a future release, this class will be renamed and the functionality it
+ * contains will be refactored and distributed into one or more separate reference
+ * source implementations, each corresponding to the type of resource backing the
+ * reference.
+ */
+public class ReferenceSource implements CRAMReferenceSource {
private static final Log log = Log.getInstance(ReferenceSource.class);
private ReferenceSequenceFile rsFile;
private int downloadTriesBeforeFailing = 2;
private final Map<String, WeakReference<byte[]>> cacheW = new HashMap<String, WeakReference<byte[]>>();
- public ReferenceSource() {
+ private ReferenceSource() {
}
public ReferenceSource(final File file) {
@@ -64,10 +73,10 @@ public class ReferenceSource {
}
/**
- * Attempts to construct a default ReferenceSource for use with CRAM files when
+ * Attempts to construct a default CRAMReferenceSource for use with CRAM files when
* one has not been explicitly provided.
*
- * @return ReferenceSource if one can be acquired. Guaranteed to no be null if none
+ * @return CRAMReferenceSource if one can be acquired. Guaranteed to not be null if none
* of the listed exceptions is thrown.
* @throws IllegalStateException if no default reference source can be acquired
* @throws IllegalArgumentException if the reference_fasta environment variable refers to a
@@ -81,7 +90,7 @@ public class ReferenceSource {
* <li>ENA Reference Service if it is enabled</li>
* </ul>
*/
- public static ReferenceSource getDefaultCRAMReferenceSource() {
+ public static CRAMReferenceSource getDefaultCRAMReferenceSource() {
if (null != Defaults.REFERENCE_FASTA) {
if (Defaults.REFERENCE_FASTA.exists()) {
return new ReferenceSource(Defaults.REFERENCE_FASTA);
diff --git a/src/java/htsjdk/samtools/cram/structure/Slice.java b/src/java/htsjdk/samtools/cram/structure/Slice.java
index dd8a388..30f0a74 100644
--- a/src/java/htsjdk/samtools/cram/structure/Slice.java
+++ b/src/java/htsjdk/samtools/cram/structure/Slice.java
@@ -221,12 +221,6 @@ public class Slice {
}
void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
- if (value != null && !(value instanceof Byte || value instanceof Short || value instanceof Integer || value instanceof String ||
- value instanceof Character || value instanceof Float || value instanceof byte[] || value instanceof short[] || value
- instanceof int[] || value instanceof float[])) {
- throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " + SAMTagUtil.getSingleton()
- .makeStringTag(tag));
- }
if (value == null) {
if (this.sliceTags != null) this.sliceTags = this.sliceTags.remove(tag);
} else {
@@ -234,10 +228,6 @@ public class Slice {
if (!isUnsignedArray) {
tmp = new SAMBinaryTagAndValue(tag, value);
} else {
- if (!value.getClass().isArray() || value instanceof float[]) {
- throw new SAMException("Attribute type " + value.getClass() + " cannot be encoded as an unsigned array. Tag: " +
- SAMTagUtil.getSingleton().makeStringTag(tag));
- }
tmp = new SAMBinaryTagAndUnsignedArrayValue(tag, value);
}
if (this.sliceTags == null) this.sliceTags = tmp;
diff --git a/src/java/htsjdk/samtools/example/PrintReadsExample.java b/src/java/htsjdk/samtools/example/PrintReadsExample.java
new file mode 100755
index 0000000..b8b4106
--- /dev/null
+++ b/src/java/htsjdk/samtools/example/PrintReadsExample.java
@@ -0,0 +1,104 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ */
+package htsjdk.samtools.example;
+
+import htsjdk.samtools.*;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.zip.DeflaterFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+
+/**
+ * This is an example program showing how to use SAM readers and (optionally) writers.
+ * It's also useful for measuring time.
+ * An example invocation is:
+ * java -cp dist/htsjdk-2.1.1.jar htsjdk.samtools.example.PrintReadsExample in.bam false a.bam
+ * <p>
+ * or (to test the IntelDeflater)
+ * java -Dsamjdk.intel_deflater_so_path=$PWD/lib/jni/libIntelDeflater.so -cp dist/htsjdk-2.1.1.jar htsjdk.samtools.example.PrintReadsExample in.bam false a.bam
+ * <p>
+ * Arguments:
+ * - the first argument is the input file (SAM or BAM)
+ * - the second argument is a boolean (true or false) that indicates whether reads are to be eagerly decoded (useful for benchmarking)
+ * - the third argument is optional and is the name of the output file (nothing gets written if this argument is missing)
+ */
+public final class PrintReadsExample {
+ private PrintReadsExample() {
+ }
+
+ private static final Log log = Log.getInstance(PrintReadsExample.class);
+
+ public static void main(String[] args) throws IOException {
+ if (args.length < 2) {
+ System.out.println("Usage: " + PrintReadsExample.class.getCanonicalName() + " inFile eagerDecode [outFile]");
+ System.exit(1);
+ }
+ final File inputFile = new File(args[0]);
+ final boolean eagerDecode = Boolean.parseBoolean(args[1]); //useful to test (realistic) scenarios in which every record is always fully decoded.
+ final File outputFile = args.length >= 3 ? new File(args[2]) : null;
+
+ final long start = System.currentTimeMillis();
+
+ log.info("Start with args:" + Arrays.toString(args));
+ printConfigurationInfo();
+
+ SamReaderFactory readerFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
+ if (eagerDecode) {
+ readerFactory = readerFactory.enable(SamReaderFactory.Option.EAGERLY_DECODE);
+ }
+
+ try (final SamReader reader = readerFactory.open(inputFile)) {
+ final SAMFileHeader header = reader.getFileHeader();
+ try (final SAMFileWriter writer = outputFile != null ? new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile) : null) {
+ final ProgressLogger pl = new ProgressLogger(log, 1000000);
+ for (final SAMRecord record : reader) {
+ if (writer != null) {
+ writer.addAlignment(record);
+ }
+ pl.record(record);
+ }
+ }
+ }
+ final long end = System.currentTimeMillis();
+ log.info(String.format("Done. Elapsed time %.3f seconds", (end - start) / 1000.0));
+ }
+
+ private static void printConfigurationInfo() throws IOException {
+ log.info("Executing as " +
+ System.getProperty("user.name") + '@' + InetAddress.getLocalHost().getHostName() +
+ " on " + System.getProperty("os.name") + ' ' + System.getProperty("os.version") +
+ ' ' + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") +
+ ' ' + System.getProperty("java.runtime.version") +
+ ' ' + (DeflaterFactory.usingIntelDeflater() ? "IntelDeflater" : "JdkDeflater"));
+
+ final List<String> list = Defaults.allDefaults().entrySet().stream().map(e -> e.getKey() + ':' + e.getValue()).collect(Collectors.toList());
+ log.info(String.join(" ", list));
+ }
+}
diff --git a/src/java/htsjdk/samtools/fastq/FastqWriterFactory.java b/src/java/htsjdk/samtools/fastq/FastqWriterFactory.java
index d483251..3f7a006 100644
--- a/src/java/htsjdk/samtools/fastq/FastqWriterFactory.java
+++ b/src/java/htsjdk/samtools/fastq/FastqWriterFactory.java
@@ -10,7 +10,7 @@ import java.io.File;
* @author Tim Fennell
*/
public class FastqWriterFactory {
- boolean useAsyncIo = Defaults.USE_ASYNC_IO;
+ boolean useAsyncIo = Defaults.USE_ASYNC_IO_FOR_SAMTOOLS;
boolean createMd5 = Defaults.CREATE_MD5;
/** Sets whether or not to use async io (i.e. a dedicated thread per writer). */
diff --git a/src/java/htsjdk/samtools/filter/FilteringIterator.java b/src/java/htsjdk/samtools/filter/FilteringIterator.java
index 00e489f..3ce9f96 100644
--- a/src/java/htsjdk/samtools/filter/FilteringIterator.java
+++ b/src/java/htsjdk/samtools/filter/FilteringIterator.java
@@ -1,154 +1,53 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package htsjdk.samtools.filter;
-
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SAMRecordIterator;
-import htsjdk.samtools.SamPairUtil;
-import htsjdk.samtools.util.CloseableIterator;
-import htsjdk.samtools.util.CloserUtil;
-import htsjdk.samtools.util.PeekableIterator;
-
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-
-/**
- * Filtering Iterator which takes a filter and an iterator and iterates through only those records
- * which are not rejected by the filter.
- * <p/>
- * $Id$
- *
- * @author Kathleen Tibbetts
- */
-public class FilteringIterator implements CloseableIterator<SAMRecord> {
-
- private final PeekableIterator<SAMRecord> iterator;
- private final SamRecordFilter filter;
- private boolean filterReadPairs = false;
- private SAMRecord next = null;
-
- /**
- * Constructor
- *
- * @param iterator the backing iterator
- * @param filter the filter (which may be a FilterAggregator)
- * @param filterByPair if true, filter reads in pairs
- */
- public FilteringIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter,
- final boolean filterByPair) {
-
- if (filterByPair && iterator instanceof SAMRecordIterator) {
- ((SAMRecordIterator)iterator).assertSorted(SAMFileHeader.SortOrder.queryname);
- }
-
- this.iterator = new PeekableIterator<SAMRecord>(iterator);
- this.filter = filter;
- this.filterReadPairs = filterByPair;
- next = getNextRecord();
- }
-
- /**
- * Constructor
- *
- * @param iterator the backing iterator
- * @param filter the filter (which may be a FilterAggregator)
- */
- public FilteringIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter) {
- this.iterator = new PeekableIterator<SAMRecord>(iterator);
- this.filter = filter;
- next = getNextRecord();
- }
-
- /**
- * Returns true if the iteration has more elements.
- *
- * @return true if the iteration has more elements. Otherwise returns false.
- */
- public boolean hasNext() {
- return next != null;
- }
-
- /**
- * Returns the next element in the iteration.
- *
- * @return the next element in the iteration
- * @throws java.util.NoSuchElementException
- *
- */
- public SAMRecord next() {
- if (next == null) {
- throw new NoSuchElementException("Iterator has no more elements.");
- }
- final SAMRecord result = next;
- next = getNextRecord();
- return result;
- }
-
- /**
- * Required method for Iterator API.
- *
- * @throws UnsupportedOperationException
- */
- public void remove() {
- throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
- }
-
- public void close() {
- CloserUtil.close(iterator);
- }
-
- /**
- * Gets the next record from the underlying iterator that passes the filter
- *
- * @return SAMRecord the next filter-passing record
- */
- private SAMRecord getNextRecord() {
-
- while (iterator.hasNext()) {
- final SAMRecord record = iterator.next();
-
- if (filterReadPairs && record.getReadPairedFlag() && record.getFirstOfPairFlag() &&
- iterator.hasNext()) {
-
- SamPairUtil.assertMate(record, iterator.peek());
-
- if (filter.filterOut(record, iterator.peek())) {
- // skip second read
- iterator.next();
- } else {
- return record;
- }
- } else if (filterReadPairs && record.getReadPairedFlag() &&
- record.getSecondOfPairFlag()) {
- // assume that we did a pass(first, second) and it passed the filter
- return record;
- } else if (!filter.filterOut(record)) {
- return record;
- }
- }
-
- return null;
- }
-}
\ No newline at end of file
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.filter;
+
+import htsjdk.samtools.SAMRecord;
+
+import java.util.Iterator;
+
+/**
+ * Filtering Iterator which takes a filter and an iterator and iterates through only those records
+ * which are not rejected by the filter.
+ * <p/>
+ * $Id$
+ *
+ * @author Kathleen Tibbetts
+ *
+ * use {@link FilteringSamIterator} instead
+ */
+
+@Deprecated /** use {@link FilteringSamIterator} instead **/
+public class FilteringIterator extends FilteringSamIterator {
+
+ public FilteringIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter, final boolean filterByPair) {
+ super(iterator, filter, filterByPair);
+ }
+
+ public FilteringIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter) {
+ super(iterator, filter);
+ }
+
+}
diff --git a/src/java/htsjdk/samtools/filter/FilteringIterator.java b/src/java/htsjdk/samtools/filter/FilteringSamIterator.java
similarity index 89%
copy from src/java/htsjdk/samtools/filter/FilteringIterator.java
copy to src/java/htsjdk/samtools/filter/FilteringSamIterator.java
index 00e489f..7ac1c0a 100644
--- a/src/java/htsjdk/samtools/filter/FilteringIterator.java
+++ b/src/java/htsjdk/samtools/filter/FilteringSamIterator.java
@@ -1,154 +1,155 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package htsjdk.samtools.filter;
-
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SAMRecordIterator;
-import htsjdk.samtools.SamPairUtil;
-import htsjdk.samtools.util.CloseableIterator;
-import htsjdk.samtools.util.CloserUtil;
-import htsjdk.samtools.util.PeekableIterator;
-
-import java.util.Iterator;
-import java.util.NoSuchElementException;
-
-/**
- * Filtering Iterator which takes a filter and an iterator and iterates through only those records
- * which are not rejected by the filter.
- * <p/>
- * $Id$
- *
- * @author Kathleen Tibbetts
- */
-public class FilteringIterator implements CloseableIterator<SAMRecord> {
-
- private final PeekableIterator<SAMRecord> iterator;
- private final SamRecordFilter filter;
- private boolean filterReadPairs = false;
- private SAMRecord next = null;
-
- /**
- * Constructor
- *
- * @param iterator the backing iterator
- * @param filter the filter (which may be a FilterAggregator)
- * @param filterByPair if true, filter reads in pairs
- */
- public FilteringIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter,
- final boolean filterByPair) {
-
- if (filterByPair && iterator instanceof SAMRecordIterator) {
- ((SAMRecordIterator)iterator).assertSorted(SAMFileHeader.SortOrder.queryname);
- }
-
- this.iterator = new PeekableIterator<SAMRecord>(iterator);
- this.filter = filter;
- this.filterReadPairs = filterByPair;
- next = getNextRecord();
- }
-
- /**
- * Constructor
- *
- * @param iterator the backing iterator
- * @param filter the filter (which may be a FilterAggregator)
- */
- public FilteringIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter) {
- this.iterator = new PeekableIterator<SAMRecord>(iterator);
- this.filter = filter;
- next = getNextRecord();
- }
-
- /**
- * Returns true if the iteration has more elements.
- *
- * @return true if the iteration has more elements. Otherwise returns false.
- */
- public boolean hasNext() {
- return next != null;
- }
-
- /**
- * Returns the next element in the iteration.
- *
- * @return the next element in the iteration
- * @throws java.util.NoSuchElementException
- *
- */
- public SAMRecord next() {
- if (next == null) {
- throw new NoSuchElementException("Iterator has no more elements.");
- }
- final SAMRecord result = next;
- next = getNextRecord();
- return result;
- }
-
- /**
- * Required method for Iterator API.
- *
- * @throws UnsupportedOperationException
- */
- public void remove() {
- throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
- }
-
- public void close() {
- CloserUtil.close(iterator);
- }
-
- /**
- * Gets the next record from the underlying iterator that passes the filter
- *
- * @return SAMRecord the next filter-passing record
- */
- private SAMRecord getNextRecord() {
-
- while (iterator.hasNext()) {
- final SAMRecord record = iterator.next();
-
- if (filterReadPairs && record.getReadPairedFlag() && record.getFirstOfPairFlag() &&
- iterator.hasNext()) {
-
- SamPairUtil.assertMate(record, iterator.peek());
-
- if (filter.filterOut(record, iterator.peek())) {
- // skip second read
- iterator.next();
- } else {
- return record;
- }
- } else if (filterReadPairs && record.getReadPairedFlag() &&
- record.getSecondOfPairFlag()) {
- // assume that we did a pass(first, second) and it passed the filter
- return record;
- } else if (!filter.filterOut(record)) {
- return record;
- }
- }
-
- return null;
- }
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools.filter;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SamPairUtil;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.samtools.util.PeekableIterator;
+
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+
+/**
+ * Filtering Iterator which takes a filter and an iterator and iterates through only those records
+ * which are not rejected by the filter.
+ * <p/>
+ * $Id$
+ *
+ * @author Kathleen Tibbetts
+ */
+public class FilteringSamIterator implements CloseableIterator<SAMRecord> {
+
+ private final PeekableIterator<SAMRecord> iterator;
+ private final SamRecordFilter filter;
+ private boolean filterReadPairs = false;
+ private SAMRecord next = null;
+
+ /**
+ * Constructor
+ *
+ * @param iterator the backing iterator
+ * @param filter the filter (which may be a FilterAggregator)
+ * @param filterByPair if true, filter reads in pairs
+ */
+ public FilteringSamIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter,
+ final boolean filterByPair) {
+
+ if (filterByPair && iterator instanceof SAMRecordIterator) {
+ ((SAMRecordIterator)iterator).assertSorted(SAMFileHeader.SortOrder.queryname);
+ }
+
+ this.iterator = new PeekableIterator<SAMRecord>(iterator);
+ this.filter = filter;
+ this.filterReadPairs = filterByPair;
+ next = getNextRecord();
+ }
+
+ /**
+ * Constructor
+ *
+ * @param iterator the backing iterator
+ * @param filter the filter (which may be a FilterAggregator)
+ */
+ public FilteringSamIterator(final Iterator<SAMRecord> iterator, final SamRecordFilter filter) {
+ this.iterator = new PeekableIterator<SAMRecord>(iterator);
+ this.filter = filter;
+ next = getNextRecord();
+ }
+
+ /**
+ * Returns true if the iteration has more elements.
+ *
+ * @return true if the iteration has more elements. Otherwise returns false.
+ */
+ public boolean hasNext() {
+ return next != null;
+ }
+
+ /**
+ * Returns the next element in the iteration.
+ *
+ * @return the next element in the iteration
+ * @throws java.util.NoSuchElementException
+ *
+ */
+ public SAMRecord next() {
+ if (next == null) {
+ throw new NoSuchElementException("Iterator has no more elements.");
+ }
+ final SAMRecord result = next;
+ next = getNextRecord();
+ return result;
+ }
+
+ /**
+ * Required method for Iterator API.
+ *
+ * @throws UnsupportedOperationException
+ */
+ public void remove() {
+ throw new UnsupportedOperationException("Remove() not supported by FilteringSamIterator");
+ }
+
+ public void close() {
+ CloserUtil.close(iterator);
+ }
+
+ /**
+ * Gets the next record from the underlying iterator that passes the filter
+ *
+ * @return SAMRecord the next filter-passing record
+ */
+ private SAMRecord getNextRecord() {
+
+ while (iterator.hasNext()) {
+ final SAMRecord record = iterator.next();
+
+ if (filterReadPairs && record.getReadPairedFlag() && record.getFirstOfPairFlag() &&
+ iterator.hasNext()) {
+
+ SamPairUtil.assertMate(record, iterator.peek());
+
+ if (filter.filterOut(record, iterator.peek())) {
+ // skip second read
+ iterator.next();
+ } else {
+ return record;
+ }
+ } else if (filterReadPairs && record.getReadPairedFlag() &&
+ record.getSecondOfPairFlag()) {
+ // assume that we did a pass(first, second) and it passed the filter
+ return record;
+ } else if (!filter.filterOut(record)) {
+ return record;
+ }
+ }
+
+ return null;
+ }
}
\ No newline at end of file
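
For orientation, a minimal sketch of how the renamed FilteringSamIterator is typically used. The input file name and the DuplicateReadFilter are illustrative choices, not part of this commit:

    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;
    import htsjdk.samtools.filter.DuplicateReadFilter;
    import htsjdk.samtools.filter.FilteringSamIterator;

    import java.io.File;
    import java.io.IOException;

    public class FilteringSamIteratorSketch {
        public static void main(final String[] args) throws IOException {
            try (final SamReader reader = SamReaderFactory.makeDefault().open(new File("in.bam"));
                 final FilteringSamIterator it =
                         new FilteringSamIterator(reader.iterator(), new DuplicateReadFilter())) {
                // Only records that pass the filter (here: non-duplicates) are returned.
                while (it.hasNext()) {
                    final SAMRecord rec = it.next();
                    // process rec ...
                }
            }
        }
    }
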
diff --git a/src/java/htsjdk/samtools/reference/ReferenceSequence.java b/src/java/htsjdk/samtools/reference/ReferenceSequence.java
index eb5533b..a7ddb44 100644
--- a/src/java/htsjdk/samtools/reference/ReferenceSequence.java
+++ b/src/java/htsjdk/samtools/reference/ReferenceSequence.java
@@ -24,6 +24,8 @@
package htsjdk.samtools.reference;
+import htsjdk.samtools.util.StringUtil;
+
/**
* Wrapper around a reference sequence that has been read from a reference file.
*
@@ -60,6 +62,16 @@ public class ReferenceSequence {
*/
public byte[] getBases() { return bases; }
+ /**
+ * Returns the bases represented by this ReferenceSequence as a String. Since this will copy the bases
+ * and convert them to two-byte characters, this should not be used on very long reference sequences,
+ * but as a convenience when manipulating short sequences returned by
+ * {@link ReferenceSequenceFile#getSubsequenceAt(String, long, long)}
+ *
+ * @return The set of bases represented by this ReferenceSequence, as a String
+ */
+ public String getBaseString() { return StringUtil.bytesToString(bases); }
+
/** Gets the 0-based index of this contig in the source file from which it came. */
public int getContigIndex() { return contigIndex; }
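
A short sketch of how the new getBaseString() convenience is meant to be used on small subsequences; the FASTA path and coordinates are made up, and an accompanying .fai index is assumed:

    import htsjdk.samtools.reference.ReferenceSequence;
    import htsjdk.samtools.reference.ReferenceSequenceFile;
    import htsjdk.samtools.reference.ReferenceSequenceFileFactory;

    import java.io.File;

    public class BaseStringSketch {
        public static void main(final String[] args) {
            final ReferenceSequenceFile fasta =
                    ReferenceSequenceFileFactory.getReferenceSequenceFile(new File("ref.fasta"));
            // Pull a short window and print it as a String rather than handling the raw byte[].
            final ReferenceSequence seq = fasta.getSubsequenceAt("chr1", 10000, 10020);
            System.out.println(seq.getName() + ": " + seq.getBaseString());
        }
    }
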
diff --git a/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java b/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
index bd2f654..86c18b9 100644
--- a/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
+++ b/src/java/htsjdk/samtools/util/AbstractAsyncWriter.java
@@ -11,6 +11,9 @@ import java.util.concurrent.atomic.AtomicReference;
* Abstract class that is designed to be extended and specialized to provide an asynchronous
* wrapper around any kind of Writer class that takes an object and writes it out somehow.
*
+ * NOTE: Objects of subclasses of this class are not intended to be shared between threads.
+ * In particular there must be only one thread that calls {@link #write} and {@link #close}.
+ *
* @author Tim Fennell
*/
public abstract class AbstractAsyncWriter<T> implements Closeable {
@@ -56,7 +59,7 @@ public abstract class AbstractAsyncWriter<T> implements Closeable {
}
/**
- * Attempts to finishing draining the queue and then calls synchronoslyClose() to allow implementation
+ * Attempts to finish draining the queue and then calls synchronouslyClose() to allow implementation
* to do any one time clean up.
*/
public void close() {
@@ -70,9 +73,13 @@ public abstract class AbstractAsyncWriter<T> implements Closeable {
throw new RuntimeException("Interrupted waiting on writer thread.", ie);
}
- // Assert that the queue is empty
- if (!this.queue.isEmpty()) {
- throw new RuntimeException("Queue should be empty but is size: " + this.queue.size());
+ //The queue should be empty but if it's not, we'll drain it here to protect against any lost data.
+ //There's no need to time out on poll because poll is called only when the queue is not empty and
+ // at this point the writer thread is definitely dead and no one is removing items from the queue.
+ //The item pulled will never be null (same reasoning).
+ while (!this.queue.isEmpty()) {
+ final T item = queue.poll();
+ synchronouslyWrite(item);
}
synchronouslyClose();
@@ -100,7 +107,11 @@ public abstract class AbstractAsyncWriter<T> implements Closeable {
private class WriterRunnable implements Runnable {
public void run() {
try {
- while (!queue.isEmpty() || !isClosed.get()) {
+ //The order of the two conditions is important, see https://github.com/samtools/htsjdk/issues/564:
+ //we want to make sure that the emptiness status of the queue cannot change after isClosed has been evaluated.
+ //As written (isClosed is checked before queue.isEmpty),
+ //the two checks are effectively atomic whenever isClosed returns true.
+ while (!isClosed.get() || !queue.isEmpty()) {
try {
final T item = queue.poll(2, TimeUnit.SECONDS);
if (item != null) synchronouslyWrite(item);
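
To make the single-writer-thread note above concrete, here is a rough sketch of a subclass. It assumes the usual protected constructor taking a queue size and the synchronouslyWrite/synchronouslyClose/getThreadNamePrefix hooks, so treat it as an outline rather than a drop-in implementation:

    import htsjdk.samtools.util.AbstractAsyncWriter;

    import java.io.BufferedWriter;
    import java.io.FileWriter;
    import java.io.IOException;

    // Lines are queued by the single writing thread and flushed to disk on the background thread.
    public class AsyncLineWriter extends AbstractAsyncWriter<String> {
        private final BufferedWriter out;

        public AsyncLineWriter(final String path, final int queueSize) throws IOException {
            super(queueSize);
            this.out = new BufferedWriter(new FileWriter(path));
        }

        @Override
        protected void synchronouslyWrite(final String item) {
            try { out.write(item); out.newLine(); }
            catch (final IOException e) { throw new RuntimeException(e); }
        }

        @Override
        protected void synchronouslyClose() {
            try { out.close(); }
            catch (final IOException e) { throw new RuntimeException(e); }
        }

        @Override
        protected String getThreadNamePrefix() { return "AsyncLineWriter-"; }
    }
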
diff --git a/src/java/htsjdk/samtools/util/AsyncBufferedIterator.java b/src/java/htsjdk/samtools/util/AsyncBufferedIterator.java
new file mode 100644
index 0000000..bf78ecb
--- /dev/null
+++ b/src/java/htsjdk/samtools/util/AsyncBufferedIterator.java
@@ -0,0 +1,281 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Daniel Cameron
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Iterator that uses a dedicated background thread to perform read-ahead to improve
+ * throughput at the expense of increased latency. This iterator will block
+ * until the background thread has read a full buffer of records.
+ *
+ * Note that this implementation is not synchronized. If multiple threads
+ * access an instance concurrently, it must be synchronized externally.
+ *
+ * @author Daniel Cameron
+ *
+ */
+public class AsyncBufferedIterator<T> implements CloseableIterator<T> {
+ private static final Log log = Log.getInstance(AsyncBufferedIterator.class);
+ private static final AtomicInteger threadsCreated = new AtomicInteger(0);
+ private final int bufferSize;
+ /**
+ * A dedicated background thread is required since these iterators can be chained
+ * and can therefore block on each other. Using a thread pool would result in
+ * a deadlock due to task dependencies.
+ */
+ private Thread backgroundThread;
+ private final Iterator<T> underlyingIterator;
+ private final BlockingQueue<IteratorBuffer<T>> buffers;
+ private IteratorBuffer<T> currentBlock = new IteratorBuffer<>(Collections.emptyList());
+
+ /**
+ * Creates a new iterator that traverses the given iterator on a background
+ * thread
+ *
+ * @param iterator iterator to traverse
+ * @param bufferSize size of read-ahead buffer. A larger size will increase both throughput and latency.
+ * Double buffering is used so the maximum number of records on which read-ahead is performed is twice this.
+ */
+ public AsyncBufferedIterator(final Iterator<T> iterator, final int bufferSize) {
+ this(iterator, bufferSize, 1, null);
+ }
+
+ /**
+ * Creates a new iterator that traverses the given iterator on a background
+ * thread
+ *
+ * @param iterator iterator to traverse
+ * @param bufferSize size of each read-ahead buffer. A larger size will increase both throughput and latency.
+ * @param bufferCount number of read-ahead buffers
+ */
+ public AsyncBufferedIterator(final Iterator<T> iterator, final int bufferSize, final int bufferCount) {
+ this(iterator, bufferSize, bufferCount, null);
+ }
+
+ /**
+ * Creates a new iterator that traverses the given iterator on a background
+ * thread
+ *
+ * @param iterator iterator to traverse
+ * @param bufferSize size of each read-ahead buffer. A larger size will increase both throughput and latency.
+ * @param bufferCount number of read-ahead buffers
+ * @param threadName background thread name. A name will be automatically generated if this parameter is null.
+ */
+ public AsyncBufferedIterator(final Iterator<T> iterator, final int bufferSize, final int bufferCount, final String threadName) {
+ if (iterator == null) throw new IllegalArgumentException("iterator cannot be null");
+ if (bufferCount <= 0) throw new IllegalArgumentException("Must use at least 1 buffer.");
+ if (bufferSize <= 0) throw new IllegalArgumentException("Buffer size must be at least 1 record.");
+ this.underlyingIterator = iterator;
+ this.buffers = new ArrayBlockingQueue<>(bufferCount);
+ this.bufferSize = bufferSize;
+ int threadNumber = threadsCreated.incrementAndGet();
+ this.backgroundThread = new Thread(new Runnable() {
+ @Override
+ public void run() {
+ backgroundRun();
+ }
+ }, threadName != null ? threadName : getThreadNamePrefix() + threadNumber);
+ this.backgroundThread.setDaemon(true);
+ log.debug("Starting thread " + this.backgroundThread.getName());
+ this.backgroundThread.start();
+ }
+
+ protected String getThreadNamePrefix() {
+ return AsyncBufferedIterator.class.getSimpleName();
+ }
+
+ @Override
+ public void close() {
+ if (backgroundThread != null) {
+ try {
+ backgroundThread.interrupt();
+ buffers.clear();
+ backgroundThread.join();
+ } catch (InterruptedException ie) {
+ throw new RuntimeException("Interrupted waiting for background thread to complete", ie);
+ } finally {
+ CloserUtil.close(underlyingIterator);
+ backgroundThread = null;
+ currentBlock = null;
+ }
+ }
+ }
+
+ private void ensureHasNext() {
+ if (!currentBlock.hasNext()) {
+ // Rethrow any exceptions raised on the background thread
+ // at the point the exception would have been encountered
+ // if we had performed synchronous iteration
+ raiseBackgroundThreadException();
+ if (!currentBlock.isEndOfStream()) {
+ try {
+ // Load the next block
+ // All exceptions on the background thread are swallowed (except InterruptedException)
+ // so there's no risk of blocking forever here unless the background thread has been
+ // interrupted, which we do not do. That does not happen during normal operation, as
+ // interrupting the background thread should only happen during the close() method.
+ currentBlock = buffers.take();
+ } catch (InterruptedException e) {
+ throw new RuntimeException("Error reading from background thread", e);
+ }
+ }
+ }
+ }
+
+ @Override
+ public boolean hasNext() {
+ if (backgroundThread == null) {
+ throw new IllegalStateException("iterator has been closed");
+ }
+ ensureHasNext();
+ return currentBlock.hasNext();
+ }
+
+ /**
+ * Raises any exception encountered when processing records on
+ * the background thread back to the foreground caller
+ * @throws Error
+ */
+ private void raiseBackgroundThreadException() throws Error {
+ Throwable t = currentBlock.getException();
+ if (t != null) {
+ if (t instanceof Error) {
+ throw (Error) t;
+ } else if (t instanceof RuntimeException) {
+ throw (RuntimeException) t;
+ } else {
+ throw new RuntimeException(t);
+ }
+ }
+ }
+
+ @Override
+ public T next() {
+ if (hasNext()) {
+ return currentBlock.next();
+ }
+ throw new NoSuchElementException("next");
+ }
+
+ /**
+ * Performs 1 buffer worth of read-ahead on the underlying iterator
+ * (background thread method)
+ */
+ private IteratorBuffer<T> readAhead() {
+ List<T> readAhead = null;
+ try {
+ if (!underlyingIterator.hasNext()) return new IteratorBuffer<>();
+ readAhead = new ArrayList<>(bufferSize);
+ for (int i = 0; i < bufferSize && underlyingIterator.hasNext(); i++) {
+ if (Thread.currentThread().isInterrupted()) {
+ // eager abort if we've been told to stop
+ return new IteratorBuffer<>(readAhead, new InterruptedException());
+ }
+ readAhead.add(underlyingIterator.next());
+ }
+ return new IteratorBuffer<>(readAhead);
+ } catch (Throwable t) {
+ // Catch absolutely everything so we can try to raise it on the foreground thread
+ return new IteratorBuffer<>(readAhead, t);
+ }
+ }
+ /**
+ * Background thread run loop
+ * @throws InterruptedException
+ */
+ private void backgroundRun() {
+ try {
+ IteratorBuffer<T> block;
+ do {
+ block = readAhead();
+ if (block.getException() instanceof InterruptedException) {
+ // stop thread immediately if we've been told to stop
+ return;
+ }
+ buffers.put(block);
+ } while (!block.isEndOfStream());
+ } catch (InterruptedException e) {
+ // stop thread
+ }
+ }
+ /**
+ * Block of records from the underlying iterator
+ */
+ private static class IteratorBuffer<U> implements Iterator<U> {
+ private final Throwable exception;
+ private final Iterator<U> it;
+ public IteratorBuffer(Iterable<U> it) {
+ this.it = it != null ? it.iterator() : null;
+ this.exception = null;
+ }
+
+ /**
+ * Record block with exception thrown when attempting to retrieve the next record
+ * @param it records successfully iterated over
+ * @param exception exception thrown when attempting to iterate over the next record
+ */
+ public IteratorBuffer(Iterable<U> it, Throwable exception) {
+ this.it = it != null ? it.iterator() : null;
+ this.exception = exception;
+ }
+
+ /**
+ * Record block indicating end of stream
+ */
+ public IteratorBuffer() {
+ this.it = null;
+ this.exception = null;
+ }
+
+ @Override
+ public boolean hasNext() {
+ return it != null && it.hasNext();
+ }
+
+ @Override
+ public U next() {
+ return it.next();
+ }
+
+ public boolean isEndOfStream() {
+ return it == null;
+ }
+
+ /**
+ * Exception thrown when attempting to retrieve records from the underlying stream
+ * @return exception thrown on background thread, null if no exception occurred
+ */
+ public Throwable getException() {
+ return exception;
+ }
+ }
+}
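
A minimal usage sketch for the new AsyncBufferedIterator, wrapping a SamReader's iterator; the file name and buffer size are illustrative:

    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;
    import htsjdk.samtools.util.AsyncBufferedIterator;

    import java.io.File;
    import java.io.IOException;

    public class ReadAheadSketch {
        public static void main(final String[] args) throws IOException {
            try (final SamReader reader = SamReaderFactory.makeDefault().open(new File("in.bam"));
                 final AsyncBufferedIterator<SAMRecord> it =
                         new AsyncBufferedIterator<>(reader.iterator(), 10000)) {
                // Records are read ahead in 10,000-record buffers on a background thread.
                while (it.hasNext()) {
                    final SAMRecord rec = it.next();
                    // process rec ...
                }
            }
        }
    }
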
diff --git a/src/java/htsjdk/samtools/util/BlockCompressedInputStream.java b/src/java/htsjdk/samtools/util/BlockCompressedInputStream.java
index 1c5683e..0261b19 100755
--- a/src/java/htsjdk/samtools/util/BlockCompressedInputStream.java
+++ b/src/java/htsjdk/samtools/util/BlockCompressedInputStream.java
@@ -133,6 +133,14 @@ public class BlockCompressedInputStream extends InputStream implements LocationA
}
/**
+ * @return <code>true</code> if the stream is at the end of a BGZF block,
+ * <code>false</code> otherwise.
+ */
+ public boolean endOfBlock() {
+ return (mCurrentBlock != null && mCurrentOffset == mCurrentBlock.length);
+ }
+
+ /**
* Closes the underlying InputStream or RandomAccessFile
*/
public void close()
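
A rough sketch of how endOfBlock() can be observed while consuming a BGZF file byte by byte; the input name is illustrative:

    import htsjdk.samtools.util.BlockCompressedInputStream;

    import java.io.File;
    import java.io.IOException;

    public class EndOfBlockSketch {
        public static void main(final String[] args) throws IOException {
            int boundaries = 0;
            try (final BlockCompressedInputStream in =
                         new BlockCompressedInputStream(new File("in.vcf.gz"))) {
                while (in.read() != -1) {
                    // True once the last uncompressed byte of the current BGZF block has been consumed.
                    if (in.endOfBlock()) boundaries++;
                }
            }
            System.out.println("BGZF block boundaries seen: " + boundaries);
        }
    }
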
diff --git a/src/java/htsjdk/samtools/util/CloseableIterator.java b/src/java/htsjdk/samtools/util/CloseableIterator.java
index 9988364..d26443e 100755
--- a/src/java/htsjdk/samtools/util/CloseableIterator.java
+++ b/src/java/htsjdk/samtools/util/CloseableIterator.java
@@ -24,7 +24,13 @@
package htsjdk.samtools.util;
import java.io.Closeable;
+import java.util.ArrayList;
import java.util.Iterator;
+import java.util.List;
+import java.util.Spliterator;
+import java.util.Spliterators;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
/**
* This interface is used by iterators that use releasable resources during iteration.
@@ -36,10 +42,22 @@ import java.util.Iterator;
* 2) When hasNext() returns false, the iterator implementation should automatically close itself.
* The latter makes it somewhat safer for consumers to use the for loop syntax for iteration:
* for (Type obj : getCloseableIterator()) { ... }
- *
- * We do not inherit from java.io.Closeable because IOExceptions are a pain to deal with.
*/
public interface CloseableIterator<T> extends Iterator<T>, Closeable {
+ /** Should be implemented to close/release any underlying resources. */
+ void close();
+
+ /** Consumes the contents of the iterator and returns it as a List. */
+ default List<T> toList() {
+ final List<T> list = new ArrayList<>();
+ while (hasNext()) list.add(next());
+ close();
+ return list;
+ }
- public void close();
+ /** Returns a Stream that will consume from the underlying iterator. */
+ default Stream<T> stream() {
+ final Spliterator<T> s = Spliterators.spliteratorUnknownSize(this, Spliterator.ORDERED);
+ return StreamSupport.stream(s, false).onClose(this::close);
+ }
}
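
A small sketch of the new default methods in use; stream() registers close() via onClose, so wrapping the stream in try-with-resources releases the underlying iterator's resources (the file name is illustrative):

    import htsjdk.samtools.SAMRecord;
    import htsjdk.samtools.SamReader;
    import htsjdk.samtools.SamReaderFactory;

    import java.io.File;
    import java.io.IOException;
    import java.util.stream.Stream;

    public class CloseableIteratorSketch {
        public static void main(final String[] args) throws IOException {
            try (final SamReader reader = SamReaderFactory.makeDefault().open(new File("small.bam"))) {
                final long mapped;
                try (final Stream<SAMRecord> s = reader.iterator().stream()) {
                    // Count mapped reads; closing the stream closes the iterator it was built from.
                    mapped = s.filter(r -> !r.getReadUnmappedFlag()).count();
                }
                System.out.println("mapped reads: " + mapped);
            }
        }
    }
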
diff --git a/src/java/htsjdk/samtools/util/Log.java b/src/java/htsjdk/samtools/util/Log.java
index d17e841..efd7b67 100644
--- a/src/java/htsjdk/samtools/util/Log.java
+++ b/src/java/htsjdk/samtools/util/Log.java
@@ -72,7 +72,7 @@ public final class Log {
}
/** Returns true if the specified log level is enabled otherwise false. */
- public final boolean isEnabled(final LogLevel level) {
+ public static final boolean isEnabled(final LogLevel level) {
return level.ordinal() <= globalLogLevel.ordinal();
}
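
Since isEnabled is now static, a level check can guard expensive message construction without needing a particular Log instance; a brief sketch (buildStateDump is a hypothetical helper):

    import htsjdk.samtools.util.Log;

    public class LogLevelSketch {
        private static final Log log = Log.getInstance(LogLevelSketch.class);

        public static void main(final String[] args) {
            // Skip building the message entirely unless DEBUG output is actually enabled.
            if (Log.isEnabled(Log.LogLevel.DEBUG)) {
                log.debug("expensive state dump: " + buildStateDump());
            }
        }

        private static String buildStateDump() { return "..."; }
    }
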
diff --git a/src/java/htsjdk/samtools/util/SamLocusIterator.java b/src/java/htsjdk/samtools/util/SamLocusIterator.java
index d9d189d..f0dd952 100644
--- a/src/java/htsjdk/samtools/util/SamLocusIterator.java
+++ b/src/java/htsjdk/samtools/util/SamLocusIterator.java
@@ -31,7 +31,7 @@ import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.filter.AggregateFilter;
import htsjdk.samtools.filter.DuplicateReadFilter;
-import htsjdk.samtools.filter.FilteringIterator;
+import htsjdk.samtools.filter.FilteringSamIterator;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.filter.SecondaryOrSupplementaryFilter;
@@ -224,7 +224,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
tempIterator = samReader.iterator();
}
if (samFilters != null) {
- tempIterator = new FilteringIterator(tempIterator, new AggregateFilter(samFilters));
+ tempIterator = new FilteringSamIterator(tempIterator, new AggregateFilter(samFilters));
}
samIterator = new PeekableIterator<SAMRecord>(tempIterator);
return this;
diff --git a/src/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java b/src/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java
index 7303851..7546a01 100644
--- a/src/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java
+++ b/src/java/htsjdk/samtools/util/SamRecordIntervalIteratorFactory.java
@@ -79,10 +79,10 @@ public class SamRecordIntervalIteratorFactory {
/**
* Halt iteration after a read is encountered that starts after the given sequence and position.
- * Note that most of this code is copied from FilteringIterator. It would be nice just to override getNextRecord,
- * but that method is called FilteringIterator ctor, so the stopAfter members can't be initialized before
+ * Note that most of this code is copied from FilteringSamIterator. It would be nice just to override getNextRecord,
+ * but that method is called in the FilteringSamIterator ctor, so the stopAfter members can't be initialized before
* it is called.
- * FilteringIterator ctor could take a boolean "advance" that would tell it whether or not to call getNextRecord
+ * FilteringSamIterator ctor could take a boolean "advance" that would tell it whether or not to call getNextRecord
* in the ctor, so that it could be delayed in the subclass. If this pattern happens again, we should do that.
*/
private class StopAfterFilteringIterator implements CloseableIterator<SAMRecord> {
@@ -132,7 +132,7 @@ public class SamRecordIntervalIteratorFactory {
* @throws UnsupportedOperationException
*/
public void remove() {
- throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
+ throw new UnsupportedOperationException("Remove() not supported by FilteringSamIterator");
}
public void close() {
diff --git a/src/java/htsjdk/samtools/util/SequenceUtil.java b/src/java/htsjdk/samtools/util/SequenceUtil.java
index fc273b0..d2fb861 100644
--- a/src/java/htsjdk/samtools/util/SequenceUtil.java
+++ b/src/java/htsjdk/samtools/util/SequenceUtil.java
@@ -1089,4 +1089,27 @@ public class SequenceUtil {
return sofar;
}
+
+ /**
+ * Returns a read name from a FASTQ header string suitable for use in a SAM/BAM file. Any letters after the first space are ignored.
+ * This method also strips trailing "/1" or "/2" so that paired-end reads have the same name.
+ *
+ * @param fastqHeader the header from a {@link htsjdk.samtools.fastq.FastqRecord}.
+ * @return a read name appropriate for output in a SAM/BAM file.
+ */
+ // Read names cannot contain blanks
+ public static String getSamReadNameFromFastqHeader(final String fastqHeader) {
+ final int idx = fastqHeader.indexOf(" ");
+ String readName = (idx == -1) ? fastqHeader : fastqHeader.substring(0,idx);
+
+ // NOTE: the while loop isn't necessarily the most efficient way to handle this but we don't
+ // expect this to ever happen more than once, just trapping pathological cases
+ while ((readName.endsWith("/1") || readName.endsWith("/2"))) {
+ // If this is an unpaired run we want to make sure that "/1" isn't tacked on the end of the read name,
+ // as this can cause problems down the road (ex. in Picard's MergeBamAlignment).
+ readName = readName.substring(0, readName.length() - 2);
+ }
+
+ return readName;
+ }
}
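
A short sketch of the new helper on a typical Illumina-style FASTQ header; the header string is made up for illustration:

    import htsjdk.samtools.util.SequenceUtil;

    public class FastqHeaderSketch {
        public static void main(final String[] args) {
            // Everything after the first space is dropped, then a trailing /1 or /2 is stripped.
            final String header = "EAS139:136:FC706VJ:2:2104:15343:197393/1 1:Y:18:ATCACG";
            System.out.println(SequenceUtil.getSamReadNameFromFastqHeader(header));
            // prints: EAS139:136:FC706VJ:2:2104:15343:197393
        }
    }
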
diff --git a/src/java/htsjdk/samtools/util/Tuple.java b/src/java/htsjdk/samtools/util/Tuple.java
index 6a865d7..2177920 100644
--- a/src/java/htsjdk/samtools/util/Tuple.java
+++ b/src/java/htsjdk/samtools/util/Tuple.java
@@ -22,8 +22,8 @@ public class Tuple<A, B> {
final Tuple<?, ?> tuple = (Tuple<?, ?>) o;
if (a != null ? !a.equals(tuple.a) : tuple.a != null) return false;
- return !(b != null ? !b.equals(tuple.b) : tuple.b != null);
+ return !(b != null ? !b.equals(tuple.b) : tuple.b != null);
}
@Override
@@ -38,5 +38,4 @@ public class Tuple<A, B> {
public String toString() {
return "[" + a + ", " + b + "]";
}
-
}
diff --git a/src/java/htsjdk/tribble/AbstractFeatureReader.java b/src/java/htsjdk/tribble/AbstractFeatureReader.java
index a494afb..80d9a6c 100644
--- a/src/java/htsjdk/tribble/AbstractFeatureReader.java
+++ b/src/java/htsjdk/tribble/AbstractFeatureReader.java
@@ -24,6 +24,7 @@ import htsjdk.tribble.util.TabixUtils;
import java.io.File;
import java.io.IOException;
+import java.net.URI;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
@@ -155,6 +156,15 @@ public abstract class AbstractFeatureReader<T extends Feature, SOURCE> implement
}
/**
+ * Whether the path of a URI resource ends in one of the BLOCK_COMPRESSED_EXTENSIONS
+ * @param uri a URI representing the resource to check
+ * @return true if the URI's path ends in one of the BLOCK_COMPRESSED_EXTENSIONS, false otherwise
+ */
+ public static boolean hasBlockCompressedExtension (final URI uri) {
+ return hasBlockCompressedExtension(uri.getPath());
+ }
+
+ /**
* get the header
*
* @return the header object we've read-in
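
A quick sketch of the URI overload; because only the URI path is inspected, a query string does not hide the extension (the URL is made up):

    import htsjdk.tribble.AbstractFeatureReader;

    import java.net.URI;

    public class BlockCompressedExtensionSketch {
        public static void main(final String[] args) {
            final URI remote = URI.create("http://example.com/data/calls.vcf.gz?token=abc123");
            System.out.println(AbstractFeatureReader.hasBlockCompressedExtension(remote));            // true
            System.out.println(AbstractFeatureReader.hasBlockCompressedExtension(remote.toString())); // false: the query string hides .gz
        }
    }
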
diff --git a/src/java/htsjdk/tribble/TribbleIndexedFeatureReader.java b/src/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
index 1d1e053..ae278f4 100644
--- a/src/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
+++ b/src/java/htsjdk/tribble/TribbleIndexedFeatureReader.java
@@ -35,6 +35,8 @@ import htsjdk.tribble.util.ParsingUtils;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -215,7 +217,7 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
PositionalBufferedStream pbs = null;
try {
is = ParsingUtils.openInputStream(path);
- if (path.endsWith("gz")) {
+ if (isGZIPPath(path)) {
// TODO -- warning I don't think this can work, the buffered input stream screws up position
is = new GZIPInputStream(new BufferedInputStream(is));
}
@@ -271,6 +273,27 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
return new WFIterator();
}
+ //Visible for testing
+ static boolean isGZIPPath(final String path) {
+ if (path.toLowerCase().endsWith(".gz")) {
+ return true;
+ }
+ else {
+ String uriPath = null;
+ try {
+ URI uri = new URI(path);
+ if (uri != null) {
+ uriPath = uri.getPath();
+ return uriPath != null && uriPath.toLowerCase().endsWith(".gz");
+ }
+ return false;
+ }
+ catch (URISyntaxException e) {
+ return false;
+ }
+ }
+ }
+
/**
* Class to iterator over an entire file.
*/
@@ -287,7 +310,7 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
final InputStream inputStream = ParsingUtils.openInputStream(path);
final PositionalBufferedStream pbs;
- if (path.endsWith(".gz")) {
+ if (isGZIPPath(path)) {
// Gzipped -- we need to buffer the GZIPInputStream methods as this class makes read() calls,
// and seekableStream does not support single byte reads
final InputStream is = new GZIPInputStream(new BufferedInputStream(inputStream, 512000));
@@ -321,7 +344,6 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
return ret;
}
-
/**
* Advance to the next record in the query interval.
*
diff --git a/src/java/htsjdk/tribble/index/IndexFactory.java b/src/java/htsjdk/tribble/index/IndexFactory.java
index d394aa4..85fbd72 100644
--- a/src/java/htsjdk/tribble/index/IndexFactory.java
+++ b/src/java/htsjdk/tribble/index/IndexFactory.java
@@ -116,7 +116,7 @@ public class IndexFactory {
return tribbleIndexType;
}
- public Class getIndexType() {
+ public Class<Index> getIndexType() {
return indexType;
}
@@ -165,39 +165,30 @@ public class IndexFactory {
* @param indexFile from which to load the index
*/
public static Index loadIndex(final String indexFile) {
- final Index idx = null;
- BufferedInputStream bufferedInputStream = null;
- final LittleEndianInputStream dis = null;
- try {
- InputStream inputStream = ParsingUtils.openInputStream(indexFile);
- if (indexFile.endsWith(".gz")) {
- inputStream = new GZIPInputStream(inputStream);
- }
- else if (indexFile.endsWith(TabixUtils.STANDARD_INDEX_EXTENSION)) {
- inputStream = new BlockCompressedInputStream(inputStream);
- }
- // Must be buffered, because getIndexType uses mark and reset
- bufferedInputStream = new BufferedInputStream(inputStream, Defaults.NON_ZERO_BUFFER_SIZE);
- final Class indexClass = IndexType.getIndexType(bufferedInputStream).getIndexType();
-
- final Constructor ctor = indexClass.getConstructor(InputStream.class);
-
- return (Index) ctor.newInstance(bufferedInputStream);
+ // Must be buffered, because getIndexType uses mark and reset
+ try (BufferedInputStream bufferedInputStream = new BufferedInputStream(indexFileInputStream(indexFile), Defaults.NON_ZERO_BUFFER_SIZE)){
+ final Class<Index> indexClass = IndexType.getIndexType(bufferedInputStream).getIndexType();
+ final Constructor<Index> ctor = indexClass.getConstructor(InputStream.class);
+ return ctor.newInstance(bufferedInputStream);
} catch (final IOException ex) {
throw new TribbleException.UnableToReadIndexFile("Unable to read index file", indexFile, ex);
} catch (final Exception ex) {
throw new RuntimeException(ex);
- } finally {
- try {
- if (bufferedInputStream != null) bufferedInputStream.close();
- if (dis != null) dis.close();
- //log.info(String.format("Closed %s and %s", is, dis));
- } catch (final IOException e) {
- //log.error("Error closing indexFile: " + indexFile, e);
- }
}
}
+ private static InputStream indexFileInputStream(final String indexFile) throws IOException {
+ final InputStream inputStreamInitial = ParsingUtils.openInputStream(indexFile);
+ if (indexFile.endsWith(".gz")) {
+ return new GZIPInputStream(inputStreamInitial);
+ }
+ else if (indexFile.endsWith(TabixUtils.STANDARD_INDEX_EXTENSION)) {
+ return new BlockCompressedInputStream(inputStreamInitial);
+ }
+ else {
+ return inputStreamInitial;
+ }
+ }
/**
* a helper method for creating a linear binned index with default bin size
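
The refactored loadIndex keeps the same public entry point; a minimal call looks like this (the index file name is illustrative):

    import htsjdk.tribble.index.Index;
    import htsjdk.tribble.index.IndexFactory;

    public class LoadIndexSketch {
        public static void main(final String[] args) {
            // Plain, .gz and .tbi index files are all handled; the wrapping stream is chosen in indexFileInputStream().
            final Index index = IndexFactory.loadIndex("calls.vcf.gz.tbi");
            System.out.println(index.getClass().getSimpleName());
        }
    }
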
diff --git a/src/java/htsjdk/tribble/index/tabix/TabixIndex.java b/src/java/htsjdk/tribble/index/tabix/TabixIndex.java
index 077f022..9ab05d4 100644
--- a/src/java/htsjdk/tribble/index/tabix/TabixIndex.java
+++ b/src/java/htsjdk/tribble/index/tabix/TabixIndex.java
@@ -44,11 +44,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
+import java.util.*;
/**
* This class represent a Tabix index that has been built in memory or read from a file. It can be queried or
@@ -149,11 +145,15 @@ public class TabixIndex implements Index {
return Collections.emptyList();
}
final List<Chunk> chunks = indices[sequenceIndex].getChunksOverlapping(start, end);
- final List<Block> ret = new ArrayList<Block>(chunks.size());
- for (final Chunk chunk : chunks) {
- ret.add(new Block(chunk.getChunkStart(), chunk.getChunkEnd() - chunk.getChunkStart()));
+ if (chunks == null) {
+ return Collections.emptyList();
+ } else {
+ final List<Block> ret = new ArrayList<>(chunks.size());
+ chunks.stream()
+ .map(chunk -> new Block(chunk.getChunkStart(), chunk.getChunkEnd() - chunk.getChunkStart()))
+ .forEach(ret::add);
+ return ret;
}
- return ret;
}
@Override
diff --git a/src/java/htsjdk/tribble/readers/LineReaderUtil.java b/src/java/htsjdk/tribble/readers/LineReaderUtil.java
index f925156..3671ed0 100644
--- a/src/java/htsjdk/tribble/readers/LineReaderUtil.java
+++ b/src/java/htsjdk/tribble/readers/LineReaderUtil.java
@@ -26,11 +26,11 @@ public class LineReaderUtil {
* returned.
*/
public static LineReader fromBufferedStream(final InputStream stream) {
- return fromBufferedStream(stream, Defaults.USE_ASYNC_IO ? LineReaderOption.ASYNCHRONOUS : LineReaderOption.SYNCHRONOUS);
+ return fromBufferedStream(stream, Defaults.USE_ASYNC_IO_FOR_TRIBBLE ? LineReaderOption.ASYNCHRONOUS : LineReaderOption.SYNCHRONOUS);
}
public static LineReader fromStringReader(final StringReader reader) {
- return fromStringReader(reader, Defaults.USE_ASYNC_IO ? LineReaderOption.ASYNCHRONOUS : LineReaderOption.SYNCHRONOUS);
+ return fromStringReader(reader, Defaults.USE_ASYNC_IO_FOR_TRIBBLE ? LineReaderOption.ASYNCHRONOUS : LineReaderOption.SYNCHRONOUS);
}
public static LineReader fromStringReader(final StringReader stringReader, final LineReaderOption lineReaderOption) {
diff --git a/src/java/htsjdk/variant/example/PrintVariantsExample.java b/src/java/htsjdk/variant/example/PrintVariantsExample.java
new file mode 100755
index 0000000..997f0ee
--- /dev/null
+++ b/src/java/htsjdk/variant/example/PrintVariantsExample.java
@@ -0,0 +1,105 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ */
+package htsjdk.variant.example;
+
+import htsjdk.samtools.Defaults;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.zip.DeflaterFactory;
+import htsjdk.tribble.AbstractFeatureReader;
+import htsjdk.tribble.readers.LineIterator;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.Options;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
+import htsjdk.variant.vcf.VCFCodec;
+import htsjdk.variant.vcf.VCFHeader;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.InetAddress;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+
+/**
+ * This is an example program showing how to use Feature readers and (optionally) writers.
+ * It's also useful for measuring time.
+ * An example invocation is:
+ * java -cp dist/htsjdk-2.1.1.jar htsjdk.variant.example.PrintVariantsExample in.vcf out.vcf
+ * <p>
+ * Arguments:
+ * - the first argument is the input file (VCF)
+ * - the second argument is optional and is the name of the output file (nothing gets written if this argument is missing)
+ */
+public final class PrintVariantsExample {
+ private PrintVariantsExample() {
+ }
+
+ private static final Log log = Log.getInstance(PrintVariantsExample.class);
+
+ public static void main(final String[] args) throws IOException {
+ if (args.length < 1) {
+ System.out.println("Usage: " + PrintVariantsExample.class.getCanonicalName() + " inFile [outFile]");
+ System.exit(1);
+ }
+ final File inputFile = new File(args[0]);
+ final File outputFile = args.length >= 2 ? new File(args[1]) : null;
+
+ final long start = System.currentTimeMillis();
+
+ log.info("Start with args:" + Arrays.toString(args));
+ printConfigurationInfo();
+
+ try(final VariantContextWriter writer = outputFile == null ? null : new VariantContextWriterBuilder().setOutputFile(outputFile).setOutputFileType(VariantContextWriterBuilder.OutputType.VCF).unsetOption(Options.INDEX_ON_THE_FLY).build();
+ final AbstractFeatureReader<VariantContext, LineIterator> reader = AbstractFeatureReader.getFeatureReader(inputFile.getAbsolutePath(), new VCFCodec(), false)){
+
+ log.info(reader.getClass().getSimpleName() + " hasIndex " + reader.hasIndex());
+ if (writer != null){
+ log.info(writer.getClass().getSimpleName());
+ writer.writeHeader((VCFHeader) reader.getHeader());
+ }
+
+ final ProgressLogger pl = new ProgressLogger(log, 1000000);
+ for (final VariantContext vc : reader.iterator()) {
+ if (writer != null){
+ writer.add(vc);
+ }
+ pl.record(vc.getContig(), vc.getStart());
+ }
+ }
+
+ final long end = System.currentTimeMillis();
+ log.info(String.format("Done. Elapsed time %.3f seconds", (end - start) / 1000.0));
+ }
+
+ private static void printConfigurationInfo() throws IOException {
+ log.info("Executing as " +
+ System.getProperty("user.name") + '@' + InetAddress.getLocalHost().getHostName() +
+ " on " + System.getProperty("os.name") + ' ' + System.getProperty("os.version") +
+ ' ' + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") +
+ ' ' + System.getProperty("java.runtime.version") +
+ ' ' + (DeflaterFactory.usingIntelDeflater() ? "IntelDeflater" : "JdkDeflater"));
+
+ log.info(Defaults.allDefaults().entrySet().stream().map(e -> e.getKey() + ':' + e.getValue()).collect(Collectors.<String>joining(" ")));
+ }
+}
diff --git a/src/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java b/src/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java
new file mode 100644
index 0000000..cda97ab
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/GenotypeJEXLContext.java
@@ -0,0 +1,58 @@
+package htsjdk.variant.variantcontext;
+
+import htsjdk.variant.vcf.VCFConstants;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ *
+ * @author bbimber
+ *
+ * implements the JEXL context for Genotype; this saves us from
+ * having to generate a JEXL context lookup map every time we want to evaluate an expression.
+ *
+ */
+public class GenotypeJEXLContext extends VariantJEXLContext {
+ private Genotype g;
+
+ private interface AttributeGetter {
+ public Object get(Genotype g);
+ }
+
+ private static Map<String, AttributeGetter> attributes = new HashMap<String, AttributeGetter>();
+
+ static {
+ attributes.put("g", (Genotype g) -> g);
+ attributes.put(VCFConstants.GENOTYPE_KEY, Genotype::getGenotypeString);
+
+ attributes.put("isHom", (Genotype g) -> g.isHom() ? "1" : "0");
+ attributes.put("isHomRef", (Genotype g) -> g.isHomRef() ? "1" : "0");
+ attributes.put("isHet", (Genotype g) -> g.isHet() ? "1" : "0");
+ attributes.put("isHomVar", (Genotype g) -> g.isHomVar() ? "1" : "0");
+ attributes.put("isCalled", (Genotype g) -> g.isCalled() ? "1" : "0");
+ attributes.put("isNoCall", (Genotype g) -> g.isNoCall() ? "1" : "0");
+ attributes.put("isMixed", (Genotype g) -> g.isMixed() ? "1" : "0");
+ attributes.put("isAvailable", (Genotype g) -> g.isAvailable() ? "1" : "0");
+ attributes.put("isPassFT", (Genotype g) -> g.isFiltered() ? "0" : "1");
+ attributes.put(VCFConstants.GENOTYPE_FILTER_KEY, (Genotype g) -> g.isFiltered()? g.getFilters() : "PASS");
+ attributes.put(VCFConstants.GENOTYPE_QUALITY_KEY, Genotype::getGQ);
+ }
+
+ public GenotypeJEXLContext(VariantContext vc, Genotype g) {
+ super(vc);
+ this.g = g;
+ }
+
+ public Object get(String name) {
+ //should matching genotype attributes always supersede vc?
+ if ( attributes.containsKey(name) ) { // dynamic resolution of name -> value via map
+ return attributes.get(name).get(g);
+ } else if ( g.hasAnyAttribute(name) ) {
+ return g.getAnyAttribute(name);
+ } else if ( g.getFilters().contains(name) ) {
+ return "1";
+ } else
+ return super.get(name);
+ }
+}
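
Ordinarily this context is used indirectly through the JEXL filtering machinery; a minimal direct sketch, assuming vc and g were obtained elsewhere (e.g. while iterating a VCF):

    import htsjdk.variant.variantcontext.Genotype;
    import htsjdk.variant.variantcontext.GenotypeJEXLContext;
    import htsjdk.variant.variantcontext.VariantContext;

    public class GenotypeContextSketch {
        static Object isHet(final VariantContext vc, final Genotype g) {
            final GenotypeJEXLContext ctx = new GenotypeJEXLContext(vc, g);
            // Fixed attributes resolve through the static map above; "isHet" yields "1" or "0".
            return ctx.get("isHet");
        }
    }
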
diff --git a/src/java/htsjdk/variant/variantcontext/JEXLMap.java b/src/java/htsjdk/variant/variantcontext/JEXLMap.java
index ce816e0..a7a871f 100644
--- a/src/java/htsjdk/variant/variantcontext/JEXLMap.java
+++ b/src/java/htsjdk/variant/variantcontext/JEXLMap.java
@@ -1,12 +1,11 @@
package htsjdk.variant.variantcontext;
-import htsjdk.variant.utils.GeneralUtils;
import htsjdk.variant.variantcontext.VariantContextUtils.JexlVCMatchExp;
-import htsjdk.variant.vcf.VCFConstants;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.MapContext;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
@@ -55,68 +54,14 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
*
*/
private void createContext() {
- if ( g == null ) {
- // todo -- remove dependancy on g to the entire system
+ if ( vc == null ) {
+ jContext = new MapContext(Collections.emptyMap());
+ }
+ else if (g == null) {
jContext = new VariantJEXLContext(vc);
- } else {
- //
- // this whole branch is here just to support G jexl operations
- //
- Map<String, Object> infoMap = new HashMap<String, Object>();
-
- if ( vc != null ) {
- // create a mapping of what we know about the variant context, its Chromosome, positions, etc.
- infoMap.put("CHROM", vc.getChr());
- infoMap.put("POS", vc.getStart());
- infoMap.put("TYPE", vc.getType().toString());
- infoMap.put("QUAL", String.valueOf(vc.getPhredScaledQual()));
-
- // add alleles
- infoMap.put("ALLELES", GeneralUtils.join(";", vc.getAlleles()));
- infoMap.put("N_ALLELES", String.valueOf(vc.getNAlleles()));
-
- // add attributes
- addAttributesToMap(infoMap, vc.getAttributes());
-
- // add filter fields
- infoMap.put("FILTER", vc.isFiltered() ? "1" : "0");
- for ( Object filterCode : vc.getFilters() ) {
- infoMap.put(String.valueOf(filterCode), "1");
- }
-
- // add genotype-specific fields
- // TODO -- implement me when we figure out a good way to represent this
- // for ( Genotype g : vc.getGenotypes().values() ) {
- // String prefix = g.getSampleName() + ".";
- // addAttributesToMap(infoMap, g.getAttributes(), prefix);
- // infoMap.put(prefix + "GT", g.getGenotypeString());
- // }
-
- // add specific genotype if one is provided
- infoMap.put(VCFConstants.GENOTYPE_KEY, g.getGenotypeString());
- infoMap.put("isHom", g.isHom() ? "1" : "0");
- infoMap.put("isHomRef", g.isHomRef() ? "1" : "0");
- infoMap.put("isHet", g.isHet() ? "1" : "0");
- infoMap.put("isHomVar", g.isHomVar() ? "1" : "0");
- infoMap.put("isCalled", g.isCalled()? "1" : "0");
- infoMap.put("isNoCall", g.isNoCall()? "1" : "0");
- infoMap.put("isMixed", g.isMixed()? "1" : "0");
- infoMap.put("isAvailable", g.isAvailable()? "1" : "0");
- infoMap.put("isPassFT", g.isFiltered()? "0" : "1");
- infoMap.put(VCFConstants.GENOTYPE_FILTER_KEY, g.isFiltered()? g.getFilters() : "PASS");
-
- infoMap.put(VCFConstants.GENOTYPE_QUALITY_KEY, g.getGQ());
- if ( g.hasDP() )
- infoMap.put(VCFConstants.DEPTH_KEY, g.getDP());
- for ( Entry<String, Object> e : g.getExtendedAttributes().entrySet() ) {
- if ( e.getValue() != null && !e.getValue().equals(VCFConstants.MISSING_VALUE_v4) )
- infoMap.put(e.getKey(), e.getValue());
- }
- }
-
- // create the internal context that we can evaluate expressions against
-
- jContext = new MapContext(infoMap);
+ }
+ else {
+ jContext = new GenotypeJEXLContext(vc, g);
}
}
@@ -185,10 +130,10 @@ class JEXLMap implements Map<JexlVCMatchExp, Boolean> {
} catch (Exception e) {
// if exception happens because variable is undefined (i.e. field in expression is not present), evaluate to FALSE
// todo - might be safer if we explicitly checked for an exception type, but Apache's API doesn't seem to have that ability
- if (e.getMessage().contains("undefined variable"))
+ if (e.getMessage() != null && e.getMessage().contains("undefined variable"))
jexl.put(exp,false);
else
- throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, e.getMessage()));
+ throw new IllegalArgumentException(String.format("Invalid JEXL expression detected for %s with message %s", exp.name, (e.getMessage() == null ? "no message" : e.getMessage())));
}
}
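The rewritten createContext() above reduces the old hand-built info map to a three-way choice of JexlContext. A rough sketch of that dispatch in isolation, with generic stand-in factories rather than the package-private htsjdk constructors:

    import org.apache.commons.jexl2.JexlContext;
    import org.apache.commons.jexl2.MapContext;

    import java.util.Collections;
    import java.util.function.BiFunction;
    import java.util.function.Function;

    final class CreateContextSketch<V, G> {
        private final Function<V, JexlContext> variantOnly;       // e.g. VariantJEXLContext::new
        private final BiFunction<V, G, JexlContext> withGenotype; // e.g. GenotypeJEXLContext::new

        CreateContextSketch(final Function<V, JexlContext> variantOnly,
                            final BiFunction<V, G, JexlContext> withGenotype) {
            this.variantOnly = variantOnly;
            this.withGenotype = withGenotype;
        }

        JexlContext createContext(final V vc, final G g) {
            if (vc == null) {
                return new MapContext(Collections.emptyMap()); // nothing to expose
            } else if (g == null) {
                return variantOnly.apply(vc);                  // variant-level fields only
            } else {
                return withGenotype.apply(vc, g);              // genotype-aware context
            }
        }
    }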
diff --git a/src/java/htsjdk/variant/variantcontext/VariantJEXLContext.java b/src/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
index f9f55cf..ee23229 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantJEXLContext.java
@@ -37,12 +37,11 @@ import java.util.Map;
*
* Class VariantJEXLContext
*
- * implements the JEXML context for VariantContext; this saves us from
- * having to generate a JEXML context lookup map everytime we want to evaluate an expression.
+ * implements the JEXL context for VariantContext; this saves us from
+ * having to generate a JEXL context lookup map every time we want to evaluate an expression.
*
* This is package protected, only classes in variantcontext should have access to it.
*
- * // todo -- clean up to remove or better support genotype filtering
*/
class VariantJEXLContext implements JexlContext {
@@ -53,22 +52,21 @@ class VariantJEXLContext implements JexlContext {
public Object get(VariantContext vc);
}
- private static Map<String, AttributeGetter> x = new HashMap<String, AttributeGetter>();
+ private static Map<String, AttributeGetter> attributes = new HashMap<String, AttributeGetter>();
static {
- x.put("vc", new AttributeGetter() { public Object get(VariantContext vc) { return vc; }});
- x.put("CHROM", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getChr(); }});
- x.put("POS", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getStart(); }});
- x.put("TYPE", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getType().toString(); }});
- x.put("QUAL", new AttributeGetter() { public Object get(VariantContext vc) { return -10 * vc.getLog10PError(); }});
- x.put("ALLELES", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getAlleles(); }});
- x.put("N_ALLELES", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getNAlleles(); }});
- x.put("FILTER", new AttributeGetter() { public Object get(VariantContext vc) { return vc.isFiltered() ? "1" : "0"; }});
-
-// x.put("GT", new AttributeGetter() { public Object get(VariantContext vc) { return g.getGenotypeString(); }});
- x.put("homRefCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomRefCount(); }});
- x.put("hetCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHetCount(); }});
- x.put("homVarCount", new AttributeGetter() { public Object get(VariantContext vc) { return vc.getHomVarCount(); }});
+ attributes.put("vc", (VariantContext vc) -> vc);
+ attributes.put("CHROM", VariantContext::getChr);
+ attributes.put("POS", VariantContext::getStart);
+ attributes.put("TYPE", (VariantContext vc) -> vc.getType().toString());
+ attributes.put("QUAL", (VariantContext vc) -> -10 * vc.getLog10PError());
+ attributes.put("ALLELES", VariantContext::getAlleles);
+ attributes.put("N_ALLELES", VariantContext::getNAlleles);
+ attributes.put("FILTER", (VariantContext vc) -> vc.isFiltered() ? "1" : "0");
+
+ attributes.put("homRefCount", VariantContext::getHomRefCount);
+ attributes.put("hetCount", VariantContext::getHetCount);
+ attributes.put("homVarCount", VariantContext::getHomVarCount);
}
public VariantJEXLContext(VariantContext vc) {
@@ -77,8 +75,8 @@ class VariantJEXLContext implements JexlContext {
public Object get(String name) {
Object result = null;
- if ( x.containsKey(name) ) { // dynamic resolution of name -> value via map
- result = x.get(name).get(vc);
+ if ( attributes.containsKey(name) ) { // dynamic resolution of name -> value via map
+ result = attributes.get(name).get(vc);
} else if ( vc.hasAttribute(name)) {
result = vc.getAttribute(name);
} else if ( vc.getFilters().contains(name) ) {
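The static attribute map above now registers lambdas and method references instead of anonymous AttributeGetter classes. The same accessor-map idea in a self-contained form, with a hypothetical Site type and java.util.function.Function standing in for AttributeGetter:

    import java.util.HashMap;
    import java.util.Map;
    import java.util.function.Function;

    public class AccessorMapSketch {
        // Stand-in for VariantContext, only so the sketch compiles on its own.
        static final class Site {
            private final String chrom;
            private final int pos;
            Site(final String chrom, final int pos) { this.chrom = chrom; this.pos = pos; }
            String getChrom() { return chrom; }
            int getPos() { return pos; }
        }

        private static final Map<String, Function<Site, Object>> GETTERS = new HashMap<>();
        static {
            GETTERS.put("site", (Site s) -> s);   // expose the whole object, like "vc"
            GETTERS.put("CHROM", Site::getChrom); // method reference, like VariantContext::getChr
            GETTERS.put("POS", Site::getPos);     // int return is boxed automatically
        }

        static Object get(final Site s, final String name) {
            final Function<Site, Object> getter = GETTERS.get(name);
            return getter == null ? null : getter.apply(s);
        }

        public static void main(String[] args) {
            System.out.println(get(new Site("chr1", 12345), "CHROM")); // chr1
        }
    }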
diff --git a/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java b/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
index c5b943f..04609a8 100644
--- a/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2015 The Broad Institute
+ * Copyright (c) 2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,14 +22,12 @@
* THE SOFTWARE.
*/
+
package htsjdk.variant.variantcontext.filter;
-import htsjdk.samtools.util.CloseableIterator;
-import htsjdk.samtools.util.CloserUtil;
import htsjdk.variant.variantcontext.VariantContext;
import java.util.Iterator;
-import java.util.NoSuchElementException;
/**
* A filtering iterator for VariantContexts that takes a base iterator and a VariantContextFilter.
@@ -37,91 +35,13 @@ import java.util.NoSuchElementException;
* The iterator returns all the variantcontexts for which the filter's function "test" returns true (and only those)
*
* @author Yossi Farjoun
+ *
+ * use {@link FilteringVariantContextIterator} instead
*/
-public class FilteringIterator implements CloseableIterator<VariantContext>, Iterable<VariantContext>{
- private final Iterator<VariantContext> iterator;
- private final VariantContextFilter filter;
- private VariantContext next = null;
- /**
- * Constructor of an iterator based on the provided iterator and predicate. The resulting
- * records will be all those VariantContexts from iterator for which filter.test( . ) is true
- *
- * @param iterator the backing iterator
- * @param filter the filter
- */
+@Deprecated
+public class FilteringIterator extends FilteringVariantContextIterator{
public FilteringIterator(final Iterator<VariantContext> iterator, final VariantContextFilter filter) {
- this.iterator = iterator;
- this.filter = filter;
- next = getNextVC();
- }
-
- @Override
- public void close() {
- CloserUtil.close(iterator);
- }
-
- /**
- * Returns true if the iteration has more elements.
- *
- * @return true if the iteration has more elements. Otherwise returns false.
- */
- @Override
- public boolean hasNext() {
- return next != null;
- }
-
- /**
- * Returns the next element in the iteration.
- *
- * @return the next element in the iteration
- * @throws NoSuchElementException if there are no more elements to return
- *
- */
- @Override
- public VariantContext next() throws NoSuchElementException {
- if (next == null) {
- throw new NoSuchElementException("Iterator has no more elements.");
- }
- final VariantContext result = next;
- next = getNextVC();
- return result;
- }
-
- /**
- * Required method for Iterator API.
- *
- * @throws UnsupportedOperationException since it is unsupported here.
- */
- @Override
- public void remove() {
- throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
- }
-
- /**
- * Gets the next record from the underlying iterator that passes the filter
- *
- * @return VariantContext the next filter-passing record
- */
- private VariantContext getNextVC() {
-
- while (iterator.hasNext()) {
- final VariantContext record = iterator.next();
-
- if (filter.test(record)) {
- return record;
- }
- }
- return null;
- }
-
- /**
- * function to satisfy the Iterable interface
- *
- * @return itself since the class inherits from Iterator
- */
- @Override
- public Iterator<VariantContext> iterator() {
- return this;
+ super(iterator, filter);
}
}
diff --git a/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java b/src/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIterator.java
similarity index 93%
copy from src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
copy to src/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIterator.java
index c5b943f..e61d9a5 100644
--- a/src/java/htsjdk/variant/variantcontext/filter/FilteringIterator.java
+++ b/src/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIterator.java
@@ -38,7 +38,7 @@ import java.util.NoSuchElementException;
*
* @author Yossi Farjoun
*/
-public class FilteringIterator implements CloseableIterator<VariantContext>, Iterable<VariantContext>{
+public class FilteringVariantContextIterator implements CloseableIterator<VariantContext>, Iterable<VariantContext>{
private final Iterator<VariantContext> iterator;
private final VariantContextFilter filter;
private VariantContext next = null;
@@ -50,7 +50,7 @@ public class FilteringIterator implements CloseableIterator<VariantContext>, Ite
* @param iterator the backing iterator
* @param filter the filter
*/
- public FilteringIterator(final Iterator<VariantContext> iterator, final VariantContextFilter filter) {
+ public FilteringVariantContextIterator(final Iterator<VariantContext> iterator, final VariantContextFilter filter) {
this.iterator = iterator;
this.filter = filter;
next = getNextVC();
@@ -95,7 +95,7 @@ public class FilteringIterator implements CloseableIterator<VariantContext>, Ite
*/
@Override
public void remove() {
- throw new UnsupportedOperationException("Remove() not supported by FilteringIterator");
+ throw new UnsupportedOperationException("Remove() not supported by FilteringVariantContextIterator");
}
/**
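Since only the class name changes, migrating callers of the deprecated FilteringIterator is mechanical. A hedged usage sketch (the source iterator is assumed to come from elsewhere, e.g. a VCF reader; the filter is written as an anonymous class rather than a lambda so nothing is assumed about VariantContextFilter beyond its test method):

    import htsjdk.variant.variantcontext.VariantContext;
    import htsjdk.variant.variantcontext.filter.FilteringVariantContextIterator;
    import htsjdk.variant.variantcontext.filter.VariantContextFilter;

    import java.util.Iterator;

    public class FilteringIteratorMigrationSketch {
        static void printPassingSites(final Iterator<VariantContext> variants) {
            final VariantContextFilter notFiltered = new VariantContextFilter() {
                @Override
                public boolean test(final VariantContext ctx) {
                    return !ctx.isFiltered();   // keep only unfiltered / PASS sites
                }
            };
            // was: new FilteringIterator(variants, notFiltered)
            try (final FilteringVariantContextIterator it =
                         new FilteringVariantContextIterator(variants, notFiltered)) {
                while (it.hasNext()) {
                    final VariantContext ctx = it.next();
                    System.out.println(ctx.getChr() + ":" + ctx.getStart());
                }
            }
        }
    }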
diff --git a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
index 4e95888..165c272 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilder.java
@@ -131,8 +131,9 @@ public class VariantContextWriterBuilder {
* Default constructor. Adds <code>USE_ASYNC_IO</code> to the Options if it is present in Defaults.
*/
public VariantContextWriterBuilder() {
- if (Defaults.USE_ASYNC_IO)
+ if (Defaults.USE_ASYNC_IO_FOR_TRIBBLE) {
options.add(Options.USE_ASYNC_IO);
+ }
}
/**
@@ -338,6 +339,16 @@ public class VariantContextWriterBuilder {
}
/**
+ * Set or unset option depending on the boolean given
+ * @param option the option to modify
+ * @param setIt true to set the option, false to unset it.
+ * @return this <code>VariantContextWriterBuilder</code>
+ */
+ public VariantContextWriterBuilder modifyOption(final Options option, final boolean setIt) {
+ return (setIt) ? this.setOption(option) : this.unsetOption(option);
+ }
+
+ /**
* Add one option to the set of default <code>Options</code> that will be used as the initial set of options
* for all VariantContextWriterBuilders created after this call.
*
@@ -369,6 +380,15 @@ public class VariantContextWriterBuilder {
}
/**
+ * Used for testing; tests if the option is set
+ * @param option the option to test
+ * @return true if the option is set, false otherwise.
+ */
+ boolean isOptionSet(final Options option) {
+ return this.options.contains(option);
+ }
+
+ /**
* Validate and build the <code>VariantContextWriter</code>.
*
* @return the <code>VariantContextWriter</code> as specified by previous method calls
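The new modifyOption(Options, boolean) helper lets a caller drive an option from a flag instead of branching on setOption/unsetOption. A brief usage sketch (setOutputFile is assumed from the existing builder API; the output file and async flag are placeholders):

    import htsjdk.variant.variantcontext.writer.Options;
    import htsjdk.variant.variantcontext.writer.VariantContextWriter;
    import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;

    import java.io.File;

    public class ModifyOptionSketch {
        static VariantContextWriter openWriter(final File out, final boolean useAsync) {
            return new VariantContextWriterBuilder()
                    .setOutputFile(out)                            // assumed existing builder setter
                    .modifyOption(Options.USE_ASYNC_IO, useAsync)  // set or unset from one boolean
                    .modifyOption(Options.INDEX_ON_THE_FLY, false) // same effect as unsetOption(...)
                    .build();
        }
    }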
diff --git a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java
index 8be6e53..6f3511d 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/VariantContextWriterFactory.java
@@ -56,7 +56,7 @@ public class VariantContextWriterFactory {
public static final EnumSet<Options> NO_OPTIONS = EnumSet.noneOf(Options.class);
static {
- if (Defaults.USE_ASYNC_IO) {
+ if (Defaults.USE_ASYNC_IO_FOR_TRIBBLE) {
DEFAULT_OPTIONS.add(Options.USE_ASYNC_IO);
}
}
diff --git a/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
index 51ac9d8..7b157ca 100644
--- a/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
+++ b/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
@@ -613,8 +613,9 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
alleles.add(allele);
}
- public final static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) {
+ public static boolean canDecodeFile(final String potentialInput, final String MAGIC_HEADER_LINE) {
try {
+ //isVCFStream closes the stream that's passed in
return isVCFStream(new FileInputStream(potentialInput), MAGIC_HEADER_LINE) ||
isVCFStream(new GZIPInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE) ||
isVCFStream(new BlockCompressedInputStream(new FileInputStream(potentialInput)), MAGIC_HEADER_LINE);
@@ -625,14 +626,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
}
}
- private final static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
+ private static boolean isVCFStream(final InputStream stream, final String MAGIC_HEADER_LINE) {
try {
byte[] buff = new byte[MAGIC_HEADER_LINE.length()];
int nread = stream.read(buff, 0, MAGIC_HEADER_LINE.length());
boolean eq = Arrays.equals(buff, MAGIC_HEADER_LINE.getBytes());
return eq;
-// String firstLine = new String(buff);
-// return firstLine.startsWith(MAGIC_HEADER_LINE);
} catch ( IOException e ) {
return false;
} catch ( RuntimeException e ) {
diff --git a/src/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java b/src/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
index 371b297..dfb3f0f 100644
--- a/src/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
+++ b/src/java/htsjdk/variant/vcf/VCFStandardHeaderLines.java
@@ -183,7 +183,7 @@ public class VCFStandardHeaderLines {
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Integer, "Genotype Quality"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_PL_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
- registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
+ registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_ALLELE_DEPTHS, VCFHeaderLineCount.R, VCFHeaderLineType.Integer, "Allelic depths for the ref and alt alleles in the order listed"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_FILTER_KEY, 1, VCFHeaderLineType.String, "Genotype-level filter"));
registerStandard(new VCFFormatHeaderLine(VCFConstants.PHASE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Read-backed phasing quality"));
diff --git a/src/scripts/build_intel_deflater.sh b/src/scripts/build_intel_deflater.sh
index 522fb7d..f139cf0 100644
--- a/src/scripts/build_intel_deflater.sh
+++ b/src/scripts/build_intel_deflater.sh
@@ -23,14 +23,17 @@
# THE SOFTWARE.
#
-# Build libIntelDeflater.so, the JNI library that wraps Intel IPP compression library.
+# Build libIntelDeflater.so, the JNI library that wraps Intel IPP compression library and igzip.
# Note that this is not built as part of standard release process. Rather, it is built manually and then
-# copied to Picard-public/lib/jni.
+# copied to htsjdk/lib/jni.
# Assumes OpenJDK exists at $OPENJDK. I used openjdk-7-fcs-src-b147-27_jun_2011.zip
# Assumes that Picard-public java sources have been compiled
# Assumes IPP8_CODE_SAMPLES_DIR points to Intel IPP sample code built with -fPIC
-# Assumes IPP8_COMPOSER_XE_DIR points to Intel composer xe directory
+# Assumes IPP8_INSTALL_DIR points to composer_xe_2013_sp1 installation
+# Assumes IGZIP_LIB points to the directory containing libigzip0c.a
+source ${IPP8_INSTALL_DIR}/bin/ippvars.sh intel64
+
set -e
if [ "$OPENJDK" = "" ]
@@ -43,8 +46,12 @@ then echo "ERROR: IPP8_CODE_SAMPLES_DIR environment variable not defined." >&2
exit 1
fi
-if [ "$IPP8_COMPOSER_XE_DIR" = "" ]
-then echo "ERROR: IPP8_COMPOSER_XE_DIR environment variable not defined." >&2
+if [ "$IPP8_INSTALL_DIR" = "" ]
+then echo "ERROR: IPP8_INSTALL_DIR environment variable not defined." >&2
+ exit 1
+fi
+if [ "$IGZIP_LIB" = "" ]
+then echo "ERROR: IGZIP_LIB environment variable not defined." >&2
exit 1
fi
@@ -55,17 +62,15 @@ builddir=$rootdir/lib_build
rm -rf $builddir
mkdir -p $builddir
+echo $rootdir
# Create JNI C header file
javah -jni -classpath $rootdir/classes -d $builddir htsjdk.samtools.util.zip.IntelDeflater
# Compile source and create library.
-gcc -o src/c/inteldeflater/IntelDeflater.o -I$builddir -I$JAVA_HOME/include/ -I$JAVA_HOME/include/linux/ -I$OPENJDK/jdk/src/share/native/common/ \
--I$OPENJDK/jdk/src/solaris/native/common/ -c -O3 -fPIC src/c/inteldeflater/IntelDeflater.c
-gcc -shared -o $builddir/libIntelDeflater.so src/c/inteldeflater/IntelDeflater.o \
--L${IPP8_CODE_SAMPLES_DIR}/__cmake/data-compression.intel64.make.static.release/__lib/release \
--L${IPP8_COMPOSER_XE_DIR}/lib/intel64 \
--L${IPP8_COMPOSER_XE_DIR}/ipp/lib/intel64 \
--lzlib -lstdc++ -Wl,-Bstatic -lbfp754 -ldecimal -liomp5 -liompstubs5 -lipgo -lippac -lippcc -lippch -lippcv \
+gcc -I$builddir -I$rootdir/src/c/inteldeflater/ -I$JAVA_HOME/include/ -I$JAVA_HOME/include/linux/ -I$OPENJDK/jdk/src/share/native/common/ \
+-I$OPENJDK/jdk/src/solaris/native/common/ -c -O3 -fPIC $rootdir/src/c/inteldeflater/IntelDeflater.c
+gcc -z noexecstack -shared -o $builddir/libIntelDeflater.so IntelDeflater.o -L${IPP8_CODE_SAMPLES_DIR}/__cmake/data-compression.intel64.make.static.release/__lib/release \
+-lzlib -lstdc++ -Wl,-Bstatic -L$IGZIP_LIB -ligzip0c -lbfp754 -ldecimal -liomp5 -liompstubs5 -lipgo -lippac -lippcc -lippch -lippcv \
-lippdc -lippdi -lippgen -lippi -lippj -lippm -lippr -lippsc -lippvc -lippvm -lirng -lmatmul -lpdbx \
-lpdbxinst -lsvml -lipps -limf -lirc -lirc_s -lippcore -Wl,-Bdynamic
diff --git a/src/tests/java/htsjdk/samtools/BAMIndexWriterTest.java b/src/tests/java/htsjdk/samtools/BAMIndexWriterTest.java
index 92496e2..c907980 100644
--- a/src/tests/java/htsjdk/samtools/BAMIndexWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/BAMIndexWriterTest.java
@@ -28,6 +28,7 @@ import htsjdk.samtools.util.IOUtil;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
@@ -200,6 +201,14 @@ public class BAMIndexWriterTest {
};
}
+ @Test(expectedExceptions = SAMException.class)
+ public void testRequireCoordinateSortOrder() {
+ SAMFileHeader header = new SAMFileHeader();
+ header.setSortOrder(SAMFileHeader.SortOrder.queryname);
+
+ new BAMIndexer(new ByteArrayOutputStream(), header);
+ }
+
/** generates the index file using the latest java index generating code */
private File createIndexFile(File bamFile) throws IOException {
final File bamIndexFile = File.createTempFile("Bai.", ".bai");
diff --git a/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java b/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java
index 05f3b6f..6f3b954 100644
--- a/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java
@@ -42,7 +42,7 @@ public class CRAMBAIIndexerTest {
samFileHeader.addSequence(new SAMSequenceRecord("2", 10));
samFileHeader.addSequence(new SAMSequenceRecord("3", 10));
ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream();
- CRAMIndexer indexer = new CRAMIndexer(indexBAOS, samFileHeader);
+ CRAMBAIIndexer indexer = new CRAMBAIIndexer(indexBAOS, samFileHeader);
int recordsPerContainer = 3;
ContainerFactory containerFactory = new ContainerFactory(samFileHeader, recordsPerContainer);
List<CramCompressionRecord> records = new ArrayList<>();
diff --git a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
index 432653f..bea3257 100644
--- a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
@@ -66,18 +66,12 @@ public class CRAMComplianceTest {
File refFile;
File cramFile_21;
File cramFile_30;
- File embedCramFile;
- File norefCramFile;
- File refCramFile;
public TestCase(File root, String name) {
bamFile = new File(root, name + ".sam");
refFile = new File(root, name.split("#")[0] + ".fa");
cramFile_21 = new File(root, name + ".2.1.cram");
cramFile_30 = new File(root, name + ".3.0.cram");
- embedCramFile = new File(root, name + ".embed.cram");
- norefCramFile = new File(root, name + ".noref.cram");
- refCramFile = new File(root, name + ".ref.cram");
}
}
@@ -85,10 +79,7 @@ public class CRAMComplianceTest {
public void test(String name) throws IOException {
TestCase t = new TestCase(new File("testdata/htsjdk/samtools/cram/"), name);
- ReferenceSource source = null;
- if (t.refFile.exists())
- source = new ReferenceSource(t.refFile);
-
+ ReferenceSource source = new ReferenceSource(t.refFile);
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(t.bamFile);
final SAMRecordIterator samRecordIterator = reader.iterator();
@@ -112,33 +103,31 @@ public class CRAMComplianceTest {
Assert.assertTrue(cramFileReaderIterator.hasNext());
SAMRecord restored = cramFileReaderIterator.next();
Assert.assertNotNull(restored);
- assertSameRecords(CramVersions.CRAM_v3.major, samRecord, restored);
+ assertSameRecords(CramVersions.DEFAULT_CRAM_VERSION.major, samRecord, restored);
}
Assert.assertFalse(cramFileReaderIterator.hasNext());
- if (t.cramFile_21.exists()) {
- cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), (SeekableStream)null, source, ValidationStringency.SILENT);
- cramFileReaderIterator = cramFileReader.getIterator();
- for (SAMRecord samRecord : samRecords) {
- Assert.assertTrue(cramFileReaderIterator.hasNext());
- SAMRecord restored = cramFileReaderIterator.next();
- Assert.assertNotNull(restored);
- assertSameRecords(CramVersions.CRAM_v2_1.major, samRecord, restored);
- }
- Assert.assertFalse(cramFileReaderIterator.hasNext());
+ //v2.1 test
+ cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_21), (SeekableStream)null, source, ValidationStringency.SILENT);
+ cramFileReaderIterator = cramFileReader.getIterator();
+ for (SAMRecord samRecord : samRecords) {
+ Assert.assertTrue(cramFileReaderIterator.hasNext());
+ SAMRecord restored = cramFileReaderIterator.next();
+ Assert.assertNotNull(restored);
+ assertSameRecords(CramVersions.CRAM_v2_1.major, samRecord, restored);
}
+ Assert.assertFalse(cramFileReaderIterator.hasNext());
- if (t.cramFile_30.exists()) {
- cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), (SeekableStream)null, source, ValidationStringency.SILENT);
- cramFileReaderIterator = cramFileReader.getIterator();
- for (SAMRecord samRecord : samRecords) {
- Assert.assertTrue(cramFileReaderIterator.hasNext());
- SAMRecord restored = cramFileReaderIterator.next();
- Assert.assertNotNull(restored);
- assertSameRecords(CramVersions.CRAM_v3.major, samRecord, restored);
- }
- Assert.assertFalse(cramFileReaderIterator.hasNext());
+ //v3.0 test
+ cramFileReader = new CRAMFileReader(new FileInputStream(t.cramFile_30), (SeekableStream)null, source, ValidationStringency.SILENT);
+ cramFileReaderIterator = cramFileReader.getIterator();
+ for (SAMRecord samRecord : samRecords) {
+ Assert.assertTrue(cramFileReaderIterator.hasNext());
+ SAMRecord restored = cramFileReaderIterator.next();
+ Assert.assertNotNull(restored);
+ assertSameRecords(CramVersions.CRAM_v3.major, samRecord, restored);
}
+ Assert.assertFalse(cramFileReaderIterator.hasNext());
}
private void assertSameRecords(int majorVersion, SAMRecord record1, SAMRecord record2) {
@@ -147,16 +136,14 @@ public class CRAMComplianceTest {
Assert.assertEquals(record2.getReferenceName(), record1.getReferenceName());
Assert.assertEquals(record2.getAlignmentStart(), record1.getAlignmentStart());
- {
- /**
- * Known issue: CRAM v2.1 doesn't handle reads with missing bases correctly. This causes '*' bases to arise when reading CRAM.
- * Skipping the base comparison asserts.
- */
- if (record1.getReadBases() == SAMRecord.NULL_SEQUENCE && majorVersion < CramVersions.CRAM_v3.major)
- ;
- else
- Assert.assertEquals(record2.getReadBases(), record1.getReadBases());
+ /**
+ * Known issue: CRAM v2.1 doesn't handle reads with missing bases correctly. This
+ * causes '*' bases to arise when reading CRAM. Skipping the base comparison asserts.
+ */
+ if (record1.getReadBases() != SAMRecord.NULL_SEQUENCE || majorVersion >= CramVersions.CRAM_v3.major) {
+ Assert.assertEquals(record2.getReadBases(), record1.getReadBases());
}
+
Assert.assertEquals(record2.getBaseQualities(), record1.getBaseQualities());
}
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileBAIIndexTest.java
similarity index 96%
rename from src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
rename to src/tests/java/htsjdk/samtools/CRAMFileBAIIndexTest.java
index 3cc147c..9108283 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileBAIIndexTest.java
@@ -27,15 +27,15 @@ import java.util.Map;
import java.util.TreeSet;
/**
- * A collection of tests for CRAM index write/read that use BAMFileIndexTest/index_test.bam file as the source of the test data.
+ * A collection of tests for CRAM BAI index write/read that use BAMFileIndexTest/index_test.bam file as the source of the test data.
* The test will create a BAI index of the cram file before hand.
* The scan* tests check that for every records in the BAM file the query returns the same records from the CRAM file.
* Created by Vadim on 14/03/2015.
*/
-public class CRAMFileIndexTest {
+public class CRAMFileBAIIndexTest {
private final File BAM_FILE = new File("testdata/htsjdk/samtools/BAMFileIndexTest/index_test.bam");
- private File cramFile = new File("testdata/htsjdk/samtools/BAMFileIndexTest/index_test.cram");
- private File indexFile = new File("testdata/htsjdk/samtools/BAMFileIndexTest/index_test.cram.bai");
+ private File cramFile;
+ private File indexFile;
private byte[] cramBytes;
private byte[] baiBytes;
private ReferenceSource source;
@@ -45,6 +45,8 @@ public class CRAMFileIndexTest {
private int nofReadsPerContainer = 1000 ;
+    // Mixes testing queryAlignmentStart with each CRAMFileReader constructor
+ // Separate into individual tests
@Test
public void testConstructors () throws IOException {
CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source, ValidationStringency.SILENT);
@@ -91,6 +93,7 @@ public class CRAMFileIndexTest {
reader.close();
}
+ // this test is the same as the ones above in testConstructors
@Test
public void test_chrM_1500_location() throws IOException {
CRAMFileReader reader = new CRAMFileReader(cramFile, indexFile, source);
@@ -139,7 +142,6 @@ public class CRAMFileIndexTest {
final File CRAMFile = new File("testdata/htsjdk/samtools/cram/auxf#values.3.0.cram");
final File refFile = new File("testdata/htsjdk/samtools/cram/auxf.fa");
ReferenceSource refSource = new ReferenceSource(refFile);
- File indexFile = null;
long start = 0;
long end = CRAMFile.length();
@@ -262,7 +264,7 @@ public class CRAMFileIndexTest {
fos.write(cramBytes);
fos.close();
- CRAMIndexer.createIndex(new SeekableFileStream(cramFile), indexFile, null, ValidationStringency.STRICT);
+ CRAMBAIIndexer.createIndex(new SeekableFileStream(cramFile), indexFile, null, ValidationStringency.STRICT);
baiBytes = readFile(indexFile);
}
diff --git a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java b/src/tests/java/htsjdk/samtools/CRAMFileWriterTest.java
similarity index 99%
rename from src/tests/java/htsjdk/samtools/CramFileWriterTest.java
rename to src/tests/java/htsjdk/samtools/CRAMFileWriterTest.java
index 0a5cbac..e09f38c 100644
--- a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileWriterTest.java
@@ -40,7 +40,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.List;
-public class CramFileWriterTest {
+public class CRAMFileWriterTest {
@BeforeClass
public void initClass() {
@@ -261,7 +261,7 @@ public class CramFileWriterTest {
try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader().clone(), false, outputFile, reference)) {
for (SAMRecord rec : reader) {
- writer.addAlignment(rec);
+ writer.addAlignment(rec);
}
}
diff --git a/src/tests/java/htsjdk/samtools/GenomicIndexUtilTest.java b/src/tests/java/htsjdk/samtools/GenomicIndexUtilTest.java
new file mode 100644
index 0000000..8f5569c
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/GenomicIndexUtilTest.java
@@ -0,0 +1,50 @@
+package htsjdk.samtools;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for GenomicIndexUtil.
+ */
+public class GenomicIndexUtilTest {
+
+ @Test(dataProvider = "testRegionToBinDataProvider")
+ public void testRegionToBin(final int beg, final int end, final int bin) {
+ Assert.assertEquals(GenomicIndexUtil.regionToBin(beg, end), bin);
+ }
+
+ @DataProvider(name = "testRegionToBinDataProvider")
+ public Object[][] testRegionToBinDataProvider() {
+ return new Object[][] {
+ {0, 0, 0},
+ {1, 1, 4681},
+ {0, 1<<14, 4681},
+ {0, (1<<14)+1, 585},
+
+ {1<<14, 1<<14, 585},
+ {(1<<14)+1, (1<<14)+1, 4682},
+ {1<<14, 1<<17, 585},
+ {1<<14, (1<<17)+1, 73},
+
+ {1<<17, 1<<17, 73},
+ {(1<<17)+1, (1<<17)+1, 4689},
+ {1<<17, 1<<20, 73},
+ {1<<17, (1<<20)+1, 9},
+
+ {1<<20, 1<<20, 9},
+ {(1<<20)+1, (1<<20)+1, 4745},
+ {1<<20, 1<<23, 9},
+ {1<<20, (1<<23)+1, 1},
+
+ {1<<23, 1<<23, 1},
+ {(1<<23)+1, (1<<23)+1, 5193},
+ {1<<23, 1<<26, 1},
+ {1<<23, (1<<26)+1, 0},
+
+ {1<<26, 1<<26, 0},
+ {(1<<26)+1, (1<<26)+1, 8777},
+ {1<<26, 1<<26+1, 2}
+ };
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java b/src/tests/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java
new file mode 100644
index 0000000..f5f7a5c
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/SAMBinaryTagAndValueUnitTest.java
@@ -0,0 +1,183 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.util.BinaryCodec;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+public class SAMBinaryTagAndValueUnitTest {
+
+ @DataProvider(name="allowedAttributeTypes")
+ public Object[][] allowedTypes() {
+ return new Object[][] {
+ {new String("a string")},
+ {new Byte((byte) 7)},
+ {new Short((short) 8)},
+ {new Integer(0)},
+ {new Character('C')},
+ {new Float(0.1F)},
+ // unsigned longs
+ {new Long(0)},
+ {new Long(BinaryCodec.MAX_UINT)},
+ // signed longs
+ {new Long(-1L)},
+ {new Long(Integer.MAX_VALUE)},
+ {new Long(Integer.MIN_VALUE)},
+ // array values
+ {new byte[]{0, 1, 2}},
+ {new short[]{3, 4, 5}},
+ {new int[]{6, 7, 8}},
+ {new float[]{0.1F, 0.2F}},
+ };
+ }
+
+ @Test(dataProvider="allowedAttributeTypes")
+ public void test_isAllowedAttribute(final Object value) {
+ Assert.assertTrue(SAMBinaryTagAndValue.isAllowedAttributeValue(value));
+ }
+
+ @Test(dataProvider="allowedAttributeTypes")
+ public void test_isAllowedConstructor(final Object value) {
+ Assert.assertNotNull(new SAMBinaryTagAndValue(SAMTagUtil.getSingleton().makeBinaryTag("UI"), value));
+ }
+
+ @DataProvider(name="notAllowedAttributeTypes")
+ public Object[][] notAllowedTypes() {
+ return new Object[][] {
+ {new Long(BinaryCodec.MAX_UINT + 1L)},
+ {new Long(Integer.MIN_VALUE - 1L)},
+ {new Double(0.3F)},
+ {new Object()},
+ {new Object[]{}},
+ {new Integer[]{}}
+ };
+ }
+
+ @Test(dataProvider="notAllowedAttributeTypes")
+ public void test_isNotAllowedAttribute(final Object value) {
+ Assert.assertFalse(SAMBinaryTagAndValue.isAllowedAttributeValue(value));
+ }
+
+ @Test(dataProvider="notAllowedAttributeTypes", expectedExceptions=IllegalArgumentException.class)
+ public void test_isNotAllowedConstructor(final Object value) {
+ new SAMBinaryTagAndValue(SAMTagUtil.getSingleton().makeBinaryTag("ZZ"), value);
+ }
+
+ @DataProvider(name="allowedUnsignedArrayTypes")
+ public Object[][] allowedUnsignedArrayTypes() {
+ return new Object[][] {
+ {new byte[]{0, 1, 2}},
+ {new short[]{3, 4, 5}},
+ {new int[]{6, 7, 8}},
+ };
+ }
+
+ @Test(dataProvider="allowedUnsignedArrayTypes")
+ public void test_isAllowedUnsignedArrayAttribute(final Object value) {
+ final short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ Assert.assertNotNull(new SAMBinaryTagAndUnsignedArrayValue(binaryTag, value));
+ }
+
+ @DataProvider(name="notAllowedUnsignedArrayTypes")
+ public Object[][] notAllowedUnsignedArrayTypes() {
+ return new Object[][] {
+ {new float[]{0.1F, 0.2F}},
+ {new Object[]{}}
+ };
+ }
+
+ @Test(dataProvider="notAllowedUnsignedArrayTypes", expectedExceptions=IllegalArgumentException.class)
+ public void test_isNotAllowedUnsignedArrayAttribute(final Object value) {
+ final short binaryTag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ new SAMBinaryTagAndUnsignedArrayValue(binaryTag, value);
+ }
+
+ @DataProvider(name="hashCopyEquals")
+ public Object[][] hashCopyEquals() {
+ final short tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
+ return new Object[][] {
+ {new SAMBinaryTagAndValue(tag, new String("a string")), new SAMBinaryTagAndValue(tag, new String("a string")), true, true},
+ {new SAMBinaryTagAndValue(tag, new String("a string")), new SAMBinaryTagAndValue(tag, new String("different string")), false, false},
+
+ {new SAMBinaryTagAndValue(tag, new Byte((byte) 0)), new SAMBinaryTagAndValue(tag, new Byte((byte) 0)), true, true},
+ {new SAMBinaryTagAndValue(tag, new Byte((byte) 0)), new SAMBinaryTagAndValue(tag, new Byte((byte) 1)), false, false},
+
+ {new SAMBinaryTagAndValue(tag, new Short((short) 0)), new SAMBinaryTagAndValue(tag, new Short((short) 0)), true, true},
+ {new SAMBinaryTagAndValue(tag, new Short((short) 0)), new SAMBinaryTagAndValue(tag, new Short((short) 1)), false, false},
+
+ {new SAMBinaryTagAndValue(tag, new Integer(0)), new SAMBinaryTagAndValue(tag, new Integer(0)), true, true},
+            {new SAMBinaryTagAndValue(tag, new Integer(0)), new SAMBinaryTagAndValue(tag, new Integer(1)), false, false},
+
+ {new SAMBinaryTagAndValue(tag, new Character('C')), new SAMBinaryTagAndValue(tag, new Character('C')), true, true},
+ {new SAMBinaryTagAndValue(tag, new Character('C')), new SAMBinaryTagAndValue(tag, new Character('D')), false, false},
+
+ {new SAMBinaryTagAndValue(tag,new Float(0.1F)), new SAMBinaryTagAndValue(tag, new Float(0.1F)), true, true},
+ {new SAMBinaryTagAndValue(tag, new Float(0.1F)), new SAMBinaryTagAndValue(tag, new Float(0.2F)), false, false},
+
+ {new SAMBinaryTagAndValue(tag,new Long(37L)), new SAMBinaryTagAndValue(tag, new Long(37L)), true, true},
+ {new SAMBinaryTagAndValue(tag, new Long(37L)), new SAMBinaryTagAndValue(tag, new Long(38L)), false, false},
+
+ {new SAMBinaryTagAndValue(tag,new Long(BinaryCodec.MAX_UINT)), new SAMBinaryTagAndValue(tag, new Long(BinaryCodec.MAX_UINT)), true, true},
+ {new SAMBinaryTagAndValue(tag, new Long(BinaryCodec.MAX_UINT)), new SAMBinaryTagAndValue(tag, new Long(BinaryCodec.MAX_UINT-1)), false, false},
+
+ // arrays
+
+ {new SAMBinaryTagAndUnsignedArrayValue(tag, new byte[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new byte[]{0, 1, 2}), true, true},
+ {new SAMBinaryTagAndUnsignedArrayValue(tag, new byte[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new byte[]{3, 4, 5}), false, false},
+
+ {new SAMBinaryTagAndUnsignedArrayValue(tag, new short[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new short[]{0, 1, 2}), true, true},
+ {new SAMBinaryTagAndUnsignedArrayValue(tag, new short[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new short[]{3, 4, 5}), false, false},
+
+ {new SAMBinaryTagAndUnsignedArrayValue(tag, new int[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new int[]{0, 1, 2}), true, true},
+ {new SAMBinaryTagAndUnsignedArrayValue(tag, new int[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new int[]{3, 4, 5}), false, false},
+
+ // mix signed array and unsigned array; hashCodes are equal but objects are not
+ {new SAMBinaryTagAndValue(tag, new short[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new short[]{0, 1, 2}), true, false},
+
+ // mix signed array and unsigned array; hashCodes and objects are not equal
+ {new SAMBinaryTagAndValue(tag, new short[]{0, 1, 2}), new SAMBinaryTagAndUnsignedArrayValue(tag, new short[]{1, 1, 3}), false, false},
+ };
+ }
+
+ @Test(dataProvider="hashCopyEquals")
+ public void testHashAndEquals(
+ final SAMBinaryTagAndValue v1,
+ final SAMBinaryTagAndValue v2,
+ final boolean hashEquals,
+ final boolean isEquals)
+ {
+ Assert.assertEquals(hashEquals, v1.hashCode() == v2.hashCode());
+
+ Assert.assertEquals(isEquals, v1.equals(v2));
+ Assert.assertEquals(isEquals, v2.equals(v1));
+ }
+
+ @Test(dataProvider="hashCopyEquals")
+ public void testCopy(
+ final SAMBinaryTagAndValue v1,
+ final SAMBinaryTagAndValue v2,
+ final boolean unused_hashEquals,
+ final boolean isEquals)
+ {
+ Assert.assertTrue(v1.equals(v1.copy()));
+ Assert.assertTrue(v2.equals(v2.copy()));
+
+ Assert.assertEquals(isEquals, v1.equals(v2.copy()));
+ Assert.assertEquals(isEquals, v2.equals(v1.copy()));
+ }
+
+ @Test(dataProvider="hashCopyEquals")
+ public void testDeepCopy(
+ final SAMBinaryTagAndValue v1,
+ final SAMBinaryTagAndValue v2,
+ final boolean unused_hashEquals,
+ final boolean isEquals)
+ {
+ Assert.assertTrue(v1.equals(v1.deepCopy()));
+ Assert.assertTrue(v2.equals(v2.deepCopy()));
+
+ Assert.assertEquals(isEquals, v1.equals(v2.deepCopy()));
+ Assert.assertEquals(isEquals, v2.equals(v1.deepCopy()));
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
index d158563..d0f9d5b 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
@@ -82,7 +82,6 @@ public class SAMFileReaderTest {
return testFiles;
}
-
@Test(dataProvider = "NoIndexCRAMTest")
public void CRAMNoIndexTest(final String inputFile, final String referenceFile) {
final File input = new File(TEST_DATA_DIR, inputFile);
@@ -133,17 +132,8 @@ public class SAMFileReaderTest {
else if (inputFile.endsWith(".bam")) Assert.assertEquals(factory.bamRecordsCreated, i);
}
- @DataProvider(name = "cramNegativeTestCases")
- public Object[][] cramTestNegativeCases() {
- final Object[][] scenarios = new Object[][]{
- {"cram_with_bai_index.cram",},
- {"cram_with_crai_index.cram"},
- };
- return scenarios;
- }
-
- @Test(dataProvider = "cramNegativeTestCases", expectedExceptions=IllegalStateException.class)
- public void testReferenceRequiredForCRAM(final String inputFile) {
+ @Test(dataProvider = "cramTestCases", expectedExceptions=IllegalStateException.class)
+ public void testReferenceRequiredForCRAM(final String inputFile, final String ignoredReferenceFile) {
final File input = new File(TEST_DATA_DIR, inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open(input);
for (final SAMRecord rec : reader) {
@@ -151,7 +141,7 @@ public class SAMFileReaderTest {
CloserUtil.close(reader);
}
- @DataProvider(name = "cramPositiveTestCases")
+ @DataProvider(name = "cramTestCases")
public Object[][] cramTestPositiveCases() {
final Object[][] scenarios = new Object[][]{
{"cram_with_bai_index.cram", "hg19mini.fasta"},
@@ -160,7 +150,7 @@ public class SAMFileReaderTest {
return scenarios;
}
- @Test(dataProvider = "cramPositiveTestCases")
+ @Test(dataProvider = "cramTestCases")
public void testIterateCRAMWithIndex(final String inputFile, final String referenceFile) {
final File input = new File(TEST_DATA_DIR, inputFile);
final File reference = new File(TEST_DATA_DIR, referenceFile);
diff --git a/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java b/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
index 7c6fa56..9f0447b 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileWriterFactoryTest.java
@@ -26,6 +26,10 @@ package htsjdk.samtools;
import htsjdk.samtools.cram.build.CramIO;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.util.IOUtil;
+import htsjdk.variant.variantcontext.writer.AsyncVariantContextWriter;
+import htsjdk.variant.variantcontext.writer.Options;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -292,4 +296,22 @@ public class SAMFileWriterFactoryTest {
verifyWriterOutput(outputFile, new ReferenceSource(referenceFile), nRecs, true);
}
+ @Test
+ public void testAsync() throws IOException {
+ final SAMFileWriterFactory builder = new SAMFileWriterFactory();
+
+ final File outputFile = prepareOutputFile(BamFileIoUtils.BAM_FILE_EXTENSION);
+ final SAMFileHeader header = new SAMFileHeader();
+ final SAMFileWriterFactory factory = createWriterFactoryWithOptions(header);
+ final File referenceFile = new File(TEST_DATA_DIR, "hg19mini.fasta");
+
+ SAMFileWriter writer = builder.makeWriter(header, false, outputFile, referenceFile);
+ Assert.assertEquals(writer instanceof AsyncSAMFileWriter, Defaults.USE_ASYNC_IO_FOR_SAMTOOLS, "testAsync default");
+
+ writer = builder.setUseAsyncIo(true).makeWriter(header, false, outputFile, referenceFile);
+ Assert.assertTrue(writer instanceof AsyncSAMFileWriter, "testAsync option=set");
+
+ writer = builder.setUseAsyncIo(false).makeWriter(header, false, outputFile, referenceFile);
+ Assert.assertFalse(writer instanceof AsyncSAMFileWriter, "testAsync option=unset");
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java b/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
index bc5cc8e..059900d 100644
--- a/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMIntegerTagTest.java
@@ -318,7 +318,7 @@ public class SAMIntegerTagTest {
w.addAlignment(record);
w.close();
- final SamReader reader = SamReaderFactory.make().validationStringency(validationStringency).referenceSource(new ReferenceSource()).
+ final SamReader reader = SamReaderFactory.make().validationStringency(validationStringency).referenceSource(new ReferenceSource((File)null)).
open(SamInputResource.of(new ByteArrayInputStream(baos.toByteArray())));
final SAMRecordIterator iterator = reader.iterator();
Assert.assertTrue(iterator.hasNext());
diff --git a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
index dda10b9..dca9de6 100644
--- a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
@@ -31,7 +31,6 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -108,7 +107,30 @@ public class SAMRecordUnitTest {
}
@Test(dataProvider = "deepCopyTestData")
- public void testDeepCopyRef(final SAMRecord sam) {
+ public void testDeepCopyBasic(final SAMRecord sam) {
+ testDeepCopy(sam);
+ }
+
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyCigar(SAMRecord sam) {
+ sam.setCigar(sam.getCigar());
+ final SAMRecord deepCopy = sam.deepCopy();
+ Assert.assertTrue(sam.equals(deepCopy));
+ }
+
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyGetCigarString(SAMRecord sam) {
+ sam.setCigarString(sam.getCigarString());
+ final SAMRecord deepCopy = sam.deepCopy();
+ Assert.assertTrue(sam.equals(deepCopy));
+ }
+
+ @Test(dataProvider = "deepCopyTestData")
+ public void testDeepCopyGetCigar(final SAMRecord sam)
+ {
+ testDeepCopy(sam);
+ sam.setCigarString(sam.getCigarString());
+ sam.getCigar(); // force cigar elements to be resolved for equals
testDeepCopy(sam);
}
@@ -134,14 +156,12 @@ public class SAMRecordUnitTest {
@Test(dataProvider = "deepCopyTestData")
public void testDeepByteAttributes( final SAMRecord sam ) throws Exception {
- // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
- // SAMBinaryTagAndValue.equals using reference equality on attribute values.
SAMRecord deepCopy = testDeepCopy(sam);
- Assert.assertTrue(sam.equals(deepCopy));
final byte bytes[] = { -2, -1, 0, 1, 2 };
sam.setAttribute("BY", bytes);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
// validate reference inequality and content equality
final byte samBytes[] = sam.getByteArrayAttribute("BY");
@@ -162,6 +182,7 @@ public class SAMRecordUnitTest {
// now unsigned...
sam.setUnsignedArrayAttribute("BY", bytes);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
final byte samUBytes[] = sam.getUnsignedByteArrayAttribute("BY");
final byte copyUBytes[] = deepCopy.getUnsignedByteArrayAttribute("BY");
Assert.assertFalse(copyUBytes == bytes);
@@ -180,14 +201,12 @@ public class SAMRecordUnitTest {
@Test(dataProvider = "deepCopyTestData")
public void testDeepShortAttributes( final SAMRecord sam ) throws Exception {
- // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
- // SAMBinaryTagAndValue.equals using reference equality on attribute values.
SAMRecord deepCopy = testDeepCopy(sam);
- Assert.assertTrue(sam.equals(deepCopy));
final short shorts[] = { -20, -10, 0, 10, 20 };
sam.setAttribute("SH", shorts);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
// validate reference inequality, content equality
final short samShorts[] = sam.getSignedShortArrayAttribute("SH");
@@ -208,6 +227,7 @@ public class SAMRecordUnitTest {
// now unsigned...
sam.setUnsignedArrayAttribute("SH", shorts);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
final short samUShorts[] = sam.getUnsignedShortArrayAttribute("SH");
final short copyUShorts[] = deepCopy.getUnsignedShortArrayAttribute("SH");
@@ -227,14 +247,12 @@ public class SAMRecordUnitTest {
@Test(dataProvider = "deepCopyTestData")
public void testDeepIntAttributes( final SAMRecord sam ) throws Exception {
- // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
- // SAMBinaryTagAndValue.equals using reference equality on attribute values.
SAMRecord deepCopy = testDeepCopy(sam);
- Assert.assertTrue(sam.equals(deepCopy));
final int ints[] = { -200, -100, 0, 100, 200 };
sam.setAttribute("IN", ints);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
// validate reference inequality and content equality
final int samInts[] = sam.getSignedIntArrayAttribute("IN");
@@ -255,6 +273,7 @@ public class SAMRecordUnitTest {
// now unsigned...
sam.setUnsignedArrayAttribute("IN", ints);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
final int samUInts[] = sam.getUnsignedIntArrayAttribute("IN");
final int copyUInts[] = deepCopy.getUnsignedIntArrayAttribute("IN");
@@ -274,14 +293,12 @@ public class SAMRecordUnitTest {
@Test(dataProvider = "deepCopyTestData")
public void testDeepFloatAttributes( final SAMRecord sam ) throws Exception {
- // Note that "samRecord.deepCopy().equals(samRecord)" fails with attributes due to
- // SAMBinaryTagAndValue.equals using reference equality on attribute values.
SAMRecord deepCopy = testDeepCopy(sam);
- Assert.assertTrue(sam.equals(deepCopy));
final float floats[] = { -2.4f, -1.2f, 0, 2.3f, 4.6f };
sam.setAttribute("FL", floats);
deepCopy = sam.deepCopy();
+ Assert.assertEquals(sam, deepCopy);
// validate reference inequality and content equality
final float samFloats[] = sam.getFloatArrayAttribute("FL");
@@ -303,12 +320,7 @@ public class SAMRecordUnitTest {
private SAMRecord testDeepCopy(SAMRecord sam) {
final SAMRecord deepCopy = sam.deepCopy();
-
- // force the indexing bins to be computed in order to satisfy equality test
- sam.setIndexingBin(sam.computeIndexingBin());
- deepCopy.setIndexingBin(deepCopy.computeIndexingBin());
Assert.assertTrue(sam.equals(deepCopy));
-
return deepCopy;
}
@@ -321,7 +333,7 @@ public class SAMRecordUnitTest {
Assert.assertNull(record.getUnsignedIntegerAttribute(stringTag));
Assert.assertNull(record.getUnsignedIntegerAttribute(binaryTag));
- record.setAttribute("UI", 0L);
+ record.setAttribute("UI", (long) 0L);
Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(stringTag));
Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(binaryTag));
@@ -329,7 +341,7 @@ public class SAMRecordUnitTest {
Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(stringTag));
Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(binaryTag));
- final SAMBinaryTagAndValue tv_zero = new SAMBinaryTagAndUnsignedArrayValue(binaryTag, 0L);
+ final SAMBinaryTagAndValue tv_zero = new SAMBinaryTagAndValue(binaryTag, 0L);
record = new SAMRecord(header){
{
setAttributes(tv_zero);
@@ -338,7 +350,7 @@ public class SAMRecordUnitTest {
Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(stringTag));
Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(binaryTag));
- final SAMBinaryTagAndValue tv_max = new SAMBinaryTagAndUnsignedArrayValue(binaryTag, BinaryCodec.MAX_UINT);
+ final SAMBinaryTagAndValue tv_max = new SAMBinaryTagAndValue(binaryTag, BinaryCodec.MAX_UINT);
record = new SAMRecord(header){
{
setAttributes(tv_max);
@@ -350,7 +362,6 @@ public class SAMRecordUnitTest {
/**
* This is an alternative to test_getUnsignedIntegerAttribute_valid().
- * The purpose is to ensure that the hacky way of setting arbitrary tag values works ok.
* This is required for testing invalid (out of range) unsigned integer value.
*/
@Test
@@ -359,69 +370,32 @@ public class SAMRecordUnitTest {
SAMFileHeader header = new SAMFileHeader();
SAMRecord record;
- record = new SAMRecord(header) {
- {
- setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, 0L));
- }
- };
+ record = new SAMRecord(header);
+ record.setAttribute("UI", 0L);
Assert.assertEquals(new Long(0L), record.getUnsignedIntegerAttribute(tag));
- record = new SAMRecord(header) {
- {
- setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT));
- }
- };
- Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute(tag));
-
- // the following works because we bypass value checks implemented in SAMRecord:
- record = new SAMRecord(header) {
- {
- setAttributes(new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT+1L));
- }
- };
- // check that the invalid value is still there:
- Assert.assertEquals(new Long(BinaryCodec.MAX_UINT+1L), (Long)record.getBinaryAttributes().value);
+ record = new SAMRecord(header);
+ record.setAttribute("UI", BinaryCodec.MAX_UINT);
+ Assert.assertEquals(new Long(BinaryCodec.MAX_UINT), record.getUnsignedIntegerAttribute("UI"));
}
@Test(expectedExceptions = SAMException.class)
public void test_getUnsignedIntegerAttribute_negative() {
- short tag = 0;
- SAMRecord record = null;
- try {
- tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
- SAMFileHeader header = new SAMFileHeader();
- final SAMBinaryTagAndValue tv = new SAMBinaryTagAndUnsignedArrayValue(tag, -1L);
- record = new SAMRecord(header) {
- {
- setAttributes(tv);
- }
- };
- } catch (Exception e) {
- Assert.fail("Unexpected exception", e);
- }
- record.getUnsignedIntegerAttribute(tag);
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ record.setAttribute("UI", -1L);
+ record.getUnsignedIntegerAttribute("UI");
}
- @Test(expectedExceptions = SAMException.class)
- public void test_getUnsignedIntegerAttribute_tooLarge() {
- short tag = 0;
- SAMRecord record = null;
- try {
- tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
- SAMFileHeader header = new SAMFileHeader();
- final SAMBinaryTagAndValue tv = new SAMBinaryTagAndUnsignedArrayValue(tag, BinaryCodec.MAX_UINT + 1);
- record = new SAMRecord(header) {
- {
- setAttributes(tv);
- }
- };
- } catch (Exception e) {
- Assert.fail("Unexpected exception", e);
- }
-
- record.getUnsignedIntegerAttribute(tag);
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void test_setUnsignedIntegerAttributeTooLarge() {
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ record.setAttribute("UI", BinaryCodec.MAX_UINT + 1);
}
+    // NOTE: SAMRecord.isAllowedAttributeValue is deprecated, as it has been moved into
+ // SAMBinaryTagAndValue, but we'll leave this test here until the code is removed.
@Test
public void test_isAllowedAttributeDataType() {
Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Byte((byte) 0)));
@@ -441,39 +415,23 @@ public class SAMRecordUnitTest {
Assert.assertTrue(SAMRecord.isAllowedAttributeValue(new Long(-1L)));
Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(BinaryCodec.MAX_UINT + 1L)));
Assert.assertFalse(SAMRecord.isAllowedAttributeValue(new Long(Integer.MIN_VALUE - 1L)));
-
}
- @Test(expectedExceptions = SAMException.class)
+ @Test(expectedExceptions = IllegalArgumentException.class)
public void test_setAttribute_unsigned_int_negative() {
- short tag = 0;
+ SAMFileHeader header = new SAMFileHeader();
SAMRecord record = null;
- try {
- tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
- SAMFileHeader header = new SAMFileHeader();
- record = new SAMRecord(header);
- Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
- } catch (SAMException e) {
- Assert.fail("Unexpected exception", e);
- }
-
- record.setAttribute(tag, (long) Integer.MIN_VALUE - 1L);
+ record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute("UI"));
+ record.setAttribute("UI", (long) Integer.MIN_VALUE - 1L);
}
- @Test(expectedExceptions = SAMException.class)
+ @Test(expectedExceptions = IllegalArgumentException.class)
public void test_setAttribute_unsigned_int_tooLarge() {
- short tag = 0;
- SAMRecord record = null;
- try {
- tag = SAMTagUtil.getSingleton().makeBinaryTag("UI");
- SAMFileHeader header = new SAMFileHeader();
- record = new SAMRecord(header);
- Assert.assertNull(record.getUnsignedIntegerAttribute(tag));
- } catch (SAMException e) {
- Assert.fail("Unexpected exception", e);
- }
-
- record.setAttribute(tag, BinaryCodec.MAX_UINT + 1L);
+ SAMFileHeader header = new SAMFileHeader();
+ SAMRecord record = new SAMRecord(header);
+ Assert.assertNull(record.getUnsignedIntegerAttribute("UI"));
+ record.setAttribute("UI", (long) BinaryCodec.MAX_UINT + 1L);
}
@Test
@@ -804,17 +762,12 @@ public class SAMRecordUnitTest {
}
@Test
- private SAMRecord testNullHeaderDeepCopy() {
+ private void testNullHeaderDeepCopy() {
SAMRecord sam = createTestRecordHelper();
sam.setHeader(null);
final SAMRecord deepCopy = sam.deepCopy();
- // force the indexing bins to be computed in order to satisfy equality test
- sam.setIndexingBin(sam.computeIndexingBin());
- deepCopy.setIndexingBin(deepCopy.computeIndexingBin());
Assert.assertTrue(sam.equals(deepCopy));
-
- return deepCopy;
}
private void testNullHeaderCigar(SAMRecord rec) {
diff --git a/src/tests/java/htsjdk/samtools/SamStreamsTest.java b/src/tests/java/htsjdk/samtools/SamStreamsTest.java
new file mode 100644
index 0000000..834538e
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/SamStreamsTest.java
@@ -0,0 +1,60 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package htsjdk.samtools;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.*;
+
+public class SamStreamsTest {
+
+ private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools");
+
+ @Test(dataProvider = "makeData")
+ public void testDataFormat(final String inputFile, final boolean isGzippedSAMFile, final boolean isBAMFile, final boolean isCRAMFile) throws Exception {
+ final File input = new File(TEST_DATA_DIR, inputFile);
+ try(final InputStream fis = new BufferedInputStream(new FileInputStream(input))) { //must be buffered or the isGzippedSAMFile will blow up
+ Assert.assertEquals(SamStreams.isGzippedSAMFile(fis), isGzippedSAMFile, "isGzippedSAMFile:" + inputFile);
+ Assert.assertEquals(SamStreams.isBAMFile(fis), isBAMFile, "isBAMFile:" + inputFile);
+ Assert.assertEquals(SamStreams.isCRAMFile(fis), isCRAMFile, "isCRAMFile:" + inputFile);
+ }
+ }
+
+ @DataProvider(name = "makeData")
+ public Object[][] makeData() {
+ final Object[][] scenarios = new Object[][]{
+ //isGzippedSAMFile isBAMFile isCRAMFile
+ {"block_compressed.sam.gz", true, false, false},
+ {"uncompressed.sam", false, false, false},
+ {"compressed.sam.gz", true, false, false},
+ {"compressed.bam", true, true, false}, //this is slightly weird (responding true to isGzippedSAMFile)
+ {"cram_query_sorted.cram", false, false, true},
+ };
+ return scenarios;
+ }
+
+}
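
For reference, the SamStreams probes exercised in the new test above can be used directly to sniff an unknown alignment stream. A minimal sketch, not part of this commit (class name and argument handling are illustrative): the stream must support mark/reset, hence the BufferedInputStream, and since a BAM also reports true for isGzippedSAMFile the CRAM and BAM checks come first.

import htsjdk.samtools.SamStreams;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

public class FormatProbe {
    public static void main(final String[] args) throws IOException {
        // mark/reset support is required by the probes, hence the BufferedInputStream
        try (final InputStream in = new BufferedInputStream(new FileInputStream(args[0]))) {
            if (SamStreams.isCRAMFile(in)) {
                System.out.println("CRAM");
            } else if (SamStreams.isBAMFile(in)) {
                System.out.println("BAM"); // BGZF-compressed, so it would also pass isGzippedSAMFile
            } else if (SamStreams.isGzippedSAMFile(in)) {
                System.out.println("gzipped SAM");
            } else {
                System.out.println("plain SAM or unknown");
            }
        }
    }
}
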
diff --git a/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java b/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
index c27f2f2..6cf4934 100644
--- a/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
+++ b/src/tests/java/htsjdk/samtools/cram/CRAIEntryTest.java
@@ -47,7 +47,7 @@ public class CRAIEntryTest {
final int sliceSise = counter++;
final String line = String.format("%d\t%d\t%d\t%d\t%d\t%d", sequenceId, alignmentStart, alignmentSpan, containerOffset, sliceOffset, sliceSise);
- final CRAIEntry entry = CRAIEntry.fromCraiLine(line);
+ final CRAIEntry entry = new CRAIEntry(line);
Assert.assertNotNull(entry);
Assert.assertEquals(entry.sequenceId, sequenceId);
Assert.assertEquals(entry.alignmentStart, alignmentStart);
@@ -75,7 +75,6 @@ public class CRAIEntryTest {
Assert.assertTrue(CRAIEntry.intersect(newEntry(2, 1), newEntry(1, 2)));
}
-
@Test
public void testIntersetcsOvertlaping() {
Assert.assertFalse(CRAIEntry.intersect(newEntry(1, 2), newEntry(0, 1)));
diff --git a/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java b/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
index 8989963..ac7608c 100644
--- a/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/cram/CRAIIndexTest.java
@@ -1,11 +1,28 @@
package htsjdk.samtools.cram;
+import htsjdk.samtools.BAMFileSpan;
+import htsjdk.samtools.DiskBasedBAMFileIndex;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.seekablestream.SeekableBufferedStream;
+import htsjdk.samtools.seekablestream.SeekableFileStream;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import htsjdk.samtools.seekablestream.SeekableStream;
+import htsjdk.samtools.util.IOUtil;
import org.testng.Assert;
import org.testng.annotations.Test;
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
+import java.util.function.BiFunction;
+import java.util.zip.GZIPOutputStream;
/**
* Created by vadim on 25/08/2015.
@@ -27,21 +44,13 @@ public class CRAIIndexTest {
index.add(e);
e = e.clone();
- e.sequenceId = sequenceId;
e.alignmentStart = 2;
- e.alignmentSpan = 1;
e.containerStartOffset = 2;
- e.sliceOffset = 1;
- e.sliceSize = 0;
index.add(e);
e = e.clone();
- e.sequenceId = sequenceId;
e.alignmentStart = 3;
- e.alignmentSpan = 1;
e.containerStartOffset = 3;
- e.sliceOffset = 1;
- e.sliceSize = 0;
index.add(e);
Assert.assertFalse(allFoundEntriesIntersectQueryInFind(index, sequenceId, 1, 0));
@@ -77,8 +86,84 @@ public class CRAIIndexTest {
return foundCount > 0;
}
+ @Test(expectedExceptions = NullPointerException.class)
+ public void testCraiRequiresDictionary() throws IOException {
+ try (final ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ final BufferedInputStream bis = new BufferedInputStream(new ByteArrayInputStream(baos.toByteArray()))) {
+ CRAIIndex.openCraiFileAsBaiStream(bis, null);
+ }
+ }
+
+ @Test
+ public void testCraiInMemory() throws IOException {
+ doCRAITest(this::getBaiStreamFromMemory);
+ }
+
+ @Test
+ public void testCraiFromFile() throws IOException {
+ doCRAITest(this::getBaiStreamFromFile);
+ }
+
+ private void doCRAITest(BiFunction<SAMSequenceDictionary, List<CRAIEntry>, SeekableStream> getBaiStreamForIndex) throws IOException {
+ final ArrayList<CRAIEntry> index = new ArrayList<CRAIEntry>();
+ final CRAIEntry entry = new CRAIEntry();
+ entry.sequenceId = 0;
+ entry.alignmentStart = 1;
+ entry.alignmentSpan = 2;
+ entry.sliceOffset = 3;
+ entry.sliceSize = 4;
+ entry.containerStartOffset = 5;
+ index.add(entry);
+
+ final SAMSequenceDictionary dictionary = new SAMSequenceDictionary();
+ dictionary.addSequence(new SAMSequenceRecord("1", 100));
+
+ final SeekableStream baiStream = getBaiStreamForIndex.apply(dictionary, index);
+
+ final DiskBasedBAMFileIndex bamIndex = new DiskBasedBAMFileIndex(baiStream, dictionary);
+ final BAMFileSpan span = bamIndex.getSpanOverlapping(entry.sequenceId, entry.alignmentStart, entry.alignmentStart);
+ Assert.assertNotNull(span);
+ final long[] coordinateArray = span.toCoordinateArray();
+ Assert.assertEquals(coordinateArray.length, 2);
+ Assert.assertEquals(coordinateArray[0] >> 16, entry.containerStartOffset);
+ Assert.assertEquals(coordinateArray[1] & 0xFFFF, 1);
+ }
+
+ public SeekableStream getBaiStreamFromMemory(SAMSequenceDictionary dictionary, final List<CRAIEntry> index) {
+ try {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final GZIPOutputStream gos = new GZIPOutputStream(baos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+ final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(new ByteArrayInputStream(baos.toByteArray()), dictionary);
+ Assert.assertNotNull(baiStream);
+ return baiStream;
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private SeekableStream getBaiStreamFromFile(SAMSequenceDictionary dictionary, final List<CRAIEntry> index) {
+ try {
+ final File file = File.createTempFile("test", ".crai");
+ file.deleteOnExit();
+ final FileOutputStream fos = new FileOutputStream(file);
+ final GZIPOutputStream gos = new GZIPOutputStream(fos);
+ CRAIIndex.writeIndex(gos, index);
+ gos.close();
+ final SeekableStream baiStream = CRAIIndex.openCraiFileAsBaiStream(new SeekableBufferedStream(new SeekableFileStream(file)), dictionary);
+ Assert.assertNotNull(baiStream);
+ return baiStream;
+ }
+ catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
@Test
- public void testGetLeftmost() {
+ public void testGetLeftmost() throws CloneNotSupportedException {
final List<CRAIEntry> index = new ArrayList<CRAIEntry>();
Assert.assertNull(CRAIIndex.getLeftmost(index));
@@ -93,13 +178,8 @@ public class CRAIIndexTest {
// trivial case of single entry in index:
Assert.assertEquals(e1, CRAIIndex.getLeftmost(index));
- final CRAIEntry e2 = new CRAIEntry();
- e2.sequenceId = 1;
+ final CRAIEntry e2 = e1.clone();
e2.alignmentStart = e1.alignmentStart + 1;
- e2.alignmentSpan = 3;
- e2.containerStartOffset = 4;
- e2.sliceOffset = 5;
- e2.sliceSize = 6;
index.add(e2);
Assert.assertEquals(e1, CRAIIndex.getLeftmost(index));
}
diff --git a/src/tests/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java b/src/tests/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java
new file mode 100644
index 0000000..817c60e
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/util/AsyncBufferedIteratorTest.java
@@ -0,0 +1,115 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Daniel Cameron
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class AsyncBufferedIteratorTest {
+ private static class TestCloseableIterator implements CloseableIterator<Integer> {
+ private int[] results;
+ private volatile int offset = 0;
+ public volatile boolean isClosed = false;
+ public TestCloseableIterator(int[] results) {
+ this.results = results;
+ }
+ @Override
+ public void close() {
+ isClosed = true;
+ }
+ @Override
+ public boolean hasNext() {
+ return offset < results.length;
+ }
+ @Override
+ public Integer next() {
+ return results[offset++];
+ }
+ public int consumed() {
+ return offset;
+ }
+ }
+ @Test
+ public void testWrapUnderlying() {
+ AsyncBufferedIterator<Integer> abi = new AsyncBufferedIterator<Integer>(new TestCloseableIterator(new int[] { 0, 1, 2, 3}), 1, 1);
+ for (int i = 0; i < 4; i++) {
+ Assert.assertEquals(i, (int)abi.next());
+ }
+ abi.close();
+ }
+ @Test
+ public void testClose() {
+ TestCloseableIterator tci = new TestCloseableIterator(new int[] { 0, 1, 2, 3});
+ AsyncBufferedIterator<Integer> abi = new AsyncBufferedIterator<Integer>(tci, 1, 1);
+ abi.close();
+ Assert.assertTrue(tci.isClosed);
+ }
+ /**
+ * Background thread should block when buffers are full
+ */
+ @Test
+ public void testBackgroundBlocks() throws InterruptedException {
+ TestCloseableIterator it = new TestCloseableIterator(new int[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 });
+ AsyncBufferedIterator<Integer> abi = new AsyncBufferedIterator<Integer>(it, 3, 2, "testBackgroundBlocks");
+ Assert.assertNotNull(getThreadWithName("testBackgroundBlocks"));
+ Thread.sleep(10); // how do we write this test and not be subject to race conditions?
+ // should have read 9 records: 2*3 in the buffers, and another 3 read but
+ // blocking waiting to be added
+ Assert.assertEquals(it.consumed(), 9);
+ abi.close();
+ }
+ @Test
+ public void testBackgroundThreadCompletes() throws InterruptedException {
+ TestCloseableIterator it = new TestCloseableIterator(new int[] { 0, 1, 2, 3, 4, 5 });
+ AsyncBufferedIterator<Integer> abi = new AsyncBufferedIterator<Integer>(it, 3, 2, "testBackgroundThreadCompletes");
+ Assert.assertNotNull(getThreadWithName("testBackgroundThreadCompletes"));
+ // both buffers should be full
+ // clear out one buffer so the background thread can write the end of stream indicator
+ // and complete
+ abi.next();
+
+ // how do we write this test and not be subject to a race condition,
+ // given that we're waiting on a background thread we have no direct access to?
+ Thread t;
+ for (int i = 0; i < 64; i++) {
+ Thread.sleep(1);
+ t = getThreadWithName("testBackgroundThreadCompletes");
+ if (t == null || !t.isAlive()) break;
+ }
+ t = getThreadWithName("testBackgroundThreadCompletes");
+ Assert.assertTrue(t == null || !t.isAlive());
+ abi.close();
+ }
+ private static Thread getThreadWithName(String name) {
+ Thread[] allthreads = new Thread[Thread.activeCount() + 16];
+ int threadCount = Thread.enumerate(allthreads);
+ for (int i = 0; i < threadCount; i++) {
+ String threadName = allthreads[i].getName();
+ if (name.equals(threadName)) {
+ return allthreads[i];
+ }
+ }
+ return null;
+ }
+}
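
The AsyncBufferedIterator under test reads ahead from a wrapped iterator on a named background thread, filling bufferCount buffers of bufferSize records each. A minimal sketch of wrapping a SAM iterator this way, not part of the commit (the file handling, buffer sizes, and thread name are illustrative only):

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.AsyncBufferedIterator;

import java.io.File;
import java.io.IOException;

public class AsyncReadAhead {
    public static void main(final String[] args) throws IOException {
        try (final SamReader reader = SamReaderFactory.makeDefault().open(new File(args[0]))) {
            // (iterator, bufferSize, bufferCount, threadName), as in the tests above
            final AsyncBufferedIterator<SAMRecord> it =
                    new AsyncBufferedIterator<SAMRecord>(reader.iterator(), 2048, 2, "read-ahead");
            long n = 0;
            while (it.hasNext()) {
                it.next();
                n++;
            }
            it.close(); // also closes the wrapped iterator, as verified by testClose above
            System.out.println(n + " records");
        }
    }
}
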
diff --git a/src/tests/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java b/src/tests/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java
index d4c1703..758d8fa 100644
--- a/src/tests/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java
+++ b/src/tests/java/htsjdk/samtools/util/BlockCompressedOutputStreamTest.java
@@ -63,6 +63,15 @@ public class BlockCompressedOutputStreamTest {
for(int i = 0; (line = reader.readLine()) != null; ++i) {
Assert.assertEquals(line + "\n", linesWritten.get(i));
}
+ bcis.close();
+ final BlockCompressedInputStream bcis2 = new BlockCompressedInputStream(f);
+ int available = bcis2.available();
+ Assert.assertFalse(bcis2.endOfBlock(), "Should not be at end of block");
+ Assert.assertTrue(available > 0);
+ byte[] buffer = new byte[available];
+ Assert.assertEquals(bcis2.read(buffer), available, "Should read to end of block");
+ Assert.assertTrue(bcis2.endOfBlock(), "Should be at end of block");
+ bcis2.close();
}
@Test
@@ -89,6 +98,7 @@ public class BlockCompressedOutputStreamTest {
}
}
Assert.assertEquals(i, INPUT_SIZE);
+ bcis.close();
}
// PIC-393 exception closing BGZF stream opened to /dev/null
diff --git a/src/tests/java/htsjdk/samtools/util/CloseableIteratorTest.java b/src/tests/java/htsjdk/samtools/util/CloseableIteratorTest.java
new file mode 100644
index 0000000..b96d1f6
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/util/CloseableIteratorTest.java
@@ -0,0 +1,31 @@
+package htsjdk.samtools.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+public class CloseableIteratorTest {
+ @Test
+ public void testToList() {
+ final List<Integer> expected = Arrays.asList(1,2,3,4,5);
+ final PeekableIterator<Integer> peeky = new PeekableIterator<>(expected.iterator());
+ final List<Integer> actual = peeky.toList();
+
+ Assert.assertEquals(actual, expected);
+ Assert.assertEquals(peeky.toList(), new ArrayList<>()); // Should be empty the second time
+ }
+
+ @Test
+ public void testToStream() {
+ final List<Integer> inputs = Arrays.asList(1,2,3,4,5);
+ final PeekableIterator<Integer> peeky = new PeekableIterator<>(inputs.iterator());
+ final List<Integer> expected = inputs.stream().map(i -> i*2).collect(Collectors.toList());
+ final List<Integer> actual = peeky.stream().map(i -> i*2).collect(Collectors.toList());
+
+ Assert.assertEquals(actual, expected);
+ }
+}
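
The toList() and stream() conveniences tested above via PeekableIterator appear, given the CloseableIterator.java change in this commit, to be defined on CloseableIterator itself, so any htsjdk iterator implementing it gains them. A hedged sketch against an indexed BAM, not part of the commit (file and region are illustrative):

import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;

import java.io.File;
import java.io.IOException;
import java.util.List;

public class RegionToList {
    public static void main(final String[] args) throws IOException {
        // requires a coordinate-sorted, indexed BAM
        try (final SamReader reader = SamReaderFactory.makeDefault().open(new File(args[0]))) {
            // toList() drains the iterator into a List; a second call would return an
            // empty list, per the test above
            final List<SAMRecord> overlapping = reader.query("1", 100, 200, false).toList();
            System.out.println(overlapping.size() + " records overlap 1:100-200");
        }
    }
}
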
diff --git a/src/tests/java/htsjdk/samtools/util/IntelDeflaterTest.java b/src/tests/java/htsjdk/samtools/util/IntelDeflaterTest.java
new file mode 100755
index 0000000..54c919b
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/util/IntelDeflaterTest.java
@@ -0,0 +1,130 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ */
+package htsjdk.samtools.util;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMFileWriter;
+import htsjdk.samtools.SAMFileWriterFactory;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.util.zip.DeflaterFactory;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+import java.util.concurrent.ExecutionException;
+
+/**
+ * This is a test for IntelDeflater.
+ */
+
+public class IntelDeflaterTest {
+ static final File TEST_DIR = new File("testdata/htsjdk/samtools");
+
+ @DataProvider(name="TestIntelDeflaterIsLoadedData")
+ Iterator<Object[]> TestIntelDeflaterIsLoadedData(){
+
+ List<File> files = CollectionUtil.makeList(
+ new File(TEST_DIR, "coordinate_sorted.sam"),
+ new File(TEST_DIR, "queryname_sorted.sam"),
+ new File(TEST_DIR, "compressed.bam"),
+ new File(TEST_DIR, "empty.bam"),
+ new File(TEST_DIR, "cram_with_bai_index.cram"),
+ new File(TEST_DIR, "uncompressed.sam"),
+ new File(TEST_DIR, "cram_with_crai_index.cram"));
+
+ List<Boolean> eagerlyDecodes = CollectionUtil.makeList(Boolean.TRUE, Boolean.FALSE);
+ List<Integer> compressionLevels = CollectionUtil.makeList(1, 2, 3, 4, 5, 6, 7, 8, 9);
+
+ List<Object[]> retVal = new ArrayList<>();
+ files.stream()
+ .forEach(file ->
+ eagerlyDecodes.stream()
+ .forEach(eagerlyDecode -> compressionLevels.stream()
+ .forEach(compressionLevel ->
+ retVal.add(new Object[]{file, eagerlyDecode, compressionLevel}))));
+ return retVal.iterator();
+ }
+
+ @Test(dataProvider = "TestIntelDeflaterIsLoadedData", groups="intel",expectedExceptions = IllegalAccessError.class)
+ public void TestIntelDeflatorIsLoaded(final File inputFile, final Boolean eagerlyDecode,final Integer compressionLevel) throws IOException,IllegalAccessError {
+
+ Log log = Log.getInstance(IntelDeflaterTest.class);
+ Log.setGlobalLogLevel(Log.LogLevel.INFO);
+
+ log.info("In TestIntelDeflatorIsLoaded. testing: " + inputFile);
+ IOUtil.assertFileIsReadable(inputFile);
+
+ final File outputFile = File.createTempFile("IntelDeflater", "bam");
+ outputFile.deleteOnExit();
+
+
+ Assert.assertTrue(DeflaterFactory.usingIntelDeflater(), "IntelDeflater is not loaded.");
+ log.info("IntelDeflater is loaded");
+
+
+ SamReaderFactory readerFactory = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT);
+ if (eagerlyDecode) {
+ readerFactory = readerFactory.enable(SamReaderFactory.Option.EAGERLY_DECODE);
+ }
+
+ if(inputFile.getName().endsWith(".cram")) {
+ readerFactory.referenceSequence(new File(TEST_DIR, "hg19mini.fasta"));
+ }
+
+ final SamReader reader = readerFactory.open(inputFile);
+ final SAMFileHeader header = reader.getFileHeader();
+ int nRecords = 0;
+ try (final SAMFileWriter writer = new SAMFileWriterFactory().makeBAMWriter(header, true, outputFile, compressionLevel)) {
+ for (final SAMRecord record : reader) {
+ writer.addAlignment(record);
+ nRecords++;
+ }
+ } catch (Exception e) {
+ Assert.fail("Error reading record no. " + nRecords);
+ }
+
+ log.info("wrote " + nRecords + " Records");
+
+ int nReadRecords = 0;
+ try (final SamReader outputReader = readerFactory.open(outputFile)) {
+ for (final SAMRecord ignored : outputReader) {
+ nReadRecords++;
+ }
+ } catch (Exception e) {
+ Assert.fail("Error reading record written with the IntelDeflater library");
+ }
+ log.info("read " + nReadRecords + " Records");
+
+ Assert.assertEquals(nReadRecords, nRecords, "Number of read records mismatches number of written records.");
+
+ throw new IllegalAccessError("Got to the end successfully! (i.e. no segmentation fault)");
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
index a5157cf..c5c797e 100644
--- a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
+++ b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
@@ -409,4 +409,24 @@ public class SequenceUtilTest {
private char toLowerCase(final char base) {
return (char) (toUpperCase(base) + 32);
}
+
+ @Test(dataProvider = "testGetSamReadNameFromFastqHeader")
+ public void testGetSamReadNameFromFastqHeader(final String fastqHeader,
+ final String expectedSamReadName) {
+ Assert.assertEquals(SequenceUtil.getSamReadNameFromFastqHeader(fastqHeader), expectedSamReadName);
+ }
+
+ @DataProvider(name = "testGetSamReadNameFromFastqHeader")
+ public Object[][] testGetSamReadNameFromFastqHeaderTestCases() {
+ return new Object[][] {
+ {"Simple:Name", "Simple:Name"},
+ {"Simple:Name", "Simple:Name"},
+ {"Name/1", "Name"},
+ {"Name/2", "Name"},
+ {"Name/3", "Name/3"},
+ {"Simple:Name Blank", "Simple:Name"},
+ {"Simple:Name Blank /1", "Simple:Name"},
+ {"Name/1/2", "Name"}
+ };
+ }
}
diff --git a/src/tests/java/htsjdk/tribble/AbstractFeatureReaderTest.java b/src/tests/java/htsjdk/tribble/AbstractFeatureReaderTest.java
index ce5a624..f266bc2 100644
--- a/src/tests/java/htsjdk/tribble/AbstractFeatureReaderTest.java
+++ b/src/tests/java/htsjdk/tribble/AbstractFeatureReaderTest.java
@@ -6,9 +6,14 @@ import htsjdk.tribble.readers.LineIterator;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.File;
import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
import static org.testng.Assert.*;
@@ -47,4 +52,60 @@ public class AbstractFeatureReaderTest {
assertNotNull(feat);
}
}
+
+ @DataProvider(name = "blockCompressedExtensionExtensionStrings")
+ public Object[][] createBlockCompressedExtensionStrings() {
+ return new Object[][] {
+ { "testzip.gz", true },
+ { "test.gzip", true },
+ { "test.bgz", true },
+ { "test.bgzf", true },
+ { "test.bzip2", false }
+ };
+ }
+
+ @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionStrings")
+ public void testBlockCompressionExtensionString(final String testString, final boolean expected) {
+ Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testString), expected);
+ }
+
+ @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionStrings")
+ public void testBlockCompressionExtensionFile(final String testString, final boolean expected) {
+ Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(new File(testString)), expected);
+ }
+
+ @DataProvider(name = "blockCompressedExtensionExtensionURIStrings")
+ public Object[][] createBlockCompressedExtensionURIs() {
+ return new Object[][]{
+ {"testzip.gz", true},
+ {"test.gzip", true},
+ {"test.bgz", true},
+ {"test.bgzf", true},
+ {"test", false},
+ {"test.bzip2", false},
+
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gzip", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgz", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgzf", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bzip2", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877", false},
+
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz?alt=media", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gzip?alt=media", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgz?alt=media", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgzf?alt=media", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bzip2?alt=media", false},
+
+ {"ftp://ftp.broadinstitute.org/distribution/igv/TEST/cpgIslands.hg18.gz", true},
+ {"ftp://ftp.broadinstitute.org/distribution/igv/TEST/cpgIslands.hg18.bed", false}
+ };
+ }
+
+ @Test(enabled = true, dataProvider = "blockCompressedExtensionExtensionURIStrings")
+ public void testBlockCompressionExtension(final String testURIString, final boolean expected) throws URISyntaxException {
+ URI testURI = URI.create(testURIString);
+ Assert.assertEquals(AbstractFeatureReader.hasBlockCompressedExtension(testURI), expected);
+ }
+
}
diff --git a/src/tests/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java b/src/tests/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java
new file mode 100644
index 0000000..76bd410
--- /dev/null
+++ b/src/tests/java/htsjdk/tribble/TribbleIndexFeatureReaderTest.java
@@ -0,0 +1,86 @@
+package htsjdk.tribble;
+
+import htsjdk.tribble.readers.LineIterator;
+import htsjdk.tribble.TestUtils;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFCodec;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class TribbleIndexFeatureReaderTest {
+
+ @DataProvider(name = "extensionURIStrings")
+ public Object[][] createBlockCompressedExtensionURIs() {
+ return new Object[][]{
+ {"testzip.gz", true},
+ {"testzip.GZ", true},
+ {"testzip.gZ", true},
+ {"testzip.Gz", true},
+
+ {"test", false},
+ {"test.gzip", false},
+ {"test.bgz", false},
+ {"test.bgzf", false},
+ {"test.bzip2", false},
+
+ {"file://testzip.gz", true},
+ {"file://apath/testzip.gz", true},
+
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.GZ", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gzip", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgz", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgzf", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bzip2", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877", false},
+
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz?alt=media", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.GZ?alt=media", true},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gzip?alt=media", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgz?alt=media", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bgzf?alt=media", false},
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.bzip2?alt=media", false},
+
+ {"ftp://ftp.broadinstitute.org/distribution/igv/TEST/cpgIslands.hg18.gz", true},
+ {"ftp://ftp.broadinstitute.org/distribution/igv/TEST/cpgIslands.hg18.bed", false},
+
+ {"https://www.googleapis.com/download/storage/v1/b/deflaux-public-test/o/NA12877.vcf.gz", true},
+ };
+ }
+
+ @Test(enabled = true, dataProvider = "extensionURIStrings")
+ public void testGZExtension(final String testString, final boolean expected) throws URISyntaxException {
+ Assert.assertEquals(TribbleIndexedFeatureReader.isGZIPPath(testString), expected);
+ }
+
+ @DataProvider(name = "featureFileStrings")
+ public Object[][] createFeatureFileStrings() {
+ return new Object[][]{
+ {TestUtils.DATA_DIR + "test.vcf", 5},
+ {TestUtils.DATA_DIR + "test.vcf.gz", 5}
+ };
+ }
+
+ @Test(dataProvider = "featureFileStrings")
+ public void testIndexedGZIPVCF(final String testPath, final int expectedCount) throws IOException {
+ final VCFCodec codec = new VCFCodec();
+ try (final TribbleIndexedFeatureReader<VariantContext, LineIterator> featureReader =
+ new TribbleIndexedFeatureReader(testPath, codec, false)) {
+ final CloseableTribbleIterator<VariantContext> localIterator = featureReader.iterator();
+ int count = 0;
+ for (final Feature feat : featureReader.iterator()) {
+ localIterator.next();
+ count++;
+ }
+ Assert.assertEquals(count, expectedCount);
+ }
+ }
+
+}
diff --git a/src/tests/java/htsjdk/tribble/index/tabix/TabixIndexTest.java b/src/tests/java/htsjdk/tribble/index/tabix/TabixIndexTest.java
index d929cf7..340ff70 100644
--- a/src/tests/java/htsjdk/tribble/index/tabix/TabixIndexTest.java
+++ b/src/tests/java/htsjdk/tribble/index/tabix/TabixIndexTest.java
@@ -24,13 +24,20 @@
package htsjdk.tribble.index.tabix;
import htsjdk.samtools.util.BlockCompressedOutputStream;
+import htsjdk.tribble.index.IndexFactory;
import htsjdk.tribble.util.LittleEndianOutputStream;
import htsjdk.tribble.util.TabixUtils;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
+import htsjdk.variant.vcf.VCFCodec;
+import htsjdk.variant.vcf.VCFFileReader;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
+import java.util.Iterator;
public class TabixIndexTest {
private static final File SMALL_TABIX_FILE = new File("testdata/htsjdk/tribble/tabix/trioDup.vcf.gz.tbi");
@@ -57,10 +64,80 @@ public class TabixIndexTest {
@DataProvider(name = "readWriteTestDataProvider")
public Object[][] readWriteTestDataProvider() {
- return new Object[][] {
+ return new Object[][]{
{SMALL_TABIX_FILE},
{BIGGER_TABIX_FILE}
};
}
+ @Test
+ public void testQueryProvidedItemsAmount() {
+ final String VCF = "testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf";
+ // Note that we store only compressed files
+ final File plainTextVcfInputFile = new File(VCF);
+ plainTextVcfInputFile.deleteOnExit();
+ final File plainTextVcfIndexFile = new File(VCF + ".tbi");
+ plainTextVcfIndexFile.deleteOnExit();
+ final File compressedVcfInputFile = new File(VCF + ".gz");
+ final File compressedTbiIndexFile = new File(VCF + ".gz.tbi");
+ final VCFFileReader compressedVcfReader = new VCFFileReader(compressedVcfInputFile, compressedTbiIndexFile);
+
+ //create plain text VCF without "index on the fly" option
+ final VariantContextWriter plainTextVcfWriter = new VariantContextWriterBuilder()
+ .setOptions(VariantContextWriterBuilder.NO_OPTIONS)
+ .setOutputFile(VCF)
+ .build();
+ plainTextVcfWriter.writeHeader(compressedVcfReader.getFileHeader());
+ for (VariantContext vc : compressedVcfReader) {
+ if (vc != null) plainTextVcfWriter.add(vc);
+ }
+ plainTextVcfWriter.close();
+
+ IndexFactory.createTabixIndex(plainTextVcfInputFile,
+ new VCFCodec(),
+ TabixFormat.VCF,
+ new VCFFileReader(plainTextVcfInputFile, false).getFileHeader().getSequenceDictionary()
+ ) // create TabixIndex straight from plaintext VCF
+ .write(plainTextVcfIndexFile); // write it
+
+ final VCFFileReader plainTextVcfReader = new VCFFileReader(plainTextVcfInputFile, plainTextVcfIndexFile);
+ // Now we have both plaintext and compressed VCFs with their Tabix indexes, so we can test that both are queryable
+
+ // magic numbers chosen by inspecting the provided VCF file
+ try {
+ // just somewhere in middle of chromosome
+ Assert.assertEquals(42, countIteratedElements(compressedVcfReader.query("1", 868379 - 1, 1006891 + 1)));
+ Assert.assertEquals(42, countIteratedElements(plainTextVcfReader.query("1", 868379 - 1, 1006891 + 1)));
+ // chromosome start
+ Assert.assertEquals(13, countIteratedElements(compressedVcfReader.query("1", 1, 836463 + 1)));
+ Assert.assertEquals(13, countIteratedElements(plainTextVcfReader.query("1", 1, 836463 + 1)));
+ // chromosome end
+ Assert.assertEquals(36, countIteratedElements(compressedVcfReader.query("1", 76690833 - 1, 76837502 + 11111111)));
+ Assert.assertEquals(36, countIteratedElements(plainTextVcfReader.query("1", 76690833 - 1, 76837502 + 11111111)));
+ // a region in the middle of the chromosome containing no features
+ Assert.assertEquals(0, countIteratedElements(compressedVcfReader.query("1", 36606472 + 1, 36623523 - 1)));
+ Assert.assertEquals(0, countIteratedElements(plainTextVcfReader.query("1", 36606472 + 1, 36623523 - 1)));
+ // before chromosome
+ Assert.assertEquals(0, countIteratedElements(compressedVcfReader.query("1", 1, 10)));
+ Assert.assertEquals(0, countIteratedElements(plainTextVcfReader.query("1", 1, 10)));
+ // after chromosome
+ Assert.assertEquals(0, countIteratedElements(compressedVcfReader.query("1", 76837502 * 15, 76837502 * 16)));
+ Assert.assertEquals(0, countIteratedElements(plainTextVcfReader.query("1", 76837502 * 15, 76837502 * 16)));
+ } catch (NullPointerException e) {
+ Assert.fail("Exception caught on querying: ", e);
+ // before the fix, an exception was thrown from 'TabixIndex.getBlocks()' at 'chunks.size()' because 'chunks == null' for plain-text files
+ } finally {
+ plainTextVcfReader.close();
+ compressedVcfReader.close();
+ }
+ }
+
+ private static int countIteratedElements(Iterator iterator) {
+ int counter = 0;
+ while (iterator.hasNext()) {
+ iterator.next();
+ counter++;
+ }
+ return counter;
+ }
}
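
testQueryProvidedItemsAmount above exercises the fix that makes a plain-text VCF queryable once a Tabix index has been built for it. A condensed sketch of that pattern, not part of the diff (paths and the query region are illustrative; the VCF must be coordinate-sorted):

import htsjdk.samtools.util.CloseableIterator;
import htsjdk.tribble.index.IndexFactory;
import htsjdk.tribble.index.tabix.TabixFormat;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFCodec;
import htsjdk.variant.vcf.VCFFileReader;

import java.io.File;
import java.io.IOException;

public class PlainVcfTabixQuery {
    public static void main(final String[] args) throws IOException {
        final File vcf = new File(args[0]);              // plain-text, coordinate-sorted VCF
        final File tbi = new File(vcf.getPath() + ".tbi");

        // create a TabixIndex straight from the plain-text VCF and write it next to the file
        IndexFactory.createTabixIndex(vcf, new VCFCodec(), TabixFormat.VCF,
                new VCFFileReader(vcf, false).getFileHeader().getSequenceDictionary())
                .write(tbi);

        // the plain-text file is now queryable just like the bgzipped one
        try (final VCFFileReader reader = new VCFFileReader(vcf, tbi)) {
            final CloseableIterator<VariantContext> it = reader.query("1", 1, 1000000);
            int count = 0;
            while (it.hasNext()) {
                it.next();
                count++;
            }
            it.close();
            System.out.println(count + " records in 1:1-1000000");
        }
    }
}
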
diff --git a/src/tests/java/htsjdk/variant/PrintVariantsExampleTest.java b/src/tests/java/htsjdk/variant/PrintVariantsExampleTest.java
new file mode 100644
index 0000000..8bb95c5
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/PrintVariantsExampleTest.java
@@ -0,0 +1,65 @@
+/*
+* Copyright (c) 2012 The Broad Institute
+*
+* Permission is hereby granted, free of charge, to any person
+* obtaining a copy of this software and associated documentation
+* files (the "Software"), to deal in the Software without
+* restriction, including without limitation the rights to use,
+* copy, modify, merge, publish, distribute, sublicense, and/or sell
+* copies of the Software, and to permit persons to whom the
+* Software is furnished to do so, subject to the following
+* conditions:
+*
+* The above copyright notice and this permission notice shall be
+* included in all copies or substantial portions of the Software.
+*
+* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+package htsjdk.variant;
+
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.variant.example.PrintVariantsExample;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.List;
+import java.util.OptionalInt;
+import java.util.stream.IntStream;
+
+public class PrintVariantsExampleTest {
+ @Test
+ public void testExampleWriteFile() throws IOException {
+ final File tempFile = File.createTempFile("example", ".vcf");
+ tempFile.deleteOnExit();
+ File f1 = new File("testdata/htsjdk/variant/ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf");
+ final String[] args = {
+ f1.getAbsolutePath(),
+ tempFile.getAbsolutePath()
+ };
+ Assert.assertEquals(tempFile.length(), 0);
+ PrintVariantsExample.main(args);
+ Assert.assertNotEquals(tempFile.length(), 0);
+
+ assertFilesEqualSkipHeaders(tempFile, f1);
+ }
+
+ private void assertFilesEqualSkipHeaders(File tempFile, File f1) throws FileNotFoundException {
+ final List<String> lines1 = IOUtil.slurpLines(f1);
+ final List<String> lines2 = IOUtil.slurpLines(tempFile);
+ final int firstNonComment1 = IntStream.range(0, lines1.size()).filter(i -> !lines1.get(i).startsWith("#")).findFirst().getAsInt();
+ final int firstNonComment2 = IntStream.range(0, lines2.size()).filter(i -> !lines2.get(i).startsWith("#")).findFirst().getAsInt();
+ Assert.assertEquals(lines1.subList(firstNonComment1, lines1.size()), lines2.subList(firstNonComment2,lines2.size()));
+ }
+}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
index 0990628..bd00b75 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/VariantJEXLContextUnitTest.java
@@ -25,6 +25,7 @@
package htsjdk.variant.variantcontext;
+import htsjdk.samtools.util.Log;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.VariantContextUtils.JexlVCMatchExp;
@@ -40,11 +41,12 @@ import java.util.Map;
/**
*
- * @author aaron
+ * @author aaron
+ * @author bimber
*
* Class VariantJEXLContextUnitTest
*
- * Test out parts of the VariantJEXLContext
+ * Test out parts of the VariantJEXLContext and GenotypeJEXLContext
*/
public class VariantJEXLContextUnitTest extends VariantBaseTest {
@@ -101,8 +103,12 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
"FTPASS", VariantContextUtils.engine.get().createExpression("FT==\"PASS\""));
JexlVCMatchExp failFT = new VariantContextUtils.JexlVCMatchExp(
"FTBadCall", VariantContextUtils.engine.get().createExpression("FT==\"BadCall\""));
-
- List<JexlVCMatchExp> jexlTests = Arrays.asList(passFlag, passFT, failFT);
+ JexlVCMatchExp AD1 = new VariantContextUtils.JexlVCMatchExp(
+ "AD1", VariantContextUtils.engine.get().createExpression("g.hasAD() && g.getAD().0==1"));
+ JexlVCMatchExp AD2 = new VariantContextUtils.JexlVCMatchExp(
+ "AD2", VariantContextUtils.engine.get().createExpression("g.hasAD() && g.getAD().1==2"));
+
+ List<JexlVCMatchExp> jexlTests = Arrays.asList(passFlag, passFT, failFT, AD1, AD2);
Map<VariantContextUtils.JexlVCMatchExp, Boolean> map;
List<Allele> alleles = Arrays.asList(Aref, T);
@@ -112,40 +118,46 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
GenotypeBuilder gb = new GenotypeBuilder("SAMPLE", alleles);
Genotype genoNull = gb.make();
- Genotype genoPass = gb.filters("PASS").make();
- Genotype genoFail = gb.filters("BadCall").make();
-
+ Genotype genoPass = gb.filters("PASS").AD(new int[]{1,2}).DP(3).make();
+ Genotype genoFail = gb.filters("BadCall").AD(null).DP(0).make();
+
// Create the JEXL Maps using the combinations above of vc* and geno*
map = new JEXLMap(jexlTests,vcPass, genoPass);
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 3);
+ Assert.assertEquals(map.size(), 5);
Assert.assertTrue(map.get(passFlag));
Assert.assertTrue(map.get(passFT));
Assert.assertFalse(map.get(failFT));
-
- map = new JEXLMap(jexlTests,vcPass, genoFail);
+ Assert.assertTrue(map.get(AD1));
+ Assert.assertTrue(map.get(AD2));
+
+ map = new JEXLMap(jexlTests, vcPass, genoFail);
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 3);
+ Assert.assertEquals(map.size(), 5);
Assert.assertFalse(map.get(passFlag));
Assert.assertFalse(map.get(passFT));
Assert.assertTrue(map.get(failFT));
-
+ Assert.assertFalse(map.get(AD1));
+ Assert.assertFalse(map.get(AD2));
+
// Null genotype filter is equivalent to explicit "FT==PASS"
- map = new JEXLMap(jexlTests,vcPass, genoNull);
+ map = new JEXLMap(jexlTests, vcPass, genoNull);
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 3);
+ Assert.assertEquals(map.size(), 5);
Assert.assertTrue(map.get(passFlag));
Assert.assertTrue(map.get(passFT));
Assert.assertFalse(map.get(failFT));
+ Assert.assertFalse(map.get(AD1));
+ Assert.assertFalse(map.get(AD2));
// Variant-level filters should have no effect here
map = new JEXLMap(jexlTests,vcFail, genoPass);
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 3);
+ Assert.assertEquals(map.size(), 5);
Assert.assertTrue(map.get(passFlag));
Assert.assertTrue(map.get(passFT));
Assert.assertFalse(map.get(failFT));
@@ -153,7 +165,7 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
map = new JEXLMap(jexlTests,vcFail, genoFail);
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 3);
+ Assert.assertEquals(map.size(), 5);
Assert.assertFalse(map.get(passFlag));
Assert.assertFalse(map.get(passFT));
Assert.assertTrue(map.get(failFT));
@@ -161,7 +173,7 @@ public class VariantJEXLContextUnitTest extends VariantBaseTest {
map = new JEXLMap(jexlTests,vcFail, genoNull);
// make sure the context has a value
Assert.assertTrue(!map.isEmpty());
- Assert.assertEquals(map.size(), 3);
+ Assert.assertEquals(map.size(), 5);
Assert.assertTrue(map.get(passFlag));
Assert.assertTrue(map.get(passFT));
Assert.assertFalse(map.get(failFT));
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java
similarity index 88%
rename from src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java
rename to src/tests/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java
index 0964309..ffb214b 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringIteratorTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/FilteringVariantContextIteratorTest.java
@@ -33,10 +33,10 @@ import org.testng.annotations.Test;
import java.io.File;
/**
- * Tests for testing the (VariantContext)FilteringIterator, and the HeterozygosityFilter
+ * Tests for testing the (VariantContext)FilteringVariantContextIterator, and the HeterozygosityFilter
*/
-public class FilteringIteratorTest {
+public class FilteringVariantContextIteratorTest {
final File testDir = new File("testdata/htsjdk/variant");
@DataProvider
@@ -57,7 +57,7 @@ public class FilteringIteratorTest {
final File vcf = new File(testDir,"ex2.vcf");
final VCFFileReader vcfReader = new VCFFileReader(vcf, false);
- final FilteringIterator filteringIterator = new FilteringIterator(vcfReader.iterator(), filter);
+ final FilteringVariantContextIterator filteringIterator = new FilteringVariantContextIterator(vcfReader.iterator(), filter);
int count = 0;
for(final VariantContext vc : filteringIterator) {
@@ -82,7 +82,7 @@ public class FilteringIteratorTest {
final VCFFileReader vcfReader = new VCFFileReader(vcf, false);
final HeterozygosityFilter heterozygosityFilter = new HeterozygosityFilter(true, sample);
- new FilteringIterator(vcfReader.iterator(), heterozygosityFilter).next();
+ new FilteringVariantContextIterator(vcfReader.iterator(), heterozygosityFilter).next();
}
}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
index 5ceed9f..b4cd3a8 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/HeterozygosityFilterTest.java
@@ -108,7 +108,7 @@ public class HeterozygosityFilterTest {
@Test(dataProvider = "variantsProvider")
public void testFilteringIterator(final Iterator<VariantContext> vcs, final int[] passingPositions) {
- final Iterator<VariantContext> filteringIterator = new FilteringIterator(vcs, new HeterozygosityFilter(true, "test"));
+ final Iterator<VariantContext> filteringIterator = new FilteringVariantContextIterator(vcs, new HeterozygosityFilter(true, "test"));
int i = 0;
while (filteringIterator.hasNext()) {
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java
index 52a5889..5c3cf54 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java
@@ -59,7 +59,7 @@ public class JavascriptVariantFilterTest {
vcfReader.close();
return;
}
- final FilteringIterator iter = new FilteringIterator(vcfReader.iterator(), filter);
+ final FilteringVariantContextIterator iter = new FilteringVariantContextIterator(vcfReader.iterator(), filter);
int count = 0;
while (iter.hasNext()) {
iter.next();
diff --git a/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
index 9b8f6e8..8e460c2 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/writer/VariantContextWriterBuilderUnitTest.java
@@ -32,6 +32,7 @@ import htsjdk.tribble.AbstractFeatureReader;
import htsjdk.tribble.Tribble;
import htsjdk.tribble.util.TabixUtils;
import htsjdk.variant.VariantBaseTest;
+import htsjdk.variant.variantcontext.writer.Options;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.Test;
@@ -186,7 +187,7 @@ public class VariantContextWriterBuilderUnitTest extends VariantBaseTest {
.setOutputFile(vcf);
VariantContextWriter writer = builder.build();
- Assert.assertEquals(writer instanceof AsyncVariantContextWriter, Defaults.USE_ASYNC_IO, "testAsync default");
+ Assert.assertEquals(writer instanceof AsyncVariantContextWriter, Defaults.USE_ASYNC_IO_FOR_TRIBBLE, "testAsync default");
writer = builder.setOption(Options.USE_ASYNC_IO).build();
Assert.assertTrue(writer instanceof AsyncVariantContextWriter, "testAsync option=set");
@@ -359,4 +360,18 @@ public class VariantContextWriterBuilderUnitTest extends VariantBaseTest {
final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().clearOptions();
Assert.assertTrue(builder.options.isEmpty());
}
+
+ @Test
+ public void testModifyOption() {
+ final VariantContextWriterBuilder builder = new VariantContextWriterBuilder().clearOptions();
+ for (final Options option : Options.values()) {
+ Assert.assertFalse(builder.isOptionSet(option)); // shouldn't be set
+ builder.modifyOption(option, false);
+ Assert.assertFalse(builder.isOptionSet(option)); // still shouldn't be set
+ builder.modifyOption(option, true);
+ Assert.assertTrue(builder.isOptionSet(option)); // now is set
+ builder.modifyOption(option, false);
+ Assert.assertFalse(builder.isOptionSet(option)); // has been unset
+ }
+ }
}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java b/src/tests/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
index 0d5c523..044be8a 100644
--- a/src/tests/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
+++ b/src/tests/java/htsjdk/variant/vcf/AbstractVCFCodecTest.java
@@ -1,16 +1,16 @@
package htsjdk.variant.vcf;
-import java.io.File;
-import java.util.List;
-
import htsjdk.tribble.TribbleException;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.VariantContext;
-
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.File;
+import java.util.List;
+
public class AbstractVCFCodecTest extends VariantBaseTest {
@@ -34,4 +34,20 @@ public class AbstractVCFCodecTest extends VariantBaseTest {
public void TestSpanDelParseAllelesException(){
List<Allele> list1 = VCF3Codec.parseAlleles(Allele.SPAN_DEL_STRING, "A", 0);
}
+
+ @DataProvider(name="thingsToTryToDecode")
+ public Object[][] getThingsToTryToDecode(){
+ return new Object[][] {
+ {"testdata/htsjdk/tribble/tabix/testTabixIndex.vcf", true},
+ {"testdata/htsjdk/tribble/tabix/testTabixIndex.vcf.gz", true},
+ {"testdata/htsjdk/tribble/nonexistant.garbage", false},
+ {"testdata/htsjdk/tribble/testIntervalList.list", false}
+ };
+ }
+
+ @Test(dataProvider = "thingsToTryToDecode")
+ public void testCanDecodeFile(String potentialInput, boolean canDecode) {
+ Assert.assertEquals(AbstractVCFCodec.canDecodeFile(potentialInput, VCFCodec.VCF4_MAGIC_HEADER), canDecode);
+ }
+
}
diff --git a/testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf.gz b/testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf.gz
new file mode 100644
index 0000000..d98edfb
Binary files /dev/null and b/testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf.gz differ
diff --git a/testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf.gz.tbi b/testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf.gz.tbi
new file mode 100644
index 0000000..e80441b
Binary files /dev/null and b/testdata/htsjdk/tribble/tabix/YRI.trio.2010_07.indel.sites.vcf.gz.tbi differ
diff --git a/testdata/htsjdk/tribble/test.vcf b/testdata/htsjdk/tribble/test.vcf
new file mode 100644
index 0000000..27d4500
--- /dev/null
+++ b/testdata/htsjdk/tribble/test.vcf
@@ -0,0 +1,24 @@
+##fileformat=VCFv4.1
+##fileDate=20090805
+##source=myImputationProgramV3.1
+##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta
+##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x>
+##phasing=partial
+##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
+##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
+##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
+##FILTER=<ID=q10,Description="Quality below 10">
+##FILTER=<ID=s50,Description="Less than 50% of samples have data">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003
+20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ 0|0:48:1:51,51 1|0:48:8:51,51 1/1:43:5:.,.
+20 17330 . T A 3 q10 NS=3;DP=11;AF=0.017 GT:GQ:DP:HQ 0|0:49:3:58,50 0|1:3:5:65,3 0/0:41:3
+20 1110696 rs6040355 A G,T 67 PASS NS=2;DP=10;AF=0.333,0.667;AA=T;DB GT:GQ:DP:HQ 1|2:21:6:23,27 2|1:2:0:18,2 2/2:35:4
+20 1230237 . T . 47 PASS NS=3;DP=13;AA=T GT:GQ:DP:HQ 0|0:54:7:56,60 0|0:48:4:51,51 0/0:61:2
+20 1234567 microsat1 GTC G,GTCT 50 PASS NS=3;DP=9;AA=G GT:GQ:DP 0/1:35:4 0/2:17:2 1/1:40:3
diff --git a/testdata/htsjdk/tribble/test.vcf.gz b/testdata/htsjdk/tribble/test.vcf.gz
new file mode 100644
index 0000000..0033290
Binary files /dev/null and b/testdata/htsjdk/tribble/test.vcf.gz differ
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htsjdk.git