[med-svn] [htsjdk] 01/06: Imported Upstream version 2.1.1+dfsg.1
Vincent Danjean
vdanjean at debian.org
Mon Mar 14 20:11:32 UTC 2016
This is an automated email from the git hooks/post-receive script.
vdanjean pushed a commit to branch master
in repository htsjdk.
commit a6a2d5484c843049b3fa689b9371737de611fb28
Author: Vincent Danjean <Vincent.Danjean at ens-lyon.org>
Date: Mon Mar 14 16:45:47 2016 +0100
Imported Upstream version 2.1.1+dfsg.1
---
.github/ISSUE_TEMPLATE.md | 21 +
.github/PULL_REQUEST_TEMPLATE.md | 11 +
.travis.yml | 2 +-
README.md | 2 +
build.sbt | 2 +-
build.xml | 62 +-
src/java/htsjdk/samtools/BAMFileReader.java | 2 +-
src/java/htsjdk/samtools/BAMFileWriter.java | 2 +-
src/java/htsjdk/samtools/BAMIndexMetaData.java | 4 +-
src/java/htsjdk/samtools/BamIndexValidator.java | 88 +-
...eWriter.java => CRAMContainerStreamWriter.java} | 364 ++++---
src/java/htsjdk/samtools/CRAMFileReader.java | 146 ++-
src/java/htsjdk/samtools/CRAMFileWriter.java | 427 +-------
src/java/htsjdk/samtools/CRAMIndexer.java | 130 ++-
src/java/htsjdk/samtools/CRAMIterator.java | 34 +-
src/java/htsjdk/samtools/Cigar.java | 85 +-
src/java/htsjdk/samtools/CigarElement.java | 5 +
src/java/htsjdk/samtools/CigarOperator.java | 25 +
src/java/htsjdk/samtools/Defaults.java | 4 +-
.../htsjdk/samtools/DiskBasedBAMFileIndex.java | 8 +-
src/java/htsjdk/samtools/DuplicateSetIterator.java | 16 +-
.../samtools/HighAccuracyDownsamplingIterator.java | 2 +-
.../htsjdk/samtools/MergingSamRecordIterator.java | 4 +-
src/java/htsjdk/samtools/SAMFileWriterFactory.java | 55 +-
src/java/htsjdk/samtools/SAMFileWriterImpl.java | 12 +-
.../htsjdk/samtools/SAMHeaderRecordComparator.java | 4 +-
src/java/htsjdk/samtools/SAMLineParser.java | 4 +-
src/java/htsjdk/samtools/SAMRecord.java | 243 +++--
.../samtools/SAMRecordCoordinateComparator.java | 6 +-
.../samtools/SAMRecordDuplicateComparator.java | 5 +-
.../samtools/SAMRecordQueryHashComparator.java | 8 +-
.../samtools/SAMRecordQueryNameComparator.java | 5 +-
src/java/htsjdk/samtools/SAMTextHeaderCodec.java | 2 +-
src/java/htsjdk/samtools/SAMUtils.java | 15 +-
src/java/htsjdk/samtools/SAMValidationError.java | 3 +-
src/java/htsjdk/samtools/SamFileHeaderMerger.java | 2 +-
src/java/htsjdk/samtools/SamFileValidator.java | 24 +-
src/java/htsjdk/samtools/SamFiles.java | 32 +-
src/java/htsjdk/samtools/SamReaderFactory.java | 29 +-
src/java/htsjdk/samtools/TextTagCodec.java | 12 +-
.../htsjdk/samtools/TextualBAMIndexWriter.java | 2 +-
src/java/htsjdk/samtools/cram/CRAIIndex.java | 2 +-
.../cram/build/CompressionHeaderFactory.java | 1118 ++++++++------------
.../samtools/cram/build/ContainerFactory.java | 29 +-
.../samtools/cram/build/ContainerParser.java | 81 +-
src/java/htsjdk/samtools/cram/build/CramIO.java | 24 +-
.../htsjdk/samtools/cram/build/CramNormalizer.java | 3 +
.../samtools/cram/build/Sam2CramRecordFactory.java | 2 -
.../htsjdk/samtools/cram/common/CramVersions.java | 6 +-
.../codec/CanonicalHuffmanIntegerCodec.java | 8 +-
.../huffman/codec/HuffmanParamsCalculator.java | 137 +++
.../cram/encoding/reader/DataReaderFactory.java | 24 +-
.../cram/encoding/reader/RefSeqIdReader.java | 242 +++++
.../encoding/readfeatures/BaseQualityScore.java | 6 +-
.../cram/encoding/readfeatures/InsertBase.java | 6 +-
.../cram/encoding/readfeatures/ReadBase.java | 11 +-
.../cram/encoding/readfeatures/Substitution.java | 7 +-
.../cram/lossy/QualityScorePreservation.java | 2 +-
.../htsjdk/samtools/cram/ref/ReferenceSource.java | 37 +
.../samtools/cram/structure/AlignmentSpan.java | 92 ++
.../htsjdk/samtools/cram/structure/Container.java | 12 +-
.../cram/structure/CramCompressionRecord.java | 10 +-
src/java/htsjdk/samtools/cram/structure/Slice.java | 29 +-
.../cram/structure/SubstitutionMatrix.java | 4 +-
.../htsjdk/samtools/fastq/BasicFastqWriter.java | 6 +-
src/java/htsjdk/samtools/fastq/FastqReader.java | 1 +
src/java/htsjdk/samtools/fastq/FastqRecord.java | 20 +-
.../samtools/filter/AbstractJavascriptFilter.java | 159 +++
.../samtools/filter/JavascriptSamRecordFilter.java | 101 ++
src/java/htsjdk/samtools/metrics/MetricBase.java | 4 +-
src/java/htsjdk/samtools/metrics/MetricsFile.java | 10 +-
.../reference/AbstractFastaSequenceFile.java | 2 +-
.../reference/IndexedFastaSequenceFile.java | 2 +-
src/java/htsjdk/samtools/sra/SRAAccession.java | 36 +-
.../samtools/util/AbstractProgressLogger.java | 8 +-
src/java/htsjdk/samtools/util/BinaryCodec.java | 8 +-
.../htsjdk/samtools/util/BufferedLineReader.java | 2 +-
src/java/htsjdk/samtools/util/CigarUtil.java | 4 +-
src/java/htsjdk/samtools/util/CollectionUtil.java | 59 +-
src/java/htsjdk/samtools/util/ComparableTuple.java | 28 +
src/java/htsjdk/samtools/util/DateParser.java | 30 +-
src/java/htsjdk/samtools/util/DiskBackedQueue.java | 2 +-
src/java/htsjdk/samtools/util/FormatUtil.java | 2 +-
src/java/htsjdk/samtools/util/IOUtil.java | 4 +-
src/java/htsjdk/samtools/util/IntervalList.java | 9 +-
src/java/htsjdk/samtools/util/LocusComparator.java | 4 +-
src/java/htsjdk/samtools/util/Log.java | 12 +-
src/java/htsjdk/samtools/util/Murmur3.java | 6 +-
src/java/htsjdk/samtools/util/ProcessExecutor.java | 2 +-
...Comparator.java => RuntimeScriptException.java} | 24 +-
.../htsjdk/samtools/util/SamLocusIterator.java | 38 +-
.../samtools/util/SamRecordTrackingBuffer.java | 6 +-
src/java/htsjdk/samtools/util/SequenceUtil.java | 157 ++-
.../htsjdk/samtools/util/SortingCollection.java | 4 +-
.../samtools/util/SortingLongCollection.java | 4 +-
src/java/htsjdk/samtools/util/StringUtil.java | 12 +-
src/java/htsjdk/samtools/util/TrimmingUtil.java | 67 ++
src/java/htsjdk/samtools/util/Tuple.java | 26 +
src/java/htsjdk/tribble/bed/BEDCodec.java | 4 +-
.../index/interval/IntervalIndexCreator.java | 8 +-
.../tribble/index/interval/IntervalTree.java | 8 +-
.../tribble/index/interval/IntervalTreeIndex.java | 2 +-
.../tribble/index/linear/LinearIndexCreator.java | 4 +-
src/java/htsjdk/tribble/readers/TabixReader.java | 4 +-
src/java/htsjdk/variant/bcf2/BCF2Codec.java | 2 +-
src/java/htsjdk/variant/bcf2/BCF2Utils.java | 4 +-
.../htsjdk/variant/variantcontext/CommonInfo.java | 4 +-
.../htsjdk/variant/variantcontext/Genotype.java | 8 +-
.../variantcontext/GenotypeLikelihoods.java | 161 ++-
.../variant/variantcontext/VariantContext.java | 39 +-
.../variantcontext/VariantContextBuilder.java | 14 +-
.../variantcontext/VariantContextComparator.java | 8 +-
.../variantcontext/VariantContextUtils.java | 62 +-
.../filter/JavascriptVariantFilter.java | 97 ++
.../variant/variantcontext/writer/BCF2Encoder.java | 2 +-
.../writer/SortingVariantContextWriterBase.java | 5 +-
src/java/htsjdk/variant/vcf/AbstractVCFCodec.java | 8 +-
src/java/htsjdk/variant/vcf/VCF3Codec.java | 2 +-
src/java/htsjdk/variant/vcf/VCFCodec.java | 2 +-
src/java/htsjdk/variant/vcf/VCFEncoder.java | 34 +-
src/java/htsjdk/variant/vcf/VCFHeaderLine.java | 20 +-
src/java/htsjdk/variant/vcf/VCFUtils.java | 2 +-
.../java/htsjdk/samtools/BAMFileWriterTest.java | 42 +-
.../java/htsjdk/samtools/CRAMBAIIndexerTest.java | 87 ++
.../java/htsjdk/samtools/CRAMComplianceTest.java | 4 +-
.../samtools/CRAMContainerStreamWriterTest.java | 184 ++++
.../java/htsjdk/samtools/CRAMFileIndexTest.java | 40 +-
.../java/htsjdk/samtools/CRAMFileReaderTest.java | 163 +++
.../samtools/CRAMFileWriterWithIndexTest.java | 6 +-
src/tests/java/htsjdk/samtools/CigarTest.java | 24 +
.../java/htsjdk/samtools/CramFileWriterTest.java | 48 +
.../java/htsjdk/samtools/SAMFileReaderTest.java | 2 +-
.../java/htsjdk/samtools/SAMRecordUnitTest.java | 162 ++-
src/tests/java/htsjdk/samtools/SAMUtilsTest.java | 37 +-
src/tests/java/htsjdk/samtools/SamFilesTest.java | 21 +
.../java/htsjdk/samtools/ValidateSamFileTest.java | 69 +-
.../samtools/cram/LosslessRoundTripTest.java | 68 ++
.../cram/build/CompressionHeaderFactoryTest.java | 208 ++++
.../samtools/cram/build/ContainerFactoryTest.java | 134 +++
.../samtools/cram/build/ContainerParserTest.java | 185 ++++
.../cram/encoding/huffman/codec/HuffmanTest.java | 18 +-
.../htsjdk/samtools/cram/structure/SliceTests.java | 36 +
.../htsjdk/samtools/fastq/FastqWriterTest.java | 73 ++
.../filter/JavascriptSamRecordFilterTest.java | 74 ++
.../java/htsjdk/samtools/sra/AbstractSRATest.java | 57 +
.../java/htsjdk/samtools/sra/SRAAccessionTest.java | 29 +
.../java/htsjdk/samtools/sra/SRAIndexTest.java | 56 +-
.../htsjdk/samtools/sra/SRALazyRecordTest.java | 12 +-
.../java/htsjdk/samtools/sra/SRAQueryTest.java | 67 +-
.../java/htsjdk/samtools/sra/SRAReferenceTest.java | 10 +-
src/tests/java/htsjdk/samtools/sra/SRATest.java | 115 +-
.../htsjdk/samtools/util/ComparableTupleTest.java | 61 ++
src/tests/java/htsjdk/samtools/util/IupacTest.java | 2 +-
.../htsjdk/samtools/util/SequenceUtilTest.java | 236 ++++-
.../htsjdk/samtools/util/TrimmingUtilTest.java | 70 ++
src/tests/java/htsjdk/samtools/util/TupleTest.java | 62 ++
.../htsjdk/tribble/util/ftp/FTPClientTest.java | 77 +-
.../GenotypeLikelihoodsUnitTest.java | 161 ++-
.../variant/variantcontext/GenotypeUnitTest.java | 2 +
.../filter/JavascriptVariantFilterTest.java | 72 ++
.../htsjdk/variant/vcf/VCFHeaderLineUnitTest.java | 43 +
.../java/htsjdk/variant/vcf/VCFHeaderUnitTest.java | 101 +-
.../BAMFileIndexTest/symlink_with_index.bam | 1 +
.../BAMFileIndexTest/symlink_with_index.bam.bai | 1 +
.../symlink_without_linked_index.bam | 1 +
.../ValidateSamFileTest/duplicated_reads.sam | 17 +
.../duplicated_reads_out_of_order.sam | 17 +
testdata/htsjdk/samtools/cram_query_sorted.cram | Bin 0 -> 13266 bytes
testdata/htsjdk/samtools/cram_query_sorted.fasta | 40 +
.../htsjdk/samtools/cram_query_sorted.fasta.fai | 8 +
testdata/htsjdk/samtools/samFilter01.js | 2 +
testdata/htsjdk/samtools/samFilter02.js | 9 +
testdata/htsjdk/variant/variantFilter01.js | 2 +
testdata/htsjdk/variant/variantFilter02.js | 20 +
174 files changed, 6000 insertions(+), 2200 deletions(-)
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..9c2648f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE.md
@@ -0,0 +1,21 @@
+### Verify
+Can you see anything in the logs?
+Make sure your issue is not already in the [htsjdk issue tracker](https://github.com/samtools/htsjdk/issues?q=)
+
+### Subject of the issue
+Describe your issue here.
+Provide **screenshots** , **stacktrace** , **logs** where appropriate.
+
+### Your environment
+* version of htsjdk
+* version of java
+* which OS
+
+### Steps to reproduce
+Tell us how to reproduce this issue. If possible, include a short code snippet to demonstrate the problem.
+
+### Expected behaviour
+Tell us what should happen
+
+### Actual behaviour
+Tell us what happens instead
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
new file mode 100644
index 0000000..2475bc9
--- /dev/null
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -0,0 +1,11 @@
+### Description
+
+Please explain the changes you made here.
+Explain the **motivation** for making this change. What existing problem does the pull request solve?
+
+### Checklist
+
+- [ ] Code compiles correctly
+- [ ] New tests covering changes and new functionality
+- [ ] All tests passing
+- [ ] Extended the README / documentation, if necessary
diff --git a/.travis.yml b/.travis.yml
index 75df51d..1f5abbe 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -3,7 +3,7 @@ sudo: true
jdk:
- oraclejdk8
install: ant
-script: ant all test
+script: ant all test sra-test
after_success:
- echo "TRAVIS_BRANCH='$TRAVIS_BRANCH'";
echo "JAVA_HOME='$JAVA_HOME'";
diff --git a/README.md b/README.md
index f634cec..b04d3ab 100644
--- a/README.md
+++ b/README.md
@@ -27,6 +27,8 @@ Java SE Major Release | End of Java SE Oracle Public Updates | Proposed End of S
* to be finalized
+HTSJDK is migrating to semantic versioning (http://semver.org/). We will eventually adhere to it strictly and bump our major version whenever there are breaking changes to our API, but until we more clearly define what constitutes our official API, clients should assume that every release potentially contains at least minor changes to public methods.
+
[1]: http://samtools.sourceforge.net
[2]: http://vcftools.sourceforge.net/specs.html
diff --git a/build.sbt b/build.sbt
index c437ca0..645f3fa 100644
--- a/build.sbt
+++ b/build.sbt
@@ -4,7 +4,7 @@ import sbt.Package.ManifestAttributes
name := "htsjdk"
-val buildVersion = "2.0.1"
+val buildVersion = "2.1.1"
organization := "com.github.samtools"
diff --git a/build.xml b/build.xml
index 44c2ef4..af0d741 100755
--- a/build.xml
+++ b/build.xml
@@ -41,7 +41,7 @@
<!-- Get GIT hash, if available, otherwise leave it blank. -->
<property name="repository.revision" value=""/>
- <property name="htsjdk-version" value="2.0.1"/>
+ <property name="htsjdk-version" value="2.1.1"/>
<property name="htsjdk-version-file" value="htsjdk.version.properties"/>
<property name="testng.verbosity" value="2"/>
<property name="test.debug.port" value="5005" /> <!-- override on the command line if desired -->
@@ -119,32 +119,46 @@
</target>
<!-- TEST -->
+ <macrodef name="run-test">
+ <attribute name="excludedTestGroups" default=""/>
+ <attribute name="includedTestGroups" default=""/>
+ <sequential>
+ <taskdef resource="testngtasks" classpathref="classpath"/>
+ <testng suitename="htsjdk-tests" classpathref="classpath" outputdir="${test.output}"
+ failureproperty="tests.failed" excludedgroups="@{excludedTestGroups}" groups="@{includedTestGroups}"
+ workingDir="${basedir}"
+ verbose="${testng.verbosity}">
+ <classpath>
+ <pathelement path="${classes}"/>
+ <pathelement path="${classes.test}"/>
+ <pathelement path="${scripts}"/>
+ </classpath>
+ <classfileset dir="${classes.test}">
+ <include name="**/Test*.class"/>
+ <include name="**/*Test.class"/>
+ </classfileset>
+ <jvmarg value="-Xmx2G"/>
+ </testng>
+ <junitreport todir="${dist}/test" >
+ <fileset dir="${test.output}">
+ <include name="*.xml"/>
+ </fileset>
+ <report format="noframes" todir="${dist}/test" styledir="etc/test"/>
+ </junitreport>
+ <copy file="etc/test/testng.css" todir="${dist}/test" overwrite="true"/>
+ <fail if="tests.failed" message="There were failed unit tests"/>
+ </sequential>
+ </macrodef>
+
<target name="test" depends="compile, set_excluded_test_groups" description="Run unit tests">
- <taskdef resource="testngtasks" classpathref="classpath"/>
- <testng suitename="htsjdk-tests" classpathref="classpath" outputdir="${test.output}"
- failureproperty="tests.failed" excludedgroups="${excludedTestGroups}" workingDir="${basedir}"
- verbose="${testng.verbosity}">
- <classpath>
- <pathelement path="${classes}"/>
- <pathelement path="${classes.test}"/>
- <pathelement path="${scripts}"/>
- </classpath>
- <classfileset dir="${classes.test}">
- <include name="**/Test*.class"/>
- <include name="**/*Test.class"/>
- </classfileset>
- <jvmarg value="-Xmx2G"/>
- </testng>
- <junitreport todir="${dist}/test" >
- <fileset dir="${test.output}">
- <include name="*.xml"/>
- </fileset>
- <report format="noframes" todir="${dist}/test" styledir="etc/test"/>
- </junitreport>
- <copy file="etc/test/testng.css" todir="${dist}/test" overwrite="true"/>
- <fail if="tests.failed" message="There were failed unit tests"/>
+ <run-test excludedTestGroups="${excludedTestGroups}, sra"/>
+ </target>
+
+ <target name="sra-test" depends="compile, set_excluded_test_groups" description="Run SRA unit tests">
+ <run-test includedTestGroups="sra" excludedTestGroups="${excludedTestGroups}"/>
</target>
+
<target name="single-test"
depends="compile, compile-tests"
description="Compile and run a single test.">
diff --git a/src/java/htsjdk/samtools/BAMFileReader.java b/src/java/htsjdk/samtools/BAMFileReader.java
index 7dca165..0c226e9 100644
--- a/src/java/htsjdk/samtools/BAMFileReader.java
+++ b/src/java/htsjdk/samtools/BAMFileReader.java
@@ -504,7 +504,7 @@ class BAMFileReader extends SamReader.ReaderImplementation {
source);
final int sequenceCount = stream.readInt();
- if (samFileHeader.getSequenceDictionary().size() > 0) {
+ if (!samFileHeader.getSequenceDictionary().isEmpty()) {
// It is allowed to have binary sequences but no text sequences, so only validate if both are present
if (sequenceCount != samFileHeader.getSequenceDictionary().size()) {
throw new SAMFormatException("Number of sequences in text header (" +
diff --git a/src/java/htsjdk/samtools/BAMFileWriter.java b/src/java/htsjdk/samtools/BAMFileWriter.java
index 31a3a48..52b7bb1 100644
--- a/src/java/htsjdk/samtools/BAMFileWriter.java
+++ b/src/java/htsjdk/samtools/BAMFileWriter.java
@@ -95,7 +95,7 @@ class BAMFileWriter extends SAMFileWriterImpl {
private BAMIndexer createBamIndex(final String path) {
try {
final String indexFileBase = path.endsWith(BamFileIoUtils.BAM_FILE_EXTENSION) ?
- path.substring(0, path.lastIndexOf(".")) : path;
+ path.substring(0, path.lastIndexOf('.')) : path;
final File indexFile = new File(indexFileBase + BAMIndex.BAMIndexSuffix);
if (indexFile.exists()) {
if (!indexFile.canWrite()) {
diff --git a/src/java/htsjdk/samtools/BAMIndexMetaData.java b/src/java/htsjdk/samtools/BAMIndexMetaData.java
index efd4f77..699aed3 100644
--- a/src/java/htsjdk/samtools/BAMIndexMetaData.java
+++ b/src/java/htsjdk/samtools/BAMIndexMetaData.java
@@ -64,7 +64,7 @@ public class BAMIndexMetaData {
BAMIndexMetaData(List<Chunk> chunkList) {
noCoordinateRecords = 0;
- if (chunkList == null || chunkList.size() == 0) {
+ if (chunkList == null || chunkList.isEmpty()) {
// System.out.println("No metadata chunks");
} else if (chunkList.size() != 2) {
throw new SAMException("Unexpected number of metadata chunks " + (chunkList.size()));
@@ -153,7 +153,7 @@ public class BAMIndexMetaData {
final int alignmentStart = slice.alignmentStart;
if (alignmentStart == SAMRecord.NO_ALIGNMENT_START) {
- incrementNoCoordinateRecordCount();
+ noCoordinateRecords+=slice.nofRecords;
return;
}
diff --git a/src/java/htsjdk/samtools/BamIndexValidator.java b/src/java/htsjdk/samtools/BamIndexValidator.java
index f808d27..fb1c8d8 100644
--- a/src/java/htsjdk/samtools/BamIndexValidator.java
+++ b/src/java/htsjdk/samtools/BamIndexValidator.java
@@ -25,14 +25,20 @@ package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
+import java.util.Arrays;
+import java.util.List;
+
/**
- * One crisp, informative sentence or noun phrase that explains
- * the concept modeled by the class.
- * <p/>
+ * Class to validate (at two different levels of thoroughness) the index for a BAM file.
+ *
* This class is [<em>not</em>] thread safe [because it is immutable].
*/
public class BamIndexValidator {
+ public enum IndexValidationStringency {
+ EXHAUSTIVE, LESS_EXHAUSTIVE, NONE
+ }
+
public static int exhaustivelyTestIndex(final SamReader reader) { // throws Exception {
// look at all chunk offsets in a linear index to make sure they are valid
@@ -40,37 +46,37 @@ public class BamIndexValidator {
// content is from an existing bai file
final CachingBAMFileIndex existingIndex = (CachingBAMFileIndex) reader.indexing().getBrowseableIndex(); // new CachingBAMFileIndex(inputBai, null);
- final int n_ref = existingIndex.getNumberOfReferences();
+ final int numRefs = existingIndex.getNumberOfReferences();
int chunkCount = 0;
int indexCount = 0;
- for (int i = 0; i < n_ref; i++) {
+ for (int i = 0; i < numRefs; i++) {
final BAMIndexContent content = existingIndex.getQueryResults(i);
for (final Chunk c : content.getAllChunks()) {
final CloseableIterator<SAMRecord> iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c));
chunkCount++;
- BAMRecord b = null;
+ SAMRecord sam = null;
try {
- b = (BAMRecord) iter.next();
+ sam = iter.next();
iter.close();
} catch (final Exception e) {
- throw new SAMException("Exception in BamIndexValidator. Last good record " + b + " in chunk " + c + " chunkCount=" + chunkCount, e);
+ throw new SAMException("Exception in BamIndexValidator. Last good record " + sam + " in chunk " + c + " chunkCount=" + chunkCount, e);
}
}
// also seek to every position in the linear index
// final BAMRecordCodec bamRecordCodec = new BAMRecordCodec(reader.getFileHeader());
// bamRecordCodec.setInputStream(reader.getInputStream());
- LinearIndex linearIndex = content.getLinearIndex();
- for (long l : linearIndex.getIndexEntries()) {
+ final LinearIndex linearIndex = content.getLinearIndex();
+ for (final long l : linearIndex.getIndexEntries()) {
try {
if (l != 0) {
final CloseableIterator<SAMRecord> iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(new Chunk(l, l + 1)));
- BAMRecord b = (BAMRecord) iter.next(); // read the first record identified by the linear index
+ final SAMRecord sam = iter.next(); // read the first record identified by the linear index
indexCount++;
iter.close();
}
- } catch (Exception e) {
+ } catch (final Exception e) {
throw new SAMException("Exception in BamIndexValidator. Linear index access failure " + l + " indexCount=" + indexCount, e);
}
@@ -84,4 +90,62 @@ public class BamIndexValidator {
return 0;
}
+ /**
+ * A less time-consuming index validation that only looks at the first and last references in the index
+ * and the first and last chunks in each of those
+ *
+ * @param reader
+ * @return # of chunks examined, or 0 if there is no browseable index for the reader
+ */
+ public static int lessExhaustivelyTestIndex(final SamReader reader) {
+ // look at all chunk offsets in a linear index to make sure they are valid
+ if (reader.indexing().hasBrowseableIndex()) {
+
+ // content is from an existing bai file
+ final CachingBAMFileIndex existingIndex = (CachingBAMFileIndex) reader.indexing().getBrowseableIndex();
+ final int numRefs = existingIndex.getNumberOfReferences();
+
+ int chunkCount = 0;
+ int indexCount = 0;
+ for (int i = 0; i < numRefs; i++) {
+
+ final BAMIndexContent content = existingIndex.getQueryResults(i);
+
+ final List<Chunk> chunks = content.getAllChunks();
+ final int numChunks = chunks.size();
+ // We are looking only at the first and last chunks
+ for (final int chunkNo : Arrays.asList(0, numChunks - 1)) {
+ chunkCount++;
+
+ final Chunk c = chunks.get(chunkNo);
+ final CloseableIterator<SAMRecord> iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(c));
+ try {
+ final SAMRecord sam = iter.next();
+ iter.close();
+ } catch (final Exception e) {
+ throw new SAMException("Exception querying chunk " + chunkNo + " from reference index " + i, e);
+ }
+ }
+
+ // also seek to first and last position in the linear index
+ final long linearIndexEntries[] = content.getLinearIndex().getIndexEntries();
+ for (final int binNo : Arrays.asList(0, linearIndexEntries.length - 1)) {
+ indexCount++;
+ final long l = linearIndexEntries[binNo];
+ try {
+ if (l != 0) {
+ final CloseableIterator<SAMRecord> iter = ((SamReader.PrimitiveSamReaderToSamReaderAdapter) reader).iterator(new BAMFileSpan(new Chunk(l, l + 1)));
+ final SAMRecord sam = iter.next(); // read the first record identified by the linear index
+ iter.close();
+ }
+ } catch (final Exception e) {
+ throw new SAMException("Exception in BamIndexValidator. Linear index access failure " + l + " indexCount=" + indexCount, e);
+ }
+ }
+ }
+ return chunkCount;
+ }
+ // else it's not a bam file with a browseable index
+ return 0;
+ }
}
diff --git a/src/java/htsjdk/samtools/CRAMFileWriter.java b/src/java/htsjdk/samtools/CRAMContainerStreamWriter.java
similarity index 75%
copy from src/java/htsjdk/samtools/CRAMFileWriter.java
copy to src/java/htsjdk/samtools/CRAMContainerStreamWriter.java
index 20347a0..9a7bec6 100644
--- a/src/java/htsjdk/samtools/CRAMFileWriter.java
+++ b/src/java/htsjdk/samtools/CRAMContainerStreamWriter.java
@@ -1,18 +1,3 @@
-/*******************************************************************************
- * Copyright 2013 EMBL-EBI
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- ******************************************************************************/
package htsjdk.samtools;
import htsjdk.samtools.cram.build.ContainerFactory;
@@ -29,7 +14,6 @@ import htsjdk.samtools.cram.ref.ReferenceTracks;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
-import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
@@ -44,26 +28,30 @@ import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
-@SuppressWarnings("UnusedDeclaration")
-public class CRAMFileWriter extends SAMFileWriterImpl {
- private static final int REF_SEQ_INDEX_NOT_INITIALIZED = -2;
- static int DEFAULT_RECORDS_PER_SLICE = 10000;
- private static final int DEFAULT_SLICES_PER_CONTAINER = 1;
- private static final Version cramVersion = CramVersions.CRAM_v2_1;
+/**
+ * Class for writing SAMRecords into a series of CRAM containers on an output stream.
+ */
+public class CRAMContainerStreamWriter {
+ private static final Version cramVersion = CramVersions.DEFAULT_CRAM_VERSION;
- private final String fileName;
- private final List<SAMRecord> samRecords = new ArrayList<SAMRecord>();
- private ContainerFactory containerFactory;
+ static int DEFAULT_RECORDS_PER_SLICE = 10000;
+ static int MIN_SINGLE_REF_RECORDS = 1000;
protected final int recordsPerSlice = DEFAULT_RECORDS_PER_SLICE;
+ private static final int DEFAULT_SLICES_PER_CONTAINER = 1;
protected final int containerSize = recordsPerSlice * DEFAULT_SLICES_PER_CONTAINER;
+ private static final int REF_SEQ_INDEX_NOT_INITIALIZED = -3;
+ private final SAMFileHeader samFileHeader;
+ private final String cramID;
private final OutputStream outputStream;
private ReferenceSource source;
+
+ private final List<SAMRecord> samRecords = new ArrayList<SAMRecord>();
+ private ContainerFactory containerFactory;
private int refSeqIndex = REF_SEQ_INDEX_NOT_INITIALIZED;
- private static final Log log = Log.getInstance(CRAMFileWriter.class);
+ private static final Log log = Log.getInstance(CRAMContainerStreamWriter.class);
- private final SAMFileHeader samFileHeader;
private boolean preserveReadNames = true;
private QualityScorePreservation preservation = null;
private boolean captureAllTags = true;
@@ -74,74 +62,124 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
private long offset;
/**
- * Create a CRAMFileWriter on an output stream. Requires input records to be presorted to match the
- * sort order defined by the input {@code samFileHeader}.
+ * Create a CRAMContainerStreamWriter for writing SAM records into a series of CRAM
+ * containers on output stream, with an optional index.
*
- * @param outputStream where to write the output.
+ * @param outputStream where to write the CRAM stream.
+ * @param indexStream where to write the output index. Can be null if no index is required.
* @param source reference source
* @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
- * @param fileName used for display in error messages
+ * @param cramId used for display in error message display
*/
- public CRAMFileWriter(
+ public CRAMContainerStreamWriter(
final OutputStream outputStream,
+ final OutputStream indexStream,
final ReferenceSource source,
final SAMFileHeader samFileHeader,
- final String fileName)
- {
- this(outputStream, null, source, samFileHeader, fileName); // defaults to presorted == true
+ final String cramId) {
+ this.outputStream = outputStream;
+ this.samFileHeader = samFileHeader;
+ this.cramID = cramId;
+ this.source = source;
+ containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
+ if (indexStream != null) {
+ indexer = new CRAMIndexer(indexStream, samFileHeader);
+ }
}
/**
- * Create a CRAMFileWriter and index on output streams. Requires input records to be presorted to match the
- * sort order defined by the input {@code samFileHeader}.
- *
- * @param outputStream where to write the output.
- * @param indexOS where to write the output index. Can be null if no index is required.
- * @param source reference source
- * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
- * @param fileName used for display in error messages
+ * Write an alignment record.
+ * @param alignment must not be null
*/
- public CRAMFileWriter(
- final OutputStream outputStream,
- final OutputStream indexOS,
- final ReferenceSource source,
- final SAMFileHeader samFileHeader,
- final String fileName)
- {
- this(outputStream, indexOS, true, source, samFileHeader, fileName); // defaults to presorted==true
+ public void writeAlignment(final SAMRecord alignment) {
+ if (shouldFlushContainer(alignment)) {
+ try {
+ flushContainer();
+ } catch (IOException e) {
+ throw new RuntimeIOException(e);
+ } catch (IllegalAccessException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ updateReferenceContext(alignment.getReferenceIndex());
+
+ samRecords.add(alignment);
}
/**
- * Create a CRAMFileWriter and index on output streams.
- *
- * @param outputStream where to write the output.
- * @param indexOS where to write the output index. Can be null if no index is required.
- * @param presorted if true records written to this writer must already be sorted in the order specified by the header
- * @param source reference source
- * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
- * @param fileName used for display in error message display
+ * Write a CRAM file header and SAM header to the stream.
+
+ * @param header SAMFileHeader to write
*/
- public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted,
- final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
- this.outputStream = outputStream;
- this.samFileHeader = samFileHeader;
- this.fileName = fileName;
- initCRAMWriter(indexOS, source, samFileHeader, presorted);
+ public void writeHeader(final SAMFileHeader header) {
+ // TODO: header must be written exactly once per writer life cycle.
+ offset = CramIO.writeHeader(cramVersion, outputStream, header, cramID);
}
- private void initCRAMWriter(final OutputStream indexOS, final ReferenceSource source, final SAMFileHeader samFileHeader, final boolean preSorted) {
- this.source = source;
- setSortOrder(samFileHeader.getSortOrder(), preSorted);
- setHeader(samFileHeader);
-
- if (this.source == null) {
- this.source = new ReferenceSource(Defaults.REFERENCE_FASTA);
+ /**
+ * Finish writing to the stream. Flushes the record cache and optionally emits an EOF container.
+ * @param writeEOFContainer true if an EOF container should be written. Only use false if writing a CRAM file
+ * fragment which will later be aggregated into a complete CRAM file.
+ */
+ public void finish(final boolean writeEOFContainer) {
+ try {
+ if (!samRecords.isEmpty()) {
+ flushContainer();
+ }
+ if (writeEOFContainer) {
+ CramIO.issueEOF(cramVersion, outputStream);
+ }
+ outputStream.flush();
+ if (indexer != null) {
+ indexer.finish();
+ }
+ outputStream.close();
+ } catch (final IOException e) {
+ throw new RuntimeIOException(e);
+ } catch (final IllegalAccessException e) {
+ throw new RuntimeException(e);
}
+ }
- containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
- if (indexOS != null) {
- indexer = new CRAMIndexer(indexOS, samFileHeader);
+ public boolean isPreserveReadNames() {
+ return preserveReadNames;
+ }
+
+ public void setPreserveReadNames(final boolean preserveReadNames) {
+ this.preserveReadNames = preserveReadNames;
+ }
+
+ public List<PreservationPolicy> getPreservationPolicies() {
+ if (preservation == null) {
+ // set up greedy policy by default:
+ preservation = new QualityScorePreservation("*8");
}
+ return preservation.getPreservationPolicies();
+ }
+
+ public boolean isCaptureAllTags() {
+ return captureAllTags;
+ }
+
+ public void setCaptureAllTags(final boolean captureAllTags) {
+ this.captureAllTags = captureAllTags;
+ }
+
+ public Set<String> getCaptureTags() {
+ return captureTags;
+ }
+
+ public void setCaptureTags(final Set<String> captureTags) {
+ this.captureTags = captureTags;
+ }
+
+ public Set<String> getIgnoreTags() {
+ return ignoreTags;
+ }
+
+ public void setIgnoreTags(final Set<String> ignoreTags) {
+ this.ignoreTags = ignoreTags;
}
/**
@@ -152,7 +190,42 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
* @return true if the current container should be flushed and the following records should go into a new container; false otherwise.
*/
protected boolean shouldFlushContainer(final SAMRecord nextRecord) {
- return samRecords.size() >= containerSize || refSeqIndex != REF_SEQ_INDEX_NOT_INITIALIZED && refSeqIndex != nextRecord.getReferenceIndex();
+ if (samRecords.isEmpty()) {
+ refSeqIndex = nextRecord.getReferenceIndex();
+ return false;
+ }
+
+ if (samRecords.size() >= containerSize) {
+ return true;
+ }
+
+ if (samFileHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
+ return false;
+ }
+
+ // make sure unmapped reads don't get into multiref containers:
+ if (refSeqIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && nextRecord.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+ return true;
+ }
+
+ if (refSeqIndex == Slice.MULTI_REFERENCE) {
+ return false;
+ }
+
+ final boolean sameRef = (refSeqIndex == nextRecord.getReferenceIndex());
+ if (sameRef) {
+ return false;
+ }
+
+ /**
+ * Protection against too small containers: flush at least X single refs, switch to multiref otherwise.
+ */
+ if (samRecords.size() > MIN_SINGLE_REF_RECORDS) {
+ return true;
+ } else {
+ refSeqIndex = Slice.MULTI_REFERENCE;
+ return false;
+ }
}
private static void updateTracks(final List<SAMRecord> samRecords, final ReferenceTracks tracks) {
@@ -198,11 +271,21 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
final byte[] refs;
String refSeqName = null;
- if (refSeqIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) refs = new byte[0];
- else {
+ switch (refSeqIndex) {
+ case Slice.MULTI_REFERENCE:
+ if (preservation != null && preservation.areReferenceTracksRequired()) {
+ throw new SAMException("Cannot apply reference-based lossy compression on non-coordinate sorted reads.");
+ }
+ refs = new byte[0];
+ break;
+ case SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX:
+ refs = new byte[0];
+ break;
+ default:
final SAMSequenceRecord sequence = samFileHeader.getSequence(refSeqIndex);
refs = source.getReferenceBases(sequence, true);
refSeqName = sequence.getSequenceName();
+ break;
}
int start = SAMRecord.NO_ALIGNMENT_START;
@@ -236,6 +319,10 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
int index = 0;
int prevAlStart = start;
for (final SAMRecord samRecord : samRecords) {
+ if (samRecord.getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && refSeqIndex != samRecord.getReferenceIndex()) {
+ // this may load all ref sequences into memory:
+ sam2CramRecordFactory.setRefBases(source.getReferenceBases(samFileHeader.getSequence(samRecord.getReferenceIndex()), true));
+ }
final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(samRecord);
cramRecord.index = ++index;
cramRecord.alignmentDelta = samRecord.getAlignmentStart() - prevAlStart;
@@ -246,7 +333,8 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
if (preservation != null) preservation.addQualityScores(samRecord, cramRecord, tracks);
else if (cramRecord.qualityScores != SAMRecord.NULL_QUALS) cramRecord.setForcePreserveQualityScores(true);
- }
+ }
+
if (sam2CramRecordFactory.getBaseCount() < 3 * sam2CramRecordFactory.getFeatureCount())
log.warn("Abnormally high number of mismatches, possibly wrong reference.");
@@ -357,16 +445,20 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
}
final Container container = containerFactory.buildContainer(cramRecords);
- for (final Slice slice : container.slices)
+ for (final Slice slice : container.slices) {
slice.setRefMD5(refs);
+ }
container.offset = offset;
offset += ContainerIO.writeContainer(cramVersion, container, outputStream);
if (indexer != null) {
- for (final Slice slice : container.slices) {
- indexer.processAlignment(slice);
- }
+ /**
+ * Using silent validation here because the reads have been through validation already or
+ * they have been generated somehow through the htsjdk.
+ */
+ indexer.processContainer(container, ValidationStringency.SILENT);
}
samRecords.clear();
+ refSeqIndex = REF_SEQ_INDEX_NOT_INITIALIZED;
}
/**
@@ -385,114 +477,20 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
}
/**
- * Write an alignment record.
- * @param alignment must not be null and must have a valid SAMFileHeader.
- */
- @Override
- protected void writeAlignment(final SAMRecord alignment) {
- if (shouldFlushContainer(alignment)) {
- try {
- flushContainer();
- } catch (IOException e) {
- throw new RuntimeIOException(e);
- } catch (IllegalAccessException e) {
- throw new RuntimeException(e);
- }
- }
-
- updateReferenceContext(alignment.getReferenceIndex());
-
- samRecords.add(alignment);
- }
-
- /**
* Check if the reference has changed and create a new record factory using the new reference.
*
* @param samRecordReferenceIndex index of the new reference sequence
*/
private void updateReferenceContext(final int samRecordReferenceIndex) {
- if (refSeqIndex == REF_SEQ_INDEX_NOT_INITIALIZED) {
- refSeqIndex = samRecordReferenceIndex;
- } else
- if (refSeqIndex != samRecordReferenceIndex) refSeqIndex = samRecordReferenceIndex;
- }
-
- @Override
- protected void writeHeader(final String textHeader) {
- // TODO: header must be written exactly once per writer life cycle.
- final SAMFileHeader header = new SAMTextHeaderCodec().decode(new StringLineReader(textHeader), (fileName != null ? fileName : null));
-
- containerFactory = new ContainerFactory(header, recordsPerSlice);
-
- final CramHeader cramHeader = new CramHeader(cramVersion, fileName, header);
- try {
- offset = CramIO.writeCramHeader(cramHeader, outputStream);
- } catch (final IOException e) {
- throw new RuntimeException(e);
+ if (refSeqIndex == Slice.MULTI_REFERENCE) {
+ return;
}
- }
- @Override
- protected void finish() {
- try {
- if (!samRecords.isEmpty()) {
- flushContainer();
- }
- CramIO.issueEOF(cramVersion, outputStream);
- outputStream.flush();
- if (indexer != null) {
- indexer.finish();
- }
- outputStream.close();
- } catch (final IOException e) {
- throw new RuntimeIOException(e);
- } catch (final IllegalAccessException e) {
- throw new RuntimeException(e);
- }
- }
-
- @Override
- protected String getFilename() {
- return fileName;
- }
-
- public boolean isPreserveReadNames() {
- return preserveReadNames;
- }
-
- public void setPreserveReadNames(final boolean preserveReadNames) {
- this.preserveReadNames = preserveReadNames;
- }
-
- public List<PreservationPolicy> getPreservationPolicies() {
- if (preservation == null) {
- // set up greedy policy by default:
- preservation = new QualityScorePreservation("*8");
- }
- return preservation.getPreservationPolicies();
- }
-
- public boolean isCaptureAllTags() {
- return captureAllTags;
- }
-
- public void setCaptureAllTags(final boolean captureAllTags) {
- this.captureAllTags = captureAllTags;
- }
-
- public Set<String> getCaptureTags() {
- return captureTags;
- }
-
- public void setCaptureTags(final Set<String> captureTags) {
- this.captureTags = captureTags;
+ if (refSeqIndex == REF_SEQ_INDEX_NOT_INITIALIZED) {
+ refSeqIndex = samRecordReferenceIndex;
+ } else if (refSeqIndex != samRecordReferenceIndex) {
+ refSeqIndex = Slice.MULTI_REFERENCE;
}
-
- public Set<String> getIgnoreTags() {
- return ignoreTags;
}
- public void setIgnoreTags(final Set<String> ignoreTags) {
- this.ignoreTags = ignoreTags;
- }
}
diff --git a/src/java/htsjdk/samtools/CRAMFileReader.java b/src/java/htsjdk/samtools/CRAMFileReader.java
index 04521ba..7c83c94 100644
--- a/src/java/htsjdk/samtools/CRAMFileReader.java
+++ b/src/java/htsjdk/samtools/CRAMFileReader.java
@@ -21,12 +21,14 @@ import htsjdk.samtools.cram.CRAIIndex;
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
+import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.RuntimeEOFException;
+import htsjdk.samtools.util.RuntimeIOException;
import java.io.File;
import java.io.FileInputStream;
@@ -56,32 +58,40 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
private ValidationStringency validationStringency;
/**
- * Open CRAM data for reading using either the file or the input stream
- * supplied in the arguments. The
- * {@link htsjdk.samtools.Defaults#REFERENCE_FASTA default} reference fasta
- * file will be used.
+ * Create a CRAMFileReader from either a file or input stream using the reference source returned by
+ * {@link ReferenceSource#getDefaultCRAMReferenceSource() getDefaultCRAMReferenceSource}.
+ *
*
* @param cramFile CRAM file to open
- * @param inputStream CRAM stream to read
+ * @param inputStream CRAM stream to read
+ *
+ * @throws IllegalArgumentException if the {@code cramFile} and the {@code inputStream} are both null
+ * @throws IllegalStateException if a {@link ReferenceSource#getDefaultCRAMReferenceSource() default}
+ * reference source cannot be acquired
*/
public CRAMFileReader(final File cramFile, final InputStream inputStream) {
- this(cramFile, inputStream, new ReferenceSource(Defaults.REFERENCE_FASTA));
+ this(cramFile, inputStream, ReferenceSource.getDefaultCRAMReferenceSource());
}
/**
- * Open CRAM data for reading using either the file or the input stream
- * supplied in the arguments.
+ * Create a CRAMFileReader from either a file or input stream using the supplied reference source.
*
- * @param cramFile CRAM file to read
- * @param inputStream index file to be used for random access
+ * @param cramFile CRAM file to read
+ * @param inputStream CRAM stream to read
* @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
- * reference sequences
+ * reference sequences. May not be null.
+ *
+ * @throws IllegalArgumentException if the {@code cramFile} and the {@code inputStream} are both null
+ * or if the {@code ReferenceSource} is null
*/
public CRAMFileReader(final File cramFile, final InputStream inputStream,
final ReferenceSource referenceSource) {
- if (cramFile == null && inputStream == null)
- throw new IllegalArgumentException(
- "Either file or input stream is required.");
+ if (cramFile == null && inputStream == null) {
+ throw new IllegalArgumentException("Either file or input stream is required.");
+ }
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM readers");
+ }
this.cramFile = cramFile;
this.inputStream = inputStream;
@@ -90,18 +100,22 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
}
/**
- * Open CRAM file for reading. If index file is supplied than random access
- * will be available.
+ * Create a CRAMFileReader from a file and optional index file using the supplied reference source. If index file
+ * is supplied then random access will be available.
*
- * @param cramFile CRAM file to read
- * @param indexFile index file to be used for random access
+ * @param cramFile CRAM file to read. May not be null.
+ * @param indexFile index file to be used for random access. May be null.
* @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
- * reference sequences
+ * reference sequences. May not be null.
+ * @throws IllegalArgumentException if the {@code cramFile} or the {@code ReferenceSource} is null
*/
public CRAMFileReader(final File cramFile, final File indexFile,
final ReferenceSource referenceSource) {
if (cramFile == null)
throw new IllegalArgumentException("File is required.");
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM readers");
+ }
this.cramFile = cramFile;
this.mIndexFile = indexFile;
@@ -110,10 +124,20 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
getIterator();
}
+ /**
+ * Create a CRAMFileReader from a file using the supplied reference source.
+ *
+ * @param cramFile CRAM file to read. Can not be null.
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * reference sequences. May not be null.
+ * @throws IllegalArgumentException if the {@code cramFile} or the {@code ReferenceSource} is null
+ */
public CRAMFileReader(final File cramFile, final ReferenceSource referenceSource) {
- if (cramFile == null && inputStream == null)
- throw new IllegalArgumentException(
- "Either file or input stream is required.");
+ if (cramFile == null)
+ throw new IllegalArgumentException("CRAM file cannot be null.");
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM readers");
+ }
this.cramFile = cramFile;
this.referenceSource = referenceSource;
@@ -121,8 +145,27 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
getIterator();
}
+ /**
+ * Create a CRAMFileReader from an input stream and optional index stream using the supplied reference
+ * source and validation stringency.
+ *
+ * @param inputStream CRAM stream to read. May not be null.
+ * @param indexInputStream index stream to be used for random access. May be null.
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * reference sequences. May not be null.
+ * @param validationStringency Validation stringency to be used when reading
+ *
+ * @throws IllegalArgumentException if the {@code inputStream} or the {@code ReferenceSource} is null
+ */
public CRAMFileReader(final InputStream inputStream, final SeekableStream indexInputStream,
final ReferenceSource referenceSource, final ValidationStringency validationStringency) throws IOException {
+ if (inputStream == null) {
+ throw new IllegalArgumentException("Input stream can not be null for CRAM reader");
+ }
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM readers");
+ }
+
this.inputStream = inputStream;
this.referenceSource = referenceSource;
this.validationStringency = validationStringency;
@@ -140,12 +183,36 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
}
}
+ /**
+ * Create a CRAMFileReader from an input stream and optional index file using the supplied reference
+ * source and validation stringency.
+ *
+ * @param stream CRAM stream to read. May not be null.
+ * @param indexFile index file to be used for random access. May be null.
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * reference sequences. May not be null.
+ * @param validationStringency Validation stringency to be used when reading
+ *
+ * @throws IllegalArgumentException if the {@code stream} or the {@code ReferenceSource} is null
+ */
public CRAMFileReader(final InputStream stream,
final File indexFile, final ReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
this(stream, indexFile == null ? null: new SeekableFileStream(indexFile), referenceSource, validationStringency);
}
+ /**
+ * Create a CRAMFileReader from a CRAM file and optional index file using the supplied reference
+ * source and validation stringency.
+ *
+ * @param cramFile CRAM file to read. May not be null.
+ * @param indexFile index file to be used for random access. May be null.
+ * @param referenceSource a {@link htsjdk.samtools.cram.ref.ReferenceSource source} of
+ * reference sequences. May not be null.
+ * @param validationStringency Validation stringency to be used when reading
+ *
+ * @throws IllegalArgumentException if the {@code cramFile} or the {@code ReferenceSource} is null
+ */
public CRAMFileReader(final File cramFile,
final File indexFile, final ReferenceSource referenceSource,
final ValidationStringency validationStringency) throws IOException {
@@ -319,26 +386,19 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
final long containerOffset = filePointers[i] >>> 16;
try {
- if (seekableStream.position() != containerOffset || iterator.container == null) {
- seekableStream.seek(containerOffset);
- container = ContainerIO.readContainerHeader(iterator.getCramHeader().getVersion().major, seekableStream);
- if (container.alignmentStart + container.alignmentSpan > start) {
- seekableStream.seek(containerOffset);
- iterator.jumpWithinContainerToPos(fileHeader.getSequenceIndex(sequence), start);
- return new IntervalIterator(iterator, new QueryInterval(referenceIndex, start, -1));
- }
- } else {
- container = iterator.container;
- if (container.alignmentStart + container.alignmentSpan > start) {
- iterator.jumpWithinContainerToPos(fileHeader.getSequenceIndex(sequence), start);
- return new IntervalIterator(iterator, new QueryInterval(referenceIndex, start, -1));
- }
+ seekableStream.seek(containerOffset);
+ iterator.nextContainer();
+
+ if (iterator.jumpWithinContainerToPos(fileHeader.getSequenceIndex(sequence), start)) {
+ return new IntervalIterator(iterator, new QueryInterval(referenceIndex, start, -1));
}
} catch (final IOException e) {
- throw new RuntimeException(e);
+ throw new RuntimeIOException(e);
+ } catch (IllegalAccessException e) {
+ throw new SAMException(e);
}
}
- return iterator;
+ throw new SAMException("Failed to query alignment start: " + sequence + " at " + start);
}
CloseableIterator<SAMRecord> query(final int referenceIndex,
@@ -390,8 +450,9 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
} catch (final FileNotFoundException e) {
throw new RuntimeException(e);
}
- } else if (inputStream instanceof SeekableStream)
+ } else if (inputStream instanceof SeekableStream) {
seekableStream = (SeekableStream) inputStream;
+ }
return seekableStream;
}
@@ -527,6 +588,7 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
boolean isWithinTheInterval(final SAMRecord record) {
final boolean refMatch = record.getReferenceIndex() == interval.referenceIndex;
if (interval.start == -1) return refMatch;
+ if (!refMatch) return false;
final int start = record.getAlignmentStart();
final int end = record.getAlignmentEnd();
@@ -541,8 +603,10 @@ public class CRAMFileReader extends SamReader.ReaderImplementation implements Sa
boolean isBeyondTheInterval(final SAMRecord record) {
if (record.getReadUnmappedFlag()) return false;
- final boolean refMatch = record.getReferenceIndex() == interval.referenceIndex;
- return !refMatch || interval.end != -1 && record.getAlignmentStart() > interval.end;
+ if (record.getReferenceIndex() > interval.referenceIndex) return true;
+ if (record.getReferenceIndex() != interval.referenceIndex) return false;
+
+ return interval.end != -1 && record.getAlignmentStart() > interval.end;
}
diff --git a/src/java/htsjdk/samtools/CRAMFileWriter.java b/src/java/htsjdk/samtools/CRAMFileWriter.java
index 20347a0..717a52f 100644
--- a/src/java/htsjdk/samtools/CRAMFileWriter.java
+++ b/src/java/htsjdk/samtools/CRAMFileWriter.java
@@ -15,373 +15,93 @@
******************************************************************************/
package htsjdk.samtools;
-import htsjdk.samtools.cram.build.ContainerFactory;
-import htsjdk.samtools.cram.build.Cram2SamRecordFactory;
-import htsjdk.samtools.cram.build.CramIO;
-import htsjdk.samtools.cram.build.CramNormalizer;
-import htsjdk.samtools.cram.build.Sam2CramRecordFactory;
-import htsjdk.samtools.cram.common.CramVersions;
-import htsjdk.samtools.cram.common.Version;
import htsjdk.samtools.cram.lossy.PreservationPolicy;
-import htsjdk.samtools.cram.lossy.QualityScorePreservation;
import htsjdk.samtools.cram.ref.ReferenceSource;
-import htsjdk.samtools.cram.ref.ReferenceTracks;
-import htsjdk.samtools.cram.structure.Container;
-import htsjdk.samtools.cram.structure.ContainerIO;
-import htsjdk.samtools.cram.structure.CramCompressionRecord;
-import htsjdk.samtools.cram.structure.CramHeader;
-import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.util.Log;
-import htsjdk.samtools.util.RuntimeIOException;
import htsjdk.samtools.util.StringLineReader;
-import java.io.IOException;
import java.io.OutputStream;
-import java.util.ArrayList;
import java.util.List;
-import java.util.Map;
import java.util.Set;
-import java.util.TreeMap;
-import java.util.TreeSet;
@SuppressWarnings("UnusedDeclaration")
public class CRAMFileWriter extends SAMFileWriterImpl {
- private static final int REF_SEQ_INDEX_NOT_INITIALIZED = -2;
- static int DEFAULT_RECORDS_PER_SLICE = 10000;
- private static final int DEFAULT_SLICES_PER_CONTAINER = 1;
- private static final Version cramVersion = CramVersions.CRAM_v2_1;
-
+ private CRAMContainerStreamWriter cramContainerStream;
+ private final SAMFileHeader samFileHeader;
private final String fileName;
- private final List<SAMRecord> samRecords = new ArrayList<SAMRecord>();
- private ContainerFactory containerFactory;
- protected final int recordsPerSlice = DEFAULT_RECORDS_PER_SLICE;
- protected final int containerSize = recordsPerSlice * DEFAULT_SLICES_PER_CONTAINER;
-
- private final OutputStream outputStream;
- private ReferenceSource source;
- private int refSeqIndex = REF_SEQ_INDEX_NOT_INITIALIZED;
private static final Log log = Log.getInstance(CRAMFileWriter.class);
- private final SAMFileHeader samFileHeader;
- private boolean preserveReadNames = true;
- private QualityScorePreservation preservation = null;
- private boolean captureAllTags = true;
- private Set<String> captureTags = new TreeSet<String>();
- private Set<String> ignoreTags = new TreeSet<String>();
-
- private CRAMIndexer indexer;
- private long offset;
-
/**
* Create a CRAMFileWriter on an output stream. Requires input records to be presorted to match the
* sort order defined by the input {@code samFileHeader}.
*
- * @param outputStream where to write the output.
- * @param source reference source
- * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param outputStream where to write the output. Can not be null.
+ * @param referenceSource reference source. Can not be null.
+ * @param samFileHeader {@link SAMFileHeader} to be used. Can not be null. Sort order is determined by the sortOrder property of this arg.
* @param fileName used for display in error messages
+ *
+ * @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null
*/
public CRAMFileWriter(
final OutputStream outputStream,
- final ReferenceSource source,
+ final ReferenceSource referenceSource,
final SAMFileHeader samFileHeader,
final String fileName)
{
- this(outputStream, null, source, samFileHeader, fileName); // defaults to presorted == true
+ this(outputStream, null, referenceSource, samFileHeader, fileName); // defaults to presorted == true
}
/**
- * Create a CRAMFileWriter and index on output streams. Requires input records to be presorted to match the
+ * Create a CRAMFileWriter and optional index on output streams. Requires input records to be presorted to match the
* sort order defined by the input {@code samFileHeader}.
*
- * @param outputStream where to write the output.
+ * @param outputStream where to write the output. Can not be null.
* @param indexOS where to write the output index. Can be null if no index is required.
- * @param source reference source
- * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param referenceSource reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Can not be null. Sort order is determined by the sortOrder property of this arg.
* @param fileName used for display in error messages
+ *
+ * @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null
*/
public CRAMFileWriter(
final OutputStream outputStream,
final OutputStream indexOS,
- final ReferenceSource source,
+ final ReferenceSource referenceSource,
final SAMFileHeader samFileHeader,
final String fileName)
{
- this(outputStream, indexOS, true, source, samFileHeader, fileName); // defaults to presorted==true
+ this(outputStream, indexOS, true, referenceSource, samFileHeader, fileName); // defaults to presorted==true
}
/**
- * Create a CRAMFileWriter and index on output streams.
+ * Create a CRAMFileWriter and optional index on output streams.
*
- * @param outputStream where to write the output.
+ * @param outputStream where to write the output. Can not be null.
* @param indexOS where to write the output index. Can be null if no index is required.
* @param presorted if true records written to this writer must already be sorted in the order specified by the header
- * @param source reference source
- * @param samFileHeader {@link SAMFileHeader} to be used. Sort order is determined by the sortOrder property of this arg.
+ * @param referenceSource reference source
+ * @param samFileHeader {@link SAMFileHeader} to be used. Can not be null. Sort order is determined by the sortOrder property of this arg.
* @param fileName used for display in error message display
- */
- public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted,
- final ReferenceSource source, final SAMFileHeader samFileHeader, final String fileName) {
- this.outputStream = outputStream;
- this.samFileHeader = samFileHeader;
- this.fileName = fileName;
- initCRAMWriter(indexOS, source, samFileHeader, presorted);
- }
-
- private void initCRAMWriter(final OutputStream indexOS, final ReferenceSource source, final SAMFileHeader samFileHeader, final boolean preSorted) {
- this.source = source;
- setSortOrder(samFileHeader.getSortOrder(), preSorted);
- setHeader(samFileHeader);
-
- if (this.source == null) {
- this.source = new ReferenceSource(Defaults.REFERENCE_FASTA);
- }
-
- containerFactory = new ContainerFactory(samFileHeader, recordsPerSlice);
- if (indexOS != null) {
- indexer = new CRAMIndexer(indexOS, samFileHeader);
- }
- }
-
- /**
- * Decide if the current container should be completed and flushed. The decision is based on a) number of records and b) if the
- * reference sequence id has changed.
*
- * @param nextRecord the record to be added into the current or next container
- * @return true if the current container should be flushed and the following records should go into a new container; false otherwise.
+ * @throws IllegalArgumentException if the {@code outputStream}, {@code referenceSource} or {@code samFileHeader} are null
*/
- protected boolean shouldFlushContainer(final SAMRecord nextRecord) {
- return samRecords.size() >= containerSize || refSeqIndex != REF_SEQ_INDEX_NOT_INITIALIZED && refSeqIndex != nextRecord.getReferenceIndex();
- }
-
- private static void updateTracks(final List<SAMRecord> samRecords, final ReferenceTracks tracks) {
- for (final SAMRecord samRecord : samRecords) {
- if (samRecord.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) {
- int refPos = samRecord.getAlignmentStart();
- int readPos = 0;
- for (final CigarElement cigarElement : samRecord.getCigar().getCigarElements()) {
- if (cigarElement.getOperator().consumesReferenceBases()) {
- for (int elementIndex = 0; elementIndex < cigarElement.getLength(); elementIndex++)
- tracks.addCoverage(refPos + elementIndex, 1);
- }
- switch (cigarElement.getOperator()) {
- case M:
- case X:
- case EQ:
- for (int pos = readPos; pos < cigarElement.getLength(); pos++) {
- final byte readBase = samRecord.getReadBases()[readPos + pos];
- final byte refBase = tracks.baseAt(refPos + pos);
- if (readBase != refBase) tracks.addMismatches(refPos + pos, 1);
- }
- break;
-
- default:
- break;
- }
-
- readPos += cigarElement.getOperator().consumesReadBases() ? cigarElement.getLength() : 0;
- refPos += cigarElement.getOperator().consumesReferenceBases() ? cigarElement.getLength() : 0;
- }
- }
- }
- }
-
- /**
- * Complete the current container and flush it to the output stream.
- *
- * @throws IllegalArgumentException
- * @throws IllegalAccessException
- * @throws IOException
- */
- protected void flushContainer() throws IllegalArgumentException, IllegalAccessException, IOException {
-
- final byte[] refs;
- String refSeqName = null;
- if (refSeqIndex == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) refs = new byte[0];
- else {
- final SAMSequenceRecord sequence = samFileHeader.getSequence(refSeqIndex);
- refs = source.getReferenceBases(sequence, true);
- refSeqName = sequence.getSequenceName();
- }
-
- int start = SAMRecord.NO_ALIGNMENT_START;
- int stop = SAMRecord.NO_ALIGNMENT_START;
- for (final SAMRecord r : samRecords) {
- if (r.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) continue;
-
- if (start == SAMRecord.NO_ALIGNMENT_START) start = r.getAlignmentStart();
-
- start = Math.min(r.getAlignmentStart(), start);
- stop = Math.max(r.getAlignmentEnd(), stop);
- }
-
- ReferenceTracks tracks = null;
- if (preservation != null && preservation.areReferenceTracksRequired()) {
- tracks = new ReferenceTracks(refSeqIndex, refSeqName, refs);
-
- tracks.ensureRange(start, stop - start + 1);
- updateTracks(samRecords, tracks);
- }
-
- final List<CramCompressionRecord> cramRecords = new ArrayList<CramCompressionRecord>(samRecords.size());
-
- final Sam2CramRecordFactory sam2CramRecordFactory = new Sam2CramRecordFactory(refs, samFileHeader, cramVersion);
- sam2CramRecordFactory.preserveReadNames = preserveReadNames;
- sam2CramRecordFactory.captureAllTags = captureAllTags;
- sam2CramRecordFactory.captureTags.addAll(captureTags);
- sam2CramRecordFactory.ignoreTags.addAll(ignoreTags);
- containerFactory.setPreserveReadNames(preserveReadNames);
-
- int index = 0;
- int prevAlStart = start;
- for (final SAMRecord samRecord : samRecords) {
- final CramCompressionRecord cramRecord = sam2CramRecordFactory.createCramRecord(samRecord);
- cramRecord.index = ++index;
- cramRecord.alignmentDelta = samRecord.getAlignmentStart() - prevAlStart;
- cramRecord.alignmentStart = samRecord.getAlignmentStart();
- prevAlStart = samRecord.getAlignmentStart();
-
- cramRecords.add(cramRecord);
-
- if (preservation != null) preservation.addQualityScores(samRecord, cramRecord, tracks);
- else if (cramRecord.qualityScores != SAMRecord.NULL_QUALS) cramRecord.setForcePreserveQualityScores(true);
- }
-
- if (sam2CramRecordFactory.getBaseCount() < 3 * sam2CramRecordFactory.getFeatureCount())
- log.warn("Abnormally high number of mismatches, possibly wrong reference.");
-
- {
- if (samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate) {
- // mating:
- final Map<String, CramCompressionRecord> primaryMateMap = new TreeMap<String, CramCompressionRecord>();
- final Map<String, CramCompressionRecord> secondaryMateMap = new TreeMap<String, CramCompressionRecord>();
- for (final CramCompressionRecord r : cramRecords) {
- if (!r.isMultiFragment()) {
- r.setDetached(true);
-
- r.setHasMateDownStream(false);
- r.recordsToNextFragment = -1;
- r.next = null;
- r.previous = null;
- } else {
- final String name = r.readName;
- final Map<String, CramCompressionRecord> mateMap = r.isSecondaryAlignment() ? secondaryMateMap : primaryMateMap;
- final CramCompressionRecord mate = mateMap.get(name);
- if (mate == null) {
- mateMap.put(name, r);
- } else {
- CramCompressionRecord prev = mate;
- while (prev.next != null) prev = prev.next;
- prev.recordsToNextFragment = r.index - prev.index - 1;
- prev.next = r;
- r.previous = prev;
- r.previous.setHasMateDownStream(true);
- r.setHasMateDownStream(false);
- r.setDetached(false);
- r.previous.setDetached(false);
- }
- }
- }
-
- // mark unpredictable reads as detached:
- for (final CramCompressionRecord cramRecord : cramRecords) {
- if (cramRecord.next == null || cramRecord.previous != null) continue;
- CramCompressionRecord last = cramRecord;
- while (last.next != null) last = last.next;
-
- if (cramRecord.isFirstSegment() && last.isLastSegment()) {
-
- final int templateLength = CramNormalizer.computeInsertSize(cramRecord, last);
-
- if (cramRecord.templateSize == templateLength) {
- last = cramRecord.next;
- while (last.next != null) {
- if (last.templateSize != -templateLength)
- break;
-
- last = last.next;
- }
- if (last.templateSize != -templateLength) detach(cramRecord);
- }else detach(cramRecord);
- } else detach(cramRecord);
- }
-
- for (final CramCompressionRecord cramRecord : primaryMateMap.values()) {
- if (cramRecord.next != null) continue;
- cramRecord.setDetached(true);
-
- cramRecord.setHasMateDownStream(false);
- cramRecord.recordsToNextFragment = -1;
- cramRecord.next = null;
- cramRecord.previous = null;
- }
-
- for (final CramCompressionRecord cramRecord : secondaryMateMap.values()) {
- if (cramRecord.next != null) continue;
- cramRecord.setDetached(true);
-
- cramRecord.setHasMateDownStream(false);
- cramRecord.recordsToNextFragment = -1;
- cramRecord.next = null;
- cramRecord.previous = null;
- }
- }
- else {
- for (final CramCompressionRecord cramRecord : cramRecords) {
- cramRecord.setDetached(true);
- }
- }
+ public CRAMFileWriter(final OutputStream outputStream, final OutputStream indexOS, final boolean presorted,
+ final ReferenceSource referenceSource, final SAMFileHeader samFileHeader, final String fileName) {
+ if (outputStream == null) {
+ throw new IllegalArgumentException("CRAMWriter output stream can not be null.");
}
-
-
- {
- /**
- * The following passage is for paranoid mode only. When java is run with asserts on it will throw an {@link AssertionError} if
- * read bases or quality scores of a restored SAM record mismatch the original. This is effectively a runtime round trip test.
- */
- @SuppressWarnings("UnusedAssignment") boolean assertsEnabled = false;
- //noinspection AssertWithSideEffects,ConstantConditions
- assert assertsEnabled = true;
- //noinspection ConstantConditions
- if (assertsEnabled) {
- final Cram2SamRecordFactory f = new Cram2SamRecordFactory(samFileHeader);
- for (int i = 0; i < samRecords.size(); i++) {
- final SAMRecord restoredSamRecord = f.create(cramRecords.get(i));
- assert (restoredSamRecord.getAlignmentStart() == samRecords.get(i).getAlignmentStart());
- assert (restoredSamRecord.getReferenceName().equals(samRecords.get(i).getReferenceName()));
- assert (restoredSamRecord.getReadString().equals(samRecords.get(i).getReadString()));
- assert (restoredSamRecord.getBaseQualityString().equals(samRecords.get(i).getBaseQualityString()));
- }
- }
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required for CRAM writers");
}
-
- final Container container = containerFactory.buildContainer(cramRecords);
- for (final Slice slice : container.slices)
- slice.setRefMD5(refs);
- container.offset = offset;
- offset += ContainerIO.writeContainer(cramVersion, container, outputStream);
- if (indexer != null) {
- for (final Slice slice : container.slices) {
- indexer.processAlignment(slice);
- }
+ if (samFileHeader == null) {
+ throw new IllegalArgumentException("A valid SAMFileHeader is required for CRAM writers");
}
- samRecords.clear();
- }
-
- /**
- * Traverse the graph and mark all segments as detached.
- *
- * @param cramRecord the starting point of the graph
- */
- private static void detach(CramCompressionRecord cramRecord) {
- do {
- cramRecord.setDetached(true);
-
- cramRecord.setHasMateDownStream(false);
- cramRecord.recordsToNextFragment = -1;
- }
- while ((cramRecord = cramRecord.next) != null);
+ this.samFileHeader = samFileHeader;
+ this.fileName = fileName;
+ setSortOrder(samFileHeader.getSortOrder(), presorted);
+ cramContainerStream = new CRAMContainerStreamWriter(outputStream, indexOS, referenceSource, samFileHeader, fileName);
+ setHeader(samFileHeader);
}
/**
@@ -390,65 +110,18 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
*/
@Override
protected void writeAlignment(final SAMRecord alignment) {
- if (shouldFlushContainer(alignment)) {
- try {
- flushContainer();
- } catch (IOException e) {
- throw new RuntimeIOException(e);
- } catch (IllegalAccessException e) {
- throw new RuntimeException(e);
- }
- }
-
- updateReferenceContext(alignment.getReferenceIndex());
-
- samRecords.add(alignment);
- }
-
- /**
- * Check if the reference has changed and create a new record factory using the new reference.
- *
- * @param samRecordReferenceIndex index of the new reference sequence
- */
- private void updateReferenceContext(final int samRecordReferenceIndex) {
- if (refSeqIndex == REF_SEQ_INDEX_NOT_INITIALIZED) {
- refSeqIndex = samRecordReferenceIndex;
- } else
- if (refSeqIndex != samRecordReferenceIndex) refSeqIndex = samRecordReferenceIndex;
+ cramContainerStream.writeAlignment(alignment);
}
@Override
protected void writeHeader(final String textHeader) {
- // TODO: header must be written exactly once per writer life cycle.
- final SAMFileHeader header = new SAMTextHeaderCodec().decode(new StringLineReader(textHeader), (fileName != null ? fileName : null));
-
- containerFactory = new ContainerFactory(header, recordsPerSlice);
-
- final CramHeader cramHeader = new CramHeader(cramVersion, fileName, header);
- try {
- offset = CramIO.writeCramHeader(cramHeader, outputStream);
- } catch (final IOException e) {
- throw new RuntimeException(e);
- }
+ cramContainerStream.writeHeader(
+ new SAMTextHeaderCodec().decode(new StringLineReader(textHeader),fileName != null ? fileName : null));
}
@Override
protected void finish() {
- try {
- if (!samRecords.isEmpty()) {
- flushContainer();
- }
- CramIO.issueEOF(cramVersion, outputStream);
- outputStream.flush();
- if (indexer != null) {
- indexer.finish();
- }
- outputStream.close();
- } catch (final IOException e) {
- throw new RuntimeIOException(e);
- } catch (final IllegalAccessException e) {
- throw new RuntimeException(e);
- }
+ cramContainerStream.finish(true); // flush the last container and issue EOF
}
@Override
@@ -457,42 +130,38 @@ public class CRAMFileWriter extends SAMFileWriterImpl {
}
public boolean isPreserveReadNames() {
- return preserveReadNames;
+ return cramContainerStream.isPreserveReadNames();
}
public void setPreserveReadNames(final boolean preserveReadNames) {
- this.preserveReadNames = preserveReadNames;
+ cramContainerStream.setPreserveReadNames(preserveReadNames);
}
public List<PreservationPolicy> getPreservationPolicies() {
- if (preservation == null) {
- // set up greedy policy by default:
- preservation = new QualityScorePreservation("*8");
- }
- return preservation.getPreservationPolicies();
+ return cramContainerStream.getPreservationPolicies();
}
public boolean isCaptureAllTags() {
- return captureAllTags;
+ return cramContainerStream.isCaptureAllTags();
}
public void setCaptureAllTags(final boolean captureAllTags) {
- this.captureAllTags = captureAllTags;
+ cramContainerStream.setCaptureAllTags(captureAllTags);
}
public Set<String> getCaptureTags() {
- return captureTags;
+ return cramContainerStream.getCaptureTags();
}
public void setCaptureTags(final Set<String> captureTags) {
- this.captureTags = captureTags;
+ cramContainerStream.setCaptureTags(captureTags);
}
public Set<String> getIgnoreTags() {
- return ignoreTags;
+ return cramContainerStream.getIgnoreTags();
}
public void setIgnoreTags(final Set<String> ignoreTags) {
- this.ignoreTags = ignoreTags;
+ cramContainerStream.setIgnoreTags(ignoreTags);
}
}
diff --git a/src/java/htsjdk/samtools/CRAMIndexer.java b/src/java/htsjdk/samtools/CRAMIndexer.java
index eec8c31..2543983 100755
--- a/src/java/htsjdk/samtools/CRAMIndexer.java
+++ b/src/java/htsjdk/samtools/CRAMIndexer.java
@@ -38,7 +38,9 @@
*/
package htsjdk.samtools;
+import htsjdk.samtools.cram.build.ContainerParser;
import htsjdk.samtools.cram.build.CramIO;
+import htsjdk.samtools.cram.structure.AlignmentSpan;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.cram.structure.ContainerIO;
import htsjdk.samtools.cram.structure.CramHeader;
@@ -46,12 +48,16 @@ import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BlockCompressedFilePointerUtil;
import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.RuntimeIOException;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
+import java.util.TreeSet;
/**
* Class for both constructing BAM index content and writing it out.
@@ -101,20 +107,89 @@ public class CRAMIndexer {
}
/**
- * Record any index information for a given CRAM slice.
+ * Index a container, any of mapped, unmapped and multiple references are allowed. The only requirement is sort
+ * order by coordinate.
+ * For multiref containers the method reads the container through unpacking all reads. This is slower than single
+ * reference but should be faster than normal reading.
+ *
+ * @param container container to be indexed
+ */
+ public void processContainer(final Container container, final ValidationStringency validationStringency) {
+ try {
+ if (container == null || container.isEOF()) {
+ return;
+ }
+
+ int sliceIndex = 0;
+ for (final Slice slice : container.slices) {
+ slice.containerOffset = container.offset;
+ slice.index = sliceIndex++;
+ if (slice.isMultiref()) {
+ final ContainerParser parser = new ContainerParser(indexBuilder.bamHeader);
+ final Map<Integer, AlignmentSpan> refSet = parser.getReferences(container, validationStringency);
+ final Slice fakeSlice = new Slice();
+ slice.containerOffset = container.offset;
+ slice.index = sliceIndex++;
+ /**
+ * Unmapped span must be processed after mapped spans:
+ */
+ AlignmentSpan unmappedSpan = refSet.remove(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+ for (final int refId : new TreeSet<>(refSet.keySet())) {
+ final AlignmentSpan span = refSet.get(refId);
+ fakeSlice.sequenceId = refId;
+ fakeSlice.containerOffset = slice.containerOffset;
+ fakeSlice.offset = slice.offset;
+ fakeSlice.index = slice.index;
+
+ fakeSlice.alignmentStart = span.getStart();
+ fakeSlice.alignmentSpan = span.getSpan();
+ fakeSlice.nofRecords = span.getCount();
+ processSingleReferenceSlice(fakeSlice);
+ }
+ if (unmappedSpan != null) {
+ final AlignmentSpan span = unmappedSpan;
+ fakeSlice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ fakeSlice.containerOffset = slice.containerOffset;
+ fakeSlice.offset = slice.offset;
+ fakeSlice.index = slice.index;
+
+ fakeSlice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
+ fakeSlice.alignmentSpan = 0;
+ fakeSlice.nofRecords = span.getCount();
+ processSingleReferenceSlice(fakeSlice);
+ }
+ } else {
+ processSingleReferenceSlice(slice);
+ }
+ }
+
+ } catch (final IOException e) {
+ throw new RuntimeIOException("Failed to read cram container", e);
+ }
+ }
+
+ /**
+ * Record index information for a given CRAM slice that contains either unmapped reads or
+ * reads mapped to a single reference.
* If this alignment starts a new reference, write out the old reference.
- * Requires a non-null value for rec.getFileSource().
*
- * @param slice The CRAM slice
+ * @param slice The CRAM slice, single ref or unmapped only.
+ * @throws htsjdk.samtools.SAMException if slice refers to multiple reference sequences.
*/
- public void processAlignment(final Slice slice) {
+ public void processSingleReferenceSlice(final Slice slice) {
try {
final int reference = slice.sequenceId;
- if (reference != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && reference != currentReference) {
+ if (reference == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
+ return;
+ }
+ if (slice.sequenceId == Slice.MULTI_REFERENCE) {
+ throw new SAMException("Expecting a single reference slice.");
+ }
+ if (reference != currentReference) {
// process any completed references
advanceToReference(reference);
}
- indexBuilder.processAlignment(slice);
+ indexBuilder.processSingleReferenceSlice(slice);
} catch (final Exception e) {
throw new SAMException("Exception creating BAM index for slice " + slice, e);
}
@@ -185,11 +260,11 @@ public class CRAMIndexer {
/**
- * Record any index information for a given BAM record
+ * Record any index information for a given CRAM slice
*
- * @param slice The BAM record. Requires rec.getFileSource() is non-null.
+ * @param slice CRAM slice, single ref or unmapped only.
*/
- public void processAlignment(final Slice slice) {
+ private void processSingleReferenceSlice(final Slice slice) {
// metadata
indexStats.recordMetaData(slice);
@@ -293,7 +368,9 @@ public class CRAMIndexer {
}
// process bins
- if (binsSeen == 0) return null; // no bins for this reference
+ if (binsSeen == 0) {
+ return null; // no bins for this reference
+ }
// process chunks
// nothing needed
@@ -347,30 +424,43 @@ public class CRAMIndexer {
* @param output File for output index file
* @param log optional {@link htsjdk.samtools.util.Log} to output progress
*/
- public static void createIndex(final SeekableStream stream, final File output, final Log log) throws IOException {
+ public static void createIndex(final SeekableStream stream, final File output, final Log log, final ValidationStringency validationStringency) throws IOException {
final CramHeader cramHeader = CramIO.readCramHeader(stream);
+ if (cramHeader.getSamFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
+ throw new SAMException("Expecting a coordinate sorted file.");
+ }
final CRAMIndexer indexer = new CRAMIndexer(output, cramHeader.getSamFileHeader());
int totalRecords = 0;
Container container = null;
+ ProgressLogger progressLogger = new ProgressLogger(log, 1, "indexed", "slices");
do {
- if (++totalRecords % 10 == 0)
- if (null != log) log.info(totalRecords + " slices processed ...");
-
try {
final long offset = stream.position();
container = ContainerIO.readContainer(cramHeader.getVersion(), stream);
- if (container == null || container.isEOF())
+ if (container == null || container.isEOF()) {
break;
+ }
container.offset = offset;
- int i = 0;
- for (final Slice slice : container.slices) {
- slice.containerOffset = offset;
- slice.index = i++;
- indexer.processAlignment(slice);
+ indexer.processContainer(container, validationStringency);
+
+ if (null != log) {
+ String sequenceName;
+ switch (container.sequenceId) {
+ case SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX:
+ sequenceName = "?";
+ break;
+ case Slice.MULTI_REFERENCE:
+ sequenceName = "???";
+ break;
+ default:
+ sequenceName = cramHeader.getSamFileHeader().getSequence(container.sequenceId).getSequenceName();
+ break;
+ }
+ progressLogger.record(sequenceName, container.alignmentStart);
}
} catch (final IOException e) {
diff --git a/src/java/htsjdk/samtools/CRAMIterator.java b/src/java/htsjdk/samtools/CRAMIterator.java
index 6e08f05..59d08cd 100644
--- a/src/java/htsjdk/samtools/CRAMIterator.java
+++ b/src/java/htsjdk/samtools/CRAMIterator.java
@@ -30,7 +30,6 @@ import htsjdk.samtools.cram.structure.CramHeader;
import htsjdk.samtools.cram.structure.Slice;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.Log;
-import htsjdk.samtools.util.RuntimeEOFException;
import htsjdk.samtools.util.SequenceUtil;
import java.io.IOException;
@@ -49,10 +48,6 @@ public class CRAMIterator implements SAMRecordIterator {
private CramHeader cramHeader;
private ArrayList<SAMRecord> records;
private SAMRecord nextRecord = null;
- @SuppressWarnings({"CanBeFinal", "FieldCanBeLocal"})
- private boolean restoreNMTag = true;
- @SuppressWarnings({"CanBeFinal", "FieldCanBeLocal"})
- private boolean restoreMDTag = false;
private CramNormalizer normalizer;
private byte[] refs;
private int prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
@@ -128,7 +123,7 @@ public class CRAMIterator implements SAMRecordIterator {
return cramHeader;
}
- private void nextContainer() throws IOException, IllegalArgumentException,
+ void nextContainer() throws IOException, IllegalArgumentException,
IllegalAccessException, CRAMException {
if (containerIterator != null) {
@@ -165,9 +160,10 @@ public class CRAMIterator implements SAMRecordIterator {
if (container.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
refs = new byte[]{};
- } else if (container.sequenceId == -2) {
+ prevSeqId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ } else if (container.sequenceId == Slice.MULTI_REFERENCE) {
refs = null;
- prevSeqId = -2;
+ prevSeqId = Slice.MULTI_REFERENCE;
} else if (prevSeqId < 0 || prevSeqId != container.sequenceId) {
final SAMSequenceRecord sequence = cramHeader.getSamFileHeader()
.getSequence(container.sequenceId);
@@ -201,8 +197,6 @@ public class CRAMIterator implements SAMRecordIterator {
final SAMSequenceRecord sequence = cramHeader.getSamFileHeader()
.getSequence(cramRecord.sequenceId);
refs = referenceSource.getReferenceBases(sequence, true);
- if (samRecord.getReadBases() != SAMRecord.NULL_SEQUENCE)
- SequenceUtil.calculateMdAndNmTags(samRecord, refs, restoreMDTag, restoreNMTag);
}
samRecord.setValidationStringency(validationStringency);
@@ -233,8 +227,8 @@ public class CRAMIterator implements SAMRecordIterator {
* @param refIndex reference sequence index
* @param pos alignment start to skip to
*/
- public void jumpWithinContainerToPos(final int refIndex, final int pos) {
- if (!hasNext()) return;
+ public boolean jumpWithinContainerToPos(final int refIndex, final int pos) {
+ if (!hasNext()) return false;
int i = 0;
for (final SAMRecord record : records) {
if (refIndex != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX && record.getReferenceIndex() != refIndex) continue;
@@ -242,17 +236,18 @@ public class CRAMIterator implements SAMRecordIterator {
if (pos <= 0) {
if (record.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
iterator = records.listIterator(i);
- return;
+ return true;
}
} else {
if (record.getAlignmentStart() >= pos) {
iterator = records.listIterator(i);
- return;
+ return true;
}
}
i++;
}
iterator = Collections.<SAMRecord>emptyList().iterator();
+ return false;
}
@Override
@@ -261,13 +256,10 @@ public class CRAMIterator implements SAMRecordIterator {
if (!iterator.hasNext()) {
try {
nextContainer();
- } catch (CRAMException ce) {
- throw ce;
- } catch (SAMFormatException se) {
- throw se;
- }
- catch (final Exception e) {
- throw new RuntimeEOFException(e);
+ } catch (IOException e) {
+ throw new SAMException(e);
+ } catch (IllegalAccessException e) {
+ throw new SAMException(e);
}
}
diff --git a/src/java/htsjdk/samtools/Cigar.java b/src/java/htsjdk/samtools/Cigar.java
index eb747ac..12ffd0c 100644
--- a/src/java/htsjdk/samtools/Cigar.java
+++ b/src/java/htsjdk/samtools/Cigar.java
@@ -26,6 +26,7 @@ package htsjdk.samtools;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
+import java.util.Iterator;
import java.util.List;
/**
@@ -37,7 +38,7 @@ import java.util.List;
*
* c.f. http://samtools.sourceforge.net/SAM1.pdf for complete CIGAR specification.
*/
-public class Cigar implements Serializable {
+public class Cigar implements Serializable, Iterable<CigarElement> {
public static final long serialVersionUID = 1L;
private final List<CigarElement> cigarElements = new ArrayList<CigarElement>();
@@ -82,6 +83,8 @@ public class Cigar implements Serializable {
case EQ:
case X:
length += element.getLength();
+ break;
+ default: break;
}
}
return length;
@@ -101,6 +104,8 @@ public class Cigar implements Serializable {
case X:
case P:
length += element.getLength();
+ break;
+ default: break;
}
}
return length;
@@ -236,15 +241,15 @@ public class Cigar implements Serializable {
}
private static boolean isInDelOperator(final CigarOperator op) {
- return op == CigarOperator.I || op == CigarOperator.D;
+ return op !=null && op.isIndel();
}
private static boolean isClippingOperator(final CigarOperator op) {
- return op == CigarOperator.S || op == CigarOperator.H;
+ return op !=null && op.isClipping();
}
private static boolean isPaddingOperator(final CigarOperator op) {
- return op == CigarOperator.P;
+ return op !=null && op.isPadding();
}
@Override
@@ -254,15 +259,79 @@ public class Cigar implements Serializable {
final Cigar cigar = (Cigar) o;
- if (cigarElements != null ? !cigarElements.equals(cigar.cigarElements) : cigar.cigarElements != null)
- return false;
+ return cigarElements.equals(cigar.cigarElements);
+ }
+
+ /** build a new Cigar object from a list of cigar operators.
+ * This can be used if you have the operators associated to
+ * each base in the read.
+ *
+ * e.g: read length =10 with cigar= <code>[M,M,M,M,M,M,M,M,M,M]</code>, here
+ * fromCigarOperators would generate the cigar '10M'
+ *
+ * later the user resolved the 'M' to '=' or 'X', the array is now
+ *
+ * <code>[=,=,=,=,=,X,X,=,=,=]</code>
+ *
+ * fromCigarOperators would generate the cigar '5M2X3M'
+ *
+ * */
+ public static Cigar fromCigarOperators(final List<CigarOperator> cigarOperators) {
+ if (cigarOperators == null) throw new IllegalArgumentException("cigarOperators is null");
+ final List<CigarElement> cigarElementList = new ArrayList<>();
+ int i = 0;
+ // find adjacent operators and build list of cigar elements
+ while (i < cigarOperators.size() ) {
+ final CigarOperator currentOp = cigarOperators.get(i);
+ int j = i + 1;
+ while (j < cigarOperators.size() && cigarOperators.get(j).equals(currentOp)) {
+ j++;
+ }
+ cigarElementList.add(new CigarElement(j - i, currentOp));
+ i = j;
+ }
+ return new Cigar(cigarElementList);
+ }
+
+ /** shortcut to <code>getCigarElements().iterator()</code> */
+ @Override
+ public Iterator<CigarElement> iterator() {
+ return this.getCigarElements().iterator();
+ }
+
+ /** returns true if the cigar string contains the given operator */
+ public boolean containsOperator(final CigarOperator operator) {
+ return this.cigarElements.stream().anyMatch( element -> element.getOperator() == operator);
+ }
+
+ /** returns the first cigar element */
+ public CigarElement getFirstCigarElement() {
+ return isEmpty() ? null : this.cigarElements.get(0);
+ }
+
+ /** returns the last cigar element */
+ public CigarElement getLastCigarElement() {
+ return isEmpty() ? null : this.cigarElements.get(this.numCigarElements() - 1 );
+ }
+
+ /** returns true if the cigar string starts with a clipping operator */
+ public boolean isLeftClipped() {
+ return !isEmpty() && isClippingOperator(getFirstCigarElement().getOperator());
+ }
- return true;
+ /** returns true if the cigar string ends with a clipping operator */
+ public boolean isRightClipped() {
+ return !isEmpty() && isClippingOperator(getLastCigarElement().getOperator());
}
+ /** returns true if the cigar is clipped */
+ public boolean isClipped() {
+ return isLeftClipped() || isRightClipped();
+ }
+
@Override
public int hashCode() {
- return cigarElements != null ? cigarElements.hashCode() : 0;
+ return cigarElements.hashCode();
}
public String toString() {
diff --git a/src/java/htsjdk/samtools/CigarElement.java b/src/java/htsjdk/samtools/CigarElement.java
index bd8226b..c645e6c 100644
--- a/src/java/htsjdk/samtools/CigarElement.java
+++ b/src/java/htsjdk/samtools/CigarElement.java
@@ -67,4 +67,9 @@ public class CigarElement implements Serializable {
result = 31 * result + (operator != null ? operator.hashCode() : 0);
return result;
}
+
+ @Override
+ public String toString() {
+ return String.valueOf(this.length)+this.operator;
+ }
}
diff --git a/src/java/htsjdk/samtools/CigarOperator.java b/src/java/htsjdk/samtools/CigarOperator.java
index 0eab7a0..46ea539 100644
--- a/src/java/htsjdk/samtools/CigarOperator.java
+++ b/src/java/htsjdk/samtools/CigarOperator.java
@@ -179,6 +179,31 @@ public enum CigarOperator {
return e.character;
}
+ /** Returns true if the operator is a clipped (hard or soft) operator */
+ public boolean isClipping() {
+ return this == S || this == H;
+ }
+
+ /** Returns true if the operator is an Insertion or Deletion operator */
+ public boolean isIndel() {
+ return this == I || this == D;
+ }
+
+ /** Returns true if the operator is a Skipped Region, Insertion or Deletion operator */
+ public boolean isIndelOrSkippedRegion() {
+ return this == N || isIndel();
+ }
+
+ /** Returns true if the operator is an M, an X or an EQ */
+ public boolean isAlignment() {
+ return this == M || this == X || this == EQ;
+ }
+
+ /** Returns true if the operator is a Padding operator */
+ public boolean isPadding() {
+ return this == P;
+ }
+
/** Returns the cigar operator as it would be seen in a SAM file. */
@Override public String toString() {
return this.string;
diff --git a/src/java/htsjdk/samtools/Defaults.java b/src/java/htsjdk/samtools/Defaults.java
index a5b020f..3ffd5fa 100644
--- a/src/java/htsjdk/samtools/Defaults.java
+++ b/src/java/htsjdk/samtools/Defaults.java
@@ -92,13 +92,13 @@ public class Defaults {
/** Gets a boolean system property, prefixed with "samjdk." using the default if the property does not exist. */
private static boolean getBooleanProperty(final String name, final boolean def) {
- final String value = getStringProperty(name, new Boolean(def).toString());
+ final String value = getStringProperty(name, Boolean.toString(def));
return Boolean.parseBoolean(value);
}
/** Gets an int system property, prefixed with "samjdk." using the default if the property does not exist. */
private static int getIntProperty(final String name, final int def) {
- final String value = getStringProperty(name, new Integer(def).toString());
+ final String value = getStringProperty(name, Integer.toString(def));
return Integer.parseInt(value);
}
diff --git a/src/java/htsjdk/samtools/DiskBasedBAMFileIndex.java b/src/java/htsjdk/samtools/DiskBasedBAMFileIndex.java
index 9902177..b5d6f59 100644
--- a/src/java/htsjdk/samtools/DiskBasedBAMFileIndex.java
+++ b/src/java/htsjdk/samtools/DiskBasedBAMFileIndex.java
@@ -32,17 +32,17 @@ import java.util.List;
/**
* A class for reading BAM file indices, hitting the disk once per query.
*/
-class DiskBasedBAMFileIndex extends AbstractBAMFileIndex
+public class DiskBasedBAMFileIndex extends AbstractBAMFileIndex
{
- DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary) {
+ public DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary) {
super(file, dictionary);
}
- DiskBasedBAMFileIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) {
+ public DiskBasedBAMFileIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) {
super(stream, dictionary);
}
- DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) {
+ public DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) {
super(file, dictionary, useMemoryMapping);
}
diff --git a/src/java/htsjdk/samtools/DuplicateSetIterator.java b/src/java/htsjdk/samtools/DuplicateSetIterator.java
index 6150538..ffd9523 100644
--- a/src/java/htsjdk/samtools/DuplicateSetIterator.java
+++ b/src/java/htsjdk/samtools/DuplicateSetIterator.java
@@ -51,12 +51,21 @@ public class DuplicateSetIterator implements CloseableIterator<DuplicateSet> {
this(iterator, header, false);
}
+ public DuplicateSetIterator(final CloseableIterator<SAMRecord> iterator,
+ final SAMFileHeader header,
+ final boolean preSorted) {
+ this(iterator, header, preSorted, new SAMRecordDuplicateComparator(Collections.singletonList(header)));
+ }
+
/**
* Allows the user of this iterator to skip the sorting of the input if the input is already sorted. If the records are said to be
* sorted but not actually sorted in the correct order, an exception during iteration will be thrown.
*/
- public DuplicateSetIterator(final CloseableIterator<SAMRecord> iterator, final SAMFileHeader header, final boolean preSorted) {
- this.comparator = new SAMRecordDuplicateComparator(Collections.singletonList(header));
+ public DuplicateSetIterator(final CloseableIterator<SAMRecord> iterator,
+ final SAMFileHeader header,
+ final boolean preSorted,
+ final SAMRecordDuplicateComparator comparator) {
+ this.comparator = comparator;
if (preSorted) {
this.wrappedIterator = iterator;
@@ -85,6 +94,9 @@ public class DuplicateSetIterator implements CloseableIterator<DuplicateSet> {
}
+ @Deprecated
+ /** Do not use this method as the first duplicate set will not be compared with this scoring strategy.
+ * Instead, provide a comparator to the constructor that has the scoring strategy set. */
public void setScoringStrategy(final DuplicateScoringStrategy.ScoringStrategy scoringStrategy) {
this.comparator.setScoringStrategy(scoringStrategy);
}
diff --git a/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java b/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
index f8561b6..2dd3b6c 100644
--- a/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
+++ b/src/java/htsjdk/samtools/HighAccuracyDownsamplingIterator.java
@@ -163,7 +163,7 @@ class HighAccuracyDownsamplingIterator extends DownsamplingIterator {
this.bufferedRecords = recs.iterator();
this.totalTemplates += templatesRead;
this.keptTemplates += names.size();
- return recs.size() > 0;
+ return !recs.isEmpty();
}
/**
diff --git a/src/java/htsjdk/samtools/MergingSamRecordIterator.java b/src/java/htsjdk/samtools/MergingSamRecordIterator.java
index 63d0d26..6b790fe 100644
--- a/src/java/htsjdk/samtools/MergingSamRecordIterator.java
+++ b/src/java/htsjdk/samtools/MergingSamRecordIterator.java
@@ -25,6 +25,7 @@ package htsjdk.samtools;
import htsjdk.samtools.util.CloseableIterator;
+import java.io.Serializable;
import java.util.Collection;
import java.util.Map;
import java.util.PriorityQueue;
@@ -201,7 +202,8 @@ public class MergingSamRecordIterator implements CloseableIterator<SAMRecord> {
* sequence dictionary. I hate the fact that this extends SAMRecordCoordinateComparator, but it avoids
* more copy & paste.
*/
- private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator {
+ private class MergedSequenceDictionaryCoordinateOrderComparator extends SAMRecordCoordinateComparator implements Serializable {
+ private static final long serialVersionUID = 1L;
public int fileOrderCompare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
final int referenceIndex1 = getReferenceIndex(samRecord1);
diff --git a/src/java/htsjdk/samtools/SAMFileWriterFactory.java b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
index 0566df1..7eb0823 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterFactory.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterFactory.java
@@ -38,7 +38,7 @@ import java.io.OutputStream;
/**
* Create a writer for writing SAM, BAM, or CRAM files.
*/
-public class SAMFileWriterFactory {
+public class SAMFileWriterFactory implements Cloneable {
private final static Log log = Log.getInstance(SAMFileWriterFactory.class);
private static boolean defaultCreateIndexWhileWriting = Defaults.CREATE_INDEX;
private boolean createIndex = defaultCreateIndexWhileWriting;
@@ -48,10 +48,32 @@ public class SAMFileWriterFactory {
private int asyncOutputBufferSize = AsyncSAMFileWriter.DEFAULT_QUEUE_SIZE;
private int bufferSize = Defaults.BUFFER_SIZE;
private File tmpDir;
+ /** compression level 0: min 9:max */
+ private int compressionLevel = BlockCompressedOutputStream.getDefaultCompressionLevel();
+ private Integer maxRecordsInRam = null;
-
- private Integer maxRecordsInRam;
-
+ /** simple constructor */
+ public SAMFileWriterFactory() {
+ }
+
+ /** copy constructor */
+ public SAMFileWriterFactory( final SAMFileWriterFactory other) {
+ if( other == null ) throw new IllegalArgumentException("SAMFileWriterFactory(null)");
+ this.createIndex = other.createIndex;
+ this.createMd5File = other.createMd5File;
+ this.useAsyncIo = other.useAsyncIo;
+ this.asyncOutputBufferSize = other.asyncOutputBufferSize;
+ this.bufferSize = other.bufferSize;
+ this.tmpDir = other.tmpDir;
+ this.compressionLevel = other.compressionLevel;
+ this.maxRecordsInRam = other.maxRecordsInRam;
+ }
+
+ @Override
+ public SAMFileWriterFactory clone() {
+ return new SAMFileWriterFactory(this);
+ }
+
/**
* Sets the default for whether to create md5Files for BAM files this factory.
*/
@@ -67,6 +89,15 @@ public class SAMFileWriterFactory {
return this;
}
+ /** set compression level 0: none 9: max */
+ public void setCompressionLevel(final int compressionLevel) {
+ this.compressionLevel = Math.min(9, Math.max(0, compressionLevel));
+ }
+
+ public int getCompressionLevel() {
+ return compressionLevel;
+ }
+
/**
* Sets the default for subsequent SAMFileWriterFactories
* that do not specify whether to create an index.
@@ -154,7 +185,7 @@ public class SAMFileWriterFactory {
* @param outputFile where to write the output.
*/
public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile) {
- return makeBAMWriter(header, presorted, outputFile, BlockCompressedOutputStream.getDefaultCompressionLevel());
+ return makeBAMWriter(header, presorted, outputFile, this.getCompressionLevel());
}
/**
@@ -250,7 +281,7 @@ public class SAMFileWriterFactory {
*/
public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final OutputStream stream) {
- return initWriter(header, presorted, true, new BAMFileWriter(stream, null));
+ return initWriter(header, presorted, true, new BAMFileWriter(stream, null, this.getCompressionLevel()));
}
/**
@@ -404,7 +435,7 @@ public class SAMFileWriterFactory {
throw new RuntimeIOException("Error creating CRAM file: " + outputFile.getAbsolutePath());
}
- CRAMFileWriter writer = new CRAMFileWriter(
+ final CRAMFileWriter writer = new CRAMFileWriter(
createMd5File ? new Md5CalculatingOutputStream(cramOS, new File(outputFile.getAbsolutePath() + ".md5")) : cramOS,
indexOS,
presorted,
@@ -417,9 +448,17 @@ public class SAMFileWriterFactory {
}
// Set the default CRAM writer preservation parameters
- private void setCRAMWriterDefaults(CRAMFileWriter writer) {
+ private void setCRAMWriterDefaults(final CRAMFileWriter writer) {
writer.setPreserveReadNames(true);
writer.setCaptureAllTags(true);
}
+ @Override
+ public String toString() {
+ return "SAMFileWriterFactory [createIndex=" + createIndex + ", createMd5File=" + createMd5File + ", useAsyncIo="
+ + useAsyncIo + ", asyncOutputBufferSize=" + asyncOutputBufferSize + ", bufferSize=" + bufferSize
+ + ", tmpDir=" + tmpDir + ", compressionLevel=" + compressionLevel + ", maxRecordsInRam="
+ + maxRecordsInRam + "]";
+ }
+
}
diff --git a/src/java/htsjdk/samtools/SAMFileWriterImpl.java b/src/java/htsjdk/samtools/SAMFileWriterImpl.java
index 219f64c..130ecea 100644
--- a/src/java/htsjdk/samtools/SAMFileWriterImpl.java
+++ b/src/java/htsjdk/samtools/SAMFileWriterImpl.java
@@ -174,15 +174,15 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
/**
* Add an alignment record to be emitted by the writer.
*
- * @param alignment Must not be null. If the alignment record's SAMFileHeader is null, the record will be
- * updated to the header used by this writer, which will in turn cause any unresolved reference and
- * mate reference indices to be resolved against the new header's sequence dictionary.
+ * @param alignment Must not be null. The record will be updated to use the header used by this writer, which will
+ * in turn cause any unresolved reference and mate reference indices to be resolved against the
+ * header's sequence dictionary.
+ * @throws IllegalArgumentException if the record's reference or mate reference indices cannot be
+ * resolved against the writer's header using the current reference and mate reference names
*/
public void addAlignment(final SAMRecord alignment)
{
- if (null == alignment.getHeader()) {
- alignment.setHeader(header); // re-establish the record header and attempt to resolve reference index values
- }
+ alignment.setHeaderStrict(header); // re-establish the record header and resolve reference indices
if (sortOrder.equals(SAMFileHeader.SortOrder.unsorted)) {
writeAlignment(alignment);
} else if (presorted) {
diff --git a/src/java/htsjdk/samtools/SAMHeaderRecordComparator.java b/src/java/htsjdk/samtools/SAMHeaderRecordComparator.java
index 10eccc8..f48df4d 100644
--- a/src/java/htsjdk/samtools/SAMHeaderRecordComparator.java
+++ b/src/java/htsjdk/samtools/SAMHeaderRecordComparator.java
@@ -24,6 +24,7 @@ package htsjdk.samtools;
* THE SOFTWARE.
*/
+import java.io.Serializable;
import java.util.Comparator;
/**
@@ -31,7 +32,8 @@ import java.util.Comparator;
* in the comparison to the constructor. Null attribute values (i.e., those attributes not present in the
* record) sort behind those that have values.
*/
-public class SAMHeaderRecordComparator<T extends AbstractSAMHeaderRecord> implements Comparator<T> {
+public class SAMHeaderRecordComparator<T extends AbstractSAMHeaderRecord> implements Comparator<T>, Serializable {
+ private static final long serialVersionUID = 1L;
private final String[] attributes;
diff --git a/src/java/htsjdk/samtools/SAMLineParser.java b/src/java/htsjdk/samtools/SAMLineParser.java
index 4c49659..ee84e1d 100644
--- a/src/java/htsjdk/samtools/SAMLineParser.java
+++ b/src/java/htsjdk/samtools/SAMLineParser.java
@@ -179,7 +179,7 @@ public class SAMLineParser {
reportErrorParsingLine("= is not a valid value for "
+ fieldName + " field.");
}
- if (this.mFileHeader.getSequenceDictionary().size() != 0) {
+ if (!this.mFileHeader.getSequenceDictionary().isEmpty()) {
if (this.mFileHeader.getSequence(rname) == null) {
reportErrorParsingLine(fieldName
+ " '" + rname + "' not found in any SQ record");
@@ -220,7 +220,7 @@ public class SAMLineParser {
reportErrorParsingLine("Too many fields in SAM text record.");
}
for (int i = 0; i < numFields; ++i) {
- if (mFields[i].length() == 0) {
+ if (mFields[i].isEmpty()) {
reportErrorParsingLine("Empty field at position " + i + " (zero-based)");
}
}
diff --git a/src/java/htsjdk/samtools/SAMRecord.java b/src/java/htsjdk/samtools/SAMRecord.java
index cfa922f..104e0fd 100644
--- a/src/java/htsjdk/samtools/SAMRecord.java
+++ b/src/java/htsjdk/samtools/SAMRecord.java
@@ -32,6 +32,7 @@ import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -99,9 +100,10 @@ import java.util.Set;
* A record with null a header may be validated by the isValid method, but the reference and mate reference indices,
* read group, sequence dictionary, and alignment start will not be fully validated unless a header is present.
* <p>
- * Also, SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require records to have a valid header in order to be
- * written. Any record that does not have a header at the time it is added to the writer will be updated to use the
- * header associated with the writer.
+ * Also, SAMTextWriter, BAMFileWriter, and CRAMFileWriter all require the reference and mate reference names to be valid
+ * in order to be written. At the time a record is added to a writer it will be updated to use the header associated
+ * with the writer and the reference and mate reference names must be valid for that header. If the names cannot be
+ * resolved using the writer's header, an exception will be thrown.
* <p>
* @author alecw at broadinstitute.org
* @author mishali.naik at intel.com
@@ -295,7 +297,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*/
public byte[] getOriginalBaseQualities() {
final String oqString = (String) getAttribute("OQ");
- if (oqString != null && oqString.length() > 0) {
+ if (oqString != null && !oqString.isEmpty()) {
return SAMUtils.fastqToPhred(oqString);
}
else {
@@ -342,27 +344,25 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* the reference index to NO_ALIGNMENT_REFERENCE_INDEX.
*
* @param referenceName - must not be null
+ * @throws IllegalArgumentException if {@code referenceName} is null
*/
public void setReferenceName(final String referenceName) {
if (null == referenceName) {
throw new IllegalArgumentException(
"Reference name must not be null. Use SAMRecord.NO_ALIGNMENT_REFERENCE_NAME to reset the reference name.");
}
+ if (null != mHeader) {
+ mReferenceIndex = resolveIndexFromName(referenceName, mHeader, false);
+ // String.intern() is surprisingly expensive, so avoid it by calling resolveNameFromIndex
+ // and using the interned value in the sequence dictionary if possible
+ mReferenceName = null == mReferenceIndex ?
+ referenceName.intern() :
+ resolveNameFromIndex(mReferenceIndex, mHeader);
+ }
else if (NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName)) {
mReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
}
- else if (null != mHeader) {
- // String.intern() is surprisingly expensive, so avoid it by looking up in sequence dictionary if possible
- final int referenceIndex = mHeader.getSequenceIndex(referenceName);
- if (-1 != referenceIndex) {
- setReferenceIndex(referenceIndex); // sets reference name and index
- }
- else {
- mReferenceName = referenceName.intern();
- mReferenceIndex = null;
- }
- }
else {
mReferenceName = referenceName.intern();
mReferenceIndex = null;
@@ -382,21 +382,15 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* @return Index in the sequence dictionary of the reference sequence. If the read has no reference sequence, or if
* the reference name is not found in the sequence index, NO_ALIGNMENT_REFERENCE_INDEX (-1) is returned.
*
- * @throws IllegalStateException if the reference index cannot be resolved because the SAMFileHeader for the
- * record is null.
+ * @throws IllegalStateException if the reference index must be resolved but cannot be because the SAMFileHeader
+ * for the record is null.
*/
public Integer getReferenceIndex() {
- if (null == mReferenceIndex) {
- // try to resolve the reference index
- if (NO_ALIGNMENT_REFERENCE_NAME.equals(mReferenceName)) {
+ if (null == mReferenceIndex) { // try to resolve the reference index
+ mReferenceIndex = resolveIndexFromName(mReferenceName, mHeader, false);
+ if (null == mReferenceIndex) {
mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
}
- else if (null != mHeader) {
- mReferenceIndex = mHeader.getSequenceIndex(mReferenceName);
- }
- else {
- throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index");
- }
}
return mReferenceIndex;
}
@@ -410,27 +404,16 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* @param referenceIndex Must either equal NO_ALIGNMENT_REFERENCE_INDEX (-1) indicating no reference, or the
* record must have a SAMFileHeader and the index must exist in the associated sequence
* dictionary.
- * @throws IllegalStateException if the SAMFileHeader is null for this record or the reference index is not
- * found in the sequence dictionary for this record.
+ * @throws IllegalStateException if {@code referenceIndex} is not equal to NO_ALIGNMENT_REFERENCE_INDEX and the
+ * SAMFileHeader is null for this record
+ * @throws IllegalArgumentException if {@code referenceIndex} is not found in the sequence dictionary in the header
+ * for this record.
*/
public void setReferenceIndex(final int referenceIndex) {
- if (referenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
- mReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- mReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
- }
- else if (null == mHeader) {
- throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index");
- }
- else {
- SAMSequenceRecord samSequence = mHeader.getSequence(referenceIndex);
- if (null != samSequence) {
- mReferenceIndex = referenceIndex;
- mReferenceName = samSequence.getSequenceName();
- }
- else {
- throw new IllegalArgumentException("Reference index " + referenceIndex + " not found in sequence dictionary.");
- }
- }
+ // resolveNameFromIndex throws if the index can't be resolved
+ setReferenceName(resolveNameFromIndex(referenceIndex, mHeader));
+ // setReferenceName does this as a side effect, but set the value here to be explicit
+ mReferenceIndex = referenceIndex;
}
/**
@@ -447,25 +430,24 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* mate reference index to NO_ALIGNMENT_REFERENCE_INDEX.
*
* @param mateReferenceName - must not be null
+ * @throws IllegalArgumentException if {@code mateReferenceName} is null
*/
public void setMateReferenceName(final String mateReferenceName) {
if (null == mateReferenceName) {
- throw new IllegalArgumentException("Mate reference name must not be null");
+ throw new IllegalArgumentException("Mate reference name must not be null. Use SAMRecord.NO_ALIGNMENT_REFERENCE_NAME to reset the mate reference name.");
+ }
+ if (null != mHeader) {
+ mMateReferenceIndex = resolveIndexFromName(mateReferenceName, mHeader, false);
+ // String.intern() is surprisingly expensive, so avoid it by calling resolveNameFromIndex
+ // and using the interned value in the sequence dictionary if possible
+ mMateReferenceName = null == mMateReferenceIndex ?
+ mateReferenceName.intern() :
+ resolveNameFromIndex(mMateReferenceIndex, mHeader);
}
else if (NO_ALIGNMENT_REFERENCE_NAME.equals(mateReferenceName)) {
mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
}
- else if (null != mHeader) {
- final int mateReferenceIndex = mHeader.getSequenceIndex(mateReferenceName);
- if (-1 != mateReferenceIndex) {
- setMateReferenceIndex(mateReferenceIndex); // sets mate reference name and index
- }
- else {
- mMateReferenceName = mateReferenceName.intern();
- mMateReferenceIndex = null;
- }
- }
else {
mMateReferenceName = mateReferenceName.intern();
mMateReferenceIndex = null;
@@ -486,21 +468,15 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* sequence, or if the mate reference name is not found in the sequence index, NO_ALIGNMENT_REFERENCE_INDEX (-1)
* is returned.
*
- * @throws IllegalStateException if the mate reference index cannot be resolved because the SAMFileHeader for the
- * record is null.
+ * @throws IllegalStateException if the mate reference index must be resolved but cannot be because the
+ * SAMFileHeader for the record is null.
*/
public Integer getMateReferenceIndex() {
- if (null == mMateReferenceIndex) {
- // try to resolve the reference index
- if (NO_ALIGNMENT_REFERENCE_NAME.equals(mMateReferenceName)) {
+ if (null == mMateReferenceIndex) { // try to resolve the mate reference index
+ mMateReferenceIndex = resolveIndexFromName(mMateReferenceName, mHeader, false);
+ if (null == mMateReferenceIndex) {
mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
}
- else if (null != mHeader) {
- mMateReferenceIndex = mHeader.getSequenceIndex(mMateReferenceName);
- }
- else {
- throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the mate reference index");
- }
}
return mMateReferenceIndex;
}
@@ -514,27 +490,80 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
* @param mateReferenceIndex Must either equal NO_ALIGNMENT_REFERENCE_INDEX (-1) indicating no reference, or the
* record must have a SAMFileHeader and the index must exist in the associated sequence
* dictionary.
- * @throws IllegalStateException if the SAMFileHeader is null for this record or the mate reference index is not
- * found in the sequence dictionary for this record.
+ * @throws IllegalStateException if the SAMFileHeader is null for this record
+ * @throws IllegalArgumentException if the mate reference index is not found in the sequence dictionary in the header for this record.
*/
public void setMateReferenceIndex(final int mateReferenceIndex) {
- if (mateReferenceIndex == NO_ALIGNMENT_REFERENCE_INDEX) {
- mMateReferenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
- mMateReferenceName = NO_ALIGNMENT_REFERENCE_NAME;
- }
- else if (null == mHeader) {
- throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the mate reference index");
+ // resolveNameFromIndex throws if the index can't be resolved
+ setMateReferenceName(resolveNameFromIndex(mateReferenceIndex, mHeader));
+ // setMateReferenceName does this as a side effect, but set the value here to be explicit
+ mMateReferenceIndex = mateReferenceIndex;
+ }
+
+ /**
+ * Static method that resolves and returns the reference index corresponding to a given reference name.
+ *
+ * @param referenceName If {@code referenceName} is NO_ALIGNMENT_REFERENCE_NAME, the value NO_ALIGNMENT_REFERENCE_INDEX
+ * is returned directly. Otherwise {@code referenceName} must be looked up in the header's sequence
+ * dictionary.
+ * @param header SAMFileHeader to use when resolving {@code referenceName} to an index. Must be non null if the
+ * {@code referenceName} is not NO_ALIGNMENT_REFERENCE_NAME.
+ * @param strict if true, throws if {@code referenceName} does not appear in the header's sequence dictionary
+     * @return the reference index corresponding to the {@code referenceName}, or null if strict is false and {@code referenceName}
+ * does not appear in the header's sequence dictionary.
+ * @throws IllegalStateException if {@code referenceName} is not equal to NO_ALIGNMENT_REFERENCE_NAME and the header is null
+ * @throws IllegalArgumentException if strict is true and the name does not appear in header's sequence dictionary.
+ *
+ * Does not mutate the SAMRecord.
+ */
+ protected static Integer resolveIndexFromName(final String referenceName, final SAMFileHeader header, final boolean strict) {
+ Integer referenceIndex = NO_ALIGNMENT_REFERENCE_INDEX;
+ if (!NO_ALIGNMENT_REFERENCE_NAME.equals(referenceName)) {
+ if (null == header) {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index or name");
+ }
+ referenceIndex = header.getSequenceIndex(referenceName);
+ if (NO_ALIGNMENT_REFERENCE_INDEX == referenceIndex) {
+ if (strict) {
+ throw new IllegalArgumentException("Reference index for '" + referenceName + "' not found in sequence dictionary.");
+ }
+ else {
+ referenceIndex = null; // unresolved.
+ }
+ }
}
- else {
- SAMSequenceRecord samSequence = mHeader.getSequence(mateReferenceIndex);
- if (null != samSequence) {
- mMateReferenceIndex = mateReferenceIndex;
- mMateReferenceName = samSequence.getSequenceName();
+ return referenceIndex;
+ }
+
+ /**
+ * Static method that resolves and returns the reference name corresponding to a given reference index.
+ *
+ * @param referenceIndex If {@code referenceIndex} is NO_ALIGNMENT_REFERENCE_INDEX, the value NO_ALIGNMENT_REFERENCE_NAME
+ * is returned directly. Otherwise {@code referenceIndex} must be looked up in the header's sequence
+ * dictionary.
+     * @param header SAMFileHeader to use when resolving {@code referenceIndex} to a name. Must be non null unless
+     *               the {@code referenceIndex} is NO_ALIGNMENT_REFERENCE_INDEX.
+     * @return the reference name corresponding to {@code referenceIndex}
+     * @throws IllegalStateException if {@code referenceIndex} is not equal to NO_ALIGNMENT_REFERENCE_INDEX and the header
+ * is null
+ * @throws IllegalArgumentException if {@code referenceIndex} does not appear in header's sequence dictionary.
+ *
+ * Does not mutate the SAMRecord.
+ */
+ protected static String resolveNameFromIndex(final int referenceIndex, final SAMFileHeader header) {
+ String referenceName = NO_ALIGNMENT_REFERENCE_NAME;
+ if (NO_ALIGNMENT_REFERENCE_INDEX != referenceIndex) {
+ if (null == header) {
+ throw new IllegalStateException("A non-null SAMFileHeader is required to resolve the reference index or name");
}
- else {
- throw new IllegalArgumentException("Reference index " + mateReferenceIndex + " not found in sequence dictionary.");
+ SAMSequenceRecord samSeq = header.getSequence(referenceIndex);
+ if (null == samSeq) {
+ throw new IllegalArgumentException("Reference name for '" + referenceIndex + "' not found in sequence dictionary.");
}
+ referenceName = samSeq.getSequenceName();
}
+
+ return referenceName;
}
/**
@@ -1588,6 +1617,34 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
/**
+ * Establishes the SAMFileHeader for this record and forces resolution of the record's reference and mate reference
+ * names against the header using the sequence dictionary in the new header. If either the reference or mate
+ * reference name does not appear in the new header's sequence dictionary, an IllegalArgumentException is thrown.
+ *
+ * @param header new header for this record. May be null.
+ * @throws IllegalArgumentException if the record has reference or mate reference names that cannot be resolved
+ * to indices using the new header.
+ */
+ public void setHeaderStrict(final SAMFileHeader header) {
+ if (null == header) {
+ // mark the reference indices as unresolved
+ mReferenceIndex = null;
+ mMateReferenceIndex = null;
+ }
+ else {
+ // Attempt to resolve the existing reference names against the new sequence dictionary
+ // and throw if the names don't appear.
+ Integer referenceIndex = resolveIndexFromName(mReferenceName, header, true);
+ Integer mateReferenceIndex = resolveIndexFromName(mMateReferenceName, header, true);
+
+ // Mutate the record once we know the values are valid
+ mReferenceIndex = referenceIndex;
+ mMateReferenceIndex = mateReferenceIndex;
+ }
+ this.mHeader = header;
+ }
+
+ /**
* If this record has a valid binary representation of the variable-length portion of a binary record stored,
* return that byte array, otherwise return null. This will never be true for SAMRecords. It will be true
* for BAMRecords that have not been eagerDecoded(), and for which none of the data in the variable-length
@@ -1712,7 +1769,13 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
List<SAMValidationError> ret = null;
if (null != getHeader() && getValidationStringency() != ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
- ret = SAMUtils.validateCigar(this, getCigar(), getReferenceIndex(), getAlignmentBlocks(), recordNumber, "Read CIGAR");
+ try {
+                //make sure that the cached version is good
+ //wrapped in a try to catch an un-parsable string
+ return SAMUtils.validateCigar(this, getCigar(), getReferenceIndex(), getAlignmentBlocks(), recordNumber, "Read CIGAR");
+ } catch( final IllegalArgumentException e){
+ return Collections.singletonList(new SAMValidationError(SAMValidationError.Type.INVALID_CIGAR,e.getMessage(),getReadName(),recordNumber));
+ }
}
return ret;
}
@@ -1912,7 +1975,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
*/
if (firstOnly) return ret;
}
- if (getHeader() != null && getHeader().getSequenceDictionary().size() == 0) {
+ if (getHeader() != null && getHeader().getSequenceDictionary().isEmpty()) {
if (ret == null) ret = new ArrayList<SAMValidationError>();
ret.add(new SAMValidationError(SAMValidationError.Type.MISSING_SEQUENCE_DICTIONARY, "Empty sequence dictionary.", getReadName()));
if (firstOnly) return ret;
@@ -1953,7 +2016,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
if (fz == null) {
final String cq = (String)getAttribute(SAMTagUtil.getSingleton().CQ);
final String cs = (String)getAttribute(SAMTagUtil.getSingleton().CS);
- if (cq == null || cq.length() == 0 || cs == null || cs.length() == 0) {
+ if (cq == null || cq.isEmpty() || cs == null || cs.isEmpty()) {
if (ret == null) ret = new ArrayList<SAMValidationError>();
ret.add(new SAMValidationError(SAMValidationError.Type.EMPTY_READ,
"Zero-length read without FZ, CS or CQ tag", getReadName()));
@@ -1992,7 +2055,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
if (firstOnly) return ret;
}
- if (ret == null || ret.size() == 0) {
+ if (ret == null || ret.isEmpty()) {
return null;
}
return ret;
@@ -2039,7 +2102,7 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
ret.add(new SAMValidationError(SAMValidationError.Type.INVALID_ALIGNMENT_START, buildMessage("Alignment start should != 0 because reference name != *.", isMate), getReadName()));
if (firstOnly) return ret;
}
- if (getHeader() != null && getHeader().getSequenceDictionary().size() > 0) {
+ if (getHeader() != null && !getHeader().getSequenceDictionary().isEmpty()) {
final SAMSequenceRecord sequence =
(referenceIndex != null? getHeader().getSequence(referenceIndex): getHeader().getSequence(referenceName));
if (sequence == null) {
@@ -2144,9 +2207,9 @@ public class SAMRecord implements Cloneable, Locatable, Serializable {
}
}
- builder.append(" ");
- builder.append(String.valueOf(getReadLength()));
- builder.append("b");
+ builder.append(' ')
+ .append(String.valueOf(getReadLength()))
+ .append('b');
if (getReadUnmappedFlag()) {
builder.append(" unmapped read.");
diff --git a/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java b/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
index 24ebb90..e8887bc 100644
--- a/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordCoordinateComparator.java
@@ -23,6 +23,8 @@
*/
package htsjdk.samtools;
+import java.io.Serializable;
+
/**
* Comparator for sorting SAMRecords by coordinate. Note that the header is required because
* the order of sequences in the header defines the major sort order.
@@ -38,7 +40,9 @@ package htsjdk.samtools;
* if A < B && B < C, then A < C
*
*/
-public class SAMRecordCoordinateComparator implements SAMRecordComparator {
+public class SAMRecordCoordinateComparator implements SAMRecordComparator, Serializable {
+ private static final long serialVersionUID = 1L;
+
public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
int cmp = fileOrderCompare(samRecord1, samRecord2);
if (cmp != 0) {
diff --git a/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java b/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
index 6de77da..4ed2bb5 100644
--- a/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordDuplicateComparator.java
@@ -25,6 +25,7 @@ package htsjdk.samtools;
import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy;
+import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -39,7 +40,9 @@ import java.util.Map;
*
* @author nhomer
*/
-public class SAMRecordDuplicateComparator implements SAMRecordComparator {
+public class SAMRecordDuplicateComparator implements SAMRecordComparator, Serializable {
+ private static final long serialVersionUID = 1L;
+
/** An enum to provide type-safe keys for transient attributes the comparator puts on SAMRecords. */
private static enum Attr {
LibraryId, ReadCoordinate, MateCoordinate
diff --git a/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java b/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
index fc250e9..dce2ffb 100644
--- a/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordQueryHashComparator.java
@@ -25,6 +25,8 @@ package htsjdk.samtools;
import htsjdk.samtools.util.Murmur3;
+import java.io.Serializable;
+
/**
* SAMRecord comparator that provides an ordering based on a hash of the queryname. Has
* the useful property that reads with the same name will be grouped together, but that
@@ -34,7 +36,9 @@ import htsjdk.samtools.util.Murmur3;
*
* @author Tim Fennell
*/
-public class SAMRecordQueryHashComparator extends SAMRecordQueryNameComparator {
+public class SAMRecordQueryHashComparator extends SAMRecordQueryNameComparator implements Serializable {
+ private static final long serialVersionUID = 1L;
+
private final Murmur3 hasher = new Murmur3(42);
/**
@@ -63,6 +67,6 @@ public class SAMRecordQueryHashComparator extends SAMRecordQueryNameComparator {
/** Compares the hash values for two records. */
private int compareHashes(final SAMRecord lhs, final SAMRecord rhs) {
- return new Integer(this.hasher.hashUnencodedChars(lhs.getReadName())).compareTo(this.hasher.hashUnencodedChars(rhs.getReadName()));
+ return Integer.compare(this.hasher.hashUnencodedChars(lhs.getReadName()), this.hasher.hashUnencodedChars(rhs.getReadName()));
}
}
diff --git a/src/java/htsjdk/samtools/SAMRecordQueryNameComparator.java b/src/java/htsjdk/samtools/SAMRecordQueryNameComparator.java
index b91ac47..7fd97f5 100644
--- a/src/java/htsjdk/samtools/SAMRecordQueryNameComparator.java
+++ b/src/java/htsjdk/samtools/SAMRecordQueryNameComparator.java
@@ -23,10 +23,13 @@
*/
package htsjdk.samtools;
+import java.io.Serializable;
+
/**
* Comparator for "queryname" ordering of SAMRecords.
*/
-public class SAMRecordQueryNameComparator implements SAMRecordComparator {
+public class SAMRecordQueryNameComparator implements SAMRecordComparator, Serializable {
+ private static final long serialVersionUID = 1L;
public int compare(final SAMRecord samRecord1, final SAMRecord samRecord2) {
int cmp = fileOrderCompare(samRecord1, samRecord2);
diff --git a/src/java/htsjdk/samtools/SAMTextHeaderCodec.java b/src/java/htsjdk/samtools/SAMTextHeaderCodec.java
index 136ad2a..491bf9b 100644
--- a/src/java/htsjdk/samtools/SAMTextHeaderCodec.java
+++ b/src/java/htsjdk/samtools/SAMTextHeaderCodec.java
@@ -128,7 +128,7 @@ public class SAMTextHeaderCodec {
return null;
}
mCurrentLine = mReader.readLine();
- textHeader.append(mCurrentLine).append("\n");
+ textHeader.append(mCurrentLine).append('\n');
return mCurrentLine;
}
diff --git a/src/java/htsjdk/samtools/SAMUtils.java b/src/java/htsjdk/samtools/SAMUtils.java
index 14e2246..4a77a25 100644
--- a/src/java/htsjdk/samtools/SAMUtils.java
+++ b/src/java/htsjdk/samtools/SAMUtils.java
@@ -433,7 +433,7 @@ public final class SAMUtils {
public static void processValidationErrors(final List<SAMValidationError> validationErrors,
final long samRecordIndex,
final ValidationStringency validationStringency) {
- if (validationErrors != null && validationErrors.size() > 0) {
+ if (validationErrors != null && !validationErrors.isEmpty()) {
for (final SAMValidationError validationError : validationErrors) {
validationError.setRecordNumber(samRecordIndex);
}
@@ -528,7 +528,7 @@ public final class SAMUtils {
public static void chainSAMProgramRecord(final SAMFileHeader header, final SAMProgramRecord program) {
final List<SAMProgramRecord> pgs = header.getProgramRecords();
- if (pgs.size() > 0) {
+ if (!pgs.isEmpty()) {
final List<String> referencedIds = new ArrayList<String>();
for (final SAMProgramRecord pg : pgs) {
if (pg.getPreviousProgramGroupId() != null) {
@@ -945,14 +945,14 @@ public final class SAMUtils {
} else {
if (getMateCigarString(rec) != null) {
ret = new ArrayList<SAMValidationError>();
- if (rec.getMateUnmappedFlag()) {
+ if (!rec.getReadPairedFlag()) {
+ // If the read is not paired, and the Mate Cigar String (MC Attribute) exists, that is a validation error
+ ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE,
+ "Mate CIGAR String (MC Attribute) present for a read that is not paired", rec.getReadName(), recordNumber));
+ } else { // will hit here if rec.getMateUnmappedFlag() is true
// If the Mate is unmapped, and the Mate Cigar String (MC Attribute) exists, that is a validation error.
ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE,
"Mate CIGAR String (MC Attribute) present for a read whose mate is unmapped", rec.getReadName(), recordNumber));
- } else {
- // If the Mate is not paired, and the Mate Cigar String (MC Attribute) exists, that is a validation error.
- ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE,
- "Mate CIGAR String (MC Attribute) present for a read that is not paired", rec.getReadName(), recordNumber));
}
}
}
@@ -1003,6 +1003,7 @@ public final class SAMUtils {
// Only clip records that are left-most in genomic order and overlapping.
if (rec.getMateAlignmentStart() < rec.getAlignmentStart()) return 0; // right-most, so ignore.
+ else if (rec.getMateAlignmentStart() == rec.getAlignmentStart() && rec.getFirstOfPairFlag()) return 0; // same start, so pick the first end
// Find the number of read bases after the given mate's alignment start.
int numBasesToClip = 0;
diff --git a/src/java/htsjdk/samtools/SAMValidationError.java b/src/java/htsjdk/samtools/SAMValidationError.java
index 9581f9e..d560b11 100644
--- a/src/java/htsjdk/samtools/SAMValidationError.java
+++ b/src/java/htsjdk/samtools/SAMValidationError.java
@@ -257,8 +257,7 @@ public class SAMValidationError implements Serializable {
public String toString() {
final StringBuilder builder = new StringBuilder();
- builder.append(type.severity.toString());
- builder.append(": ");
+ builder.append(type.severity.toString()).append(": ");
if (source != null) {
builder.append("File ").append(source.toString()).append(", ");
}
diff --git a/src/java/htsjdk/samtools/SamFileHeaderMerger.java b/src/java/htsjdk/samtools/SamFileHeaderMerger.java
index 6dc3e1e..b162cb2 100644
--- a/src/java/htsjdk/samtools/SamFileHeaderMerger.java
+++ b/src/java/htsjdk/samtools/SamFileHeaderMerger.java
@@ -592,7 +592,7 @@ public class SamFileHeaderMerger {
}
}
// Append anything left in holder.
- if (holder.size() != 0) {
+ if (!holder.isEmpty()) {
resultingDict.addAll(holder);
}
return new SAMSequenceDictionary(resultingDict);
diff --git a/src/java/htsjdk/samtools/SamFileValidator.java b/src/java/htsjdk/samtools/SamFileValidator.java
index 42d2580..8971093 100644
--- a/src/java/htsjdk/samtools/SamFileValidator.java
+++ b/src/java/htsjdk/samtools/SamFileValidator.java
@@ -25,6 +25,7 @@
package htsjdk.samtools;
import htsjdk.samtools.SAMValidationError.Type;
+import htsjdk.samtools.BamIndexValidator.IndexValidationStringency;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
@@ -90,7 +91,7 @@ public class SamFileValidator {
private Set<Type> errorsToIgnore = EnumSet.noneOf(Type.class);
private boolean ignoreWarnings = false;
private boolean bisulfiteSequenced = false;
- private boolean validateIndex = false;
+ private IndexValidationStringency indexValidationStringency = IndexValidationStringency.NONE;
private boolean sequenceDictionaryEmptyAndNoWarningEmitted = false;
private final int maxTempFiles;
@@ -198,9 +199,14 @@ public class SamFileValidator {
orderChecker = new SAMSortOrderChecker(samReader.getFileHeader().getSortOrder());
validateSamRecordsAndQualityFormat(samReader, samReader.getFileHeader());
validateUnmatchedPairs();
- if (validateIndex) {
+ if (indexValidationStringency != IndexValidationStringency.NONE) {
try {
- BamIndexValidator.exhaustivelyTestIndex(samReader);
+ if (indexValidationStringency == IndexValidationStringency.LESS_EXHAUSTIVE) {
+ BamIndexValidator.lessExhaustivelyTestIndex(samReader);
+ }
+ else {
+ BamIndexValidator.exhaustivelyTestIndex(samReader);
+ }
} catch (Exception e) {
addError(new SAMValidationError(Type.INVALID_INDEX_FILE_POINTER, e.getMessage(), null));
}
@@ -581,10 +587,16 @@ public class SamFileValidator {
this.bisulfiteSequenced = bisulfiteSequenced;
}
- public SamFileValidator setValidateIndex(boolean validateIndex) {
+ /**
+ * @deprecated use setIndexValidationStringency instead
+ */
+ public SamFileValidator setValidateIndex(final boolean validateIndex) {
// The SAMFileReader must also have IndexCaching enabled to have the index validated,
- // samReader.enableIndexCaching(true);
- this.validateIndex = validateIndex;
+ return this.setIndexValidationStringency(validateIndex ? IndexValidationStringency.EXHAUSTIVE : IndexValidationStringency.NONE);
+ }
+
+ public SamFileValidator setIndexValidationStringency(final IndexValidationStringency stringency) {
+ this.indexValidationStringency = stringency;
return this;
}
diff --git a/src/java/htsjdk/samtools/SamFiles.java b/src/java/htsjdk/samtools/SamFiles.java
index 0112855..0a703a7 100644
--- a/src/java/htsjdk/samtools/SamFiles.java
+++ b/src/java/htsjdk/samtools/SamFiles.java
@@ -4,6 +4,7 @@ import htsjdk.samtools.cram.CRAIIndex;
import htsjdk.samtools.cram.build.CramIO;
import java.io.File;
+import java.io.IOException;
/**
* @author mccowan
@@ -13,10 +14,39 @@ public class SamFiles {
/**
* Finds the index file associated with the provided SAM file. The index file must exist and be reachable to be found.
*
+ * If the file is a symlink and the index cannot be found, try to unsymlink the file and look for the bai in the actual file path.
+ *
* @return The index for the provided SAM, or null if one was not found.
*/
public static File findIndex(final File samFile) {
- // If input is foo.bam, look for foo.bai
+ final File indexFile = lookForIndex(samFile); //try to find the index
+ if (indexFile == null) {
+ return unsymlinkAndLookForIndex(samFile);
+ } else {
+ return indexFile;
+ }
+ }
+
+ /**
+ * Resolve the canonical path of samFile and attempt to find an index there.
+ * @return an index file or null if no index is found.
+ */
+ private static File unsymlinkAndLookForIndex(File samFile) {
+ try {
+ final File canonicalSamFile = samFile.getCanonicalFile();
+ final File canonicalIndexFile = lookForIndex(canonicalSamFile);
+ if ( canonicalIndexFile != null) {
+ System.err.println("The index file " + canonicalIndexFile.getPath()
+ + " was found by resolving the canonical path of a symlink: "
+ + samFile.getPath() + " -> " + samFile.getCanonicalPath());
+ }
+ return canonicalIndexFile;
+ } catch (IOException e) {
+ return null;
+ }
+ }
+
+ private static File lookForIndex(final File samFile) {// If input is foo.bam, look for foo.bai
File indexFile;
final String fileName = samFile.getName();
if (fileName.endsWith(BamFileIoUtils.BAM_FILE_EXTENSION)) {
diff --git a/src/java/htsjdk/samtools/SamReaderFactory.java b/src/java/htsjdk/samtools/SamReaderFactory.java
index 5403379..7be43ab 100644
--- a/src/java/htsjdk/samtools/SamReaderFactory.java
+++ b/src/java/htsjdk/samtools/SamReaderFactory.java
@@ -1,12 +1,5 @@
package htsjdk.samtools;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Collections;
-import java.util.EnumSet;
-import java.util.zip.GZIPInputStream;
-
import htsjdk.samtools.cram.ref.ReferenceSource;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.sra.SRAAccession;
@@ -17,6 +10,13 @@ import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.RuntimeIOException;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.EnumSet;
+import java.util.zip.GZIPInputStream;
+
/**
* <p>Describes the functionality for producing {@link SamReader}, and offers a
* handful of static generators.</p>
@@ -265,14 +265,16 @@ public abstract class SamReaderFactory {
} else if (SamStreams.isGzippedSAMFile(bufferedStream)) {
primitiveSamReader = new SAMTextReader(new GZIPInputStream(bufferedStream), validationStringency, this.samRecordFactory);
} else if (SamStreams.isCRAMFile(bufferedStream)) {
- if (referenceSource == null && Defaults.REFERENCE_FASTA != null) referenceSource = new ReferenceSource(Defaults.REFERENCE_FASTA);
+ if (referenceSource == null) {
+ referenceSource = ReferenceSource.getDefaultCRAMReferenceSource();
+ }
if (sourceFile == null || !sourceFile.isFile()) {
primitiveSamReader = new CRAMFileReader(bufferedStream, indexFile, referenceSource, validationStringency);
} else {
bufferedStream.close();
primitiveSamReader = new CRAMFileReader(sourceFile, indexFile, referenceSource, validationStringency);
}
- } else if (sourceFile != null && SRAAccession.isValid(sourceFile.getPath())) {
+ } else if (sourceFile != null && isSra(sourceFile)) {
if (bufferedStream != null) {
bufferedStream.close();
}
@@ -300,6 +302,15 @@ public abstract class SamReaderFactory {
}
}
+ /** Attempts to detect whether the file is an SRA accessioned file. If SRA support is not available, returns false. */
+ private boolean isSra(final File sourceFile) {
+ try {
+ return SRAAccession.isValid(sourceFile.getPath());
+ } catch (final Exception e) {
+ return false;
+ }
+ }
+
public static SamReaderFactory copyOf(final SamReaderFactoryImpl target) {
return new SamReaderFactoryImpl(target.enabledOptions, target.validationStringency, target.samRecordFactory);
}
diff --git a/src/java/htsjdk/samtools/TextTagCodec.java b/src/java/htsjdk/samtools/TextTagCodec.java
index ceec61a..0fae202 100644
--- a/src/java/htsjdk/samtools/TextTagCodec.java
+++ b/src/java/htsjdk/samtools/TextTagCodec.java
@@ -78,9 +78,7 @@ public class TextTagCodec {
throw new IllegalArgumentException("Value for tag " + tagName + " cannot be stored in either a signed or unsigned 32-bit integer: " + longVal);
}
}
- sb.append(tagType);
- sb.append(':');
- sb.append(value.toString());
+ sb.append(tagType).append(':').append(value.toString());
return sb.toString();
}
@@ -102,7 +100,7 @@ public class TextTagCodec {
final StringBuilder ret = new StringBuilder(Array.get(value, 0).toString());
final int length = Array.getLength(value);
for (int i = 1; i < length; ++i) {
- ret.append(",");
+ ret.append(',');
ret.append(Array.get(value, i).toString());
}
return ret.toString();
@@ -139,10 +137,8 @@ public class TextTagCodec {
* @return Colon-separated text representation suitable for a SAM header, i.e. name:value.
*/
public String encodeUntypedTag(final String tagName, final Object value) {
- final StringBuilder sb = new StringBuilder(tagName);
- sb.append(':');
- sb.append(value.toString());
- return sb.toString();
+ return new StringBuilder(tagName).append(':')
+ .append(value.toString()).toString();
}
/**
diff --git a/src/java/htsjdk/samtools/TextualBAMIndexWriter.java b/src/java/htsjdk/samtools/TextualBAMIndexWriter.java
index 740b252..d790270 100644
--- a/src/java/htsjdk/samtools/TextualBAMIndexWriter.java
+++ b/src/java/htsjdk/samtools/TextualBAMIndexWriter.java
@@ -110,7 +110,7 @@ class TextualBAMIndexWriter implements BAMIndexWriter {
continue;
}
pw.print(" Ref " + reference + " bin " + bin.getBinNumber() + " has n_chunk= " + chunkList.size());
- if (chunkList.size() == 0) {
+ if (chunkList.isEmpty()) {
pw.println();
}
for (final Chunk c : chunkList) {
diff --git a/src/java/htsjdk/samtools/cram/CRAIIndex.java b/src/java/htsjdk/samtools/cram/CRAIIndex.java
index 0a3f567..76668b0 100644
--- a/src/java/htsjdk/samtools/cram/CRAIIndex.java
+++ b/src/java/htsjdk/samtools/cram/CRAIIndex.java
@@ -144,7 +144,7 @@ public class CRAIIndex {
slice.index = entry.sliceIndex;
slice.offset = entry.sliceOffset;
- indexer.processAlignment(slice);
+ indexer.processSingleReferenceSlice(slice);
}
indexer.finish();
diff --git a/src/java/htsjdk/samtools/cram/build/CompressionHeaderFactory.java b/src/java/htsjdk/samtools/cram/build/CompressionHeaderFactory.java
index d771f64..c958fa4 100644
--- a/src/java/htsjdk/samtools/cram/build/CompressionHeaderFactory.java
+++ b/src/java/htsjdk/samtools/cram/build/CompressionHeaderFactory.java
@@ -18,28 +18,14 @@
package htsjdk.samtools.cram.build;
import htsjdk.samtools.cram.common.MutableInt;
-import htsjdk.samtools.cram.encoding.BetaIntegerEncoding;
-import htsjdk.samtools.cram.encoding.BitCodec;
import htsjdk.samtools.cram.encoding.ByteArrayLenEncoding;
import htsjdk.samtools.cram.encoding.ByteArrayStopEncoding;
-import htsjdk.samtools.cram.encoding.Encoding;
-import htsjdk.samtools.cram.encoding.ExternalByteArrayEncoding;
import htsjdk.samtools.cram.encoding.ExternalByteEncoding;
import htsjdk.samtools.cram.encoding.ExternalCompressor;
import htsjdk.samtools.cram.encoding.ExternalIntegerEncoding;
-import htsjdk.samtools.cram.encoding.GammaIntegerEncoding;
-import htsjdk.samtools.cram.encoding.NullEncoding;
-import htsjdk.samtools.cram.encoding.SubexponentialIntegerEncoding;
-import htsjdk.samtools.cram.encoding.huffman.HuffmanCode;
-import htsjdk.samtools.cram.encoding.huffman.HuffmanTree;
-import htsjdk.samtools.cram.encoding.huffman.codec.HuffmanByteEncoding;
import htsjdk.samtools.cram.encoding.huffman.codec.HuffmanIntegerEncoding;
import htsjdk.samtools.cram.encoding.rans.RANS;
-import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
-import htsjdk.samtools.cram.encoding.readfeatures.HardClip;
-import htsjdk.samtools.cram.encoding.readfeatures.Padding;
import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
-import htsjdk.samtools.cram.encoding.readfeatures.RefSkip;
import htsjdk.samtools.cram.encoding.readfeatures.Substitution;
import htsjdk.samtools.cram.structure.CompressionHeader;
import htsjdk.samtools.cram.structure.CramCompressionRecord;
@@ -47,9 +33,10 @@ import htsjdk.samtools.cram.structure.EncodingKey;
import htsjdk.samtools.cram.structure.EncodingParams;
import htsjdk.samtools.cram.structure.ReadTag;
import htsjdk.samtools.cram.structure.SubstitutionMatrix;
-import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.RuntimeIOException;
-import java.nio.charset.Charset;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@@ -60,745 +47,546 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
+/**
+ * A class responsible for decisions about which encodings to use for a given set of records.
+ * This particular version relies heavily on GZIP and RANS for better compression.
+ */
public class CompressionHeaderFactory {
- private static final Charset charset = Charset.forName("US-ASCII");
- private static final Log log = Log.getInstance(CompressionHeaderFactory.class);
- private static final int oqz = ReadTag.nameType3BytesToInt("OQ", 'Z');
- private static final int bqz = ReadTag.nameType3BytesToInt("BQ", 'Z');
-
- public CompressionHeader build(final List<CramCompressionRecord> records, final SubstitutionMatrix substitutionMatrix, final boolean sorted) {
- final CompressionHeader header = new CompressionHeader();
- header.externalIds = new ArrayList<Integer>();
- int exCounter = 0;
-
- final int baseID = exCounter++;
- header.externalIds.add(baseID);
- header.externalCompressors.put(baseID,
- ExternalCompressor.createRANS(RANS.ORDER.ONE));
-
- final int qualityScoreID = exCounter++;
- header.externalIds.add(qualityScoreID);
- header.externalCompressors.put(qualityScoreID,
- ExternalCompressor.createRANS(RANS.ORDER.ONE));
-
- final int readNameID = exCounter++;
- header.externalIds.add(readNameID);
- header.externalCompressors.put(readNameID, ExternalCompressor.createGZIP());
-
- final int mateInfoID = exCounter++;
- header.externalIds.add(mateInfoID);
- header.externalCompressors.put(mateInfoID,
- ExternalCompressor.createRANS(RANS.ORDER.ONE));
-
- header.encodingMap = new TreeMap<EncodingKey, EncodingParams>();
- for (final EncodingKey key : EncodingKey.values())
- header.encodingMap.put(key, NullEncoding.toParam());
-
- header.tMap = new TreeMap<Integer, EncodingParams>();
-
- { // bit flags encoding:
- getOptimalIntegerEncoding(header, EncodingKey.BF_BitFlags, 0, records);
- }
+ private static final int TAG_VALUE_BUFFER_SIZE = 1024 * 1024;
+ public static final int BYTE_SPACE_SIZE = 256;
+ public static final int ALL_BYTES_USED = -1;
+ private final Map<Integer, EncodingDetails> bestEncodings = new HashMap<>();
+ private final ByteArrayOutputStream baosForTagValues;
+
+ public CompressionHeaderFactory() {
+ baosForTagValues = new ByteArrayOutputStream(TAG_VALUE_BUFFER_SIZE);
+ }
- { // compression bit flags encoding:
- getOptimalIntegerEncoding(header, EncodingKey.CF_CompressionBitFlags, 0, records);
- }
+ /**
+ * Decides on compression methods to use for the given records.
+ *
+ * @param records
+ * the data to be compressed
+ * @param substitutionMatrix
+ * a matrix of base substitution frequencies, can be null, in
+ * which case it is re-calculated.
+ * @param sorted
+ * if true the records are assumed to be sorted by alignment
+ * position
+ * @return {@link htsjdk.samtools.cram.structure.CompressionHeader} object
+ * describing the encoding chosen for the data
+ */
+ public CompressionHeader build(final List<CramCompressionRecord> records, SubstitutionMatrix substitutionMatrix,
+ final boolean sorted) {
+
+ final CompressionHeaderBuilder builder = new CompressionHeaderBuilder(sorted);
+
+ builder.addExternalIntegerRansOrderZeroEncoding(EncodingKey.AP_AlignmentPositionOffset);
+ builder.addExternalByteRansOrderOneEncoding(EncodingKey.BA_Base);
+ // BB is not used
+ builder.addExternalIntegerRansOrderOneEncoding(EncodingKey.BF_BitFlags);
+ builder.addExternalByteGzipEncoding(EncodingKey.BS_BaseSubstitutionCode);
+ builder.addExternalIntegerRansOrderOneEncoding(EncodingKey.CF_CompressionBitFlags);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.DL_DeletionLength);
+ builder.addExternalByteGzipEncoding(EncodingKey.FC_FeatureCode);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.FN_NumberOfReadFeatures);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.FP_FeaturePosition);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.HC_HardClip);
+ builder.addExternalByteArrayStopTabGzipEncoding(EncodingKey.IN_Insertion);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.MF_MateBitFlags);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.MQ_MappingQualityScore);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.NF_RecordsToNextFragment);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.NP_NextFragmentAlignmentStart);
+ builder.addExternalIntegerRansOrderOneEncoding(EncodingKey.NS_NextFragmentReferenceSequenceID);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.PD_padding);
+ // QQ is not used
+ builder.addExternalByteRansOrderOneEncoding(EncodingKey.QS_QualityScore);
+ builder.addExternalIntegerRansOrderOneEncoding(EncodingKey.RG_ReadGroup);
+ builder.addExternalIntegerRansOrderZeroEncoding(EncodingKey.RI_RefId);
+ builder.addExternalIntegerRansOrderOneEncoding(EncodingKey.RL_ReadLength);
+ builder.addExternalByteArrayStopTabGzipEncoding(EncodingKey.RN_ReadName);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.RS_RefSkip);
+ builder.addExternalByteArrayStopTabGzipEncoding(EncodingKey.SC_SoftClip);
+ builder.addExternalIntegerGzipEncoding(EncodingKey.TC_TagCount);
+ builder.addExternalIntegerEncoding(EncodingKey.TL_TagIdList, ExternalCompressor.createGZIP());
+ builder.addExternalIntegerGzipEncoding(EncodingKey.TN_TagNameAndType);
+ builder.addExternalIntegerRansOrderOneEncoding(EncodingKey.TS_InsetSize);
+
+ builder.setTagIdDictionary(buildTagIdDictionary(records));
+
+ buildTagEncodings(records, builder);
+
+ if (substitutionMatrix == null) {
+ substitutionMatrix = new SubstitutionMatrix(buildFrequencies(records));
+ updateSubstitutionCodes(records, substitutionMatrix);
+ }
+ builder.setSubstitutionMatrix(substitutionMatrix);
+ return builder.getHeader();
+ }
- { // ref id:
+ /**
+ * Iterate over the records and for each tag found come up with an encoding.
+ * Tag encodings are registered via the builder.
+ *
+ * @param records
+ * CRAM records holding the tags to be encoded
+ * @param builder
+ * compression header builder to register encodings
+ */
+ private void buildTagEncodings(final List<CramCompressionRecord> records, final CompressionHeaderBuilder builder) {
+ final Set<Integer> tagIdSet = new HashSet<>();
- getOptimalIntegerEncoding(header, EncodingKey.RI_RefId, -2, records);
- }
+ for (final CramCompressionRecord record : records) {
+ if (record.tags == null || record.tags.length == 0) {
+ continue;
+ }
- { // read length encoding:
- getOptimalIntegerEncoding(header, EncodingKey.RL_ReadLength, 0, records);
+ for (final ReadTag tag : record.tags) {
+ tagIdSet.add(tag.keyType3BytesAsInt);
+ }
}
- { // alignment offset:
- if (sorted) { // alignment offset:
- header.APDelta = true;
- getOptimalIntegerEncoding(header, EncodingKey.AP_AlignmentPositionOffset, 0, records);
+ for (final int tagId : tagIdSet) {
+ if (bestEncodings.containsKey(tagId)) {
+ builder.addTagEncoding(tagId, bestEncodings.get(tagId));
} else {
- final int aStartID = exCounter++;
- header.APDelta = false;
- header.encodingMap.put(EncodingKey.AP_AlignmentPositionOffset,
- ExternalIntegerEncoding.toParam(aStartID));
- header.externalIds.add(aStartID);
- header.externalCompressors.put(aStartID,
- ExternalCompressor.createRANS(RANS.ORDER.ONE));
- log.debug("Assigned external id to alignment starts: " + aStartID);
+ final EncodingDetails e = buildEncodingForTag(records, tagId);
+ builder.addTagEncoding(tagId, e);
+ bestEncodings.put(tagId, e);
}
}
+ }
- { // read group
- getOptimalIntegerEncoding(header, EncodingKey.RG_ReadGroup, -1, records);
- }
-
- { // read name encoding:
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- calculator.add(record.readName.length());
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.RN_ReadName, ByteArrayLenEncoding.toParam(
- HuffmanIntegerEncoding.toParam(calculator.values(),
- calculator.bitLens()), ExternalByteArrayEncoding
- .toParam(readNameID)));
- }
-
- { // records to next fragment
- final IntegerEncodingCalculator calc = new IntegerEncodingCalculator(
- EncodingKey.NF_RecordsToNextFragment.name(), 0);
- for (final CramCompressionRecord r : records) {
- if (r.isHasMateDownStream())
- calc.addValue(r.recordsToNextFragment);
+ /**
+ * Given the records update the substitution matrix with actual substitution
+ * codes.
+ *
+ * @param records
+ * CRAM records
+ * @param substitutionMatrix
+ * the matrix to be updated
+ */
+ static void updateSubstitutionCodes(final List<CramCompressionRecord> records,
+ final SubstitutionMatrix substitutionMatrix) {
+ for (final CramCompressionRecord record : records) {
+ if (record.readFeatures != null) {
+ for (final ReadFeature recordFeature : record.readFeatures) {
+ if (recordFeature.getOperator() == Substitution.operator) {
+ final Substitution substitution = ((Substitution) recordFeature);
+ if (substitution.getCode() == Substitution.NO_CODE) {
+ final byte refBase = substitution.getReferenceBase();
+ final byte base = substitution.getBase();
+ substitution.setCode(substitutionMatrix.code(refBase, base));
+ }
+ }
+ }
}
-
- final Encoding<Integer> bestEncoding = calc.getBestEncoding();
- header.encodingMap.put(
- EncodingKey.NF_RecordsToNextFragment,
- new EncodingParams(bestEncoding.id(), bestEncoding
- .toByteArray()));
- }
-
- { // tag count
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- calculator.add(record.tags == null ? 0 : record.tags.length);
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.TC_TagCount, HuffmanIntegerEncoding.toParam(
- calculator.values(), calculator.bitLens()));
}
+ }
- { // tag name and type
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records) {
- if (record.tags == null)
- continue;
- for (final ReadTag tag : record.tags)
- calculator.add(tag.keyType3BytesAsInt);
+ /**
+ * Build an array of substitution frequencies for the given CRAM records.
+ *
+ * @param records
+ * CRAM records to scan
+ * @return a 2D array of frequencies, see
+ * {@link htsjdk.samtools.cram.structure.SubstitutionMatrix}
+ */
+ static long[][] buildFrequencies(final List<CramCompressionRecord> records) {
+ final long[][] frequencies = new long[BYTE_SPACE_SIZE][BYTE_SPACE_SIZE];
+ for (final CramCompressionRecord record : records) {
+ if (record.readFeatures != null) {
+ for (final ReadFeature readFeature : record.readFeatures) {
+ if (readFeature.getOperator() == Substitution.operator) {
+ final Substitution substitution = ((Substitution) readFeature);
+ final byte refBase = substitution.getReferenceBase();
+ final byte base = substitution.getBase();
+ frequencies[refBase][base]++;
+ }
+ }
}
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.TN_TagNameAndType, HuffmanIntegerEncoding
- .toParam(calculator.values(), calculator.bitLens()));
}
+ return frequencies;
+ }
- {
-
- final Comparator<ReadTag> comparator = new Comparator<ReadTag>() {
-
- @Override
- public int compare(final ReadTag o1, final ReadTag o2) {
- return o1.keyType3BytesAsInt - o2.keyType3BytesAsInt;
- }
- };
-
- final Comparator<byte[]> baComparator = new Comparator<byte[]>() {
-
- @Override
- public int compare(final byte[] o1, final byte[] o2) {
- if (o1.length - o2.length != 0)
- return o1.length - o2.length;
+ /**
+ * Build a dictionary of tag ids.
+ *
+ * @param records
+ * records holding the tags
+ * @return a 3D byte array: a set of unique lists of tag ids.
+ */
+ private static byte[][][] buildTagIdDictionary(final List<CramCompressionRecord> records) {
+ final Comparator<ReadTag> comparator = new Comparator<ReadTag>() {
+
+ @Override
+ public int compare(final ReadTag o1, final ReadTag o2) {
+ return o1.keyType3BytesAsInt - o2.keyType3BytesAsInt;
+ }
+ };
- for (int i = 0; i < o1.length; i++)
- if (o1[i] != o2[i])
- return o1[i] - o2[i];
+ final Comparator<byte[]> baComparator = new Comparator<byte[]>() {
- return 0;
- }
- };
-
- final Map<byte[], MutableInt> map = new TreeMap<byte[], MutableInt>(baComparator);
- final MutableInt noTagCounter = new MutableInt();
- map.put(new byte[0], noTagCounter);
- for (final CramCompressionRecord record : records) {
- if (record.tags == null) {
- noTagCounter.value++;
- record.tagIdsIndex = noTagCounter;
- continue;
+ @Override
+ public int compare(final byte[] o1, final byte[] o2) {
+ if (o1.length - o2.length != 0) {
+ return o1.length - o2.length;
}
- Arrays.sort(record.tags, comparator);
- record.tagIds = new byte[record.tags.length * 3];
-
- int tagIndex = 0;
- for (int i = 0; i < record.tags.length; i++) {
- record.tagIds[i * 3] = (byte) record.tags[tagIndex].keyType3Bytes.charAt(0);
- record.tagIds[i * 3 + 1] = (byte) record.tags[tagIndex].keyType3Bytes.charAt(1);
- record.tagIds[i * 3 + 2] = (byte) record.tags[tagIndex].keyType3Bytes.charAt(2);
- tagIndex++;
+ for (int i = 0; i < o1.length; i++) {
+ if (o1[i] != o2[i]) {
+ return o1[i] - o2[i];
+ }
}
- MutableInt count = map.get(record.tagIds);
- if (count == null) {
- count = new MutableInt();
- map.put(record.tagIds, count);
- }
- count.value++;
- record.tagIdsIndex = count;
+ return 0;
}
+ };
- final byte[][][] dic = new byte[map.size()][][];
- int i = 0;
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final byte[] idsAsBytes : map.keySet()) {
- final int nofIds = idsAsBytes.length / 3;
- dic[i] = new byte[nofIds][];
- for (int j = 0; j < idsAsBytes.length; ) {
- final int idIndex = j / 3;
- dic[i][idIndex] = new byte[3];
- dic[i][idIndex][0] = idsAsBytes[j++];
- dic[i][idIndex][1] = idsAsBytes[j++];
- dic[i][idIndex][2] = idsAsBytes[j++];
- }
- calculator.add(i, map.get(idsAsBytes).value);
- map.get(idsAsBytes).value = i++;
+ final Map<byte[], MutableInt> map = new TreeMap<>(baComparator);
+ final MutableInt noTagCounter = new MutableInt();
+ map.put(new byte[0], noTagCounter);
+ for (final CramCompressionRecord record : records) {
+ if (record.tags == null) {
+ noTagCounter.value++;
+ record.tagIdsIndex = noTagCounter;
+ continue;
}
- calculator.calculate();
- header.encodingMap.put(EncodingKey.TL_TagIdList,
- HuffmanIntegerEncoding.toParam(calculator.values(), calculator.bitLens()));
- header.dictionary = dic;
- }
-
- { // tag values
- @SuppressWarnings("UnnecessaryLocalVariable") final int unsortedTagValueExternalID = exCounter;
- header.externalIds.add(unsortedTagValueExternalID);
- header.externalCompressors.put(unsortedTagValueExternalID,
- ExternalCompressor.createRANS(RANS.ORDER.ONE));
-
- final Set<Integer> tagIdSet = new HashSet<Integer>();
- for (final CramCompressionRecord record : records) {
- if (record.tags == null)
- continue;
+ Arrays.sort(record.tags, comparator);
+ record.tagIds = new byte[record.tags.length * 3];
- for (final ReadTag tag : record.tags)
- tagIdSet.add(tag.keyType3BytesAsInt);
+ int tagIndex = 0;
+ for (int i = 0; i < record.tags.length; i++) {
+ record.tagIds[i * 3] = (byte) record.tags[tagIndex].keyType3Bytes.charAt(0);
+ record.tagIds[i * 3 + 1] = (byte) record.tags[tagIndex].keyType3Bytes.charAt(1);
+ record.tagIds[i * 3 + 2] = (byte) record.tags[tagIndex].keyType3Bytes.charAt(2);
+ tagIndex++;
}
- for (final int id : tagIdSet) {
- final int externalID;
- final byte type = (byte) (id & 0xFF);
- switch (type) {
- case 'Z':
- case 'B':
- externalID = id;
- break;
-
- default:
- externalID = unsortedTagValueExternalID;
- break;
- }
-
- header.externalIds.add(externalID);
- header.externalCompressors.put(externalID,
- ExternalCompressor.createRANS(RANS.ORDER.ONE));
- header.tMap.put(id, ByteArrayLenEncoding.toParam(
- ExternalIntegerEncoding.toParam(externalID),
- ExternalByteEncoding.toParam(externalID)));
+ MutableInt count = map.get(record.tagIds);
+ if (count == null) {
+ count = new MutableInt();
+ map.put(record.tagIds, count);
}
- }
-
- { // number of read features
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord r : records)
- calculator.add(r.readFeatures == null ? 0 : r.readFeatures.size());
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.FN_NumberOfReadFeatures,
- HuffmanIntegerEncoding.toParam(calculator.values(), calculator.bitLens()));
- }
-
- { // feature position
- final IntegerEncodingCalculator calc = new IntegerEncodingCalculator("read feature position", 0);
- for (final CramCompressionRecord record : records) {
- int prevPos = 0;
- if (record.readFeatures == null)
- continue;
- for (final ReadFeature rf : record.readFeatures) {
- calc.addValue(rf.getPosition() - prevPos);
- prevPos = rf.getPosition();
- }
+ count.value++;
+ record.tagIdsIndex = count;
+ }
+
+ final byte[][][] dictionary = new byte[map.size()][][];
+ int i = 0;
+ for (final byte[] idsAsBytes : map.keySet()) {
+ final int nofIds = idsAsBytes.length / 3;
+ dictionary[i] = new byte[nofIds][];
+ for (int j = 0; j < idsAsBytes.length;) {
+ final int idIndex = j / 3;
+ dictionary[i][idIndex] = new byte[3];
+ dictionary[i][idIndex][0] = idsAsBytes[j++];
+ dictionary[i][idIndex][1] = idsAsBytes[j++];
+ dictionary[i][idIndex][2] = idsAsBytes[j++];
}
-
- final Encoding<Integer> bestEncoding = calc.getBestEncoding();
- header.encodingMap.put(EncodingKey.FP_FeaturePosition,
- new EncodingParams(bestEncoding.id(), bestEncoding.toByteArray()));
- }
-
- { // feature code
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- if (record.readFeatures != null)
- for (final ReadFeature readFeature : record.readFeatures)
- calculator.add(readFeature.getOperator());
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.FC_FeatureCode, HuffmanByteEncoding.toParam(
- calculator.valuesAsBytes(), calculator.bitLens));
- }
-
- { // bases:
- header.encodingMap.put(EncodingKey.BA_Base, ExternalByteEncoding.toParam(baseID));
- }
-
- { // quality scores:
- header.encodingMap.put(EncodingKey.QS_QualityScore, ExternalByteEncoding.toParam(qualityScoreID));
- }
-
- { // base substitution code
- if (substitutionMatrix == null) {
- final long[][] frequencies = new long[200][200];
- for (final CramCompressionRecord record : records) {
- if (record.readFeatures != null)
- for (final ReadFeature readFeature : record.readFeatures)
- if (readFeature.getOperator() == Substitution.operator) {
- final Substitution substitution = ((Substitution) readFeature);
- final byte refBase = substitution.getReferenceBase();
- final byte base = substitution.getBase();
- frequencies[refBase][base]++;
- }
- }
-
- header.substitutionMatrix = new SubstitutionMatrix(frequencies);
- } else
- header.substitutionMatrix = substitutionMatrix;
-
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- if (record.readFeatures != null)
- for (final ReadFeature recordFeature : record.readFeatures) {
- if (recordFeature.getOperator() == Substitution.operator) {
- final Substitution substitution = ((Substitution) recordFeature);
- if (substitution.getCode() == -1) {
- final byte refBase = substitution.getReferenceBase();
- final byte base = substitution.getBase();
- substitution.setCode(header.substitutionMatrix.code(refBase, base));
- }
- calculator.add(substitution.getCode());
- }
- }
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.BS_BaseSubstitutionCode,
- HuffmanIntegerEncoding.toParam(calculator.values, calculator.bitLens));
+ map.get(idsAsBytes).value = i++;
}
+ return dictionary;
+ }
- { // insertion bases
- header.encodingMap.put(EncodingKey.IN_Insertion, ByteArrayStopEncoding.toParam((byte) 0, baseID));
- }
+ /**
+ * Tag id is an integer where the first byte is its type and the other 2
+ * bytes represent the name. For example 'OQZ', where 'OQ' stands for
+ * original quality score tag and 'Z' stands for string type.
+ *
+ * @param tagID
+ * a 3 byte tag id stored in an int
+ * @return tag type, the lowest byte in the tag id
+ */
+ static byte getTagType(final int tagID) {
+ return (byte) (tagID & 0xFF);
+ }
- { // insertion bases
- header.encodingMap.put(EncodingKey.SC_SoftClip, ByteArrayStopEncoding.toParam((byte) 0, baseID));
- }
+ static ExternalCompressor getBestExternalCompressor(final byte[] data) {
+ final ExternalCompressor gzip = ExternalCompressor.createGZIP();
+ final int gzipLen = gzip.compress(data).length;
- { // deletion length
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- if (record.readFeatures != null)
- for (final ReadFeature recordFeature : record.readFeatures)
- if (recordFeature.getOperator() == Deletion.operator)
- calculator.add(((Deletion) recordFeature).getLength());
- calculator.calculate();
-
- header.encodingMap.put(EncodingKey.DL_DeletionLength,
- HuffmanIntegerEncoding.toParam(calculator.values, calculator.bitLens));
- }
+ final ExternalCompressor rans0 = ExternalCompressor.createRANS(RANS.ORDER.ZERO);
+ final int rans0Len = rans0.compress(data).length;
- { // hard clip length
- final IntegerEncodingCalculator calculator = new IntegerEncodingCalculator(EncodingKey.HC_HardClip.name(), 0);
- for (final CramCompressionRecord record : records)
- if (record.readFeatures != null)
- for (final ReadFeature recordFeature : record.readFeatures)
- if (recordFeature.getOperator() == HardClip.operator)
- calculator.addValue(((HardClip) recordFeature).getLength());
-
- final Encoding<Integer> bestEncoding = calculator.getBestEncoding();
- header.encodingMap.put(EncodingKey.HC_HardClip, new EncodingParams(bestEncoding.id(), bestEncoding.toByteArray()));
- }
-
- { // padding length
- final IntegerEncodingCalculator calculator = new IntegerEncodingCalculator(EncodingKey.PD_padding.name(), 0);
- for (final CramCompressionRecord record : records)
- if (record.readFeatures != null)
- for (final ReadFeature recordFeature : record.readFeatures)
- if (recordFeature.getOperator() == Padding.operator)
- calculator.addValue(((Padding) recordFeature).getLength());
-
- final Encoding<Integer> bestEncoding = calculator.getBestEncoding();
- header.encodingMap.put(EncodingKey.PD_padding, new EncodingParams(bestEncoding.id(), bestEncoding.toByteArray()));
+ final ExternalCompressor rans1 = ExternalCompressor.createRANS(RANS.ORDER.ONE);
+ final int rans1Len = rans1.compress(data).length;
+ // find the best of general purpose codecs:
+ final int minLen = Math.min(gzipLen, Math.min(rans0Len, rans1Len));
+ if (minLen == rans0Len) {
+ return rans0;
+ } else if (minLen == rans1Len) {
+ return rans1;
+ } else {
+ return gzip;
}
+ }
- { // ref skip length
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- if (record.readFeatures != null)
- for (final ReadFeature recordFeature : record.readFeatures)
- if (recordFeature.getOperator() == RefSkip.operator)
- calculator.add(((RefSkip) recordFeature).getLength());
- calculator.calculate();
+ byte[] getDataForTag(final List<CramCompressionRecord> records, final int tagID) {
+ baosForTagValues.reset();
- header.encodingMap.put(EncodingKey.RS_RefSkip, HuffmanIntegerEncoding.toParam(calculator.values, calculator.bitLens));
- }
-
- { // mapping quality score
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- if (!record.isSegmentUnmapped())
- calculator.add(record.mappingQuality);
- calculator.calculate();
+ for (final CramCompressionRecord record : records) {
+ if (record.tags == null) {
+ continue;
+ }
- header.encodingMap.put(EncodingKey.MQ_MappingQualityScore,
- HuffmanIntegerEncoding.toParam(calculator.values(), calculator.bitLens));
+ for (final ReadTag tag : record.tags) {
+ if (tag.keyType3BytesAsInt != tagID) {
+ continue;
+ }
+ final byte[] valueBytes = tag.getValueAsByteArray();
+ try {
+ baosForTagValues.write(valueBytes);
+ } catch (final IOException e) {
+ throw new RuntimeIOException(e);
+ }
+ }
}
- { // mate bit flags
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- calculator.add(record.getMateFlags());
- calculator.calculate();
+ return baosForTagValues.toByteArray();
+ }
- header.encodingMap.put(EncodingKey.MF_MateBitFlags,
- HuffmanIntegerEncoding.toParam(calculator.values, calculator.bitLens));
- }
+ static ByteSizeRange geByteSizeRangeOfTagValues(final List<CramCompressionRecord> records, final int tagID) {
+ final byte type = getTagType(tagID);
+ final ByteSizeRange stats = new ByteSizeRange();
+ for (final CramCompressionRecord record : records) {
+ if (record.tags == null) {
+ continue;
+ }
- { // next fragment ref id:
- final HuffmanParamsCalculator calculator = new HuffmanParamsCalculator();
- for (final CramCompressionRecord record : records)
- if (record.isDetached())
- calculator.add(record.mateSequenceID);
- calculator.calculate();
-
- if (calculator.values.length == 0)
- header.encodingMap.put(EncodingKey.NS_NextFragmentReferenceSequenceID, NullEncoding.toParam());
-
- header.encodingMap.put(EncodingKey.NS_NextFragmentReferenceSequenceID,
- HuffmanIntegerEncoding.toParam(calculator.values(),
- calculator.bitLens()));
- log.debug("NS: "
- + header.encodingMap.get(EncodingKey.NS_NextFragmentReferenceSequenceID));
+ for (final ReadTag tag : record.tags) {
+ if (tag.keyType3BytesAsInt != tagID) {
+ continue;
+ }
+ final int size = getTagValueByteSize(type, tag.getValue());
+ if (stats.min > size)
+ stats.min = size;
+ if (stats.max < size)
+ stats.max = size;
+ }
}
+ return stats;
+ }
- { // next fragment alignment start
- header.encodingMap.put(EncodingKey.NP_NextFragmentAlignmentStart, ExternalIntegerEncoding.toParam(mateInfoID));
+ /**
+ * Find a byte value that does not occur anywhere in the given array
+ * @param array bytes
+ * @return byte value or -1 if the array contains all possible byte values.
+ */
+ static int getUnusedByte(final byte[] array) {
+ final byte[] usage = new byte[BYTE_SPACE_SIZE];
+ for (final byte b : array) {
+ usage[0xFF & b] = 1;
}
- { // template size
- header.encodingMap.put(EncodingKey.TS_InsetSize, ExternalIntegerEncoding.toParam(mateInfoID));
+ for (int i = 0; i < usage.length; i++) {
+ if (usage[i] == 0)
+ return i;
}
-
- return header;
+ return ALL_BYTES_USED;
}
- private static int getValue(final EncodingKey key, final CramCompressionRecord record) {
- switch (key) {
- case AP_AlignmentPositionOffset:
- return record.alignmentDelta;
- case BF_BitFlags:
- return record.flags;
- case CF_CompressionBitFlags:
- return record.compressionFlags;
- case FN_NumberOfReadFeatures:
- return record.readFeatures == null ? 0 : record.readFeatures.size();
- case MF_MateBitFlags:
- return record.mateFlags;
- case MQ_MappingQualityScore:
- return record.mappingQuality;
- case NF_RecordsToNextFragment:
- return record.recordsToNextFragment;
- case NP_NextFragmentAlignmentStart:
- return record.mateAlignmentStart;
- case NS_NextFragmentReferenceSequenceID:
- return record.mateSequenceID;
- case RG_ReadGroup:
- return record.readGroupID;
- case RI_RefId:
- return record.sequenceId;
- case RL_ReadLength:
- return record.readLength;
- case TC_TagCount:
- return record.tags == null ? 0 : record.tags.length;
-
- default:
- throw new RuntimeException("Unexpected encoding key: " + key.name());
- }
+ static class ByteSizeRange {
+ int min = Integer.MAX_VALUE, max = Integer.MIN_VALUE;
}
- private static void getOptimalIntegerEncoding(final CompressionHeader header, final EncodingKey key, final int minValue,
- final List<CramCompressionRecord> records) {
- final IntegerEncodingCalculator calc = new IntegerEncodingCalculator(key.name(), minValue);
- for (final CramCompressionRecord record : records) {
- final int value = getValue(key, record);
- calc.addValue(value);
- }
-
- final Encoding<Integer> bestEncoding = calc.getBestEncoding();
- header.encodingMap.put(key, new EncodingParams(bestEncoding.id(), bestEncoding.toByteArray()));
+ /**
+ * A combination of external compressor and encoding params. This is all
+ * that is needed to encode a data series.
+ */
+ private static class EncodingDetails {
+ ExternalCompressor compressor;
+ EncodingParams params;
}
- private static class BitCode implements Comparable<BitCode> {
- final int value;
- final int length;
+ /**
+ * Build an encoding for a specific tag for given records.
+ *
+ * @param records
+ * CRAM records holding the tags
+ * @param tagID
+ * an integer id of the tag
+ * @return an encoding for the tag
+ */
+ private EncodingDetails buildEncodingForTag(final List<CramCompressionRecord> records, final int tagID) {
+ final EncodingDetails details = new EncodingDetails();
+ final byte[] data = getDataForTag(records, tagID);
+
+ details.compressor = getBestExternalCompressor(data);
+
+ final byte type = getTagType(tagID);
+ switch (type) {
+ case 'A':
+ case 'c':
+ case 'C':
+ details.params = ByteArrayLenEncoding.toParam(
+ HuffmanIntegerEncoding.toParam(new int[] { 1 }, new int[] { 0 }),
+ ExternalByteEncoding.toParam(tagID));
+ return details;
+ case 'I':
+ case 'i':
+ case 'f':
+ details.params = ByteArrayLenEncoding.toParam(
+ HuffmanIntegerEncoding.toParam(new int[] { 4 }, new int[] { 0 }),
+ ExternalByteEncoding.toParam(tagID));
+ return details;
+
+ case 's':
+ case 'S':
+ details.params = ByteArrayLenEncoding.toParam(
+ HuffmanIntegerEncoding.toParam(new int[] { 2 }, new int[] { 0 }),
+ ExternalByteEncoding.toParam(tagID));
+ return details;
+ case 'Z':
+ case 'B':
+ final ByteSizeRange stats = geByteSizeRangeOfTagValues(records, tagID);
+ final boolean singleSize = stats.min == stats.max;
+ if (singleSize) {
+ details.params = ByteArrayLenEncoding.toParam(
+ HuffmanIntegerEncoding.toParam(new int[] { stats.min }, new int[] { 0 }),
+ ExternalByteEncoding.toParam(tagID));
+ return details;
+ }
- public BitCode(final int value, final int length) {
- this.value = value;
- this.length = length;
- }
+ if (type == 'Z') {
+ details.params = ByteArrayStopEncoding.toParam((byte) '\t', tagID);
+ return details;
+ }
+
+ final int minSize_threshold_ForByteArrayStopEncoding = 100;
+ if (stats.min > minSize_threshold_ForByteArrayStopEncoding) {
+ final int unusedByte = getUnusedByte(data);
+ if (unusedByte > ALL_BYTES_USED) {
+ details.params = ByteArrayStopEncoding.toParam((byte) unusedByte, tagID);
+ return details;
+ }
+ }
- @Override
- public int compareTo(@SuppressWarnings("NullableProblems") final BitCode o) {
- final int result = value - o.value;
- if (result != 0)
- return result;
- return length - o.length;
+ details.params = ByteArrayLenEncoding.toParam(ExternalIntegerEncoding.toParam(tagID),
+ ExternalByteEncoding.toParam(tagID));
+ return details;
+ default:
+ throw new IllegalArgumentException("Unknown tag type: " + (char) type);
}
}
- public static class HuffmanParamsCalculator {
- private final HashMap<Integer, MutableInt> countMap = new HashMap<Integer, MutableInt>();
- private int[] values = new int[]{};
- private int[] bitLens = new int[]{};
-
- public void add(final int value) {
- MutableInt counter = countMap.get(value);
- if (counter == null) {
- counter = new MutableInt();
- countMap.put(value, counter);
- }
- counter.value++;
- }
+ /**
+ * A helper class to build
+ * {@link htsjdk.samtools.cram.structure.CompressionHeader} object.
+ */
+ private static class CompressionHeaderBuilder {
+ private final CompressionHeader header;
- public void add(final Integer value, final int inc) {
- MutableInt counter = countMap.get(value);
- if (counter == null) {
- counter = new MutableInt();
- countMap.put(value, counter);
- }
- counter.value += inc;
- }
+ CompressionHeaderBuilder(final boolean sorted) {
+ header = new CompressionHeader();
+ header.externalIds = new ArrayList<>();
+ header.tMap = new TreeMap<>();
- public int[] bitLens() {
- return bitLens;
+ header.encodingMap = new TreeMap<>();
+ header.APDelta = sorted;
}
- public int[] values() {
- return values;
+ CompressionHeader getHeader() {
+ return header;
}
- public Integer[] valuesAsAutoIntegers() {
- final Integer[] intValues = new Integer[values.length];
- for (int i = 0; i < intValues.length; i++)
- intValues[i] = values[i];
-
- return intValues;
+ void addExternalEncoding(final EncodingKey encodingKey, final EncodingParams params,
+ final ExternalCompressor compressor) {
+ header.externalIds.add(encodingKey.ordinal());
+ header.externalCompressors.put(encodingKey.ordinal(), compressor);
+ header.encodingMap.put(encodingKey, params);
}
- public byte[] valuesAsBytes() {
- final byte[] byteValues = new byte[values.length];
- for (int i = 0; i < byteValues.length; i++)
- byteValues[i] = (byte) (0xFF & values[i]);
-
- return byteValues;
+ void addExternalByteArrayStopTabGzipEncoding(final EncodingKey encodingKey) {
+ addExternalEncoding(encodingKey, ByteArrayStopEncoding.toParam((byte) '\t', encodingKey.ordinal()),
+ ExternalCompressor.createGZIP());
}
- public Byte[] valuesAsAutoBytes() {
- final Byte[] byteValues = new Byte[values.length];
- for (int i = 0; i < byteValues.length; i++)
- byteValues[i] = (byte) (0xFF & values[i]);
-
- return byteValues;
+ void addExternalIntegerEncoding(final EncodingKey encodingKey, final ExternalCompressor compressor) {
+ addExternalEncoding(encodingKey, ExternalIntegerEncoding.toParam(encodingKey.ordinal()), compressor);
}
- public void calculate() {
- final HuffmanTree<Integer> tree;
- {
- final int size = countMap.size();
- final int[] frequencies = new int[size];
- final int[] values = new int[size];
-
- int i = 0;
- for (final Integer key : countMap.keySet()) {
- values[i] = key;
- frequencies[i] = countMap.get(key).value;
- i++;
- }
- tree = HuffmanCode.buildTree(frequencies, autobox(values));
- }
-
- final List<Integer> valueList = new ArrayList<Integer>();
- final List<Integer> lens = new ArrayList<Integer>();
- HuffmanCode.getValuesAndBitLengths(valueList, lens, tree);
-
- // the following sorting is not really required, but whatever:
- final BitCode[] codes = new BitCode[valueList.size()];
- for (int i = 0; i < valueList.size(); i++) {
- codes[i] = new BitCode(valueList.get(i), lens.get(i));
- }
- Arrays.sort(codes);
-
- values = new int[codes.length];
- bitLens = new int[codes.length];
-
- for (int i = 0; i < codes.length; i++) {
- final BitCode code = codes[i];
- bitLens[i] = code.length;
- values[i] = code.value;
- }
+ void addExternalIntegerGzipEncoding(final EncodingKey encodingKey) {
+ addExternalEncoding(encodingKey, ExternalIntegerEncoding.toParam(encodingKey.ordinal()),
+ ExternalCompressor.createGZIP());
}
- }
-
- private static Integer[] autobox(final int[] array) {
- final Integer[] newArray = new Integer[array.length];
- for (int i = 0; i < array.length; i++)
- newArray[i] = array[i];
- return newArray;
- }
-
- public static class EncodingLengthCalculator {
- private final BitCodec<Integer> codec;
- private final Encoding<Integer> encoding;
- private long length;
- public EncodingLengthCalculator(final Encoding<Integer> encoding) {
- this.encoding = encoding;
- codec = encoding.buildCodec(null, null);
+ void addExternalByteGzipEncoding(final EncodingKey encodingKey) {
+ addExternalEncoding(encodingKey, ExternalByteEncoding.toParam(encodingKey.ordinal()),
+ ExternalCompressor.createGZIP());
}
- public void add(final int value) {
- length += codec.numberOfBits(value);
+ void addExternalByteRansOrderOneEncoding(final EncodingKey encodingKey) {
+ addExternalEncoding(encodingKey, ExternalByteEncoding.toParam(encodingKey.ordinal()),
+ ExternalCompressor.createRANS(RANS.ORDER.ONE));
}
- public void add(final int value, final int inc) {
- length += inc * codec.numberOfBits(value);
+ void addExternalIntegerRansOrderOneEncoding(final EncodingKey encodingKey) {
+ addExternalIntegerEncoding(encodingKey, ExternalCompressor.createRANS(RANS.ORDER.ONE));
}
- public long length() {
- return length;
+ void addExternalIntegerRansOrderZeroEncoding(final EncodingKey encodingKey) {
+ addExternalIntegerEncoding(encodingKey, ExternalCompressor.createRANS(RANS.ORDER.ZERO));
}
- }
- public static class IntegerEncodingCalculator {
- public final List<EncodingLengthCalculator> calculators = new ArrayList<EncodingLengthCalculator>();
- private int max = 0;
- private int count = 0;
- private final String name;
- private HashMap<Integer, MutableInt> dictionary = new HashMap<Integer, MutableInt>();
- private final int dictionaryThreshold = 100;
- private final int minValue;
-
- public IntegerEncodingCalculator(final String name, final int dictionaryThreshold, final int minValue) {
- this.name = name;
- this.minValue = minValue;
- // for (int i = 2; i < 10; i++)
- // calculators.add(new EncodingLengthCalculator(
- // new GolombIntegerEncoding(i)));
- //
- // for (int i = 2; i < 20; i++)
- // calculators.add(new EncodingLengthCalculator(
- // new GolombRiceIntegerEncoding(i)));
-
- calculators.add(new EncodingLengthCalculator(new GammaIntegerEncoding(1 - minValue)));
-
- for (int i = 2; i < 5; i++)
- calculators.add(new EncodingLengthCalculator(new SubexponentialIntegerEncoding(0 - minValue, i)));
-
- if (dictionaryThreshold < 1)
- dictionary = null;
- else {
- dictionary = new HashMap<Integer, MutableInt>();
- // int pow = (int) Math.ceil(Math.log(dictionaryThreshold)
- // / Math.log(2f));
- // dictionaryThreshold = 1 << pow ;
- // dictionary = new HashMap<Integer,
- // MutableInt>(dictionaryThreshold, 1);
- }
+ void addTagEncoding(final int tagId, final EncodingDetails encodingDetails) {
+ header.externalIds.add(tagId);
+ header.externalCompressors.put(tagId, encodingDetails.compressor);
+ header.tMap.put(tagId, encodingDetails.params);
}
- public IntegerEncodingCalculator(final String name, final int minValue) {
- this(name, 255, minValue);
+ void setTagIdDictionary(final byte[][][] dictionary) {
+ header.dictionary = dictionary;
}
- public void addValue(final int value) {
- count++;
- if (value > max)
- max = value;
-
- for (final EncodingLengthCalculator calculator : calculators)
- calculator.add(value);
-
- if (dictionary != null) {
- if (dictionary.size() >= dictionaryThreshold - 1)
- dictionary = null;
- else {
- MutableInt mutableInt = dictionary.get(value);
- if (mutableInt == null) {
- mutableInt = new MutableInt();
- dictionary.put(value, mutableInt);
- }
- mutableInt.value++;
- }
-
- }
-
+ void setSubstitutionMatrix(final SubstitutionMatrix substitutionMatrix) {
+ header.substitutionMatrix = substitutionMatrix;
}
+ }
- public Encoding<Integer> getBestEncoding() {
- if (dictionary != null && dictionary.size() == 1) {
- final int value = dictionary.keySet().iterator().next();
- final EncodingParams param = HuffmanIntegerEncoding.toParam(new int[]{value}, new int[]{0});
- final HuffmanIntegerEncoding huffmanEncoding = new HuffmanIntegerEncoding();
- huffmanEncoding.fromByteArray(param.params);
- return huffmanEncoding;
- }
-
- EncodingLengthCalculator bestCalculator = calculators.get(0);
-
- for (final EncodingLengthCalculator calculator : calculators) {
- if (calculator.length() < bestCalculator.length())
- bestCalculator = calculator;
- }
-
- Encoding<Integer> bestEncoding = bestCalculator.encoding;
- long bits = bestCalculator.length();
-
- { // check if beta is better:
-
- final int betaLength = (int) Math.round(Math.log(max - minValue) / Math.log(2) + 0.5);
- if (bits > betaLength * count) {
- bestEncoding = new BetaIntegerEncoding(-minValue, betaLength);
- bits = betaLength * count;
- }
- }
-
- { // try huffman:
- if (dictionary != null) {
- final HuffmanParamsCalculator huffmanParamsCalculator = new HuffmanParamsCalculator();
- for (final Integer value : dictionary.keySet())
- huffmanParamsCalculator.add(value, dictionary.get(value).value);
-
- huffmanParamsCalculator.calculate();
-
- final EncodingParams param = HuffmanIntegerEncoding.toParam(huffmanParamsCalculator.values(), huffmanParamsCalculator.bitLens());
- final HuffmanIntegerEncoding huffmanEncoding = new HuffmanIntegerEncoding();
- huffmanEncoding.fromByteArray(param.params);
- final EncodingLengthCalculator calculator = new EncodingLengthCalculator(huffmanEncoding);
- for (final Integer key : dictionary.keySet())
- calculator.add(key, dictionary.get(key).value);
-
- if (calculator.length() < bits) {
- bestEncoding = huffmanEncoding;
- bits = calculator.length();
- }
- }
- }
-
- byte[] params = bestEncoding.toByteArray();
- params = Arrays.copyOf(params, Math.min(params.length, 20));
- log.debug("Best encoding for " + name + ": " + bestEncoding.id().name() + Arrays.toString(params) + ", bits=" + bits);
-
- return bestEncoding;
+ /**
+ * Calculate byte size of a tag value based on its type and value class
+ * @param type tag type, like 'A' or 'i'
+ * @param value object representing the tag value
+ * @return number of bytes used for the tag value
+ */
+ static int getTagValueByteSize(final byte type, final Object value) {
+ switch (type) {
+ case 'A':
+ return 1;
+ case 'I':
+ return 4;
+ case 'i':
+ return 4;
+ case 's':
+ return 2;
+ case 'S':
+ return 2;
+ case 'c':
+ return 1;
+ case 'C':
+ return 1;
+ case 'f':
+ return 4;
+ case 'Z':
+ return ((String) value).length()+1;
+ case 'B':
+ if (value instanceof byte[])
+ return 1+ 4+ ((byte[]) value).length;
+ if (value instanceof short[])
+ return 1+ 4+ ((short[]) value).length * 2;
+ if (value instanceof int[])
+ return 1+ 4+ ((int[]) value).length * 4;
+ if (value instanceof float[])
+ return 1+ 4+ ((float[]) value).length * 4;
+ if (value instanceof long[])
+ return 1+ 4+ ((long[]) value).length * 4;
+
+ throw new RuntimeException("Unknown tag array class: " + value.getClass());
+ default:
+ throw new RuntimeException("Unknown tag type: " + (char) type);
}
}
}
diff --git a/src/java/htsjdk/samtools/cram/build/ContainerFactory.java b/src/java/htsjdk/samtools/cram/build/ContainerFactory.java
index b96dd13..1c39ed2 100644
--- a/src/java/htsjdk/samtools/cram/build/ContainerFactory.java
+++ b/src/java/htsjdk/samtools/cram/build/ContainerFactory.java
@@ -64,11 +64,10 @@ public class ContainerFactory {
final long time1 = System.nanoTime();
final CompressionHeader header = new CompressionHeaderFactory().build(records,
substitutionMatrix, samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate);
- header.APDelta = true;
+ header.APDelta = samFileHeader.getSortOrder() == SAMFileHeader.SortOrder.coordinate;
final long time2 = System.nanoTime();
header.readNamesIncluded = preserveReadNames;
- header.APDelta = true;
final List<Slice> slices = new ArrayList<Slice>();
@@ -149,28 +148,16 @@ public class ContainerFactory {
* 3) Detect alignment boundaries for the slice if not multi reference.
*/
// @formatter:on
- slice.sequenceId = Slice.UNMAPPED_OR_NO_REFERENCE;
+ slice.sequenceId = records.get(0).sequenceId;
final ContentDigests hasher = ContentDigests.create(ContentDigests.ALL);
for (final CramCompressionRecord record : records) {
slice.bases += record.readLength;
hasher.add(record);
+ if (slice.sequenceId == Slice.MULTI_REFERENCE) continue;
- if (slice.sequenceId != Slice.MULTI_REFERENCE
- && record.alignmentStart != SAMRecord.NO_ALIGNMENT_START
- && record.sequenceId != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
- switch (slice.sequenceId) {
- case Slice.UNMAPPED_OR_NO_REFERENCE:
- slice.sequenceId = record.sequenceId;
- break;
- case Slice.MULTI_REFERENCE:
- break;
-
- default:
- if (slice.sequenceId != record.sequenceId)
- slice.sequenceId = Slice.UNMAPPED_OR_NO_REFERENCE;
- break;
- }
-
+ if (slice.sequenceId != record.sequenceId) {
+ slice.sequenceId = Slice.MULTI_REFERENCE;
+ } else if (record.alignmentStart != SAMRecord.NO_ALIGNMENT_START) {
minAlStart = Math.min(record.alignmentStart, minAlStart);
maxAlEnd = Math.max(record.getAlignmentEnd(), maxAlEnd);
}
@@ -181,8 +168,8 @@ public class ContainerFactory {
if (slice.sequenceId == Slice.MULTI_REFERENCE
|| minAlStart == Integer.MAX_VALUE) {
- slice.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
- slice.alignmentSpan = 0;
+ slice.alignmentStart = Slice.NO_ALIGNMENT_START;
+ slice.alignmentSpan = Slice.NO_ALIGNMENT_SPAN;
} else {
slice.alignmentStart = minAlStart;
slice.alignmentSpan = maxAlEnd - minAlStart + 1;
diff --git a/src/java/htsjdk/samtools/cram/build/ContainerParser.java b/src/java/htsjdk/samtools/cram/build/ContainerParser.java
index 002502e..050fce7 100644
--- a/src/java/htsjdk/samtools/cram/build/ContainerParser.java
+++ b/src/java/htsjdk/samtools/cram/build/ContainerParser.java
@@ -21,9 +21,11 @@ import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.cram.structure.AlignmentSpan;
import htsjdk.samtools.cram.encoding.reader.CramRecordReader;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory;
import htsjdk.samtools.cram.encoding.reader.DataReaderFactory.DataReaderWithStats;
+import htsjdk.samtools.cram.encoding.reader.RefSeqIdReader;
import htsjdk.samtools.cram.io.DefaultBitInputStream;
import htsjdk.samtools.cram.structure.CompressionHeader;
import htsjdk.samtools.cram.structure.Container;
@@ -33,8 +35,10 @@ import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.Log.LogLevel;
import java.io.ByteArrayInputStream;
+import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -51,14 +55,17 @@ public class ContainerParser {
}
public List<CramCompressionRecord> getRecords(final Container container,
- ArrayList<CramCompressionRecord> records, ValidationStringency validationStringency) throws IllegalArgumentException,
+ ArrayList<CramCompressionRecord> records, final ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
+ if (container.isEOF()) return Collections.emptyList();
final long time1 = System.nanoTime();
- if (records == null)
+ if (records == null) {
records = new ArrayList<CramCompressionRecord>(container.nofRecords);
+ }
- for (final Slice slice : container.slices)
+ for (final Slice slice : container.slices) {
records.addAll(getRecords(slice, container.header, validationStringency));
+ }
final long time2 = System.nanoTime();
@@ -73,8 +80,62 @@ public class ContainerParser {
return records;
}
+ public Map<Integer, AlignmentSpan> getReferences(final Container container, final ValidationStringency validationStringency) throws IOException {
+ final Map<Integer, AlignmentSpan> containerSpanMap = new HashMap<>();
+ for (final Slice slice : container.slices) {
+ addAllSpans(containerSpanMap, getReferences(slice, container.header, validationStringency));
+ }
+ return containerSpanMap;
+ }
+
+ private static void addSpan(final int seqId, final int start, final int span, final int count, final Map<Integer, AlignmentSpan> map) {
+ if (map.containsKey(seqId)) {
+ map.get(seqId).add(start, span, count);
+ } else {
+ map.put(seqId, new AlignmentSpan(start, span, count));
+ }
+ }
+
+ private static Map<Integer, AlignmentSpan> addAllSpans(final Map<Integer, AlignmentSpan> spanMap, final Map<Integer, AlignmentSpan> addition) {
+ for (final Map.Entry<Integer, AlignmentSpan> entry:addition.entrySet()) {
+ addSpan(entry.getKey(), entry.getValue().getStart(), entry.getValue().getCount(), entry.getValue().getSpan(), spanMap);
+ }
+ return spanMap;
+ }
+
+ Map<Integer, AlignmentSpan> getReferences(final Slice slice, final CompressionHeader header, final ValidationStringency validationStringency) throws IOException {
+ final Map<Integer, AlignmentSpan> spanMap = new HashMap<>();
+ switch (slice.sequenceId) {
+ case SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX:
+ spanMap.put(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, AlignmentSpan.UNMAPPED_SPAN);
+ break;
+ case Slice.MULTI_REFERENCE:
+ final DataReaderFactory dataReaderFactory = new DataReaderFactory();
+ final Map<Integer, InputStream> inputMap = new HashMap<Integer, InputStream>();
+ for (final Integer exId : slice.external.keySet()) {
+ inputMap.put(exId, new ByteArrayInputStream(slice.external.get(exId)
+ .getRawContent()));
+ }
+
+ final RefSeqIdReader reader = new RefSeqIdReader(Slice.MULTI_REFERENCE, slice.alignmentStart, validationStringency);
+ dataReaderFactory.buildReader(reader, new DefaultBitInputStream(
+ new ByteArrayInputStream(slice.coreBlock.getRawContent())),
+ inputMap, header, slice.sequenceId);
+
+ for (int i = 0; i < slice.nofRecords; i++) {
+ reader.read();
+ }
+ addAllSpans(spanMap, reader.getReferenceSpans());
+ break;
+ default:
+ addSpan(slice.sequenceId, slice.alignmentStart, slice.alignmentSpan, slice.nofRecords, spanMap);
+ break;
+ }
+ return spanMap;
+ }
+
ArrayList<CramCompressionRecord> getRecords(ArrayList<CramCompressionRecord> records,
- final Slice slice, final CompressionHeader header, ValidationStringency validationStringency) throws IllegalArgumentException,
+ final Slice slice, final CompressionHeader header, final ValidationStringency validationStringency) throws IllegalArgumentException,
IllegalAccessException {
String seqName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
switch (slice.sequenceId) {
@@ -103,8 +164,9 @@ public class ContainerParser {
new ByteArrayInputStream(slice.coreBlock.getRawContent())),
inputMap, header, slice.sequenceId);
- if (records == null)
+ if (records == null) {
records = new ArrayList<CramCompressionRecord>(slice.nofRecords);
+ }
long readNanos = 0;
int prevStart = slice.alignmentStart;
@@ -121,9 +183,9 @@ public class ContainerParser {
record.sequenceName = seqName;
record.sequenceId = slice.sequenceId;
} else {
- if (record.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX)
+ if (record.sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
record.sequenceName = SAMRecord.NO_ALIGNMENT_REFERENCE_NAME;
- else {
+ } else {
record.sequenceName = samFileHeader.getSequence(record.sequenceId)
.getSequenceName();
}
@@ -144,14 +206,15 @@ public class ContainerParser {
if (!nanosecondsMap.containsKey(key)) {
nanosecondsMap.put(key, 0L);
value = 0;
- } else
+ } else {
value = nanosecondsMap.get(key);
+ }
nanosecondsMap.put(key, value + statMap.get(key).nanos);
}
return records;
}
- List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header, ValidationStringency validationStringency)
+ List<CramCompressionRecord> getRecords(final Slice slice, final CompressionHeader header, final ValidationStringency validationStringency)
throws IllegalArgumentException, IllegalAccessException {
return getRecords(null, slice, header, validationStringency);
}
diff --git a/src/java/htsjdk/samtools/cram/build/CramIO.java b/src/java/htsjdk/samtools/cram/build/CramIO.java
index 4a08016..04ac339 100644
--- a/src/java/htsjdk/samtools/cram/build/CramIO.java
+++ b/src/java/htsjdk/samtools/cram/build/CramIO.java
@@ -33,6 +33,7 @@ import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.seekablestream.SeekableStream;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.RuntimeIOException;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
@@ -101,6 +102,25 @@ public class CramIO {
return 0;
}
+ /**
+ * Write a CRAM File header and a SAM Header to an output stream.
+ *
+ * @param cramVersion the CRAM version to record in the file header
+ * @param outStream the output stream to write the headers to
+ * @param samFileHeader the SAM file header to include in the CRAM header
+ * @param cramID the CRAM id string to record in the file header
+ * @return the offset in the stream after writing the headers
+ */
+
+ public static long writeHeader(final Version cramVersion, final OutputStream outStream, final SAMFileHeader samFileHeader, String cramID) {
+ final CramHeader cramHeader = new CramHeader(cramVersion, cramID, samFileHeader);
+ try {
+ return CramIO.writeCramHeader(cramHeader, outStream);
+ } catch (final IOException e) {
+ throw new RuntimeIOException(e);
+ }
+ }
+
private static boolean streamEndsWith(final SeekableStream seekableStream, final byte[] marker) throws IOException {
final byte[] tail = new byte[marker.length];
@@ -235,8 +255,8 @@ public class CramIO {
container.blocks = new Block[]{block};
container.landmarks = new int[0];
container.slices = new Slice[0];
- container.alignmentSpan = 0;
- container.alignmentStart = 0;
+ container.alignmentSpan = Slice.NO_ALIGNMENT_SPAN;
+ container.alignmentStart = Slice.NO_ALIGNMENT_START;
container.bases = 0;
container.globalRecordCounter = 0;
container.nofRecords = 0;
diff --git a/src/java/htsjdk/samtools/cram/build/CramNormalizer.java b/src/java/htsjdk/samtools/cram/build/CramNormalizer.java
index a36a995..111f271 100644
--- a/src/java/htsjdk/samtools/cram/build/CramNormalizer.java
+++ b/src/java/htsjdk/samtools/cram/build/CramNormalizer.java
@@ -49,6 +49,9 @@ public class CramNormalizer {
}
public CramNormalizer(final SAMFileHeader header, final ReferenceSource referenceSource) {
+ if (referenceSource == null) {
+ throw new IllegalArgumentException("A reference is required.");
+ }
this.header = header;
this.referenceSource = referenceSource;
}
diff --git a/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java b/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
index f840a5f..b7ffcb1 100644
--- a/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
+++ b/src/java/htsjdk/samtools/cram/build/Sam2CramRecordFactory.java
@@ -77,8 +77,6 @@ public class Sam2CramRecordFactory {
public final Set<String> ignoreTags = new TreeSet<String>();
{
- ignoreTags.add(SAMTag.NM.name());
- ignoreTags.add(SAMTag.MD.name());
ignoreTags.add(SAMTag.RG.name());
}
diff --git a/src/java/htsjdk/samtools/cram/common/CramVersions.java b/src/java/htsjdk/samtools/cram/common/CramVersions.java
index 913c2d4..5e1b726 100644
--- a/src/java/htsjdk/samtools/cram/common/CramVersions.java
+++ b/src/java/htsjdk/samtools/cram/common/CramVersions.java
@@ -1,7 +1,11 @@
package htsjdk.samtools.cram.common;
public class CramVersions {
-
public static final Version CRAM_v2_1 = new Version(2, 1, 0);
public static final Version CRAM_v3 = new Version(3, 0, 0);
+
+ /**
+ * The default CRAM version when creating a new CRAM output file or stream.
+ */
+ public static final Version DEFAULT_CRAM_VERSION = CRAM_v2_1;
}
diff --git a/src/java/htsjdk/samtools/cram/encoding/huffman/codec/CanonicalHuffmanIntegerCodec.java b/src/java/htsjdk/samtools/cram/encoding/huffman/codec/CanonicalHuffmanIntegerCodec.java
index 0eaf4b4..96dfc78 100644
--- a/src/java/htsjdk/samtools/cram/encoding/huffman/codec/CanonicalHuffmanIntegerCodec.java
+++ b/src/java/htsjdk/samtools/cram/encoding/huffman/codec/CanonicalHuffmanIntegerCodec.java
@@ -48,12 +48,8 @@ class CanonicalHuffmanIntegerCodec extends AbstractBitCodec<Integer> {
@Override
public long numberOfBits(final Integer object) {
final HuffmanBitCode bitCode;
- try {
- bitCode = helper.codes.get(object);
- return bitCode.bitLength;
- } catch (final NullPointerException e) {
- throw new RuntimeException("Value " + object + " not found.", e);
- }
+ bitCode = helper.codes.get(object);
+ return bitCode.bitLength;
}
@Override
diff --git a/src/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanParamsCalculator.java b/src/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanParamsCalculator.java
new file mode 100644
index 0000000..c489a73
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanParamsCalculator.java
@@ -0,0 +1,137 @@
+package htsjdk.samtools.cram.encoding.huffman.codec;
+
+import htsjdk.samtools.cram.common.MutableInt;
+import htsjdk.samtools.cram.encoding.huffman.HuffmanCode;
+import htsjdk.samtools.cram.encoding.huffman.HuffmanTree;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+
+/**
+ * A utility class to calculate Huffman encoding parameters based on the values to be encoded.
+ */
+class HuffmanParamsCalculator {
+ private final HashMap<Integer, MutableInt> countMap = new HashMap<>();
+ private int[] values = new int[]{};
+ private int[] bitLens = new int[]{};
+
+ public void add(final int value) {
+ MutableInt counter = countMap.get(value);
+ if (counter == null) {
+ counter = new MutableInt();
+ countMap.put(value, counter);
+ }
+ counter.value++;
+ }
+
+ public void add(final Integer value, final int inc) {
+ MutableInt counter = countMap.get(value);
+ if (counter == null) {
+ counter = new MutableInt();
+ countMap.put(value, counter);
+ }
+ counter.value += inc;
+ }
+
+ public int[] bitLens() {
+ return bitLens;
+ }
+
+ public int[] values() {
+ return values;
+ }
+
+ public Integer[] valuesAsAutoIntegers() {
+ final Integer[] intValues = new Integer[values.length];
+ for (int i = 0; i < intValues.length; i++) {
+ intValues[i] = values[i];
+ }
+
+ return intValues;
+ }
+
+ public byte[] valuesAsBytes() {
+ final byte[] byteValues = new byte[values.length];
+ for (int i = 0; i < byteValues.length; i++) {
+ byteValues[i] = (byte) (0xFF & values[i]);
+ }
+
+ return byteValues;
+ }
+
+ public Byte[] valuesAsAutoBytes() {
+ final Byte[] byteValues = new Byte[values.length];
+ for (int i = 0; i < byteValues.length; i++) {
+ byteValues[i] = (byte) (0xFF & values[i]);
+ }
+
+ return byteValues;
+ }
+
+ public void calculate() {
+ final HuffmanTree<Integer> tree;
+ {
+ final int size = countMap.size();
+ final int[] frequencies = new int[size];
+ final int[] values = new int[size];
+
+ int i = 0;
+ for (final Integer key : countMap.keySet()) {
+ values[i] = key;
+ frequencies[i] = countMap.get(key).value;
+ i++;
+ }
+ tree = HuffmanCode.buildTree(frequencies, autobox(values));
+ }
+
+ final List<Integer> valueList = new ArrayList<Integer>();
+ final List<Integer> lens = new ArrayList<Integer>();
+ HuffmanCode.getValuesAndBitLengths(valueList, lens, tree);
+
+ // the following sorting is not really required, but whatever:
+ final BitCode[] codes = new BitCode[valueList.size()];
+ for (int i = 0; i < valueList.size(); i++) {
+ codes[i] = new BitCode(valueList.get(i), lens.get(i));
+ }
+ Arrays.sort(codes);
+
+ values = new int[codes.length];
+ bitLens = new int[codes.length];
+
+ for (int i = 0; i < codes.length; i++) {
+ final BitCode code = codes[i];
+ bitLens[i] = code.length;
+ values[i] = code.value;
+ }
+ }
+
+ private static Integer[] autobox(final int[] array) {
+ final Integer[] newArray = new Integer[array.length];
+ for (int i = 0; i < array.length; i++) {
+ newArray[i] = array[i];
+ }
+ return newArray;
+ }
+
+ private static class BitCode implements Comparable<BitCode> {
+ final int value;
+ final int length;
+
+ public BitCode(final int value, final int length) {
+ this.value = value;
+ this.length = length;
+ }
+
+ @Override
+ public int compareTo(@SuppressWarnings("NullableProblems") final BitCode o) {
+ final int result = value - o.value;
+ if (result != 0) {
+ return result;
+ }
+ return length - o.length;
+ }
+ }
+
+}
diff --git a/src/java/htsjdk/samtools/cram/encoding/reader/DataReaderFactory.java b/src/java/htsjdk/samtools/cram/encoding/reader/DataReaderFactory.java
index 106eaf4..4e5c4ec 100644
--- a/src/java/htsjdk/samtools/cram/encoding/reader/DataReaderFactory.java
+++ b/src/java/htsjdk/samtools/cram/encoding/reader/DataReaderFactory.java
@@ -17,6 +17,7 @@
*/
package htsjdk.samtools.cram.encoding.reader;
+import htsjdk.samtools.cram.CRAMException;
import htsjdk.samtools.cram.common.IntHashMap;
import htsjdk.samtools.cram.encoding.BitCodec;
import htsjdk.samtools.cram.encoding.DataSeries;
@@ -30,6 +31,7 @@ import htsjdk.samtools.cram.structure.EncodingID;
import htsjdk.samtools.cram.structure.EncodingKey;
import htsjdk.samtools.cram.structure.EncodingParams;
import htsjdk.samtools.cram.structure.ReadTag;
+import htsjdk.samtools.util.Log;
import java.io.IOException;
import java.io.InputStream;
@@ -37,15 +39,14 @@ import java.lang.reflect.Field;
import java.util.Map;
import java.util.TreeMap;
-@SuppressWarnings("unchecked")
public class DataReaderFactory {
+ private static Log log = Log.getInstance(DataReaderFactory.class);
private final static boolean collectStats = false;
public AbstractReader buildReader(final AbstractReader reader,
final BitInputStream bitInputStream, final Map<Integer, InputStream> inputMap,
- final CompressionHeader header, final int refId) throws IllegalArgumentException,
- IllegalAccessException {
+ final CompressionHeader header, final int refId) throws IllegalArgumentException {
reader.captureReadNames = header.readNamesIncluded;
reader.refId = refId;
reader.APDelta = header.APDelta;
@@ -56,10 +57,15 @@ public class DataReaderFactory {
final EncodingKey key = dataSeries.key();
final DataSeriesType type = dataSeries.type();
if (header.encodingMap.get(key) == null) {
- System.err.println("Encoding not found for key: " + key);
+ log.debug("Encoding not found for key: " + key);
+ } else {
+ try {
+ field.set(reader,
+ createReader(type, header.encodingMap.get(key), bitInputStream, inputMap));
+ } catch (IllegalAccessException e) {
+ throw new CRAMException(e);
+ }
}
- field.set(reader,
- createReader(type, header.encodingMap.get(key), bitInputStream, inputMap));
}
if (field.isAnnotationPresent(DataSeriesMap.class)) {
@@ -74,7 +80,11 @@ public class DataReaderFactory {
inputMap);
map.put(key, tagReader);
}
- field.set(reader, map);
+ try {
+ field.set(reader, map);
+ } catch (IllegalAccessException e) {
+ throw new CRAMException(e);
+ }
}
}
}
diff --git a/src/java/htsjdk/samtools/cram/encoding/reader/RefSeqIdReader.java b/src/java/htsjdk/samtools/cram/encoding/reader/RefSeqIdReader.java
new file mode 100644
index 0000000..1289429
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/encoding/reader/RefSeqIdReader.java
@@ -0,0 +1,242 @@
+/*******************************************************************************
+ * Copyright 2013 EMBL-EBI
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ ******************************************************************************/
+package htsjdk.samtools.cram.encoding.reader;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.cram.CRAMException;
+import htsjdk.samtools.cram.encoding.readfeatures.BaseQualityScore;
+import htsjdk.samtools.cram.encoding.readfeatures.Bases;
+import htsjdk.samtools.cram.encoding.readfeatures.Deletion;
+import htsjdk.samtools.cram.encoding.readfeatures.HardClip;
+import htsjdk.samtools.cram.encoding.readfeatures.InsertBase;
+import htsjdk.samtools.cram.encoding.readfeatures.Insertion;
+import htsjdk.samtools.cram.encoding.readfeatures.Padding;
+import htsjdk.samtools.cram.encoding.readfeatures.ReadBase;
+import htsjdk.samtools.cram.encoding.readfeatures.ReadFeature;
+import htsjdk.samtools.cram.encoding.readfeatures.RefSkip;
+import htsjdk.samtools.cram.encoding.readfeatures.Scores;
+import htsjdk.samtools.cram.encoding.readfeatures.SoftClip;
+import htsjdk.samtools.cram.encoding.readfeatures.Substitution;
+import htsjdk.samtools.cram.structure.AlignmentSpan;
+import htsjdk.samtools.cram.structure.CramCompressionRecord;
+import htsjdk.samtools.cram.structure.ReadTag;
+import htsjdk.samtools.cram.structure.Slice;
+import htsjdk.samtools.util.RuntimeIOException;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.Map;
+
+/**
+ * A reader that only keeps track of alignment spans. The intended use is for
+ * CRAI index.
+ *
+ * @author vadim
+ *
+ */
+public class RefSeqIdReader extends AbstractReader {
+ /**
+ * Reference sequence id set by default
+ */
+ private final int globalReferenceSequenceId;
+
+ /**
+ * Alignment start to start counting from
+ */
+ private int alignmentStart;
+ private ValidationStringency validationStringency;
+ /**
+ * For diagnostic purposes
+ */
+ private int recordCounter = 0;
+
+ /**
+ * Single record to use for capturing read fields:
+ */
+ private final CramCompressionRecord cramRecord = new CramCompressionRecord();
+
+ /**
+ * Detected sequence spans
+ */
+ private final Map<Integer, AlignmentSpan> spans = new HashMap<>();
+
+ public RefSeqIdReader(final int seqId, final int alignmentStart, ValidationStringency validationStringency) {
+ super();
+ this.globalReferenceSequenceId = seqId;
+ this.alignmentStart = alignmentStart;
+ this.validationStringency = validationStringency;
+ }
+
+ public Map<Integer, AlignmentSpan> getReferenceSpans() {
+ return spans;
+ }
+
+ public void read() {
+ cramRecord.sequenceId = globalReferenceSequenceId;
+ try {
+ cramRecord.flags = bitFlagsCodec.readData();
+ cramRecord.compressionFlags = compressionBitFlagsCodec.readData();
+ if (refId == Slice.MULTI_REFERENCE)
+ cramRecord.sequenceId = refIdCodec.readData();
+ else
+ cramRecord.sequenceId = refId;
+
+ cramRecord.readLength = readLengthCodec.readData();
+ if (APDelta) {
+ cramRecord.alignmentDelta = alignmentStartCodec.readData();
+ alignmentStart += cramRecord.alignmentDelta;
+ }
+ else {
+ cramRecord.alignmentStart = alignmentStartCodec.readData();
+ alignmentStart = cramRecord.alignmentStart;
+ }
+
+ cramRecord.readGroupID = readGroupCodec.readData();
+
+ if (captureReadNames)
+ cramRecord.readName = new String(readNameCodec.readData(), charset);
+
+ // mate record:
+ if (cramRecord.isDetached()) {
+ cramRecord.mateFlags = mateBitFlagCodec.readData();
+ if (!captureReadNames)
+ cramRecord.readName = new String(readNameCodec.readData(), charset);
+
+ cramRecord.mateSequenceID = mateReferenceIdCodec.readData();
+ cramRecord.mateAlignmentStart = mateAlignmentStartCodec.readData();
+ cramRecord.templateSize = insertSizeCodec.readData();
+ detachedCount++;
+ } else if (cramRecord.isHasMateDownStream())
+ cramRecord.recordsToNextFragment = distanceToNextFragmentCodec.readData();
+
+ final Integer tagIdList = tagIdListCodec.readData();
+ final byte[][] ids = tagIdDictionary[tagIdList];
+ if (ids.length > 0) {
+ final int tagCount = ids.length;
+ cramRecord.tags = new ReadTag[tagCount];
+ for (int i = 0; i < ids.length; i++) {
+ final int id = ReadTag.name3BytesToInt(ids[i]);
+ final DataReader<byte[]> dataReader = tagValueCodecs.get(id);
+ final ReadTag tag = new ReadTag(id, dataReader.readData(), validationStringency);
+ cramRecord.tags[i] = tag;
+ }
+ }
+
+ if (!cramRecord.isSegmentUnmapped()) {
+ // reading read features:
+ final int size = numberOfReadFeaturesCodec.readData();
+ int prevPos = 0;
+ final java.util.List<ReadFeature> readFeatures = new LinkedList<>();
+ cramRecord.readFeatures = readFeatures;
+ for (int i = 0; i < size; i++) {
+ final Byte operator = readFeatureCodeCodec.readData();
+
+ final int pos = prevPos + readFeaturePositionCodec.readData();
+ prevPos = pos;
+
+ switch (operator) {
+ case ReadBase.operator:
+ final ReadBase readBase = new ReadBase(pos, baseCodec.readData(), qualityScoreCodec.readData());
+ readFeatures.add(readBase);
+ break;
+ case Substitution.operator:
+ final Substitution substitution = new Substitution();
+ substitution.setPosition(pos);
+ final byte code = baseSubstitutionCodec.readData();
+ substitution.setCode(code);
+ readFeatures.add(substitution);
+ break;
+ case Insertion.operator:
+ final Insertion insertion = new Insertion(pos, insertionCodec.readData());
+ readFeatures.add(insertion);
+ break;
+ case SoftClip.operator:
+ final SoftClip softClip = new SoftClip(pos, softClipCodec.readData());
+ readFeatures.add(softClip);
+ break;
+ case HardClip.operator:
+ final HardClip hardCLip = new HardClip(pos, hardClipCodec.readData());
+ readFeatures.add(hardCLip);
+ break;
+ case Padding.operator:
+ final Padding padding = new Padding(pos, paddingCodec.readData());
+ readFeatures.add(padding);
+ break;
+ case Deletion.operator:
+ final Deletion deletion = new Deletion(pos, deletionLengthCodec.readData());
+ readFeatures.add(deletion);
+ break;
+ case RefSkip.operator:
+ final RefSkip refSkip = new RefSkip(pos, refSkipCodec.readData());
+ readFeatures.add(refSkip);
+ break;
+ case InsertBase.operator:
+ final InsertBase insertBase = new InsertBase(pos, baseCodec.readData());
+ readFeatures.add(insertBase);
+ break;
+ case BaseQualityScore.operator:
+ final BaseQualityScore baseQualityScore = new BaseQualityScore(pos,
+ qualityScoreCodec.readData());
+ readFeatures.add(baseQualityScore);
+ break;
+ case Bases.operator:
+ final Bases bases = new Bases(pos, basesCodec.readData());
+ readFeatures.add(bases);
+ break;
+ case Scores.operator:
+ final Scores scores = new Scores(pos, scoresCodec.readData());
+ readFeatures.add(scores);
+ break;
+ default:
+ throw new RuntimeException("Unknown read feature operator: " + operator);
+ }
+ }
+
+ // mapping quality:
+ cramRecord.mappingQuality = mappingScoreCodec.readData();
+ if (cramRecord.isForcePreserveQualityScores()) {
+ cramRecord.qualityScores = qualityScoresCodec.readDataArray(cramRecord.readLength);
+ }
+ } else {
+ if (cramRecord.isUnknownBases()) {
+ cramRecord.readBases = SAMRecord.NULL_SEQUENCE;
+ cramRecord.qualityScores = SAMRecord.NULL_QUALS;
+ } else {
+ final byte[] bases = new byte[cramRecord.readLength];
+ for (int i = 0; i < bases.length; i++)
+ bases[i] = baseCodec.readData();
+ cramRecord.readBases = bases;
+
+ if (cramRecord.isForcePreserveQualityScores()) {
+ cramRecord.qualityScores = qualityScoresCodec.readDataArray(cramRecord.readLength);
+ }
+ }
+ }
+
+ recordCounter++;
+
+ } catch (final IOException e) {
+ throw new RuntimeIOException(e);
+ }
+
+ if (!spans.containsKey(cramRecord.sequenceId)) {
+ spans.put(cramRecord.sequenceId, new AlignmentSpan(alignmentStart, cramRecord.readLength));
+ } else
+ spans.get(cramRecord.sequenceId).addSingle(alignmentStart, cramRecord.readLength);
+ }
+}
diff --git a/src/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java b/src/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java
index 83da1e4..41a69d2 100644
--- a/src/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java
+++ b/src/java/htsjdk/samtools/cram/encoding/readfeatures/BaseQualityScore.java
@@ -69,10 +69,8 @@ public class BaseQualityScore implements Serializable, ReadFeature {
@Override
public String toString() {
- final StringBuilder stringBuilder = new StringBuilder().append((char) operator).append('@');
- stringBuilder.append(position);
- stringBuilder.append('#').appendCodePoint(qualityScore);
- return stringBuilder.toString();
+ return new StringBuilder().append((char) operator).append('@')
+ .append(position).append('#').appendCodePoint(qualityScore).toString();
}
}
diff --git a/src/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java b/src/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java
index 398dbcf..5970413 100644
--- a/src/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java
+++ b/src/java/htsjdk/samtools/cram/encoding/readfeatures/InsertBase.java
@@ -63,10 +63,8 @@ public class InsertBase implements Serializable, ReadFeature {
@Override
public String toString() {
- final StringBuilder stringBuilder = new StringBuilder().append((char) operator).append('@');
- stringBuilder.append(position);
- stringBuilder.append('\\').appendCodePoint(base);
- return stringBuilder.toString();
+ return new StringBuilder().append((char) operator).append('@')
+ .append(position).append('\\').appendCodePoint(base).toString();
}
public byte getBase() {
diff --git a/src/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java b/src/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java
index 5e7909f..73ae208 100644
--- a/src/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java
+++ b/src/java/htsjdk/samtools/cram/encoding/readfeatures/ReadBase.java
@@ -71,12 +71,11 @@ public class ReadBase implements Serializable, ReadFeature {
@Override
public String toString() {
- final StringBuilder sb = new StringBuilder(getClass().getSimpleName() + "[");
- sb.append("position=").append(position);
- sb.append("; base=").appendCodePoint(base);
- sb.append("; score=").appendCodePoint(qualityScore);
- sb.append("] ");
- return sb.toString();
+ return new StringBuilder(getClass().getSimpleName() + "[")
+ .append("position=").append(position)
+ .append("; base=").appendCodePoint(base)
+ .append("; score=").appendCodePoint(qualityScore)
+ .append("] ").toString();
}
public byte getBase() {
diff --git a/src/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java b/src/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java
index 7a88cf1..b2ed5de 100644
--- a/src/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java
+++ b/src/java/htsjdk/samtools/cram/encoding/readfeatures/Substitution.java
@@ -24,6 +24,7 @@ import java.io.Serializable;
* The class is also responsible for converting combinations of read base and reference base into a byte value (code).
*/
public class Substitution implements Serializable, ReadFeature {
+ public static final int NO_CODE = -1;
/**
* zero-based position in read
@@ -40,7 +41,7 @@ public class Substitution implements Serializable, ReadFeature {
/**
* A byte value denoting combination of the read base and the reference base.
*/
- private byte code = -1;
+ private byte code = NO_CODE;
public byte getCode() {
return code;
@@ -91,11 +92,11 @@ public class Substitution implements Serializable, ReadFeature {
if (position != substitution.position)
return false;
- if ((code != substitution.code) & (code == -1 || substitution.code == -1)) {
+ if ((code != substitution.code) & (code == NO_CODE || substitution.code == NO_CODE)) {
return false;
}
- if (code > -1 && substitution.code > -1) {
+ if (code > NO_CODE && substitution.code > NO_CODE) {
if (referenceBase != substitution.referenceBase) return false;
if (base != substitution.base) return false;
}
diff --git a/src/java/htsjdk/samtools/cram/lossy/QualityScorePreservation.java b/src/java/htsjdk/samtools/cram/lossy/QualityScorePreservation.java
index aaac99c..5ffe3c1 100644
--- a/src/java/htsjdk/samtools/cram/lossy/QualityScorePreservation.java
+++ b/src/java/htsjdk/samtools/cram/lossy/QualityScorePreservation.java
@@ -75,7 +75,7 @@ public class QualityScorePreservation {
private static List<PreservationPolicy> parsePolicies(final String spec) {
final List<PreservationPolicy> policyList = new ArrayList<PreservationPolicy>();
for (final String string : spec.split("-")) {
- if (string.length() == 0)
+ if (string.isEmpty())
continue;
final PreservationPolicy policy = parseSinglePolicy(string);
policyList.add(policy);
diff --git a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
index cf9748c..fd7157d 100644
--- a/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
+++ b/src/java/htsjdk/samtools/cram/ref/ReferenceSource.java
@@ -63,6 +63,43 @@ public class ReferenceSource {
this.rsFile = rsFile;
}
+ /**
+ * Attempts to construct a default ReferenceSource for use with CRAM files when
+ * one has not been explicitly provided.
+ *
+ * @return ReferenceSource if one can be acquired. Guaranteed to not be null if none
+ * of the listed exceptions is thrown.
+ * @throws IllegalStateException if no default reference source can be acquired
+ * @throws IllegalArgumentException if the reference_fasta environment variable refers to
+ * a file that doesn't exist
+ *<p>
+ * Construct a default reference source to use when an explicit reference has not been
+ * provided by checking for fallback sources in this order:
+ *<p><ul>
+ * <li>Defaults.REFERENCE_FASTA - the value of the system property "reference_fasta". If set,
+ * must refer to a valid reference file.</li>
+ * <li>ENA Reference Service if it is enabled</li>
+ * </ul>
+ */
+ public static ReferenceSource getDefaultCRAMReferenceSource() {
+ if (null != Defaults.REFERENCE_FASTA) {
+ if (Defaults.REFERENCE_FASTA.exists()) {
+ return new ReferenceSource(Defaults.REFERENCE_FASTA);
+ }
+ else {
+ throw new IllegalArgumentException(
+ "The file specified by the reference_fasta property does not exist: " + Defaults.REFERENCE_FASTA.getName());
+ }
+ }
+ else if (Defaults.USE_CRAM_REF_DOWNLOAD) {
+ return new ReferenceSource();
+ }
+ else {
+ throw new IllegalStateException(
+ "A valid CRAM reference was not supplied and one cannot be acquired via the property settings reference_fasta or use_cram_ref_download");
+ }
+ }
+
public void clearCache() {
cacheW.clear();
}
diff --git a/src/java/htsjdk/samtools/cram/structure/AlignmentSpan.java b/src/java/htsjdk/samtools/cram/structure/AlignmentSpan.java
new file mode 100644
index 0000000..6e18dd2
--- /dev/null
+++ b/src/java/htsjdk/samtools/cram/structure/AlignmentSpan.java
@@ -0,0 +1,92 @@
+package htsjdk.samtools.cram.structure;
+
+import htsjdk.samtools.SAMRecord;
+
+/**
+ * A span of reads on a single reference.
+ */
+public class AlignmentSpan {
+ /**
+ * A constant to represent an unmapped span.
+ */
+ public static final AlignmentSpan UNMAPPED_SPAN = new AlignmentSpan(SAMRecord.NO_ALIGNMENT_START, 0);
+
+ private int start;
+ private int span;
+ private int count;
+
+ /**
+ * Create a new span with a single read in it.
+ *
+ * @param start alignment start of the span
+ * @param span alignment span
+ */
+ public AlignmentSpan(final int start, final int span) {
+ this.setStart(start);
+ this.setSpan(span);
+ this.count = 1;
+ }
+
+ /**
+ * Create a new span with a multiple reads in it.
+ *
+ * @param start alignment start of the span
+ * @param span alignment span
+ * @param count number of reads in the span
+ */
+ public AlignmentSpan(final int start, final int span, final int count) {
+ this.setStart(start);
+ this.setSpan(span);
+ this.count = count;
+ }
+
+ /**
+ * Add multiple reads to the span.
+ *
+ * @param start alignment start
+ * @param span alignment span
+ * @param count number of reads to add
+ */
+ public void add(final int start, final int span, final int count) {
+ if (this.getStart() > start) {
+ this.setSpan(Math.max(this.getStart() + this.getSpan(), start + span) - start);
+ this.setStart(start);
+ } else if (this.getStart() < start) {
+ this.setSpan(Math.max(this.getStart() + this.getSpan(), start + span) - this.getStart());
+ } else {
+ this.setSpan(Math.max(this.getSpan(), span));
+ }
+
+ this.count += count;
+ }
+
+ /**
+ * Add a single read to the span
+ *
+ * @param start alignment start
+ * @param span read span on the reference
+ */
+ public void addSingle(final int start, final int span) {
+ add(start, span, 1);
+ }
+
+ public int getStart() {
+ return start;
+ }
+
+ public void setStart(final int start) {
+ this.start = start;
+ }
+
+ public int getSpan() {
+ return span;
+ }
+
+ public void setSpan(final int span) {
+ this.span = span;
+ }
+
+ public int getCount() {
+ return count;
+ }
+}
diff --git a/src/java/htsjdk/samtools/cram/structure/Container.java b/src/java/htsjdk/samtools/cram/structure/Container.java
index d369de9..2a4eb3b 100644
--- a/src/java/htsjdk/samtools/cram/structure/Container.java
+++ b/src/java/htsjdk/samtools/cram/structure/Container.java
@@ -17,17 +17,19 @@
*/
package htsjdk.samtools.cram.structure;
+import htsjdk.samtools.SAMRecord;
+
public class Container {
// container header as defined in the specs:
/**
* Byte size of the content excluding header.
*/
public int containerByteSize;
- public int sequenceId = -1;
- public int alignmentStart = -1;
- public int alignmentSpan = -1;
- public int nofRecords = -1;
- public long globalRecordCounter = -1;
+ public int sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ public int alignmentStart = Slice.NO_ALIGNMENT_START;
+ public int alignmentSpan = Slice.NO_ALIGNMENT_SPAN;
+ public int nofRecords = 0;
+ public long globalRecordCounter = 0;
public long bases = 0;
public int blockCount = -1;
diff --git a/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java b/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
index b0b95d3..4997194 100644
--- a/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
+++ b/src/java/htsjdk/samtools/cram/structure/CramCompressionRecord.java
@@ -137,10 +137,10 @@ public class CramCompressionRecord {
public String toString() {
final StringBuilder stringBuilder = new StringBuilder("[");
if (readName != null) stringBuilder.append(readName).append("; ");
- stringBuilder.append("flags=").append(flags);
- stringBuilder.append("; alignmentOffset=").append(alignmentDelta);
- stringBuilder.append("; mateOffset=").append(recordsToNextFragment);
- stringBuilder.append("; mappingQuality=").append(mappingQuality);
+ stringBuilder.append("flags=").append(flags)
+ .append("; alignmentOffset=").append(alignmentDelta)
+ .append("; mateOffset=").append(recordsToNextFragment)
+ .append("; mappingQuality=").append(mappingQuality);
if (readFeatures != null) for (final ReadFeature feature : readFeatures)
stringBuilder.append("; ").append(feature.toString());
@@ -148,7 +148,7 @@ public class CramCompressionRecord {
if (readBases != null) stringBuilder.append("; ").append("bases: ").append(new String(readBases));
if (qualityScores != null) stringBuilder.append("; ").append("scores: ").append(new String(qualityScores));
- stringBuilder.append("]");
+ stringBuilder.append(']');
return stringBuilder.toString();
}
diff --git a/src/java/htsjdk/samtools/cram/structure/Slice.java b/src/java/htsjdk/samtools/cram/structure/Slice.java
index d13126c..dd8a388 100644
--- a/src/java/htsjdk/samtools/cram/structure/Slice.java
+++ b/src/java/htsjdk/samtools/cram/structure/Slice.java
@@ -4,7 +4,7 @@
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+ * You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
@@ -20,6 +20,7 @@ package htsjdk.samtools.cram.structure;
import htsjdk.samtools.SAMBinaryTagAndUnsignedArrayValue;
import htsjdk.samtools.SAMBinaryTagAndValue;
import htsjdk.samtools.SAMException;
+import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTagUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.SequenceUtil;
@@ -33,8 +34,9 @@ import java.util.Map;
* CRAM slice is a logical union of blocks into for example alignment slices.
*/
public class Slice {
- public static final int UNMAPPED_OR_NO_REFERENCE = -1;
public static final int MULTI_REFERENCE = -2;
+ public static final int NO_ALIGNMENT_START = -1;
+ public static final int NO_ALIGNMENT_SPAN = 0;
private static final Log log = Log.getInstance(Slice.class);
// as defined in the specs:
@@ -66,7 +68,8 @@ public class Slice {
public SAMBinaryTagAndValue sliceTags;
private void alignmentBordersSanityCheck(final byte[] ref) {
- if (alignmentStart > 0 && sequenceId >= 0 && ref == null) throw new NullPointerException("Mapped slice reference is null.");
+ if (sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) return ;
+ if (alignmentStart > 0 && sequenceId >= 0 && ref == null) throw new IllegalArgumentException ("Mapped slice reference is null.");
if (alignmentStart > ref.length) {
log.error(String.format("Slice mapped outside of reference: seqID=%d, start=%d, counter=%d.", sequenceId, alignmentStart,
@@ -81,6 +84,11 @@ public class Slice {
}
public boolean validateRefMD5(final byte[] ref) {
+ if(sequenceId == Slice.MULTI_REFERENCE)
+ throw new SAMException("Cannot verify a slice with multiple references on a single reference.");
+
+ if (sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) return true;
+
alignmentBordersSanityCheck(ref);
if (!validateRefMD5(ref, alignmentStart, alignmentSpan, refMD5)) {
@@ -156,11 +164,8 @@ public class Slice {
if (ref.length <= shoulder * 2)
sb.append(new String(ref));
else {
- sb.append(new String(Arrays.copyOfRange(ref,
- alignmentStart - 1, alignmentStart + shoulder)));
- sb.append("...");
- sb.append(new String(Arrays.copyOfRange(ref, alignmentStart
- - 1 + span - shoulder, alignmentStart + span)));
+
+ sb.append(getBrief(alignmentStart, alignmentSpan, ref, shoulder));
}
log.debug(String.format("Slice md5: %s for %d:%d-%d, %s",
@@ -239,4 +244,12 @@ public class Slice {
else this.sliceTags = this.sliceTags.insert(tmp);
}
}
+
+ public boolean isMapped() {
+ return sequenceId > SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ }
+
+ public boolean isMultiref() {
+ return sequenceId == Slice.MULTI_REFERENCE;
+ }
}
diff --git a/src/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java b/src/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java
index bb5aa6e..d9948be 100644
--- a/src/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java
+++ b/src/java/htsjdk/samtools/cram/structure/SubstitutionMatrix.java
@@ -61,11 +61,11 @@ public class SubstitutionMatrix {
final StringBuilder stringBuilder = new StringBuilder();
for (final byte r : "ACGTN".getBytes()) {
stringBuilder.append((char) r);
- stringBuilder.append(":");
+ stringBuilder.append(':');
for (int i = 0; i < 4; i++) {
stringBuilder.append((char) bases[r][i]);
}
- stringBuilder.append("\t");
+ stringBuilder.append('\t');
}
return stringBuilder.toString();
}
diff --git a/src/java/htsjdk/samtools/fastq/BasicFastqWriter.java b/src/java/htsjdk/samtools/fastq/BasicFastqWriter.java
index c6aabb0..8a5afd3 100644
--- a/src/java/htsjdk/samtools/fastq/BasicFastqWriter.java
+++ b/src/java/htsjdk/samtools/fastq/BasicFastqWriter.java
@@ -27,6 +27,7 @@ import htsjdk.samtools.SAMException;
import htsjdk.samtools.util.IOUtil;
import java.io.File;
+import java.io.Flushable;
import java.io.OutputStream;
import java.io.PrintStream;
@@ -34,7 +35,7 @@ import java.io.PrintStream;
* In general FastqWriterFactory should be used so that AsyncFastqWriter can be enabled, but there are some
* cases in which that behavior is explicitly not wanted.
*/
-public class BasicFastqWriter implements FastqWriter {
+public class BasicFastqWriter implements FastqWriter,Flushable {
private final String path;
private final PrintStream writer;
@@ -55,6 +56,7 @@ public class BasicFastqWriter implements FastqWriter {
this(null, writer);
}
+ @Override
public void write(final FastqRecord rec) {
writer.print(FastqConstants.SEQUENCE_HEADER);
writer.println(rec.getReadHeader());
@@ -67,10 +69,12 @@ public class BasicFastqWriter implements FastqWriter {
}
}
+ @Override
public void flush() {
writer.flush();
}
+ @Override
public void close() {
writer.close();
}
diff --git a/src/java/htsjdk/samtools/fastq/FastqReader.java b/src/java/htsjdk/samtools/fastq/FastqReader.java
index 4ec7be1..8086dfa 100755
--- a/src/java/htsjdk/samtools/fastq/FastqReader.java
+++ b/src/java/htsjdk/samtools/fastq/FastqReader.java
@@ -156,6 +156,7 @@ public class FastqReader implements Iterator<FastqRecord>, Iterable<FastqRecord>
*/
public File getFile() { return fastqFile ; }
+ @Override
public void close() {
try {
reader.close();
diff --git a/src/java/htsjdk/samtools/fastq/FastqRecord.java b/src/java/htsjdk/samtools/fastq/FastqRecord.java
index 7306ad8..b1d3f75 100755
--- a/src/java/htsjdk/samtools/fastq/FastqRecord.java
+++ b/src/java/htsjdk/samtools/fastq/FastqRecord.java
@@ -23,24 +23,35 @@
*/
package htsjdk.samtools.fastq;
+import java.io.Serializable;
+
/**
* Represents a fastq record, fairly literally, i.e. without any conversion.
*/
-public class FastqRecord {
-
+public class FastqRecord implements Serializable {
+ private static final long serialVersionUID = 1L;
private final String seqHeaderPrefix;
private final String seqLine;
private final String qualHeaderPrefix;
private final String qualLine;
public FastqRecord(final String seqHeaderPrefix, final String seqLine, final String qualHeaderPrefix, final String qualLine) {
- if (seqHeaderPrefix != null && seqHeaderPrefix.length() > 0) this.seqHeaderPrefix = seqHeaderPrefix;
+ if (seqHeaderPrefix != null && !seqHeaderPrefix.isEmpty()) this.seqHeaderPrefix = seqHeaderPrefix;
else this.seqHeaderPrefix = null;
- if (qualHeaderPrefix != null && qualHeaderPrefix.length() > 0) this.qualHeaderPrefix = qualHeaderPrefix;
+ if (qualHeaderPrefix != null && !qualHeaderPrefix.isEmpty()) this.qualHeaderPrefix = qualHeaderPrefix;
else this.qualHeaderPrefix = null;
this.seqLine = seqLine ;
this.qualLine = qualLine ;
}
+
+ /** copy constructor */
+ public FastqRecord(final FastqRecord other) {
+ if( other == null ) throw new IllegalArgumentException("new FastqRecord(null)");
+ this.seqHeaderPrefix = other.seqHeaderPrefix;
+ this.seqLine = other.seqLine;
+ this.qualHeaderPrefix = other.qualHeaderPrefix;
+ this.qualLine = other.qualLine;
+ }
/** @return the read name */
public String getReadHeader() { return seqHeaderPrefix; }
@@ -53,7 +64,6 @@ public class FastqRecord {
/** shortcut to getReadString().length() */
public int length() { return this.seqLine==null?0:this.seqLine.length();}
-
@Override
public int hashCode() {
final int prime = 31;
diff --git a/src/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java b/src/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java
new file mode 100644
index 0000000..9163735
--- /dev/null
+++ b/src/java/htsjdk/samtools/filter/AbstractJavascriptFilter.java
@@ -0,0 +1,159 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.filter;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.Reader;
+import java.io.StringReader;
+
+import javax.script.Bindings;
+import javax.script.Compilable;
+import javax.script.CompiledScript;
+import javax.script.ScriptEngine;
+import javax.script.ScriptEngineManager;
+import javax.script.ScriptException;
+import javax.script.SimpleBindings;
+
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.samtools.util.RuntimeScriptException;
+
+/**
+ * Javascript filter with HEADER type containing TYPE records. contains two
+ * static method to get a SAM Read filter or a VariantFilter.
+ *
+ * warning: tools, like galaxy, using this class are not safe because a script
+ * can access the filesystem.
+ *
+ * @author Pierre Lindenbaum PhD
+ */
+public abstract class AbstractJavascriptFilter<HEADER, TYPE> {
+ public static final String DEFAULT_HEADER_KEY = "header";
+ /** compiled user script */
+ private CompiledScript script = null;
+
+ /** javascript bindings */
+ protected Bindings bindings;
+
+ /**
+ * constructor using a java.io.File script, compiles the script, puts
+ * 'header' in the bindings
+ */
+ protected AbstractJavascriptFilter(final File scriptFile, final HEADER header) throws IOException {
+ this(new FileReader(scriptFile), header);
+ }
+
+ /**
+ * constructor using a java.lang.String script, compiles the script, puts
+ * 'header' in the bindings
+ */
+ protected AbstractJavascriptFilter(final String scriptExpression, final HEADER header) {
+ this(new StringReader(scriptExpression), header);
+ }
+
+ /**
+ * Constructor, compiles script, puts the header in the bindings
+ *
+ * @param scriptReader
+ * reader containing the script. will be closed.
+ * @param header
+ * the header to be injected in the javascript context
+ */
+ protected AbstractJavascriptFilter(final Reader scriptReader, final HEADER header) {
+ final ScriptEngineManager manager = new ScriptEngineManager();
+ /* get javascript engine */
+ final ScriptEngine engine = manager.getEngineByName("js");
+ if (engine == null) {
+ CloserUtil.close(scriptReader);
+ throw new RuntimeScriptException("The embedded 'javascript' engine is not available in java. "
+ + "Do you use the SUN/Oracle Java Runtime ?");
+ }
+ if (scriptReader == null) {
+ throw new RuntimeScriptException("missing ScriptReader.");
+ }
+
+ try {
+ final Compilable compilingEngine = getCompilable(engine);
+ this.script = compilingEngine.compile(scriptReader);
+ } catch (ScriptException err) {
+ throw new RuntimeScriptException("Script error in input", err);
+ } finally {
+ CloserUtil.close(scriptReader);
+ }
+
+ /*
+ * create the javascript bindings and put the file header in that
+ * context
+ */
+ this.bindings = new SimpleBindings();
+ this.bindings.put(DEFAULT_HEADER_KEY, header);
+ }
+
+ /** return a javascript engine as a Compilable */
+ private static Compilable getCompilable(final ScriptEngine engine) {
+ if (!(engine instanceof Compilable)) {
+ throw new IllegalStateException("The current javascript engine (" + engine.getClass()
+ + ") cannot be cast to Compilable. " + "Do you use the SUN/Oracle Java Runtime ?");
+ }
+ return Compilable.class.cast(engine);
+ }
+
+ /** returns key used for header binding */
+ public String getHeaderKey() {
+ return DEFAULT_HEADER_KEY;
+ }
+
+ /** returns key used for record binding */
+ public abstract String getRecordKey();
+
+ /**
+ * Evaluates this predicate on the given argument
+ *
+ * @param record
+ * the record to test. It will be injected into the javascript
+ * context using getRecordKey()
+ * @return true (keep) if the user script returned 1 or true, else false
+ * (reject).
+ */
+ protected boolean accept(final TYPE record) {
+ try {
+ /* insert the record into the javascript context */
+ this.bindings.put(getRecordKey(), record);
+ /* get the result */
+ final Object result = this.script.eval(this.bindings);
+ if (result == null) {
+ return false;
+ } else if (result instanceof Boolean) {
+ return Boolean.TRUE.equals(result);
+ } else if (result instanceof Number) {
+ return (((Number) result).intValue() == 1);
+ } else {
+ return false;
+ }
+ } catch (ScriptException err) {
+ throw new RuntimeException(err);
+ }
+ }
+}
diff --git a/src/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java b/src/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java
new file mode 100644
index 0000000..20c784b
--- /dev/null
+++ b/src/java/htsjdk/samtools/filter/JavascriptSamRecordFilter.java
@@ -0,0 +1,101 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.filter;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+
+/**
+ * javascript based read filter
+ *
+ *
+ * The script puts the following variables in the script context:
+ *
+ * - 'record' a SamRecord (
+ * https://github.com/samtools/htsjdk/blob/master/src/java/htsjdk/samtools/
+ * SAMRecord.java ) - 'header' (
+ * https://github.com/samtools/htsjdk/blob/master/src/java/htsjdk/samtools/
+ * SAMFileHeader.java )
+ *
+ * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France
+ */
+public class JavascriptSamRecordFilter extends AbstractJavascriptFilter<SAMFileHeader, SAMRecord>
+ implements SamRecordFilter {
+ /**
+ * constructor using a javascript File
+ *
+ * @param scriptFile
+ * the javascript file to be compiled
+ * @param header
+ * the SAMHeader
+ */
+ public JavascriptSamRecordFilter(final File scriptFile, final SAMFileHeader header) throws IOException {
+ super(scriptFile, header);
+ }
+
+ /**
+ * constructor using a javascript expression
+ *
+ * @param scriptExpression
+ * the javascript expression to be compiled
+ * @param header
+ * the SAMHeader
+ */
+ public JavascriptSamRecordFilter(final String scriptExpression, final SAMFileHeader header) {
+ super(scriptExpression, header);
+ }
+
+ /**
+ * constructor using a java.io.Reader
+ *
+ * @param scriptReader
+ * the javascript reader to be compiled. will be closed
+ * @param header
+ * the SAMHeader
+ */
+ public JavascriptSamRecordFilter(final Reader scriptReader, final SAMFileHeader header) {
+ super(scriptReader, header);
+ }
+
+ /** return true if both records are filteredOut (AND) */
+ @Override
+ public boolean filterOut(final SAMRecord first, final SAMRecord second) {
+ return filterOut(first) && filterOut(second);
+ }
+
+ /** read is filtered out if the javascript program returns false */
+ @Override
+ public boolean filterOut(final SAMRecord record) {
+ return !accept(record);
+ }
+
+ @Override
+ public String getRecordKey() {
+ return "record";
+ }
+}
diff --git a/src/java/htsjdk/samtools/metrics/MetricBase.java b/src/java/htsjdk/samtools/metrics/MetricBase.java
index af174cd..119a479 100644
--- a/src/java/htsjdk/samtools/metrics/MetricBase.java
+++ b/src/java/htsjdk/samtools/metrics/MetricBase.java
@@ -110,9 +110,9 @@ public class MetricBase {
for (final Field f : getClass().getFields()) {
try {
buffer.append(f.getName());
- buffer.append("\t");
+ buffer.append('\t');
buffer.append(formatter.format(f.get(this)));
- buffer.append("\n");
+ buffer.append('\n');
}
catch (IllegalAccessException iae) {
throw new SAMException("Could not read field " + f.getName() + " from a " + getClass().getSimpleName());
diff --git a/src/java/htsjdk/samtools/metrics/MetricsFile.java b/src/java/htsjdk/samtools/metrics/MetricsFile.java
index f3f2216..fabac6e 100644
--- a/src/java/htsjdk/samtools/metrics/MetricsFile.java
+++ b/src/java/htsjdk/samtools/metrics/MetricsFile.java
@@ -81,7 +81,7 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
/** Returns the histogram contained in the metrics file if any. */
public Histogram<HKEY> getHistogram() {
- if (histograms.size() > 0) return this.histograms.get(0);
+ if (!histograms.isEmpty()) return this.histograms.get(0);
else return null;
}
@@ -382,7 +382,7 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
for (int i=0; i<fields.length; ++i) {
Object value = null;
- if (values[i] != null && values[i].length() > 0) {
+ if (values[i] != null && !values[i].isEmpty()) {
value = formatter.parseObject(values[i], fields[i].getType());
}
@@ -471,13 +471,13 @@ public class MetricsFile<BEAN extends MetricBase, HKEY extends Comparable> imple
if (tryOtherPackages) {
for (final String p : packages) {
try {
- return loadClass(p + className.substring(className.lastIndexOf(".")), false);
+ return loadClass(p + className.substring(className.lastIndexOf('.')), false);
}
catch (ClassNotFoundException cnf2) {/* do nothing */}
// If it ws an inner class, try and see if it's a stand-alone class now
- if (className.indexOf("$") > -1) {
+ if (className.indexOf('$') > -1) {
try {
- return loadClass(p + "." + className.substring(className.lastIndexOf("$") + 1), false);
+ return loadClass(p + "." + className.substring(className.lastIndexOf('$') + 1), false);
}
catch (ClassNotFoundException cnf2) {/* do nothing */}
}
diff --git a/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java b/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
index e0c7dca..86f11fe 100644
--- a/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/AbstractFastaSequenceFile.java
@@ -67,7 +67,7 @@ abstract class AbstractFastaSequenceFile implements ReferenceSequenceFile {
final BufferedLineReader reader = new BufferedLineReader(Files.newInputStream(dictionary));
final SAMFileHeader header = codec.decode(reader,
dictionary.toString());
- if (header.getSequenceDictionary() != null && header.getSequenceDictionary().size() > 0) {
+ if (header.getSequenceDictionary() != null && !header.getSequenceDictionary().isEmpty()) {
this.sequenceDictionary = header.getSequenceDictionary();
}
reader.close();
diff --git a/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java b/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
index b341d6f..60cc3b1 100644
--- a/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
+++ b/src/java/htsjdk/samtools/reference/IndexedFastaSequenceFile.java
@@ -230,7 +230,7 @@ public class IndexedFastaSequenceFile extends AbstractFastaSequenceFile implemen
long startOffset = ((start-1)/basesPerLine)*bytesPerLine + (start-1)%basesPerLine;
// Cast to long so the second argument cannot overflow a signed integer.
- final long minBufferSize = Math.min((long) Defaults.NON_ZERO_BUFFER_SIZE, (long)(length % basesPerLine + 2) * (long)bytesPerLine);
+ final long minBufferSize = Math.min((long) Defaults.NON_ZERO_BUFFER_SIZE, (long)(length / basesPerLine + 2) * (long)bytesPerLine);
if (minBufferSize > Integer.MAX_VALUE) throw new SAMException("Buffer is too large: " + minBufferSize);
// Allocate a buffer for reading in sequence data.
diff --git a/src/java/htsjdk/samtools/sra/SRAAccession.java b/src/java/htsjdk/samtools/sra/SRAAccession.java
index 6f39eca..1f2dbe0 100644
--- a/src/java/htsjdk/samtools/sra/SRAAccession.java
+++ b/src/java/htsjdk/samtools/sra/SRAAccession.java
@@ -29,11 +29,19 @@ package htsjdk.samtools.sra;
import htsjdk.samtools.util.Log;
import gov.nih.nlm.ncbi.ngs.NGS;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
import java.io.Serializable;
+import java.util.Arrays;
/**
- * Describes a single SRA accession
+ * Describes a single SRA accession for SRA read collection
* Also provides app string functionality and allows to check if working SRA is supported on the running platform
+ *
+ * Important: due to checks performed in SRAAccession.isValid(), we won't recognise any accessions other
+ * than ones that follow the pattern "^[SED]RR[0-9]{6,9}$", e.g. SRR000123
*/
public class SRAAccession implements Serializable {
private static final Log log = Log.getInstance(SRAAccession.class);
@@ -75,11 +83,31 @@ public class SRAAccession implements Serializable {
* @return true if a string is a valid SRA accession
*/
public static boolean isValid(String acc) {
- if (!isSupported()) {
- return false;
+ boolean looksLikeSRA = false;
+ File f = new File(acc);
+ if (f.isFile()) {
+ byte[] buffer = new byte[8];
+ byte[] signature1 = "NCBI.sra".getBytes();
+ byte[] signature2 = "NCBInenc".getBytes();
+
+ try (InputStream is = new FileInputStream(f)) {
+ int numRead = is.read(buffer);
+
+ looksLikeSRA = numRead == buffer.length &&
+ (Arrays.equals(buffer, signature1) || Arrays.equals(buffer, signature2));
+ } catch (IOException e) {
+ looksLikeSRA = false;
+ }
+ } else if (f.exists()) {
+ // anything else local other than a file is not an SRA archive
+ looksLikeSRA = false;
+ } else {
+ looksLikeSRA = acc.toUpperCase().matches ( "^[SED]RR[0-9]{6,9}$" );
}
- return NGS.isValid(acc);
+ if (!looksLikeSRA) return false;
+
+ return isSupported() && NGS.isValid(acc);
}
/**
diff --git a/src/java/htsjdk/samtools/util/AbstractProgressLogger.java b/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
index 5bd5e92..de1f694 100644
--- a/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
+++ b/src/java/htsjdk/samtools/util/AbstractProgressLogger.java
@@ -12,9 +12,9 @@ import java.text.NumberFormat;
* Concrete subclasses must provide the logger
*/
abstract public class AbstractProgressLogger implements ProgressLoggerInterface {
- protected final int n;
- protected final String verb;
- protected final String noun;
+ private final int n;
+ private final String verb;
+ private final String noun;
private final long startTime = System.currentTimeMillis();
private final NumberFormat fmt = new DecimalFormat("#,###");
private final NumberFormat timeFmt = new DecimalFormat("00");
@@ -93,7 +93,7 @@ abstract public class AbstractProgressLogger implements ProgressLoggerInterface
}
/** Returns the count of records processed. */
- public long getCount() { return this.processed; }
+ public synchronized long getCount() { return this.processed; }
/** Returns the number of seconds since progress tracking began. */
public long getElapsedSeconds() { return (System.currentTimeMillis() - this.startTime) / 1000; }
diff --git a/src/java/htsjdk/samtools/util/BinaryCodec.java b/src/java/htsjdk/samtools/util/BinaryCodec.java
index 843c128..8933ee3 100644
--- a/src/java/htsjdk/samtools/util/BinaryCodec.java
+++ b/src/java/htsjdk/samtools/util/BinaryCodec.java
@@ -613,13 +613,11 @@ public class BinaryCodec implements Closeable {
private String constructErrorMessage(final String msg) {
final StringBuilder sb = new StringBuilder(msg);
- sb.append("; BinaryCodec in ");
- sb.append(isWriting? "write": "read");
- sb.append("mode; ");
+ sb.append("; BinaryCodec in ")
+ .append(isWriting? "write": "read").append("mode; ");
final String filename = isWriting? outputFileName: inputFileName;
if (filename != null) {
- sb.append("file: ");
- sb.append(filename);
+ sb.append("file: ").append(filename);
} else {
sb.append("streamed file (filename not available)");
}
diff --git a/src/java/htsjdk/samtools/util/BufferedLineReader.java b/src/java/htsjdk/samtools/util/BufferedLineReader.java
index beb6ebe..de1115d 100644
--- a/src/java/htsjdk/samtools/util/BufferedLineReader.java
+++ b/src/java/htsjdk/samtools/util/BufferedLineReader.java
@@ -98,7 +98,7 @@ public class BufferedLineReader implements LineReader {
if (peekedLine == null) {
return -1;
}
- if (peekedLine.length() == 0) {
+ if (peekedLine.isEmpty()) {
return '\n';
}
return peekedLine.charAt(0);
diff --git a/src/java/htsjdk/samtools/util/CigarUtil.java b/src/java/htsjdk/samtools/util/CigarUtil.java
index 8178dd3..e6c14ab 100644
--- a/src/java/htsjdk/samtools/util/CigarUtil.java
+++ b/src/java/htsjdk/samtools/util/CigarUtil.java
@@ -167,7 +167,7 @@ public class CigarUtil {
}
private static boolean isValidCigar(SAMRecord rec, Cigar cigar, boolean isOldCigar) {
- if (cigar == null || cigar.getCigarElements() == null || cigar.getCigarElements().size() == 0) {
+ if (cigar == null || cigar.getCigarElements() == null || cigar.getCigarElements().isEmpty()) {
if (isOldCigar) {
if (rec.getReadUnmappedFlag()) {
// don't bother to warn since this does occur for PE reads
@@ -185,7 +185,7 @@ public class CigarUtil {
}
final List<SAMValidationError> validationErrors = cigar.isValid(rec.getReadName(), -1);
- if (validationErrors != null && validationErrors.size() != 0) {
+ if (validationErrors != null && !validationErrors.isEmpty()) {
log.error("Invalid cigar for read " + rec +
(isOldCigar ? " " : " for new cigar with clipped adapter ") +
" (" + rec.getCigarString() + "/" + cigar.toString() + ") " +
diff --git a/src/java/htsjdk/samtools/util/CollectionUtil.java b/src/java/htsjdk/samtools/util/CollectionUtil.java
index 6ff56bf..a80319b 100755
--- a/src/java/htsjdk/samtools/util/CollectionUtil.java
+++ b/src/java/htsjdk/samtools/util/CollectionUtil.java
@@ -34,6 +34,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.function.Function;
/**
* Small utility methods for dealing with collection classes.
@@ -42,34 +43,29 @@ import java.util.Set;
public class CollectionUtil {
/** Simple case-insensitive lexical comparator of objects using their {@link Object#toString()} value. */
- final public static Comparator<Object> OBJECT_TOSTRING_COMPARATOR = new Comparator<Object>() {
- @Override
- public int compare(final Object o1, final Object o2) {
- return o1.toString().compareToIgnoreCase(o2.toString());
- }
- };
+ final public static Comparator<Object> OBJECT_TOSTRING_COMPARATOR = (o1, o2) -> o1.toString().compareToIgnoreCase(o2.toString());
- public static <T> List<T> makeList (final T... list) {
- final List<T> result = new ArrayList<T>();
+ public static <T> List<T> makeList(final T... list) {
+ final List<T> result = new ArrayList<>();
Collections.addAll(result, list);
return result;
}
-
- public static <T> Set<T> makeSet (final T... list) {
- final Set<T> result = new HashSet<T>();
+
+ public static <T> Set<T> makeSet(final T... list) {
+ final Set<T> result = new HashSet<>();
Collections.addAll(result, list);
return result;
}
-
- public static <T> Collection<T> makeCollection (final Iterator<T> i) {
- final List<T> list = new LinkedList<T>();
+
+ public static <T> Collection<T> makeCollection(final Iterator<T> i) {
+ final List<T> list = new LinkedList<>();
while (i.hasNext()) {
list.add(i.next());
}
return list;
}
-
+
/** Construct a string by toString()ing each item in the collection with inBetween between each item. */
public static String join(final Collection<?> items, final String inBetween) {
final StringBuilder builder = new StringBuilder();
@@ -86,7 +82,7 @@ public class CollectionUtil {
throw new IllegalArgumentException(String.format("Expected a single element in %s, but found %s.", items, items.size()));
return items.iterator().next();
}
-
+
/** Simple multi-map for convenience of storing collections in map values. */
public static class MultiMap<K, V> extends HashMap<K, Collection<V>> {
public void append(final K k, final V v) {
@@ -101,25 +97,39 @@ public class CollectionUtil {
private void initializeKeyIfUninitialized(final K k) {
if (!this.containsKey(k))
- this.put(k, new LinkedList<V>());
+ this.put(k, new LinkedList<>());
}
}
- /**
+ /**
* Partitions a collection into groups based on a characteristics of that group. Partitions are embodied in a map, whose keys are the
* value of that characteristic, and the values are the partition of elements whose characteristic evaluate to that key.
*/
- public static <K, V> Map<K,Collection<V>> partition(final Collection<V> collection, final Partitioner<V, K> p) {
- final MultiMap<K, V> partitionToValues = new MultiMap<K, V>();
+ @Deprecated //use java8 .stream().collect(Collectors.groupingBy(()-> function)) instead
+ public static <K, V> Map<K, Collection<V>> partition(final Collection<V> collection, final Partitioner<V, K> p) {
+ final MultiMap<K, V> partitionToValues = new MultiMap<>();
for (final V entry : collection) {
partitionToValues.append(p.getPartition(entry), entry);
}
return partitionToValues;
}
+ @Deprecated //not needed, use Collectors.groupingBy instead
public static abstract class Partitioner<V, K> {
public abstract K getPartition(final V v);
}
-
+
+ /**
+ * Partitions a collection into groups based on a characteristics of that group. Partitions are embodied in a map, whose keys are the
+ * value of that characteristic, and the values are the partition of elements whose characteristic evaluate to that key.
+ */
+ public static <K, V> Map<K, Collection<V>> partition(final Collection<V> collection, final Function<? super V, ? extends K> keyer) {
+ final MultiMap<K, V> partitionToValues = new MultiMap<>();
+ for (final V entry : collection) {
+ partitionToValues.append(keyer.apply(entry), entry);
+ }
+ return partitionToValues;
+ }
+
/**
* A defaulting map, which returns a default value when a value that does not exist in the map is looked up.
*
@@ -138,12 +148,7 @@ public class CollectionUtil {
/** Creates a defaulting map which defaults to the provided value and with injecting-on-default disabled. */
public DefaultingMap(final V defaultValue) {
- this(new Factory<V, K>() {
- @Override
- public V make(final K k) {
- return defaultValue;
- }
- }, false);
+ this(k -> defaultValue, false);
}
/**
diff --git a/src/java/htsjdk/samtools/util/ComparableTuple.java b/src/java/htsjdk/samtools/util/ComparableTuple.java
new file mode 100644
index 0000000..1fe86ed
--- /dev/null
+++ b/src/java/htsjdk/samtools/util/ComparableTuple.java
@@ -0,0 +1,28 @@
+package htsjdk.samtools.util;
+
+/**
+ * A simple extension of the Tuple class that, for comparable Types, allows comparing Tuples of non-null elements.
+ * <p>
+ * The comparison will compare the first arguments and if equal (compareTo returns 0) compare the second arguments.
+ *
+ * @author farjoun
+ */
+public class ComparableTuple<A extends Comparable<A>, B extends Comparable<B>> extends Tuple<A, B> implements Comparable<ComparableTuple<A, B>> {
+
+ public ComparableTuple(final A a, final B b) {
+ super(a, b);
+
+ if (a == null || b == null) {
+ throw new IllegalArgumentException("ComparableTuple's behavior is undefined when containing a null.");
+ }
+ }
+
+ @Override
+ public int compareTo(final ComparableTuple<A, B> o) {
+ int retval = a.compareTo(o.a);
+ if (retval == 0) {
+ retval = b.compareTo(o.b);
+ }
+ return retval;
+ }
+}
diff --git a/src/java/htsjdk/samtools/util/DateParser.java b/src/java/htsjdk/samtools/util/DateParser.java
index 45042af..02a9609 100644
--- a/src/java/htsjdk/samtools/util/DateParser.java
+++ b/src/java/htsjdk/samtools/util/DateParser.java
@@ -261,22 +261,20 @@ public class DateParser {
public static String getIsoDate(Date date) {
Calendar calendar = new GregorianCalendar(TimeZone.getTimeZone("UTC"));
calendar.setTime(date);
- StringBuffer buffer = new StringBuffer();
- buffer.append(calendar.get(Calendar.YEAR));
- buffer.append("-");
- buffer.append(twoDigit(calendar.get(Calendar.MONTH) + 1));
- buffer.append("-");
- buffer.append(twoDigit(calendar.get(Calendar.DAY_OF_MONTH)));
- buffer.append("T");
- buffer.append(twoDigit(calendar.get(Calendar.HOUR_OF_DAY)));
- buffer.append(":");
- buffer.append(twoDigit(calendar.get(Calendar.MINUTE)));
- buffer.append(":");
- buffer.append(twoDigit(calendar.get(Calendar.SECOND)));
- buffer.append(".");
- buffer.append(twoDigit(calendar.get(Calendar.MILLISECOND) / 10));
- buffer.append("Z");
- return buffer.toString();
+ return new StringBuffer().append(calendar.get(Calendar.YEAR))
+ .append("-")
+ .append(twoDigit(calendar.get(Calendar.MONTH) + 1))
+ .append("-")
+ .append(twoDigit(calendar.get(Calendar.DAY_OF_MONTH)))
+ .append("T")
+ .append(twoDigit(calendar.get(Calendar.HOUR_OF_DAY)))
+ .append(":")
+ .append(twoDigit(calendar.get(Calendar.MINUTE)))
+ .append(":")
+ .append(twoDigit(calendar.get(Calendar.SECOND)))
+ .append(".")
+ .append(twoDigit(calendar.get(Calendar.MILLISECOND) / 10))
+ .append("Z").toString();
}
public static void test(String isodate) {
diff --git a/src/java/htsjdk/samtools/util/DiskBackedQueue.java b/src/java/htsjdk/samtools/util/DiskBackedQueue.java
index fd07f68..bbf3818 100644
--- a/src/java/htsjdk/samtools/util/DiskBackedQueue.java
+++ b/src/java/htsjdk/samtools/util/DiskBackedQueue.java
@@ -87,7 +87,7 @@ public class DiskBackedQueue<E> implements Queue<E> {
if (maxRecordsInRam < 0) {
throw new IllegalArgumentException("maxRecordsInRamQueue must be >= 0");
}
- if (tmpDirs == null || tmpDirs.size() == 0) {
+ if (tmpDirs == null || tmpDirs.isEmpty()) {
throw new IllegalArgumentException("At least one temp directory must be provided.");
}
for (final File tmpDir : tmpDirs) IOUtil.assertDirectoryIsWritable(tmpDir);
diff --git a/src/java/htsjdk/samtools/util/FormatUtil.java b/src/java/htsjdk/samtools/util/FormatUtil.java
index 98b33ab..a80c240 100644
--- a/src/java/htsjdk/samtools/util/FormatUtil.java
+++ b/src/java/htsjdk/samtools/util/FormatUtil.java
@@ -159,7 +159,7 @@ public class FormatUtil {
/** Parses a String into a boolean, as per the above convention that true = Y and false = N. */
public boolean parseBoolean(String value) {
- if (value == null || value.length() == 0) return false;
+ if (value == null || value.isEmpty()) return false;
char ch = Character.toUpperCase(value.charAt(0));
return (ch == 'Y');
}
diff --git a/src/java/htsjdk/samtools/util/IOUtil.java b/src/java/htsjdk/samtools/util/IOUtil.java
index 199c6d1..7f0495d 100644
--- a/src/java/htsjdk/samtools/util/IOUtil.java
+++ b/src/java/htsjdk/samtools/util/IOUtil.java
@@ -277,7 +277,7 @@ public class IOUtil {
/** Returns the name of the file minus the extension (i.e. text after the last "." in the filename). */
public static String basename(final File f) {
final String full = f.getName();
- final int index = full.lastIndexOf(".");
+ final int index = full.lastIndexOf('.');
if (index > 0 && index > full.lastIndexOf(File.separator)) {
return full.substring(0, index);
}
@@ -782,7 +782,7 @@ public class IOUtil {
/** Returns the name of the file extension (i.e. text after the last "." in the filename) including the . */
public static String fileSuffix(final File f) {
final String full = f.getName();
- final int index = full.lastIndexOf(".");
+ final int index = full.lastIndexOf('.');
if (index > 0 && index > full.lastIndexOf(File.separator)) {
return full.substring(index);
} else {
diff --git a/src/java/htsjdk/samtools/util/IntervalList.java b/src/java/htsjdk/samtools/util/IntervalList.java
index 6106766..8b46a1c 100644
--- a/src/java/htsjdk/samtools/util/IntervalList.java
+++ b/src/java/htsjdk/samtools/util/IntervalList.java
@@ -33,6 +33,7 @@ import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -240,7 +241,7 @@ public class IntervalList implements Iterable<Interval> {
}
}
- if (toBeMerged.size() > 0) unique.add(merge(toBeMerged, concatenateNames));
+ if (!toBeMerged.isEmpty()) unique.add(merge(toBeMerged, concatenateNames));
return unique;
}
@@ -447,7 +448,7 @@ public class IntervalList implements Iterable<Interval> {
// Then read in the intervals
final FormatUtil format = new FormatUtil();
do {
- if (line.trim().length() == 0) continue; // skip over blank lines
+ if (line.trim().isEmpty()) continue; // skip over blank lines
// Make sure we have the right number of fields
final String[] fields = line.split("\t");
@@ -733,7 +734,9 @@ public class IntervalList implements Iterable<Interval> {
* Comparator that orders intervals based on their sequence index, by coordinate
* then by strand and finally by name.
*/
-class IntervalCoordinateComparator implements Comparator<Interval> {
+class IntervalCoordinateComparator implements Comparator<Interval>, Serializable {
+ private static final long serialVersionUID = 1L;
+
private final SAMFileHeader header;
/** Constructs a comparator using the supplied sequence header. */
diff --git a/src/java/htsjdk/samtools/util/LocusComparator.java b/src/java/htsjdk/samtools/util/LocusComparator.java
index 881618f..e0f04d9 100644
--- a/src/java/htsjdk/samtools/util/LocusComparator.java
+++ b/src/java/htsjdk/samtools/util/LocusComparator.java
@@ -23,6 +23,7 @@
*/
package htsjdk.samtools.util;
+import java.io.Serializable;
import java.util.Comparator;
/**
@@ -30,7 +31,8 @@ import java.util.Comparator;
*
* @author Doug Voet (dvoet at broadinstitute dot org)
*/
-public class LocusComparator<T extends Locus> implements Comparator<T> {
+public class LocusComparator<T extends Locus> implements Comparator<T>, Serializable {
+ private static final long serialVersionUID = 1L;
public int compare(T thing1, T thing2) {
int refCompare = thing1.getSequenceIndex() - thing2.getSequenceIndex();
diff --git a/src/java/htsjdk/samtools/util/Log.java b/src/java/htsjdk/samtools/util/Log.java
index 57f82f0..d17e841 100644
--- a/src/java/htsjdk/samtools/util/Log.java
+++ b/src/java/htsjdk/samtools/util/Log.java
@@ -87,12 +87,12 @@ public final class Log {
private void emit(final LogLevel level, final Throwable throwable, final Object... parts) {
if (isEnabled(level)) {
StringBuffer tmp = new StringBuffer();
- tmp.append(level.name());
- tmp.append('\t');
- tmp.append(getTimestamp());
- tmp.append('\t');
- tmp.append(this.className);
- tmp.append('\t');
+ tmp.append(level.name())
+ .append('\t')
+ .append(getTimestamp())
+ .append('\t')
+ .append(this.className)
+ .append('\t');
for (final Object part : parts) {
if (part != null && part.getClass().isArray()) {
diff --git a/src/java/htsjdk/samtools/util/Murmur3.java b/src/java/htsjdk/samtools/util/Murmur3.java
index 9372008..b0c4a3a 100644
--- a/src/java/htsjdk/samtools/util/Murmur3.java
+++ b/src/java/htsjdk/samtools/util/Murmur3.java
@@ -30,11 +30,15 @@
*/
package htsjdk.samtools.util;
+import java.io.Serializable;
+
/**
* Provides an implementation of the Murmur3_32 hash algorithm that has desirable properties in terms of randomness
* and uniformity of the distribution of output values that make it a useful hashing algorithm for downsampling.
*/
-public final class Murmur3 {
+public final class Murmur3 implements Serializable{
+ private static final long serialVersionUID = 1L;
+
private final int seed ;
/** Constructs a Murmur3 hash with the given seed. */
diff --git a/src/java/htsjdk/samtools/util/ProcessExecutor.java b/src/java/htsjdk/samtools/util/ProcessExecutor.java
index 510cf01..5ff6673 100644
--- a/src/java/htsjdk/samtools/util/ProcessExecutor.java
+++ b/src/java/htsjdk/samtools/util/ProcessExecutor.java
@@ -236,7 +236,7 @@ public class ProcessExecutor {
private static class StringBuilderProcessOutputReader extends ProcessOutputReader {
private final StringBuilder sb = new StringBuilder();
public StringBuilderProcessOutputReader(final InputStream stream) { super(stream); }
- @Override protected void write(final String message) { sb.append(message).append("\n"); }
+ @Override protected void write(final String message) { sb.append(message).append('\n'); }
public String getOutput() { return sb.toString(); }
}
diff --git a/src/java/htsjdk/samtools/util/LocusComparator.java b/src/java/htsjdk/samtools/util/RuntimeScriptException.java
similarity index 69%
copy from src/java/htsjdk/samtools/util/LocusComparator.java
copy to src/java/htsjdk/samtools/util/RuntimeScriptException.java
index 881618f..570a64e 100644
--- a/src/java/htsjdk/samtools/util/LocusComparator.java
+++ b/src/java/htsjdk/samtools/util/RuntimeScriptException.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2010 The Broad Institute
+ * Pierre Lindenbaum PhD @yokofakun
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -23,17 +23,23 @@
*/
package htsjdk.samtools.util;
-import java.util.Comparator;
/**
- * compares first by sequence index then by position
- *
- * @author Doug Voet (dvoet at broadinstitute dot org)
+ * Thrown by classes handling script engines like the javascript-based filters for SAM/VCF
*/
-public class LocusComparator<T extends Locus> implements Comparator<T> {
+public class RuntimeScriptException extends RuntimeException {
+ public RuntimeScriptException() {
+ }
+
+ public RuntimeScriptException(final String s) {
+ super(s);
+ }
+
+ public RuntimeScriptException(final String s, final Throwable throwable) {
+ super(s, throwable);
+ }
- public int compare(T thing1, T thing2) {
- int refCompare = thing1.getSequenceIndex() - thing2.getSequenceIndex();
- return refCompare == 0 ? thing1.getPosition() - thing2.getPosition() : refCompare;
+ public RuntimeScriptException(final Throwable throwable) {
+ super(throwable);
}
}
diff --git a/src/java/htsjdk/samtools/util/SamLocusIterator.java b/src/java/htsjdk/samtools/util/SamLocusIterator.java
index 622b773..d9d189d 100644
--- a/src/java/htsjdk/samtools/util/SamLocusIterator.java
+++ b/src/java/htsjdk/samtools/util/SamLocusIterator.java
@@ -142,6 +142,19 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
*/
private boolean emitUncoveredLoci = true;
+ /**
+ * If set, this will cap the number of reads we accumulate for any given position.
+ * Note that if we hit the maximum threshold at the first position in the accumulation queue,
+ * then we throw further reads overlapping that position completely away (including for subsequent positions).
+ * This is a useful feature if one wants to minimize the memory footprint in files with a few massively large pileups,
+ * but it must be pointed out that it could cause major bias because of the non-random nature with which the cap is
+ * applied (the first maxReadsToAccumulatePerLocus reads are kept and all subsequent ones are dropped).
+ */
+ private int maxReadsToAccumulatePerLocus = Integer.MAX_VALUE;
+
+ // Set to true when we have enforced the accumulation limit for the first time
+ private boolean enforcedAccumulationLimit = false;
+
// When there is a target mask, these members remember the last locus for which a LocusInfo has been
// returned, so that any uncovered locus in the target mask can be covered by a 0-coverage LocusInfo
private int lastReferenceSequence = 0;
@@ -312,7 +325,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
// Store the loci for the read in the accumulator
- accumulateSamRecord(rec);
+ if (!surpassedAccumulationThreshold()) accumulateSamRecord(rec);
samIterator.next();
}
@@ -344,6 +357,18 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
}
/**
+ * @return true if we have surpassed the maximum accumulation threshold for the first locus in the accumulator, false otherwise
+ */
+ private boolean surpassedAccumulationThreshold() {
+ final boolean surpassesThreshold = !accumulator.isEmpty() && accumulator.get(0).recordAndOffsets.size() >= maxReadsToAccumulatePerLocus;
+ if (surpassesThreshold && !enforcedAccumulationLimit) {
+ LOG.warn("We have encountered greater than " + maxReadsToAccumulatePerLocus + " reads at position " + accumulator.get(0).toString() + " and will ignore the remaining reads at this position. Note that further warnings will be suppressed.");
+ enforcedAccumulationLimit = true;
+ }
+ return surpassesThreshold;
+ }
+
+ /**
* Capture the loci covered by the given SAMRecord in the LocusInfos in the accumulator,
* creating new LocusInfos as needed.
*/
@@ -512,5 +537,16 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
public void setEmitUncoveredLoci(final boolean emitUncoveredLoci) {
this.emitUncoveredLoci = emitUncoveredLoci;
}
+
+ public int getMaxReadsToAccumulatePerLocus() {
+ return maxReadsToAccumulatePerLocus;
+ }
+
+ /**
+ * If set, this will cap the number of reads we accumulate for any given position.
+ * As is pointed out above, setting this could cause major bias because of the non-random nature with which the
+ * cap is applied (the first maxReadsToAccumulatePerLocus reads are kept and all subsequent ones are dropped).
+ */
+ public void setMaxReadsToAccumulatePerLocus(final int maxReadsToAccumulatePerLocus) { this.maxReadsToAccumulatePerLocus = maxReadsToAccumulatePerLocus; }
}
diff --git a/src/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java b/src/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java
index 661c95a..46cf8bf 100644
--- a/src/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java
+++ b/src/java/htsjdk/samtools/util/SamRecordTrackingBuffer.java
@@ -84,10 +84,10 @@ public class SamRecordTrackingBuffer<T extends SamRecordWithOrdinal> {
}
/** Returns true if we are tracking no records, false otherwise */
- public boolean isEmpty() { return (blocks.size() == 0 || this.blocks.getFirst().isEmpty()); }
+ public boolean isEmpty() { return (blocks.isEmpty() || this.blocks.getFirst().isEmpty()); }
/** Returns true if we can return the next record (it has been examined). */
- public boolean canEmit() { return (this.blocks.size() != 0 && this.blocks.getFirst().canEmit()); }
+ public boolean canEmit() { return (!this.blocks.isEmpty() && this.blocks.getFirst().canEmit()); }
/**
* Add the given SAMRecordIndex to the buffer. The records must be added in order.
@@ -103,7 +103,7 @@ public class SamRecordTrackingBuffer<T extends SamRecordWithOrdinal> {
throw new SAMException("The records were added out of order");
}
// If necessary, create a new block, using as much ram as available up to its total size
- if (this.blocks.size() == 0 || !this.blocks.getLast().canAdd()) {
+ if (this.blocks.isEmpty() || !this.blocks.getLast().canAdd()) {
// once ram is given to a block, we can't give it to another block (until some is recovered from the head of the queue)
final int blockRam = Math.min(this.blockSize, this.availableRecordsInMemory);
this.availableRecordsInMemory = this.availableRecordsInMemory - blockRam;
diff --git a/src/java/htsjdk/samtools/util/SequenceUtil.java b/src/java/htsjdk/samtools/util/SequenceUtil.java
index bd4bfdd..fc273b0 100644
--- a/src/java/htsjdk/samtools/util/SequenceUtil.java
+++ b/src/java/htsjdk/samtools/util/SequenceUtil.java
@@ -46,9 +46,46 @@ import java.util.regex.Pattern;
public class SequenceUtil {
/** Byte typed variables for all normal bases. */
public static final byte a = 'a', c = 'c', g = 'g', t = 't', n = 'n', A = 'A', C = 'C', G = 'G', T = 'T', N = 'N';
+
public static final byte[] VALID_BASES_UPPER = new byte[]{A, C, G, T};
public static final byte[] VALID_BASES_LOWER = new byte[]{a, c, g, t};
+ private static final byte A_MASK = 1;
+ private static final byte C_MASK = 2;
+ private static final byte G_MASK = 4;
+ private static final byte T_MASK = 8;
+
+ private static final byte[] bases = new byte[127];
+
+ /*
+ * Definition of IUPAC codes:
+ * http://www.bioinformatics.org/sms2/iupac.html
+ */
+ static {
+ Arrays.fill(bases, (byte) 0);
+ bases[A] = A_MASK;
+ bases[C] = C_MASK;
+ bases[G] = G_MASK;
+ bases[T] = T_MASK;
+ bases['M'] = A_MASK | C_MASK;
+ bases['R'] = A_MASK | G_MASK;
+ bases['W'] = A_MASK | T_MASK;
+ bases['S'] = C_MASK | G_MASK;
+ bases['Y'] = C_MASK | T_MASK;
+ bases['K'] = G_MASK | T_MASK;
+ bases['V'] = A_MASK | C_MASK | G_MASK;
+ bases['H'] = A_MASK | C_MASK | T_MASK;
+ bases['D'] = A_MASK | G_MASK | T_MASK;
+ bases['B'] = C_MASK | G_MASK | T_MASK;
+ bases['N'] = A_MASK | C_MASK | G_MASK | T_MASK;
+ // Also store the bases in lower case
+ for (int i = 'A'; i <= 'Z'; i++) {
+ bases[(byte) i + 32] = bases[(byte) i];
+ }
+ bases['.'] = A_MASK | C_MASK | G_MASK | T_MASK;
+ };
+
+
/**
* Calculate the reverse complement of the specified sequence
* (Stolen from Reseq)
@@ -62,15 +99,27 @@ public class SequenceUtil {
return htsjdk.samtools.util.StringUtil.bytesToString(bases);
}
- /** Attempts to efficiently compare two bases stored as bytes for equality. */
- public static boolean basesEqual(byte lhs, byte rhs) {
- if (lhs == rhs) return true;
- else {
- if (lhs > 90) lhs -= 32;
- if (rhs > 90) rhs -= 32;
- }
- return lhs == rhs;
+ /**
+ * Efficiently compare two IUPAC base codes, simply returning true if they are equal (ignoring case),
+ * without considering the set relationships between ambiguous codes.
+ */
+ public static boolean basesEqual(final byte lhs, final byte rhs) {
+ return (bases[lhs] == bases[rhs]);
+ }
+
+ /**
+ * Efficiently compare two IUPAC base codes, one coming from a read sequence and the other coming from
+ * a reference sequence, using the reference code as a 'pattern' that the read base must match.
+ *
+ * We take ambiguous codes into account, returning true if the set of possible bases
+ * represented by the read value is a (non-strict) subset of the possible bases represented
+ * by the reference value.
+ *
+ * Since the comparison is directional, make sure to pass read / ref codes in correct order.
+ */
+ public static boolean readBaseMatchesRefBaseWithAmbiguity(final byte readBase, final byte refBase) {
+ return (bases[readBase] & bases[refBase]) == bases[readBase];
}
/**
@@ -82,10 +131,11 @@ public class SequenceUtil {
/** Returns true if the byte is in [acgtACGT]. */
public static boolean isValidBase(final byte b) {
- for (final byte validBase : VALID_BASES_UPPER) {
- if (b == validBase) return true;
- }
- for (final byte validBase : VALID_BASES_LOWER) {
+ return isValidBase(b, VALID_BASES_UPPER) || isValidBase(b, VALID_BASES_LOWER);
+ }
+
+ private static boolean isValidBase(final byte b, final byte[] validBases) {
+ for (final byte validBase : validBases) {
if (b == validBase) return true;
}
return false;
@@ -307,6 +357,27 @@ public class SequenceUtil {
return Integer.toString(clipLength) + "S";
}
+ /**
+ * Helper method to handle the various use cases of base comparison.
+ *
+ * @param readBase the read base to match
+ * @param refBase the reference base to match
+ * @param negativeStrand set to true if the base to test is on the negative strand and should be reverse complemented (only applies if bisulfiteSequence is true)
+ * @param bisulfiteSequence set to true if the base to match is a bisulfite sequence and needs to be converted
+ * @param matchAmbiguousRef causes the match to return true when the read base is a subset of the possible IUPAC reference bases, but not the other way around
+ * @return true if the bases match, false otherwise
+ */
+ private static boolean basesMatch(final byte readBase, final byte refBase, final boolean negativeStrand,
+ final boolean bisulfiteSequence, final boolean matchAmbiguousRef) {
+ if (bisulfiteSequence) {
+ if (matchAmbiguousRef) return bisulfiteBasesMatchWithAmbiguity(negativeStrand, readBase, refBase);
+ else return bisulfiteBasesEqual(negativeStrand, readBase, refBase);
+ } else {
+ if (matchAmbiguousRef) return readBaseMatchesRefBaseWithAmbiguity(readBase, refBase);
+ else return basesEqual(readBase, refBase);
+ }
+ }
+
/** Calculates the number of mismatches between the read and the reference sequence provided. */
public static int countMismatches(final SAMRecord read, final byte[] referenceBases) {
return countMismatches(read, referenceBases, 0, false);
@@ -328,8 +399,12 @@ public class SequenceUtil {
* and C->T on the positive strand and G->A on the negative strand will not be counted
* as mismatches.
*/
+ public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset, final boolean bisulfiteSequence) {
+ return countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, false);
+ }
+
public static int countMismatches(final SAMRecord read, final byte[] referenceBases, final int referenceOffset,
- final boolean bisulfiteSequence) {
+ final boolean bisulfiteSequence, final boolean matchAmbiguousRef) {
try {
int mismatches = 0;
@@ -341,15 +416,9 @@ public class SequenceUtil {
final int length = block.getLength();
for (int i = 0; i < length; ++i) {
- if (!bisulfiteSequence) {
- if (!basesEqual(readBases[readBlockStart + i], referenceBases[referenceBlockStart + i])) {
- ++mismatches;
- }
- } else {
- if (!bisulfiteBasesEqual(read.getReadNegativeStrandFlag(), readBases[readBlockStart + i],
- referenceBases[referenceBlockStart + i])) {
- ++mismatches;
- }
+ if (!basesMatch(readBases[readBlockStart + i], referenceBases[referenceBlockStart + i],
+ read.getReadNegativeStrandFlag(), bisulfiteSequence, matchAmbiguousRef)) {
+ ++mismatches;
}
}
}
@@ -528,16 +597,18 @@ public class SequenceUtil {
}
/**
- * Calculates the for the predefined NM tag from the SAM spec. To the result of
- * countMismatches() it adds 1 for each indel.
+ * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels)
+ * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes
+ * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method).
*/
public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases) {
return calculateSamNmTag(read, referenceBases, 0, false);
}
/**
- * Calculates the for the predefined NM tag from the SAM spec. To the result of
- * countMismatches() it adds 1 for each indel.
+ * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels)
+ * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes
+ * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method).
*
* @param referenceOffset 0-based offset of the first element of referenceBases relative to the start
* of that reference sequence.
@@ -548,8 +619,9 @@ public class SequenceUtil {
}
/**
- * Calculates the for the predefined NM tag from the SAM spec. To the result of
- * countMismatches() it adds 1 for each indel.
+ * Calculates the predefined NM tag from the SAM spec: (# of mismatches + # of indels)
+ * For the purposes for calculating mismatches, we do not yet support IUPAC ambiguous codes
+ * (see <code>readBaseMatchesRefBaseWithAmbiguity</code> method).
*
* @param referenceOffset 0-based offset of the first element of referenceBases relative to the start
* of that reference sequence.
@@ -559,7 +631,7 @@ public class SequenceUtil {
*/
public static int calculateSamNmTag(final SAMRecord read, final byte[] referenceBases,
final int referenceOffset, final boolean bisulfiteSequence) {
- int samNm = countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence);
+ int samNm = countMismatches(read, referenceBases, referenceOffset, bisulfiteSequence, false);
for (final CigarElement el : read.getCigar().getCigarElements()) {
if (el.getOperator() == CigarOperator.INSERTION || el.getOperator() == CigarOperator.DELETION) {
samNm += el.getLength();
@@ -569,7 +641,7 @@ public class SequenceUtil {
}
/**
- * Attempts to calculate the for the predefined NM tag from the SAM spec using the cigar string alone.
+ * Attempts to calculate the predefined NM tag from the SAM spec using the cigar string alone.
* It may calculate incorrectly if ambiguous operators (Like M) are used.
*
* Needed for testing infrastructure: SAMRecordSetBuilder
@@ -658,9 +730,9 @@ public class SequenceUtil {
}
/**
- * Returns true if the bases are equal OR if the mismatch cannot be accounted for by
- * bisfulite treatment. C->T on the positive strand and G->A on the negative strand
- * do not count as mismatches
+ * Returns true if the bases are equal OR if the mismatch can be accounted for by
+ * bisulfite treatment. C->T on the positive strand and G->A on the negative strand
+ * do not count as mismatches.
*/
public static boolean bisulfiteBasesEqual(final boolean negativeStrand, final byte read, final byte reference) {
return (basesEqual(read, reference)) || (isBisulfiteConverted(read, reference, negativeStrand));
@@ -671,6 +743,15 @@ public class SequenceUtil {
}
/**
+ * Same as above, but use <code>readBaseMatchesRefBaseWithAmbiguity</code> instead of <code>basesEqual</code>.
+ * Note that <code>isBisulfiteConverted</code> is not affected because it only applies when the
+ * reference base is non-ambiguous.
+ */
+ public static boolean bisulfiteBasesMatchWithAmbiguity(final boolean negativeStrand, final byte read, final byte reference) {
+ return (readBaseMatchesRefBaseWithAmbiguity(read, reference)) || (isBisulfiteConverted(read, reference, negativeStrand));
+ }
+
+ /**
* Checks for bisulfite conversion, C->T on the positive strand and G->A on the negative strand.
*/
public static boolean isBisulfiteConverted(final byte read, final byte reference, final boolean negativeStrand) {
@@ -767,7 +848,7 @@ public class SequenceUtil {
boolean matched = match.find();
if (matched) {
String mg;
- if (((mg = match.group(1)) != null) && (mg.length() > 0)) {
+ if (((mg = match.group(1)) != null) && (!mg.isEmpty())) {
// It's a number , meaning a series of matches
final int num = Integer.parseInt(mg);
for (int i = 0; i < num; i++) {
@@ -778,7 +859,7 @@ public class SequenceUtil {
}
basesMatched++;
}
- } else if (((mg = match.group(2)) != null) && (mg.length() > 0)) {
+ } else if (((mg = match.group(2)) != null) && (!mg.isEmpty())) {
// It's a single nucleotide, meaning a mismatch
if (basesMatched < cigElLen) {
ret[outIndex++] = StringUtil.charToByte(mg.charAt(0));
@@ -787,7 +868,7 @@ public class SequenceUtil {
throw new IllegalStateException("Should never happen.");
}
basesMatched++;
- } else if (((mg = match.group(3)) != null) && (mg.length() > 0)) {
+ } else if (((mg = match.group(3)) != null) && (!mg.isEmpty())) {
// It's a deletion, starting with a caret
// don't include caret
if (includeReferenceBasesForDeletions) {
@@ -889,7 +970,7 @@ public class SequenceUtil {
}
/**
- * A rip off samtools bam_md.c
+ * Calculate MD and NM similarly to Samtools, except that N->N is a match.
*
* @param record
* @param ref
@@ -980,7 +1061,7 @@ public class SequenceUtil {
public static List<byte[]> generateAllKmers(final int length) {
final List<byte[]> sofar = new LinkedList<byte[]>();
- if (sofar.size() == 0) {
+ if (sofar.isEmpty()) {
sofar.add(new byte[length]);
}
diff --git a/src/java/htsjdk/samtools/util/SortingCollection.java b/src/java/htsjdk/samtools/util/SortingCollection.java
index cabcad1..681f458 100644
--- a/src/java/htsjdk/samtools/util/SortingCollection.java
+++ b/src/java/htsjdk/samtools/util/SortingCollection.java
@@ -32,6 +32,7 @@ import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.io.Serializable;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
@@ -501,7 +502,8 @@ public class SortingCollection<T> implements Iterable<T> {
}
}
- class PeekFileRecordIteratorComparator implements Comparator<PeekFileRecordIterator> {
+ class PeekFileRecordIteratorComparator implements Comparator<PeekFileRecordIterator>, Serializable {
+ private static final long serialVersionUID = 1L;
public int compare(final PeekFileRecordIterator lhs, final PeekFileRecordIterator rhs) {
final int result = comparator.compare(lhs.peek(), rhs.peek());
diff --git a/src/java/htsjdk/samtools/util/SortingLongCollection.java b/src/java/htsjdk/samtools/util/SortingLongCollection.java
index 2864dd5..4cf0c36 100644
--- a/src/java/htsjdk/samtools/util/SortingLongCollection.java
+++ b/src/java/htsjdk/samtools/util/SortingLongCollection.java
@@ -31,6 +31,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@@ -332,7 +333,8 @@ public class SortingLongCollection {
}
}
- private static class PeekFileValueIteratorComparator implements Comparator<PeekFileValueIterator> {
+ private static class PeekFileValueIteratorComparator implements Comparator<PeekFileValueIterator>, Serializable {
+ private static final long serialVersionUID = 1L;
public int compare(final PeekFileValueIterator it1, final PeekFileValueIterator it2) {
if (it1.peek() < it2.peek()) {
diff --git a/src/java/htsjdk/samtools/util/StringUtil.java b/src/java/htsjdk/samtools/util/StringUtil.java
index e205bbf..ecb1b3f 100644
--- a/src/java/htsjdk/samtools/util/StringUtil.java
+++ b/src/java/htsjdk/samtools/util/StringUtil.java
@@ -40,7 +40,7 @@ public class StringUtil {
* @return String that concatenates the result of each item's to String method for all items in objs, with separator between each of them.
*/
public static <T> String join(final String separator, final Collection<T> objs) {
- if (objs.size() == 0) {
+ if (objs.isEmpty()) {
return "";
}
@@ -95,7 +95,7 @@ public class StringUtil {
if (nTokens < maxTokens)
{
final String trailingString = aString.substring(start);
- if (trailingString.length() > 0)
+ if (!trailingString.isEmpty())
{
tokens[nTokens++] = trailingString;
}
@@ -134,7 +134,7 @@ public class StringUtil {
}
// Add the trailing string, if it is not empty.
final String trailingString = aString.substring(start);
- if (trailingString.length() > 0)
+ if (!trailingString.isEmpty())
{
tokens[nTokens++] = trailingString;
}
@@ -206,12 +206,12 @@ public class StringUtil {
final StringBuilder sb = new StringBuilder();
for (final String line: lines) {
if (sb.length() > 0) {
- sb.append("\n");
+ sb.append('\n');
}
sb.append(wordWrapSingleLine(line, maxLineLength));
}
if (s.endsWith("\n")) {
- sb.append("\n");
+ sb.append('\n');
}
return sb.toString();
}
@@ -237,7 +237,7 @@ public class StringUtil {
// Include any trailing whitespace
for (; lastSpaceIndex < s.length() && Character.isWhitespace(s.charAt(lastSpaceIndex)); ++lastSpaceIndex) {}
if (sb.length() > 0) {
- sb.append("\n");
+ sb.append('\n');
}
// Handle situation in which there is no word break. Just break the word in the middle.
if (lastSpaceIndex == startCopyFrom) {
diff --git a/src/java/htsjdk/samtools/util/TrimmingUtil.java b/src/java/htsjdk/samtools/util/TrimmingUtil.java
new file mode 100644
index 0000000..06667d3
--- /dev/null
+++ b/src/java/htsjdk/samtools/util/TrimmingUtil.java
@@ -0,0 +1,67 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+/**
+ * Utility code for performing quality trimming.
+ *
+ * @author Tim Fennell
+ */
+public class TrimmingUtil {
+ /**
+ * Implements phred-style quality trimming. Takes in an array of quality values as a
+ * byte[] and return the first index within the byte array that should be clipped,
+ * such that the caller can then invoke things like:
+ * int retval = findQualityTrimPoint(10, quals);
+ * final byte[] trimmedQuals = Arrays.copyOfRange(quals, 0, retval);
+ * final String trimmedBases = bases.substring(0, retval);
+ *
+ * If the entire read is of low quality this function may return 0! It is left to the caller
+ * to decide whether or not to trim reads down to 0-bases, or to enforce some minimum length.
+ *
+ * @param quals a byte[] of quality scores in phred scaling (i.e. integer values between 0 and ~60)
+ * @param trimQual the lowest quality that is considered "good". In the simplest case
+ * where a read is composed exclusively of "good" qualities followed by
+ * "bad" qualities, this is the lowest quality value left after trimming.
+ * @return The zero-based index of the first base within the quality string that should be trimmed.
+ * When no trimming is required, quals.length (i.e. an index one greater than the last valid
+ * index) will be returned.
+ */
+ public static int findQualityTrimPoint(final byte[] quals, final int trimQual) {
+ final int length = quals.length;
+ int score = 0, maxScore = 0, trimPoint = length;
+ if (trimQual < 1 || length == 0) return 0;
+
+ for (int i=length-1; i>=0; --i) {
+ score += trimQual - (quals[i]);
+ if (score < 0) break;
+ if (score > maxScore) {
+ maxScore = score;
+ trimPoint = i;
+ }
+ }
+
+ return trimPoint;
+ }
+}
diff --git a/src/java/htsjdk/samtools/util/Tuple.java b/src/java/htsjdk/samtools/util/Tuple.java
index a169dd9..6a865d7 100644
--- a/src/java/htsjdk/samtools/util/Tuple.java
+++ b/src/java/htsjdk/samtools/util/Tuple.java
@@ -13,4 +13,30 @@ public class Tuple<A, B> {
this.a = a;
this.b = b;
}
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final Tuple<?, ?> tuple = (Tuple<?, ?>) o;
+
+ if (a != null ? !a.equals(tuple.a) : tuple.a != null) return false;
+ return !(b != null ? !b.equals(tuple.b) : tuple.b != null);
+
+ }
+
+ @Override
+ public int hashCode() {
+ int result = a != null ? a.hashCode() : 0;
+ result = 31 * result + (b != null ? b.hashCode() : 0);
+
+ return result;
+ }
+
+ @Override
+ public String toString() {
+ return "[" + a + ", " + b + "]";
+ }
+
}
diff --git a/src/java/htsjdk/tribble/bed/BEDCodec.java b/src/java/htsjdk/tribble/bed/BEDCodec.java
index 222f1a5..0e91850 100644
--- a/src/java/htsjdk/tribble/bed/BEDCodec.java
+++ b/src/java/htsjdk/tribble/bed/BEDCodec.java
@@ -68,7 +68,7 @@ public class BEDCodec extends AsciiFeatureCodec<BEDFeature> {
@Override
public BEDFeature decode(String line) {
- if (line.trim().length() == 0) {
+ if (line.trim().isEmpty()) {
return null;
}
@@ -134,7 +134,7 @@ public class BEDCodec extends AsciiFeatureCodec<BEDFeature> {
// Strand
if (tokenCount > 5) {
String strandString = tokens[5].trim();
- char strand = (strandString.length() == 0)
+ char strand = (strandString.isEmpty())
? ' ' : strandString.charAt(0);
if (strand == '-') {
diff --git a/src/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java b/src/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java
index 22dfed8..854b05d 100644
--- a/src/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java
+++ b/src/java/htsjdk/tribble/index/interval/IntervalIndexCreator.java
@@ -64,9 +64,9 @@ public class IntervalIndexCreator extends TribbleIndexCreator {
public void addFeature(final Feature feature, final long filePosition) {
// if we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one
- if (chrList.size() == 0 || !chrList.getLast().getName().equals(feature.getChr())) {
+ if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getChr())) {
// if we're creating a new chrIndex (not the first), make sure to dump the intervals to the old chrIndex
- if (chrList.size() != 0)
+ if (!chrList.isEmpty())
addIntervalsToLastChr(filePosition);
// create a new chr index for the current contig
@@ -75,11 +75,11 @@ public class IntervalIndexCreator extends TribbleIndexCreator {
}
// if we're about to overflow the current bin, make a new one
- if (featureCount >= featuresPerInterval || intervals.size() == 0) {
+ if (featureCount >= featuresPerInterval || intervals.isEmpty()) {
final MutableInterval i = new MutableInterval();
i.setStart(feature.getStart());
i.setStartFilePosition(filePosition);
- if( intervals.size() > 0) intervals.get(intervals.size()-1).setEndFilePosition(filePosition);
+ if(!intervals.isEmpty()) intervals.get(intervals.size()-1).setEndFilePosition(filePosition);
featureCount = 0; // reset the feature count
intervals.add(i);
}
diff --git a/src/java/htsjdk/tribble/index/interval/IntervalTree.java b/src/java/htsjdk/tribble/index/interval/IntervalTree.java
index 2deff3b..855ade9 100644
--- a/src/java/htsjdk/tribble/index/interval/IntervalTree.java
+++ b/src/java/htsjdk/tribble/index/interval/IntervalTree.java
@@ -508,10 +508,10 @@ public class IntervalTree {
StringBuffer buf = new StringBuffer();
_toString(buf, keys);
- buf.append("\n");
+ buf.append('\n');
for (Map.Entry<Interval, Integer> entry : keys.entrySet()) {
buf.append(entry.getValue() + " = " + entry.getKey());
- buf.append("\n");
+ buf.append('\n');
}
return buf.toString();
@@ -520,7 +520,7 @@ public class IntervalTree {
public void _toString(StringBuffer buf, Map<Interval, Integer> keys) {
if (this == NIL) {
buf.append("nil");
- buf.append("\n");
+ buf.append('\n');
return;
}
@@ -542,7 +542,7 @@ public class IntervalTree {
buf.append(selfKey + " -> " + leftKey + " , " + rightKey);
- buf.append("\n");
+ buf.append('\n');
this.left._toString(buf, keys);
this.right._toString(buf, keys);
}
diff --git a/src/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java b/src/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java
index 11960f9..055888e 100644
--- a/src/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java
+++ b/src/java/htsjdk/tribble/index/interval/IntervalTreeIndex.java
@@ -138,7 +138,7 @@ public class IntervalTreeIndex extends AbstractIndex {
final List<Interval> intervals = tree.findOverlapping(new Interval(start, end));
// save time (and save throwing an exception) if the blocks are empty, return now
- if (intervals == null || intervals.size() == 0) return new ArrayList<Block>();
+ if (intervals == null || intervals.isEmpty()) return new ArrayList<Block>();
final Block[] blocks = new Block[intervals.size()];
int idx = 0;
diff --git a/src/java/htsjdk/tribble/index/linear/LinearIndexCreator.java b/src/java/htsjdk/tribble/index/linear/LinearIndexCreator.java
index b68c9f2..9e68072 100644
--- a/src/java/htsjdk/tribble/index/linear/LinearIndexCreator.java
+++ b/src/java/htsjdk/tribble/index/linear/LinearIndexCreator.java
@@ -66,9 +66,9 @@ public class LinearIndexCreator extends TribbleIndexCreator {
*/
public void addFeature(final Feature feature, final long filePosition) {
// fi we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one
- if (chrList.size() == 0 || !chrList.getLast().getName().equals(feature.getChr())) {
+ if (chrList.isEmpty() || !chrList.getLast().getName().equals(feature.getChr())) {
// if we're creating a new chrIndex (not the first), make sure to dump the blocks to the old chrIndex
- if (chrList.size() != 0)
+ if (!chrList.isEmpty())
for (int x = 0; x < blocks.size(); x++) {
blocks.get(x).setEndPosition((x + 1 == blocks.size()) ? filePosition : blocks.get(x + 1).getStartPosition());
chrList.getLast().addBlock(blocks.get(x));
diff --git a/src/java/htsjdk/tribble/readers/TabixReader.java b/src/java/htsjdk/tribble/readers/TabixReader.java
index e0bd87c..8867d07 100644
--- a/src/java/htsjdk/tribble/readers/TabixReader.java
+++ b/src/java/htsjdk/tribble/readers/TabixReader.java
@@ -316,7 +316,7 @@ public class TabixReader {
String alt;
alt = end >= 0 ? s.substring(beg, end) : s.substring(beg);
if (col == 4) { // REF
- if (alt.length() > 0) intv.end = intv.beg + alt.length();
+ if (!alt.isEmpty()) intv.end = intv.beg + alt.length();
} else if (col == 8) { // INFO
int e_off = -1, i = alt.indexOf("END=");
if (i == 0) e_off = 4;
@@ -325,7 +325,7 @@ public class TabixReader {
if (i >= 0) e_off = i + 5;
}
if (e_off > 0) {
- i = alt.indexOf(";", e_off);
+ i = alt.indexOf(';', e_off);
intv.end = Integer.parseInt(i > e_off ? alt.substring(e_off, i) : alt.substring(e_off));
}
}
diff --git a/src/java/htsjdk/variant/bcf2/BCF2Codec.java b/src/java/htsjdk/variant/bcf2/BCF2Codec.java
index 9fcf0cf..9cbf1f5 100644
--- a/src/java/htsjdk/variant/bcf2/BCF2Codec.java
+++ b/src/java/htsjdk/variant/bcf2/BCF2Codec.java
@@ -354,7 +354,7 @@ public final class BCF2Codec extends BinaryFeatureCodec<VariantContext> {
builder.alleles(alleles);
- assert ref.length() > 0;
+ assert !ref.isEmpty();
return alleles;
}
diff --git a/src/java/htsjdk/variant/bcf2/BCF2Utils.java b/src/java/htsjdk/variant/bcf2/BCF2Utils.java
index e26ca21..f2fb1a8 100644
--- a/src/java/htsjdk/variant/bcf2/BCF2Utils.java
+++ b/src/java/htsjdk/variant/bcf2/BCF2Utils.java
@@ -146,7 +146,7 @@ public final class BCF2Utils {
for ( final String s : strings ) {
if ( s != null ) {
assert s.indexOf(",") == -1; // no commas in individual strings
- b.append(",").append(s);
+ b.append(',').append(s);
}
}
return b.toString();
@@ -169,7 +169,7 @@ public final class BCF2Utils {
}
public static boolean isCollapsedString(final String s) {
- return s.length() > 0 && s.charAt(0) == ',';
+ return !s.isEmpty() && s.charAt(0) == ',';
}
/**
diff --git a/src/java/htsjdk/variant/variantcontext/CommonInfo.java b/src/java/htsjdk/variant/variantcontext/CommonInfo.java
index 99d91d1..88b02fa 100644
--- a/src/java/htsjdk/variant/variantcontext/CommonInfo.java
+++ b/src/java/htsjdk/variant/variantcontext/CommonInfo.java
@@ -103,7 +103,7 @@ public final class CommonInfo implements Serializable {
}
public boolean isFiltered() {
- return filters == null ? false : filters.size() > 0;
+ return filters == null ? false : !filters.isEmpty();
}
public boolean isNotFiltered() {
@@ -207,7 +207,7 @@ public final class CommonInfo implements Serializable {
public void putAttributes(Map<String, ?> map) {
if ( map != null ) {
// for efficiency, we can skip the validation if the map is empty
- if ( attributes.size() == 0 ) {
+ if (attributes.isEmpty()) {
if ( attributes == NO_ATTRIBUTES ) // immutable -> mutable
attributes = new HashMap<String, Object>();
attributes.putAll(map);
diff --git a/src/java/htsjdk/variant/variantcontext/Genotype.java b/src/java/htsjdk/variant/variantcontext/Genotype.java
index 91d52b7..a104b0e 100644
--- a/src/java/htsjdk/variant/variantcontext/Genotype.java
+++ b/src/java/htsjdk/variant/variantcontext/Genotype.java
@@ -68,7 +68,7 @@ public abstract class Genotype implements Comparable<Genotype>, Serializable {
protected Genotype(final String sampleName, final String filters) {
this.sampleName = sampleName;
- this.filters = filters;
+ this.filters = filters == null || filters.isEmpty() ? null : filters;
}
/**
@@ -637,9 +637,9 @@ public abstract class Genotype implements Comparable<Genotype>, Serializable {
return "";
else {
StringBuilder b = new StringBuilder();
- b.append(" ").append(name).append(" ");
+ b.append(' ').append(name).append(' ');
for ( int i = 0; i < vs.length; i++ ) {
- if ( i != 0 ) b.append(",");
+ if ( i != 0 ) b.append(',');
b.append(vs[i]);
}
return b.toString();
@@ -663,4 +663,4 @@ public abstract class Genotype implements Comparable<Genotype>, Serializable {
protected final static boolean isForbiddenKey(final String key) {
return PRIMARY_KEYS.contains(key);
}
-}
\ No newline at end of file
+}
diff --git a/src/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java b/src/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java
index e0eb928..ee3e08d 100644
--- a/src/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java
+++ b/src/java/htsjdk/variant/variantcontext/GenotypeLikelihoods.java
@@ -29,14 +29,17 @@ import htsjdk.tribble.TribbleException;
import htsjdk.variant.utils.GeneralUtils;
import htsjdk.variant.vcf.VCFConstants;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumMap;
import java.util.List;
+import java.util.Map;
+import java.util.HashMap;
public class GenotypeLikelihoods {
private final static int NUM_LIKELIHOODS_CACHE_N_ALLELES = 5;
private final static int NUM_LIKELIHOODS_CACHE_PLOIDY = 10;
- // caching numAlleles up to 5 and ploidy up to 10
+ // caches likelihoods up to 5 alleles and up to 10 ploidy
private final static int[][] numLikelihoodCache = new int[NUM_LIKELIHOODS_CACHE_N_ALLELES][NUM_LIKELIHOODS_CACHE_PLOIDY];
public final static int MAX_PL = Integer.MAX_VALUE;
@@ -49,7 +52,6 @@ public class GenotypeLikelihoods {
private double[] log10Likelihoods = null;
private String likelihoodsAsString_PLs = null;
-
/**
* initialize num likelihoods cache
*/
@@ -63,14 +65,21 @@ public class GenotypeLikelihoods {
}
/**
- * The maximum number of alleles that we can represent as genotype likelihoods
+ * The maximum number of diploid alternate alleles that we can represent as genotype likelihoods
*/
- public final static int MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED = 50;
+ public final static int MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED = 50;
- /*
- * a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
- */
- private final static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = calculatePLcache(MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED);
+ /**
+ * A cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles
+ */
+ private final static GenotypeLikelihoodsAllelePair[] diploidPLIndexToAlleleIndex = calculateDiploidPLcache(MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED);
+
+ /**
+ * A cache of PL index to a list of alleles for any ploidy.
+ * For example, for a ploidy of 3, the allele lists for each PL index is:
+ * {0,0,0}, {0,0,1}, {0,1,1}, {1,1,1}, {0,0,2}, {0,1,2}, {1,1,2}, {0,2,2}, {1,2,2}, {2,2,2}
+ */
+ protected final static Map<Integer, List<List<Integer>>> anyploidPloidyToPLIndexToAlleleIndices = new HashMap<Integer, List<List<Integer>>>();
public final static GenotypeLikelihoods fromPLField(String PLs) {
return new GenotypeLikelihoods(PLs);
@@ -101,11 +110,11 @@ public class GenotypeLikelihoods {
}
/**
- * Returns the genotypes likelihoods in negative log10 vector format. pr{AA} = x, this
+ * The genotype likelihoods in negative log10 vector format. pr{AA} = x, this
* vector returns math.log10(x) for each of the genotypes. Can return null if the
* genotype likelihoods are "missing".
*
- * @return
+ * @return the genotype likelihoods in negative log10 vector format, or null if the likelihoods are missing
*/
public double[] getAsVector() {
// assumes one of the likelihoods vector or the string isn't null
@@ -264,7 +273,7 @@ public class GenotypeLikelihoods {
boolean first = true;
for ( final int pl : GLsToPLs(GLs) ) {
if ( ! first )
- s.append(",");
+ s.append(',');
else
first = false;
@@ -318,7 +327,13 @@ public class GenotypeLikelihoods {
}
}
- private static GenotypeLikelihoodsAllelePair[] calculatePLcache(final int altAlleles) {
+ /**
+ * Calculate the cache of diploid alleles for each PL index
+ *
+ * @param altAlleles number of alternate alleles
+ * @return cache of diploid alleles for each PL index
+ */
+ private static GenotypeLikelihoodsAllelePair[] calculateDiploidPLcache(final int altAlleles) {
final int numLikelihoods = numLikelihoods(1 + altAlleles, 2);
final GenotypeLikelihoodsAllelePair[] cache = new GenotypeLikelihoodsAllelePair[numLikelihoods];
@@ -338,6 +353,60 @@ public class GenotypeLikelihoods {
return cache;
}
+
+ /**
+ * Calculate the alleles for each PL index for a ploidy.
+ * Creates the ordering for all possible combinations of ploidy alleles. Computed recursively and the
+ * result is stored in a cache.
+ *
+ * The implementation is described in The Variant Call Format Specification VCF 4.3, Section 1.6.2 Genotype fields
+ * The likelihoods are ordered for ploidy P and N alternate alleles as follows:
+ * for aP = 0...N
+ * for aP-1 = 0...aP
+ * ...
+ * for a1 = 0...a2
+ * a1,a2..aP
+ *
+ * This is implemented recursively:
+ *
+ * PLIndexToAlleleIndices(N, P, suffix=empty):
+ * for a in 0...N
+ * if (P == 1) accum += (a + suffix) // have all the alleles for a PL index
+ * if (P > 1) PLIndexToAlleleIndices(a, P-1, a + suffix )
+ *
+ * @param altAlleles Number of alternate alleles
+ * @param ploidy Number of chromosomes in set
+ * @param anyploidPLIndexToAlleleIndices PL index to the alleles of general ploidy over all possible alternate alleles
+ * @param genotype the partial list of allele indices accumulated so far in the recursion
+ */
+ private static void calculatePLIndexToAlleleIndices(final int altAlleles, final int ploidy, final List<List<Integer>> anyploidPLIndexToAlleleIndices,
+ final List<Integer> genotype) {
+ for (int a=0; a <= altAlleles; a++) {
+ final List<Integer> gt = new ArrayList<Integer>(Arrays.asList(a));
+ gt.addAll(genotype);
+ if ( ploidy == 1 ) {// have all ploidy alleles for a PL index
+ anyploidPLIndexToAlleleIndices.add(gt);
+ } else if ( ploidy > 1 ) {
+ calculatePLIndexToAlleleIndices(a, ploidy - 1, anyploidPLIndexToAlleleIndices, gt);
+ }
+ }
+ }
+
+ /**
+ * Calculate the cache of allele indices for each PL index for a ploidy.
+ * Calculation in {@link #calculatePLIndexToAlleleIndices}
+ *
+ * @param altAlleles Number of alternate alleles
+ * @param ploidy Number of chromosomes in set
+ * @return the PL-index-ordered list of allele-index lists for the given ploidy,
+ *         over all possible alternate alleles
+ */
+ protected static List<List<Integer>> calculateAnyploidPLcache(final int altAlleles, final int ploidy) {
+ List<List<Integer>> anyploidPLIndexToAlleleIndices = new ArrayList<List<Integer>>();
+ calculatePLIndexToAlleleIndices(altAlleles, ploidy, anyploidPLIndexToAlleleIndices, new ArrayList<Integer>());
+ return anyploidPLIndexToAlleleIndices;
+ }
+
// -------------------------------------------------------------------------------------
//
// num likelihoods given number of alleles and ploidy
@@ -347,9 +416,9 @@ public class GenotypeLikelihoods {
/**
* Actually does the computation in @see #numLikelihoods
*
- * @param numAlleles
- * @param ploidy
- * @return
+ * @param numAlleles number of alleles
+ * @param ploidy number of chromosomes
+ * @return number of likelihoods
*/
private static final int calcNumLikelihoods(final int numAlleles, final int ploidy) {
if (numAlleles == 1)
@@ -408,18 +477,68 @@ public class GenotypeLikelihoods {
return (allele2Index * (allele2Index+1) / 2) + allele1Index;
}
+
/**
- * get the allele index pair for the given PL
+ * Get the diploid allele index pair for the given PL index
*
* @param PLindex the PL index
- * @return the allele index pair
+ * @return the diploid allele index pair
+ * @throws IllegalStateException if PLindex is negative or beyond the cache computed by {@link #calculateDiploidPLcache}
*/
public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) {
- // make sure that we've cached enough data
- if ( PLindex >= PLIndexToAlleleIndex.length )
- throw new IllegalStateException("Internal limitation: cannot genotype more than " + MAX_ALT_ALLELES_THAT_CAN_BE_GENOTYPED + " alleles");
+ // check the index, make sure that we've cached enough data
+ if ( PLindex < 0 || PLindex >= diploidPLIndexToAlleleIndex.length ) {
+ final String msg = "The PL index " + PLindex + " cannot be " + (PLindex < 0 ? " negative" : " more than " + (diploidPLIndexToAlleleIndex.length - 1));
+ throw new IllegalStateException(msg);
+ }
+
+ return diploidPLIndexToAlleleIndex[PLindex];
+ }
+
+ /**
+ * Initialize cache of allele anyploid indices
+ * If initialized multiple times with the same ploidy, the alternate alleles from the last initialization will be used
+ *
+ * @param altAlleles number of alternate alleles
+ * @param ploidy number of chromosomes
+ * @throws IllegalArgumentException if altAlleles or ploidy <= 0
+ */
+ public static synchronized void initializeAnyploidPLIndexToAlleleIndices(final int altAlleles, final int ploidy) {
+ if ( altAlleles <= 0 )
+ throw new IllegalArgumentException("Must have at least one alternate allele, not " + altAlleles );
+
+ if ( ploidy <= 0 )
+ throw new IllegalArgumentException("Ploidy must be at least 1, not " + ploidy);
+
+ // create the allele indices for each PL index for a ploidy
+ anyploidPloidyToPLIndexToAlleleIndices.put(ploidy, calculateAnyploidPLcache(altAlleles, ploidy));
+ }
+
+ /**
+ * Get the allele ploidy indices for the given PL index
+ * Must use the same ploidy as {@link #initializeAnyploidPLIndexToAlleleIndices}
+ *
+ * @param PLindex the PL index
+ * @param ploidy number of chromosomes
+ * @return the ploidy allele indices
+ * @throws IllegalStateException if {@link #anyploidPloidyToPLIndexToAlleleIndices} does not contain the requested ploidy or PL index
+ */
+ public static synchronized List<Integer> getAlleles(final int PLindex, final int ploidy) {
+ if ( ploidy == 2 ) { // diploid
+ final GenotypeLikelihoodsAllelePair pair = getAllelePair(PLindex);
+ return Arrays.asList(pair.alleleIndex1, pair.alleleIndex2);
+ } else { // non-diploid
+ if (!anyploidPloidyToPLIndexToAlleleIndices.containsKey(ploidy))
+ throw new IllegalStateException("Must initialize the cache of allele anyploid indices for ploidy " + ploidy);
+
+ if (PLindex < 0 || PLindex >= anyploidPloidyToPLIndexToAlleleIndices.get(ploidy).size()) {
+ final String msg = "The PL index " + PLindex + " does not exist for " + ploidy + " ploidy, " +
+ (PLindex < 0 ? "cannot have a negative value." : "initialized the cache of allele anyploid indices with the incorrect number of alternate alleles.");
+ throw new IllegalStateException(msg);
+ }
- return PLIndexToAlleleIndex[PLindex];
+ return anyploidPloidyToPLIndexToAlleleIndices.get(ploidy).get(PLindex);
+ }
}
// An index conversion from the deprecated PL ordering to the new VCF-based ordering for up to 3 alternate alleles
diff --git a/src/java/htsjdk/variant/variantcontext/VariantContext.java b/src/java/htsjdk/variant/variantcontext/VariantContext.java
index d2cc5af..f64b0ff 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantContext.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantContext.java
@@ -25,6 +25,7 @@
package htsjdk.variant.variantcontext;
+import htsjdk.samtools.util.Tuple;
import htsjdk.tribble.Feature;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.util.ParsingUtils;
@@ -36,6 +37,7 @@ import htsjdk.variant.vcf.VCFHeaderLineCount;
import htsjdk.variant.vcf.VCFHeaderLineType;
import java.io.Serializable;
+import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -47,6 +49,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Collectors;
/**
*
@@ -1256,7 +1259,7 @@ public class VariantContext implements Feature, Serializable {
ArrayList<Integer> observedACs = new ArrayList<Integer>();
// if there are alternate alleles, record the relevant tags
- if ( getAlternateAlleles().size() > 0 ) {
+ if (!getAlternateAlleles().isEmpty()) {
for ( Allele allele : getAlternateAlleles() ) {
observedACs.add(getCalledChrCount(allele));
}
@@ -1586,7 +1589,7 @@ public class VariantContext implements Feature, Serializable {
return GenotypeLikelihoods.fromPLField((String)value);
final String string = (String)value;
- if ( string.indexOf(",") != -1 ) {
+ if ( string.indexOf(',') != -1 ) {
final String[] splits = string.split(",");
final List<Object> values = new ArrayList<Object>(splits.length);
for ( int i = 0; i < splits.length; i++ )
@@ -1687,30 +1690,19 @@ public class VariantContext implements Feature, Serializable {
}
public static boolean hasSymbolicAlleles( final List<Allele> alleles ) {
- for ( final Allele a: alleles ) {
- if (a.isSymbolic()) {
- return true;
- }
- }
- return false;
+ return alleles.stream().anyMatch(Allele::isSymbolic);
}
public Allele getAltAlleleWithHighestAlleleCount() {
- // optimization: for bi-allelic sites, just return the 1only alt allele
+ // optimization: for bi-allelic sites, just return the only alt allele
if ( isBiallelic() )
return getAlternateAllele(0);
- Allele best = null;
- int maxAC1 = 0;
- for ( Allele a : getAlternateAlleles() ) {
- final int ac = getCalledChrCount(a);
- if ( ac >= maxAC1 ) {
- maxAC1 = ac;
- best = a;
- }
-
- }
- return best;
+ return getAlternateAlleles().stream()
+ .map(allele -> new Tuple<>(allele, getCalledChrCount(allele)))
+ .max((alleleAndCount1, alleleAndCount2) -> Integer.compare(alleleAndCount1.b, alleleAndCount2.b))
+ .get()
+ .a;
}
/**
@@ -1730,10 +1722,9 @@ public class VariantContext implements Feature, Serializable {
* @return a list of indices for each allele, in order
*/
public List<Integer> getAlleleIndices(final Collection<Allele> alleles) {
- final List<Integer> indices = new LinkedList<Integer>();
- for ( final Allele allele : alleles )
- indices.add(getAlleleIndex(allele));
- return indices;
+ return alleles.stream()
+ .map(this::getAlleleIndex)
+ .collect(Collectors.toCollection(() -> new ArrayList<>(alleles.size())));
}
public int[] getGLIndecesOfAlternateAllele(Allele targetAllele) {
diff --git a/src/java/htsjdk/variant/variantcontext/VariantContextBuilder.java b/src/java/htsjdk/variant/variantcontext/VariantContextBuilder.java
index ee6201d..94eebca 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantContextBuilder.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantContextBuilder.java
@@ -94,7 +94,7 @@ public class VariantContextBuilder {
* Create an empty VariantContextBuilder where all values adopt their default values, but the bare min.
* of info (source, chr, start, stop, and alleles) have been provided to start.
*/
- public VariantContextBuilder(String source, String contig, long start, long stop, Collection<Allele> alleles) {
+ public VariantContextBuilder(final String source, final String contig, final long start, final long stop, final Collection<Allele> alleles) {
this.source = source;
this.contig = contig;
this.start = start;
@@ -110,14 +110,14 @@ public class VariantContextBuilder {
*
* @param parent Cannot be null
*/
- public VariantContextBuilder(VariantContext parent) {
+ public VariantContextBuilder(final VariantContext parent) {
if ( parent == null ) throw new IllegalArgumentException("BUG: VariantContextBuilder parent argument cannot be null in VariantContextBuilder");
- this.alleles = parent.alleles;
+ this.alleles = parent.getAlleles();
this.attributes = parent.getAttributes();
this.attributesCanBeModified = false;
- this.contig = parent.contig;
+ this.contig = parent.getContig();
this.filters = parent.getFiltersMaybeNull();
- this.genotypes = parent.genotypes;
+ this.genotypes = parent.getGenotypes();
this.ID = parent.getID();
this.log10PError = parent.getLog10PError();
this.source = parent.getSource();
@@ -126,7 +126,7 @@ public class VariantContextBuilder {
this.fullyDecoded = parent.isFullyDecoded();
}
- public VariantContextBuilder(VariantContextBuilder parent) {
+ public VariantContextBuilder(final VariantContextBuilder parent) {
if ( parent == null ) throw new IllegalArgumentException("BUG: VariantContext parent argument cannot be null in VariantContextBuilder");
this.alleles = parent.alleles;
this.attributesCanBeModified = false;
@@ -160,7 +160,7 @@ public class VariantContextBuilder {
}
public VariantContextBuilder alleles(final List<String> alleleStrings) {
- List<Allele> alleles = new ArrayList<Allele>(alleleStrings.size());
+ final List<Allele> alleles = new ArrayList<Allele>(alleleStrings.size());
for ( int i = 0; i < alleleStrings.size(); i++ ) {
alleles.add(Allele.create(alleleStrings.get(i), i == 0));
diff --git a/src/java/htsjdk/variant/variantcontext/VariantContextComparator.java b/src/java/htsjdk/variant/variantcontext/VariantContextComparator.java
index cda5bff..5754349 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantContextComparator.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantContextComparator.java
@@ -4,6 +4,7 @@ import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.variant.vcf.VCFContigHeaderLine;
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -18,7 +19,8 @@ import java.util.Set;
* A Comparator that orders VariantContexts by the ordering of the contigs/chromosomes in the List
* provided at construction time, then by start position with each contig/chromosome.
*/
-public class VariantContextComparator implements Comparator<VariantContext> {
+public class VariantContextComparator implements Comparator<VariantContext>, Serializable {
+ private static final long serialVersionUID = 1L;
public static List<String> getSequenceNameList(final SAMSequenceDictionary dictionary) {
final List<String> list = new ArrayList<String>();
@@ -32,7 +34,7 @@ public class VariantContextComparator implements Comparator<VariantContext> {
private final Map<String, Integer> contigIndexLookup;
public VariantContextComparator(final List<String> contigs) {
- if (contigs.size() == 0) throw new IllegalArgumentException("One or more contigs must be in the contig list.");
+ if (contigs.isEmpty()) throw new IllegalArgumentException("One or more contigs must be in the contig list.");
final Map<String, Integer> protoContigIndexLookup = new HashMap<String, Integer>();
int index = 0;
@@ -53,7 +55,7 @@ public class VariantContextComparator implements Comparator<VariantContext> {
*
*/
public VariantContextComparator(final Collection<VCFContigHeaderLine> headerLines) {
- if (headerLines.size() == 0) throw new IllegalArgumentException("One or more header lines must be in the header line collection.");
+ if (headerLines.isEmpty()) throw new IllegalArgumentException("One or more header lines must be in the header line collection.");
final Map<String, Integer> protoContigIndexLookup = new HashMap<String, Integer>();
for (final VCFContigHeaderLine headerLine : headerLines) {
diff --git a/src/java/htsjdk/variant/variantcontext/VariantContextUtils.java b/src/java/htsjdk/variant/variantcontext/VariantContextUtils.java
index a603ac5..ac4c43c 100644
--- a/src/java/htsjdk/variant/variantcontext/VariantContextUtils.java
+++ b/src/java/htsjdk/variant/variantcontext/VariantContextUtils.java
@@ -40,6 +40,7 @@ import org.apache.commons.jexl2.JexlEngine;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -126,7 +127,7 @@ public class VariantContextUtils {
attributes.put(VCFConstants.ALLELE_NUMBER_KEY, AN);
// if there are alternate alleles, record the relevant tags
- if ( vc.getAlternateAlleles().size() > 0 ) {
+ if (!vc.getAlternateAlleles().isEmpty()) {
ArrayList<Double> alleleFreqs = new ArrayList<Double>();
ArrayList<Integer> alleleCounts = new ArrayList<Integer>();
ArrayList<Integer> foundersAlleleCounts = new ArrayList<Integer>();
@@ -165,7 +166,7 @@ public class VariantContextUtils {
*/
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues) {
VariantContext vc = builder.make();
- builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, new HashSet<String>(0)));
+ builder.attributes(calculateChromosomeCounts(vc, new HashMap<>(vc.getAttributes()), removeStaleValues, new HashSet<>(0)));
}
/**
@@ -179,7 +180,7 @@ public class VariantContextUtils {
*/
public static void calculateChromosomeCounts(VariantContextBuilder builder, boolean removeStaleValues, final Set<String> founderIds) {
VariantContext vc = builder.make();
- builder.attributes(calculateChromosomeCounts(vc, new HashMap<String, Object>(vc.getAttributes()), removeStaleValues, founderIds));
+ builder.attributes(calculateChromosomeCounts(vc, new HashMap<>(vc.getAttributes()), removeStaleValues, founderIds));
}
public final static VCFCompoundHeaderLine getMetaDataForField(final VCFHeader header, final String field) {
@@ -268,7 +269,7 @@ public class VariantContextUtils {
* @return list of matches
*/
public static List<JexlVCMatchExp> initializeMatchExps(Map<String, String> names_and_exps) {
- List<JexlVCMatchExp> exps = new ArrayList<JexlVCMatchExp>();
+ List<JexlVCMatchExp> exps = new ArrayList<>();
for ( Map.Entry<String, String> elt : names_and_exps.entrySet() ) {
String name = elt.getKey();
@@ -293,7 +294,7 @@ public class VariantContextUtils {
* @return true if there is a match
*/
public static boolean match(VariantContext vc, JexlVCMatchExp exp) {
- return match(vc,Arrays.asList(exp)).get(exp);
+ return match(vc, Collections.singletonList(exp)).get(exp);
}
/**
@@ -319,7 +320,7 @@ public class VariantContextUtils {
* @return true if there is a match
*/
public static boolean match(VariantContext vc, Genotype g, JexlVCMatchExp exp) {
- return match(vc,g,Arrays.asList(exp)).get(exp);
+ return match(vc,g, Collections.singletonList(exp)).get(exp);
}
/**
@@ -338,6 +339,55 @@ public class VariantContextUtils {
}
/**
+ * Answers if the provided variant is transitional (otherwise, it's transversional).
+ * Transitions:
+ * A->G
+ * G->A
+ * C->T
+ * T->C
+ * <p/>
+ * Transversions:
+ * A->C
+ * A->T
+ * C->A
+ * C->G
+ * G->C
+ * G->T
+ * T->A
+ * T->G
+ *
+ * @param vc a biallelic polymorphic SNP
+ * @return true if a transition and false if transversion
+ * @throws IllegalArgumentException if vc is monomorphic, not a SNP or not bi-allelic.
+ *
+ */
+
+ static public boolean isTransition(final VariantContext vc) throws IllegalArgumentException {
+ final byte refAllele = vc.getReference().getBases()[0];
+ final Collection<Allele> altAlleles = vc.getAlternateAlleles();
+
+ if(vc.getType() == VariantContext.Type.NO_VARIATION) {
+ throw new IllegalArgumentException("Variant context is monomorphic: " + vc.toString());
+ }
+
+ if(vc.getType() != VariantContext.Type.SNP) {
+ throw new IllegalArgumentException("Variant context is not a SNP: " + vc.toString());
+ }
+
+ if(altAlleles.size() != 1 ) {
+ throw new IllegalArgumentException("Expected exactly 1 alternative Allele. Found: " + altAlleles.size());
+ }
+
+ final Byte altAllele = altAlleles.iterator().next().getBases()[0];
+
+ return (refAllele == 'A' && altAllele == 'G')
+ || (refAllele == 'G' && altAllele == 'A')
+ || (refAllele == 'C' && altAllele == 'T')
+ || (refAllele == 'T' && altAllele == 'C');
+ }
+
+
+ /**
* Returns a newly allocated VC that is the same as VC, but without genotypes
* @param vc variant context
* @return new VC without genotypes
diff --git a/src/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilter.java b/src/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilter.java
new file mode 100644
index 0000000..bf8fa8e
--- /dev/null
+++ b/src/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilter.java
@@ -0,0 +1,97 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.variant.variantcontext.filter;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Reader;
+
+import htsjdk.samtools.filter.AbstractJavascriptFilter;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFHeader;
+
+/**
+ * Javascript-based variant filter. The script puts the following variables in
+ * the script context:
+ *
+ * - 'header' a htsjdk.variant.vcf.VCFHeader
+ * - 'variant' a htsjdk.variant.variantcontext.VariantContext
+ *
+ * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France
+ */
+public class JavascriptVariantFilter extends AbstractJavascriptFilter<VCFHeader, VariantContext>
+ implements VariantContextFilter {
+ /**
+ * constructor using a javascript File
+ *
+ * @param scriptFile
+ * the javascript file to be compiled
+ * @param header
+ * the VCFHeader
+ */
+ public JavascriptVariantFilter(final File scriptFile, final VCFHeader header) throws IOException {
+ super(scriptFile, header);
+ }
+
+ /**
+ * constructor using a Reader
+ *
+ * @param scriptReader
+ * the reader for the script to be compiled. Will be closed
+ * @param header
+ * the VCFHeader
+ */
+ public JavascriptVariantFilter(final Reader scriptReader, final VCFHeader header) throws IOException {
+ super(scriptReader, header);
+ }
+
+ /**
+ * constructor using a javascript expression
+ *
+ * @param scriptExpression
+ * the javascript expression to be compiled
+ * @param header
+ * the VCFHeader
+ */
+ public JavascriptVariantFilter(final String scriptExpression, final VCFHeader header) {
+ super(scriptExpression, header);
+ }
+
+ /**
+ * Determines whether a VariantContext matches this filter
+ *
+ * @param record
+ * the VariantContext to evaluate
+ * @return true if accept(record) returned true
+ */
+ @Override
+ public boolean test(final VariantContext record) {
+ return accept(record);
+ }
+
+ @Override
+ public String getRecordKey() {
+ return "variant";
+ }
+}
diff --git a/src/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java b/src/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java
index ef390c5..495cd93 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/BCF2Encoder.java
@@ -114,7 +114,7 @@ public final class BCF2Encoder {
}
public final void encodeTyped(List<? extends Object> v, final BCF2Type type) throws IOException {
- if ( type == BCF2Type.CHAR && v.size() != 0 ) {
+ if ( type == BCF2Type.CHAR && !v.isEmpty()) {
final String s = BCF2Utils.collapseStringList((List<String>) v);
v = stringToBytes(s);
}
diff --git a/src/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java b/src/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java
index 032c050..11d2f10 100644
--- a/src/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java
+++ b/src/java/htsjdk/variant/variantcontext/writer/SortingVariantContextWriterBase.java
@@ -28,6 +28,7 @@ package htsjdk.variant.variantcontext.writer;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
+import java.io.Serializable;
import java.util.Comparator;
import java.util.Queue;
import java.util.Set;
@@ -182,7 +183,9 @@ abstract class SortingVariantContextWriterBase implements VariantContextWriter {
}
}
- private static class VariantContextComparator implements Comparator<VCFRecord> {
+ private static class VariantContextComparator implements Comparator<VCFRecord>, Serializable {
+ private static final long serialVersionUID = 1L;
+
public int compare(VCFRecord r1, VCFRecord r2) {
return r1.vc.getStart() - r2.vc.getStart();
}
diff --git a/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java b/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
index e24d878..51ac9d8 100644
--- a/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
+++ b/src/java/htsjdk/variant/vcf/AbstractVCFCodec.java
@@ -177,7 +177,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
while ( arrayIndex < strings.length )
sampleNames.add(strings[arrayIndex++]);
- if ( sawFormatTag && sampleNames.size() == 0 )
+ if ( sawFormatTag && sampleNames.isEmpty())
throw new TribbleException.InvalidHeader("The FORMAT field was provided but there is no genotype/sample data");
// If we're performing sample name remapping and there is exactly one sample specified in the header, replace
@@ -211,7 +211,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
final VCFSimpleHeaderLine alt = new VCFSimpleHeaderLine(str.substring(6), version, VCFConstants.ALT_HEADER_START.substring(2), Arrays.asList("ID", "Description"));
metaData.add(alt);
} else {
- int equals = str.indexOf("=");
+ int equals = str.indexOf('=');
if ( equals != -1 )
metaData.add(new VCFHeaderLine(str.substring(2, equals), str.substring(equals+1)));
}
@@ -402,7 +402,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
generateException("The VCF specification requires a valid (non-zero length) info field");
if ( !infoField.equals(VCFConstants.EMPTY_INFO_FIELD) ) {
- if ( infoField.indexOf("\t") != -1 || infoField.indexOf(" ") != -1 )
+ if ( infoField.indexOf('\t') != -1 || infoField.indexOf(' ') != -1 )
generateException("The VCF specification does not allow for whitespace in the INFO field. Offending field value was \"" + infoField + "\"");
List<String> infoFields = ParsingUtils.split(infoField, VCFConstants.INFO_FIELD_SEPARATOR_CHAR);
@@ -532,7 +532,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
Allele refAllele = Allele.create(ref, true);
alleles.add(refAllele);
- if ( alts.indexOf(",") == -1 ) // only 1 alternatives, don't call string split
+ if ( alts.indexOf(',') == -1 ) // only 1 alternative, don't call string split
parseSingleAltAllele(alleles, alts, lineNo);
else
for ( String alt : alts.split(",") )
diff --git a/src/java/htsjdk/variant/vcf/VCF3Codec.java b/src/java/htsjdk/variant/vcf/VCF3Codec.java
index 6bebfc2..5f4f48e 100644
--- a/src/java/htsjdk/variant/vcf/VCF3Codec.java
+++ b/src/java/htsjdk/variant/vcf/VCF3Codec.java
@@ -109,7 +109,7 @@ public class VCF3Codec extends AbstractVCFCodec {
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
return new ArrayList<String>(fFields);
- if ( filterString.length() == 0 )
+ if (filterString.isEmpty())
generateException("The VCF specification requires a valid filter status");
// do we have the filter string cached?
diff --git a/src/java/htsjdk/variant/vcf/VCFCodec.java b/src/java/htsjdk/variant/vcf/VCFCodec.java
index 3d03d09..89d6881 100644
--- a/src/java/htsjdk/variant/vcf/VCFCodec.java
+++ b/src/java/htsjdk/variant/vcf/VCFCodec.java
@@ -134,7 +134,7 @@ public class VCFCodec extends AbstractVCFCodec {
return Collections.emptyList();
if ( filterString.equals(VCFConstants.PASSES_FILTERS_v3) )
generateException(VCFConstants.PASSES_FILTERS_v3 + " is an invalid filter name in vcf4", lineNo);
- if ( filterString.length() == 0 )
+ if (filterString.isEmpty())
generateException("The VCF specification requires a valid filter status: filter was " + filterString, lineNo);
// do we have the filter string cached?
diff --git a/src/java/htsjdk/variant/vcf/VCFEncoder.java b/src/java/htsjdk/variant/vcf/VCFEncoder.java
index 0a8d372..f65a038 100644
--- a/src/java/htsjdk/variant/vcf/VCFEncoder.java
+++ b/src/java/htsjdk/variant/vcf/VCFEncoder.java
@@ -72,16 +72,13 @@ public class VCFEncoder {
final StringBuilder stringBuilder = new StringBuilder();
// CHROM
- stringBuilder.append(context.getChr()).append(VCFConstants.FIELD_SEPARATOR);
-
- // POS
- stringBuilder.append(String.valueOf(context.getStart())).append(VCFConstants.FIELD_SEPARATOR);
-
- // ID
- stringBuilder.append(context.getID()).append(VCFConstants.FIELD_SEPARATOR);
-
- // REF
- stringBuilder.append(context.getReference().getDisplayString()).append(VCFConstants.FIELD_SEPARATOR);
+ stringBuilder.append(context.getChr()).append(VCFConstants.FIELD_SEPARATOR)
+ // POS
+ .append(String.valueOf(context.getStart())).append(VCFConstants.FIELD_SEPARATOR)
+ // ID
+ .append(context.getID()).append(VCFConstants.FIELD_SEPARATOR)
+ // REF
+ .append(context.getReference().getDisplayString()).append(VCFConstants.FIELD_SEPARATOR);
// ALT
if ( context.isVariant() ) {
@@ -92,7 +89,7 @@ public class VCFEncoder {
for (int i = 1; i < context.getAlternateAlleles().size(); i++) {
altAllele = context.getAlternateAllele(i);
alt = altAllele.getDisplayString();
- stringBuilder.append(",");
+ stringBuilder.append(',');
stringBuilder.append(alt);
}
} else {
@@ -104,10 +101,9 @@ public class VCFEncoder {
// QUAL
if ( ! context.hasLog10PError()) stringBuilder.append(VCFConstants.MISSING_VALUE_v4);
else stringBuilder.append(formatQualValue(context.getPhredScaledQual()));
- stringBuilder.append(VCFConstants.FIELD_SEPARATOR);
-
- // FILTER
- stringBuilder.append(getFilterString(context)).append(VCFConstants.FIELD_SEPARATOR);
+ stringBuilder.append(VCFConstants.FIELD_SEPARATOR)
+ // FILTER
+ .append(getFilterString(context)).append(VCFConstants.FIELD_SEPARATOR);
// INFO
final Map<String, String> infoFields = new TreeMap<String, String>();
@@ -195,7 +191,7 @@ public class VCFEncoder {
return formatVCFField(null);
final StringBuilder sb = new StringBuilder(formatVCFField(Array.get(val, 0)));
for ( int i = 1; i < length; i++) {
- sb.append(",");
+ sb.append(',');
sb.append(formatVCFField(Array.get(val, i)));
}
result = sb.toString();
@@ -289,7 +285,7 @@ public class VCFEncoder {
final StringBuilder sb = new StringBuilder();
sb.append(intValues[0]);
for ( int i = 1; i < intValues.length; i++) {
- sb.append(",");
+ sb.append(',');
sb.append(intValues[i]);
}
outputValue = sb.toString();
@@ -305,7 +301,7 @@ public class VCFEncoder {
// For example, if Number=2, the string has to be ".,."
final StringBuilder sb = new StringBuilder(VCFConstants.MISSING_VALUE_v4);
for ( int i = 1; i < numInFormatField; i++ ) {
- sb.append(",");
+ sb.append(',');
sb.append(VCFConstants.MISSING_VALUE_v4);
}
val = sb.toString();
@@ -358,7 +354,7 @@ public class VCFEncoder {
if ( ! entry.getValue().equals("")) {
final VCFInfoHeaderLine metaData = this.header.getInfoHeaderLine(entry.getKey());
if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
- builder.append("=");
+ builder.append('=');
builder.append(entry.getValue());
}
}
diff --git a/src/java/htsjdk/variant/vcf/VCFHeaderLine.java b/src/java/htsjdk/variant/vcf/VCFHeaderLine.java
index a7bb1e6..c4c1e3b 100644
--- a/src/java/htsjdk/variant/vcf/VCFHeaderLine.java
+++ b/src/java/htsjdk/variant/vcf/VCFHeaderLine.java
@@ -136,7 +136,7 @@ public class VCFHeaderLine implements Comparable, Serializable {
* @return true if the line is a VCF meta data line, or false if it is not
*/
public static boolean isHeaderLine(String line) {
- return line != null && line.length() > 0 && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1));
+ return line != null && !line.isEmpty() && VCFHeader.HEADER_INDICATOR.equals(line.substring(0,1));
}
/**
@@ -146,21 +146,29 @@ public class VCFHeaderLine implements Comparable, Serializable {
*/
public static String toStringEncoding(Map<String, ? extends Object> keyValues) {
StringBuilder builder = new StringBuilder();
- builder.append("<");
+ builder.append('<');
boolean start = true;
for (Map.Entry<String,?> entry : keyValues.entrySet()) {
if (start) start = false;
- else builder.append(",");
+ else builder.append(',');
if ( entry.getValue() == null ) throw new TribbleException.InternalCodecException("Header problem: unbound value at " + entry + " from " + keyValues);
builder.append(entry.getKey());
- builder.append("=");
+ builder.append('=');
builder.append(entry.getValue().toString().contains(",") ||
entry.getValue().toString().contains(" ") ||
- entry.getKey().equals("Description") ? "\""+ entry.getValue() + "\"" : entry.getValue());
+ entry.getKey().equals("Description") ? "\""+ escapeQuotes(entry.getValue().toString()) + "\"" : entry.getValue());
}
- builder.append(">");
+ builder.append('>');
return builder.toString();
}
+
+ private static String escapeQuotes(final String value) {
+ // java escaping in a string literal makes this harder to read than it should be
+ // without string literal escaping and quoting the regex would be: replaceAll( ([^\])" , $1\" )
+ // ie replace: something that's not a backslash ([^\]) followed by a double quote
+ // with: the thing that wasn't a backslash ($1), followed by a backslash, followed by a double quote
+ return value.replaceAll("([^\\\\])\"", "$1\\\\\"");
+ }
}
\ No newline at end of file
diff --git a/src/java/htsjdk/variant/vcf/VCFUtils.java b/src/java/htsjdk/variant/vcf/VCFUtils.java
index a875b58..c8eceea 100644
--- a/src/java/htsjdk/variant/vcf/VCFUtils.java
+++ b/src/java/htsjdk/variant/vcf/VCFUtils.java
@@ -142,7 +142,7 @@ public class VCFUtils {
final String referenceValue;
if (referenceFile != null) {
if (referenceNameOnly) {
- final int extensionStart = referenceFile.getName().lastIndexOf(".");
+ final int extensionStart = referenceFile.getName().lastIndexOf('.');
referenceValue = extensionStart == -1 ? referenceFile.getName() : referenceFile.getName().substring(0, extensionStart);
}
else {
diff --git a/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java b/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
index 4504ddc..235f23b 100644
--- a/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/BAMFileWriterTest.java
@@ -142,7 +142,7 @@ public class BAMFileWriterTest {
}
}
- @Test
+ @Test(expectedExceptions = IllegalArgumentException.class)
public void testNullRecordsMismatchedHeader() throws Exception {
final SAMRecordSetBuilder samRecordSetBuilder = getRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
@@ -150,35 +150,37 @@ public class BAMFileWriterTest {
rec.setHeader(null);
}
- // create a fake header to make sure the records can still be written using an invalid
+ // create a fake header to make sure the records cannot be written using an invalid
// sequence dictionary and unresolvable references
final SAMFileHeader fakeHeader = new SAMFileHeader();
fakeHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
bamFile.deleteOnExit();
- final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fakeHeader, false, bamFile);
- for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
- bamWriter.addAlignment(rec);
+ try (final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fakeHeader, false, bamFile);) {
+ for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ bamWriter.addAlignment(rec);
+ }
}
- bamWriter.close();
+ }
- final SamReader bamReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(bamFile);
- final SamReader samReader = samRecordSetBuilder.getSamReader();
- samReader.getFileHeader().setSortOrder(bamReader.getFileHeader().getSortOrder());
- final CloseableIterator<SAMRecord> it = samReader.iterator();
- final CloseableIterator<SAMRecord> bamIt = bamReader.iterator();
- while (it.hasNext()) {
- Assert.assertTrue(bamIt.hasNext());
- final SAMRecord samRecord = it.next();
- final SAMRecord bamRecord = bamIt.next();
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void testRecordsMismatchedHeader() throws Exception {
+
+ final SAMRecordSetBuilder samRecordSetBuilder = getRecordSetBuilder(true, SAMFileHeader.SortOrder.queryname);
+
+ // create a fake header to make sure the records cannot be written using an invalid
+ // sequence dictionary and unresolvable references
+ final SAMFileHeader fakeHeader = new SAMFileHeader();
+ fakeHeader.setSortOrder(SAMFileHeader.SortOrder.queryname);
+ final File bamFile = File.createTempFile("test.", BamFileIoUtils.BAM_FILE_EXTENSION);
+ bamFile.deleteOnExit();
- // test only reference names since we'll have lost reference indices due to the fake null header
- Assert.assertEquals(bamRecord.getReferenceName(), samRecord.getReferenceName());
- Assert.assertEquals(bamRecord.getAlignmentStart(), samRecord.getAlignmentStart());
+ try (final SAMFileWriter bamWriter = new SAMFileWriterFactory().makeSAMOrBAMWriter(fakeHeader, false, bamFile);) {
+ for (SAMRecord rec : samRecordSetBuilder.getRecords()) {
+ bamWriter.addAlignment(rec);
+ }
}
- Assert.assertFalse(bamIt.hasNext());
- CloserUtil.close(samReader);
}
@Test(expectedExceptions = IllegalArgumentException.class)
diff --git a/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java b/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java
new file mode 100644
index 0000000..05f3b6f
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/CRAMBAIIndexerTest.java
@@ -0,0 +1,87 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.cram.build.ContainerFactory;
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.CramCompressionRecord;
+import htsjdk.samtools.cram.structure.Slice;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Created by vadim on 12/01/2016.
+ */
+public class CRAMBAIIndexerTest {
+
+ private static CramCompressionRecord createRecord(int recordIndex, int seqId, int start) {
+ byte[] bases = "AAAAA".getBytes();
+ int readLength = bases.length;
+
+ final CramCompressionRecord record = new CramCompressionRecord();
+ record.setSegmentUnmapped(false);
+ record.setMultiFragment(false);
+ record.sequenceId = seqId;
+ record.alignmentStart =start;
+ record.readBases = record.qualityScores = bases;
+ record.readName = Integer.toString(recordIndex);
+ record.readLength = readLength;
+ record.readFeatures = Collections.emptyList();
+
+ return record;
+ }
+ @Test
+ public void test_processMultiContainer() throws IOException, IllegalAccessException {
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ samFileHeader.addSequence(new SAMSequenceRecord("1", 10));
+ samFileHeader.addSequence(new SAMSequenceRecord("2", 10));
+ samFileHeader.addSequence(new SAMSequenceRecord("3", 10));
+ ByteArrayOutputStream indexBAOS = new ByteArrayOutputStream();
+ CRAMIndexer indexer = new CRAMIndexer(indexBAOS, samFileHeader);
+ int recordsPerContainer = 3;
+ ContainerFactory containerFactory = new ContainerFactory(samFileHeader, recordsPerContainer);
+ List<CramCompressionRecord> records = new ArrayList<>();
+ records.add(createRecord(0, 0, 1));
+ records.add(createRecord(1, 1, 2));
+ records.add(createRecord(2, 1, 3));
+
+ final Container container1 = containerFactory.buildContainer(records);
+ Assert.assertNotNull(container1);
+ Assert.assertEquals(container1.nofRecords, records.size());
+ Assert.assertEquals(container1.sequenceId, Slice.MULTI_REFERENCE);
+
+ indexer.processContainer(container1, ValidationStringency.STRICT);
+
+ records.clear();
+ records.add(createRecord(3, 1, 3));
+ records.add(createRecord(4, 2, 3));
+ records.add(createRecord(5, 2, 4));
+ final Container container2 = containerFactory.buildContainer(records);
+ Assert.assertNotNull(container2);
+ Assert.assertEquals(container2.nofRecords, records.size());
+ Assert.assertEquals(container2.sequenceId, Slice.MULTI_REFERENCE);
+
+ indexer.processContainer(container2, ValidationStringency.STRICT);
+
+ indexer.finish();
+
+ BAMIndex index = new CachingBAMFileIndex(new SeekableMemoryStream(indexBAOS.toByteArray(), null), samFileHeader.getSequenceDictionary());
+ final BAMIndexMetaData metaData_0 = index.getMetaData(0);
+ Assert.assertNotNull(metaData_0);
+ Assert.assertEquals(metaData_0.getAlignedRecordCount(), 1);
+
+ final BAMIndexMetaData metaData_1 = index.getMetaData(1);
+ Assert.assertNotNull(metaData_1);
+ Assert.assertEquals(metaData_1.getAlignedRecordCount(), 3);
+
+ final BAMIndexMetaData metaData_2 = index.getMetaData(2);
+ Assert.assertNotNull(metaData_2);
+ Assert.assertEquals(metaData_2.getAlignedRecordCount(), 2);
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
index ae23787..432653f 100644
--- a/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMComplianceTest.java
@@ -84,7 +84,6 @@ public class CRAMComplianceTest {
@Test(dataProvider = "test1")
public void test(String name) throws IOException {
TestCase t = new TestCase(new File("testdata/htsjdk/samtools/cram/"), name);
-// TestCase t = new TestCase(new File("C:\\temp\\htslib\\test"), name);
ReferenceSource source = null;
if (t.refFile.exists())
@@ -101,8 +100,9 @@ public class CRAMComplianceTest {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
CRAMFileWriter cramFileWriter = new CRAMFileWriter(baos, source, samFileHeader, name);
- for (SAMRecord samRecord : samRecords)
+ for (SAMRecord samRecord : samRecords) {
cramFileWriter.addAlignment(samRecord);
+ }
cramFileWriter.close();
diff --git a/src/tests/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java b/src/tests/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java
new file mode 100644
index 0000000..0846846
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/CRAMContainerStreamWriterTest.java
@@ -0,0 +1,184 @@
+package htsjdk.samtools;
+
+import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
+import htsjdk.samtools.seekablestream.SeekableMemoryStream;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.Log.LogLevel;
+import htsjdk.samtools.util.RuntimeIOException;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.StringWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+public class CRAMContainerStreamWriterTest {
+
+ @BeforeClass
+ public void initClass() {
+ Log.setGlobalLogLevel(LogLevel.ERROR);
+ }
+
+ private List<SAMRecord> createRecords(int count) {
+ final List<SAMRecord> list = new ArrayList<SAMRecord>(count);
+ final SAMRecordSetBuilder builder = new SAMRecordSetBuilder();
+ if (builder.getHeader().getReadGroups().isEmpty()) {
+ throw new IllegalStateException("Read group expected in the header");
+ }
+
+ int posInRef = 1;
+ for (int i = 0; i < count / 2; i++) {
+ builder.addPair(Integer.toString(i), i % 2, posInRef += 1, posInRef += 3);
+ }
+ list.addAll(builder.getRecords());
+
+ Collections.sort(list, new SAMRecordCoordinateComparator());
+
+ return list;
+ }
+
+ private SAMFileHeader createSAMHeader(SAMFileHeader.SortOrder sortOrder) {
+ final SAMFileHeader header = new SAMFileHeader();
+ header.setSortOrder(sortOrder);
+ header.addSequence(new SAMSequenceRecord("chr1", 123));
+ header.addSequence(new SAMSequenceRecord("chr2", 123));
+ SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("1");
+ header.addReadGroup(readGroupRecord);
+ return header;
+ }
+
+ private ReferenceSource createReferenceSource() {
+ final byte[] refBases = new byte[1024 * 1024];
+ Arrays.fill(refBases, (byte) 'A');
+ InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
+ rsf.add("chr1", refBases);
+ rsf.add("chr2", refBases);
+ return new ReferenceSource(rsf);
+ }
+
+ private void doTest(final List<SAMRecord> samRecords, final ByteArrayOutputStream outStream, final OutputStream indexStream) {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+
+ final CRAMContainerStreamWriter containerStream = new CRAMContainerStreamWriter(outStream, indexStream, refSource, header, "test");
+ containerStream.writeHeader(header);
+
+ for (SAMRecord record : samRecords) {
+ containerStream.writeAlignment(record);
+ }
+ containerStream.finish(true); // finish and issue EOF
+
+ // read all the records back in
+ final CRAMFileReader cReader = new CRAMFileReader(null, new ByteArrayInputStream(outStream.toByteArray()), refSource);
+ final SAMRecordIterator iterator = cReader.getIterator();
+ int count = 0;
+ while (iterator.hasNext()) {
+ SAMRecord actualRecord = iterator.next();
+ count++;
+ }
+ Assert.assertEquals(count, samRecords.size());
+ }
+
+ @Test(description = "Test CRAMContainerStream no index")
+ public void testCRAMContainerStreamNoIndex() {
+ final List<SAMRecord> samRecords = createRecords(100);
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ doTest(samRecords, outStream, null);
+ }
+
+ @Test(description = "Test CRAMContainerStream aggregating multiple partitions")
+ public void testCRAMContainerAggregatePartitions() throws IOException {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ReferenceSource refSource = createReferenceSource();
+
+ // create a bunch of records and write them out to separate streams in groups
+ final int nRecs = 100;
+ final int recsPerPartition = 20;
+ final int nPartitions = nRecs/recsPerPartition;
+
+ final List<SAMRecord> samRecords = createRecords(nRecs);
+ final ArrayList<ByteArrayOutputStream> byteStreamArray = new ArrayList<>(nPartitions);
+
+ for (int partition = 0, recNum = 0; partition < nPartitions; partition++) {
+ byteStreamArray.add(partition, new ByteArrayOutputStream());
+ final CRAMContainerStreamWriter containerStream =
+ new CRAMContainerStreamWriter(byteStreamArray.get(partition), null, refSource, header, "test");
+
+ // don't write a header for the intermediate streams
+ for (int i = 0; i < recsPerPartition; i++) {
+ containerStream.writeAlignment(samRecords.get(recNum++));
+ }
+ containerStream.finish(false); // finish but don't issue EOF container
+ }
+
+ // now create the final aggregate file by concatenating the individual streams, but this
+ // time with a CRAM and SAM header at the front and an EOF container at the end
+ final ByteArrayOutputStream aggregateStream = new ByteArrayOutputStream();
+ final CRAMContainerStreamWriter aggregateContainerStreamWriter = new CRAMContainerStreamWriter(aggregateStream, null, refSource, header, "test");
+ aggregateContainerStreamWriter .writeHeader(header); // write out one CRAM and SAM header
+ for (int j = 0; j < nPartitions; j++) {
+ byteStreamArray.get(j).writeTo(aggregateStream);
+ }
+ aggregateContainerStreamWriter.finish(true);// write out the EOF container
+
+ // now iterate through all the records in the aggregate file
+ final CRAMFileReader cReader = new CRAMFileReader(null, new ByteArrayInputStream(aggregateStream.toByteArray()), refSource);
+ final SAMRecordIterator iterator = cReader.getIterator();
+ int count = 0;
+ while (iterator.hasNext()) {
+ Assert.assertEquals(iterator.next().toString(), samRecords.get(count).toString());
+ count++;
+ }
+ Assert.assertEquals(count, nRecs);
+ }
+
+ @Test(description = "Test CRAMContainerStream with index")
+ public void testCRAMContainerStreamWithIndex() throws IOException {
+ final List<SAMRecord> samRecords = createRecords(100);
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ final ByteArrayOutputStream indexStream = new ByteArrayOutputStream();
+ doTest(samRecords, outStream, indexStream);
+ outStream.close();
+ indexStream.close();
+
+ // write the file out
+ final File cramTempFile = File.createTempFile("cramContainerStreamTest", ".cram");
+ cramTempFile.deleteOnExit();
+ final OutputStream cramFileStream = new FileOutputStream(cramTempFile);
+ cramFileStream.write(outStream.toByteArray());
+ cramFileStream.close();
+
+ // write the index out
+ final File indexTempFile = File.createTempFile("cramContainerStreamTest", ".bai");
+ indexTempFile.deleteOnExit();
+ OutputStream indexFileStream = new FileOutputStream(indexTempFile);
+ indexFileStream.write(indexStream.toByteArray());
+ indexFileStream.close();
+
+ final ReferenceSource refSource = createReferenceSource();
+ final CRAMFileReader reader = new CRAMFileReader(
+ cramTempFile,
+ indexTempFile,
+ refSource,
+ ValidationStringency.SILENT);
+ final CloseableIterator<SAMRecord> iterator = reader.query(1, 10, 10, true);
+ int count = 0;
+ while (iterator.hasNext()) {
+ SAMRecord actualRecord = iterator.next();
+ count++;
+ }
+ Assert.assertEquals(count, 2);
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
index b1e1f2d..3cc147c 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileIndexTest.java
@@ -1,18 +1,18 @@
package htsjdk.samtools;
+import htsjdk.samtools.cram.build.ContainerParser;
import htsjdk.samtools.cram.build.CramContainerIterator;
import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.cram.structure.AlignmentSpan;
import htsjdk.samtools.cram.structure.Container;
import htsjdk.samtools.reference.FakeReferenceSequenceFile;
import htsjdk.samtools.seekablestream.ByteArraySeekableStream;
-import htsjdk.samtools.seekablestream.SeekableBufferedStream;
import htsjdk.samtools.seekablestream.SeekableFileStream;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import org.testng.Assert;
-import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
@@ -23,6 +23,8 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
+import java.util.Map;
+import java.util.TreeSet;
/**
* A collection of tests for CRAM index write/read that use BAMFileIndexTest/index_test.bam file as the source of the test data.
@@ -102,7 +104,7 @@ public class CRAMFileIndexTest {
}
@Test
- public void scanAllMappedReads() throws IOException {
+ public void scanMappedReads() throws IOException {
SamReader samReader = SamReaderFactory.makeDefault().open(BAM_FILE);
SAMRecordIterator samRecordIterator = samReader.iterator();
CRAMFileReader reader = new CRAMFileReader(new ByteArraySeekableStream(cramBytes), new ByteArraySeekableStream(baiBytes), source, ValidationStringency.SILENT);
@@ -112,6 +114,8 @@ public class CRAMFileIndexTest {
while (samRecordIterator.hasNext()) {
SAMRecord samRecord = samRecordIterator.next();
if (samRecord.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) break;
+ // test only 1st and 2nd in every 100 to speed the test up:
+ if (counter++ %100 > 1) continue;
String s1 = samRecord.getSAMString();
CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart(samRecord.getReferenceName(), samRecord.getAlignmentStart());
@@ -120,10 +124,9 @@ public class CRAMFileIndexTest {
String s2 = cramRecord.getSAMString();
- Assert.assertEquals(samRecord.getReferenceName(), cramRecord.getReferenceName(), s1 + "\n" + s2);
- Assert.assertEquals(samRecord.getAlignmentStart(), cramRecord.getAlignmentStart(), s1 + "\n" + s2);
-
- counter++;
+ Assert.assertEquals(samRecord.getReferenceName(), cramRecord.getReferenceName(), s1 + s2);
+ // default 'overlap' is true, so test records intersect the query:
+ Assert.assertTrue(CoordMath.overlaps(cramRecord.getAlignmentStart(), cramRecord.getAlignmentEnd(), samRecord.getAlignmentStart(), samRecord.getAlignmentEnd()), s1 + s2);
}
samRecordIterator.close();
reader.close();
@@ -167,25 +170,26 @@ public class CRAMFileIndexTest {
}
@Test
- public void testIteratorFromFileSpan_SecondContainer() throws IOException {
+ public void testIteratorFromFileSpan_SecondContainer() throws IOException, IllegalAccessException {
CramContainerIterator it = new CramContainerIterator(new ByteArrayInputStream(cramBytes));
it.hasNext();
it.next();
it.hasNext();
Container secondContainer = it.next();
Assert.assertNotNull(secondContainer);
+ final Map<Integer, AlignmentSpan> references = new ContainerParser(it.getCramHeader().getSamFileHeader()).getReferences(secondContainer, ValidationStringency.STRICT);
it.close();
-
+ int refId = new TreeSet<Integer>(references.keySet()).iterator().next();
+ final AlignmentSpan alignmentSpan = references.get(refId);
CRAMFileReader reader = new CRAMFileReader(new ByteArraySeekableStream(cramBytes), new ByteArraySeekableStream(baiBytes), source, ValidationStringency.SILENT);
reader.setValidationStringency(ValidationStringency.SILENT);
final BAMIndex index = reader.getIndex();
- final SAMFileSpan spanOfSecondContainer = index.getSpanOverlapping(secondContainer.sequenceId, secondContainer.alignmentStart, secondContainer.alignmentStart + secondContainer.alignmentSpan);
+ final SAMFileSpan spanOfSecondContainer = index.getSpanOverlapping(refId, alignmentSpan.getStart(), alignmentSpan.getStart()+ alignmentSpan.getSpan());
Assert.assertNotNull(spanOfSecondContainer);
Assert.assertFalse(spanOfSecondContainer.isEmpty());
Assert.assertTrue(spanOfSecondContainer instanceof BAMFileSpan);
- Assert.assertEquals(((BAMFileSpan) spanOfSecondContainer).getChunks().size(), 1);
final CloseableIterator<SAMRecord> iterator = reader.getIterator(spanOfSecondContainer);
Assert.assertTrue(iterator.hasNext());
@@ -193,14 +197,14 @@ public class CRAMFileIndexTest {
boolean matchFound = false;
while (iterator.hasNext()) {
final SAMRecord record = iterator.next();
- if (record.getReferenceIndex().intValue() == secondContainer.sequenceId) {
- boolean overlaps = CoordMath.overlaps(record.getAlignmentStart(), record.getAlignmentEnd(), secondContainer.alignmentStart, secondContainer.alignmentStart + secondContainer.alignmentSpan);
+ if (record.getReferenceIndex().intValue() == refId) {
+ boolean overlaps = CoordMath.overlaps(record.getAlignmentStart(), record.getAlignmentEnd(), alignmentSpan.getStart(), alignmentSpan.getStart()+ alignmentSpan.getSpan());
if (overlaps) matchFound = true;
}
counter++;
}
Assert.assertTrue(matchFound);
- Assert.assertTrue(counter <= nofReadsPerContainer);
+ Assert.assertTrue(counter <= CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE);
}
@Test
@@ -258,7 +262,7 @@ public class CRAMFileIndexTest {
fos.write(cramBytes);
fos.close();
- CRAMIndexer.createIndex(new SeekableFileStream(cramFile), indexFile, null);
+ CRAMIndexer.createIndex(new SeekableFileStream(cramFile), indexFile, null, ValidationStringency.STRICT);
baiBytes = readFile(indexFile);
}
@@ -274,15 +278,15 @@ public class CRAMFileIndexTest {
final SamReader reader = SamReaderFactory.makeDefault().open(bamFile);
final SAMRecordIterator iterator = reader.iterator();
// to reduce granularity let's use this hacky approach:
- int previousValue = CRAMFileWriter.DEFAULT_RECORDS_PER_SLICE ;
- CRAMFileWriter.DEFAULT_RECORDS_PER_SLICE = nofReadsPerContainer;
+ int previousValue = CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE ;
+ CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE = nofReadsPerContainer;
CRAMFileWriter writer = new CRAMFileWriter(baos, source, reader.getFileHeader(), bamFile.getName());
while (iterator.hasNext()) {
SAMRecord record = iterator.next();
writer.addAlignment(record);
}
writer.close();
- CRAMFileWriter.DEFAULT_RECORDS_PER_SLICE = previousValue;
+ CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE = previousValue;
return baos.toByteArray();
}
}
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileReaderTest.java b/src/tests/java/htsjdk/samtools/CRAMFileReaderTest.java
new file mode 100644
index 0000000..e9db7e8
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/CRAMFileReaderTest.java
@@ -0,0 +1,163 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools;
+
+import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
+import htsjdk.samtools.seekablestream.SeekableFileStream;
+import htsjdk.samtools.util.Log;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * Additional tests for CRAMFileReader are in CRAMFileIndexTest
+ */
+public class CRAMFileReaderTest {
+
+ private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools");
+
+ @BeforeClass
+ public void initClass() {
+ Log.setGlobalLogLevel(Log.LogLevel.ERROR);
+ }
+
+ private ReferenceSource createReferenceSource() {
+ byte[] refBases = new byte[10 * 10];
+ Arrays.fill(refBases, (byte) 'A');
+ InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
+ rsf.add("chr1", refBases);
+ return new ReferenceSource(rsf);
+ }
+
+ // constructor 1: CRAMFileReader(final File cramFile, final InputStream inputStream)
+
+ @Test(description = "Test CRAMReader 1 reference required", expectedExceptions = IllegalStateException.class)
+ public void testCRAMReader1_ReferenceRequired() {
+ File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ InputStream bis = null;
+ // assumes that reference_fasta property is not set and the download service is not enabled
+ new CRAMFileReader(file, bis);
+ }
+
+ // constructor 2: CRAMFileReader(final File cramFile, final InputStream inputStream, final ReferenceSource referenceSource)
+
+ @Test(description = "Test CRAMReader 2 reference required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader2ReferenceRequired() {
+ File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ InputStream bis = null;
+ new CRAMFileReader(file, bis, null);
+ }
+
+ @Test(description = "Test CRAMReader 2 input required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader2_InputRequired() {
+ File file = null;
+ InputStream bis = null;
+ new CRAMFileReader(file, bis, createReferenceSource());
+ }
+
+ // constructor 3: CRAMFileReader(final File cramFile, final File indexFile, final ReferenceSource referenceSource)
+
+ @Test(description = "Test CRAMReader 3 reference required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader3_RequiredReference() {
+ File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ File indexFile = null;
+ ReferenceSource refSource = null;
+ new CRAMFileReader(inputFile, indexFile, refSource);
+ }
+
+ @Test(description = "Test CRAMReader 3 input required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader3_InputRequirted() {
+ File inputFile = null;
+ File indexFile = null;
+ ReferenceSource refSource = null;
+ new CRAMFileReader(inputFile, indexFile, refSource);
+ }
+
+ // constructor 4: CRAMFileReader(final File cramFile, final ReferenceSource referenceSource)
+
+ @Test(description = "Test CRAMReader 4 reference required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader4_ReferenceRequired() {
+ File inputFile = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ ReferenceSource refSource = null;
+ new CRAMFileReader(inputFile, refSource);
+ }
+
+ @Test(description = "Test CRAMReader 4 input required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader4_InputRequired() {
+ File inputFile = null;
+ new CRAMFileReader(inputFile, createReferenceSource());
+ }
+
+ // constructor 5: CRAMFileReader(final InputStream inputStream, final SeekableStream indexInputStream,
+ // final ReferenceSource referenceSource, final ValidationStringency validationStringency)
+ @Test(description = "Test CRAMReader 5 reference required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader5_ReferenceRequired() throws IOException {
+ InputStream bis = new ByteArrayInputStream(new byte[0]);
+ SeekableFileStream sfs = null;
+ ReferenceSource refSource = null;
+ new CRAMFileReader(bis, sfs, refSource, ValidationStringency.STRICT);
+ }
+
+ @Test(description = "Test CRAMReader 5 input required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader5_InputRequired() throws IOException {
+ InputStream bis = null;
+ SeekableFileStream sfs = null;
+ new CRAMFileReader(bis, sfs, createReferenceSource(), ValidationStringency.STRICT);
+ }
+
+ // constructor 6: CRAMFileReader(final InputStream stream, final File indexFile, final ReferenceSource referenceSource,
+ // final ValidationStringency validationStringency)
+ @Test(description = "Test CRAMReader 6 reference required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader6_ReferenceRequired() throws IOException {
+ InputStream bis = new ByteArrayInputStream(new byte[0]);
+ File file = null;
+ ReferenceSource refSource = null;
+ new CRAMFileReader(bis, file, refSource, ValidationStringency.STRICT);
+ }
+
+ @Test(description = "Test CRAMReader 6 input required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader6_InputRequired() throws IOException {
+ InputStream bis = null;
+ File file = null;
+ ReferenceSource refSource = null;
+ new CRAMFileReader(bis, file, createReferenceSource(), ValidationStringency.STRICT);
+ }
+
+ // constructor 7: CRAMFileReader(final File cramFile, final File indexFile, final ReferenceSource referenceSource,
+ // final ValidationStringency validationStringency)
+ @Test(description = "Test CRAMReader 7 reference required", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMReader7_ReferenceRequired() throws IOException {
+ InputStream bis = new ByteArrayInputStream(new byte[0]);
+ File file = new File(TEST_DATA_DIR, "cram_with_crai_index.cram");
+ ReferenceSource refSource = null;
+ new CRAMFileReader(file, file, refSource, ValidationStringency.STRICT);
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java b/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
index 1203121..3e07076 100644
--- a/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/CRAMFileWriterWithIndexTest.java
@@ -132,7 +132,7 @@ public class CRAMFileWriterWithIndexTest {
}
// reading after the 1st container should be ok:
- refID = 1;
+ refID = 2;
final CloseableIterator<SAMRecord> iterator = reader.queryAlignmentStart(header.getSequence(refID).getSequenceName(), 1);
Assert.assertNotNull(iterator);
Assert.assertTrue(iterator.hasNext());
@@ -147,7 +147,7 @@ public class CRAMFileWriterWithIndexTest {
SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord("1");
rsf = new InMemoryReferenceSequenceFile();
- int nofSequencesInDictionary = 30;
+ int nofSequencesInDictionary = 3;
int sequenceLength = 1024 * 1024;
for (int i = 0; i < nofSequencesInDictionary; i++)
addRandomSequence(header, sequenceLength, rsf);
@@ -164,7 +164,7 @@ public class CRAMFileWriterWithIndexTest {
CRAMFileWriter writer = new CRAMFileWriter(os, indexOS, source, header, null);
- int readPairsPerSequence = 100;
+ int readPairsPerSequence = CRAMContainerStreamWriter.DEFAULT_RECORDS_PER_SLICE;
for (SAMSequenceRecord sequenceRecord : header.getSequenceDictionary().getSequences()) {
int alignmentStart = 1;
diff --git a/src/tests/java/htsjdk/samtools/CigarTest.java b/src/tests/java/htsjdk/samtools/CigarTest.java
index 1d7d4c6..acdc224 100644
--- a/src/tests/java/htsjdk/samtools/CigarTest.java
+++ b/src/tests/java/htsjdk/samtools/CigarTest.java
@@ -27,6 +27,7 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.util.Arrays;
import java.util.List;
/**
@@ -91,4 +92,27 @@ public class CigarTest {
Assert.assertEquals(errors.size(), 1, String.format("Got %d error, expected exactly one error.", errors.size()));
Assert.assertEquals(errors.get(0).getType(), type);
}
+
+ @Test
+ public void testMakeCigarFromOperators() {
+ final List<CigarOperator> cigarOperators = Arrays.asList(
+ CigarOperator.S,
+ CigarOperator.M,
+ CigarOperator.M,
+ CigarOperator.M,
+ CigarOperator.I,
+ CigarOperator.M,
+ CigarOperator.D,
+ CigarOperator.M
+ );
+ final Cigar cigar = Cigar.fromCigarOperators(cigarOperators);
+ Assert.assertFalse(cigar.isEmpty());
+ Assert.assertEquals(cigar.numCigarElements(), 6);
+ Assert.assertEquals(cigar.toString(),"1S3M1I1M1D1M");
+ Assert.assertFalse(cigar.containsOperator(CigarOperator.N));
+ Assert.assertTrue(cigar.containsOperator(CigarOperator.D));
+ Assert.assertTrue(cigar.isLeftClipped());
+ Assert.assertFalse(cigar.isRightClipped());
+ Assert.assertTrue(cigar.isClipped());
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java b/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
index 89e9a68..0a5cbac 100644
--- a/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
+++ b/src/tests/java/htsjdk/samtools/CramFileWriterTest.java
@@ -205,6 +205,27 @@ public class CramFileWriterTest {
Assert.assertTrue(indexStream.size() != 0);
}
+ @Test(description = "Test CRAMWriter constructor reference required 1", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMWriterConstructorRequiredReference_1() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ new CRAMFileWriter(outStream, null, header, null);
+ }
+
+ @Test(description = "Test CRAMWriter constructor reference required 2", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMWriterConstructorRequiredReference_2() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ new CRAMFileWriter(outStream, null, null, header, null);
+ }
+
+ @Test(description = "Test CRAMWriter constructor reference required 3", expectedExceptions = IllegalArgumentException.class)
+ public void testCRAMWriterConstructorRequiredReference_3() {
+ final SAMFileHeader header = createSAMHeader(SAMFileHeader.SortOrder.coordinate);
+ final ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ new CRAMFileWriter(outStream, null, true, null, header, null);
+ }
+
@Test
public void test_roundtrip_tlen_preserved() throws IOException {
SamReader reader = SamReaderFactory.make().open(new File("testdata/htsjdk/samtools/cram_tlen_reads.sorted.sam"));
@@ -231,4 +252,31 @@ public class CramFileWriterTest {
Assert.assertEquals(records.size(), i);
}
+ @Test
+ public void testCRAMQuerySort() throws IOException {
+ final File input = new File("testdata/htsjdk/samtools/cram_query_sorted.cram");
+ final File reference = new File("testdata/htsjdk/samtools/cram_query_sorted.fasta");
+ final File outputFile = File.createTempFile("tmp.", ".cram");
+
+ try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(reference).open(input);
+ final SAMFileWriter writer = new SAMFileWriterFactory().makeWriter(reader.getFileHeader().clone(), false, outputFile, reference)) {
+ for (SAMRecord rec : reader) {
+ writer.addAlignment(rec);
+ }
+ }
+
+ try (final SamReader outReader = SamReaderFactory.makeDefault().referenceSequence(reference).open(outputFile)) {
+ String prevName = null;
+ for (final SAMRecord rec : outReader) {
+ if (prevName == null) {
+ prevName = rec.getReadName();
+ continue;
+ }
+ // test if the read names are sorted alphabetically:
+ Assert.assertTrue(rec.getReadName().compareTo(prevName) >= 0);
+ }
+ }
+
+ }
+
}
diff --git a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
index c8378a6..d158563 100644
--- a/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMFileReaderTest.java
@@ -142,7 +142,7 @@ public class SAMFileReaderTest {
return scenarios;
}
- @Test(dataProvider = "cramNegativeTestCases", expectedExceptions=CRAMException.class)
+ @Test(dataProvider = "cramNegativeTestCases", expectedExceptions=IllegalStateException.class)
public void testReferenceRequiredForCRAM(final String inputFile) {
final File input = new File(TEST_DATA_DIR, inputFile);
final SamReader reader = SamReaderFactory.makeDefault().open(input);
diff --git a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
index a3c3e68..dda10b9 100644
--- a/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMRecordUnitTest.java
@@ -31,6 +31,7 @@ import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.*;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -456,7 +457,7 @@ public class SAMRecordUnitTest {
Assert.fail("Unexpected exception", e);
}
- record.setAttribute(tag, (long)Integer.MIN_VALUE-1L);
+ record.setAttribute(tag, (long) Integer.MIN_VALUE - 1L);
}
@Test(expectedExceptions = SAMException.class)
@@ -777,6 +778,23 @@ public class SAMRecordUnitTest {
Assert.assertEquals(deserializedSAMRecord, initialSAMRecord, "Deserialized SAMRecord not equal to original SAMRecord");
}
+
+ @Test
+ public void testValidateNonsenseCigar(){
+ // Create nonsense record
+ SAMRecord rec = createTestRecordHelper();
+ rec.setCigarString("nonsense");
+
+ //The default validationStringency of a sam record is SILENT.
+ rec.setValidationStringency(ValidationStringency.STRICT);
+ // Validate record
+ List<SAMValidationError> err = rec.validateCigar(-1);
+
+ Assert.assertNotNull(err);
+ Assert.assertEquals(err.size(), 1);
+ Assert.assertEquals(err.get(0).getType(), SAMValidationError.Type.INVALID_CIGAR);
+ }
+
@Test
public void testNullHeaderRecordValidation() {
final SAMRecord sam = createTestRecordHelper();
@@ -847,4 +865,146 @@ public class SAMRecordUnitTest {
testNullHeaderCigar(bamRec);
}
+
+ @Test
+ public void testSetHeaderStrictValid() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ Integer originalRefIndex = sam.getReferenceIndex();
+ Assert.assertTrue(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX != originalRefIndex);
+
+ // force re-resolution of the reference name
+ sam.setHeaderStrict(samHeader);
+ Assert.assertEquals(sam.getReferenceIndex(), originalRefIndex);
+ }
+
+ @Test
+ public void testSetHeaderStrictValidHeaderless() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ Integer originalRefIndex = sam.getReferenceIndex();
+ Assert.assertTrue(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX != originalRefIndex);
+
+ sam.setHeader(null);
+ // force re-resolution of the reference name
+ sam.setHeaderStrict(samHeader);
+ Assert.assertEquals(sam.getReferenceIndex(), originalRefIndex);
+ }
+
+ @Test
+ public void testSetHeaderStrictValidNewHeader() {
+ final SAMRecord sam = createTestRecordHelper();
+ final String origSequenceName = sam.getContig();
+
+ final SAMFileHeader origSamHeader = sam.getHeader();
+ final int origSequenceLength = origSamHeader.getSequence(origSequenceName).getSequenceLength();
+ final SAMFileHeader newHeader = new SAMFileHeader();
+ newHeader.addSequence(new SAMSequenceRecord(origSequenceName, origSequenceLength));
+
+ // force re-resolution of the reference name against the new header
+ sam.setHeaderStrict(newHeader);
+ Assert.assertEquals(sam.getReferenceIndex(), new Integer(0));
+ }
+
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testSetHeaderStrictInvalidReference() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+
+ sam.setReferenceName("unresolvable");
+ Assert.assertEquals(new Integer(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX), sam.getReferenceIndex());
+
+ // throw on force re-resolution of the unresolvable reference name
+ sam.setHeaderStrict(samHeader);
+ }
+
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testSetHeaderStrictInvalidMateReference() {
+ SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+
+ sam.setMateReferenceName("unresolvable");
+ Assert.assertEquals(new Integer(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX), sam.getMateReferenceIndex());
+
+ // throw on force re-resolution of the unresolvable mate reference name
+ sam.setHeaderStrict(samHeader);
+ }
+
+ @Test
+ public void testSetHeaderStrictNull() {
+ SAMRecord sam = createTestRecordHelper();
+ Assert.assertNotNull(sam.getHeader());
+ sam.setHeaderStrict(null);
+ Assert.assertNull(sam.getHeader());
+ Assert.assertNull(sam.mReferenceIndex);
+ }
+
+ // resolveIndexFromName
+
+ @Test
+ public void testResolveIndexResolvable() {
+ final SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ final String contigName = sam.getContig();
+ Assert.assertEquals(SAMRecord.resolveIndexFromName(contigName, samHeader, true), new Integer(samHeader.getSequenceIndex(contigName)));
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testResolveIndexUnresolvableNullHeader() {
+ SAMRecord.resolveIndexFromName("unresolvable", null, false);
+ }
+
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testResolveIndexUnresolvableStrict() {
+ final SAMFileHeader samHeader = new SAMFileHeader();
+ SAMRecord.resolveIndexFromName("unresolvable", samHeader, true);
+ }
+
+ @Test
+ public void testResolveIndexUnresolvableNotStrict() {
+ final SAMFileHeader samHeader = new SAMFileHeader();
+ Assert.assertEquals(SAMRecord.resolveIndexFromName("unresolvable", samHeader, false), null);
+ }
+
+ @Test
+ public void testResolveIndexNoAlignment() {
+ final SAMFileHeader samHeader = new SAMFileHeader();
+ Assert.assertEquals(SAMRecord.resolveIndexFromName(
+ SAMRecord.NO_ALIGNMENT_REFERENCE_NAME, samHeader, true), new Integer(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testResolveIndexNullHeader() {
+ SAMRecord.resolveIndexFromName("unresolvable", null, true);
+ }
+
+ // resolveNameFromIndex
+
+ @Test
+ public void testResolveNameResolvable() {
+ final SAMRecord sam = createTestRecordHelper();
+ final SAMFileHeader samHeader = sam.getHeader();
+ final String contigName = sam.getContig();
+ final Integer contigIndex = samHeader.getSequenceIndex(contigName);
+ Assert.assertEquals(SAMRecord.resolveNameFromIndex(contigIndex, samHeader), contigName);
+ }
+
+ @Test(expectedExceptions=IllegalArgumentException.class)
+ public void testResolveNameUnresolvable() {
+ final SAMFileHeader samHeader = new SAMFileHeader();
+ SAMRecord.resolveNameFromIndex(99, samHeader);
+ }
+
+ @Test
+ public void testResolveNameNoAlignment() {
+ final SAMFileHeader samHeader = new SAMFileHeader();
+ Assert.assertEquals(SAMRecord.resolveNameFromIndex(
+ SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, samHeader), SAMRecord.NO_ALIGNMENT_REFERENCE_NAME);
+ }
+
+ @Test(expectedExceptions=IllegalStateException.class)
+ public void testResolveNameNullHeader() {
+ SAMRecord.resolveNameFromIndex(1, null);
+ }
+
}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/samtools/SAMUtilsTest.java b/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
index 441d662..48baf44 100644
--- a/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
+++ b/src/tests/java/htsjdk/samtools/SAMUtilsTest.java
@@ -55,8 +55,8 @@ public class SAMUtilsTest {
record.setCigar(TextCigarCodec.decode("10M"));
record.setReferenceIndex(0);
record.setAlignmentStart(1);
- record.setMateAlignmentStart(6); // should overlap 5M
record.setMateReferenceIndex(0);
+ record.setMateAlignmentStart(6); // should overlap 5M
record.setReadBases("AAAAAAAAAA".getBytes());
final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
@@ -81,8 +81,8 @@ public class SAMUtilsTest {
record.setCigar(TextCigarCodec.decode("5M5S"));
record.setReferenceIndex(0);
record.setAlignmentStart(1);
- record.setMateAlignmentStart(5); // should overlap 1M5S
record.setMateReferenceIndex(0);
+ record.setMateAlignmentStart(5); // should overlap 1M5S
record.setReadBases("AAAAAAAAAA".getBytes());
final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
@@ -107,8 +107,8 @@ public class SAMUtilsTest {
record.setCigar(TextCigarCodec.decode("5M1I5M"));
record.setReferenceIndex(0);
record.setAlignmentStart(1);
- record.setMateAlignmentStart(5); // should overlap the 1M1I5M
record.setMateReferenceIndex(0);
+ record.setMateAlignmentStart(5); // should overlap the 1M1I5M
record.setReadBases("AAAAAAAAAAA".getBytes());
@@ -121,8 +121,6 @@ public class SAMUtilsTest {
}
- // TODO: deletion
-
@Test
public void testClippingOfRecordWithDeletion() {
/**
@@ -137,8 +135,8 @@ public class SAMUtilsTest {
record.setCigar(TextCigarCodec.decode("5M1D5M"));
record.setReferenceIndex(0);
record.setAlignmentStart(1);
- record.setMateAlignmentStart(5); // should overlap the 1M1D5M
record.setMateReferenceIndex(0);
+ record.setMateAlignmentStart(5); // should overlap the 1M1D5M
record.setReadBases("AAAAAAAAAA".getBytes());
final int numToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
@@ -148,4 +146,31 @@ public class SAMUtilsTest {
Assert.assertTrue(record.getCigar().equals(TextCigarCodec.decode("4M6S")));
}
+
+ @Test
+ public void testClippingOfRecordWithMateAtSamePosition() {
+ /**
+ * Tests that we clip the first end of a pair if we have perfect overlap of a pair
+ */
+
+ // setup the record
+ final SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 1000));
+ final SAMRecord record = new SAMRecord(header);
+ record.setReadPairedFlag(true);
+ record.setFirstOfPairFlag(true);
+ record.setCigar(TextCigarCodec.decode("10M"));
+ record.setReferenceIndex(0);
+ record.setAlignmentStart(1);
+ record.setMateReferenceIndex(0);
+ record.setMateAlignmentStart(1);
+ record.setReadBases("AAAAAAAAAA".getBytes());
+
+ Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 0);
+
+ // now make it the second end
+ record.setFirstOfPairFlag(false);
+ record.setSecondOfPairFlag(true);
+ Assert.assertEquals(SAMUtils.getNumOverlappingAlignedBasesToClip(record), 10);
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/SamFilesTest.java b/src/tests/java/htsjdk/samtools/SamFilesTest.java
index b37fc3a..a7e2fa7 100644
--- a/src/tests/java/htsjdk/samtools/SamFilesTest.java
+++ b/src/tests/java/htsjdk/samtools/SamFilesTest.java
@@ -12,6 +12,8 @@ import java.io.IOException;
* Created by vadim on 10/08/2015.
*/
public class SamFilesTest {
+ private static final String TEST_DATA = "testdata/htsjdk/samtools/BAMFileIndexTest/";
+ private static final File BAM_FILE = new File(TEST_DATA + "index_test.bam");
@DataProvider(name = "FindIndexParams")
public static Object[][] paramsFindIndexForSuffixes() {
@@ -57,4 +59,23 @@ public class SamFilesTest {
Assert.assertNotNull(foundIndexFile);
Assert.assertTrue(foundIndexFile.getName().endsWith(expectIndexSuffix));
}
+
+ @DataProvider(name = "filesAndIndicies")
+ public Object[][] getFilesAndIndicies() throws IOException {
+
+ final File REAL_INDEX_FILE = new File(BAM_FILE + ".bai"); //test regular file
+ final File SYMLINKED_BAM_WITH_SYMLINKED_INDEX = new File(TEST_DATA, "symlink_with_index.bam");
+
+ return new Object[][]{
+ {BAM_FILE, REAL_INDEX_FILE},
+ {SYMLINKED_BAM_WITH_SYMLINKED_INDEX, new File(SYMLINKED_BAM_WITH_SYMLINKED_INDEX + ".bai")},
+ {new File(TEST_DATA, "symlink_without_linked_index.bam"), REAL_INDEX_FILE.getCanonicalFile()},
+ {new File(TEST_DATA, "FileThatDoesntExist"), null}
+ };
+ }
+
+ @Test(dataProvider ="filesAndIndicies")
+ public void testIndexSymlinking(File bam, File expected_index) {
+ Assert.assertEquals(SamFiles.findIndex(bam), expected_index);
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/ValidateSamFileTest.java b/src/tests/java/htsjdk/samtools/ValidateSamFileTest.java
index c74d49f..5204a1e 100644
--- a/src/tests/java/htsjdk/samtools/ValidateSamFileTest.java
+++ b/src/tests/java/htsjdk/samtools/ValidateSamFileTest.java
@@ -24,6 +24,7 @@
package htsjdk.samtools;
+import htsjdk.samtools.BamIndexValidator.IndexValidationStringency;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
@@ -58,24 +59,24 @@ public class ValidateSamFileTest {
@Test
public void testValidSamFile() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "valid.sam"));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertTrue(results.isEmpty());
}
@Test
public void testSamFileVersion1pt5() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "test_samfile_version_1pt5.bam"));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertTrue(results.isEmpty());
}
@Test
public void testSortOrder() throws IOException {
Histogram<String> results = executeValidation(SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
- .open(new File(TEST_DATA_DIR, "invalid_coord_sort_order.sam")), null);
+ .open(new File(TEST_DATA_DIR, "invalid_coord_sort_order.sam")), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.RECORD_OUT_OF_ORDER.getHistogramString()).getValue(), 1.0);
results = executeValidation(SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
- .open(new File(TEST_DATA_DIR, "invalid_queryname_sort_order.sam")), null);
+ .open(new File(TEST_DATA_DIR, "invalid_queryname_sort_order.sam")), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.RECORD_OUT_OF_ORDER.getHistogramString()).getValue(), 5.0);
}
@@ -114,7 +115,7 @@ public class ValidateSamFileTest {
records.next().setSecondOfPairFlag(true);
records.next().setMateReferenceIndex(1);
- final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
+ final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
@@ -140,7 +141,7 @@ public class ValidateSamFileTest {
records.next().setMateUnmappedFlag(!records.next().getReadUnmappedFlag());
- final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
+ final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_ALIGNMENT_START.getHistogramString()).getValue(), 3.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
@@ -158,7 +159,7 @@ public class ValidateSamFileTest {
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next();
records.remove();
- final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
+ final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.MATE_NOT_FOUND.getHistogramString()).getValue(), 1.0);
}
@@ -185,7 +186,7 @@ public class ValidateSamFileTest {
records.next().setMappingQuality(10);
records.next().setCigarString("36M");
- final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
+ final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_NOT_PRIM_ALIGNMENT.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_MAPPING_QUALITY.getHistogramString()).getValue(), 1.0);
@@ -202,7 +203,7 @@ public class ValidateSamFileTest {
records.next().setCigarString("25M3S25M");
records.next().setReferenceName("*");
- final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
+ final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_CIGAR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_READ_UNMAPPED.getHistogramString()).getValue(), 1.0);
@@ -258,7 +259,7 @@ public class ValidateSamFileTest {
public void close() throws IOException {
//no-op
}
- });
+ }, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_TAG_NM.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_TAG_NM.getHistogramString()).getValue(), 1.0);
@@ -268,7 +269,7 @@ public class ValidateSamFileTest {
public void testMateCigarScenarios(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
throws Exception {
final SamReader reader = SamReaderFactory.makeDefault().open(new File(TEST_DATA_DIR, inputFile));
- final Histogram<String> results = executeValidation(reader, null);
+ final Histogram<String> results = executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertNotNull(results.get(expectedError.getHistogramString()));
Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0);
}
@@ -286,7 +287,7 @@ public class ValidateSamFileTest {
public void testTruncated(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
throws Exception {
final SamReader reader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, inputFile));
- final Histogram<String> results = executeValidation(reader, null);
+ final Histogram<String> results = executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertNotNull(results.get(expectedError.getHistogramString()));
Assert.assertEquals(results.get(expectedError.getHistogramString()).getValue(), 1.0);
}
@@ -306,7 +307,7 @@ public class ValidateSamFileTest {
@Test(expectedExceptions = SAMException.class, dataProvider = "testFatalParsingErrors")
public void testFatalParsingErrors(final String scenario, final String inputFile) throws Exception {
final SamReader reader = SamReaderFactory.makeDefault().open(new File(TEST_DATA_DIR, inputFile));
- executeValidation(reader, null);
+ executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.fail("Exception should have been thrown.");
}
@@ -323,14 +324,14 @@ public class ValidateSamFileTest {
final String header = "@HD VN:Hi,Mom! SO:queryname";
final InputStream strm = new ByteArrayInputStream(StringUtil.stringToBytes(header));
final SamReader samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(strm));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()).getValue(), 1.0);
}
@Test(enabled = false, description = "File is actually valid for Standard quality scores so this test fails with an NPE.")
public void testQualityFormatValidation() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().open(new File("./testdata/htsjdk/samtools/util/QualityEncodingDetectorTest/illumina-as-standard.bam"));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
final Histogram<String>.Bin bin = results.get(SAMValidationError.Type.INVALID_QUALITY_FORMAT.getHistogramString());
final double value = bin.getValue();
Assert.assertEquals(value, 1.0);
@@ -343,7 +344,7 @@ public class ValidateSamFileTest {
final int contigLength = samBuilder.getHeader().getSequence(0).getSequenceLength();
// Should hang off the end.
samBuilder.addFrag(String.valueOf(1), 0, contigLength - 1, false);
- final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
+ final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertNotNull(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString()));
Assert.assertEquals(results.get(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE.getHistogramString()).getValue(), 1.0);
}
@@ -369,7 +370,7 @@ public class ValidateSamFileTest {
public void testHeaderValidation() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
.open(new File(TEST_DATA_DIR, "buggyHeader.sam"));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.UNRECOGNIZED_HEADER_TYPE.getHistogramString()).getValue(), 3.0);
Assert.assertEquals(results.get(SAMValidationError.Type.HEADER_TAG_MULTIPLY_DEFINED.getHistogramString()).getValue(), 1.0);
}
@@ -378,7 +379,7 @@ public class ValidateSamFileTest {
public void testPlatformMissing() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
.open((new File(TEST_DATA_DIR, "missing_platform_unit.sam")));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_PLATFORM_VALUE.getHistogramString()).getValue(), 1.0);
}
@@ -386,7 +387,7 @@ public class ValidateSamFileTest {
public void testPlatformInvalid() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
.open((new File(TEST_DATA_DIR, "invalid_platform_unit.sam")));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_PLATFORM_VALUE.getHistogramString()).getValue(), 1.0);
}
@@ -394,7 +395,7 @@ public class ValidateSamFileTest {
public void testDuplicateRGIDs() throws Exception {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
.open((new File(TEST_DATA_DIR, "duplicate_rg.sam")));
- final Histogram<String> results = executeValidation(samReader, null);
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.DUPLICATE_READ_GROUP_ID.getHistogramString()).getValue(), 1.0);
}
@@ -403,15 +404,19 @@ public class ValidateSamFileTest {
final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT)
.enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open((new File(TEST_DATA_DIR, "bad_index.bam")));
- final Histogram<String> results = executeValidation(samReader, null);
+ Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_INDEX_FILE_POINTER.getHistogramString()).getValue(), 1.0);
+
+ results = executeValidation(samReader, null, IndexValidationStringency.LESS_EXHAUSTIVE);
+ Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_INDEX_FILE_POINTER.getHistogramString()).getValue(), 1.0);
+
}
- private Histogram<String> executeValidation(final SamReader samReader, final ReferenceSequenceFile reference) throws IOException {
+ private Histogram<String> executeValidation(final SamReader samReader, final ReferenceSequenceFile reference, final IndexValidationStringency stringency) throws IOException {
final File outFile = File.createTempFile("validation", ".txt");
outFile.deleteOnExit();
final PrintWriter out = new PrintWriter(outFile);
- new SamFileValidator(out, 8000).setValidateIndex(true).validateSamFileSummary(samReader, reference);
+ new SamFileValidator(out, 8000).setIndexValidationStringency(stringency).validateSamFileSummary(samReader, reference);
final LineNumberReader reader = new LineNumberReader(new FileReader(outFile));
if (reader.readLine().equals("No errors found")) {
return new Histogram<String>();
@@ -429,7 +434,7 @@ public class ValidateSamFileTest {
pw.println("@HD\tVN:" + version);
pw.close();
final SamReader reader = SamReaderFactory.makeDefault().open(samFile);
- final Histogram<String> results = executeValidation(reader, null);
+ final Histogram<String> results = executeValidation(reader, null, IndexValidationStringency.EXHAUSTIVE);
if (expectValid) Assert.assertNull(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()));
else {
Assert.assertNotNull(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()));
@@ -447,4 +452,20 @@ public class ValidateSamFileTest {
// Test an unacceptable version
testHeaderVersion("1.6", false);
}
+
+ @Test(enabled = false)
+ public void duplicateReads() throws Exception {
+ final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "duplicated_reads.sam"));
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
+ Assert.assertFalse(results.isEmpty());
+ Assert.assertEquals(results.get(SAMValidationError.Type.MATES_ARE_SAME_END.getHistogramString()).getValue(), 2.0);
+ }
+
+ @Test
+ public void duplicateReadsOutOfOrder() throws Exception {
+ final SamReader samReader = SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT).open(new File(TEST_DATA_DIR, "duplicated_reads_out_of_order.sam"));
+ final Histogram<String> results = executeValidation(samReader, null, IndexValidationStringency.EXHAUSTIVE);
+ Assert.assertFalse(results.isEmpty());
+ Assert.assertEquals(results.get(SAMValidationError.Type.MATES_ARE_SAME_END.getHistogramString()).getValue(), 2.0);
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/cram/LosslessRoundTripTest.java b/src/tests/java/htsjdk/samtools/cram/LosslessRoundTripTest.java
new file mode 100644
index 0000000..67cd483
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/LosslessRoundTripTest.java
@@ -0,0 +1,68 @@
+package htsjdk.samtools.cram;
+
+import htsjdk.samtools.CRAMFileReader;
+import htsjdk.samtools.CRAMFileWriter;
+import htsjdk.samtools.Cigar;
+import htsjdk.samtools.CigarElement;
+import htsjdk.samtools.CigarOperator;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * Created by vadim on 19/02/2016.
+ */
+public class LosslessRoundTripTest {
+ @Test
+ public void test_MD_NM() throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ InMemoryReferenceSequenceFile rsf = new InMemoryReferenceSequenceFile();
+ rsf.add("1", "AAA".getBytes());
+ ReferenceSource source = new ReferenceSource(rsf);
+
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ samFileHeader.addSequence(new SAMSequenceRecord("1", 3));
+ samFileHeader.addReadGroup(new SAMReadGroupRecord("some read group"));
+
+ CRAMFileWriter w = new CRAMFileWriter(baos, source, samFileHeader, null);
+ SAMRecord record = new SAMRecord(samFileHeader);
+ record.setReadName("name");
+ record.setAlignmentStart(1);
+ record.setReferenceIndex(0);
+ record.setCigarString("3M");
+ record.setReadUnmappedFlag(false);
+ record.setReadBases("AAC".getBytes());
+ record.setBaseQualities("!!!".getBytes());
+
+ record.setAttribute("RG", "some read group");
+ // setting some bizzar values to provoke test failure if the values are auto-restored while reading CRAM:
+ record.setAttribute("MD", "nonsense");
+ record.setAttribute("NM", 123);
+ w.addAlignment(record);
+ w.close();
+
+ byte[] cramBytes = baos.toByteArray();
+ InputStream cramInputStream = new ByteArrayInputStream(cramBytes);
+ CRAMFileReader reader = new CRAMFileReader(cramInputStream, (File) null, source, ValidationStringency.STRICT);
+ final SAMRecordIterator iterator = reader.getIterator();
+ Assert.assertTrue(iterator.hasNext());
+ SAMRecord record2 = iterator.next();
+ Assert.assertNotNull(record2);
+
+ Assert.assertEquals(record2, record);
+ reader.close();
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java b/src/tests/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java
new file mode 100644
index 0000000..a3d91cd
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/build/CompressionHeaderFactoryTest.java
@@ -0,0 +1,208 @@
+package htsjdk.samtools.cram.build;
+
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.cram.encoding.readfeatures.Substitution;
+import htsjdk.samtools.cram.structure.CompressionHeader;
+import htsjdk.samtools.cram.structure.CramCompressionRecord;
+import htsjdk.samtools.cram.structure.EncodingID;
+import htsjdk.samtools.cram.structure.EncodingKey;
+import htsjdk.samtools.cram.structure.ReadTag;
+import htsjdk.samtools.cram.structure.SubstitutionMatrix;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created by vadim on 07/01/2016.
+ */
+public class CompressionHeaderFactoryTest {
+ @Test
+ public void testAllEncodingsPresent() {
+ final CompressionHeader header = new CompressionHeaderFactory().build(new ArrayList<>(), new SubstitutionMatrix(new long[256][256]), true);
+ for (final EncodingKey key : EncodingKey.values()) {
+ switch (key) {
+ // skip test marks and unused series:
+ case TV_TestMark:
+ case TM_TestMark:
+ case BB_bases:
+ case QQ_scores:
+ Assert.assertFalse(header.encodingMap.containsKey(key), "Unexpected encoding key found: " + key.name());
+ continue;
+ }
+ Assert.assertTrue(header.encodingMap.containsKey(key), "Encoding key not found: " + key.name());
+ Assert.assertNotNull(header.encodingMap.get(key));
+ Assert.assertFalse(header.encodingMap.get(key).id == EncodingID.NULL);
+ }
+ }
+
+ @Test
+ public void testAP_delta() {
+ boolean sorted = true;
+ CompressionHeader header = new CompressionHeaderFactory().build(new ArrayList<>(), new SubstitutionMatrix(new long[256][256]), sorted);
+ Assert.assertEquals(header.APDelta, sorted);
+
+ sorted = false;
+ header = new CompressionHeaderFactory().build(new ArrayList<>(), new SubstitutionMatrix(new long[256][256]), sorted);
+ Assert.assertEquals(header.APDelta, sorted);
+ }
+
+ @Test
+ public void testGetDataForTag() {
+ final CompressionHeaderFactory factory = new CompressionHeaderFactory();
+ final List<CramCompressionRecord> records = new ArrayList<>();
+ final CramCompressionRecord record = new CramCompressionRecord();
+ final int tagID = ReadTag.name3BytesToInt("ACi".getBytes());
+ final byte[] data = new byte[]{1, 2, 3, 4};
+ final ReadTag tag = new ReadTag(tagID, data, ValidationStringency.STRICT);
+ record.tags = new ReadTag[]{tag};
+ records.add(record);
+
+ final byte[] dataForTag = factory.getDataForTag(records, tagID);
+ Assert.assertEquals(dataForTag, data);
+ }
+
+ @Test
+ public void test_buildFrequencies() {
+ final CramCompressionRecord record = new CramCompressionRecord();
+ final Substitution s = new Substitution();
+ s.setPosition(1);
+ final byte refBase = 'A';
+ final byte readBase = 'C';
+
+ s.setBase(readBase);
+ s.setReferenceBase(refBase);
+ s.setCode((byte) 1);
+ record.readFeatures = new ArrayList<>();
+ record.readFeatures.add(s);
+ record.readLength = 2;
+
+ final List<CramCompressionRecord> records = new ArrayList<>();
+ records.add(record);
+
+ final long[][] frequencies = CompressionHeaderFactory.buildFrequencies(records);
+ for (int i = 0; i < frequencies.length; i++) {
+ for (int j = 0; j < frequencies[i].length; j++) {
+ if (i != refBase && j != readBase) {
+ Assert.assertEquals(frequencies[i][j], 0);
+ }
+ }
+
+ }
+ Assert.assertEquals(frequencies[refBase][readBase], 1);
+ }
+
+ @Test
+ public void test_getBestExternalCompressor() {
+ try {
+ Assert.assertNotNull(CompressionHeaderFactory.getBestExternalCompressor(null));
+ Assert.fail("NPE expected for null data");
+ } catch (final NullPointerException e) {
+
+ }
+ Assert.assertNotNull(CompressionHeaderFactory.getBestExternalCompressor("".getBytes()));
+ Assert.assertNotNull(CompressionHeaderFactory.getBestExternalCompressor("qwe".getBytes()));
+ }
+
+ @Test
+ public void test_geByteSizeRangeOfTagValues() {
+ final List<CramCompressionRecord> records = new ArrayList<>();
+ final int tagID = ReadTag.name3BytesToInt("ACi".getBytes());
+ // test empty list:
+ CompressionHeaderFactory.ByteSizeRange range = CompressionHeaderFactory.geByteSizeRangeOfTagValues(records, tagID);
+ Assert.assertNotNull(range);
+ Assert.assertEquals(range.min, Integer.MAX_VALUE);
+ Assert.assertEquals(range.max, Integer.MIN_VALUE);
+
+ // test single record with a single tag:
+ final CramCompressionRecord record = new CramCompressionRecord();
+ final byte[] data = new byte[]{1, 2, 3, 4};
+ final ReadTag tag = new ReadTag(tagID, data, ValidationStringency.STRICT);
+ record.tags = new ReadTag[]{tag};
+ records.add(record);
+
+ range = CompressionHeaderFactory.geByteSizeRangeOfTagValues(records, tagID);
+ Assert.assertNotNull(range);
+ Assert.assertEquals(range.min, 4);
+ Assert.assertEquals(range.max, 4);
+ }
+
+ @Test
+ public void test_getTagType() {
+ Assert.assertEquals(CompressionHeaderFactory.getTagType(ReadTag.name3BytesToInt("ACi".getBytes())), 'i');
+ }
+
+ @Test
+ public void test_getUnusedByte() {
+ final byte[] data = new byte[256];
+ for (int i = 0; i < data.length; i++) {
+ data[i] = (byte) i;
+ }
+
+ int unusedByte = CompressionHeaderFactory.getUnusedByte(data);
+ Assert.assertEquals(unusedByte, -1);
+
+ data[5] = 0;
+ unusedByte = CompressionHeaderFactory.getUnusedByte(data);
+ Assert.assertEquals(unusedByte, 5);
+ data[5] = 5;
+
+ data[150] = 0;
+ unusedByte = CompressionHeaderFactory.getUnusedByte(data);
+ Assert.assertEquals(unusedByte, 150);
+ }
+
+ @Test
+ public void test_updateSubstitutionCodes() {
+ final CramCompressionRecord record = new CramCompressionRecord();
+ final Substitution s = new Substitution();
+ s.setPosition(1);
+ final byte refBase = 'A';
+ final byte readBase = 'C';
+
+ s.setBase(readBase);
+ s.setReferenceBase(refBase);
+ record.readFeatures = new ArrayList<>();
+ record.readFeatures.add(s);
+ record.readLength = 2;
+
+ final List<CramCompressionRecord> records = new ArrayList<>();
+ records.add(record);
+
+ final long[][] frequencies = new long[256][256];
+ frequencies[refBase][readBase] = 1;
+ SubstitutionMatrix matrix = new SubstitutionMatrix(frequencies);
+
+ Assert.assertTrue(s.getCode() == -1);
+ CompressionHeaderFactory.updateSubstitutionCodes(records, matrix);
+ Assert.assertFalse(s.getCode() == -1);
+ Assert.assertEquals(s.getCode(), matrix.code(refBase, readBase));
+ }
+
+ @Test
+ public void test_getTagValueByteSize() {
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'i', 1), 4);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'I', 1), 4);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'c', (byte) 1), 1);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'C', -(byte) 1), 1);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 's', (short) 1), 2);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'S', -(short) 1), 2);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'A', 1), 1);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'f', 1f), 4);
+
+ // string values are null-terminated:
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'Z', "blah-blah"), "blah-blah".length() + 1);
+
+ // byte length of an array tag value is: element type (1 byte) + nof bytes (4 bytes) + nof elements * byte size of element
+ int elementTypeLength = 1;
+ int arraySizeByteLength = 4;
+ int arraySize = 3;
+ int byteElementSize = 1;
+ int int_float_long_elementSize = 4;
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'B', new byte[]{0, 1, 2}), elementTypeLength + arraySizeByteLength + arraySize * byteElementSize);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'B', new int[]{0, 1, 2}), elementTypeLength + arraySizeByteLength + arraySize * int_float_long_elementSize);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'B', new float[]{0, 1, 2}), elementTypeLength + arraySizeByteLength + arraySize * int_float_long_elementSize);
+ Assert.assertEquals(CompressionHeaderFactory.getTagValueByteSize((byte) 'B', new long[]{0, 1, 2}), elementTypeLength + arraySizeByteLength + arraySize * int_float_long_elementSize);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java b/src/tests/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java
new file mode 100644
index 0000000..cb004a7
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/build/ContainerFactoryTest.java
@@ -0,0 +1,134 @@
+package htsjdk.samtools.cram.build;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.cram.ref.ReferenceSource;
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.CramCompressionRecord;
+import htsjdk.samtools.cram.structure.Slice;
+import htsjdk.samtools.reference.InMemoryReferenceSequenceFile;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Created by vadim on 15/12/2015.
+ */
+public class ContainerFactoryTest {
+
+ @Test
+ public void testUnmapped() throws IOException, IllegalAccessException {
+ SAMFileHeader header = new SAMFileHeader();
+
+ int recordsPerContainer = 10;
+ ContainerFactory factory = new ContainerFactory(header, recordsPerContainer);
+
+ List<CramCompressionRecord> records = new ArrayList<>();
+ for (int i = 0; i < recordsPerContainer; i++) {
+ final CramCompressionRecord record = new CramCompressionRecord();
+ record.setSegmentUnmapped(true);
+ record.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ record.alignmentStart = SAMRecord.NO_ALIGNMENT_START;
+ record.readBases = record.qualityScores = "ACGTN".getBytes();
+ record.readName = Integer.toString(i);
+
+ records.add(record);
+ }
+
+ final Container container = factory.buildContainer(records);
+ Assert.assertNotNull(container);
+ Assert.assertEquals(container.nofRecords, records.size());
+
+ assertContainerAlignmentBoundaries(container, SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN);
+ }
+
+ @Test
+ public void testMapped() throws IOException, IllegalAccessException {
+ InMemoryReferenceSequenceFile refFile = new InMemoryReferenceSequenceFile();
+ String refName = "1";
+ String refString = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+ refFile.add(refName, refString.getBytes());
+ ReferenceSource source = new ReferenceSource(refFile);
+ SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord(refName, refString.length()));
+ int sequenceId = header.getSequenceIndex(refName);
+
+ int recordsPerContainer = 10;
+ byte[] bases = "AAAAA".getBytes();
+ int readLength = bases.length;
+ int alignmentStartOffset = 3;
+ ContainerFactory factory = new ContainerFactory(header, recordsPerContainer);
+
+ List<CramCompressionRecord> records = new ArrayList<>();
+ int span = 0;
+ for (int i = 0; i < recordsPerContainer; i++) {
+ final CramCompressionRecord record = new CramCompressionRecord();
+ record.setSegmentUnmapped(false);
+ record.sequenceId = sequenceId;
+ record.alignmentStart = alignmentStartOffset + i;
+ record.readBases = record.qualityScores = bases;
+ record.readName = Integer.toString(i);
+ record.readLength = readLength;
+ record.readFeatures = Collections.emptyList();
+
+ records.add(record);
+ span = record.alignmentStart + readLength - alignmentStartOffset;
+ }
+
+ final Container container = factory.buildContainer(records);
+ Assert.assertNotNull(container);
+ Assert.assertEquals(container.nofRecords, records.size());
+
+ assertContainerAlignmentBoundaries(container, sequenceId, alignmentStartOffset, span);
+ }
+
+ @Test
+ public void testMultiref() throws IOException, IllegalAccessException {
+ SAMFileHeader header = new SAMFileHeader();
+ header.addSequence(new SAMSequenceRecord("1", 100));
+ header.addSequence(new SAMSequenceRecord("2", 200));
+
+ int recordsPerContainer = 10;
+ byte[] bases = "AAAAA".getBytes();
+ int readLength = bases.length;
+ int alignmentStartOffset = 3;
+ ContainerFactory factory = new ContainerFactory(header, recordsPerContainer);
+
+ List<CramCompressionRecord> records = new ArrayList<>();
+ for (int i = 0; i < recordsPerContainer; i++) {
+ final CramCompressionRecord record = new CramCompressionRecord();
+ record.setSegmentUnmapped(false);
+ record.sequenceId = i % 2;
+ record.alignmentStart = alignmentStartOffset + i;
+ record.readBases = record.qualityScores = bases;
+ record.readName = Integer.toString(i);
+ record.readLength = readLength;
+ record.readFeatures = Collections.emptyList();
+
+ records.add(record);
+ }
+
+ final Container container = factory.buildContainer(records);
+ Assert.assertNotNull(container);
+ Assert.assertEquals(container.nofRecords, records.size());
+
+ assertContainerAlignmentBoundaries(container, Slice.MULTI_REFERENCE, Slice.NO_ALIGNMENT_START, Slice.NO_ALIGNMENT_SPAN);
+ }
+
+
+ private void assertContainerAlignmentBoundaries(Container container, int sequenceId, int alignmentStart, int alignmentSpan) {
+ Assert.assertEquals(container.sequenceId, sequenceId);
+ Assert.assertEquals(container.alignmentStart, alignmentStart);
+ Assert.assertEquals(container.alignmentSpan, alignmentSpan);
+
+ if (sequenceId == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX || sequenceId == Slice.MULTI_REFERENCE) {
+ Assert.assertEquals(container.alignmentStart, Slice.NO_ALIGNMENT_START);
+ Assert.assertEquals(container.alignmentSpan, Slice.NO_ALIGNMENT_SPAN);
+ }
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/build/ContainerParserTest.java b/src/tests/java/htsjdk/samtools/cram/build/ContainerParserTest.java
new file mode 100644
index 0000000..fe25ce6
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/build/ContainerParserTest.java
@@ -0,0 +1,185 @@
+package htsjdk.samtools.cram.build;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.cram.common.CramVersions;
+import htsjdk.samtools.cram.common.Version;
+import htsjdk.samtools.cram.structure.AlignmentSpan;
+import htsjdk.samtools.cram.structure.Container;
+import htsjdk.samtools.cram.structure.ContainerIO;
+import htsjdk.samtools.cram.structure.CramCompressionRecord;
+import htsjdk.samtools.cram.structure.Slice;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by vadim on 11/01/2016.
+ */
+public class ContainerParserTest {
+
+ /**
+ * An EOF container, round-tripped through ContainerIO, must be recognized as EOF and
+ * parse to an empty record list — for both CRAM v2.1 and CRAM v3 formats.
+ */
+ @Test
+ public void testEOF() throws IOException, IllegalAccessException {
+ ContainerParser parser = new ContainerParser(new SAMFileHeader());
+ ByteArrayOutputStream v2_baos = new ByteArrayOutputStream();
+ Version version = CramVersions.CRAM_v2_1;
+ CramIO.issueEOF(version, v2_baos);
+ // Write the version-specific EOF marker, then read it back as a container.
+ Container container = ContainerIO.readContainer(version, new ByteArrayInputStream(v2_baos.toByteArray()));
+ Assert.assertTrue(container.isEOF());
+ Assert.assertTrue(parser.getRecords(container, null, ValidationStringency.STRICT).isEmpty());
+
+ // Repeat for CRAM v3, whose EOF marker differs from v2.1.
+ ByteArrayOutputStream v3_baos = new ByteArrayOutputStream();
+ version = CramVersions.CRAM_v3;
+ CramIO.issueEOF(version, v3_baos);
+ container = ContainerIO.readContainer(version, new ByteArrayInputStream(v3_baos.toByteArray()));
+ Assert.assertTrue(container.isEOF());
+ Assert.assertTrue(parser.getRecords(container, null, ValidationStringency.STRICT).isEmpty());
+ }
+
+ /**
+ * A container built from 10 mapped records, all on reference 0, must report a single-ref
+ * sequenceId of 0 and getReferences() must yield exactly that one reference.
+ */
+ @Test
+ public void testSingleRefContainer() throws IOException, IllegalAccessException {
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ ContainerFactory factory = new ContainerFactory(samFileHeader, 10);
+ List<CramCompressionRecord> records = new ArrayList<>();
+ for (int i=0; i<10; i++) {
+ CramCompressionRecord record = new CramCompressionRecord();
+ record.readBases="AAA".getBytes();
+ record.qualityScores="!!!".getBytes();
+ record.setSegmentUnmapped(false);
+ record.readName=""+i;
+ record.sequenceId=0;
+ record.setLastSegment(true);
+ record.readFeatures = Collections.emptyList();
+
+ records.add(record);
+ }
+
+ Container container = factory.buildContainer(records);
+ Assert.assertEquals(container.nofRecords, 10);
+ Assert.assertEquals(container.sequenceId, 0);
+
+ ContainerParser parser = new ContainerParser(samFileHeader);
+ final Map<Integer, AlignmentSpan> referenceSet = parser.getReferences(container, ValidationStringency.STRICT);
+ Assert.assertNotNull(referenceSet);
+ Assert.assertEquals(referenceSet.size(), 1);
+ Assert.assertTrue(referenceSet.containsKey(0));
+ }
+
+ /**
+ * A container built from 10 unmapped records must report the NO_ALIGNMENT reference index,
+ * and getReferences() must yield only that sentinel key.
+ */
+ @Test
+ public void testUnmappedContainer() throws IOException, IllegalAccessException {
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ ContainerFactory factory = new ContainerFactory(samFileHeader, 10);
+ List<CramCompressionRecord> records = new ArrayList<>();
+ for (int i=0; i<10; i++) {
+ CramCompressionRecord record = new CramCompressionRecord();
+ record.readBases="AAA".getBytes();
+ record.qualityScores="!!!".getBytes();
+ record.setSegmentUnmapped(true);
+ record.readName=""+i;
+ record.sequenceId= SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ record.setLastSegment(true);
+
+ records.add(record);
+ }
+
+ Container container = factory.buildContainer(records);
+ Assert.assertEquals(container.nofRecords, 10);
+ Assert.assertEquals(container.sequenceId, SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+
+ ContainerParser parser = new ContainerParser(samFileHeader);
+ final Map<Integer, AlignmentSpan> referenceSet = parser.getReferences(container, ValidationStringency.STRICT);
+ Assert.assertNotNull(referenceSet);
+ Assert.assertEquals(referenceSet.size(), 1);
+ Assert.assertTrue(referenceSet.containsKey(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+
+ }
+
+ /**
+ * A container mixing mapped (reference 0) and unmapped records must be flagged
+ * MULTI_REFERENCE, and getReferences() must report both reference keys.
+ */
+ @Test
+ public void testMappedAndUnmappedContainer() throws IOException, IllegalAccessException {
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ ContainerFactory factory = new ContainerFactory(samFileHeader, 10);
+ List<CramCompressionRecord> records = new ArrayList<>();
+ for (int i=0; i<10; i++) {
+ CramCompressionRecord record = new CramCompressionRecord();
+ record.readBases="AAA".getBytes();
+ record.qualityScores="!!!".getBytes();
+ record.readName=""+i;
+ record.alignmentStart=i+1;
+
+ record.setMultiFragment(false);
+ // Even indices become unmapped records, odd indices map to reference 0.
+ if (i%2==0) {
+ record.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ record.setSegmentUnmapped(true);
+ } else {
+ record.sequenceId=0;
+ record.readFeatures = Collections.emptyList();
+ record.setSegmentUnmapped(false);
+ }
+ records.add(record);
+ }
+
+
+
+ Container container = factory.buildContainer(records);
+ Assert.assertEquals(container.nofRecords, 10);
+ Assert.assertEquals(container.sequenceId, Slice.MULTI_REFERENCE);
+
+ ContainerParser parser = new ContainerParser(samFileHeader);
+ final Map<Integer, AlignmentSpan> referenceSet = parser.getReferences(container, ValidationStringency.STRICT);
+ Assert.assertNotNull(referenceSet);
+ Assert.assertEquals(referenceSet.size(), 2);
+ Assert.assertTrue(referenceSet.containsKey(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ Assert.assertTrue(referenceSet.containsKey(0));
+ }
+
+ /**
+ * A container with nine mapped records on nine distinct references plus one unmapped
+ * record must be MULTI_REFERENCE, and getReferences() must report all ten keys with the
+ * per-reference count/start/span each single record implies.
+ */
+ @Test
+ public void testMultirefContainer() throws IOException, IllegalAccessException {
+ SAMFileHeader samFileHeader = new SAMFileHeader();
+ ContainerFactory factory = new ContainerFactory(samFileHeader, 10);
+ List<CramCompressionRecord> records = new ArrayList<>();
+ for (int i=0; i<10; i++) {
+ CramCompressionRecord record = new CramCompressionRecord();
+ record.readBases="AAA".getBytes();
+ record.qualityScores="!!!".getBytes();
+ record.readName=""+i;
+ record.alignmentStart=i+1;
+ record.readLength = 3;
+
+ record.setMultiFragment(false);
+ // Records 0..8 each map to their own reference; record 9 is unmapped.
+ if (i < 9) {
+ record.sequenceId=i;
+ record.readFeatures = Collections.emptyList();
+ record.setSegmentUnmapped(false);
+ } else {
+ record.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+ record.setSegmentUnmapped(true);
+ }
+ records.add(record);
+ }
+
+ Container container = factory.buildContainer(records);
+ Assert.assertEquals(container.nofRecords, 10);
+ Assert.assertEquals(container.sequenceId, Slice.MULTI_REFERENCE);
+
+ ContainerParser parser = new ContainerParser(samFileHeader);
+ final Map<Integer, AlignmentSpan> referenceSet = parser.getReferences(container, ValidationStringency.STRICT);
+ Assert.assertNotNull(referenceSet);
+ Assert.assertEquals(referenceSet.size(), 10);
+ Assert.assertTrue(referenceSet.containsKey(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX));
+ // Each mapped reference holds exactly one 3-base read starting at i+1.
+ for (int i=0; i<9; i++) {
+ Assert.assertTrue(referenceSet.containsKey(i));
+ AlignmentSpan span = referenceSet.get(i);
+ Assert.assertEquals(span.getCount(), 1);
+ Assert.assertEquals(span.getStart(), i+1);
+ Assert.assertEquals(span.getSpan(), 3);
+ }
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java b/src/tests/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java
index dc0749d..f2ca2f2 100644
--- a/src/tests/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java
+++ b/src/tests/java/htsjdk/samtools/cram/encoding/huffman/codec/HuffmanTest.java
@@ -1,6 +1,5 @@
package htsjdk.samtools.cram.encoding.huffman.codec;
-import htsjdk.samtools.cram.build.CompressionHeaderFactory;
import htsjdk.samtools.cram.io.DefaultBitInputStream;
import htsjdk.samtools.cram.io.DefaultBitOutputStream;
import htsjdk.samtools.cram.structure.ReadTag;
@@ -19,7 +18,7 @@ public class HuffmanTest {
public void testHuffmanIntHelper() throws IOException {
int size = 1000000;
- CompressionHeaderFactory.HuffmanParamsCalculator cal = new CompressionHeaderFactory.HuffmanParamsCalculator();
+ HuffmanParamsCalculator cal = new HuffmanParamsCalculator();
cal.add(ReadTag.nameType3BytesToInt("OQ", 'Z'), size);
cal.add(ReadTag.nameType3BytesToInt("X0", 'C'), size);
cal.add(ReadTag.nameType3BytesToInt("X0", 'c'), size);
@@ -56,8 +55,9 @@ public class HuffmanTest {
for (int i = 0; i < size; i++) {
for (int b : cal.values()) {
int v = helper.read(bis);
- if (v != b)
+ if (v != b) {
Assert.fail("Mismatch: " + v + " vs " + b + " at " + counter);
+ }
counter++;
}
@@ -65,13 +65,14 @@ public class HuffmanTest {
}
@Test
- public void testHuffmanByteHelper () throws IOException {
- int size = 1000000;
+ public void testHuffmanByteHelper() throws IOException {
+ int size = 1000000;
long time5 = System.nanoTime();
- CompressionHeaderFactory.HuffmanParamsCalculator cal = new CompressionHeaderFactory.HuffmanParamsCalculator();
- for (byte i = 33; i < 33 + 15; i++)
+ HuffmanParamsCalculator cal = new HuffmanParamsCalculator();
+ for (byte i = 33; i < 33 + 15; i++) {
cal.add(i);
+ }
cal.calculate();
HuffmanByteHelper helper = new HuffmanByteHelper(cal.valuesAsBytes(), cal.bitLens());
@@ -94,8 +95,9 @@ public class HuffmanTest {
for (int i = 0; i < size; i++) {
for (int b : cal.values()) {
int v = helper.read(bis);
- if (v != b)
+ if (v != b) {
Assert.fail("Mismatch: " + v + " vs " + b + " at " + counter);
+ }
counter++;
}
diff --git a/src/tests/java/htsjdk/samtools/cram/structure/SliceTests.java b/src/tests/java/htsjdk/samtools/cram/structure/SliceTests.java
new file mode 100644
index 0000000..a5c1669
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/cram/structure/SliceTests.java
@@ -0,0 +1,36 @@
+package htsjdk.samtools.cram.structure;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.util.SequenceUtil;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Created by vadim on 07/12/2015.
+ */
+public class SliceTests {
+ /**
+ * A slice with no reference and no alignment start must pass reference-MD5 validation
+ * regardless of the reference bases supplied (null, empty, or arbitrary).
+ */
+ @Test
+ public void testUnmappedValidateRef() {
+ Slice slice = new Slice();
+ slice.alignmentStart= SAMRecord.NO_ALIGNMENT_START;
+ slice.sequenceId = SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX;
+
+ Assert.assertTrue(slice.validateRefMD5(null));
+ Assert.assertTrue(slice.validateRefMD5(new byte[0]));
+ Assert.assertTrue(slice.validateRefMD5(new byte[1024]));
+ }
+
+ /**
+ * setRefMD5 must store the MD5 digest of the reference span covered by the slice
+ * (start=1, span=5 here), and validateRefMD5 must accept the same reference bases.
+ */
+ @Test
+ public void test_validateRef() {
+ byte[] ref = "AAAAA".getBytes();
+ // Expected digest over the slice's 5-base span of the reference.
+ final byte[] md5 = SequenceUtil.calculateMD5(ref, 0, Math.min(5, ref.length));
+ Slice slice = new Slice();
+ slice.sequenceId=0;
+ slice.alignmentSpan=5;
+ slice.alignmentStart=1;
+ slice.setRefMD5(ref);
+
+ Assert.assertEquals(slice.refMD5, md5);
+ Assert.assertTrue(slice.validateRefMD5(ref));
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/fastq/FastqWriterTest.java b/src/tests/java/htsjdk/samtools/fastq/FastqWriterTest.java
new file mode 100644
index 0000000..9610d02
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/fastq/FastqWriterTest.java
@@ -0,0 +1,73 @@
+/*
+ * The MIT License
+ *
+ * Pierre Lindenbaum PhD
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.fastq;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import htsjdk.samtools.util.TestUtil;
+
+import java.io.File;
+import java.util.ArrayList;
+
+/**
+ * test fastq
+ */
+public class FastqWriterTest {
+ // FASTQ fixtures shared with QualityEncodingDetectorTest.
+ private static final File TEST_DATA_DIR = new File("testdata/htsjdk/samtools/util/QualityEncodingDetectorTest");
+
+ /** Supplies the FASTQ fixture basenames consumed by both tests below. */
+ @DataProvider(name = "fastqsource")
+ public Object[][] createTestData() {
+ return new Object[][]{
+ {"solexa_full_range_as_solexa.fastq"},
+ {"5k-30BB2AAXX.3.aligned.sam.fastq"}
+ };
+ }
+
+ /**
+ * Smoke test: every record read from the fixture can be written back out without error.
+ * NOTE(review): the reader/writer are not closed if write() throws, and the written file
+ * is never compared against the input — confirm whether round-trip verification was intended.
+ */
+ @Test(dataProvider = "fastqsource")
+ public void testReadReadWriteFastq(final String basename) throws Exception {
+ final File tmpFile = File.createTempFile("test.", ".fastq");
+ tmpFile.deleteOnExit();
+ final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename));
+ final FastqWriterFactory writerFactory = new FastqWriterFactory();
+ final FastqWriter fastqWriter = writerFactory.newWriter(tmpFile);
+ for(final FastqRecord rec: fastqReader) fastqWriter.write(rec);
+ fastqWriter.close();
+ fastqReader.close();
+ }
+
+ /**
+ * FastqRecord must survive Java serialization: the first 101 records of the fixture are
+ * serialized, deserialized, and compared for equality with the originals.
+ */
+ @Test(dataProvider = "fastqsource")
+ public void testFastqSerialize(final String basename) throws Exception {
+ //write
+ final ArrayList<FastqRecord> records = new ArrayList<>();
+ final FastqReader fastqReader = new FastqReader(new File(TEST_DATA_DIR,basename));
+ for(final FastqRecord rec: fastqReader) {
+ records.add(rec);
+ if(records.size()>100) break;
+ }
+ fastqReader.close();
+ Assert.assertEquals(TestUtil.serializeAndDeserialize(records),records);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java b/src/tests/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java
new file mode 100644
index 0000000..7fc5fd2
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/filter/JavascriptSamRecordFilterTest.java
@@ -0,0 +1,74 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.filter;
+
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.util.CloserUtil;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France
+ */
+
+public class JavascriptSamRecordFilterTest {
+ // Root of the SAM test fixtures; also holds the javascript filter scripts.
+ final File testDir = new File("./testdata/htsjdk/samtools");
+
+ /** Rows: {SAM file, javascript filter script, expected number of records kept}. */
+ @DataProvider
+ public Object[][] jsData() {
+ return new Object[][] { { "unsorted.sam", "samFilter01.js", 8 }, { "unsorted.sam", "samFilter02.js", 10 }, };
+ }
+
+ /**
+ * Applies the javascript filter to every record in the SAM file and checks that the
+ * number of records NOT filtered out matches the expected count.
+ */
+ @Test(dataProvider = "jsData")
+ public void testJavascriptFilters(final String samFile, final String javascriptFile, final int expectCount) {
+ final SamReaderFactory srf = SamReaderFactory.makeDefault();
+ final SamReader samReader = srf.open(new File(testDir, samFile));
+ final JavascriptSamRecordFilter filter;
+ try {
+ filter = new JavascriptSamRecordFilter(new File(testDir, javascriptFile),
+ samReader.getFileHeader());
+ } catch (IOException err) {
+ // NOTE(review): samReader is left open on this failure path — confirm intentional.
+ Assert.fail("Cannot read script",err);
+ return;
+ }
+ final SAMRecordIterator iter = samReader.iterator();
+ int count = 0;
+ while (iter.hasNext()) {
+ // Count only records the filter keeps.
+ if (filter.filterOut(iter.next())) {
+ continue;
+ }
+ ++count;
+ }
+ iter.close();
+ CloserUtil.close(samReader);
+ Assert.assertEquals(count, expectCount, "Expected number of reads " + expectCount + " but got " + count);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/AbstractSRATest.java b/src/tests/java/htsjdk/samtools/sra/AbstractSRATest.java
new file mode 100644
index 0000000..c50f3b8
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/AbstractSRATest.java
@@ -0,0 +1,57 @@
+package htsjdk.samtools.sra;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import org.testng.Assert;
+import org.testng.SkipException;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.util.NoSuchElementException;
+
+@Test(groups = "sra")
+public abstract class AbstractSRATest {
+
+ /**
+ * Skips any SRA test when the SRA native library cannot be loaded on this platform,
+ * rather than failing it.
+ */
+ @BeforeMethod
+ public final void assertSRAIsSupported(){
+ if(!SRAAccession.isSupported()){
+ throw new SkipException("Skipping SRA Test because SRA native code is unavailable.");
+ }
+ }
+
+ /**
+ * Exhaust the iterator and check that it produces the expected number of mapped and unmapped reads.
+ * Also checks that hasNext() agrees with the actual results of next() for the given iterator.
+ * @param expectedNumMapped expected number of mapped reads, specify -1 to skip this check
+ * @param expectedNumUnmapped expected number of unmapped reads, specify -1 to skip this check
+ */
+ static void assertCorrectCountsOfMappedAndUnmappedRecords(SAMRecordIterator samRecordIterator,
+ int expectedNumMapped, int expectedNumUnmapped) {
+ int numMapped = 0, numUnmapped = 0;
+ while (true) {
+ boolean hasRecord = samRecordIterator.hasNext();
+ SAMRecord record;
+ try {
+ record = samRecordIterator.next();
+ Assert.assertNotNull(record);
+ Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
+ } catch (final NoSuchElementException e) {
+ // next() past the end must coincide with hasNext() == false.
+ Assert.assertFalse(hasRecord);
+ break;
+ }
+
+ if (record.getReadUnmappedFlag()) {
+ numUnmapped++;
+ } else {
+ numMapped++;
+ }
+ }
+
+ if (expectedNumMapped != -1) {
+ Assert.assertEquals(numMapped, expectedNumMapped);
+ }
+ if (expectedNumUnmapped != -1) {
+ Assert.assertEquals(numUnmapped, expectedNumUnmapped);
+ }
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAAccessionTest.java b/src/tests/java/htsjdk/samtools/sra/SRAAccessionTest.java
new file mode 100644
index 0000000..dc03453
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/sra/SRAAccessionTest.java
@@ -0,0 +1,29 @@
+package htsjdk.samtools.sra;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for SRAAccession logic
+ */
+public class SRAAccessionTest extends AbstractSRATest {
+
+ /** Rows: {accession string or file path, whether SRAAccession should deem it valid}. */
+ @DataProvider(name = "isValidAccData")
+ private Object[][] getIsValidAccData() {
+ return new Object[][] {
+ { "SRR000123", true },
+ { "DRR000001", true },
+ { "SRR000000", false },
+ { "testdata/htsjdk/samtools/sra/test_archive.sra", true },
+ { "testdata/htsjdk/samtools/compressed.bam", false },
+ { "testdata/htsjdk/samtools/uncompressed.sam", false },
+ };
+ }
+
+ /**
+ * SRAAccession.isValid must accept well-formed SRA accessions and SRA archive files,
+ * and reject everything else.
+ */
+ @Test(dataProvider = "isValidAccData")
+ public void testIsValidAcc(String accession, boolean isValid) {
+ // TestNG's Assert.assertEquals takes (actual, expected); the original call had the
+ // arguments reversed, which yields misleading failure messages.
+ Assert.assertEquals(SRAAccession.isValid(accession), isValid);
+ }
+
+}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java b/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
index 9cf0c28..a141203 100644
--- a/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
+++ b/src/tests/java/htsjdk/samtools/sra/SRAIndexTest.java
@@ -35,11 +35,9 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
-import java.util.List;
import java.util.Set;
/**
@@ -47,16 +45,14 @@ import java.util.Set;
*
* Created by andrii.nikitiuk on 10/28/15.
*/
-public class SRAIndexTest {
+public class SRAIndexTest extends AbstractSRATest {
private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR1298981");
private static final int LAST_BIN_LEVEL = GenomicIndexUtil.LEVEL_STARTS.length - 1;
private static final int SRA_BIN_OFFSET = GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL];
@Test
public void testLevelSize() {
- if (!SRAAccession.isSupported()) return;
-
- SRAIndex index = getIndex(DEFAULT_ACCESSION);
+ final SRAIndex index = getIndex(DEFAULT_ACCESSION);
Assert.assertEquals(index.getLevelSize(0), GenomicIndexUtil.LEVEL_STARTS[1] - GenomicIndexUtil.LEVEL_STARTS[0]);
Assert.assertEquals(index.getLevelSize(LAST_BIN_LEVEL), GenomicIndexUtil.MAX_BINS - GenomicIndexUtil.LEVEL_STARTS[LAST_BIN_LEVEL] - 1);
@@ -64,15 +60,13 @@ public class SRAIndexTest {
@Test
public void testLevelForBin() {
- if (!SRAAccession.isSupported()) return;
-
- SRAIndex index = getIndex(DEFAULT_ACCESSION);
- Bin bin = new Bin(0, SRA_BIN_OFFSET);
+ final SRAIndex index = getIndex(DEFAULT_ACCESSION);
+ final Bin bin = new Bin(0, SRA_BIN_OFFSET);
Assert.assertEquals(index.getLevelForBin(bin), LAST_BIN_LEVEL);
}
@DataProvider(name = "testBinLocuses")
- public Object[][] createDataForBinLocuses() {
+ private Object[][] createDataForBinLocuses() {
return new Object[][] {
{DEFAULT_ACCESSION, 0, 0, 1, SRAIndex.SRA_BIN_SIZE},
{DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2}
@@ -81,35 +75,31 @@ public class SRAIndexTest {
@Test(dataProvider = "testBinLocuses")
public void testBinLocuses(SRAAccession acc, int reference, int binIndex, int firstLocus, int lastLocus) {
- if (!SRAAccession.isSupported()) return;
-
- SRAIndex index = getIndex(acc);
- Bin bin = new Bin(reference, SRA_BIN_OFFSET + binIndex);
+ final SRAIndex index = getIndex(acc);
+ final Bin bin = new Bin(reference, SRA_BIN_OFFSET + binIndex);
Assert.assertEquals(index.getFirstLocusInBin(bin), firstLocus);
Assert.assertEquals(index.getLastLocusInBin(bin), lastLocus);
}
@DataProvider(name = "testBinOverlappings")
- public Object[][] createDataForBinOverlappings() {
+ private Object[][] createDataForBinOverlappings() {
return new Object[][] {
- {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new HashSet<Integer>(Arrays.asList(0))},
- {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2, new HashSet<Integer>(Arrays.asList(1))},
- {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 3, new HashSet<Integer>(Arrays.asList(1, 2))},
- {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new HashSet<Integer>(Arrays.asList(1, 2))}
+ {DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new HashSet<>(Arrays.asList(0))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 2, new HashSet<>(Arrays.asList(1))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE + 1, SRAIndex.SRA_BIN_SIZE * 3, new HashSet<>(Arrays.asList(1, 2))},
+ {DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new HashSet<>(Arrays.asList(1, 2))}
};
}
@Test(dataProvider = "testBinOverlappings")
public void testBinOverlappings(SRAAccession acc, int reference, int firstLocus, int lastLocus, Set<Integer> binNumbers) {
- if (!SRAAccession.isSupported()) return;
-
- SRAIndex index = getIndex(acc);
- Iterator<Bin> binIterator = index.getBinsOverlapping(reference, firstLocus, lastLocus).iterator();
- Set<Integer> binNumbersFromIndex = new HashSet<Integer>();
+ final SRAIndex index = getIndex(acc);
+ final Iterator<Bin> binIterator = index.getBinsOverlapping(reference, firstLocus, lastLocus).iterator();
+ final Set<Integer> binNumbersFromIndex = new HashSet<>();
while (binIterator.hasNext()) {
- Bin bin = binIterator.next();
+ final Bin bin = binIterator.next();
binNumbersFromIndex.add(bin.getBinNumber() - SRA_BIN_OFFSET);
}
@@ -117,7 +107,7 @@ public class SRAIndexTest {
}
@DataProvider(name = "testSpanOverlappings")
- public Object[][] createDataForSpanOverlappings() {
+ private Object[][] createDataForSpanOverlappings() {
return new Object[][] {
{DEFAULT_ACCESSION, 0, 1, SRAIndex.SRA_BIN_SIZE, new long[] {0, SRAIndex.SRA_CHUNK_SIZE} },
{DEFAULT_ACCESSION, 0, SRAIndex.SRA_BIN_SIZE * 2, SRAIndex.SRA_BIN_SIZE * 2 + 1, new long[]{0, SRAIndex.SRA_CHUNK_SIZE} },
@@ -127,16 +117,10 @@ public class SRAIndexTest {
@Test(dataProvider = "testSpanOverlappings")
public void testSpanOverlappings(SRAAccession acc, int reference, int firstLocus, int lastLocus, long[] spanCoordinates) {
- if (!SRAAccession.isSupported()) return;
-
- SRAIndex index = getIndex(acc);
- BAMFileSpan span = index.getSpanOverlapping(reference, firstLocus, lastLocus);
+ final SRAIndex index = getIndex(acc);
+ final BAMFileSpan span = index.getSpanOverlapping(reference, firstLocus, lastLocus);
long[] coordinatesFromIndex = span.toCoordinateArray();
- List<Long> coordinatesListFromIndex = new ArrayList<Long>();
- for (long coordinate : coordinatesFromIndex) {
- coordinatesListFromIndex.add(coordinate);
- }
Assert.assertTrue(Arrays.equals(coordinatesFromIndex, spanCoordinates),
"Coordinates mismatch. Expected: " + Arrays.toString(spanCoordinates) +
@@ -144,7 +128,7 @@ public class SRAIndexTest {
}
private SRAIndex getIndex(SRAAccession acc) {
- SRAFileReader reader = new SRAFileReader(acc);
+ final SRAFileReader reader = new SRAFileReader(acc);
return (SRAIndex) reader.getIndex();
}
}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java b/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
index 9b6dccb..97a1ad8 100644
--- a/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
+++ b/src/tests/java/htsjdk/samtools/sra/SRALazyRecordTest.java
@@ -11,19 +11,19 @@ import org.testng.annotations.Test;
/**
* Tests for SRA extension of SAMRecord objects which load fields on demand
*/
-public class SRALazyRecordTest {
+public class SRALazyRecordTest extends AbstractSRATest {
private static final SRAAccession DEFAULT_ACCESSION = new SRAAccession("SRR1298981");
@DataProvider(name = "serializationTestData")
- public Object[][] getSerializationTestData() {
+ private Object[][] getSerializationTestData() {
return new Object[][] {
{ DEFAULT_ACCESSION }
};
}
@Test(dataProvider = "serializationTestData")
- public void testSerialization(SRAAccession accession) throws Exception {
- SRAFileReader reader = new SRAFileReader(accession);
+ public void testSerialization(final SRAAccession accession) throws Exception {
+ final SRAFileReader reader = new SRAFileReader(accession);
final SAMRecord initialSAMRecord = reader.getIterator().next();
reader.close();
@@ -34,11 +34,11 @@ public class SRALazyRecordTest {
@Test
public void testCloneAndEquals() throws Exception {
- SRAFileReader reader = new SRAFileReader(DEFAULT_ACCESSION);
+ final SRAFileReader reader = new SRAFileReader(DEFAULT_ACCESSION);
final SAMRecord record = reader.getIterator().next();
reader.close();
- SAMRecord newRecord = (SAMRecord)record.clone();
+ final SAMRecord newRecord = (SAMRecord)record.clone();
Assert.assertFalse(record == newRecord);
Assert.assertNotSame(record, newRecord);
Assert.assertEquals(record, newRecord);
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java b/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
index b37c37a..575b3dd 100644
--- a/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
+++ b/src/tests/java/htsjdk/samtools/sra/SRAQueryTest.java
@@ -1,41 +1,35 @@
package htsjdk.samtools.sra;
-import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamInputResource;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
-import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.util.NoSuchElementException;
-
-public class SRAQueryTest {
+public class SRAQueryTest extends AbstractSRATest {
@DataProvider(name = "testUnmappedCounts")
- public Object[][] createDataForUnmappedCounts() {
+ private Object[][] createDataForUnmappedCounts() {
return new Object[][] {
{"SRR2096940", 498}
};
}
@Test(dataProvider = "testUnmappedCounts")
- public void testUnmappedCounts(String acc, int numberUnalignments) {
- if (!SRAAccession.isSupported()) return;
-
+ public void testUnmappedCounts(String acc, int expectedNumUnmapped) {
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
final SAMRecordIterator samRecordIterator = reader.queryUnmapped();
- checkAlignedUnalignedCountsByIterator(samRecordIterator, 0, numberUnalignments);
+ assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, 0, expectedNumUnmapped);
}
@DataProvider(name = "testReferenceAlignedCounts")
- public Object[][] createDataForReferenceAlignedCounts() {
+ private Object[][] createDataForReferenceAlignedCounts() {
return new Object[][] {
{"SRR2096940", "CM000681.1", 0, 10591},
{"SRR2096940", "CM000681.1", 55627015, 10591},
@@ -44,20 +38,18 @@ public class SRAQueryTest {
}
@Test(dataProvider = "testReferenceAlignedCounts")
- public void testReferenceAlignedCounts(String acc, String reference, int refernceStart, int numberAlignments) {
- if (!SRAAccession.isSupported()) return;
-
+ public void testReferenceAlignedCounts(String acc, String reference, int referenceStart, int expectedNumMapped) {
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
- final SAMRecordIterator samRecordIterator = reader.queryAlignmentStart(reference, refernceStart);
+ final SAMRecordIterator samRecordIterator = reader.queryAlignmentStart(reference, referenceStart);
- checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, 0);
+ assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, 0);
}
@DataProvider(name = "testQueryCounts")
- public Object[][] createDataForQueryCounts() {
+ private Object[][] createDataForQueryCounts() {
return new Object[][] {
{"SRR2096940", "CM000681.1", 0, 59128983, true, 10591, 0},
{"SRR2096940", "CM000681.1", 55627015, 59128983, true, 10591, 0},
@@ -67,50 +59,15 @@ public class SRAQueryTest {
}
@Test(dataProvider = "testQueryCounts")
- public void testQueryCounts(String acc, String reference, int refernceStart, int referenceEnd, boolean contained, int numberAlignments, int numberUnalignment) {
- if (!SRAAccession.isSupported()) return;
-
+ public void testQueryCounts(String acc, String reference, int referenceStart, int referenceEnd, boolean contained, int expectedNumMapped, int expectedNumUnmapped) {
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
- final SAMRecordIterator samRecordIterator = reader.query(reference, refernceStart, referenceEnd, contained);
+ final SAMRecordIterator samRecordIterator = reader.query(reference, referenceStart, referenceEnd, contained);
- checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignment);
+ assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, expectedNumUnmapped);
}
- private void checkAlignedUnalignedCountsByIterator(SAMRecordIterator samRecordIterator,
- int numberAlignments, int numberUnalignments) {
- int countAlignments = 0, countUnalignments = 0;
- while (true) {
- boolean hasRecord = samRecordIterator.hasNext();
- SAMRecord record = null;
- try {
- record = samRecordIterator.next();
- Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
- } catch (NoSuchElementException e) {
- Assert.assertFalse(hasRecord);
- }
-
- Assert.assertEquals(hasRecord, record != null);
-
- if (record == null) {
- break;
- }
-
- if (record.getReadUnmappedFlag()) {
- countUnalignments++;
- } else {
- countAlignments++;
- }
- }
-
- if (numberAlignments != -1) {
- Assert.assertEquals(numberAlignments, countAlignments);
- }
- if (numberUnalignments != -1) {
- Assert.assertEquals(numberUnalignments, countUnalignments);
- }
- }
}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java b/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
index 1313b4d..ec1fa91 100644
--- a/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
+++ b/src/tests/java/htsjdk/samtools/sra/SRAReferenceTest.java
@@ -6,9 +6,9 @@ import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-public class SRAReferenceTest {
+public class SRAReferenceTest extends AbstractSRATest {
@DataProvider(name = "testReference")
- public Object[][] createDataForReference() {
+ private Object[][] createDataForReference() {
return new Object[][] {
{"SRR2096940", "CM000681.1", 95001, 95050, "AGATGATTCAGTCTCACCAAGAACACTGAAAGTCACATGGCTACCAGCAT"},
};
@@ -16,10 +16,8 @@ public class SRAReferenceTest {
@Test(dataProvider = "testReference")
public void testReference(String acc, String refContig, int refStart, int refStop, String refBases) {
- if (!SRAAccession.isSupported()) return;
-
- ReferenceSequenceFile refSeqFile = new SRAIndexedSequenceFile(new SRAAccession(acc));
- ReferenceSequence refSeq = refSeqFile.getSubsequenceAt(refContig, refStart, refStop);
+ final ReferenceSequenceFile refSeqFile = new SRAIndexedSequenceFile(new SRAAccession(acc));
+ final ReferenceSequence refSeq = refSeqFile.getSubsequenceAt(refContig, refStart, refStop);
Assert.assertEquals(new String(refSeq.getBases()), refBases);
}
}
diff --git a/src/tests/java/htsjdk/samtools/sra/SRATest.java b/src/tests/java/htsjdk/samtools/sra/SRATest.java
index 86a5218..420a889 100644
--- a/src/tests/java/htsjdk/samtools/sra/SRATest.java
+++ b/src/tests/java/htsjdk/samtools/sra/SRATest.java
@@ -26,45 +26,57 @@
package htsjdk.samtools.sra;
-import htsjdk.samtools.*;
-
-import htsjdk.samtools.reference.ReferenceSequence;
-import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.BAMFileSpan;
+import htsjdk.samtools.BrowseableBAMIndex;
+import htsjdk.samtools.Chunk;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SAMUtils;
+import htsjdk.samtools.SAMValidationError;
+import htsjdk.samtools.SamInputResource;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.util.*;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
/**
* Integration tests for SRA functionality
*
* Created by andrii.nikitiuk on 8/24/15.
*/
-public class SRATest {
+public class SRATest extends AbstractSRATest {
@DataProvider(name = "testCounts")
- public Object[][] createDataForCounts() {
+ private Object[][] createDataForCounts() {
return new Object[][] {
{"SRR2096940", 10591, 498}
};
}
@Test(dataProvider = "testCounts")
- public void testCounts(String acc, int numberAlignments, int numberUnalignments) {
- if (!SRAAccession.isSupported()) return;
-
+ public void testCounts(String acc, int expectedNumMapped, int expectedNumUnmapped) {
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
final SAMRecordIterator samRecordIterator = reader.iterator();
- checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignments);
+ assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, expectedNumUnmapped);
}
@DataProvider(name = "testCountsBySpan")
- public Object[][] createDataForCountsBySpan() {
+ private Object[][] createDataForCountsBySpan() {
return new Object[][] {
{"SRR2096940", Arrays.asList(new Chunk(0, 59128983), new Chunk(59128983, 59141089)), 10591, 498},
{"SRR2096940", Arrays.asList(new Chunk(0, 29128983), new Chunk(29128983, 59141089)), 10591, 498},
@@ -75,35 +87,31 @@ public class SRATest {
}
@Test(dataProvider = "testCountsBySpan")
- public void testCountsBySpan(String acc, List<Chunk> chunks, int numberAlignments, int numberUnalignments) {
- if (!SRAAccession.isSupported()) return;
-
+ public void testCountsBySpan(String acc, List<Chunk> chunks, int expectedNumMapped, int expectedNumUnmapped) {
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
final SAMRecordIterator samRecordIterator = ((SamReader.Indexing) reader).iterator(new BAMFileSpan(chunks));
- checkAlignedUnalignedCountsByIterator(samRecordIterator, numberAlignments, numberUnalignments);
+ assertCorrectCountsOfMappedAndUnmappedRecords(samRecordIterator, expectedNumMapped, expectedNumUnmapped);
}
@DataProvider(name = "testGroups")
- public Object[][] createDataForGroups() {
+ private Object[][] createDataForGroups() {
return new Object[][] {
- {"SRR822962", new TreeSet<String>(Arrays.asList(
- "GS54389-FS3-L08", "GS57511-FS3-L08", "GS54387-FS3-L02", "GS54387-FS3-L01",
- "GS57510-FS3-L01", "GS57510-FS3-L03", "GS54389-FS3-L07", "GS54389-FS3-L05",
- "GS54389-FS3-L06", "GS57510-FS3-L02", "GS57510-FS3-L04", "GS54387-FS3-L03",
- "GS46253-FS3-L03"))
+ {"SRR822962", new TreeSet<>(Arrays.asList(
+ "GS54389-FS3-L08", "GS57511-FS3-L08", "GS54387-FS3-L02", "GS54387-FS3-L01",
+ "GS57510-FS3-L01", "GS57510-FS3-L03", "GS54389-FS3-L07", "GS54389-FS3-L05",
+ "GS54389-FS3-L06", "GS57510-FS3-L02", "GS57510-FS3-L04", "GS54387-FS3-L03",
+ "GS46253-FS3-L03"))
},
- {"SRR2096940", new HashSet<String>(Arrays.asList("SRR2096940"))}
+ {"SRR2096940", new HashSet<>(Arrays.asList("SRR2096940"))}
};
}
@Test(dataProvider = "testGroups")
public void testGroups(String acc, Set<String> groups) {
- if (!SRAAccession.isSupported()) return;
-
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
@@ -111,7 +119,7 @@ public class SRATest {
final SAMRecordIterator samRecordIterator = reader.iterator();
SAMFileHeader header = reader.getFileHeader();
- Set<String> headerGroups = new TreeSet<String>();
+ Set<String> headerGroups = new TreeSet<>();
for (SAMReadGroupRecord group : header.getReadGroups()) {
Assert.assertEquals(group.getReadGroupId(), group.getId());
headerGroups.add(group.getReadGroupId());
@@ -119,7 +127,7 @@ public class SRATest {
Assert.assertEquals(groups, headerGroups);
- Set<String> foundGroups = new TreeSet<String>();
+ Set<String> foundGroups = new TreeSet<>();
for (int i = 0; i < 10000; i++) {
if (!samRecordIterator.hasNext()) {
@@ -136,7 +144,7 @@ public class SRATest {
}
@DataProvider(name = "testReferences")
- public Object[][] createDataForReferences() {
+ private Object[][] createDataForReferences() {
return new Object[][] {
// primary alignment only
{"SRR1063272", 1,
@@ -153,8 +161,6 @@ public class SRATest {
@Test(dataProvider = "testReferences")
public void testReferences(String acc, int numberFirstReferenceFound, List<String> references, List<Integer> refLengths) {
- if (!SRAAccession.isSupported()) return;
-
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
@@ -162,7 +168,7 @@ public class SRATest {
final SAMRecordIterator samRecordIterator = reader.iterator();
SAMFileHeader header = reader.getFileHeader();
- Set<String> headerRefNames = new TreeSet<String>();
+ Set<String> headerRefNames = new TreeSet<>();
for (SAMSequenceRecord ref : header.getSequenceDictionary().getSequences()) {
String refName = ref.getSequenceName();
@@ -175,9 +181,9 @@ public class SRATest {
headerRefNames.add(refName);
}
- Assert.assertEquals(new TreeSet<String>(references), headerRefNames);
+ Assert.assertEquals(new TreeSet<>(references), headerRefNames);
- Set<String> foundRefNames = new TreeSet<String>();
+ Set<String> foundRefNames = new TreeSet<>();
for (int i = 0; i < 10000; i++) {
if (!samRecordIterator.hasNext()) {
break;
@@ -194,11 +200,11 @@ public class SRATest {
foundRefNames.add(refName);
}
- Assert.assertEquals(new TreeSet<String>(references.subList(0, numberFirstReferenceFound)), foundRefNames);
+ Assert.assertEquals(new TreeSet<>(references.subList(0, numberFirstReferenceFound)), foundRefNames);
}
@DataProvider(name = "testRows")
- public Object[][] createDataForRowsTest() {
+ private Object[][] createDataForRowsTest() {
return new Object[][] {
// primary alignment only
{"SRR1063272", 0, 99, "SRR1063272.R.1",
@@ -234,8 +240,6 @@ public class SRATest {
@Test(dataProvider = "testRows")
public void testRows(String acc, int recordIndex, int flags, String readName, String bases, String quals, int refStart, String cigar,
String refName, int mapQ, boolean hasMate, boolean isSecondaryAlignment) {
- if (!SRAAccession.isSupported()) return;
-
SAMRecord record = getRecordByIndex(acc, recordIndex, false);
checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
@@ -245,8 +249,6 @@ public class SRATest {
public void testRowsAfterIteratorDetach(String acc, int recordIndex, int flags, String readName, String bases, String quals,
int refStart, String cigar, String refName, int mapQ, boolean hasMate,
boolean isSecondaryAlignment) {
- if (!SRAAccession.isSupported()) return;
-
SAMRecord record = getRecordByIndex(acc, recordIndex, true);
checkSAMRecord(record, flags, readName, bases, quals, refStart, cigar, refName, mapQ, hasMate, isSecondaryAlignment);
@@ -256,8 +258,6 @@ public class SRATest {
public void testRowsOverrideValues(String acc, int recordIndex, int flags, String readName, String bases, String quals,
int refStart, String cigar, String refName, int mapQ, boolean hasMate,
boolean isSecondaryAlignment) {
- if (!SRAAccession.isSupported()) return;
-
SAMRecord record = getRecordByIndex(acc, recordIndex, true);
SAMFileHeader header = record.getHeader();
@@ -295,8 +295,6 @@ public class SRATest {
public void testRowsBySpan(String acc, int recordIndex, int flags, String readName, String bases, String quals,
int refStart, String cigar, String refName, int mapQ, boolean hasMate,
boolean isSecondaryAlignment) {
- if (!SRAAccession.isSupported()) return;
-
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
@@ -338,8 +336,6 @@ public class SRATest {
public void testRowsByIndex(String acc, int recordIndex, int flags, String readName, String bases, String quals,
int refStart, String cigar, String refName, int mapQ, boolean hasMate,
boolean isSecondaryAlignment) {
- if (!SRAAccession.isSupported()) return;
-
SamReader reader = SamReaderFactory.make().validationStringency(ValidationStringency.SILENT).open(
SamInputResource.of(new SRAAccession(acc))
);
@@ -432,33 +428,4 @@ public class SRATest {
}
}
- private void checkAlignedUnalignedCountsByIterator(SAMRecordIterator samRecordIterator,
- int numberAlignments, int numberUnalignments) {
- int countAlignments = 0, countUnalignments = 0;
- while (true) {
- boolean hasRecord = samRecordIterator.hasNext();
- SAMRecord record = null;
- try {
- record = samRecordIterator.next();
- Assert.assertTrue(hasRecord); // exception is not thrown if we came to this point
- } catch (NoSuchElementException e) {
- Assert.assertFalse(hasRecord);
- }
-
- Assert.assertEquals(hasRecord, record != null);
-
- if (record == null) {
- break;
- }
-
- if (record.getReadUnmappedFlag()) {
- countUnalignments++;
- } else {
- countAlignments++;
- }
- }
-
- Assert.assertEquals(numberAlignments, countAlignments);
- Assert.assertEquals(numberUnalignments, countUnalignments);
- }
}
diff --git a/src/tests/java/htsjdk/samtools/util/ComparableTupleTest.java b/src/tests/java/htsjdk/samtools/util/ComparableTupleTest.java
new file mode 100644
index 0000000..7e8b082
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/util/ComparableTupleTest.java
@@ -0,0 +1,61 @@
+package htsjdk.samtools.util;
+
+import htsjdk.variant.variantcontext.Allele;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Created by farjoun on 1/28/16.
+ */
+public class ComparableTupleTest {
+
+ private enum Tenum {
+ Hi,
+ Bye,
+ Ciao
+ }
+
+ private Allele A = Allele.create("A", false);
+ private Allele Aref = Allele.create("A", true);
+ private Allele G = Allele.create("G", false);
+
+ @DataProvider(name = "testComparableTupleData")
+ public Object[][] testComparableTupleData() {
+ return new Object[][]{
+ new Object[]{new ComparableTuple<>(1, 2), new ComparableTuple<>(1, 1), 2 - 1},
+ new Object[]{new ComparableTuple<>(1, 2), new ComparableTuple<>(2, 2), 1 - 2},
+ new Object[]{new ComparableTuple<>(1, 2), new ComparableTuple<>(1, 2), 0},
+
+ new Object[]{new ComparableTuple<>(1, "hi"), new ComparableTuple<>(1, "bye"), "hi".compareTo("bye")},
+ new Object[]{new ComparableTuple<>(1, "hi"), new ComparableTuple<>(2, "bye"), 1 - 2},
+ new Object[]{new ComparableTuple<>(1, "hi"), new ComparableTuple<>(1, "hi"), 0},
+
+ new Object[]{new ComparableTuple<>(A, Tenum.Hi), new ComparableTuple<>(Aref, Tenum.Bye), A.compareTo(Aref)},
+ new Object[]{new ComparableTuple<>(Aref, Tenum.Hi), new ComparableTuple<>(Aref, Tenum.Bye), Tenum.Hi.compareTo(Tenum.Bye)},
+ new Object[]{new ComparableTuple<>(Aref, Tenum.Hi), new ComparableTuple<>(Aref, Tenum.Hi), 0},
+ new Object[]{new ComparableTuple<>(Aref, Tenum.Hi), new ComparableTuple<>(G, Tenum.Ciao), Aref.compareTo(G)},
+ new Object[]{new ComparableTuple<>(A, Tenum.Ciao), new ComparableTuple<>(G, Tenum.Hi), A.compareTo(G)}
+ };
+ }
+
+ @Test(dataProvider = "testComparableTupleData")
+ public <T extends Comparable<T>, R extends Comparable<R>> void testComparableTuple(final ComparableTuple<T,R> lhs, final ComparableTuple<T,R> rhs, final int result) {
+ Assert.assertEquals(lhs.compareTo(rhs), result);
+ }
+
+
+ @DataProvider(name = "testComparableTupleNullData")
+ public Object[][] testComparableTupleNullData() {
+ return new Object[][]{
+ new Object[]{null, 2},
+ new Object[]{null, null},
+ new Object[]{"string", null}
+ };
+ }
+
+ @Test(dataProvider = "testComparableTupleNullData", expectedExceptions = IllegalArgumentException.class)
+ public void testComparableTupleNullData(String left, Integer right) {
+ new ComparableTuple<>(left, right);
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/util/IupacTest.java b/src/tests/java/htsjdk/samtools/util/IupacTest.java
index 5a9f5f5..64b78c0 100644
--- a/src/tests/java/htsjdk/samtools/util/IupacTest.java
+++ b/src/tests/java/htsjdk/samtools/util/IupacTest.java
@@ -67,7 +67,7 @@ public class IupacTest {
reader.close();
}
- @DataProvider(name = "basidDataProvider")
+ @DataProvider(name = "basicDataProvider")
public Object[][] basicDataProvider() {
return new Object[][]{
{BamFileIoUtils.BAM_FILE_EXTENSION},
diff --git a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
index 2ce0c79..a5157cf 100644
--- a/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
+++ b/src/tests/java/htsjdk/samtools/util/SequenceUtilTest.java
@@ -26,7 +26,6 @@ package htsjdk.samtools.util;
import htsjdk.samtools.Cigar;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.TextCigarCodec;
@@ -36,7 +35,6 @@ import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.HashSet;
-import java.util.List;
import java.util.Set;
/**
@@ -141,6 +139,43 @@ public class SequenceUtilTest {
};
}
+ @Test(dataProvider = "mismatchCountsDataProvider")
+ public void testCountMismatches(final String readString, final String cigar, final String reference,
+ final int expectedMismatchesExact, final int expectedMismatchesAmbiguous) {
+ final SAMRecord rec = new SAMRecord(null);
+ rec.setReadName("test");
+ rec.setReadString(readString);
+ rec.setCigarString(cigar);
+
+ final byte[] refBases = StringUtil.stringToBytes(reference);
+
+ final int nExact = SequenceUtil.countMismatches(rec, refBases, -1, false, false);
+ Assert.assertEquals(nExact, expectedMismatchesExact);
+
+ final int nAmbiguous = SequenceUtil.countMismatches(rec, refBases, -1, false, true);
+ Assert.assertEquals(nAmbiguous, expectedMismatchesAmbiguous);
+ }
+
+ @DataProvider(name="mismatchCountsDataProvider")
+ public Object[][] testMakeMismatchCountsDataProvider() {
+ // note: R=A|G
+ return new Object[][] {
+ {"A", "1M", "A", 0, 0},
+ {"A", "1M", "R", 1, 0},
+ {"G", "1M", "R", 1, 0},
+ {"C", "1M", "R", 1, 1},
+ {"T", "1M", "R", 1, 1},
+ {"N", "1M", "R", 1, 1},
+ {"R", "1M", "A", 1, 1},
+ {"R", "1M", "C", 1, 1},
+ {"R", "1M", "G", 1, 1},
+ {"R", "1M", "T", 1, 1},
+ {"R", "1M", "N", 1, 0},
+ {"R", "1M", "R", 0, 0},
+ {"N", "1M", "N", 0, 0}
+ };
+ }
+
@Test(dataProvider = "countInsertedAndDeletedBasesTestCases")
public void testCountInsertedAndDeletedBases(final String cigarString, final int insertedBases, final int deletedBases) {
final Cigar cigar = TextCigarCodec.decode(cigarString);
@@ -177,4 +212,201 @@ public class SequenceUtilTest {
final Set<String> expectedSet = new HashSet<String>(Arrays.asList(expectedKmers));
Assert.assertTrue(actualSet.equals(expectedSet));
}
+
+ @DataProvider(name = "testBisulfiteConversionDataProvider")
+ public Object[][] testBisulfiteConversionDataProvider() {
+ // C ref -> T read on the positive strand, and G ref -> A read on the negative strand
+ return new Object[][] {
+ {'C', 'T', false, false},
+ {'C', 'A', false, false},
+ {'C', 'C', false, false},
+ {'T', 'C', true, false},
+ {'G', 'T', false, false},
+ {'G', 'A', false, false},
+ {'G', 'G', false, false},
+ {'A', 'G', false, true}
+ };
+ }
+
+ @Test(dataProvider = "testBisulfiteConversionDataProvider")
+ public void testBisulfiteConversion(final char readBase, final char refBase, final boolean posStrandExpected, final boolean negStrandExpected) {
+ final boolean posStrand = SequenceUtil.isBisulfiteConverted((byte) readBase, (byte) refBase, false);
+ Assert.assertEquals(posStrand, posStrandExpected);
+ final boolean negStrand = SequenceUtil.isBisulfiteConverted((byte) readBase, (byte) refBase, true);
+ Assert.assertEquals(negStrand, negStrandExpected);
+ }
+
+ @Test(dataProvider = "basesEqualDataProvider")
+ public void testBasesEqual(final char base1, final char base2,
+ final boolean expectedB1EqualsB2,
+ final boolean expectedB1ReadMatchesB2Ref,
+ final boolean expectedB2ReadMatchesB1Ref) {
+
+ final char[] base1UcLc = new char[] { toUpperCase(base1), toLowerCase(base1) };
+ final char[] base2UcLc = new char[] { toUpperCase(base2), toLowerCase(base2) };
+ // Test over all permutations - uc vs uc, uc vs lc, lc vs uc, lc vs lc
+ for (char theBase1 : base1UcLc) {
+ for (char theBase2 : base2UcLc) {
+ // for equality, order should not matter
+ final boolean b1EqualsB2 = SequenceUtil.basesEqual((byte) theBase1, (byte) theBase2);
+ Assert.assertEquals(b1EqualsB2, expectedB1EqualsB2, "basesEqual test failed for '" + theBase1 + "' vs. '" + theBase2 + "'");
+ final boolean b2EqualsB1 = SequenceUtil.basesEqual((byte) theBase2, (byte) theBase1);
+ Assert.assertEquals(b2EqualsB1, expectedB1EqualsB2, "basesEqual test failed for '" + theBase1 + "' vs. '" + theBase2 + "'");
+
+ // for ambiguous read/ref matching, the order does matter
+ final boolean b1ReadMatchesB2Ref = SequenceUtil.readBaseMatchesRefBaseWithAmbiguity((byte) theBase1, (byte) theBase2);
+ Assert.assertEquals(b1ReadMatchesB2Ref, expectedB1ReadMatchesB2Ref, "readBaseMatchesRefBaseWithAmbiguity test failed for '" + theBase1 + "' vs. '" + theBase2 + "'");
+ final boolean b2ReadMatchesB1Ref = SequenceUtil.readBaseMatchesRefBaseWithAmbiguity((byte) theBase2, (byte) theBase1);
+ Assert.assertEquals(b2ReadMatchesB1Ref, expectedB2ReadMatchesB1Ref, "readBaseMatchesRefBaseWithAmbiguity test failed for '" + theBase1 + "' vs. '" + theBase2 + "'");
+ }
+ }
+ }
+
+ /*
+ * For reference:
+ * M = A|C
+ * R = A|G
+ * W = A|T
+ * S = C|G
+ * Y = C|T
+ * K = G|T
+ * V = A|C|G
+ * H = A|C|T
+ * D = A|G|T
+ * B = C|G|T
+ * N = A|C|G|T
+ */
+ @DataProvider(name="basesEqualDataProvider")
+ public Object[][] testBasesEqualDataProvider() {
+ return new Object[][] {
+ {'A', 'A', true, true, true},
+ {'A', 'C', false, false, false},
+ {'A', 'G', false, false, false},
+ {'A', 'T', false, false, false},
+ {'A', 'M', false, true, false},
+ {'A', 'R', false, true, false},
+ {'A', 'W', false, true, false},
+ {'A', 'S', false, false, false},
+ {'A', 'Y', false, false, false},
+ {'A', 'K', false, false, false},
+ {'A', 'V', false, true, false},
+ {'A', 'H', false, true, false},
+ {'A', 'D', false, true, false},
+ {'A', 'B', false, false, false},
+ {'A', 'N', false, true, false},
+ {'C', 'C', true, true, true},
+ {'C', 'G', false, false, false},
+ {'C', 'T', false, false, false},
+ {'C', 'M', false, true, false},
+ {'C', 'R', false, false, false},
+ {'C', 'W', false, false, false},
+ {'C', 'S', false, true, false},
+ {'C', 'Y', false, true, false},
+ {'C', 'K', false, false, false},
+ {'C', 'V', false, true, false},
+ {'C', 'H', false, true, false},
+ {'C', 'D', false, false, false},
+ {'C', 'N', false, true, false},
+ {'G', 'G', true, true, true},
+ {'G', 'T', false, false, false},
+ {'G', 'M', false, false, false},
+ {'G', 'R', false, true, false},
+ {'G', 'W', false, false, false},
+ {'G', 'S', false, true, false},
+ {'G', 'Y', false, false, false},
+ {'G', 'K', false, true, false},
+ {'G', 'V', false, true, false},
+ {'G', 'H', false, false, false},
+ {'G', 'N', false, true, false},
+ {'T', 'T', true, true, true},
+ {'T', 'W', false, true, false},
+ {'T', 'Y', false, true, false},
+ {'T', 'V', false, false, false},
+ {'M', 'T', false, false, false},
+ {'M', 'M', true, true, true},
+ {'M', 'R', false, false, false},
+ {'M', 'W', false, false, false},
+ {'M', 'S', false, false, false},
+ {'M', 'Y', false, false, false},
+ {'M', 'V', false, true, false},
+ {'M', 'N', false, true, false},
+ {'R', 'T', false, false, false},
+ {'R', 'R', true, true, true},
+ {'R', 'W', false, false, false},
+ {'R', 'S', false, false, false},
+ {'R', 'Y', false, false, false},
+ {'R', 'V', false, true, false},
+ {'W', 'W', true, true, true},
+ {'W', 'Y', false, false, false},
+ {'S', 'T', false, false, false},
+ {'S', 'W', false, false, false},
+ {'S', 'S', true, true, true},
+ {'S', 'Y', false, false, false},
+ {'S', 'V', false, true, false},
+ {'Y', 'Y', true, true, true},
+ {'K', 'T', false, false, true},
+ {'K', 'M', false, false, false},
+ {'K', 'R', false, false, false},
+ {'K', 'W', false, false, false},
+ {'K', 'S', false, false, false},
+ {'K', 'Y', false, false, false},
+ {'K', 'K', true, true, true},
+ {'K', 'V', false, false, false},
+ {'K', 'N', false, true, false},
+ {'V', 'W', false, false, false},
+ {'V', 'Y', false, false, false},
+ {'V', 'V', true, true, true},
+ {'H', 'T', false, false, true},
+ {'H', 'M', false, false, true},
+ {'H', 'R', false, false, false},
+ {'H', 'W', false, false, true},
+ {'H', 'S', false, false, false},
+ {'H', 'Y', false, false, true},
+ {'H', 'K', false, false, false},
+ {'H', 'V', false, false, false},
+ {'H', 'H', true, true, true},
+ {'H', 'N', false, true, false},
+ {'D', 'G', false, false, true},
+ {'D', 'T', false, false, true},
+ {'D', 'M', false, false, false},
+ {'D', 'R', false, false, true},
+ {'D', 'W', false, false, true},
+ {'D', 'S', false, false, false},
+ {'D', 'Y', false, false, false},
+ {'D', 'K', false, false, true},
+ {'D', 'V', false, false, false},
+ {'D', 'H', false, false, false},
+ {'D', 'D', true, true, true},
+ {'D', 'N', false, true, false},
+ {'B', 'C', false, false, true},
+ {'B', 'G', false, false, true},
+ {'B', 'T', false, false, true},
+ {'B', 'M', false, false, false},
+ {'B', 'R', false, false, false},
+ {'B', 'W', false, false, false},
+ {'B', 'S', false, false, true},
+ {'B', 'Y', false, false, true},
+ {'B', 'K', false, false, true},
+ {'B', 'V', false, false, false},
+ {'B', 'H', false, false, false},
+ {'B', 'D', false, false, false},
+ {'B', 'B', true, true, true},
+ {'B', 'N', false, true, false},
+ {'N', 'T', false, false, true},
+ {'N', 'R', false, false, true},
+ {'N', 'W', false, false, true},
+ {'N', 'S', false, false, true},
+ {'N', 'Y', false, false, true},
+ {'N', 'V', false, false, true},
+ {'N', 'N', true, true, true}
+ };
+ }
+
+ private char toUpperCase(final char base) {
+ return base > 90 ? (char) (base - 32) : base;
+ }
+
+ private char toLowerCase(final char base) {
+ return (char) (toUpperCase(base) + 32);
+ }
}
diff --git a/src/tests/java/htsjdk/samtools/util/TrimmingUtilTest.java b/src/tests/java/htsjdk/samtools/util/TrimmingUtilTest.java
new file mode 100644
index 0000000..12cffc6
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/util/TrimmingUtilTest.java
@@ -0,0 +1,70 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.samtools.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for a simple phred-style quality trimming algorithm.
+ */
+public class TrimmingUtilTest {
+ @Test
+ public void testEasyCases() {
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(30,30,30,30,30, 2, 2, 2, 2, 2), 15), 5);
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(30,30,30,30,30,30,30,30,30,30), 15), 10);
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(12,12,12,12,12,12,12,12,12,12), 15), 0);
+ }
+
+ @Test
+ public void testBoundaryCasesForTrimQual() {
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(12,12,12,12,12,12,12,12,12,12), 11), 10);
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(12,12,12,12,12,12,12,12,12,12), 12), 10);
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(12,12,12,12,12,12,12,12,12,12), 13), 0);
+ }
+
+ @Test
+ public void testLowQualityWithOccasionalHighQuality() {
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(30,30,30, 2, 5, 2, 3,20, 2, 6), 15), 3);
+ }
+
+ @Test
+ public void testAlternatingHighAndLowQuality() {
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(30, 2,30, 2,30, 2,30, 2,30, 2), 15), 9);
+ }
+ @Test
+ public void testEmptyQuals() {
+ Assert.assertEquals(TrimmingUtil.findQualityTrimPoint(byteArray(), 15), 0);
+ }
+
+ /** Makes a byte[] from a variable length argument list of ints. */
+ byte[] byteArray(final int... ints) {
+ final byte[] bytes = new byte[ints.length];
+ for (int i=0; i<bytes.length; ++i) {
+ bytes[i] = (byte) ints[i];
+ }
+
+ return bytes;
+ }
+}
diff --git a/src/tests/java/htsjdk/samtools/util/TupleTest.java b/src/tests/java/htsjdk/samtools/util/TupleTest.java
new file mode 100644
index 0000000..bed4550
--- /dev/null
+++ b/src/tests/java/htsjdk/samtools/util/TupleTest.java
@@ -0,0 +1,62 @@
+package htsjdk.samtools.util;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Created by farjoun on 1/29/16.
+ */
+public class TupleTest {
+
+ @Test
+ public void testEquals() throws Exception {
+ Assert.assertEquals(new Tuple<>(1, "hi"), new Tuple<>(1, "hi"));
+
+ Assert.assertEquals(new Tuple<>(1, null), new Tuple<>(1, null));
+ Assert.assertEquals(new Tuple<>(null, "hi"), new Tuple<>(null, "hi"));
+ Assert.assertEquals(new Tuple<>(null, null), new Tuple<>(null, null));
+
+
+ Assert.assertNotSame(new Tuple<Integer, Integer>(1, null), new Tuple<Integer, String>(1, null));
+ Assert.assertNotSame(new Tuple<Integer, String>(null, "hi"), new Tuple<String, String>(null, "hi"));
+ Assert.assertNotSame(new Tuple<Integer, Integer>(null, null), new Tuple<Integer, String>(null, null));
+
+
+ Assert.assertNotSame(new Tuple<>(1, "hi"), new Tuple<>(1, "bye"));
+ Assert.assertNotSame(new Tuple<>(2, "hi"), new Tuple<>(1, "hi"));
+ Assert.assertNotSame(new Tuple<>(2, "hi"), new Tuple<>(1, null));
+ Assert.assertNotSame(new Tuple<>(2, "hi"), new Tuple<>(null, "hi"));
+
+ }
+
+ @Test
+ public void testHashCode() throws Exception {
+ Assert.assertEquals(new Tuple<>(1, "hi").hashCode(), new Tuple<>(1, "hi").hashCode());
+
+ Assert.assertEquals(new Tuple<>(1, null).hashCode(), new Tuple<>(1, null).hashCode());
+ Assert.assertEquals(new Tuple<>(null, "hi").hashCode(), new Tuple<>(null, "hi").hashCode());
+ Assert.assertEquals(new Tuple<>(null, null).hashCode(), new Tuple<>(null, null).hashCode());
+
+ //even though these are of different types, the value is null and so I have to make these equal...
+ Assert.assertEquals(new Tuple<Integer, Integer>(1, null).hashCode(), new Tuple<Integer, String>(1, null).hashCode());
+ Assert.assertEquals(new Tuple<Integer, String>(null, "hi").hashCode(), new Tuple<String, String>(null, "hi").hashCode());
+ Assert.assertEquals(new Tuple<Integer, Integer>(null, null).hashCode(), new Tuple<Integer, String>(null, null).hashCode());
+
+ Assert.assertNotSame(new Tuple<>(1, "hi").hashCode(), new Tuple<>(1, "bye").hashCode());
+ Assert.assertNotSame(new Tuple<>(2, "hi").hashCode(), new Tuple<>(1, "hi").hashCode());
+ Assert.assertNotSame(new Tuple<>(2, "hi").hashCode(), new Tuple<>(1, null).hashCode());
+ Assert.assertNotSame(new Tuple<>(2, "hi").hashCode(), new Tuple<>(null, "hi").hashCode());
+
+ }
+
+ @Test
+ public void testToString() throws Exception {
+ Assert.assertEquals(new Tuple<>(1, 2).toString(), "[1, 2]");
+ Assert.assertEquals(new Tuple<>(1, "hi!").toString(), "[1, hi!]");
+ Assert.assertEquals(new Tuple<>(1, new Tuple<>(2, 3)).toString(), "[1, [2, 3]]");
+
+ Assert.assertEquals(new Tuple<>(1, null).toString(), "[1, null]");
+ Assert.assertEquals(new Tuple<>(null, null).toString(), "[null, null]");
+
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/tribble/util/ftp/FTPClientTest.java b/src/tests/java/htsjdk/tribble/util/ftp/FTPClientTest.java
index 4855e7e..3979b08 100644
--- a/src/tests/java/htsjdk/tribble/util/ftp/FTPClientTest.java
+++ b/src/tests/java/htsjdk/tribble/util/ftp/FTPClientTest.java
@@ -11,9 +11,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.net.UnknownHostException;
-
-
-
/**
* @author Jim Robinson
* @since 10/3/11
@@ -30,7 +27,7 @@ public class FTPClientTest {
public void setUp() throws IOException {
client = new FTPClient();
FTPReply reply = client.connect(host);
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "connect");
}
@AfterMethod
@@ -48,10 +45,10 @@ public class FTPClientTest {
public void testPasv() throws Exception {
try {
FTPReply reply = client.login("anonymous", "igv at broadinstitute.org");
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "login");
reply = client.pasv();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "pasv");
} finally {
client.closeDataStream();
}
@@ -64,35 +61,34 @@ public class FTPClientTest {
Assert.assertTrue(reply.isSuccess());
reply = client.binary();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "binary");
reply = client.size(file);
String val = reply.getReplyString();
int size = Integer.parseInt(val);
- Assert.assertEquals(fileSize, size);
+ Assert.assertEquals(fileSize, size, "size");
}
-
@Test
public void testDownload() throws Exception {
try {
FTPReply reply = client.login("anonymous", "igv at broadinstitute.org");
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "login");
reply = client.binary();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "binary");
reply = client.pasv();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "pasv");
reply = client.retr(file);
- Assert.assertTrue(reply.getCode() == 150);
+ Assert.assertEquals(reply.getCode(), 150, "retr");
InputStream is = client.getDataStream();
int idx = 0;
int b;
while ((b = is.read()) >= 0) {
- Assert.assertEquals(expectedBytes[idx], (byte) b);
+ Assert.assertEquals(expectedBytes[idx], (byte) b,"reading from stream");
idx++;
}
@@ -100,35 +96,33 @@ public class FTPClientTest {
client.closeDataStream();
FTPReply reply = client.retr(file);
System.out.println(reply.getCode());
- Assert.assertTrue(reply.isSuccess());
-
+ Assert.assertTrue(reply.isSuccess(), "close");
}
}
-
@Test
public void testRest() throws Exception {
try {
FTPReply reply = client.login("anonymous", "igv at broadinstitute.org");
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "login");
reply = client.binary();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "binary");
reply = client.pasv();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "pasv");
final int restPosition = 5;
client.setRestPosition(restPosition);
reply = client.retr(file);
- Assert.assertTrue(reply.getCode() == 150);
+ Assert.assertEquals(reply.getCode(), 150, "retr");
InputStream is = client.getDataStream();
int idx = restPosition;
int b;
while ((b = is.read()) >= 0) {
- Assert.assertEquals(expectedBytes[idx], (byte) b);
+ Assert.assertEquals(expectedBytes[idx], (byte) b, "reading from stream");
idx++;
}
@@ -136,8 +130,7 @@ public class FTPClientTest {
client.closeDataStream();
FTPReply reply = client.retr(file);
System.out.println(reply.getCode());
- Assert.assertTrue(reply.isSuccess());
-
+ Assert.assertTrue(reply.isSuccess(), "close");
}
}
@@ -152,22 +145,21 @@ public class FTPClientTest {
FTPClient client = new FTPClient();
FTPReply reply = client.connect(host);
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "connect");
reply = client.login("anonymous", "igv at broadinstitute.org");
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "login");
reply = client.binary();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "binary");
reply = client.executeCommand("size " + file);
- Assert.assertEquals(550, reply.getCode());
+ Assert.assertEquals(550, reply.getCode(), "size");
client.disconnect();
-
}
- /**
+ /**
* Test accessing a non-existent server
*/
@Test
@@ -184,48 +176,40 @@ public class FTPClientTest {
// This is expected
}
-
client.disconnect();
-
}
-
@Test
public void testMultiplePasv() throws Exception {
try {
FTPReply reply = client.login("anonymous", "igv at broadinstitute.org");
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "login");
reply = client.pasv();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "pasv 1");
client.closeDataStream();
reply = client.pasv();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "pasv 2");
client.closeDataStream();
-
-
}
-
finally {
}
}
-
@Test
public void testMultipleRest() throws Exception {
FTPReply reply = client.login("anonymous", "igv at broadinstitute.org");
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "login");
reply = client.binary();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "binary");
restRetr(5, 10);
restRetr(2, 10);
restRetr(15, 10);
-
}
private void restRetr(int restPosition, int length) throws IOException {
@@ -234,7 +218,7 @@ public class FTPClientTest {
if (client.getDataStream() == null) {
FTPReply reply = client.pasv();
- Assert.assertTrue(reply.isSuccess());
+ Assert.assertTrue(reply.isSuccess(), "pasv");
}
client.setRestPosition(restPosition);
@@ -247,10 +231,9 @@ public class FTPClientTest {
byte[] buffer = new byte[length];
is.read(buffer);
-
for (int i = 0; i < length; i++) {
System.out.print((char) buffer[i]);
- Assert.assertEquals(expectedBytes[i + restPosition], buffer[i]);
+ Assert.assertEquals(expectedBytes[i + restPosition], buffer[i], "reading from stream");
}
System.out.println();
}
@@ -259,8 +242,6 @@ public class FTPClientTest {
client.closeDataStream();
FTPReply reply = client.getReply(); // <== MUST READ THE REPLY
System.out.println(reply.getReplyString());
-
-
}
}
}
diff --git a/src/tests/java/htsjdk/variant/variantcontext/GenotypeLikelihoodsUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/GenotypeLikelihoodsUnitTest.java
index de16f4c..b513396 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/GenotypeLikelihoodsUnitTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/GenotypeLikelihoodsUnitTest.java
@@ -33,11 +33,15 @@ import htsjdk.tribble.TribbleException;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.utils.GeneralUtils;
import org.testng.Assert;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.EnumMap;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
/**
@@ -49,6 +53,13 @@ public class GenotypeLikelihoodsUnitTest extends VariantBaseTest {
final static String vPLString = "93,0,39";
double[] triAllelic = new double[]{-4.2,-2.0,-3.0,-1.6,0.0,-4.0}; //AA,AB,AC,BB,BC,CC
+ @BeforeTest
+ public void initializeAnyploidPLIndexToAlleleIndices() {
+ GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(1, 1);
+ GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(2, 2);
+ GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(2, 3);
+ }
+
@Test
public void testFromVector2() {
GenotypeLikelihoods gl = GenotypeLikelihoods.fromLog10Likelihoods(v);
@@ -182,22 +193,144 @@ public class GenotypeLikelihoodsUnitTest extends VariantBaseTest {
}
}
- @Test
- public void testGetAllelePair(){
- allelePairTest(0, 0, 0);
- allelePairTest(1, 0, 1);
- allelePairTest(2, 1, 1);
- allelePairTest(3, 0, 2);
- allelePairTest(4, 1, 2);
- allelePairTest(5, 2, 2);
- allelePairTest(6, 0, 3);
- allelePairTest(7, 1, 3);
- allelePairTest(8, 2, 3);
- allelePairTest(9, 3, 3);
+ @DataProvider
+ public Object[][] testGetAllelePairData() {
+ return new Object[][]{
+ {0, 0, 0},
+ {1, 0, 1},
+ {2, 1, 1},
+ {3, 0, 2},
+ {4, 1, 2},
+ {5, 2, 2},
+ {6, 0, 3},
+ {7, 1, 3},
+ {8, 2, 3},
+ {9, 3, 3}
+ };
}
-
- private void allelePairTest(int PLindex, int allele1, int allele2) {
+
+ @Test(dataProvider = "testGetAllelePairData")
+ public void testGetAllelePair(final int PLindex, final int allele1, final int allele2) {
Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex1, allele1, "allele index " + allele1 + " from PL index " + PLindex + " was not calculated correctly");
Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex2, allele2, "allele index " + allele2 + " from PL index " + PLindex + " was not calculated correctly");
}
+
+ @DataProvider
+ public Object[][] testCalculateAnyploidPLcacheData() {
+ return new Object[][]{
+ {
+ 1, 1,
+ Arrays.asList(Arrays.asList(0),
+ Arrays.asList(1)
+ )
+ },
+ {
+ 2, 2,
+ Arrays.asList(Arrays.asList(0, 0),
+ Arrays.asList(0, 1),
+ Arrays.asList(1, 1),
+ Arrays.asList(0, 2),
+ Arrays.asList(1, 2),
+ Arrays.asList(2, 2)
+ )
+ },
+ {
+ 2, 3,
+ Arrays.asList(Arrays.asList(0, 0, 0),
+ Arrays.asList(0, 0, 1),
+ Arrays.asList(0, 1, 1),
+ Arrays.asList(1, 1, 1),
+ Arrays.asList(0, 0, 2),
+ Arrays.asList(0, 1, 2),
+ Arrays.asList(1, 1, 2),
+ Arrays.asList(0, 2, 2),
+ Arrays.asList(1, 2, 2),
+ Arrays.asList(2, 2, 2)
+ )
+ },
+ {
+ 2, -1,
+ Arrays.asList(Arrays.asList())
+ },
+ {
+ -1, 2,
+ Arrays.asList(Arrays.asList())
+ }
+ };
+ }
+
+ @Test(dataProvider = "testCalculateAnyploidPLcacheData")
+ public void testCalculateAnyploidPLcache(final int altAlleles, final int ploidy, final List<List<Integer>> expected) {
+ List<List<Integer>> anyploidPLIndexToAlleleIndices = GenotypeLikelihoods.calculateAnyploidPLcache(altAlleles, ploidy);
+ for ( int i=0; i < anyploidPLIndexToAlleleIndices.size(); i++ )
+ Assert.assertEquals(anyploidPLIndexToAlleleIndices.get(i), expected.get(i));
+ }
+
+ @Test(dataProvider = "testCalculateAnyploidPLcacheData")
+ public void testInitializeAnyploidPLIndexToAlleleIndices(final int altAlleles, final int ploidy, final List<List<Integer>> expected) {
+ if ( altAlleles >= 1 && ploidy >= 1 ) { // Bypass test with bad data
+ Map<Integer, List<List<Integer>>> expectedMap = new HashMap<Integer, List<List<Integer>>>();
+ expectedMap.put(ploidy, expected);
+ for (Map.Entry<Integer, List<List<Integer>>> entry : GenotypeLikelihoods.anyploidPloidyToPLIndexToAlleleIndices.entrySet()) {
+ if (expectedMap.containsKey(entry.getKey()))
+ Assert.assertEquals(entry.getValue(), expectedMap.get(entry.getKey()));
+ }
+ }
+ }
+
+ @DataProvider
+ public Object[][] testInitializeAnyploidPLIndexToAlleleIndiceseBadData() {
+ return new Object[][]{
+ { 2, -1 },
+ { -1, 2 }
+ };
+ }
+
+ @Test(dataProvider = "testInitializeAnyploidPLIndexToAlleleIndiceseBadData", expectedExceptions = IllegalArgumentException.class)
+ public void testInitializeAnyploidPLIndexToAlleleIndicesBadData(final int altAlleles, final int ploidy) {
+ GenotypeLikelihoods.initializeAnyploidPLIndexToAlleleIndices(altAlleles, ploidy);
+ }
+
+ @DataProvider
+ public Object[][] testGetAllelesData() {
+ return new Object[][]{
+ {0, 2, 3, Arrays.asList(0,0,0)},
+ {1, 2, 3, Arrays.asList(0,0,1)},
+ {2, 2, 3, Arrays.asList(0,1,1)},
+ {3, 2, 3, Arrays.asList(1,1,1)},
+ {4, 2, 3, Arrays.asList(0,0,2)},
+ {5, 2, 3, Arrays.asList(0,1,2)},
+ {6, 2, 3, Arrays.asList(1,1,2)},
+ {7, 2, 3, Arrays.asList(0,2,2)},
+ {8, 2, 3, Arrays.asList(1,2,2)},
+ {9, 2, 3, Arrays.asList(2,2,2)},
+ {1, 2, 1, Arrays.asList(1)}
+ };
+ }
+
+ @Test(dataProvider = "testGetAllelesData")
+ public void testGetAlleles(final int PLindex, final int altAlleles, final int ploidy, final List<Integer> expected ) {
+ Assert.assertEquals(GenotypeLikelihoods.getAlleles(PLindex, ploidy), expected);
+ }
+
+ @DataProvider
+ public Object[][] testGetAllelesIndexOutOfBoundsData() {
+ return new Object[][]{
+ {-1, 3}, // PL index too small, non-diploid
+ {10, 3}, // PL index too large, non-diploid
+ {-1, 2}, // PL index too small, diploid
+ {GenotypeLikelihoods.numLikelihoods(GenotypeLikelihoods.MAX_DIPLOID_ALT_ALLELES_THAT_CAN_BE_GENOTYPED+1,2), 2} // PL index too large, diploid
+ };
+ }
+
+ @Test(dataProvider = "testGetAllelesIndexOutOfBoundsData", expectedExceptions = IllegalStateException.class)
+ public void testGetAllelesOutOfBounds(final int PLindex, final int ploidy) {
+ final List<Integer> alleles = GenotypeLikelihoods.getAlleles(PLindex, ploidy);
+ }
+
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testGetAllelesUnitialized() {
+ GenotypeLikelihoods.anyploidPloidyToPLIndexToAlleleIndices.clear();
+ final List<Integer> alleles = GenotypeLikelihoods.getAlleles(0, 3);
+ }
}
\ No newline at end of file
diff --git a/src/tests/java/htsjdk/variant/variantcontext/GenotypeUnitTest.java b/src/tests/java/htsjdk/variant/variantcontext/GenotypeUnitTest.java
index 3012ed5..a698407 100644
--- a/src/tests/java/htsjdk/variant/variantcontext/GenotypeUnitTest.java
+++ b/src/tests/java/htsjdk/variant/variantcontext/GenotypeUnitTest.java
@@ -63,6 +63,8 @@ public class GenotypeUnitTest extends VariantBaseTest {
Assert.assertEquals(makeGB().filters("x", "y", "z").make().getFilters(), "x;y;z", "Multiple filter field values should be joined with ;");
Assert.assertTrue(makeGB().filters("x", "y", "z").make().isFiltered(), "Multiple filter values should be filtered");
Assert.assertEquals(makeGB().filter("x;y;z").make().getFilters(), "x;y;z", "Multiple filter field values should be joined with ;");
+ Assert.assertFalse(makeGB().filter("").make().isFiltered(), "empty filters should count as unfiltered");
+ Assert.assertEquals(makeGB().filter("").make().getFilters(), null, "empty filter string should result in null filters");
}
// public Genotype(String sampleName, List<Allele> alleles, double negLog10PError, Set<String> filters, Map<String, ?> attributes, boolean isPhased) {
diff --git a/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java b/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java
new file mode 100644
index 0000000..52a5889
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/variantcontext/filter/JavascriptVariantFilterTest.java
@@ -0,0 +1,72 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Pierre Lindenbaum @yokofakun Institut du Thorax - Nantes - France
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package htsjdk.variant.variantcontext.filter;
+
+import htsjdk.variant.vcf.VCFFileReader;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * @author Pierre Lindenbaum PhD Institut du Thorax - INSERM - Nantes - France
+ */
+
+public class JavascriptVariantFilterTest {
+ final File testDir = new File("testdata/htsjdk/variant");
+
+ @DataProvider
+ public Object[][] jsData() {
+ return new Object[][] {
+ { "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf", "variantFilter01.js",61 },
+ { "ILLUMINA.wex.broad_phase2_baseline.20111114.both.exome.genotypes.1000.vcf", "variantFilter02.js",38 }, };
+ }
+
+ @Test(dataProvider = "jsData")
+ public void testJavascriptFilters(final String vcfFile, final String javascriptFile, final int expectCount) {
+ final File vcfInput = new File(testDir, vcfFile);
+ final File jsInput = new File(testDir, javascriptFile);
+ final VCFFileReader vcfReader = new VCFFileReader(vcfInput, false);
+ final JavascriptVariantFilter filter;
+ try {
+ filter = new JavascriptVariantFilter(jsInput, vcfReader.getFileHeader());
+ } catch (IOException err) {
+ Assert.fail("cannot read script "+jsInput, err);
+ vcfReader.close();
+ return;
+ }
+ final FilteringIterator iter = new FilteringIterator(vcfReader.iterator(), filter);
+ int count = 0;
+ while (iter.hasNext()) {
+ iter.next();
+ ++count;
+ }
+ iter.close();
+ vcfReader.close();
+ Assert.assertEquals(count, expectCount, "Expected number of variants " + expectCount + " but got " + count);
+ }
+}
diff --git a/src/tests/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java b/src/tests/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
new file mode 100644
index 0000000..02cde53
--- /dev/null
+++ b/src/tests/java/htsjdk/variant/vcf/VCFHeaderLineUnitTest.java
@@ -0,0 +1,43 @@
+package htsjdk.variant.vcf;
+
+import htsjdk.variant.VariantBaseTest;
+import org.testng.annotations.Test;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotNull;
+
+public class VCFHeaderLineUnitTest extends VariantBaseTest {
+
+ @Test
+ public void testEncodeVCFHeaderLineWithUnescapedQuotes() {
+
+ final Map<String, String> attributes = new LinkedHashMap<>();
+ attributes.put("ID", "VariantFiltration");
+ attributes.put("CommandLineOptions", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
+
+ final String encodedAttributes = VCFHeaderLine.toStringEncoding(attributes);
+ assertNotNull(encodedAttributes);
+
+ final String expectedEncoding = "<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+ assertEquals(encodedAttributes, expectedEncoding);
+ }
+
+
+ @Test
+ public void testEncodeVCFHeaderLineWithEscapedQuotes() {
+
+ final Map<String, String> attributes = new LinkedHashMap<>();
+ attributes.put("ID", "VariantFiltration");
+ attributes.put("CommandLineOptions", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]");
+
+ final String encodedAttributes = VCFHeaderLine.toStringEncoding(attributes);
+ assertNotNull(encodedAttributes);
+
+ final String expectedEncoding = "<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">";
+ assertEquals(encodedAttributes, expectedEncoding);
+ }
+
+}
diff --git a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
index 1a53cd6..7f2437e 100644
--- a/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
+++ b/src/tests/java/htsjdk/variant/vcf/VCFHeaderUnitTest.java
@@ -25,7 +25,10 @@
package htsjdk.variant.vcf;
+import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.TestUtil;
+import htsjdk.tribble.AbstractFeatureReader;
+import htsjdk.tribble.FeatureReader;
import htsjdk.tribble.TribbleException;
import htsjdk.tribble.readers.AsciiLineReader;
import htsjdk.tribble.readers.AsciiLineReaderIterator;
@@ -33,16 +36,19 @@ import htsjdk.tribble.readers.LineIteratorImpl;
import htsjdk.tribble.readers.LineReaderUtil;
import htsjdk.variant.VariantBaseTest;
import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.writer.Options;
+import htsjdk.variant.variantcontext.writer.VariantContextWriter;
+import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import org.testng.Assert;
+import org.testng.annotations.AfterClass;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.io.*;
import java.math.BigInteger;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
+import java.util.*;
/**
* Created by IntelliJ IDEA.
@@ -53,6 +59,8 @@ import java.util.List;
*/
public class VCFHeaderUnitTest extends VariantBaseTest {
+ private File tempDir;
+
private VCFHeader createHeader(String headerStr) {
VCFCodec codec = new VCFCodec();
VCFHeader header = (VCFHeader) codec.readActualHeader(new LineIteratorImpl(LineReaderUtil.fromStringReader(
@@ -61,6 +69,19 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
return header;
}
+ @BeforeClass
+ private void createTemporaryDirectory() {
+ tempDir = TestUtil.getTempDirectory("VCFHeader", "VCFHeaderTest");
+ }
+
+ @AfterClass
+ private void deleteTemporaryDirectory() {
+ for (File f : tempDir.listFiles()) {
+ f.delete();
+ }
+ tempDir.delete();
+ }
+
@Test
public void testVCF4ToVCF4() {
VCFHeader header = createHeader(VCF4headerStrings);
@@ -293,6 +314,80 @@ public class VCFHeaderUnitTest extends VariantBaseTest {
Assert.assertEquals(deserializedHeader.toString(), originalHeader.toString(), "String representation of header not the same before/after serialization");
}
+ @Test
+ public void testVCFHeaderQuoteEscaping() throws Exception {
+ // this test ensures that the end-to-end process of quote escaping is stable when headers are
+ // read and re-written; i.e., that quotes which are already escaped won't be re-escaped. It does
+ // this by reading a test file, adding a header line with an unescaped quote, writing out a copy
+ // of the file, reading it back in and writing a second copy, and finally reading back the second
+ // copy and comparing it to the first.
+
+ // read an existing VCF
+ final VCFFileReader originalFileReader = new VCFFileReader(new File("testdata/htsjdk/variant/HiSeq.10000.vcf"), false);
+ final VCFHeader originalHeader = originalFileReader.getFileHeader();
+
+ // add a header line with quotes to the header
+ final Map<String, String> attributes = new LinkedHashMap<>();
+ attributes.put("ID", "VariantFiltration");
+ attributes.put("CommandLineOptions", "filterName=[ANNOTATION] filterExpression=[ANNOTATION == \"NA\" || ANNOTATION <= 2.0]");
+ final VCFSimpleHeaderLine addedHeaderLine = new VCFSimpleHeaderLine("GATKCommandLine.Test", attributes);
+ originalHeader.addMetaDataLine(addedHeaderLine);
+
+ // write the file out into a new copy
+ final File firstCopyVCFFile = File.createTempFile("testEscapeHeaderQuotes1.", ".vcf");
+ firstCopyVCFFile.deleteOnExit();
+
+ final VariantContextWriter firstCopyWriter = new VariantContextWriterBuilder()
+ .setOutputFile(firstCopyVCFFile)
+ .setReferenceDictionary(createArtificialSequenceDictionary())
+ .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
+ .build();
+ firstCopyWriter.writeHeader(originalHeader);
+ final CloseableIterator<VariantContext> firstCopyVariantIterator = originalFileReader.iterator();
+ while (firstCopyVariantIterator.hasNext()) {
+ VariantContext variantContext = firstCopyVariantIterator.next();
+ firstCopyWriter.add(variantContext);
+ }
+ originalFileReader.close();
+ firstCopyWriter.close();
+
+ // read the copied file back in
+ final VCFFileReader firstCopyReader = new VCFFileReader(firstCopyVCFFile, false);
+ final VCFHeader firstCopyHeader = firstCopyReader.getFileHeader();
+ final VCFHeaderLine firstCopyNewHeaderLine = firstCopyHeader.getOtherHeaderLine("GATKCommandLine.Test");
+ Assert.assertNotNull(firstCopyNewHeaderLine);
+
+ // write one more copy to make sure things don't get double escaped
+ final File secondCopyVCFFile = File.createTempFile("testEscapeHeaderQuotes2.", ".vcf");
+ secondCopyVCFFile.deleteOnExit();
+ final VariantContextWriter secondCopyWriter = new VariantContextWriterBuilder()
+ .setOutputFile(secondCopyVCFFile)
+ .setReferenceDictionary(createArtificialSequenceDictionary())
+ .setOptions(EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY))
+ .build();
+ secondCopyWriter.writeHeader(firstCopyHeader);
+ final CloseableIterator<VariantContext> secondCopyVariantIterator = firstCopyReader.iterator();
+ while (secondCopyVariantIterator.hasNext()) {
+ VariantContext variantContext = secondCopyVariantIterator.next();
+ secondCopyWriter.add(variantContext);
+ }
+ secondCopyWriter.close();
+
+ // read the second copy back in and verify that the two files have the same header line
+ final VCFFileReader secondCopyReader = new VCFFileReader(secondCopyVCFFile, false);
+ final VCFHeader secondCopyHeader = secondCopyReader.getFileHeader();
+
+ final VCFHeaderLine secondCopyNewHeaderLine = secondCopyHeader.getOtherHeaderLine("GATKCommandLine.Test");
+ Assert.assertNotNull(secondCopyNewHeaderLine);
+ Assert.assertEquals(firstCopyNewHeaderLine, secondCopyNewHeaderLine);
+ Assert.assertEquals(firstCopyNewHeaderLine.toStringEncoding(), "GATKCommandLine.Test=<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">");
+ Assert.assertEquals(secondCopyNewHeaderLine.toStringEncoding(), "GATKCommandLine.Test=<ID=VariantFiltration,CommandLineOptions=\"filterName=[ANNOTATION] filterExpression=[ANNOTATION == \\\"NA\\\" || ANNOTATION <= 2.0]\">");
+
+ firstCopyReader.close();
+ secondCopyReader.close();
+
+ }
+
/**
* a little utility function for all tests to md5sum a file
* Shameless taken from:
diff --git a/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_with_index.bam b/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_with_index.bam
new file mode 120000
index 0000000..53313ae
--- /dev/null
+++ b/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_with_index.bam
@@ -0,0 +1 @@
+index_test.bam
\ No newline at end of file
diff --git a/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_with_index.bam.bai b/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_with_index.bam.bai
new file mode 120000
index 0000000..0f95610
--- /dev/null
+++ b/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_with_index.bam.bai
@@ -0,0 +1 @@
+index_test.bam.bai
\ No newline at end of file
diff --git a/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_without_linked_index.bam b/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_without_linked_index.bam
new file mode 120000
index 0000000..53313ae
--- /dev/null
+++ b/testdata/htsjdk/samtools/BAMFileIndexTest/symlink_without_linked_index.bam
@@ -0,0 +1 @@
+index_test.bam
\ No newline at end of file
diff --git a/testdata/htsjdk/samtools/ValidateSamFileTest/duplicated_reads.sam b/testdata/htsjdk/samtools/ValidateSamFileTest/duplicated_reads.sam
new file mode 100644
index 0000000..e9e003c
--- /dev/null
+++ b/testdata/htsjdk/samtools/ValidateSamFileTest/duplicated_reads.sam
@@ -0,0 +1,17 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:0 SM:Hi,Mom! LB:my-library PL:ILLUMINA
+ at RG ID:1 SM:Hi,Mom! LB:my-library PL:ILLUMINA
+ at RG ID:2 SM:Hi,Mom! LB:my-library PL:Illumina
+ at PG ID:1 PN:Hey! VN:2.0
+duplicate_read 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 PG:Z:1 NM:i:0 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+duplicate_read 141 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:1 PG:Z:1 NM:i:3 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+duplicate_read 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 PG:Z:1 NM:i:0 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+duplicate_read 141 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:1 PG:Z:1 NM:i:3 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
diff --git a/testdata/htsjdk/samtools/ValidateSamFileTest/duplicated_reads_out_of_order.sam b/testdata/htsjdk/samtools/ValidateSamFileTest/duplicated_reads_out_of_order.sam
new file mode 100644
index 0000000..23094dc
--- /dev/null
+++ b/testdata/htsjdk/samtools/ValidateSamFileTest/duplicated_reads_out_of_order.sam
@@ -0,0 +1,17 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:0 SM:Hi,Mom! LB:my-library PL:ILLUMINA
+ at RG ID:1 SM:Hi,Mom! LB:my-library PL:ILLUMINA
+ at RG ID:2 SM:Hi,Mom! LB:my-library PL:Illumina
+ at PG ID:1 PN:Hey! VN:2.0
+duplicate_read 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 PG:Z:1 NM:i:0 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+duplicate_read 77 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:1 PG:Z:1 NM:i:3 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+duplicate_read 141 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:0 PG:Z:1 NM:i:0 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
+duplicate_read 141 * 0 0 * * 0 0 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& RG:Z:1 PG:Z:1 NM:i:3 MQ:i:255 XT:Z:foo OQ:Z:11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111
diff --git a/testdata/htsjdk/samtools/cram_query_sorted.cram b/testdata/htsjdk/samtools/cram_query_sorted.cram
new file mode 100644
index 0000000..2397cd6
Binary files /dev/null and b/testdata/htsjdk/samtools/cram_query_sorted.cram differ
diff --git a/testdata/htsjdk/samtools/cram_query_sorted.fasta b/testdata/htsjdk/samtools/cram_query_sorted.fasta
new file mode 100644
index 0000000..9561151
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_query_sorted.fasta
@@ -0,0 +1,40 @@
+>chr1
+TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC
+TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA
+A
+>chr2
+CATCTCTACAAGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATAC
+TTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTTGACACCTTT
+T
+>chr3
+CGTATGCGCTTTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAAT
+AAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGGAATGTGCAA
+A
+>chr4
+CGTGATACCAACTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATAT
+TTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGGTTTGCAGCC
+C
+>chr5
+NTCTCATTTAAAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTT
+CATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCAAGACGTTATC
+T
+>chr6
+NAATTGTTCTTAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACA
+ATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACCAGTGTCGAT
+C
+>chr7
+CAACAGAAGGGGGGATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAAGG
+TTTTCGGGTCCCCCCCCCATCCCGATTTCCTTCCGCAGCTTACCTCCCGA
+AACGCGGCATCCCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCA
+GCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCATA
+CACAACAGAAGGGGGGATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAA
+GGTTTTCGGGTCCCCCCCCCATCCCGATTTCCTTCCGCAGCTTACCTCCC
+GAAACGCGGCATCCCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGG
+CAGCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCA
+TACA
+>chr8
+CACATCGTGAATCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGA
+GAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCCTAAGATGAC
+CCCAGGTTCAAATGTGCAGCCCCTTTTGAGAGATTTTTTTTTTGGGCTGG
+AAAAAAGACACAGCTATTCCTAAGATGACAAGATCAGAAAAAAAGTCAAG
+CA
diff --git a/testdata/htsjdk/samtools/cram_query_sorted.fasta.fai b/testdata/htsjdk/samtools/cram_query_sorted.fasta.fai
new file mode 100644
index 0000000..d5e1a06
--- /dev/null
+++ b/testdata/htsjdk/samtools/cram_query_sorted.fasta.fai
@@ -0,0 +1,8 @@
+chr1 101 6 50 51
+chr2 101 116 50 51
+chr3 101 226 50 51
+chr4 101 336 50 51
+chr5 101 446 50 51
+chr6 101 556 50 51
+chr7 404 666 50 51
+chr8 202 1085 50 51
diff --git a/testdata/htsjdk/samtools/samFilter01.js b/testdata/htsjdk/samtools/samFilter01.js
new file mode 100644
index 0000000..3fe7e00
--- /dev/null
+++ b/testdata/htsjdk/samtools/samFilter01.js
@@ -0,0 +1,2 @@
+/** answer to https://www.biostars.org/p/77802/#77966 */
+(record.referenceIndex==record.mateReferenceIndex && record.referenceIndex>=0 && record.readNegativeStrandFlag!=record.mateNegativeStrandFlag && ((record.mateNegativeStrandFlag && record.alignmentStart < record.mateAlignmentStart ) || (record.readNegativeStrandFlag && record.mateAlignmentStart < record.alignmentStart ) ))
diff --git a/testdata/htsjdk/samtools/samFilter02.js b/testdata/htsjdk/samtools/samFilter02.js
new file mode 100644
index 0000000..046e7ec
--- /dev/null
+++ b/testdata/htsjdk/samtools/samFilter02.js
@@ -0,0 +1,9 @@
+/** accept record if second base of DNA is a A */
+function accept(r)
+ {
+ /* using substring instead of charAt because http://developer.actuate.com/community/forum/index.php?/topic/25434-javascript-stringcharati-wont-return-a-character/ */
+ return r.getReadString().length()>2 &&
+ r.getReadString().substring(1,2)=="A";
+ }
+
+accept(record);
diff --git a/testdata/htsjdk/variant/variantFilter01.js b/testdata/htsjdk/variant/variantFilter01.js
new file mode 100644
index 0000000..0036477
--- /dev/null
+++ b/testdata/htsjdk/variant/variantFilter01.js
@@ -0,0 +1,2 @@
+/** get variant having position%2==0 */
+variant.getStart()%2 == 0;
diff --git a/testdata/htsjdk/variant/variantFilter02.js b/testdata/htsjdk/variant/variantFilter02.js
new file mode 100644
index 0000000..c102d25
--- /dev/null
+++ b/testdata/htsjdk/variant/variantFilter02.js
@@ -0,0 +1,20 @@
+/** prints a VARIATION if two samples at least have a DP>100 */
+function myfilterFunction(thevariant)
+ {
+ var samples=header.genotypeSamples;
+ var countOkDp=0;
+
+
+ for(var i=0; i< samples.size();++i)
+ {
+ var sampleName=samples.get(i);
+ if(! variant.hasGenotype(sampleName)) continue;
+ var genotype = thevariant.genotypes.get(sampleName);
+ if( ! genotype.hasDP()) continue;
+ var dp= genotype.getDP();
+ if(dp > 100 ) countOkDp++;
+ }
+ return (countOkDp>2)
+ }
+
+myfilterFunction(variant)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/htsjdk.git
More information about the debian-med-commit
mailing list