[med-svn] [picard-tools] 02/06: Imported Upstream version 2.1.0+dfsg
Andreas Tille
tille at debian.org
Sun Feb 21 18:51:50 UTC 2016
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository picard-tools.
commit c9f5b97355476b469ff44a562c769e95a5200fda
Author: Andreas Tille <tille at debian.org>
Date: Sun Feb 21 19:01:59 2016 +0100
Imported Upstream version 2.1.0+dfsg
---
.gitignore | 5 +-
.travis.yml | 9 +-
README.md | 9 +-
build.sbt | 8 +-
build.xml | 38 +-
src/java/picard/analysis/AdapterUtility.java | 117 +++
.../analysis/AlignmentSummaryMetricsCollector.java | 132 ++-
.../analysis/BaseDistributionByCycleMetrics.java | 24 +
src/java/picard/analysis/ChimeraUtil.java | 79 ++
.../analysis/CollectAlignmentSummaryMetrics.java | 55 +-
.../analysis/CollectBaseDistributionByCycle.java | 62 +-
src/java/picard/analysis/CollectGcBiasMetrics.java | 61 +-
.../picard/analysis/CollectInsertSizeMetrics.java | 43 +-
.../analysis/CollectJumpingLibraryMetrics.java | 28 +-
.../picard/analysis/CollectMultipleMetrics.java | 88 +-
src/java/picard/analysis/CollectOxoGMetrics.java | 49 +-
.../analysis/CollectQualityYieldMetrics.java | 141 +--
src/java/picard/analysis/CollectRawWgsMetrics.java | 67 +-
src/java/picard/analysis/CollectRrbsMetrics.java | 49 +-
src/java/picard/analysis/CollectWgsMetrics.java | 203 ++--
.../analysis/CollectWgsMetricsFromQuerySorted.java | 331 +++++--
.../CollectWgsMetricsFromSampledSites.java | 25 +
src/java/picard/analysis/CompareMetrics.java | 41 +-
.../analysis/FingerprintingDetailMetrics.java | 70 ++
.../analysis/FingerprintingSummaryMetrics.java | 79 ++
.../picard/analysis/GcBiasMetricsCollector.java | 27 +
src/java/picard/analysis/GcBiasSummaryMetrics.java | 9 +
src/java/picard/analysis/MeanQualityByCycle.java | 23 +-
.../picard/analysis/QualityScoreDistribution.java | 25 +-
src/java/picard/analysis/SinglePassSamProgram.java | 24 +
.../picard/analysis/TheoreticalSensitivity.java | 188 ++++
.../picard/analysis/artifacts/ArtifactCounter.java | 75 +-
.../CollectSequencingArtifactMetrics.java | 89 +-
.../analysis/artifacts/ContextAccumulator.java | 70 +-
.../artifacts/ConvertSequencingArtifactToOxoG.java | 34 +-
.../artifacts/SequencingArtifactMetrics.java | 33 +-
src/java/picard/analysis/artifacts/Transition.java | 42 +-
.../analysis/directed/CalculateHsMetrics.java | 63 +-
...lculateHsMetrics.java => CollectHsMetrics.java} | 27 +-
.../analysis/directed/CollectTargetedMetrics.java | 34 +-
.../directed/CollectTargetedPcrMetrics.java | 38 +-
.../analysis/directed/HsMetricCollector.java | 35 +-
src/java/picard/analysis/directed/HsMetrics.java | 66 +-
.../directed/InsertSizeMetricsCollector.java | 118 ++-
.../analysis/directed/RnaSeqMetricsCollector.java | 16 +-
.../analysis/directed/TargetMetricsCollector.java | 609 ++++++++----
.../analysis/directed/TargetedPcrMetrics.java | 41 +-
.../directed/TargetedPcrMetricsCollector.java | 36 +-
src/java/picard/cmdline/CommandLineParser.java | 57 +-
src/java/picard/cmdline/Option.java | 14 +
src/java/picard/cmdline/programgroups/Alpha.java | 13 +
src/java/picard/fastq/BamToBfq.java | 19 +-
.../CountingDuplicateFilter.java} | 20 +-
src/java/picard/filter/CountingFilter.java | 67 ++
.../CountingMapQFilter.java} | 24 +-
.../CountingPairedFilter.java} | 20 +-
src/java/picard/fingerprint/CheckFingerprint.java | 228 +++++
.../CrosscheckReadGroupFingerprints.java | 312 ++++++
src/java/picard/fingerprint/DiploidGenotype.java | 109 +++
.../DiploidHaplotype.java} | 22 +-
src/java/picard/fingerprint/Fingerprint.java | 100 ++
.../picard/fingerprint/FingerprintChecker.java | 635 ++++++++++++
.../FingerprintResults.java} | 40 +-
src/java/picard/fingerprint/GenotypeReader.java | 167 ++++
src/java/picard/fingerprint/HaplotypeBlock.java | 169 ++++
src/java/picard/fingerprint/HaplotypeMap.java | 388 ++++++++
.../picard/fingerprint/HaplotypeProbabilities.java | 241 +++++
...otypeProbabilitiesFromContaminatorSequence.java | 129 +++
.../HaplotypeProbabilitiesFromGenotype.java | 72 ++
...lotypeProbabilitiesFromGenotypeLikelihoods.java | 90 ++
.../HaplotypeProbabilitiesFromSequence.java | 126 +++
.../HaplotypeProbabilitiesUsingLogLikelihoods.java | 145 +++
.../HaplotypeProbabilityOfNormalGivenTumor.java | 101 ++
src/java/picard/fingerprint/LocusResult.java | 77 ++
src/java/picard/fingerprint/MatchResults.java | 96 ++
src/java/picard/fingerprint/Snp.java | 120 +++
.../picard/illumina/ClusterDataToSamConverter.java | 59 +-
.../CollectIlluminaBasecallingMetrics.java | 2 +-
.../illumina/CollectIlluminaLaneMetrics.java | 51 +-
.../picard/illumina/ExtractIlluminaBarcodes.java | 53 +-
.../illumina/IlluminaBasecallsConverter.java | 6 +-
.../picard/illumina/IlluminaBasecallsToFastq.java | 156 +--
.../picard/illumina/IlluminaBasecallsToSam.java | 31 +-
src/java/picard/illumina/MarkIlluminaAdapters.java | 4 +-
src/java/picard/illumina/parser/ReadStructure.java | 58 +-
src/java/picard/illumina/parser/ReadType.java | 9 +-
src/java/picard/illumina/parser/Tile.java | 15 +-
.../picard/illumina/parser/TileMetricsUtil.java | 37 +-
src/java/picard/reference/ExtractSequences.java | 25 +-
src/java/picard/reference/NonNFastaSize.java | 128 +++
src/java/picard/reference/NormalizeFasta.java | 26 +-
src/java/picard/sam/AddCommentsToBam.java | 23 +-
src/java/picard/sam/AddOrReplaceReadGroups.java | 30 +-
src/java/picard/sam/BamIndexStats.java | 21 +-
src/java/picard/sam/BuildBamIndex.java | 19 +-
.../picard/sam/CalculateReadGroupChecksum.java | 19 +-
src/java/picard/sam/CompareSAMs.java | 22 +-
src/java/picard/sam/CreateSequenceDictionary.java | 20 +-
src/java/picard/sam/DownsampleSam.java | 35 +-
src/java/picard/sam/FastqToSam.java | 27 +-
src/java/picard/sam/FilterSamReads.java | 21 +-
src/java/picard/sam/FixMateInformation.java | 31 +-
src/java/picard/sam/GatherBamFiles.java | 28 +-
src/java/picard/sam/MergeBamAlignment.java | 26 +-
src/java/picard/sam/MergeSamFiles.java | 62 +-
.../picard/sam/PositionBasedDownsampleSam.java | 17 +-
src/java/picard/sam/ReplaceSamHeader.java | 22 +-
src/java/picard/sam/RevertSam.java | 18 +-
src/java/picard/sam/SamToFastq.java | 93 +-
src/java/picard/sam/SortSam.java | 18 +-
src/java/picard/sam/ValidateSamFile.java | 44 +-
.../markduplicates/EstimateLibraryComplexity.java | 105 +-
.../picard/sam/markduplicates/MarkDuplicates.java | 106 +-
.../MarkDuplicatesWithMateCigar.java | 28 +-
.../MarkDuplicatesWithMateCigarIterator.java | 2 +-
.../AbstractMarkDuplicatesCommandLineProgram.java | 46 +-
.../util/OpticalDuplicateFinder.java | 205 ++--
.../util/PhysicalLocationForMateCigar.java | 27 +-
.../picard/sam/markduplicates/util/ReadEnds.java | 28 +-
.../util/ReadEndsForMarkDuplicatesCodec.java | 4 +-
src/java/picard/sam/util/PhysicalLocation.java | 117 +--
src/java/picard/sam/util/PhysicalLocationInt.java | 37 +
.../util/PhysicalLocationShort.java} | 29 +-
src/java/picard/sam/util/ReadNameParser.java | 193 ++++
src/java/picard/sam/util/ReadNameParsingUtils.java | 83 --
src/java/picard/util/BedToIntervalList.java | 49 +-
src/java/picard/util/IlluminaUtil.java | 19 +-
src/java/picard/util/IntervalListToBed.java | 90 ++
src/java/picard/util/IntervalListTools.java | 34 +-
src/java/picard/util/LiftOverIntervalList.java | 21 +-
src/java/picard/util/MathUtil.java | 32 +
src/java/picard/util/ScatterIntervalsByNs.java | 18 +-
src/java/picard/vcf/CallingMetricAccumulator.java | 202 ++--
.../picard/vcf/CollectVariantCallingMetrics.java | 61 +-
src/java/picard/vcf/GenotypeConcordance.java | 51 +-
src/java/picard/vcf/GvcfMetricAccumulator.java | 64 ++
src/java/picard/vcf/LiftoverVcf.java | 49 +-
src/java/picard/vcf/RenameSampleInVcf.java | 17 +-
src/java/picard/vcf/SortVcf.java | 23 +-
src/java/picard/vcf/SplitVcfs.java | 21 +-
src/java/picard/vcf/VcfFormatConverter.java | 22 +-
src/scripts/picard/analysis/insertSizeHistogram.R | 8 +-
src/scripts/release_picard.sh | 8 +-
.../CollectAlignmentSummaryMetricsTest.java | 24 +-
.../picard/analysis/CollectGcBiasMetricsTest.java | 34 +-
.../analysis/CollectMultipleMetricsTest.java | 19 +-
.../analysis/CollectQualityYieldMetricsTest.java | 75 ++
.../CollectWgsMetricsFromQuerySortedTest.java | 51 +-
.../CollectWgsMetricsFromSampledSitesTest.java | 10 +-
.../picard/analysis/CollectWgsMetricsTest.java | 174 ++++
.../analysis/TheoreticalSensitivityTest.java | 231 +++++
.../CollectSequencingArtifactMetricsTest.java | 9 +-
.../analysis/directed/CollectHsMetricsTest.java | 83 ++
.../directed/CollectTargetedMetricsTest.java | 17 +-
.../picard/fingerprint/FingerprintCheckerTest.java | 29 +
.../java/picard/fingerprint/HaplotypeMapTest.java | 102 ++
.../fingerprint/HaplotypeProbabilitiesTest.java | 188 ++++
...HaplotypeProbabilityOfNormalGivenTumorTest.java | 56 ++
.../illumina/ExtractIlluminaBarcodesTest.java | 17 +
.../illumina/IlluminaBasecallsToFastqTest.java | 29 +-
.../illumina/IlluminaBasecallsToSamTest.java | 51 +-
.../java/picard/illumina/ReadStructureTest.java | 106 +-
.../java/picard/reference/NonNFastaSizeTest.java | 87 ++
.../java/picard/sam/FixMateInformationTest.java | 65 ++
.../EstimateLibraryComplexityTest.java | 121 +++
.../MarkDuplicateWithMissingBarcodeTest.java | 33 +
...MarkDuplicateWithMissingReadOneBarcodeTest.java | 22 +
...MarkDuplicateWithMissingReadTwoBarcodeTest.java | 21 +
.../MarkDuplicateWithMissingSampleBarcodeTest.java | 21 +
.../SimpleMarkDuplicatesWithMateCigar.java | 15 +-
.../util/OpticalDuplicateFinderTest.java | 171 +++-
.../java/picard/sam/util/ReadNameParserTests.java | 142 +++
.../java/picard/util/BedToIntervalListTest.java | 1 +
.../java/picard/util/IntervalListToBedTest.java | 43 +
.../picard/vcf/CallingMetricAccumulatorTest.java | 78 ++
.../vcf/CollectVariantCallingMetricsTest.java | 122 ++-
src/tests/java/picard/vcf/LiftoverVcfTest.java | 32 +
src/tests/java/picard/vcf/TestFilterVcf.java | 14 +-
.../Solexa332667_BaseQ.histo | 31 +
.../Solexa332667_DepthDist.histo | 501 ++++++++++
.../test_25103070136.targeted_pcr_metrics | 1013 ++++++++++++++++++++
.../test_NexPond-359781.hsMetrics | 1013 ++++++++++++++++++++
.../test_Solexa-316269_sampled.wgs_metrics | 263 +++++
.../test_Solexa-332667.wgs_metrics | 263 +++++
.../with_context.bait_bias_detail_metrics | 240 ++---
.../with_context.bait_bias_summary_metrics | 26 +-
.../with_context.pre_adapter_detail_metrics | 240 ++---
.../with_context.pre_adapter_summary_metrics | 28 +-
.../directed/CollectHsMetrics/chrM.interval_list | 3 +
.../directed/CollectHsMetrics/lowbaseq.sam | 14 +
.../analysis/directed/CollectHsMetrics/lowmapq.sam | 14 +
.../directed/CollectHsMetrics/overlapping.sam | 14 +
testdata/picard/fingerprint/haplotypeMap.txt | 73 ++
.../25T8B25T/fastq_with_4M/AAAAAAAA.1.fastq | 0
.../25T8B25T/fastq_with_4M/AAAAAAAA.2.fastq | 0
.../fastq_with_4M/AAAAAAAA.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/AAAAAAAA.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/AAAAGAAG.1.fastq | 0
.../25T8B25T/fastq_with_4M/AAAAGAAG.2.fastq | 0
.../fastq_with_4M/AAAAGAAG.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/AAAAGAAG.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/AACAATGG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AACAATGG.2.fastq | 16 +
.../fastq_with_4M/AACAATGG.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AACAATGG.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AACGCATT.1.fastq | 28 +
.../25T8B25T/fastq_with_4M/AACGCATT.2.fastq | 28 +
.../fastq_with_4M/AACGCATT.barcode_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/AACGCATT.index_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/ACAAAATT.1.fastq | 0
.../25T8B25T/fastq_with_4M/ACAAAATT.2.fastq | 0
.../fastq_with_4M/ACAAAATT.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACAAAATT.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACAGGTAT.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACAGGTAT.2.fastq | 16 +
.../fastq_with_4M/ACAGGTAT.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACAGGTAT.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACAGTTGA.1.fastq | 8 +
.../25T8B25T/fastq_with_4M/ACAGTTGA.2.fastq | 8 +
.../fastq_with_4M/ACAGTTGA.barcode_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/ACAGTTGA.index_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/ACCAGTTG.1.fastq | 0
.../25T8B25T/fastq_with_4M/ACCAGTTG.2.fastq | 0
.../fastq_with_4M/ACCAGTTG.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACCAGTTG.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACGAAATC.1.fastq | 0
.../25T8B25T/fastq_with_4M/ACGAAATC.2.fastq | 0
.../fastq_with_4M/ACGAAATC.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACGAAATC.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACTAAGAC.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACTAAGAC.2.fastq | 16 +
.../fastq_with_4M/ACTAAGAC.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACTAAGAC.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACTGTACC.1.fastq | 0
.../25T8B25T/fastq_with_4M/ACTGTACC.2.fastq | 0
.../fastq_with_4M/ACTGTACC.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACTGTACC.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/ACTGTATC.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACTGTATC.2.fastq | 16 +
.../fastq_with_4M/ACTGTATC.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ACTGTATC.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AGAAAAGA.1.fastq | 0
.../25T8B25T/fastq_with_4M/AGAAAAGA.2.fastq | 0
.../fastq_with_4M/AGAAAAGA.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/AGAAAAGA.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/AGCATGGA.1.fastq | 12 +
.../25T8B25T/fastq_with_4M/AGCATGGA.2.fastq | 12 +
.../fastq_with_4M/AGCATGGA.barcode_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/AGCATGGA.index_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/AGGTAAGG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AGGTAAGG.2.fastq | 16 +
.../fastq_with_4M/AGGTAAGG.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AGGTAAGG.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AGGTCGCA.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AGGTCGCA.2.fastq | 16 +
.../fastq_with_4M/AGGTCGCA.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/AGGTCGCA.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ATTATCAA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/ATTATCAA.2.fastq | 20 +
.../fastq_with_4M/ATTATCAA.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/ATTATCAA.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/ATTCCTCT.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ATTCCTCT.2.fastq | 16 +
.../fastq_with_4M/ATTCCTCT.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/ATTCCTCT.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/CAACTCTC.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CAACTCTC.2.fastq | 20 +
.../fastq_with_4M/CAACTCTC.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CAACTCTC.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CAATAGAC.1.fastq | 0
.../25T8B25T/fastq_with_4M/CAATAGAC.2.fastq | 0
.../fastq_with_4M/CAATAGAC.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/CAATAGAC.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/CAATAGTC.1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CAATAGTC.2.fastq | 28 +
.../fastq_with_4M/CAATAGTC.barcode_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CAATAGTC.index_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CAGCGGAT.1.fastq | 0
.../25T8B25T/fastq_with_4M/CAGCGGAT.2.fastq | 0
.../fastq_with_4M/CAGCGGAT.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/CAGCGGAT.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/CAGCGGTA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CAGCGGTA.2.fastq | 20 +
.../fastq_with_4M/CAGCGGTA.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CAGCGGTA.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CCAACATT.1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CCAACATT.2.fastq | 28 +
.../fastq_with_4M/CCAACATT.barcode_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CCAACATT.index_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CCAGCACC.1.fastq | 12 +
.../25T8B25T/fastq_with_4M/CCAGCACC.2.fastq | 12 +
.../fastq_with_4M/CCAGCACC.barcode_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/CCAGCACC.index_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/CCATGCGT.1.fastq | 0
.../25T8B25T/fastq_with_4M/CCATGCGT.2.fastq | 0
.../fastq_with_4M/CCATGCGT.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/CCATGCGT.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/CGCCTTCC.1.fastq | 8 +
.../25T8B25T/fastq_with_4M/CGCCTTCC.2.fastq | 8 +
.../fastq_with_4M/CGCCTTCC.barcode_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/CGCCTTCC.index_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/CGCTATGT.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CGCTATGT.2.fastq | 20 +
.../fastq_with_4M/CGCTATGT.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CGCTATGT.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/CTAACTCG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/CTAACTCG.2.fastq | 16 +
.../fastq_with_4M/CTAACTCG.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/CTAACTCG.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/CTATGCGC.1.fastq | 0
.../25T8B25T/fastq_with_4M/CTATGCGC.2.fastq | 0
.../fastq_with_4M/CTATGCGC.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/CTATGCGC.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/CTATGCGT.1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CTATGCGT.2.fastq | 28 +
.../fastq_with_4M/CTATGCGT.barcode_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CTATGCGT.index_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/CTGCGGAT.1.fastq | 12 +
.../25T8B25T/fastq_with_4M/CTGCGGAT.2.fastq | 12 +
.../fastq_with_4M/CTGCGGAT.barcode_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/CTGCGGAT.index_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/CTGTAATC.1.fastq | 24 +
.../25T8B25T/fastq_with_4M/CTGTAATC.2.fastq | 24 +
.../fastq_with_4M/CTGTAATC.barcode_1.fastq | 24 +
.../25T8B25T/fastq_with_4M/CTGTAATC.index_1.fastq | 24 +
.../25T8B25T/fastq_with_4M/GAAAAAAA.1.fastq | 0
.../25T8B25T/fastq_with_4M/GAAAAAAA.2.fastq | 0
.../fastq_with_4M/GAAAAAAA.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/GAAAAAAA.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/GAACGAT..1.fastq | 0
.../25T8B25T/fastq_with_4M/GAACGAT..2.fastq | 0
.../fastq_with_4M/GAACGAT..barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/GAACGAT..index_1.fastq | 0
.../25T8B25T/fastq_with_4M/GAAGGAAG.1.fastq | 12 +
.../25T8B25T/fastq_with_4M/GAAGGAAG.2.fastq | 12 +
.../fastq_with_4M/GAAGGAAG.barcode_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/GAAGGAAG.index_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/GACCAGGA.1.fastq | 28 +
.../25T8B25T/fastq_with_4M/GACCAGGA.2.fastq | 28 +
.../fastq_with_4M/GACCAGGA.barcode_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/GACCAGGA.index_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/GACCAGGC.1.fastq | 0
.../25T8B25T/fastq_with_4M/GACCAGGC.2.fastq | 0
.../fastq_with_4M/GACCAGGC.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/GACCAGGC.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/GACCGTTG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/GACCGTTG.2.fastq | 16 +
.../fastq_with_4M/GACCGTTG.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/GACCGTTG.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/GACCTAAC.1.fastq | 4 +
.../25T8B25T/fastq_with_4M/GACCTAAC.2.fastq | 4 +
.../fastq_with_4M/GACCTAAC.barcode_1.fastq | 4 +
.../25T8B25T/fastq_with_4M/GACCTAAC.index_1.fastq | 4 +
.../25T8B25T/fastq_with_4M/GATATCCA.1.fastq | 12 +
.../25T8B25T/fastq_with_4M/GATATCCA.2.fastq | 12 +
.../fastq_with_4M/GATATCCA.barcode_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/GATATCCA.index_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/GCCGTCGA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/GCCGTCGA.2.fastq | 20 +
.../fastq_with_4M/GCCGTCGA.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/GCCGTCGA.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/GCCTAGCC.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/GCCTAGCC.2.fastq | 20 +
.../fastq_with_4M/GCCTAGCC.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/GCCTAGCC.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/GTAACATC.1.fastq | 8 +
.../25T8B25T/fastq_with_4M/GTAACATC.2.fastq | 8 +
.../fastq_with_4M/GTAACATC.barcode_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/GTAACATC.index_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/GTCCACAG.1.fastq | 8 +
.../25T8B25T/fastq_with_4M/GTCCACAG.2.fastq | 8 +
.../fastq_with_4M/GTCCACAG.barcode_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/GTCCACAG.index_1.fastq | 8 +
.../illumina/25T8B25T/fastq_with_4M/N.1.fastq | 64 ++
.../illumina/25T8B25T/fastq_with_4M/N.2.fastq | 64 ++
.../25T8B25T/fastq_with_4M/N.barcode_1.fastq | 64 ++
.../25T8B25T/fastq_with_4M/N.index_1.fastq | 64 ++
.../25T8B25T/fastq_with_4M/TAAGCACA.1.fastq | 8 +
.../25T8B25T/fastq_with_4M/TAAGCACA.2.fastq | 8 +
.../fastq_with_4M/TAAGCACA.barcode_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/TAAGCACA.index_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/TACCGTCT.1.fastq | 0
.../25T8B25T/fastq_with_4M/TACCGTCT.2.fastq | 0
.../fastq_with_4M/TACCGTCT.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/TACCGTCT.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/TAGCGGTA.1.fastq | 0
.../25T8B25T/fastq_with_4M/TAGCGGTA.2.fastq | 0
.../fastq_with_4M/TAGCGGTA.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/TAGCGGTA.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCAGCC.1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCAGCC.2.fastq | 0
.../fastq_with_4M/TATCAGCC.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCAGCC.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCCAGG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TATCCAGG.2.fastq | 16 +
.../fastq_with_4M/TATCCAGG.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TATCCAGG.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TATCCATG.1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCCATG.2.fastq | 0
.../fastq_with_4M/TATCCATG.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCCATG.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCTCGG.1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCTCGG.2.fastq | 0
.../fastq_with_4M/TATCTCGG.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCTCGG.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/TATCTGCC.1.fastq | 28 +
.../25T8B25T/fastq_with_4M/TATCTGCC.2.fastq | 28 +
.../fastq_with_4M/TATCTGCC.barcode_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/TATCTGCC.index_1.fastq | 28 +
.../25T8B25T/fastq_with_4M/TCCGTCTA.1.fastq | 0
.../25T8B25T/fastq_with_4M/TCCGTCTA.2.fastq | 0
.../fastq_with_4M/TCCGTCTA.barcode_1.fastq | 0
.../25T8B25T/fastq_with_4M/TCCGTCTA.index_1.fastq | 0
.../25T8B25T/fastq_with_4M/TCGCTAGA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M/TCGCTAGA.2.fastq | 20 +
.../fastq_with_4M/TCGCTAGA.barcode_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/TCGCTAGA.index_1.fastq | 20 +
.../25T8B25T/fastq_with_4M/TCTGCAAG.1.fastq | 4 +
.../25T8B25T/fastq_with_4M/TCTGCAAG.2.fastq | 4 +
.../fastq_with_4M/TCTGCAAG.barcode_1.fastq | 4 +
.../25T8B25T/fastq_with_4M/TCTGCAAG.index_1.fastq | 4 +
.../25T8B25T/fastq_with_4M/TGCAAGTA.1.fastq | 8 +
.../25T8B25T/fastq_with_4M/TGCAAGTA.2.fastq | 8 +
.../fastq_with_4M/TGCAAGTA.barcode_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/TGCAAGTA.index_1.fastq | 8 +
.../25T8B25T/fastq_with_4M/TGCTGCTG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TGCTGCTG.2.fastq | 16 +
.../fastq_with_4M/TGCTGCTG.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TGCTGCTG.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TGTAACTC.1.fastq | 4 +
.../25T8B25T/fastq_with_4M/TGTAACTC.2.fastq | 4 +
.../fastq_with_4M/TGTAACTC.barcode_1.fastq | 4 +
.../25T8B25T/fastq_with_4M/TGTAACTC.index_1.fastq | 4 +
.../25T8B25T/fastq_with_4M/TGTAATCA.1.fastq | 12 +
.../25T8B25T/fastq_with_4M/TGTAATCA.2.fastq | 12 +
.../fastq_with_4M/TGTAATCA.barcode_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/TGTAATCA.index_1.fastq | 12 +
.../25T8B25T/fastq_with_4M/TTGTCTAT.1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TTGTCTAT.2.fastq | 16 +
.../fastq_with_4M/TTGTCTAT.barcode_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/TTGTCTAT.index_1.fastq | 16 +
.../25T8B25T/fastq_with_4M/mp_barcode.params | 62 ++
.../25T8B25T/fastq_with_4M4M/AAAAGAAG.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/AAAAGAAG.2.fastq | 0
.../fastq_with_4M4M/AAAAGAAG.barcode_1.fastq | 0
.../fastq_with_4M4M/AAAAGAAG.index_1.fastq | 0
.../fastq_with_4M4M/AAAAGAAG.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/AACAATGG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/AACAATGG.2.fastq | 16 +
.../fastq_with_4M4M/AACAATGG.barcode_1.fastq | 16 +
.../fastq_with_4M4M/AACAATGG.index_1.fastq | 16 +
.../fastq_with_4M4M/AACAATGG.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/AACGCATT.1.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/AACGCATT.2.fastq | 28 +
.../fastq_with_4M4M/AACGCATT.barcode_1.fastq | 28 +
.../fastq_with_4M4M/AACGCATT.index_1.fastq | 28 +
.../fastq_with_4M4M/AACGCATT.index_2.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/ACAAAATT.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACAAAATT.2.fastq | 0
.../fastq_with_4M4M/ACAAAATT.barcode_1.fastq | 0
.../fastq_with_4M4M/ACAAAATT.index_1.fastq | 0
.../fastq_with_4M4M/ACAAAATT.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACAGGTAT.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ACAGGTAT.2.fastq | 16 +
.../fastq_with_4M4M/ACAGGTAT.barcode_1.fastq | 16 +
.../fastq_with_4M4M/ACAGGTAT.index_1.fastq | 16 +
.../fastq_with_4M4M/ACAGGTAT.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ACAGTTGA.1.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/ACAGTTGA.2.fastq | 8 +
.../fastq_with_4M4M/ACAGTTGA.barcode_1.fastq | 8 +
.../fastq_with_4M4M/ACAGTTGA.index_1.fastq | 8 +
.../fastq_with_4M4M/ACAGTTGA.index_2.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/ACCAGTTG.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACCAGTTG.2.fastq | 0
.../fastq_with_4M4M/ACCAGTTG.barcode_1.fastq | 0
.../fastq_with_4M4M/ACCAGTTG.index_1.fastq | 0
.../fastq_with_4M4M/ACCAGTTG.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACGAAATC.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACGAAATC.2.fastq | 0
.../fastq_with_4M4M/ACGAAATC.barcode_1.fastq | 0
.../fastq_with_4M4M/ACGAAATC.index_1.fastq | 0
.../fastq_with_4M4M/ACGAAATC.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACTAAGAC.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ACTAAGAC.2.fastq | 16 +
.../fastq_with_4M4M/ACTAAGAC.barcode_1.fastq | 16 +
.../fastq_with_4M4M/ACTAAGAC.index_1.fastq | 16 +
.../fastq_with_4M4M/ACTAAGAC.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ACTGTACC.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACTGTACC.2.fastq | 0
.../fastq_with_4M4M/ACTGTACC.barcode_1.fastq | 0
.../fastq_with_4M4M/ACTGTACC.index_1.fastq | 0
.../fastq_with_4M4M/ACTGTACC.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/ACTGTATC.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ACTGTATC.2.fastq | 16 +
.../fastq_with_4M4M/ACTGTATC.barcode_1.fastq | 16 +
.../fastq_with_4M4M/ACTGTATC.index_1.fastq | 16 +
.../fastq_with_4M4M/ACTGTATC.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/AGAAAAGA.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/AGAAAAGA.2.fastq | 0
.../fastq_with_4M4M/AGAAAAGA.barcode_1.fastq | 0
.../fastq_with_4M4M/AGAAAAGA.index_1.fastq | 0
.../fastq_with_4M4M/AGAAAAGA.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/AGCATGGA.1.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/AGCATGGA.2.fastq | 12 +
.../fastq_with_4M4M/AGCATGGA.barcode_1.fastq | 12 +
.../fastq_with_4M4M/AGCATGGA.index_1.fastq | 12 +
.../fastq_with_4M4M/AGCATGGA.index_2.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/AGGTAAGG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/AGGTAAGG.2.fastq | 16 +
.../fastq_with_4M4M/AGGTAAGG.barcode_1.fastq | 16 +
.../fastq_with_4M4M/AGGTAAGG.index_1.fastq | 16 +
.../fastq_with_4M4M/AGGTAAGG.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/AGGTCGCA.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/AGGTCGCA.2.fastq | 16 +
.../fastq_with_4M4M/AGGTCGCA.barcode_1.fastq | 16 +
.../fastq_with_4M4M/AGGTCGCA.index_1.fastq | 16 +
.../fastq_with_4M4M/AGGTCGCA.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ATTATCAA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/ATTATCAA.2.fastq | 20 +
.../fastq_with_4M4M/ATTATCAA.barcode_1.fastq | 20 +
.../fastq_with_4M4M/ATTATCAA.index_1.fastq | 20 +
.../fastq_with_4M4M/ATTATCAA.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/ATTCCTCT.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/ATTCCTCT.2.fastq | 16 +
.../fastq_with_4M4M/ATTCCTCT.barcode_1.fastq | 16 +
.../fastq_with_4M4M/ATTCCTCT.index_1.fastq | 16 +
.../fastq_with_4M4M/ATTCCTCT.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/CAACTCTC.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/CAACTCTC.2.fastq | 20 +
.../fastq_with_4M4M/CAACTCTC.barcode_1.fastq | 20 +
.../fastq_with_4M4M/CAACTCTC.index_1.fastq | 20 +
.../fastq_with_4M4M/CAACTCTC.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/CAATAGAC.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/CAATAGAC.2.fastq | 0
.../fastq_with_4M4M/CAATAGAC.barcode_1.fastq | 0
.../fastq_with_4M4M/CAATAGAC.index_1.fastq | 0
.../fastq_with_4M4M/CAATAGAC.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/CAATAGTC.1.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/CAATAGTC.2.fastq | 28 +
.../fastq_with_4M4M/CAATAGTC.barcode_1.fastq | 28 +
.../fastq_with_4M4M/CAATAGTC.index_1.fastq | 28 +
.../fastq_with_4M4M/CAATAGTC.index_2.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/CAGCGGAT.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/CAGCGGAT.2.fastq | 0
.../fastq_with_4M4M/CAGCGGAT.barcode_1.fastq | 0
.../fastq_with_4M4M/CAGCGGAT.index_1.fastq | 0
.../fastq_with_4M4M/CAGCGGAT.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/CAGCGGTA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/CAGCGGTA.2.fastq | 20 +
.../fastq_with_4M4M/CAGCGGTA.barcode_1.fastq | 20 +
.../fastq_with_4M4M/CAGCGGTA.index_1.fastq | 20 +
.../fastq_with_4M4M/CAGCGGTA.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/CCAACATT.1.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/CCAACATT.2.fastq | 28 +
.../fastq_with_4M4M/CCAACATT.barcode_1.fastq | 28 +
.../fastq_with_4M4M/CCAACATT.index_1.fastq | 28 +
.../fastq_with_4M4M/CCAACATT.index_2.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/CCAGCACC.1.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/CCAGCACC.2.fastq | 12 +
.../fastq_with_4M4M/CCAGCACC.barcode_1.fastq | 12 +
.../fastq_with_4M4M/CCAGCACC.index_1.fastq | 12 +
.../fastq_with_4M4M/CCAGCACC.index_2.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/CCATGCGT.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/CCATGCGT.2.fastq | 0
.../fastq_with_4M4M/CCATGCGT.barcode_1.fastq | 0
.../fastq_with_4M4M/CCATGCGT.index_1.fastq | 0
.../fastq_with_4M4M/CCATGCGT.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/CGCCTTCC.1.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/CGCCTTCC.2.fastq | 8 +
.../fastq_with_4M4M/CGCCTTCC.barcode_1.fastq | 8 +
.../fastq_with_4M4M/CGCCTTCC.index_1.fastq | 8 +
.../fastq_with_4M4M/CGCCTTCC.index_2.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/CGCTATGT.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/CGCTATGT.2.fastq | 20 +
.../fastq_with_4M4M/CGCTATGT.barcode_1.fastq | 20 +
.../fastq_with_4M4M/CGCTATGT.index_1.fastq | 20 +
.../fastq_with_4M4M/CGCTATGT.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/CTAACTCG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/CTAACTCG.2.fastq | 16 +
.../fastq_with_4M4M/CTAACTCG.barcode_1.fastq | 16 +
.../fastq_with_4M4M/CTAACTCG.index_1.fastq | 16 +
.../fastq_with_4M4M/CTAACTCG.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/CTATGCGC.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/CTATGCGC.2.fastq | 0
.../fastq_with_4M4M/CTATGCGC.barcode_1.fastq | 0
.../fastq_with_4M4M/CTATGCGC.index_1.fastq | 0
.../fastq_with_4M4M/CTATGCGC.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/CTATGCGT.1.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/CTATGCGT.2.fastq | 28 +
.../fastq_with_4M4M/CTATGCGT.barcode_1.fastq | 28 +
.../fastq_with_4M4M/CTATGCGT.index_1.fastq | 28 +
.../fastq_with_4M4M/CTATGCGT.index_2.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/CTGCGGAT.1.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/CTGCGGAT.2.fastq | 12 +
.../fastq_with_4M4M/CTGCGGAT.barcode_1.fastq | 12 +
.../fastq_with_4M4M/CTGCGGAT.index_1.fastq | 12 +
.../fastq_with_4M4M/CTGCGGAT.index_2.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/CTGTAATC.1.fastq | 24 +
.../25T8B25T/fastq_with_4M4M/CTGTAATC.2.fastq | 24 +
.../fastq_with_4M4M/CTGTAATC.barcode_1.fastq | 24 +
.../fastq_with_4M4M/CTGTAATC.index_1.fastq | 24 +
.../fastq_with_4M4M/CTGTAATC.index_2.fastq | 24 +
.../25T8B25T/fastq_with_4M4M/GAAAAAAA.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/GAAAAAAA.2.fastq | 0
.../fastq_with_4M4M/GAAAAAAA.barcode_1.fastq | 0
.../fastq_with_4M4M/GAAAAAAA.index_1.fastq | 0
.../fastq_with_4M4M/GAAAAAAA.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/GAACGAT..1.fastq | 0
.../25T8B25T/fastq_with_4M4M/GAACGAT..2.fastq | 0
.../fastq_with_4M4M/GAACGAT..barcode_1.fastq | 0
.../fastq_with_4M4M/GAACGAT..index_1.fastq | 0
.../fastq_with_4M4M/GAACGAT..index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/GAAGGAAG.1.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/GAAGGAAG.2.fastq | 12 +
.../fastq_with_4M4M/GAAGGAAG.barcode_1.fastq | 12 +
.../fastq_with_4M4M/GAAGGAAG.index_1.fastq | 12 +
.../fastq_with_4M4M/GAAGGAAG.index_2.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/GACCAGGA.1.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/GACCAGGA.2.fastq | 28 +
.../fastq_with_4M4M/GACCAGGA.barcode_1.fastq | 28 +
.../fastq_with_4M4M/GACCAGGA.index_1.fastq | 28 +
.../fastq_with_4M4M/GACCAGGA.index_2.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/GACCAGGC.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/GACCAGGC.2.fastq | 0
.../fastq_with_4M4M/GACCAGGC.barcode_1.fastq | 0
.../fastq_with_4M4M/GACCAGGC.index_1.fastq | 0
.../fastq_with_4M4M/GACCAGGC.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/GACCGTTG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/GACCGTTG.2.fastq | 16 +
.../fastq_with_4M4M/GACCGTTG.barcode_1.fastq | 16 +
.../fastq_with_4M4M/GACCGTTG.index_1.fastq | 16 +
.../fastq_with_4M4M/GACCGTTG.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/GACCTAAC.1.fastq | 4 +
.../25T8B25T/fastq_with_4M4M/GACCTAAC.2.fastq | 4 +
.../fastq_with_4M4M/GACCTAAC.barcode_1.fastq | 4 +
.../fastq_with_4M4M/GACCTAAC.index_1.fastq | 4 +
.../fastq_with_4M4M/GACCTAAC.index_2.fastq | 4 +
.../25T8B25T/fastq_with_4M4M/GATATCCA.1.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/GATATCCA.2.fastq | 12 +
.../fastq_with_4M4M/GATATCCA.barcode_1.fastq | 12 +
.../fastq_with_4M4M/GATATCCA.index_1.fastq | 12 +
.../fastq_with_4M4M/GATATCCA.index_2.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/GCCGTCGA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/GCCGTCGA.2.fastq | 20 +
.../fastq_with_4M4M/GCCGTCGA.barcode_1.fastq | 20 +
.../fastq_with_4M4M/GCCGTCGA.index_1.fastq | 20 +
.../fastq_with_4M4M/GCCGTCGA.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/GCCTAGCC.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/GCCTAGCC.2.fastq | 20 +
.../fastq_with_4M4M/GCCTAGCC.barcode_1.fastq | 20 +
.../fastq_with_4M4M/GCCTAGCC.index_1.fastq | 20 +
.../fastq_with_4M4M/GCCTAGCC.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/GTAACATC.1.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/GTAACATC.2.fastq | 8 +
.../fastq_with_4M4M/GTAACATC.barcode_1.fastq | 8 +
.../fastq_with_4M4M/GTAACATC.index_1.fastq | 8 +
.../fastq_with_4M4M/GTAACATC.index_2.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/GTCCACAG.1.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/GTCCACAG.2.fastq | 8 +
.../fastq_with_4M4M/GTCCACAG.barcode_1.fastq | 8 +
.../fastq_with_4M4M/GTCCACAG.index_1.fastq | 8 +
.../fastq_with_4M4M/GTCCACAG.index_2.fastq | 8 +
.../illumina/25T8B25T/fastq_with_4M4M/N.1.fastq | 64 ++
.../illumina/25T8B25T/fastq_with_4M4M/N.2.fastq | 64 ++
.../25T8B25T/fastq_with_4M4M/N.barcode_1.fastq | 64 ++
.../25T8B25T/fastq_with_4M4M/N.index_1.fastq | 64 ++
.../25T8B25T/fastq_with_4M4M/N.index_2.fastq | 64 ++
.../25T8B25T/fastq_with_4M4M/TAAGCACA.1.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/TAAGCACA.2.fastq | 8 +
.../fastq_with_4M4M/TAAGCACA.barcode_1.fastq | 8 +
.../fastq_with_4M4M/TAAGCACA.index_1.fastq | 8 +
.../fastq_with_4M4M/TAAGCACA.index_2.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/TACCGTCT.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/TACCGTCT.2.fastq | 0
.../fastq_with_4M4M/TACCGTCT.barcode_1.fastq | 0
.../fastq_with_4M4M/TACCGTCT.index_1.fastq | 0
.../fastq_with_4M4M/TACCGTCT.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/TAGCGGTA.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/TAGCGGTA.2.fastq | 0
.../fastq_with_4M4M/TAGCGGTA.barcode_1.fastq | 0
.../fastq_with_4M4M/TAGCGGTA.index_1.fastq | 0
.../fastq_with_4M4M/TAGCGGTA.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCAGCC.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCAGCC.2.fastq | 0
.../fastq_with_4M4M/TATCAGCC.barcode_1.fastq | 0
.../fastq_with_4M4M/TATCAGCC.index_1.fastq | 0
.../fastq_with_4M4M/TATCAGCC.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCCAGG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/TATCCAGG.2.fastq | 16 +
.../fastq_with_4M4M/TATCCAGG.barcode_1.fastq | 16 +
.../fastq_with_4M4M/TATCCAGG.index_1.fastq | 16 +
.../fastq_with_4M4M/TATCCAGG.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/TATCCATG.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCCATG.2.fastq | 0
.../fastq_with_4M4M/TATCCATG.barcode_1.fastq | 0
.../fastq_with_4M4M/TATCCATG.index_1.fastq | 0
.../fastq_with_4M4M/TATCCATG.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCTCGG.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCTCGG.2.fastq | 0
.../fastq_with_4M4M/TATCTCGG.barcode_1.fastq | 0
.../fastq_with_4M4M/TATCTCGG.index_1.fastq | 0
.../fastq_with_4M4M/TATCTCGG.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/TATCTGCC.1.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/TATCTGCC.2.fastq | 28 +
.../fastq_with_4M4M/TATCTGCC.barcode_1.fastq | 28 +
.../fastq_with_4M4M/TATCTGCC.index_1.fastq | 28 +
.../fastq_with_4M4M/TATCTGCC.index_2.fastq | 28 +
.../25T8B25T/fastq_with_4M4M/TCCGTCTA.1.fastq | 0
.../25T8B25T/fastq_with_4M4M/TCCGTCTA.2.fastq | 0
.../fastq_with_4M4M/TCCGTCTA.barcode_1.fastq | 0
.../fastq_with_4M4M/TCCGTCTA.index_1.fastq | 0
.../fastq_with_4M4M/TCCGTCTA.index_2.fastq | 0
.../25T8B25T/fastq_with_4M4M/TCGCTAGA.1.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/TCGCTAGA.2.fastq | 20 +
.../fastq_with_4M4M/TCGCTAGA.barcode_1.fastq | 20 +
.../fastq_with_4M4M/TCGCTAGA.index_1.fastq | 20 +
.../fastq_with_4M4M/TCGCTAGA.index_2.fastq | 20 +
.../25T8B25T/fastq_with_4M4M/TCTGCAAG.1.fastq | 4 +
.../25T8B25T/fastq_with_4M4M/TCTGCAAG.2.fastq | 4 +
.../fastq_with_4M4M/TCTGCAAG.barcode_1.fastq | 4 +
.../fastq_with_4M4M/TCTGCAAG.index_1.fastq | 4 +
.../fastq_with_4M4M/TCTGCAAG.index_2.fastq | 4 +
.../25T8B25T/fastq_with_4M4M/TGCAAGTA.1.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/TGCAAGTA.2.fastq | 8 +
.../fastq_with_4M4M/TGCAAGTA.barcode_1.fastq | 8 +
.../fastq_with_4M4M/TGCAAGTA.index_1.fastq | 8 +
.../fastq_with_4M4M/TGCAAGTA.index_2.fastq | 8 +
.../25T8B25T/fastq_with_4M4M/TGCTGCTG.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/TGCTGCTG.2.fastq | 16 +
.../fastq_with_4M4M/TGCTGCTG.barcode_1.fastq | 16 +
.../fastq_with_4M4M/TGCTGCTG.index_1.fastq | 16 +
.../fastq_with_4M4M/TGCTGCTG.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/TGTAACTC.1.fastq | 4 +
.../25T8B25T/fastq_with_4M4M/TGTAACTC.2.fastq | 4 +
.../fastq_with_4M4M/TGTAACTC.barcode_1.fastq | 4 +
.../fastq_with_4M4M/TGTAACTC.index_1.fastq | 4 +
.../fastq_with_4M4M/TGTAACTC.index_2.fastq | 4 +
.../25T8B25T/fastq_with_4M4M/TGTAATCA.1.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/TGTAATCA.2.fastq | 12 +
.../fastq_with_4M4M/TGTAATCA.barcode_1.fastq | 12 +
.../fastq_with_4M4M/TGTAATCA.index_1.fastq | 12 +
.../fastq_with_4M4M/TGTAATCA.index_2.fastq | 12 +
.../25T8B25T/fastq_with_4M4M/TTGTCTAT.1.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/TTGTCTAT.2.fastq | 16 +
.../fastq_with_4M4M/TTGTCTAT.barcode_1.fastq | 16 +
.../fastq_with_4M4M/TTGTCTAT.index_1.fastq | 16 +
.../fastq_with_4M4M/TTGTCTAT.index_2.fastq | 16 +
.../25T8B25T/fastq_with_4M4M/mp_barcode.params | 62 ++
.../sams/nonBarcodedWithMolecularIndex4M4M.sam | 182 ++++
.../sams/nonBarcodedWithMolecularIndex8M.sam | 182 ++++
.../illumina/25T8B25T/sams_with_4M/AAAAAAAA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/AAAAGAAG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/AACAATGG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/AACGCATT.sam | 16 +
.../illumina/25T8B25T/sams_with_4M/ACAAAATT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/ACAGGTAT.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/ACAGTTGA.sam | 6 +
.../illumina/25T8B25T/sams_with_4M/ACCAGTTG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/ACGAAATC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/ACTAAGAC.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/ACTGTACC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/ACTGTATC.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/AGAAAAGA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/AGCATGGA.sam | 8 +
.../illumina/25T8B25T/sams_with_4M/AGGTAAGG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/AGGTCGCA.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/ATTATCAA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/ATTCCTCT.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/CAACTCTC.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/CAATAGAC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/CAATAGTC.sam | 16 +
.../illumina/25T8B25T/sams_with_4M/CAGCGGAT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/CAGCGGTA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/CCAACATT.sam | 16 +
.../illumina/25T8B25T/sams_with_4M/CCAGCACC.sam | 8 +
.../illumina/25T8B25T/sams_with_4M/CCATGCGT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/CGCCTTCC.sam | 6 +
.../illumina/25T8B25T/sams_with_4M/CGCTATGT.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/CTAACTCG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/CTATGCGC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/CTATGCGT.sam | 16 +
.../illumina/25T8B25T/sams_with_4M/CTGCGGAT.sam | 8 +
.../illumina/25T8B25T/sams_with_4M/CTGTAATC.sam | 14 +
.../illumina/25T8B25T/sams_with_4M/GAAAAAAA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/GAACGAT..sam | 2 +
.../illumina/25T8B25T/sams_with_4M/GAAGGAAG.sam | 8 +
.../illumina/25T8B25T/sams_with_4M/GACCAGGA.sam | 16 +
.../illumina/25T8B25T/sams_with_4M/GACCAGGC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/GACCGTTG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/GACCTAAC.sam | 4 +
.../illumina/25T8B25T/sams_with_4M/GATATCCA.sam | 8 +
.../illumina/25T8B25T/sams_with_4M/GCCGTCGA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/GCCTAGCC.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/GTAACATC.sam | 6 +
.../illumina/25T8B25T/sams_with_4M/GTCCACAG.sam | 6 +
.../picard/illumina/25T8B25T/sams_with_4M/N.sam | 34 +
.../illumina/25T8B25T/sams_with_4M/TAAGCACA.sam | 6 +
.../illumina/25T8B25T/sams_with_4M/TACCGTCT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/TAGCGGTA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/TATCAGCC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/TATCCAGG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/TATCCATG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/TATCTCGG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/TATCTGCC.sam | 16 +
.../illumina/25T8B25T/sams_with_4M/TCCGTCTA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M/TCGCTAGA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M/TCTGCAAG.sam | 4 +
.../illumina/25T8B25T/sams_with_4M/TGCAAGTA.sam | 6 +
.../illumina/25T8B25T/sams_with_4M/TGCTGCTG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/TGTAACTC.sam | 4 +
.../illumina/25T8B25T/sams_with_4M/TGTAATCA.sam | 8 +
.../illumina/25T8B25T/sams_with_4M/TTGTCTAT.sam | 10 +
.../illumina/25T8B25T/sams_with_4M/barcode.params | 63 ++
.../25T8B25T/sams_with_4M/final/nonBarcoded.sam | 0
.../final/nonBarcodedWithMolecularIndex4M4M.sam | 0
.../final/nonBarcodedWithMolecularIndex8M.sam | 0
.../25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam | 2 +
.../sams_with_4M/indicies/AAAAAAAA.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam | 2 +
.../sams_with_4M/indicies/AAAAGAAG.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/AACAATGG.sam | 6 +
.../sams_with_4M/indicies/AACAATGG.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/AACGCATT.sam | 9 +
.../sams_with_4M/indicies/AACGCATT.sam.bak | 9 +
.../25T8B25T/sams_with_4M/indicies/ACAAAATT.sam | 2 +
.../sams_with_4M/indicies/ACAAAATT.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam | 6 +
.../sams_with_4M/indicies/ACAGGTAT.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam | 4 +
.../sams_with_4M/indicies/ACAGTTGA.sam.bak | 4 +
.../25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam | 2 +
.../sams_with_4M/indicies/ACCAGTTG.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/ACGAAATC.sam | 2 +
.../sams_with_4M/indicies/ACGAAATC.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam | 6 +
.../sams_with_4M/indicies/ACTAAGAC.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/ACTGTACC.sam | 2 +
.../sams_with_4M/indicies/ACTGTACC.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/ACTGTATC.sam | 6 +
.../sams_with_4M/indicies/ACTGTATC.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam | 2 +
.../sams_with_4M/indicies/AGAAAAGA.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/AGCATGGA.sam | 5 +
.../sams_with_4M/indicies/AGCATGGA.sam.bak | 5 +
.../25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam | 6 +
.../sams_with_4M/indicies/AGGTAAGG.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam | 6 +
.../sams_with_4M/indicies/AGGTCGCA.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/ATTATCAA.sam | 7 +
.../sams_with_4M/indicies/ATTATCAA.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam | 6 +
.../sams_with_4M/indicies/ATTCCTCT.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/CAACTCTC.sam | 7 +
.../sams_with_4M/indicies/CAACTCTC.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/CAATAGAC.sam | 2 +
.../sams_with_4M/indicies/CAATAGAC.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/CAATAGTC.sam | 9 +
.../sams_with_4M/indicies/CAATAGTC.sam.bak | 9 +
.../25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam | 2 +
.../sams_with_4M/indicies/CAGCGGAT.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam | 7 +
.../sams_with_4M/indicies/CAGCGGTA.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/CCAACATT.sam | 9 +
.../sams_with_4M/indicies/CCAACATT.sam.bak | 9 +
.../25T8B25T/sams_with_4M/indicies/CCAGCACC.sam | 5 +
.../sams_with_4M/indicies/CCAGCACC.sam.bak | 5 +
.../25T8B25T/sams_with_4M/indicies/CCATGCGT.sam | 2 +
.../sams_with_4M/indicies/CCATGCGT.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam | 4 +
.../sams_with_4M/indicies/CGCCTTCC.sam.bak | 4 +
.../25T8B25T/sams_with_4M/indicies/CGCTATGT.sam | 7 +
.../sams_with_4M/indicies/CGCTATGT.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/CTAACTCG.sam | 6 +
.../sams_with_4M/indicies/CTAACTCG.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/CTATGCGC.sam | 2 +
.../sams_with_4M/indicies/CTATGCGC.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/CTATGCGT.sam | 9 +
.../sams_with_4M/indicies/CTATGCGT.sam.bak | 9 +
.../25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam | 5 +
.../sams_with_4M/indicies/CTGCGGAT.sam.bak | 5 +
.../25T8B25T/sams_with_4M/indicies/CTGTAATC.sam | 8 +
.../sams_with_4M/indicies/CTGTAATC.sam.bak | 8 +
.../25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam | 2 +
.../sams_with_4M/indicies/GAAAAAAA.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/GAACGAT..sam | 2 +
.../sams_with_4M/indicies/GAACGAT..sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam | 5 +
.../sams_with_4M/indicies/GAAGGAAG.sam.bak | 5 +
.../25T8B25T/sams_with_4M/indicies/GACCAGGA.sam | 9 +
.../sams_with_4M/indicies/GACCAGGA.sam.bak | 9 +
.../25T8B25T/sams_with_4M/indicies/GACCAGGC.sam | 2 +
.../sams_with_4M/indicies/GACCAGGC.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/GACCGTTG.sam | 6 +
.../sams_with_4M/indicies/GACCGTTG.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/GACCTAAC.sam | 3 +
.../sams_with_4M/indicies/GACCTAAC.sam.bak | 3 +
.../25T8B25T/sams_with_4M/indicies/GATATCCA.sam | 5 +
.../sams_with_4M/indicies/GATATCCA.sam.bak | 5 +
.../25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam | 7 +
.../sams_with_4M/indicies/GCCGTCGA.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam | 7 +
.../sams_with_4M/indicies/GCCTAGCC.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/GTAACATC.sam | 4 +
.../sams_with_4M/indicies/GTAACATC.sam.bak | 4 +
.../25T8B25T/sams_with_4M/indicies/GTCCACAG.sam | 4 +
.../sams_with_4M/indicies/GTCCACAG.sam.bak | 4 +
.../illumina/25T8B25T/sams_with_4M/indicies/N.sam | 18 +
.../25T8B25T/sams_with_4M/indicies/N.sam.bak | 18 +
.../25T8B25T/sams_with_4M/indicies/TAAGCACA.sam | 4 +
.../sams_with_4M/indicies/TAAGCACA.sam.bak | 4 +
.../25T8B25T/sams_with_4M/indicies/TACCGTCT.sam | 2 +
.../sams_with_4M/indicies/TACCGTCT.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam | 2 +
.../sams_with_4M/indicies/TAGCGGTA.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/TATCAGCC.sam | 2 +
.../sams_with_4M/indicies/TATCAGCC.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/TATCCAGG.sam | 6 +
.../sams_with_4M/indicies/TATCCAGG.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/TATCCATG.sam | 2 +
.../sams_with_4M/indicies/TATCCATG.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/TATCTCGG.sam | 2 +
.../sams_with_4M/indicies/TATCTCGG.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/TATCTGCC.sam | 9 +
.../sams_with_4M/indicies/TATCTGCC.sam.bak | 9 +
.../25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam | 2 +
.../sams_with_4M/indicies/TCCGTCTA.sam.bak | 2 +
.../25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam | 7 +
.../sams_with_4M/indicies/TCGCTAGA.sam.bak | 7 +
.../25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam | 3 +
.../sams_with_4M/indicies/TCTGCAAG.sam.bak | 3 +
.../25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam | 4 +
.../sams_with_4M/indicies/TGCAAGTA.sam.bak | 4 +
.../25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam | 6 +
.../sams_with_4M/indicies/TGCTGCTG.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/TGTAACTC.sam | 3 +
.../sams_with_4M/indicies/TGTAACTC.sam.bak | 3 +
.../25T8B25T/sams_with_4M/indicies/TGTAATCA.sam | 5 +
.../sams_with_4M/indicies/TGTAATCA.sam.bak | 5 +
.../25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam | 6 +
.../sams_with_4M/indicies/TTGTCTAT.sam.bak | 6 +
.../25T8B25T/sams_with_4M/indicies/barcode.params | 63 ++
.../multiplexed_positive_rgtags.params | 63 ++
.../25T8B25T/sams_with_4M/templates/AAAAAAAA.sam | 2 +
.../25T8B25T/sams_with_4M/templates/AAAAGAAG.sam | 2 +
.../25T8B25T/sams_with_4M/templates/AACAATGG.sam | 10 +
.../25T8B25T/sams_with_4M/templates/AACGCATT.sam | 16 +
.../25T8B25T/sams_with_4M/templates/ACAAAATT.sam | 2 +
.../25T8B25T/sams_with_4M/templates/ACAGGTAT.sam | 10 +
.../25T8B25T/sams_with_4M/templates/ACAGTTGA.sam | 6 +
.../25T8B25T/sams_with_4M/templates/ACCAGTTG.sam | 2 +
.../25T8B25T/sams_with_4M/templates/ACGAAATC.sam | 2 +
.../25T8B25T/sams_with_4M/templates/ACTAAGAC.sam | 10 +
.../25T8B25T/sams_with_4M/templates/ACTGTACC.sam | 2 +
.../25T8B25T/sams_with_4M/templates/ACTGTATC.sam | 10 +
.../25T8B25T/sams_with_4M/templates/AGAAAAGA.sam | 2 +
.../25T8B25T/sams_with_4M/templates/AGCATGGA.sam | 8 +
.../25T8B25T/sams_with_4M/templates/AGGTAAGG.sam | 10 +
.../25T8B25T/sams_with_4M/templates/AGGTCGCA.sam | 10 +
.../25T8B25T/sams_with_4M/templates/ATTATCAA.sam | 12 +
.../25T8B25T/sams_with_4M/templates/ATTCCTCT.sam | 10 +
.../25T8B25T/sams_with_4M/templates/CAACTCTC.sam | 12 +
.../25T8B25T/sams_with_4M/templates/CAATAGAC.sam | 2 +
.../25T8B25T/sams_with_4M/templates/CAATAGTC.sam | 16 +
.../25T8B25T/sams_with_4M/templates/CAGCGGAT.sam | 2 +
.../25T8B25T/sams_with_4M/templates/CAGCGGTA.sam | 12 +
.../25T8B25T/sams_with_4M/templates/CCAACATT.sam | 16 +
.../25T8B25T/sams_with_4M/templates/CCAGCACC.sam | 8 +
.../25T8B25T/sams_with_4M/templates/CCATGCGT.sam | 2 +
.../25T8B25T/sams_with_4M/templates/CGCCTTCC.sam | 6 +
.../25T8B25T/sams_with_4M/templates/CGCTATGT.sam | 12 +
.../25T8B25T/sams_with_4M/templates/CTAACTCG.sam | 10 +
.../25T8B25T/sams_with_4M/templates/CTATGCGC.sam | 2 +
.../25T8B25T/sams_with_4M/templates/CTATGCGT.sam | 16 +
.../25T8B25T/sams_with_4M/templates/CTGCGGAT.sam | 8 +
.../25T8B25T/sams_with_4M/templates/CTGTAATC.sam | 14 +
.../25T8B25T/sams_with_4M/templates/GAAAAAAA.sam | 2 +
.../25T8B25T/sams_with_4M/templates/GAACGAT..sam | 2 +
.../25T8B25T/sams_with_4M/templates/GAAGGAAG.sam | 8 +
.../25T8B25T/sams_with_4M/templates/GACCAGGA.sam | 16 +
.../25T8B25T/sams_with_4M/templates/GACCAGGC.sam | 2 +
.../25T8B25T/sams_with_4M/templates/GACCGTTG.sam | 10 +
.../25T8B25T/sams_with_4M/templates/GACCTAAC.sam | 4 +
.../25T8B25T/sams_with_4M/templates/GATATCCA.sam | 8 +
.../25T8B25T/sams_with_4M/templates/GCCGTCGA.sam | 12 +
.../25T8B25T/sams_with_4M/templates/GCCTAGCC.sam | 12 +
.../25T8B25T/sams_with_4M/templates/GTAACATC.sam | 6 +
.../25T8B25T/sams_with_4M/templates/GTCCACAG.sam | 6 +
.../25T8B25T/sams_with_4M/templates/TAAGCACA.sam | 6 +
.../25T8B25T/sams_with_4M/templates/TACCGTCT.sam | 2 +
.../25T8B25T/sams_with_4M/templates/TAGCGGTA.sam | 2 +
.../25T8B25T/sams_with_4M/templates/TATCAGCC.sam | 2 +
.../25T8B25T/sams_with_4M/templates/TATCCAGG.sam | 10 +
.../25T8B25T/sams_with_4M/templates/TATCCATG.sam | 2 +
.../25T8B25T/sams_with_4M/templates/TATCTCGG.sam | 2 +
.../25T8B25T/sams_with_4M/templates/TATCTGCC.sam | 16 +
.../25T8B25T/sams_with_4M/templates/TCCGTCTA.sam | 2 +
.../25T8B25T/sams_with_4M/templates/TCGCTAGA.sam | 12 +
.../25T8B25T/sams_with_4M/templates/TCTGCAAG.sam | 4 +
.../25T8B25T/sams_with_4M/templates/TGCAAGTA.sam | 6 +
.../25T8B25T/sams_with_4M/templates/TGCTGCTG.sam | 10 +
.../25T8B25T/sams_with_4M/templates/TGTAACTC.sam | 4 +
.../25T8B25T/sams_with_4M/templates/TGTAATCA.sam | 8 +
.../25T8B25T/sams_with_4M/templates/TTGTCTAT.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/AAAAAAAA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/AAAAGAAG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/AACAATGG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/AACGCATT.sam | 16 +
.../illumina/25T8B25T/sams_with_4M4M/ACAAAATT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/ACAGGTAT.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/ACAGTTGA.sam | 6 +
.../illumina/25T8B25T/sams_with_4M4M/ACCAGTTG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/ACGAAATC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/ACTAAGAC.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/ACTGTACC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/ACTGTATC.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/AGAAAAGA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/AGCATGGA.sam | 8 +
.../illumina/25T8B25T/sams_with_4M4M/AGGTAAGG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/AGGTCGCA.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/ATTATCAA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/ATTCCTCT.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/CAACTCTC.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/CAATAGAC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/CAATAGTC.sam | 16 +
.../illumina/25T8B25T/sams_with_4M4M/CAGCGGAT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/CAGCGGTA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/CCAACATT.sam | 16 +
.../illumina/25T8B25T/sams_with_4M4M/CCAGCACC.sam | 8 +
.../illumina/25T8B25T/sams_with_4M4M/CCATGCGT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/CGCCTTCC.sam | 6 +
.../illumina/25T8B25T/sams_with_4M4M/CGCTATGT.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/CTAACTCG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/CTATGCGC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/CTATGCGT.sam | 16 +
.../illumina/25T8B25T/sams_with_4M4M/CTGCGGAT.sam | 8 +
.../illumina/25T8B25T/sams_with_4M4M/CTGTAATC.sam | 14 +
.../illumina/25T8B25T/sams_with_4M4M/GAAAAAAA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/GAACGAT..sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/GAAGGAAG.sam | 8 +
.../illumina/25T8B25T/sams_with_4M4M/GACCAGGA.sam | 16 +
.../illumina/25T8B25T/sams_with_4M4M/GACCAGGC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/GACCGTTG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/GACCTAAC.sam | 4 +
.../illumina/25T8B25T/sams_with_4M4M/GATATCCA.sam | 8 +
.../illumina/25T8B25T/sams_with_4M4M/GCCGTCGA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/GCCTAGCC.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/GTAACATC.sam | 6 +
.../illumina/25T8B25T/sams_with_4M4M/GTCCACAG.sam | 6 +
.../picard/illumina/25T8B25T/sams_with_4M4M/N.sam | 34 +
.../illumina/25T8B25T/sams_with_4M4M/TAAGCACA.sam | 6 +
.../illumina/25T8B25T/sams_with_4M4M/TACCGTCT.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/TAGCGGTA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/TATCAGCC.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/TATCCAGG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/TATCCATG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/TATCTCGG.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/TATCTGCC.sam | 16 +
.../illumina/25T8B25T/sams_with_4M4M/TCCGTCTA.sam | 2 +
.../illumina/25T8B25T/sams_with_4M4M/TCGCTAGA.sam | 12 +
.../illumina/25T8B25T/sams_with_4M4M/TCTGCAAG.sam | 4 +
.../illumina/25T8B25T/sams_with_4M4M/TGCAAGTA.sam | 6 +
.../illumina/25T8B25T/sams_with_4M4M/TGCTGCTG.sam | 10 +
.../illumina/25T8B25T/sams_with_4M4M/TGTAACTC.sam | 4 +
.../illumina/25T8B25T/sams_with_4M4M/TGTAATCA.sam | 8 +
.../illumina/25T8B25T/sams_with_4M4M/TTGTCTAT.sam | 10 +
.../25T8B25T/sams_with_4M4M/barcode.params | 63 ++
.../25T8B8B25T/sams/N_with_molecular_index.sam | 40 +
.../illumina/mark_illumina_adapters_test.sam | 4 +
testdata/picard/reference/test.dict | 9 +
testdata/picard/reference/test.fasta | 36 +
testdata/picard/reference/test.fasta.fai | 8 +
testdata/picard/reference/test.intervals | 12 +
.../sam/CollectGcBiasMetrics/MSmallHeader.dict | 2 +
.../picard/sam/EstimateLibraryComplexity/dupes.sam | 16 +
.../picard/sam/FixMateInformation/missingMate.sam | 5 +
testdata/picard/sam/namesorted.test.sam | 12 +-
.../sam/summary_alignment_stats_test_chimeras.sam | 20 +
testdata/picard/util/interval_list_to_bed_test.bed | 13 +
.../util/interval_list_to_bed_test.interval_list | 19 +
testdata/picard/vcf/mini_gvcf.vcf | 290 ++++++
testdata/picard/vcf/mini_gvcf.vcf.idx | Bin 0 -> 165 bytes
testdata/picard/vcf/test.over.badContig.chain | 2 +
.../picard/vcf/testLiftoverUsingMissingContig.vcf | 4 +
1083 files changed, 23540 insertions(+), 2418 deletions(-)
diff --git a/.gitignore b/.gitignore
index 4dfc2d7..d0f6515 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,4 +10,7 @@ intellij.testclasses
intellij.classes
htsjdk
.idea
-target
\ No newline at end of file
+target
+report
+jacoco.data
+
diff --git a/.travis.yml b/.travis.yml
index 5e3111e..e1d4ea6 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,12 +1,11 @@
language: java
jdk:
- - oraclejdk7
- - openjdk7
+ - oraclejdk8
before_install:
- - curl -OL http://raw.github.com/craigcitro/r-travis/master/scripts/travis-tool.sh
- - chmod 755 ./travis-tool.sh
- - ./travis-tool.sh bootstrap
+ - sudo apt-get -qq update
+ - sudo apt-get install -y --no-install-recommends r-base-dev r-recommended qpdf
- sed -i -e 's_git@github.com:_https://github.com/_' build.xml
script:
- ant -lib lib/ant clone-htsjdk
+ - ant
- ant test
diff --git a/README.md b/README.md
index d321239..9ef1182 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,11 @@
A set of Java command line tools for manipulating high-throughput sequencing (HTS) data and formats.
-Picard is implemented using the HTSJDK Java library[HTSJDK][1], supporting
-accessing of common file formats, such as [SAM][2] and [VCF][3], used for high-throughput
-sequencing data.
+Picard is implemented using the HTSJDK Java library [HTSJDK][1] to support
+accessing file formats that are commonly used for high-throughput
+sequencing data such as [SAM][2] and [VCF][3].
+
+As of version 2.0.1 (Nov. 2015) Picard requires Java 1.8 (jdk8u66). The last version to support Java 1.7 was release 1.141.
To clone and build:
Clone the repo:
@@ -68,6 +70,7 @@ GA4GH_CLIENT_SECRETS=../client_secrets.json
```
For Java 7 (as opposed to 8) use ```alpn-boot-7.1.3.v20150130.jar```.
+Picard is migrating to semantic versioning (http://semver.org/). We will eventually adhere to it strictly and bump our major version whenever there are breaking changes to our API, but until we more clearly define what constitutes our official API, clients should assume that every release potentially contains at least minor changes to public methods.
Please see the [Picard Documentation](http://broadinstitute.github.io/picard) for more information.
diff --git a/build.sbt b/build.sbt
index 1d93b1a..a2a67bf 100644
--- a/build.sbt
+++ b/build.sbt
@@ -4,7 +4,7 @@ import sbt.Package.ManifestAttributes
name := "picard"
-version := "1.141"
+version := "2.1.0"
organization := "com.github.broadinstitute"
@@ -15,7 +15,7 @@ javaSource in Test := baseDirectory.value / "src/tests"
unmanagedResourceDirectories in Test := Seq(baseDirectory.value / "src/scripts", baseDirectory.value / "testdata", baseDirectory.value / "src/tests/scripts")
libraryDependencies ++= Seq(
- "com.github.samtools" % "htsjdk" % "1.141",
+ "com.github.samtools" % "htsjdk" % "2.1.0",
("com.google.cloud.genomics" % "gatk-tools-java" % "1.1" % "picardopt").
exclude("org.mortbay.jetty", "servlet-api"),
"org.testng" % "testng" % "6.8.8" % Test
@@ -36,9 +36,7 @@ pomIncludeRepository := { _ => false }
crossPaths := false
-javacOptions in Compile ++= Seq("-source", "1.6")
-
-javacOptions in(Compile, compile) ++= Seq("-target", "1.6")
+javacOptions in (Compile,doc) ++= Seq("-Xdoclint:none")
versionWithGit
diff --git a/build.xml b/build.xml
index 4e72f5d..9568f5c 100755
--- a/build.xml
+++ b/build.xml
@@ -43,7 +43,7 @@
<!-- TODO: get this from the build.xml in htsjdk -->
<property name="htsjdk-classes" value="htsjdk/classes"/>
- <property name="javac.target" value="1.6"/>
+ <property name="javac.target" value="1.8"/>
<property name="javac.debug" value="true"/>
<!-- Get GIT hash, if available, otherwise leave it blank. -->
@@ -53,7 +53,7 @@
<arg value="--pretty=format:%H_%at"/>
</exec>
<property name="repository.revision" value=""/>
- <property name="picard-version" value="1.141"/>
+ <property name="picard-version" value="2.1.0"/>
<property name="command-line-html-dir" value="${dist}/html"/>
<property name="testng.verbosity" value="2"/>
<property name="test.debug.port" value="5005"/>
@@ -66,6 +66,12 @@
<condition property="isUnix">
<os family="unix"/>
</condition>
+
+ <!-- Import JaCoCo Ant tasks -->
+ <taskdef uri="antlib:org.jacoco.ant" resource="org/jacoco/ant/antlib.xml">
+ <classpath path="lib/ant/jacocoant.jar" />
+ </taskdef>
+
<target name="set_excluded_test_groups_unix" if="isUnix">
<property name="excludedTestGroups" value="slow, broken"/>
</target>
@@ -192,6 +198,7 @@
<!-- TEST -->
<target name="test" depends="compile, set_excluded_test_groups" description="Run unit tests">
<taskdef resource="testngtasks" classpathref="classpath"/>
+ <jacoco:coverage destfile="jacoco.data" xmlns:jacoco="antlib:org.jacoco.ant">
<testng suitename="picard-tests" classpathref="classpath" outputdir="${test.output}"
failureproperty="tests.failed" excludedgroups="${excludedTestGroups}" workingDir="${basedir}"
verbose="${testng.verbosity}">
@@ -206,6 +213,8 @@
</classfileset>
<jvmarg value="-Xmx2G"/>
</testng>
+ </jacoco:coverage>
+
<junitreport todir="${dist}/test">
<fileset dir="${test.output}">
<include name="*.xml"/>
@@ -216,6 +225,23 @@
<fail if="tests.failed" message="There were failed unit tests"/>
</target>
+ <target name="test-coverage-report" depends="test" description="Runs tests and creates an HTML code coverage report">
+ <jacoco:report xmlns:jacoco="antlib:org.jacoco.ant">
+ <executiondata>
+ <file file="jacoco.data"/>
+ </executiondata>
+ <structure name="Picard">
+ <classfiles>
+ <fileset dir="classes"/>
+ </classfiles>
+ <sourcefiles encoding="UTF-8">
+ <fileset dir="src"/>
+ </sourcefiles>
+ </structure>
+ <html destdir="report"/>
+ </jacoco:report>
+ </target>
+
<target name="single-test"
depends="compile, compile-tests"
description="Compile and run a single test.">
@@ -263,7 +289,9 @@
<mkdir dir="${dist}"/>
<mkdir dir="${dist.tmp}"/>
<unjar dest="${dist.tmp}">
- <fileset dir="${lib}" />
+ <fileset dir="${lib}">
+ <exclude name="**/jacocoant.jar"/> <!-- must exclude this jar from packing into picard - this is only used for testing -->
+ </fileset>
<fileset dir="${htsjdk_lib_dir}">
<include name="*.jar"/>
</fileset>
@@ -312,6 +340,7 @@
protected="true"
use="true"
version="true"
+ additionalparam="-Xdoclint:none -notimestamp"
failonerror="true"
excludepackagenames="htsjdk*">
<classpath>
@@ -340,6 +369,7 @@
protected="true"
use="true"
version="true"
+ additionalparam="-Xdoclint:none -notimestamp"
failonerror="true"
excludepackagenames="picard*">
<classpath>
@@ -398,10 +428,12 @@
<document-command title="CollectAlignmentSummaryMetrics" main-class="picard.analysis.CollectAlignmentSummaryMetrics"/>
<document-command title="CollectBaseDistributionByCycle" main-class="picard.analysis.CollectBaseDistributionByCycle"/>
<document-command title="CollectGcBiasMetrics" main-class="picard.analysis.CollectGcBiasMetrics"/>
+ <document-command title="CollectHsMetrics" main-class="picard.analysis.directed.CollectHsMetrics"/>
<document-command title="CollectInsertSizeMetrics" main-class="picard.analysis.CollectInsertSizeMetrics"/>
<document-command title="CollectMultipleMetrics" main-class="picard.analysis.CollectMultipleMetrics"/>
<document-command title="CollectTargetedPcrMetrics" main-class="picard.analysis.directed.CollectTargetedPcrMetrics"/>
<document-command title="CollectRnaSeqMetrics" main-class="picard.analysis.CollectRnaSeqMetrics"/>
+ <document-command title="CollectVariantCallingMetrics" main-class="picard.vcf.CollectVariantCallingMetrics"/>
<document-command title="CollectWgsMetrics" main-class="picard.analysis.CollectWgsMetrics"/>
<document-command title="CompareSAMs" main-class="picard.sam.CompareSAMs"/>
<document-command title="CreateSequenceDictionary" main-class="picard.sam.CreateSequenceDictionary"/>
diff --git a/src/java/picard/analysis/AdapterUtility.java b/src/java/picard/analysis/AdapterUtility.java
new file mode 100644
index 0000000..9b9135b
--- /dev/null
+++ b/src/java/picard/analysis/AdapterUtility.java
@@ -0,0 +1,117 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.samtools.util.SequenceUtil;
+import htsjdk.samtools.util.StringUtil;
+import picard.util.IlluminaUtil;
+
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A utility class for matching reads to adapters.
+ * Note that this is different from ClippingUtility in that it tries to match the starts of reads
+ * to any part of the adapter (as opposed to finding the start of the adapter anywhere in the read).
+ */
+public class AdapterUtility {
+
+ //The number of bases to check in order to map a read to an adapter
+ private static final int ADAPTER_MATCH_LENGTH = 16;
+
+ // The maximum number of mismatches a read can have and still be considered as matching an adapter
+ private static final int MAX_ADAPTER_ERRORS = 1;
+
+ // byte arrays in both fwd and rc for the adapter sequences
+ final byte [][] adapterKmers;
+
+ public static List<String> DEFAULT_ADAPTER_SEQUENCE = CollectionUtil.makeList(
+ IlluminaUtil.IlluminaAdapterPair.SINGLE_END.get5PrimeAdapter(),
+ IlluminaUtil.IlluminaAdapterPair.SINGLE_END.get3PrimeAdapter(),
+ IlluminaUtil.IlluminaAdapterPair.PAIRED_END.get5PrimeAdapter(),
+ IlluminaUtil.IlluminaAdapterPair.PAIRED_END.get3PrimeAdapter(),
+ IlluminaUtil.IlluminaAdapterPair.INDEXED.get5PrimeAdapter(),
+ IlluminaUtil.IlluminaAdapterPair.INDEXED.get3PrimeAdapter()
+ );
+ // TODO -- consider adding DUAL_INDEXED to the list above
+
+ public AdapterUtility(final List<String> adapterSequence) {
+ adapterKmers = prepareAdapterSequences(adapterSequence);
+ }
+
+ /** Converts the supplied adapter sequences to byte arrays in both fwd and rc */
+ private static byte [][] prepareAdapterSequences(final List<String> adapterSequence) {
+ final Set<String> kmers = new HashSet<>();
+
+ // Make a set of all kmers of length ADAPTER_MATCH_LENGTH
+ for (final String seq : adapterSequence) {
+ for (int i=0; i<=seq.length() - ADAPTER_MATCH_LENGTH; ++i) {
+ final String kmer = seq.substring(i, i+ADAPTER_MATCH_LENGTH).toUpperCase();
+
+ int ns = 0;
+ for (final char ch : kmer.toCharArray()) if (ch == 'N') ++ns;
+ if (ns <= MAX_ADAPTER_ERRORS) {
+ kmers.add(kmer);
+ kmers.add(SequenceUtil.reverseComplement(kmer));
+ }
+ }
+ }
+
+ // Make an array of byte[] for the kmers
+ final byte [][] adapterKmers = new byte[kmers.size()][];
+ int i=0;
+ for (final String kmer : kmers) {
+ adapterKmers[i++] = StringUtil.stringToBytes(kmer);
+ }
+ return adapterKmers;
+ }
+
+ /**
+ * Checks the first ADAPTER_MATCH_LENGTH bases of the read against known adapter sequences and returns
+ * true if the read matches an adapter sequence with MAX_ADAPTER_ERRORS mismatches or fewer.
+ *
+ * @param read the basecalls for the read in the order and orientation the machine read them
+ * @return true if the read matches an adapter and false otherwise
+ */
+ public boolean isAdapterSequence(final byte[] read) {
+ if (read.length < ADAPTER_MATCH_LENGTH) return false;
+
+ for (final byte[] adapter : adapterKmers) {
+ int errors = 0;
+
+ for (int i=0; i<adapter.length; ++i) {
+ if (read[i] != adapter[i]) {
+ if (++errors > MAX_ADAPTER_ERRORS) break;
+ }
+ }
+
+ if (errors <= MAX_ADAPTER_ERRORS) return true;
+ }
+
+ return false;
+ }
+}
diff --git a/src/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/java/picard/analysis/AlignmentSummaryMetricsCollector.java
index 7f5327d..43d78cc 100644
--- a/src/java/picard/analysis/AlignmentSummaryMetricsCollector.java
+++ b/src/java/picard/analysis/AlignmentSummaryMetricsCollector.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.AlignmentBlock;
@@ -13,11 +37,11 @@ import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
+import htsjdk.samtools.SamPairUtil.PairOrientation;
import picard.metrics.PerUnitMetricCollector;
import picard.metrics.SAMRecordAndReference;
import picard.metrics.SAMRecordAndReferenceMultiLevelCollector;
-import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -25,15 +49,11 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
// If we have a reference sequence, collect metrics on how well we aligned to it
private final boolean doRefMetrics;
- //the adapter sequences converted to byte arrays
- private final byte[][] adapterKmers;
-
- //A list of Strings representing the sequence of bases in an adapter
- private final List<String> adapterSequence;
-
//Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs.
private final int maxInsertSize;
+ //Paired-end reads that do not have this expected orientation will be considered chimeric.
+ private final Set<PairOrientation> expectedOrientations;
//Whether the SAM or BAM file consists of bisulfite sequenced reads.
private final boolean isBisulfiteSequenced;
@@ -44,18 +64,16 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
//The minimum quality a base has to meet in order to be consider hq_20
private final static int BASE_QUALITY_THRESHOLD = 20;
- //The number of bases to check in order to map a read to an adapter
- private static final int ADAPTER_MATCH_LENGTH = 16;
-
- // The maximum number of mismatches a read can have and still be considered as matching an adapter
- private static final int MAX_ADAPTER_ERRORS = 1;
+ //the adapter utility class
+ private final AdapterUtility adapterUtility;
public AlignmentSummaryMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords,
- final boolean doRefMetrics, final List<String> adapterSequence, final int maxInsertSize, boolean isBisulfiteSequenced) {
- this.doRefMetrics = doRefMetrics;
- this.adapterSequence = adapterSequence;
- this.adapterKmers = prepareAdapterSequences();
- this.maxInsertSize = maxInsertSize;
+ final boolean doRefMetrics, final List<String> adapterSequence, final int maxInsertSize,
+ final Set<PairOrientation> expectedOrientations, final boolean isBisulfiteSequenced) {
+ this.doRefMetrics = doRefMetrics;
+ this.adapterUtility = new AdapterUtility(adapterSequence);
+ this.maxInsertSize = maxInsertSize;
+ this.expectedOrientations = expectedOrientations;
this.isBisulfiteSequenced = isBisulfiteSequenced;
setup(accumulationLevels, samRgRecords);
}
@@ -72,58 +90,6 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
}
}
- /** Converts the supplied adapter sequences to byte arrays in both fwd and rc. */
- private byte [][] prepareAdapterSequences() {
- final Set<String> kmers = new HashSet<String>();
-
- // Make a set of all kmers of adapterMatchLength
- for (final String seq : adapterSequence) {
- for (int i=0; i<=seq.length() - ADAPTER_MATCH_LENGTH; ++i) {
- final String kmer = seq.substring(i, i+ADAPTER_MATCH_LENGTH).toUpperCase();
-
- int ns = 0;
- for (final char ch : kmer.toCharArray()) if (ch == 'N') ++ns;
- if (ns <= MAX_ADAPTER_ERRORS) {
- kmers.add(kmer);
- kmers.add(SequenceUtil.reverseComplement(kmer));
- }
- }
- }
-
- // Make an array of byte[] for the kmers
- final byte [][] adapterKmers = new byte[kmers.size()][];
- int i=0;
- for (final String kmer : kmers) {
- adapterKmers[i++] = StringUtil.stringToBytes(kmer);
- }
- return adapterKmers;
- }
-
- /**
- * Checks the first ADAPTER_MATCH_LENGTH bases of the read against known adapter sequences and returns
- * true if the read matches an adapter sequence with MAX_ADAPTER_ERRORS mismsatches or fewer.
- *
- * @param read the basecalls for the read in the order and orientation the machine read them
- * @return true if the read matches an adapter and false otherwise
- */
- private boolean isAdapterSequence(final byte[] read) {
- if (read.length < ADAPTER_MATCH_LENGTH) return false;
-
- for (final byte[] adapter : adapterKmers) {
- int errors = 0;
-
- for (int i=0; i<adapter.length; ++i) {
- if (read[i] != adapter[i]) {
- if (++errors > MAX_ADAPTER_ERRORS) break;
- }
- }
-
- if (errors <= MAX_ADAPTER_ERRORS) return true;
- }
-
- return false;
- }
-
private class GroupAlignmentSummaryMetricsPerUnitMetricCollector implements PerUnitMetricCollector<AlignmentSummaryMetrics, Comparable<?>, SAMRecordAndReference> {
final IndividualAlignmentSummaryMetricsCollector unpairedCollector;
final IndividualAlignmentSummaryMetricsCollector firstOfPairCollector;
@@ -220,12 +186,12 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
}
public void addRecord(final SAMRecord record, final ReferenceSequence ref) {
- if (record.isSecondaryOrSupplementary()) {
+ if (record.getNotPrimaryAlignmentFlag()) {
// only want 1 count per read so skip non primary alignments
return;
}
- collectReadData(record, ref);
+ collectReadData(record);
collectQualityData(record, ref);
}
@@ -260,7 +226,10 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
}
}
- private void collectReadData(final SAMRecord record, final ReferenceSequence ref) {
+ private void collectReadData(final SAMRecord record) {
+ // NB: for read count metrics, do not include supplementary records, but for base count metrics, do include supplementary records.
+ if (record.getSupplementaryAlignmentFlag()) return;
+
metrics.TOTAL_READS++;
readLengthHistogram.increment(record.getReadBases().length);
@@ -273,14 +242,13 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
final byte[] readBases = record.getReadBases();
if (!(record instanceof BAMRecord)) StringUtil.toUpperCase(readBases);
- if (isAdapterSequence(readBases)) {
+ if (adapterUtility.isAdapterSequence(readBases)) {
this.adapterReads++;
}
}
else if(doRefMetrics) {
metrics.PF_READS_ALIGNED++;
if (!record.getReadNegativeStrandFlag()) numPositiveStrand++;
-
if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
metrics.READS_ALIGNED_IN_PAIRS++;
@@ -290,18 +258,26 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
++this.chimerasDenominator;
// With both reads mapped we can see if this pair is chimeric
- if (Math.abs(record.getInferredInsertSize()) > maxInsertSize ||
- !record.getReferenceIndex().equals(record.getMateReferenceIndex())) {
+ if (ChimeraUtil.isChimeric(record, maxInsertSize, expectedOrientations)) {
++this.chimeras;
}
}
}
+ else { // fragment reads or read pairs with one end that maps
+ // Consider chimeras that occur *within* the read using the SA tag
+ if (record.getMappingQuality() >= MAPPING_QUALITY_THRESOLD) {
+ ++this.chimerasDenominator;
+ if (record.getAttribute("SA") != null) ++this.chimeras;
+ }
+ }
}
}
}
private void collectQualityData(final SAMRecord record, final ReferenceSequence reference) {
- // If the read isnt an aligned PF read then look at the read for no-calls
+ // NB: for read count metrics, do not include supplementary records, but for base count metrics, do include supplementary records.
+
+ // If the read isn't an aligned PF read then look at the read for no-calls
if (record.getReadUnmappedFlag() || record.getReadFailsVendorQualityCheckFlag() || !doRefMetrics) {
final byte[] readBases = record.getReadBases();
for (int i = 0; i < readBases.length; i++) {
@@ -312,7 +288,7 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
}
else if (!record.getReadFailsVendorQualityCheckFlag()) {
final boolean highQualityMapping = isHighQualityMapping(record);
- if (highQualityMapping) metrics.PF_HQ_ALIGNED_READS++;
+ if (highQualityMapping && !record.getSupplementaryAlignmentFlag()) metrics.PF_HQ_ALIGNED_READS++;
final byte[] readBases = record.getReadBases();
final byte[] refBases = reference.getBases();
diff --git a/src/java/picard/analysis/BaseDistributionByCycleMetrics.java b/src/java/picard/analysis/BaseDistributionByCycleMetrics.java
index 8884f06..2a9c9f6 100644
--- a/src/java/picard/analysis/BaseDistributionByCycleMetrics.java
+++ b/src/java/picard/analysis/BaseDistributionByCycleMetrics.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.metrics.MetricBase;
diff --git a/src/java/picard/analysis/ChimeraUtil.java b/src/java/picard/analysis/ChimeraUtil.java
new file mode 100644
index 0000000..2235ccd
--- /dev/null
+++ b/src/java/picard/analysis/ChimeraUtil.java
@@ -0,0 +1,79 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamPairUtil;
+import htsjdk.samtools.SamPairUtil.PairOrientation;
+
+import java.util.EnumSet;
+import java.util.Set;
+
+public class ChimeraUtil {
+ public static int DEFAULT_INSERT_SIZE_LIMIT = 100000;
+ public static Set<PairOrientation> DEFAULT_EXPECTED_ORIENTATIONS = EnumSet.of(PairOrientation.FR);
+
+ /**
+ * Checks whether the given read is part of a chimeric pair.
+ * Note that this method returns false if the read is unpaired or if either end of the pair is unmapped.
+ *
+ * @param rec the read
+ * @param maxInsertSize max insert size to be considered non-chimeric
+ * @param expectedOrientations set of orientations that are not chimeric; must not be null
+ * @return true if this record is part of a chimeric read pair, false otherwise
+ */
+ public static boolean isChimeric(final SAMRecord rec, final int maxInsertSize, final Set<PairOrientation> expectedOrientations) {
+ return isMappedPair(rec) && // the read pair needs to be mapped and...
+ (Math.abs(rec.getInferredInsertSize()) > maxInsertSize || // either far apart on the same contig
+ !rec.getReferenceIndex().equals(rec.getMateReferenceIndex()) || // or on different contigs
+ !matchesExpectedOrientations(rec, expectedOrientations)); // or in unexpected orientations
+ }
+
+ /**
+ * Checks whether the given pair of reads is chimeric.
+ * Note that this method returns false if either end of the pair is unmapped.
+ *
+ * @param r1 first read of the pair
+ * @param r2 second read of the pair
+ * @param maxInsertSize max insert size to be considered non-chimeric
+ * @param expectedOrientations set of orientations that are not chimeric
+ * @return true if this pair is chimeric, false otherwise
+ */
+ public static boolean isChimeric(final SAMRecord r1, final SAMRecord r2, final int maxInsertSize, final Set<PairOrientation> expectedOrientations) {
+ return isMappedPair(r1) && // the read pair needs to be mapped and...
+ (Math.abs(r1.getInferredInsertSize()) > maxInsertSize || // either far apart on the same contig
+ !r1.getReferenceIndex().equals(r2.getReferenceIndex()) || // or on different contigs
+ !matchesExpectedOrientations(r1, expectedOrientations) || // or in unexpected orientations
+ r2.getAttribute("SA") != null); // (another check for an unexpected orientation here)
+ }
+
+ private static boolean isMappedPair(final SAMRecord rec) {
+ return rec.getReadPairedFlag() && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag();
+ }
+
+ private static boolean matchesExpectedOrientations(final SAMRecord rec, final Set<PairOrientation> expectedOrientations) {
+ return expectedOrientations.contains(SamPairUtil.getPairOrientation(rec)) && rec.getAttribute("SA") == null;
+ }
+}
diff --git a/src/java/picard/analysis/CollectAlignmentSummaryMetrics.java b/src/java/picard/analysis/CollectAlignmentSummaryMetrics.java
index 5398e00..db41884 100644
--- a/src/java/picard/analysis/CollectAlignmentSummaryMetrics.java
+++ b/src/java/picard/analysis/CollectAlignmentSummaryMetrics.java
@@ -27,6 +27,7 @@ package picard.analysis;
import htsjdk.samtools.Defaults;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SamPairUtil.PairOrientation;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.CollectionUtil;
@@ -36,9 +37,9 @@ import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.Metrics;
-import picard.util.IlluminaUtil;
import java.io.File;
+import java.util.EnumSet;
import java.util.List;
import java.util.Set;
@@ -60,6 +61,14 @@ import java.util.Set;
* <li>Strand balance - reads mapped to positive strand / total mapped reads</li>
* </ul>
* Metrics are written for the first read of a pair, the second read, and combined for the pair.
+ *
+ * Chimeras are identified if any of the following criteria are met:
+ * <ul>
+ * <li>the insert size is larger than MAX_INSERT_SIZE</li>
+ * <li>the ends of a pair map to different contigs</li>
+ * <li>the paired end orientation is different from the expected orientation</li>
+ * <li>the read contains an SA tag (chimeric alignment)</li>
+ * </ul>
*
* @author Doug Voet (dvoet at broadinstitute dot org)
*/
@@ -69,33 +78,33 @@ import java.util.Set;
programGroup = Metrics.class
)
public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram {
- static final String USAGE_SUMMARY = "Produces a file containing summary alignment metrics from a SAM or BAM.";
- static final String USAGE_DETAILS = "<br />" +
+ static final String USAGE_SUMMARY = "Produce a summary of alignment metrics from a SAM or BAM file";
+ static final String USAGE_DETAILS = "Using read outputs from high throughput sequencing (HTS) technologies, this tool provides " +
+ "metrics regarding the quality of read alignments to a reference sequence, as well as the proportion of the reads " +
+ "that passed machine signal-to-noise threshold quality filters (Illumina)."+
"<h4>Usage example:</h4>" +
"<pre>" +
- " java -jar picard.jar CollectAlignmentMetrics \\<br />" +
- " R=reference.fasta \\<br />" +
- " I=input.bam \\<br />" +
- " O=output.txt" +
- "</pre>" +
+ " java -jar picard.jar CollectAlignmentSummaryMetrics \\<br />" +
+ " R=reference_sequence.fasta \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=output.txt" +
+ "</pre>"+
+ "Please see <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics'>" +
+ "the AlignmentSummaryMetrics documentation</a> for detailed explanations of each metric. <br /> <br />" +
+ "Additional information about Illumina's quality filters can be found in the following documents on the Illumina website: " +
+ "<ul><li>http://support.illumina.com/content/dam/illumina-marketing/documents/products/technotes/hiseq-x-percent-pf-technical-note-770-2014-043.pdf</li> " +
+ "<li>http://support.illumina.com/content/dam/illumina-support/documents/documentation/system_documentation/hiseqx/hiseq-x-system-guide-15050091-d.pdf</li></ul>" +
"<hr />";
-
private static final Log log = Log.getInstance(CollectAlignmentSummaryMetrics.class);
- // Usage and parameters
+ @Option(doc="Paired-end reads above this insert size will be considered chimeric along with inter-chromosomal pairs.")
+ public int MAX_INSERT_SIZE = ChimeraUtil.DEFAULT_INSERT_SIZE_LIMIT;
- @Option(doc="Paired end reads above this insert size will be considered chimeric along with inter-chromosomal pairs.")
- public int MAX_INSERT_SIZE = 100000;
+ @Option(doc="Paired-end reads that do not have this expected orientation will be considered chimeric.")
+ public Set<PairOrientation> EXPECTED_PAIR_ORIENTATIONS = EnumSet.copyOf(ChimeraUtil.DEFAULT_EXPECTED_ORIENTATIONS);
@Option(doc="List of adapter sequences to use when processing the alignment metrics")
- public List<String> ADAPTER_SEQUENCE = CollectionUtil.makeList(
- IlluminaUtil.IlluminaAdapterPair.SINGLE_END.get5PrimeAdapter(),
- IlluminaUtil.IlluminaAdapterPair.SINGLE_END.get3PrimeAdapter(),
- IlluminaUtil.IlluminaAdapterPair.PAIRED_END.get5PrimeAdapter(),
- IlluminaUtil.IlluminaAdapterPair.PAIRED_END.get3PrimeAdapter(),
- IlluminaUtil.IlluminaAdapterPair.INDEXED.get5PrimeAdapter(),
- IlluminaUtil.IlluminaAdapterPair.INDEXED.get3PrimeAdapter()
- );
+ public List<String> ADAPTER_SEQUENCE = AdapterUtility.DEFAULT_ADAPTER_SEQUENCE;
@Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics. ")
public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
@@ -104,7 +113,7 @@ public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram {
public boolean IS_BISULFITE_SEQUENCED = false;
//overridden to make it visible on the commandline and to change the doc.
- @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file. Note that while this argument isn't required, without it only a small subset of the metrics will be calculated.", optional = true, overridable = true)
+ @Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence file. Note that while this argument isn't required, without it only a small subset of the metrics will be calculated. Note also that if a reference sequence is provided, it must be accompanied by a sequence dictionary.", optional = true, overridable = true)
public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;
private AlignmentSummaryMetricsCollector collector;
@@ -122,12 +131,12 @@ public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram {
if (header.getSequenceDictionary().isEmpty()) {
log.warn(INPUT.getAbsoluteFile() + " has no sequence dictionary. If any reads " +
- "in the file are aligned then alignment summary metrics collection will fail.");
+ "in the file are aligned, then alignment summary metrics collection will fail.");
}
final boolean doRefMetrics = REFERENCE_SEQUENCE != null;
collector = new AlignmentSummaryMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), doRefMetrics,
- ADAPTER_SEQUENCE, MAX_INSERT_SIZE, IS_BISULFITE_SEQUENCED);
+ ADAPTER_SEQUENCE, MAX_INSERT_SIZE, EXPECTED_PAIR_ORIENTATIONS, IS_BISULFITE_SEQUENCED);
}
@Override protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
diff --git a/src/java/picard/analysis/CollectBaseDistributionByCycle.java b/src/java/picard/analysis/CollectBaseDistributionByCycle.java
index fb3bacd..7a5af91 100644
--- a/src/java/picard/analysis/CollectBaseDistributionByCycle.java
+++ b/src/java/picard/analysis/CollectBaseDistributionByCycle.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.SAMFileHeader;
@@ -19,20 +43,50 @@ import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
import picard.util.RExecutor;
+
+
@CommandLineProgramProperties(
- usage = "Program to chart the nucleotide distribution per cycle in a SAM or BAM file.",
- usageShort = "Program to chart the nucleotide distribution per cycle in a SAM or BAM file.",
+ usage = CollectBaseDistributionByCycle.USAGE_SUMMARY + CollectBaseDistributionByCycle.USAGE_DETAILS,
+ usageShort = CollectBaseDistributionByCycle.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectBaseDistributionByCycle extends SinglePassSamProgram {
-
+ static final String USAGE_SUMMARY = "Chart the nucleotide distribution per cycle in a SAM or BAM file";
+ static final String USAGE_DETAILS = "This tool produces a chart of the nucleotide distribution per cycle in a SAM or BAM file " +
+ "in order to enable assessment of systematic errors at specific positions in the reads.<br /><br />" +
+ "" +
+ "<h4>Interpretation notes</h4>" +
+ "Increased numbers of miscalled bases will be reflected in base distribution changes and increases in the number of Ns. " +
+ "In general, we expect that for any given cycle, or position within reads, the relative proportions of A, T, C and G " +
+ "should reflect the AT:GC content of the organism's genome. Thus, for all four nucleotides, flattish lines would be " +
+ "expected. Deviations from this expectation, for example a spike of A at a particular cycle (position within reads), " +
+ "would suggest a systematic sequencing error."+
+ "" +
+ "<h4>Note on quality trimming</h4>" +
+ "In the past, many sequencing data processing workflows included discarding the low-quality tails of reads by applying " +
+ "hard-clipping at some arbitrary base quality threshold value. This is no longer useful because most sophisticated " +
+ "analysis tools (such as the GATK variant discovery tools) are quality-aware, meaning that they are able to take base " +
+ "quality into account when weighing evidence provided by sequencing reads. Unnecessary clipping may interfere with other " +
+ "quality control evaluations and may lower the quality of analysis results. For example, trimming reduces the " +
+ "effectiveness of the Base Recalibration (BQSR) pre-processing step of the " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/best-practices'>GATK Best Practices for Variant Discovery</a>, " +
+ "which aims to correct some types of systematic biases that affect the accuracy of base quality scores. " +
+ "" +
+ "<br /><h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectBaseDistributionByCycle \\<br />" +
+ " CHART=collect_base_dist_by_cycle.pdf \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=output.txt" +
+ "</pre>" +
+ "<hr />";
@Option(shortName = "CHART", doc = "A file (with .pdf extension) to write the chart to.")
public File CHART_OUTPUT;
@Option(doc = "If set to true, calculate the base distribution over aligned reads only.")
public boolean ALIGNED_READS_ONLY = false;
- @Option(doc = "If set to true calculate the base distribution over PF reads only.")
+ @Option(doc = "If set to true, calculate the base distribution over PF reads only (Illumina specific). PF reads are reads that passed the internal quality filters applied by Illumina sequencers.")
public boolean PF_READS_ONLY = false;
private HistogramGenerator hist;
diff --git a/src/java/picard/analysis/CollectGcBiasMetrics.java b/src/java/picard/analysis/CollectGcBiasMetrics.java
index 7909e2e..a122731 100644
--- a/src/java/picard/analysis/CollectGcBiasMetrics.java
+++ b/src/java/picard/analysis/CollectGcBiasMetrics.java
@@ -52,15 +52,64 @@ import java.util.Set;
* edited by Kylee Bergin
*/
@CommandLineProgramProperties(
- usage = "Tool to collect information about GC bias in the reads in a given BAM file. Computes" +
- " the number of windows (of size specified by SCAN_WINDOW_SIZE) in the genome at each GC%" +
- " and counts the number of read starts in each GC bin. What is output and plotted is" +
- " the \"normalized coverage\" in each bin - i.e. the number of reads per window normalized" +
- " to the average number of reads per window across the whole genome..\n",
- usageShort = "Collects information about GC bias in the reads in the provided SAM or BAM",
+ usage = CollectGcBiasMetrics.USAGE_SUMMARY + CollectGcBiasMetrics.USAGE_DETAILS,
+ usageShort = CollectGcBiasMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectGcBiasMetrics extends SinglePassSamProgram {
+
+ static final String USAGE_SUMMARY = "Collect metrics regarding GC bias. ";
+ static final String USAGE_DETAILS = "This tool collects information about the relative proportions of guanine (G) and cytosine (C)" +
+ " nucleotides in a sample. Regions of high and low G + C content have been shown to interfere with mapping/aligning," +
+ " ultimately leading to fragmented genome assemblies and poor coverage in a phenomenon known as \"GC bias\". " +
+ "Detailed information on the effects of GC bias on the collection and analysis of sequencing data can be found at " +
+ "DOI: 10.1371/journal.pone.0062856/.<br /><br />" +
+ "" +
+ "The GC bias statistics are always output in a detailed long-form version, but a summary can also be produced. Both the " +
+ "detailed metrics and the summary metrics are output as tables (\".txt\" files) and an accompanying chart that plots the " +
+ "data (\".pdf\" file). <br /><br /> " +
+ "" +
+ "<h4>Detailed metrics</h4>" +
+ "The table of detailed metrics includes GC percentages for each bin (GC), the percentage of WINDOWS corresponding to each " +
+ "GC bin of the reference sequence, the numbers of reads that start within a particular %GC content bin (READ_STARTS), and " +
+ "the mean base quality of the reads that correspond to a specific GC content distribution window (MEAN_BASE_QUALITY). " +
+ "NORMALIZED_COVERAGE is a relative measure of sequence coverage by the reads at a particular GC content. " +
+ "" +
+ "For each run, the corresponding reference sequence is divided into bins or windows based on the percentage of G + C" +
+ " content ranging from 0 - 100%. The percentages of G + C are determined from a defined length of sequence; the default " +
+ "value is set at 100 bases. The mean of the distribution will vary among organisms; human DNA has a mean GC content " +
+ "of 40%, suggesting a slight preponderance of AT-rich regions. <br /><br />" +
+ "" +
+ "<h4>Summary metrics</h4>" +
+ "The table of summary metrics captures run-specific bias information including WINDOW_SIZE, ALIGNED_READS, TOTAL_CLUSTERS, " +
+ "AT_DROPOUT, and GC_DROPOUT. While WINDOW_SIZE refers to the numbers of bases used for the distribution (see above), the " +
+ "ALIGNED_READS and TOTAL_CLUSTERS are the total number of aligned reads and the total number of reads (after filtering) " +
+ "produced in a run. In addition, the tool produces both AT_DROPOUT and GC_DROPOUT metrics, which indicate the percentage of " +
+ "misaligned reads that correlate with low (%-GC is < 50%) or high (%-GC is > 50%) GC content respectively. <br /><br />" +
+ "" +
+ "The percentage of \"coverage\" or depth in a GC bin is calculated by dividing the number of reads of a particular GC content " +
+ "by the mean number of reads of all GC bins. A number of 1 represents mean coverage, a number less than 1 represents lower " +
+ "than mean coverage (e.g. 0.5 means half as much coverage as average) while a number greater than 1 represents higher than " +
+ "mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average). " +
+ "" +
+ "This tool also tracks mean base-quality scores of the reads within each GC content bin, enabling the user to determine " +
+ "how base quality scores vary with GC content. <br /> <br />"+
+ "" +
+ "The chart output associated with this data table plots the NORMALIZED_COVERAGE, the distribution of WINDOWs corresponding " +
+ "to GC percentages, and base qualities corresponding to each %GC bin."+
+ "" +
+ "<h4>Usage Example:</h4>"+
+ "<pre>" +
+ "java -jar picard.jar CollectGcBiasMetrics \\<br />"+
+ " I=input.bam \\<br />"+
+ " O=gc_bias_metrics.txt \\<br />"+
+ " CHART=gc_bias_metrics.pdf \\<br />"+
+ " S=summary_metrics.txt \\<br />"+
+ " R=reference_sequence.fasta"+
+ "</pre>"+
+ "Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#GcBiasMetrics'>" +
+ "the GcBiasMetrics documentation</a> for further explanations of each metric." +
+ "<hr />";
/** The location of the R script to do the plotting. */
private static final String R_SCRIPT = "picard/analysis/gcBias.R";
diff --git a/src/java/picard/analysis/CollectInsertSizeMetrics.java b/src/java/picard/analysis/CollectInsertSizeMetrics.java
index 420cccd..4dccc0c 100644
--- a/src/java/picard/analysis/CollectInsertSizeMetrics.java
+++ b/src/java/picard/analysis/CollectInsertSizeMetrics.java
@@ -48,13 +48,36 @@ import java.util.Set;
* @author Doug Voet (dvoet at broadinstitute dot org)
*/
@CommandLineProgramProperties(
- usage = "Reads a SAM or BAM file and writes a file containing metrics about " +
- "the statistical distribution of insert size (excluding duplicates) " +
- "and generates a Histogram plot.",
- usageShort = "Writes insert size distribution metrics for a SAM or BAM file",
+ usage = CollectInsertSizeMetrics.USAGE_SUMMARY + CollectInsertSizeMetrics.USAGE_BRIEF,
+ usageShort = CollectInsertSizeMetrics.USAGE_BRIEF,
programGroup = Metrics.class
)
public class CollectInsertSizeMetrics extends SinglePassSamProgram {
+ static final String USAGE_BRIEF = "Collect metrics about the insert size distribution of a paired-end library.";
+ static final String USAGE_SUMMARY = "This tool provides useful metrics for validating library construction including " +
+ "the insert size distribution and read orientation of paired-end libraries. <br /><br />" +
+ "" +
+ "The expected proportions of these metrics vary depending on the type of library preparation used, resulting from " +
+ "technical differences between paired-end libraries and mate-pair libraries. For a brief primer on paired-end sequencing and mate-pair reads, see <a href='http://gatkforums.broadinstitute.org/discussion/6327/paired-end-mate-pair'>the GATK Dictionary</a>." +
+ "" +
+ "<br /><br />The CollectInsertSizeMetrics tool outputs the percentages of read pairs in each of the three orientations " +
+ "(FR, RF, and TANDEM) as a histogram. In addition, the insert size distribution is output as both a histogram " +
+ "(.insert_size_Histogram.pdf) and as a data table (.insert_size_metrics.txt)." +
+ ""+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectInsertSizeMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=insert_size_metrics.txt \\<br />" +
+ " H=insert_size_histogram.pdf \\<br />" +
+ " M=0.5" +
+ "</pre>" +
+ "Note: If processing a small file, set the minimum percentage option (M) to 0.5, otherwise an error may occur. "+
+ "<br /><br />" +
+ "Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#InsertSizeMetrics'>" +
+ "the InsertSizeMetrics documentation</a> for further explanations of each metric." +
+ "<hr />";
+
private static final Log log = Log.getInstance(CollectInsertSizeMetrics.class);
protected static final String Histogram_R_SCRIPT = "picard/analysis/insertSizeHistogram.R";
@@ -68,7 +91,7 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
@Option(shortName="W", doc="Explicitly sets the Histogram width, overriding automatic truncation of Histogram tail. " +
"Also, when calculating mean and standard deviation, only bins <= Histogram_WIDTH will be included.", optional=true)
- public Integer Histogram_WIDTH = null;
+ public Integer HISTOGRAM_WIDTH = null;
@Option(shortName="M", doc="When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this " +
"percentage of overall reads. (Range: 0 to 1).")
@@ -77,6 +100,9 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
@Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics. ")
public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
+ @Option(doc="If true, also include reads marked as duplicates in the insert size histogram.")
+ public boolean INCLUDE_DUPLICATES = false;
+
// Calculates InsertSizeMetrics for all METRIC_ACCUMULATION_LEVELs provided
private InsertSizeMetricsCollector multiCollector;
@@ -109,7 +135,8 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
IOUtil.assertFileIsWritable(Histogram_FILE);
//Delegate actual collection to InsertSizeMetricCollector
- multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(), MINIMUM_PCT, Histogram_WIDTH, DEVIATIONS);
+ multiCollector = new InsertSizeMetricsCollector(METRIC_ACCUMULATION_LEVEL, header.getReadGroups(),
+ MINIMUM_PCT, HISTOGRAM_WIDTH, DEVIATIONS, INCLUDE_DUPLICATES);
}
@Override protected void acceptRead(final SAMRecord record, final ReferenceSequence ref) {
@@ -133,7 +160,7 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
file.write(OUTPUT);
final int rResult;
- if(Histogram_WIDTH == null) {
+ if(HISTOGRAM_WIDTH == null) {
rResult = RExecutor.executeFromClasspath(
Histogram_R_SCRIPT,
OUTPUT.getAbsolutePath(),
@@ -145,7 +172,7 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
OUTPUT.getAbsolutePath(),
Histogram_FILE.getAbsolutePath(),
INPUT.getName(),
- String.valueOf( Histogram_WIDTH ) ); //Histogram_WIDTH is passed because R automatically sets Histogram width to the last
+ String.valueOf(HISTOGRAM_WIDTH) ); //Histogram_WIDTH is passed because R automatically sets Histogram width to the last
//bin that has data, which may be less than Histogram_WIDTH and confuse the user.
}
diff --git a/src/java/picard/analysis/CollectJumpingLibraryMetrics.java b/src/java/picard/analysis/CollectJumpingLibraryMetrics.java
index a54035a..2b7a295 100644
--- a/src/java/picard/analysis/CollectJumpingLibraryMetrics.java
+++ b/src/java/picard/analysis/CollectJumpingLibraryMetrics.java
@@ -55,13 +55,33 @@ import java.util.List;
* @author ktibbett at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Computes jumping library metrics. Gets all data for computation from the first" +
- "read in each pair and assumes that the MQ tag is set with the mate's mapping quality. If the " +
- "MQ tag is not set, then the program assumes that the mate's mapping quality is >= MINIMUM_MAPPING_QUALITY",
- usageShort = "Produces jumping library metrics for the provided SAM/BAMs",
+ usage = CollectJumpingLibraryMetrics.USAGE_SUMMARY + CollectJumpingLibraryMetrics.USAGE_DETAILS,
+ usageShort = CollectJumpingLibraryMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectJumpingLibraryMetrics extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Collect jumping library metrics. ";
+ static final String USAGE_DETAILS = "This tool collects high-level metrics about the " +
+ "presence of outward-facing (jumping) and inward-facing (non-jumping) read pairs within a SAM or BAM file.<br /><br />" +
+ "For a brief primer on jumping libraries, see <a href='http://gatkforums.broadinstitute.org/discussion/6326/jumping-libraries'>" +
+ "the GATK Dictionary</a>." +
+ "<br /><br />" +
+ "This program gets all data for computation from the first read in each pair in which the mapping quality (MQ) tag " +
+ "is set with the mate's mapping quality. If the MQ tag is not set, then the program assumes that the mate's MQ is " +
+ "greater than or equal to MINIMUM_MAPPING_QUALITY (default value is 0).<br /><br /> "+
+ "All the output metrics files are structured text files." +
+ "<br /><br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectJumpingLibraryMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=jumping_metrics.txt" +
+ "</pre>" +
+ "<hr />" +
+ "" +
+ "Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#JumpingLibraryMetrics'>" +
+ "the JumpingLibraryMetrics documentation</a> for details and explanations of the output metrics.";
+
// Usage and parameters
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "BAM file(s) of reads with duplicates marked")
diff --git a/src/java/picard/analysis/CollectMultipleMetrics.java b/src/java/picard/analysis/CollectMultipleMetrics.java
index 79da4b2..c0694f4 100644
--- a/src/java/picard/analysis/CollectMultipleMetrics.java
+++ b/src/java/picard/analysis/CollectMultipleMetrics.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.util.CollectionUtil;
@@ -21,10 +45,9 @@ import java.util.*;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Takes an input BAM and reference sequence and runs one or more Picard " +
- "metrics modules at the same time to cut down on I/O. Currently all programs are run with " +
- "default options and fixed output extensions, but this may become more flexible in future.",
- usageShort = "A \"meta-metrics\" calculating program that produces multiple metrics for the provided SAM/BAM",
+
+ usage = CollectMultipleMetrics.USAGE_SUMMARY + CollectMultipleMetrics.USAGE_DETAILS,
+ usageShort = CollectMultipleMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectMultipleMetrics extends CommandLineProgram {
@@ -33,6 +56,35 @@ public class CollectMultipleMetrics extends CommandLineProgram {
* This interface allows developers to create Programs to run in addition to the ones defined in the Program enum.
* Includes a method for determining whether or not a Program explicitly needs a reference sequence (i.e. cannot be null)
*/
+
+ static final String USAGE_SUMMARY ="Collect multiple classes of metrics. ";
+ static final String USAGE_DETAILS ="This \"meta-metrics\" tool runs one or more of the metrics collection modules at the same time to cut down " +
+ "on the time spent reading in data from input files. Available modules include CollectAlignmentSummaryMetrics, " +
+ "CollectInsertSizeMetrics, QualityScoreDistribution, MeanQualityByCycle, and CollectBaseDistributionByCycle. " +
+ "The tool produces outputs of \".pdf\" and \".txt\" files for each module, except for the CollectAlignmentSummaryMetrics " +
+ "module, which outputs only a \".txt\" file. Output files are named by specifying a base name (without any file extensions)." +
+ "<br /><br />" +
+ "" +
+ "Currently all programs are run with default options and fixed output extensions, " +
+ "but this may become more flexible in future. Specifying a reference sequence file is required." +
+ "<br />" +
+ "<h4>Usage example (all modules on by default):</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectMultipleMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=multiple_metrics \\<br />" +
+ " R=reference_sequence.fasta <br />" +
+ "</pre>" +
+ "<h4>Usage example (two modules only):</h4><pre>" +
+ "java -jar picard.jar CollectMultipleMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=multiple_metrics \\<br />" +
+ " R=reference_sequence.fasta \\<br />" +
+ " PROGRAM=null \\<br />" +
+ " PROGRAM=QualityScoreDistribution \\<br />" +
+ " PROGRAM=MeanQualityByCycle "+
+ "</pre>" +
+ "<hr />";
public static interface ProgramInterface {
SinglePassSamProgram makeInstance(final String outbase, final File input, final File reference,
final Set<MetricAccumulationLevel> metricAccumulationLevel, final File dbSnp, final File intervals);
@@ -232,13 +284,33 @@ public class CollectMultipleMetrics extends CommandLineProgram {
program.REFERENCE_SEQUENCE = reference;
return program;
}
+ },
+ CollectQualityYieldMetrics {
+ @Override
+ public boolean needsReferenceSequence() {
+ return false;
+ }
+ @Override
+ public boolean supportsMetricAccumulationLevel() {
+ return false;
+ }
+ @Override
+ public SinglePassSamProgram makeInstance(final String outbase, final File input, final File reference, final Set<MetricAccumulationLevel> metricAccumulationLevel, final File dbSnp, final File intervals) {
+ final CollectQualityYieldMetrics program = new CollectQualityYieldMetrics();
+ program.OUTPUT = new File(outbase + ".quality_yield_metrics");
+ // Generally programs should not be accessing these directly but it might make things smoother
+ // to just set them anyway. These are set here in case a derived class
+ // overrides them
+ program.INPUT = input;
+ program.REFERENCE_SEQUENCE = reference;
+ return program;
+ }
}
}
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input SAM or BAM file.")
public File INPUT;
-
@Option(doc = "If true (default), then the sort order in the header file will be ignored.",
shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME)
public boolean ASSUME_SORTED = true;
@@ -255,6 +327,9 @@ public class CollectMultipleMetrics extends CommandLineProgram {
@Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.")
public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = new HashSet<MetricAccumulationLevel>(accumLevelDefault);
+ @Option(shortName = "EXT", doc="Append the given file extension to all metric file names (ex. OUTPUT.insert_size_metrics.EXT). None if null", optional=true)
+ public String FILE_EXTENSION = null;
+
@Option(doc = "List of metrics programs to apply during the pass through the SAM file.")
public List<Program> PROGRAM = CollectionUtil.makeList(Program.CollectAlignmentSummaryMetrics, Program.CollectBaseDistributionByCycle,
Program.CollectInsertSizeMetrics, Program.MeanQualityByCycle, Program.QualityScoreDistribution);
@@ -314,6 +389,9 @@ public class CollectMultipleMetrics extends CommandLineProgram {
}
final SinglePassSamProgram instance = program.makeInstance(OUTPUT, INPUT, REFERENCE_SEQUENCE, METRIC_ACCUMULATION_LEVEL, DB_SNP, INTERVALS);
+ // Add a file extension if desired
+ if (null != FILE_EXTENSION && !FILE_EXTENSION.isEmpty()) instance.OUTPUT = new File(instance.OUTPUT.getAbsolutePath() + FILE_EXTENSION);
+
// Generally programs should not be accessing these directly but it might make things smoother
// to just set them anyway
instance.INPUT = INPUT;
diff --git a/src/java/picard/analysis/CollectOxoGMetrics.java b/src/java/picard/analysis/CollectOxoGMetrics.java
index 3055ffc..920b21f 100644
--- a/src/java/picard/analysis/CollectOxoGMetrics.java
+++ b/src/java/picard/analysis/CollectOxoGMetrics.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.SAMReadGroupRecord;
@@ -41,13 +65,30 @@ import static java.lang.Math.log10;
* Class for trying to quantify the CpCG->CpCA error rate.
*/
@CommandLineProgramProperties(
- usage = CollectOxoGMetrics.USAGE,
- usageShort = CollectOxoGMetrics.USAGE,
+ usage = CollectOxoGMetrics.USAGE_SUMMARY + CollectOxoGMetrics.USAGE_DETAILS,
+ usageShort = CollectOxoGMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectOxoGMetrics extends CommandLineProgram {
- static final String USAGE = "Collects metrics quantifying the CpCG -> CpCA error rate from the provided SAM/BAM";
-
+ static final String USAGE_SUMMARY = "Collect metrics to assess oxidative artifacts.";
+ static final String USAGE_DETAILS = "This tool collects metrics quantifying the error rate resulting from oxidative artifacts. " +
+ "For a brief primer on oxidative artifacts, see " +
+ "<a href='http://gatkforums.broadinstitute.org/discussion/6328/oxog-oxidative-artifacts'>" +
+ "the GATK Dictionary</a>." +
+ "<br /><br />" +
+ "This tool calculates the Phred-scaled probability that an alternate base call results from an oxidation artifact. This " +
+ "probability score is based on base context, sequencing read orientation, and the characteristic low allelic frequency " +
+ "(doi:10.1093/nar/gks1443). Lower probability values implicate artifacts resulting from 8-oxoguanine, while higher " +
+ "probability values suggest that an alternate base call is due to either some other type of artifact or is a real variant." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectOxoGMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=oxoG_metrics.txt \\<br />" +
+ " R=reference_sequence.fasta" +
+ "</pre>" +
+ "" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
doc = "Input BAM file for analysis.")
public File INPUT;
diff --git a/src/java/picard/analysis/CollectQualityYieldMetrics.java b/src/java/picard/analysis/CollectQualityYieldMetrics.java
index 7c3cb8c..6d80eb8 100644
--- a/src/java/picard/analysis/CollectQualityYieldMetrics.java
+++ b/src/java/picard/analysis/CollectQualityYieldMetrics.java
@@ -24,11 +24,13 @@
package picard.analysis;
+import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
@@ -45,22 +47,36 @@ import java.io.File;
*
* @author Martha Borkan
*/
+
+
@CommandLineProgramProperties(
- usage = "Collects quality yield metrics, a set of metrics that quantify the quality and yield of sequence data from a " +
- "SAM/BAM input file. Note that the default behaviour of this program changed as of November 6th 2015 to no longer " +
- "include secondary and supplemental alignments in the computation.",
- usageShort = "Collects a set of metrics that quantify the quality and yield of sequence data from the provided SAM/BAM",
+ usage = CollectQualityYieldMetrics.USAGE_SUMMARY + CollectQualityYieldMetrics.USAGE_DETAILS,
+ usageShort = CollectQualityYieldMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
-public class CollectQualityYieldMetrics extends CommandLineProgram {
-
- @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
- doc = "A SAM or BAM file to process.")
- public File INPUT;
-
- @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
- doc = "The metrics file to write with quality yield metrics.")
- public File OUTPUT;
+public class CollectQualityYieldMetrics extends SinglePassSamProgram {
+ static final String USAGE_SUMMARY = "Collect metrics about reads that pass quality thresholds and Illumina-specific filters. ";
+ static final String USAGE_DETAILS = "This tool evaluates the overall quality of reads within a bam file containing one read group. " +
+ "The output indicates the total numbers of bases within a read group that pass a minimum base quality score threshold and " +
+ "(in the case of Illumina data) pass Illumina quality filters as described in the <a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>. " +
+ "<br />" +
+ "<h4>Note on base quality score options</h4>" +
+ "If the quality score of read bases has been modified in a previous data processing step such as " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=44'>GATK Base Recalibration</a> " +
+ "and an OQ tag is available, this tool can be set to use the OQ value instead of the primary quality value for the evaluation. " +
+ "<br /><br />" +
+ "Note that the default behaviour of this program changed as of November 6th 2015 to no longer include secondary and " +
+ "supplemental alignments in the computation. <br />" +
+ "<h4>Usage Example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectQualityYieldMetrics \\<br /> " +
+ " I=input.bam \\<br /> "+
+ " O=quality_yield_metrics.txt" +
+ "</pre>" +
+ "Please see " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectQualityYieldMetrics.QualityYieldMetrics'>" +
+ "the QualityYieldMetrics documentation</a> for details and explanations of the output metrics." +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME,
doc = "If available in the OQ tag, use the original quality scores " +
@@ -75,79 +91,64 @@ public class CollectQualityYieldMetrics extends CommandLineProgram {
"of bases if there are supplemental alignments in the input file.")
public boolean INCLUDE_SUPPLEMENTAL_ALIGNMENTS = false;
- /** Stock main method for a command line program. */
- public static void main(final String[] argv) {
- new CollectQualityYieldMetrics().instanceMainWithExit(argv);
- }
+ // The metrics to be accumulated
+ private final QualityYieldMetrics metrics = new QualityYieldMetrics();
+
+ /** Ensure that we get all reads regardless of alignment status. */
+ @Override protected boolean usesNoRefReads() { return true; }
- /**
- * Main method for the program. Checks that all input files are present and
- * readable and that the output file can be written to. Then iterates through
- * all the records accumulating metrics. Finally writes metrics file
- */
- protected int doWork() {
- final Log log = Log.getInstance(getClass());
- final ProgressLogger progress = new ProgressLogger(log);
-
- // Some quick parameter checking
- IOUtil.assertFileIsReadable(INPUT);
+ @Override
+ protected void setup(final SAMFileHeader header, final File samFile) {
IOUtil.assertFileIsWritable(OUTPUT);
+ }
- log.info("Reading input file and calculating metrics.");
+ @Override
+ protected void acceptRead(final SAMRecord rec, final ReferenceSequence ref) {
+ if (!INCLUDE_SECONDARY_ALIGNMENTS && rec.getNotPrimaryAlignmentFlag()) return;
+ if (!INCLUDE_SUPPLEMENTAL_ALIGNMENTS && rec.getSupplementaryAlignmentFlag()) return;
- final SamReader sam = SamReaderFactory.makeDefault().open(INPUT);
+ final int length = rec.getReadLength();
+ metrics.TOTAL_READS++;
+ metrics.TOTAL_BASES += length;
- final MetricsFile<QualityYieldMetrics, Integer> metricsFile = getMetricsFile();
- final QualityYieldMetrics metrics = new QualityYieldMetrics();
+ final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag();
+ if (isPfRead) {
+ metrics.PF_READS++;
+ metrics.PF_BASES += length;
+ }
- for (final SAMRecord rec : sam) {
- if (!INCLUDE_SECONDARY_ALIGNMENTS && rec.getNotPrimaryAlignmentFlag()) continue;
- if (!INCLUDE_SUPPLEMENTAL_ALIGNMENTS && rec.getSupplementaryAlignmentFlag()) continue;
+ final byte[] quals;
+ if (USE_ORIGINAL_QUALITIES) {
+ byte[] tmp = rec.getOriginalBaseQualities();
+ if (tmp == null) tmp = rec.getBaseQualities();
+ quals = tmp;
+ } else {
+ quals = rec.getBaseQualities();
+ }
- metrics.TOTAL_READS++;
- final int length = rec.getReadLength();
+ // add up quals, and quals >= 20
+ for (final int qual : quals) {
+ metrics.Q20_EQUIVALENT_YIELD += qual;
+ if (qual >= 20) metrics.Q20_BASES++;
+ if (qual >= 30) metrics.Q30_BASES++;
- final boolean isPfRead = !rec.getReadFailsVendorQualityCheckFlag();
if (isPfRead) {
- metrics.PF_READS++;
- metrics.PF_BASES += length;
+ metrics.PF_Q20_EQUIVALENT_YIELD += qual;
+ if (qual >= 20) metrics.PF_Q20_BASES++;
+ if (qual >= 30) metrics.PF_Q30_BASES++;
}
-
- metrics.TOTAL_BASES += length;
-
- final byte[] quals;
- if (USE_ORIGINAL_QUALITIES) {
- byte[] tmp = rec.getOriginalBaseQualities();
- if (tmp == null) tmp = rec.getBaseQualities();
- quals = tmp;
- } else {
- quals = rec.getBaseQualities();
- }
-
- // add up quals, and quals >= 20
- for (int i = 0; i < quals.length; ++i) {
- metrics.Q20_EQUIVALENT_YIELD += quals[i];
- if (quals[i] >= 20) metrics.Q20_BASES++;
- if (quals[i] >= 30) metrics.Q30_BASES++;
-
- if (isPfRead) {
- metrics.PF_Q20_EQUIVALENT_YIELD += quals[i];
- if (quals[i] >= 20) metrics.PF_Q20_BASES++;
- if (quals[i] >= 30) metrics.PF_Q30_BASES++;
- }
- }
-
- progress.record(rec);
}
+ }
+ @Override
+ protected void finish() {
+ final MetricsFile<QualityYieldMetrics, Integer> metricsFile = getMetricsFile();
metrics.READ_LENGTH = metrics.TOTAL_READS == 0 ? 0 : (int) (metrics.TOTAL_BASES / metrics.TOTAL_READS);
metrics.Q20_EQUIVALENT_YIELD = metrics.Q20_EQUIVALENT_YIELD / 20;
metrics.PF_Q20_EQUIVALENT_YIELD = metrics.PF_Q20_EQUIVALENT_YIELD / 20;
metricsFile.addMetric(metrics);
metricsFile.write(OUTPUT);
-
- return 0;
}
/** A set of metrics used to describe the general quality of a BAM file */
@@ -174,10 +175,10 @@ public class CollectQualityYieldMetrics extends CommandLineProgram {
/** The number of bases in PF reads that achieve quality score 20 or higher */
public long PF_Q20_BASES = 0;
- /** The number of bases in all reads that achieve quality score 20 or higher */
+ /** The number of bases in all reads that achieve quality score 30 or higher */
public long Q30_BASES = 0;
- /** The number of bases in PF reads that achieve quality score 20 or higher */
+ /** The number of bases in PF reads that achieve quality score 30 or higher */
public long PF_Q30_BASES = 0;
/** The sum of quality scores of all bases divided by 20 */
diff --git a/src/java/picard/analysis/CollectRawWgsMetrics.java b/src/java/picard/analysis/CollectRawWgsMetrics.java
index d6305fc..1d81529 100644
--- a/src/java/picard/analysis/CollectRawWgsMetrics.java
+++ b/src/java/picard/analysis/CollectRawWgsMetrics.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import picard.cmdline.CommandLineProgramProperties;
@@ -5,18 +29,48 @@ import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
/**
- * Computes a number of metrics that are useful for evaluating coverage and performance of whole genome sequencing experiments, same implementation as CollectWgsMetrics, with different defaults: lacks baseQ and mappingQ filters and has much higher coverage cap.
+ * Computes a number of metrics that are useful for evaluating coverage and performance of whole genome sequencing
+ * experiments, same implementation as CollectWgsMetrics, with different defaults: lacks baseQ and mappingQ filters
+ * and has much higher coverage cap.
*
* @author farjoun
*/
@CommandLineProgramProperties(
- usage = "Computes a number of metrics that are useful for evaluating coverage and performance of " +
- "whole genome sequencing experiments. Defaults are different than for CollectWgsMetrics.",
- usageShort = "Writes whole genome sequencing-related metrics for a SAM or BAM file",
+ usage = CollectRawWgsMetrics.USAGE_SUMMARY + CollectRawWgsMetrics.USAGE_DETAILS,
+ usageShort = CollectRawWgsMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectRawWgsMetrics extends CollectWgsMetrics{
-
+ static final String USAGE_SUMMARY = "Collect whole genome sequencing-related metrics. ";
+ static final String USAGE_DETAILS = "This tool computes metrics that are useful for evaluating coverage and performance " +
+ "of whole genome sequencing experiments. These metrics include the percentages of reads that pass" +
+ " minimal base- and mapping- quality filters as well as coverage (read-depth) levels. " +
+ "<br /><br /> " +
+ "The histogram output is optional and for a given run, displays two separate outputs on the y-axis while using a single set" +
+ " of values for the x-axis. Specifically, the first column in the histogram table (x-axis) is labeled \"coverage\" and " +
+ "represents different possible coverage depths. However, it also represents the range of values for the base quality scores " +
+ "and thus should probably be labeled \"sequence depth and base quality scores\". The second and third columns (y-axes) " +
+ "correspond to the numbers of bases at a specific sequence depth \"count\" and the numbers of bases at a particular base " +
+ "quality score \"baseq_count\" respectively." +
+ "<br /><br />" +
+ "Although similar to the CollectWgsMetrics tool, the default thresholds for CollectRawWgsMetrics are less stringent. " +
+ "For example, the CollectRawWgsMetrics have base and mapping quality score thresholds set to \"3\" and \"0\" respectively, " +
+ "while the CollectWgsMetrics tool has the default threshold values set to \"20\" (at time of writing). Nevertheless, both " +
+ "tools enable the user to input specific threshold values." +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectRawWgsMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=raw_wgs_metrics.txt \\<br />" +
+ " R=reference_sequence.fasta \\<br />" +
+ " INCLUDE_BQ_HISTOGRAM=true" +
+ "</pre>" +
+ "<hr />" +
+ "Please see " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics'>" +
+ "the WgsMetrics documentation</a> for detailed explanations of the output metrics." +
+ "<hr />";
@Option(shortName="MQ", doc="Minimum mapping quality for a read to contribute coverage.")
public int MINIMUM_MAPPING_QUALITY = 0;
@@ -26,6 +80,9 @@ public class CollectRawWgsMetrics extends CollectWgsMetrics{
@Option(shortName="CAP", doc="Treat bases with coverage exceeding this value as if they had coverage at this value.")
public int COVERAGE_CAP = 100000;
+ @Option(doc="At positions with coverage exceeding this value, completely ignore reads that accumulate beyond this value (so that they will not be considered for PCT_EXC_CAPPED). Used to keep memory consumption in check, but could create bias if set too low")
+ public int LOCUS_ACCUMULATION_CAP = 200000;
+
// rename the class so that in the metric file it is annotated differently.
public static class RawWgsMetrics extends WgsMetrics {}
diff --git a/src/java/picard/analysis/CollectRrbsMetrics.java b/src/java/picard/analysis/CollectRrbsMetrics.java
index 16d8eff..f99bc0c 100644
--- a/src/java/picard/analysis/CollectRrbsMetrics.java
+++ b/src/java/picard/analysis/CollectRrbsMetrics.java
@@ -56,16 +56,53 @@ import java.util.Set;
*
* @author jgentry at broadinstitute.org
*/
+
@CommandLineProgramProperties(
- usage = CollectRrbsMetrics.USAGE,
- usageShort = CollectRrbsMetrics.USAGE,
+ usage = CollectRrbsMetrics.USAGE_SUMMARY + CollectRrbsMetrics.USAGE_DETAILS,
+ usageShort = CollectRrbsMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectRrbsMetrics extends CommandLineProgram {
- final static String USAGE = "Collects metrics about bisulfite conversion for RRBS data";
-
- // Path to R file for plotting purposes
- private static final String R_SCRIPT = "picard/analysis/rrbsQc.R";
+ static final String USAGE_SUMMARY = "Collect metrics from reduced representation bisulfite sequencing (RRBS) data. ";
+ static final String USAGE_DETAILS = "This tool collect metrics for RRBS data, based on the methylation status of cytosine (C) " +
+ "bases in both CpG and non-CpG sites across all reads of a BAM/SAM file. For a brief primer on bisulfite sequencing and " +
+ "cytosine methylation, see the " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6330'>GATK Dictionary</a>." +
+ "<br /><br />" +
+ "" +
+ "Since cytosine methylation is not exclusive for CpG \"hotspots\", the CollectRrbsMetrics tool outputs a summary table " +
+ "indicating the number of CpG and non-CpG cytosines as well as their conversion C -> T (+ strand) or G -> A (- strand) " +
+ "rates. The tool also outputs the numbers of reads having no CpG sites, and the numbers of reads discarded from the " +
+ "analysis due to inadequate size or excessive numbers of mismatches." +
+ "<br /><br />" +
+ "The tool also provides a table containing detailed information on CpG occurrence frequency, CpG conversion frequencies " +
+ "[C -> T (+ strand) or G -> A (- strand)], and the specific locations of the CpG sites in the genome. The conversion " +
+ "frequency helps determines the methylation status of a CpG site." +
+ "<br /><br />" +
+ "Finally, the tool provides graphical representation of four metrics in the form of a \".pdf\" document. These metrics " +
+ "are the bisulfite conversion rate for CpG and non-CpG cytosines, a distribution of the numbers of CpG sites as a " +
+ "function of CpG conversion rate, the distribution of CpG sites by read coverage, and the numbers of reads discarded due " +
+ "to high numbers of mismatches or inadequate read size." +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectRrbsMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " M=rrbs_metrics \\<br />" +
+ " R=reference_sequence.fasta" +
+ "</pre>" +
+ "<hr />" +
+ "" +
+ "Please see " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsCpgDetailMetrics'>" +
+ "the RrbsCpgDetailMetrics documentation</a> and the " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsSummaryMetrics'>" +
+ "the RrbsSummaryMetrics documentation</a>for detailed explanations of the output metrics." +
+ "<hr />";
+
+// Path to R file for plotting purposes
+
+private static final String R_SCRIPT = "picard/analysis/rrbsQc.R";
@Option(doc = "The BAM or SAM file containing aligned reads. Must be coordinate sorted", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
diff --git a/src/java/picard/analysis/CollectWgsMetrics.java b/src/java/picard/analysis/CollectWgsMetrics.java
index b0bb642..e9708fb 100644
--- a/src/java/picard/analysis/CollectWgsMetrics.java
+++ b/src/java/picard/analysis/CollectWgsMetrics.java
@@ -1,7 +1,28 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
package picard.analysis;
-import htsjdk.samtools.AlignmentBlock;
-import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.SamRecordFilter;
@@ -16,12 +37,18 @@ import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
import picard.cmdline.StandardOptionDefinitions;
+import picard.filter.CountingDuplicateFilter;
+import picard.filter.CountingFilter;
+import picard.filter.CountingMapQFilter;
+import picard.filter.CountingPairedFilter;
import picard.util.MathUtil;
import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
/**
* Computes a number of metrics that are useful for evaluating coverage and performance of whole genome sequencing experiments.
@@ -29,12 +56,26 @@ import java.util.List;
* @author tfennell
*/
@CommandLineProgramProperties(
- usage = "Computes a number of metrics that are useful for evaluating coverage and performance of " +
- "whole genome sequencing experiments.",
- usageShort = "Writes whole genome sequencing-related metrics for a SAM or BAM file",
+ usage = CollectWgsMetrics.USAGE_SUMMARY + CollectWgsMetrics.USAGE_DETAILS,
+ usageShort = CollectWgsMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectWgsMetrics extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Collect metrics about coverage and performance of whole genome sequencing (WGS) experiments.";
+ static final String USAGE_DETAILS = "This tool collects metrics about the percentages of reads that pass base- and mapping- quality " +
+ "filters as well as coverage (read-depth) levels. Both minimum base- and mapping-quality values as well as the maximum " +
+ "read depths (coverage cap) are user defined." +
+ "<h4>Usage Example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectWgsMetrics \\<br /> " +
+ " I=input.bam \\<br /> "+
+ " O=collect_wgs_metrics.txt \\<br /> " +
+ " R=reference_sequence.fasta " +
+ "</pre>" +
+ "Please see " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics'>" +
+ "the WgsMetrics documentation</a>for detailed explanations of the output metrics." +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input SAM or BAM file.")
public File INPUT;
@@ -51,9 +92,12 @@ public class CollectWgsMetrics extends CommandLineProgram {
@Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
public int MINIMUM_BASE_QUALITY = 20;
- @Option(shortName = "CAP", doc = "Treat bases with coverage exceeding this value as if they had coverage at this value.", overridable = true)
+ @Option(shortName = "CAP", doc = "Treat positions with coverage exceeding this value as if they had coverage at this value (but calculate the difference for PCT_EXC_CAPPED).", overridable = true)
public int COVERAGE_CAP = 250;
+ @Option(doc="At positions with coverage exceeding this value, completely ignore reads that accumulate beyond this value (so that they will not be considered for PCT_EXC_CAPPED). Used to keep memory consumption in check, but could create bias if set too low", overridable = true)
+ public int LOCUS_ACCUMULATION_CAP = 100000;
+
@Option(doc = "For debugging purposes, stop after processing this many genomic bases.")
public long STOP_AFTER = -1;
@@ -63,7 +107,11 @@ public class CollectWgsMetrics extends CommandLineProgram {
@Option(doc="If true, count unpaired reads, and paired reads with one end unmapped")
public boolean COUNT_UNPAIRED = false;
+ @Option(doc="Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000.", optional = true)
+ public int SAMPLE_SIZE=10000;
+
private final Log log = Log.getInstance(CollectWgsMetrics.class);
+ private static final double LOG_ODDS_THRESHOLD = 3.0;
/** Metrics for evaluating the performance of whole genome sequencing experiments. */
public static class WgsMetrics extends MetricBase {
@@ -93,6 +141,8 @@ public class CollectWgsMetrics extends CommandLineProgram {
/** The total fraction of aligned bases excluded due to all filters. */
public double PCT_EXC_TOTAL;
+ /** The fraction of bases that attained at least 1X sequence coverage in post-filtering bases. */
+ public double PCT_1X;
/** The fraction of bases that attained at least 5X sequence coverage in post-filtering bases. */
public double PCT_5X;
/** The fraction of bases that attained at least 10X sequence coverage in post-filtering bases. */
@@ -119,6 +169,12 @@ public class CollectWgsMetrics extends CommandLineProgram {
public double PCT_90X;
/** The fraction of bases that attained at least 100X sequence coverage in post-filtering bases. */
public double PCT_100X;
+
+ /** The theoretical HET SNP sensitivity. */
+ public double HET_SNP_SENSITIVITY;
+
+ /** The Phred Scaled Q Score of the theoretical HET SNP sensitivity. */
+ public double HET_SNP_Q;
}
public static void main(final String[] args) {
@@ -131,6 +187,12 @@ public class CollectWgsMetrics extends CommandLineProgram {
IOUtil.assertFileIsWritable(OUTPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
+ // it doesn't make sense for the locus accumulation cap to be lower than the coverage cap
+ if (LOCUS_ACCUMULATION_CAP < COVERAGE_CAP) {
+ log.warn("Setting the LOCUS_ACCUMULATION_CAP to be equal to the COVERAGE_CAP (" + COVERAGE_CAP + ") because it should not be lower");
+ LOCUS_ACCUMULATION_CAP = COVERAGE_CAP;
+ }
+
// Setup all the inputs
final ProgressLogger progress = new ProgressLogger(log, 10000000, "Processed", "loci");
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
@@ -141,21 +203,25 @@ public class CollectWgsMetrics extends CommandLineProgram {
final CountingFilter dupeFilter = new CountingDuplicateFilter();
final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
final CountingPairedFilter pairFilter = new CountingPairedFilter();
+ // The order in which filters are added matters!
+ filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
filters.add(mapqFilter);
filters.add(dupeFilter);
if (!COUNT_UNPAIRED) {
filters.add(pairFilter);
}
- filters.add(new SecondaryAlignmentFilter()); // Not a counting filter because we never want to count reads twice
iterator.setSamFilters(filters);
iterator.setEmitUncoveredLoci(true);
iterator.setMappingQualityScoreCutoff(0); // Handled separately because we want to count bases
iterator.setQualityScoreCutoff(0); // Handled separately because we want to count bases
iterator.setIncludeNonPfReads(false);
+ iterator.setMaxReadsToAccumulatePerLocus(LOCUS_ACCUMULATION_CAP);
- final int max = COVERAGE_CAP;
- final long[] HistogramArray = new long[max + 1];
+ final int coverageCap = COVERAGE_CAP;
+ final long[] HistogramArray = new long[coverageCap + 1];
final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
+ // We need a separate Het Sens histogram for base quality because the original one excludes bases below MINIMUM_BASE_QUALITY (20 by default) and bases from overlapping reads
+ final long[] baseQHetSensHistogram = new long[Byte.MAX_VALUE];
final boolean usingStopAfter = STOP_AFTER > 0;
final long stopAfter = STOP_AFTER - 1;
long counter = 0;
@@ -176,18 +242,25 @@ public class CollectWgsMetrics extends CommandLineProgram {
// Figure out the coverage while not counting overlapping reads twice, and excluding various things
final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
int pileupSize = 0;
+ int pileupSizeForBaseQHetSens = 0;
for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndPositions()) {
+ pileupSizeForBaseQHetSens++;
+ if(pileupSizeForBaseQHetSens <= coverageCap) {
+ baseQHetSensHistogram[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
+ }
- if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) { ++basesExcludedByBaseq; continue; }
+ if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) { ++basesExcludedByBaseq; continue; }
if (!readNames.add(recs.getRecord().getReadName())) { ++basesExcludedByOverlap; continue; }
+
pileupSize++;
- if (pileupSize <= max) {
+ if (pileupSize <= coverageCap) {
baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
}
+
}
- final int depth = Math.min(readNames.size(), max);
- if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - max;
+ final int depth = Math.min(readNames.size(), coverageCap);
+ if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - coverageCap;
HistogramArray[depth]++;
// Record progress and perhaps stop
@@ -196,28 +269,41 @@ public class CollectWgsMetrics extends CommandLineProgram {
}
// Construct and write the outputs
- final Histogram<Integer> histo = new Histogram<Integer>("coverage", "count");
+ final Histogram<Integer> depthHistogram = new Histogram<Integer>("coverage", "count");
for (int i = 0; i < HistogramArray.length; ++i) {
- histo.increment(i, HistogramArray[i]);
+ depthHistogram.increment(i, HistogramArray[i]);
}
// Construct and write the outputs
- final Histogram<Integer> baseQHisto = new Histogram<Integer>("value", "baseq_count");
+ final Histogram<Integer> baseQHistogram = new Histogram<Integer>("value", "baseq_count");
for (int i=0; i<baseQHistogramArray.length; ++i) {
- baseQHisto.increment(i, baseQHistogramArray[i]);
+ baseQHistogram.increment(i, baseQHistogramArray[i]);
}
- final WgsMetrics metrics = generateWgsMetrics();
- metrics.GENOME_TERRITORY = (long) histo.getSumOfValues();
- metrics.MEAN_COVERAGE = histo.getMean();
- metrics.SD_COVERAGE = histo.getStandardDeviation();
- metrics.MEDIAN_COVERAGE = histo.getMedian();
- metrics.MAD_COVERAGE = histo.getMedianAbsoluteDeviation();
+ // Build the base quality histogram used only for the theoretical het SNP sensitivity calculation (it is not written to the output)
+ final Histogram<Integer> baseQHetHistogram = new Histogram<Integer>("value", "baseq_count");
+ final int BASEQ_MAX = 50;
+ final Integer[] x = new Integer[BASEQ_MAX];
+ IntStream.range(0, BASEQ_MAX).forEach(i -> x[i] = i);
+ baseQHetHistogram.prefillBins(x);
+
+ // HaplotypeCaller uses 17 as its baseQ cutoff, so we do too. Everything below 17 is squashed into the '0' bin.
+ final int BASEQ_MIN_CUTOFF = 17;
+ for (int i=0; i<baseQHetSensHistogram.length; ++i) {
+ baseQHetHistogram.increment( i < BASEQ_MIN_CUTOFF ? 0 : i, baseQHetSensHistogram[i]);
+ }
+ final WgsMetrics metrics = generateWgsMetrics();
+ metrics.GENOME_TERRITORY = (long) depthHistogram.getSumOfValues();
+ metrics.MEAN_COVERAGE = depthHistogram.getMean();
+ metrics.SD_COVERAGE = depthHistogram.getStandardDeviation();
+ metrics.MEDIAN_COVERAGE = depthHistogram.getMedian();
+ metrics.MAD_COVERAGE = depthHistogram.getMedianAbsoluteDeviation();
+
final long basesExcludedByDupes = getBasesExcludedBy(dupeFilter);
final long basesExcludedByMapq = getBasesExcludedBy(mapqFilter);
final long basesExcludedByPairing = getBasesExcludedBy(pairFilter);
- final double total = histo.getSum();
+ final double total = depthHistogram.getSum();
final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
@@ -227,7 +313,8 @@ public class CollectWgsMetrics extends CommandLineProgram {
metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
- metrics.PCT_5X = MathUtil.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_1X = MathUtil.sum(HistogramArray, 1, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_5X = MathUtil.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
metrics.PCT_10X = MathUtil.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
metrics.PCT_15X = MathUtil.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
metrics.PCT_20X = MathUtil.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
@@ -241,11 +328,17 @@ public class CollectWgsMetrics extends CommandLineProgram {
metrics.PCT_90X = MathUtil.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
metrics.PCT_100X = MathUtil.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ // Get Theoretical Het SNP Sensitivity
+ final double [] depthDoubleArray = TheoreticalSensitivity.normalizeHistogram(depthHistogram);
+ final double [] baseQDoubleArray = TheoreticalSensitivity.normalizeHistogram(baseQHetHistogram);
+ metrics.HET_SNP_SENSITIVITY = TheoreticalSensitivity.hetSNPSensitivity(depthDoubleArray, baseQDoubleArray, SAMPLE_SIZE, LOG_ODDS_THRESHOLD);
+ metrics.HET_SNP_Q = QualityUtil.getPhredScoreFromErrorProbability((1-metrics.HET_SNP_SENSITIVITY));
+
final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
out.addMetric(metrics);
- out.addHistogram(histo);
+ out.addHistogram(depthHistogram);
if (INCLUDE_BQ_HISTOGRAM) {
- out.addHistogram(baseQHisto);
+ out.addHistogram(baseQHistogram);
}
out.write(OUTPUT);
@@ -265,59 +358,3 @@ public class CollectWgsMetrics extends CommandLineProgram {
}
}
-/**
- * A SamRecordFilter that counts the number of aligned bases in the reads which it filters out. Abstract and designed
- * to be subclassed to implement the desired filter.
- */
-abstract class CountingFilter implements SamRecordFilter {
- private long filteredRecords = 0;
- private long filteredBases = 0;
-
- /** Gets the number of records that have been filtered out thus far. */
- public long getFilteredRecords() { return this.filteredRecords; }
-
- /** Gets the number of bases that have been filtered out thus far. */
- public long getFilteredBases() { return this.filteredBases; }
-
- @Override
- public final boolean filterOut(final SAMRecord record) {
- final boolean filteredOut = reallyFilterOut(record);
- if (filteredOut) {
- ++filteredRecords;
- for (final AlignmentBlock block : record.getAlignmentBlocks()) {
- this.filteredBases += block.getLength();
- }
- }
- return filteredOut;
- }
-
- abstract public boolean reallyFilterOut(final SAMRecord record);
-
- @Override
- public boolean filterOut(final SAMRecord first, final SAMRecord second) {
- throw new UnsupportedOperationException();
- }
-}
-
-/** Counting filter that discards reads that have been marked as duplicates. */
-class CountingDuplicateFilter extends CountingFilter {
- @Override
- public boolean reallyFilterOut(final SAMRecord record) { return record.getDuplicateReadFlag(); }
-}
-
-/** Counting filter that discards reads below a configurable mapping quality threshold. */
-class CountingMapQFilter extends CountingFilter {
- private final int minMapq;
-
- CountingMapQFilter(final int minMapq) { this.minMapq = minMapq; }
-
- @Override
- public boolean reallyFilterOut(final SAMRecord record) { return record.getMappingQuality() < minMapq; }
-}
-
-/** Counting filter that discards reads that are unpaired in sequencing and paired reads who's mates are not mapped. */
-class CountingPairedFilter extends CountingFilter {
- @Override
- public boolean reallyFilterOut(final SAMRecord record) { return !record.getReadPairedFlag() || record.getMateUnmappedFlag(); }
-}
-
diff --git a/src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java b/src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
index b28fa50..42337bf 100644
--- a/src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
+++ b/src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.*;
@@ -32,32 +56,83 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output metrics file.")
public File OUTPUT;
- @Option(shortName = "MQ", doc = "Minimum mapping quality for a read to contribute coverage.", overridable = true)
- public int MINIMUM_MAPPING_QUALITY = 20;
+ @Option(shortName = "USABLE_MQ", doc = "Minimum mapping quality for a read to contribute to usable coverage.", overridable = true, optional = true)
+ public int MINIMUM_USABLE_MAPPING_QUALITY = 20;
+
+ @Option(shortName = "USABLE_Q", doc = "Minimum base quality for a base to contribute to usable coverage.", overridable = true, optional = true)
+ public int MINIMUM_USABLE_BASE_QUALITY = 20;
+
+ @Option(shortName = "RAW_MQ", doc = "Minimum mapping quality for a read to contribute to raw coverage.", overridable = true, optional = true)
+ public int MINIMUM_RAW_MAPPING_QUALITY = 0;
- @Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
- public int MINIMUM_BASE_QUALITY = 20;
+ @Option(shortName = "RAW_Q", doc = "Minimum base quality for a base to contribute to raw coverage.", overridable = true, optional = true)
+ public int MINIMUM_RAW_BASE_QUALITY = 3;
+
+ @Option(doc = "The number of bases in the genome build of the input file to be used for calculating MEAN_COVERAGE. If not provided, we will assume that ALL bases in the genome should be used (including e.g. Ns)", overridable = true, optional = true)
+ public Long GENOME_TERRITORY = null;
private final Log log = Log.getInstance(CollectWgsMetricsFromQuerySorted.class);
+ //the adapter utility class
+ private AdapterUtility adapterUtility;
+
+ /** the various metrics types */
+ public enum FILTERING_STRINGENCY { RAW, USABLE }
+
/** Metrics for evaluating the performance of whole genome sequencing experiments. */
public static class QuerySortedSeqMetrics extends CollectWgsMetrics.WgsMetrics {
+ /** Identifier for metrics type */
+ public FILTERING_STRINGENCY TYPE;
+
/** The total number of bases, before any filters are applied. */
- public long TOTAL_BASES = 0;
- /** The number of usable bases, after all filters are applied. */
- public long TOTAL_USABLE_BASES = 0;
+ public long PF_BASES = 0;
+ /** The number of passing bases, after all filters are applied. */
+ public long PF_PASSING_BASES = 0;
/** The number of read pairs, before all filters are applied. */
- public long TOTAL_READ_PAIRS = 0;
- /** The number of duplicate read pairs, before all filters are applied. */
- public long TOTAL_DUPE_PAIRS = 0;
+ public long PF_READ_PAIRS = 0;
+ /** The number of duplicate read pairs, before any filters are applied. */
+ public long PF_DUPE_PAIRS = 0;
+
+ /** The number of aligned reads, before any filters are applied. */
+ public long PF_READS_ALIGNED = 0;
+
+ /**
+ * The number of PF reads that are marked as noise reads. A noise read is one which is composed
+ * entirely of A bases and/or N bases. These reads are marked as they are usually artifactual and
+ * are of no use in downstream analysis.
+ */
+ public long PF_NOISE_READS = 0;
+
+ /**
+ * The number of PF reads that map outside of a maximum insert size (100kb) or that have
+ * the two ends mapping to different chromosomes.
+ */
+ public long PF_CHIMERIC_PAIRS = 0;
+
+ /**
+ * The number of PF reads that are unaligned and match to a known adapter sequence right from the
+ * start of the read.
+ */
+ public long PF_ADAPTER_READS = 0;
/** The number of read pairs with standard orientations from which to calculate mean insert size, after filters are applied. */
- public long TOTAL_ORIENTED_PAIRS = 0;
+ public long PF_ORIENTED_PAIRS = 0;
/** The mean insert size, after filters are applied. */
public double MEAN_INSERT_SIZE = 0.0;
}
+ /** A private class to track the intermediate values of certain metrics */
+ private class IntermediateMetrics {
+ final QuerySortedSeqMetrics metrics = new QuerySortedSeqMetrics();
+ long basesExcludedByDupes = 0;
+ long basesExcludedByMapq = 0;
+ long basesExcludedByPairing = 0;
+ long basesExcludedByBaseq = 0;
+ long basesExcludedByOverlap = 0;
+ double insertSizeSum = 0.0;
+ }
+
public static void main(final String[] args) {
new CollectWgsMetricsFromQuerySorted().instanceMainWithExit(args);
}
@@ -72,65 +147,22 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
// the SAM reader
final SamReader reader = SamReaderFactory.makeDefault().open(INPUT);
- final PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(reader.iterator());
+ final PeekableIterator<SAMRecord> iterator = new PeekableIterator<>(reader.iterator());
// the metrics to keep track of
- final QuerySortedSeqMetrics metrics = new QuerySortedSeqMetrics();
- long basesExcludedByDupes = 0;
- long basesExcludedByMapq = 0;
- long basesExcludedByPairing = 0;
- long basesExcludedByBaseq = 0;
- long basesExcludedByOverlap = 0;
- double insertSizeSum = 0.0;
+ final IntermediateMetrics usableMetrics = new IntermediateMetrics();
+ usableMetrics.metrics.TYPE = FILTERING_STRINGENCY.USABLE;
+ final IntermediateMetrics rawMetrics = new IntermediateMetrics();
+ rawMetrics.metrics.TYPE = FILTERING_STRINGENCY.RAW;
+
+ adapterUtility = new AdapterUtility(AdapterUtility.DEFAULT_ADAPTER_SEQUENCE);
// Loop through all the loci by read pairs
QuerySortedReadPairIteratorUtil.ReadPair pairToAnalyze = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
while (pairToAnalyze != null) {
-
- final boolean isProperPair = (pairToAnalyze.read2 != null);
-
- // how many bases do we have?
- final int read1bases = pairToAnalyze.read1.getReadLength();
- final int read2bases = isProperPair ? pairToAnalyze.read2.getReadLength() : 0;
- final int totalReadBases = read1bases + read2bases;
-
- // now compute metrics...
- metrics.TOTAL_BASES += totalReadBases;
- if (isProperPair) metrics.TOTAL_READ_PAIRS++;
-
- if (!isProperPair || pairToAnalyze.read1.getMateUnmappedFlag() || pairToAnalyze.read2.getMateUnmappedFlag()) {
- basesExcludedByPairing += totalReadBases;
- } else if (pairToAnalyze.read1.getDuplicateReadFlag()) {
- metrics.TOTAL_DUPE_PAIRS++;
- basesExcludedByDupes += totalReadBases;
- } else {
-
- // determine the bad bases from the reads
- final BaseExclusionHelper read1exclusions = determineBaseExclusions(pairToAnalyze.read1);
- final BaseExclusionHelper read2exclusions = determineBaseExclusions(pairToAnalyze.read2);
- basesExcludedByMapq += read1exclusions.basesExcludedByMapq + read2exclusions.basesExcludedByMapq;
- basesExcludedByBaseq += read1exclusions.lowBQcount + read2exclusions.lowBQcount;
-
- // keep track of the total usable bases
- int usableBaseCount = totalReadBases;
- usableBaseCount -= (read1exclusions.basesExcludedByMapq + read1exclusions.lowBQcount);
- usableBaseCount -= (read2exclusions.basesExcludedByMapq + read2exclusions.lowBQcount);
-
- // subtract out bad bases from overlaps between the reads, but only if both reads pass mapping quality thresholds
- if (read1exclusions.basesExcludedByMapq == 0 && read2exclusions.basesExcludedByMapq == 0) {
- final int overlapCount = getOverlappingBaseCount(read1exclusions, read2exclusions);
- basesExcludedByOverlap += overlapCount;
- usableBaseCount -= overlapCount;
- }
-
- metrics.TOTAL_USABLE_BASES += usableBaseCount;
-
- final int insertSize = Math.abs(pairToAnalyze.read1.getInferredInsertSize());
- if (insertSize > 0 && pairToAnalyze.read1.getProperPairFlag()) {
- metrics.TOTAL_ORIENTED_PAIRS++;
- insertSizeSum += insertSize;
- }
- }
+ // calculate intermediate metrics
+ calculateMetricsForRead(pairToAnalyze, usableMetrics, MINIMUM_USABLE_MAPPING_QUALITY, MINIMUM_USABLE_BASE_QUALITY);
+ calculateMetricsForRead(pairToAnalyze, rawMetrics, MINIMUM_RAW_MAPPING_QUALITY, MINIMUM_RAW_BASE_QUALITY);
// record progress
progress.record(pairToAnalyze.read1);
@@ -139,38 +171,161 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
pairToAnalyze = QuerySortedReadPairIteratorUtil.getNextReadPair(iterator);
}
- // finalize metrics
- setUnusedMetrics(metrics);
- metrics.GENOME_TERRITORY = reader.getFileHeader().getSequenceDictionary().getReferenceLength();
- metrics.MEAN_COVERAGE = metrics.TOTAL_USABLE_BASES / (double)metrics.GENOME_TERRITORY;
- metrics.PCT_EXC_DUPE = basesExcludedByDupes / (double)metrics.TOTAL_BASES;
- metrics.PCT_EXC_MAPQ = basesExcludedByMapq / (double)metrics.TOTAL_BASES;
- metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / (double)metrics.TOTAL_BASES;
- metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / (double)metrics.TOTAL_BASES;
- metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / (double)metrics.TOTAL_BASES;
- final double totalExcludedBases = metrics.TOTAL_BASES - metrics.TOTAL_USABLE_BASES;
- metrics.PCT_EXC_TOTAL = totalExcludedBases / metrics.TOTAL_BASES;
- metrics.MEAN_INSERT_SIZE = insertSizeSum / metrics.TOTAL_ORIENTED_PAIRS;
+ // finalize and write the metrics
+ final long genomeTerritory = (GENOME_TERRITORY == null || GENOME_TERRITORY < 1L) ? reader.getFileHeader().getSequenceDictionary().getReferenceLength() : GENOME_TERRITORY;
+ usableMetrics.metrics.GENOME_TERRITORY = genomeTerritory;
+ finalizeMetrics(usableMetrics);
+ rawMetrics.metrics.GENOME_TERRITORY = genomeTerritory;
+ finalizeMetrics(rawMetrics);
final MetricsFile<QuerySortedSeqMetrics, Integer> out = getMetricsFile();
- out.addMetric(metrics);
+ out.addMetric(usableMetrics.metrics);
+ out.addMetric(rawMetrics.metrics);
out.write(OUTPUT);
-
return 0;
}
/**
+ * Calculate the contribution to the intermediate metrics for a given read pair
+ *
+ * @param pairToAnalyze the read pair to grab metrics from
+ * @param metrics the intermediate metrics with all the data we need
+ * @param minimumMappingQuality the minimum mapping quality
+ * @param minimumBaseQuality the minimum base quality
+ */
+ private void calculateMetricsForRead(final QuerySortedReadPairIteratorUtil.ReadPair pairToAnalyze,
+ final IntermediateMetrics metrics,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality) {
+ // don't bother at all with non-PF read pairs; if one is non-PF then the other is too so we only need to check the first one
+ if (pairToAnalyze.read1.getReadFailsVendorQualityCheckFlag()) {
+ return;
+ }
+
+ final boolean isPaired = (pairToAnalyze.read2 != null);
+
+ // how many bases do we have?
+ final int read1bases = pairToAnalyze.read1.getReadLength();
+ final int read2bases = isPaired ? pairToAnalyze.read2.getReadLength() : 0;
+ final int totalReadBases = read1bases + read2bases;
+
+ // now compute metrics...
+ metrics.metrics.PF_BASES += totalReadBases;
+ if (isNoiseRead(pairToAnalyze.read1)) metrics.metrics.PF_NOISE_READS++;
+
+ if (!pairToAnalyze.read1.getReadUnmappedFlag()) metrics.metrics.PF_READS_ALIGNED++;
+ else if (isAdapterRead(pairToAnalyze.read1)) metrics.metrics.PF_ADAPTER_READS++;
+
+ if (isPaired) {
+ metrics.metrics.PF_READ_PAIRS++;
+ if (!pairToAnalyze.read2.getReadUnmappedFlag()) metrics.metrics.PF_READS_ALIGNED++;
+ else if (isAdapterRead(pairToAnalyze.read2)) metrics.metrics.PF_ADAPTER_READS++;
+
+ if (isChimericReadPair(pairToAnalyze, minimumMappingQuality)) metrics.metrics.PF_CHIMERIC_PAIRS++;
+ }
+
+ // We note here several differences between this tool and CollectWgsMetrics:
+ // 1. CollectWgsMetrics does NOT count paired reads that are both unmapped in the PCT_EXC_UNPAIRED, but we do so here
+ // because this tool isn't a locus iterator and we need to ensure that our passing base numbers are accurate in the end.
+ // 2. For a similar reason, we DO count soft-clipped bases (and they are usually - but not always - filtered as part of
+ // PCT_EXC_BASEQ), while CollectWgsMetrics does not count them.
+ // 3. We DO count bases from insertions as part of the total coverage, while CollectWgsMetrics does not (because it cannot).
+ if (!isPaired || pairToAnalyze.read1.getMateUnmappedFlag() || pairToAnalyze.read2.getMateUnmappedFlag()) {
+ metrics.basesExcludedByPairing += totalReadBases;
+ } else if (pairToAnalyze.read1.getDuplicateReadFlag()) {
+ metrics.metrics.PF_DUPE_PAIRS++;
+ metrics.basesExcludedByDupes += totalReadBases;
+ } else {
+
+ // determine the bad bases from the reads
+ final BaseExclusionHelper read1exclusions = determineBaseExclusions(pairToAnalyze.read1, minimumMappingQuality, minimumBaseQuality);
+ final BaseExclusionHelper read2exclusions = determineBaseExclusions(pairToAnalyze.read2, minimumMappingQuality, minimumBaseQuality);
+ metrics.basesExcludedByMapq += read1exclusions.basesExcludedByMapq + read2exclusions.basesExcludedByMapq;
+ metrics.basesExcludedByBaseq += read1exclusions.lowBQcount + read2exclusions.lowBQcount;
+
+ // keep track of the total usable bases
+ int usableBaseCount = totalReadBases;
+ usableBaseCount -= (read1exclusions.basesExcludedByMapq + read1exclusions.lowBQcount);
+ usableBaseCount -= (read2exclusions.basesExcludedByMapq + read2exclusions.lowBQcount);
+
+ // subtract out bad bases from overlaps between the reads, but only if both reads pass mapping quality thresholds
+ if (read1exclusions.basesExcludedByMapq == 0 && read2exclusions.basesExcludedByMapq == 0) {
+ final int overlapCount = getOverlappingBaseCount(read1exclusions, read2exclusions, minimumBaseQuality);
+ metrics.basesExcludedByOverlap += overlapCount;
+ usableBaseCount -= overlapCount;
+ }
+
+ metrics.metrics.PF_PASSING_BASES += usableBaseCount;
+
+ final int insertSize = Math.abs(pairToAnalyze.read1.getInferredInsertSize());
+ if (insertSize > 0 && pairToAnalyze.read1.getProperPairFlag()) {
+ metrics.metrics.PF_ORIENTED_PAIRS++;
+ metrics.insertSizeSum += insertSize;
+ }
+ }
+ }
+
+ /**
+ * Finalize the metrics by doing some fun but easy math
+ *
+ * @param metrics the intermediate metrics with all the data we need
+ */
+ private void finalizeMetrics(final IntermediateMetrics metrics) {
+ setUnusedMetrics(metrics.metrics);
+ metrics.metrics.MEAN_COVERAGE = metrics.metrics.PF_PASSING_BASES / (double)metrics.metrics.GENOME_TERRITORY;
+ metrics.metrics.PCT_EXC_DUPE = metrics.basesExcludedByDupes / (double)metrics.metrics.PF_BASES;
+ metrics.metrics.PCT_EXC_MAPQ = metrics.basesExcludedByMapq / (double)metrics.metrics.PF_BASES;
+ metrics.metrics.PCT_EXC_UNPAIRED = metrics.basesExcludedByPairing / (double)metrics.metrics.PF_BASES;
+ metrics.metrics.PCT_EXC_BASEQ = metrics.basesExcludedByBaseq / (double)metrics.metrics.PF_BASES;
+ metrics.metrics.PCT_EXC_OVERLAP = metrics.basesExcludedByOverlap / (double)metrics.metrics.PF_BASES;
+ final double totalExcludedBases = metrics.metrics.PF_BASES - metrics.metrics.PF_PASSING_BASES;
+ metrics.metrics.PCT_EXC_TOTAL = totalExcludedBases / metrics.metrics.PF_BASES;
+ metrics.metrics.MEAN_INSERT_SIZE = metrics.insertSizeSum / metrics.metrics.PF_ORIENTED_PAIRS;
+ }
+
+ /**
+ * @param record the read
+ * @return true if the read is considered a "noise" read (all As and/or Ns), false otherwise
+ */
+ private boolean isNoiseRead(final SAMRecord record) {
+ final Object noiseAttribute = record.getAttribute(ReservedTagConstants.XN);
+ return (noiseAttribute != null && noiseAttribute.equals(1));
+ }
+
+ /**
+ * @param record the read
+ * @return true if the read is from known adapter sequence, false otherwise
+ */
+ private boolean isAdapterRead(final SAMRecord record) {
+ final byte[] readBases = record.getReadBases();
+ if (!(record instanceof BAMRecord)) StringUtil.toUpperCase(readBases);
+ return adapterUtility.isAdapterSequence(readBases);
+ }
+
+ /**
+ * @param readPair the read pair
+ * @param minimumMappingQuality the minimum mapping quality
+ * @return true if the read pair is considered chimeric, false otherwise
+ */
+ private boolean isChimericReadPair(final QuerySortedReadPairIteratorUtil.ReadPair readPair, final int minimumMappingQuality) {
+ // Check that both ends are aligned, have mapq >= minimum, and fit the bill for chimeras
+ return (readPair.read1.getMappingQuality() >= minimumMappingQuality && readPair.read2.getMappingQuality() >= minimumMappingQuality &&
+ ChimeraUtil.isChimeric(readPair.read1, readPair.read2, ChimeraUtil.DEFAULT_INSERT_SIZE_LIMIT, ChimeraUtil.DEFAULT_EXPECTED_ORIENTATIONS));
+ }
+
+ /**
* Get the count of low quality and/or softclip bases in the given read
*
* @param exclusions the helper object
+ * @param minimumBaseQuality the minimum base quality
* @return non-negative int
*/
- private int getLowQualityOrSoftclipBaseCount(final BaseExclusionHelper exclusions) {
+ private int getLowQualityOrSoftclipBaseCount(final BaseExclusionHelper exclusions, final int minimumBaseQuality) {
final byte[] quals = exclusions.read.getBaseQualities();
int badCount = exclusions.firstUnclippedBaseIndex + (quals.length - exclusions.firstTrailingClippedBaseIndex);
for (int i = exclusions.firstUnclippedBaseIndex; i < exclusions.firstTrailingClippedBaseIndex; i++) {
- if (quals[i] < MINIMUM_BASE_QUALITY)
+ if (quals[i] < minimumBaseQuality)
badCount++;
}
return badCount;
@@ -185,6 +340,7 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
metrics.SD_COVERAGE = -1;
metrics.MEDIAN_COVERAGE = -1;
metrics.MAD_COVERAGE = -1;
+ metrics.PCT_1X = -1;
metrics.PCT_5X = -1;
metrics.PCT_10X = -1;
metrics.PCT_15X = -1;
@@ -206,12 +362,13 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
*
* @param read1exclusions the 1st read exclusions
* @param read2exclusions the 2nd read exclusions
+ * @param minimumBaseQuality the minimum base quality
* @return non-negative int
*/
- private int getOverlappingBaseCount(final BaseExclusionHelper read1exclusions, final BaseExclusionHelper read2exclusions) {
+ private int getOverlappingBaseCount(final BaseExclusionHelper read1exclusions, final BaseExclusionHelper read2exclusions, final int minimumBaseQuality) {
// make life easy by ensuring that reads come in coordinate order
if ( read2exclusions.read.getAlignmentStart() < read1exclusions.read.getAlignmentStart() ) {
- return getOverlappingBaseCount(read2exclusions, read1exclusions);
+ return getOverlappingBaseCount(read2exclusions, read1exclusions, minimumBaseQuality);
}
// must be overlapping
@@ -233,7 +390,7 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
final int posInRead2 = read2exclusions.firstUnclippedBaseIndex + i;
// we only want to count it if they are both high quality (i.e. not already counted among bad bases)
- if (read1quals[posInRead1] >= MINIMUM_BASE_QUALITY && read2quals[posInRead2] >= MINIMUM_BASE_QUALITY) {
+ if (read1quals[posInRead1] >= minimumBaseQuality && read2quals[posInRead2] >= minimumBaseQuality) {
numHighQualityOverlappingBases++;
}
}
@@ -245,15 +402,17 @@ public class CollectWgsMetricsFromQuerySorted extends CommandLineProgram {
* Determine how many bases are excluded because of low mapping or base quality.
*
* @param read the read
+ * @param minimumMappingQuality the minimum mapping quality
+ * @param minimumBaseQuality the minimum base quality
* @return non-null object
*/
- private BaseExclusionHelper determineBaseExclusions(final SAMRecord read) {
+ private BaseExclusionHelper determineBaseExclusions(final SAMRecord read, final int minimumMappingQuality, final int minimumBaseQuality) {
final BaseExclusionHelper exclusions = new BaseExclusionHelper(read);
- if (read.getMappingQuality() < MINIMUM_MAPPING_QUALITY) {
+ if (read.getMappingQuality() < minimumMappingQuality) {
exclusions.basesExcludedByMapq = read.getReadLength();
} else {
- exclusions.lowBQcount = getLowQualityOrSoftclipBaseCount(exclusions);
+ exclusions.lowBQcount = getLowQualityOrSoftclipBaseCount(exclusions, minimumBaseQuality);
}
return exclusions;
diff --git a/src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java b/src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
index d2fdec3..478afa3 100644
--- a/src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
+++ b/src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.SamReader;
@@ -5,6 +29,7 @@ import htsjdk.samtools.util.*;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
+import picard.filter.CountingFilter;
import java.io.File;
diff --git a/src/java/picard/analysis/CompareMetrics.java b/src/java/picard/analysis/CompareMetrics.java
index 130bba6..0ea5457 100644
--- a/src/java/picard/analysis/CompareMetrics.java
+++ b/src/java/picard/analysis/CompareMetrics.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.metrics.MetricsFile;
@@ -17,13 +41,22 @@ import java.util.List;
* Compare two metrics files.
*/
@CommandLineProgramProperties(
- usage = CompareMetrics.USAGE,
- usageShort = CompareMetrics.USAGE,
+ usage = CompareMetrics.USAGE_SUMMARY + CompareMetrics.USAGE_DETAIL,
+ usageShort = CompareMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CompareMetrics extends CommandLineProgram {
-
- static final String USAGE = "Compare two metrics files";
+ static final String USAGE_SUMMARY = "Compare two metrics files.";
+ static final String USAGE_DETAIL = "This tool compares the metrics and histograms generated from metric tools to determine " +
+ "if the generated results are identical. This tool is useful to test and compare outputs when code changes are implemented. It is not meant for use by end-users of this toolkit.<br /><br /> " +
+ "The tool's output simply indicates whether two metrics files are equal or not equal. <br /> " +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CompareMetrics \\<br />" +
+ " metricfile1.txt \\<br />" +
+ " metricfile2.txt" +
+ "</pre>" +
+ "<hr />";
@PositionalArguments(minElements = 2, maxElements = 2)
public List<File> metricsFiles;
diff --git a/src/java/picard/analysis/FingerprintingDetailMetrics.java b/src/java/picard/analysis/FingerprintingDetailMetrics.java
new file mode 100644
index 0000000..20a19ae
--- /dev/null
+++ b/src/java/picard/analysis/FingerprintingDetailMetrics.java
@@ -0,0 +1,70 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.metrics.MetricBase;
+
+/**
+ * Detailed metrics about an individual SNP/Haplotype comparison within a fingerprint comparison.
+ *
+ * @author Tim Fennell
+ */
+public class FingerprintingDetailMetrics extends MetricBase {
+ /** The sequencing read group from which sequence data was fingerprinted. */
+ public String READ_GROUP;
+
+ /** The name of the sample whose genotypes the sequence data was compared to. */
+ public String SAMPLE;
+
+ /**
+ * The name of a representative SNP within the haplotype that was compared. Will usually be the
+ * exact SNP that was genotyped externally.
+ */
+ public String SNP;
+
+ /** The possible alleles for the SNP. */
+ public String SNP_ALLELES;
+
+ /** The chromosome on which the SNP resides. */
+ public String CHROM;
+
+ /** The position of the SNP on the chromosome. */
+ public int POSITION;
+
+ /** The expected genotype of the sample at the SNP locus. */
+ public String EXPECTED_GENOTYPE;
+
+ /** The most likely genotype given the observed evidence at the SNP locus in the sequencing data. */
+ public String OBSERVED_GENOTYPE;
+
+ /** The LOD score for OBSERVED_GENOTYPE vs. the next most likely genotype in the sequencing data. */
+ public double LOD;
+
+ /** The number of observations of the first, or A, allele of the SNP in the sequencing data. */
+ public int OBS_A;
+
+ /** The number of observations of the second, or B, allele of the SNP in the sequencing data. */
+ public int OBS_B;
+}
diff --git a/src/java/picard/analysis/FingerprintingSummaryMetrics.java b/src/java/picard/analysis/FingerprintingSummaryMetrics.java
new file mode 100644
index 0000000..a822e9e
--- /dev/null
+++ b/src/java/picard/analysis/FingerprintingSummaryMetrics.java
@@ -0,0 +1,79 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.metrics.MetricBase;
+
+/**
+ * Summary fingerprinting metrics and statistics about the comparison of the sequence data
+ * from a single read group (lane or index within a lane) vs. a set of known genotypes for
+ * the expected sample.
+ *
+ * @author Tim Fennell
+ */
+public class FingerprintingSummaryMetrics extends MetricBase {
+ /** The read group from which sequence data was drawn for comparison. */
+ public String READ_GROUP;
+
+ /** The sample whose known genotypes the sequence data was compared to. */
+ public String SAMPLE;
+
+ /** The Log Likelihood of the sequence data given the expected sample's genotypes. */
+ public double LL_EXPECTED_SAMPLE;
+
+ /** The Log Likelihood of the sequence data given a random sample from the human population. */
+ public double LL_RANDOM_SAMPLE;
+
+ /**
+ * The LOD for Expected Sample vs. Random Sample. A positive LOD indicates that the sequence data
+ * is more likely to come from the expected sample vs. a random sample from the population, by LOD logs.
+ * I.e. a value of 6 indicates that the sequence data is 1,000,000 times more likely to come from the expected
+ * sample than from a random sample. A negative LOD indicates the reverse - that the sequence data is more
+ * likely to come from a random sample than from the expected sample.
+ */
+ public double LOD_EXPECTED_SAMPLE;
+
+ /** The number of haplotypes that had expected genotypes to compare to. */
+ public int HAPLOTYPES_WITH_GENOTYPES;
+
+ /**
+ * The subset of genotyped haplotypes for which there was sufficient sequence data to
+ * confidently genotype the haplotype. Note: all haplotypes with sequence coverage contribute to the
+ * LOD score, even if they cannot be "confidently checked" individually.
+ * */
+ public int HAPLOTYPES_CONFIDENTLY_CHECKED;
+
+ /** The subset of confidently checked haplotypes that match the expected genotypes. */
+ public int HAPLOTYPES_CONFIDENTLY_MATCHING;
+
+ /** num of hets, observed as homs with LOD > threshold */
+ public int HET_AS_HOM;
+
+ /** num of homs, observed as hets with LOD > threshold */
+ public int HOM_AS_HET;
+
+ /** num of homs, observed as other homs with LOD > threshold */
+ public int HOM_AS_OTHER_HOM;
+}
diff --git a/src/java/picard/analysis/GcBiasMetricsCollector.java b/src/java/picard/analysis/GcBiasMetricsCollector.java
index 614a131..79659b2 100644
--- a/src/java/picard/analysis/GcBiasMetricsCollector.java
+++ b/src/java/picard/analysis/GcBiasMetricsCollector.java
@@ -234,6 +234,12 @@ public class GcBiasMetricsCollector extends MultiLevelCollector<GcBiasMetrics, I
summary.WINDOW_SIZE = scanWindowSize;
summary.TOTAL_CLUSTERS = totalClusters;
summary.ALIGNED_READS = totalAlignedReads;
+ summary.GC_NC_0_19 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 0, 19);
+ summary.GC_NC_20_39 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 20, 39);
+ summary.GC_NC_40_59 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 40, 59);
+ summary.GC_NC_60_79 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 60, 79);
+ summary.GC_NC_80_100 = calculateGcNormCoverage(meanReadsPerWindow, readsByGc, 80, 100);
+
calculateDropoutMetrics(metrics.DETAILS.getMetrics(), summary);
@@ -246,6 +252,27 @@ public class GcBiasMetricsCollector extends MultiLevelCollector<GcBiasMetrics, I
}
/////////////////////////////////////////////////////////////////////////////
+ // Calculates the normalized coverage over a given gc content region
+ /////////////////////////////////////////////////////////////////////////////
+ private double calculateGcNormCoverage(final double meanReadsPerWindow, final int[] readsByGc, final int start, final int end) {
+ int windowsTotal = 0;
+ double sum = 0.0;
+ for (int i = start; i <= end; i++) {
+ if (windowsByGc[i] != 0) {
+ sum += (double) readsByGc[i];
+ windowsTotal += windowsByGc[i];
+ }
+ }
+
+ if (windowsTotal == 0) {
+ return 0.0;
+ }
+ else {
+ return (sum / (windowsTotal*meanReadsPerWindow));
+ }
+ }
+
+ /////////////////////////////////////////////////////////////////////////////
// Calculates the Illumina style AT and GC dropout numbers
/////////////////////////////////////////////////////////////////////////////
private void calculateDropoutMetrics(final Collection<GcBiasDetailMetrics> details,
diff --git a/src/java/picard/analysis/GcBiasSummaryMetrics.java b/src/java/picard/analysis/GcBiasSummaryMetrics.java
index f2ba5c7..00eea88 100644
--- a/src/java/picard/analysis/GcBiasSummaryMetrics.java
+++ b/src/java/picard/analysis/GcBiasSummaryMetrics.java
@@ -54,4 +54,13 @@ public class GcBiasSummaryMetrics extends MultilevelMetrics {
* (%ref_at_gc - %reads_at_gc) and summing all positive values for GC=[50..100].
*/
public double GC_DROPOUT;
+
+ /**
+ * Normalized coverage over each quintile of GC content.
+ */
+ public double GC_NC_0_19;
+ public double GC_NC_20_39;
+ public double GC_NC_40_59;
+ public double GC_NC_60_79;
+ public double GC_NC_80_100;
}
diff --git a/src/java/picard/analysis/MeanQualityByCycle.java b/src/java/picard/analysis/MeanQualityByCycle.java
index 8c7c11a..13c67c2 100644
--- a/src/java/picard/analysis/MeanQualityByCycle.java
+++ b/src/java/picard/analysis/MeanQualityByCycle.java
@@ -52,14 +52,27 @@ import java.util.List;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Program to generate a data table and pdf chart of " +
- "mean base quality by cycle from a SAM or BAM file. Works best on a single lane/run of data, but can be applied to" +
- "merged BAMs. Uses R to generate chart output.",
- usageShort = "Writes mean quality by cycle for a SAM or BAM file",
+ usage = MeanQualityByCycle.USAGE_SUMMARY + MeanQualityByCycle.USAGE_DETAILS,
+ usageShort = MeanQualityByCycle.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class MeanQualityByCycle extends SinglePassSamProgram {
-
+ static final String USAGE_SUMMARY = "Collect mean quality by cycle.";
+ static final String USAGE_DETAILS = "This tool generates a data table and chart of mean quality by cycle from a BAM file. It is " +
+ "intended to be used on a single lane or read group's worth of data, but can be applied to merged BAMs if needed. " +
+ "<br /><br />" +
+ "This metric gives an overall snapshot of sequencing machine performance. For most types of sequencing data, the output " +
+ "is expected to show a slight reduction in overall base quality scores towards the end of each read. Spikes in quality within " +
+ "reads are not expected and may indicate that technical problems occurred during sequencing." +
+ "<br /><br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar MeanQualityByCycle \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=mean_qual_by_cycle.txt \\<br />" +
+ " CHART=mean_qual_by_cycle.pdf" +
+ "</pre>" +
+ "<hr />";
@Option(shortName="CHART", doc="A file (with .pdf extension) to write the chart to.")
public File CHART_OUTPUT;
diff --git a/src/java/picard/analysis/QualityScoreDistribution.java b/src/java/picard/analysis/QualityScoreDistribution.java
index bc227d8..7499e8b 100644
--- a/src/java/picard/analysis/QualityScoreDistribution.java
+++ b/src/java/picard/analysis/QualityScoreDistribution.java
@@ -48,13 +48,30 @@ import java.util.List;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Program to chart " +
- "quality score distributions in a SAM or BAM file.",
- usageShort = "Charts quality score distributions for a SAM or BAM file",
+ usage = QualityScoreDistribution.USAGE_SUMMARY + QualityScoreDistribution.USAGE_DETAILS,
+ usageShort = QualityScoreDistribution.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class QualityScoreDistribution extends SinglePassSamProgram {
-
+ static final String USAGE_SUMMARY = "Chart the distribution of quality scores. ";
+ static final String USAGE_DETAILS = "This tool is used for determining the overall \"quality\" for a library in a given run. To " +
+ "that effect, it outputs a chart and tables indicating the range of quality scores and the total numbers of bases " +
+ "corresponding to those scores. Options include plotting the distribution of all of the reads, only the aligned reads, " +
+ "or reads that have passed the Illumina Chastity filter thresholds as described <a href='http://gatkforums.broadinstitute.org/discussion/6329/pf-reads-illumina-chastity-filter'>here</a>." +
+ "<br /> <br />" +
+ "<h4>Note on base quality score options</h4>" +
+ "If the quality score of read bases has been modified in a previous data processing step such as " +
+ "<a href='http://gatkforums.broadinstitute.org/discussion/44/base-quality-score-recalibration-bqsr'>GATK Base Recalibration</a> " +
+ "and an OQ tag is available, this tool can be set to plot the OQ value as well as the primary quality value for the evaluation. " +
+ "<br />" +
+ "<h4>Usage Example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar QualityScoreDistribution \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=qual_score_dist.txt \\<br />" +
+ " CHART=qual_score_dist.pdf" +
+ "</pre>" +
+ "<hr />";
@Option(shortName="CHART", doc="A file (with .pdf extension) to write the chart to.")
public File CHART_OUTPUT;
diff --git a/src/java/picard/analysis/SinglePassSamProgram.java b/src/java/picard/analysis/SinglePassSamProgram.java
index ac14bf5..7021188 100644
--- a/src/java/picard/analysis/SinglePassSamProgram.java
+++ b/src/java/picard/analysis/SinglePassSamProgram.java
@@ -1,3 +1,27 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package picard.analysis;
import htsjdk.samtools.SAMFileHeader;
diff --git a/src/java/picard/analysis/TheoreticalSensitivity.java b/src/java/picard/analysis/TheoreticalSensitivity.java
new file mode 100644
index 0000000..4e635da
--- /dev/null
+++ b/src/java/picard/analysis/TheoreticalSensitivity.java
@@ -0,0 +1,188 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.Log;
+import picard.PicardException;
+import picard.util.MathUtil;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Created by David Benjamin on 5/13/15.
+ */
+public class TheoreticalSensitivity {
+ /**
+ * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1, ..., N - 1
+ * @param qualityDistribution the probability of quality q is qualityDistribution[q] for q = 0, 1, ..., Q
+ * @param sampleSize the number of random sums of quality scores for each m
+ * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP,
+ * for example 5 if the variant likelihood must be 10^5 times greater
+ */
+ private static final Log log = Log.getInstance(TheoreticalSensitivity.class);
+ private static final int SAMPLING_MAX = 600; //prevent 'infinite' loops
+ private static final int MAX_CONSIDERED_DEPTH = 1000; //no point in looking any deeper than this, otherwise GC overhead is too high.
+
+ public static double hetSNPSensitivity(final double[] depthDistribution, final double[] qualityDistribution,
+ final int sampleSize, final double logOddsThreshold) {
+ final int N = Math.min(depthDistribution.length, MAX_CONSIDERED_DEPTH + 1);
+
+ log.info("Creating Roulette Wheel");
+ final RouletteWheel qualitySampler = new RouletteWheel(qualityDistribution);
+
+ //qualitySums[m] is a random sample of sums of m quality scores, for m = 0, 1, ..., N - 1
+ log.info("Calculating quality sums from quality sampler");
+ final List<ArrayList<Integer>> qualitySums = qualitySampler.sampleCumulativeSums(N, sampleSize);
+
+ //if a quality sum of m qualities exceeds the quality sum threshold for n total reads, a SNP is called
+ final ArrayList<Double> qualitySumThresholds = new ArrayList<>(N);
+ final double LOG_10 = Math.log10(2);
+
+ for (int n = 0; n < N; n++) qualitySumThresholds.add(10 * (n * LOG_10 + logOddsThreshold));
+
+ //probabilityToExceedThreshold[m][n] is the probability that the sum of m quality scores
+ //exceeds the nth quality sum threshold
+ log.info("Calculating theoretical het sensitivity");
+ final List<ArrayList<Double>> probabilityToExceedThreshold = proportionsAboveThresholds(qualitySums, qualitySumThresholds);
+ final List<ArrayList<Double>> altDepthDistribution = hetAltDepthDistribution(N);
+ double result = 0.0;
+ for (int n = 0; n < N; n++) {
+ for (int m = 0; m <= n; m++) {
+ result += depthDistribution[n] * altDepthDistribution.get(n).get(m) * probabilityToExceedThreshold.get(m).get(n);
+ }
+ }
+ return result;
+ }
+
+ //given L lists of lists and N thresholds, count the proportion of each list above each threshold
+ public static List<ArrayList<Double>> proportionsAboveThresholds(final List<ArrayList<Integer>> lists, final List<Double> thresholds) {
+ final ArrayList<ArrayList<Double>> result = new ArrayList<>();
+
+ for (final ArrayList<Integer> list : lists) {
+ final ArrayList<Double> newRow = new ArrayList<>(Collections.nCopies(thresholds.size(), 0.0));
+ Collections.sort(list);
+ int n = 0;
+ int j = 0; //index within the ordered sample
+ while (n < thresholds.size() && j < list.size()) {
+ if (thresholds.get(n) > list.get(j)) j++;
+ else newRow.set(n++, (double) (list.size() - j) / list.size());
+ }
+ result.add(newRow);
+ }
+ return result;
+ }
+
+ //Utility function for making table of binomial distribution probabilities nCm * (0.5)^n
+ //for n = 0, 1 . . . N - 1 and m = 0, 1. . . n
+ public static List<ArrayList<Double>> hetAltDepthDistribution(final int N) {
+ final List<ArrayList<Double>> table = new ArrayList<>();
+ for (int n = 0; n < N; n++) {
+ final ArrayList<Double> nthRow = new ArrayList<>();
+
+ //add the 0th element, then elements 1 through n - 1, then the nth.
+ //Note that nCm = (n-1)C(m-1) * (n/m)
+ nthRow.add(Math.pow(0.5, n));
+ for (int m = 1; m < n; m++) nthRow.add((n * 0.5 / m) * table.get(n - 1).get(m - 1));
+ if (n > 0) nthRow.add(nthRow.get(0));
+
+ table.add(nthRow);
+ }
+ return table;
+ }
+
+ /*
+ Perform random draws from {0, 1. . . N - 1} according to a list of relative probabilities.
+
+ We use an O(1) stochastic acceptance algorithm -- see Physica A, Volume 391, Page 2193 (2012) --
+ which works well when the ratio of maximum weight to average weight is not large.
+ */
+ public static class RouletteWheel {
+ final private List<Double> probabilities;
+ final private int N;
+ private int count = 0;
+
+ RouletteWheel(final double[] weights) {
+ N = weights.length;
+
+ probabilities = new ArrayList<>();
+ final double wMax = MathUtil.max(weights);
+
+ if (wMax == 0) {
+ throw new PicardException("Quality score distribution is empty.");
+ }
+
+ for (final double w : weights) {
+ probabilities.add(w / wMax);
+ }
+ }
+
+ public int draw() {
+ while (true) {
+ final int n = (int) (N * Math.random());
+ count++;
+ if (Math.random() < probabilities.get(n)) {
+ count = 0;
+ return n;
+ } else if (count >= SAMPLING_MAX) {
+ count = 0;
+ return 0;
+ }
+ }
+ }
+
+ //get samples of sums of 0, 1, 2,. . . N - 1 draws
+ public List<ArrayList<Integer>> sampleCumulativeSums(final int maxNumberOfSummands, final int sampleSize) {
+ final List<ArrayList<Integer>> result = new ArrayList<>();
+ for (int m = 0; m < maxNumberOfSummands; m++) result.add(new ArrayList<>());
+
+ for (int iteration = 0; iteration < sampleSize; iteration++) {
+ int cumulativeSum = 0;
+ for (int m = 0; m < maxNumberOfSummands; m++) {
+ result.get(m).add(cumulativeSum);
+ cumulativeSum += draw();
+ }
+ if (iteration % 1000 == 0) {
+ log.info(iteration + " sampling iterations completed");
+ }
+ }
+ return result;
+ }
+ }
+
+ public static double[] normalizeHistogram(final Histogram<Integer> histogram) {
+ if (histogram == null) throw new PicardException("Histogram is null and cannot be normalized");
+
+ final double histogramSumOfValues = histogram.getSumOfValues();
+ final double[] normalizedHistogram = new double[histogram.size()];
+
+ for (int i = 0; i < histogram.size(); i++) {
+ normalizedHistogram[i] = histogram.get(i).getValue() / histogramSumOfValues;
+ }
+ return normalizedHistogram;
+ }
+}
diff --git a/src/java/picard/analysis/artifacts/ArtifactCounter.java b/src/java/picard/analysis/artifacts/ArtifactCounter.java
index 953b6f1..dba5fb4 100644
--- a/src/java/picard/analysis/artifacts/ArtifactCounter.java
+++ b/src/java/picard/analysis/artifacts/ArtifactCounter.java
@@ -21,10 +21,7 @@ class ArtifactCounter {
private final String sampleAlias;
private final String library;
- private final Set<String> fullContexts;
- private final Map<String, String> leadingContextMap;
- private final Map<String, String> trailingContextMap;
- private final Map<String, String> zeroContextMap;
+ private final Map<String, RefContext> contextMap = new HashMap<>();
private final ContextAccumulator fullContextAccumulator;
private final ContextAccumulator halfContextAccumulator;
@@ -35,38 +32,51 @@ class ArtifactCounter {
private final List<BaitBiasSummaryMetrics> baitBiasSummaryMetricsList;
private final List<BaitBiasDetailMetrics> baitBiasDetailMetricsList;
+ private final Set<String> leadingContexts = new HashSet<>();
+ private final Set<String> trailingContexts = new HashSet<>();
+
+ // tuple to keep track of the different types of sub-contexts from a given reference context
+ protected final class RefContext {
+ final String ref, leading, trailing, zero;
+
+ public RefContext(final String ref, final String leading, final String trailing, final String zero) {
+ this.ref = ref;
+ this.leading = leading;
+ this.trailing = trailing;
+ this.zero = zero;
+ }
+ }
+
public ArtifactCounter(final String sampleAlias, final String library, final int contextSize, final boolean expectedTandemReads) {
this.sampleAlias = sampleAlias;
this.library = library;
// define the contexts
- this.fullContexts = new HashSet<String>();
+ final HashSet<String> fullContexts = new HashSet<>();
for (final byte[] kmer : SequenceUtil.generateAllKmers(2 * contextSize + 1)) {
- this.fullContexts.add(StringUtil.bytesToString(kmer));
+ fullContexts.add(StringUtil.bytesToString(kmer));
}
+ final Set<String> zeroContexts = new HashSet<>();
+
// the half contexts specify either leading or trailing bases. the zero context is just the center.
// NB: we use N to represent a wildcard base, rather than an ambiguous base. It's assumed that all of the input
// contexts are unambiguous, and that any actual N's in the data have been dealt with elsewhere.
final String padding = StringUtil.repeatCharNTimes('N', contextSize);
- this.leadingContextMap = new HashMap<String, String>();
- this.trailingContextMap = new HashMap<String, String>();
- this.zeroContextMap = new HashMap<String, String>();
- for (final String context : this.fullContexts) {
- final String leading = context.substring(0, contextSize);
- final String trailing = context.substring(contextSize + 1, context.length());
- final char center = context.charAt(contextSize);
- this.leadingContextMap.put(context, leading + center + padding);
- this.trailingContextMap.put(context, padding + center + trailing);
- this.zeroContextMap.put(context, padding + center + padding);
+ for (final String context : fullContexts) {
+ final char centralBase = context.charAt(contextSize);
+ final String leading = context.substring(0, contextSize) + centralBase + padding;
+ final String trailing = padding + centralBase + context.substring(contextSize + 1, context.length());
+ final String zero = padding + centralBase + padding;
+ contextMap.put(context, new RefContext(context, leading, trailing, zero));
+
+ leadingContexts.add(leading);
+ trailingContexts.add(trailing);
+ zeroContexts.add(zero);
}
- // set up the accumulators
- final Set<String> halfContexts = new HashSet<String>();
- halfContexts.addAll(leadingContextMap.values());
- halfContexts.addAll(trailingContextMap.values());
- final Set<String> zeroContexts = new HashSet<String>();
- zeroContexts.addAll(zeroContextMap.values());
+ final Set<String> halfContexts = new HashSet<>(leadingContexts);
+ halfContexts.addAll(trailingContexts);
this.fullContextAccumulator = new ContextAccumulator(fullContexts, expectedTandemReads);
this.halfContextAccumulator = new ContextAccumulator(halfContexts, expectedTandemReads);
@@ -83,11 +93,12 @@ class ArtifactCounter {
* Add a record to all the accumulators.
*/
public void countRecord(final String refContext, final char calledBase, final SAMRecord rec) {
- if (this.fullContexts.contains(refContext)) {
- this.fullContextAccumulator.countRecord(refContext, calledBase, rec);
- this.halfContextAccumulator.countRecord(this.leadingContextMap.get(refContext), calledBase, rec);
- this.halfContextAccumulator.countRecord(this.trailingContextMap.get(refContext), calledBase, rec);
- this.zeroContextAccumulator.countRecord(this.zeroContextMap.get(refContext), calledBase, rec);
+ if (this.contextMap.containsKey(refContext)) {
+ final RefContext contexts = contextMap.get(refContext);
+ this.fullContextAccumulator.countRecord(contexts.ref, calledBase, rec);
+ this.halfContextAccumulator.countRecord(contexts.leading, calledBase, rec);
+ this.halfContextAccumulator.countRecord(contexts.trailing, calledBase, rec);
+ this.zeroContextAccumulator.countRecord(contexts.zero, calledBase, rec);
}
}
@@ -147,8 +158,8 @@ class ArtifactCounter {
if (!metrics.preAdapterMetrics.CONTEXT.equals(metrics.baitBiasMetrics.CONTEXT)) {
throw new PicardException("Input detail metrics are not matched up properly - contexts differ.");
}
- final boolean isLeading = this.leadingContextMap.containsValue(metrics.preAdapterMetrics.CONTEXT);
- final boolean isTrailing = this.trailingContextMap.containsValue(metrics.preAdapterMetrics.CONTEXT);
+ final boolean isLeading = leadingContexts.contains(metrics.preAdapterMetrics.CONTEXT);
+ final boolean isTrailing = trailingContexts.contains(metrics.preAdapterMetrics.CONTEXT);
// if the original contextSize is 0, there's no difference between leading and trailing, so add it to both
if (isLeading) leadingMetricsForTransition.add(metrics);
if (isTrailing) trailingMetricsForTransition.add(metrics);
@@ -208,8 +219,10 @@ class ArtifactCounter {
PreAdapterDetailMetrics worstPreAdapterMetrics = null;
BaitBiasDetailMetrics worstBaitBiasMetrics = null;
for (final DetailPair m : metrics) {
- if (worstPreAdapterMetrics == null || m.preAdapterMetrics.QSCORE < worstPreAdapterMetrics.QSCORE) worstPreAdapterMetrics = m.preAdapterMetrics;
- if (worstBaitBiasMetrics == null || m.baitBiasMetrics.QSCORE < worstBaitBiasMetrics.QSCORE) worstBaitBiasMetrics = m.baitBiasMetrics;
+
+ //The comparator first compares by QSCORE and then uses other fields to guarantee a deterministic order
+ if (worstPreAdapterMetrics == null || m.preAdapterMetrics.compareTo(worstPreAdapterMetrics) < 0) worstPreAdapterMetrics = m.preAdapterMetrics;
+ if (worstBaitBiasMetrics == null || m.baitBiasMetrics. compareTo(worstBaitBiasMetrics) < 0) worstBaitBiasMetrics = m.baitBiasMetrics;
}
return new DetailPair(worstPreAdapterMetrics, worstBaitBiasMetrics);
}
diff --git a/src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java b/src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
index e9c9771..59451e5 100644
--- a/src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
+++ b/src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
@@ -53,13 +53,37 @@ import static htsjdk.samtools.util.CodeUtil.getOrElse;
*
*/
@CommandLineProgramProperties(
- usage = CollectSequencingArtifactMetrics.USAGE,
- usageShort = CollectSequencingArtifactMetrics.USAGE,
+ usage = CollectSequencingArtifactMetrics.USAGE_SUMMARY + CollectSequencingArtifactMetrics.USAGE_DETAILS,
+ usageShort = CollectSequencingArtifactMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
- static final String USAGE = "Collect metrics to quantify single-base sequencing artifacts.";
-
+ static final String USAGE_SUMMARY = "Collect metrics to quantify single-base sequencing artifacts.";
+ static final String USAGE_DETAILS = "This tool examines two sources of sequencing errors resulting from hybrid selection protocols:" +
+ " <a href='https://www.broadinstitute.org/gatk/guide/article?id=6333'>bait-bias</a> and " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6332'>" +
+ "pre-adapter artifacts</a>. For a brief primer on these types of artifacts, see the corresponding GATK Dictionary entries." +
+ "<br /><br />" +
+ "This tool produces four files; summary and detail metrics files for both pre-adapter and bait-bias artifacts. The detailed " +
+ "metrics show the error rates for each type of base substitution within every possible triplet base configuration. Error " +
+ "rates associated with these substitutions are Phred-scaled and provided as quality scores, the lower the value, the more " +
+ "likely it is that an alternate base call is due to an artifact. The summary metrics provide likelihood information on the " +
+ "\"worst-case\" errors. <br />" +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectSequencingArtifactMetrics \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=artifact_metrics.txt \\<br />" +
+ " R=reference_sequence.fasta" +
+ "</pre>" +
+ "" +
+ "For additional information, please see " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterDetailMetrics'>the PreAdapterDetailMetrics documentation</a>, the " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterSummaryMetrics'>the PreAdapterSummaryMetrics documentation</a>, the " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasDetailMetrics'>the BaitBiasDetailMetrics documentation</a>, and the " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasSummaryMetrics'>the BaitBiasSummaryMetrics documentation</a>. " +
+ "<hr />" ;
@Option(doc = "An optional list of intervals to restrict analysis to.", optional = true)
public File INTERVALS;
@@ -108,6 +132,9 @@ public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
private DbSnpBitSetUtil dbSnpMask;
private SamRecordFilter recordFilter;
+ private String currentRefString = null;
+ private int currentRefIndex = -1;
+
private final Set<String> samples = new HashSet<String>();
private final Set<String> libraries = new HashSet<String>();
private final Map<String, ArtifactCounter> artifactCounters = new HashMap<String, ArtifactCounter>();
@@ -196,6 +223,18 @@ public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
throw new PicardException("Record contains library that is missing from header: " + library);
}
+ // set up some constants that don't change in the loop below
+ final int contextFullLength = 2 * CONTEXT_SIZE + 1;
+ final ArtifactCounter counter = artifactCounters.get(library);
+ final byte[] readBases = rec.getReadBases();
+ final byte[] readQuals;
+ if (USE_OQ) {
+ final byte[] tmp = rec.getOriginalBaseQualities();
+ readQuals = tmp == null ? rec.getBaseQualities() : tmp;
+ } else {
+ readQuals = rec.getBaseQualities();
+ }
+
// iterate over aligned positions
for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
for (int offset = 0; offset < block.getLength(); offset++) {
@@ -203,6 +242,14 @@ public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
final int readPos = block.getReadStart() + offset;
final int refPos = block.getReferenceStart() + offset;
+ // skip low BQ sites
+ final byte qual = readQuals[readPos - 1];
+ if (qual < MINIMUM_QUALITY_SCORE) continue;
+
+ // skip N bases in read
+ final char readBase = Character.toUpperCase((char)readBases[readPos - 1]);
+ if (readBase == 'N') continue;
+
/**
* Skip regions outside of intervals.
*
@@ -218,26 +265,27 @@ public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
// skip the ends of the reference
final int contextStartIndex = refPos - CONTEXT_SIZE - 1;
- final int contextFullLength = 2 * CONTEXT_SIZE + 1;
if (contextStartIndex < 0 || contextStartIndex + contextFullLength > ref.length()) continue;
// skip contexts with N bases
- final String context = StringUtil.bytesToString(ref.getBases(), contextStartIndex, contextFullLength).toUpperCase();
+ final String context = getRefContext(ref, contextStartIndex, contextFullLength);
if (context.contains("N")) continue;
- // skip low BQ sites
- if (failsBaseQualityCutoff(readPos, rec)) continue;
-
- // skip N bases in read
- final char readBase = Character.toUpperCase((char) rec.getReadBases()[readPos - 1]);
- if (readBase == 'N') continue;
-
// count the base!
- artifactCounters.get(library).countRecord(context, readBase, rec);
+ counter.countRecord(context, readBase, rec);
}
}
}
+ private String getRefContext(final ReferenceSequence ref, final int contextStartIndex, final int contextFullLength) {
+ // cache the upper-cased string version of this reference so we don't need to create a string for every base in every read
+ if (currentRefIndex != ref.getContigIndex()) {
+ currentRefString = new String(ref.getBases()).toUpperCase();
+ currentRefIndex = ref.getContigIndex();
+ }
+ return currentRefString.substring(contextStartIndex, contextStartIndex + contextFullLength);
+ }
+
@Override
protected void finish() {
final MetricsFile<PreAdapterSummaryMetrics, Integer> preAdapterSummaryMetricsFile = getMetricsFile();
@@ -274,17 +322,4 @@ public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
@Override
protected boolean usesNoRefReads() { return false; }
-
- /**
- * Check if this read base fails the base quality cutoff.
- */
- private boolean failsBaseQualityCutoff(final int oneIndexedPos, final SAMRecord rec) {
- final byte qual;
- if (USE_OQ && rec.getOriginalBaseQualities() != null) {
- qual = rec.getOriginalBaseQualities()[oneIndexedPos - 1];
- } else {
- qual = rec.getBaseQualities()[oneIndexedPos - 1];
- }
- return (qual < MINIMUM_QUALITY_SCORE);
- }
}
diff --git a/src/java/picard/analysis/artifacts/ContextAccumulator.java b/src/java/picard/analysis/artifacts/ContextAccumulator.java
index 06f126b..c9e0e58 100644
--- a/src/java/picard/analysis/artifacts/ContextAccumulator.java
+++ b/src/java/picard/analysis/artifacts/ContextAccumulator.java
@@ -4,11 +4,14 @@ import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.ListMap;
import htsjdk.samtools.util.SequenceUtil;
import picard.PicardException;
-import picard.analysis.artifacts.SequencingArtifactMetrics.*;
+import picard.analysis.artifacts.SequencingArtifactMetrics.BaitBiasDetailMetrics;
+import picard.analysis.artifacts.SequencingArtifactMetrics.DetailPair;
+import picard.analysis.artifacts.SequencingArtifactMetrics.PreAdapterDetailMetrics;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
+import java.util.TreeSet;
/**
* Keeps track of the AlignmentAccumulators for each artifact / context of interest.
@@ -17,37 +20,44 @@ class ContextAccumulator {
// are the PE reads expected to face the same direction?
private final boolean expectedTandemReads;
- private final Map<Transition, Map<String, AlignmentAccumulator>> artifactMap;
+
+ // mapping from contexts to the accumulators
+ private final Map<String, AlignmentAccumulator[]> artifactMap;
public ContextAccumulator(final Set<String> contexts, final boolean expectedTandemReads) {
this.expectedTandemReads = expectedTandemReads;
- this.artifactMap = new HashMap<Transition, Map<String, AlignmentAccumulator>>();
- for (final Transition transition : Transition.values()) {
- this.artifactMap.put(transition, new HashMap<String, AlignmentAccumulator>());
- }
+ this.artifactMap = new HashMap<>();
for (final String context : contexts) {
- final char refBase = getCentralBase(context);
- for (final byte calledBase : SequenceUtil.VALID_BASES_UPPER) {
- final Transition transition = Transition.transitionOf(refBase, (char)calledBase);
- this.artifactMap.get(transition).put(context, new AlignmentAccumulator());
+
+ // sanity check that the context length is odd
+ if ((context.length() & 1) == 0) throw new PicardException("Contexts cannot have an even number of bases: " + context);
+
+ final AlignmentAccumulator[] accumulators = new AlignmentAccumulator[Transition.Base.values().length];
+ for (int i = 0; i < Transition.Base.values().length; i++) {
+ accumulators[i] = new AlignmentAccumulator();
}
+ this.artifactMap.put(context, accumulators);
}
}
public void countRecord(final String refContext, final char calledBase, final SAMRecord rec) {
- final char refBase = getCentralBase(refContext);
- final Transition transition = Transition.transitionOf(refBase, calledBase);
- this.artifactMap.get(transition).get(refContext).countRecord(rec);
+ artifactMap.get(refContext)[Transition.baseIndexMap[calledBase]].countRecord(rec);
}
/**
* Core method to compute detailed (i.e. context-by-context) metrics from this accumulator.
*/
public ListMap<Transition, DetailPair> calculateMetrics(final String sampleAlias, final String library) {
- final ListMap<Transition, DetailPair> detailMetricsMap = new ListMap<Transition, DetailPair>();
- for (final Transition altTransition : Transition.altValues()) {
- final Transition refTransition = altTransition.matchingRef();
- for (final String context : this.artifactMap.get(altTransition).keySet()) {
+ final ListMap<Transition, DetailPair> detailMetricsMap = new ListMap<>();
+ for (final String context : new TreeSet<>(artifactMap.keySet())) {
+
+ // sanity check that the context length is odd
+ if ((context.length() & 1) == 0) throw new PicardException("Contexts cannot have an even number of bases: " + context + ". This should never happen here!");
+ final char refBase = context.charAt(context.length() / 2);
+
+ for (final Transition.Base altBase : Transition.Base.values()) {
+ final Transition transition = Transition.transitionOf(refBase, (char)altBase.base);
+
// each combination of artifact + context represents a single metric row
final PreAdapterDetailMetrics preAdapterDetailMetrics = new PreAdapterDetailMetrics();
final BaitBiasDetailMetrics baitBiasDetailMetrics = new BaitBiasDetailMetrics();
@@ -56,20 +66,23 @@ class ContextAccumulator {
preAdapterDetailMetrics.SAMPLE_ALIAS = sampleAlias;
preAdapterDetailMetrics.LIBRARY = library;
preAdapterDetailMetrics.CONTEXT = context;
- preAdapterDetailMetrics.REF_BASE = altTransition.ref();
- preAdapterDetailMetrics.ALT_BASE = altTransition.call();
+ preAdapterDetailMetrics.REF_BASE = transition.ref();
+ preAdapterDetailMetrics.ALT_BASE = transition.call();
baitBiasDetailMetrics.SAMPLE_ALIAS = sampleAlias;
baitBiasDetailMetrics.LIBRARY = library;
baitBiasDetailMetrics.CONTEXT = context;
- baitBiasDetailMetrics.REF_BASE = altTransition.ref();
- baitBiasDetailMetrics.ALT_BASE = altTransition.call();
+ baitBiasDetailMetrics.REF_BASE = transition.ref();
+ baitBiasDetailMetrics.ALT_BASE = transition.call();
// retrieve all the necessary alignment counters.
- final AlignmentAccumulator fwdRefAlignments = this.artifactMap.get(refTransition).get(context);
- final AlignmentAccumulator fwdAltAlignments = this.artifactMap.get(altTransition).get(context);
- final AlignmentAccumulator revRefAlignments = this.artifactMap.get(refTransition.complement()).get(SequenceUtil.reverseComplement(context));
- final AlignmentAccumulator revAltAlignments = this.artifactMap.get(altTransition.complement()).get(SequenceUtil.reverseComplement(context));
+ final AlignmentAccumulator[] accumulators = artifactMap.get(context);
+ final AlignmentAccumulator[] reverseCompAccumulators = artifactMap.get(SequenceUtil.reverseComplement(context));
+
+ final AlignmentAccumulator fwdRefAlignments = accumulators[Transition.baseIndexMap[transition.ref()]];
+ final AlignmentAccumulator fwdAltAlignments = accumulators[Transition.baseIndexMap[transition.call()]];
+ final AlignmentAccumulator revRefAlignments = reverseCompAccumulators[Transition.baseIndexMap[transition.complement().ref()]];
+ final AlignmentAccumulator revAltAlignments = reverseCompAccumulators[Transition.baseIndexMap[transition.complement().call()]];
// categorize observations of pre-adapter artifacts
if (expectedTandemReads) {
@@ -97,17 +110,12 @@ class ContextAccumulator {
baitBiasDetailMetrics.calculateDerivedStatistics();
// add the finalized metrics to the map
- detailMetricsMap.add(altTransition, new DetailPair(preAdapterDetailMetrics, baitBiasDetailMetrics));
+ detailMetricsMap.add(transition, new DetailPair(preAdapterDetailMetrics, baitBiasDetailMetrics));
}
}
return detailMetricsMap;
}
- private char getCentralBase(final String context) {
- if (context.length() % 2 == 0) throw new PicardException("Contexts cannot have an even number of bases: " + context);
- else return context.charAt(context.length() / 2);
- }
-
/**
* Little class for breaking down alignments by read1/read2 and positive/negative strand.
*/
diff --git a/src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java b/src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
index 1908431..4271d46 100644
--- a/src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
+++ b/src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
@@ -20,15 +20,35 @@ import java.util.Map;
import java.util.Set;
@CommandLineProgramProperties(
- usage = ConvertSequencingArtifactToOxoG.USAGE,
- usageShort = ConvertSequencingArtifactToOxoG.USAGE,
+ usage = ConvertSequencingArtifactToOxoG.USAGE_SUMMARY + ConvertSequencingArtifactToOxoG.USAGE_DETAILS,
+ usageShort = ConvertSequencingArtifactToOxoG.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class ConvertSequencingArtifactToOxoG extends CommandLineProgram {
- static final String USAGE = "Extract OxoG metrics format from generalized artifact metrics.";
-
+ static final String USAGE_SUMMARY = "Extract OxoG metrics from generalized artifacts metrics";
+ static final String USAGE_DETAILS = "This tool extracts 8-oxoguanine (OxoG) artifact metrics from the output of " +
+ "CollectSequencingArtifactsMetrics(a tool that provides detailed information on a variety of artifacts found in sequencing " +
+ "libraries) and converts them to the CollectOxoGMetrics tool's output format. This conveniently eliminates the need to run " +
+ "CollectOxoGMetrics if we already ran CollectSequencingArtifactsMetrics in our pipeline. See the documentation for " +
+ "CollectSequencingArtifactsMetrics and CollectOxoGMetrics for additional information on these tools.<br /><br />." +
+ "Note that only the base of the CollectSequencingArtifactsMetrics output file name is required for the file name input. " +
+ "For example, if the file name is artifact_metrics.txt.bait_bias_detail_metrics" +
+ " or artifact_metrics.txt.pre_adapter_detail_metrics, only the base of the file name \"artifact_metrics\" is " +
+ "required on the command line for \"input\". " +
+ "An output file called \"artifact_metrics.oxog_metrics\" will be generated automatically. " +
+ "A reference sequence is also required.<br />"+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar ConvertSequencingArtifactToOxoG \\<br />" +
+ " I=artifact_metrics \\<br />" +
+ " R=reference.fasta" +
+ "</pre>" +
+ "For detailed explanations of the output metrics, please see:" +
+ "http://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectOxoGMetrics.CpcgMetrics" +
+ "<hr />"
+ ;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
- doc = "Basename for input artifact metrics")
+ doc = "Basename of the input artifact metrics file (output by CollectSequencingArtifactMetrics)")
public File INPUT_BASE;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
@@ -53,8 +73,8 @@ public class ConvertSequencingArtifactToOxoG extends CommandLineProgram {
IOUtil.assertFileIsReadable(BAIT_BIAS_IN);
IOUtil.assertFileIsWritable(OXOG_OUT);
- final List<PreAdapterDetailMetrics> preAdapterDetailMetricsList = (List<PreAdapterDetailMetrics>) MetricsFile.readBeans(PRE_ADAPTER_IN);
- final List<BaitBiasDetailMetrics> baitBiasDetailMetricsList = (List<BaitBiasDetailMetrics>) MetricsFile.readBeans(BAIT_BIAS_IN);
+ final List<PreAdapterDetailMetrics> preAdapterDetailMetricsList = MetricsFile.readBeans(PRE_ADAPTER_IN);
+ final List<BaitBiasDetailMetrics> baitBiasDetailMetricsList = MetricsFile.readBeans(BAIT_BIAS_IN);
// TODO should we validate that the two inputs match up as expected?
diff --git a/src/java/picard/analysis/artifacts/SequencingArtifactMetrics.java b/src/java/picard/analysis/artifacts/SequencingArtifactMetrics.java
index e4cb0ff..02eb22c 100644
--- a/src/java/picard/analysis/artifacts/SequencingArtifactMetrics.java
+++ b/src/java/picard/analysis/artifacts/SequencingArtifactMetrics.java
@@ -3,6 +3,8 @@ package picard.analysis.artifacts;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.util.QualityUtil;
+import java.util.Comparator;
+
public class SequencingArtifactMetrics {
public static final String PRE_ADAPTER_SUMMARY_EXT = ".pre_adapter_summary_metrics";
public static final String PRE_ADAPTER_DETAILS_EXT = ".pre_adapter_detail_metrics";
@@ -179,13 +181,27 @@ public class SequencingArtifactMetrics {
}
this.QSCORE = QualityUtil.getPhredScoreFromErrorProbability(this.ERROR_RATE);
}
+
+ public int compareTo(final PreAdapterDetailMetrics o) {
+ int retval = Double.compare(QSCORE, o.QSCORE);
+ if (retval != 0) return retval;
+
+ retval = REF_BASE - o.REF_BASE;
+ if (retval != 0) return retval;
+
+ retval = ALT_BASE - o.ALT_BASE;
+ if (retval != 0) return retval;
+
+ retval = CONTEXT.compareTo(o.CONTEXT);
+
+ return retval;
+ }
}
/**
* Bait bias artifacts broken down by context.
*/
public static class BaitBiasDetailMetrics extends MetricBase {
- /** The name of the sample being assayed. */
public String SAMPLE_ALIAS;
/** The name of the library being assayed. */
public String LIBRARY;
@@ -239,6 +255,21 @@ public class SequencingArtifactMetrics {
this.ERROR_RATE = Math.max(MIN_ERROR, this.FWD_ERROR_RATE - this.REV_ERROR_RATE);
this.QSCORE = QualityUtil.getPhredScoreFromErrorProbability(this.ERROR_RATE);
}
+
+ public int compareTo(final BaitBiasDetailMetrics o) {
+ int retval = Double.compare(QSCORE, o.QSCORE);
+ if (retval != 0) return retval;
+
+ retval = REF_BASE - o.REF_BASE;
+ if (retval != 0) return retval;
+
+ retval = ALT_BASE - o.ALT_BASE;
+ if (retval != 0) return retval;
+
+ retval = CONTEXT.compareTo(o.CONTEXT);
+
+ return retval;
+ }
}
/**
diff --git a/src/java/picard/analysis/artifacts/Transition.java b/src/java/picard/analysis/artifacts/Transition.java
index 303b0ee..beb54a6 100644
--- a/src/java/picard/analysis/artifacts/Transition.java
+++ b/src/java/picard/analysis/artifacts/Transition.java
@@ -2,6 +2,8 @@ package picard.analysis.artifacts;
import htsjdk.samtools.util.SequenceUtil;
+import java.util.Arrays;
+
enum Transition {
AtoA('A','A'), AtoC('A','C'), AtoG('A','G'), AtoT('A','T'),
CtoA('C','A'), CtoC('C','C'), CtoG('C','G'), CtoT('C','T'),
@@ -21,7 +23,7 @@ enum Transition {
}
public static Transition transitionOf(final char ref, final char call) {
- return valueOf(ref + "to" + call);
+ return transitionIndexMap[baseIndexMap[ref]][baseIndexMap[call]];
}
/**
@@ -36,13 +38,6 @@ enum Transition {
return transitionOf((char) SequenceUtil.complement((byte) this.ref), (char) SequenceUtil.complement((byte) this.call));
}
- /**
- * Return the ref:ref transition corresponding to this ref:alt transition.
- */
- public Transition matchingRef() {
- return transitionOf(this.ref, this.ref);
- }
-
public char ref() { return this.ref; }
public char call() { return this.call; }
@@ -51,4 +46,35 @@ enum Transition {
public String toString() {
return this.ref + ">" + this.call;
}
+
+ protected enum Base {
+ A ('A'),
+ C ('C'),
+ G ('G'),
+ T ('T');
+
+ public byte base;
+
+ private Base(final char base) {
+ this.base = (byte)base;
+ }
+ }
+
+ // only 4 of the values will be used but we want to optimize for speed by accessing via the int value of a char
+ static protected final int[] baseIndexMap = new int[256];
+ static {
+ Arrays.fill(baseIndexMap, -1);
+ for (final Base b : Base.values()) {
+ baseIndexMap[b.base] = b.ordinal();
+ }
+ }
+
+ static private final Transition[][] transitionIndexMap = new Transition[Base.values().length][Base.values().length];
+ static {
+ for (final Base b1 : Base.values()) {
+ for (final Base b2 : Base.values()) {
+ transitionIndexMap[b1.ordinal()][b2.ordinal()] = Transition.valueOf(b1.toString() + "to" + b2.toString());
+ }
+ }
+ }
}
\ No newline at end of file
diff --git a/src/java/picard/analysis/directed/CalculateHsMetrics.java b/src/java/picard/analysis/directed/CalculateHsMetrics.java
index 5d42821..d41c394 100644
--- a/src/java/picard/analysis/directed/CalculateHsMetrics.java
+++ b/src/java/picard/analysis/directed/CalculateHsMetrics.java
@@ -24,75 +24,34 @@
package picard.analysis.directed;
-import htsjdk.samtools.SAMReadGroupRecord;
-import htsjdk.samtools.reference.ReferenceSequenceFile;
-import htsjdk.samtools.util.IOUtil;
-import htsjdk.samtools.util.IntervalList;
-import htsjdk.samtools.util.StringUtil;
-import picard.analysis.MetricAccumulationLevel;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.Metrics;
-import java.io.File;
-import java.util.List;
-import java.util.Set;
-import java.util.SortedSet;
-import java.util.TreeSet;
-
/**
* Calculates a set of HS metrics from a sam or bam file. See HsMetricsCollector and CollectTargetedMetrics for more details.
*
* @author Tim Fennell
*/
+
@CommandLineProgramProperties(
- usage = "Calculates a set of Hybrid Selection specific metrics from an aligned SAM" +
+ usage = "DEPRECATED: Use CollectHsMetrics instead. Calculates a set of Hybrid Selection specific metrics from an aligned SAM" +
"or BAM file. If a reference sequence is provided, AT/GC dropout metrics will " +
"be calculated, and the PER_TARGET_COVERAGE option can be used to output GC and " +
"mean coverage information for every target.",
- usageShort = "Calculates Hybrid Selection-specific metrics for a SAM or BAM file",
+ usageShort = "DEPRECATED: Use CollectHsMetrics instead.",
programGroup = Metrics.class
)
-public class CalculateHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetricCollector> {
-
- @Option(shortName = "BI", doc = "An interval list file that contains the locations of the baits used.", minElements=1)
- public List<File> BAIT_INTERVALS;
-
- @Option(shortName = "N", doc = "Bait set name. If not provided it is inferred from the filename of the bait intervals.", optional = true)
- public String BAIT_SET_NAME;
+@Deprecated
+public class CalculateHsMetrics extends CollectHsMetrics {
- @Override
- protected IntervalList getProbeIntervals() {
- for (final File file : BAIT_INTERVALS) IOUtil.assertFileIsReadable(file);
- return IntervalList.fromFiles(BAIT_INTERVALS);
- }
+ @Option(shortName = "MQ", doc = "Minimum mapping quality for a read to contribute coverage.", overridable = true)
+ public int MINIMUM_MAPPING_QUALITY = 1;
- @Override
- protected String getProbeSetName() {
- if (BAIT_SET_NAME != null) {
- return BAIT_SET_NAME;
- } else {
- final SortedSet<String> baitSetNames = new TreeSet<String>();
- for (final File file : BAIT_INTERVALS) {
- baitSetNames.add(CollectTargetedMetrics.renderProbeNameFromFile(file));
- }
- return StringUtil.join(".", baitSetNames);
- }
- }
+ @Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
+ public int MINIMUM_BASE_QUALITY = 0;
- /** Stock main method. */
- public static void main(final String[] argv) {
- System.exit(new CalculateHsMetrics().instanceMain(argv));
- }
+ @Option(doc = "True if we are to clip overlapping reads, false otherwise.", optional=true)
+ public boolean CLIP_OVERLAPPING_READS = false;
- @Override
- protected HsMetricCollector makeCollector(final Set<MetricAccumulationLevel> accumulationLevels,
- final List<SAMReadGroupRecord> samRgRecords,
- final ReferenceSequenceFile refFile,
- final File perTargetCoverage,
- final IntervalList targetIntervals,
- final IntervalList probeIntervals,
- final String probeSetName) {
- return new HsMetricCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, targetIntervals, probeIntervals, probeSetName);
- }
}
diff --git a/src/java/picard/analysis/directed/CalculateHsMetrics.java b/src/java/picard/analysis/directed/CollectHsMetrics.java
similarity index 75%
copy from src/java/picard/analysis/directed/CalculateHsMetrics.java
copy to src/java/picard/analysis/directed/CollectHsMetrics.java
index 5d42821..e89beb0 100644
--- a/src/java/picard/analysis/directed/CalculateHsMetrics.java
+++ b/src/java/picard/analysis/directed/CollectHsMetrics.java
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-
package picard.analysis.directed;
import htsjdk.samtools.SAMReadGroupRecord;
@@ -41,19 +40,19 @@ import java.util.SortedSet;
import java.util.TreeSet;
/**
- * Calculates a set of HS metrics from a sam or bam file. See HsMetricsCollector and CollectTargetedMetrics for more details.
+ * Collects a set of HS metrics from a sam or bam file. See HsMetricsCollector and CollectTargetedMetrics for more details.
*
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Calculates a set of Hybrid Selection specific metrics from an aligned SAM" +
+ usage = "Collects a set of Hybrid Selection specific metrics from an aligned SAM" +
"or BAM file. If a reference sequence is provided, AT/GC dropout metrics will " +
"be calculated, and the PER_TARGET_COVERAGE option can be used to output GC and " +
"mean coverage information for every target.",
- usageShort = "Calculates Hybrid Selection-specific metrics for a SAM or BAM file",
+ usageShort = "Collects Hybrid Selection-specific metrics for a SAM or BAM file",
programGroup = Metrics.class
)
-public class CalculateHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetricCollector> {
+public class CollectHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetricCollector> {
@Option(shortName = "BI", doc = "An interval list file that contains the locations of the baits used.", minElements=1)
public List<File> BAIT_INTERVALS;
@@ -61,6 +60,15 @@ public class CalculateHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetr
@Option(shortName = "N", doc = "Bait set name. If not provided it is inferred from the filename of the bait intervals.", optional = true)
public String BAIT_SET_NAME;
+ @Option(shortName = "MQ", doc = "Minimum mapping quality for a read to contribute coverage.", overridable = true)
+ public int MINIMUM_MAPPING_QUALITY = 20;
+
+ @Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
+ public int MINIMUM_BASE_QUALITY = 20;
+
+ @Option(doc = "True if we are to clip overlapping reads, false otherwise.", optional=true, overridable = true)
+ public boolean CLIP_OVERLAPPING_READS = true;
+
@Override
protected IntervalList getProbeIntervals() {
for (final File file : BAIT_INTERVALS) IOUtil.assertFileIsReadable(file);
@@ -90,9 +98,12 @@ public class CalculateHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetr
final List<SAMReadGroupRecord> samRgRecords,
final ReferenceSequenceFile refFile,
final File perTargetCoverage,
+ final File perBaseCoverage,
final IntervalList targetIntervals,
final IntervalList probeIntervals,
- final String probeSetName) {
- return new HsMetricCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, targetIntervals, probeIntervals, probeSetName);
+ final String probeSetName,
+ final int nearProbeDistance) {
+ return new HsMetricCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance,
+ MINIMUM_MAPPING_QUALITY, MINIMUM_BASE_QUALITY, CLIP_OVERLAPPING_READS, true, COVERAGE_CAP, SAMPLE_SIZE);
}
-}
+}
\ No newline at end of file
diff --git a/src/java/picard/analysis/directed/CollectTargetedMetrics.java b/src/java/picard/analysis/directed/CollectTargetedMetrics.java
index 93df377..d5e691e 100644
--- a/src/java/picard/analysis/directed/CollectTargetedMetrics.java
+++ b/src/java/picard/analysis/directed/CollectTargetedMetrics.java
@@ -2,6 +2,7 @@ package picard.analysis.directed;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
@@ -35,7 +36,7 @@ import java.util.Set;
*/
public abstract class CollectTargetedMetrics<METRIC extends MultilevelMetrics, COLLECTOR extends TargetMetricsCollector<METRIC>> extends CommandLineProgram {
- private static final Log log = Log.getInstance(CalculateHsMetrics.class);
+ private final Log log = Log.getInstance(getClass());
protected abstract IntervalList getProbeIntervals();
@@ -51,9 +52,11 @@ public abstract class CollectTargetedMetrics<METRIC extends MultilevelMetrics, C
final List<SAMReadGroupRecord> samRgRecords,
final ReferenceSequenceFile refFile,
final File perTargetCoverage,
+ final File perBaseCoverage,
final IntervalList targetIntervals,
final IntervalList probeIntervals,
- final String probeSetName);
+ final String probeSetName,
+ final int nearProbeDistance);
@Option(shortName = "TI", doc = "An interval list file that contains the locations of the targets.", minElements=1)
@@ -71,6 +74,29 @@ public abstract class CollectTargetedMetrics<METRIC extends MultilevelMetrics, C
@Option(optional = true, doc = "An optional file to output per target coverage information to.")
public File PER_TARGET_COVERAGE;
+ @Option(optional = true, doc = "An optional file to output per base coverage information to. The per-base file contains " +
+ "one line per target base and can grow very large. It is not recommended for use with large target sets.")
+ public File PER_BASE_COVERAGE;
+
+ @Option(optional = true, doc= "The maximum distance between a read and the nearest probe/bait/amplicon for the read to be " +
+ "considered 'near probe' and included in percent selected.")
+ public int NEAR_DISTANCE = TargetedPcrMetricsCollector.NEAR_PROBE_DISTANCE_DEFAULT;
+
+ @Option(shortName = "MQ", doc = "Minimum mapping quality for a read to contribute coverage.", overridable = true)
+ public int MINIMUM_MAPPING_QUALITY = 1;
+
+ @Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
+ public int MINIMUM_BASE_QUALITY = 0;
+
+ @Option(doc = "True if we are to clip overlapping reads, false otherwise.", optional=true, overridable = true)
+ public boolean CLIP_OVERLAPPING_READS = false;
+
+ @Option(shortName = "covMax", doc = "Parameter to set a max coverage limit for Theoretical Sensitivity calculations. Default is 200.", optional = true)
+ public int COVERAGE_CAP = 200;
+
+ @Option(doc="Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000.", optional = true)
+ public int SAMPLE_SIZE=10000;
+
/**
* Asserts that files are readable and writable and then fires off an
* HsMetricsCalculator instance to do the real work.
@@ -108,9 +134,11 @@ public abstract class CollectTargetedMetrics<METRIC extends MultilevelMetrics, C
reader.getFileHeader().getReadGroups(),
ref,
PER_TARGET_COVERAGE,
+ PER_BASE_COVERAGE,
targetIntervals,
getProbeIntervals(),
- getProbeSetName()
+ getProbeSetName(),
+ NEAR_DISTANCE
);
final ProgressLogger progress = new ProgressLogger(log);
diff --git a/src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java b/src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
index 45d3bee..c3bb273 100644
--- a/src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
+++ b/src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
@@ -17,15 +17,34 @@ import java.util.Set;
* more information
*/
@CommandLineProgramProperties(
- usage = "Calculates a set of metrics to Illumina Truseq Custom Amplicon sequencing from an aligned SAM" +
- "or BAM file. If a reference sequence is provided, AT/GC dropout metrics will " +
- "be calculated, and the PER_TARGET_COVERAGE option can be used to output GC and " +
- "mean coverage information for every target.",
- usageShort = "Produces Targeted PCR-related metrics given the provided SAM/BAM",
+ usage = CollectTargetedPcrMetrics.USAGE_SUMMARY + CollectTargetedPcrMetrics.USAGE_DETAILS,
+ usageShort = CollectTargetedPcrMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectTargetedPcrMetrics extends CollectTargetedMetrics<TargetedPcrMetrics, TargetedPcrMetricsCollector> {
-
+ static final String USAGE_SUMMARY = "Calculate PCR-related metrics from targeted sequencing data. ";
+ static final String USAGE_DETAILS = "This tool calculates a set of PCR-related metrics from an aligned SAM or " +
+ "BAM file containing targeted sequencing data. It is appropriate for data produced with multiple small-target technologies " +
+ "including exome sequencing an custom amplicon panels such as the Illumina " +
+ "<a href='http://www.illumina.com/content/dam/illumina-marketing/documents/products/datasheets/datasheet_truseq_custom_amplicon.pdf'>TruSeq Custom Amplicon (TSCA)</a> kit. <br /><br />" +
+ "If a reference sequence is provided, AT/GC dropout metrics will be calculated and the PER_TARGET_COVERAGE option can be " +
+ "used to output GC content and mean coverage information for each target. The AT/GC dropout metrics indicate the degree of " +
+ "inadequate coverage of a particular region based on its AT or GC content. The PER_TARGET_COVERAGE option can be used to " +
+ "output GC content and mean sequence depth information for every target interval. <br /><br />" +
+ "Please note that coverage depth at each locus should not exceed a limit of java MAX_SHORT ~32K. This is because " +
+ "CollectTargetedPcrMetrics tool uses a short array to calculate coverage metrics." +
+ "<h4>Usage Example</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectTargetedPcrMetrics \\<br /> " +
+ " I=input.bam \\<br /> " +
+ " O=pcr_metrics.txt \\<br /> " +
+ " R=reference_sequence.fasta \\<br /> " +
+ " AMPLICON_INTERVALS=amplicon.interval_list \\<br /> " +
+ " TARGET_INTERVALS=targets.interval_list " +
+ "</pre>" +
+ "For explanations of the output metrics, see " +
+ "http://broadinstitute.github.io/picard/picard-metric-definitions.html#TargetedPcrMetrics" +
+ "<hr />";
@Option(shortName = "AI", doc = "An interval list file that contains the locations of the baits used.")
public File AMPLICON_INTERVALS;
@@ -58,9 +77,12 @@ public class CollectTargetedPcrMetrics extends CollectTargetedMetrics<TargetedPc
final List<SAMReadGroupRecord> samRgRecords,
final ReferenceSequenceFile refFile,
final File perTargetCoverage,
+ final File perBaseCoverage,
final IntervalList targetIntervals,
final IntervalList probeIntervals,
- final String probeSetName) {
- return new TargetedPcrMetricsCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, targetIntervals, probeIntervals, probeSetName);
+ final String probeSetName,
+ final int nearProbeDistance) {
+ return new TargetedPcrMetricsCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance,
+ MINIMUM_MAPPING_QUALITY, MINIMUM_BASE_QUALITY, CLIP_OVERLAPPING_READS, true, COVERAGE_CAP, SAMPLE_SIZE);
}
}
diff --git a/src/java/picard/analysis/directed/HsMetricCollector.java b/src/java/picard/analysis/directed/HsMetricCollector.java
index 23ae6e7..2a64f05 100644
--- a/src/java/picard/analysis/directed/HsMetricCollector.java
+++ b/src/java/picard/analysis/directed/HsMetricCollector.java
@@ -43,8 +43,39 @@ import java.util.Set;
*/
public class HsMetricCollector extends TargetMetricsCollector<HsMetrics> {
- public HsMetricCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords, final ReferenceSequenceFile refFile, final File perTargetCoverage, final IntervalList targetIntervals, final IntervalList probeIntervals, final String probeSetName) {
- super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, targetIntervals, probeIntervals, probeSetName);
+ public HsMetricCollector(final Set<MetricAccumulationLevel> accumulationLevels,
+ final List<SAMReadGroupRecord> samRgRecords,
+ final ReferenceSequenceFile refFile,
+ final File perTargetCoverage,
+ final File perBaseCoverage,
+ final IntervalList targetIntervals,
+ final IntervalList probeIntervals,
+ final String probeSetName,
+ final int nearProbeDistance,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads,
+ final int coverageCap,
+ final int sampleSize) {
+ super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, coverageCap, sampleSize);
+ }
+
+ public HsMetricCollector(final Set<MetricAccumulationLevel> accumulationLevels,
+ final List<SAMReadGroupRecord> samRgRecords,
+ final ReferenceSequenceFile refFile,
+ final File perTargetCoverage,
+ final File perBaseCoverage,
+ final IntervalList targetIntervals,
+ final IntervalList probeIntervals,
+ final String probeSetName,
+ final int nearProbeDistance,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads,
+ final boolean noSideEffects,
+ final int coverageCap,
+ final int sampleSize) {
+ super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, coverageCap, sampleSize);
}
@Override
diff --git a/src/java/picard/analysis/directed/HsMetrics.java b/src/java/picard/analysis/directed/HsMetrics.java
index c08391e..21efcc1 100644
--- a/src/java/picard/analysis/directed/HsMetrics.java
+++ b/src/java/picard/analysis/directed/HsMetrics.java
@@ -27,7 +27,24 @@ package picard.analysis.directed;
import picard.metrics.MultilevelMetrics;
/**
- * The set of metrics captured that are specific to a hybrid selection analysis.
+ * <p>Metrics generated by CollectHsMetrics for the analysis of target-capture sequencing experiments. The metrics
+ * in this class fall broadly into three categories:</p>
+ *
+ * <ul>
+ * <li>Basic sequencing metrics that are either generated as a baseline against which to evaluate other
+ * metrics or because they are used in the calculation of other metrics. This includes things like
+ * the genome size, the number of reads, the number of aligned reads etc.</li>
+ * <li>Metrics that are intended for evaluating the performance of the wet-lab assay that generated the data.
+ * This group includes metrics like the number of bases mapping on/off/near baits, %selected, fold 80 base
+ * penalty, hs library size and the hs penalty metrics. These metrics are calculated before some of the
+ * filters are applied (e.g. low mapping quality reads, low base quality bases and bases overlapping in the middle
+ * of paired-end reads are all counted).
+ * </li>
+ * <li>Metrics for assessing target coverage as a proxy for how well the data is likely to perform in downstream
+ * applications like variant calling. This group includes metrics like mean target coverage, the percentage of bases
+ * reaching various coverage levels, and the percentage of bases excluded by various filters. These metrics are computed
+ * using the strictest subset of the data, after all filters have been applied.</li>
+ * </ul>
*
* @author Tim Fennell
*/
@@ -68,6 +85,9 @@ public class HsMetrics extends MultilevelMetrics {
/** PF Reads Aligned / PF Reads. */
public double PCT_PF_UQ_READS_ALIGNED;
+ /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
+ public long PF_BASES_ALIGNED;
+
/** The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps. */
public long PF_UQ_BASES_ALIGNED;
@@ -95,9 +115,12 @@ public class HsMetrics extends MultilevelMetrics {
/** The mean coverage of all baits in the experiment. */
public double MEAN_BAIT_COVERAGE;
- /** The mean coverage of targets that received at least coverage depth = 2 at one base. */
+ /** The mean coverage of targets. */
public double MEAN_TARGET_COVERAGE;
+ /** The median coverage of targets. */
+ public double MEDIAN_TARGET_COVERAGE;
+
/** The number of aligned, de-duped, on-bait bases out of the PF bases available. */
public double PCT_USABLE_BASES_ON_BAIT;
@@ -107,28 +130,45 @@ public class HsMetrics extends MultilevelMetrics {
/** The fold by which the baited region has been amplified above genomic background. */
public double FOLD_ENRICHMENT;
- /** The number of targets that did not reach coverage=2 over any base. */
+ /** The fraction of targets that did not reach coverage=1 over any base. */
public double ZERO_CVG_TARGETS_PCT;
+ /** The fraction of aligned bases that were filtered out because they were in reads marked as duplicates. */
+ public double PCT_EXC_DUPE;
+
+ /** The fraction of aligned bases that were filtered out because they were in reads with low mapping quality. */
+ public double PCT_EXC_MAPQ;
+
+ /** The fraction of aligned bases that were filtered out because they were of low base quality. */
+ public double PCT_EXC_BASEQ;
+
+ /** The fraction of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads. */
+ public double PCT_EXC_OVERLAP;
+
+ /** The fraction of aligned bases that were filtered out because they did not align over a target base. */
+ public double PCT_EXC_OFF_TARGET;
+
/**
* The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to
* the mean coverage level in those targets.
*/
public double FOLD_80_BASE_PENALTY;
- /** The percentage of ALL target bases achieving 2X or greater coverage. */
+ /** The percentage of all target bases achieving 1X or greater coverage. */
+ public double PCT_TARGET_BASES_1X;
+ /** The percentage of all target bases achieving 2X or greater coverage. */
public double PCT_TARGET_BASES_2X;
- /** The percentage of ALL target bases achieving 10X or greater coverage. */
+ /** The percentage of all target bases achieving 10X or greater coverage. */
public double PCT_TARGET_BASES_10X;
- /** The percentage of ALL target bases achieving 20X or greater coverage. */
+ /** The percentage of all target bases achieving 20X or greater coverage. */
public double PCT_TARGET_BASES_20X;
- /** The percentage of ALL target bases achieving 30X or greater coverage. */
+ /** The percentage of all target bases achieving 30X or greater coverage. */
public double PCT_TARGET_BASES_30X;
- /** The percentage of ALL target bases achieving 40X or greater coverage. */
+ /** The percentage of all target bases achieving 40X or greater coverage. */
public double PCT_TARGET_BASES_40X;
- /** The percentage of ALL target bases achieving 50X or greater coverage. */
+ /** The percentage of all target bases achieving 50X or greater coverage. */
public double PCT_TARGET_BASES_50X;
- /** The percentage of ALL target bases achieving 100X or greater coverage. */
+ /** The percentage of all target bases achieving 100X or greater coverage. */
public double PCT_TARGET_BASES_100X;
/** The estimated number of unique molecules in the selected part of the library. */
@@ -191,4 +231,10 @@ public class HsMetrics extends MultilevelMetrics {
* reads that should have mapped to GC>=50% regions mapped elsewhere.
*/
public double GC_DROPOUT;
+
+ /** The theoretical HET SNP sensitivity. */
+ public double HET_SNP_SENSITIVITY;
+
+ /** The Phred Scaled Q Score of the theoretical HET SNP sensitivity. */
+ public double HET_SNP_Q;
}
diff --git a/src/java/picard/analysis/directed/InsertSizeMetricsCollector.java b/src/java/picard/analysis/directed/InsertSizeMetricsCollector.java
index 27de45d..fcc7c0a 100644
--- a/src/java/picard/analysis/directed/InsertSizeMetricsCollector.java
+++ b/src/java/picard/analysis/directed/InsertSizeMetricsCollector.java
@@ -11,6 +11,7 @@ import picard.analysis.MetricAccumulationLevel;
import picard.metrics.MultiLevelCollector;
import picard.metrics.PerUnitMetricCollector;
+import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
@@ -33,11 +34,16 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
//Also, when calculating mean and stdev, only bins <= Histogram_WIDTH will be included.
private final Integer histogramWidth;
+ // If set to true, then duplicates will also be included in the histogram
+ private final boolean includeDuplicates;
+
public InsertSizeMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords,
- final double minimumPct, final Integer HistogramWidth, final double deviations) {
+ final double minimumPct, final Integer histogramWidth, final double deviations,
+ final boolean includeDuplicates) {
this.minimumPct = minimumPct;
- this.histogramWidth = HistogramWidth;
+ this.histogramWidth = histogramWidth;
this.deviations = deviations;
+ this.includeDuplicates = includeDuplicates;
setup(accumulationLevels, samRgRecords);
}
@@ -64,7 +70,7 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
record.getMateUnmappedFlag() ||
record.getFirstOfPairFlag() ||
record.isSecondaryOrSupplementary() ||
- record.getDuplicateReadFlag() ||
+ (record.getDuplicateReadFlag() && !this.includeDuplicates) ||
record.getInferredInsertSize() == 0) {
return;
}
@@ -74,7 +80,7 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
/** A Collector for individual InsertSizeMetrics for a given SAMPLE or SAMPLE/LIBRARY or SAMPLE/LIBRARY/READ_GROUP (depending on aggregation levels) */
public class PerUnitInsertSizeMetricsCollector implements PerUnitMetricCollector<InsertSizeMetrics, Integer, InsertSizeCollectorArgs> {
- final EnumMap<SamPairUtil.PairOrientation, Histogram<Integer>> Histograms = new EnumMap<SamPairUtil.PairOrientation, Histogram<Integer>>(SamPairUtil.PairOrientation.class);
+ final EnumMap<SamPairUtil.PairOrientation, Histogram<Integer>> histograms = new EnumMap<SamPairUtil.PairOrientation, Histogram<Integer>>(SamPairUtil.PairOrientation.class);
final String sample;
final String library;
final String readGroup;
@@ -97,13 +103,13 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
else {
prefix = "All_Reads.";
}
- Histograms.put(SamPairUtil.PairOrientation.FR, new Histogram<Integer>("insert_size", prefix + "fr_count"));
- Histograms.put(SamPairUtil.PairOrientation.TANDEM, new Histogram<Integer>("insert_size", prefix + "tandem_count"));
- Histograms.put(SamPairUtil.PairOrientation.RF, new Histogram<Integer>("insert_size", prefix + "rf_count"));
+ histograms.put(SamPairUtil.PairOrientation.FR, new Histogram<Integer>("insert_size", prefix + "fr_count"));
+ histograms.put(SamPairUtil.PairOrientation.TANDEM, new Histogram<Integer>("insert_size", prefix + "tandem_count"));
+ histograms.put(SamPairUtil.PairOrientation.RF, new Histogram<Integer>("insert_size", prefix + "rf_count"));
}
public void acceptRecord(final InsertSizeCollectorArgs args) {
- Histograms.get(args.getPairOrientation()).increment(args.getInsertSize());
+ histograms.get(args.getPairOrientation()).increment(args.getInsertSize());
}
public void finish() { }
@@ -113,65 +119,73 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
}
public void addMetricsToFile(final MetricsFile<InsertSizeMetrics,Integer> file) {
- for (final Histogram<Integer> h : this.Histograms.values()) totalInserts += h.getCount();
+ // get the number of inserts, and the maximum and minimum keys, across all orientations
+ for (final Histogram<Integer> h : this.histograms.values()) {
+ totalInserts += h.getCount();
+ }
+ if (0 == totalInserts) return; // nothing to store
- for(final Map.Entry<SamPairUtil.PairOrientation, Histogram<Integer>> entry : Histograms.entrySet()) {
+ for(final Map.Entry<SamPairUtil.PairOrientation, Histogram<Integer>> entry : histograms.entrySet()) {
final SamPairUtil.PairOrientation pairOrientation = entry.getKey();
- final Histogram<Integer> Histogram = entry.getValue();
- final double total = Histogram.getCount();
+ final Histogram<Integer> histogram = entry.getValue();
+ final double total = histogram.getCount();
// Only include a category if it has a sufficient percentage of the data in it
- if( total > totalInserts * minimumPct ) {
+ if( total >= totalInserts * minimumPct ) {
final InsertSizeMetrics metrics = new InsertSizeMetrics();
metrics.SAMPLE = this.sample;
metrics.LIBRARY = this.library;
metrics.READ_GROUP = this.readGroup;
metrics.PAIR_ORIENTATION = pairOrientation;
- metrics.READ_PAIRS = (long) total;
- metrics.MAX_INSERT_SIZE = (int) Histogram.getMax();
- metrics.MIN_INSERT_SIZE = (int) Histogram.getMin();
- metrics.MEDIAN_INSERT_SIZE = Histogram.getMedian();
- metrics.MEDIAN_ABSOLUTE_DEVIATION = Histogram.getMedianAbsoluteDeviation();
-
- final double median = Histogram.getMedian();
- double covered = 0;
- double low = median;
- double high = median;
-
- while (low >= Histogram.getMin() || high <= Histogram.getMax()) {
- final Histogram<Integer>.Bin lowBin = Histogram.get((int) low);
- if (lowBin != null) covered += lowBin.getValue();
-
- if (low != high) {
- final Histogram<Integer>.Bin highBin = Histogram.get((int) high);
- if (highBin != null) covered += highBin.getValue();
+ if (!histogram.isEmpty()) {
+ metrics.READ_PAIRS = (long) total;
+ metrics.MAX_INSERT_SIZE = (int) histogram.getMax();
+ metrics.MIN_INSERT_SIZE = (int) histogram.getMin();
+ metrics.MEDIAN_INSERT_SIZE = histogram.getMedian();
+ metrics.MEDIAN_ABSOLUTE_DEVIATION = histogram.getMedianAbsoluteDeviation();
+
+ final double median = histogram.getMedian();
+ double covered = 0;
+ double low = median;
+ double high = median;
+
+ while (low >= histogram.getMin() || high <= histogram.getMax()) {
+ final Histogram<Integer>.Bin lowBin = histogram.get((int) low);
+ if (lowBin != null) covered += lowBin.getValue();
+
+ if (low != high) {
+ final Histogram<Integer>.Bin highBin = histogram.get((int) high);
+ if (highBin != null) covered += highBin.getValue();
+ }
+
+ final double percentCovered = covered / total;
+ final int distance = (int) (high - low) + 1;
+ if (percentCovered >= 0.1 && metrics.WIDTH_OF_10_PERCENT == 0) metrics.WIDTH_OF_10_PERCENT = distance;
+ if (percentCovered >= 0.2 && metrics.WIDTH_OF_20_PERCENT == 0) metrics.WIDTH_OF_20_PERCENT = distance;
+ if (percentCovered >= 0.3 && metrics.WIDTH_OF_30_PERCENT == 0) metrics.WIDTH_OF_30_PERCENT = distance;
+ if (percentCovered >= 0.4 && metrics.WIDTH_OF_40_PERCENT == 0) metrics.WIDTH_OF_40_PERCENT = distance;
+ if (percentCovered >= 0.5 && metrics.WIDTH_OF_50_PERCENT == 0) metrics.WIDTH_OF_50_PERCENT = distance;
+ if (percentCovered >= 0.6 && metrics.WIDTH_OF_60_PERCENT == 0) metrics.WIDTH_OF_60_PERCENT = distance;
+ if (percentCovered >= 0.7 && metrics.WIDTH_OF_70_PERCENT == 0) metrics.WIDTH_OF_70_PERCENT = distance;
+ if (percentCovered >= 0.8 && metrics.WIDTH_OF_80_PERCENT == 0) metrics.WIDTH_OF_80_PERCENT = distance;
+ if (percentCovered >= 0.9 && metrics.WIDTH_OF_90_PERCENT == 0) metrics.WIDTH_OF_90_PERCENT = distance;
+ if (percentCovered >= 0.99 && metrics.WIDTH_OF_99_PERCENT == 0) metrics.WIDTH_OF_99_PERCENT = distance;
+
+ --low;
+ ++high;
}
-
- final double percentCovered = covered / total;
- final int distance = (int) (high - low) + 1;
- if (percentCovered >= 0.1 && metrics.WIDTH_OF_10_PERCENT == 0) metrics.WIDTH_OF_10_PERCENT = distance;
- if (percentCovered >= 0.2 && metrics.WIDTH_OF_20_PERCENT == 0) metrics.WIDTH_OF_20_PERCENT = distance;
- if (percentCovered >= 0.3 && metrics.WIDTH_OF_30_PERCENT == 0) metrics.WIDTH_OF_30_PERCENT = distance;
- if (percentCovered >= 0.4 && metrics.WIDTH_OF_40_PERCENT == 0) metrics.WIDTH_OF_40_PERCENT = distance;
- if (percentCovered >= 0.5 && metrics.WIDTH_OF_50_PERCENT == 0) metrics.WIDTH_OF_50_PERCENT = distance;
- if (percentCovered >= 0.6 && metrics.WIDTH_OF_60_PERCENT == 0) metrics.WIDTH_OF_60_PERCENT = distance;
- if (percentCovered >= 0.7 && metrics.WIDTH_OF_70_PERCENT == 0) metrics.WIDTH_OF_70_PERCENT = distance;
- if (percentCovered >= 0.8 && metrics.WIDTH_OF_80_PERCENT == 0) metrics.WIDTH_OF_80_PERCENT = distance;
- if (percentCovered >= 0.9 && metrics.WIDTH_OF_90_PERCENT == 0) metrics.WIDTH_OF_90_PERCENT = distance;
- if (percentCovered >= 0.99 && metrics.WIDTH_OF_99_PERCENT == 0) metrics.WIDTH_OF_99_PERCENT = distance;
-
- --low;
- ++high;
}
// Trim the Histogram down to get rid of outliers that would make the chart useless.
- final Histogram<Integer> trimmedHisto = Histogram; //alias it
- trimmedHisto.trimByWidth(getWidthToTrimTo(metrics));
+ final Histogram<Integer> trimmedHistogram = histogram; // alias it
+ trimmedHistogram.trimByWidth(getWidthToTrimTo(metrics));
- metrics.MEAN_INSERT_SIZE = trimmedHisto.getMean();
- metrics.STANDARD_DEVIATION = trimmedHisto.getStandardDeviation();
+ if (!trimmedHistogram.isEmpty()) {
+ metrics.MEAN_INSERT_SIZE = trimmedHistogram.getMean();
+ metrics.STANDARD_DEVIATION = trimmedHistogram.getStandardDeviation();
+ }
- file.addHistogram(trimmedHisto);
+ file.addHistogram(trimmedHistogram);
file.addMetric(metrics);
}
}
diff --git a/src/java/picard/analysis/directed/RnaSeqMetricsCollector.java b/src/java/picard/analysis/directed/RnaSeqMetricsCollector.java
index d6ab4f1..8385704 100644
--- a/src/java/picard/analysis/directed/RnaSeqMetricsCollector.java
+++ b/src/java/picard/analysis/directed/RnaSeqMetricsCollector.java
@@ -113,17 +113,23 @@ public class RnaSeqMetricsCollector extends SAMRecordMultiLevelCollector<RnaSeqM
}
public void acceptRecord(SAMRecord rec) {
+ // NB: for read count metrics, do not include supplementary records, but for base count metrics, do include supplementary records.
+
// Filter out some reads, and collect the total number of PF bases
- if (rec.getReadFailsVendorQualityCheckFlag() || rec.isSecondaryOrSupplementary()) return;
+ if (rec.getReadFailsVendorQualityCheckFlag()) return;
- this.metrics.PF_BASES += rec.getReadLength();
- if (rec.getReadUnmappedFlag()) return;
+ // NB: we count unmapped reads here
+ if (!rec.getNotPrimaryAlignmentFlag()) this.metrics.PF_BASES += rec.getReadLength();
- if (ignoredSequenceIndices.contains(rec.getReferenceIndex())) {
+ // NB: we count secondary mapped reads here
+ if (!rec.getReadUnmappedFlag() && !rec.getNotPrimaryAlignmentFlag() && ignoredSequenceIndices.contains(rec.getReferenceIndex())) {
++this.metrics.IGNORED_READS;
return;
}
+ // We can now ignore secondary or unmapped reads
+ if (rec.getNotPrimaryAlignmentFlag() || rec.getReadUnmappedFlag()) return;
+
// Grab information about the alignment and overlapping genes etc.
final Interval readInterval = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd());
@@ -217,7 +223,7 @@ public class RnaSeqMetricsCollector extends SAMRecordMultiLevelCollector<RnaSeqM
// Strand-specificity is tallied on read basis rather than base at a time. A read that aligns to more than one
// gene is not counted.
- if (overlapsExon && strandSpecificity != StrandSpecificity.NONE && overlappingGenes.size() == 1) {
+ if (!rec.getNotPrimaryAlignmentFlag() && overlapsExon && strandSpecificity != StrandSpecificity.NONE && overlappingGenes.size() == 1) {
final boolean negativeTranscriptionStrand = overlappingGenes.iterator().next().isNegativeStrand();
final boolean negativeReadStrand = rec.getReadNegativeStrandFlag();
final boolean readAndTranscriptStrandsAgree = negativeReadStrand == negativeTranscriptionStrand;
diff --git a/src/java/picard/analysis/directed/TargetMetricsCollector.java b/src/java/picard/analysis/directed/TargetMetricsCollector.java
index 4828191..9e7d5c6 100644
--- a/src/java/picard/analysis/directed/TargetMetricsCollector.java
+++ b/src/java/picard/analysis/directed/TargetMetricsCollector.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -28,6 +28,7 @@ import htsjdk.samtools.AlignmentBlock;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
@@ -35,6 +36,9 @@ import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.CoordMath;
import htsjdk.samtools.util.FormatUtil;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.QualityUtil;
+import htsjdk.samtools.util.Histogram;
import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
@@ -44,10 +48,11 @@ import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.analysis.MetricAccumulationLevel;
+import picard.filter.CountingMapQFilter;
import picard.metrics.MultilevelMetrics;
import picard.metrics.PerUnitMetricCollector;
import picard.metrics.SAMRecordMultiLevelCollector;
-import picard.util.MathUtil;
+import picard.analysis.TheoreticalSensitivity;
import java.io.File;
import java.io.IOException;
@@ -75,12 +80,12 @@ import java.util.Set;
* @author Jonathan Burke
*/
public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetrics> extends SAMRecordMultiLevelCollector<METRIC_TYPE, Integer> {
+ /** Default distance for a read to be considered "selected". */
+ public static final int NEAR_PROBE_DISTANCE_DEFAULT = 250;
+ private int nearProbeDistance = NEAR_PROBE_DISTANCE_DEFAULT;
- // What is considered "near" to the bait
- private static final int NEAR_PROBE_DISTANCE = 250;
-
- //If perTargetCoverage != null then coverage is computed for each specified target and output to this file
- private final File perTargetCoverage;
+ private final File perTargetCoverage; // If not null, per-target coverage summaries are written to this file
+ private final File perBaseCoverage; // If not null, per-base(!) coverage summaries are written to this file
//The name of the set of probes used
private final String probeSetName;
@@ -109,16 +114,63 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
private final long genomeSize;
+ private final int coverageCap;
+
+ private final int sampleSize;
+
+ private final Histogram<Integer> baseQHistogram = new Histogram<Integer>("value", "baseq_count");
+ private final Histogram<Integer> depthHistogram = new Histogram<Integer>("coverage", "count");
+
+ private static final double LOG_ODDS_THRESHOLD = 3.0;
+
+ private final int minimumMappingQuality;
+ private final int minimumBaseQuality;
+ private final boolean clipOverlappingReads;
+ private boolean noSideEffects;
+
//A map of coverage by target in which coverage is reset every read, this is done
//so that we can calculate overlap for a read once and the resulting coverage is
//than added to the cumulative coverage of every collector that collects
//information on that read
- private Map<Interval, Coverage> coverageByTargetForRead;
- private Coverage [] cov;
+ private final Map<Interval, Coverage> coverageByTargetForRead;
+ private final Coverage [] cov;
+
+ /** Gets the distance that is allowed between a read and the nearest probe for it to be considered "near probe" and "selected". */
+ public int getNearProbeDistance() { return nearProbeDistance; }
+
+ /** Sets the distance that is allowed between a read and the nearest probe for it to be considered "near probe" and "selected". */
+ public void setNearProbeDistance(final int nearProbeDistance) { this.nearProbeDistance = nearProbeDistance; }
//Converts a targetMetric into a more specific metric of METRIC_TYPE
public abstract METRIC_TYPE convertMetric(final TargetMetrics targetMetrics);
+
+ /**
+ * In the case of ignoring bases in overlapping reads from the same template,
+ * we choose to internally modify the SAM record's CIGAR to clip overlapping bases.
+ * We can either modify the passed-in record (a side-effect), or modify
+ * an internal clone of the record (no side-effect). Due to the overhead of
+ * cloning a SAMRecord object, we may see significant slow down of the
+ * performance to ensure there are no side effects. Therefore, users of this
+ * collector who do not care if the record passed to
+ * {@link #acceptRecord(htsjdk.samtools.SAMRecord, htsjdk.samtools.reference.ReferenceSequence)}
+ * is modified can pass in false to this method to realize performance gains.
+ * @param value the boolean value to set.
+ */
+ public void setNoSideEffects(final boolean value) {
+ this.noSideEffects = value;
+ }
+
+ /** Get the number of bases in the given alignment block and record that have base quality greater than or equal to the minimum */
+ public static int getNumBasesPassingMinimumBaseQuality(final SAMRecord record, final AlignmentBlock block, final int minimumBaseQuality) {
+ int basesInBlockAtMinimumQuality = 0;
+ final byte[] baseQualities = record.getBaseQualities();
+ for (int idx = block.getReadStart(); idx <= CoordMath.getEnd(block.getLength(), block.getReadStart()); idx++) { // idx is one-based
+ if (minimumBaseQuality <= baseQualities[idx-1]) basesInBlockAtMinimumQuality++;
+ }
+ return basesInBlockAtMinimumQuality;
+ }
+
/**
* Since the targeted metrics (HsMetrics, TargetedPcrMetrics,...) share many of the same values as TargetMetrics, this copy will copy all public attributes in targetMetrics
* to the outputMetrics' attributes of the same name. If no matching attribute exists in the outputMetrics or the attribute of the target metrics class also is found
@@ -168,7 +220,7 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
try {
final Field outputField = mtClass.getField(field.getName());
outputField.set(outputMetrics, field.get(targetMetrics));
- } catch (Exception e) {
+ } catch (final Exception e) {
throw new PicardException("Exception while copying targetMetrics to " + outputMetrics.getClass().getName(), e);
}
}
@@ -176,8 +228,8 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
for(int i = 0; i < targetKeys.length; i++) {
try {
- Field targetMetricField = TargetMetrics.class.getField(targetKeys[i]);
- Field outputMetricField = mtClass.getField(outputKeys[i]);
+ final Field targetMetricField = TargetMetrics.class.getField(targetKeys[i]);
+ final Field outputMetricField = mtClass.getField(outputKeys[i]);
outputMetricField.set(outputMetrics, targetMetricField.get(targetMetrics));
} catch(final Exception exc) {
throw new PicardException("Exception while copying TargetMetrics." + targetKeys[i] + " to " + mtClass.getName() + "." + outputKeys[i], exc);
@@ -185,16 +237,50 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
}
}
- public TargetMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords, final ReferenceSequenceFile refFile,
- final File perTargetCoverage, final IntervalList targetIntervals, final IntervalList probeIntervals, final String probeSetName) {
+ public TargetMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels,
+ final List<SAMReadGroupRecord> samRgRecords,
+ final ReferenceSequenceFile refFile,
+ final File perTargetCoverage,
+ final File perBaseCoverage,
+ final IntervalList targetIntervals,
+ final IntervalList probeIntervals,
+ final String probeSetName,
+ final int nearProbeDistance,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads,
+ final int coverageCap,
+ final int sampleSize) {
+ this(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, false, coverageCap, sampleSize);
+ }
+
+ public TargetMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels,
+ final List<SAMReadGroupRecord> samRgRecords,
+ final ReferenceSequenceFile refFile,
+ final File perTargetCoverage,
+ final File perBaseCoverage,
+ final IntervalList targetIntervals,
+ final IntervalList probeIntervals,
+ final String probeSetName,
+ final int nearProbeDistance,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads,
+ final boolean noSideEffects,
+ final int coverageCap,
+ final int sampleSize) {
this.perTargetCoverage = perTargetCoverage;
+ this.perBaseCoverage = perBaseCoverage;
this.probeSetName = probeSetName;
+ this.nearProbeDistance = nearProbeDistance;
this.allProbes = probeIntervals;
this.allTargets = targetIntervals;
+ this.coverageCap = coverageCap;
+ this.sampleSize = sampleSize;
final List<Interval> uniqueBaits = this.allProbes.uniqued().getIntervals();
- this.probeDetector = new OverlapDetector<Interval>(-NEAR_PROBE_DISTANCE, 0);
+ this.probeDetector = new OverlapDetector<Interval>(-this.nearProbeDistance, 0);
this.probeDetector.addAll(uniqueBaits, uniqueBaits);
this.probeTerritory = Interval.countBases(uniqueBaits);
@@ -228,6 +314,11 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
}
}
+ this.minimumMappingQuality = minimumMappingQuality;
+ this.minimumBaseQuality = minimumBaseQuality;
+ this.clipOverlappingReads = clipOverlappingReads;
+ this.noSideEffects = noSideEffects;
+
setup(accumulationLevels, samRgRecords);
}
@@ -235,7 +326,7 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
protected PerUnitMetricCollector<METRIC_TYPE, Integer, SAMRecord> makeChildCollector(final String sample, final String library, final String readGroup) {
final PerUnitTargetMetricCollector collector = new PerUnitTargetMetricCollector(probeSetName, coverageByTargetForRead.keySet(),
sample, library, readGroup, probeTerritory, targetTerritory, genomeSize,
- intervalToGc);
+ intervalToGc, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads);
if (this.probeSetName != null) {
collector.setBaitSetName(probeSetName);
}
@@ -246,9 +337,8 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
@Override
protected PerUnitMetricCollector<METRIC_TYPE, Integer, SAMRecord> makeAllReadCollector() {
final PerUnitTargetMetricCollector collector = (PerUnitTargetMetricCollector) makeChildCollector(null, null, null);
- if (perTargetCoverage != null) {
- collector.setPerTargetOutput(perTargetCoverage);
- }
+ if (perTargetCoverage != null) collector.setPerTargetOutput(perTargetCoverage);
+ if (perBaseCoverage != null) collector.setPerBaseOutput(perBaseCoverage);
return collector;
}
@@ -257,14 +347,18 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
* Collect the Target Metrics for one unit of "accumulation" (i.e. for one sample, or for one library ...)
*/
public class PerUnitTargetMetricCollector implements PerUnitMetricCollector<METRIC_TYPE, Integer, SAMRecord> {
-
- private final Map<Interval,Double> intervalToGc;
+ private final Map<Interval, Double> intervalToGc;
private File perTargetOutput;
+ private File perBaseOutput;
+ final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
// A Map to accumulate per-bait-region (i.e. merge of overlapping targets) coverage. */
private final Map<Interval, Coverage> coverageByTarget;
private final TargetMetrics metrics = new TargetMetrics();
+ private final int minimumBaseQuality;
+ private final CountingMapQFilter mapQFilter;
+ private final boolean clipOverlappingReads;
/**
* Constructor that parses the squashed reference to genome reference file and stores the
@@ -273,184 +367,281 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
public PerUnitTargetMetricCollector(final String probeSetName, final Set<Interval> coverageTargets,
final String sample, final String library, final String readGroup,
final long probeTerritory, final long targetTerritory, final long genomeSize,
- final Map<Interval, Double> intervalToGc) {
- this.metrics.SAMPLE = sample;
- this.metrics.LIBRARY = library;
- this.metrics.READ_GROUP = readGroup;
- this.metrics.PROBE_SET = probeSetName;
+ final Map<Interval, Double> intervalToGc,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads) {
+ this.metrics.SAMPLE = sample;
+ this.metrics.LIBRARY = library;
+ this.metrics.READ_GROUP = readGroup;
+ this.metrics.PROBE_SET = probeSetName;
metrics.PROBE_TERRITORY = probeTerritory;
metrics.TARGET_TERRITORY = targetTerritory;
metrics.GENOME_SIZE = genomeSize;
this.coverageByTarget = new LinkedHashMap<Interval, Coverage>(coverageTargets.size() * 2, 0.5f);
- for (Interval target : coverageTargets) {
- this.coverageByTarget.put(target, new Coverage(target,0));
+ for (final Interval target : coverageTargets) {
+ this.coverageByTarget.put(target, new Coverage(target, 0));
}
+ this.mapQFilter = new CountingMapQFilter(minimumMappingQuality);
+ this.minimumBaseQuality = minimumBaseQuality;
this.intervalToGc = intervalToGc;
+ this.clipOverlappingReads = clipOverlappingReads;
}
- /** If set, the metrics collector will output per target coverage information to this file. */
+ /** Sets the (optional) File to write per-target coverage information to. If null (the default), no file is produced. */
public void setPerTargetOutput(final File perTargetOutput) {
this.perTargetOutput = perTargetOutput;
}
+ /** Sets the (optional) File to write per-base coverage information to. If null (the default), no file is produced. */
+ public void setPerBaseOutput(final File perBaseOutput) {
+ this.perBaseOutput = perBaseOutput;
+ }
+
/** Sets the name of the bait set explicitly instead of inferring it from the bait file. */
public void setBaitSetName(final String name) {
this.metrics.PROBE_SET = name;
}
/** Adds information about an individual SAMRecord to the statistics. */
- public void acceptRecord(final SAMRecord rec) {
- // Just plain avoid records that are marked as not-primary
- if (rec.isSecondaryOrSupplementary()) return;
-
- this.metrics.TOTAL_READS += 1;
-
- // Check for PF reads
- if (rec.getReadFailsVendorQualityCheckFlag()) {
- return;
+ public void acceptRecord(final SAMRecord record) {
+ // Just ignore secondary alignments altogether
+ if (record.getNotPrimaryAlignmentFlag()) return;
+
+ // Cache some things, and compute the total number of bases aligned in the record.
+ final boolean mappedInPair = record.getReadPairedFlag() && !record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && !record.getSupplementaryAlignmentFlag();
+ final byte[] baseQualities = record.getBaseQualities();
+ int basesAlignedInRecord = 0;
+ if (!record.getReadUnmappedFlag()) {
+ for (final AlignmentBlock block : record.getAlignmentBlocks()) {
+ basesAlignedInRecord += block.getLength();
+ }
}
- // Prefetch the list of target and bait overlaps here as they're needed multiple times.
- final Collection<Interval> targets;
- final Collection<Interval> probes;
-
- if (!rec.getReadUnmappedFlag()) {
- final Interval read = new Interval(rec.getReferenceName(), rec.getAlignmentStart(), rec.getAlignmentEnd());
- targets = targetDetector.getOverlaps(read);
- probes = probeDetector.getOverlaps(read);
- }
- else {
- targets = null;
- probes = null;
+ /* Count metrics related to # of bases and reads. Consider supplementary alignments for base counting but not record counting.
+ Do this counting *prior* to applying filters to ensure we match other metrics' computation for these values, such as AlignmentSummaryMetrics. */
+
+ // READ Based Metrics
+ if (!record.getSupplementaryAlignmentFlag()) { // only consider the primary
+ this.metrics.TOTAL_READS++;
+ if (!record.getReadFailsVendorQualityCheckFlag()) { // only reads that pass vendor's filters
+ this.metrics.PF_READS++;
+ if (!record.getDuplicateReadFlag()) { // ignore duplicates for unique reads/bases
+ this.metrics.PF_UNIQUE_READS++;
+ if (!record.getReadUnmappedFlag()) { // ignore unmapped reads
+ this.metrics.PF_UQ_READS_ALIGNED++;
+ }
+ }
+ }
}
- ++this.metrics.PF_READS;
- this.metrics.PF_BASES += rec.getReadLength();
-
- // And now calculate the values we need for HS_LIBRARY_SIZE
- if (rec.getReadPairedFlag() && rec.getFirstOfPairFlag() && !rec.getReadUnmappedFlag() && !rec.getMateUnmappedFlag()) {
- if (probes != null && !probes.isEmpty()) {
- ++this.metrics.PF_SELECTED_PAIRS;
- if (!rec.getDuplicateReadFlag()) ++this.metrics.PF_SELECTED_UNIQUE_PAIRS;
+ ///////////////////////////////////////////////////////////////////
+ // Non-PF reads can be totally ignored beyond this point
+ ///////////////////////////////////////////////////////////////////
+ if (record.getReadFailsVendorQualityCheckFlag()) return;
+
+ // BASE Based Metrics
+ // Strangely enough we should not count supplementals in PF_BASES, assuming that the
+ // main record also contains these bases! But we *do* count the aligned bases, assuming
+ // that those bases are not *aligned* in the primary record
+ if (!record.getSupplementaryAlignmentFlag()) this.metrics.PF_BASES += record.getReadLength();
+
+ if (!record.getReadUnmappedFlag()) {
+ this.metrics.PF_BASES_ALIGNED += basesAlignedInRecord;
+ if (!record.getDuplicateReadFlag()) {
+ this.metrics.PF_UQ_BASES_ALIGNED += basesAlignedInRecord;
}
}
- // Check for reads that are marked as duplicates
- if (rec.getDuplicateReadFlag()) {
- return;
- }
- else {
- ++this.metrics.PF_UNIQUE_READS;
+ ///////////////////////////////////////////////////////////////////
+ // Unmapped reads can be totally ignored beyond this point
+ ///////////////////////////////////////////////////////////////////
+ if (record.getReadUnmappedFlag()) return;
+
+ // Prefetch the list of target and bait overlaps here as they're needed multiple times.
+ final Interval read = new Interval(record.getReferenceName(), record.getAlignmentStart(), record.getAlignmentEnd());
+ final Collection<Interval> targets = targetDetector.getOverlaps(read);
+ final Collection<Interval> probes = probeDetector.getOverlaps(read);
+
+ // Calculate the values we need for HS_LIBRARY_SIZE
+ if (!record.getSupplementaryAlignmentFlag()) {
+ if (record.getReadPairedFlag() && record.getFirstOfPairFlag() && !record.getReadUnmappedFlag() && !record.getMateUnmappedFlag()) {
+ if (!probes.isEmpty()) {
+ ++this.metrics.PF_SELECTED_PAIRS;
+ if (!record.getDuplicateReadFlag()) ++this.metrics.PF_SELECTED_UNIQUE_PAIRS;
+ }
+ }
}
- // Don't bother with reads that didn't align uniquely
- if (rec.getReadUnmappedFlag() || rec.getMappingQuality() == 0) {
+ ///////////////////////////////////////////////////////////////////
+ // Duplicate reads can be totally ignored beyond this point
+ ///////////////////////////////////////////////////////////////////
+ if (record.getDuplicateReadFlag()) {
+ this.metrics.PCT_EXC_DUPE += basesAlignedInRecord;
return;
}
- this.metrics.PF_UQ_READS_ALIGNED += 1;
- for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
- this.metrics.PF_UQ_BASES_ALIGNED += block.getLength();
- }
+ // Compute the bait-related metrics *before* applying the overlap clipping and
+ // the map-q threshold, since those would skew the assay-related metrics
+ {
+ final int mappedBases = basesAlignedInRecord;
+ int onBaitBases = 0;
- final boolean mappedInPair = rec.getReadPairedFlag() && !rec.getMateUnmappedFlag();
+ if (!probes.isEmpty()) {
+ for (final Interval bait : probes) {
+ for (final AlignmentBlock block : record.getAlignmentBlocks()) {
+ final int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength());
- // Find the target overlaps
- if (targets != null && !targets.isEmpty()) {
- for (final Interval target : targets) {
- final Coverage coverage = this.coverageByTarget.get(target);
-
- for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
- final int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength());
- for (int pos=block.getReferenceStart(); pos<=end; ++ pos) {
- if (pos >= target.getStart() && pos <= target.getEnd()) {
- ++this.metrics.ON_TARGET_BASES;
- if (mappedInPair) ++this.metrics.ON_TARGET_FROM_PAIR_BASES;
- coverage.addBase(pos - target.getStart());
+ for (int pos = block.getReferenceStart(); pos <= end; ++pos) {
+ if (pos >= bait.getStart() && pos <= bait.getEnd()) ++onBaitBases;
}
}
}
+
+ this.metrics.ON_PROBE_BASES += onBaitBases;
+ this.metrics.NEAR_PROBE_BASES += (mappedBases - onBaitBases);
+ } else {
+ this.metrics.OFF_PROBE_BASES += mappedBases;
}
}
- // Now do the bait overlaps
- int mappedBases = 0;
- for (final AlignmentBlock block : rec.getAlignmentBlocks()) mappedBases += block.getLength();
- int onBaitBases = 0;
+ ///////////////////////////////////////////////////////////////////
+ // And lastly, ignore reads falling below the mapq threshold
+ ///////////////////////////////////////////////////////////////////
+ if (this.mapQFilter.filterOut(record)) return;
+
+ // NB: this could modify the record. See noSideEffects.
+ final SAMRecord rec;
+ if (clipOverlappingReads) {
+ final int numOverlappingBasesToClip = SAMUtils.getNumOverlappingAlignedBasesToClip(record);
+ rec = SAMUtils.clipOverlappingAlignedBases(record, numOverlappingBasesToClip, noSideEffects);
+ metrics.PCT_EXC_OVERLAP += numOverlappingBasesToClip;
+ } else rec = record;
- if (probes != null && !probes.isEmpty()) {
- for (final Interval bait : probes) {
- for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
- final int end = CoordMath.getEnd(block.getReferenceStart(), block.getLength());
+ // Find the target overlaps
+ for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
+ final int length = block.getLength(), refStart = block.getReferenceStart(), readStart = block.getReadStart();
+
+ for (int offset = 0; offset < length; ++offset) {
+ final int refPos = refStart + offset;
+ final int readPos = readStart + offset;
+ final int qual = baseQualities[readPos - 1];
+
+ if (qual < minimumBaseQuality) {
+ this.metrics.PCT_EXC_BASEQ++;
+ } else {
+ boolean isOnTarget = false;
+ for (final Interval target : targets) {
+ if (refPos >= target.getStart() && refPos <= target.getEnd()) {
+ isOnTarget = true;
+ ++this.metrics.ON_TARGET_BASES;
+ if (mappedInPair) ++this.metrics.ON_TARGET_FROM_PAIR_BASES;
- for (int pos=block.getReferenceStart(); pos<=end; ++pos) {
- if (pos >= bait.getStart() && pos <= bait.getEnd()) ++onBaitBases;
+ final int targetOffset = refPos - target.getStart();
+ final Coverage coverage = this.coverageByTarget.get(target);
+ coverage.addBase(targetOffset);
+ baseQHistogramArray[baseQualities[offset]]++;
+ }
}
+
+ if (!isOnTarget) this.metrics.PCT_EXC_OFF_TARGET++;
}
}
-
- this.metrics.ON_PROBE_BASES += onBaitBases;
- this.metrics.NEAR_PROBE_BASES += (mappedBases - onBaitBases);
- }
- else {
- this.metrics.OFF_PROBE_BASES += mappedBases;
}
-
}
@Override
public void finish() {
- metrics.PCT_PF_READS = metrics.PF_READS / (double) metrics.TOTAL_READS;
- metrics.PCT_PF_UQ_READS = metrics.PF_UNIQUE_READS / (double) metrics.TOTAL_READS;
+ metrics.PCT_PF_READS = metrics.PF_READS / (double) metrics.TOTAL_READS;
+ metrics.PCT_PF_UQ_READS = metrics.PF_UNIQUE_READS / (double) metrics.TOTAL_READS;
metrics.PCT_PF_UQ_READS_ALIGNED = metrics.PF_UQ_READS_ALIGNED / (double) metrics.PF_UNIQUE_READS;
- final double denominator = (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES + metrics.OFF_PROBE_BASES);
+ final double denominator = (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES + metrics.OFF_PROBE_BASES);
+
+ metrics.PCT_SELECTED_BASES = (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES) / denominator;
+ metrics.PCT_OFF_PROBE = metrics.OFF_PROBE_BASES / denominator;
+ metrics.ON_PROBE_VS_SELECTED = metrics.ON_PROBE_BASES / (double) (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES);
+ metrics.MEAN_PROBE_COVERAGE = metrics.ON_PROBE_BASES / (double) metrics.PROBE_TERRITORY;
+ metrics.FOLD_ENRICHMENT = (metrics.ON_PROBE_BASES/ denominator) / ((double) metrics.PROBE_TERRITORY / metrics.GENOME_SIZE);
+
+ metrics.PCT_EXC_DUPE /= (double) metrics.PF_BASES_ALIGNED;
+ metrics.PCT_EXC_MAPQ = mapQFilter.getFilteredBases() / (double) metrics.PF_BASES_ALIGNED;
+ metrics.PCT_EXC_BASEQ /= (double) metrics.PF_BASES_ALIGNED;
+ metrics.PCT_EXC_OVERLAP /= (double) metrics.PF_BASES_ALIGNED;
+ metrics.PCT_EXC_OFF_TARGET /= (double) metrics.PF_BASES_ALIGNED;
- metrics.PCT_SELECTED_BASES = (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES) / denominator;
- metrics.PCT_OFF_PROBE = metrics.OFF_PROBE_BASES / denominator;
- metrics.ON_PROBE_VS_SELECTED = metrics.ON_PROBE_BASES / (double) (metrics.ON_PROBE_BASES + metrics.NEAR_PROBE_BASES);
- metrics.MEAN_PROBE_COVERAGE = metrics.ON_PROBE_BASES / (double) metrics.PROBE_TERRITORY;
- metrics.FOLD_ENRICHMENT = (metrics.ON_PROBE_BASES/ denominator) / ((double) metrics.PROBE_TERRITORY / metrics.GENOME_SIZE);
+ // Get Theoretical Het SNP Sensitivity and Calculate Target Coverage Metrics
+ final double[] coverageDistribution = calculateTargetCoverageMetrics();
+
+ // Fill the base-quality histogram from the per-quality counts
+ for (int i=0; i<baseQHistogramArray.length; ++i) {
+ baseQHistogram.increment(i, baseQHistogramArray[i]);
+ }
+
+ // Fill the depth histogram from the target coverage distribution
+ for (int i = 0; i < coverageDistribution.length; ++i) {
+ depthHistogram.increment(i, coverageDistribution[i]);
+ }
+
+ final double [] depthDoubleArray = TheoreticalSensitivity.normalizeHistogram(depthHistogram);
+ final double [] baseQDoubleArray = TheoreticalSensitivity.normalizeHistogram(baseQHistogram);
+ metrics.HET_SNP_SENSITIVITY = TheoreticalSensitivity.hetSNPSensitivity(depthDoubleArray, baseQDoubleArray, sampleSize, LOG_ODDS_THRESHOLD);
+ metrics.HET_SNP_Q = QualityUtil.getPhredScoreFromErrorProbability((1 - metrics.HET_SNP_SENSITIVITY));
- calculateTargetCoverageMetrics();
calculateGcMetrics();
+ emitPerBaseCoverageIfRequested();
}
/** Calculates how much additional sequencing is needed to raise 80% of bases to the mean for the lane. */
- private void calculateTargetCoverageMetrics() {
- final int[] depths = new int[(int) this.metrics.TARGET_TERRITORY]; // may not use entire array
+ private double[] calculateTargetCoverageMetrics() {
+ final int[] depths = new int[(int) this.metrics.TARGET_TERRITORY];
+ final double[] coverageDistribution = new double[coverageCap+1];
int zeroCoverageTargets = 0;
int depthIndex = 0;
double totalCoverage = 0;
- int basesConsidered = 0;
- for (final Coverage c : this.coverageByTarget.values()) {
- if (!c.hasCoverage()) {
- ++zeroCoverageTargets;
- continue;
- }
+ // The "how many bases at at-least X" calculations.
+ final int targetBasesDepth[] = {0, 1, 2, 10, 20, 30, 40, 50, 100}; // NB: this should be in ascending order
+ final int targetBases[] = new int[targetBasesDepth.length]; // counts for how many target bases are at at least X coverage, where X corresponds to the value at the same offset in targetBasesDepth
+ // consider all bases in calculating the mean, median etc.
+ for (final Coverage c : this.coverageByTarget.values()) {
+ final boolean hasCoverage = c.hasCoverage();
final int[] targetDepths = c.getDepths();
- basesConsidered += targetDepths.length;
+
+ if (!hasCoverage) {
+ zeroCoverageTargets++;
+ coverageDistribution[0] += c.getDepths().length;
+ }
for (final int depth : targetDepths) {
- depths[depthIndex++] = depth;
- totalCoverage += depth;
+ if (0 < depth) totalCoverage += depth;
+ if (hasCoverage) depths[depthIndex++] = depth;
+ coverageDistribution[Math.min(depth, coverageCap)]++;
+
+ // Add to the "how many bases at at-least X" calculations.
+ for (int i = 0; i < targetBasesDepth.length; i++) {
+ if (depth >= targetBasesDepth[i]) targetBases[i]++;
+ else break; // NB: assumes that targetBasesDepth is sorted in ascending order
+ }
}
}
- this.metrics.MEAN_TARGET_COVERAGE = totalCoverage / basesConsidered;
+ // Sort the array (ASCENDING)
+ Arrays.sort(depths);
+
+ this.metrics.MEAN_TARGET_COVERAGE = totalCoverage / this.metrics.TARGET_TERRITORY;
+ this.metrics.MEDIAN_TARGET_COVERAGE =
+ (0 == (depths.length % 2)) ? ((depths[(depths.length/2) - 1] + depths[(depths.length/2)]) / 2.0) : depths[(depths.length-1)/2];
- // Sort the array (ASCENDING) and then find the base the coverage value that lies at the 80%
+ // find the coverage value that lies at the 80%
// line, which is actually at 20% into the array now
- Arrays.sort(depths);
- // Note. basesConsidered can be between 0 and depths.length inclusive. indexOf80thPercentile will be -1 in the latter case
- final int indexOf80thPercentile = Math.max((depths.length - 1 - basesConsidered) + (int) (basesConsidered * 0.2), 0);
+ final int indexOf80thPercentile = Math.max((int) (depthIndex * 0.2 ) - 1, 0);
final int coverageAt80thPercentile;
- if(depths.length > 0) {
+ if (depths.length > 0) {
coverageAt80thPercentile = depths[indexOf80thPercentile];
}
else {
@@ -459,51 +650,44 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
this.metrics.FOLD_80_BASE_PENALTY = this.metrics.MEAN_TARGET_COVERAGE / coverageAt80thPercentile;
this.metrics.ZERO_CVG_TARGETS_PCT = zeroCoverageTargets / (double) allTargets.getIntervals().size();
- // Now do the "how many bases at X" calculations.
- int totalTargetBases = 0;
- int targetBases2x = 0;
- int targetBases10x = 0;
- int targetBases20x = 0;
- int targetBases30x = 0;
- int targetBases40x = 0;
- int targetBases50x = 0;
- int targetBases100x = 0;
+ // Store the "how many bases at at-least X" calculations.
+ this.metrics.PCT_TARGET_BASES_1X = (double) targetBases[1] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_2X = (double) targetBases[2] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_10X = (double) targetBases[3] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_20X = (double) targetBases[4] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_30X = (double) targetBases[5] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_40X = (double) targetBases[6] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_50X = (double) targetBases[7] / (double) targetBases[0];
+ this.metrics.PCT_TARGET_BASES_100X = (double) targetBases[8] / (double) targetBases[0];
+
+ return coverageDistribution;
+ }
- for (final Coverage c : this.coverageByTarget.values()) {
- for (final int depth : c.getDepths()) {
- ++totalTargetBases;
-
- if (depth >= 2) {
- ++targetBases2x;
- if (depth >=10) {
- ++targetBases10x;
- if (depth >= 20) {
- ++targetBases20x;
- if (depth >=30) {
- ++targetBases30x;
- if (depth >=40) {
- ++targetBases40x;
- if (depth >=50) {
- ++targetBases50x;
- if (depth >=100) {
- ++targetBases100x;
- }
- }
- }
- }
- }
- }
- }
+ /** Emits a file with per base coverage if an output file has been set. */
+ private void emitPerBaseCoverageIfRequested() {
+ if (this.perBaseOutput == null) return;
+
+ final PrintWriter out = new PrintWriter(IOUtil.openFileForBufferedWriting(this.perBaseOutput));
+ out.println("chrom\tpos\ttarget\tcoverage");
+ for (final Map.Entry<Interval,Coverage> entry : this.coverageByTarget.entrySet()) {
+ final Interval interval = entry.getKey();
+ final String chrom = interval.getContig();
+ final int firstBase = interval.getStart();
+
+ final int[] cov = entry.getValue().getDepths();
+ for (int i = 0; i < cov.length; ++i) {
+ out.print(chrom);
+ out.print('\t');
+ out.print(firstBase + i);
+ out.print('\t');
+ out.print(interval.getName());
+ out.print('\t');
+ out.print(cov[i]);
+ out.println();
}
}
- this.metrics.PCT_TARGET_BASES_2X = (double) targetBases2x / (double) totalTargetBases;
- this.metrics.PCT_TARGET_BASES_10X = (double) targetBases10x / (double) totalTargetBases;
- this.metrics.PCT_TARGET_BASES_20X = (double) targetBases20x / (double) totalTargetBases;
- this.metrics.PCT_TARGET_BASES_30X = (double) targetBases30x / (double) totalTargetBases;
- this.metrics.PCT_TARGET_BASES_40X = (double) targetBases40x / (double) totalTargetBases;
- this.metrics.PCT_TARGET_BASES_50X = (double) targetBases50x / (double) totalTargetBases;
- this.metrics.PCT_TARGET_BASES_100X = (double) targetBases100x / (double) totalTargetBases;
+ out.close();
}
private void calculateGcMetrics() {
@@ -511,18 +695,18 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
log.info("Calculating GC metrics");
// Setup the output file if we're outputting per-target coverage
- FormatUtil fmt = new FormatUtil();
+ final FormatUtil fmt = new FormatUtil();
final PrintWriter out;
try {
if (perTargetOutput != null) {
out = new PrintWriter(perTargetOutput);
- out.println("chrom\tstart\tend\tlength\tname\t%gc\tmean_coverage\tnormalized_coverage\tmin_normalized_coverage");
+ out.println("chrom\tstart\tend\tlength\tname\t%gc\tmean_coverage\tnormalized_coverage\tmin_normalized_coverage\tmax_normalized_coverage\tmin_coverage\tmax_coverage\tpct_0x");
}
else {
out = null;
}
}
- catch (IOException ioe) { throw new RuntimeIOException(ioe); }
+ catch (final IOException ioe) { throw new RuntimeIOException(ioe); }
final int bins = 101;
final long[] targetBasesByGc = new long[bins];
@@ -545,7 +729,14 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
if (out != null) {
final double coverage = cov.getTotal() / (double) interval.length();
- final double min = MathUtil.min(cov.getDepths());
+ double min = Integer.MAX_VALUE;
+ double max = Integer.MIN_VALUE;
+ double targetBasesAt0x = 0.0;
+ for (final int d : cov.getDepths()) {
+ if (0 == d) targetBasesAt0x++;
+ if (d < min) min = d;
+ if (max < d) max = d;
+ }
out.println(interval.getSequence() + "\t" +
interval.getStart() + "\t" +
@@ -555,7 +746,11 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
fmt.format(gcDouble) + "\t" +
fmt.format(coverage) + "\t" +
fmt.format(coverage / this.metrics.MEAN_TARGET_COVERAGE) + "\t" +
- fmt.format(min / this.metrics.MEAN_TARGET_COVERAGE)
+ fmt.format(min / this.metrics.MEAN_TARGET_COVERAGE) + "\t" +
+ fmt.format(max / this.metrics.MEAN_TARGET_COVERAGE) + "\t" +
+ fmt.format(min) + "\t" +
+ fmt.format(max) + "\t" +
+ fmt.format(targetBasesAt0x / interval.length())
);
}
}
@@ -588,8 +783,10 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
@Override
- public void addMetricsToFile(MetricsFile<METRIC_TYPE, Integer> hsMetricsComparableMetricsFile) {
+ public void addMetricsToFile(final MetricsFile<METRIC_TYPE, Integer> hsMetricsComparableMetricsFile) {
hsMetricsComparableMetricsFile.addMetric(convertMetric(this.metrics));
+ hsMetricsComparableMetricsFile.addHistogram(depthHistogram);
+ hsMetricsComparableMetricsFile.addHistogram(baseQHistogram);
}
}
@@ -617,10 +814,11 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
}
}
- /** Returns true if any base in the range has coverage of > 1 */
+ /** Returns true if any base in the range has coverage of > 0 */
public boolean hasCoverage() {
+ // NB: if this is expensive, we could easily pre-compute this as we go along in addBase
for (final int s : depths) {
- if (s > 1) return true;
+ if (s > 0) return true;
}
return false;
@@ -671,16 +869,19 @@ class TargetMetrics extends MultilevelMetrics {
/** The number of PF reads that are not marked as duplicates. */
public long PF_UNIQUE_READS;
- // Tracks the number of read pairs that we see that are PF (used to calculate library size) */
+ /** Tracks the number of read pairs that we see that are PF (used to calculate library size) */
public long PF_SELECTED_PAIRS;
- // Tracks the number of unique PF reads pairs we see (used to calc library size)
+ /** Tracks the number of unique PF reads pairs we see (used to calc library size) */
public long PF_SELECTED_UNIQUE_PAIRS;
/** The number of PF unique reads that are aligned with mapping score > 0 to the reference genome. */
public long PF_UQ_READS_ALIGNED;
/** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
+ public long PF_BASES_ALIGNED;
+
+ /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
public long PF_UQ_BASES_ALIGNED;
/** The number of PF aligned probed that mapped to a baited region of the genome. */
@@ -724,32 +925,52 @@ class TargetMetrics extends MultilevelMetrics {
/** The fold by which the probed region has been amplified above genomic background. */
public double FOLD_ENRICHMENT;
- /** The mean coverage of targets that recieved at least coverage depth = 2 at one base. */
+ /** The mean coverage of targets. */
public double MEAN_TARGET_COVERAGE;
- /** The number of targets that did not reach coverage=2 over any base. */
+ /** The median coverage of targets. */
+ public double MEDIAN_TARGET_COVERAGE;
+
+ /** The fraction of targets that did not reach coverage=1 over any base. */
public double ZERO_CVG_TARGETS_PCT;
+ /** The fraction of aligned bases that were filtered out because they were in reads marked as duplicates. */
+ public double PCT_EXC_DUPE;
+
+ /** The fraction of aligned bases that were filtered out because they were in reads with low mapping quality. */
+ public double PCT_EXC_MAPQ;
+
+ /** The fraction of aligned bases that were filtered out because they were of low base quality. */
+ public double PCT_EXC_BASEQ;
+
+ /** The fraction of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads. */
+ public double PCT_EXC_OVERLAP;
+
+ /** The fraction of aligned bases that were filtered out because they did not align over a target base. */
+ public double PCT_EXC_OFF_TARGET;
+
/**
* The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to
* the mean coverage level in those targets.
*/
public double FOLD_80_BASE_PENALTY;
- /** The percentage of ALL target bases acheiving 2X or greater coverage. */
+ /** The percentage of all target bases achieving 1X or greater coverage. */
+ public double PCT_TARGET_BASES_1X;
+ /** The percentage of all target bases achieving 2X or greater coverage. */
public double PCT_TARGET_BASES_2X;
- /** The percentage of ALL target bases acheiving 10X or greater coverage. */
+ /** The percentage of all target bases achieving 10X or greater coverage. */
public double PCT_TARGET_BASES_10X;
- /** The percentage of ALL target bases acheiving 20X or greater coverage. */
+ /** The percentage of all target bases achieving 20X or greater coverage. */
public double PCT_TARGET_BASES_20X;
- /** The percentage of ALL target bases acheiving 30X or greater coverage. */
- public double PCT_TARGET_BASES_30X;
- /** The percentage of ALL target bases acheiving 40X or greater coverage. */
- public double PCT_TARGET_BASES_40X;
- /** The percentage of ALL target bases acheiving 50X or greater coverage. */
- public double PCT_TARGET_BASES_50X;
- /** The percentage of ALL target bases acheiving 100X or greater coverage. */
- public double PCT_TARGET_BASES_100X;
+ /** The percentage of all target bases achieving 30X or greater coverage. */
+ public double PCT_TARGET_BASES_30X;
+ /** The percentage of all target bases achieving 40X or greater coverage. */
+ public double PCT_TARGET_BASES_40X;
+ /** The percentage of all target bases achieving 50X or greater coverage. */
+ public double PCT_TARGET_BASES_50X;
+ /** The percentage of all target bases achieving 100X or greater coverage. */
+ public double PCT_TARGET_BASES_100X;
/**
* A measure of how undercovered <= 50% GC regions are relative to the mean. For each GC bin [0..50]
@@ -766,4 +987,10 @@ class TargetMetrics extends MultilevelMetrics {
* reads that should have mapped to GC>=50% regions mapped elsewhere.
*/
public double GC_DROPOUT;
-}
+
+ /** The theoretical HET SNP sensitivity. */
+ public double HET_SNP_SENSITIVITY;
+
+ /** The Phred Scaled Q Score of the theoretical HET SNP sensitivity. */
+ public double HET_SNP_Q;
+}
\ No newline at end of file
diff --git a/src/java/picard/analysis/directed/TargetedPcrMetrics.java b/src/java/picard/analysis/directed/TargetedPcrMetrics.java
index 63971ea..e4e560b 100644
--- a/src/java/picard/analysis/directed/TargetedPcrMetrics.java
+++ b/src/java/picard/analysis/directed/TargetedPcrMetrics.java
@@ -48,6 +48,9 @@ public class TargetedPcrMetrics extends MultilevelMetrics {
public double PCT_PF_UQ_READS_ALIGNED;
/** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
+ public long PF_BASES_ALIGNED;
+
+ /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
public long PF_UQ_BASES_ALIGNED;
/** The number of PF aligned amplified that mapped to an amplified region of the genome. */
@@ -77,28 +80,48 @@ public class TargetedPcrMetrics extends MultilevelMetrics {
/** The mean coverage of all amplicons in the experiment. */
public double MEAN_AMPLICON_COVERAGE;
- /** The mean coverage of targets that recieved at least coverage depth = 2 at one base. */
+ /** The mean coverage of targets. */
public double MEAN_TARGET_COVERAGE;
+ /** The median coverage of targets. */
+ public double MEDIAN_TARGET_COVERAGE;
+
/** The fold by which the amplicon region has been amplified above genomic background. */
public double FOLD_ENRICHMENT;
- /** The number of targets that did not reach coverage=2 over any base. */
+ /** The fraction of targets that did not reach coverage=1 over any base. */
public double ZERO_CVG_TARGETS_PCT;
+ /** The fraction of aligned bases that were filtered out because they were in reads marked as duplicates. */
+ public double PCT_EXC_DUPE;
+
+ /** The fraction of aligned bases that were filtered out because they were in reads with low mapping quality. */
+ public double PCT_EXC_MAPQ;
+
+ /** The fraction of aligned bases that were filtered out because they were of low base quality. */
+ public double PCT_EXC_BASEQ;
+
+ /** The fraction of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads. */
+ public double PCT_EXC_OVERLAP;
+
+ /** The fraction of aligned bases that were filtered out because they did not align over a target base. */
+ public double PCT_EXC_OFF_TARGET;
+
/**
* The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to
* the mean coverage level in those targets.
*/
public double FOLD_80_BASE_PENALTY;
- /** The percentage of ALL target bases achieving 2X or greater coverage. */
+ /** The percentage of all target bases achieving 1X or greater coverage. */
+ public double PCT_TARGET_BASES_1X;
+ /** The percentage of all target bases achieving 2X or greater coverage. */
public double PCT_TARGET_BASES_2X;
- /** The percentage of ALL target bases achieving 10X or greater coverage. */
+ /** The percentage of all target bases achieving 10X or greater coverage. */
public double PCT_TARGET_BASES_10X;
- /** The percentage of ALL target bases achieving 20X or greater coverage. */
+ /** The percentage of all target bases achieving 20X or greater coverage. */
public double PCT_TARGET_BASES_20X;
- /** The percentage of ALL target bases achieving 30X or greater coverage. */
+ /** The percentage of all target bases achieving 30X or greater coverage. */
public double PCT_TARGET_BASES_30X;
/**
@@ -116,4 +139,10 @@ public class TargetedPcrMetrics extends MultilevelMetrics {
* reads that should have mapped to GC>=50% regions mapped elsewhere.
*/
public double GC_DROPOUT;
+
+ /** The theoretical HET SNP sensitivity. */
+ public double HET_SNP_SENSITIVITY;
+
+ /** The Phred Scaled Q Score of the theoretical HET SNP sensitivity. */
+ public double HET_SNP_Q;
}
diff --git a/src/java/picard/analysis/directed/TargetedPcrMetricsCollector.java b/src/java/picard/analysis/directed/TargetedPcrMetricsCollector.java
index 81f5c99..6002098 100644
--- a/src/java/picard/analysis/directed/TargetedPcrMetricsCollector.java
+++ b/src/java/picard/analysis/directed/TargetedPcrMetricsCollector.java
@@ -43,12 +43,42 @@ import java.util.Set;
public class TargetedPcrMetricsCollector extends TargetMetricsCollector<TargetedPcrMetrics> {
//maybe instead just inject this into the TargetedMetricCollector ->
- public TargetedPcrMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels, final List<SAMReadGroupRecord> samRgRecords, final ReferenceSequenceFile refFile, final File perTargetCoverage, final IntervalList targetIntervals, final IntervalList probeIntervals, final String probeSetName) {
- super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, targetIntervals, probeIntervals, probeSetName);
+ public TargetedPcrMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels,
+ final List<SAMReadGroupRecord> samRgRecords,
+ final ReferenceSequenceFile refFile,
+ final File perTargetCoverage,
+ final File perBaseCoverage,
+ final IntervalList targetIntervals,
+ final IntervalList probeIntervals,
+ final String probeSetName,
+ final int nearProbeDistance,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads,
+ final int coverageCap,
+ final int sampleSize) {
+ super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, coverageCap, sampleSize);
}
+ /** Delegates to the superclass constructor, forwarding every argument (including noSideEffects) in declaration order. */
+ public TargetedPcrMetricsCollector(final Set<MetricAccumulationLevel> accumulationLevels,
+ final List<SAMReadGroupRecord> samRgRecords,
+ final ReferenceSequenceFile refFile,
+ final File perTargetCoverage,
+ final File perBaseCoverage,
+ final IntervalList targetIntervals,
+ final IntervalList probeIntervals,
+ final String probeSetName,
+ final int nearProbeDistance,
+ final int minimumMappingQuality,
+ final int minimumBaseQuality,
+ final boolean clipOverlappingReads,
+ final boolean noSideEffects,
+ final int coverageCap,
+ final int sampleSize) {
+ super(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance, minimumMappingQuality, minimumBaseQuality, clipOverlappingReads, noSideEffects, coverageCap, sampleSize);
+ }
@Override
- public TargetedPcrMetrics convertMetric(TargetMetrics targetMetrics) {
+ public TargetedPcrMetrics convertMetric(final TargetMetrics targetMetrics) {
final TargetedPcrMetrics pcrMetrics = new TargetedPcrMetrics();
TargetMetricsCollector.reflectiveCopy(targetMetrics, pcrMetrics,
new String[]{"PROBE_SET", "PROBE_TERRITORY", "ON_PROBE_BASES", "NEAR_PROBE_BASES", "OFF_PROBE_BASES", "PCT_SELECTED_BASES", "PCT_OFF_PROBE", "ON_PROBE_VS_SELECTED", "MEAN_PROBE_COVERAGE"},
diff --git a/src/java/picard/cmdline/CommandLineParser.java b/src/java/picard/cmdline/CommandLineParser.java
index 3c70bcc..964cdb8 100644
--- a/src/java/picard/cmdline/CommandLineParser.java
+++ b/src/java/picard/cmdline/CommandLineParser.java
@@ -26,7 +26,6 @@ package picard.cmdline;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil.MultiMap;
import htsjdk.samtools.util.StringUtil;
-import picard.PicardException;
import java.io.BufferedReader;
import java.io.File;
@@ -41,6 +40,8 @@ import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
@@ -48,6 +49,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import picard.PicardException;
+
/**
* Annotation-driven utility for parsing command-line arguments, checking for errors, and producing usage message.
* <p/>
@@ -246,19 +249,28 @@ public class CommandLineParser {
prefixDot = prefix + ".";
}
-
+ int fieldCounter = 1;
for (final Field field : getAllFields(this.callerOptions.getClass())) {
if (field.getAnnotation(PositionalArguments.class) != null) {
handlePositionalArgumentAnnotation(field);
}
if (field.getAnnotation(Option.class) != null) {
- handleOptionAnnotation(field);
+ handleOptionAnnotation(field, fieldCounter);
+ // only increase counter if the field had default printOrder
+ if (field.getAnnotation(Option.class).printOrder() == Integer.MAX_VALUE)
+ fieldCounter++;
} else if (!isCommandLineProgram() && field.getAnnotation(NestedOptions.class) != null) {
// If callerOptions is an instance of CommandLineProgram, defer creation of child
// CommandLineParsers until after parsing options for this parser, in case CommandLineProgram
// wants to do something dynamic based on values for this parser.
handleNestedOptionsAnnotation(field);
}
+
+ }
+
+ // make sure to sort options according to printOrder
+ if (optionDefinitions != null && !optionDefinitions.isEmpty()){
+ Collections.sort(optionDefinitions, new OptionDefinitionByPrintOrderComparator());
}
this.programProperties = this.callerOptions.getClass().getAnnotation(CommandLineProgramProperties.class);
@@ -316,7 +328,7 @@ public class CommandLineParser {
"unrecognized options are ignored. " + "A single-valued option set in an options file may be overridden " +
"by a subsequent command-line option. " +
"A line starting with '#' is considered a comment.",
- false, true, false, 0, Integer.MAX_VALUE, null, true, new String[0]);
+ false, true, false, 0, Integer.MAX_VALUE, null, true, new String[0], Integer.MAX_VALUE);
printOptionUsage(stream, optionsFileOptionDefinition);
}
@@ -869,7 +881,11 @@ public class CommandLineParser {
stream.print(sb);
}
- private void handleOptionAnnotation(final Field field) {
+ /**
+ * @param field the command line parameter as a {@link Field}
+ * @param fieldPosition the field number as returned by getAllFields() that returns all fields including those of superclasses
+ */
+ private void handleOptionAnnotation(final Field field, final int fieldPosition) {
try {
field.setAccessible(true);
final Option optionAnnotation = field.getAnnotation(Option.class);
@@ -892,13 +908,28 @@ public class CommandLineParser {
" must have a String ctor or be an enum");
}
+ int printOrder = optionAnnotation.printOrder();
+ /*
+ * check if we got the default printOrder (ie the print order was not specified in
+ * field annotation).
+ * If so we use the field position to set its default print order
+ * *but* we multiply the field position by 1000 to
+ * (1) make sure that custom ordering is preserved as long as it is below 1000
+ * (2) leave room between consecutive options so that you can insert your own options
+ */
+ if (printOrder == Integer.MAX_VALUE) {
+ printOrder = fieldPosition * 1000;
+ }
+
+
final OptionDefinition optionDefinition = new OptionDefinition(field,
field.getName(),
optionAnnotation.shortName(),
optionAnnotation.doc(), optionAnnotation.optional() || (field.get(callerOptions) != null),
optionAnnotation.overridable(), isCollection, optionAnnotation.minElements(),
optionAnnotation.maxElements(), field.get(callerOptions), optionAnnotation.common(),
- optionAnnotation.mutex());
+ optionAnnotation.mutex(),
+ printOrder);
for (final String option : optionAnnotation.mutex()) {
final OptionDefinition mutextOptionDef = optionMap.get(option);
@@ -1080,6 +1111,16 @@ public class CommandLineParser {
String getHelpDoc();
}
+
+ /** Orders option definitions by ascending printOrder. */
+ protected static class OptionDefinitionByPrintOrderComparator implements Comparator<OptionDefinition> {
+ @Override
+ public int compare(OptionDefinition o1, OptionDefinition o2) {
+ // Integer.compare cannot overflow, unlike subtracting two arbitrary int print orders.
+ return Integer.compare(o1.printOrder, o2.printOrder);
+ }
+ }
+
protected static class OptionDefinition {
final Field field;
final String name;
@@ -1090,6 +1131,7 @@ public class CommandLineParser {
final boolean isCollection;
final int minElements;
final int maxElements;
+ final int printOrder;
final String defaultValue;
final boolean isCommon;
boolean hasBeenSet = false;
@@ -1100,7 +1142,7 @@ public class CommandLineParser {
private OptionDefinition(final Field field, final String name, final String shortName, final String doc,
final boolean optional, final boolean overridable, boolean collection, final int minElements,
final int maxElements, final Object defaultValue, final boolean isCommon,
- final String[] mutuallyExclusive) {
+ final String[] mutuallyExclusive, final int printOrder) {
this.field = field;
this.name = name.toUpperCase();
this.shortName = shortName.toUpperCase();
@@ -1123,6 +1165,7 @@ public class CommandLineParser {
}
this.isCommon = isCommon;
this.mutuallyExclusive = new HashSet<String>(Arrays.asList(mutuallyExclusive));
+ this.printOrder = printOrder;
}
}
diff --git a/src/java/picard/cmdline/Option.java b/src/java/picard/cmdline/Option.java
index faf49e7..0474478 100644
--- a/src/java/picard/cmdline/Option.java
+++ b/src/java/picard/cmdline/Option.java
@@ -83,4 +83,18 @@ public @interface Option {
* the options of the parent annotation are overridden with this annotation.
*/
boolean overridable() default false;
+
+ /**
+ * Overrides the default order in which Options are printed in usage by explicitly setting a
+ * print position, e.g. printOrder=1 is printed before printOrder=2.
+ * Options without printOrder automatically receive a printOrder that (1) is a multiple of 1000
+ * and (2) reflects the option's default position. This lets you insert your own options between
+ * options inherited from super classes (whose order you do not control).
+ * The default ordering follows (1) the option declaration position in the class and (2) sub-class options printed
+ * before superclass options.
+ *
+ * @author charles girardot
+ */
+ int printOrder() default Integer.MAX_VALUE;
+
}
diff --git a/src/java/picard/cmdline/programgroups/Alpha.java b/src/java/picard/cmdline/programgroups/Alpha.java
new file mode 100644
index 0000000..fbc1036
--- /dev/null
+++ b/src/java/picard/cmdline/programgroups/Alpha.java
@@ -0,0 +1,13 @@
+package picard.cmdline.programgroups;
+
+import picard.cmdline.CommandLineProgramGroup;
+
+/** Program group named "Alpha Tools", for tools that are currently unsupported.
+ * @author ebanks
+ */
+public class Alpha implements CommandLineProgramGroup {
+ @Override
+ public String getName() { return "Alpha Tools"; }
+ @Override
+ public String getDescription() { return "Tools that are currently UNSUPPORTED until further testing and maturation."; }
+}
diff --git a/src/java/picard/fastq/BamToBfq.java b/src/java/picard/fastq/BamToBfq.java
index 168bbe2..541ca7a 100644
--- a/src/java/picard/fastq/BamToBfq.java
+++ b/src/java/picard/fastq/BamToBfq.java
@@ -38,12 +38,25 @@ import java.io.File;
* @author ktibbett at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Create BFQ files from a BAM file for use by the Maq aligner.",
- usageShort = "Create BFQ files from a BAM file for use by the Maq aligner.",
+ usage = BamToBfq.USAGE_SUMMARY + BamToBfq.USAGE_DETAILS,
+ usageShort = BamToBfq.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class BamToBfq extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Create BFQ files from a BAM file for use by the maq aligner. ";
+ static final String USAGE_DETAILS = "BFQ is a binary version of the FASTQ file format. This tool creates bfq files from a BAM file " +
+ "for use by the maq aligner." +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar BamToBfq \\<br />" +
+ " I=input.bam \\<br />" +
+ " ANALYSIS_DIR=analysis_dir \\<br />" +
+ " OUTPUT_FILE_PREFIX=output_file_1 \\<br />" +
+ " PAIRED_RUN=false" +
+ "</pre>" +
+ "<hr />"
+ ;
// The following attributes define the command-line arguments
@Option(doc="The BAM file to parse.", shortName=StandardOptionDefinitions.INPUT_SHORT_NAME) public File INPUT;
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/filter/CountingDuplicateFilter.java
similarity index 63%
copy from src/java/picard/illumina/parser/ReadType.java
copy to src/java/picard/filter/CountingDuplicateFilter.java
index bd69cd5..89ca9e2 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/filter/CountingDuplicateFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2011 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,18 +22,12 @@
* THE SOFTWARE.
*/
-package picard.illumina.parser;
+package picard.filter;
-/**
-* A read type describes a stretch of cycles in an ReadStructure
-* (e.g. Assume we have a paired end/barcoded run with the 76 template cycles followed by 8 barcode cycles followed by
-* another 76 template reads, the run would be represented by the ReadStructure 76T8B76T)
-* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
-**/
-public enum ReadType {
- T, B, S;
+import htsjdk.samtools.SAMRecord;
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+/** Counting filter that discards reads that have been marked as duplicates. */
+public class CountingDuplicateFilter extends CountingFilter {
+ @Override
+ public boolean reallyFilterOut(final SAMRecord record) { return record.getDuplicateReadFlag(); }
}
diff --git a/src/java/picard/filter/CountingFilter.java b/src/java/picard/filter/CountingFilter.java
new file mode 100644
index 0000000..667acdc
--- /dev/null
+++ b/src/java/picard/filter/CountingFilter.java
@@ -0,0 +1,67 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.filter;
+
+import htsjdk.samtools.AlignmentBlock;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.filter.SamRecordFilter;
+
+/**
+ * A SamRecordFilter that counts the number of bases in the reads which it filters out. Abstract and designed
+ * to be sub-classed to implement the desired filter. The filterOut method will count the number of records
+ * and bases that would be filtered out using the result of the reallyFilterOut method.
+ */
+public abstract class CountingFilter implements SamRecordFilter {
+ private long filteredRecords = 0;
+ private long filteredBases = 0;
+
+ /** Gets the number of records that have been filtered out thus far. */
+ public long getFilteredRecords() { return this.filteredRecords; }
+
+ /** Gets the number of bases that have been filtered out thus far. */
+ public long getFilteredBases() { return this.filteredBases; }
+
+ @Override
+ public final boolean filterOut(final SAMRecord record) {
+ final boolean filteredOut = reallyFilterOut(record);
+ if (filteredOut) {
+ ++filteredRecords;
+ for (final AlignmentBlock block : record.getAlignmentBlocks()) {
+ this.filteredBases += block.getLength();
+ }
+ }
+ return filteredOut;
+ }
+
+ /**
+ * Return true if we are to filter this record out, false otherwise.
+ */
+ abstract public boolean reallyFilterOut(final SAMRecord record);
+
+ @Override
+ public boolean filterOut(final SAMRecord first, final SAMRecord second) {
+ throw new UnsupportedOperationException();
+ }
+}
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/filter/CountingMapQFilter.java
similarity index 63%
copy from src/java/picard/illumina/parser/ReadType.java
copy to src/java/picard/filter/CountingMapQFilter.java
index bd69cd5..7a39524 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/filter/CountingMapQFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2011 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,18 +22,16 @@
* THE SOFTWARE.
*/
-package picard.illumina.parser;
+package picard.filter;
-/**
-* A read type describes a stretch of cycles in an ReadStructure
-* (e.g. Assume we have a paired end/barcoded run with the 76 template cycles followed by 8 barcode cycles followed by
-* another 76 template reads, the run would be represented by the ReadStructure 76T8B76T)
-* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
-**/
-public enum ReadType {
- T, B, S;
+import htsjdk.samtools.SAMRecord;
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+/** Counting filter that discards reads below a configurable mapping quality threshold. */
+public class CountingMapQFilter extends CountingFilter {
+ private final int minMapq;
+
+ public CountingMapQFilter(final int minMapq) { this.minMapq = minMapq; }
+
+ @Override
+ public boolean reallyFilterOut(final SAMRecord record) { return record.getMappingQuality() < minMapq; }
}
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/filter/CountingPairedFilter.java
similarity index 63%
copy from src/java/picard/illumina/parser/ReadType.java
copy to src/java/picard/filter/CountingPairedFilter.java
index bd69cd5..a04f6d9 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/filter/CountingPairedFilter.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2011 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,18 +22,12 @@
* THE SOFTWARE.
*/
-package picard.illumina.parser;
+package picard.filter;
-/**
-* A read type describes a stretch of cycles in an ReadStructure
-* (e.g. Assume we have a paired end/barcoded run with the 76 template cycles followed by 8 barcode cycles followed by
-* another 76 template reads, the run would be represented by the ReadStructure 76T8B76T)
-* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
-**/
-public enum ReadType {
- T, B, S;
+import htsjdk.samtools.SAMRecord;
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+/** Counting filter that discards reads that are unpaired in sequencing and paired reads whose mates are not mapped. */
+public class CountingPairedFilter extends CountingFilter {
+ @Override
+ public boolean reallyFilterOut(final SAMRecord record) { return !record.getReadPairedFlag() || record.getMateUnmappedFlag(); }
}
diff --git a/src/java/picard/fingerprint/CheckFingerprint.java b/src/java/picard/fingerprint/CheckFingerprint.java
new file mode 100644
index 0000000..bf11400
--- /dev/null
+++ b/src/java/picard/fingerprint/CheckFingerprint.java
@@ -0,0 +1,228 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.SequenceUtil;
+import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
+import picard.PicardException;
+import picard.analysis.FingerprintingDetailMetrics;
+import picard.analysis.FingerprintingSummaryMetrics;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Alpha;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Attempts to check the sample identity of the sequence data in the provided SAM/BAM file
+ * against a set of known genotypes in the supplied genotype file (in either GELI or VCF format).
+ *
+ * @author Tim Fennell
+ */
+@CommandLineProgramProperties(
+ usage = CheckFingerprint.USAGE_DETAILS,
+ usageShort = "Computes a fingerprint from the supplied SAM/BAM file and compares it to the provided genotypes",
+ programGroup = Alpha.class // TODO -- when mature please move to a to-be-created Fingerprinting.class
+)
+public class CheckFingerprint extends CommandLineProgram {
+
+ static final String USAGE_DETAILS = "Computes a fingerprint from the supplied SAM/BAM file and " +
+ "compares it to the expected fingerprint genotypes provided. The key output is a LOD score " +
+ "which represents the relative likelihood of the sequence data originating from the same " +
+ "sample as the genotypes vs. from a random sample. Two outputs are produced: (1) a summary " +
+ "metrics file that gives metrics related single read group (lane or index within a lane) " +
+ "versus a set of known genotypes for the expected sample, and (2) a detail metrics file that " +
+ "contains an individual SNP/Haplotype comparison within a fingerprint comparison. The two " +
+ "files may be specified individually using the SUMMARY_OUTPUT and DETAIL_OUTPUT options. " +
+ "Alternatively the OUTPUT option may be used instead to give the base of the two output " +
+ "files, with the summary metrics having a file extension '" + CheckFingerprint.FINGERPRINT_SUMMARY_FILE_SUFFIX + "' " +
+ "and the detail metrics having a file extension '" + CheckFingerprint.FINGERPRINT_DETAIL_FILE_SUFFIX + "'.";
+
+ @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input SAM or BAM file.")
+ public File INPUT;
+
+ @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The base of output files to write. The summary metrics " +
+ "will have the file extension '" + CheckFingerprint.FINGERPRINT_SUMMARY_FILE_SUFFIX + "' and the detail metrics will have " +
+ "the extension '" + CheckFingerprint.FINGERPRINT_DETAIL_FILE_SUFFIX + "'.", mutex = {"SUMMARY_OUTPUT", "DETAIL_OUTPUT"})
+ public String OUTPUT;
+
+ @Option(shortName = "S", doc = "The text file to which to write summary metrics.", mutex = {"OUTPUT"})
+ public File SUMMARY_OUTPUT;
+
+ @Option(shortName = "D", doc = "The text file to which to write detail metrics.", mutex = {"OUTPUT"})
+ public File DETAIL_OUTPUT;
+
+ @Option(shortName="G", doc = "File of genotypes (VCF or GELI) to be used in comparison. May contain " +
+ "any number of genotypes; CheckFingerprint will use only those that are usable for fingerprinting.")
+ public File GENOTYPES;
+
+ @Option(optional=true, doc = "If using VCF format genotypes, this parameter can be used to specify which sample's " +
+ "genotypes to use from the VCF file. If not supplied the sample name from the BAM read group header " +
+ "is used instead.")
+ public String SAMPLE_ALIAS;
+
+ @Option(shortName="H", doc = "A file of haplotype information produced by the CheckFingerprint program.")
+ public File HAPLOTYPE_MAP;
+
+ @Option(shortName="LOD", doc = "When counting haplotypes checked and matching, count only haplotypes " +
+ "where the most likely haplotype achieves at least this LOD.")
+ public double GENOTYPE_LOD_THRESHOLD = 5;
+
+ @Option(shortName="IGNORE_RG", doc = "If true, treat the entire input BAM as one single read group in the calculation, " +
+ "ignoring RG annotations, and producing a single fingerprint metric for the entire BAM.")
+ public boolean IGNORE_READ_GROUPS = false;
+
+ private final Log log = Log.getInstance(CheckFingerprint.class);
+
+ public static final String FINGERPRINT_SUMMARY_FILE_SUFFIX = "fingerprinting_summary_metrics";
+ public static final String FINGERPRINT_DETAIL_FILE_SUFFIX = "fingerprinting_detail_metrics";
+
+
+ // Stock main method
+ public static void main(final String[] args) {
+ new CheckFingerprint().instanceMainWithExit(args);
+ }
+
+ @Override
+ protected int doWork() {
+ final File outputDetailMetricsFile, outputSummaryMetricsFile;
+ if (OUTPUT == null) {
+ outputDetailMetricsFile = DETAIL_OUTPUT;
+ outputSummaryMetricsFile = SUMMARY_OUTPUT;
+ }
+ else {
+ if (!OUTPUT.endsWith(".")) OUTPUT = OUTPUT + ".";
+ outputDetailMetricsFile = new File(OUTPUT + FINGERPRINT_DETAIL_FILE_SUFFIX);
+ outputSummaryMetricsFile = new File(OUTPUT + FINGERPRINT_SUMMARY_FILE_SUFFIX);
+ }
+
+ IOUtil.assertFileIsReadable(INPUT);
+ IOUtil.assertFileIsReadable(HAPLOTYPE_MAP);
+ IOUtil.assertFileIsReadable(GENOTYPES);
+ IOUtil.assertFileIsWritable(outputDetailMetricsFile);
+ IOUtil.assertFileIsWritable(outputSummaryMetricsFile);
+
+ final FingerprintChecker checker = new FingerprintChecker(HAPLOTYPE_MAP);
+
+ SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(INPUT), SAMSequenceDictionaryExtractor.extractDictionary(GENOTYPES), true);
+ SequenceUtil.assertSequenceDictionariesEqual(SAMSequenceDictionaryExtractor.extractDictionary(INPUT), checker.getHeader().getSequenceDictionary(), true);
+
+ // If sample alias isn't supplied, assume it's the one from the INPUT file's RGs
+ if (SAMPLE_ALIAS == null) {
+ final SamReader in = SamReaderFactory.makeDefault().open(INPUT);
+ for (final SAMReadGroupRecord rec : in.getFileHeader().getReadGroups()) {
+ if (SAMPLE_ALIAS == null) {
+ SAMPLE_ALIAS = rec.getSample();
+ }
+ else if (!SAMPLE_ALIAS.equals(rec.getSample())) {
+ throw new PicardException("SAM File must not contain data from multiple samples.");
+ }
+ }
+ CloserUtil.close(in);
+ }
+
+
+ final List<FingerprintResults> results = checker.checkFingerprints(
+ Arrays.asList(INPUT),
+ Arrays.asList(GENOTYPES),
+ SAMPLE_ALIAS,
+ IGNORE_READ_GROUPS);
+
+ final MetricsFile<FingerprintingSummaryMetrics,?> summaryFile = getMetricsFile();
+ final MetricsFile<FingerprintingDetailMetrics,?> detailsFile = getMetricsFile();
+
+ for (final FingerprintResults fpr : results) {
+ final MatchResults mr = fpr.getMatchResults().first();
+
+ final FingerprintingSummaryMetrics metrics = new FingerprintingSummaryMetrics();
+ metrics.READ_GROUP = fpr.getReadGroup();
+ metrics.SAMPLE = SAMPLE_ALIAS;
+ metrics.LL_EXPECTED_SAMPLE = mr.getSampleLikelihood();
+ metrics.LL_RANDOM_SAMPLE = mr.getPopulationLikelihood();
+ metrics.LOD_EXPECTED_SAMPLE = mr.getLOD();
+
+ for (final LocusResult lr : mr.getLocusResults()) {
+ final DiploidGenotype expectedGenotype = lr.getExpectedGenotype();
+ final DiploidGenotype observedGenotype = lr.getMostLikelyGenotype();
+ // Update the summary metrics
+ metrics.HAPLOTYPES_WITH_GENOTYPES++;
+ if (lr.getLodGenotype() >= GENOTYPE_LOD_THRESHOLD) {
+ metrics.HAPLOTYPES_CONFIDENTLY_CHECKED++;
+
+ if (lr.getExpectedGenotype() == lr.getMostLikelyGenotype()) {
+ metrics.HAPLOTYPES_CONFIDENTLY_MATCHING++;
+ }
+
+ if (expectedGenotype.isHeterozygous() && observedGenotype.isHomomozygous()) {
+ metrics.HET_AS_HOM++;
+ }
+
+ if (expectedGenotype.isHomomozygous() && observedGenotype.isHeterozygous()) {
+ metrics.HOM_AS_HET++;
+ }
+
+ if (expectedGenotype.isHomomozygous() && observedGenotype.isHomomozygous()
+ && expectedGenotype.compareTo(observedGenotype) != 0) {
+ metrics.HOM_AS_OTHER_HOM++;
+ }
+ }
+
+ // Build the detail metrics
+ final FingerprintingDetailMetrics details = new FingerprintingDetailMetrics();
+ details.READ_GROUP = fpr.getReadGroup();
+ details.SAMPLE = SAMPLE_ALIAS;
+ details.SNP = lr.getSnp().getName();
+ details.SNP_ALLELES = lr.getSnp().getAlleleString();
+ details.CHROM = lr.getSnp().getChrom();
+ details.POSITION = lr.getSnp().getPos();
+ details.EXPECTED_GENOTYPE = expectedGenotype.toString();
+ details.OBSERVED_GENOTYPE = observedGenotype.toString();
+ details.LOD = lr.getLodGenotype();
+ details.OBS_A = lr.getAllele1Count();
+ details.OBS_B = lr.getAllele2Count();
+ detailsFile.addMetric(details);
+ }
+
+ summaryFile.addMetric(metrics);
+ log.info(metrics.READ_GROUP + " vs. " + metrics.SAMPLE + ": LOD = " + metrics.LOD_EXPECTED_SAMPLE);
+ }
+
+ summaryFile.write(outputSummaryMetricsFile);
+ detailsFile.write(outputDetailMetricsFile);
+
+ return 0;
+ }
+}
diff --git a/src/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java b/src/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
new file mode 100644
index 0000000..e724224
--- /dev/null
+++ b/src/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
@@ -0,0 +1,312 @@
+
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.BamFileIoUtils;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.FormatUtil;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.SAMReadGroupRecord;
+import picard.cmdline.programgroups.Alpha;
+
+import java.io.File;
+import java.io.PrintStream;
+import java.util.*;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Program to check that all read groups within the set of BAM files appear to come from the same
+ * individual.
+ *
+ * @author Tim Fennell
+ */
+@CommandLineProgramProperties(
+ usage = "Checks if all read groups within a set of BAM files appear to come from the same individual",
+ usageShort = "Checks if all read groups appear to come from the same individual",
+ programGroup = Alpha.class // TODO -- when mature please move to a to-be-created Fingerprinting.class
+)
+public class CrosscheckReadGroupFingerprints extends CommandLineProgram {
+
+ @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME,
+ doc="One or more input BAM files (or lists of BAM files) to compare fingerprints for.")
+ public List<File> INPUT;
+
+ @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional=true,
+ doc="Optional output file to write metrics to. Default is to write to stdout.")
+ public File OUTPUT;
+
+ @Option(shortName="H", doc="The file of haplotype data to use to pick SNPs to fingerprint")
+ public File HAPLOTYPE_MAP;
+
+ @Option(shortName="LOD",
+ doc="If any two read groups match with a LOD score lower than the threshold the program will exit " +
+ "with a non-zero code to indicate error. 0 means equal probability the read groups match vs. " +
+ "come from different individuals, negative numbers mean N logs more likely that the read groups " +
+ "are from different individuals and positive numbers mean N logs more likely that the read groups " +
+ "are from the sample individual.")
+ public double LOD_THRESHOLD = 0;
+
+ @Option(doc="Instead of producing the normal comparison of read-groups, roll fingerprints up to the sample level " +
+ "and print out a sample x sample matrix with LOD scores.")
+ public boolean CROSSCHECK_SAMPLES = false;
+
+ @Option(doc="Instead of producing the normal comparison of read-groups, roll fingerprints up to the library level " +
+ "and print out a library x library matrix with LOD scores.")
+ public boolean CROSSCHECK_LIBRARIES = false;
+
+ @Option(doc="The number of threads to use to process BAM files and generate Fingerprints.")
+ public int NUM_THREADS = 1;
+
+ @Option(doc="Allow the use of duplicate reads in performing the comparison. Can be useful when duplicate " +
+ "marking has been overly aggressive and coverage is low.")
+ public boolean ALLOW_DUPLICATE_READS = false;
+
+ @Option(doc="Assumed genotyping error rate that provides a floor on the probability that a genotype comes from" +
+ " the expected sample.")
+ public double GENOTYPING_ERROR_RATE = 0.01;
+
+ @Option(doc="If true then only read groups that do not relate to each other as expected will have their LODs reported.")
+ public boolean OUTPUT_ERRORS_ONLY = false;
+
+ @Option(doc ="The rate at which a het in a normal sample turns into a hom in the tumor.", optional = true)
+ public double LOSS_OF_HET_RATE = 0.5;
+
+ @Option(doc="Expect all read groups' fingerprints to match, irrespective of their sample names. By default (with this value set to " +
+ "false), read groups with different sample names are expected to mismatch, and those with the same sample name are expected " +
+ "to match.")
+ public boolean EXPECT_ALL_READ_GROUPS_TO_MATCH = false;
+
+ @Option(doc="When one or more mismatches between read groups are detected, exit with this value instead of 0.")
+ public int EXIT_CODE_WHEN_MISMATCH = 1;
+
+ private final Log log = Log.getInstance(CrosscheckReadGroupFingerprints.class);
+
+ private final FormatUtil formatUtil = new FormatUtil();
+
+ // These are public so that other programs can parse status from the crosscheck file
+ public static final String EXPECTED_MATCH = "EXPECTED MATCH";
+ public static final String EXPECTED_MISMATCH = "EXPECTED MISMATCH";
+ public static final String UNEXPECTED_MATCH = "UNEXPECTED MATCH";
+ public static final String UNEXPECTED_MISMATCH = "UNEXPECTED MISMATCH";
+
+ /** Stock main method. */
+ public static void main(final String[] args) {
+ new CrosscheckReadGroupFingerprints().instanceMainWithExit(args);
+ }
+
+ @Override protected int doWork() {
+ // Check inputs
+ for (final File f : INPUT) IOUtil.assertFileIsReadable(f);
+ IOUtil.assertFileIsReadable(HAPLOTYPE_MAP);
+ if (OUTPUT != null) IOUtil.assertFileIsWritable(OUTPUT);
+
+ final HaplotypeMap map = new HaplotypeMap(HAPLOTYPE_MAP);
+ final FingerprintChecker checker = new FingerprintChecker(map);
+
+ checker.setAllowDuplicateReads(ALLOW_DUPLICATE_READS);
+
+ log.info("Done checking input files, moving onto fingerprinting files.");
+
+ List<File> unrolledFiles = IOUtil.unrollFiles(INPUT, BamFileIoUtils.BAM_FILE_EXTENSION, IOUtil.SAM_FILE_EXTENSION);
+ final Map<SAMReadGroupRecord, Fingerprint> fpMap = checker.fingerprintSamFiles(unrolledFiles, NUM_THREADS, 1, TimeUnit.DAYS);
+ final List<Fingerprint> fingerprints = new ArrayList<>(fpMap.values());
+
+ log.info("Finished generating fingerprints from BAM files, moving on to cross-checking.");
+
+ // Setup the output
+ final PrintStream out;
+ if (OUTPUT != null) {
+ out = new PrintStream(IOUtil.openFileForWriting(OUTPUT), true);
+ }
+ else {
+ out = System.out;
+ }
+
+ if (this.CROSSCHECK_SAMPLES) {
+ crossCheckSamples(fingerprints, out);
+ return 0;
+ }
+ else if (this.CROSSCHECK_LIBRARIES) {
+ crossCheckLibraries(fpMap, out);
+ return 0;
+ }
+ else {
+ return crossCheckReadGroups(fpMap, out);
+ }
+ }
+
+ /**
+ * Method that combines the fingerprint evidence across all the read groups for the same sample
+ * and then produces a matrix of LOD scores for comparing every sample with every other sample.
+ */
+ private void crossCheckSamples(final List<Fingerprint> fingerprints, final PrintStream out) {
+ final SortedMap<String,Fingerprint> sampleFps = FingerprintChecker.mergeFingerprintsBySample(fingerprints);
+ final SortedSet<String> samples = (SortedSet<String>) sampleFps.keySet();
+
+ // Print header row
+ out.print("\t");
+ for (final String sample : samples) { out.print(sample); out.print("\t"); }
+ out.println();
+
+ // Print results rows
+ for (final String sample : samples) {
+ out.print(sample);
+ final Fingerprint fp = sampleFps.get(sample);
+
+ for (final String otherSample : samples) {
+ final MatchResults results = FingerprintChecker.calculateMatchResults(fp, sampleFps.get(otherSample), GENOTYPING_ERROR_RATE, LOSS_OF_HET_RATE);
+ out.print("\t");
+ out.print(formatUtil.format(results.getLOD()));
+ }
+
+ out.println();
+ }
+ }
+
+ /**
+ * Method that combines the fingerprint evidence across all the read groups for the same library
+ * and then produces a matrix of LOD scores for comparing every library with every other library.
+ */
+ private void crossCheckLibraries(final Map<SAMReadGroupRecord,Fingerprint> fingerprints, final PrintStream out) {
+ final List<Fingerprint> fixedFps = new ArrayList<>();
+ for (final SAMReadGroupRecord rg : fingerprints.keySet()) {
+ final Fingerprint old = fingerprints.get(rg);
+ final String name = rg.getSample() + "::" + rg.getLibrary();
+ final Fingerprint newFp = new Fingerprint(name, old.getSource(), old.getInfo());
+ newFp.putAll(old);
+
+ fixedFps.add(newFp);
+ }
+
+ crossCheckSamples(fixedFps, out);
+ }
+
+ /**
+ * Method that pairwise checks every pair of read groups and reports a LOD score for the two read groups
+ * coming from the same sample.
+ */
+ private int crossCheckReadGroups(final Map<SAMReadGroupRecord,Fingerprint> fingerprints, final PrintStream out) {
+ int mismatches = 0;
+ int unexpectedMatches = 0;
+
+ final List<SAMReadGroupRecord> readGroupRecords = new ArrayList<>(fingerprints.keySet());
+ final List<String> output = new ArrayList<>();
+
+ for (int i = 0; i < readGroupRecords.size(); i++) {
+ final SAMReadGroupRecord lhsRg = readGroupRecords.get(i);
+ for (int j= i+1; j < readGroupRecords.size(); j++) {
+ final SAMReadGroupRecord rhsRg = readGroupRecords.get(j);
+ final boolean expectedToMatch = EXPECT_ALL_READ_GROUPS_TO_MATCH || lhsRg.getSample().equals(rhsRg.getSample());
+
+ final MatchResults results = FingerprintChecker.calculateMatchResults(fingerprints.get(lhsRg), fingerprints.get(rhsRg), GENOTYPING_ERROR_RATE, LOSS_OF_HET_RATE);
+ if (expectedToMatch) {
+ if (results.getLOD() < LOD_THRESHOLD) {
+ mismatches++;
+ output.add(getMatchDetails(UNEXPECTED_MISMATCH, results, lhsRg, rhsRg));
+ } else {
+ if (!OUTPUT_ERRORS_ONLY) {
+ output.add(getMatchDetails(EXPECTED_MATCH, results, lhsRg, rhsRg));
+ }
+ }
+ } else {
+ if (results.getLOD() > -LOD_THRESHOLD) {
+ unexpectedMatches++;
+ output.add(getMatchDetails(UNEXPECTED_MATCH, results, lhsRg, rhsRg));
+ } else {
+ if (!OUTPUT_ERRORS_ONLY) {
+ output.add(getMatchDetails(EXPECTED_MISMATCH, results, lhsRg, rhsRg));
+ }
+ }
+ }
+ }
+ }
+
+ if (!output.isEmpty()) {
+ out.println("RESULT\tLOD_SCORE\tLOD_SCORE_TUMOR_NORMAL\tLOD_SCORE_NORMAL_TUMOR\tLEFT_RUN_BARCODE\tLEFT_LANE\tLEFT_MOLECULAR_BARCODE_SEQUENCE\tLEFT_LIBRARY\tLEFT_SAMPLE\t" +
+ "RIGHT_RUN_BARCODE\tRIGHT_LANE\tRIGHT_MOLECULAR_BARCODE_SEQUENCE\tRIGHT_LIBRARY\tRIGHT_SAMPLE");
+ out.println(String.join("\n", output));
+ }
+
+ if (mismatches + unexpectedMatches > 0) {
+ log.info("WARNING: At least two read groups did not relate as expected.");
+ return EXIT_CODE_WHEN_MISMATCH;
+ }
+ else {
+ log.info("All read groups related as expected.");
+ return 0;
+ }
+ }
+
+ /**
+ * Generates tab delimited string containing details about a possible match between fingerprints on two different SAMReadGroupRecords
+ * @param matchResult String describing the match type.
+ * @param results MatchResults object
+ * @param left left hand side SAMReadGroupRecord
+ * @param right right hand side SAMReadGroupRecord
+ * @return tab delimited string containing details about a possible match
+ */
+ private String getMatchDetails(final String matchResult, final MatchResults results, final SAMReadGroupRecord left, final SAMReadGroupRecord right) {
+ final List<String> elements = new ArrayList<>(4);
+ elements.add(matchResult);
+ elements.add(formatUtil.format(results.getLOD()));
+ elements.add(formatUtil.format(results.getLodTN()));
+ elements.add(formatUtil.format(results.getLodNT()));
+ elements.add(getReadGroupDetails(left));
+ elements.add(getReadGroupDetails(right));
+ return String.join("\t", elements);
+ }
+
+ /**
+ * Generates tab delimited string containing details about the passed SAMReadGroupRecord
+ * @param readGroupRecord record
+ * @return tab delimited string containing details about the SAMReadGroupRecord
+ */
+ private String getReadGroupDetails(final SAMReadGroupRecord readGroupRecord) {
+ final List<String> elements = new ArrayList<>(5);
+
+ final String tmp[] = readGroupRecord.getPlatformUnit().split("\\."); // Expect to look like: D047KACXX110901.1.ACCAACTG
+ String runBarcode = "?";
+ String lane = "?";
+ String molBarcode = "?";
+ if ((tmp.length == 3) || (tmp.length == 2)) {
+ runBarcode = tmp[0];
+ lane = tmp[1];
+ molBarcode = (tmp.length == 3) ? tmp[2] : ""; // In older BAMS there may be no molecular barcode sequence
+ } else {
+ log.error("Unexpected format " + readGroupRecord.getPlatformUnit() + " for PU attribute");
+ }
+ elements.add(runBarcode);
+ elements.add(lane);
+ elements.add(molBarcode);
+ elements.add(readGroupRecord.getLibrary());
+ elements.add(readGroupRecord.getSample());
+ return String.join("\t", elements);
+ }
+}
diff --git a/src/java/picard/fingerprint/DiploidGenotype.java b/src/java/picard/fingerprint/DiploidGenotype.java
new file mode 100644
index 0000000..78dc73b
--- /dev/null
+++ b/src/java/picard/fingerprint/DiploidGenotype.java
@@ -0,0 +1,109 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.fingerprint;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import picard.PicardException;
+
+import htsjdk.samtools.util.StringUtil;
+
+/**
+ * A genotype produced by one of the concrete implementations of AbstractAlleleCaller.
+ * DO NOT ADD TO OR REORDER THIS ENUM AS THAT WOULD BREAK THE GELI FILE FORMAT.
+ */
+public enum DiploidGenotype {
+    AA('A','A'),
+    AC('A','C'),
+    AG('A','G'),
+    AT('A','T'),
+    CC('C','C'),
+    CG('C','G'),
+    CT('C','T'),
+    GG('G','G'),
+    GT('G','T'),
+    TT('T','T');
+
+    /** Lookup table keyed by the integer sum of the two (upper-case) allele bytes. */
+    private static final Map<Integer, DiploidGenotype> genotypes = new HashMap<Integer, DiploidGenotype>();
+
+    static {
+        for (final DiploidGenotype genotype : values()) {
+            // this relies on the fact that the integer sum of allele1 and allele2 is unique
+            // among the ten genotypes declared above
+            if (genotypes.put(genotype.allele1 + genotype.allele2, genotype) != null) {
+                // this check is just for safety, this should never happen
+                throw new PicardException("sum of allele values are not unique!!!");
+            }
+        }
+    }
+
+    /**
+     * Converts a pair of bases into a DiploidGenotype regardless of base order or case.
+     *
+     * @param bases array holding exactly two bases
+     * @throws IllegalArgumentException if the array does not hold exactly two bases, or either base is not one of ACGT
+     */
+    public static DiploidGenotype fromBases(final byte[] bases) {
+        if (bases.length != 2) {
+            throw new IllegalArgumentException("bases must contain 2 and only 2 bases, it actually contained " + bases.length);
+        }
+        return fromBases(bases[0], bases[1]);
+    }
+
+    /**
+     * Converts a pair of bases into a DiploidGenotype regardless of base order or case.
+     *
+     * @throws IllegalArgumentException if either base is not one of ACGT (case insensitive)
+     */
+    public static DiploidGenotype fromBases(final byte base1, final byte base2) {
+        final byte first = StringUtil.toUpperCase(base1);
+        final byte second = StringUtil.toUpperCase(base2);
+
+        // The sum used as the lookup key is only unique among valid bases: e.g. 'N' + 'G' has the
+        // same sum as 'A' + 'T'. Validate each base first so that invalid input can never be
+        // silently mapped onto a real genotype.
+        final DiploidGenotype genotype =
+                (isValidBase(first) && isValidBase(second)) ? genotypes.get(first + second) : null;
+        if (genotype == null) {
+            throw new IllegalArgumentException("Unknown genotype string [" +
+                    StringUtil.bytesToString(new byte[] {base1, base2}) +
+                    "], any pair of ACTG case insensitive is acceptable");
+        }
+        return genotype;
+    }
+
+    /**
+     * @return true if this is a valid base, i.e. one of [ACGTacgt]
+     */
+    public static boolean isValidBase(final byte base) {
+        switch(StringUtil.toUpperCase(base)) {
+            case 'A':
+            case 'C':
+            case 'G':
+            case 'T':
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    private final byte allele1;
+    private final byte allele2;
+
+    private DiploidGenotype(final char allele1, final char allele2) {
+        this.allele1 = (byte)(allele1 & 0xff);
+        this.allele2 = (byte)(allele2 & 0xff);
+    }
+
+    public byte getAllele1() { return allele1; }
+    public byte getAllele2() { return allele2; }
+    public boolean isHeterozygous() { return this.allele1 != this.allele2; }
+    /** Correctly spelled equivalent of {@link #isHomomozygous()}. */
+    public boolean isHomozygous() { return this.allele1 == this.allele2; }
+    /** Kept under its original (misspelled) name for existing callers. */
+    public boolean isHomomozygous() { return this.allele1 == this.allele2; }
+}
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/fingerprint/DiploidHaplotype.java
similarity index 63%
copy from src/java/picard/illumina/parser/ReadType.java
copy to src/java/picard/fingerprint/DiploidHaplotype.java
index bd69cd5..358c18b 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/fingerprint/DiploidHaplotype.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2011 The Broad Institute
+ * Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,18 +22,14 @@
* THE SOFTWARE.
*/
-package picard.illumina.parser;
+package picard.fingerprint;
/**
-* A read type describes a stretch of cycles in an ReadStructure
-* (e.g. Assume we have a paired end/barcoded run with the 76 template cycles followed by 8 barcode cycles followed by
-* another 76 template reads, the run would be represented by the ReadStructure 76T8B76T)
-* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
-**/
-public enum ReadType {
- T, B, S;
-
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+ * Simple enum to represent the three possible combinations of major/major, major/minor
+ * and minor/minor haplotypes for a diploid individual.
+ *
+ * @author Tim Fennell
+ */
+public enum DiploidHaplotype {
+ AA, Aa, aa
}
diff --git a/src/java/picard/fingerprint/Fingerprint.java b/src/java/picard/fingerprint/Fingerprint.java
new file mode 100644
index 0000000..00c53e0
--- /dev/null
+++ b/src/java/picard/fingerprint/Fingerprint.java
@@ -0,0 +1,100 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * Small class to represent a genetic fingerprint as a set of HaplotypeProbabilities
+ * objects that give the relative probabilities of each of the possible haplotypes
+ * at a locus.
+ *
+ * @author Tim Fennell
+ */
+public class Fingerprint extends TreeMap<HaplotypeBlock, HaplotypeProbabilities> {
+    private final String sample;
+    private final File source;
+    private final String info;
+
+    /**
+     * @param sample name of the sample the fingerprint describes
+     * @param source file the fingerprint came from; null is tolerated by {@link #getPrintableId()}
+     * @param info   extra identifying text; null is tolerated by {@link #getPrintableId()}
+     */
+    public Fingerprint(final String sample, final File source, final String info) {
+        this.sample = sample;
+        this.source = source;
+        this.info = info;
+    }
+
+    public String getSample() { return sample; }
+
+    public File getSource() { return source; }
+
+    public String getInfo() { return info; }
+
+    /** Builds an identifier of the form sample@fileName[:info], leaving out whichever parts are null. */
+    public String getPrintableId() {
+        final String prefix = getSample() + "@";
+        final String fileName = (source != null) ? source.getName() : "";
+        final String suffix = (info != null) ? (":" + info) : "";
+        return prefix + fileName + suffix;
+    }
+
+    /** Stores the probabilities under the haplotype block they report. */
+    public void add(final HaplotypeProbabilities h) {
+        put(h.getHaplotype(), h);
+    }
+
+    /**
+     * Folds the likelihoods of another Fingerprint into this one, haplotype block by haplotype block.
+     *
+     * NOTE(review): when this fingerprint has no entry for a block, the other fingerprint's
+     * HaplotypeProbabilities object is stored as-is (not copied), so both fingerprints then share
+     * that instance. If HaplotypeProbabilities.merge() mutates its receiver, later merges into this
+     * fingerprint would also alter the other one - confirm whether that is intended.
+     */
+    public void merge(final Fingerprint other) {
+        // Visit every block known to either fingerprint.
+        final Set<HaplotypeBlock> allBlocks = new HashSet<>(keySet());
+        allBlocks.addAll(other.keySet());
+
+        for (final HaplotypeBlock block : allBlocks) {
+            final HaplotypeProbabilities mine = get(block);
+            final HaplotypeProbabilities theirs = other.get(block);
+
+            if (mine == null) {
+                put(block, theirs);
+                continue;
+            }
+            if (theirs != null) {
+                mine.merge(theirs);
+            }
+        }
+    }
+
+    /**
+     * Drops sequence-derived haplotypes whose genotyping looks suspect: those whose most probable
+     * genotype has a LOD of at least 3 while more than 10% of the observations are of an
+     * unexpected allele. Entries not derived from sequence are left untouched.
+     */
+    public void filterSuspectSites() {
+        for (final Iterator<Map.Entry<HaplotypeBlock, HaplotypeProbabilities>> it = entrySet().iterator(); it.hasNext(); ) {
+            final HaplotypeProbabilities value = it.next().getValue();
+            if (!(value instanceof HaplotypeProbabilitiesFromSequence)) {
+                continue;
+            }
+
+            final HaplotypeProbabilitiesFromSequence fromSequence = (HaplotypeProbabilitiesFromSequence) value;
+            final boolean confident = fromSequence.getLodMostProbableGenotype() >= 3;
+            if (confident && fromSequence.getFractionUnexpectedAlleleObs() > 0.1) {
+                it.remove();
+            }
+        }
+    }
+}
diff --git a/src/java/picard/fingerprint/FingerprintChecker.java b/src/java/picard/fingerprint/FingerprintChecker.java
new file mode 100644
index 0000000..9b5edff
--- /dev/null
+++ b/src/java/picard/fingerprint/FingerprintChecker.java
@@ -0,0 +1,635 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.filter.NotPrimaryAlignmentFilter;
+import htsjdk.samtools.filter.SamRecordFilter;
+import htsjdk.samtools.util.Interval;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.SamLocusIterator;
+import htsjdk.samtools.SAMFileReader;
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.util.SequenceUtil;
+import htsjdk.samtools.util.StringUtil;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.GenotypeLikelihoods;
+import htsjdk.variant.variantcontext.VariantContext;
+import picard.PicardException;
+
+import java.io.File;
+import java.util.*;
+import java.util.concurrent.*;
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Major class that coordinates the activities involved in comparing genetic fingerprint
+ * data whether the source is from a genotyping platform or derived from sequence data.
+ *
+ * @author Tim Fennell
+ */
+public class FingerprintChecker {
+ public static final double DEFAULT_GENOTYPING_ERROR_RATE = 0.01;
+ public static final int DEFAULT_MINIMUM_MAPPING_QUALITY = 10;
+ public static final int DEFAULT_MINIMUM_BASE_QUALITY = 20;
+ public static final int DEFAULT_MAXIMAL_PL_DIFFERENCE = 30;
+
+ private final HaplotypeMap haplotypes;
+ private int minimumBaseQuality = DEFAULT_MINIMUM_BASE_QUALITY;
+ private int minimumMappingQuality = DEFAULT_MINIMUM_MAPPING_QUALITY;
+ private double genotypingErrorRate = DEFAULT_GENOTYPING_ERROR_RATE;
+ private int maximalPLDifference = DEFAULT_MAXIMAL_PL_DIFFERENCE;
+
+ private boolean allowDuplicateReads = false;
+ private double pLossofHet = 0;
+
+ private final Log log = Log.getInstance(FingerprintChecker.class);
+
+ /**
+ * Creates a fingerprint checker that will work with the set of haplotypes stored in
+ * the supplied file.
+ */
+ public FingerprintChecker(final File haplotypeData) {
+ this.haplotypes = new HaplotypeMap(haplotypeData);
+ }
+
+ /** Creates a fingerprint checker that will work with the set of haplotypes provided. */
+ public FingerprintChecker(final HaplotypeMap haplotypes) {
+ this.haplotypes = haplotypes;
+ }
+
+ /** Sets the minimum base quality for bases used when computing a fingerprint from sequence data. */
+ public void setMinimumBaseQuality(final int minimumBaseQuality) {
+ this.minimumBaseQuality = minimumBaseQuality;
+ }
+
+ /** Sets the minimum mapping quality for reads used when computing fingerprints from sequence data. */
+ public void setMinimumMappingQuality(final int minimumMappingQuality) {
+ this.minimumMappingQuality = minimumMappingQuality;
+ }
+
+ /** Sets the assumed genotyping error rate used when accurate error rates are not available. */
+ public void setGenotypingErrorRate(final double genotypingErrorRate) {
+ this.genotypingErrorRate = genotypingErrorRate;
+ }
+ /**
+ * Sets the maximal difference in PL scores considered when reading PLs from a VCF.
+ * PL values larger than this are capped to it when fingerprints are loaded.
+ */
+ public void setmaximalPLDifference(final int maximalPLDifference) {
+ this.maximalPLDifference = maximalPLDifference;
+ }
+
+ /** Returns the SAM header of the haplotype map this checker works with. */
+ public SAMFileHeader getHeader(){
+ return haplotypes.getHeader();
+ }
+ /**
+ * Sets whether duplicate reads should be allowed when calling genotypes from SAM files. This is
+ * useful when comparing read groups within a SAM file and individual read groups show artifactually
+ * high duplication (e.g. a single-ended read group mixed in with paired-end read groups).
+ * @param allowDuplicateReads true to use reads marked as duplicates; when true the locus iterator's
+ * filters are replaced by a single not-primary-alignment filter
+ */
+ public void setAllowDuplicateReads(final boolean allowDuplicateReads) {
+ this.allowDuplicateReads = allowDuplicateReads;
+ }
+
+ /**
+ * Sets the probability that a genotype underwent a Loss of Heterozygosity (for tumors).
+ * The value is handed to calculateMatchResults when fingerprints are checked.
+ */
+ public void setpLossofHet(final double pLossofHet) {
+ this.pLossofHet = pLossofHet;
+ }
+
+ /**
+ * Loads genotypes from the supplied file into one or more Fingerprint objects and returns them in a
+ * Map of Sample->Fingerprint.
+ *
+ * @param fingerprintFile - VCF file containing genotypes for one or more samples
+ * @param specificSample - null to load genotypes for all samples contained in the file or the name
+ * of an individual sample to load (and exclude all others).
+ * @return a Map of Sample name to Fingerprint
+ */
+ public Map<String,Fingerprint> loadFingerprints(final File fingerprintFile, final String specificSample) {
+ // Read the genotype file against the loci of the haplotype map.
+ final IntervalList loci = this.haplotypes.getIntervalList();
+ final GenotypeReader reader = new GenotypeReader();
+ final GenotypeReader.VariantIterator iterator = reader.read(fingerprintFile, loci);
+
+ // The genotype file and the haplotype map must be on the same reference.
+ SequenceUtil.assertSequenceDictionariesEqual(this.haplotypes.getHeader().getSequenceDictionary(),
+ iterator.getSequenceDictionary());
+
+ final Map<String, Fingerprint> fingerprints = new HashMap<String, Fingerprint>();
+ // Stays null (meaning "all samples") until the first variant is read, unless one sample was requested.
+ Set<String> samples = null;
+ if (specificSample != null) {
+ samples = new HashSet<String>();
+ samples.add(specificSample);
+ }
+
+ while (iterator.hasNext()) {
+ // Setup the sample names set if needed
+ final VariantContext ctx = iterator.next();
+ if (samples == null) samples = ctx.getSampleNames();
+
+ if (isUsableSnp(ctx)) {
+ final HaplotypeBlock h = this.haplotypes.getHaplotype(ctx.getChr(), ctx.getStart());
+ final Snp snp = this.haplotypes.getSnp(ctx.getChr(), ctx.getStart());
+ // Sites that are not part of any haplotype block are ignored.
+ if (h == null) continue;
+
+ // Check the alleles from the file against the expected set of genotypes
+ {
+ boolean allelesOk = true;
+ for (final Allele allele : ctx.getAlleles()) {
+ final byte[] bases = allele.getBases();
+ if (bases.length > 1 || (bases[0] != snp.getAllele1() && bases[0] != snp.getAllele2())) {
+ allelesOk = false;
+ }
+ }
+ if (!allelesOk) {
+ // Warn and skip the whole site rather than failing the load.
+ log.warn("Problem with genotype file '" + fingerprintFile.getName() + "': Alleles "
+ + ctx.getAlleles() + " do not match to alleles for SNP " + snp
+ + " with alleles " + snp.getAlleleString());
+ continue ;
+ }
+ }
+
+ for (final String sample : samples) {
+ Fingerprint fp = fingerprints.get(sample);
+ // Find or construct the fingerprint for this sample
+ if (fp == null) {
+ fp = new Fingerprint(sample, fingerprintFile, null);
+ fingerprints.put(sample, fp);
+ }
+
+ //PLs are preferred over GTs
+ //TODO: this code is replicated in various places (ReconstructTriosFromVCF for example). Needs refactoring.
+ //TODO: add a way to force using GTs when both are available (why?)
+
+ // Get the genotype for the sample and check that it is useful
+ final Genotype genotype = ctx.getGenotype(sample);
+ if (genotype == null) {
+ throw new IllegalArgumentException("Cannot find sample " + sample + " in provided file: " + fingerprintFile);
+ }
+ if (genotype.hasPL()) {
+
+ final HaplotypeProbabilitiesFromGenotypeLikelihoods hFp = new HaplotypeProbabilitiesFromGenotypeLikelihoods(h);
+ // Work on a copy: the array returned by the genotype must not be modified in place.
+ // Each PL is capped at maximalPLDifference before being converted to likelihoods.
+ final int[] pls = genotype.getPL();
+ final int[] newPLs = new int[pls.length];
+ for (int i = 0; i < pls.length; i++) {
+ newPLs[i] = Math.min(maximalPLDifference, pls[i]);
+ }
+ hFp.addToLogLikelihoods(snp, ctx.getAlleles(), GenotypeLikelihoods.fromPLs(newPLs).getAsVector());
+ // NOTE(review): a fresh hFp is created per site and Fingerprint.add() stores it under its
+ // haplotype block, so a later site in the same block appears to replace the entry from an
+ // earlier site rather than combine with it - confirm this is intended.
+ fp.add(hFp);
+ } else {
+
+ if (genotype.isNoCall()) continue;
+
+ // TODO: when multiple genotypes are available for a Haplotype check that they
+ // TODO: agree. Not urgent since DownloadGenotypes already does this.
+ if (fp.containsKey(h)) continue;
+
+ final boolean hom = genotype.isHom();
+ final byte allele = StringUtil.toUpperCase(genotype.getAllele(0).getBases()[0]);
+
+ // The called genotype gets probability (1 - error rate); each of the other two gets half the error rate.
+ // Order: hom for the SNP's allele1, het, hom for the SNP's allele2.
+ final double halfError = this.genotypingErrorRate / 2;
+ final double accuracy = 1 - this.genotypingErrorRate;
+ final double[] probs = new double[]{
+ (hom && allele == snp.getAllele1()) ? accuracy : halfError,
+ (!hom) ? accuracy : halfError,
+ (hom && allele == snp.getAllele2()) ? accuracy : halfError
+ };
+
+ fp.add(new HaplotypeProbabilitiesFromGenotype(snp, h, probs[0], probs[1], probs[2]));
+ }
+ }
+ }
+ }
+
+ return fingerprints;
+ }
+
+ /**
+ * Quick method to check and see if the variant context represents a usable SNP variant. Unfortunately
+ * ctx.isSnp doesn't always work if the genotype(s) are all monomorphic and the alternate allele isn't
+ * listed.
+ */
+ public static boolean isUsableSnp(final VariantContext ctx) {
+     // Filtered records, indels and mixed records are never usable.
+     if (ctx.isFiltered() || ctx.isIndel() || ctx.isMixed()) {
+         return false;
+     }
+
+     // Every allele listed must be exactly one base long.
+     for (final Allele allele : ctx.getAlleles()) {
+         if (allele.length() != 1) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+
+ /**
+ * Takes a set of fingerprints and returns an IntervalList containing all the loci that
+ * can be productively examined in sequencing data to compare to one or more of the
+ * fingerprints.
+ */
+ public IntervalList getLociToGenotype(final Collection<Fingerprint> fingerprints) {
+     final IntervalList loci = new IntervalList(this.haplotypes.getHeader());
+
+     // One single-base interval per SNP of every haplotype block present in any fingerprint.
+     for (final Fingerprint fingerprint : fingerprints) {
+         for (final HaplotypeProbabilities probabilities : fingerprint.values()) {
+             for (final Snp snp : probabilities.getHaplotype().getSnps()) {
+                 final Interval locus = new Interval(snp.getChrom(), snp.getPos(), snp.getPos(), false, snp.getName());
+                 loci.add(locus);
+             }
+         }
+     }
+
+     // Blocks shared between fingerprints contribute the same loci repeatedly; sort, then collapse them.
+     loci.sort();
+     loci.unique();
+     return loci;
+ }
+
+ /**
+ * Generates a Fingerprint per read group in the supplied SAM file using the loci provided in
+ * the interval list.
+ */
+ public Map<SAMReadGroupRecord, Fingerprint> fingerprintSamFile(final File samFile, final IntervalList loci) {
+     final SAMFileReader in = new SAMFileReader(samFile);
+     try {
+         in.enableIndexCaching(true);
+         SequenceUtil.assertSequenceDictionariesEqual(this.haplotypes.getHeader().getSequenceDictionary(),
+                 in.getFileHeader().getSequenceDictionary());
+
+         final SamLocusIterator iterator = new SamLocusIterator(in, loci, in.hasIndex());
+         iterator.setEmitUncoveredLoci(true);
+         iterator.setMappingQualityScoreCutoff(this.minimumMappingQuality);
+         iterator.setQualityScoreCutoff(this.minimumBaseQuality);
+
+         // In some cases it is useful to allow duplicate reads to be used - the most common is in single-end
+         // sequence data where the duplicate marking may have been overly aggressive, and there is useful
+         // non-redundant data in the reads marked as "duplicates".
+         if (this.allowDuplicateReads) {
+             final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>(1);
+             filters.add(new NotPrimaryAlignmentFilter());
+             iterator.setSamFilters(filters);
+         }
+
+         final Map<SAMReadGroupRecord, Fingerprint> fingerprintsByReadGroup = new HashMap<SAMReadGroupRecord, Fingerprint>();
+         final List<SAMReadGroupRecord> rgs = in.getFileHeader().getReadGroups();
+
+         // Seed every read group with an empty sequence-based entry for each haplotype block.
+         for (final SAMReadGroupRecord rg : rgs) {
+             final Fingerprint fingerprint = new Fingerprint(rg.getSample(),
+                     samFile,
+                     rg.getPlatformUnit() != null ? rg.getPlatformUnit() : rg.getId());
+             fingerprintsByReadGroup.put(rg, fingerprint);
+
+             for (final HaplotypeBlock h : this.haplotypes.getHaplotypes()) {
+                 fingerprint.add(new HaplotypeProbabilitiesFromSequence(h));
+             }
+         }
+
+         // Set of read/template names from which we have already sampled a base and a qual. Since we assume
+         // that all evidence for a haplotype is independent we can't sample two or more bases from a single
+         // read or read-pair because they would not be independent!
+         final Set<String> usedReadNames = new HashSet<String>(10000);
+
+         // Now go through the data at each locus and figure stuff out!
+         for (final SamLocusIterator.LocusInfo info : iterator) {
+             // TODO: Filter out the locus if the allele balance doesn't make sense for either a
+             // TODO: 50/50 het or a hom with some errors; in HS data with deep coverage any base
+             // TODO: with major strand bias could cause errors
+
+             // Find the matching Snp and HaplotypeProbs
+             final HaplotypeBlock haplotypeBlock = this.haplotypes.getHaplotype(info.getSequenceName(), info.getPosition());
+             final Snp snp = this.haplotypes.getSnp(info.getSequenceName(), info.getPosition());
+
+             for (final SamLocusIterator.RecordAndOffset rec : info.getRecordAndPositions()) {
+                 final SAMReadGroupRecord rg = rec.getRecord().getReadGroup();
+                 if (rg == null || !fingerprintsByReadGroup.containsKey(rg)) {
+                     final PicardException e = new PicardException("Unknown read group: " + rg);
+                     log.error(e);
+                     throw e;
+                 }
+                 else {
+                     final String readName = rec.getRecord().getReadName();
+                     if (!usedReadNames.contains(readName)) {
+                         final HaplotypeProbabilitiesFromSequence probs = (HaplotypeProbabilitiesFromSequence) fingerprintsByReadGroup.get(rg).get(haplotypeBlock);
+                         final byte base = StringUtil.toUpperCase(rec.getReadBase());
+                         final byte qual = rec.getBaseQuality();
+
+                         probs.addToProbs(snp, base, qual);
+                         usedReadNames.add(readName);
+                     }
+                 }
+             }
+         }
+
+         return fingerprintsByReadGroup;
+     } finally {
+         // Release the file handle on every exit path, including the exceptions thrown above;
+         // this method is called once per input file, so a leak here scales with the number of files.
+         htsjdk.samtools.util.CloserUtil.close(in);
+     }
+ }
+
+ /**
+ * Generates a per-sample Fingerprint for the contaminant in the supplied SAM file.
+ * Data is aggregated by sample, not read-group.
+ */
+ public Map<String, Fingerprint> identifyContaminant(final File samFile, final double contamination, final int locusMaxReads) {
+     final SamReader in = SamReaderFactory.makeDefault().enable(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES).open(samFile);
+     try {
+         SequenceUtil.assertSequenceDictionariesEqual(this.haplotypes.getHeader().getSequenceDictionary(),
+                 in.getFileHeader().getSequenceDictionary());
+
+         final SamLocusIterator iterator = new SamLocusIterator(in, haplotypes.getIntervalList(), in.hasIndex());
+         iterator.setEmitUncoveredLoci(true);
+         iterator.setMappingQualityScoreCutoff(this.minimumMappingQuality);
+         iterator.setQualityScoreCutoff(this.minimumBaseQuality);
+
+         // In some cases it is useful to allow duplicate reads to be used - the most common is in single-end
+         // sequence data where the duplicate marking may have been overly aggressive, and there is useful
+         // non-redundant data in the reads marked as "duplicates".
+         if (this.allowDuplicateReads) {
+             final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>(1);
+             filters.add(new NotPrimaryAlignmentFilter());
+             iterator.setSamFilters(filters);
+         }
+
+         // One fingerprint per sample (not per read group), seeded with an empty entry for each haplotype block.
+         final Map<String, Fingerprint> fingerprintsBySample = new HashMap<>();
+         for (final SAMReadGroupRecord rg : in.getFileHeader().getReadGroups()) {
+             if (!fingerprintsBySample.containsKey(rg.getSample())) {
+                 final Fingerprint fingerprint = new Fingerprint(rg.getSample(),
+                         samFile,
+                         rg.getSample());
+
+                 for (final HaplotypeBlock h : this.haplotypes.getHaplotypes()) {
+                     fingerprint.add(new HaplotypeProbabilitiesFromContaminatorSequence(h, contamination));
+                 }
+                 fingerprintsBySample.put(rg.getSample(), fingerprint);
+             }
+         }
+
+         // Set of read/template names from which we have already sampled a base and a qual. Since we assume
+         // that all evidence for a haplotype is independent we can't sample two or more bases from a single
+         // read or read-pair because they would not be independent!
+         final Set<String> usedReadNames = new HashSet<>(10000);
+
+         // Now go through the data at each locus and figure stuff out!
+         for (final SamLocusIterator.LocusInfo info : iterator) {
+
+             // Find the matching Snp and HaplotypeProbs
+             final HaplotypeBlock haplotypeBlock = this.haplotypes.getHaplotype(info.getSequenceName(), info.getPosition());
+             final Snp snp = this.haplotypes.getSnp(info.getSequenceName(), info.getPosition());
+
+             // randomly select locusMaxReads elements from the list
+             final List<SamLocusIterator.RecordAndOffset> recordAndOffsetList = randomSublist(info.getRecordAndPositions(), locusMaxReads);
+
+             for (final SamLocusIterator.RecordAndOffset rec : recordAndOffsetList) {
+                 final SAMReadGroupRecord rg = rec.getRecord().getReadGroup();
+                 if (rg == null || !fingerprintsBySample.containsKey(rg.getSample())) {
+                     final PicardException e = new PicardException("Unknown sample: " + (rg != null ? rg.getSample() : "(null readgroup)"));
+                     log.error(e);
+                     throw e;
+                 } else {
+                     final String readName = rec.getRecord().getReadName();
+                     if (!usedReadNames.contains(readName)) {
+                         final HaplotypeProbabilitiesFromContaminatorSequence probs =
+                                 (HaplotypeProbabilitiesFromContaminatorSequence) fingerprintsBySample.get(rg.getSample()).get(haplotypeBlock);
+                         final byte base = StringUtil.toUpperCase(rec.getReadBase());
+                         final byte qual = rec.getBaseQuality();
+
+                         probs.addToProbs(snp, base, qual);
+                         usedReadNames.add(readName);
+                     }
+                 }
+             }
+         }
+
+         return fingerprintsBySample;
+     } finally {
+         // Release the file handle on every exit path, including the exceptions thrown above.
+         htsjdk.samtools.util.CloserUtil.close(in);
+     }
+ }
+
+ /**
+ * A small utility function to choose n random elements (un-shuffled) from a list
+ *
+ * @param list A list of elements
+ * @param n a number of elements requested from list
+ * @return a list of n randomly chosen (but in the original order) elements from list.
+ * If the list has n or fewer elements it is returned in its entirety.
+ */
+ protected static <T> List<T> randomSublist(final List<T> list, final int n) {
+     int remaining = list.size();
+     // Nothing to sample: the caller gets the very same list back.
+     if (remaining <= n) {
+         return list;
+     }
+
+     int needed = n;
+     final Random random = new Random();
+     final List<T> chosen = new ArrayList<>(n);
+     for (final T element : list) {
+         // Keep the current element with probability (still needed) / (still available); once the
+         // two are equal the probability is 1, so exactly n elements are always collected.
+         final double keepProbability = needed / (double) remaining;
+         if (random.nextDouble() < keepProbability) {
+             chosen.add(element);
+             needed--;
+         }
+         if (needed == 0) {
+             break; // fast out if do not need more elements
+         }
+         remaining--;
+     }
+
+     return chosen;
+ }
+
+ /**
+ * Fingerprints one or more SAM/BAM files at all available loci within the haplotype map, using multiple threads
+ * to speed up the processing.
+ */
+ public Map<SAMReadGroupRecord, Fingerprint> fingerprintSamFiles(final Collection<File> files, final int threads,
+                                                                  final int waitTime, final TimeUnit waitTimeUnit) {
+
+     // Generate fingerprints from each BAM file first
+     final AtomicInteger filesRead = new AtomicInteger(0);
+     final ExecutorService executor = Executors.newFixedThreadPool(threads);
+     final IntervalList intervals = this.haplotypes.getIntervalList();
+     final Map<SAMReadGroupRecord, Fingerprint> retval = new ConcurrentHashMap<SAMReadGroupRecord, Fingerprint>();
+
+     for (final File f : files) {
+         executor.submit(new Runnable() {
+             @Override public void run() {
+                 try {
+                     retval.putAll(fingerprintSamFile(f, intervals));
+                 } catch (final RuntimeException e) {
+                     // The Future returned by submit() is never inspected, so without this the
+                     // failure of a file would go completely unnoticed.
+                     log.error(e, "Failed to fingerprint " + f);
+                     throw e;
+                 }
+
+                 // Use the value returned by the increment so the reported count matches the check.
+                 final int processed = filesRead.incrementAndGet();
+                 if (processed % 100 == 0) {
+                     log.info("Processed " + processed + " out of " + files.size());
+                 }
+             }
+         });
+     }
+
+     executor.shutdown();
+     try {
+         // awaitTermination returns false on timeout; the map then holds only the files finished so far.
+         if (!executor.awaitTermination(waitTime, waitTimeUnit)) {
+             log.warn("Timed out after " + waitTime + " " + waitTimeUnit
+                     + " waiting for fingerprinting to finish; results may be incomplete.");
+         }
+     }
+     catch (InterruptedException ie) {
+         log.warn(ie, "Interrupted while waiting for executor to terminate.");
+         // Restore the interrupt flag so callers further up can still react to the interruption.
+         Thread.currentThread().interrupt();
+     }
+
+     return retval;
+ }
+
+ /**
+ * Takes a collection of fingerprints and, assuming that they are independent, merges the fingerprints
+ * by sample and totals up the probabilities.
+ */
+ static public SortedMap<String, Fingerprint> mergeFingerprintsBySample(final Collection<Fingerprint> inputs) {
+     final SortedMap<String, Fingerprint> mergedBySample = new TreeMap<String, Fingerprint>();
+
+     for (final Fingerprint input : inputs) {
+         final String sample = input.getSample();
+
+         // Lazily create the per-sample accumulator; it has no source file and uses the sample name as its info.
+         Fingerprint accumulator = mergedBySample.get(sample);
+         if (accumulator == null) {
+             accumulator = new Fingerprint(sample, null, sample);
+             mergedBySample.put(sample, accumulator);
+         }
+
+         accumulator.merge(input);
+     }
+
+     return mergedBySample;
+ }
+
+
+ /**
+ * Top level method to take a set of one or more SAM files and one or more Genotype files and compare
+ * each read group in each SAM file to each set of fingerprint genotypes.
+ *
+ * @param samFiles the list of SAM files to fingerprint
+ * @param genotypeFiles the list of genotype files from which to pull fingerprint genotypes
+ * @param specificSample an optional single sample whose genotypes to load from the supplied files
+ * @param ignoreReadGroups aggregate data into one fingerprint per file, instead of splitting by RG
+ */
+ public List<FingerprintResults> checkFingerprints(final List<File> samFiles,
+                                                   final List<File> genotypeFiles,
+                                                   final String specificSample,
+                                                   final boolean ignoreReadGroups) {
+     // Load the fingerprint genotypes
+     final List<Fingerprint> expectedFingerprints = new LinkedList<Fingerprint>();
+     for (final File genotypeFile : genotypeFiles) {
+         expectedFingerprints.addAll(loadFingerprints(genotypeFile, specificSample).values());
+     }
+
+     if (expectedFingerprints.isEmpty()) {
+         throw new IllegalStateException("Could not find any fingerprints in: " + genotypeFiles);
+     }
+
+     final List<FingerprintResults> resultsList = new ArrayList<FingerprintResults>();
+     final IntervalList intervals = getLociToGenotype(expectedFingerprints);
+
+     // Fingerprint the SAM files and calculate the results
+     for (final File samFile : samFiles) {
+         final Map<SAMReadGroupRecord, Fingerprint> observedByReadGroup = fingerprintSamFile(samFile, intervals);
+
+         if (!ignoreReadGroups) {
+             // One result set per read group, each compared against every expected fingerprint.
+             for (final Map.Entry<SAMReadGroupRecord, Fingerprint> entry : observedByReadGroup.entrySet()) {
+                 final FingerprintResults perReadGroup = new FingerprintResults(samFile, entry.getKey().getPlatformUnit());
+                 for (final Fingerprint expectedFp : expectedFingerprints) {
+                     perReadGroup.addResults(calculateMatchResults(entry.getValue(), expectedFp, 0, pLossofHet));
+                 }
+                 resultsList.add(perReadGroup);
+             }
+             continue;
+         }
+
+         // Read groups are ignored: pool all of the file's read groups into a single fingerprint first.
+         final Fingerprint pooled = new Fingerprint(specificSample, samFile, null);
+         for (final Fingerprint observedFp : observedByReadGroup.values()) {
+             pooled.merge(observedFp);
+         }
+
+         final FingerprintResults perFile = new FingerprintResults(samFile, specificSample);
+         for (final Fingerprint expectedFp : expectedFingerprints) {
+             perFile.addResults(calculateMatchResults(pooled, expectedFp, 0, pLossofHet));
+         }
+         resultsList.add(perFile);
+     }
+
+     return resultsList;
+ }
+
+ /**
+ * Compares two fingerprints and calculates a MatchResults object which contains detailed
+ * information about the match (or mismatch) between fingerprints including the LOD score
+ * for whether or not the two are likely from the same sample.
+ *
+ * If comparing sequencing data to genotype data then the sequencing data should be passed
+ * as the observedFp and the genotype data as the expectedFp in order to get the best output.
+ *
+ * In the cases where the most likely genotypes from the two fingerprints do not match the
+ * lExpectedSample is Max(actualpExpectedSample, minPExpected).
+ *
+ * @param observedFp the fingerprint being tested; haplotype blocks it lacks are skipped
+ * @param expectedFp the fingerprint whose haplotype blocks drive the comparison and whose
+ * source and sample are recorded in the returned MatchResults
+ * @param minPExpected floor applied (as log10) to each locus' expected-sample likelihood
+ * @param pLoH value passed to HaplotypeProbabilityOfNormalGivenTumor for both fingerprints
+ * (presumably the probability of loss of heterozygosity -- confirm against that class)
+ * @return a MatchResults holding the summed likelihoods/LODs and one LocusResult per shared block
+ */
+ public static MatchResults calculateMatchResults(final Fingerprint observedFp, final Fingerprint expectedFp, final double minPExpected, final double pLoH) {
+ final List<LocusResult> locusResults = new ArrayList<LocusResult>();
+
+ // Running sums; they are only updated for loci where both fingerprints have evidence.
+ double llThisSample = 0;
+ double llOtherSample = 0;
+
+ double lodExpectedSampleTumorNormal = 0;
+ double lodExpectedSampleNormalTumor = 0;
+
+ // log10 of the floor. For minPExpected == 0 this is -Infinity, so the Math.max below never clamps.
+ final double lminPExpected = Math.log10(minPExpected);
+
+ // Walk the expected fingerprint's haplotypes and look each one up in the observed fingerprint.
+ for (final HaplotypeProbabilities probs2 : expectedFp.values()) {
+ final HaplotypeBlock haplotypeBlock = probs2.getHaplotype();
+ final HaplotypeProbabilities probs1 = observedFp.get(haplotypeBlock);
+ if (probs1 == null) continue;
+
+ // pLoH-adjusted views of both sides, used only for the two tumor/normal terms of the LocusResult.
+ final HaplotypeProbabilityOfNormalGivenTumor normalizedProbs1 = new HaplotypeProbabilityOfNormalGivenTumor(probs1, pLoH);
+ final HaplotypeProbabilityOfNormalGivenTumor normalizedProbs2 = new HaplotypeProbabilityOfNormalGivenTumor(probs2, pLoH);
+
+ // If one is from genotype data we'd like to report the output relative
+ // to the genotyped SNP instead of against a random SNP from the haplotype
+ final Snp snp = probs2.getRepresentativeSnp();
+ final DiploidGenotype externalGenotype = probs2.getMostLikelyGenotype(snp);
+ final LocusResult lr = new LocusResult(snp,
+ externalGenotype,
+ probs1.getMostLikelyGenotype(snp),
+ probs1.getObsAllele1(),
+ probs1.getObsAllele2(),
+ probs1.getLodMostProbableGenotype(),
+ probs1.shiftedLogEvidenceProbabilityGivenOtherEvidence(probs2),
+ probs1.shiftedLogEvidenceProbability(),
+ probs2.shiftedLogEvidenceProbabilityGivenOtherEvidence(normalizedProbs1)-probs2.shiftedLogEvidenceProbability(),
+ probs1.shiftedLogEvidenceProbabilityGivenOtherEvidence(normalizedProbs2)-probs1.shiftedLogEvidenceProbability());
+ // Every shared block is reported, even when it does not contribute to the sums below.
+ locusResults.add(lr);
+
+ if (probs1.hasEvidence() && probs2.hasEvidence()) {
+ final double lRandom = lr.lRandomSample();
+ //TODO: what's the mathematics behind the lminPexpected?
+ final double lExpected = Math.max(lminPExpected, lr.lExpectedSample());
+
+ llThisSample += lExpected;
+ llOtherSample += lRandom;
+ lodExpectedSampleTumorNormal += lr.getLodExpectedSampleTumorNormal();
+ lodExpectedSampleNormalTumor += lr.getLodExpectedSampleNormalTumor();
+ }
+ }
+
+ // TODO: prune the set of LocusResults for things that are too close together?
+ return new MatchResults(expectedFp.getSource(), expectedFp.getSample(), llThisSample, llOtherSample, lodExpectedSampleTumorNormal, lodExpectedSampleNormalTumor, locusResults);
+ }
+
+ /**
+ * Compares two fingerprints and calculates a MatchResults object which contains detailed
+ * information about the match (or mismatch) between fingerprints including the LOD score
+ * for whether or not the two are likely from the same sample.
+ *
+ * If comparing sequencing data to genotype data then the sequencing data should be passed
+ * as the observedFp and the genotype data as the expectedFp in order to get the best output.
+ *
+ * This overload delegates to the four-argument version with minPExpected = 0 and pLoH = 0.
+ *
+ * @param observedFp the fingerprint being tested
+ * @param expectedFp the fingerprint to compare against
+ * @return the MatchResults produced by the four-argument overload
+ */
+ public static MatchResults calculateMatchResults(final Fingerprint observedFp, final Fingerprint expectedFp) {
+ return calculateMatchResults(observedFp, expectedFp, 0, 0);
+ }
+}
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/fingerprint/FingerprintResults.java
similarity index 54%
copy from src/java/picard/illumina/parser/ReadType.java
copy to src/java/picard/fingerprint/FingerprintResults.java
index bd69cd5..5d6b0bc 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/fingerprint/FingerprintResults.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2011 The Broad Institute
+ * Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -22,18 +22,34 @@
* THE SOFTWARE.
*/
-package picard.illumina.parser;
+package picard.fingerprint;
+
+import java.io.File;
+import java.util.SortedSet;
+import java.util.TreeSet;
/**
-* A read type describes a stretch of cycles in an ReadStructure
-* (e.g. Assume we have a paired end/barcoded run with the 76 template cycles followed by 8 barcode cycles followed by
-* another 76 template reads, the run would be represented by the ReadStructure 76T8B76T)
-* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
-**/
-public enum ReadType {
- T, B, S;
+ * Class that is used to represent the results of comparing a read group within a SAM file
+ * against one or more sets of fingerprint genotypes.
+ *
+ * @author Tim Fennell
+ */
+public class FingerprintResults {
+ private final File samFile;
+ private final String readGroup;
+ private final SortedSet<MatchResults> matchResults = new TreeSet<>();
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+ public FingerprintResults(final File samFile, final String readGroup) {
+ this.samFile = samFile;
+ this.readGroup = readGroup;
+ }
+
+ public void addResults(final MatchResults matchResults) {
+ this.matchResults.add(matchResults);
+ }
+
+ public File getSamFile() { return samFile; }
+ public String getReadGroup() { return readGroup; }
+ public SortedSet<MatchResults> getMatchResults() { return matchResults; }
}
+
diff --git a/src/java/picard/fingerprint/GenotypeReader.java b/src/java/picard/fingerprint/GenotypeReader.java
new file mode 100644
index 0000000..fc68728
--- /dev/null
+++ b/src/java/picard/fingerprint/GenotypeReader.java
@@ -0,0 +1,167 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import picard.PicardException;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Interval;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.OverlapDetector;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.util.CloseableIterator;
+import htsjdk.tribble.readers.LineIterator;
+import htsjdk.tribble.readers.LineIteratorImpl;
+import htsjdk.tribble.readers.LineReaderUtil;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFCodec;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.util.Collection;
+
+/**
+ * Class that abstracts away the source of genotypes and provides abilities to read them
+ * in from various sources into VariantContext objects
+ *
+ * @deprecated Please use VCFFileReader instead of this class.
+ */
+public class GenotypeReader {
+ /** Small class to encapsulate an iterator over variants, optionally with a sequence dictionary. */
+ public abstract static class VariantIterator implements CloseableIterator<VariantContext> {
+ private final SAMSequenceDictionary dictionary;
+ private final Object header;
+
+ protected VariantIterator(final SAMSequenceDictionary dictionary, final Object header) {
+ this.dictionary = dictionary;
+ this.header = header;
+ }
+
+ public SAMSequenceDictionary getSequenceDictionary() {
+ return this.dictionary;
+ }
+
+ public Object getHeader() {
+ return header;
+ }
+ }
+
+ /**
+ * Reads in a file that contains genotype data and returns an iterator over every entry
+ * in the file as VariantContext objects.
+ *
+ * @deprecated Please use VCFFileReader in Picard-public instead of this class.
+ */
+ public VariantIterator read(final File file) {
+ if (isVcf(file)) return readVcf(file);
+ else throw new PicardException("File doe not appear to be of a supported type: " + file);
+ }
+
+ /**
+ * Reads in the file and returns an iterator over the set of entries in the file
+ * that overlap with the interval supplied.
+ *
+ * The returned iterator wraps the unfiltered iterator from {@code read(file)}: it reuses
+ * that iterator's sequence dictionary and header, and closing it closes the wrapped iterator.
+ *
+ * @param file the genotype file to read
+ * @param intervals the intervals a variant must overlap in order to be returned
+ * @return an iterator over the overlapping variants only
+ * @deprecated Please use VCFFileReader in Picard-public instead of this class.
+ */
+ public VariantIterator read(final File file, final IntervalList intervals) {
+ final VariantIterator i = read(file);
+ // NOTE(review): the two zero arguments are presumably the detector's left/right buffers -- confirm.
+ final OverlapDetector<Interval> detector = new OverlapDetector<Interval>(0,0);
+ // Each interval is registered with itself as the object returned for an overlap.
+ detector.addAll(intervals.getIntervals(), intervals.getIntervals());
+
+ // A little iterator that iterates over the full set of genotypes and returns just the ones
+ // the client is interested in.
+ return new VariantIterator(i.getSequenceDictionary(), i.getHeader()) {
+ // Look-ahead buffer: the next overlapping variant, or null if none has been fetched yet.
+ private VariantContext next = null;
+
+ /** Advances the wrapped iterator until an overlapping variant is buffered or input runs out. */
+ @Override public boolean hasNext() {
+ if (next == null) {
+ while (i.hasNext()) {
+ final VariantContext ctx = i.next();
+ final Interval ctxInterval = new Interval(ctx.getChr(), ctx.getStart(), ctx.getEnd());
+ final Collection<Interval> hits = detector.getOverlaps(ctxInterval);
+ if (hits != null && !hits.isEmpty()) {
+ next = ctx;
+ break;
+ }
+ }
+ }
+
+ return next != null;
+ }
+
+ /** Returns the next VariantContext object if available. */
+ @Override public VariantContext next() {
+ if (!hasNext()) throw new IllegalStateException("next() called on exhausted iterator.");
+
+ // Hand out the buffered variant and clear the buffer so hasNext() fetches a fresh one.
+ final VariantContext ctx = next;
+ next = null;
+ return ctx;
+ }
+
+ @Override public void remove() { throw new UnsupportedOperationException(); }
+
+ /** Closes the wrapped, unfiltered iterator. */
+ @Override
+ public void close() {
+ i.close();
+ }
+ };
+ }
+
+ /** Tests whether a file is a VCF file or not. */
+ boolean isVcf(final File f) {
+ final String name = f.getName();
+ return (name.endsWith(".vcf") || name.endsWith(".vcf.gz"));
+ }
+
+ /**
+  * Opens a VCF file, reads its header and returns an iterator over the VariantContext
+  * objects decoded from the remaining lines. The iterator is created without a sequence
+  * dictionary (null) and carries the parsed header object; the caller must close it.
+  *
+  * @param file the VCF file to open
+  * @return an iterator over the records of the file
+  * @deprecated Please use VCFFileReader in Picard-public instead of this class.
+  */
+ VariantIterator readVcf(final File file) {
+     final LineIterator reader = new LineIteratorImpl(LineReaderUtil.fromBufferedStream(new BufferedInputStream(IOUtil.openFileForReading(file))));
+     final VCFCodec codec = new VCFCodec();
+     final Object header;
+     try {
+         header = codec.readActualHeader(reader);
+     } catch (final RuntimeException e) {
+         // Header parsing failed before any iterator was handed out, so no caller can
+         // close the stream: release it here and rethrow the original failure.
+         codec.close(reader);
+         throw e;
+     }
+
+     return new VariantIterator(null, header) {
+         @Override public boolean hasNext() {
+             return reader.hasNext();
+         }
+
+         /** Decodes the next line of the file into a VariantContext. */
+         @Override public VariantContext next() {
+             return codec.decode(reader.next());
+         }
+
+         @Override public void remove() {
+             throw new UnsupportedOperationException();
+         }
+
+         /** Closes the underlying line reader via the codec. */
+         @Override
+         public void close() {
+             codec.close(reader);
+         }
+     };
+ }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeBlock.java b/src/java/picard/fingerprint/HaplotypeBlock.java
new file mode 100644
index 0000000..d730426
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeBlock.java
@@ -0,0 +1,169 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import picard.PicardException;
+
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Represents information about a group of SNPs that form a haplotype in perfect LD
+ * with one another.
+ *
+ * @author Tim Fennell
+ */
+public class HaplotypeBlock implements Comparable<HaplotypeBlock> {
+ private final double maf;
+ private final Map<String,Snp> snpsByName = new HashMap<String,Snp>();
+ private final double[] haplotypeFrequencies = new double[3];
+
+ private Snp firstSnp;
+ private String chrom;
+ private int start;
+ private int end;
+
+ /** Constructs a haplotype block with the provided minor allele frequency. */
+ public HaplotypeBlock(final double maf) {
+ this.maf = maf;
+
+ // Set the haplotype frequencies assuming hardy-weinberg
+ final double majorAf = (1 - maf);
+ this.haplotypeFrequencies[0] = majorAf * majorAf;
+ this.haplotypeFrequencies[1] = majorAf * maf * 2;
+ this.haplotypeFrequencies[2] = maf * maf;
+ }
+
+ /** Gets the set of haplotype frequencies. */
+ public double[] getHaplotypeFrequencies() { return this.haplotypeFrequencies; }
+
+ /** Adds a SNP to the haplotype. Will throw an exception if the SNP is on the wrong chromosome. */
+ public void addSnp(final Snp snp) {
+ if (this.snpsByName.isEmpty()) {
+ this.chrom = snp.getChrom();
+ this.start = snp.getPos();
+ this.end = snp.getPos();
+ this.firstSnp = snp;
+ }
+ else if (!this.chrom.equals(snp.getChrom())) {
+ throw new PicardException("Snp chromosome " + snp.getChrom() +
+ " does not agree with chromosome of existing snp(s): " + this.chrom);
+ }
+ else {
+ if (snp.getPos() < this.start) {
+ this.start = snp.getPos();
+ this.firstSnp = snp;
+ }
+ if (snp.getPos() > this.end) {
+ this.end = snp.getPos();
+ }
+ }
+
+ this.snpsByName.put(snp.getName(), snp);
+ }
+
+ /** Gets a SNP by name if it belongs to this haplotype. */
+ public Snp getSnp(final String name) {
+ return this.snpsByName.get(name);
+ }
+
+ /** Gets the SNP with the lowest position in the haplotype (the first one added wins ties). */
+ public Snp getFirstSnp() {
+ return this.firstSnp;
+ }
+
+ /** Returns true if the SNP is contained within the haplotype block, false otherwise. */
+ public boolean contains(final Snp snp) {
+ // Check is performed on SNP name and position because of the fact that some SNPs
+ // have multiple mappings in the genome and we're paranoid!
+ final Snp contained = this.snpsByName.get(snp.getName());
+ return contained != null && contained.getChrom().equals(snp.getChrom()) &&
+ contained.getPos() == snp.getPos();
+ }
+
+ /** Returns the number of SNPs within the haplotype block. */
+ public int size() {
+ return snpsByName.size();
+ }
+
+ /** Returns an unmodifiable, unordered, collection of all SNPs in this haplotype block. */
+ public Collection<Snp> getSnps() {
+ return Collections.unmodifiableCollection(this.snpsByName.values());
+ }
+
+ /**
+ * Gets the frequency of the i'th diploid haplotype where haplotypes are ordered according
+ * to DiploidHaplotype.
+ */
+ public double getHaplotypeFrequency(final int i) {
+ if (i < 0 || i > 2) throw new IllegalArgumentException("Illegal haplotype index " + i);
+ else return this.haplotypeFrequencies[i];
+ }
+
+ /** Returns the minor allele frequency of this haplotype. */
+ public double getMaf() { return this.maf; }
+
+ /**
+ * Gets the expected genotype of the provided SNP given the provided haplotype of this
+ * haplotype block.
+ */
+ public DiploidGenotype getSnpGenotype(final Snp snp, final DiploidHaplotype haplotype) {
+ if (!contains(snp)) throw new IllegalArgumentException("Snp is not part of haplotype " + snp);
+ return snp.getGenotype(haplotype);
+ }
+
+ /**
+ * Gets the diploid haplotype for this haplotype block given the provided SNP and SNP
+ * genotype.
+ */
+ public DiploidHaplotype getDiploidHaplotype(final Snp snp, final DiploidGenotype gt) {
+ if (!contains(snp)) throw new IllegalArgumentException("Snp is not part of haplotype " + snp);
+ return DiploidHaplotype.values()[snp.indexOf(gt)];
+ }
+
+ /**
+  * Orders haplotype blocks by chromosome name, then by start position, then by end position.
+  *
+  * @param that the block to compare against
+  * @return a negative, zero or positive value as this block sorts before, with or after {@code that}
+  */
+ @Override
+ public int compareTo(final HaplotypeBlock that) {
+     int retval = this.chrom.compareTo(that.chrom);
+     // Integer.compare rather than subtraction: the sign can never be flipped by int overflow.
+     if (retval == 0) retval = Integer.compare(this.start, that.start);
+     if (retval == 0) retval = Integer.compare(this.end, that.end);
+     return retval;
+ }
+
+ @Override public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+ else return this.compareTo((HaplotypeBlock) o) == 0;
+ }
+
+ @Override public int hashCode() {
+ return this.start;
+ }
+
+ @Override public String toString() {
+ return this.chrom + "[" + this.start + "-" + this.end + "]";
+ }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeMap.java b/src/java/picard/fingerprint/HaplotypeMap.java
new file mode 100644
index 0000000..19cdf8b
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeMap.java
@@ -0,0 +1,388 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import picard.PicardException;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.FormatUtil;
+import htsjdk.samtools.util.Interval;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMTextHeaderCodec;
+import htsjdk.samtools.util.StringLineReader;
+
+import java.io.*;
+import java.util.*;
+
+/**
+ * A collection of metadata about Haplotype Blocks including multiple in memory "indices" of the data
+ * to make it easy to query the correct HaplotypeBlock or Snp by snp names, positions etc. Also has the
+ * ability to read and write itself to and from files.
+ *
+ * @author Tim Fennell / Kathleen Tibbetts
+ */
+public class HaplotypeMap {
+ private final List<HaplotypeBlock> haplotypeBlocks = new ArrayList<HaplotypeBlock>();
+ private final Map<Snp, HaplotypeBlock> haplotypesBySnp = new HashMap<Snp, HaplotypeBlock>();
+ private final Map<String, HaplotypeBlock> haplotypesBySnpName = new HashMap<String, HaplotypeBlock>();
+ private final Map<String, HaplotypeBlock> haplotypesBySnpLocus = new HashMap<String, HaplotypeBlock>();
+ private final Map<String,Snp> snpsByPosition = new HashMap<String,Snp>();
+ private final IntervalList intervals;
+ private final SAMFileHeader header;
+
+ /**
+ * Constructs a HaplotypeMap from the provided file.
+ */
+ public HaplotypeMap(final File file) {
+ BufferedReader in = null;
+ try {
+ in = new BufferedReader(new InputStreamReader(IOUtil.openFileForReading(file)));
+ // Setup a reader and parse the header
+ final StringBuilder builder = new StringBuilder(4096);
+ String line = null;
+
+ while ((line = in.readLine()) != null) {
+ if (line.startsWith("@")) {
+ builder.append(line).append('\n');
+ }
+ else {
+ break;
+ }
+ }
+
+ if (builder.length() == 0) {
+ throw new IllegalStateException("Haplotype map file must contain header: " + file.getAbsolutePath());
+ }
+
+ this.header = new SAMTextHeaderCodec().decode(new StringLineReader(builder.toString()), "BufferedReader");
+ this.intervals = new IntervalList(header);
+
+ // Then read in the file
+ final FormatUtil format = new FormatUtil();
+ final List<HaplotypeMapFileEntry> entries = new ArrayList<HaplotypeMapFileEntry>();
+ final Map<String, HaplotypeBlock> anchorToHaplotype = new HashMap<String, HaplotypeBlock>();
+
+ do {
+ if (line.trim().length() == 0) continue; // skip over blank lines
+ if (line.startsWith("#")) continue; // ignore comments/headers
+
+ // Make sure we have the right number of fields
+ final String[] fields = line.split("\\t");
+ if (fields.length < 6 || fields.length > 8) {
+ throw new PicardException("Invalid haplotype map record contains " +
+ fields.length + " fields: " + line);
+ }
+
+ // Then parse them out
+ final String chrom = fields[0];
+ final int pos = format.parseInt(fields[1]);
+ final String name = fields[2];
+ final byte major = (byte)fields[3].charAt(0);
+ final byte minor = (byte)fields[4].charAt(0);
+ final double maf = format.parseDouble(fields[5]);
+ final String anchor = fields.length > 6 ? fields[6] : null;
+ final String fpPanels = fields.length > 7 ? fields[7] : null;
+ List<String> panels = null;
+ if (fpPanels != null) {
+ panels = new ArrayList<String>();
+ for (final String panel : fpPanels.split(",")) {
+ panels.add(panel);
+ }
+ }
+
+ // If it's the anchor snp, start the haplotype
+ if (anchor == null || anchor.trim().equals("") || name.equals(anchor)) {
+ final HaplotypeBlock type = new HaplotypeBlock(maf);
+ type.addSnp(new Snp(name, chrom, pos, major, minor, maf, panels));
+ anchorToHaplotype.put(name, type);
+ }
+ else { // Otherwise save it for later
+ final HaplotypeMapFileEntry entry = new HaplotypeMapFileEntry(
+ chrom, pos, name, major, minor, maf, anchor, panels);
+ entries.add(entry);
+ }
+ }
+ while ((line = in.readLine()) != null);
+
+ // Now, go through and add all the anchored snps
+ for (final HaplotypeMapFileEntry entry : entries) {
+ final HaplotypeBlock block = anchorToHaplotype.get(entry.anchorSnp);
+
+ if (block == null) {
+ throw new PicardException("No haplotype found for anchor snp " + entry.anchorSnp);
+ }
+
+ block.addSnp(new Snp(entry.snpName, entry.chromosome, entry.position,
+ entry.majorAllele, entry.minorAllele,
+ entry.minorAlleleFrequency, entry.panels));
+ }
+
+ // And add them all
+ for (final HaplotypeBlock block : anchorToHaplotype.values()) {
+ addHaplotype(block);
+ }
+ }
+ catch (IOException ioe) {
+ throw new PicardException("Error parsing haplotype map.", ioe);
+ }
+ finally {
+ if (in != null) {
+ try { in.close(); } catch (Exception e) { /* do nothing */ }
+ }
+ }
+ }
+
+ /** Constructs an empty HaplotypeMap using the provided SAMFileHeader's sequence dictionary. */
+ public HaplotypeMap(final SAMFileHeader header) {
+ this.header = header;
+ this.intervals = new IntervalList(header);
+ }
+
+ /**
+ * Adds a HaplotypeBlock to the map and updates all the relevant caches/indices.
+ */
+ public void addHaplotype(final HaplotypeBlock haplotypeBlock) {
+ this.haplotypeBlocks.add(haplotypeBlock);
+
+ for (final Snp snp : haplotypeBlock.getSnps()) {
+ this.haplotypesBySnp.put(snp, haplotypeBlock);
+ this.haplotypesBySnpName.put(snp.getName(), haplotypeBlock);
+ this.haplotypesBySnpLocus.put(toKey(snp.getChrom(), snp.getPos()), haplotypeBlock);
+ this.snpsByPosition.put(toKey(snp.getChrom(), snp.getPos()), snp);
+ this.intervals.add(new Interval(snp.getChrom(), snp.getPos(), snp.getPos(), false, snp.getName()));
+ }
+ }
+
+ /** Queries a HaplotypeBlock by Snp object. Returns NULL if none found. */
+ public HaplotypeBlock getHaplotype(final Snp snp) {
+ return this.haplotypesBySnp.get(snp);
+ }
+
+ /** Queries a HaplotypeBlock by Snp name. Returns NULL if none found. */
+ public HaplotypeBlock getHaplotype(final String snpName) {
+ return this.haplotypesBySnpName.get(snpName);
+ }
+
+ /** Queries a HaplotypeBlock by Snp chromosome and position. Returns NULL if none found. */
+ public HaplotypeBlock getHaplotype(final String chrom, final int pos) {
+ return this.haplotypesBySnpLocus.get(toKey(chrom, pos));
+ }
+
+ /** Returns an unmodifiable collection of all the haplotype blocks in the map. */
+ public List<HaplotypeBlock> getHaplotypes() {
+ return Collections.unmodifiableList(this.haplotypeBlocks);
+ }
+
+ /** Queries a Snp by chromosome and position. Returns NULL if none found. */
+ public Snp getSnp(final String chrom, final int pos) {
+ return this.snpsByPosition.get(toKey(chrom, pos));
+ }
+
+ /** Returns an unmodifiable collection of all SNPs in all Haplotype blocks. */
+ public Set<Snp> getAllSnps() {
+ return Collections.unmodifiableSet(haplotypesBySnp.keySet());
+ }
+
+ /** Returns an IntervalList with an entry for every SNP in every Haplotype in the map. */
+ public IntervalList getIntervalList() {
+ this.intervals.sort(); // TODO: should probably do this elsewhere
+ return this.intervals;
+ }
+
+ private String toKey(final String chrom, final int pos) {
+ return chrom + ":" + pos;
+ }
+
+ /**
+ * Returns a copy of this haplotype map that excludes haplotypes on the chromosomes provided.
+ * @param chroms a set of zero or more chromosome names
+ */
+ public HaplotypeMap withoutChromosomes(final Set<String> chroms) {
+ final HaplotypeMap out = new HaplotypeMap(getHeader());
+ for (final HaplotypeBlock block : this.haplotypeBlocks) {
+ if (!chroms.contains(block.getFirstSnp().getChrom())) {
+ out.addHaplotype(block);
+ }
+ }
+
+ return out;
+ }
+
+ /**
+  * Writes out a HaplotypeMap file with the contents of this map: the SAM text header (if
+  * any), a '#' column-header line, then one tab-separated row per SNP in sorted order.
+  * Within each haplotype block the first SNP in sorted order is written without an anchor
+  * and every other SNP of the block names that first SNP as its anchor.
+  *
+  * @param file the file to write to
+  * @throws PicardException if writing to or closing the file fails
+  */
+ public void writeToFile(final File file) {
+     // try-with-resources guarantees the writer is closed even when a write fails part-way.
+     try (final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(IOUtil.openFileForWriting(file)))) {
+         final FormatUtil format = new FormatUtil();
+
+         // Write out the header
+         if (this.header != null) {
+             final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
+             codec.encode(out, this.header);
+         }
+
+         // Write the header for the entries.
+         out.write("#CHROMOSOME\tPOSITION\tNAME\tMAJOR_ALLELE\tMINOR_ALLELE\tMAF\tANCHOR_SNP\tPANELS");
+         out.newLine();
+
+         final List<HaplotypeMapFileEntry> entries = new ArrayList<HaplotypeMapFileEntry>();
+         for (final HaplotypeBlock block : this.getHaplotypes()) {
+             // 'anchor' stays null for the first SNP of the block and is that SNP's name afterwards.
+             String anchor = null;
+             final SortedSet<Snp> snps = new TreeSet<Snp>(block.getSnps());
+
+             for (final Snp snp : snps) {
+                 entries.add(new HaplotypeMapFileEntry(snp.getChrom(), snp.getPos(), snp.getName(),
+                         snp.getAllele1(), snp.getAllele2(), snp.getMaf(), anchor, snp.getFingerprintPanels()));
+
+                 if (anchor == null) {
+                     anchor = snp.getName();
+                 }
+             }
+         }
+
+         Collections.sort(entries);
+         for (final HaplotypeMapFileEntry entry : entries) {
+             out.write(entry.chromosome + "\t");
+             out.write(format.format(entry.position) + "\t");
+             out.write(entry.snpName + "\t");
+             out.write((char)entry.majorAllele + "\t");
+             out.write((char)entry.minorAllele + "\t");
+             out.write(format.format(entry.minorAlleleFrequency) + "\t");
+             if (entry.anchorSnp != null) {
+                 out.write(entry.anchorSnp);
+             }
+             out.write("\t");
+             // getPanels() never returns null; it yields "" when there are no panels.
+             out.write(entry.getPanels());
+             out.newLine();
+         }
+     }
+     catch (IOException ioe) {
+         throw new PicardException("Error writing out maplotype map to file: " + file.getAbsolutePath(), ioe);
+     }
+ }
+
+ public SAMFileHeader getHeader() { return header; }
+
+ /**
+  * Class used to represent all the information for a row in a haplotype map file, used in
+  * reading and writing. This is an inner (non-static) class because its ordering consults
+  * the enclosing map's header to turn chromosome names into sequence indices.
+  */
+ private class HaplotypeMapFileEntry implements Comparable<HaplotypeMapFileEntry> {
+     private final String chromosome;
+     private final int position;
+     private final String snpName;
+     private final byte majorAllele;
+     private final byte minorAllele;
+     private final double minorAlleleFrequency;
+     private final String anchorSnp;      // null when the row has no anchor
+     private final List<String> panels;   // never null; empty when the row has no panels
+
+     public HaplotypeMapFileEntry(final String chrom, final int pos, final String name,
+                                  final byte major, final byte minor, final double maf,
+                                  final String anchorSnp, final List<String> fingerprintPanels) {
+         this.chromosome = chrom;
+         this.position = pos;
+         this.snpName = name;
+         this.majorAllele = major;
+         this.minorAllele = minor;
+         this.minorAlleleFrequency = maf;
+         this.anchorSnp = anchorSnp;
+
+         // Always copy and sort the list of panels so they are in a predictable order
+         this.panels = new ArrayList<String>();
+         if (fingerprintPanels != null) {
+             this.panels.addAll(fingerprintPanels);
+             Collections.sort(this.panels);
+         }
+     }
+
+     /** Returns the panels joined with commas, or the empty string when there are none. */
+     public String getPanels() {
+         final StringBuilder sb = new StringBuilder();
+
+         for (final String panel : panels) {
+             if (sb.length() > 0) sb.append(",");
+             sb.append(panel);
+         }
+
+         return sb.toString();
+     }
+
+     /**
+      * Orders entries by sequence index of the chromosome (looked up in the enclosing map's
+      * header), then position, SNP name, major allele, minor allele, minor allele frequency,
+      * anchor SNP (entries without an anchor first) and finally the joined panel string.
+      */
+     @Override
+     public int compareTo(final HaplotypeMapFileEntry that) {
+         // Integer.compare instead of subtraction so the sign cannot be flipped by overflow.
+         int diff = Integer.compare(header.getSequenceIndex(this.chromosome), header.getSequenceIndex(that.chromosome));
+         if (diff != 0) return diff;
+
+         diff = Integer.compare(this.position, that.position);
+         if (diff != 0) return diff;
+
+         diff = this.snpName.compareTo(that.snpName);
+         if (diff != 0) return diff;
+
+         diff = Byte.compare(this.majorAllele, that.majorAllele);
+         if (diff != 0) return diff;
+
+         diff = Byte.compare(this.minorAllele, that.minorAllele);
+         if (diff != 0) return diff;
+
+         diff = Double.compare(this.minorAlleleFrequency, that.minorAlleleFrequency);
+         if (diff != 0) return diff;
+
+         // A missing anchor sorts before any present anchor.
+         if (this.anchorSnp == null) {
+             diff = (that.anchorSnp == null) ? 0 : -1;
+         }
+         else {
+             diff = (that.anchorSnp == null) ? 1 : this.anchorSnp.compareTo(that.anchorSnp);
+         }
+         if (diff != 0) return diff;
+
+         // getPanels() never returns null, so the strings can be compared directly.
+         return this.getPanels().compareTo(that.getPanels());
+     }
+ }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilities.java b/src/java/picard/fingerprint/HaplotypeProbabilities.java
new file mode 100644
index 0000000..29bb2b0
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilities.java
@@ -0,0 +1,241 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import java.util.Arrays;
+
+import static java.lang.Math.log10;
+import static picard.util.MathUtil.multiply;
+import static picard.util.MathUtil.pNormalizeVector;
+
+/**
+ * Abstract class for storing and calculating various likelihoods and probabilities
+ * for haplotype alleles given evidence.
+ *
+ * @author Tim Fennell
+ */
+public abstract class HaplotypeProbabilities {
+
+ private final HaplotypeBlock haplotypeBlock;
+
+ protected HaplotypeProbabilities(final HaplotypeBlock haplotypeBlock) {
+ this.haplotypeBlock = haplotypeBlock;
+ }
+
+ /** Returns the haplotype for which the probabilities apply. */
+ public HaplotypeBlock getHaplotype() {
+ return this.haplotypeBlock;
+ }
+
+ public double [] getPriorProbablities(){
+ return getHaplotype().getHaplotypeFrequencies();
+ }
+
+ /**
+ * Returns the posterior probabilities, in order, of the AA, Aa and aa haplotypes, using the
+ * population frequency as a prior.
+ *
+ * Mathematically, this is P(H | D, F) where H is the vector of possible haplotypes {AA,Aa,aa},
+ * D is the data seen by the class, and F is the population frequency of each genotype.
+ */
+ public double[] getPosteriorProbabilities() {
+ return pNormalizeVector(multiply(getLikelihoods(), getPriorProbablities()));}
+
+ /**
+ * Returns the likelihoods, in order, of the AA, Aa and aa haplotypes given the evidence
+ *
+ * Mathematically this is P(evidence | haplotype) where haplotype={AA,Aa,aa}.
+ */
+ public abstract double[] getLikelihoods();
+
+    /** Returns the base-10 logarithm of each value from getLikelihoods(), in the same order. */
+    public double[] getLogLikelihoods() {
+        final double[] logs = getLikelihoods().clone();
+        for (int idx = 0; idx < logs.length; ++idx) {
+            logs[idx] = Math.log10(logs[idx]);
+        }
+        return logs;
+    }
+
+ /**
+ * Returns a representative SNP for this haplotype. Different subclasses may implement this in
+ * different ways, but should do so in a deterministic/repeatable fashion.
+ */
+ public abstract Snp getRepresentativeSnp();
+
+ /**
+ * Returns the number of observations of alleles supporting the first/major haplotype allele.
+ * Strictly this doesn't make sense for all subclasses, but it's nice to have it part of the API so
+ * a default implementation is provided here.
+ * @return int
+ */
+ public int getObsAllele1() { return 0; }
+
+ /**
+ * Returns the number of observations of alleles supporting the second/minor haplotype allele.
+ * Strictly this doesn't make sense for all subclasses, but it's nice to have it part of the API so
+ * a default implementation is provided here.
+ * @return int
+ */
+ public int getObsAllele2() { return 0; }
+
+ /**
+ * Returns the total number of observations of any allele.
+ * Strictly this doesn't make sense for all subclasses, but it's nice to have it part of the API so
+ * a default implementation is provided here.
+ * @return int
+ */
+ public int getTotalObs() { return 0; }
+
+ /** Returns true if evidence has been added, false if the probabilities are just the priors. */
+ public boolean hasEvidence() {
+ return true;
+ }
+
+ /** Merges in the likelihood information from the supplied haplotype probabilities object. */
+ public abstract void merge(final HaplotypeProbabilities other);
+
+    /**
+     * Returns the index of the highest posterior probability, which can then be used to look up
+     * DiploidHaplotypes or DiploidGenotypes as appropriate. Ties go to the higher index.
+     */
+    int getMostLikelyIndex() {
+        final double[] posterior = getPosteriorProbabilities();
+
+        final boolean firstIsStrictlyLargest = posterior[0] > posterior[1] && posterior[0] > posterior[2];
+        if (firstIsStrictlyLargest) return 0;
+        return posterior[1] > posterior[2] ? 1 : 2;
+    }
+
+ /** Gets the most likely haplotype given the probabilities. */
+ public DiploidHaplotype getMostLikelyHaplotype() {
+ return DiploidHaplotype.values()[getMostLikelyIndex()];
+ }
+
+ /** Gets the genotype for this Snp given the most likely haplotype. */
+ public DiploidGenotype getMostLikelyGenotype(final Snp snp) {
+ assertSnpPartOfHaplotype(snp);
+ return snp.getGenotype(getMostLikelyHaplotype());
+ }
+
+ /** Throws an exception if the passed SNP is not part of this haplotype. */
+ void assertSnpPartOfHaplotype(final Snp snp) {
+ if (!this.haplotypeBlock.getSnps().contains(snp)) {
+ throw new IllegalArgumentException("Snp " + snp + " does not belong to haplotype " + this.haplotypeBlock);
+ }
+ }
+
+    /**
+     * Returns the probability of the evidence collected, given a vector of priors on the haplotypes, computed
+     * from the internal likelihoods. Those may be scaled by an unknown factor, so the result is scaled too, hence the name.
+     *
+     * Mathematically:
+     * P(Evidence| P(h_i)=F_i) = \sum_i P(Evidence | h_i) P(h_i)
+     *                         = \sum_i P(Evidence | h_i) F_i
+     *                         = c * \sum_i Likelihood_i * F_i
+     *
+     * Here, h_i are the three possible haplotypes, F_i are the given priors, and Likelihood_i
+     * are the stored likelihoods which are scaled from the actual likelihoods by an unknown
+     * factor, c. Note that the calculation ignores the internal haplotype probabilities (i.e. priors).
+     *
+     * @param genotypeFrequencies vector of (possibly scaled) probabilities of the three haplotypes
+     * @return P(evidence | P_h) / c
+     * @throws IllegalArgumentException if genotypeFrequencies and the likelihoods differ in length
+     */
+    public double scaledEvidenceProbabilityUsingGenotypeFrequencies(final double[] genotypeFrequencies) {
+        final double[] likelihoods = getLikelihoods();
+        // Checked explicitly (not with assert) so a wrong-sized vector is rejected even without -ea.
+        if (genotypeFrequencies.length != likelihoods.length) {
+            throw new IllegalArgumentException("Expected " + likelihoods.length + " genotype frequencies but received " + genotypeFrequencies.length);
+        }
+        double result = 0;
+        for (int i = 0; i < likelihoods.length; ++i) {
+            result += likelihoods[i] * genotypeFrequencies[i];
+        }
+        return result;
+    }
+
+ public double shiftedLogEvidenceProbabilityUsingGenotypeFrequencies(final double[] genotypeFrequencies) {
+ return log10(scaledEvidenceProbabilityUsingGenotypeFrequencies(genotypeFrequencies));
+ }
+
+ /**
+ * Returns the log-probability of the evidence, using as priors the posteriors of another object.
+ *
+ * @param otherHp an additional HaplotypeProbabilities object representing the same underlying HaplotypeBlock
+ * @return log10( P(evidence| P(h_i)=P(h_i|otherHp) ) + c where c is an unknown constant
+ * @throws IllegalArgumentException if otherHp was built for a different HaplotypeBlock
+ */
+ public double shiftedLogEvidenceProbabilityGivenOtherEvidence(final HaplotypeProbabilities otherHp) {
+ if (!this.haplotypeBlock.equals(otherHp.getHaplotype())) {
+ throw new IllegalArgumentException("Haplotypes are from different HaplotypeBlocks!");
+ }
+ /* Get the posterior from otherHp and use it as the prior to calculate the probability.
+ *
+ * P(hap|x,y) = P(x|hap,y) P(hap|y) / P(x|y)
+ * = P(x | hap) * P(hap | y) / P(x)
+ * (the numerator is likelihood * other.posterior)
+ * = P(x|hap) P(y|hap) P(hap)/P(x)P(y)
+ * = A P(x| hap) P(y| hap) P(hap) # where A is an unknown scaling factor
+ */
+ return shiftedLogEvidenceProbabilityUsingGenotypeFrequencies(otherHp.getPosteriorProbabilities());
+ }
+
+ /**
+ * Returns log (p(evidence)) + c assuming that the prior on haplotypes is given by
+ * the internal haplotypeFrequencies
+ */
+ public double shiftedLogEvidenceProbability() {
+ return shiftedLogEvidenceProbabilityUsingGenotypeFrequencies(getPriorProbablities());
+ }
+
+    /** Returns the LOD score between the most probable haplotype and the second most probable. */
+    public double getLodMostProbableGenotype() {
+        // Take the log10 of a private copy of the posteriors so the source array is left alone.
+        final double[] sortedLogs = getPosteriorProbabilities().clone();
+        for (int i = sortedLogs.length - 1; i >= 0; --i) {
+            sortedLogs[i] = log10(sortedLogs[i]);
+        }
+        // Ascending sort: of the three values the best ends up at index 2, the runner-up at index 1.
+        Arrays.sort(sortedLogs);
+        return sortedLogs[2] - sortedLogs[1];
+    }
+
+ /**
+ * An enum whose only role in life is to help iterate over the 3 possible diploid genotypes.
+ * Subclasses also use v as the index of the genotype in their per-genotype arrays.
+ */
+ protected enum Genotype {
+ HOM_ALLELE1(0),
+ HET_ALLELE12(1),
+ HOM_ALLELE2(2);
+
+ // The number of chromosomes in the genotype that have ALLELE2.
+ int v;
+
+ Genotype(final int v) {
+ this.v = v;
+ }
+ }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java
new file mode 100644
index 0000000..b38f070
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java
@@ -0,0 +1,129 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.util.QualityUtil;
+import picard.util.MathUtil;
+
+import static java.lang.Math.log10;
+
+/**
+ * Represents the probability of the underlying haplotype of the contaminating sample given the data.
+ * By convention the alleles stored for each SNP are in phase.
+ *
+ * @author Yossi Farjoun
+ */
+
+public class HaplotypeProbabilitiesFromContaminatorSequence extends HaplotypeProbabilitiesFromSequence {
+
+ public double contamination;
+
+ // for each model (contGenotype, mainGenotype) there's a likelihood of the data. These need to be collected separately
+ // and only collated once all the data is in.
+ double[][] likelihoodMap = {{1, 1, 1}, {1, 1, 1}, {1, 1, 1}};
+
+ public HaplotypeProbabilitiesFromContaminatorSequence(final HaplotypeBlock haplotypeBlock, final double contamination) {
+ super(haplotypeBlock);
+
+ assert (contamination <= 1.0);
+ assert (contamination >= 0.0);
+
+ this.contamination = contamination;
+ }
+
+ /**
+ * Adds a base observation with the observed quality to the evidence for the haplotype of the
+ * contaminating sample. Bases matching neither allele of the SNP are counted but otherwise ignored.
+ * @param snp the SNP at which the base was observed; must be part of this haplotype
+ * @param base the base observed
+ * @param qual the quality of the observed base (converted to an error probability as a phred score)
+ */
+ public void addToProbs(final Snp snp, final byte base, final byte qual) {
+ assertSnpPartOfHaplotype(snp);
+ // Count the observation; bases that don't match either expected allele for this SNP are skipped
+ final boolean altAllele;
+ if (base == snp.getAllele1()) {
+ this.obsAllele1++;
+ altAllele = false;
+ } else if (base == snp.getAllele2()) {
+ this.obsAllele2++;
+ altAllele = true;
+ } else {
+ this.obsAlleleOther++;
+ return;
+ }
+ final double pErr = QualityUtil.getErrorProbabilityFromPhredScore(qual);
+
+ // We need to keep the 9 models separate until all the reads have been seen. Once they have, the main
+ // sample's genotype is summed over to give the three likelihoods of the contaminator (see updateLikelihoods).
+ for (final Genotype contGeno : Genotype.values()) {
+ for (final Genotype mainGeno : Genotype.values()) {
+ //theta is the expected frequency of the alternate allele
+ final double theta = 0.5 * ((1 - contamination) * mainGeno.v + contamination * contGeno.v);
+ likelihoodMap[contGeno.v][mainGeno.v] *= (( altAllele ? theta : (1 - theta)) * (1 - pErr) +
+ (!altAllele ? theta : (1 - theta)) * pErr);
+ }
+ }
+ }
+
+    // Refreshes the log likelihoods from likelihoodMap: p(a | g_c) = \sum_g_m { P(g_m) \prod_i P(a_i| g_m, g_c)}
+    private void updateLikelihoods() {
+        final double[] perContaminatorGenotype = new double[Genotype.values().length];
+        for (final Genotype contGeno : Genotype.values()) {
+            final double[] weightedByPrior = MathUtil.multiply(this.getPriorProbablities(), likelihoodMap[contGeno.v]);
+            perContaminatorGenotype[contGeno.v] = log10(MathUtil.sum(weightedByPrior));
+        }
+        setLogLikelihoods(perContaminatorGenotype);
+    }
+
+    /**
+     * Merges evidence from another object built for the same haplotype and contamination. Every
+     * check runs before any state is touched, so a rejected merge leaves this object unchanged.
+     */
+    @Override
+    public void merge(final HaplotypeProbabilities other) {
+        if (!this.getHaplotype().equals(other.getHaplotype())) {
+            throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " +
+                    getHaplotype() + ", " + other.getHaplotype());
+        }
+        if (!(other instanceof HaplotypeProbabilitiesFromContaminatorSequence)) {
+            throw new IllegalArgumentException("Can only merge HaplotypeProbabilities of same class.");
+        }
+        final HaplotypeProbabilitiesFromContaminatorSequence o = (HaplotypeProbabilitiesFromContaminatorSequence) other;
+        if (o.contamination != this.contamination) {
+            throw new IllegalArgumentException("Can only merge HaplotypeProbabilitiesFromContaminatorSequence with the same contamination value.");
+        }
+        super.merge(other);
+        for (final Genotype contGeno : Genotype.values()) {
+            this.likelihoodMap[contGeno.v] = MathUtil.multiply(this.likelihoodMap[contGeno.v], o.likelihoodMap[contGeno.v]);
+        }
+    }
+
+ @Override
+ public double[] getLogLikelihoods() {
+ updateLikelihoods();
+ return super.getLogLikelihoods();
+ }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
new file mode 100644
index 0000000..d6d0231
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
@@ -0,0 +1,72 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import static picard.util.MathUtil.*;
+
+/**
+ * Represents a set of HaplotypeProbabilities that were derived from a single SNP
+ * genotype at a point in time.
+ */
+public class HaplotypeProbabilitiesFromGenotype extends HaplotypeProbabilities {
+ private final Snp snp;
+ private final double[] likelihoods;
+
+ public HaplotypeProbabilitiesFromGenotype(final Snp snp, final HaplotypeBlock haplotypeBlock,
+ final double AA, final double Aa, final double aa) {
+ super(haplotypeBlock);
+ this.snp = snp;
+ this.likelihoods = new double[] {AA, Aa, aa};
+ }
+
+ /** Returns the SNP whose genotype was used to construct the likelihoods. */
+ @Override public Snp getRepresentativeSnp() { return snp; }
+
+
+    // TODO: this can't be right in general. At least one needs to divide by the prior to set things straight.
+    // TODO: The only saving grace is that this is normally used for cases where the priors are large and similar to each other.
+
+    /** Simply returns the internal _likelihoods_ array (not a copy), as set by the constructor and updated by merge(). */
+    @Override
+    public double[] getLikelihoods() {
+        return likelihoods;
+    }
+
+    /** Multiplies the likelihoods of another HaplotypeProbabilitiesFromGenotype for the same haplotype into this one. */
+    @Override
+    public void merge(final HaplotypeProbabilities other) {
+        if (!this.getHaplotype().equals(other.getHaplotype())) {
+            throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " +
+                    getHaplotype() + ", " + other.getHaplotype());
+        }
+        if (! (other instanceof HaplotypeProbabilitiesFromGenotype)) {
+            throw new IllegalArgumentException("Can only merge HaplotypeProbabilities of same class.");
+        }
+        // Element-wise product over the three genotypes (AA, Aa, aa).
+        for (int i = 0; i < 3; ++i) {
+            this.likelihoods[i] *= other.getLikelihoods()[i];
+        }
+    }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java
new file mode 100644
index 0000000..acea750
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java
@@ -0,0 +1,90 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.variant.variantcontext.Allele;
+import picard.util.MathUtil;
+
+import java.util.List;
+
+/**
+ * Represents the likelihood of the HaplotypeBlock given the GenotypeLikelihoods (GL field from a VCF, which is actually a log10-likelihood)
+ * for each of the SNPs in that block. By convention the alleles stored for each SNP are in phase.
+ *
+ * @author Yossi Farjoun
+ */
+public class HaplotypeProbabilitiesFromGenotypeLikelihoods extends HaplotypeProbabilitiesUsingLogLikelihoods {
+
+ public HaplotypeProbabilitiesFromGenotypeLikelihoods(final HaplotypeBlock haplotypeBlock) {
+ super(haplotypeBlock);
+ }
+
+    /**
+     * Adds the genotype likelihoods observed at a SNP to the evidence for this haplotype,
+     * based on the fact that the SNP is part of the haplotype.
+     *
+     * @param snp The snp in the HaplotypeBlock to which the likelihoods belong
+     * @param alleles the (ordered) alleles to which the biallelic genotype likelihoods correspond, so that
+     *                if the alleles are [A,B] the likelihoods are those of [AA, AB, BB]
+     * @param logGenotypeLikelihoods the log likelihoods of the three genotypes. Log is assumed to be in base 10.
+     */
+    public void addToLogLikelihoods(final Snp snp, final List<Allele> alleles, final double[] logGenotypeLikelihoods) {
+        assertSnpPartOfHaplotype(snp);
+
+        // only allow biallelic snps
+        assert (logGenotypeLikelihoods.length == Genotype.values().length);
+        assert (alleles.size() == 2);
+
+        //make sure that alleles are comparable to SNPs
+        for (int i = 0; i < 2; i++) {
+            assert (alleles.get(i).getBases().length == 1);
+        }
+
+        final byte allele1 = alleles.get(0).getBases()[0];
+        final byte allele2 = alleles.get(1).getBases()[0];
+
+        // alleles as given might be swapped with alleles in haplotype block.
+        // if that is the case, swap them around.
+        if (snp.getAllele1() == allele1 &&
+                snp.getAllele2() == allele2) {
+            setLogLikelihoods(MathUtil.sum(getLogLikelihoods(), logGenotypeLikelihoods));
+            return;
+        }
+        if (snp.getAllele2() == allele1 &&
+                snp.getAllele1() == allele2) {
+            final double[] ll = getLogLikelihoods();
+            ll[Genotype.HOM_ALLELE1.v] += logGenotypeLikelihoods[Genotype.HOM_ALLELE2.v];
+            ll[Genotype.HET_ALLELE12.v] += logGenotypeLikelihoods[Genotype.HET_ALLELE12.v];
+            ll[Genotype.HOM_ALLELE2.v] += logGenotypeLikelihoods[Genotype.HOM_ALLELE1.v];
+
+            setLogLikelihoods(ll);
+            return;
+        }
+
+        // If we are here it means that there was a mismatch in alleles: nothing is added. Like the
+        // other input checks in this method, the mismatch is reported only when assertions are enabled.
+        assert false : "Alleles " + alleles + " do not match the alleles of Snp " + snp;
+    }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java
new file mode 100644
index 0000000..f51868d
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java
@@ -0,0 +1,126 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.util.QualityUtil;
+import static java.lang.Math.log10;
+
+/**
+ * Represents the probability of the underlying haplotype given the data. By convention the
+ * alleles stored for each SNP are in phase.
+ *
+ * @author Tim Fennell
+ */
+public class HaplotypeProbabilitiesFromSequence extends HaplotypeProbabilitiesUsingLogLikelihoods {
+ protected int obsAllele1, obsAllele2, obsAlleleOther;
+
+ public HaplotypeProbabilitiesFromSequence(final HaplotypeBlock haplotypeBlock) {
+ super(haplotypeBlock);
+ }
+
+ @Override
+ public boolean hasEvidence() {
+ return super.hasEvidence() || obsAllele1 > 0 || obsAllele2 > 0;
+ }
+
+    /**
+     * Records one observed base at a SNP of this haplotype and folds it into the log likelihoods.
+     * A base matching neither allele of the SNP is only counted and leaves the likelihoods as they are.
+     *
+     * @param snp The snp in the HaplotypeBlock to which evidence is being added
+     * @param base the base observed
+     * @param qual the quality of the observed base
+     */
+    public void addToProbs(final Snp snp, final byte base, final byte qual) {
+        assertSnpPartOfHaplotype(snp);
+        final double[] ll = getLogLikelihoods();
+        final double pError = QualityUtil.getErrorProbabilityFromPhredScore(qual);
+        final boolean sawAllele1 = base == snp.getAllele1();
+        final boolean sawAllele2 = !sawAllele1 && base == snp.getAllele2();
+        if (sawAllele1 || sawAllele2) {
+            if (sawAllele1) {
+                obsAllele1++;
+            } else {
+                obsAllele2++;
+            }
+            for (final Genotype g : Genotype.values()) {
+                final double allele2Fraction = g.v / 2d;
+                // Fraction of the genotype's chromosomes that carry the allele which was NOT observed.
+                final double pOtherAllele = sawAllele1 ? allele2Fraction : 1 - allele2Fraction;
+                ll[g.v] += log10((1d - pOtherAllele) * (1d - pError) + pOtherAllele * pError);
+            }
+        } else {
+            obsAlleleOther++;
+        }
+        //technically not needed since we were changing the actual array, but good practice perhaps.
+        setLogLikelihoods(ll);
+    }
+
+    /**
+     * Merges information from another haplotype probabilities object for the same haplotype into
+     * this object. Useful for when probabilities need to be merged to levels higher than the
+     * read group, e.g. the sample or individual. The argument is validated in full before any
+     * state is changed, so a merge rejected by these checks leaves this object untouched.
+     *
+     * @param other Another haplotype probabilities object to merge in
+     * @throws IllegalArgumentException if other is for a different haplotype or is not a HaplotypeProbabilitiesFromSequence
+     */
+    @Override
+    public void merge(final HaplotypeProbabilities other) {
+        if (!this.getHaplotype().equals(other.getHaplotype())) {
+            throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " +
+                    getHaplotype() + ", " + other.getHaplotype());
+        }
+        if (! (other instanceof HaplotypeProbabilitiesFromSequence)) {
+            throw new IllegalArgumentException("Can only merge() HaplotypeProbabilities of same class: Tried to merge a " +
+                    this.getClass().getName() + " with a " + other.getClass().getName() +"." );
+        }
+        // Only now that nothing can be rejected, let the parent fold in the log likelihoods.
+        super.merge(other);
+        final HaplotypeProbabilitiesFromSequence o = (HaplotypeProbabilitiesFromSequence) other;
+        this.obsAllele1 += o.obsAllele1;
+        this.obsAllele2 += o.obsAllele2;
+        this.obsAlleleOther += o.obsAlleleOther;
+    }
+
+ /** Returns the number of bases/reads that support the first allele. */
+ @Override public int getObsAllele1() {
+ return obsAllele1;
+ }
+
+ /** Returns the number of bases/reads that support the second allele. */
+ @Override public int getObsAllele2() {
+ return obsAllele2;
+ }
+
+ /** Gets the total number of observations presented at this locus. */
+ @Override
+ public int getTotalObs() { return obsAllele1 + obsAllele2 + obsAlleleOther; }
+
+ /** Returns the fraction of base observations that were from an allele other than the two expected ones (NaN if there are no observations at all). */
+ public double getFractionUnexpectedAlleleObs() {
+ return obsAlleleOther / (double) (getTotalObs());
+ }
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java b/src/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java
new file mode 100644
index 0000000..a75d312
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java
@@ -0,0 +1,145 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import picard.util.MathUtil;
+import java.util.Arrays;
+import static java.lang.Math.log10;
+
+/**
+ * Represents the probability of the underlying haplotype using log likelihoods as the basic datum for each of the SNPs. By convention the
+ * alleles stored for each SNP are in phase.
+ *
+ *
+ * @author Tim Fennell
+ * @author Yossi Farjoun
+ */
+abstract class HaplotypeProbabilitiesUsingLogLikelihoods extends HaplotypeProbabilities {
+
+ // some derived classes might need to incorporate accumulated data before logLikelihood is usable.
+ // use the getter to allow these classes to calculate the likelihood from the data.
+ private final double[] loglikelihoods = new double[Genotype.values().length];
+
+ public HaplotypeProbabilitiesUsingLogLikelihoods(final HaplotypeBlock haplotypeBlock) {
+ super(haplotypeBlock);
+ }
+
+ /** Simply returns the SNP from the haplotype that has the lowest genome coordinate. */
+ @Override
+ public Snp getRepresentativeSnp() {
+ return getHaplotype().getFirstSnp();
+ }
+
+    /** Returns true as soon as any of the three log likelihoods differs from zero. */
+    @Override
+    public boolean hasEvidence() {
+        final double[] logs = this.getLogLikelihoods();
+        return !(logs[Genotype.HOM_ALLELE1.v] == 0 && logs[Genotype.HET_ALLELE12.v] == 0 &&
+                logs[Genotype.HOM_ALLELE2.v] == 0);
+    }
+
+ /**
+ * Merges information from another haplotype probabilities object for the same haplotype into
+ * this object. Useful for when probabilities need to be merged to levels higher than the
+ * read group, e.g. the sample or individual.
+ *
+ * @param other Another haplotype probabilities object to merge in (must be of the same class and for the same HaplotypeBlock)
+ *
+ */
+ @Override
+ public void merge(final HaplotypeProbabilities other) {
+ if (!this.getHaplotype().equals(other.getHaplotype())) {
+ throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " +
+ getHaplotype() + ", " + other.getHaplotype());
+ }
+
+ if (!(other instanceof HaplotypeProbabilitiesUsingLogLikelihoods)) {
+ throw new IllegalArgumentException("Can only merge HaplotypeProbabilities of same class.");
+ }
+
+ final HaplotypeProbabilitiesUsingLogLikelihoods o = (HaplotypeProbabilitiesUsingLogLikelihoods) other;
+
+ setLogLikelihoods(MathUtil.sum(getLogLikelihoods(), o.getLogLikelihoods()));
+ }
+
+    /**
+     * Returns the posterior probability of the haplotypes given the evidence (uses the internal prior).
+     */
+    @Override
+    public double[] getPosteriorProbabilities() {
+        return MathUtil.pNormalizeLogProbability(getShiftedLogPosterior());
+    }
+
+    /**
+     * Applies the internal priors to the log likelihoods, writing the result into a new array.
+     * Returns log10( P(haplotype | evidence) ) + C where C is unknown.
+     * One can recover C by normalizing, but this might be unneeded depending on the application.
+     * Uses Bayes P(m|x)=P(x|m)*P(m)/P(x), with the internal prior as P(m), but doesn't divide by P(x).
+     */
+    private double[] getShiftedLogPosterior() {
+        final double[] logLikelihoods = this.getLogLikelihoods();
+        final double[] priors = getPriorProbablities();
+        final int count = Genotype.values().length;
+        final double[] shifted = new double[count];
+        for (int i = 0; i < count; ++i) {
+            shifted[i] = logLikelihoods[i] + log10(priors[i]);
+        }
+        return shifted;
+    }
+
+ /**
+ * Converts the loglikelihoods into linear-space.
+ */
+ @Override
+ public double[] getLikelihoods() {
+ return MathUtil.pNormalizeLogProbability(getLogLikelihoods());
+ }
+
+ /**
+ * Since this class uses loglikelihoods natively, we override and return the native variable
+ */
+ @Override
+ public double[] getLogLikelihoods() {
+ return this.loglikelihoods;
+ }
+
+ public void setLogLikelihoods(final double[] ll) {
+ assert (ll.length == Genotype.values().length);
+
+ System.arraycopy(ll, 0, loglikelihoods, 0, ll.length);
+
+ }
+    /**
+     * Overridden to calculate the LOD from the loglikelihoods instead of the probabilities because it will allow for more accurate calculation before overflowing.
+     */
+    @Override
+    public double getLodMostProbableGenotype() {
+        final double[] ascending = getShiftedLogPosterior();
+        Arrays.sort(ascending);
+        final int best = Genotype.values().length - 1;
+        return ascending[best] - ascending[best - 1];
+    }
+
+}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java b/src/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
new file mode 100644
index 0000000..2c6f90a
--- /dev/null
+++ b/src/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
@@ -0,0 +1,101 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+/**
+ * A wrapper class for any HaplotypeProbabilities instance that will assume that the given evidence is that of a tumor sample and
+ * provide an hp for the normal sample that tumor came from. This models possible loss of heterozygosity where het genotypes
+ * turn into a homozygous genotype with probability pLoH.
+ *
+ * The shortcoming of this model is that it assumes the loss-of-heterozygosity events are all independent, but this way they are at least allowed for.
+ *
+ * @author farjoun
+ */
+
+public class HaplotypeProbabilityOfNormalGivenTumor extends HaplotypeProbabilities {
+
+ private final double[][] transitionMatrix;
+ private final HaplotypeProbabilities hpOfTumor;
+
+ public HaplotypeProbabilityOfNormalGivenTumor(final HaplotypeProbabilities hpOfTumor, final double pLoH) {
+ super(hpOfTumor.getHaplotype());
+
+ this.hpOfTumor = hpOfTumor;
+ transitionMatrix = new double[][]{
+ //This is P(g_t|g_n)
+ //tumor genotype are the columns.
+ {1, 0, 0}, //normal is hom_ref => tumor must be the same
+ {pLoH / 2, 1 - pLoH, pLoH / 2}, //normal is het => tumor might transit
+ {0, 0, 1}}; //normal is hom_var => tumor must be the same
+ }
+
+ // This function needs to be overridden since we want likelihood to mean the probability of the
+ // data given a particular _normal_ genotype, however, the likelihood as given is that where the
+ // genotype is of the tumor (if that's what the data was measuring)
+
+ // P(D_t|g_n) = \sum_{g_n} P(D_t|g_t,g_n) = \sum P(D_t|g_t) P(g_t|g_n) = hpOfTumor.getLikelihoods() * transitionMatrix
+
+ @Override
+ public double[] getLikelihoods() {
+ final double[] asTumorLikelihoods = new double[3];
+ final double[] asNormalLikelihoods = hpOfTumor.getLikelihoods();
+ for (final Genotype g_n : Genotype.values()) {
+ for (final Genotype g_t : Genotype.values()) {
+ asTumorLikelihoods[g_t.v] += asNormalLikelihoods[g_n.v] * transitionMatrix[g_n.v][g_t.v];
+ }
+ }
+ return asTumorLikelihoods;
+ }
+
+ @Override
+ public Snp getRepresentativeSnp() {
+ return hpOfTumor.getRepresentativeSnp();
+ }
+
+ @Override
+ public void merge(final HaplotypeProbabilities ignored) {
+ throw new IllegalArgumentException("Cannot merge HaplotypeProbabilityOfNormalGivenTumor. Merge the underlying object and create a new wrapper.");
+ }
+
+ @Override
+ public int getObsAllele1() {
+ return hpOfTumor.getObsAllele1();
+ }
+
+ @Override
+ public int getObsAllele2() {
+ return hpOfTumor.getObsAllele2();
+ }
+
+ @Override
+ public int getTotalObs() {
+ return hpOfTumor.getTotalObs();
+ }
+
+ @Override
+ public boolean hasEvidence() {
+ return hpOfTumor.hasEvidence();
+ }
+}
diff --git a/src/java/picard/fingerprint/LocusResult.java b/src/java/picard/fingerprint/LocusResult.java
new file mode 100644
index 0000000..6ddc969
--- /dev/null
+++ b/src/java/picard/fingerprint/LocusResult.java
@@ -0,0 +1,77 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+/**
+ * Represents the results of comparing evidence for a single haplotype locus between
+ * two sources of evidence (typically external genotyping data vs. sequencing data.).
+ *
+ * @author Tim Fennell
+ */
+class LocusResult implements Comparable<LocusResult> {
+    private final Snp snp;
+    private final DiploidGenotype expectedGenotype;
+    private final DiploidGenotype mostLikelyGenotype;
+    private final int allele1Count;
+    private final int allele2Count;
+    private final double lodGenotype;
+    private final double lExpectedSample; // log probability of expected sample
+    private final double lRandomSample; // log probability of random sample
+    // The two LODs below differ only in which of the compared samples is taken to be the tumor.
+    private final double lodExpectedSampleTumorNormal; // first sample from a tumor, second from the normal
+    private final double lodExpectedSampleNormalTumor; // first sample from the normal, second from a tumor
+
+    /** Stores each argument unchanged; the last two are the tumor-normal and normal-tumor LODs, in that order. */
+    LocusResult(final Snp snp, final DiploidGenotype expectedGenotype, final DiploidGenotype mostLikelyGenotype,
+                final int allele1Count, final int allele2Count, final double lodGenotype, final double lExpectedSample,
+                final double lRandomSample, final double lodGenotypeTumorNormal, final double lodGenotypeNormalTumor) {
+        this.snp = snp;
+        this.expectedGenotype = expectedGenotype;
+        this.mostLikelyGenotype = mostLikelyGenotype;
+        this.allele1Count = allele1Count;
+        this.allele2Count = allele2Count;
+        this.lodGenotype = lodGenotype;
+        this.lExpectedSample = lExpectedSample;
+        this.lRandomSample = lRandomSample;
+        this.lodExpectedSampleTumorNormal = lodGenotypeTumorNormal;
+        this.lodExpectedSampleNormalTumor = lodGenotypeNormalTumor;
+    }
+
+    public Snp getSnp() { return this.snp; }
+    public DiploidGenotype getExpectedGenotype() { return this.expectedGenotype; }
+    public DiploidGenotype getMostLikelyGenotype() { return this.mostLikelyGenotype; }
+    public int getAllele1Count() { return this.allele1Count; }
+    public int getAllele2Count() { return this.allele2Count; }
+    public double getLodGenotype() { return this.lodGenotype; }
+    public double getLodExpectedSampleNormalTumor() { return this.lodExpectedSampleNormalTumor; }
+    public double getLodExpectedSampleTumorNormal() { return this.lodExpectedSampleTumorNormal; }
+    public double lExpectedSample() { return this.lExpectedSample; }
+    public double lRandomSample() { return this.lRandomSample; }
+
+    @Override
+    public int compareTo(final LocusResult that) {
+        return snp.compareTo(that.snp); // ordering is delegated entirely to the SNP
+    }
+}
diff --git a/src/java/picard/fingerprint/MatchResults.java b/src/java/picard/fingerprint/MatchResults.java
new file mode 100644
index 0000000..2f4cfd0
--- /dev/null
+++ b/src/java/picard/fingerprint/MatchResults.java
@@ -0,0 +1,96 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import java.io.File;
+import java.util.Collection;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Represents the results of a fingerprint comparison between one dataset and a specific
+ * fingerprint file. Implements Comparable so that better matches (higher positive LODs)
+ * are sorted earlier.
+ *
+ * @author Tim Fennell
+ */
+public class MatchResults implements Comparable<MatchResults> {
+    private final File fingerprintFile;
+    private final String sample;
+    private final double sampleLikelihood;
+    private final double populationLikelihood;
+    private final double LOD; // always sampleLikelihood - populationLikelihood
+    //the lod score when assuming the left sample is tumor and the right is normal
+    private final double lodTN;
+    //the lod score when assuming the left sample is normal and the right is tumor
+    private final double lodNT;
+
+    public double getLodNT() {
+        return lodNT;
+    }
+
+    public double getLodTN() {
+        return lodTN;
+    }
+
+    private final SortedSet<LocusResult> locusResults = new TreeSet<LocusResult>();
+
+    MatchResults(final File fingerprintFile, final String sample,
+                 final double sampleLikelihood, final double populationLikelihood, final double lodTN, final double lodNT,
+                 final Collection<LocusResult> locusResults) {
+        this.fingerprintFile = fingerprintFile;
+        this.sample = sample;
+        this.sampleLikelihood = sampleLikelihood;
+        this.populationLikelihood = populationLikelihood;
+        this.LOD = sampleLikelihood - populationLikelihood;
+        this.lodTN = lodTN;
+        this.lodNT = lodNT;
+        // a null collection simply leaves the result set empty
+        if (locusResults != null) {
+            this.locusResults.addAll(locusResults);
+        }
+    }
+
+    public void addLocusResult(final LocusResult result) {
+        this.locusResults.add(result);
+    }
+
+    /** Provides a natural sort so that better matches (by LOD) sort earlier, with ties broken by sample and NaN LODs last. */
+    @Override public int compareTo(final MatchResults that) {
+        // Negating both LODs puts higher LODs first while Double.compare still ranks NaN as the
+        // largest value, i.e. last. Testing with ">" and "!=" would answer 1 in both directions
+        // when a LOD is NaN, which breaks the Comparable contract that sorted collections rely on.
+        final int byLod = Double.compare(-this.LOD, -that.LOD);
+        if (byLod != 0) return byLod;
+        return this.sample.compareTo(that.sample);
+    }
+
+    public String getSample() { return sample; }
+    public double getSampleLikelihood() { return sampleLikelihood; }
+    public double getPopulationLikelihood() { return populationLikelihood; }
+    public double getLOD() { return LOD; }
+    public SortedSet<LocusResult> getLocusResults() { return locusResults; }
+    public File getFingerprintFile() { return this.fingerprintFile; }
+}
diff --git a/src/java/picard/fingerprint/Snp.java b/src/java/picard/fingerprint/Snp.java
new file mode 100644
index 0000000..367cac7
--- /dev/null
+++ b/src/java/picard/fingerprint/Snp.java
@@ -0,0 +1,120 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.fingerprint;
+
+import htsjdk.samtools.util.StringUtil;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Class to represent a SNP in context of a haplotype block that is used in fingerprinting.
+ *
+ * @author Tim Fennell
+ */
+public class Snp implements Comparable<Snp> {
+    private final String name;
+    private final String chrom;
+    private final int pos;
+    private final byte allele1; // stored upper-cased
+    private final byte allele2; // stored upper-cased
+    private final double maf; // technically the allele frequency of allele2
+    private final List<String> fingerprintPanels; // never null; an empty list replaces a null argument
+    // genotypes in the order: homozygous allele1, heterozygous, homozygous allele2
+    private final DiploidGenotype[] genotypes = new DiploidGenotype[3];
+
+    public Snp(final String name, final String chrom, final int pos, final byte allele1, final byte allele2,
+               final double maf, final List<String> fingerprintPanels) {
+        this.name = name;
+        this.chrom = chrom;
+        this.pos = pos;
+        this.allele1 = StringUtil.toUpperCase(allele1);
+        this.allele2 = StringUtil.toUpperCase(allele2);
+        this.maf = maf;
+        this.fingerprintPanels = fingerprintPanels == null ? new ArrayList<String>() : fingerprintPanels;
+        // NOTE(review): genotypes are built from the raw arguments, not the upper-cased fields - confirm fromBases ignores case
+        // Construct the genotypes for ease of comparison
+        this.genotypes[0] = DiploidGenotype.fromBases(allele1, allele1);
+        this.genotypes[1] = DiploidGenotype.fromBases(allele1, allele2);
+        this.genotypes[2] = DiploidGenotype.fromBases(allele2, allele2);
+    }
+
+    /** Returns a new SNP object with the alleles swapped and MAF corrected (1 - maf); all other fields are carried over. */
+    public Snp flip() {
+        return new Snp(name, chrom, pos, allele2, allele1, 1-maf, fingerprintPanels);
+    }
+
+    public String getName() { return name; }
+    public String getChrom() { return chrom; }
+    public int getPos() { return pos; }
+    public byte getAllele1() { return allele1; }
+    public byte getAllele2() { return allele2; }
+    public double getMaf() { return maf; }
+    public List<String> getFingerprintPanels() { return this.fingerprintPanels; }
+
+    public DiploidGenotype getHomozygousAllele1Genotype() { return this.genotypes[0]; }
+    public DiploidGenotype getHeterogyzousGenotype() { return this.genotypes[1]; }
+    public DiploidGenotype getHomozygousAllele2Genotype() { return this.genotypes[2]; }
+
+    /** Gets the genotype stored at the index given by the haplotype's ordinal. */
+    DiploidGenotype getGenotype(final DiploidHaplotype haplotype) { return this.genotypes[haplotype.ordinal()]; }
+
+    /** Gets the index of the supplied genotype within the genotypes for this SNP; throws IllegalArgumentException if absent. */
+    int indexOf(final DiploidGenotype gt) {
+        for (int i=0; i<this.genotypes.length; ++i) {
+            if (gt == this.genotypes[i]) return i;
+        }
+
+        throw new IllegalArgumentException("Genotype " + gt + " is not valid for this SNP.");
+    }
+
+    public String getAlleleString() { // allele1 as stored, followed by allele2 in lower case
+        return StringUtil.bytesToString(new byte[] {allele1, StringUtil.toLowerCase(allele2)});
+    }
+
+    @Override
+    public int compareTo(final Snp that) {
+        // Order by chromosome name, then by position; Integer.compare cannot overflow the way a subtraction can
+        final int byChrom = this.chrom.compareTo(that.chrom);
+        return byChrom != 0 ? byChrom : Integer.compare(this.pos, that.pos);
+    }
+
+    @Override
+    public boolean equals(final Object o) { // equality is by chrom and pos only, mirroring compareTo
+        return (this == o) || ((o instanceof Snp) && compareTo((Snp) o) == 0);
+    }
+
+    @Override
+    public int hashCode() {
+        int result = chrom.hashCode();
+        result = 31 * result + pos;
+        return result;
+    }
+
+    @Override
+    public String toString() {
+        return this.chrom + ":" + this.pos;
+    }
+}
diff --git a/src/java/picard/illumina/ClusterDataToSamConverter.java b/src/java/picard/illumina/ClusterDataToSamConverter.java
index ad4716f..08f3f7a 100644
--- a/src/java/picard/illumina/ClusterDataToSamConverter.java
+++ b/src/java/picard/illumina/ClusterDataToSamConverter.java
@@ -26,6 +26,7 @@ package picard.illumina;
import htsjdk.samtools.ReservedTagConstants;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMTag;
+import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.filter.SolexaNoiseFilter;
import picard.fastq.IlluminaReadNameEncoder;
@@ -54,13 +55,16 @@ public class ClusterDataToSamConverter implements
private final String readGroupId;
private final SamRecordFilter filters = new SolexaNoiseFilter();
private final boolean isPairedEnd;
- private final boolean isBarcoded;
+ private final boolean hasSampleBarcode;
+ private final boolean hasMolecularBarcode;
private final int [] templateIndices;
- private final int [] barcodeIndices;
+ private final int [] sampleBarcodeIndices;
+ private final int [] molecularBarcodeIndices;
+
private final AdapterMarker adapterMarker;
private final int outputRecordsPerCluster;
- private final ReadNameEncoder readNameEncoder;
-
+ private final ReadNameEncoder readNameEncoder;
+
/**
* Constructor
*
@@ -80,7 +84,8 @@ public class ClusterDataToSamConverter implements
this.readNameEncoder = new IlluminaReadNameEncoder(runBarcode);
this.isPairedEnd = readStructure.templates.length() == 2;
- this.isBarcoded = !readStructure.barcodes.isEmpty();
+ this.hasSampleBarcode = !readStructure.sampleBarcodes.isEmpty();
+ this.hasMolecularBarcode = !readStructure.molecularBarcode.isEmpty();
if (adapters.isEmpty()) {
this.adapterMarker = null;
@@ -89,7 +94,8 @@ public class ClusterDataToSamConverter implements
}
this.templateIndices = readStructure.templates.getIndices();
- this.barcodeIndices = readStructure.barcodes.getIndices();
+ this.sampleBarcodeIndices = readStructure.sampleBarcodes.getIndices();
+ this.molecularBarcodeIndices = readStructure.molecularBarcode.getIndices();
this.outputRecordsPerCluster = readStructure.templates.length();
}
@@ -97,7 +103,8 @@ public class ClusterDataToSamConverter implements
/**
* Creates a new SAM record from the basecall data
*/
- private SAMRecord createSamRecord(final ReadData readData, final String readName, final boolean isPf, final boolean firstOfPair, final String unmatchedBarcode) {
+ private SAMRecord createSamRecord(final ReadData readData, final String readName, final boolean isPf, final boolean firstOfPair,
+ final String unmatchedBarcode, final String molecularIndex, final String molecularIndexQ) {
final SAMRecord sam = new SAMRecord(null);
sam.setReadName(readName);
sam.setReadBases(readData.getBases());
@@ -127,6 +134,14 @@ public class ClusterDataToSamConverter implements
sam.setAttribute(SAMTag.BC.name(), unmatchedBarcode);
}
+ if (molecularIndex != null) {
+ //TODO: add RX and QX to the list of SAMTags and change this. initial discussion
+ //TODO: here: https://github.com/broadinstitute/picard/issues/287
+ //TODO: HTS-spec issue: https://github.com/samtools/hts-specs/issues/109
+ sam.setAttribute("RX", molecularIndex);
+ sam.setAttribute("QX", molecularIndexQ);
+ }
+
return sam;
}
@@ -140,23 +155,41 @@ public class ClusterDataToSamConverter implements
// Get and transform the unmatched barcode, if any, to store with the reads
String unmatchedBarcode = null;
- if (isBarcoded && cluster.getMatchedBarcode() == null) {
- final byte barcode[][] = new byte[barcodeIndices.length][];
- for (int i = 0; i < barcodeIndices.length; i++) {
- barcode[i] = cluster.getRead(barcodeIndices[i]).getBases();
+ if (hasSampleBarcode && cluster.getMatchedBarcode() == null) {
+ final byte barcode[][] = new byte[sampleBarcodeIndices.length][];
+ for (int i = 0; i < sampleBarcodeIndices.length; i++) {
+ barcode[i] = cluster.getRead(sampleBarcodeIndices[i]).getBases();
}
unmatchedBarcode = IlluminaUtil.barcodeSeqsToString(barcode).replace('.', 'N'); //TODO: This has a separator, where as in other places we do not use a separator
}
+ final String joinedMolecularIndex ;
+ final String joinedMolecularIndexQ ;
+ if (hasMolecularBarcode) {
+ final StringBuilder joinedMolecularIndexQBuilder = new StringBuilder();
+ final byte molecularIndex[][] = new byte[molecularBarcodeIndices.length][];
+ final byte molecularIndexQ[][] = new byte[molecularBarcodeIndices.length][];
+ for (int i = 0; i < molecularBarcodeIndices.length; i++) {
+ molecularIndex[i] = cluster.getRead(molecularBarcodeIndices[i]).getBases();
+ molecularIndexQ[i] = cluster.getRead(molecularBarcodeIndices[i]).getQualities();
+ joinedMolecularIndexQBuilder.append(SAMUtils.phredToFastq(molecularIndexQ[i]));
+ }
+ joinedMolecularIndex = IlluminaUtil.byteArrayToString(molecularIndex, "").replace('.', 'N');
+ joinedMolecularIndexQ = joinedMolecularIndexQBuilder.toString();
+ } else {
+ joinedMolecularIndex = null;
+ joinedMolecularIndexQ = null;
+ }
+
final SAMRecord firstOfPair = createSamRecord(
- cluster.getRead(templateIndices[0]), readName, cluster.isPf(), true,unmatchedBarcode);
+ cluster.getRead(templateIndices[0]), readName, cluster.isPf(), true, unmatchedBarcode, joinedMolecularIndex, joinedMolecularIndexQ);
ret.records[0] = firstOfPair;
SAMRecord secondOfPair = null;
if(isPairedEnd) {
secondOfPair = createSamRecord(
- cluster.getRead(templateIndices[1]), readName, cluster.isPf(), false, unmatchedBarcode);
+ cluster.getRead(templateIndices[1]), readName, cluster.isPf(), false, unmatchedBarcode, joinedMolecularIndex, joinedMolecularIndexQ);
ret.records[1] = secondOfPair;
}
diff --git a/src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java b/src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
index f15e4de..51df55e 100644
--- a/src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
+++ b/src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
@@ -111,7 +111,7 @@ public class CollectIlluminaBasecallingMetrics extends CommandLineProgram {
for (final TabbedTextFileWithHeaderParser.Row row : barcodesParser) {
final String barcodeName = row.getField(BARCODE_NAME_COLUMN);
final StringBuilder barcode = new StringBuilder();
- for (int i = 1; i <= readStructure.barcodes.length(); i++) {
+ for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) {
barcode.append(row.getField(BARCODE_SEQUENCE_COLUMN_NAME_STUB + i));
if (barcodeLength == 0) barcodeLength = barcode.length();
}
diff --git a/src/java/picard/illumina/CollectIlluminaLaneMetrics.java b/src/java/picard/illumina/CollectIlluminaLaneMetrics.java
index a73a157..99e0ef8 100644
--- a/src/java/picard/illumina/CollectIlluminaLaneMetrics.java
+++ b/src/java/picard/illumina/CollectIlluminaLaneMetrics.java
@@ -24,24 +24,24 @@
package picard.illumina;
+import htsjdk.samtools.metrics.MetricBase;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.Log;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
-import picard.cmdline.programgroups.Illumina;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Illumina;
import picard.illumina.parser.ReadStructure;
import picard.illumina.parser.Tile;
import picard.illumina.parser.TileMetricsUtil;
-import htsjdk.samtools.metrics.MetricBase;
-import htsjdk.samtools.metrics.MetricsFile;
-import htsjdk.samtools.util.CollectionUtil;
-import htsjdk.samtools.util.Log;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.Collection;
import java.util.Map;
+import java.util.stream.Collectors;
/**
* Command-line wrapper around {@link IlluminaLaneMetricsCollector}.
@@ -55,7 +55,7 @@ import java.util.Map;
)
public class CollectIlluminaLaneMetrics extends CommandLineProgram {
static final String USAGE = "Collects Illumina lane metrics for the given basecalling analysis directory";
-
+
@Option(doc = "The Illumina run directory of the run for which the lane metrics are to be generated")
public File RUN_DIRECTORY;
@@ -65,7 +65,7 @@ public class CollectIlluminaLaneMetrics extends CommandLineProgram {
@Option(doc = "The prefix to be prepended to the file name of the output file; an appropriate suffix will be applied", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME)
public String OUTPUT_PREFIX;
- @Option(doc= ReadStructure.PARAMETER_DOC, shortName="RS")
+ @Option(doc = ReadStructure.PARAMETER_DOC, shortName = "RS")
public ReadStructure READ_STRUCTURE;
@Override
@@ -84,25 +84,19 @@ public class CollectIlluminaLaneMetrics extends CommandLineProgram {
* Utility for collating Tile records from the Illumina TileMetrics file into lane-level and phasing-level metrics.
*/
public static class IlluminaLaneMetricsCollector {
-
+
private final static Log LOG = Log.getInstance(IlluminaLaneMetricsCollector.class);
-
+
/** Returns a partitioned collection of lane number to Tile objects from the provided basecall directory. */
- public static Map<Integer, Collection<Tile>> readLaneTiles(final File illuminaRunDirectory, final ReadStructure readStructure) {
+ public static Map<Integer, ? extends Collection<Tile>> readLaneTiles(final File illuminaRunDirectory, final ReadStructure readStructure) {
final Collection<Tile> tiles;
try {
tiles = TileMetricsUtil.parseTileMetrics(TileMetricsUtil.renderTileMetricsFileFromBasecallingDirectory(illuminaRunDirectory), readStructure);
} catch (final FileNotFoundException e) {
throw new PicardException("Unable to open laneMetrics file.", e);
}
-
- return CollectionUtil.partition(tiles,
- new CollectionUtil.Partitioner<Tile, Integer>() {
- @Override
- public Integer getPartition(final Tile tile) {
- return tile.getLaneNumber();
- }
- });
+
+ return tiles.stream().collect(Collectors.groupingBy(Tile::getLaneNumber));
}
/** Parses the tile data from the basecall directory and writes to both the lane and phasing metrics files */
@@ -110,32 +104,27 @@ public class CollectIlluminaLaneMetrics extends CommandLineProgram {
final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile,
final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile,
final ReadStructure readStructure) {
- final Map<Integer, Collection<Tile>> laneTiles = readLaneTiles(runDirectory, readStructure);
+ final Map<Integer, ? extends Collection<Tile>> laneTiles = readLaneTiles(runDirectory, readStructure);
writeLaneMetrics(laneTiles, outputDirectory, outputPrefix, laneMetricsFile);
writePhasingMetrics(laneTiles, outputDirectory, outputPrefix, phasingMetricsFile);
}
- public static File writePhasingMetrics(final Map<Integer, Collection<Tile>> laneTiles, final File outputDirectory,
+ public static File writePhasingMetrics(final Map<Integer, ? extends Collection<Tile>> laneTiles, final File outputDirectory,
final String outputPrefix, final MetricsFile<MetricBase, Comparable<?>> phasingMetricsFile) {
- for (final Map.Entry<Integer, Collection<Tile>> entry : laneTiles.entrySet()) {
- for (final IlluminaPhasingMetrics phasingMetric : IlluminaPhasingMetrics.getPhasingMetricsForTiles(entry.getKey()
- .longValue(),
- entry.getValue())) {
- phasingMetricsFile.addMetric(phasingMetric);
- }
- }
+ laneTiles.entrySet().stream().forEach(entry -> IlluminaPhasingMetrics.getPhasingMetricsForTiles(entry.getKey().longValue(),
+ entry.getValue()).forEach(phasingMetricsFile::addMetric));
return writeMetrics(phasingMetricsFile, outputDirectory, outputPrefix, IlluminaPhasingMetrics.getExtension());
}
- public static File writeLaneMetrics(final Map<Integer, Collection<Tile>> laneTiles, final File outputDirectory,
+ public static File writeLaneMetrics(final Map<Integer, ? extends Collection<Tile>> laneTiles, final File outputDirectory,
final String outputPrefix, final MetricsFile<MetricBase, Comparable<?>> laneMetricsFile) {
- for (final Map.Entry<Integer, Collection<Tile>> entry : laneTiles.entrySet()) {
+ laneTiles.entrySet().stream().forEach(entry -> {
final IlluminaLaneMetrics laneMetric = new IlluminaLaneMetrics();
laneMetric.LANE = entry.getKey().longValue();
laneMetric.CLUSTER_DENSITY = calculateLaneDensityFromTiles(entry.getValue());
laneMetricsFile.addMetric(laneMetric);
- }
+ });
return writeMetrics(laneMetricsFile, outputDirectory, outputPrefix, IlluminaLaneMetrics.getExtension());
}
@@ -155,7 +144,7 @@ public class CollectIlluminaLaneMetrics extends CommandLineProgram {
area += (tile.getClusterCount() / tile.getClusterDensity());
clusters += tile.getClusterCount();
}
- return clusters/area;
+ return clusters / area;
}
}
}
diff --git a/src/java/picard/illumina/ExtractIlluminaBarcodes.java b/src/java/picard/illumina/ExtractIlluminaBarcodes.java
index 4d2f590..4027378 100644
--- a/src/java/picard/illumina/ExtractIlluminaBarcodes.java
+++ b/src/java/picard/illumina/ExtractIlluminaBarcodes.java
@@ -73,14 +73,15 @@ import java.util.concurrent.TimeUnit;
* @author jburke at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Determine the barcode for each read in an Illumina lane.\n" +
+ usage = "Determine the sample barcode for each read in an Illumina lane.\n" +
"For each tile, a file is written to the basecalls directory of the form s_<lane>_<tile>_barcode.txt. " +
"An output file contains a line for each read in the tile, aligned with the regular basecall output. \n" +
"The output file contains the following tab-separated columns: \n" +
" * read subsequence at barcode position\n" +
" * Y or N indicating if there was a barcode match\n" +
" * matched barcode sequence\n" +
- "Note that the order of specification of barcodes can cause arbitrary differences in output for poorly matching barcodes.\n\n",
+ "Note 1: that the order of specification of barcodes can cause arbitrary differences in output for poorly matching barcodes.\n" +
+ "Note 2: molecular barcodes (M in the read structure) are not the barcode being extracted here and will be ignored here.\n\n",
usageShort = "Tool to determine the barcode for each read in an Illumina lane",
programGroup = Illumina.class
)
@@ -163,7 +164,7 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
IOUtil.assertDirectoryIsWritable(OUTPUT_DIR);
// Create BarcodeMetric for counting reads that don't match any barcode
- final String[] noMatchBarcode = new String[readStructure.barcodes.length()];
+ final String[] noMatchBarcode = new String[readStructure.sampleBarcodes.length()];
int index = 0;
for (final ReadDescriptor d : readStructure.descriptors) {
if (d.type == ReadType.Barcode) {
@@ -234,6 +235,26 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
}
// Finish metrics tallying.
+ finalizeMetrics(barcodeToMetrics, noMatchMetric);
+
+ // Warn about minimum qualities and assert that we've achieved the minimum.
+ for (Map.Entry<Byte, Integer> entry : bclQualityEvaluationStrategy.getPoorQualityFrequencies().entrySet()) {
+ LOG.warn(String.format("Observed low quality of %s %s times.", entry.getKey(), entry.getValue()));
+ }
+ bclQualityEvaluationStrategy.assertMinimumQualities();
+
+ final MetricsFile<BarcodeMetric, Integer> metrics = getMetricsFile();
+ for (final BarcodeMetric barcodeMetric : barcodeToMetrics.values()) {
+ metrics.addMetric(barcodeMetric);
+ }
+ metrics.addMetric(noMatchMetric);
+ metrics.write(METRICS_FILE);
+ return 0;
+ }
+
+ public static void finalizeMetrics(final Map<String, BarcodeMetric> barcodeToMetrics,
+ final BarcodeMetric noMatchMetric) {
+ // Finish metrics tallying.
int totalReads = noMatchMetric.READS;
int totalPfReads = noMatchMetric.PF_READS;
int totalPfReadsAssigned = 0;
@@ -281,12 +302,6 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
}
}
- // Warn about minimum qualities and assert that we've achieved the minimum.
- for (Map.Entry<Byte, Integer> entry : bclQualityEvaluationStrategy.getPoorQualityFrequencies().entrySet()) {
- LOG.warn(String.format("Observed low quality of %s %s times.", entry.getKey(), entry.getValue()));
- }
- bclQualityEvaluationStrategy.assertMinimumQualities();
-
// Calculate the normalized matches
if (totalPfReadsAssigned > 0) {
final double mean = (double) totalPfReadsAssigned / (double) barcodeToMetrics.values().size();
@@ -294,14 +309,6 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
m.PF_NORMALIZED_MATCHES = m.PF_READS / mean;
}
}
-
- final MetricsFile<BarcodeMetric, Integer> metrics = getMetricsFile();
- for (final BarcodeMetric barcodeMetric : barcodeToMetrics.values()) {
- metrics.addMetric(barcodeMetric);
- }
- metrics.addMetric(noMatchMetric);
- metrics.write(METRICS_FILE);
- return 0;
}
/** Create a barcode filename corresponding to the given tile qseq file. */
@@ -323,11 +330,11 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
this.bclQualityEvaluationStrategy = new BclQualityEvaluationStrategy(MINIMUM_QUALITY);
/**
- * In extract illumina barcodes we NEVER want to look at the template reads, therefore replace them with skips because
- * IlluminaDataProvider and its factory will not open these nor produce ClusterData with the template reads in them, thus reducing
- * the file IO and value copying done by the data provider
+ * In extract illumina barcodes we NEVER want to look at the template reads nor the molecular barcodes, therefore replace them with
+ * skips because IlluminaDataProvider and its factory will neither open these nor produce ClusterData with the template reads in them,
+ * thus reducing the file IO and value copying done by the data provider
*/
- readStructure = new ReadStructure(READ_STRUCTURE.replaceAll("T", "S"));
+ readStructure = new ReadStructure(READ_STRUCTURE.replaceAll("T|M", "S"));
final IlluminaDataType[] datatypes = (MINIMUM_BASE_QUALITY > 0) ?
new IlluminaDataType[]{IlluminaDataType.BaseCalls, IlluminaDataType.PF, IlluminaDataType.QualityScores} :
new IlluminaDataType[]{IlluminaDataType.BaseCalls, IlluminaDataType.PF};
@@ -376,7 +383,7 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
}
final boolean hasBarcodeName = barcodesParser.hasColumn(BARCODE_NAME_COLUMN);
final boolean hasLibraryName = barcodesParser.hasColumn(LIBRARY_NAME_COLUMN);
- final int numBarcodes = readStructure.barcodes.length();
+ final int numBarcodes = readStructure.sampleBarcodes.length();
final Set<String> barcodes = new HashSet<String>();
for (final TabbedTextFileWithHeaderParser.Row row : barcodesParser) {
final String bcStrings[] = new String[numBarcodes];
@@ -575,7 +582,7 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
//Most likely we have SKIPS in our read structure since we replace all template reads with skips in the input data structure
//(see customCommnandLineValidation), therefore we must use the outputReadStructure to index into the output cluster data
- final int[] barcodeIndices = outputReadStructure.barcodes.getIndices();
+ final int[] barcodeIndices = outputReadStructure.sampleBarcodes.getIndices();
final BufferedWriter writer = IOUtil.openFileForBufferedWriting(barcodeFile);
final byte barcodeSubsequences[][] = new byte[barcodeIndices.length][];
final byte qualityScores[][] = usingQualityScores ? new byte[barcodeIndices.length][] : null;
diff --git a/src/java/picard/illumina/IlluminaBasecallsConverter.java b/src/java/picard/illumina/IlluminaBasecallsConverter.java
index d56f0fe..cdf3d19 100644
--- a/src/java/picard/illumina/IlluminaBasecallsConverter.java
+++ b/src/java/picard/illumina/IlluminaBasecallsConverter.java
@@ -453,8 +453,8 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
private synchronized SortingCollection<CLUSTER_OUTPUT_RECORD> newSortingCollection() {
final int maxRecordsInRam =
- maxReadsInRamPerTile /
- barcodeRecordWriterMap.size();
+ Math.max(1, maxReadsInRamPerTile /
+ barcodeRecordWriterMap.size());
return SortingCollection.newInstance(
outputRecordClass,
codecPrototype.clone(),
@@ -894,7 +894,7 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
*/
private static IlluminaDataType[] getDataTypesFromReadStructure(final ReadStructure readStructure,
final boolean demultiplex) {
- if (readStructure.barcodes.isEmpty() || !demultiplex) {
+ if (readStructure.sampleBarcodes.isEmpty() || !demultiplex) {
return DATA_TYPES_NO_BARCODE;
} else {
return DATA_TYPES_WITH_BARCODE;
diff --git a/src/java/picard/illumina/IlluminaBasecallsToFastq.java b/src/java/picard/illumina/IlluminaBasecallsToFastq.java
index 50a641e..d3c3b4d 100644
--- a/src/java/picard/illumina/IlluminaBasecallsToFastq.java
+++ b/src/java/picard/illumina/IlluminaBasecallsToFastq.java
@@ -115,7 +115,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
public File MULTIPLEX_PARAMS;
@Option(doc = "Which adapters to look for in the read.")
- public List<IlluminaUtil.IlluminaAdapterPair> ADAPTERS_TO_CHECK = new ArrayList<IlluminaUtil.IlluminaAdapterPair>(
+ public List<IlluminaUtil.IlluminaAdapterPair> ADAPTERS_TO_CHECK = new ArrayList<>(
Arrays.asList(IlluminaUtil.IlluminaAdapterPair.INDEXED,
IlluminaUtil.IlluminaAdapterPair.DUAL_INDEXED,
IlluminaUtil.IlluminaAdapterPair.NEXTERA_V2,
@@ -169,19 +169,14 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
CASAVA_1_8, ILLUMINA
}
- private final Map<String, FastqRecordsWriter> barcodeFastqWriterMap = new HashMap<String, FastqRecordsWriter>();
+ private final Map<String, FastqRecordsWriter> sampleBarcodeFastqWriterMap = new HashMap<>();
private ReadStructure readStructure;
IlluminaBasecallsConverter<FastqRecordsForCluster> basecallsConverter;
private static final Log log = Log.getInstance(IlluminaBasecallsToFastq.class);
private final FastqWriterFactory fastqWriterFactory = new FastqWriterFactory();
private ReadNameEncoder readNameEncoder;
- private static final Comparator<FastqRecordsForCluster> queryNameComparator = new Comparator<FastqRecordsForCluster>() {
- @Override
- public int compare(final FastqRecordsForCluster r1, final FastqRecordsForCluster r2) {
- return SAMRecordQueryNameComparator.compareReadNames(r1.templateRecords[0].getReadHeader(),
- r2.templateRecords[0].getReadHeader());
- }
- };
+ private static final Comparator<FastqRecordsForCluster> queryNameComparator = (r1, r2) -> SAMRecordQueryNameComparator.compareReadNames(r1.templateRecords[0].getReadHeader(),
+ r2.templateRecords[0].getReadHeader());
@Override
protected int doWork() {
@@ -231,26 +226,25 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
}
final boolean demultiplex;
if (OUTPUT_PREFIX != null) {
- barcodeFastqWriterMap.put(null, buildWriter(OUTPUT_PREFIX));
+ sampleBarcodeFastqWriterMap.put(null, buildWriter(OUTPUT_PREFIX));
demultiplex = false;
} else {
populateWritersFromMultiplexParams();
demultiplex = true;
}
- final int readsPerCluster = readStructure.templates.length() + readStructure.barcodes.length();
- basecallsConverter = new IlluminaBasecallsConverter<FastqRecordsForCluster>(BASECALLS_DIR, BARCODES_DIR, LANE, readStructure,
- barcodeFastqWriterMap, demultiplex, MAX_READS_IN_RAM_PER_TILE/readsPerCluster, TMP_DIR, NUM_PROCESSORS,
+ final int readsPerCluster = readStructure.templates.length() + readStructure.sampleBarcodes.length();
+ basecallsConverter = new IlluminaBasecallsConverter<>(BASECALLS_DIR, BARCODES_DIR, LANE, readStructure,
+ sampleBarcodeFastqWriterMap, demultiplex, Math.max(1, MAX_READS_IN_RAM_PER_TILE / readsPerCluster), TMP_DIR, NUM_PROCESSORS,
FORCE_GC, FIRST_TILE, TILE_LIMIT, queryNameComparator,
new FastqRecordsForClusterCodec(readStructure.templates.length(),
- readStructure.barcodes.length()), FastqRecordsForCluster.class, bclQualityEvaluationStrategy,
+ readStructure.sampleBarcodes.length(), readStructure.molecularBarcode.length()), FastqRecordsForCluster.class, bclQualityEvaluationStrategy,
this.APPLY_EAMSS_FILTER, INCLUDE_NON_PF_READS, IGNORE_UNEXPECTED_BARCODES);
log.info("READ STRUCTURE IS " + readStructure.toString());
basecallsConverter.setConverter(
- new ClusterToFastqRecordsForClusterConverter(
- basecallsConverter.getFactory().getOutputReadStructure()));
-
+ new ClusterToFastqRecordsForClusterConverter(
+ basecallsConverter.getFactory().getOutputReadStructure()));
}
/**
@@ -260,7 +254,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
* @param expectedCols The columns that are REQUIRED
*/
private void assertExpectedColumns(final Set<String> actualCols, final Set<String> expectedCols) {
- final Set<String> missingColumns = new HashSet<String>(expectedCols);
+ final Set<String> missingColumns = new HashSet<>(expectedCols);
missingColumns.removeAll(actualCols);
if (missingColumns.size() > 0) {
@@ -272,41 +266,41 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
}
/**
- * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the barcodeFastqWriterMap map,
- * where the key to the map is the concatenation of all barcodes in order for the given line.
+ * For each line in the MULTIPLEX_PARAMS file create a FastqRecordsWriter and put it in the sampleBarcodeFastqWriterMap map,
+ * where the key to the map is the concatenation of all sampleBarcodes in order for the given line.
*/
private void populateWritersFromMultiplexParams() {
final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(MULTIPLEX_PARAMS);
final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT_PREFIX");
- final List<String> barcodeColumnLabels = new ArrayList<String>();
- for (int i = 1; i <= readStructure.barcodes.length(); i++) {
- barcodeColumnLabels.add("BARCODE_" + i);
+ final List<String> sampleBarcodeColumnLabels = new ArrayList<>();
+ for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) {
+ sampleBarcodeColumnLabels.add("BARCODE_" + i);
}
- expectedColumnLabels.addAll(barcodeColumnLabels);
+ expectedColumnLabels.addAll(sampleBarcodeColumnLabels);
assertExpectedColumns(libraryParamsParser.columnLabels(), expectedColumnLabels);
for (final TabbedTextFileWithHeaderParser.Row row : libraryParamsParser) {
- List<String> barcodeValues = null;
+ List<String> sampleBarcodeValues = null;
- if (barcodeColumnLabels.size() > 0) {
- barcodeValues = new ArrayList<String>();
- for (final String barcodeLabel : barcodeColumnLabels) {
- barcodeValues.add(row.getField(barcodeLabel));
+ if (sampleBarcodeColumnLabels.size() > 0) {
+ sampleBarcodeValues = new ArrayList<>();
+ for (final String sampleBarcodeLabel : sampleBarcodeColumnLabels) {
+ sampleBarcodeValues.add(row.getField(sampleBarcodeLabel));
}
}
- final String key = (barcodeValues == null || barcodeValues.contains("N")) ? null : StringUtil.join("", barcodeValues);
- if (barcodeFastqWriterMap.containsKey(key)) { //This will catch the case of having more than 1 line in a non-barcoded MULTIPLEX_PARAMS file
+ final String key = (sampleBarcodeValues == null || sampleBarcodeValues.contains("N")) ? null : StringUtil.join("", sampleBarcodeValues);
+ if (sampleBarcodeFastqWriterMap.containsKey(key)) { //This will catch the case of having more than 1 line in a non-barcoded MULTIPLEX_PARAMS file
throw new PicardException("Row for barcode " + key + " appears more than once in MULTIPLEX_PARAMS file " +
MULTIPLEX_PARAMS);
}
final FastqRecordsWriter writer = buildWriter(new File(row.getField("OUTPUT_PREFIX")));
- barcodeFastqWriterMap.put(key, writer);
+ sampleBarcodeFastqWriterMap.put(key, writer);
}
- if (barcodeFastqWriterMap.isEmpty()) {
+ if (sampleBarcodeFastqWriterMap.isEmpty()) {
throw new PicardException("MULTIPLEX_PARAMS file " + MULTIPLEX_PARAMS + " does have any data rows.");
}
libraryParamsParser.close();
@@ -322,16 +316,24 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
final String prefixString = outputPrefix.getName();
final String suffixString = COMPRESS_OUTPUTS ? "fastq.gz" : "fastq";
final FastqWriter[] templateWriters = new FastqWriter[readStructure.templates.length()];
- final FastqWriter[] barcodeWriters = new FastqWriter[readStructure.barcodes.length()];
+ final FastqWriter[] sampleBarcodeWriters = new FastqWriter[readStructure.sampleBarcodes.length()];
+ final FastqWriter[] molecularBarcodeWriters = new FastqWriter[readStructure.molecularBarcode.length()];
+
for (int i = 0; i < templateWriters.length; ++i) {
final String filename = String.format("%s.%d.%s", prefixString, i+1, suffixString);
templateWriters[i] = fastqWriterFactory.newWriter(new File(outputDir, filename));
}
- for (int i = 0; i < barcodeWriters.length; ++i) {
+
+ for (int i = 0; i < sampleBarcodeWriters.length; ++i) {
final String filename = String.format("%s.barcode_%d.%s", prefixString, i+1, suffixString);
- barcodeWriters[i] = fastqWriterFactory.newWriter(new File(outputDir, filename));
+ sampleBarcodeWriters[i] = fastqWriterFactory.newWriter(new File(outputDir, filename));
+ }
+
+ for (int i = 0; i < molecularBarcodeWriters.length; ++i) {
+ final String filename = String.format("%s.index_%d.%s", prefixString, i+1, suffixString);
+ molecularBarcodeWriters[i] = fastqWriterFactory.newWriter(new File(outputDir, filename));
}
- return new FastqRecordsWriter(templateWriters, barcodeWriters);
+ return new FastqRecordsWriter(templateWriters, sampleBarcodeWriters, molecularBarcodeWriters);
}
public static void main(final String[] args) {
@@ -339,25 +341,30 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
}
/**
- * Container for various FastqWriters, one for each template read and one for each barcode read.
+ * Container for various FastqWriters, one for each template read, one for each sample barcode read,
+ * and one for each molecular barcode read.
*/
private static class FastqRecordsWriter implements IlluminaBasecallsConverter.ConvertedClusterDataWriter<FastqRecordsForCluster> {
final FastqWriter[] templateWriters;
- final FastqWriter[] barcodeWriters;
+ final FastqWriter[] sampleBarcodeWriters;
+ final FastqWriter[] molecularBarcodeWriters;
/**
* @param templateWriters Writers for template reads in order, e,g. 0th element is for template read 1.
- * @param barcodeWriters Writers for barcode reads in order, e,g. 0th element is for barcode read 1.
+ * @param sampleBarcodeWriters Writers for sample barcode reads in order, e,g. 0th element is for sample barcode read 1.
+ * @param molecularBarcodeWriters Writers for molecular barcode reads in order, e,g. 0th element is for molecular barcode read 1.
*/
- private FastqRecordsWriter(final FastqWriter[] templateWriters, final FastqWriter[] barcodeWriters) {
- this.templateWriters = templateWriters;
- this.barcodeWriters = barcodeWriters;
+ private FastqRecordsWriter(final FastqWriter[] templateWriters, final FastqWriter[] sampleBarcodeWriters, final FastqWriter[] molecularBarcodeWriters) {
+ this.templateWriters = templateWriters;
+ this.sampleBarcodeWriters = sampleBarcodeWriters;
+ this.molecularBarcodeWriters = molecularBarcodeWriters;
}
@Override
public void write(final FastqRecordsForCluster records) {
- write(templateWriters, records.templateRecords);
- write(barcodeWriters, records.barcodeRecords);
+ write(templateWriters, records.templateRecords);
+ write(sampleBarcodeWriters, records.sampleBarcodeRecords);
+ write(molecularBarcodeWriters, records.molecularBarcodeRecords);
}
private void write(final FastqWriter[] writers, final FastqRecord[] records) {
@@ -371,7 +378,10 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
for (final FastqWriter writer : templateWriters) {
writer.close();
}
- for (final FastqWriter writer : barcodeWriters) {
+ for (final FastqWriter writer : sampleBarcodeWriters) {
+ writer.close();
+ }
+ for (final FastqWriter writer : molecularBarcodeWriters) {
writer.close();
}
}
@@ -383,11 +393,13 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
static class FastqRecordsForCluster {
// These are accessed directly by converter and writer rather than through getters and setters.
final FastqRecord[] templateRecords;
- final FastqRecord[] barcodeRecords;
+ final FastqRecord[] sampleBarcodeRecords;
+ final FastqRecord[] molecularBarcodeRecords;
- FastqRecordsForCluster(final int numTemplates, final int numBarcodes) {
- templateRecords = new FastqRecord[numTemplates];
- barcodeRecords = new FastqRecord[numBarcodes];
+ FastqRecordsForCluster(final int numTemplates, final int numSampleBarcodes, final int numMolecularBarcodes) {
+ templateRecords = new FastqRecord[numTemplates];
+ sampleBarcodeRecords = new FastqRecord[numSampleBarcodes];
+ molecularBarcodeRecords = new FastqRecord[numMolecularBarcodes];
}
}
@@ -398,19 +410,25 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
implements IlluminaBasecallsConverter.ClusterDataConverter<FastqRecordsForCluster> {
private final int [] templateIndices;
- private final int [] barcodeIndices;
+ private final int [] sampleBarcodeIndicies;
+ private final int [] molecularBarcodeIndicies;
ClusterToFastqRecordsForClusterConverter(final ReadStructure outputReadStructure) {
- this.templateIndices = outputReadStructure.templates.getIndices();
- this.barcodeIndices = outputReadStructure.barcodes.getIndices();
+ this.templateIndices = outputReadStructure.templates.getIndices();
+ this.sampleBarcodeIndicies = outputReadStructure.sampleBarcodes.getIndices();
+ this.molecularBarcodeIndicies = outputReadStructure.molecularBarcode.getIndices();
}
@Override
public FastqRecordsForCluster convertClusterToOutputRecord(final ClusterData cluster) {
- final FastqRecordsForCluster ret = new FastqRecordsForCluster(readStructure.templates.length(), readStructure.barcodes.length());
- final boolean appendReadNumberSuffix = ret.templateRecords.length > 1;
- makeFastqRecords(ret.templateRecords, templateIndices, cluster, appendReadNumberSuffix);
- makeFastqRecords(ret.barcodeRecords, barcodeIndices, cluster, false);
+ final FastqRecordsForCluster ret = new FastqRecordsForCluster(readStructure.templates.length(), readStructure.sampleBarcodes.length(), readStructure.molecularBarcode.length());
+ final boolean appendTemplateNumberSuffix = ret.templateRecords.length > 1;
+ final boolean appendMolecularBarcodeNumber = ret.molecularBarcodeRecords.length > 1;
+
+ makeFastqRecords(ret.templateRecords, templateIndices, cluster, appendTemplateNumberSuffix);
+ makeFastqRecords(ret.sampleBarcodeRecords, sampleBarcodeIndicies, cluster, false);
+ makeFastqRecords(ret.molecularBarcodeRecords, molecularBarcodeIndicies, cluster, appendMolecularBarcodeNumber);
+
return ret;
}
@@ -431,17 +449,20 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
}
/**
- * Coded passed to IlluminaBasecallsConverter for use in SortingCollections of output records.
+ * Codec passed to IlluminaBasecallsConverter for use in SortingCollections of output records.
*/
static class FastqRecordsForClusterCodec implements SortingCollection.Codec<FastqRecordsForCluster> {
private final int numTemplates;
- private final int numBarcodes;
+ private final int numSampleBarcodes;
+ private final int numMolecularBarcodes;
+
private BasicFastqWriter writer = null;
private FastqReader reader = null;
- FastqRecordsForClusterCodec(final int numTemplates, final int numBarcodes) {
+ FastqRecordsForClusterCodec(final int numTemplates, final int numSampleBarcodes, final int numMolecularBarcodes) {
this.numTemplates = numTemplates;
- this.numBarcodes = numBarcodes;
+ this.numSampleBarcodes = numSampleBarcodes;
+ this.numMolecularBarcodes = numMolecularBarcodes;
}
@Override
@@ -454,12 +475,14 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
reader = new FastqReader(new BufferedReader(new InputStreamReader(is)));
}
+ //TODO: add tests to encode and decode
@Override
public void encode(final FastqRecordsForCluster val) {
if (numTemplates != val.templateRecords.length) throw new IllegalStateException();
- if (numBarcodes != val.barcodeRecords.length) throw new IllegalStateException();
+ if (numSampleBarcodes != val.sampleBarcodeRecords.length) throw new IllegalStateException();
encodeArray(val.templateRecords);
- encodeArray(val.barcodeRecords);
+ encodeArray(val.sampleBarcodeRecords);
+ encodeArray(val.molecularBarcodeRecords);
writer.flush();
}
@@ -472,9 +495,10 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
@Override
public FastqRecordsForCluster decode() {
if (!reader.hasNext()) return null;
- final FastqRecordsForCluster ret = new FastqRecordsForCluster(numTemplates, numBarcodes);
+ final FastqRecordsForCluster ret = new FastqRecordsForCluster(numTemplates, numSampleBarcodes, numMolecularBarcodes);
decodeArray(ret.templateRecords);
- decodeArray(ret.barcodeRecords);
+ decodeArray(ret.sampleBarcodeRecords);
+ decodeArray(ret.molecularBarcodeRecords);
return ret;
}
@@ -486,7 +510,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
@Override
public SortingCollection.Codec<FastqRecordsForCluster> clone() {
- return new FastqRecordsForClusterCodec(numTemplates, numBarcodes);
+ return new FastqRecordsForClusterCodec(numTemplates, numSampleBarcodes, numMolecularBarcodes);
}
}
}
diff --git a/src/java/picard/illumina/IlluminaBasecallsToSam.java b/src/java/picard/illumina/IlluminaBasecallsToSam.java
index 839a9d9..c205052 100644
--- a/src/java/picard/illumina/IlluminaBasecallsToSam.java
+++ b/src/java/picard/illumina/IlluminaBasecallsToSam.java
@@ -58,6 +58,7 @@ import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -306,14 +307,14 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
/**
* For each line in the LIBRARY_PARAMS file create a SamFileWriter and put it in the barcodeSamWriterMap map, where
- * the key to the map is the concatenation of all barcodes in order for the given line
+ * the key to the map is the concatenation of all sampleBarcodes in order for the given line
*/
private void populateWritersFromLibraryParams() {
final TabbedTextFileWithHeaderParser libraryParamsParser = new TabbedTextFileWithHeaderParser(LIBRARY_PARAMS);
final Set<String> expectedColumnLabels = CollectionUtil.makeSet("OUTPUT", "SAMPLE_ALIAS", "LIBRARY_NAME");
final List<String> barcodeColumnLabels = new ArrayList<String>();
- if (readStructure.barcodes.length() == 1) {
+ if (readStructure.sampleBarcodes.length() == 1) {
//For the single barcode read case, the barcode label name can either by BARCODE or BARCODE_1
if (libraryParamsParser.hasColumn("BARCODE")) {
barcodeColumnLabels.add("BARCODE");
@@ -323,7 +324,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
throw new PicardException("LIBRARY_PARAMS(BARCODE_PARAMS) file " + LIBRARY_PARAMS + " does not have column BARCODE or BARCODE_1.");
}
} else {
- for (int i = 1; i <= readStructure.barcodes.length(); i++) {
+ for (int i = 1; i <= readStructure.sampleBarcodes.length(); i++) {
barcodeColumnLabels.add("BARCODE_" + i);
}
}
@@ -365,28 +366,23 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
}
/**
- * Create the list of headers that will be added to the SAMFileHeader for a library with the given barcodes (or
- * the entire run if barcodes == NULL). Note that any value that is null will NOT be added via buildSamFileWriter
+ * Create the list of headers that will be added to the SAMFileHeader for a library with the given sampleBarcodes (or
+ * the entire run if sampleBarcodes == NULL). Note that any value that is null will NOT be added via buildSamFileWriter
* but is placed in the map in order to be able to query the tags that we automatically add.
*
- * @param barcodes The list of barcodes that uniquely identify the read group we are building parameters for
+ * @param barcodes The list of sampleBarcodes that uniquely identify the read group we are building parameters for
* @return A Map of ReadGroupHeaderTags -> Values
*/
private Map<String, String> buildSamHeaderParameters(final List<String> barcodes) {
- final Map<String, String> params = new HashMap<String, String>();
+ final Map<String, String> params = new LinkedHashMap<String, String>();
String platformUnit = RUN_BARCODE + "." + LANE;
if (barcodes != null) platformUnit += ("." + IlluminaUtil.barcodeSeqsToString(barcodes));
- params.put("PU", platformUnit);
- params.put("CN", SEQUENCING_CENTER);
params.put("PL", PLATFORM);
- if (RUN_START_DATE != null) {
- final Iso8601Date date = new Iso8601Date(RUN_START_DATE);
- params.put("DT", date.toString());
- } else {
- params.put("DT", null);
- }
+ params.put("PU", platformUnit);
+ params.put("CN", SEQUENCING_CENTER);
+ params.put("DT", RUN_START_DATE == null ? null : new Iso8601Date(RUN_START_DATE).toString());
return params;
}
@@ -414,6 +410,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
}
final SAMFileHeader header = new SAMFileHeader();
+
header.setSortOrder(SAMFileHeader.SortOrder.queryname);
header.addReadGroup(rg);
return new SAMFileWriterWrapper(new SAMFileWriterFactory().makeSAMOrBAMWriter(header, true, output));
@@ -440,7 +437,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
final ArrayList<String> messages = new ArrayList<String>();
readStructure = new ReadStructure(READ_STRUCTURE);
- if (!readStructure.barcodes.isEmpty()) {
+ if (!readStructure.sampleBarcodes.isEmpty()) {
if (LIBRARY_PARAMS == null) {
messages.add("BARCODE_PARAMS or LIBRARY_PARAMS is missing. If READ_STRUCTURE contains a B (barcode)" +
" then either LIBRARY_PARAMS or BARCODE_PARAMS(deprecated) must be provided!");
@@ -536,7 +533,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
for (int i = 1; i < numRecords; ++i) {
ret.records[i] = bamCodec.decode();
if (ret.records[i] == null) {
- throw new IllegalStateException(String.format("Expected to read % records but read only %d", numRecords, i));
+ throw new IllegalStateException(String.format("Expected to read %d records but read only %d", numRecords, i));
}
}
return ret;
diff --git a/src/java/picard/illumina/MarkIlluminaAdapters.java b/src/java/picard/illumina/MarkIlluminaAdapters.java
index 5cc05fa..d7e39f7 100644
--- a/src/java/picard/illumina/MarkIlluminaAdapters.java
+++ b/src/java/picard/illumina/MarkIlluminaAdapters.java
@@ -224,8 +224,8 @@ public class MarkIlluminaAdapters extends CommandLineProgram {
progress.record(r);
if (out != null) out.addAlignment(r);
- final Integer clip = rec.getIntegerAttribute(ReservedTagConstants.XT);
- if (clip != null) histo.increment(rec.getReadLength() - clip + 1);
+ final Integer clip = r.getIntegerAttribute(ReservedTagConstants.XT);
+ if (clip != null) histo.increment(r.getReadLength() - clip + 1);
}
}
}
diff --git a/src/java/picard/illumina/parser/ReadStructure.java b/src/java/picard/illumina/parser/ReadStructure.java
index 7bf53b9..513e44d 100644
--- a/src/java/picard/illumina/parser/ReadStructure.java
+++ b/src/java/picard/illumina/parser/ReadStructure.java
@@ -35,10 +35,12 @@ import java.util.regex.Pattern;
/**
* Describes the intended logical output structure of clusters of an Illumina run.
* (e.g. If the input data consists of 80 base
- * clusters and we provide a read structure of "36T8B36T" then those bases should be split into 3 reads:
- * read one should be 36 cycles of template,
- * read two should be 8 cycles of barcode,
- * read three should be another 36 cycle template read.)
+ * clusters and we provide a read structure of "28T8M8B8S28T" then those bases should be split into 4 reads:
+ * read one should be 28 cycles of template,
+ * read two should be 8 cycles of molecular barcode,
+ * read three should be 8 cycles of sample barcode,
+ * 8 cycles are skipped,
+ * read four should be another 28 cycle template read.)
* Note: In future releases, ReadStructures will be specified by clients of IlluminaDataProvider(currently
* read structures are detected by IlluminaDataProviderFactory via the structure of QSeq files). When working with
* QSeq formats, while the individual reads need not fall on QSeq end file boundaries the total number of cycles
@@ -50,19 +52,24 @@ import java.util.regex.Pattern;
*/
public class ReadStructure {
public static final String PARAMETER_DOC =
- "A description of the logical structure of clusters in an Illumina Run, i.e. a description of the structure IlluminaBasecallsToSam " +
+ "A description of the logical structure of clusters in an Illumina Run, i.e. a description of the structure IlluminaBasecallsToSam " +
"assumes the data to be in. It should consist of integer/character pairs describing the number of cycles and the type of those " +
- "cycles (B for Barcode, T for Template, and S for skip). E.g. If the input data consists of 80 base clusters and we provide a " +
- "read structure of \"36T8B8S28T\" then, before being converted to SAM records those bases will be split into 4 reads where " +
- "read one consists of 36 cycles of template, read two consists of 8 cycles of barcode, read three will be an 8 base read of " +
- "skipped cycles and read four is another 28 cycle template read. The read consisting of skipped cycles would NOT be included " +
- "in output SAM/BAM file read groups.";
+ "cycles (B for Sample Barcode, M for molecular barcode, T for Template, and S for skip). E.g. If the input data consists of 80 " +
+ "base clusters and we provide a read structure of \"28T8M8B8S28T\" then the sequence may be split up into four reads:\n" +
+ "* read one with 28 cycles (bases) of template\n" +
+ "* read two with 8 cycles (bases) of molecular barcode (ex. unique molecular barcode)\n" +
+ "* read three with 8 cycles (bases) of sample barcode\n" +
+ "* 8 cycles (bases) skipped.\n" +
+ "* read four with 28 cycles (bases) of template\n" +
+ "The skipped cycles would NOT be included in an output SAM/BAM file or in read groups therein.";
public final List<ReadDescriptor> descriptors;
public final int totalCycles;
public final int [] readLengths;
- public final Substructure barcodes;
+ public final Substructure sampleBarcodes;
public final Substructure templates;
+ public final Substructure molecularBarcode;
+
public final Substructure skips;
//nonSkips include barcode and template indices in the order they appear in the descriptors list
@@ -112,10 +119,11 @@ public class ReadStructure {
this.descriptors = Collections.unmodifiableList(collection);
int cycles = 0;
- final List<Integer> nonSkipIndicesList = new ArrayList<Integer>();
- final List<Integer> barcodeIndicesList = new ArrayList<Integer>();
- final List<Integer> templateIndicesList = new ArrayList<Integer>();
- final List<Integer> skipIndicesList = new ArrayList<Integer>();
+ final List<Integer> nonSkipIndicesList = new ArrayList<Integer>();
+ final List<Integer> sampleBarcodeIndicesList = new ArrayList<Integer>();
+ final List<Integer> templateIndicesList = new ArrayList<Integer>();
+ final List<Integer> molecularBarcodeIndicesList = new ArrayList<Integer>();
+ final List<Integer> skipIndicesList = new ArrayList<Integer>();
readLengths = new int[collection.size()];
int currentCycleIndex = 0; // Current cycle in the entire read structure
@@ -134,7 +142,7 @@ public class ReadStructure {
switch(desc.type) {
case B:
nonSkipIndicesList.add(descIndex);
- barcodeIndicesList.add(descIndex);
+ sampleBarcodeIndicesList.add(descIndex);
break;
case T:
nonSkipIndicesList.add(descIndex);
@@ -143,6 +151,10 @@ public class ReadStructure {
case S:
skipIndicesList.add(descIndex);
break;
+ case M:
+ nonSkipIndicesList.add(descIndex);
+ molecularBarcodeIndicesList.add(descIndex);
+ break;
default:
throw new IllegalArgumentException("Unsupported ReadType (" + desc.type + ") encountered by IlluminaRunConfiugration!");
@@ -150,11 +162,12 @@ public class ReadStructure {
++descIndex;
}
- this.totalCycles = cycles;
- this.barcodes = new Substructure(barcodeIndicesList, allRanges);
- this.templates = new Substructure(templateIndicesList, allRanges);
- this.skips = new Substructure(skipIndicesList, allRanges);
- this.nonSkips = new Substructure(nonSkipIndicesList, allRanges);
+ this.totalCycles = cycles;
+ this.sampleBarcodes = new Substructure(sampleBarcodeIndicesList, allRanges);
+ this.templates = new Substructure(templateIndicesList, allRanges);
+ this.skips = new Substructure(skipIndicesList, allRanges);
+ this.molecularBarcode = new Substructure(molecularBarcodeIndicesList, allRanges);
+ this.nonSkips = new Substructure(nonSkipIndicesList, allRanges);
}
/**
@@ -196,7 +209,6 @@ public class ReadStructure {
throw new IllegalArgumentException(readStructure + " cannot be parsed as a ReadStructure! " + ReadStructureMsg);
}
-
final Matcher subMatcher = SubPattern.matcher(readStructure);
final List<ReadDescriptor> descriptors = new ArrayList<ReadDescriptor>();
while(subMatcher.find()) {
@@ -237,7 +249,7 @@ public class ReadStructure {
}
/** Represents a subset of ReadDescriptors in the containing ReadStructure, they ARE NOT necessarily contiguous
- * in the containing ReadStrucure but they ARE in the order they appear in the containing ReadStructure */
+ * in the containing ReadStructure but they ARE in the order they appear in the containing ReadStructure */
public class Substructure implements Iterable<ReadDescriptor> {
/** Total number of descriptors == readTypeIndices.length */
private final int numDescriptors;
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/illumina/parser/ReadType.java
index bd69cd5..87f3654 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/illumina/parser/ReadType.java
@@ -31,9 +31,10 @@ package picard.illumina.parser;
* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
**/
public enum ReadType {
- T, B, S;
+ T, B, M, S;
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+ public static final ReadType Template = T;
+ public static final ReadType Barcode = B;
+ public static final ReadType MolecularIndex = M;
+ public static final ReadType Skip = S;
}
diff --git a/src/java/picard/illumina/parser/Tile.java b/src/java/picard/illumina/parser/Tile.java
index a08f219..763f459 100644
--- a/src/java/picard/illumina/parser/Tile.java
+++ b/src/java/picard/illumina/parser/Tile.java
@@ -31,7 +31,9 @@ import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
/** Represents a tile from TileMetricsOut.bin. Stores information on location (lane & tile #, density, number of clusters and the
* phasing/prephasing values associated with this tile
@@ -101,15 +103,10 @@ public class Tile {
/** For any given TileTemplateRead, we want to make sure that there is only a single TilePhasingValue */
private static Collection<TilePhasingValue> ensureSoleTilePhasingValuesPerRead(final Collection<TilePhasingValue> tilePhasingValues) {
- final Map<TileTemplateRead, Collection<TilePhasingValue>> partitionedMap = CollectionUtil.partition(tilePhasingValues,
- new CollectionUtil.Partitioner<TilePhasingValue, TileTemplateRead>() {
- @Override
- public TileTemplateRead getPartition(final TilePhasingValue phasingValue) {
- return phasingValue.getTileTemplateRead();
- }
- });
-
- final Collection<TilePhasingValue> newTilePhasingValues = new LinkedList<TilePhasingValue>();
+ final Map<TileTemplateRead, List<TilePhasingValue>> partitionedMap =
+ tilePhasingValues.stream().collect(Collectors.groupingBy(TilePhasingValue::getTileTemplateRead));
+
+ final Collection<TilePhasingValue> newTilePhasingValues = new LinkedList<>();
for (final TileTemplateRead read : partitionedMap.keySet()) {
newTilePhasingValues.add(CollectionUtil.getSoleElement(partitionedMap.get(read)));
}
diff --git a/src/java/picard/illumina/parser/TileMetricsUtil.java b/src/java/picard/illumina/parser/TileMetricsUtil.java
index 04af80f..2888d9d 100644
--- a/src/java/picard/illumina/parser/TileMetricsUtil.java
+++ b/src/java/picard/illumina/parser/TileMetricsUtil.java
@@ -38,8 +38,10 @@ import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
+import java.util.List;
import java.util.Map;
import java.util.Set;
+import java.util.stream.Collectors;
/**
* Utility for reading the tile data from an Illumina run directory's TileMetricsOut.bin file
@@ -73,13 +75,13 @@ public class TileMetricsUtil {
(tileMetricsOutFile));
// Collect the tiles by lane & tile, and then collect the metrics by lane
- final Map<String, Collection<IlluminaTileMetrics>> locationToMetricsMap = partitionTileMetricsByLocation(tileMetrics);
- final Collection<Tile> tiles = new LinkedList<Tile>();
- for (final Map.Entry<String, Collection<IlluminaTileMetrics>> entry : locationToMetricsMap.entrySet()) {
+ final Map<String, ? extends Collection<IlluminaTileMetrics>> locationToMetricsMap = partitionTileMetricsByLocation(tileMetrics);
+ final Collection<Tile> tiles = new LinkedList<>();
+ for (final Map.Entry<String, ? extends Collection<IlluminaTileMetrics>> entry : locationToMetricsMap.entrySet()) {
final Collection<IlluminaTileMetrics> tileRecords = entry.getValue();
// Get a mapping from metric code number to the corresponding IlluminaTileMetrics
- final Map<Integer, Collection<IlluminaTileMetrics>> codeMetricsMap = partitionTileMetricsByCode(tileRecords);
+ final Map<Integer, ? extends Collection<IlluminaTileMetrics>> codeMetricsMap = partitionTileMetricsByCode(tileRecords);
final Set<Integer> observedCodes = codeMetricsMap.keySet();
if (!(observedCodes.contains(IlluminaMetricsCode.DENSITY_ID.getMetricsCode()) && observedCodes.contains(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode())))
@@ -100,9 +102,9 @@ public class TileMetricsUtil {
}
/** Pulls out the phasing & prephasing value for the template reads and returns a collection of TilePhasingValues representing these */
- private static Collection<TilePhasingValue> getTilePhasingValues(final Map<Integer, Collection<IlluminaTileMetrics>> codeMetricsMap, final ReadStructure readStructure) {
+ private static Collection<TilePhasingValue> getTilePhasingValues(final Map<Integer, ? extends Collection<IlluminaTileMetrics>> codeMetricsMap, final ReadStructure readStructure) {
boolean isFirstRead = true;
- final Collection<TilePhasingValue> tilePhasingValues = new ArrayList<TilePhasingValue>();
+ final Collection<TilePhasingValue> tilePhasingValues = new ArrayList<>();
for (int descriptorIndex = 0; descriptorIndex < readStructure.descriptors.size(); descriptorIndex++) {
if (readStructure.descriptors.get(descriptorIndex).type == ReadType.Template) {
final TileTemplateRead tileTemplateRead = isFirstRead ? TileTemplateRead.FIRST : TileTemplateRead.SECOND;
@@ -127,8 +129,8 @@ public class TileMetricsUtil {
/** According to Illumina, for every lane/tile/code combination they will only use the last value. Filter out the previous values */
private static Collection<IlluminaTileMetrics> determineLastValueForLaneTileMetricsCode(final Iterator<IlluminaTileMetrics>
tileMetricsIterator) {
- final Map<TileMetricsOutReader.IlluminaLaneTileCode, IlluminaTileMetrics> filteredTileMetrics = new HashMap<TileMetricsOutReader.IlluminaLaneTileCode, IlluminaTileMetrics>();
- for (final IlluminaTileMetrics illuminaTileMetrics : new IterableAdapter<IlluminaTileMetrics>(tileMetricsIterator)) {
+ final Map<TileMetricsOutReader.IlluminaLaneTileCode, IlluminaTileMetrics> filteredTileMetrics = new HashMap<>();
+ for (final IlluminaTileMetrics illuminaTileMetrics : new IterableAdapter<>(tileMetricsIterator)) {
filteredTileMetrics.put(illuminaTileMetrics.getLaneTileCode(), illuminaTileMetrics);
}
@@ -140,22 +142,13 @@ public class TileMetricsUtil {
}
// Wrapper around CollectionUtil.Partitioner, purely to de-bulk the actual methods
- private static Map<Integer, Collection<IlluminaTileMetrics>> partitionTileMetricsByCode(final Collection<IlluminaTileMetrics> tileMetrics) {
- return CollectionUtil.partition(tileMetrics, new CollectionUtil.Partitioner<IlluminaTileMetrics, Integer>() {
- @Override
- public Integer getPartition(final IlluminaTileMetrics metric) {
- return metric.getMetricCode();
- }
- });
+ private static Map<Integer, ? extends Collection<IlluminaTileMetrics>> partitionTileMetricsByCode(final Collection<IlluminaTileMetrics> tileMetrics) {
+ return tileMetrics.stream().collect(Collectors.groupingBy(IlluminaTileMetrics::getMetricCode));
}
// Wrapper around CollectionUtil.Partitioner, purely to de-bulk the actual methods
- private static Map<String, Collection<IlluminaTileMetrics>> partitionTileMetricsByLocation(final Collection<IlluminaTileMetrics> tileMetrics) {
- return CollectionUtil.partition(tileMetrics, new CollectionUtil.Partitioner<IlluminaTileMetrics, String>() {
- @Override
- public String getPartition(final IlluminaTileMetrics metric) {
- return renderMetricLocationKey(metric);
- }
- });
+ private static Map<String, ? extends Collection<IlluminaTileMetrics>> partitionTileMetricsByLocation(final Collection<IlluminaTileMetrics> tileMetrics) {
+ return tileMetrics.stream().collect(Collectors.groupingBy(TileMetricsUtil::renderMetricLocationKey));
}
+
}
diff --git a/src/java/picard/reference/ExtractSequences.java b/src/java/picard/reference/ExtractSequences.java
index 2270b6c..4dc6c04 100644
--- a/src/java/picard/reference/ExtractSequences.java
+++ b/src/java/picard/reference/ExtractSequences.java
@@ -50,21 +50,32 @@ import java.io.IOException;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Extracts one or more intervals described in an interval_list file " +
- "from a given reference sequence and writes them out in FASTA format. Requires a fasta index " +
- "file to be present.",
- usageShort = "Extracts intervals from a reference sequence, writing them to a FASTA file",
+ usage = ExtractSequences.USAGE_SUMMARY + ExtractSequences.USAGE_DETAILS,
+ usageShort = ExtractSequences.USAGE_SUMMARY,
programGroup = Fasta.class
)
public class ExtractSequences extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY ="Subsets intervals from a reference sequence to a new FASTA file.";
+ static final String USAGE_DETAILS ="This tool takes a list of intervals, reads the corresponding subsquences from a reference " +
+ "FASTA file and writes them to a new FASTA file as separate records. Note that the reference FASTA file must be " +
+ "accompanied by an index file and the interval list must be provided in Picard list format. The names provided for the " +
+ "intervals will be used to name the corresponding records in the output file." +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar ExtractSequences \\<br />" +
+ " INTERVAL_LIST=regions_of_interest.interval_list \\<br />" +
+ " R=reference.fasta \\<br />" +
+ " O=extracted_IL_sequences.fasta" +
+ "</pre>" +
+ "<hr />";
@Option(doc="Interval list describing intervals to be extracted from the reference sequence.")
public File INTERVAL_LIST;
- @Option(shortName= StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc="Reference sequence file.")
+ @Option(shortName= StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc="Reference sequence FASTA file.")
public File REFERENCE_SEQUENCE;
- @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output fasta file.")
+ @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output FASTA file.")
public File OUTPUT;
@Option(doc="Maximum line length for sequence data.")
diff --git a/src/java/picard/reference/NonNFastaSize.java b/src/java/picard/reference/NonNFastaSize.java
new file mode 100644
index 0000000..fb093c1
--- /dev/null
+++ b/src/java/picard/reference/NonNFastaSize.java
@@ -0,0 +1,128 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.reference;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.reference.ReferenceSequence;
+import htsjdk.samtools.reference.ReferenceSequenceFile;
+import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.IntervalListReferenceSequenceMask;
+import htsjdk.samtools.util.ReferenceSequenceMask;
+import htsjdk.samtools.util.SequenceUtil;
+import htsjdk.samtools.util.StringUtil;
+import htsjdk.samtools.util.WholeGenomeReferenceSequenceMask;
+import picard.PicardException;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Fasta;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * A tool to count the number of non-N bases in a fasta file
+ */
+@CommandLineProgramProperties(
+ usage = NonNFastaSize.USAGE_SUMMARY + NonNFastaSize.USAGE_DETAILS,
+ usageShort = NonNFastaSize.USAGE_SUMMARY,
+ programGroup = Fasta.class
+)
+public class NonNFastaSize extends CommandLineProgram {
+    static final String USAGE_SUMMARY = "Counts the number of non-N bases in a fasta file.";
+
+    static final String USAGE_DETAILS = "This tool takes any FASTA-formatted file and counts the number of non-N bases in it. " +
+            "Note that it requires that the fasta file have associated index (.fai) and dictionary (.dict) files.<br />" +
+            "<h4>Usage example:</h4>" +
+            "<pre>" +
+            "java -jar picard.jar NonNFastaSize \\<br />" +
+            " I=input_sequence.fasta \\<br />" +
+            " O=count.txt" +
+            "</pre>" +
+            "<hr />";
+    @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input FASTA file.")
+    public File INPUT;
+
+    @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output file in which to record the count.")
+    public File OUTPUT;
+
+    @Option(shortName = "INTERVALS", doc = "An interval list file that contains the locations of the positions to assess. If not provided, the entire reference will be used", optional = true)
+    public File INTERVALS = null;
+
+    public static void main(final String[] args) {
+        new NonNFastaSize().instanceMainWithExit(args);
+    }
+
+    /**
+     * Counts the bases of INPUT that differ from SequenceUtil.N after upper-casing, limited to INTERVALS
+     * when that option is set, and writes the total followed by a newline to OUTPUT.
+     * @return 0 once the count has been written
+     * @throws PicardException if writing to OUTPUT fails with an IOException
+     */
+    @Override
+    protected int doWork() {
+        IOUtil.assertFileIsReadable(INPUT);
+        IOUtil.assertFileIsWritable(OUTPUT);
+
+        // set up the reference and a mask so that we only count the positions requested by the user
+        final ReferenceSequenceFile ref = ReferenceSequenceFileFactory.getReferenceSequenceFile(INPUT);
+        final ReferenceSequenceMask referenceSequenceMask;
+        if (INTERVALS != null) {
+            IOUtil.assertFileIsReadable(INTERVALS);
+            final IntervalList intervalList = IntervalList.fromFile(INTERVALS);
+            referenceSequenceMask = new IntervalListReferenceSequenceMask(intervalList);
+        } else {
+            final SAMFileHeader header = new SAMFileHeader();
+            header.setSequenceDictionary(ref.getSequenceDictionary());
+            referenceSequenceMask = new WholeGenomeReferenceSequenceMask(header);
+        }
+
+        long nonNbases = 0L;
+        for (final SAMSequenceRecord rec : ref.getSequenceDictionary().getSequences()) {
+            // pull out the contig and set up the bases
+            final ReferenceSequence sequence = ref.getSequence(rec.getSequenceName());
+            final byte[] bases = sequence.getBases();
+            StringUtil.toUpperCase(bases);
+            for (int i = 0; i < bases.length; i++) {
+                // only investigate this position if it's within our mask (queried with i + 1, presumably 1-based)
+                if (referenceSequenceMask.get(sequence.getContigIndex(), i+1)) {
+                    nonNbases += bases[i] == SequenceUtil.N ? 0 : 1;
+                }
+            }
+        }
+
+        // try-with-resources closes the writer even when write() throws
+        try (final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
+            out.write(nonNbases + "\n");
+        } catch (IOException ioe) {
+            throw new PicardException("Error writing to file " + OUTPUT.getAbsolutePath(), ioe);
+        }
+        return 0;
+    }
+}
diff --git a/src/java/picard/reference/NormalizeFasta.java b/src/java/picard/reference/NormalizeFasta.java
index 4f798fb..8298324 100644
--- a/src/java/picard/reference/NormalizeFasta.java
+++ b/src/java/picard/reference/NormalizeFasta.java
@@ -22,21 +22,31 @@ import java.io.IOException;
* same length, and are a reasonable length!
*/
@CommandLineProgramProperties(
- usage = "Takes any file that conforms to the fasta format and " +
- "normalizes it so that all lines of sequence except the last line per named sequence " +
- "are of the same length.",
- usageShort = "Normalizes lines of sequence in a fasta file to be of the same length",
+ usage = NormalizeFasta.USAGE_SUMMARY + NormalizeFasta.USAGE_DETAILS,
+ usageShort = NormalizeFasta.USAGE_SUMMARY,
programGroup = Fasta.class
)
public class NormalizeFasta extends CommandLineProgram {
-
- @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input fasta file to normalize.")
+ static final String USAGE_SUMMARY = "Normalizes lines of sequence in a FASTA file to be of the same length.";
+ static final String USAGE_DETAILS = "This tool takes any FASTA-formatted file and reformats the sequence to ensure that all of the " +
+ "sequence record lines are of the same length (with the exception of the last line). Although the default setting is 100 bases " +
+ "per line, a custom line_length can be specified by the user. In addition, record names can be truncated at the first " +
+ "instance of a whitespace character to ensure downstream compatibility.<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar NormalizeFasta \\<br />" +
+ " I=input_sequence.fasta \\<br />" +
+ " O=normalized_sequence.fasta" +
+ "</pre>" +
+ "<hr />"
+ ;
+ @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input FASTA file to normalize.")
public File INPUT;
- @Option(shortName= StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output fasta file to write.")
+ @Option(shortName= StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output FASTA file to write.")
public File OUTPUT;
- @Option(doc="The line length to be used for the output fasta file.")
+ @Option(doc="The line length to be used for the output FASTA file.")
public int LINE_LENGTH=100;
@Option(doc="Truncate sequence names at first whitespace.")
diff --git a/src/java/picard/sam/AddCommentsToBam.java b/src/java/picard/sam/AddCommentsToBam.java
index aa4c23e..3071f62 100644
--- a/src/java/picard/sam/AddCommentsToBam.java
+++ b/src/java/picard/sam/AddCommentsToBam.java
@@ -16,19 +16,30 @@ import java.util.List;
/**
* A tool to add comments to a BAM file header. Effectively copies the BAM file except for the addition of the @CO records
- * in the header. This tool does not support SAM files.
+ * in the header. This tool does not support SAM files. A block copying method is used to ensure efficient transfer to the output file.
*
* @author jgentry
*/
@CommandLineProgramProperties(
- usage = "Adds one or more comments to the header of a specified BAM file. Copies the file with the " +
- "modified header to a specified output file. Note that a block copying method is used to ensure efficient transfer to the " +
- "output file. SAM files are not supported",
- usageShort = "Adds comments to the header of a BAM file",
+ usage = AddCommentsToBam.USAGE_SUMMARY + AddCommentsToBam.USAGE_DETAILS,
+ usageShort = AddCommentsToBam.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class AddCommentsToBam extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Adds comments to the header of a BAM file.";
+ static final String USAGE_DETAILS = "This tool makes a copy of the input bam file, with a modified header that includes the comments " +
+ "specified at the command line (prefixed by @CO). Use double quotes to wrap comments that include whitespace or special characters. <br /><br />" +
+ "Note that this tool cannot be run on SAM files.<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar AddCommentsToBam \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=modified_bam.bam \\<br />" +
+ " C=comment_1 \\<br />" +
+ " C=\"comment 2\"" +
+ "</pre>" +
+ "" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input BAM file to add a comment to the header")
public File INPUT;
diff --git a/src/java/picard/sam/AddOrReplaceReadGroups.java b/src/java/picard/sam/AddOrReplaceReadGroups.java
index 27d6976..539e89e 100644
--- a/src/java/picard/sam/AddOrReplaceReadGroups.java
+++ b/src/java/picard/sam/AddOrReplaceReadGroups.java
@@ -30,17 +30,33 @@ import java.util.Arrays;
* @author mdepristo
*/
@CommandLineProgramProperties(
- usage = "Replaces all read groups in the INPUT file with a single new read group and assigns " +
- "all reads to this read group in the OUTPUT BAM",
- usageShort = "Replaces read groups in a BAM or SAM file with a single new read group",
+ usage = AddOrReplaceReadGroups.USAGE_SUMMARY + AddOrReplaceReadGroups.USAGE_DETAILS,
+ usageShort = AddOrReplaceReadGroups.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class AddOrReplaceReadGroups extends CommandLineProgram {
-
- @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input file (bam or sam or a GA4GH url).")
+ static final String USAGE_SUMMARY = "Replace read groups in a BAM file.";
+ static final String USAGE_DETAILS = "This tool enables the user to replace all read groups in the INPUT file with a single new read " +
+ "group and assign all reads to this read group in the OUTPUT BAM file.<br /><br />" +
+ "For more information about read groups, see the <a href='https://www.broadinstitute.org/gatk/guide/article?id=6472'>" +
+ "GATK Dictionary entry.</a> <br /><br /> " +
+ "This tool accepts INPUT BAM and SAM files or URLs from the Global Alliance for Genomics and Health (GA4GH) (see http://ga4gh.org/#/documentation)." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar AddOrReplaceReadGroups \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=output.bam \\<br />" +
+ " RGID=4 \\<br />" +
+ " RGLB=lib1 \\<br />" +
+ " RGPL=illumina \\<br />" +
+ " RGPU=unit1 \\<br />" +
+ " RGSM=20" +
+ "</pre>" +
+ "<hr />" ;
+ @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input file (BAM or SAM or a GA4GH url).")
public String INPUT = null;
- @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output file (bam or sam).")
+ @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Output file (BAM or SAM).")
public File OUTPUT = null;
@Option(shortName = StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, optional = true,
@@ -50,7 +66,7 @@ public class AddOrReplaceReadGroups extends CommandLineProgram {
@Option(shortName = "ID", doc = "Read Group ID")
public String RGID = "1";
- @Option(shortName = "LB", doc = "Read Group Library")
+ @Option(shortName = "LB", doc = "Read Group library")
public String RGLB;
@Option(shortName = "PL", doc = "Read Group platform (e.g. illumina, solid)")
diff --git a/src/java/picard/sam/BamIndexStats.java b/src/java/picard/sam/BamIndexStats.java
index 17d6e7f..95faeb6 100755
--- a/src/java/picard/sam/BamIndexStats.java
+++ b/src/java/picard/sam/BamIndexStats.java
@@ -45,14 +45,23 @@ import java.io.File;
* @author Martha Borkan
*/
@CommandLineProgramProperties(
- usage = "Generates BAM index statistics, including the number of aligned and unaligned SAMRecords for each reference sequence, " +
- "and the number of SAMRecords with no coordinate." +
- "Input BAM file must have a corresponding index file.\n",
- usageShort = "Generates index statistics from a BAM file",
+ usage = BamIndexStats.USAGE_SUMMARY + BamIndexStats.USAGE_DETAILS,
+ usageShort = BamIndexStats.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class BamIndexStats extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Generate index statistics from a BAM file";
+ static final String USAGE_DETAILS = "This tool calculates statistics from a BAM index (.bai) file, emulating the behavior of the " +
+ "\"samtools idxstats\" command. The statistics collected include counts of aligned and unaligned reads as well as all " +
+ "records with no start coordinate. The input to the tool is the BAM file name but it must be accompanied by a corresponding " +
+ "index file.<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar BamIndexStats \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=output" +
+ "</pre>" +
+ "<hr />" ;
private static final Log log = Log.getInstance(BamIndexStats.class);
@Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME,
@@ -71,7 +80,7 @@ public class BamIndexStats extends CommandLineProgram {
protected int doWork() {
if (INPUT.getName().endsWith(BAMIndex.BAMIndexSuffix))
- log.warn("INPUT should be BAM file not index file");
+ log.warn("INPUT should be the BAM file name, not its index file");
IOUtil.assertFileIsReadable(INPUT);
BAMIndexMetaData.printIndexStats(INPUT);
diff --git a/src/java/picard/sam/BuildBamIndex.java b/src/java/picard/sam/BuildBamIndex.java
index 177df98..9dea8a6 100755
--- a/src/java/picard/sam/BuildBamIndex.java
+++ b/src/java/picard/sam/BuildBamIndex.java
@@ -50,16 +50,25 @@ import java.net.URL;
* @author Martha Borkan
*/
@CommandLineProgramProperties(
- usage = "Generates a BAM index (.bai) file.",
- usageShort = "Generates a BAM index (.bai) file",
+ usage = BuildBamIndex.USAGE_SUMMARY + BuildBamIndex.USAGE_DETAILS,
+ usageShort = BuildBamIndex.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class BuildBamIndex extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Generates a BAM index \".bai\" file. ";
+ static final String USAGE_DETAILS = "This tool creates an index file for the input BAM that allows fast look-up of data in a " +
+ "BAM file, lke an index on a database. Note that this tool cannot be run on SAM files, and that the input BAM file must be " +
+ "sorted in coordinate order." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar BuildBamIndex \\<br />" +
+ " I=input.bam" +
+ "</pre>" +
+ "<hr />";
private static final Log log = Log.getInstance(BuildBamIndex.class);
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
- doc = "A BAM file or URL to process. Must be sorted in coordinate order.")
+ doc = "A BAM file or GA4GH URL to process. Must be sorted in coordinate order.")
public String INPUT;
URL inputUrl = null; // INPUT as URL
@@ -132,7 +141,7 @@ public class BuildBamIndex extends CommandLineProgram {
}
if (!bam.getFileHeader().getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)) {
- throw new SAMException("Input bam file must be sorted by coordinates");
+ throw new SAMException("Input bam file must be sorted by coordinate");
}
BAMIndexer.createIndex(bam, OUTPUT);
diff --git a/src/java/picard/sam/CalculateReadGroupChecksum.java b/src/java/picard/sam/CalculateReadGroupChecksum.java
index cbc53bb..31ad4ed 100644
--- a/src/java/picard/sam/CalculateReadGroupChecksum.java
+++ b/src/java/picard/sam/CalculateReadGroupChecksum.java
@@ -14,14 +14,23 @@ import java.io.FileWriter;
import java.io.IOException;
@CommandLineProgramProperties(
- usage = "Creates a hash code based on identifying information in the RG (read group) " +
- "records in a SAM file's header. This hash code changes any time read groups are added or removed " +
- "comparing one file's hash code to another tells you if the read groups in the BAM files are different.",
- usageShort = "Creates a hash code based on the read groups (RG) in the SAM or BAM header.",
+ usage = CalculateReadGroupChecksum.USAGE_SUMMARY + CalculateReadGroupChecksum.USAGE_DETAILS,
+ usageShort = CalculateReadGroupChecksum.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class CalculateReadGroupChecksum extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Creates a hash code based on the read groups (RG). ";
+ static final String USAGE_DETAILS = "This tool creates a hash code based on identifying information in the read groups " +
+ "(RG) of a \".BAM\" or \"SAM\" file header. Addition or removal of RGs changes the hash code, enabling the user to " +
+ "quickly determine if changes have been made to the read group information. " +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CalculateReadGroupChecksum \\<br />" +
+ " I=input.bam" +
+ "</pre>" +
+ "Please see the AddOrReplaceReadGroups tool documentation for information regarding the addition, subtraction, or merging of read groups." +
+ "<hr />";
private static final String OUTPUT_FILE_EXTENSION = ".read_group_md5";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM or BAM file. ")
diff --git a/src/java/picard/sam/CompareSAMs.java b/src/java/picard/sam/CompareSAMs.java
index 38d9d86..a45d17b 100644
--- a/src/java/picard/sam/CompareSAMs.java
+++ b/src/java/picard/sam/CompareSAMs.java
@@ -50,16 +50,24 @@ import java.util.Map;
* @author alecw at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "USAGE: CompareSAMS <SAMFile1> <SAMFile2>\n" +
- "Compares the headers of the two input SAM or BAM files, and, if possible, the SAMRecords. " +
- "For SAMRecords, compares only the readUnmapped flag, reference name, start position and strand. " +
- "Reports the number of SAMRecords that match, differ in alignment, are mapped in only one input, " +
- "or are missing in one of the files",
- usageShort = "Compares two input SAM or BAM files",
+ usage = CompareSAMs.USAGE_SUMMARY + CompareSAMs.USAGE_DETAILS,
+ usageShort = CompareSAMs.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class CompareSAMs extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Compare two input \".sam\" or \".bam\" files. ";
+ static final String USAGE_DETAILS = "This tool initially compares the headers of SAM or BAM files. " +
+ " If the file headers are comparable, the tool will examine and compare readUnmapped flag, reference name, " +
+ "start position and strand between the SAMRecords. The tool summarizes information on the number of read " +
+ "pairs that match or mismatch, and of reads that are missing or unmapped (stratified by direction: " +
+ "forward or reverse)." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CompareSAMs \\<br />" +
+ " file_1.bam \\<br />" +
+ " file_2.bam" +
+ "</pre>" +
+ "<hr />";
@PositionalArguments(minElements = 2, maxElements = 2)
public List<File> samFiles;
diff --git a/src/java/picard/sam/CreateSequenceDictionary.java b/src/java/picard/sam/CreateSequenceDictionary.java
index c9f98bf..62a09c6 100644
--- a/src/java/picard/sam/CreateSequenceDictionary.java
+++ b/src/java/picard/sam/CreateSequenceDictionary.java
@@ -53,12 +53,26 @@ import java.util.Set;
* SAMRecords, and the header contains only sequence records.
*/
@CommandLineProgramProperties(
- usage = "Read fasta or fasta.gz containing reference sequences, and write as a SAM or BAM file with only sequence dictionary.\n",
- usageShort = "Creates a SAM or BAM file from reference sequence in fasta format",
+ usage = CreateSequenceDictionary.USAGE_SUMMARY + CreateSequenceDictionary.USAGE_DETAILS,
+ usageShort = CreateSequenceDictionary.USAGE_SUMMARY,
programGroup = Fasta.class
)
public class CreateSequenceDictionary extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Creates a sequence dictionary for a reference sequence. ";
+ static final String USAGE_DETAILS = "This tool creates a sequence dictionary file (with \".dict\" extension) from a reference " +
+ "sequence provided in FASTA format, which is required by many processing and analysis tools. The output file contains a " +
+ "header but no SAMRecords, and the header contains only sequence records." +
+ "<br /><br />" +
+ "The reference sequence can be gzipped (both .fasta and .fasta.gz are supported)." +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CreateSequenceDictionary \\ <br />" +
+ " R=reference.fasta \\ <br />" +
+ " O=reference.dict" +
+ "" +
+ "</pre>" +
+ "<hr />";
// The following attributes define the command-line arguments
@Option(doc = "Input reference fasta or fasta.gz", shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME)
diff --git a/src/java/picard/sam/DownsampleSam.java b/src/java/picard/sam/DownsampleSam.java
index 4be3f7b..7fcda82 100644
--- a/src/java/picard/sam/DownsampleSam.java
+++ b/src/java/picard/sam/DownsampleSam.java
@@ -54,20 +54,31 @@ import java.util.Random;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Randomly down-sample a SAM or BAM file to retain only a subset of the reads in the file. " +
- "All reads for a templates are kept or discarded as a unit, with the goal of retaining reads" +
- "from PROBABILITY * input templates. While this will usually result in approximately " +
- "PROBABILITY * input reads being retained also, for very small PROBABILITIES this may not " +
- "be the case.\n" +
- "A number of different downsampling strategies are supported using the STRATEGY option:\n\n" +
- "ConstantMemory: " + DownsamplingIteratorFactory.CONSTANT_MEMORY_DESCRPTION + "\n\n" +
- "HighAccuracy: " + DownsamplingIteratorFactory.HIGH_ACCURACY_DESCRIPTION + "\n\n" +
- "Chained: " + DownsamplingIteratorFactory.CHAINED_DESCRIPTION + "\n",
- usageShort = "Down-sample a SAM or BAM file to retain a random subset of the reads",
+ usage = DownsampleSam.USAGE_SUMMARY + DownsampleSam.USAGE_DETAILS,
+ usageShort = DownsampleSam.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class DownsampleSam extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Downsample a SAM or BAM file. ";
+ static final String USAGE_DETAILS = "This tool applies a random downsampling algorithm to a SAM or BAM file to retain " +
+ "only a random subset of the reads. Reads in a mate-pair are either both kept or both discarded. Reads marked as not primary " +
+ "alignments are all discarded. Each read is given a probability P of being retained so that runs performed with the exact " +
+ "same input in the same order and with the same value for RANDOM_SEED will produce the same results. " +
+ "All reads for a template are kept or discarded as a unit, with the goal of retaining reads " +
+ "from PROBABILITY * input templates. While this will usually result in approximately " +
+ "PROBABILITY * input reads being retained also, for very small PROBABILITIES this may not " +
+ "be the case.\n" +
+ "A number of different downsampling strategies are supported using the STRATEGY option:\n\n" +
+ "ConstantMemory: " + DownsamplingIteratorFactory.CONSTANT_MEMORY_DESCRPTION + "\n\n" +
+ "HighAccuracy: " + DownsamplingIteratorFactory.HIGH_ACCURACY_DESCRIPTION + "\n\n" +
+ "Chained: " + DownsamplingIteratorFactory.CHAINED_DESCRIPTION + "\n\n" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar DownsampleSam \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=downsampled.bam" +
+ "</pre>" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM or BAM file to downsample.")
public File INPUT;
@@ -77,7 +88,7 @@ public class DownsampleSam extends CommandLineProgram {
@Option(shortName="S", doc="The downsampling strategy to use. See usage for discussion.")
public Strategy STRATEGY = Strategy.ConstantMemory;
- @Option(shortName = "R", doc = "Random seed to use if reproducibilty is desired. " +
+ @Option(shortName = "R", doc = "Random seed to use if deterministic behavior is desired. " +
"Setting to null will cause multiple invocations to produce different results.")
public Integer RANDOM_SEED = 1;
diff --git a/src/java/picard/sam/FastqToSam.java b/src/java/picard/sam/FastqToSam.java
index 9aa68fc..37bf41e 100644
--- a/src/java/picard/sam/FastqToSam.java
+++ b/src/java/picard/sam/FastqToSam.java
@@ -62,12 +62,25 @@ import java.util.List;
* Input files can be in GZip format (end in .gz).
*/
@CommandLineProgramProperties(
- usage = "Extracts read sequences and qualities from the input fastq file and writes them into the output file in unaligned BAM format."
- + " Input files can be in GZip format (end in .gz).\n",
- usageShort = "Converts a fastq file to an unaligned BAM or SAM file",
+ usage = FastqToSam.USAGE_SUMMARY + FastqToSam.USAGE_DETAILS,
+ usageShort = FastqToSam.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class FastqToSam extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Converts a FASTQ file to an unaligned BAM or SAM file. ";
+ static final String USAGE_DETAILS = "This tool extracts read sequences and base qualities from the input FASTQ file and writes them" +
+ " out to a new file in unaligned BAM (uBAM) format. Read group information can be provided on the command line. <br /><br /> " +
+ "Three versions of FASTQ quality scales are supported: FastqSanger, FastqSolexa and FastqIllumina " +
+ "(see http://maq.sourceforge.net/fastq.shtml for details). Input FASTQ files can be in GZip format " +
+ "(with .gz extension)." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar FastqToSam \\<br />" +
+ " F1=file_1.fastq \\<br />" +
+ " O=fastq_to_bam.bam \\<br />" +
+ " SM=for_tool_testing " +
+ "</pre>" +
+ "<hr />";
private static final Log LOG = Log.getInstance(FastqToSam.class);
@Option(shortName="F1", doc="Input fastq file (optionally gzipped) for single end data, or first read in paired end data.")
@@ -79,9 +92,9 @@ public class FastqToSam extends CommandLineProgram {
@Option(doc="Use sequential fastq files with the suffix <prefix>_###.fastq or <prefix>_###.fastq.gz", optional=true)
public boolean USE_SEQUENTIAL_FASTQS = false;
- @Option(shortName="V", doc="A value describing how the quality values are encoded in the fastq. Either Solexa for pre-pipeline 1.3 " +
- "style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above (phred scaling + 64) or Standard for phred scaled " +
- "scores with a character shift of 33. If this value is not specified, the quality format will be detected automatically.", optional = true)
+ @Option(shortName="V", doc="A value describing how the quality values are encoded in the input FASTQ file. " +
+ "Either Solexa (phred scaling + 66), Illumina (phred scaling + 64) or Standard (phred scaling + 33). " +
+ "If this value is not specified, the quality format will be detected automatically.", optional = true)
public FastqQualityFormat QUALITY_FORMAT;
@Option(doc="Output SAM/BAM file. ", shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME)
@@ -132,7 +145,7 @@ public class FastqToSam extends CommandLineProgram {
@Option(doc="Maximum quality allowed in the input fastq. An exception will be thrown if a quality is greater than this value.")
public int MAX_Q = SAMUtils.MAX_PHRED_SCORE;
- @Option(doc="If true and this is an unpaired fastq any occurance of '/1' will be removed from the end of a read name.")
+ @Option(doc="If true and this is an unpaired fastq any occurrence of '/1' will be removed from the end of a read name.")
public Boolean STRIP_UNPAIRED_MATE_NUMBER = false;
@Option(doc="Allow (and ignore) empty lines")
diff --git a/src/java/picard/sam/FilterSamReads.java b/src/java/picard/sam/FilterSamReads.java
index 6d5a44a..234262b 100644
--- a/src/java/picard/sam/FilterSamReads.java
+++ b/src/java/picard/sam/FilterSamReads.java
@@ -57,13 +57,26 @@ import java.text.DecimalFormat;
* $Id$
*/
@CommandLineProgramProperties(
- usage = "Produces a new SAM or BAM file by including or excluding aligned reads " +
- "or a list of reads names supplied in the READ_LIST_FILE from the INPUT SAM or BAM file.\n",
- usageShort = "Creates a new SAM or BAM file by including or excluding aligned reads",
+ usage = FilterSamReads.USAGE_SUMMARY + FilterSamReads.USAGE_DETAILS,
+ usageShort = FilterSamReads.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class FilterSamReads extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Subset read data from a SAM or BAM file. ";
+ static final String USAGE_DETAILS = "This tool takes a SAM or BAM file and subsets it to a new file that either excludes or " +
+ "only includes either aligned or unaligned reads (set using FILTER), or specific reads based on a list of reads names " +
+ "supplied in the READ_LIST_FILE. " +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar FilterSamReads \\<br /> " +
+ " I=input.bam \\ <br /> " +
+ " O=output.bam \\<br /> " +
+ " READ_LIST_FILE=read_names.txt" +
+ " FILTER=filter_value" +
+ "</pre> " +
+ "For information on the SAM format, please see: http://samtools.sourceforge.net" +
+ "<hr />";
private static final Log log = Log.getInstance(FilterSamReads.class);
private static enum Filter {
diff --git a/src/java/picard/sam/FixMateInformation.java b/src/java/picard/sam/FixMateInformation.java
index fe2c605..058150f 100644
--- a/src/java/picard/sam/FixMateInformation.java
+++ b/src/java/picard/sam/FixMateInformation.java
@@ -64,16 +64,25 @@ import java.util.List;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Ensure that all mate-pair information is in sync between each read " +
- "and its mate pair. If no OUTPUT file is supplied then the output is written to a temporary file " +
- "and then copied over the INPUT file. Reads marked with the secondary alignment flag are written " +
- "to the output file unchanged.",
- usageShort = "Ensure that all mate-pair information is in sync between each read and its mate pair",
+ usage = FixMateInformation.USAGE_SUMMARY + FixMateInformation.USAGE_DETAILS,
+ usageShort = FixMateInformation.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class FixMateInformation extends CommandLineProgram {
-
- @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input file to fix.")
+ static final String USAGE_SUMMARY = "Verify mate-pair information between mates and fix if needed. ";
+ static final String USAGE_DETAILS = "This tool ensures that all mate-pair information is in sync between each read " +
+ "and its mate pair. If no OUTPUT file is supplied then the output is written to a temporary file " +
+ "and then copied over the INPUT file. Reads marked with the secondary alignment flag are written " +
+ "to the output file unchanged." +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar FixMateInformation \\<br /> " +
+ " I=input.bam \\ <br /> " +
+ " O=fixed_mate.bam" +
+ "</pre> " +
+ "<hr />";
+ @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input file to check and fix.")
public List<File> INPUT;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, optional = true,
@@ -91,6 +100,9 @@ public class FixMateInformation extends CommandLineProgram {
@Option(shortName = "MC", optional = true, doc = "Adds the mate CIGAR tag (MC) if true, does not if false.")
public Boolean ADD_MATE_CIGAR = true;
+ @Option(doc = "If true, ignore missing mates, otherwise will throw an exception when missing mates are found.", optional = true)
+ public Boolean IGNORE_MISSING_MATES = true;
+
private static final Log log = Log.getInstance(FixMateInformation.class);
protected SAMFileWriter out;
@@ -154,7 +166,7 @@ public class FixMateInformation extends CommandLineProgram {
// And now deal with re-sorting if necessary
if (ASSUME_SORTED || allQueryNameSorted) {
- iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(tmp), ADD_MATE_CIGAR);
+ iterator = new SamPairUtil.SetMateInfoIterator(new PeekableIterator<SAMRecord>(tmp), ADD_MATE_CIGAR, IGNORE_MISSING_MATES);
} else {
log.info("Sorting input into queryname order.");
final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class,
@@ -173,10 +185,9 @@ public class FixMateInformation extends CommandLineProgram {
super.close();
sorter.cleanup();
}
- }, ADD_MATE_CIGAR);
+ }, ADD_MATE_CIGAR, IGNORE_MISSING_MATES);
log.info("Sorting by queryname complete.");
}
-
// Deal with the various sorting complications
final SortOrder outputSortOrder = SORT_ORDER == null ? readers.get(0).getFileHeader().getSortOrder() : SORT_ORDER;
log.info("Output will be sorted by " + outputSortOrder);
diff --git a/src/java/picard/sam/GatherBamFiles.java b/src/java/picard/sam/GatherBamFiles.java
index 78722ef..b2efb46 100644
--- a/src/java/picard/sam/GatherBamFiles.java
+++ b/src/java/picard/sam/GatherBamFiles.java
@@ -27,18 +27,30 @@ import java.util.List;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Concatenates one or more BAM files together as efficiently as possible. Assumes that the " +
- "list of BAM files provided as INPUT are in the order that they should be concatenated and simply concatenates the bodies " +
- "of the BAM files while retaining the header from the first file. Operates via copying of the gzip blocks directly for speed " +
- "but also supports generation of an MD5 on the output and indexing of the output BAM file. Only support BAM files, does not " +
- "support SAM files.",
- usageShort = "Concatenates one or more BAM files together as efficiently as possible",
+ usage = GatherBamFiles.USAGE_SUMMARY + GatherBamFiles.USAGE_DETAILS,
+ usageShort = GatherBamFiles.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class GatherBamFiles extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Concatenate one or more BAM files as efficiently as possible. ";
+ static final String USAGE_DETAILS = "This tool performs a rapid \"gather\" operation on BAM files after scatter" +
+ " operations where the same process has been performed on different regions of a BAM file creating many " +
+ "smaller BAM files that now need to be concatenated (reassembled) back together." +
+ "<br /><br />" +
+ "Assumes that the list of BAM files provided as INPUT are in the order that they should be concatenated and" +
+ " simply concatenates the bodies of the BAM files while retaining the header from the first file. " +
+ "Operates via copying of the gzip blocks directly for speed but also supports generation of an MD5 on the" +
+ " output and indexing of the output BAM file. Only supports BAM files, does not support SAM files." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar GatherBamFiles \\<br /> " +
+ " I=input1.bam \\ <br /> " +
+ " I=input2.bam \\ <br /> " +
+ " O=gathered_files.bam" +
+ "</pre> " +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
- doc = "One or more BAM files or text files containing lists of BAM files one per line.")
+ doc = "Two or more BAM files or text files containing lists of BAM files (one per line).")
public List<File> INPUT;
@Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output BAM file to write.")
diff --git a/src/java/picard/sam/MergeBamAlignment.java b/src/java/picard/sam/MergeBamAlignment.java
index d72f375..57e1ece 100644
--- a/src/java/picard/sam/MergeBamAlignment.java
+++ b/src/java/picard/sam/MergeBamAlignment.java
@@ -47,17 +47,27 @@ import java.util.List;
* @author ktibbett at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Merges alignment data from a SAM or BAM " +
- "file with additional data stored in an unmapped BAM file and produces a third SAM " +
- "or BAM file of aligned and unaligned reads. The purpose is to use information from the " +
- "unmapped BAM to fix up aligner output, so that the resulting file is valid for use by other " +
- "Picard programs. For simple BAM file merges, use MergeSamFiles. NOTE that MergeBamAlignment expects to " +
- "find a sequence dictionary in the same directory as REFERENCE_SEQUENCE and expects it " +
- "to have the same base name as the reference fasta except with the extension '.dict'",
- usageShort = "Merges alignment data from a SAM or BAM with data in an unmapped BAM file",
+ usage = MergeBamAlignment.USAGE_SUMMARY + MergeBamAlignment.USAGE_DETAILS,
+ usageShort = MergeBamAlignment.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class MergeBamAlignment extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Merge alignment data from a SAM or BAM with data in an unmapped BAM file. ";
+ static final String USAGE_DETAILS = "This tool produces a new SAM or BAM file that includes all aligned and unaligned reads and also carries " +
+ "forward additional read attributes from the unmapped BAM (attributes that are otherwise lost in the process of alignment)." +
+ " The purpose of this tool is to use information from the unmapped BAM to fix up aligner output. The resulting file will be valid " +
+ "for use by other Picard tools. For simple BAM file merges, use MergeSamFiles. Note that MergeBamAlignment expects to " +
+ "find a sequence dictionary in the same directory as REFERENCE_SEQUENCE and expects it " +
+ "to have the same base name as the reference FASTA except with the extension \".dict\". " +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar MergeBamAlignment \\<br /> " +
+ " ALIGNED=aligned.bam \\ <br /> " +
+ " UNMAPPED=unmapped.bam \\ <br /> " +
+ " O=merge_alignments.bam \\<br /> " +
+ " R=reference_sequence.fasta" +
+ "</pre> " +
+ "<hr />";
@Option(shortName = "UNMAPPED",
doc = "Original SAM or BAM file of unmapped reads, which must be in queryname order.")
diff --git a/src/java/picard/sam/MergeSamFiles.java b/src/java/picard/sam/MergeSamFiles.java
index 7e95029..54db0ab 100644
--- a/src/java/picard/sam/MergeSamFiles.java
+++ b/src/java/picard/sam/MergeSamFiles.java
@@ -32,10 +32,15 @@ import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SamFileHeaderMerger;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Interval;
+import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.SamRecordIntervalIteratorFactory;
+import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
@@ -44,7 +49,9 @@ import picard.cmdline.programgroups.SamOrBam;
import java.io.File;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
/**
* Reads a SAM or BAM file and combines the output to one file
@@ -52,13 +59,30 @@ import java.util.List;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Merges multiple SAM/BAM files into one file.",
- usageShort = "Merges multiple SAM or BAM files into one file",
+ usage = MergeSamFiles.USAGE_SUMMARY + MergeSamFiles.USAGE_DETAILS,
+ usageShort = MergeSamFiles.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class MergeSamFiles extends CommandLineProgram {
private static final Log log = Log.getInstance(MergeSamFiles.class);
+ static final String USAGE_SUMMARY = "Merges multiple SAM and/or BAM files into a single file. ";
+ static final String USAGE_DETAILS = "This tool is used for combining SAM and/or BAM files from different runs or read groups, similarly " +
+ "to the \"merge\" function of Samtools (http://www.htslib.org/doc/samtools.html). " +
+ "<br /><br />Note that to prevent errors in downstream processing, it is critical to identify/label read groups appropriately. " +
+ "If different samples contain identical read group IDs, this tool will avoid collisions by modifying the read group IDs to be " +
+ "unique. For more information about read groups, see the " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6472'>GATK Dictionary entry.</a> <br /><br />" +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar MergeSamFiles \\<br />" +
+ " I=input_1.bam \\<br />" +
+ " I=input_2.bam \\<br />" +
+ " O=merged_files.bam" +
+ "</pre>" +
+ "<hr />"
+ ;
@Option(shortName = "I", doc = "SAM or BAM input file", minElements = 1)
public List<File> INPUT = new ArrayList<File>();
@@ -83,6 +107,13 @@ public class MergeSamFiles extends CommandLineProgram {
@Option(doc = "Comment(s) to include in the merged output file's header.", optional = true, shortName = "CO")
public List<String> COMMENT = new ArrayList<String>();
+ @Option(shortName = "RGN", doc = "An interval list file that contains the locations of the positions to merge. "+
+ "Assume bam are sorted and indexed. "+
+ "The resulting file will contain alignments that may overlap with genomic regions outside the requested region. "+
+ "Unmapped reads are discarded.",
+ optional = true)
+ public File INTERVALS = null;
+
private static final int PROGRESS_INTERVAL = 1000000;
/** Required main method implementation. */
@@ -94,7 +125,12 @@ public class MergeSamFiles extends CommandLineProgram {
@Override
protected int doWork() {
boolean matchedSortOrders = true;
-
+
+ // read interval list if it is defined
+ final List<Interval> intervalList = (INTERVALS == null ? null : IntervalList.fromFile(INTERVALS).uniqued().getIntervals() );
+ // map reader->iterator used if INTERVALS is defined
+ final Map<SamReader, CloseableIterator<SAMRecord> > samReaderToIterator = new HashMap<SamReader, CloseableIterator<SAMRecord> >(INPUT.size());
+
// Open the files for reading and writing
final List<SamReader> readers = new ArrayList<SamReader>();
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
@@ -104,6 +140,12 @@ public class MergeSamFiles extends CommandLineProgram {
for (final File inFile : INPUT) {
IOUtil.assertFileIsReadable(inFile);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(inFile);
+ if ( INTERVALS != null ) {
+ if( ! in.hasIndex() ) throw new PicardException("Merging with interval but Bam file is not indexed "+ inFile);
+ final CloseableIterator<SAMRecord> samIterator = new SamRecordIntervalIteratorFactory().makeSamRecordIntervalIterator(in, intervalList, true);
+ samReaderToIterator.put(in, samIterator);
+ }
+
readers.add(in);
headers.add(in.getFileHeader());
@@ -127,7 +169,7 @@ public class MergeSamFiles extends CommandLineProgram {
final SAMFileHeader.SortOrder headerMergerSortOrder;
final boolean mergingSamRecordIteratorAssumeSorted;
- if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED) {
+ if (matchedSortOrders || SORT_ORDER == SAMFileHeader.SortOrder.unsorted || ASSUME_SORTED || INTERVALS != null ) {
log.info("Input files are in same order as output so sorting to temp directory is not needed.");
headerMergerSortOrder = SORT_ORDER;
mergingSamRecordIteratorAssumeSorted = ASSUME_SORTED;
@@ -139,7 +181,16 @@ public class MergeSamFiles extends CommandLineProgram {
presorted = false;
}
final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headerMergerSortOrder, headers, MERGE_SEQUENCE_DICTIONARIES);
- final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
+ final MergingSamRecordIterator iterator;
+ // no interval defined, get an iterator for the whole bam
+ if( intervalList == null) {
+ iterator = new MergingSamRecordIterator(headerMerger, readers, mergingSamRecordIteratorAssumeSorted);
+ }
+ else {
+ // show warning related to https://github.com/broadinstitute/picard/pull/314/files
+ log.info("Warning: merged bams from different interval lists may contain the same read in both files");
+ iterator = new MergingSamRecordIterator(headerMerger, samReaderToIterator, true);
+ }
final SAMFileHeader header = headerMerger.getMergedHeader();
for (final String comment : COMMENT) {
header.addComment(comment);
@@ -160,6 +211,7 @@ public class MergeSamFiles extends CommandLineProgram {
}
log.info("Finished reading inputs.");
+ for(final CloseableIterator<SAMRecord> iter : samReaderToIterator.values()) CloserUtil.close(iter);
CloserUtil.close(readers);
out.close();
return 0;
diff --git a/src/java/picard/sam/PositionBasedDownsampleSam.java b/src/java/picard/sam/PositionBasedDownsampleSam.java
index 3a596f9..b95dd6c 100644
--- a/src/java/picard/sam/PositionBasedDownsampleSam.java
+++ b/src/java/picard/sam/PositionBasedDownsampleSam.java
@@ -43,7 +43,8 @@ import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;
-import picard.sam.util.PhysicalLocation;
+import picard.sam.markduplicates.util.OpticalDuplicateFinder;
+import picard.sam.util.PhysicalLocationInt;
import java.io.File;
import java.util.ArrayList;
@@ -113,7 +114,7 @@ public class PositionBasedDownsampleSam extends CommandLineProgram {
private final Log log = Log.getInstance(PositionBasedDownsampleSam.class);
- private PhysicalLocation opticalDuplicateFinder;
+ private OpticalDuplicateFinder opticalDuplicateFinder;
private long total = 0;
private long kept = 0;
public static String PG_PROGRAM_NAME = "PositionBasedDownsampleSam";
@@ -164,7 +165,7 @@ public class PositionBasedDownsampleSam extends CommandLineProgram {
log.info("Checking to see if input file has been downsampled with this program before.");
checkProgramRecords();
- opticalDuplicateFinder = new PhysicalLocation();
+ opticalDuplicateFinder = new OpticalDuplicateFinder();
log.info("Starting first pass. Examining read distribution in tiles.");
fillTileMinMaxCoord();
@@ -206,7 +207,7 @@ public class PositionBasedDownsampleSam extends CommandLineProgram {
total++;
- final PhysicalLocation pos = getSamRecordLocation(rec);
+ final PhysicalLocationInt pos = getSamRecordLocation(rec);
if (!xPositions.containsKey(pos.getTile())) {
xPositions.put(pos.getTile(), new Histogram<Short>(pos.getTile() + "-xpos", "count"));
@@ -266,7 +267,7 @@ public class PositionBasedDownsampleSam extends CommandLineProgram {
total++;
progress.record(rec);
- final PhysicalLocation location = getSamRecordLocation(rec);
+ final PhysicalLocationInt location = getSamRecordLocation(rec);
//Defaulting map will create a new Coord if it's not there.
@@ -298,8 +299,8 @@ public class PositionBasedDownsampleSam extends CommandLineProgram {
CloserUtil.close(in);
}
- private PhysicalLocation getSamRecordLocation(final SAMRecord rec) {
- final PhysicalLocation pos = new PhysicalLocation();
+ private PhysicalLocationInt getSamRecordLocation(final SAMRecord rec) {
+ final PhysicalLocationInt pos = new PhysicalLocationInt();
opticalDuplicateFinder.addLocationInformation(rec.getReadName(), pos);
return pos;
}
@@ -352,7 +353,7 @@ public class PositionBasedDownsampleSam extends CommandLineProgram {
private double roundedPart(final double x) {return x - Math.round(x);}
// this function checks to see if the location of the read is within the masking circle
- private boolean select(final PhysicalLocation coord, final Coord tileCoord) {
+ private boolean select(final PhysicalLocationInt coord, final Coord tileCoord) {
// r^2 = (x-x_0)^2 + (y-y_0)^2, where both x_0 and y_0 equal offset
final double distanceSquared =
Math.pow(roundedPart(((coord.getX() - tileCoord.minX) / (double) (tileCoord.maxX - tileCoord.minX)) - offset), 2) +
diff --git a/src/java/picard/sam/ReplaceSamHeader.java b/src/java/picard/sam/ReplaceSamHeader.java
index 0c05044..3f76d4c 100644
--- a/src/java/picard/sam/ReplaceSamHeader.java
+++ b/src/java/picard/sam/ReplaceSamHeader.java
@@ -48,14 +48,26 @@ import java.io.File;
* @author alecw at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Replace the SAMFileHeader in a SAM file with the given header. " +
- "Validation is minimal. It is up to the user to ensure that all the elements referred to in the SAMRecords " +
- "are present in the new header. Sort order of the two input files must be the same.",
- usageShort = "Replace the SAMFileHeader in a SAM file with the given header",
+ usage = ReplaceSamHeader.USAGE_SUMMARY + ReplaceSamHeader.USAGE_DETAILS,
+ usageShort = ReplaceSamHeader.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class ReplaceSamHeader extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Replaces the SAMFileHeader in a SAM or BAM file. ";
+ static final String USAGE_DETAILS = "This tool makes it possible to replace the header of a SAM or BAM file with the header of another " +
+ "file, or a header block that has been edited manually (in a stub SAM file). The sort order (@SO) of the two input files must " +
+ "be the same.<br /><br />" +
+ "Note that validation is minimal, so it is up to the user to ensure that all the elements referred to in the SAMRecords " +
+ "are present in the new header. " +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar ReplaceSamHeader \\<br />" +
+ " I=input_1.bam \\<br />" +
+ " HEADER=input_2.bam \\<br />" +
+ " O=bam_with_new_head.bam" +
+ "</pre>" +
+ "<hr />";
@Option(doc = "SAM file from which SAMRecords will be read.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
diff --git a/src/java/picard/sam/RevertSam.java b/src/java/picard/sam/RevertSam.java
index ab0d1be..65c1aea 100644
--- a/src/java/picard/sam/RevertSam.java
+++ b/src/java/picard/sam/RevertSam.java
@@ -68,13 +68,23 @@ import java.util.Map;
* all alignment information.
*/
@CommandLineProgramProperties(
- usage = "Reverts SAM or BAM files to a previous state by removing certain types of information and/or " +
- "substituting in the original quality scores when available.",
- usageShort = "Reverts SAM or BAM files to a previous state",
+ usage = RevertSam.USAGE_SUMMARY + RevertSam.USAGE_DETAILS,
+ usageShort = RevertSam.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class RevertSam extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY ="Reverts SAM or BAM files to a previous state. ";
+ static final String USAGE_DETAILS ="This tool removes or restores certain properties of the SAM records, including alignment " +
+ "information, which can be used to produce an unmapped BAM (uBAM) from a previously aligned BAM. It is also capable of " +
+ "restoring the original quality scores of a BAM file that has already undergone base quality score recalibration (BQSR) if the " +
+ "original qualities were retained." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar RevertSam \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=reverted.bam" +
+ "</pre>" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM/BAM file to revert the state of.")
public File INPUT;
diff --git a/src/java/picard/sam/SamToFastq.java b/src/java/picard/sam/SamToFastq.java
index 83172d0..8d06444 100755
--- a/src/java/picard/sam/SamToFastq.java
+++ b/src/java/picard/sam/SamToFastq.java
@@ -39,6 +39,7 @@ import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
+import htsjdk.samtools.util.TrimmingUtil;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
@@ -63,41 +64,48 @@ import java.util.Set;
* the original read sequence as it was generated by the sequencer.
*/
@CommandLineProgramProperties(
- usage = "Extracts read sequences and qualities from the input SAM/BAM file and writes them into " +
- "the output file in Sanger fastq format. In the RC mode (default is True), if the read is aligned and the alignment is to the reverse strand on the genome, " +
- "the read's sequence from input SAM file will be reverse-complemented prior to writing it to fastq in order restore correctly" +
- "the original read sequence as it was generated by the sequencer.",
- usageShort = "Converts a SAM/BAM into a FASTQ",
+ usage = SamToFastq.USAGE_SUMMARY + SamToFastq.USAGE_DETAILS,
+ usageShort = SamToFastq.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class SamToFastq extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Converts a SAM or BAM file to FASTQ. ";
+ static final String USAGE_DETAILS = "This tool extracts read sequences and base quality scores from the input SAM/BAM file and " +
+ "outputs them in FASTQ format. This can be used (by way of a pipe) to run BWA MEM on unmapped BAM (uBAM) files."+
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar SamToFastq \\<br />" +
+ " I=input.bam \\<br />" +
+ " FASTQ=output.fastq" +
+ "</pre>" +
+ "<hr />";
@Option(doc = "Input SAM/BAM file to extract reads from", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
- @Option(shortName = "F", doc = "Output fastq file (single-end fastq or, if paired, first end of the pair fastq).",
+ @Option(shortName = "F", doc = "Output FASTQ file (single-end FASTQ or, if paired, first end of the pair FASTQ).",
mutex = {"OUTPUT_PER_RG"})
public File FASTQ;
- @Option(shortName = "F2", doc = "Output fastq file (if paired, second end of the pair fastq).", optional = true,
+ @Option(shortName = "F2", doc = "Output FASTQ file (if paired, second end of the pair FASTQ).", optional = true,
mutex = {"OUTPUT_PER_RG"})
public File SECOND_END_FASTQ;
- @Option(shortName = "FU", doc = "Output fastq file for unpaired reads; may only be provided in paired-fastq mode", optional = true, mutex = {"OUTPUT_PER_RG"})
+ @Option(shortName = "FU", doc = "Output FASTQ file for unpaired reads; may only be provided in paired-FASTQ mode", optional = true, mutex = {"OUTPUT_PER_RG"})
public File UNPAIRED_FASTQ;
- @Option(shortName = "OPRG", doc = "Output a fastq file per read group (two fastq files per read group if the group is paired).",
+ @Option(shortName = "OPRG", doc = "Output a FASTQ file per read group (two FASTQ files per read group if the group is paired).",
optional = true, mutex = {"FASTQ", "SECOND_END_FASTQ", "UNPAIRED_FASTQ"})
public boolean OUTPUT_PER_RG;
- @Option(shortName="RGT", doc = "The read group tag (PU or ID) to be used to output a fastq file per read group.")
- public String RG_TAG = "PU";
-
- @Option(shortName = "ODIR", doc = "Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.",
+ @Option(shortName="RGT", doc = "The read group tag (PU or ID) to be used to output a FASTQ file per read group.")
+ public String RG_TAG = "PU";
+
+ @Option(shortName = "ODIR", doc = "Directory in which to output the FASTQ file(s). Used only when OUTPUT_PER_RG is true.",
optional = true)
public File OUTPUT_DIR;
- @Option(shortName = "RC", doc = "Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq",
+ @Option(shortName = "RC", doc = "Re-reverse bases and qualities of reads with negative strand flag set before writing them to FASTQ",
optional = true)
public boolean RE_REVERSE = true;
@@ -106,7 +114,7 @@ public class SamToFastq extends CommandLineProgram {
@Option(shortName = "NON_PF", doc = "Include non-PF reads from the SAM file into the output " +
"FASTQ files. PF means 'passes filtering'. Reads whose 'not passing quality controls' " +
- "flag is set are non-PF reads.")
+ "flag is set are non-PF reads. See GATK Dictionary for more info.")
public boolean INCLUDE_NON_PF_READS = false;
@Option(shortName = "CLIP_ATTR", doc = "The attribute that stores the position at which " +
@@ -136,6 +144,9 @@ public class SamToFastq extends CommandLineProgram {
"value is null then all bases left after trimming will be written.", optional = true)
public Integer READ2_MAX_BASES_TO_WRITE;
+ @Option(shortName="Q", doc="End-trim reads using the phred/bwa quality trimming algorithm and this quality.", optional=true)
+ public Integer QUALITY;
+
@Option(doc = "If true, include non-primary alignments in the output. Support of non-primary alignments in SamToFastq " +
"is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments.")
public boolean INCLUDE_NON_PRIMARY_ALIGNMENTS = false;
@@ -289,31 +300,39 @@ public class SamToFastq extends CommandLineProgram {
final Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE);
if (clipPoint != null) {
if (CLIPPING_ACTION.equalsIgnoreCase("X")) {
- readString = clip(readString, clipPoint, null,
- !read.getReadNegativeStrandFlag());
- baseQualities = clip(baseQualities, clipPoint, null,
- !read.getReadNegativeStrandFlag());
-
- } else if (CLIPPING_ACTION.equalsIgnoreCase("N")) {
- readString = clip(readString, clipPoint, 'N',
- !read.getReadNegativeStrandFlag());
- } else {
- final char newQual = SAMUtils.phredToFastq(
- new byte[]{(byte) Integer.parseInt(CLIPPING_ACTION)}).charAt(0);
- baseQualities = clip(baseQualities, clipPoint, newQual,
- !read.getReadNegativeStrandFlag());
+ readString = clip(readString, clipPoint, null, !read.getReadNegativeStrandFlag());
+ baseQualities = clip(baseQualities, clipPoint, null, !read.getReadNegativeStrandFlag());
+ }
+ else if (CLIPPING_ACTION.equalsIgnoreCase("N")) {
+ readString = clip(readString, clipPoint, 'N', !read.getReadNegativeStrandFlag());
+ }
+ else {
+ final char newQual = SAMUtils.phredToFastq(new byte[]{(byte) Integer.parseInt(CLIPPING_ACTION)}).charAt(0);
+ baseQualities = clip(baseQualities, clipPoint, newQual, !read.getReadNegativeStrandFlag());
}
}
}
+
if (RE_REVERSE && read.getReadNegativeStrandFlag()) {
readString = SequenceUtil.reverseComplement(readString);
baseQualities = StringUtil.reverseString(baseQualities);
}
+
if (basesToTrim > 0) {
readString = readString.substring(basesToTrim);
baseQualities = baseQualities.substring(basesToTrim);
}
+ // Perform quality trimming if desired, making sure to leave at least one base!
+ if (QUALITY != null) {
+ final byte[] quals = SAMUtils.fastqToPhred(baseQualities);
+ final int qualityTrimIndex = Math.max(1, TrimmingUtil.findQualityTrimPoint(quals, QUALITY));
+ if (qualityTrimIndex < quals.length) {
+ readString = readString.substring(0, qualityTrimIndex);
+ baseQualities = baseQualities.substring(0, qualityTrimIndex);
+ }
+ }
+
if (maxBasesToWrite != null && maxBasesToWrite < readString.length()) {
readString = readString.substring(0, maxBasesToWrite);
baseQualities = baseQualities.substring(0, maxBasesToWrite);
@@ -402,14 +421,14 @@ public class SamToFastq extends CommandLineProgram {
"If OUTPUT_PER_RG is true, then OUTPUT_DIR should be set. " +
"If "};
}
-
- if (OUTPUT_PER_RG) {
- if (RG_TAG == null) {
- return new String[]{"If OUTPUT_PER_RG is true, then RG_TAG should be set."};
- } else if (! (RG_TAG.equalsIgnoreCase("PU") || RG_TAG.equalsIgnoreCase("ID")) ){
- return new String[]{"RG_TAG must be: PU or ID"};
- }
- }
+
+ if (OUTPUT_PER_RG) {
+ if (RG_TAG == null) {
+ return new String[]{"If OUTPUT_PER_RG is true, then RG_TAG should be set."};
+ } else if (! (RG_TAG.equalsIgnoreCase("PU") || RG_TAG.equalsIgnoreCase("ID")) ){
+ return new String[]{"RG_TAG must be: PU or ID"};
+ }
+ }
return null;
}
diff --git a/src/java/picard/sam/SortSam.java b/src/java/picard/sam/SortSam.java
index 25c8745..6dd1c41 100644
--- a/src/java/picard/sam/SortSam.java
+++ b/src/java/picard/sam/SortSam.java
@@ -45,13 +45,23 @@ import java.io.File;
* @author alecw at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Sorts the input SAM or BAM.\n" +
- "Input and output formats are determined by file extension.",
- usageShort = "Sorts a SAM or BAM file",
+ usage = SortSam.USAGE_SUMMARY + SortSam.USAGE_DETAILS,
+ usageShort = SortSam.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class SortSam extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Sorts a SAM or BAM file. ";
+ static final String USAGE_DETAILS = "This tool sorts the input SAM or BAM file by coordinate, queryname or some other property " +
+ "of the SAMRecord. Input and output formats (SAM or BAM) are determined by the file extension." +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar SortSam \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=sorted.bam \\<br />" +
+ " SORT_ORDER=coordinate" +
+ "</pre>" +
+ "<hr />";
@Option(doc = "The BAM or SAM file to sort.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
diff --git a/src/java/picard/sam/ValidateSamFile.java b/src/java/picard/sam/ValidateSamFile.java
index 7b2b578..70cfc27 100644
--- a/src/java/picard/sam/ValidateSamFile.java
+++ b/src/java/picard/sam/ValidateSamFile.java
@@ -29,6 +29,7 @@ import htsjdk.samtools.SamFileValidator;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.BamIndexValidator.IndexValidationStringency;
import htsjdk.samtools.reference.ReferenceSequenceFile;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.IOUtil;
@@ -51,12 +52,27 @@ import java.util.List;
* @author Doug Voet
*/
@CommandLineProgramProperties(
- usage = "Read a SAM or BAM file and report on its validity.",
- usageShort = "Validates a SAM or BAM file",
+ usage = ValidateSamFile.USAGE_SUMMARY + ValidateSamFile.USAGE_DETAILS,
+ usageShort = ValidateSamFile.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class ValidateSamFile extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Validates a SAM or BAM file. ";
+ static final String USAGE_DETAILS = "This tool reports on the validity of a SAM or BAM file relative to the SAM format specification " +
+ "(see http://samtools.github.io/hts-specs/SAMv1.pdf), which is useful for troubleshooting errors encountered with other tools " +
+ "that may be caused by improper formatting.<br /><br />" +
+ "By default, the tool runs in VERBOSE mode and will exit after finding 100 errors and output them to the " +
+ "console (stdout). It is often practical to start by running this tool with the SUMMARY mode option, which summarizes the " +
+ "\"errors\" and \"warnings\". Consequently, specific validation warnings or errors that are of lesser concern can be ignored " +
+ "using the IGNORE and/or IGNORE_WARNINGS arguments in order to focus on blocking errors. " +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar ValidateSamFile \\<br />" +
+ " I=input.bam \\<br />" +
+ " MODE=SUMMARY" +
+ "</pre>" +
+ "<hr />";
public enum Mode {VERBOSE, SUMMARY}
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
@@ -82,9 +98,17 @@ public class ValidateSamFile extends CommandLineProgram {
@Option(doc = "If true, only report errors and ignore warnings.")
public boolean IGNORE_WARNINGS = false;
- @Option(doc = "If true and input is a BAM file with an index file, also validates the index.")
+ @Option(doc = "DEPRECATED. Use INDEX_VALIDATION_STRINGENCY instead. If true and input is " +
+ "a BAM file with an index file, also validates the index. Until this parameter is retired " +
+ "VALIDATE_INDEX and INDEX_VALIDATION_STRINGENCY must agree on whether to validate the index.")
public boolean VALIDATE_INDEX = true;
+ @Option(doc = "If set to anything other than IndexValidationStringency.NONE and input is " +
+ "a BAM file with an index file, also validates the index at the specified stringency. " +
+ "Until VALIDATE_INDEX is retired, VALIDATE_INDEX and INDEX_VALIDATION_STRINGENCY " +
+ "must agree on whether to validate the index.")
+ public IndexValidationStringency INDEX_VALIDATION_STRINGENCY = IndexValidationStringency.EXHAUSTIVE;
+
@Option(shortName = "BISULFITE",
doc = "Whether the SAM or BAM file consists of bisulfite sequenced reads. " +
"If so, C->T is not counted as an error in computing the value of the NM tag.")
@@ -170,4 +194,16 @@ public class ValidateSamFile extends CommandLineProgram {
return result ? 0 : 1;
}
+
+ @Override
+ protected String[] customCommandLineValidation() {
+ if ((!VALIDATE_INDEX && INDEX_VALIDATION_STRINGENCY != IndexValidationStringency.NONE) ||
+ (VALIDATE_INDEX && INDEX_VALIDATION_STRINGENCY == IndexValidationStringency.NONE)) {
+ return new String[]{"VALIDATE_INDEX and INDEX_VALIDATION_STRINGENCY must be consistent: " +
+ "VALIDATE_INDEX is " + VALIDATE_INDEX + " and INDEX_VALIDATION_STRINGENCY is " +
+ INDEX_VALIDATION_STRINGENCY};
+ }
+
+ return super.customCommandLineValidation();
+ }
}
diff --git a/src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java b/src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
index 077ac67..d8e9718 100644
--- a/src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
+++ b/src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
@@ -46,6 +46,8 @@ import picard.cmdline.programgroups.Metrics;
import picard.sam.DuplicationMetrics;
import picard.sam.markduplicates.util.AbstractOpticalDuplicateFinderCommandLineProgram;
import picard.sam.markduplicates.util.OpticalDuplicateFinder;
+import picard.sam.util.PhysicalLocation;
+import picard.sam.util.PhysicalLocationShort;
import java.io.DataInputStream;
import java.io.DataOutputStream;
@@ -82,25 +84,39 @@ import static java.lang.Math.pow;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Attempts to estimate library complexity from sequence of read pairs alone. Does so by sorting all reads " +
- "by the first N bases (5 by default) of each read and then comparing reads with the first " +
- "N bases identical to each other for duplicates. Reads are considered to be duplicates if " +
- "they match each other with no gaps and an overall mismatch rate less than or equal to " +
- "MAX_DIFF_RATE (0.03 by default).\n\n" +
- "Reads of poor quality are filtered out so as to provide a more accurate estimate. The filtering " +
- "removes reads with any no-calls in the first N bases or with a mean base quality lower than " +
- "MIN_MEAN_QUALITY across either the first or second read.\n\n" +
- "Unpaired reads are ignored in this computation.\n\n" +
- "The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes " +
- "these in the calculation of library size. Also, since there is no alignment to screen out technical " +
- "reads one further filter is applied on the data. After examining all reads a Histogram is built of " +
- "[#reads in duplicate set -> #of duplicate sets] all bins that contain exactly one duplicate set are " +
- "then removed from the Histogram as outliers before library size is estimated.",
- usageShort = "Estimates library complexity from the sequence of read pairs",
+ usage = EstimateLibraryComplexity.USAGE_SUMMARY + EstimateLibraryComplexity.USAGE_DETAILS,
+ usageShort = EstimateLibraryComplexity.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Estimates library complexity. ";
+ static final String USAGE_DETAILS = "This tool outputs quality metrics for a sequencing library preparation." +
+ "<br /><br />Library complexity refers to the number of unique DNA fragments present in a given library. " +
+ "Reductions in complexity compromise downstream analyses and can result from either PCR or optical duplicates." +
+ "" +
+ "<br /><br />PCR-induced duplicates can result from: inadequate amounts of starting material (genomic DNA, cDNA, etc.), " +
+ "losses during cleanups and size selection, and amplification-associated artifacts. These duplicates " +
+ "are removed via the creation of a histogram consisting of the numbers of reads in duplicate set vs. the numbers " +
+ "of duplicate sets. All bins that contain exactly one duplicate set are then removed from the histogram prior" +
+ " to estimating library size.<br /> <br />" +
+ "" +
+ "Optical duplicates result from reads that have identical sequences and whose clusters are proximal to each other. " +
+ "These are identified using both the READ_NAME_REGEX string and the values for the OPTICAL_DUPLICATE_PIXEL_DISTANCE.<br /> <br />" +
+ "" +
+ "Sequence identity is determined by scanning the first five (default value) bases of a read. Sequences are considered identical if" +
+ " they match each other with no gaps, have an overall mismatch rate less than or equal to the MAX_DIFF_RATE" +
+ " (0.03 is the default value), and the cluster proximity is less than the defined value (default is set at 100 pixels)." +
+ "" +
+ "<br /><br />Finally, the algorithm filters out poor quality reads defined as those with ambiguous base calls \"N\"s in the first 5 (default value) bases, " +
+ "reads with a mean base quality score lower than MIN_MEAN_QUALITY (20 is the default value), or any unpaired reads. " +
+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar EstimateLibraryComplexity \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=est_lib_complex_metrics.txt" +
+ "</pre>" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "One or more files to combine and " +
"estimate library complexity from. Reads can be mapped or unmapped.")
public List<File> INPUT;
@@ -136,15 +152,35 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
@Option(doc = "Read two barcode SAM tag (ex. BX for 10X Genomics)", optional = true)
public String READ_TWO_BARCODE_TAG = null;
+ @Option(doc = "The maximum number of bases to consider when comparing reads (0 means no maximum).", optional = true)
+ public int MAX_READ_LENGTH = 0;
+
+ @Option(doc = "Minimum number group count. On a per-library basis, we count the number of groups of duplicates " +
+ "that have a particular size. Omit from consideration any count that is less than this value. For " +
+ "example, if we see only one group of duplicates with size 500, we omit it from the metric calculations if " +
+ "MIN_GROUP_COUNT is set to two. Setting this to two may help remove technical artifacts from the library " +
+ "size calculation, for example, adapter dimers.", optional = true)
+ public int MIN_GROUP_COUNT = 2;
+
private final Log log = Log.getInstance(EstimateLibraryComplexity.class);
+ @Override
+ protected String[] customCommandLineValidation() {
+ final List<String> errorMsgs = new ArrayList<String>();
+ if (0 < MAX_READ_LENGTH && MAX_READ_LENGTH < MIN_IDENTICAL_BASES) {
+ errorMsgs.add("MAX_READ_LENGTH must be greater than or equal to MIN_IDENTICAL_BASES");
+ }
+ if (MIN_IDENTICAL_BASES <= 0) {
+ errorMsgs.add("MIN_IDENTICAL_BASES must be greater than 0");
+ }
+ return errorMsgs.size() == 0 ? super.customCommandLineValidation() : errorMsgs.toArray(new String[errorMsgs.size()]);
+ }
+
/**
* Little class to hold the sequence of a pair of reads and tile location information.
*/
- static class PairedReadSequence implements OpticalDuplicateFinder.PhysicalLocation {
+ static class PairedReadSequence extends PhysicalLocationShort {
short readGroup = -1;
- short tile = -1;
- short x = -1, y = -1;
boolean qualityOk = true;
byte[] read1;
byte[] read2;
@@ -158,18 +194,6 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
public void setReadGroup(final short readGroup) { this.readGroup = readGroup; }
- public short getTile() { return this.tile; }
-
- public void setTile(final short tile) { this.tile = tile; }
-
- public short getX() { return this.x; }
-
- public void setX(final short x) { this.x = x; }
-
- public short getY() { return this.y; }
-
- public void setY(final short y) { this.y = y; }
-
public short getLibraryId() { return this.libraryId; }
public void setLibraryId(final short libraryId) { this.libraryId = libraryId; }
@@ -460,7 +484,7 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
CloserUtil.close(in);
}
- log.info("Finished reading - moving on to scanning for duplicates.");
+ log.info(String.format("Finished reading - read %d records - moving on to scanning for duplicates.", progress.getCount()));
// Now go through the sorted reads and attempt to find duplicates
final PeekableIterator<PairedReadSequence> iterator = new PeekableIterator<PairedReadSequence>(sorter.iterator());
@@ -521,7 +545,7 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
final int duplicateCount = dupes.size();
duplicationHisto.increment(duplicateCount);
- final boolean[] flags = opticalDuplicateFinder.findOpticalDuplicates(dupes);
+ final boolean[] flags = opticalDuplicateFinder.findOpticalDuplicates(dupes, lhs);
for (final boolean b : flags) {
if (b) opticalHisto.increment(duplicateCount);
}
@@ -549,12 +573,12 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
final DuplicationMetrics metrics = new DuplicationMetrics();
metrics.LIBRARY = library;
- // Filter out any bins that have only a single entry in them and calcu
+ // Filter out any bins that have fewer than MIN_GROUP_COUNT entries in them and calculate derived metrics
for (final Integer bin : duplicationHisto.keySet()) {
final double duplicateGroups = duplicationHisto.get(bin).getValue();
final double opticalDuplicates = opticalHisto.get(bin) == null ? 0 : opticalHisto.get(bin).getValue();
- if (duplicateGroups > 1) {
+ if (duplicateGroups >= MIN_GROUP_COUNT) {
metrics.READ_PAIRS_EXAMINED += (bin * duplicateGroups);
metrics.READ_PAIR_DUPLICATES += ((bin - 1) * duplicateGroups);
metrics.READ_PAIR_OPTICAL_DUPLICATES += opticalDuplicates;
@@ -577,8 +601,9 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
* errors/diffs as dictated by the maxDiffRate.
*/
private boolean matches(final PairedReadSequence lhs, final PairedReadSequence rhs, final double maxDiffRate, final boolean useBarcodes) {
- final int read1Length = Math.min(lhs.read1.length, rhs.read1.length);
- final int read2Length = Math.min(lhs.read2.length, rhs.read2.length);
+ final int maxReadLength = (MAX_READ_LENGTH <= 0) ? Integer.MAX_VALUE : MAX_READ_LENGTH;
+ final int read1Length = Math.min(Math.min(lhs.read1.length, rhs.read1.length), maxReadLength);
+ final int read2Length = Math.min(Math.min(lhs.read2.length, rhs.read2.length), maxReadLength);
final int maxErrors = (int) Math.floor((read1Length + read2Length) * maxDiffRate);
int errors = 0;
@@ -670,8 +695,10 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
if (SequenceUtil.isNoCall(bases[i])) return false;
}
+ final int maxReadLength = (MAX_READ_LENGTH <= 0) ? Integer.MAX_VALUE : MAX_READ_LENGTH;
+ final int readLength = Math.min(bases.length, maxReadLength);
int total = 0;
- for (final byte b : quals) total += b;
- return total / quals.length >= minQuality;
+ for (int i = 0; i < readLength; i++) total += quals[i];
+ return total / readLength >= minQuality;
}
}
diff --git a/src/java/picard/sam/markduplicates/MarkDuplicates.java b/src/java/picard/sam/markduplicates/MarkDuplicates.java
index fc982a0..1eaa37d 100644
--- a/src/java/picard/sam/markduplicates/MarkDuplicates.java
+++ b/src/java/picard/sam/markduplicates/MarkDuplicates.java
@@ -57,19 +57,56 @@ import java.util.*;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. " +
- "All records are then written to the output file with the duplicate records flagged.",
- usageShort = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules.",
+ usage = MarkDuplicates.USAGE_SUMMARY + MarkDuplicates.USAGE_DETAILS,
+ usageShort = MarkDuplicates.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
+ static final String USAGE_SUMMARY = "Identifies duplicate reads. ";
+ static final String USAGE_DETAILS =
+ "This tool locates and tags duplicate reads (both PCR and optical/sequencing-driven) in a BAM or SAM file, where\n" +
+ "duplicate reads are defined as originating from the same original fragment of DNA. Duplicates are identified as read\n" +
+ "pairs having identical 5' positions (coordinate and strand) for both reads in a mate pair (and optionally, matching\n" +
+ "unique molecular identifier reads; see BARCODE_TAG option). Optical, or more broadly Sequencing, duplicates are\n" +
+ "duplicates that appear clustered together spatially during sequencing and can arise from optical/imaging-processing\n" +
+ "artifacts or from bio-chemical processes during clonal amplification and sequencing; they are identified using the\n" +
+ "READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options.\n" +
+ "\n" +
+ "The tool's main output is a new SAM or BAM file in which duplicates have been identified in the SAM flags field, or\n" +
+ "optionally removed (see REMOVE_DUPLICATES and REMOVE_SEQUENCING_DUPLICATES), and optionally marked with a duplicate type\n" +
+ "in the 'DT' optional attribute. In addition, it also outputs a metrics file containing the numbers of\n" +
+ "READ_PAIRS_EXAMINED, UNMAPPED_READS, UNPAIRED_READS, UNPAIRED_READ_DUPLICATES, READ_PAIR_DUPLICATES, and\n" +
+ "READ_PAIR_OPTICAL_DUPLICATES.\n" +
+ "\n" +
+ "Usage example: java -jar picard.jar MarkDuplicates I=input.bam \\\n" +
+ " O=marked_duplicates.bam M=marked_dup_metrics.txt\n";
+
+ /** Enum used to control how duplicates are flagged in the DT optional tag on each read. */
+ public enum DuplicateTaggingPolicy { DontTag, OpticalOnly, All }
+
+ /** The optional attribute in SAM/BAM files used to store the duplicate type. */
+ public static final String DUPLICATE_TYPE_TAG = "DT";
+ /** The duplicate type tag value for duplicate type: library. */
+ public static final String DUPLICATE_TYPE_LIBRARY = "LB";
+ /** The duplicate type tag value for duplicate type: sequencing (optical & pad-hopping, or "co-localized"). */
+ public static final String DUPLICATE_TYPE_SEQUENCING = "SQ";
+
+ /** Enum for the possible values that a duplicate read can be tagged with in the DT attribute. */
+ public enum DuplicateType {
+ LIBRARY(DUPLICATE_TYPE_LIBRARY),
+ SEQUENCING(DUPLICATE_TYPE_SEQUENCING);
+
+ private final String code;
+ DuplicateType(final String code) { this.code = code; }
+ public String code() { return this.code; }
+ }
+
private final Log log = Log.getInstance(MarkDuplicates.class);
/**
* If more than this many sequences in SAM file, don't spill to disk because there will not
* be enough file handles.
*/
-
@Option(shortName = "MAX_SEQS",
doc = "This option is obsolete. ReadEnds will always be spilled to disk.")
public int MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP = 50000;
@@ -93,9 +130,18 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
@Option(doc = "Read two barcode SAM tag (ex. BX for 10X Genomics)", optional = true)
public String READ_TWO_BARCODE_TAG = null;
+ @Option(doc = "If true remove 'optical' duplicates and other duplicates that appear to have arisen from the " +
+ "sequencing process instead of the library preparation process, even if REMOVE_DUPLICATES is false. " +
+ "If REMOVE_DUPLICATES is true, all duplicates are removed and this option is ignored.")
+ public boolean REMOVE_SEQUENCING_DUPLICATES = false;
+
+ @Option(doc= "Determines how duplicate types are recorded in the DT optional attribute.")
+ public DuplicateTaggingPolicy TAGGING_POLICY = DuplicateTaggingPolicy.DontTag;
+
private SortingCollection<ReadEndsForMarkDuplicates> pairSort;
private SortingCollection<ReadEndsForMarkDuplicates> fragSort;
private SortingLongCollection duplicateIndexes;
+ private SortingLongCollection opticalDuplicateIndexes;
private int numDuplicateIndices = 0;
protected LibraryIdGenerator libraryIdGenerator = null; // this is initialized in buildSortedReadEndLists
@@ -138,7 +184,7 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
log.info("Reading input file and constructing read end information.");
buildSortedReadEndLists(useBarcodes);
reportMemoryStats("After buildSortedReadEndLists");
- generateDuplicateIndexes(useBarcodes);
+ generateDuplicateIndexes(useBarcodes, this.REMOVE_SEQUENCING_DUPLICATES || this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag);
reportMemoryStats("After generateDuplicateIndexes");
log.info("Marking " + this.numDuplicateIndices + " records as duplicates.");
@@ -165,6 +211,7 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
// Now copy over the file while marking all the necessary indexes as duplicates
long recordInFileIndex = 0;
long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1);
+ long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : -1;
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written");
final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
@@ -210,15 +257,28 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
rec.setDuplicateReadFlag(false);
}
}
- recordInFileIndex++;
- if (!this.REMOVE_DUPLICATES || !rec.getDuplicateReadFlag()) {
- if (PROGRAM_RECORD_ID != null) {
- rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
+ // Manage the flagging of optical/sequencing duplicates
+ final boolean isOpticalDuplicate = (recordInFileIndex == nextOpticalDuplicateIndex);
+ if (isOpticalDuplicate) nextOpticalDuplicateIndex = this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : -1;
+ rec.setAttribute(DUPLICATE_TYPE_TAG, null);
+
+ if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) {
+ if (isOpticalDuplicate) {
+ rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code());
+ } else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) {
+ rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code());
}
- out.addAlignment(rec);
- progress.record(rec);
}
+
+ // Output the record if desired and bump the record index
+ recordInFileIndex++;
+ if (this.REMOVE_DUPLICATES && rec.getDuplicateReadFlag()) continue;
+ if (this.REMOVE_SEQUENCING_DUPLICATES && isOpticalDuplicate) continue;
+
+ if (PROGRAM_RECORD_ID != null) rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
+ out.addAlignment(rec);
+ progress.record(rec);
}
// remember to close the inputs
@@ -444,10 +504,14 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
*
* @return an array with an ordered list of indexes into the source file
*/
- private void generateDuplicateIndexes(final boolean useBarcodes) {
+ private void generateDuplicateIndexes(final boolean useBarcodes, final boolean indexOpticalDuplicates) {
// Keep this number from getting too large even if there is a huge heap.
- final int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / SortingLongCollection.SIZEOF,
- (double) (Integer.MAX_VALUE - 5));
+ int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / SortingLongCollection.SIZEOF, (double) (Integer.MAX_VALUE - 5));
+ // If we're also tracking optical duplicates, cut maxInMemory in half, since we'll need two sorting collections
+ if (indexOpticalDuplicates) {
+ maxInMemory /= 2;
+ this.opticalDuplicateIndexes = new SortingLongCollection(maxInMemory, TMP_DIR.toArray(new File[TMP_DIR.size()]));
+ }
log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
this.duplicateIndexes = new SortingLongCollection(maxInMemory, TMP_DIR.toArray(new File[TMP_DIR.size()]));
@@ -504,6 +568,7 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
log.info("Sorting list of duplicate records.");
this.duplicateIndexes.doneAddingStartIteration();
+ if (this.opticalDuplicateIndexes != null) this.opticalDuplicateIndexes.doneAddingStartIteration();
}
private boolean areComparableForDuplicates(final ReadEndsForMarkDuplicates lhs, final ReadEndsForMarkDuplicates rhs, final boolean compareRead2, final boolean useBarcodes) {
@@ -554,15 +619,20 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
}
}
+ if (this.READ_NAME_REGEX != null) {
+ AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(list, best, opticalDuplicateFinder, libraryIdGenerator);
+ }
+
for (final ReadEndsForMarkDuplicates end : list) {
if (end != best) {
addIndexAsDuplicate(end.read1IndexInFile);
addIndexAsDuplicate(end.read2IndexInFile);
- }
- }
- if (this.READ_NAME_REGEX != null) {
- AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(list, opticalDuplicateFinder, libraryIdGenerator);
+ if (end.isOpticalDuplicate && this.opticalDuplicateIndexes != null) {
+ this.opticalDuplicateIndexes.add(end.read1IndexInFile);
+ this.opticalDuplicateIndexes.add(end.read2IndexInFile);
+ }
+ }
}
}
diff --git a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java b/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
index 42fd436..a5d8563 100644
--- a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
+++ b/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
@@ -57,12 +57,34 @@ import java.util.*;
* @author Nils Homer
*/
@CommandLineProgramProperties(
- usage = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. " +
- "All records are then written to the output file with the duplicate records flagged.",
- usageShort = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules.",
+ usage = MarkDuplicatesWithMateCigar.USAGE_SUMMARY + MarkDuplicatesWithMateCigar.USAGE_DETAILS,
+ usageShort = MarkDuplicatesWithMateCigar.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
public class MarkDuplicatesWithMateCigar extends AbstractMarkDuplicatesCommandLineProgram {
+ static final String USAGE_SUMMARY = "Identifies duplicate reads, accounting for mate CIGAR. ";
+ static final String USAGE_DETAILS = "This tool locates and tags duplicate reads (both PCR and optical) in a BAM or SAM file, where " +
+ "duplicate reads are defined as originating from the same original fragment of DNA, taking into account the CIGAR string of " +
+ "read mates. <br /><br />" +
+ "" +
+ "It is intended as an improvement upon the original MarkDuplicates algorithm, from which it differs in several ways, including " +
+ "differences in how it breaks ties. It may be the most effective duplicate marking program available, as it handles all cases " +
+ "including clipped and gapped alignments and locates duplicate molecules using mate cigar information. However, please note " +
+ "that it is not yet used in the Broad's production pipeline, so use it at your own risk. <br /><br />" +
+ "" +
+ "Note also that this tool will not work with alignments that have large gaps or deletions, such as those from RNA-seq data. " +
+ "This is due to the need to buffer small genomic windows to ensure integrity of the duplicate marking, while large skips " +
+ "(ex. skipping introns) in the alignment records would force making that window very large, thus exhausting memory. <br />" +
+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar MarkDuplicatesWithMateCigar \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=mark_dups_w_mate_cig.bam \\<br />" +
+ " M=mark_dups_w_mate_cig_metrics.txt" +
+ "</pre>" +
+ "<hr />";
+
private final Log log = Log.getInstance(MarkDuplicatesWithMateCigar.class);
@Option(doc = "The minimum distance to buffer records to account for clipping on the 5' end of the records." +
diff --git a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java b/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
index 29860f6..d4ce284 100644
--- a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
+++ b/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
@@ -590,7 +590,7 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator {
final Set<ReadEnds> locations = toMarkQueue.getLocations(next);
if (!locations.isEmpty()) {
- AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(new ArrayList<ReadEnds>(locations),
+ AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(new ArrayList<ReadEnds>(locations), null,
opticalDuplicateFinder, libraryIdGenerator);
}
}
diff --git a/src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java b/src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
index b46fb18..7b6adce 100644
--- a/src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
+++ b/src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
@@ -40,6 +40,7 @@ import picard.PicardException;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.sam.DuplicationMetrics;
+import picard.sam.util.PhysicalLocation;
import java.io.File;
import java.util.ArrayList;
@@ -69,6 +70,17 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
doc = "File to write duplication metrics to")
public File METRICS_FILE;
+ @Option(doc = "If true do not write duplicates to the output file instead of writing them with appropriate flags set.")
+ public boolean REMOVE_DUPLICATES = false;
+
+ @Option(shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME,
+ doc = "If true, assume that the input file is coordinate sorted even if the header says otherwise.")
+ public boolean ASSUME_SORTED = false;
+
+ @Option(shortName = "DS", doc = "The scoring strategy for choosing the non-duplicate among candidates.")
+ public ScoringStrategy DUPLICATE_SCORING_STRATEGY = ScoringStrategy.TOTAL_MAPPED_REFERENCE_LENGTH;
+
+
@Option(shortName = StandardOptionDefinitions.PROGRAM_RECORD_ID_SHORT_NAME,
doc = "The program record ID for the @PG record(s) created by this program. Set to null to disable " +
"PG record creation. This string may have a suffix appended to avoid collision with other " +
@@ -95,16 +107,6 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
optional = true)
public List<String> COMMENT = new ArrayList<String>();
- @Option(doc = "If true do not write duplicates to the output file instead of writing them with appropriate flags set.")
- public boolean REMOVE_DUPLICATES = false;
-
- @Option(shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME,
- doc = "If true, assume that the input file is coordinate sorted even if the header says otherwise.")
- public boolean ASSUME_SORTED = false;
-
- @Option(shortName = "DS", doc = "The scoring strategy for choosing the non-duplicate among candidates.")
- public ScoringStrategy DUPLICATE_SCORING_STRATEGY = ScoringStrategy.TOTAL_MAPPED_REFERENCE_LENGTH;
-
/** The program groups that have been seen during the course of examining the input records. */
protected final Set<String> pgIdsSeen = new HashSet<String>();
@@ -223,8 +225,11 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
/**
* Looks through the set of reads and identifies how many of the duplicates are
* in fact optical duplicates, and stores the data in the instance level histogram.
+ * Additionally sets the transient isOpticalDuplicate flag on each read end that is
+ * identified as an optical duplicate.
*/
public static void trackOpticalDuplicates(List<? extends ReadEnds> ends,
+ final ReadEnds keeper,
final OpticalDuplicateFinder opticalDuplicateFinder,
final LibraryIdGenerator libraryIdGenerator) {
boolean hasFR = false, hasRF = false;
@@ -256,10 +261,10 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
}
// track the duplicates
- trackOpticalDuplicates(trackOpticalDuplicatesF, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
- trackOpticalDuplicates(trackOpticalDuplicatesR, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
+ trackOpticalDuplicates(trackOpticalDuplicatesF, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
+ trackOpticalDuplicates(trackOpticalDuplicatesR, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
} else { // No need to partition
- AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(ends, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
+ AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(ends, keeper, opticalDuplicateFinder, libraryIdGenerator.getOpticalDuplicatesByLibraryIdMap());
}
}
@@ -270,16 +275,23 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
* We expect only reads with FR or RF orientations, not a mixture of both.
*
* In PCR duplicate detection, duplicates can have both FR and RF orientations when fixing the orientation order to the first end of the mate. In
- * optical duplicate detection, we do not consider them duplicates if one read as FR ann the other RF when we order orientation by the
+ * optical duplicate detection, we do not consider them duplicates if one read is FR and the other RF when we order orientation by the
* first mate sequenced (read #1 of the pair).
*/
- private static void trackOpticalDuplicates(final List<? extends OpticalDuplicateFinder.PhysicalLocation> list,
+ private static void trackOpticalDuplicates(final List<? extends ReadEnds> list,
+ final ReadEnds keeper,
final OpticalDuplicateFinder opticalDuplicateFinder,
final Histogram<Short> opticalDuplicatesByLibraryId) {
- final boolean[] opticalDuplicateFlags = opticalDuplicateFinder.findOpticalDuplicates(list);
+ final boolean[] opticalDuplicateFlags = opticalDuplicateFinder.findOpticalDuplicates(list, keeper);
int opticalDuplicates = 0;
- for (final boolean b : opticalDuplicateFlags) if (b) ++opticalDuplicates;
+ for (int i=0; i<opticalDuplicateFlags.length; ++i) {
+ if (opticalDuplicateFlags[i]) {
+ ++opticalDuplicates;
+ list.get(i).isOpticalDuplicate = true;
+ }
+ }
+
if (opticalDuplicates > 0) {
opticalDuplicatesByLibraryId.increment(list.get(0).getLibraryId(), opticalDuplicates);
}
diff --git a/src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java b/src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
index 8b3ff6b..3c1c5c6 100644
--- a/src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
+++ b/src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
@@ -25,179 +25,116 @@
package picard.sam.markduplicates.util;
import htsjdk.samtools.util.Log;
-import picard.sam.util.ReadNameParsingUtils;
+import picard.sam.util.PhysicalLocation;
+import picard.sam.util.ReadNameParser;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
/**
- * Contains methods for finding optical duplicates.
+ * Contains methods for finding optical/co-localized/sequencing duplicates.
*
* @author Tim Fennell
* @author Nils Homer
*/
-public class OpticalDuplicateFinder {
-
- public static final String DEFAULT_READ_NAME_REGEX = "[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*".intern();
-
- public static final int DEFAULT_OPTICAL_DUPLICATE_DISTANCE = 100;
-
- public String readNameRegex;
+public class OpticalDuplicateFinder extends ReadNameParser {
public int opticalDuplicatePixelDistance;
- private Pattern readNamePattern;
-
- private boolean warnedAboutRegexNotMatching = false;
-
- private final Log log;
-
- public OpticalDuplicateFinder() {
- this(DEFAULT_READ_NAME_REGEX, DEFAULT_OPTICAL_DUPLICATE_DISTANCE);
- }
-
- public OpticalDuplicateFinder(final int opticalDuplicatePixelDistance) {
- this(DEFAULT_READ_NAME_REGEX, opticalDuplicatePixelDistance);
- }
-
- public OpticalDuplicateFinder(final String readNameRegex) {
- this(readNameRegex, DEFAULT_OPTICAL_DUPLICATE_DISTANCE);
- }
-
- public OpticalDuplicateFinder(final String readNameRegex, final int opticalDuplicatePixelDistance) {
- this(readNameRegex, opticalDuplicatePixelDistance, null);
- }
-
- public OpticalDuplicateFinder(final String readNameRegex, final int opticalDuplicatePixelDistance, final Log log) {
- this.readNameRegex = readNameRegex;
- this.opticalDuplicatePixelDistance = opticalDuplicatePixelDistance;
- this.log = log;
- }
+ public static final int DEFAULT_OPTICAL_DUPLICATE_DISTANCE = 100;
/**
- * Small interface that provides access to the physical location information about a cluster.
- * All values should be defaulted to -1 if unavailable. ReadGroup and Tile should only allow
- * non-zero positive integers, x and y coordinates may be negative.
+ * Uses the default duplicate distance {@value DEFAULT_OPTICAL_DUPLICATE_DISTANCE} and the default read name regex
+ * {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
*/
- public static interface PhysicalLocation {
- public short getReadGroup();
-
- public void setReadGroup(short rg);
-
- public short getTile();
-
- public void setTile(short tile);
-
- public short getX();
-
- public void setX(short x);
-
- public short getY();
-
- public void setY(short y);
-
- public short getLibraryId();
-
- public void setLibraryId(short libraryId);
+ public OpticalDuplicateFinder() {
+ super();
+ this.opticalDuplicatePixelDistance = DEFAULT_OPTICAL_DUPLICATE_DISTANCE;
}
- private final int[] tmpLocationFields = new int[10]; // for optimization of addLocationInformation
/**
- * Method used to extract tile/x/y from the read name and add it to the PhysicalLocation so that it
- * can be used later to determine optical duplication
*
- * @param readName the name of the read/cluster
- * @param loc the object to add tile/x/y to
- * @return true if the read name contained the information in parsable form, false otherwise
+ * @param readNameRegex see {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
+ * @param opticalDuplicatePixelDistance the optical duplicate pixel distance
+ * @param log the log to which to write messages.
*/
- public boolean addLocationInformation(final String readName, final PhysicalLocation loc) {
- // Optimized version if using the default read name regex (== used on purpose):
- if (this.readNameRegex == this.DEFAULT_READ_NAME_REGEX) {
- final int fields = ReadNameParsingUtils.getRapidDefaultReadNameRegexSplit(readName, ':', tmpLocationFields);
- if (!(fields == 5 || fields == 7)) {
- if (null != log && !this.warnedAboutRegexNotMatching) {
- this.log.warn(String.format("Default READ_NAME_REGEX '%s' did not match read name '%s'. " +
- "You may need to specify a READ_NAME_REGEX in order to correctly identify optical duplicates. " +
- "Note that this message will not be emitted again even if other read names do not match the regex.",
- this.readNameRegex, readName));
- this.warnedAboutRegexNotMatching = true;
- }
- return false;
- }
- final int offset = fields == 7 ? 2 : 0;
- loc.setTile((short) tmpLocationFields[offset + 2]);
- loc.setX((short) tmpLocationFields[offset + 3]);
- loc.setY((short) tmpLocationFields[offset + 4]);
- return true;
- } else if (this.readNameRegex == null) {
- return false;
- } else {
- // Standard version that will use the regex
- if (this.readNamePattern == null) this.readNamePattern = Pattern.compile(this.readNameRegex);
-
- final Matcher m = this.readNamePattern.matcher(readName);
- if (m.matches()) {
- loc.setTile((short) Integer.parseInt(m.group(1)));
- loc.setX((short) Integer.parseInt(m.group(2)));
- loc.setY((short) Integer.parseInt(m.group(3)));
- return true;
- } else {
- if (null != log && !this.warnedAboutRegexNotMatching) {
- this.log.warn(String.format("READ_NAME_REGEX '%s' did not match read name '%s'. Your regex may not be correct. " +
- "Note that this message will not be emitted again even if other read names do not match the regex.",
- this.readNameRegex, readName));
- warnedAboutRegexNotMatching = true;
- }
- return false;
- }
- }
+ public OpticalDuplicateFinder(final String readNameRegex, final int opticalDuplicatePixelDistance, final Log log) {
+ super(readNameRegex, log);
+ this.opticalDuplicatePixelDistance = opticalDuplicatePixelDistance;
}
/**
- * Finds which reads within the list of duplicates are likely to be optical duplicates of
- * one another.
- * <p/>
- * Note: this method will perform a sort() of the list; if it is imperative that the list be
- * unmodified a copy of the list should be passed to this method.
+ * Finds which reads within the list of duplicates are likely to be optical/co-localized duplicates of
+ * one another. Within each cluster of optical duplicates that is found, one read remains un-flagged for
+ * optical duplication and the rest are flagged as optical duplicates. The set of reads that are considered
+ * optical duplicates are indicated by returning "true" at the same index in the resulting boolean[] as the
+ * read appeared in the input list of physical locations.
*
* @param list a list of reads that are determined to be duplicates of one another
+ * @param keeper a single PhysicalLocation that is the one being kept as non-duplicate, and thus should never be
+ * annotated as an optical duplicate. May in some cases be null, or a PhysicalLocation not
+ * contained within the list!
* @return a boolean[] of the same length as the incoming list marking which reads are optical duplicates
*/
- public boolean[] findOpticalDuplicates(final List<? extends PhysicalLocation> list) {
+ public boolean[] findOpticalDuplicates(final List<? extends PhysicalLocation> list, final PhysicalLocation keeper) {
+ // If there is only one or zero reads passed in, then just return an array of all false
+ if (list.size() < 2) return new boolean[list.size()];
+
final int length = list.size();
final boolean[] opticalDuplicateFlags = new boolean[length];
+ final int distance = this.opticalDuplicatePixelDistance;
+
+ final PhysicalLocation actualKeeper = keeperOrNull(list, keeper);
- Collections.sort(list, new Comparator<PhysicalLocation>() {
- public int compare(final PhysicalLocation lhs, final PhysicalLocation rhs) {
- int retval = lhs.getReadGroup() - rhs.getReadGroup();
- if (retval == 0) retval = lhs.getTile() - rhs.getTile();
- if (retval == 0) retval = lhs.getX() - rhs.getX();
- if (retval == 0) retval = lhs.getY() - rhs.getY();
- return retval;
+ // First go through and compare all the reads to the keeper
+ if (actualKeeper != null) {
+ for (int i=0; i<length; ++i) {
+ final PhysicalLocation other = list.get(i);
+ opticalDuplicateFlags[i] = closeEnough(actualKeeper, other, distance);
}
- });
+ }
- outer:
- for (int i = 0; i < length; ++i) {
+ // Now go through and do each pairwise comparison not involving the actualKeeper
+ for (int i=0; i<length; ++i) {
final PhysicalLocation lhs = list.get(i);
- if (lhs.getTile() < 0) continue;
+ if (lhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
- for (int j = i + 1; j < length; ++j) {
+ for (int j =i+1; j<length; ++j) {
final PhysicalLocation rhs = list.get(j);
-
- if (opticalDuplicateFlags[j]) continue;
- if (lhs.getReadGroup() != rhs.getReadGroup()) continue outer;
- if (lhs.getTile() != rhs.getTile()) continue outer;
- if (rhs.getX() > lhs.getX() + this.opticalDuplicatePixelDistance) continue outer;
-
- if (Math.abs(lhs.getY() - rhs.getY()) <= this.opticalDuplicatePixelDistance) {
- opticalDuplicateFlags[j] = true;
+ if (rhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
+ if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j]) continue; // both already marked, no need to check
+
+ if (closeEnough(lhs, rhs, distance)) {
+ // At this point we want to mark either lhs or rhs as duplicate. Either could have been marked
+ // as a duplicate of the keeper (but not both - that's checked above), so be careful about which
+ // one to now mark as a duplicate.
+ final int index = opticalDuplicateFlags[j] ? i : j;
+ opticalDuplicateFlags[index] = true;
}
}
}
+
return opticalDuplicateFlags;
}
+
+ /** Returns the keeper if it is contained within the list and has location information, otherwise null. */
+ private PhysicalLocation keeperOrNull(final List<? extends PhysicalLocation> list, final PhysicalLocation keeper) {
+ if (keeper != null && keeper.hasLocation()) {
+ for (final PhysicalLocation loc : list) {
+ if (loc == keeper) return keeper;
+ }
+ }
+ return null;
+ }
+
+ /** Simple method to test whether two physical locations are close enough to each other to be deemed optical dupes. */
+ private boolean closeEnough(final PhysicalLocation lhs, final PhysicalLocation rhs, final int distance) {
+ return lhs != rhs && // no comparing an object to itself (checked using object identity)!
+ lhs.hasLocation() && rhs.hasLocation() && // no comparing objects without locations
+ lhs.getReadGroup() == rhs.getReadGroup() && // must be in the same RG to be optical duplicates
+ lhs.getTile() == rhs.getTile() && // and the same tile
+ Math.abs(lhs.getX() - rhs.getX()) <= distance &&
+ Math.abs(lhs.getY() - rhs.getY()) <= distance;
+ }
}
diff --git a/src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java b/src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java
index 20d3840..9e6dba3 100644
--- a/src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java
+++ b/src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java
@@ -28,16 +28,17 @@ package picard.sam.markduplicates.util;
* @author nhomer
*/
+import picard.sam.util.PhysicalLocation;
+import picard.sam.util.PhysicalLocationShort;
+
/** Stores the minimal information needed for optical duplicate detection. */
-public class PhysicalLocationForMateCigar implements OpticalDuplicateFinder.PhysicalLocation {
+public class PhysicalLocationForMateCigar extends PhysicalLocationShort {
// Information used to detect optical dupes
short readGroup = -1;
- short tile = -1;
- short x = -1, y = -1;
short libraryId;
- public PhysicalLocationForMateCigar(final OpticalDuplicateFinder.PhysicalLocation rec) {
+ public PhysicalLocationForMateCigar(final PhysicalLocation rec) {
this.setReadGroup(rec.getReadGroup());
this.setTile(rec.getTile());
this.setX(rec.getX());
@@ -52,24 +53,6 @@ public class PhysicalLocationForMateCigar implements OpticalDuplicateFinder.Phys
public void setReadGroup(final short rg) { this.readGroup = rg; }
@Override
- public short getTile() { return this.tile; }
-
- @Override
- public void setTile(final short tile) { this.tile = tile; }
-
- @Override
- public short getX() { return this.x; }
-
- @Override
- public void setX(final short x) { this.x = x; }
-
- @Override
- public short getY() { return this.y; }
-
- @Override
- public void setY(final short y) { this.y = y;}
-
- @Override
public short getLibraryId() { return this.libraryId; }
@Override
diff --git a/src/java/picard/sam/markduplicates/util/ReadEnds.java b/src/java/picard/sam/markduplicates/util/ReadEnds.java
index 396df39..f267a08 100644
--- a/src/java/picard/sam/markduplicates/util/ReadEnds.java
+++ b/src/java/picard/sam/markduplicates/util/ReadEnds.java
@@ -23,8 +23,10 @@
*/
package picard.sam.markduplicates.util;
+import picard.sam.util.PhysicalLocationShort;
+
/** Little struct-like class to hold read pair (and fragment) end data for duplicate marking. */
-abstract public class ReadEnds implements OpticalDuplicateFinder.PhysicalLocation {
+abstract public class ReadEnds extends PhysicalLocationShort {
public static final byte F = 0, R = 1, FF = 2, FR = 3, RR = 4, RF = 5;
@@ -35,14 +37,14 @@ abstract public class ReadEnds implements OpticalDuplicateFinder.PhysicalLocatio
public int read2ReferenceIndex = -1;
public int read2Coordinate = -1;
- // Information used to detect optical dupes
+ // Additional information used to detect optical dupes
public short readGroup = -1;
- public short tile = -1;
- public short x = -1, y = -1;
/** For optical duplicate detection the orientation matters with regard to 1st or 2nd end of a mate */
public byte orientationForOpticalDuplicates = -1;
+ /** A *transient* flag marking this read end as being an optical duplicate. */
+ public transient boolean isOpticalDuplicate = false;
public boolean isPaired() { return this.read2ReferenceIndex != -1; }
@@ -53,24 +55,6 @@ abstract public class ReadEnds implements OpticalDuplicateFinder.PhysicalLocatio
public void setReadGroup(final short readGroup) { this.readGroup = readGroup; }
@Override
- public short getTile() { return this.tile; }
-
- @Override
- public void setTile(final short tile) { this.tile = tile; }
-
- @Override
- public short getX() { return this.x; }
-
- @Override
- public void setX(final short x) { this.x = x; }
-
- @Override
- public short getY() { return this.y; }
-
- @Override
- public void setY(final short y) { this.y = y; }
-
- @Override
public short getLibraryId() { return this.libraryId; }
@Override
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java b/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
index 8b9d464..d266de7 100644
--- a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
+++ b/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
@@ -66,8 +66,8 @@ public class ReadEndsForMarkDuplicatesCodec implements SortingCollection.Codec<R
this.out.writeShort(read.readGroup);
this.out.writeShort(read.tile);
- this.out.writeShort(read.x);
- this.out.writeShort(read.y);
+ this.out.writeShort((short)read.x);
+ this.out.writeShort((short)read.y);
this.out.writeByte(read.orientationForOpticalDuplicates);
} catch (final IOException ioe) {
throw new PicardException("Exception writing ReadEnds to file.", ioe);
diff --git a/src/java/picard/sam/util/PhysicalLocation.java b/src/java/picard/sam/util/PhysicalLocation.java
index 6d79aaf..eaf0fc4 100644
--- a/src/java/picard/sam/util/PhysicalLocation.java
+++ b/src/java/picard/sam/util/PhysicalLocation.java
@@ -1,118 +1,35 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2015 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
package picard.sam.util;
-import picard.PicardException;
-import picard.sam.markduplicates.util.OpticalDuplicateFinder;
-
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Contains class for figuring out the location of reads.
- *
- * @author Tim Fennell
- * @author Nils Homer
- * @author Yossi Farjoun
- */
-
/**
* Small interface that provides access to the physical location information about a cluster.
- * All values should be defaulted to -1 if unavailable. Tile should only allow
- * non-zero positive integers, x and y coordinates must be non-negative.
- * This is different from OpticalDuplicateFinder.PhysicalLocation in that the x and y positions are ints, not shorts
- * thus, they do not overflow within a HiSeqX tile.
+ * All values should be defaulted to -1 if unavailable. ReadGroup and Tile should only allow
+ * non-zero positive integers, x and y coordinates may be negative.
*/
-public class PhysicalLocation {
- //FLOWCELL----:LANE-:TILE----:X_COORD-:Y_COORD-UNK
- public static final String DEFAULT_READ_NAME_REGEX = "[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*";
-
- private final String readNameRegex;
-
- public PhysicalLocation() {this(DEFAULT_READ_NAME_REGEX);}
-
- public PhysicalLocation(final String readNameRegExp) {this.readNameRegex = readNameRegExp;}
-
- private Pattern readNamePattern;
-
- private short tile = -1;
- private int x = -1, y = -1;
-
+public interface PhysicalLocation {
+ public int NO_VALUE = -1;
- public short getTile() { return tile; }
+ public short getReadGroup();
- public void setTile(final short tile) { this.tile = tile; }
+ public void setReadGroup(short rg);
- public int getX() { return x; }
+ public short getTile();
- public void setX(final int x) { this.x = x; }
+ public void setTile(short tile);
- public int getY() { return y; }
+ public int getX();
- public void setY(final int y) { this.y = y; }
+ public void setX(int x);
+ public int getY();
- private final int[] tmpLocationFields = new int[10]; // for optimization of addLocationInformation
+ public void setY(int y);
- /**
- * Method used to extract tile/x/y from the read name and add it to the PhysicalLocation so that it
- * can be used later to determine optical duplication
- *
- * @param readName the name of the read/cluster
- * @param loc the object to add tile/x/y to
- * @return true if the read name contained the information in parsable form, false otherwise
- */
- public boolean addLocationInformation(final String readName, final PhysicalLocation loc) {
- // Optimized version if using the default read name regex (== used on purpose):
- if (readNameRegex == DEFAULT_READ_NAME_REGEX) {
- final int fields = ReadNameParsingUtils.getRapidDefaultReadNameRegexSplit(readName, ':', tmpLocationFields);
- if (!(fields == 5 || fields == 7)) {
- throw new PicardException(String.format(" READ_NAME_REGEX '%s' did not match read name '%s'. " ,
- this.readNameRegex, readName));
- }
+ public short getLibraryId();
- final int offset = fields == 7 ? 2 : 0;
- loc.setTile((short) tmpLocationFields[offset + 2]);
- loc.setX(tmpLocationFields[offset + 3]);
- loc.setY(tmpLocationFields[offset + 4]);
- return true;
- } else if (readNameRegex == null) {
- return false;
- } else {
- // Standard version that will use the regex
- if (readNamePattern == null) readNamePattern = Pattern.compile(readNameRegex);
+ public void setLibraryId(short libraryId);
- final Matcher m = readNamePattern.matcher(readName);
- if (m.matches()) {
- loc.setTile((short) Integer.parseInt(m.group(1)));
- loc.setX(Integer.parseInt(m.group(2)));
- loc.setY(Integer.parseInt(m.group(3)));
- return true;
- } else {
- throw new PicardException(String.format("READ_NAME_REGEX '%s' did not match read name '%s'. ", readNameRegex, readName));
- }
- }
+ /** Default implementation of a method to check whether real location data has been set. */
+ default public boolean hasLocation() {
+ return getTile() != NO_VALUE;
}
}
diff --git a/src/java/picard/sam/util/PhysicalLocationInt.java b/src/java/picard/sam/util/PhysicalLocationInt.java
new file mode 100644
index 0000000..cee020e
--- /dev/null
+++ b/src/java/picard/sam/util/PhysicalLocationInt.java
@@ -0,0 +1,37 @@
+package picard.sam.util;
+
+import picard.PicardException;
+
+/**
+ * Small class that provides access to the physical location information about a cluster.
+ * All values should be defaulted to -1 if unavailable. Tile should only allow
+ * non-zero positive integers, x and y coordinates must be non-negative.
+ * This is different from PhysicalLocationShort in that the x and y positions are ints, not shorts
+ * thus, they do not overflow within a HiSeqX tile.
+ */
+public class PhysicalLocationInt implements PhysicalLocation {
+
+ public short tile = -1;
+ public int x = -1, y = -1;
+
+ public short getReadGroup() { throw new PicardException("Not Implemented"); }
+
+ public void setReadGroup(final short readGroup) { throw new PicardException("Not Implemented"); }
+
+ public short getTile() { return tile; }
+
+ public void setTile(final short tile) { this.tile = tile; }
+
+ public int getX() { return x; }
+
+ public void setX(final int x) { this.x = x; }
+
+ public int getY() { return y; }
+
+ public void setY(final int y) { this.y = y; }
+
+ public short getLibraryId() { throw new PicardException("Not Implemented"); }
+
+ public void setLibraryId(final short libraryId) { throw new PicardException("Not Implemented"); }
+
+}
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/java/picard/sam/util/PhysicalLocationShort.java
similarity index 57%
copy from src/java/picard/illumina/parser/ReadType.java
copy to src/java/picard/sam/util/PhysicalLocationShort.java
index bd69cd5..e43c363 100644
--- a/src/java/picard/illumina/parser/ReadType.java
+++ b/src/java/picard/sam/util/PhysicalLocationShort.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2011 The Broad Institute
+ * Copyright (c) 2015 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -15,25 +15,28 @@
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON INFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package picard.illumina.parser;
+package picard.sam.util;
+
/**
-* A read type describes a stretch of cycles in an ReadStructure
-* (e.g. Assume we have a paired end/barcoded run with the 76 template cycles followed by 8 barcode cycles followed by
-* another 76 template reads, the run would be represented by the ReadStructure 76T8B76T)
-* Note: Currently SKIP is unused by IlluminaBasecallsToSam, ExtractIlluminaBarcodes, and IlluminaDataProvider
-**/
-public enum ReadType {
- T, B, S;
+ * Small class that provides access to the physical location information about a cluster.
+ * All values should be defaulted to -1 if unavailable. Tile should only allow
+ * non-zero positive integers, x and y coordinates must be non-negative.
+ * This is different from PhysicalLocationInt in that the x and y positions are shorts, not ints
+ * thus, they may overflow within a HiSeqX tile.
+ */
+public class PhysicalLocationShort extends PhysicalLocationInt {
+
+ @Override
+ public void setX(final int x) { super.setX((short)x); }
- public static final ReadType Template = T;
- public static final ReadType Barcode = B;
- public static final ReadType Skip = S;
+ @Override
+ public void setY(final int y) { super.setY((short)y); }
}
diff --git a/src/java/picard/sam/util/ReadNameParser.java b/src/java/picard/sam/util/ReadNameParser.java
new file mode 100644
index 0000000..f972b9b
--- /dev/null
+++ b/src/java/picard/sam/util/ReadNameParser.java
@@ -0,0 +1,193 @@
+package picard.sam.util;
+
+import htsjdk.samtools.util.Log;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Parses the physical location of a cluster (tile, x and y) out of a read name and stores it in a
+ * {@link PhysicalLocation}. Parsing uses either a fast path for the default read name format or a
+ * user-supplied regular expression; read names that cannot be parsed are reported by returning false.
+ */
+public class ReadNameParser {
+
+ /**
+ * The read name regular expression (regex) is used to extract three pieces of information from the read name: tile, x location,
+ * and y location. Any read name regex should parse the read name to produce these and only these values. An example regex is:
+ * (?:.*:)?([0-9]+)[^:]*:([0-9]+)[^:]*:([0-9]+)[^:]*$
+ * which assumes that fields in the read name are delimited by ':' and the last three fields correspond to the tile, x and y locations,
+ * ignoring any trailing non-digit characters.
+ *
+ * The default regex is optimized for fast parsing (see {@link #getLastThreeFields(String, char, int[])}) by searching for the last
+ * three fields, ignoring any trailing non-digit characters, assuming the delimiter ':'. This should consider correctly read names
+ * where we have 5 or 7 field with the last three fields being tile/x/y, as is the case for the majority of read names produced by
+ * Illumina technology.
+ */
+ public static final String DEFAULT_READ_NAME_REGEX = "<optimized capture of last three ':' separated fields as numeric values>".intern();
+
+ private final int[] tmpLocationFields = new int[3]; // for optimization of addLocationInformation
+
+ private String readNameRegex = null;
+
+ private Pattern readNamePattern;
+
+ private boolean warnedAboutRegexNotMatching = false;
+
+ private final Log log;
+
+ /**
+ * Creates a read name parser using the default read name regex. See {@link #DEFAULT_READ_NAME_REGEX}
+ * for an explanation on how the read name is parsed.
+ */
+ public ReadNameParser() {
+ this(DEFAULT_READ_NAME_REGEX);
+ }
+
+ /**
+ * Creates a read name parser using the given read name regex. See {@link #DEFAULT_READ_NAME_REGEX} for an explanation on how to
+ * format the regular expression (regex) string.
+ * @param readNameRegex the read name regular expression string to parse read names, null to never parse location information.
+ */
+ public ReadNameParser(final String readNameRegex) {
+ this(readNameRegex, null);
+ }
+
+ /**
+ * Creates a read name parser using the given read name regex. See {@link #DEFAULT_READ_NAME_REGEX} for an explanation on how to
+ * format the regular expression (regex) string.
+ * @param readNameRegex the read name regular expression string to parse read names, null to never parse location information.
+ * @param log the log to which to write messages.
+ */
+ public ReadNameParser(final String readNameRegex, final Log log) {
+ this.readNameRegex = readNameRegex;
+ this.log = log;
+ }
+
+ /**
+ * Extracts tile/x/y from the read name and stores them in the given PhysicalLocation so that they can be used
+ * later to determine optical duplication. At most one warning is logged per parser, and only if a log was given.
+ *
+ * @param readName the name of the read/cluster
+ * @param loc the object to add tile/x/y to
+ * @return true if the read name contained the information in parsable form, false otherwise
+ */
+ public boolean addLocationInformation(final String readName, final PhysicalLocation loc) {
+ try {
+ // Optimized version if using the default read name regex (== used on purpose):
+ if (this.readNameRegex == ReadNameParser.DEFAULT_READ_NAME_REGEX) {
+ final int fields = getLastThreeFields(readName, ':', tmpLocationFields);
+ if (!(fields == 5 || fields == 7)) {
+ if (null != log && !this.warnedAboutRegexNotMatching) {
+ this.log.warn(String.format("Default READ_NAME_REGEX '%s' did not match read name '%s'. " +
+ "You may need to specify a READ_NAME_REGEX in order to correctly identify optical duplicates. " +
+ "Note that this message will not be emitted again even if other read names do not match the regex.",
+ this.readNameRegex, readName));
+ this.warnedAboutRegexNotMatching = true;
+ }
+ return false;
+ }
+ loc.setTile((short) tmpLocationFields[0]);
+ loc.setX(tmpLocationFields[1]);
+ loc.setY(tmpLocationFields[2]);
+ return true;
+ } else if (this.readNameRegex == null) {
+ return false;
+ } else {
+ // Standard version that will use the regex; the pattern is compiled lazily on first use
+ if (this.readNamePattern == null) this.readNamePattern = Pattern.compile(this.readNameRegex);
+
+ final Matcher m = this.readNamePattern.matcher(readName);
+ if (m.matches()) {
+ loc.setTile((short) Integer.parseInt(m.group(1)));
+ loc.setX(Integer.parseInt(m.group(2)));
+ loc.setY(Integer.parseInt(m.group(3)));
+ return true;
+ } else {
+ if (null != log && !this.warnedAboutRegexNotMatching) {
+ this.log.warn(String.format("READ_NAME_REGEX '%s' did not match read name '%s'. Your regex may not be correct. " +
+ "Note that this message will not be emitted again even if other read names do not match the regex.",
+ this.readNameRegex, readName));
+ this.warnedAboutRegexNotMatching = true;
+ }
+ return false;
+ }
+ }
+ }
+ catch (NumberFormatException nfe) {
+ if (log != null && !this.warnedAboutRegexNotMatching) {
+ this.log.warn("A field parsed out of a read name was expected to contain an integer and did not. ",
+ "Read name: ", readName, ". Cause: ", nfe.getMessage());
+ this.warnedAboutRegexNotMatching = true;
+ }
+ return false;
+ }
+ }
+
+ /**
+ * Given a string, splits the string by the delimiter and parses the last three fields as integers. Parsing a field
+ * considers only a sequence of digits up until the first non-digit character. The three values are stored in the passed-in array.
+ * @return the total number of delimited fields found, or -1 (with all three tokens set to -1) if fewer than three were found
+ * @throws NumberFormatException if any of the tokens that should contain numbers do not start with parsable numbers
+ */
+ public static int getLastThreeFields(final String readName, final char delim, final int[] tokens) throws NumberFormatException {
+ int tokensIdx = 2; // fill the array back to front, starting with its last slot
+ int numFields = 0;
+ int i, endIdx;
+ endIdx = readName.length();
+ // scan right-to-left, parsing only the last three fields; index 0 also terminates a field
+ for (i = readName.length() - 1; 0 <= i && 0 <= tokensIdx; i--) {
+ if (readName.charAt(i) == delim || 0 == i) {
+ numFields++;
+ tokens[tokensIdx] = rapidParseInt(readName.substring((0 == i) ? 0 : (i+1), endIdx));
+ tokensIdx--;
+ endIdx = i;
+ }
+ }
+ // keep scanning to the start of the string purely to count the remaining fields
+ while (0 <= i) {
+ if (readName.charAt(i) == delim || 0 == i) numFields++;
+ i--;
+ }
+ if (numFields < 3) {
+ tokens[0] = tokens[1] = tokens[2] = -1;
+ return -1;
+ }
+ else {
+ return numFields;
+ }
+ }
+
+ /**
+ * Rapidly parses the leading run of decimal digits of a String (after an optional leading '-'), stopping
+ * at the first non-digit character. Overflow is not detected.
+ * @return the parsed value, negated when the input starts with '-'
+ * @throws NumberFormatException if the String does not start with an optional - followed by at least one digit
+ */
+ public static int rapidParseInt(final String input) throws NumberFormatException {
+ final int len = input.length();
+ int val = 0;
+ int i = 0;
+ boolean isNegative = false;
+
+ if (0 < len && '-' == input.charAt(0)) {
+ i = 1;
+ isNegative = true;
+ }
+
+ boolean hasDigits = false;
+ for (; i < len; ++i) {
+ final int digit = Character.digit(input.charAt(i), 10); // true numeric value of any Unicode decimal digit, else -1
+ if (digit >= 0) {
+ val = (val * 10) + digit;
+ hasDigits = true;
+ } else {
+ break;
+ }
+ }
+
+ if (!hasDigits) throw new NumberFormatException("String '" + input + "' did not start with a parsable number.");
+ if (isNegative) val = -val;
+ return val;
+ }
+}
diff --git a/src/java/picard/sam/util/ReadNameParsingUtils.java b/src/java/picard/sam/util/ReadNameParsingUtils.java
deleted file mode 100644
index ac6e268..0000000
--- a/src/java/picard/sam/util/ReadNameParsingUtils.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2015 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package picard.sam.util;
-
-/**
- * Common functions for quickly parsing strings. Used for parsing the tile and coordinates from the read names
- */
-public class ReadNameParsingUtils {
-
- /**
- * Single pass method to parse the read name for the default regex. This will only insert the 2nd to the 4th
- * tokens (inclusive). It will also stop after the fifth token has been successfully parsed.
- */
- public static int getRapidDefaultReadNameRegexSplit(final String readName, final char delim, final int[] tokens) {
- int tokensIdx = 0;
- int prevIdx = 0;
- for (int i = 0; i < readName.length(); i++) {
- if (readName.charAt(i) == delim) {
- if (1 < tokensIdx && tokensIdx < 5)
- tokens[tokensIdx] = rapidParseInt(readName.substring(prevIdx, i)); // only fill in 2-4 inclusive
- tokensIdx++;
- if (4 < tokensIdx) return tokensIdx; // early return, only consider the first five tokens
- prevIdx = i + 1;
- }
- }
- if (prevIdx < readName.length()) {
- if (1 < tokensIdx && tokensIdx < 5)
- tokens[tokensIdx] = rapidParseInt(readName.substring(prevIdx, readName.length())); // only fill in 2-4 inclusive
- tokensIdx++;
- }
- return tokensIdx;
- }
-
- /**
- * Very specialized method to rapidly parse a sequence of digits from a String up until the first
- * non-digit character.
- */
- public static int rapidParseInt(final String input) {
- final int len = input.length();
- int val = 0;
- int i = 0;
- boolean isNegative = false;
-
- if (0 < len && '-' == input.charAt(0)) {
- i = 1;
- isNegative = true;
- }
-
- for (; i < len; ++i) {
- final char ch = input.charAt(i);
- if (Character.isDigit(ch)) {
- val = (val * 10) + (ch - 48);
- } else {
- break;
- }
- }
-
- if (isNegative) val = -val;
-
- return val;
- }
-}
diff --git a/src/java/picard/util/BedToIntervalList.java b/src/java/picard/util/BedToIntervalList.java
index 113e864..c893713 100644
--- a/src/java/picard/util/BedToIntervalList.java
+++ b/src/java/picard/util/BedToIntervalList.java
@@ -31,12 +31,53 @@ import java.io.IOException;
* @author nhomer
*/
@CommandLineProgramProperties(
- usage = "Converts a BED file to an Picard Interval List.",
- usageShort = "Converts a BED file to an Picard Interval List.",
+ usage = BedToIntervalList.USAGE_SUMMARY + BedToIntervalList.USAGE_DETAILS,
+ usageShort = BedToIntervalList.USAGE_SUMMARY,
programGroup = Intervals.class
)
public class BedToIntervalList extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Converts a BED file to a Picard Interval List. " ;
+ static final String USAGE_DETAILS = "This tool provides easy conversion from BED to the Picard interval_list format which is " +
+ "required by many Picard processing tools. Note that the coordinate system of BED files is such that the first base or " +
+ "position in a sequence is numbered \"0\", while in interval_list files it is numbered \"1\"." +
+ "<br /><br />" +
+ "BED files contain sequence data displayed in a flexible format that includes nine optional fields, " +
+ "in addition to three required fields within the annotation tracks. The required fields of a BED file include:" +
+ "<pre>" +
+ " chrom - The name of the chromosome (e.g. chr20) or scaffold (e.g. scaffold10671) <br />" +
+ " chromStart - The starting position of the feature in the chromosome or scaffold. The first base in a chromosome is numbered \"0\" <br />" +
+ " chromEnd - The ending position of the feature in the chromosome or scaffold. The chromEnd base is not" +
+ " included in the display of the feature. For example, the first 100 bases of a " +
+ "chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99." +
+ "</pre>" +
+ "In each annotation track, the number of fields per line must be consistent throughout a data set. " +
+ "For additional information regarding BED files and the annotation field options, please see:" +
+ " http://genome.ucsc.edu/FAQ/FAQformat.html#format1." +
+ "<br /> <br /> " +
+ "Interval_list files contain sequence data distributed into intervals. The interval_list file format is relatively simple " +
+ "and reflects the SAM alignment format to a degree. A SAM style header must be present in the file that lists the sequence " +
+ "records against which the intervals are described. After the header, the file then contains records, one per line in plain " +
+ "text format with the following values tab-separated::" +
+ "<pre> " +
+ " -Sequence name (SN) - The name of the sequence in the file for identification purposes, can be chromosome number e.g. chr20 <br /> " +
+ " -Start position - Interval start position (starts at +1) <br /> " +
+ " -End position - Interval end position (1-based, end inclusive) <br /> " +
+ " -Strand - Indicates +/- strand for the interval (either + or -) <br /> " +
+ " -Interval name - (Each interval should have a unique name) " +
+ "</pre>" +
+ " <br /><br />" +
+ "This tool requires sequence dictionary file (with \".dict\" extension), which can be created from a reference sequence " +
+ "using Picard's CreateSequenceDictionary tool."+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar BedToIntervalList \\<br />" +
+ " I=input.bed \\<br />" +
+ " O=list.interval_list \\<br />" +
+ " SD=reference_sequence.dict" +
+ "</pre>" +
+ "<br /> <br /> "+
+ "<hr />"
+ ;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input BED file")
public File INPUT;
@@ -50,7 +91,7 @@ public class BedToIntervalList extends CommandLineProgram {
public boolean SORT = true;
@Option(doc="If true, unique the output interval list by merging overlapping regions, before writing it (implies sort=true).")
- public boolean UNIQUE = true;
+ public boolean UNIQUE = false;
final Log LOG = Log.getInstance(getClass());
diff --git a/src/java/picard/util/IlluminaUtil.java b/src/java/picard/util/IlluminaUtil.java
index 34a369f..82e456f 100644
--- a/src/java/picard/util/IlluminaUtil.java
+++ b/src/java/picard/util/IlluminaUtil.java
@@ -201,9 +201,22 @@ public class IlluminaUtil {
* @return A single string representation of all the barcodes
*/
public static String barcodeSeqsToString(final String barcodes[]) {
+ return stringSeqsToString(barcodes, BARCODE_DELIMITER);
+ }
+
+ /**
+ * Concatenates all the barcode sequences with BARCODE_DELIMITER
+ * @param barcodes
+ * @return A single string representation of all the barcodes
+ */
+ public static String barcodeSeqsToString(final byte barcodes[][]) {
+ return byteArrayToString(barcodes, BARCODE_DELIMITER);
+ }
+
+ public static String stringSeqsToString(final String barcodes[], String delim) {
final StringBuilder sb = new StringBuilder();
for (final String bc : barcodes) {
- if (sb.length() > 0) sb.append(BARCODE_DELIMITER);
+ if (sb.length() > 0) sb.append(delim);
sb.append(bc);
}
return sb.toString();
@@ -214,11 +227,11 @@ public class IlluminaUtil {
* @param barcodes
* @return A single string representation of all the barcodes
*/
- public static String barcodeSeqsToString(final byte barcodes[][]) {
+ public static String byteArrayToString(final byte barcodes[][], String delim) {
final String bcs[] = new String[barcodes.length];
for (int i = 0; i < barcodes.length; i++) {
bcs[i] = StringUtil.bytesToString(barcodes[i]);
}
- return barcodeSeqsToString(bcs);
+ return stringSeqsToString(bcs, delim);
}
}
diff --git a/src/java/picard/util/IntervalListToBed.java b/src/java/picard/util/IntervalListToBed.java
new file mode 100644
index 0000000..6dd2fde
--- /dev/null
+++ b/src/java/picard/util/IntervalListToBed.java
@@ -0,0 +1,90 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Tim Fennell
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.util;
+
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Interval;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.RuntimeIOException;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Intervals;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Trivially simple command line program to convert an IntervalList file to a BED file.
+ * @author Tim Fennell
+ */
+@CommandLineProgramProperties(
+ usage = "Converts an Picard IntervalList file to a BED file.",
+ usageShort = "Converts an Picard IntervalList file to a BED file.",
+ programGroup = Intervals.class
+)
+public class IntervalListToBed extends CommandLineProgram {
+ @Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input IntervalList file.")
+ public File INPUT;
+
+ @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output BED file.")
+ public File OUTPUT;
+
+ @Option(doc="The score, between 0-1000, to output for each interval in the BED file.")
+ public int SCORE = 500;
+
+ @Option(doc="If true, sort the interval list prior to outputting as BED file.")
+ public boolean SORT = true;
+
+ /** Writes each interval of INPUT (optionally sorted) to OUTPUT as a six-column BED line; returns 0 on success. */
+ @Override
+ protected int doWork() {
+ IOUtil.assertFileIsReadable(INPUT);
+ IOUtil.assertFileIsWritable(OUTPUT);
+
+ IntervalList intervals = IntervalList.fromFile(INPUT);
+ if (SORT) intervals = intervals.sorted();
+
+ // try-with-resources closes (and flushes) the writer even when a write fails part-way through
+ try (final BufferedWriter out = IOUtil.openFileForBufferedWriting(OUTPUT)) {
+ for (final Interval i : intervals) {
+ final String strand = i.isNegativeStrand() ? "-" : "+";
+ // interval_list starts are 1-based and BED starts are 0-based; the inclusive end equals BED's exclusive end
+ final List<?> fields = CollectionUtil.makeList(i.getContig(), i.getStart()-1, i.getEnd(), i.getName(), SCORE, strand);
+ out.append(fields.stream().map(String::valueOf).collect(Collectors.joining("\t")));
+ out.newLine();
+ }
+ }
+ catch (IOException ioe) {
+ throw new RuntimeIOException(ioe);
+ }
+
+ return 0;
+ }
+}
diff --git a/src/java/picard/util/IntervalListTools.java b/src/java/picard/util/IntervalListTools.java
index 759fba7..7dc06d7 100644
--- a/src/java/picard/util/IntervalListTools.java
+++ b/src/java/picard/util/IntervalListTools.java
@@ -32,14 +32,38 @@ import java.util.Set;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = " General tool for manipulating interval lists, " +
- "including sorting, merging, padding, uniqueifying, and other set-theoretic operations. Default operation if given one or more inputs is to " +
- "merge and sort them. Other options are controlled by arguments.",
- usageShort = "General tool for manipulating interval lists",
+ usage = IntervalListTools.USAGE_SUMMARY + IntervalListTools.USAGE_DETAILS,
+ usageShort = IntervalListTools.USAGE_SUMMARY,
programGroup = Intervals.class
)
public class IntervalListTools extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Manipulates interval lists. ";
+ static final String USAGE_DETAILS = "This tool offers multiple interval list file manipulation capabilities include sorting, " +
+ "merging, subtracting, padding, customizing, and other set-theoretic operations. If given one or more inputs, the default " +
+ "operation is to merge and sort them. Other options e.g. interval subtraction are controlled by the arguments. The tool " +
+ "lists intervals with respect to a reference sequence." +
+ "<br /><br />" +
+ "Both interval_list and VCF files are accepted as input. The interval_list file format is relatively simple" +
+ " and reflects the SAM alignment format to a degree. A SAM style header must be present in the file that " +
+ "lists the sequence records against which the intervals are described. After the header, the file then" +
+ " contains records, one per line in text format with the following" +
+ " values tab-separated: " +
+ "<pre>" +
+ " -Sequence name (SN) <br />" +
+ " -Start position (1-based)** <br />" +
+ " -End position (1-based, end inclusive) <br />" +
+ " -Strand (either + or -) <br />" +
+ " -Interval name (ideally unique names for intervals)" +
+ "</pre>" +
+ "The coordinate system of interval_list files is such that the first base or position in a sequence is position \"1\"." +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar IntervalListTools \\<br />" +
+ " I=input.interval_list \\<br />" +
+ " SI=input_2.interval_list \\<br />" +
+ " O=new.interval_list" +
+ "</pre>" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
doc = "One or more interval lists. If multiple interval lists are provided the output is the" +
"result of merging the inputs. Supported formats are interval_list and VCF.", minElements = 1)
diff --git a/src/java/picard/util/LiftOverIntervalList.java b/src/java/picard/util/LiftOverIntervalList.java
index 9d1b9f3..5d5f3f8 100644
--- a/src/java/picard/util/LiftOverIntervalList.java
+++ b/src/java/picard/util/LiftOverIntervalList.java
@@ -43,13 +43,26 @@ import java.util.List;
* @author alecw at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Lifts over an interval list from one reference build to another. Based on UCSC liftOver." +
- " Uses a UCSC chain file to guide the liftOver.",
- usageShort = "Lifts over an interval list from one reference build to another",
+ usage = LiftOverIntervalList.USAGE_SUMMARY + LiftOverIntervalList.USAGE_DETAILS,
+ usageShort = LiftOverIntervalList.USAGE_SUMMARY,
programGroup = Intervals.class
)
public class LiftOverIntervalList extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Lifts over an interval list from one reference build to another. ";
+ static final String USAGE_DETAILS = "This tool adjusts the coordinates in an interval list derived from one reference to match " +
+ "a new reference, based on a chain file that describes the correspondence between the two references. It is based on the " +
+ "UCSC liftOver tool (see: http://genome.ucsc.edu/cgi-bin/hgLiftOver) and uses a UCSC chain file to guide its operation. " +
+ "It accepts both Picard interval_list files or VCF files as interval inputs." +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar LiftOverIntervalList \\<br />" +
+ " I=input.interval_list \\<br />" +
+ " O=output.interval_list \\<br />" +
+ " SD=reference_sequence.dict \\<br />" +
+ " CHAIN=build.chain" +
+ "</pre>" +
+ "<hr />";
private static final Log LOG = Log.getInstance(LiftOverIntervalList.class);
@Option(doc = "Interval list to be lifted over.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
diff --git a/src/java/picard/util/MathUtil.java b/src/java/picard/util/MathUtil.java
index 2cfebe7..bd04497 100644
--- a/src/java/picard/util/MathUtil.java
+++ b/src/java/picard/util/MathUtil.java
@@ -134,6 +134,28 @@ final public class MathUtil {
return index;
}
+ /** Returns the largest value stored in the array. */
+ public static long max(final long[] nums) {
+ return nums[indexOfMax(nums)];
+ }
+
+ /**
+ * Returns the index of the largest element in the array. If there are multiple equal maxima then
+ * the earliest one in the array is returned. The array must not be empty.
+ */
+ public static int indexOfMax(final long[] nums) {
+ long max = nums[0]; // keep the running maximum as a long: a double cannot represent every long above 2^53
+ int index = 0;
+ for (int i = 1; i < nums.length; ++i) {
+ if (nums[i] > max) {
+ max = nums[i];
+ index = i;
+ }
+ }
+
+ return index;
+ }
+
/** Returns the smallest value stored in the array. */
public static double min(final double[] nums) {
double min = nums[0];
@@ -164,6 +186,16 @@ final public class MathUtil {
return min;
}
+ /** Finds the minimum value held in the array, which must contain at least one element. */
+ public static byte min(final byte[] nums) {
+ byte smallest = nums[0];
+ for (final byte candidate : nums) {
+ if (candidate < smallest) smallest = candidate;
+ }
+
+ return smallest;
+ }
+
/** Mimic's R's seq() function to produce a sequence of equally spaced numbers. */
public static double[] seq(final double from, final double to, final double by) {
if (from < to && by <= 0) return new double[0];
diff --git a/src/java/picard/util/ScatterIntervalsByNs.java b/src/java/picard/util/ScatterIntervalsByNs.java
index 7612105..588b545 100644
--- a/src/java/picard/util/ScatterIntervalsByNs.java
+++ b/src/java/picard/util/ScatterIntervalsByNs.java
@@ -33,12 +33,24 @@ import java.util.Set;
*/
@CommandLineProgramProperties(
- usage = "Writes an interval list based on splitting the reference by Ns.",
- usageShort = "Writes an interval list based on splitting the reference by Ns",
+ usage = ScatterIntervalsByNs.USAGE_SUMMARY + ScatterIntervalsByNs.USAGE_DETAILS,
+ usageShort = ScatterIntervalsByNs.USAGE_SUMMARY,
programGroup = Intervals.class
)
public class ScatterIntervalsByNs extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Writes an interval list based on splitting the reference by Ns. ";
+ static final String USAGE_DETAILS = "This tool identifies positions in the reference where the basecalls are Ns and writes out an " +
+ "interval list using the resulting coordinates (excluding the N bases). This can be used to create an interval list for " +
+ "whole genome sequence (WGS) for e.g. scatter-gather purposes, as an alternative to using fixed-length intervals. The number " +
+ "of contiguous Ns that can be tolerated before creating a break is adjustable from the command line.<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar ScatterIntervalsByNs \\<br />" +
+ " R=reference_sequence.fasta \\<br />" +
+ " OT=BOTH \\<br />" +
+ " O=output.interval_list" +
+ "</pre>" +
+ "<hr />";
@Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, doc = "Reference sequence to use.")
public File REFERENCE;
diff --git a/src/java/picard/vcf/CallingMetricAccumulator.java b/src/java/picard/vcf/CallingMetricAccumulator.java
index c0082c3..275e233 100644
--- a/src/java/picard/vcf/CallingMetricAccumulator.java
+++ b/src/java/picard/vcf/CallingMetricAccumulator.java
@@ -1,23 +1,50 @@
package picard.vcf;
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
-import htsjdk.variant.variantcontext.Allele;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextUtils;
+import htsjdk.variant.vcf.VCFHeader;
import picard.util.DbSnpBitSetUtil;
import picard.vcf.processor.VariantProcessor;
-import static picard.vcf.CollectVariantCallingMetrics.VariantCallingSummaryMetrics;
-import static picard.vcf.CollectVariantCallingMetrics.VariantCallingDetailMetrics;
-
import java.util.ArrayList;
import java.util.Collection;
+import java.util.List;
import java.util.Map;
+import java.util.stream.Collectors;
+
+import static picard.vcf.CollectVariantCallingMetrics.VariantCallingDetailMetrics;
+import static picard.vcf.CollectVariantCallingMetrics.VariantCallingSummaryMetrics;
/**
- * Collects variants and generates metrics about them. To use, construct, call
+ * Collects variants and generates metrics about them. To use, construct, call {@link #setup(VCFHeader)} once, then
* {@link #accumulate(htsjdk.variant.variantcontext.VariantContext)} as desired, then call {@link #result()}.
*
* @author mccowan
@@ -33,28 +60,23 @@ public class CallingMetricAccumulator implements VariantProcessor.Accumulator<Ca
}
public static Result merge(final Collection<Result> results) {
- final Collection<VariantCallingDetailMetrics> details = new ArrayList<VariantCallingDetailMetrics>();
- final Collection<VariantCallingSummaryMetrics> summaries = new ArrayList<VariantCallingSummaryMetrics>();
- for (final Result result : results) {
+ final Collection<VariantCallingDetailMetrics> details = new ArrayList<>();
+ final Collection<VariantCallingSummaryMetrics> summaries = new ArrayList<>();
+ results.stream().forEach(result -> {
summaries.add(result.summary);
details.addAll(result.details);
- }
+ });
+ final Map<String, List<VariantCallingDetailMetrics>> sampleDetailsMap =
+ details.stream().collect(Collectors.groupingBy(vcDetailMetrics -> vcDetailMetrics.SAMPLE_ALIAS));
- final Map<String, Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics>> sampleDetailsMap = CollectionUtil.partition(details,
- new CollectionUtil.Partitioner<CollectVariantCallingMetrics.VariantCallingDetailMetrics, String>() {
- @Override
- public String getPartition(final CollectVariantCallingMetrics.VariantCallingDetailMetrics variantCallingDetailMetrics) {
- return variantCallingDetailMetrics.SAMPLE_ALIAS;
- }
- });
- final Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics> collapsedDetails = new ArrayList<VariantCallingDetailMetrics>();
- for (final Collection<VariantCallingDetailMetrics> sampleDetails : sampleDetailsMap.values()) {
+ final Collection<CollectVariantCallingMetrics.VariantCallingDetailMetrics> collapsedDetails = new ArrayList<>();
+
+ sampleDetailsMap.values().stream().forEach(sampleDetails -> {
final VariantCallingDetailMetrics collapsed = new VariantCallingDetailMetrics();
VariantCallingDetailMetrics.foldInto(collapsed, sampleDetails);
collapsedDetails.add(collapsed);
- }
-
+ });
final VariantCallingSummaryMetrics collapsedSummary = new VariantCallingSummaryMetrics();
VariantCallingSummaryMetrics.foldInto(collapsedSummary, summaries);
@@ -73,34 +95,34 @@ public class CallingMetricAccumulator implements VariantProcessor.Accumulator<Ca
* then returned.
*/
private final CollectionUtil.DefaultingMap<String, VariantCallingDetailMetrics> sampleMetricsMap =
- new CollectionUtil.DefaultingMap<String, VariantCallingDetailMetrics>(
- new CollectionUtil.DefaultingMap.Factory<VariantCallingDetailMetrics, String>() {
- @Override
- public VariantCallingDetailMetrics make(final String sampleName) {
- final VariantCallingDetailMetrics detail = new VariantCallingDetailMetrics();
- detail.SAMPLE_ALIAS = sampleName;
- return detail;
- }
+ new CollectionUtil.DefaultingMap<>(
+ sampleName -> {
+ final VariantCallingDetailMetrics detail = new VariantCallingDetailMetrics();
+ detail.SAMPLE_ALIAS = sampleName;
+ return detail;
}, true);
public CallingMetricAccumulator(final DbSnpBitSetUtil.DbSnpBitSets dbsnp) {
this.dbsnp = dbsnp;
}
+ public void setup(final VCFHeader vcfHeader) {
+ //noop.
+ }
+
/** Incorporates the provided variant's data into the metric analysis. */
@Override
public void accumulate(final VariantContext vc) {
- progress.record(vc.getChr(), vc.getStart());
+ progress.record(vc.getContig(), vc.getStart());
if (!isVariantExcluded(vc)) {
final String singletonSample = getSingletonSample(vc);
updateSummaryMetric(summaryMetric, null, vc, singletonSample != null); // The summary metric has no genotype.
- for (final String sampleName : vc.getSampleNames()) {
- // Skip homozygous reference calls.
- if (!vc.getGenotype(sampleName).isHomRef()) {
- updateDetailMetric(sampleMetricsMap.get(sampleName), vc.getGenotype(sampleName), vc,
- sampleName.equals(singletonSample));
- }
- }
+
+ vc.getSampleNames().stream()
+ .filter(sampleName -> !vc.getGenotype(sampleName).isHomRef())
+ .forEach(sampleName ->
+ updateDetailMetric(sampleMetricsMap.get(sampleName), vc.getGenotype(sampleName), vc,
+ sampleName.equals(singletonSample)));
}
}
@@ -108,50 +130,44 @@ public class CallingMetricAccumulator implements VariantProcessor.Accumulator<Ca
* @return Sample name if exactly one sample is het and no sample is hom-var; else null (i.e. when two or more samples
* are het, when any sample is hom-var, or when no sample is het or hom-var).
*/
- private String getSingletonSample(final VariantContext vc) {
- String singletonSample = null;
- for (final String sampleName : vc.getSampleNames()) {
- final Genotype genotype = vc.getGenotype(sampleName);
- if (genotype.isHomVar()) {
- return null;
- } else if (genotype.isHet()) {
- if (singletonSample != null) {
- // second sample with non-reference allele, so not a singleton
- return null;
- } else {
- singletonSample = sampleName;
- }
- }
+ protected static String getSingletonSample(final VariantContext vc) {
+
+ // lambdas can only capture effectively final locals, so a one-element array serves as a mutable holder for peek()
+ final String[] sampleName = new String[1];
+
+ if (vc.getGenotypes()
+ .stream()
+ // look at het or homVar genotypes
+ .filter(genotype -> genotype.isHet() || genotype.isHomVar())
+ // two such genotypes will be enough
+ .limit(2)
+ //get any of the sample names
+ .peek(genotype -> sampleName[0] = genotype.getSampleName())
+ //map to the number of variant chromosomes
+ .mapToInt(genotype -> genotype.isHet() ? 1 : 2)
+ //add them up
+ .reduce(Integer::sum)
+ // compare to 1 with 0 as default
+ .orElse(0) == 1) {
+ return sampleName[0];
+ } else {
+ return null;
}
- return singletonSample;
}
public Result result() {
final Collection<VariantCallingDetailMetrics> values = sampleMetricsMap.values();
- for (final VariantCallingDetailMetrics value : values) {
- value.updateDerivedValuesInPlace();
- }
+ values.forEach(CollectVariantCallingMetrics.VariantCallingDetailMetrics::updateDerivedValuesInPlace);
summaryMetric.updateDerivedValuesInPlace();
return new Result(summaryMetric, values);
}
/** Returns true if the variant is --NOT-- interesting enough to be included in metrics calculations. */
- private boolean isVariantExcluded(final VariantContext vc) {
+ static private boolean isVariantExcluded(final VariantContext vc) {
// Exclude records that are not variant, or in which every genotype (if any) is homozygous reference
- if (!vc.isVariant()) {
- return true;
- }
-
- // Skip calls which are homozygous reference for all samples.
- for (final String sample : vc.getSampleNames()) {
- if (!vc.getGenotype(sample).isHomRef()) {
- return false; // TODO: Is this correct?
- }
- }
-
- return true;
+ return !vc.isVariant() || vc.getGenotypes().stream().allMatch(Genotype::isHomRef);
}
private void updateDetailMetric(final VariantCallingDetailMetrics metric,
@@ -192,8 +208,8 @@ public class CallingMetricAccumulator implements VariantProcessor.Accumulator<Ca
if (vc.isBiallelic() && vc.isSNP()) {
// Biallelic SNPs
- final boolean isInDbSnp = dbsnp.snps.isDbSnpSite(vc.getChr(), vc.getStart());
- final boolean isTransition = isTransition(vc);
+ final boolean isInDbSnp = dbsnp.snps.isDbSnpSite(vc.getContig(), vc.getStart());
+ final boolean isTransition = VariantContextUtils.isTransition(vc);
metric.TOTAL_SNPS++;
@@ -231,10 +247,10 @@ public class CallingMetricAccumulator implements VariantProcessor.Accumulator<Ca
} else if (vc.isSNP() && vc.getAlternateAlleles().size() > 1) {
// Multiallelic SNPs
metric.TOTAL_MULTIALLELIC_SNPS++;
- if (dbsnp.snps.isDbSnpSite(vc.getChr(), vc.getStart())) metric.NUM_IN_DB_SNP_MULTIALLELIC++;
+ if (dbsnp.snps.isDbSnpSite(vc.getContig(), vc.getStart())) metric.NUM_IN_DB_SNP_MULTIALLELIC++;
} else if (vc.isIndel() && !vc.isComplexIndel()) {
// Simple Indels
- final boolean isInDbSnp = dbsnp.indels.isDbSnpSite(vc.getChr(), vc.getStart());
+ final boolean isInDbSnp = dbsnp.indels.isDbSnpSite(vc.getContig(), vc.getStart());
final boolean isInsertion = vc.isSimpleInsertion();
metric.TOTAL_INDELS++;
@@ -252,49 +268,7 @@ public class CallingMetricAccumulator implements VariantProcessor.Accumulator<Ca
} else if (vc.isComplexIndel()) {
// Complex Indels
metric.TOTAL_COMPLEX_INDELS++;
- if (dbsnp.indels.isDbSnpSite(vc.getChr(), vc.getStart())) metric.NUM_IN_DB_SNP_COMPLEX_INDELS++;
+ if (dbsnp.indels.isDbSnpSite(vc.getContig(), vc.getStart())) metric.NUM_IN_DB_SNP_COMPLEX_INDELS++;
}
-
- }
-
- /**
- * Answers if the provided variant is transitional (otherwise, it's transversional).
- * Transitions:
- * A->G
- * G->A
- * C->T
- * T->C
- * <p/>
- * Transversions:
- * A->C
- * A->T
- * C->A
- * C->G
- * G->C
- * G->T
- * T->A
- * T->G
- */
- static private boolean isTransition(final VariantContext vc) {
- final byte refAllele = vc.getReference().getBases()[0];
- final Collection<Allele> altAlleles = vc.getAlternateAlleles();
-
- Byte altAllele = null;
- for (final Allele a : altAlleles) {
- if (a.getBases()[0] != refAllele) {
- altAllele = a.getBases()[0];
- break;
- }
- }
- if (altAllele == null) {
- // This should never happen
- throw new IllegalArgumentException("All alternate alleles match the reference base " + (char) refAllele);
- }
-
- return (refAllele == 'A' && altAllele == 'G')
- || (refAllele == 'G' && altAllele == 'A')
- || (refAllele == 'C' && altAllele == 'T')
- || (refAllele == 'T' && altAllele == 'C');
}
-
}
diff --git a/src/java/picard/vcf/CollectVariantCallingMetrics.java b/src/java/picard/vcf/CollectVariantCallingMetrics.java
index a360910..b436bf5 100644
--- a/src/java/picard/vcf/CollectVariantCallingMetrics.java
+++ b/src/java/picard/vcf/CollectVariantCallingMetrics.java
@@ -23,19 +23,20 @@
*/
package picard.vcf;
-import picard.cmdline.CommandLineProgram;
-import picard.cmdline.CommandLineProgramProperties;
-import picard.cmdline.Option;
-import picard.cmdline.StandardOptionDefinitions;
-import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
-import htsjdk.samtools.SAMFileReader;
-import htsjdk.samtools.SAMSequenceDictionary;
-import htsjdk.samtools.util.CloserUtil;
+import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.Metrics;
import picard.util.DbSnpBitSetUtil;
import picard.vcf.processor.VariantProcessor;
@@ -67,11 +68,11 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
public File TARGET_INTERVALS;
@Option(shortName = StandardOptionDefinitions.SEQUENCE_DICTIONARY_SHORT_NAME, optional = true,
- doc = "If present, speeds loading of dbSNP file")
+ doc = "If present, speeds loading of dbSNP file, will look for dictionary in vcf if not present here.")
public File SEQUENCE_DICTIONARY = null;
- @Option(doc = "Deprecated option will be removed in a future release.")
- public Boolean REQUIRE_INDEX = false;
+ @Option(doc = "Set to true if running on a single-sample gvcf.", optional = true)
+ public boolean GVCF_INPUT = false;
@Option
public int THREAD_COUNT = 1;
@@ -88,14 +89,12 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
final boolean requiresIndex = this.TARGET_INTERVALS != null || this.THREAD_COUNT > 1;
final VCFFileReader variantReader = new VCFFileReader(INPUT, requiresIndex);
- final SAMSequenceDictionary sequenceDictionary;
- if (SEQUENCE_DICTIONARY != null) {
- sequenceDictionary = SAMFileReader.getSequenceDictionary(SEQUENCE_DICTIONARY);
- } else {
- sequenceDictionary = variantReader.getFileHeader().getSequenceDictionary();
- }
+ final VCFHeader vcfHeader = variantReader.getFileHeader();
CloserUtil.close(variantReader);
+ final SAMSequenceDictionary sequenceDictionary =
+ SAMSequenceDictionaryExtractor.extractDictionary(SEQUENCE_DICTIONARY == null ? INPUT : SEQUENCE_DICTIONARY);
+
log.info("Loading dbSNP file ...");
final DbSnpBitSetUtil.DbSnpBitSets dbsnp = DbSnpBitSetUtil.createSnpAndIndelBitSets(DBSNP, sequenceDictionary);
@@ -103,21 +102,12 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
final VariantProcessor.Builder<CallingMetricAccumulator, CallingMetricAccumulator.Result> builder =
VariantProcessor.Builder
- .generatingAccumulatorsBy(
- new VariantProcessor.AccumulatorGenerator<CallingMetricAccumulator, CallingMetricAccumulator.Result>() {
- @Override
- public CallingMetricAccumulator build() {
- return new CallingMetricAccumulator(dbsnp);
- }
- })
- .combiningResultsBy(
- new VariantProcessor.ResultMerger<CallingMetricAccumulator.Result>() {
- @Override
- public CallingMetricAccumulator.Result merge(final Collection<CallingMetricAccumulator.Result>
- resultsToReduce) {
- return CallingMetricAccumulator.Result.merge(resultsToReduce);
- }
- })
+ .generatingAccumulatorsBy(() -> {
+ CallingMetricAccumulator accumulator = GVCF_INPUT ? new GvcfMetricAccumulator(dbsnp) : new CallingMetricAccumulator(dbsnp);
+ accumulator.setup(vcfHeader);
+ return accumulator;
+ })
+ .combiningResultsBy(CallingMetricAccumulator.Result::merge)
.withInput(INPUT)
.multithreadingBy(THREAD_COUNT);
@@ -131,9 +121,8 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Integer> detail = getMetricsFile();
final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Integer> summary = getMetricsFile();
summary.addMetric(result.summary);
- for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics detailMetric : result.details) {
- detail.addMetric(detailMetric);
- }
+ result.details.forEach(detail::addMetric);
+
final String outputPrefix = OUTPUT.getAbsolutePath() + ".";
detail.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingDetailMetrics.getFileExtension()));
summary.write(new File(outputPrefix + CollectVariantCallingMetrics.VariantCallingSummaryMetrics.getFileExtension()));
@@ -261,7 +250,6 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
}
target.updateDerivedValuesInPlace();
}
-
}
/** A collection of metrics relating to snps and indels within a variant-calling file (VCF) for a given sample. */
@@ -305,6 +293,5 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
// Divide by zero should be OK -- NaN should get propagated to metrics file and to DB.
HET_HOMVAR_RATIO = numHets / (double) numHomVar;
}
-
}
}
diff --git a/src/java/picard/vcf/GenotypeConcordance.java b/src/java/picard/vcf/GenotypeConcordance.java
index f10e973..4f86ac2 100644
--- a/src/java/picard/vcf/GenotypeConcordance.java
+++ b/src/java/picard/vcf/GenotypeConcordance.java
@@ -62,14 +62,55 @@ import static htsjdk.variant.variantcontext.VariantContext.Type.*;
* @author George Grant
*/
@CommandLineProgramProperties(
- usage = "Calculates the concordance between genotype data for two samples in two different VCFs - one being considered the truth (or reference) " +
- "the other being considered the call. The concordance is broken into separate results sections for SNPs and indels. Summary and detailed statistics are reported\n\n" +
- "Note that for any pair of variants to compare, only the alleles for the samples under interrogation are considered " +
- "and MNP, Symbolic, and Mixed classes of variants are not included.",
- usageShort = "Calculates the concordance between genotype data for two samples in two different VCFs",
+ usage = GenotypeConcordance.USAGE_SUMMARY + GenotypeConcordance.USAGE_DETAILS,
+ usageShort = GenotypeConcordance.USAGE_SUMMARY,
programGroup = VcfOrBcf.class
)
public class GenotypeConcordance extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Evaluate genotype concordance between callsets.";
+ static final String USAGE_DETAILS = "This tool evaluates the concordance between genotype calls for samples in different " +
+ "callsets where one is being considered as the truth (aka standard, or reference) and the other as the call that is being " +
+ "evaluated for accuracy. <br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar GenotypeConcordance \\<br />" +
+ " CALL_VCF=input.vcf \\<br />" +
+ " CALL_SAMPLE=sample_name \\<br />" +
+ " O=gc_concordance.vcf \\<br />" +
+ " TRUTH_VCF=truth_set.vcf \\<br />" +
+ " TRUTH_SAMPLE=truth_sample#" +
+ "</pre>" +
+ "" +
+ "<h4>Output Metrics:</h4>" +
+ "<ul>" +
+ "<li>Output metrics include GenotypeConcordanceContingencyMetrics, GenotypeConcordanceSummaryMetrics, and " +
+ "GenotypeConcordanceDetailMetrics. For each set of metrics, the data is broken into separate sections for " +
+ "SNPs and INDELs. Note that only SNP and INDEL variants are considered, MNP, Symbolic, and Mixed classes" +
+ " of variants are not included. </li>" +
+ "<li>GenotypeConcordanceContingencyMetrics enumerate the constituents of each contingent in a callset " +
+ "including true-positive (TP), true-negative (TN), false-positive (FP), and false-negative (FN) calls. See " +
+ "http://broadinstitute.github.io/picard/picard-metric-definitions.html#GenotypeConcordanceContingencyMetrics" +
+ " for more details.</li>" +
+ "<li>GenotypeConcordanceDetailMetrics include the numbers of SNPs and INDELs for each contingent genotype as well " +
+ "as the number of validated genotypes. See " +
+ "http://broadinstitute.github.io/picard/picard-metric-definitions.html#GenotypeConcordanceDetailMetrics for more details.</li>" +
+ "<li>GenotypeConcordanceSummaryMetrics provide specific details for the variant caller performance on a callset including: " +
+ "values for sensitivity, specificity, and positive predictive values. See " +
+ "http://broadinstitute.github.io/picard/picard-metric-definitions.html#GenotypeConcordanceSummaryMetrics for more details.</li>" +
+ "</ul>" +
+ "<br /><br />" +
+ "Useful definitions applicable to alleles and genotypes:<br /> " +
+ "<ul>"+
+ "<li>Truthset - A callset (typically in VCF format) containing variant calls and genotypes that have been cross-validated " +
+ "with multiple technologies e.g. Genome In A Bottle Consortium (GIAB) (https://sites.stanford.edu/abms/giab)</li>" +
+ "<li>TP - True positives are variant calls that match a 'truthset'</li>" +
+ "<li>FP - False-positives are reference sites miscalled as variant</li>" +
+ "<li>FN - False-negatives are variant sites miscalled as reference</li>" +
+ "<li>TN - True negatives are correctly called reference sites</li>" +
+ "<li>Validated genotypes - are TP sites where the exact genotype (HET or HOM-VAR) has been validated </li> " +
+ "</ul>"+
+ "<hr />"
+ ;
@Option(shortName = "TV", doc="The VCF containing the truth sample")
public File TRUTH_VCF;
diff --git a/src/java/picard/vcf/GvcfMetricAccumulator.java b/src/java/picard/vcf/GvcfMetricAccumulator.java
new file mode 100644
index 0000000..9d5c6a8
--- /dev/null
+++ b/src/java/picard/vcf/GvcfMetricAccumulator.java
@@ -0,0 +1,64 @@
+package picard.vcf;
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.vcf.VCFHeader;
+import picard.util.DbSnpBitSetUtil;
+
+import java.util.List;
+
+/**
+ * An accumulator for collecting metrics about a single-sample GVCF. The main point here is to subset the
+ * context of each {@link VariantContext} as it comes by to the alleles present in the genotype of the only sample.
+ * Since this is a GVCF we expect a symbolic {@code <NON_REF>} allele to be present in each VC. If we do not subset
+ * the context this symbolic allele will cause the regular {@link CallingMetricAccumulator} to return only a
+ * small subset of the relevant metrics.
+ *
+ * @author farjoun
+ */
+public class GvcfMetricAccumulator extends CallingMetricAccumulator {
+ String sample = null;
+
+ public GvcfMetricAccumulator(final DbSnpBitSetUtil.DbSnpBitSets dbsnp) {
+ super(dbsnp);
+ }
+
+ @Override
+ public void setup(final VCFHeader vcfHeader) {
+ final List<String> samples = vcfHeader.getGenotypeSamples();
+ if (samples == null || samples.size() != 1) {
+ throw new IllegalArgumentException("Expected to have exactly 1 sample in a GVCF, found " + ((samples == null) ? "0" : samples.size()));
+ }
+ sample = samples.get(0);
+ }
+
+ @Override
+ public void accumulate(final VariantContext vc) {
+ //since a gvcf always has a <NON_REF> allele, in order to get meaningful results we need to subset the context of
+ // the variant to the alleles that actually appear in the only sample's genotype
+ final VariantContext subContext = vc.subContextFromSample(sample);
+ super.accumulate(subContext);
+ }
+}
diff --git a/src/java/picard/vcf/LiftoverVcf.java b/src/java/picard/vcf/LiftoverVcf.java
index 4b5346e..1faa79f 100644
--- a/src/java/picard/vcf/LiftoverVcf.java
+++ b/src/java/picard/vcf/LiftoverVcf.java
@@ -43,14 +43,30 @@ import java.util.Map;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Lifts a VCF over from one genome build to another using UCSC liftover. The output file will be sorted " +
- "and indexed. Records may be rejected because they cannot be lifted over or because post-liftover the " +
- "reference allele mismatches the target genome build. Rejected records will be emitted with filters " +
- "to the REJECT file, on the source genome.",
- usageShort = "Lifts a VCF between genome builds.",
+ usage = LiftoverVcf.USAGE_SUMMARY + LiftoverVcf.USAGE_DETAILS,
+ usageShort = LiftoverVcf.USAGE_SUMMARY,
programGroup = VcfOrBcf.class
)
public class LiftoverVcf extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Lifts over a VCF file from one reference build to another. ";
+ static final String USAGE_DETAILS = "This tool adjusts the coordinates of variants within a VCF file to match a new reference. The " +
+ "output file will be sorted and indexed using the target reference build. To be clear, REFERENCE_SEQUENCE should be the " +
+ "<em>target</em> reference build. The tool is based on the UCSC liftOver tool (see: http://genome.ucsc.edu/cgi-bin/hgLiftOver) " +
+ "and uses a UCSC chain file to guide its operation. <br /><br />" +
+ "Note that records may be rejected because they cannot be lifted over or because of sequence incompatibilities between the " +
+ "source and target reference genomes. Rejected records will be emitted with filters to the REJECT file, using the source " +
+ "genome coordinates.<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar LiftoverVcf \\<br />" +
+ " I=input.vcf \\<br />" +
+ " O=lifted_over.vcf \\<br />" +
+ " CHAIN=b37tohg19.chain \\<br />" +
+ " REJECT=rejected_variants.vcf \\<br />" +
+ " R=reference_sequence.fasta" +
+ "</pre>" +
+ "For additional information, please see: http://genome.ucsc.edu/cgi-bin/hgLiftOver" +
+ "<hr />";
@Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input VCF/BCF file to be lifted over.")
public File INPUT;
@@ -66,9 +82,16 @@ public class LiftoverVcf extends CommandLineProgram {
@Option(shortName = StandardOptionDefinitions.REFERENCE_SHORT_NAME, common=false,
doc = "The reference sequence (fasta) for the TARGET genome build. The fasta file must have an " +
- "accompanying sqeuence dictionary (.dict file).")
+ "accompanying sequence dictionary (.dict file).")
public File REFERENCE_SEQUENCE = Defaults.REFERENCE_FASTA;
+ // If true, log a warning and keep going when a lifted-over contig is missing from the target reference, rather than logging an error and exiting
+ @Option(shortName = "WMC", doc = "Warn on missing contig.", optional = true)
+ public boolean WARN_ON_MISSING_CONTIG = false;
+
+ // When a contig used in the chain is not in the reference, exit with this value instead of 0.
+ protected static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1;
+
/** Filter name to use when a target cannot be lifted over. */
public static final String FILTER_CANNOT_LIFTOVER_INDEL = "ReverseComplementedIndel";
@@ -157,8 +180,18 @@ public class LiftoverVcf extends CommandLineProgram {
final String reason = (target == null) ? FILTER_NO_TARGET : FILTER_CANNOT_LIFTOVER_INDEL;
rejects.add(new VariantContextBuilder(ctx).filter(reason).make());
failedLiftover++;
- }
- else {
+ } else if (!refSeqs.containsValue(target.getContig())) {
+ rejects.add(new VariantContextBuilder(ctx).filter(FILTER_NO_TARGET).make());
+ failedLiftover++;
+
+ String missingContigMessage = "Encountered a contig, " + target.getContig() + " that is not part of the target reference.";
+ if(WARN_ON_MISSING_CONTIG) {
+ log.warn(missingContigMessage);
+ } else {
+ log.error(missingContigMessage);
+ return EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE;
+ }
+ } else {
// Fix the alleles if we went from positive to negative strand
reverseComplementAlleleMap.clear();
final List<Allele> alleles = new ArrayList<Allele>();
diff --git a/src/java/picard/vcf/RenameSampleInVcf.java b/src/java/picard/vcf/RenameSampleInVcf.java
index a5ff626..120dadd 100644
--- a/src/java/picard/vcf/RenameSampleInVcf.java
+++ b/src/java/picard/vcf/RenameSampleInVcf.java
@@ -42,11 +42,24 @@ import java.io.File;
import java.util.EnumSet;
@CommandLineProgramProperties(
- usage = "Rename a sample within a VCF or BCF.",
- usageShort = "Rename a sample within a VCF or BCF.",
+ usage = RenameSampleInVcf.USAGE_SUMMARY + RenameSampleInVcf.USAGE_DETAILS,
+ usageShort = RenameSampleInVcf.USAGE_SUMMARY,
programGroup = VcfOrBcf.class
)
public class RenameSampleInVcf extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Renames a sample within a VCF or BCF. ";
+ static final String USAGE_DETAILS = "This tool enables the user to rename a sample in either a VCF or BCF file. " +
+ "It is intended to change the name of a sample in a VCF prior to merging with VCF files in which one or more samples have " +
+ "similar names. Note that the input VCF file must be single-sample VCF and that the NEW_SAMPLE_NAME is required." +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar RenameSampleInVcf \\<br />" +
+ " I=input.vcf \\<br />" +
+ " O=renamed.vcf \\<br />" +
+ " NEW_SAMPLE_NAME=sample123" +
+ "</pre>" +
+ "<hr />";
@Option(shortName=StandardOptionDefinitions.INPUT_SHORT_NAME, doc="Input single sample VCF.")
public File INPUT;
diff --git a/src/java/picard/vcf/SortVcf.java b/src/java/picard/vcf/SortVcf.java
index 005a018..b3dd79d 100644
--- a/src/java/picard/vcf/SortVcf.java
+++ b/src/java/picard/vcf/SortVcf.java
@@ -33,14 +33,27 @@ import java.util.List;
* the same sequence dictionaries
*/
@CommandLineProgramProperties(
- usage = "Sorts one or more VCF files according to the order of the contigs in the header/sequence dictionary and then by coordinate. " +
- "Can accept an external sequence dictionary. If no external dictionary is supplied, multiple inputs' headers must have " +
- "the same sequence dictionaries. Multiple inputs must have the same sample names (in order)\n",
- usageShort = "Sorts one or more VCF files",
+ usage = SortVcf.USAGE_SUMMARY + SortVcf.USAGE_DETAILS,
+ usageShort = SortVcf.USAGE_SUMMARY,
programGroup = VcfOrBcf.class
)
public class SortVcf extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Sorts one or more VCF files. ";
+ static final String USAGE_DETAILS = "This tool sorts the records in VCF files according to the order of the contigs " +
+ "in the header/sequence dictionary and then by coordinate. It can accept an external sequence dictionary. If no external " +
+ "dictionary is supplied, the VCF file headers of multiple inputs must have the same sequence dictionaries." +
+ "<br /><br />" +
+ "If running on multiple inputs (originating from e.g. some scatter-gather runs), the input files must contain the same sample " +
+ "names in the same column order. " +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar SortVcf \\<br />" +
+ " I=vcf_1.vcf \\<br />" +
+ " I=vcf_2.vcf \\<br />" +
+ " O=sorted.vcf" +
+ "</pre>" +
+ "<hr />" ;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input VCF(s) to be sorted. Multiple inputs must have the same sample names (in order)")
public List<File> INPUT;
diff --git a/src/java/picard/vcf/SplitVcfs.java b/src/java/picard/vcf/SplitVcfs.java
index b5b3c06..b9521fa 100644
--- a/src/java/picard/vcf/SplitVcfs.java
+++ b/src/java/picard/vcf/SplitVcfs.java
@@ -30,14 +30,25 @@ import java.io.File;
* extension will create gzip-compressed output.
*/
@CommandLineProgramProperties(
- usage = "Splits an input VCF or BCF file into two VCF files, one for indel records and one for SNPs. The" +
- "headers of the two output files will be identical. An index file is created and a" +
- "sequence dictionary is required by default.",
- usageShort = "Splits an input VCF or BCF file into two VCF or BCF files",
+ usage = SplitVcfs.USAGE_SUMMARY + SplitVcfs.USAGE_DETAILS,
+ usageShort = SplitVcfs.USAGE_SUMMARY,
programGroup = VcfOrBcf.class
)
public class SplitVcfs extends CommandLineProgram {
-
+ static final String USAGE_SUMMARY = "Splits SNPs and INDELs into separate files. ";
+ static final String USAGE_DETAILS = "This tool reads in a VCF or BCF file and writes out the SNPs and INDELs it contains to separate " +
+ "files. The headers of the two output files will be identical and index files will be created for both outputs. If records " +
+ "other than SNPs or INDELs are present, set the STRICT option to \"false\", otherwise the tool will raise an exception and " +
+ "quit. <br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar SplitVcfs \\<br />" +
+ " I=input.vcf \\<br />" +
+ " SNP_OUTPUT=snp.vcf \\<br />" +
+ " INDEL_OUTPUT=indel.vcf \\<br />" +
+ " STRICT=false" +
+ "</pre>" +
+ "<hr />" ;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The VCF or BCF input file")
public File INPUT;
diff --git a/src/java/picard/vcf/VcfFormatConverter.java b/src/java/picard/vcf/VcfFormatConverter.java
index 0c62ec2..28d9a3a 100644
--- a/src/java/picard/vcf/VcfFormatConverter.java
+++ b/src/java/picard/vcf/VcfFormatConverter.java
@@ -53,19 +53,31 @@ import java.io.File;
* @author jgentry at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Convert a VCF file to a BCF file, or BCF to VCF.\n" +
- "Input and output formats are determined by file extension.",
- usageShort = "Converts a VCF file to a BCF file, or BCF to VCF",
+ usage = VcfFormatConverter.USAGE_SUMMARY + VcfFormatConverter.USAGE_DETAILS,
+ usageShort = VcfFormatConverter.USAGE_SUMMARY,
programGroup = VcfOrBcf.class
)
public class VcfFormatConverter extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Converts VCF to BCF or BCF to VCF. ";
+ static final String USAGE_DETAILS = "This tool converts files between the plain-text VCF format and its binary compressed equivalent, " +
+ "BCF. Input and output formats are determined by file extensions specified in the file names. For best results," +
+ " it is recommended to ensure that an index file is present and set the REQUIRE_INDEX option to true." +
+ "<br />" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar VcfFormatConverter \\<br />" +
+ " I=input.vcf \\<br />" +
+ " O=output.bcf \\<br />" +
+ " REQUIRE_INDEX=true" +
+ "</pre>"+
+ "<hr />";
// The following attributes define the command-line arguments
public static final Log LOG = Log.getInstance(VcfFormatConverter.class);
- @Option(doc="The BCF or VCF input file. The file format is determined by file extension.", shortName= StandardOptionDefinitions.INPUT_SHORT_NAME)
+ @Option(doc="The BCF or VCF input file.", shortName= StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
- @Option(doc="The BCF or VCF output file. The file format is determined by file extension.", shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME)
+ @Option(doc="The BCF or VCF output file name.", shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME)
public File OUTPUT;
@Option(doc="Fail if an index is not available for the input VCF/BCF")
diff --git a/src/scripts/picard/analysis/insertSizeHistogram.R b/src/scripts/picard/analysis/insertSizeHistogram.R
index b6a0bf1..79c1838 100644
--- a/src/scripts/picard/analysis/insertSizeHistogram.R
+++ b/src/scripts/picard/analysis/insertSizeHistogram.R
@@ -30,7 +30,7 @@ getCumulative <- function(y, yrange) {
yLength <- nrow(y)
ySum <- sum(y[,1])
for (i in 1:yLength) {
- yNew[i] <- (yrange * sum(y[i:yLength,1]) / ySum)
+ yNew[i] <- (yrange * sum(as.numeric(y[i:yLength,1])) / ySum)
}
return (yNew)
}
@@ -87,19 +87,19 @@ for (i in 1:length(levels)) {
if (fr %in% names(histogram) ) {
lines(histogram$insert_size, as.matrix(histogram[fr]), type="h", col="red")
- lines(histogram$insert_size, getCumulative(histogram[fr], yrange), col="darkred", lty=2)
+ lines(histogram$insert_size, getCumulative(histogram[fr], frrange), col="darkred", lty=2)
colors <- c(colors, "red")
labels <- c(labels, "FR")
}
if (rf %in% names(histogram)) {
lines(histogram$insert_size, as.matrix(histogram[rf]), type="h", col="blue")
- lines(histogram$insert_size, getCumulative(histogram[rf], yrange), col="darkblue", lty=2)
+ lines(histogram$insert_size, getCumulative(histogram[rf], rfrange), col="darkblue", lty=2)
colors <- c(colors, "blue")
labels <- c(labels, "RF")
}
if (tandem %in% names(histogram)) {
lines(histogram$insert_size, as.matrix(histogram[tandem]), type="h", col="orange")
- lines(histogram$insert_size, getCumulative(histogram[tandem], yrange), col="darkorange", lty=2)
+ lines(histogram$insert_size, getCumulative(histogram[tandem], tandemrange), col="darkorange", lty=2)
colors <- c(colors, "orange")
labels <- c(labels, "TANDEM")
}
diff --git a/src/scripts/release_picard.sh b/src/scripts/release_picard.sh
index d237931..94654ec 100755
--- a/src/scripts/release_picard.sh
+++ b/src/scripts/release_picard.sh
@@ -163,11 +163,7 @@ then echo "EDITOR environment variable must be set." >&2
exit 1
fi
-# Require actual Java 1.6. This is not necessary for compiling, because can run 1.7 with -target 1.6,
-# but this is necessary in order to force unit tests to run with 1.6.
-(echo $JAVA_HOME | fgrep -q 1.6 ) || { echo "JAVA_HOME $JAVA_HOME is not 1.6" ; exit 1; }
java_version=`java -version 2>&1 | fgrep -i version`
-(echo $java_version | fgrep -q 1.6. ) || { echo "java -version: $java_version is not 1.6"; exit 1; }
PICARDGITROOT=git at github.com:broadinstitute/picard.git
REMOTE=origin
@@ -254,7 +250,7 @@ do pushd $sandbox
# Add the new javadoc files
find javadoc/$sandbox | xargs git add
# Commit!
- git commit -m "Updating javadoc for release: $RELEASE_ID"
+ git commit -a --allow-empty -m "Updating javadoc for release: $RELEASE_ID"
# NB: assumes the push will not fail
git push $REMOTE $GHPAGES_BRANCH
# Reset the repository to master
@@ -280,7 +276,7 @@ cd dist/html
cp inc/*.html program_usage/*.html picard-metric-definitions.html ../../_includes/.
cd ../../
find _includes | xargs git add
-git commit -m "Adding website files for $RELEASE_ID"
+git commit -a --allow-empty -m "Adding website files for $RELEASE_ID"
git push $REMOTE $GHPAGES_BRANCH
# Move back to master just in case
diff --git a/src/tests/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java b/src/tests/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java
index c0610d9..11b1fc4 100644
--- a/src/tests/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java
+++ b/src/tests/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java
@@ -503,7 +503,29 @@ public class CollectAlignmentSummaryMetricsTest extends CommandLineProgramTest {
Assert.fail("Data does not contain this sample: " + metrics.SAMPLE);
}
}
-
}
+ @Test
+ public void testChimeras() throws IOException {
+ final File input = new File(TEST_DATA_DIR, "summary_alignment_stats_test_chimeras.sam");
+ final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta");
+ final File outfile = File.createTempFile("alignmentMetrics", ".txt");
+ outfile.deleteOnExit();
+ final String[] args = new String[] {
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ "MAX_INSERT_SIZE=20",
+ "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
+ };
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> output = new MetricsFile<AlignmentSummaryMetrics, Comparable<?>>();
+ output.read(new FileReader(outfile));
+
+ for (final AlignmentSummaryMetrics metrics : output.getMetrics()) {
+ if (metrics.CATEGORY == AlignmentSummaryMetrics.Category.FIRST_OF_PAIR) {
+ Assert.assertEquals(metrics.PCT_CHIMERAS, 0.8);
+ }
+ }
+ }
}
diff --git a/src/tests/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/tests/java/picard/analysis/CollectGcBiasMetricsTest.java
index ba8fe96..16b405f 100644
--- a/src/tests/java/picard/analysis/CollectGcBiasMetricsTest.java
+++ b/src/tests/java/picard/analysis/CollectGcBiasMetricsTest.java
@@ -110,8 +110,8 @@ public class CollectGcBiasMetricsTest extends CommandLineProgramTest {
/////////////////////////////////////////////////////////////////////////////
@Test
public void runGcBiasMultiLevelTest() throws IOException {
- final File outfile = File.createTempFile("test", ".gc_bias_summary_metrics");
- final File detailsOutfile = File.createTempFile("test", ".gc_bias_detail_metrics");
+ final File outfile = File.createTempFile("test", ".gc_bias.summary_metrics");
+ final File detailsOutfile = File.createTempFile("test", ".gc_bias.detail_metrics");
outfile.deleteOnExit();
detailsOutfile.deleteOnExit();
@@ -126,31 +126,61 @@ public class CollectGcBiasMetricsTest extends CommandLineProgramTest {
Assert.assertEquals(metrics.ALIGNED_READS, 600);
Assert.assertEquals(metrics.AT_DROPOUT, 21.624498);
Assert.assertEquals(metrics.GC_DROPOUT, 3.525922);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 0.831374);
+ Assert.assertEquals(metrics.GC_NC_40_59, 1.049672);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else if (metrics.READ_GROUP != null && metrics.READ_GROUP.equals("TestReadGroup1")) { //Library 1
Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100);
Assert.assertEquals(metrics.ALIGNED_READS, 200);
Assert.assertEquals(metrics.AT_DROPOUT, 23.627784);
Assert.assertEquals(metrics.GC_DROPOUT, 2.582877);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 0.793584);
+ Assert.assertEquals(metrics.GC_NC_40_59, 1.060382);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else if (metrics.READ_GROUP != null && metrics.READ_GROUP.equals("TestReadGroup2")) {//Library 2
Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100);
Assert.assertEquals(metrics.ALIGNED_READS, 200);
Assert.assertEquals(metrics.AT_DROPOUT, 23.784958);
Assert.assertEquals(metrics.GC_DROPOUT, 4.025922);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 0.816258);
+ Assert.assertEquals(metrics.GC_NC_40_59, 1.053956);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else if (metrics.READ_GROUP != null && metrics.READ_GROUP.equals("TestReadGroup3")) {//Library 3
Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100);
Assert.assertEquals(metrics.ALIGNED_READS, 200);
Assert.assertEquals(metrics.AT_DROPOUT, 21.962578);
Assert.assertEquals(metrics.GC_DROPOUT, 4.559328);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 0.88428);
+ Assert.assertEquals(metrics.GC_NC_40_59, 1.034676);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else if (metrics.SAMPLE != null && metrics.SAMPLE.equals("TestSample1")) {//Library 1 and 2
Assert.assertEquals(metrics.TOTAL_CLUSTERS, 200);
Assert.assertEquals(metrics.ALIGNED_READS, 400);
Assert.assertEquals(metrics.AT_DROPOUT, 23.194597);
Assert.assertEquals(metrics.GC_DROPOUT, 3.275922);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 0.804921);
+ Assert.assertEquals(metrics.GC_NC_40_59, 1.057169);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else if (metrics.SAMPLE != null && metrics.SAMPLE.equals("TestSample2")) {//Library 3
Assert.assertEquals(metrics.TOTAL_CLUSTERS, 100);
Assert.assertEquals(metrics.ALIGNED_READS, 200);
Assert.assertEquals(metrics.AT_DROPOUT, 21.962578);
Assert.assertEquals(metrics.GC_DROPOUT, 4.559328);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 0.88428);
+ Assert.assertEquals(metrics.GC_NC_40_59, 1.034676);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else {
Assert.fail("Unexpected metric: " + metrics);
}
diff --git a/src/tests/java/picard/analysis/CollectMultipleMetricsTest.java b/src/tests/java/picard/analysis/CollectMultipleMetricsTest.java
index 9b42ea0..517e41e 100644
--- a/src/tests/java/picard/analysis/CollectMultipleMetricsTest.java
+++ b/src/tests/java/picard/analysis/CollectMultipleMetricsTest.java
@@ -42,7 +42,7 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
public void testAlignmentSummaryViaMultipleMetrics() throws IOException {
final File input = new File(TEST_DATA_DIR, "summary_alignment_stats_test.sam");
final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta");
- final File outfile = File.createTempFile("alignmentMetrics", ".txt");
+ final File outfile = File.createTempFile("alignmentMetrics", "");
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
@@ -56,7 +56,7 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<AlignmentSummaryMetrics, Comparable<?>> output = new MetricsFile<AlignmentSummaryMetrics, Comparable<?>>();
- output.read(new FileReader(outfile));
+ output.read(new FileReader(outfile + ".alignment_summary_metrics"));
for (final AlignmentSummaryMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.MEAN_READ_LENGTH, 101.0);
@@ -104,7 +104,7 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
@Test
public void testInsertSize() throws IOException {
final File input = new File(TEST_DATA_DIR, "insert_size_metrics_test.sam");
- final File outfile = File.createTempFile("test", ".insert_size_metrics");
+ final File outfile = File.createTempFile("test", "");
final File reference = new File(TEST_DATA_DIR, "summary_alignment_stats_test.fasta");
final File pdf = File.createTempFile("test", ".pdf");
outfile.deleteOnExit();
@@ -121,7 +121,7 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<InsertSizeMetrics, Comparable<?>> output = new MetricsFile<InsertSizeMetrics, Comparable<?>>();
- output.read(new FileReader(outfile));
+ output.read(new FileReader(outfile + ".insert_size_metrics"));
for (final InsertSizeMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.PAIR_ORIENTATION.name(), "FR");
@@ -261,14 +261,14 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
}
}
}
-
+
@Test //test all gcBias collection levels
public void testGcBiasMetrics() throws IOException{
runGcTest(tempSamFile);
}
public void runGcTest(final File input) throws IOException {
- final File outfile = File.createTempFile("test", ".gc_bias_summary_metrics");
+ final File outfile = File.createTempFile("test", "");
final String referenceFile = "testdata/picard/quality/chrM.reference.fasta";
outfile.deleteOnExit();
final String[] args = new String[]{
@@ -284,7 +284,7 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
Assert.assertEquals(runPicardCommandLine(args), 0);
final MetricsFile<GcBiasSummaryMetrics, Comparable<?>> output = new MetricsFile<GcBiasSummaryMetrics, Comparable<?>>();
- output.read(new FileReader(outfile));
+ output.read(new FileReader(outfile + ".gc_bias.summary_metrics"));
for (final GcBiasSummaryMetrics metrics : output.getMetrics()) {
if (metrics.ACCUMULATION_LEVEL.equals("All Reads")) { //ALL_READS level
@@ -292,6 +292,11 @@ public class CollectMultipleMetricsTest extends CommandLineProgramTest {
Assert.assertEquals(metrics.ALIGNED_READS, 600);
Assert.assertEquals(metrics.AT_DROPOUT, 7.234062);
Assert.assertEquals(metrics.GC_DROPOUT, 4.086217);
+ Assert.assertEquals(metrics.GC_NC_0_19, 0.0);
+ Assert.assertEquals(metrics.GC_NC_20_39, 1.06826);
+ Assert.assertEquals(metrics.GC_NC_40_59, 0.987036);
+ Assert.assertEquals(metrics.GC_NC_60_79, 0.0);
+ Assert.assertEquals(metrics.GC_NC_80_100, 0.0);
} else {
Assert.fail("Unexpected metric: " + metrics);
}
diff --git a/src/tests/java/picard/analysis/CollectQualityYieldMetricsTest.java b/src/tests/java/picard/analysis/CollectQualityYieldMetricsTest.java
new file mode 100644
index 0000000..cccfc58
--- /dev/null
+++ b/src/tests/java/picard/analysis/CollectQualityYieldMetricsTest.java
@@ -0,0 +1,75 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgramTest;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * Created by kbergin on 11/23/15.
+ */
+public class CollectQualityYieldMetricsTest extends CommandLineProgramTest {
+ private static final File TEST_DATA_DIR = new File("testdata/picard/sam/");
+
+ public String getCommandLineProgramName() {
+ return CollectQualityYieldMetrics.class.getSimpleName();
+ }
+
+ @Test
+ public void test() throws IOException {
+ final File input = new File(TEST_DATA_DIR, "insert_size_metrics_test.sam");
+ final File outfile = File.createTempFile("test", ".quality_yield_metrics");
+ outfile.deleteOnExit();
+ final String[] args = new String[] {
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ };
+
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<CollectQualityYieldMetrics.QualityYieldMetrics, Comparable<?>> output = new MetricsFile<CollectQualityYieldMetrics.QualityYieldMetrics, Comparable<?>>();
+ output.read(new FileReader(outfile));
+
+ for (final CollectQualityYieldMetrics.QualityYieldMetrics metrics : output.getMetrics()) {
+ Assert.assertEquals(metrics.TOTAL_READS, 52);
+ Assert.assertEquals(metrics.PF_READS, 52);
+ Assert.assertEquals(metrics.READ_LENGTH, 101);
+ Assert.assertEquals(metrics.TOTAL_BASES, 5252);
+ Assert.assertEquals(metrics.PF_BASES, 5252);
+ Assert.assertEquals(metrics.Q20_BASES, 3532);
+ Assert.assertEquals(metrics.PF_Q20_BASES, 3532);
+ Assert.assertEquals(metrics.Q30_BASES, 3145);
+ Assert.assertEquals(metrics.PF_Q30_BASES, 3145);
+ Assert.assertEquals(metrics.Q20_EQUIVALENT_YIELD, 6497);
+ Assert.assertEquals(metrics.PF_Q20_EQUIVALENT_YIELD, 6497);
+ }
+ }
+}
diff --git a/src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java b/src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
index 584d6c5..d4ae1e4 100644
--- a/src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
+++ b/src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
@@ -6,6 +6,9 @@ import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import java.io.*;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
+import java.util.List;
/**
* Tests for methods in CollectWgsMetricsFromQuerySorted
@@ -36,17 +39,45 @@ public class CollectWgsMetricsFromQuerySortedTest extends CommandLineProgramTest
final MetricsFile<CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics, Comparable<?>> output = new MetricsFile<CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics, Comparable<?>>();
output.read(new FileReader(outfile));
+ validateMetrics(output.getMetrics(), 3095693981L);
+ }
+
+ @Test
+ public void testPassingInGenomeTerritory() throws IOException {
+ final File input = new File(TEST_DATA_DIR, "namesorted.test.sam");
+ final File outfile = File.createTempFile("metrics", ".txt");
+ outfile.deleteOnExit();
+ final String[] args = new String[] {
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ "GENOME_TERRITORY=1000"
+ };
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics, Comparable<?>> output = new MetricsFile<CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics, Comparable<?>>();
+ output.read(new FileReader(outfile));
+ validateMetrics(output.getMetrics(), 1000L);
+ }
+
+ private void validateMetrics(final List<CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics> metrics, final long genomeSize) {
+ for (final CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics row : metrics) {
+ final boolean isRaw = row.TYPE == CollectWgsMetricsFromQuerySorted.FILTERING_STRINGENCY.RAW;
+
+ Assert.assertEquals(row.GENOME_TERRITORY, genomeSize);
+ Assert.assertEquals(row.PF_BASES, 606);
+ Assert.assertEquals(row.PF_PASSING_BASES, isRaw ? 238 : 200);
+ Assert.assertEquals(row.PCT_EXC_OVERLAP, isRaw ? 0.085809 : 0.013201); // raw: 52/606, usable: 8/606
+ Assert.assertEquals(row.PCT_EXC_BASEQ, isRaw ? 0.188119 : 0.156766); // raw: 114/606, usable 95/606
+ Assert.assertEquals(row.PCT_EXC_MAPQ, isRaw ? 0.0 : 0.166667); // raw: 0/606, usable:101/606
+ Assert.assertEquals(row.PCT_EXC_DUPE, 0.333333); // both: 202/606
+ Assert.assertEquals(row.PF_READ_PAIRS, 3);
+ Assert.assertEquals(row.PF_DUPE_PAIRS, 1);
+ Assert.assertEquals(row.PF_READS_ALIGNED, 6);
+ Assert.assertEquals(row.PF_ORIENTED_PAIRS, 2);
+ Assert.assertEquals(row.MEAN_INSERT_SIZE, 118.0);
- for (final CollectWgsMetricsFromQuerySorted.QuerySortedSeqMetrics metrics : output.getMetrics()) {
- Assert.assertEquals(metrics.TOTAL_BASES, 606);
- Assert.assertEquals(metrics.TOTAL_USABLE_BASES, 238);
- Assert.assertEquals(metrics.PCT_EXC_OVERLAP, 0.085809); // 52 of 606 bases
- Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.188119); // 114 of 606 bases
- Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.333333); // 202 of 606 bases
- Assert.assertEquals(metrics.TOTAL_READ_PAIRS, 3);
- Assert.assertEquals(metrics.TOTAL_DUPE_PAIRS, 1);
- Assert.assertEquals(metrics.TOTAL_ORIENTED_PAIRS, 2);
- Assert.assertEquals(metrics.MEAN_INSERT_SIZE, 118.0);
+ final BigDecimal meanCov = new BigDecimal((double)row.PF_PASSING_BASES / genomeSize).setScale(6, RoundingMode.HALF_UP);
+ Assert.assertEquals(Double.compare(row.MEAN_COVERAGE, meanCov.doubleValue()), 0);
}
}
}
\ No newline at end of file
diff --git a/src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java b/src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
index 4b956b4..2912bde 100755
--- a/src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
+++ b/src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
@@ -48,12 +48,14 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
final File outfile = File.createTempFile("test", ".wgs_metrics");
final File ref = new File(TEST_DATA_DIR, "merger.fasta");
final File intervals = new File(TEST_DATA_DIR, "onePos.interval_list");
+ final int sampleSize = 1000;
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
- "INTERVALS=" + intervals.getAbsolutePath()
+ "INTERVALS=" + intervals.getAbsolutePath(),
+ "SAMPLE_SIZE=" + sampleSize
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -67,6 +69,7 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.181818); // 2 of 11
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.090909); // 1 of 9
Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.090909); // 1 of 9
+ Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.34655, .02);
}
}
@@ -76,12 +79,14 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
final File outfile = File.createTempFile("test", ".wgs_metrics");
final File ref = new File(TEST_DATA_DIR, "merger.fasta");
final File intervals = new File(TEST_DATA_DIR, "contiguous.interval_list");
+ final int sampleSize = 1000;
outfile.deleteOnExit();
final String[] args = new String[] {
"INPUT=" + input.getAbsolutePath(),
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
- "INTERVALS=" + intervals.getAbsolutePath()
+ "INTERVALS=" + intervals.getAbsolutePath(),
+ "SAMPLE_SIZE=" + sampleSize
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -93,6 +98,7 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
Assert.assertEquals(metrics.MEAN_COVERAGE, 2.6);
Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.0);
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.066667);
+ Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.393802, .02);
}
}
}
diff --git a/src/tests/java/picard/analysis/CollectWgsMetricsTest.java b/src/tests/java/picard/analysis/CollectWgsMetricsTest.java
new file mode 100644
index 0000000..ccf444e
--- /dev/null
+++ b/src/tests/java/picard/analysis/CollectWgsMetricsTest.java
@@ -0,0 +1,174 @@
+package picard.analysis;
+
+import htsjdk.samtools.SAMException;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMFileWriter;
+import htsjdk.samtools.SAMFileWriterFactory;
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordSetBuilder;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
+import org.testng.Assert;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgramTest;
+import picard.sam.SortSam;
+
+import java.io.*;
+import java.util.Random;
+
+/**
+ * Tests for methods in CollectWgsMetrics
+ *
+ *
+ * @author Kylee Bergin
+ */
+
+public class CollectWgsMetricsTest extends CommandLineProgramTest {
+
+ private final static File REF_DICT_DIR = new File("testdata/picard/sam/CollectGcBiasMetrics/");
+ private final static File TEST_DIR = new File("testdata/picard/sam/");
+ private final File referenceDict = new File(REF_DICT_DIR, "MSmallHeader.dict");
+ private File tempSamFile;
+ private File outfile;
+
+ private final static int LENGTH = 99;
+ private final static String SAMPLE = "TestSample1";
+ private final static String READ_GROUP_ID = "TestReadGroup1";
+ private final static String PLATFORM = "ILLUMINA";
+ private final static String LIBRARY = "TestLibrary1";
+ private final static int NUM_READS = 40000;
+
+ public String getCommandLineProgramName() {
+ return CollectWgsMetrics.class.getSimpleName();
+ }
+
+ @DataProvider(name = "wgsDataProvider")
+ public Object[][] wgsDataProvider() {
+ final String referenceFile = "testdata/picard/quality/chrM.reference.fasta";
+
+ return new Object[][] {
+ {tempSamFile, outfile, referenceFile}
+ };
+ }
+
+ @Test(dataProvider = "wgsDataProvider")
+ public void testMetricsFromWGS(final File input, final File outfile, final String referenceFile) throws IOException {
+ outfile.deleteOnExit();
+ final int sampleSize = 1000;
+
+ final String[] args = new String[] {
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ "REFERENCE_SEQUENCE=" + referenceFile,
+ "SAMPLE_SIZE=" + sampleSize
+ };
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>>();
+ output.read(new FileReader(outfile));
+
+ for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
+ Assert.assertEquals(metrics.MEAN_COVERAGE, 13.985155, .02);
+ Assert.assertEquals(metrics.PCT_EXC_OVERLAP, 0.0); // expected to be exactly 0.0 for this test input
+ Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.399906, .02); // approximate; compared with a tolerance of .02
+ Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.0); // expected to be exactly 0.0 for this test input
+ Assert.assertEquals(metrics.SD_COVERAGE, 57.364434, .02);
+ Assert.assertEquals(metrics.MEDIAN_COVERAGE, 0.0);
+ Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.0);
+ Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.0);
+ Assert.assertEquals(metrics.PCT_EXC_CAPPED, 0.519542, .001);
+ Assert.assertEquals(metrics.PCT_EXC_TOTAL, 0.919537, .001);
+ Assert.assertEquals(metrics.PCT_1X, 0.056364, .0001);
+ Assert.assertEquals(metrics.PCT_5X, 0.056364, .0001);
+ Assert.assertEquals(metrics.PCT_10X, 0.056364, .0001);
+ Assert.assertEquals(metrics.PCT_15X, 0.056364, .0001);
+ Assert.assertEquals(metrics.PCT_20X, 0.056364, .0001);
+ Assert.assertEquals(metrics.PCT_25X, 0.056303, .0001);
+ Assert.assertEquals(metrics.PCT_30X, 0.056303, .0001);
+ Assert.assertEquals(metrics.PCT_40X, 0.056243, .0001);
+ Assert.assertEquals(metrics.PCT_50X, 0.056243, .0001);
+ Assert.assertEquals(metrics.PCT_60X, 0.056182, .0001);
+ Assert.assertEquals(metrics.PCT_70X, 0.056182, .0001);
+ Assert.assertEquals(metrics.PCT_80X, 0.056122, .0001);
+ Assert.assertEquals(metrics.PCT_90X, 0.056062, .0001);
+ Assert.assertEquals(metrics.PCT_100X, 0.056062, .0001);
+ Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.056362, .02);
+ Assert.assertEquals(metrics.HET_SNP_Q, 0.0);
+
+ }
+ }
+
+ //Builds a coordinate-sorted BAM of NUM_READS read pairs with seeded-random starts, plus the metrics output file, for the tests.
+ @BeforeTest
+ void setupBuilder() throws IOException {
+ final String readName = "TESTBARCODE";
+
+ //Create temporary BAM files (unsorted intermediate and sorted final) in TEST_DIR
+ tempSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
+ final File tempSamFileUnsorted = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
+ tempSamFileUnsorted.deleteOnExit();
+ tempSamFile.deleteOnExit();
+ final SAMFileHeader header = new SAMFileHeader();
+
+ //Load the sequence dictionary from referenceDict into the header and mark the header as unsorted
+ try {
+ header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(referenceDict));
+ header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
+ } catch (final SAMException e) {
+ e.printStackTrace(); // NOTE(review): the exception is only printed, so setup continues with an incomplete header - confirm this is intended
+ }
+
+ //Set readGroupRecord
+ final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(READ_GROUP_ID);
+ readGroupRecord.setSample(SAMPLE);
+ readGroupRecord.setPlatform(PLATFORM);
+ readGroupRecord.setLibrary(LIBRARY);
+ readGroupRecord.setPlatformUnit(READ_GROUP_ID);
+ header.addReadGroup(readGroupRecord);
+
+ //Configure the record set builder with the read group and header
+ final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate);
+ setBuilder.setReadGroup(readGroupRecord);
+ setBuilder.setUseNmFlag(true);
+ setBuilder.setHeader(header);
+
+ //Read name and placement settings; the fixed seed makes the generated starts repeatable
+ final String separator = ":";
+ final int ID = 1;
+ final int maxReadStart = 800;
+ final int minReadStart = 1;
+ final Random rg = new Random(5);
+
+ for (int i = 0; i < NUM_READS; i++) {
+ final int start = rg.nextInt(maxReadStart) + minReadStart;
+ final String newReadName = readName + separator + ID + separator + i;
+ setBuilder.addPair(newReadName, 0, start + ID, start + ID + LENGTH);
+ }
+
+ //Write the unsorted BAM file
+ final SAMFileWriter writer = new SAMFileWriterFactory()
+ .setCreateIndex(true).makeBAMWriter(header, false, tempSamFileUnsorted);
+
+ for (final SAMRecord record : setBuilder) {
+ writer.addAlignment(record);
+ }
+ writer.close();
+
+ //Sort the temp file by coordinate using SortSam
+ final SortSam sorter = new SortSam();
+ final String[] args = new String[]{
+ "INPUT=" + tempSamFileUnsorted.getAbsolutePath(),
+ "OUTPUT=" + tempSamFile.getAbsolutePath(),
+ "SORT_ORDER=coordinate"
+ };
+
+ sorter.instanceMain(args);
+
+ //create output files for tests
+ outfile = File.createTempFile("testWgsMetrics", ".txt");
+ outfile.deleteOnExit();
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/picard/analysis/TheoreticalSensitivityTest.java b/src/tests/java/picard/analysis/TheoreticalSensitivityTest.java
new file mode 100644
index 0000000..5f28768
--- /dev/null
+++ b/src/tests/java/picard/analysis/TheoreticalSensitivityTest.java
@@ -0,0 +1,231 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.Histogram;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Arrays;
+import java.io.FileReader;
+import java.io.File;
+import java.util.Scanner;
+
+/**
+ * Created by davidben on 5/18/15.
+ */
+public class TheoreticalSensitivityTest {
+
+ private final static File TEST_DIR = new File("testdata/picard/analysis/TheoreticalSensitivity/");
+ private final static File DEPTH = new File(TEST_DIR, "Solexa332667_DepthDist.histo");
+ private final static File BASEQ = new File(TEST_DIR, "Solexa332667_BaseQ.histo");
+
+    /**
+     * Checks a wheel whose whole weight sits on index 1: every draw must return 1, and the
+     * single-sample cumulative sums must satisfy sums[n][0] == n for n = 0..9.
+     */
+    @Test
+    public void testRouletteWheel() throws Exception {
+        final int numTrials = 10;
+        final TheoreticalSensitivity.RouletteWheel wheel =
+                new TheoreticalSensitivity.RouletteWheel(new double[]{0.0, 1.0, 0.0});
+
+        // all of the weight is on index 1, so no other value may ever be drawn
+        int trial = 0;
+        while (trial < numTrials) {
+            Assert.assertEquals(wheel.draw(), 1);
+            trial++;
+        }
+
+        // every draw contributes exactly 1, so row n of the cumulative sums must hold n
+        final List<ArrayList<Integer>> cumulativeSums = wheel.sampleCumulativeSums(numTrials, 1);
+        for (int row = 0; row < numTrials; row++) {
+            Assert.assertEquals(cumulativeSums.get(row).get(0), Integer.valueOf(row));
+        }
+    }
+
+    /**
+     * Feeds three hand-built rows of sums and three thresholds to proportionsAboveThresholds
+     * and compares every cell of the 3 x 3 result with a hand-computed proportion.
+     */
+    @Test
+    public void testProportionsAboveThresholds() throws Exception {
+        final Integer[][] rawSums = {{0, 0, 0}, {10, 10}, {5, 11, -2, 4}};
+        final List<ArrayList<Integer>> sums = new ArrayList<ArrayList<Integer>>();
+        for (final Integer[] row : rawSums) {
+            sums.add(new ArrayList<Integer>(Arrays.asList(row)));
+        }
+        final List<Double> thresholds = Arrays.asList(-1.0, 1.0, 6.0);
+        Assert.assertEquals(sums.size(), 3);
+        Assert.assertEquals(thresholds.size(), 3);
+
+        final List<ArrayList<Double>> proportions = TheoreticalSensitivity.proportionsAboveThresholds(sums, thresholds);
+        Assert.assertEquals(proportions.size(), 3);
+
+        // expected[row][col]: fraction of the entries in sums[row] that lie above thresholds[col]
+        final double[][] expected = {
+                {3.0 / 3, 0.0 / 3, 0.0 / 3},
+                {2.0 / 2, 2.0 / 2, 2.0 / 2},
+                {3.0 / 4, 3.0 / 4, 1.0 / 4}
+        };
+        for (int row = 0; row < expected.length; row++) {
+            for (int col = 0; col < expected[row].length; col++) {
+                Assert.assertEquals(proportions.get(row).get(col), expected[row][col]);
+            }
+        }
+    }
+
+    /**
+     * Compares entry (n, m) of hetAltDepthDistribution(6), for n = 0..4 and m = 0..n, with
+     * binomial(n, m) * 0.5^n, where the binomial coefficient is computed by hand.
+     */
+    @Test
+    public void testHetAltDepthDistribution() throws Exception {
+        final int N = 6;
+        final double p = 0.5;
+        final List<ArrayList<Double>> distribution = TheoreticalSensitivity.hetAltDepthDistribution(N);
+
+        // NOTE(review): presumably row = total depth and col = alt-allele depth -- confirm against TheoreticalSensitivity
+        for (int row = 0; row < N - 1; row++) {
+            for (int col = 0; col <= row; col++) {
+                //TODO: java has no built-in binomial coefficient when this is in hellbender, use apache commons
+                // numerator: row * (row - 1) * ... * (row - col + 1)
+                int coefficient = 1;
+                int factor = row;
+                while (factor > row - col) {
+                    coefficient *= factor;
+                    factor--;
+                }
+                // denominator: col!, divided out one factor at a time
+                int divisor = col;
+                while (divisor > 0) {
+                    coefficient /= divisor;
+                    divisor--;
+                }
+
+                Assert.assertEquals(distribution.get(row).get(col), coefficient * Math.pow(p, row));
+            }
+        }
+    }
+
+    /**
+     * Tests that large-sample sums from the RouletteWheel converge to a normal distribution,
+     * using the empirical CDF as measured by proportionsAboveThresholds: the fraction of sums
+     * within one standard deviation of the mean must match the normal value to within five
+     * sampling standard deviations.
+     */
+    @Test
+    public void testCentralLimitTheorem() throws Exception {
+        final int sampleSize = 1000;
+        final int numSummands = 100;
+
+        // three equal weights: the wheel gives 0, 1, 2 with equal probability
+        final TheoreticalSensitivity.RouletteWheel wheel =
+                new TheoreticalSensitivity.RouletteWheel(new double[]{1.0, 1.0, 1.0});
+
+        // a single draw has mean 1 and variance 2/3; for a sum of independent draws the mean
+        // scales with numSummands and the standard deviation with sqrt(numSummands)
+        final double sigmaSingleDraw = Math.sqrt(2.0 / 3.0);
+        final double muSingleDraw = 1.0;
+        final double sigma = Math.sqrt(numSummands) * sigmaSingleDraw;
+        final double mu = numSummands * muSingleDraw;
+
+        // draw sampleSize cumulative sums and keep only the last row of the result
+        final List<ArrayList<Integer>> sums = wheel.sampleCumulativeSums(numSummands, sampleSize);
+        sums.subList(0, sums.size() - 1).clear();
+        Assert.assertEquals(sums.size(), 1);
+
+        // sums is 1 x sampleSize and there are two thresholds, so proportions is 1 x 2
+        final List<Double> oneSigmaBounds = Arrays.asList(mu - sigma, mu + sigma);
+        final List<ArrayList<Double>> proportions = TheoreticalSensitivity.proportionsAboveThresholds(sums, oneSigmaBounds);
+        final ArrayList<Double> proportionAbove = proportions.get(0);
+        // (fraction above mu - sigma) minus (fraction above mu + sigma) = fraction within one sigma
+        final double empiricalWithinOneSigma = proportionAbove.get(0) - proportionAbove.get(1);
+
+        // the normal distribution puts this much mass within one sigma; whether a given sum falls
+        // inside is a Bernoulli variable, which gives the sampling error of the observed proportion
+        final double theoreticalWithinOneSigma = 0.682689492;
+        final double samplingSd = Math.sqrt(theoreticalWithinOneSigma*(1-theoreticalWithinOneSigma) / sampleSize);
+
+        Assert.assertEquals(empiricalWithinOneSigma, theoreticalWithinOneSigma, 5*samplingSd);
+    }
+
+    /**
+     * Puts it all together for deterministic quality and depth: for each quality q and depth n
+     * in 5..9, hetSNPSensitivity on point-mass distributions must equal the summed probability
+     * of every alt count above 10*n*log10(2)/q, to within 0.001.
+     */
+    @Test
+    public void testDeterministicQualityAndDepth() throws Exception {
+        final double logOddsThreshold = 0.0;
+        final double tolerance = 0.001;
+        final int sampleSize = 1; //quality is deterministic, hence no sampling error
+        for (int quality = 5; quality < 10; quality++) {
+            for (int depth = 5; depth < 10; depth++) {
+                //alts required to call when log odds ratio threshold = 1
+                final double minAltCount = 10*depth*Math.log10(2)/quality;
+
+                // add up the probabilities of every alt count above the minimum, highest count first
+                final List<ArrayList<Double>> altCountProbabilities = TheoreticalSensitivity.hetAltDepthDistribution(depth+1);
+                final ArrayList<Double> probabilitiesAtDepth = altCountProbabilities.get(depth);
+                double expectedResult = 0.0;
+                int altCount = depth;
+                while (altCount > minAltCount) {
+                    expectedResult += probabilitiesAtDepth.get(altCount);
+                    altCount--;
+                }
+
+                //deterministic weights that always yield quality are 0.0 for 0 through quality - 1 and 1.0 for quality
+                final double[] qualityDistribution = new double[quality+1];
+                Arrays.fill(qualityDistribution, 0.0);
+                qualityDistribution[quality] = 1.0;
+
+                //same construction for a depth that is always exactly depth
+                final double[] depthDistribution = new double[depth+1];
+                Arrays.fill(depthDistribution, 0.0);
+                depthDistribution[depth] = 1.0;
+
+                final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold);
+                Assert.assertEquals(result, expectedResult, tolerance);
+            }
+        }
+    }
+
+    /**
+     * Runs hetSNPSensitivity on depth and base-quality distributions read from the
+     * Solexa332667 histogram files and expects a result within 0.02 of .9617.
+     *
+     * @throws Exception if either histogram file cannot be opened
+     */
+    @Test
+    public void testHetSensDistributions() throws Exception {
+        //Expect theoretical sens to be close to .9617 for Solexa-332667
+        final double tolerance = 0.02;
+        final double expectedResult = .9617;
+        final int maxDepth = 500;
+        final double [] depthDistribution = new double[maxDepth+1];
+        final double [] qualityDistribution = new double[50];
+
+        // try-with-resources: each Scanner holds an open file handle that must be released,
+        // including when reading fails part-way through
+        try (final Scanner scanDepth = new Scanner(DEPTH)) {
+            for (int i = 0; scanDepth.hasNextDouble(); i++) {
+                depthDistribution[i] = scanDepth.nextDouble();
+            }
+        }
+        try (final Scanner scanBaseQ = new Scanner(BASEQ)) {
+            for (int j = 0; scanBaseQ.hasNextDouble(); j++) {
+                qualityDistribution[j] = scanBaseQ.nextDouble();
+            }
+        }
+
+        final int sampleSize = 1000;
+        final double logOddsThreshold = 3.0;
+        final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold);
+        Assert.assertEquals(result, expectedResult, tolerance);
+    }
+
+    /**
+     * Supplies (expected sensitivity, metrics file) pairs, one per metrics file under TEST_DIR.
+     */
+    @DataProvider(name = "hetSensDataProvider")
+    public Object[][] hetSensDataProvider() {
+        return new Object[][] {
+                // whole-genome metrics
+                {.9130, new File(TEST_DIR, "test_Solexa-332667.wgs_metrics")},
+                // hybrid-selection metrics
+                {.9784, new File(TEST_DIR, "test_NexPond-359781.hsMetrics")},
+                // targeted PCR metrics
+                {.9562, new File(TEST_DIR, "test_25103070136.targeted_pcr_metrics")},
+                // whole-genome metrics from sampled sites
+                {.9892, new File(TEST_DIR, "test_Solexa-316269_sampled.wgs_metrics")}
+        };
+    }
+
+    /**
+     * Reads a metrics file, normalizes its first histogram as the depth distribution and its
+     * second as the base-quality distribution, and checks hetSNPSensitivity against the
+     * expected value.
+     *
+     * @param expected    sensitivity expected for the file, compared with a tolerance of 0.02
+     * @param metricsFile metrics file containing at least two histograms
+     * @throws Exception if the metrics file cannot be opened or closed
+     */
+    @Test(dataProvider = "hetSensDataProvider")
+    public void testHetSensTargeted(final double expected, final File metricsFile) throws Exception{
+        final double tolerance = 0.02;
+
+        final MetricsFile metrics = new MetricsFile();
+        // try-with-resources makes sure the reader is closed even if parsing throws
+        try (final FileReader reader = new FileReader(metricsFile)) {
+            metrics.read(reader);
+        }
+        final List<Histogram> histograms = metrics.getAllHistograms();
+        final Histogram depthHistogram = histograms.get(0);
+        final Histogram qualityHistogram = histograms.get(1);
+
+        final double [] depthDistribution = TheoreticalSensitivity.normalizeHistogram(depthHistogram);
+        final double [] qualityDistribution = TheoreticalSensitivity.normalizeHistogram(qualityHistogram);
+
+        final int sampleSize = 1000;
+        final double logOddsThreshold = 3.0;
+
+        final double result = TheoreticalSensitivity.hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold);
+        Assert.assertEquals(result, expected, tolerance);
+    }
+}
diff --git a/src/tests/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java b/src/tests/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java
index 7e10d71..d044ad0 100644
--- a/src/tests/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java
+++ b/src/tests/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java
@@ -68,11 +68,10 @@ public class CollectSequencingArtifactMetricsTest extends CommandLineProgramTest
}
private void assertAllFilesEqual(final File expectedBase, final File actualBase) {
- boolean equal = areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT);
- equal = equal && areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT);
- equal = equal && areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT);
- equal = equal && areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT);
- Assert.assertTrue(equal);
+ Assert.assertTrue(areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.PRE_ADAPTER_SUMMARY_EXT),"Pre-Adapter summary files differ.");
+ Assert.assertTrue(areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.PRE_ADAPTER_DETAILS_EXT),"Pre-Adapter details files differ.");
+ Assert.assertTrue(areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.BAIT_BIAS_SUMMARY_EXT), "Bait-Bias summary files differ.");
+ Assert.assertTrue(areMetricsEqual(expectedBase, actualBase, SequencingArtifactMetrics.BAIT_BIAS_DETAILS_EXT), "Bait-bias details files differ.");
}
private boolean areMetricsEqual(final File expectedBase, final File actualBase, final String extension) {
diff --git a/src/tests/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/tests/java/picard/analysis/directed/CollectHsMetricsTest.java
new file mode 100644
index 0000000..7d84b6a
--- /dev/null
+++ b/src/tests/java/picard/analysis/directed/CollectHsMetricsTest.java
@@ -0,0 +1,83 @@
+package picard.analysis.directed;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgramTest;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+
+/**
+ * Command-line tests for CollectHsMetrics, run against the small SAM files under
+ * testdata/picard/analysis/directed/CollectHsMetrics.
+ */
+public class CollectHsMetricsTest extends CommandLineProgramTest {
+    private final static File TEST_DIR = new File("testdata/picard/analysis/directed/CollectHsMetrics");
+
+    @Override
+    public String getCommandLineProgramName() {
+        return CollectHsMetrics.class.getSimpleName();
+    }
+
+    /**
+     * Supplies one row per scenario. Columns follow the parameter order of
+     * runCollectTargetedMetricsTest: input, reference, intervals, metrics file, minimum
+     * mapping quality, minimum base quality, clip-overlapping flag, then the expected
+     * TOTAL_READS, PF_UQ_BASES_ALIGNED, PCT_EXC_BASEQ, PCT_EXC_OVERLAP, PCT_TARGET_BASES_1X
+     * and PCT_TARGET_BASES_2X, and finally the sample size.
+     */
+    @DataProvider(name = "collectHsMetricsDataProvider")
+    public Object[][] targetedIntervalDataProvider() {
+        final String referenceFile = TEST_DIR + "/chrM.fasta";
+        final String intervals = TEST_DIR + "/chrM.interval_list";
+
+        // the fractional expectations are written as double literals so that each value already
+        // has the type of the double parameter it is bound to
+        return new Object[][] {
+                // test that all bases (read 2) with base quality 1 are filtered out
+                {TEST_DIR + "/lowbaseq.sam", referenceFile, intervals, "NONE", 1, 1, true, 2, 202, 0.5, 0.0, 0.505, 0.0, 1000},
+                // test that read 2 (with mapping quality 1) is filtered out with minimum mapping quality 2
+                {TEST_DIR + "/lowmapq.sam", referenceFile, intervals, "NONE", 2, 0, true, 2, 202, 0.0, 0.0, 0.505, 0.0, 1000},
+                // test that we clip overlapping bases
+                {TEST_DIR + "/overlapping.sam", referenceFile, intervals, "NONE", 0, 0, true, 2, 202, 0.0, 0.5, 0.505, 0.505, 1000},
+                // test that we do not clip overlapping bases
+                {TEST_DIR + "/overlapping.sam", referenceFile, intervals, "NONE", 0, 0, false, 2, 202, 0.0, 0.0, 0.505, 0.505, 1000}
+        };
+    }
+
+    /**
+     * Runs CollectHsMetrics on one input and compares the metrics it writes with the expected
+     * values. The same interval list is used for TARGET_INTERVALS and BAIT_INTERVALS;
+     * referenceFile and metricsFile are accepted but not passed to the command line.
+     *
+     * @throws IOException if the temporary output file cannot be created, read or closed
+     */
+    @Test(dataProvider = "collectHsMetricsDataProvider")
+    public void runCollectTargetedMetricsTest(final String input,
+                                              final String referenceFile,
+                                              final String targetIntervals,
+                                              final String metricsFile,
+                                              final int minimumMappingQuality,
+                                              final int minimumBaseQuality,
+                                              final boolean clipOverlappingReads,
+                                              final int totalReads,
+                                              final int pfUqBasesAligned,
+                                              final double pctExcBaseq,
+                                              final double pctExcOverlap,
+                                              final double pctTargetBases1x,
+                                              final double pctTargetBases2x,
+                                              final int sampleSize) throws IOException {
+
+        // NOTE(review): the temporary output is created inside the test-data directory -- confirm this is intended
+        final File outfile = File.createTempFile("CollectHsMetrics", ".hs_metrics", TEST_DIR);
+        outfile.deleteOnExit();
+
+        final String[] args = new String[] {
+                "TARGET_INTERVALS=" + targetIntervals,
+                "BAIT_INTERVALS=" + targetIntervals,
+                "INPUT=" + input,
+                "OUTPUT=" + outfile,
+                "MINIMUM_MAPPING_QUALITY=" + minimumMappingQuality,
+                "MINIMUM_BASE_QUALITY=" + minimumBaseQuality,
+                "CLIP_OVERLAPPING_READS=" + clipOverlappingReads,
+                "SAMPLE_SIZE=" + sampleSize
+        };
+
+        Assert.assertEquals(runPicardCommandLine(args), 0);
+
+        final MetricsFile<HsMetrics, Comparable<?>> output = new MetricsFile<HsMetrics, Comparable<?>>();
+        // try-with-resources makes sure the reader is closed even if parsing throws
+        try (final FileReader reader = new FileReader(outfile)) {
+            output.read(reader);
+        }
+
+        for (final HsMetrics metrics : output.getMetrics()) {
+            // read/base totals, exclusion fractions, then target coverage fractions
+            Assert.assertEquals(metrics.TOTAL_READS, totalReads);
+            Assert.assertEquals(metrics.PF_UQ_BASES_ALIGNED, pfUqBasesAligned);
+            Assert.assertEquals(metrics.PCT_EXC_BASEQ, pctExcBaseq);
+            Assert.assertEquals(metrics.PCT_EXC_OVERLAP, pctExcOverlap);
+            Assert.assertEquals(metrics.PCT_TARGET_BASES_1X, pctTargetBases1x);
+            Assert.assertEquals(metrics.PCT_TARGET_BASES_2X, pctTargetBases2x);
+        }
+    }
+}
diff --git a/src/tests/java/picard/analysis/directed/CollectTargetedMetricsTest.java b/src/tests/java/picard/analysis/directed/CollectTargetedMetricsTest.java
index a72517c..5b99adc 100644
--- a/src/tests/java/picard/analysis/directed/CollectTargetedMetricsTest.java
+++ b/src/tests/java/picard/analysis/directed/CollectTargetedMetricsTest.java
@@ -7,14 +7,9 @@ import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordSetBuilder;
-import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.metrics.MetricsFile;
-import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
-import htsjdk.samtools.util.IOUtil;
-import htsjdk.samtools.util.Log;
import org.testng.Assert;
-import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -22,8 +17,6 @@ import picard.cmdline.CommandLineProgramTest;
import picard.sam.SortSam;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Random;
@@ -127,14 +120,14 @@ public class CollectTargetedMetricsTest extends CommandLineProgramTest {
final String singleIntervals = "testdata/picard/quality/chrM.single.interval_list";
return new Object[][] {
- {tempSamFile, outfile, perTargetOutfile, referenceFile, singleIntervals},
- {tempSamFile, outfile, perTargetOutfile, referenceFile, emptyIntervals}
+ {tempSamFile, outfile, perTargetOutfile, referenceFile, singleIntervals, 1000},
+ {tempSamFile, outfile, perTargetOutfile, referenceFile, emptyIntervals, 1000}
};
}
@Test(dataProvider = "targetedIntervalDataProvider")
public void runCollectTargetedMetricsTest(final File input, final File outfile, final File perTargetOutfile, final String referenceFile,
- final String targetIntervals) throws IOException {
+ final String targetIntervals, final int sampleSize) throws IOException {
final String[] args = new String[] {
"TARGET_INTERVALS=" + targetIntervals,
@@ -143,7 +136,8 @@ public class CollectTargetedMetricsTest extends CommandLineProgramTest {
"REFERENCE_SEQUENCE=" + referenceFile,
"PER_TARGET_COVERAGE=" + perTargetOutfile.getAbsolutePath(),
"LEVEL=ALL_READS",
- "AMPLICON_INTERVALS=" + targetIntervals
+ "AMPLICON_INTERVALS=" + targetIntervals,
+ "SAMPLE_SIZE=" + sampleSize
};
Assert.assertEquals(runPicardCommandLine(args), 0);
@@ -153,6 +147,7 @@ public class CollectTargetedMetricsTest extends CommandLineProgramTest {
for (final TargetedPcrMetrics metrics : output.getMetrics()) {
Assert.assertEquals(metrics.TOTAL_READS, numReads * 2);
+ Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, .997972, .02);
}
}
}
\ No newline at end of file
diff --git a/src/tests/java/picard/fingerprint/FingerprintCheckerTest.java b/src/tests/java/picard/fingerprint/FingerprintCheckerTest.java
new file mode 100644
index 0000000..831a422
--- /dev/null
+++ b/src/tests/java/picard/fingerprint/FingerprintCheckerTest.java
@@ -0,0 +1,29 @@
+package picard.fingerprint;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.testng.Assert.*;
+
+/**
+ * Tests for the static helpers of FingerprintChecker.
+ *
+ * Created by farjoun on 8/27/15.
+ */
+public class FingerprintCheckerTest {
+
+    /**
+     * Asking randomSublist for at least as many elements as the list holds must return a list
+     * equal to the input; asking for fewer must return exactly that many elements.
+     */
+    @Test
+    public void testRandomSublist() throws Exception {
+
+        final List<Integer> list = new ArrayList<>();
+        list.add(1);
+        list.add(2);
+        list.add(3);
+
+        // TestNG's argument order is (actual, expected), so the value under test goes first
+        Assert.assertEquals(FingerprintChecker.randomSublist(list, 3), list);
+        Assert.assertEquals(FingerprintChecker.randomSublist(list, 4), list);
+
+        Assert.assertEquals(FingerprintChecker.randomSublist(list, 2).size(), 2);
+    }
+}
\ No newline at end of file
diff --git a/src/tests/java/picard/fingerprint/HaplotypeMapTest.java b/src/tests/java/picard/fingerprint/HaplotypeMapTest.java
new file mode 100755
index 0000000..b6acd96
--- /dev/null
+++ b/src/tests/java/picard/fingerprint/HaplotypeMapTest.java
@@ -0,0 +1,102 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.fingerprint;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.Arrays;
+
+/**
+ * Tests reading a HaplotypeMap from a file and writing one that was built in memory.
+ */
+public class HaplotypeMapTest {
+
+    public static final File TEST_MAP =
+            new File("testdata/picard/fingerprint/haplotypeMap.txt");
+
+    /** Loads the test map and checks the number of haplotypes and SNPs it reports. */
+    @Test
+    public void testHaplotypeMapReader() {
+        final HaplotypeMap map = new HaplotypeMap(TEST_MAP);
+        Assert.assertEquals(map.getHaplotypes().size(), 23, "Wrong number of haplotypes returned.");
+        Assert.assertEquals(map.getAllSnps().size(), 26, "Wrong number of snps returned.");
+
+    }
+
+    /**
+     * Builds a map with a two-SNP block on chr1 and a one-SNP block on chr2, writes it to a
+     * temporary file and checks the three SNP lines that follow the five header lines.
+     *
+     * @throws Exception if the temporary file cannot be created, written or read
+     */
+    @Test
+    public void testHaplotypeMapWriter() throws Exception {
+        final SAMFileHeader header = new SAMFileHeader();
+        header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
+        final SAMSequenceDictionary sd = new SAMSequenceDictionary();
+        sd.addSequence(new SAMSequenceRecord("chr1", 15000000));
+        sd.addSequence(new SAMSequenceRecord("chr2", 15000000));
+        sd.addSequence(new SAMSequenceRecord("chr3", 15000000));
+        header.setSequenceDictionary(sd);
+
+
+        final HaplotypeMap newMap = new HaplotypeMap(header);
+        final HaplotypeBlock t1 = new HaplotypeBlock(0.151560926);
+        t1.addSnp(new Snp("snp1", "chr1", 13969408, (byte)'T', (byte)'C',
+                0.151560926, null));
+        t1.addSnp(new Snp("snp2", "chr1", 1234567, (byte)'A', (byte)'T', 1-0.151560926,
+                Arrays.asList("SQNM_1CHIP_FingerprintAssays")));
+        newMap.addHaplotype(t1);
+        final HaplotypeBlock t2 = new HaplotypeBlock(.02d);
+        t2.addSnp(new Snp("snp3", "chr2", 1234567, (byte)'C', (byte)'G', .02, null));
+        newMap.addHaplotype(t2);
+        // the suffix needs its own dot; createTempFile does not insert one
+        final File temp = File.createTempFile("haplotypeMap", ".txt");
+        temp.deleteOnExit();
+        newMap.writeToFile(temp);
+
+        // try-with-resources releases the file handle even when an assertion fails
+        try (final BufferedReader reader = new BufferedReader(new FileReader(temp))) {
+            // Skip the header and sequence dictionary
+            for (int i = 0; i < 5; i++) {
+                reader.readLine();
+            }
+
+            // snp2 was added second but is expected on the first line (it has the lower position on chr1)
+            final String[] first = reader.readLine().split("\t");
+            Assert.assertEquals(first[0], "chr1", "Wrong chromosome on first snp: " + first[0]);
+            Assert.assertEquals(first[2], "snp2", "Wrong name on first snp: " + first[2]);
+            Assert.assertEquals(first[6].trim(), "", "anchor snp should be null on first snp: " + first[6] );
+            Assert.assertEquals(first[7], "SQNM_1CHIP_FingerprintAssays",
+                    "Incorrect fingerprint panel on first snp: " + first[7] );
+
+            final String[] second = reader.readLine().split("\t");
+            Assert.assertEquals(second[0], "chr1", "Wrong chromosome on second snp: " + second[0]);
+            Assert.assertEquals(second[2], "snp1", "Wrong name on second snp: " + second[2]);
+            Assert.assertEquals(second[6], "snp2", "anchor snp is incorrect on second snp: " + second[6] );
+
+            final String[] third = reader.readLine().split("\t");
+            Assert.assertEquals(third[0], "chr2", "Wrong chromosome on third snp: " + third[0]);
+            Assert.assertEquals(third[2], "snp3", "Wrong name on third snp: " + third[2]);
+            // TestNG's argument order is (actual, expected)
+            Assert.assertEquals(third.length, 6, "Third snp should not have anchor snp or fingerprint " + Arrays.asList(third) );
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/src/tests/java/picard/fingerprint/HaplotypeProbabilitiesTest.java b/src/tests/java/picard/fingerprint/HaplotypeProbabilitiesTest.java
new file mode 100644
index 0000000..98ab0ac
--- /dev/null
+++ b/src/tests/java/picard/fingerprint/HaplotypeProbabilitiesTest.java
@@ -0,0 +1,188 @@
+package picard.fingerprint;
+
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.samtools.util.QualityUtil;
+import htsjdk.variant.variantcontext.Allele;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.util.MathUtil;
+
+import java.util.Collections;
+import java.util.List;
+
+import static picard.util.TestNGUtil.assertEqualDoubleArrays;
+import static java.lang.Math.log10;
+
+/**
+ * Basic tests for HaplotypeProbabilities and derived classes
+ *
+ * @author yossi farjoun
+ */
+public class HaplotypeProbabilitiesTest {
+
+ static Snp snp1, snp2;
+ static HaplotypeBlock hb1, hb2;
+
+    /**
+     * Builds the shared fixtures: snp1 and snp2 on contig "test", the one-SNP block hb1
+     * (constructed with .25) and the two-SNP block hb2 (constructed with .4).
+     */
+    @BeforeTest
+    public static void initializeHaplotypeBlock() {
+        // first the single-SNP block ...
+        snp1 = new Snp("SNP1", "test", 1, (byte) 'A', (byte) 'T', 0.25, Collections.<String>emptyList());
+        hb1 = new HaplotypeBlock(.25);
+        hb1.addSnp(snp1);
+
+        // ... then the block that holds snp1 together with a second SNP
+        snp2 = new Snp("SNP2", "test", 2, (byte) 'A', (byte) 'G', 0.5, Collections.<String>emptyList());
+        hb2 = new HaplotypeBlock(.4);
+        hb2.addSnp(snp1);
+        hb2.addSnp(snp2);
+    }
+
+    /**
+     * Supplies rows of (probabilities object, SNPs, per-SNP swap flags, per-SNP genotype
+     * likelihoods) for testpEvidenceGivenPriorFromGLs. Every row gets its own
+     * HaplotypeProbabilitiesFromGenotypeLikelihoods instance.
+     */
+    @DataProvider(name = "dataTestpEvidenceGivenPriorFromGLs")
+    public Object[][] dataTestpEvidenceGivenPriorFromGLs() {
+        return new Object[][]{
+                // one SNP, alleles in their declared order
+                {new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb1),
+                        Collections.singletonList(snp1),
+                        Collections.singletonList(false),
+                        Collections.singletonList(new double[]{0, -1, -2})},
+
+                // one SNP, alleles swapped
+                {new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb1),
+                        Collections.singletonList(snp1),
+                        Collections.singletonList(true),
+                        Collections.singletonList(new double[]{0, -1, -2})},
+
+                // two SNPs with identical likelihoods
+                {new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2),
+                        CollectionUtil.makeList(snp1, snp2),
+                        CollectionUtil.makeList(false, false),
+                        CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{0, -1, -2})},
+
+                // two SNPs, the second favouring the middle genotype
+                {new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2),
+                        CollectionUtil.makeList(snp1, snp2),
+                        CollectionUtil.makeList(false, false),
+                        CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{-1, 0, -1})},
+
+                // two SNPs favouring opposite genotypes
+                {new HaplotypeProbabilitiesFromGenotypeLikelihoods(hb2),
+                        CollectionUtil.makeList(snp1, snp2),
+                        CollectionUtil.makeList(false, false),
+                        CollectionUtil.makeList(new double[]{0, -1, -2}, new double[]{-2, -1, 0})},
+        };
+    }
+
+    /**
+     * Adds each SNP's genotype likelihoods to hp, then recomputes the expected posterior by
+     * hand: log10 of the haplotype frequency of each genotype plus the summed likelihoods
+     * (indexed in reverse where the alleles were swapped), normalized with
+     * MathUtil.pNormalizeLogProbability. The two must agree to within 1e-10.
+     */
+    @Test(dataProvider = "dataTestpEvidenceGivenPriorFromGLs")
+    public void testpEvidenceGivenPriorFromGLs(final HaplotypeProbabilitiesFromGenotypeLikelihoods hp, final List<Snp> snps, final List<Boolean> swaps, final List<double[]> GLs) throws Exception {
+
+        // feed the likelihoods in, presenting the alleles in swapped order where requested
+        for (int snpIndex = 0; snpIndex < snps.size(); ++snpIndex) {
+            final Snp snp = snps.get(snpIndex);
+            final boolean swapped = swaps.get(snpIndex);
+            final Allele first = Allele.create(swapped ? snp.getAllele2() : snp.getAllele1());
+            final Allele second = Allele.create(swapped ? snp.getAllele1() : snp.getAllele2());
+
+            hp.addToLogLikelihoods(snp, CollectionUtil.makeList(first, second), GLs.get(snpIndex));
+        }
+
+        final double[] expectedLogLikelihood = new double[3];
+        for (int genotype = 0; genotype < 3; genotype++) {
+            expectedLogLikelihood[genotype] = log10(hp.getHaplotype().getHaplotypeFrequency(genotype));
+            for (int snpIndex = 0; snpIndex < GLs.size(); snpIndex++) {
+                // a swapped SNP reads its likelihoods from the opposite end of the array
+                final int glIndex = swaps.get(snpIndex) ? 2 - genotype : genotype;
+                expectedLogLikelihood[genotype] += GLs.get(snpIndex)[glIndex];
+            }
+        }
+        assertEqualDoubleArrays(hp.getPosteriorProbabilities(), MathUtil.pNormalizeLogProbability(expectedLogLikelihood), 1e-10);
+    }
+
+    /**
+     * Supplies rows of (probabilities object, SNP, observed bases, base quality) for
+     * testHaplotypeProbabilitiesFromSequenceAddToProbs. Every row uses hb1, snp1 and quality 7
+     * and gets its own HaplotypeProbabilitiesFromSequence instance.
+     */
+    @DataProvider(name = "dataTestHaplotypeProbabilitiesFromSequenceAddToProbs")
+    public Object[][] dataTestHaplotypeProbabilitiesFromSequenceAddToProbs() {
+        final byte[][] observedBases = {
+                {},
+                {'A'},
+                {'G'},
+                {'T'},
+                {'A', 'T', 'A', 'A', 'A', 'A', 'A', 'A'},
+                {'A', 'T', 'A', 'A', 'G', 'A', 'A', 'A'},
+                {'T', 'T', 'A', 'A', 'A', 'T', 'T', 'A', 'T'},
+                {'T', 'A', 'T', 'T', 'T', 'T', 'T', 'T', 'A'}
+        };
+
+        final Object[][] rows = new Object[observedBases.length][];
+        for (int i = 0; i < observedBases.length; i++) {
+            rows[i] = new Object[]{new HaplotypeProbabilitiesFromSequence(hb1), snp1, observedBases[i], 7};
+        }
+        return rows;
+    }
+
+    /**
+     * Adds every observed base to hp at the given quality, then recomputes the expected
+     * posterior by hand: log10 of the haplotype frequency of each genotype plus one log term
+     * per base that matches either allele of the SNP. The two must agree to within 1e-10.
+     */
+    @Test(dataProvider = "dataTestHaplotypeProbabilitiesFromSequenceAddToProbs")
+    public void testHaplotypeProbabilitiesFromSequenceAddToProbs(final HaplotypeProbabilitiesFromSequence hp, final Snp snp, final byte[] bases, final int qual) throws Exception {
+
+        for (int i = 0; i < bases.length; i++) {
+            hp.addToProbs(snp, bases[i], (byte) qual);
+        }
+
+        final double pError = QualityUtil.getErrorProbabilityFromPhredScore(qual);
+        final double[] expectedLogLikelihood = new double[3];
+
+        for (int genotype = 0; genotype < 3; genotype++) {
+            // theta is the allele-2 fraction of the genotype: 0, 0.5 or 1
+            final double theta = 0.5 * genotype;
+            final double logPAllele1 = log10((1 - theta) * (1 - pError) + theta * pError);
+            final double logPAllele2 = log10((1 - theta) * (pError) + theta * (1 - pError));
+
+            double sum = log10(hp.getHaplotype().getHaplotypeFrequency(genotype));
+            for (final byte observed : bases) {
+                // bases matching neither allele contribute nothing
+                if (observed == snp.getAllele1()) {
+                    sum += logPAllele1;
+                }
+                if (observed == snp.getAllele2()) {
+                    sum += logPAllele2;
+                }
+            }
+            expectedLogLikelihood[genotype] = sum;
+        }
+        assertEqualDoubleArrays(hp.getPosteriorProbabilities(), MathUtil.pNormalizeLogProbability(expectedLogLikelihood), 1e-10);
+    }
+
+    /**
+     * Supplies rows of (probabilities object, SNP, nAlt, nTotal) for
+     * testHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs. Every row uses hb1 with
+     * contamination .1 and snp1, and gets its own probabilities instance.
+     */
+    @DataProvider(name = "dataTestHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs")
+    public Object[][] dataTestHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs() {
+        // {nAlt, nTotal}
+        final int[][] counts = {
+                {0, 0},
+                {0, 1},
+                {0, 76},
+                {3, 76},
+                {7, 76},
+                {35, 76},
+                {40, 76},
+                {45, 76},
+                {69, 76},
+                {73, 76},
+                {76, 76}
+        };
+
+        final Object[][] rows = new Object[counts.length][];
+        for (int i = 0; i < counts.length; i++) {
+            rows[i] = new Object[]{new HaplotypeProbabilitiesFromContaminatorSequence(hb1, .1), snp1, counts[i][0], counts[i][1]};
+        }
+        return rows;
+    }
+
+ static final int[] genotypes = {0, 1, 2};
+
+    /**
+     * Adds nAlt allele-2 observations and (nTotal - nAlt) allele-1 observations to hp at
+     * quality 7, then recomputes the expected likelihoods by hand: for every contaminator
+     * genotype, the likelihood of the observations is summed over the main-sample genotypes
+     * (weighted by haplotype frequency), and the result is normalized with
+     * MathUtil.pNormalizeVector. The two must agree to within 1e-10.
+     */
+    @Test(dataProvider = "dataTestHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs")
+    public void testHaplotypeProbabilitiesFromContaminatorSequenceAddToProbs(final HaplotypeProbabilitiesFromContaminatorSequence hp, final Snp snp, final int nAlt, final int nTotal) throws Exception {
+
+        final byte qual = 7;
+        final int nRef = nTotal - nAlt;
+        for (int k = 0; k < nAlt; k++) {
+            hp.addToProbs(snp, snp.getAllele2(), qual);
+        }
+        for (int k = 0; k < nRef; k++) {
+            hp.addToProbs(snp, snp.getAllele1(), qual);
+        }
+
+        final double pError = QualityUtil.getErrorProbabilityFromPhredScore(qual);
+        final double[] unnormalizedLikelihood = {0d, 0d, 0d};
+
+        for (final int contG : genotypes) {
+            for (final int mainG : genotypes) {
+                // alt-allele fraction of the mixture of contaminator and main sample
+                final double pAlt = (hp.contamination * contG + (1 - hp.contamination) * mainG) / 2;
+                final double pObserveAlt = pAlt * (1 - pError) + (1 - pAlt) * pError;
+                final double pObserveRef = pAlt * (pError) + (1 - pAlt) * (1 - pError);
+
+                // start from the frequency of the main genotype, then multiply in one factor per observation
+                double likelihoodGivenGenotypes = hp.getHaplotype().getHaplotypeFrequency(mainG);
+                for (int k = 0; k < nAlt; k++) {
+                    likelihoodGivenGenotypes *= pObserveAlt;
+                }
+                for (int k = 0; k < nRef; k++) {
+                    likelihoodGivenGenotypes *= pObserveRef;
+                }
+                unnormalizedLikelihood[contG] += likelihoodGivenGenotypes;
+            }
+        }
+
+        assertEqualDoubleArrays(hp.getLikelihoods(), MathUtil.pNormalizeVector(unnormalizedLikelihood), 1e-10);
+    }
+}
diff --git a/src/tests/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java b/src/tests/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java
new file mode 100644
index 0000000..53a9880
--- /dev/null
+++ b/src/tests/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java
@@ -0,0 +1,56 @@
+package picard.fingerprint;
+
+import picard.util.TestNGUtil;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Created by farjoun on 5/29/15.
+ */
+public class HaplotypeProbabilityOfNormalGivenTumorTest {
+
+    private double maf = 0.4;
+    private Snp snp = new Snp("test", "chr1", 1, (byte) 'A', (byte) 'C', maf, Collections.singletonList("dummy"));
+    private HaplotypeBlock hb = new HaplotypeBlock(maf);
+
+    /**
+     * Each row holds: pLoH, the three likelihoods given to the wrapped object, and the three likelihoods expected from the wrapper.
+     */
+    @DataProvider(name = "testGetLikelihoodsData")
+    public Iterator<Object[]> testGetLikelihoodsData() {
+        return Arrays.asList(
+                // a pLoH of zero must leave the underlying likelihoods unchanged:
+                new Object[]{0.0, new double[]{1, 0, 0}, new double[]{1, 0, 0}},
+                new Object[]{0.0, new double[]{0, 1, 0}, new double[]{0, 1, 0}},
+                new Object[]{0.0, new double[]{0, 0, 1}, new double[]{0, 0, 1}},
+                new Object[]{0.0, new double[]{0, 0.4, 0.6}, new double[]{0, 0.4, 0.6}},
+                new Object[]{0.0, new double[]{0.3, 0.7, 0}, new double[]{0.3, 0.7, 0}},
+
+                // HOM likelihoods must not be affected by pLoH:
+                new Object[]{0.1, new double[]{1, 0, 0}, new double[]{1, 0, 0}},
+                new Object[]{0.2, new double[]{0, 0, 1}, new double[]{0, 0, 1}},
+                new Object[]{0.3, new double[]{.3, 0, .7}, new double[]{.3, 0, .7}},
+
+                // a non-zero pLoH changes the likelihood of a HET site as expected:
+                new Object[]{0.1, new double[]{0, 1, 0}, new double[]{.1/2, 1-0.1, .1/2}},
+                new Object[]{0.1, new double[]{0, .5, .5}, new double[]{0.5*0.1*0.5, 0.5*(1-0.1), 0.5*1+0.5*0.1/2}},
+                new Object[]{0.1, new double[]{0.5, 0.5, 0}, new double[]{.5+0.5*0.1*0.5, 0.5*(1-0.1), 0.5*0.1*0.5}}
+        ).iterator();
+    }
+
+    /** Wraps genotype-based probabilities in HaplotypeProbabilityOfNormalGivenTumor and compares getLikelihoods() to the expected row within 1e-4. */
+    @Test(dataProvider = "testGetLikelihoodsData")
+    public void testGetLikelihoods(double pLoH, double[] underlyingLikelihood, double[] tumorLikelihood) throws Exception {
+        final HaplotypeProbabilities underlying = new HaplotypeProbabilitiesFromGenotype(snp, hb, underlyingLikelihood[0], underlyingLikelihood[1], underlyingLikelihood[2]);
+        final HaplotypeProbabilities wrapped = new HaplotypeProbabilityOfNormalGivenTumor(underlying, pLoH);
+
+        TestNGUtil.assertEqualDoubleArrays(wrapped.getLikelihoods(), tumorLikelihood, 0.0001);
+    }
+}
\ No newline at end of file
diff --git a/src/tests/java/picard/illumina/ExtractIlluminaBarcodesTest.java b/src/tests/java/picard/illumina/ExtractIlluminaBarcodesTest.java
index 76ef594..748043e 100644
--- a/src/tests/java/picard/illumina/ExtractIlluminaBarcodesTest.java
+++ b/src/tests/java/picard/illumina/ExtractIlluminaBarcodesTest.java
@@ -105,18 +105,35 @@ public class ExtractIlluminaBarcodesTest extends CommandLineProgramTest {
}
@Test
+ public void testSingleEndWithBarcodeAtStartAndMolecularIndicies() throws Exception {
+ final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "8B4M21T");
+ Assert.assertEquals(metricsFile.getMetrics().get(11).PERFECT_MATCHES, 1);
+ }
+
+ @Test
public void testSingleEndWithBarcodeAtEnd() throws Exception {
final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T8B");
Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5);
}
@Test
+ public void testSingleEndWithBarcodeAtEndAndMolecularIndicies() throws Exception {
+ final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "4M21T8B");
+ Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5);
+ }
+
+ @Test
public void testPairedEndWithBarcodeOnFirstEnd() throws Exception {
final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T8B25T");
Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5);
}
@Test
+ public void testPairedEndWithBarcodeAndMolecularIndicies() throws Exception {
+ final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "4M21T8B21T4M");
+ Assert.assertEquals(metricsFile.getMetrics().get(0).PERFECT_MATCHES, 5);
+ }
+ @Test
public void testPairedEndWithBarcodeOnSecondEnd() throws Exception {
final MetricsFile<ExtractIlluminaBarcodes.BarcodeMetric, Integer> metricsFile = runIt(1, "25T25T8B");
Assert.assertEquals(metricsFile.getMetrics().get(12).PERFECT_MATCHES, 1);
diff --git a/src/tests/java/picard/illumina/IlluminaBasecallsToFastqTest.java b/src/tests/java/picard/illumina/IlluminaBasecallsToFastqTest.java
index c0d5e6f..b2e431b 100644
--- a/src/tests/java/picard/illumina/IlluminaBasecallsToFastqTest.java
+++ b/src/tests/java/picard/illumina/IlluminaBasecallsToFastqTest.java
@@ -28,7 +28,6 @@ import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.LineReader;
import htsjdk.samtools.util.StringUtil;
import htsjdk.samtools.util.TestUtil;
-import org.testng.annotations.AfterMethod;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
import picard.illumina.parser.ReadStructure;
@@ -45,6 +44,9 @@ public class IlluminaBasecallsToFastqTest extends CommandLineProgramTest {
private static final File BASECALLS_DIR = new File("testdata/picard/illumina/25T8B25T/Data/Intensities/BaseCalls");
private static final File DUAL_BASECALLS_DIR = new File("testdata/picard/illumina/25T8B8B25T/Data/Intensities/BaseCalls");
private static final File TEST_DATA_DIR = new File("testdata/picard/illumina/25T8B25T/fastq");
+ private static final File TEST_DATA_DIR_WITH_4M = new File("testdata/picard/illumina/25T8B25T/fastq_with_4M");
+ private static final File TEST_DATA_DIR_WITH_4M4M = new File("testdata/picard/illumina/25T8B25T/fastq_with_4M4M");
+
private static final File DUAL_TEST_DATA_DIR = new File("testdata/picard/illumina/25T8B8B25T/fastq");
public String getCommandLineProgramName() {
@@ -67,7 +69,8 @@ public class IlluminaBasecallsToFastqTest extends CommandLineProgramTest {
"OUTPUT_PREFIX=" + outputPrefix,
"RUN_BARCODE=HiMom",
"MACHINE_NAME=machine1",
- "FLOWCELL_BARCODE=abcdeACXX"
+ "FLOWCELL_BARCODE=abcdeACXX",
+ "MAX_READS_IN_RAM_PER_TILE=100" //force spill to disk to test encode/decode
});
IOUtil.assertFilesEqual(outputFastq1, new File(TEST_DATA_DIR, "nonBarcoded.1.fastq"));
IOUtil.assertFilesEqual(outputFastq2, new File(TEST_DATA_DIR, "nonBarcoded.2.fastq"));
@@ -92,7 +95,8 @@ public class IlluminaBasecallsToFastqTest extends CommandLineProgramTest {
"OUTPUT_PREFIX=" + outputPrefix.getAbsolutePath(),
"MACHINE_NAME=machine1",
"FLOWCELL_BARCODE=abcdeACXX",
- "READ_NAME_FORMAT=" + IlluminaBasecallsToFastq.ReadNameFormat.ILLUMINA
+ "READ_NAME_FORMAT=" + IlluminaBasecallsToFastq.ReadNameFormat.ILLUMINA,
+ "MAX_READS_IN_RAM_PER_TILE=100" //force spill to disk to test encode/decode
});
final String[] filenames = new String[]{
@@ -114,6 +118,16 @@ public class IlluminaBasecallsToFastqTest extends CommandLineProgramTest {
}
@Test
+ public void testDeMultiplexedWithIndex() throws Exception {
+ runStandardTest(1, "multiplexedBarcodeWithIndex.", "mp_barcode.params", 1, "25T8B4M21T", BASECALLS_DIR, TEST_DATA_DIR_WITH_4M);
+ }
+
+ @Test
+ public void testDeMultiplexedWithtwoIndexes() throws Exception {
+ runStandardTest(1, "multiplexedBarcodeWithTwoIndexes.", "mp_barcode.params", 1, "25T8B4M4M17T", BASECALLS_DIR, TEST_DATA_DIR_WITH_4M4M);
+ }
+
+ @Test
public void testDualBarcodes() throws Exception {
runStandardTest(1, "dualBarcode.", "barcode_double.params", 2, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR);
}
@@ -170,7 +184,8 @@ public class IlluminaBasecallsToFastqTest extends CommandLineProgramTest {
"READ_STRUCTURE=" + readStructureString,
"MULTIPLEX_PARAMS=" + libraryParams,
"MACHINE_NAME=machine1",
- "FLOWCELL_BARCODE=abcdeACXX"
+ "FLOWCELL_BARCODE=abcdeACXX",
+ "MAX_READS_IN_RAM_PER_TILE=100" //force spill to disk to test encode/decode
});
final ReadStructure readStructure = new ReadStructure(readStructureString);
@@ -179,10 +194,14 @@ public class IlluminaBasecallsToFastqTest extends CommandLineProgramTest {
final String filename = outputSam.getName() + "." + i + ".fastq";
IOUtil.assertFilesEqual(new File(outputSam.getParentFile(), filename), new File(testDataDir, filename));
}
- for (int i = 1; i <= readStructure.barcodes.length(); ++i) {
+ for (int i = 1; i <= readStructure.sampleBarcodes.length(); ++i) {
final String filename = outputSam.getName() + ".barcode_" + i + ".fastq";
IOUtil.assertFilesEqual(new File(outputSam.getParentFile(), filename), new File(testDataDir, filename));
}
+ for (int i = 1; i <= readStructure.molecularBarcode.length(); ++i) {
+ final String filename = outputSam.getName() + ".index_" + i + ".fastq";
+ IOUtil.assertFilesEqual(new File(outputSam.getParentFile(), filename), new File(testDataDir, filename));
+ }
}
} finally {
TestUtil.recursiveDelete(outputDir);
diff --git a/src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java b/src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java
index 675f472..0d9fe17 100644
--- a/src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java
+++ b/src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java
@@ -50,6 +50,8 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
private static final File DUAL_BASECALLS_DIR = new File("testdata/picard/illumina/25T8B8B25T/Data/Intensities/BaseCalls");
private static final File TEST_DATA_DIR = new File("testdata/picard/illumina/25T8B25T/sams");
private static final File DUAL_TEST_DATA_DIR = new File("testdata/picard/illumina/25T8B8B25T/sams");
+ private static final File TEST_DATA_DIR_WITH_4M_INDEX = new File("testdata/picard/illumina/25T8B25T/sams_with_4M");
+ private static final File TEST_DATA_DIR_WITH_4M4M_INDEX = new File("testdata/picard/illumina/25T8B25T/sams_with_4M4M");
public String getCommandLineProgramName() {
return IlluminaBasecallsToSam.class.getSimpleName();
@@ -82,17 +84,64 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
}
@Test
+ public void testNonBarcodedWithMoleclarIndex() throws Exception {
+ final File outputBam = File.createTempFile("nonBarcodedWithMI.", ".sam");
+ outputBam.deleteOnExit();
+ final int lane = 1;
+
+ runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S8M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World"
+ });
+ IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithMolecularIndex8M.sam"));
+ }
+
+ @Test
+ public void testNonBarcodedWithDualMoleclarIndex() throws Exception {
+ final File outputBam = File.createTempFile("nonBarcodedWithDualMI.", ".sam");
+ outputBam.deleteOnExit();
+ final int lane = 1;
+
+ runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S4M4M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World"
+ });
+ IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithMolecularIndex4M4M.sam"));
+
+ }
+
+ @Test
public void testMultiplexed() throws Exception {
runStandardTest(1, "multiplexedBarcode.", "barcode.params", 1, "25T8B25T", BASECALLS_DIR, TEST_DATA_DIR);
}
+ @Test
+ public void testMultiplexedWith4MIndex() throws Exception {
+ runStandardTest(1, "multiplexedBarcode.", "barcode.params", 1, "25T8B4M21T", BASECALLS_DIR, TEST_DATA_DIR_WITH_4M_INDEX);
+ }
+
+ @Test
+ public void testMultiplexedWith4M4MIndex() throws Exception {
+ runStandardTest(1, "multiplexedBarcode2.", "barcode.params", 1, "25T8B4M4M17T", BASECALLS_DIR, TEST_DATA_DIR_WITH_4M4M_INDEX);
+ }
+
//Same as testMultiplexed except we use BARCODE_1 instead of BARCODE
@Test
public void testMultiplexedWithAlternateBarcodeName() throws Exception {
runStandardTest(1, "singleBarcodeAltName.", "multiplexed_positive_rgtags.params", 1, "25T8B25T", BASECALLS_DIR, TEST_DATA_DIR);
}
- @Test
+ @Test(enabled = false)
public void testDualBarcodes() throws Exception {
runStandardTest(1, "dualBarcode.", "barcode_double.params", 1, "25T8B8B25T", DUAL_BASECALLS_DIR, DUAL_TEST_DATA_DIR);
}
diff --git a/src/tests/java/picard/illumina/ReadStructureTest.java b/src/tests/java/picard/illumina/ReadStructureTest.java
index 2081a4a..d23c8f7 100644
--- a/src/tests/java/picard/illumina/ReadStructureTest.java
+++ b/src/tests/java/picard/illumina/ReadStructureTest.java
@@ -24,32 +24,38 @@ public class ReadStructureTest {
@DataProvider(name="validReadStructures")
public Object[][] validReadStructures() {
return new Object[][] {
- {"2T", makeList(rd(2, T)), 1, 0, 0},
- {"1234B", makeList(rd(1234, B)), 0, 1, 0},
- {Integer.MAX_VALUE + "S", makeList(rd(Integer.MAX_VALUE, S)), 0, 0, 1},
-
- {"76T76T", makeList(rd(76, T), rd(76, T)), 2, 0, 0},
- {"76T1B", makeList(rd(76, T), rd(1, B)), 1, 1, 0},
- {"76B1T", makeList(rd(76, B), rd(1, T)), 1, 1, 0},
- {"1S1B", makeList(rd(1, S), rd(1, B)), 0, 1, 1},
- {"1T999S", makeList(rd(1, T), rd(999, S)), 1, 0, 1},
-
- {"100T20T100T", makeList(rd(100, T), rd(20, T), rd(100, T)), 3, 0, 0},
- {"2S50S10S", makeList(rd(2, S), rd(50, S), rd(10, S)), 0, 0, 3},
- {"10T1B11T", makeList(rd(10, T), rd(1, B), rd(11, T)), 2, 1, 0},
- {"201T13T111B", makeList(rd(201, T), rd(13, T), rd(111, B)), 2, 1, 0},
- {"15B1T1T", makeList(rd(15, B), rd(1, T), rd(1, T)), 2, 1, 0},
- {"99B7T6B", makeList(rd(99, B), rd(7, T), rd(6, B)), 1, 2, 0},
- {"631B776S638T", makeList(rd(631, B), rd(776, S), rd(638, T)), 1, 1, 1},
-
-
- {"3T7B60S2T", makeList(rd(3, T), rd(7, B), rd(60, S), rd(2, T)), 2, 1, 1},
- {"20B9S100T1T", makeList(rd(20, B), rd(9, S), rd(100, T), rd(1, T)), 2, 1, 1},
- {"33T42B9T81B", makeList(rd(33, T), rd(42, B), rd(9, T), rd(81, B)), 2, 2, 0},
- {"28B56B13T123S", makeList(rd(28, B), rd(56, B), rd(13, T), rd(123,S)), 1, 2, 1},
- {"92S8B8B32B", makeList(rd(92, S), rd(8, B), rd(8, B), rd(32, B)), 0, 3, 1},
-
- {"2S88B7T8S9T9T84B100S2S4B3B", makeList(rd(2,S), rd(88,B), rd(7,T), rd(8,S), rd(9,T), rd(9,T), rd(84,B), rd(100,S), rd(2,S), rd(4,B), rd(3,B)), 3, 4, 4}
+ {"2T", makeList(rd(2, T)), 1, 0, 0, 0},
+ {"1234B", makeList(rd(1234, B)), 0, 1, 0, 0},
+ {Integer.MAX_VALUE + "S", makeList(rd(Integer.MAX_VALUE, S)), 0, 0, 1, 0},
+ {Integer.MAX_VALUE + "M", makeList(rd(Integer.MAX_VALUE, M)), 0, 0, 0, 1},
+
+
+ {"76T76T", makeList(rd(76, T), rd(76, T)), 2, 0, 0, 0},
+ {"76T1B", makeList(rd(76, T), rd(1, B)), 1, 1, 0, 0},
+ {"76B1T", makeList(rd(76, B), rd(1, T)), 1, 1, 0, 0},
+ {"1S1B", makeList(rd(1, S), rd(1, B)), 0, 1, 1, 0},
+ {"1S1B1M", makeList(rd(1, S), rd(1, B), rd(1, M)), 0, 1, 1, 1},
+ {"1T999S", makeList(rd(1, T), rd(999, S)), 1, 0, 1, 0},
+
+ {"100T20T100T", makeList(rd(100, T), rd(20, T), rd(100, T)), 3, 0, 0, 0},
+ {"2S50S10S", makeList(rd(2, S), rd(50, S), rd(10, S)), 0, 0, 3, 0},
+ {"10T1B11T", makeList(rd(10, T), rd(1, B), rd(11, T)), 2, 1, 0, 0},
+ {"201T13T111B", makeList(rd(201, T), rd(13, T), rd(111, B)), 2, 1, 0, 0},
+ {"15B1T1T", makeList(rd(15, B), rd(1, T), rd(1, T)), 2, 1, 0, 0},
+ {"99B7T6B", makeList(rd(99, B), rd(7, T), rd(6, B)), 1, 2, 0, 0},
+ {"631B776S638T", makeList(rd(631, B), rd(776, S), rd(638, T)), 1, 1, 1, 0},
+ {"631M776S638T", makeList(rd(631, M), rd(776, S), rd(638, T)), 1, 0, 1, 1},
+
+
+ {"3T7B60S2T", makeList(rd(3, T), rd(7, B), rd(60, S), rd(2, T)), 2, 1, 1, 0},
+ {"20B9S100T1T", makeList(rd(20, B), rd(9, S), rd(100, T), rd(1, T)), 2, 1, 1, 0},
+ {"33T42B9T81B", makeList(rd(33, T), rd(42, B), rd(9, T), rd(81, B)), 2, 2, 0, 0},
+ {"28B56B13T123S", makeList(rd(28, B), rd(56, B), rd(13, T), rd(123,S)), 1, 2, 1, 0},
+ {"92S8B8B32B", makeList(rd(92, S), rd(8, B), rd(8, B), rd(32, B)), 0, 3, 1, 0},
+ {"92S8M8M32M", makeList(rd(92, S), rd(8, M), rd(8, M), rd(32, M)), 0, 0, 1, 3},
+
+ {"2S88B7T8S9T9T84B100S2S4B3B", makeList(rd(2,S), rd(88,B), rd(7,T), rd(8,S), rd(9,T), rd(9,T), rd(84,B), rd(100,S), rd(2,S), rd(4,B), rd(3,B)), 3, 4, 4, 0},
+ {"2S88B7T8S9T9T84B3M100S2S4B3M3B", makeList(rd(2,S), rd(88,B), rd(7,T), rd(8,S), rd(9,T), rd(9,T), rd(84,B), rd(3,M), rd(100,S), rd(2,S), rd(4,B), rd(3, M), rd(3,B)), 3, 4, 4, 2}
};
}
@@ -60,10 +66,13 @@ public class ReadStructureTest {
{"0T", makeList(rd(0, T))},
{"-1T", makeList(rd(-1, T))},
{"0S" , makeList(rd(0, S))},
+ {"0M" , makeList(rd(0, M))},
{"-1B", makeList(rd(-1, B))},
+ {"-1M", makeList(rd(-1, M))},
{"8C", null},
{"B5", null},
{"SS", null},
+ {"SM", null},
{"75TS", null},
{"8*T", null},
{"-66S1B", makeList(rd(-66, S), rd(1, B))},
@@ -92,18 +101,18 @@ public class ReadStructureTest {
}
@Test(dataProvider = "validReadStructures")
- public void testValidStructuresFromString(final String rsString, final List<ReadDescriptor> descriptors, final int numTemplates, final int numBarcodes, final int numSkips) {
+ public void testValidStructuresFromString(final String rsString, final List<ReadDescriptor> descriptors, final int numTemplates, final int numBarcodes, final int numSkips, final int numMolecularIndexes) {
final ReadStructure readStructure = new ReadStructure(rsString);
- testReadStructure(readStructure, rsString, descriptors, numTemplates, numBarcodes, numSkips);
+ testReadStructure(readStructure, rsString, descriptors, numTemplates, numBarcodes, numSkips, numMolecularIndexes);
}
@Test(dataProvider = "validReadStructures")
- public void testValidStructuresFromList(final String rsString, final List<ReadDescriptor> descriptors, final int numTemplates, final int numBarcodes, final int numSkips) {
+ public void testValidStructuresFromList(final String rsString, final List<ReadDescriptor> descriptors, final int numTemplates, final int numBarcodes, final int numSkips, final int numMolecularIndexes) {
final ReadStructure readStructure = new ReadStructure(descriptors);
- testReadStructure(readStructure, rsString, descriptors, numTemplates, numBarcodes, numSkips);
+ testReadStructure(readStructure, rsString, descriptors, numTemplates, numBarcodes, numSkips, numMolecularIndexes);
}
- private void testReadStructure(final ReadStructure readStructure, final String structureString, final List<ReadDescriptor> descriptors, final int numTemplates, final int numBarcodes, final int numSkips) {
+ private void testReadStructure(final ReadStructure readStructure, final String structureString, final List<ReadDescriptor> descriptors, final int numTemplates, final int numBarcodes, final int numSkips, final int numMolecularIndexes) {
Assert.assertEquals(readStructure.toString(), structureString);
int totalCycles = 0;
@@ -111,6 +120,7 @@ public class ReadStructureTest {
int tIndex = 0;
int bIndex = 0;
int sIndex = 0;
+ int mIndex = 0;
for(int i = 0; i < descriptors.size(); i++) {
Assert.assertEquals(readStructure.descriptors.get(i), descriptors.get(i));
@@ -119,11 +129,15 @@ public class ReadStructureTest {
Assert.assertEquals(i, readStructure.templates.getIndices()[tIndex++]);
break;
case B:
- Assert.assertEquals(i, readStructure.barcodes.getIndices()[bIndex++]);
+ Assert.assertEquals(i, readStructure.sampleBarcodes.getIndices()[bIndex++]);
break;
case S:
Assert.assertEquals(i, readStructure.skips.getIndices()[sIndex++]);
break;
+ case M:
+ Assert.assertEquals(i, readStructure.molecularBarcode.getIndices()[mIndex++]);
+ break;
+
default:
Assert.fail("Unrecognized read type: " + readStructure.descriptors.get(i).type);
}
@@ -131,8 +145,9 @@ public class ReadStructureTest {
}
Assert.assertEquals(readStructure.totalCycles, totalCycles);
- Assert.assertEquals(readStructure.barcodes.length(), numBarcodes);
+ Assert.assertEquals(readStructure.sampleBarcodes.length(), numBarcodes);
Assert.assertEquals(readStructure.templates.length(), numTemplates);
+ Assert.assertEquals(readStructure.molecularBarcode.length(), numMolecularIndexes);
Assert.assertEquals(readStructure.skips.length(), numSkips);
}
@@ -150,11 +165,15 @@ public class ReadStructureTest {
@DataProvider(name="substructuresToReadStructureData")
public Object [][] substructureToReadStructureData() {
return new Object[][] {
- {new ReadStructure("10T10T").templates, "10T10T" },
- {new ReadStructure("10T8B10T").nonSkips, "10T8B10T"},
- {new ReadStructure("8S10T8B8S10T").nonSkips, "10T8B10T"},
- {new ReadStructure("10T8S8S10T").skips, "8S8S" },
- {new ReadStructure("8B").barcodes, "8B" }
+ {new ReadStructure("10T10T"). templates, "10T10T" },
+ {new ReadStructure("10T4M10T").templates, "10T10T" },
+ {new ReadStructure("10T8B10T"). nonSkips, "10T8B10T" },
+ {new ReadStructure("10T8B5M10T"). nonSkips, "10T8B5M10T"},
+ {new ReadStructure("8S10T8B8S10T").nonSkips, "10T8B10T" },
+ {new ReadStructure("10T8S8S10T"). skips, "8S8S" },
+ {new ReadStructure("10T8S8S3M10T").skips, "8S8S" },
+ {new ReadStructure("8B").sampleBarcodes, "8B" },
+ {new ReadStructure("10T8S8M10T").molecularBarcode, "8M" }
};
}
@@ -166,9 +185,14 @@ public class ReadStructureTest {
@DataProvider(name="substructureToReadStructureNegativeData")
public Object[][] substructureToReadStructureNegativeData() {
return new Object[][] {
- {new ReadStructure("10T").barcodes },
- {new ReadStructure("10S").nonSkips },
- {new ReadStructure("10S8B").templates},
+ {new ReadStructure("10T").sampleBarcodes },
+ {new ReadStructure("10M").sampleBarcodes },
+ {new ReadStructure("10T").molecularBarcode},
+ {new ReadStructure("10S").nonSkips },
+ {new ReadStructure("10M").skips },
+ {new ReadStructure("10S8B").templates },
+ {new ReadStructure("10S8B4M").templates },
+
};
}
diff --git a/src/tests/java/picard/reference/NonNFastaSizeTest.java b/src/tests/java/picard/reference/NonNFastaSizeTest.java
new file mode 100644
index 0000000..0e6e330
--- /dev/null
+++ b/src/tests/java/picard/reference/NonNFastaSizeTest.java
@@ -0,0 +1,87 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.reference;
+
+import htsjdk.samtools.util.IOUtil;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.IOException;
+import java.lang.Exception;
+import java.lang.String;
+
+/**
+ * @author ebanks
+ */
+
+public class NonNFastaSizeTest {
+
+    private static final String REFERENCE = "testdata/picard/reference/test.fasta";
+
+    /** Runs NonNFastaSize on the test reference and expects the first line of the output file to be 1008. */
+    @Test
+    public void noIntervals() throws IOException {
+        final File input = new File(REFERENCE);
+        final File outfile = File.createTempFile("nonNcount", ".txt");
+        outfile.deleteOnExit();
+        final String[] args = new String[] {
+                "INPUT=" + input.getAbsolutePath(),
+                "OUTPUT=" + outfile.getAbsolutePath()
+        };
+        Assert.assertEquals(new NonNFastaSize().instanceMain(args), 0);
+
+        // try-with-resources closes the reader; an empty or non-numeric first line fails the
+        // test instead of being caught and merely printed to stderr.
+        try (final BufferedReader reader = IOUtil.openFileForBufferedReading(outfile)) {
+            final String count = reader.readLine();
+            Assert.assertNotNull(count, "No count was written to " + outfile);
+            Assert.assertEquals(Long.parseLong(count), 1008);
+        }
+    }
+
+    /** Runs NonNFastaSize with INTERVALS set to test.intervals and expects the first output line to be 53. */
+    @Test
+    public void withIntervals() throws IOException {
+        final File input = new File(REFERENCE);
+        final File outfile = File.createTempFile("nonNcount", ".txt");
+        final File intervals = new File("testdata/picard/reference/test.intervals");
+        outfile.deleteOnExit();
+        final String[] args = new String[] {
+                "INPUT=" + input.getAbsolutePath(),
+                "OUTPUT=" + outfile.getAbsolutePath(),
+                "INTERVALS=" + intervals.getAbsolutePath()
+        };
+        Assert.assertEquals(new NonNFastaSize().instanceMain(args), 0);
+
+        // try-with-resources closes the reader; an empty or non-numeric first line fails the
+        // test instead of being caught and merely printed to stderr.
+        try (final BufferedReader reader = IOUtil.openFileForBufferedReading(outfile)) {
+            final String count = reader.readLine();
+            Assert.assertNotNull(count, "No count was written to " + outfile);
+            Assert.assertEquals(Long.parseLong(count), 53);
+        }
+    }
+}
diff --git a/src/tests/java/picard/sam/FixMateInformationTest.java b/src/tests/java/picard/sam/FixMateInformationTest.java
new file mode 100644
index 0000000..1761085
--- /dev/null
+++ b/src/tests/java/picard/sam/FixMateInformationTest.java
@@ -0,0 +1,65 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.sam;
+
+import htsjdk.samtools.SAMException;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgramTest;
+
+import java.io.File;
+import java.io.IOException;
+
+public class FixMateInformationTest extends CommandLineProgramTest {
+    private static final File TEST_DATA_DIR = new File("testdata/picard/sam/FixMateInformation");
+    private static final String MISSING_MATE_TEST = "missingMate.sam";
+
+    public String getCommandLineProgramName() {
+        return FixMateInformation.class.getSimpleName();
+    }
+
+    /** Runs FixMateInformation on missingMate.sam with the given IGNORE_MISSING_MATES setting and returns its exit code. */
+    public int missingMateTestHelper(final boolean ignoreMissingMates) throws IOException {
+        final File inSamFile = new File(TEST_DATA_DIR, MISSING_MATE_TEST);
+        // createTempFile appends the suffix verbatim, so it must carry its own dot to produce a ".sam" file name.
+        final File outSamFile = File.createTempFile("outMissingMateTest", ".sam");
+        outSamFile.deleteOnExit();
+        final String[] args = new String[]{
+                "INPUT=" + inSamFile.getAbsolutePath(),
+                "OUTPUT=" + outSamFile.getAbsolutePath(),
+                "IGNORE_MISSING_MATES=" + ignoreMissingMates
+        };
+        return new FixMateInformation().instanceMain(args);
+    }
+
+    @Test
+    public void ignoreMissingMateTest() throws IOException {
+        Assert.assertEquals(missingMateTestHelper(true), 0);
+    }
+
+    @Test(expectedExceptions = SAMException.class)
+    public void ignoreMissingMateExceptionTest() throws IOException {
+        missingMateTestHelper(false);
+    }
+}
diff --git a/src/tests/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java b/src/tests/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
new file mode 100644
index 0000000..bcaadc4
--- /dev/null
+++ b/src/tests/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
@@ -0,0 +1,121 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015 Nils Homer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.sam.markduplicates;
+
+import htsjdk.samtools.metrics.MetricsFile;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import picard.cmdline.CommandLineProgramTest;
+import picard.sam.DuplicationMetrics;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class EstimateLibraryComplexityTest extends CommandLineProgramTest {
+    private static final File TEST_DATA_DIR = new File("testdata/picard/sam/EstimateLibraryComplexity");
+
+    public String getCommandLineProgramName() {
+        return EstimateLibraryComplexity.class.getSimpleName();
+    }
+
+    /** Reads the metrics beans from {@code output}, expects exactly one, and checks its duplicate and read-pair counts. */
+    private void examineMetricsFile(final File output, final int numDuplicates, final int numReadPairsExamined) {
+        final List<DuplicationMetrics> beans = MetricsFile.readBeans(output);
+        Assert.assertEquals(beans.size(), 1);
+        final DuplicationMetrics only = beans.get(0);
+        Assert.assertEquals(only.READ_PAIR_DUPLICATES*2 + only.UNPAIRED_READ_DUPLICATES, numDuplicates);
+        Assert.assertEquals(only.READ_PAIRS_EXAMINED, numReadPairsExamined);
+    }
+
+    /** With MIN_GROUP_COUNT=1 the duplicates in dupes.sam are found as expected. */
+    @Test
+    public void testSimpleDuplicate() throws IOException {
+        final File dupesSam = new File(TEST_DATA_DIR, "dupes.sam");
+        final File metricsOut = File.createTempFile("estimateLibraryComplexity",".els_metrics");
+        metricsOut.deleteOnExit();
+
+        final List<String> cmdLine = new ArrayList<>();
+        cmdLine.add("INPUT=" + dupesSam.getAbsolutePath());
+        cmdLine.add("OUTPUT=" + metricsOut.getAbsolutePath());
+        cmdLine.add("MIN_GROUP_COUNT=1");
+
+        Assert.assertEquals(runPicardCommandLine(cmdLine), 0);
+        examineMetricsFile(metricsOut, 2, 2);
+    }
+
+    /** No duplicates are found, because the difference rate across the entire read is too high for MAX_DIFF_RATE=0.0. */
+    @Test
+    public void testMaxDiffRate() throws IOException {
+        final File dupesSam = new File(TEST_DATA_DIR, "dupes.sam");
+        final File metricsOut = File.createTempFile("estimateLibraryComplexity",".els_metrics");
+        metricsOut.deleteOnExit();
+
+        final List<String> cmdLine = new ArrayList<>();
+        cmdLine.add("INPUT=" + dupesSam.getAbsolutePath());
+        cmdLine.add("OUTPUT=" + metricsOut.getAbsolutePath());
+        cmdLine.add("MAX_DIFF_RATE=0.0");
+        cmdLine.add("MIN_GROUP_COUNT=1");
+
+        Assert.assertEquals(runPicardCommandLine(cmdLine), 0);
+        examineMetricsFile(metricsOut, 0, 2);
+    }
+
+    /** Finds duplicates since we examine only the first ten bases. */
+    @Test
+    public void testSimpleDuplicateWithMaxReadLength() throws IOException {
+        final File dupesSam = new File(TEST_DATA_DIR, "dupes.sam");
+        final File metricsOut = File.createTempFile("estimateLibraryComplexity",".els_metrics");
+        metricsOut.deleteOnExit();
+
+        final List<String> cmdLine = new ArrayList<>();
+        cmdLine.add("INPUT=" + dupesSam.getAbsolutePath());
+        cmdLine.add("OUTPUT=" + metricsOut.getAbsolutePath());
+        cmdLine.add("MAX_DIFF_RATE=0.0");
+        cmdLine.add("MIN_GROUP_COUNT=1");
+        cmdLine.add("MAX_READ_LENGTH=10");
+
+        Assert.assertEquals(runPicardCommandLine(cmdLine), 0);
+        examineMetricsFile(metricsOut, 2, 2);
+    }
+
+    /** With the default MIN_GROUP_COUNT no duplicates are found, since there was only one group of
+     * duplicates of size one. That filtering step also leaves no read pairs examined.
+     */
+    @Test
+    public void testDefaultMinGroupCount() throws IOException {
+        final File dupesSam = new File(TEST_DATA_DIR, "dupes.sam");
+        final File metricsOut = File.createTempFile("estimateLibraryComplexity",".els_metrics");
+        metricsOut.deleteOnExit();
+
+        final List<String> cmdLine = new ArrayList<>();
+        cmdLine.add("INPUT=" + dupesSam.getAbsolutePath());
+        cmdLine.add("OUTPUT=" + metricsOut.getAbsolutePath());
+
+        Assert.assertEquals(runPicardCommandLine(cmdLine), 0);
+        examineMetricsFile(metricsOut, 0, 0); // no read pairs examined!!!
+    }
+}
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java
new file mode 100644
index 0000000..ea01e04
--- /dev/null
+++ b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java
@@ -0,0 +1,33 @@
+package picard.sam.markduplicates;
+
+/**
+ * The purpose of this class is to show that MarkDuplicates gives the same results when run on files that do not have a
+ * molecular barcode tag, even if the code is trying to use the molecular barcode. Subclasses supply the barcode argument name and tag value; the tester adds "NAME=VALUE" to each run unless an argument starting with NAME is already present.
+ */
+
+abstract public class MarkDuplicateWithMissingBarcodeTest extends MarkDuplicatesTest {
+
+ protected AbstractMarkDuplicatesCommandLineProgramTester getTester() { // hands out the tester that injects the barcode argument
+ return new MarkDuplicatesWithMissingBarcodesTester();
+ }
+
+ abstract protected String getArgumentName(); // name of the command-line argument to add, e.g. "BARCODE_TAG"
+
+ abstract protected String getTagValue(); // value to pass for that argument, e.g. "BC"
+
+ private class MarkDuplicatesWithMissingBarcodesTester extends MarkDuplicatesTester { // inner (non-static) class: it calls the outer getArgumentName()/getTagValue()
+ @Override
+ public void runTest() {
+ boolean hasRX = false; // despite the name, tracks whether the subclass's argument (whatever its tag) is already among the args
+ for (final String argument : this.getArgs()) {
+ if (argument.startsWith(getArgumentName())) {
+ hasRX = true;
+ break;
+ }
+ }
+ if (!hasRX) addArg(getArgumentName() + "=" + getTagValue()); // only add the argument when it has not been set already
+
+ super.runTest();
+ }
+ }
+}
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java
new file mode 100644
index 0000000..86825c3
--- /dev/null
+++ b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java
@@ -0,0 +1,22 @@
+package picard.sam.markduplicates;
+
+/**
+ * Created by farjoun on 12/8/15.
+ *
+ * The purpose of this class is to show that MarkDuplicates gives the same results when run on files that do not have a
+ * molecular barcode tag, even if the code is trying to use the molecular barcode. This variant passes the read-one barcode argument.
+ *
+ */
+
+public class MarkDuplicateWithMissingReadOneBarcodeTest extends MarkDuplicateWithMissingBarcodeTest {
+
+ @Override
+ protected String getArgumentName() {
+ return "READ_ONE_BARCODE_TAG"; // read-one argument; the read-two argument is covered by MarkDuplicateWithMissingReadTwoBarcodeTest
+ }
+
+ @Override
+ protected String getTagValue() {
+ return "RX"; // tag name handed to the argument above
+ }
+}
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java
new file mode 100644
index 0000000..0d868e4
--- /dev/null
+++ b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java
@@ -0,0 +1,21 @@
+package picard.sam.markduplicates;
+
+/**
+ * Runs the inherited MarkDuplicates tests with the argument READ_TWO_BARCODE_TAG=RX.
+ * The purpose of this class is to show that MarkDuplicates gives the same results when run on files that do not have a
+ * molecular barcode tag, even if the code is trying to use the molecular barcode
+ *
+ */
+
+public class MarkDuplicateWithMissingReadTwoBarcodeTest extends MarkDuplicateWithMissingBarcodeTest {
+
+ @Override
+ protected String getArgumentName() {
+ return "READ_TWO_BARCODE_TAG"; // name of the argument supplied to the run
+ }
+
+ @Override
+ protected String getTagValue() {
+ return "RX"; // tag name handed to the argument above
+ }
+}
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java
new file mode 100644
index 0000000..1de9316
--- /dev/null
+++ b/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java
@@ -0,0 +1,21 @@
+package picard.sam.markduplicates;
+
+/**
+ * Runs the inherited MarkDuplicates tests with the argument BARCODE_TAG=BC.
+ * The purpose of this class is to show that MarkDuplicates gives the same results when run on files that do not have a
+ * barcode tag, even if the code is trying to use the barcode
+ *
+ */
+
+public class MarkDuplicateWithMissingSampleBarcodeTest extends MarkDuplicateWithMissingBarcodeTest {
+
+ @Override
+ protected String getArgumentName() {
+ return "BARCODE_TAG"; // name of the argument supplied to the run
+ }
+
+ @Override
+ protected String getTagValue() {
+ return "BC"; // tag name handed to the argument above
+ }
+}
diff --git a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java b/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
index 32de1d6..0dde3eb 100644
--- a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
+++ b/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
@@ -30,6 +30,7 @@ import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordDuplicateComparator;
import htsjdk.samtools.SAMTag;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IterableAdapter;
@@ -43,6 +44,7 @@ import picard.sam.markduplicates.util.LibraryIdGenerator;
import picard.sam.markduplicates.util.ReadEnds;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
@@ -109,11 +111,14 @@ public class SimpleMarkDuplicatesWithMateCigar extends MarkDuplicates {
final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
false,
OUTPUT);
-
+
+ final SAMRecordDuplicateComparator comparator = new SAMRecordDuplicateComparator(Collections.singletonList(headerAndIterator.header));
+ comparator.setScoringStrategy(this.DUPLICATE_SCORING_STRATEGY);
+
final DuplicateSetIterator iterator = new DuplicateSetIterator(headerAndIterator.iterator,
- headerAndIterator.header);
-
- iterator.setScoringStrategy(this.DUPLICATE_SCORING_STRATEGY);
+ headerAndIterator.header,
+ false,
+ comparator);
// progress logger!
final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Read");
@@ -195,7 +200,7 @@ public class SimpleMarkDuplicatesWithMateCigar extends MarkDuplicates {
// Track the optical duplicates
if (this.READ_NAME_REGEX != null && 1 < duplicateReadEnds.size()) {
- AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(duplicateReadEnds, opticalDuplicateFinder, libraryIdGenerator);
+ AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(duplicateReadEnds, duplicateReadEnds.get(0), opticalDuplicateFinder, libraryIdGenerator);
}
}
diff --git a/src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java b/src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
index 4ca3cd9..1adf4f9 100644
--- a/src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
+++ b/src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
@@ -1,9 +1,20 @@
package picard.sam.markduplicates.util;
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.samtools.util.Log;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
-import picard.sam.util.ReadNameParsingUtils;
+import picard.sam.util.PhysicalLocation;
+import picard.sam.util.PhysicalLocationInt;
+import picard.sam.util.PhysicalLocationShort;
+import picard.sam.util.ReadNameParser;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
/**
* Tests for OpticalDuplicateFinder
@@ -11,66 +22,134 @@ import picard.sam.util.ReadNameParsingUtils;
* @author Nils Homer
*/
public class OpticalDuplicateFinderTest {
-
- /** Tests rapidParseInt for positive and negative numbers, as well as non-digit suffixes */
@Test
- public void testRapidParseInt() {
+ public void testDefaultRegex() {
+ final String readName1 = "000000000-ZZZZZ:1:1105:17981:23325";
+ final String readName2 = "000000000-ZZZZZ:1:1109:22981:17995";
+
+ final int[] tokens = new int[3];
+ Assert.assertEquals(ReadNameParser.getLastThreeFields(readName1, ':', tokens), 5);
+ Assert.assertEquals(ReadNameParser.getLastThreeFields(readName2, ':', tokens), 5);
+
final OpticalDuplicateFinder opticalDuplicateFinder = new OpticalDuplicateFinder();
- for (int i = -100; i < 100; i++) {
- Assert.assertEquals(ReadNameParsingUtils.rapidParseInt(Integer.toString(i)), i);
+ final PhysicalLocation loc1 = new ReadEndsForMarkDuplicates();
+ final PhysicalLocation loc2 = new ReadEndsForMarkDuplicates();
- // trailing characters
- Assert.assertEquals(ReadNameParsingUtils.rapidParseInt(Integer.toString(i)+"A"), i);
- Assert.assertEquals(ReadNameParsingUtils.rapidParseInt(Integer.toString(i)+"ACGT"), i);
- Assert.assertEquals(ReadNameParsingUtils.rapidParseInt(Integer.toString(i)+".1"), i);
+ Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName1, loc1));
+ Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName2, loc2));
+
+ final boolean[] opticalDuplicateFlags = opticalDuplicateFinder.findOpticalDuplicates(Arrays.asList(loc1, loc2), null);
+ for (final boolean opticalDuplicateFlag : opticalDuplicateFlags) {
+ Assert.assertFalse(opticalDuplicateFlag);
}
}
- /** Helper for testGetRapidDefaultReadNameRegexSplit */
- private void doTestGetRapidDefaultReadNameRegexSplit(int numFields, final OpticalDuplicateFinder opticalDuplicateFinder) {
- final int[] inputFields = new int[numFields];
- final int[] expectedFields = new int[numFields];
- String readName = "";
- for (int i = 0; i < inputFields.length; i++) {
- inputFields[i] = -1;
- expectedFields[i] = -1;
- if (0 < i) readName += ":";
- readName += Integer.toString(i);
- }
- if (2 < numFields) expectedFields[2] = 2;
- if (3 < numFields) expectedFields[3] = 3;
- if (4 < numFields) expectedFields[4] = 4;
- Assert.assertEquals(ReadNameParsingUtils.getRapidDefaultReadNameRegexSplit(readName, ':', inputFields), numFields);
- for (int i = 0; i < inputFields.length; i++) {
- Assert.assertEquals(inputFields[i], expectedFields[i]);
+ @Test
+ public void testVeryLongReadNames() {
+ final String readName1 = "M01234:123:000000000-ZZZZZ:1:1105:17981:23325";
+ final String readName2 = "M01234:123:000000000-ZZZZZ:1:1109:22981:17995";
+
+ final int[] tokens = new int[3];
+ Assert.assertEquals(ReadNameParser.getLastThreeFields(readName1, ':', tokens), 7);
+ Assert.assertEquals(ReadNameParser.getLastThreeFields(readName2, ':', tokens), 7);
+
+ final OpticalDuplicateFinder opticalDuplicateFinder = new OpticalDuplicateFinder();
+ final PhysicalLocation loc1 = new ReadEndsForMarkDuplicates();
+ final PhysicalLocation loc2 = new ReadEndsForMarkDuplicates();
+
+ Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName1, loc1));
+ Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName2, loc2));
+
+ final boolean[] opticalDuplicateFlags = opticalDuplicateFinder.findOpticalDuplicates(Arrays.asList(loc1, loc2), null);
+ for (final boolean opticalDuplicateFlag : opticalDuplicateFlags) {
+ Assert.assertFalse(opticalDuplicateFlag);
}
}
- /** Tests that we split the string early, with the correct # of fields, and modified values */
@Test
- public void testGetRapidDefaultReadNameRegexSplit() {
- final OpticalDuplicateFinder opticalDuplicateFinder = new OpticalDuplicateFinder();
- for (int i = 1; i < 10; i++) {
- doTestGetRapidDefaultReadNameRegexSplit((i <= 5) ? i : 5, opticalDuplicateFinder);
+ public void testKeeper() { // checks which locations are flagged as optical duplicates for different choices of the "keeper"
+ final Log log = Log.getInstance(OpticalDuplicateFinderTest.class);
+ final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 100, log); // 100 is presumably the optical pixel distance - confirm against the constructor
+ List<PhysicalLocation> locs = Arrays.asList(
+ loc(7, 1500, 1500),
+ loc(7, 1501, 1501),
+ loc(5, 1500, 1500),
+ loc(7, 1490, 1502),
+ loc(7, 2500, 2500),
+ loc(7, 10, 10)
+ );
+
+ assertEquals(finder.findOpticalDuplicates(locs, null ), new boolean[] {false, true, false, true, false, false});
+ assertEquals(finder.findOpticalDuplicates(locs, locs.get(0)), new boolean[] {false, true, false, true, false, false});
+ assertEquals(finder.findOpticalDuplicates(locs, locs.get(1)), new boolean[] {true, false, false, true, false, false});
+ assertEquals(finder.findOpticalDuplicates(locs, locs.get(3)), new boolean[] {true, true, false, false, false, false});
+
+ for (int i=0; i<100; ++i) { // the count of flagged locations must not depend on input order or on which of the three is the keeper
+ final Random random = new Random(i); // seeded so every iteration is reproducible
+ final List<PhysicalLocation> shuffled = new ArrayList<>(locs);
+ final List<PhysicalLocation> keepers = Arrays.asList(locs.get(0), locs.get(1), locs.get(3));
+ final PhysicalLocation keeper = keepers.get(random.nextInt(keepers.size()));
+ Collections.shuffle(shuffled, random); // use the seeded generator; the one-argument overload would shuffle non-reproducibly
+
+ int opticalDupeCount = countTrue(finder.findOpticalDuplicates(shuffled, keeper));
+ Assert.assertEquals(opticalDupeCount, 2);
}
}
- // NB: these tests fails due to overflow in the duplicate finder test. This has been the behavior previously, so keep it for now.
- @Test(dataProvider = "testParseReadNameDataProvider", enabled = false)
- public void testParseReadName(final String readName, final int tile, final int x, final int y) {
- OpticalDuplicateFinder opticalDuplicateFinder = new OpticalDuplicateFinder();
- OpticalDuplicateFinder.PhysicalLocation loc = new ReadEndsForMarkDuplicates();
- Assert.assertTrue(opticalDuplicateFinder.addLocationInformation(readName, loc));
- Assert.assertEquals(loc.getTile(), tile);
- Assert.assertEquals(loc.getX(), x);
- Assert.assertEquals(loc.getY(), y);
+ /**
+ * Tests the case where the "keeper" record is not in the list that is passed to the OpticalDuplicateFinder. This can happen
+ * when there are, e.g. FR and RF reads, which can all be molecular duplicates of one another, but cannot be optical duplicates of one
+ * another and are thus partitioned into two sets for optical duplicate checking.
+ */
+ @Test
+ public void testKeeperNotInList() {
+ final Log log = Log.getInstance(OpticalDuplicateFinderTest.class);
+ final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 100, log);
+ List<PhysicalLocation> locs = Arrays.asList(
+ loc(1, 100, 100),
+ loc(1, 101, 101),
+ loc(1, 99, 99),
+ loc(1, 99, 102)
+ );
+
+ Assert.assertEquals(countTrue(finder.findOpticalDuplicates(locs, loc(7, 5000, 5000))), 3); // keeper is not among locs: three of the four are expected to be flagged
}
- @DataProvider(name = "testParseReadNameDataProvider")
- public Object[][] testParseReadNameDataProvider() {
- return new Object[][]{
- {"RUNID:7:1203:2886:82292", 1203, 2886, 82292},
- {"RUNID:7:1203:2884:16834", 1203, 2884, 16834}
+ @Test
+ public void testKeeperAtEndWithinCliqueOfAllOpticalDuplicates() {
+ final Log log = Log.getInstance(OpticalDuplicateFinderTest.class);
+ final OpticalDuplicateFinder finder = new OpticalDuplicateFinder(OpticalDuplicateFinder.DEFAULT_READ_NAME_REGEX, 15, log);
+ List<PhysicalLocation> locs = Arrays.asList(
+ loc(1, 10, 0),
+ loc(1, 20, 0),
+ loc(1, 30, 0)
+ );
+
+ assertEquals(finder.findOpticalDuplicates(locs, locs.get(2)), new boolean[] {true, true, false});
+ }
+
+ /** Helper method to create a physical location. */
+ private PhysicalLocation loc(final int tile, final int x, final int y) {
+ final PhysicalLocation l = new PhysicalLocationInt() {
+ @Override
+ public short getReadGroup() { return 1; }
};
+ l.setTile((short) tile);
+ l.setX(x);
+ l.setY(y);
+ return l;
+ }
+
+ void assertEquals(final boolean[] actual, final boolean[] expected) {
+ if (!Arrays.equals(actual, expected)) {
+ throw new AssertionError("expected: " + Arrays.toString(expected) + " but was: " + Arrays.toString(actual));
+ }
+ }
+
+ /** Simply counts the true values in a boolean array. */
+ int countTrue(final boolean[] bs) {
+ int count = 0;
+ for (final boolean b : bs) if (b) ++count;
+ return count;
}
}
diff --git a/src/tests/java/picard/sam/util/ReadNameParserTests.java b/src/tests/java/picard/sam/util/ReadNameParserTests.java
new file mode 100644
index 0000000..8a1e2f6
--- /dev/null
+++ b/src/tests/java/picard/sam/util/ReadNameParserTests.java
@@ -0,0 +1,142 @@
+package picard.sam.util;
+
+import htsjdk.samtools.util.CollectionUtil;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.List;
+
+/**
+ * Tests for the ReadNameParser class.
+ */
+public class ReadNameParserTests {
+ /** Tests rapidParseInt for positive and negative numbers, as well as non-digit suffixes */
+ @Test
+ public void testRapidParseInt() {
+ for (int i = -100; i < 100; i++) {
+ Assert.assertEquals(ReadNameParser.rapidParseInt(Integer.toString(i)), i);
+
+ // trailing characters
+ Assert.assertEquals(ReadNameParser.rapidParseInt(Integer.toString(i)+"A"), i);
+ Assert.assertEquals(ReadNameParser.rapidParseInt(Integer.toString(i)+"ACGT"), i);
+ Assert.assertEquals(ReadNameParser.rapidParseInt(Integer.toString(i)+".1"), i);
+ }
+ }
+
+ /** Tests that rapidParseInt throws a NumberFormatException for strings that do not start with a digit (after an optional '-'). */
+ @Test
+ public void testRapidParseIntFails() {
+ List<String> values = CollectionUtil.makeList("foo", "bar", "abc123", "-foo", "f00", "-f00");
+ for (String s : values) {
+ try {
+ ReadNameParser.rapidParseInt(s);
+ Assert.fail("Should have failed to rapid-parse " + s + " as an int."); // only reached when no exception was thrown
+ }
+ catch (NumberFormatException nfe) {
+ /* expected */
+ }
+ }
+ }
+
+ /** Helper for testGetRapidDefaultReadNameRegexSplit */
+ private void doTestGetRapidDefaultReadNameRegexSplit(int numFields) {
+ final int[] inputFields = new int[3];
+ final int[] expectedFields = new int[3];
+ String readName = "";
+ for (int i = 0; i < numFields; i++) {
+ if (0 < i) readName += ":";
+ readName += Integer.toString(i);
+ }
+ inputFields[0] = inputFields[1] = inputFields[2] = -1;
+ if (numFields < 3) {
+ Assert.assertEquals(ReadNameParser.getLastThreeFields(readName, ':', inputFields), -1);
+ }
+ else {
+ Assert.assertEquals(ReadNameParser.getLastThreeFields(readName, ':', inputFields), numFields);
+ expectedFields[0] = expectedFields[1] = expectedFields[2] = -1;
+ if (0 < numFields) expectedFields[0] = numFields-3;
+ if (1 < numFields) expectedFields[1] = numFields-2;
+ if (2 < numFields) expectedFields[2] = numFields-1;
+ for (int i = 0; i < inputFields.length; i++) {
+ Assert.assertEquals(inputFields[i], expectedFields[i]);
+ }
+ }
+ }
+
+ /** Tests that we split the string with the correct # of fields, and modified values */
+ @Test
+ public void testGetRapidDefaultReadNameRegexSplit() {
+ for (int i = 1; i < 10; i++) {
+ doTestGetRapidDefaultReadNameRegexSplit(i);
+ }
+ }
+
+ @DataProvider(name = "testParseReadNameDataProvider")
+ public Object[][] testParseReadNameDataProvider() {
+ return new Object[][]{
+ {"RUNID:7:1203:2886:82292", 1203, 2886, 82292},
+ {"RUNID:7:1203:2884:16834", 1203, 2884, 16834}
+ };
+ }
+
+ // NB: with PhysicalLocationShort the parsed x/y values can overflow, so the expected values are cast to short. This has been the behavior previously, so keep it for now.
+ @Test(dataProvider = "testParseReadNameDataProvider", enabled = true)
+ public void testParseReadNameOverflow(final String readName, final int tile, final int x, final int y) {
+ ReadNameParser parser = new ReadNameParser();
+ PhysicalLocation loc = new PhysicalLocationShort();
+ Assert.assertTrue(parser.addLocationInformation(readName, loc));
+ Assert.assertEquals(loc.getTile(), tile);
+ Assert.assertEquals(loc.getX(), (short)x); // casting to short for the overflow
+ Assert.assertEquals(loc.getY(), (short)y); // casting to short for the overflow
+ }
+
+ // NB: this tests the case where the parsed values do not overflow, because the location stores them as ints.
+ @Test(dataProvider = "testParseReadNameDataProvider", enabled = true)
+ public void testParseReadNameOK(final String readName, final int tile, final int x, final int y) {
+ ReadNameParser parser = new ReadNameParser();
+ PhysicalLocation loc = new PhysicalLocationInt();
+ Assert.assertTrue(parser.addLocationInformation(readName, loc));
+ Assert.assertEquals(loc.getTile(), tile);
+ Assert.assertEquals(loc.getX(), x); // we store ints, so we should not overflow
+ Assert.assertEquals(loc.getY(), y); // we store ints, so we should not overflow
+ }
+
+ @DataProvider(name = "testReadNameParsing")
+ public Object[][] testReadNameParsingDataProvider() {
+ final String lastThreeFieldsRegex = "(?:.*:)?([0-9]+)[^:]*:([0-9]+)[^:]*:([0-9]+)[^:]*$";
+ return new Object[][]{
+ {lastThreeFieldsRegex, "RUNID:123:000000000-ZZZZZ:1:1105:17981:23325", 1105, 17981, 23325, true},
+ {lastThreeFieldsRegex, "RUNID:123:000000000-ZZZZZ:1:1109:22981:17995", 1109, 22981, 17995, true},
+ {lastThreeFieldsRegex, "1109:22981:17995", 1109, 22981, 17995, true},
+ {lastThreeFieldsRegex, "RUNID:7:1203:2886:82292", 1203, 2886, 82292, true},
+ {lastThreeFieldsRegex, "RUNID:7:1203:2884:16834", 1203, 2884, 16834, true},
+ {lastThreeFieldsRegex, "1109ABC:22981DEF:17995GHI", 1109, 22981, 17995, true},
+ {ReadNameParser.DEFAULT_READ_NAME_REGEX, "RUNID:123:000000000-ZZZZZ:1:1105:17981:23325", 1105, 17981, 23325, true},
+ {ReadNameParser.DEFAULT_READ_NAME_REGEX, "RUNID:123:000000000-ZZZZZ:1:1109:22981:17995", 1109, 22981, 17995, true},
+ {ReadNameParser.DEFAULT_READ_NAME_REGEX, "1109:22981:17995", 1109, 22981, 17995, false},
+ {ReadNameParser.DEFAULT_READ_NAME_REGEX, "RUNID:7:1203:2886:82292", 1203, 2886, 82292, true},
+ {ReadNameParser.DEFAULT_READ_NAME_REGEX, "RUNID:7:1203:2884:16834", 1203, 2884, 16834, true}
+ };
+ }
+
+ @Test(dataProvider = "testReadNameParsing")
+ public void testReadNameParsing(final String readNameRegex, final String readName, final int tile, final int x, final int y, final boolean addLocationInformationSucceeds) { // parses readName with the given regex and checks the resulting tile/x/y
+ final ReadNameParser parser = new ReadNameParser(readNameRegex);
+ final PhysicalLocationInt loc = new PhysicalLocationInt();
+ Assert.assertEquals(parser.addLocationInformation(readName, loc), addLocationInformationSucceeds);
+ if (addLocationInformationSucceeds) { // just check the location
+ Assert.assertEquals(loc.getTile(), tile);
+ Assert.assertEquals(loc.getX(), x);
+ Assert.assertEquals(loc.getY(), y);
+ }
+ else if (ReadNameParser.DEFAULT_READ_NAME_REGEX.equals(readNameRegex)) { // additional testing on the default regex (compared by value, not by reference)
+ int[] tokens = new int[3];
+ ReadNameParser.getLastThreeFields(readName, ':', tokens); // the last three ':'-separated fields must still parse even though the location was not added
+ Assert.assertEquals(tokens[0], tile);
+ Assert.assertEquals(tokens[1], x);
+ Assert.assertEquals(tokens[2], y);
+ }
+ }
+
+}
diff --git a/src/tests/java/picard/util/BedToIntervalListTest.java b/src/tests/java/picard/util/BedToIntervalListTest.java
index 7a825bf..6ad561e 100644
--- a/src/tests/java/picard/util/BedToIntervalListTest.java
+++ b/src/tests/java/picard/util/BedToIntervalListTest.java
@@ -26,6 +26,7 @@ public class BedToIntervalListTest {
program.INPUT = inputBedFile;
program.SEQUENCE_DICTIONARY = new File(TEST_DATA_DIR, header);
program.OUTPUT = outputFile;
+ program.UNIQUE = true;
program.doWork();
// Assert they are equal
diff --git a/src/tests/java/picard/util/IntervalListToBedTest.java b/src/tests/java/picard/util/IntervalListToBedTest.java
new file mode 100644
index 0000000..8626d78
--- /dev/null
+++ b/src/tests/java/picard/util/IntervalListToBedTest.java
@@ -0,0 +1,43 @@
+package picard.util;
+
+import htsjdk.samtools.util.IOUtil;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.List;
+
+/**
+ * Tests for IntervalListToBed
+ */
+public class IntervalListToBedTest {
+ private final String TEST_DATA_DIR = "testdata/picard/util/";
+ private final File INTERVAL_LIST = new File(TEST_DATA_DIR, "interval_list_to_bed_test.interval_list");
+ private final File BED_FILE = new File(TEST_DATA_DIR, "interval_list_to_bed_test.bed");
+
+ @Test
+ public void testConvertToBed() throws Exception {
+ final IntervalListToBed program = new IntervalListToBed();
+ final File tmp = File.createTempFile("interval_list_to_bed_test_output", ".bed");
+ tmp.deleteOnExit();
+
+ final String[] args = {
+ "INPUT=" + INTERVAL_LIST.getAbsolutePath(),
+ "OUTPUT=" + tmp.getAbsolutePath(),
+ "SCORE=333"
+ };
+ program.instanceMain(args);
+
+ final List<String> expected = IOUtil.slurpLines(BED_FILE);
+ final List<String> actual = IOUtil.slurpLines(tmp);
+
+ // Make sure we got the same number of entries!
+ Assert.assertEquals(actual.size(), expected.size());
+
+ // Then make sure the entries are the same.
+ for (int i=0; i<expected.size(); ++i) {
+ Assert.assertEquals(actual.get(i), expected.get(i));
+ }
+ }
+
+}
diff --git a/src/tests/java/picard/vcf/CallingMetricAccumulatorTest.java b/src/tests/java/picard/vcf/CallingMetricAccumulatorTest.java
new file mode 100644
index 0000000..226eabc
--- /dev/null
+++ b/src/tests/java/picard/vcf/CallingMetricAccumulatorTest.java
@@ -0,0 +1,78 @@
+package picard.vcf;
+
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.FastGenotype;
+import htsjdk.variant.variantcontext.Genotype;
+import htsjdk.variant.variantcontext.GenotypeBuilder;
+import htsjdk.variant.variantcontext.GenotypesContext;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextBuilder;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
+import static org.testng.Assert.*;
+
+/**
+ * Created by farjoun on 12/26/15.
+ */
+public class CallingMetricAccumulatorTest {
+
+ @DataProvider(name = "getSingletonSampleData")
+ public Object[][] getSingletonSampleData() {
+ final List<Object[]> retval = new ArrayList<>(10);
+
+ final Allele ARef = Allele.create("A", true);
+ final Allele G = Allele.create("G", false);
+ final Allele C = Allele.create("C", false);
+
+ final GenotypeBuilder genotypeBuilder = new GenotypeBuilder().alleles(CollectionUtil.makeList(ARef, C));
+ final VariantContextBuilder builder = new VariantContextBuilder();
+
+ // one het
+ final Genotype het = genotypeBuilder.name("het").make();
+ builder.chr("1").start(1).stop(1).alleles(CollectionUtil.makeList(ARef, C, G)).genotypes(Collections.singletonList(het));
+ retval.add(new Object[]{builder.make(), "het"});
+
+ //a het and a hom ref
+ final Genotype homref = genotypeBuilder.name("homref").alleles(CollectionUtil.makeList(ARef)).make();
+ builder.genotypes(CollectionUtil.makeList(het, homref));
+ retval.add(new Object[]{builder.make(), "het"});
+
+ // a het, a homvar and a homref
+ final Genotype homvar = genotypeBuilder.name("homvar").alleles(CollectionUtil.makeList(C)).make();
+ builder.genotypes(CollectionUtil.makeList(het, homref, homvar));
+ retval.add(new Object[]{builder.make(), null});
+
+ // two hets and a homref
+ final Genotype het2 = genotypeBuilder.name("het2").alleles(CollectionUtil.makeList(ARef, G)).make();
+ builder.genotypes(CollectionUtil.makeList(het, homref, het2));
+ retval.add(new Object[]{builder.make(), null});
+
+ // a homvar
+ builder.genotypes(CollectionUtil.makeList(homvar));
+ retval.add(new Object[]{builder.make(), null});
+
+ // a homvar, and a homref
+ builder.genotypes(CollectionUtil.makeList(homvar, homref));
+ retval.add(new Object[]{builder.make(), null});
+
+ // two homrefs
+ final Genotype homref2 = genotypeBuilder.name("homref2").alleles(CollectionUtil.makeList(ARef)).make();
+ builder.genotypes(CollectionUtil.makeList(homref, homref2));
+ retval.add(new Object[]{builder.make(), null});
+
+ return retval.toArray(new Object[retval.size()][]);
+ }
+
+ @Test(dataProvider = "getSingletonSampleData")
+ public void testGetSingletonSample(final VariantContext vc, final String sample) throws Exception {
+ Assert.assertEquals(CallingMetricAccumulator.getSingletonSample(vc), sample);
+ }
+}
diff --git a/src/tests/java/picard/vcf/CollectVariantCallingMetricsTest.java b/src/tests/java/picard/vcf/CollectVariantCallingMetricsTest.java
index 80feaa8..afad347 100644
--- a/src/tests/java/picard/vcf/CollectVariantCallingMetricsTest.java
+++ b/src/tests/java/picard/vcf/CollectVariantCallingMetricsTest.java
@@ -60,7 +60,7 @@ public class CollectVariantCallingMetricsTest {
Assert.assertEquals(program.doWork(), 0);
- final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary = new MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>>();
+ final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary = new MetricsFile<>();
summary.read(new FileReader(summaryFile));
boolean parsedSummary = false;
@@ -87,33 +87,105 @@ public class CollectVariantCallingMetricsTest {
Assert.assertTrue(parsedSummary, "Did not parse summary metrics.");
- final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>>();
+ final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<>();
detail.read(new FileReader(detailFile));
final List<CollectVariantCallingMetrics.VariantCallingDetailMetrics> detailMetrics = detail.getMetrics();
- for (final CollectVariantCallingMetrics.VariantCallingDetailMetrics metrics : detail.getMetrics()) {
- if (metrics.SAMPLE_ALIAS.equals("HG00160")) {
- Assert.assertEquals(metrics.HET_HOMVAR_RATIO, 0.72549, 0.0001);
- Assert.assertEquals(metrics.TOTAL_SNPS, 81);
- Assert.assertEquals(metrics.NUM_IN_DB_SNP, 44);
- Assert.assertEquals(metrics.NOVEL_SNPS, 37);
- Assert.assertEquals(metrics.PCT_DBSNP, 0.543210, 0.01);
- Assert.assertEquals(metrics.DBSNP_TITV, 6.333333, 0.01);
- Assert.assertEquals(metrics.NOVEL_TITV, 2.7, 0.01);
- Assert.assertEquals(metrics.TOTAL_INDELS, 6);
- Assert.assertEquals(metrics.NOVEL_INDELS, 3);
- Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 3);
- Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.5, 0.01);
- Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.0, 0.01);
- Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.0, 0.01);
- Assert.assertEquals(metrics.TOTAL_MULTIALLELIC_SNPS, 0.0, 0.01);
- Assert.assertEquals(metrics.NUM_IN_DB_SNP_MULTIALLELIC, 0, 0.01);
- Assert.assertEquals(metrics.TOTAL_COMPLEX_INDELS, 1.0, 0.01);
- Assert.assertEquals(metrics.NUM_IN_DB_SNP_COMPLEX_INDELS, 0, 0.01);
- Assert.assertEquals(metrics.SNP_REFERENCE_BIAS, 0.510204, 0.01);
- Assert.assertEquals(metrics.NUM_SINGLETONS, 3);
- }
- }
+ detail.getMetrics().stream().filter(metrics -> metrics.SAMPLE_ALIAS.equals("HG00160")).forEach(metrics -> {
+ Assert.assertEquals(metrics.HET_HOMVAR_RATIO, 0.72549, 0.0001);
+ Assert.assertEquals(metrics.TOTAL_SNPS, 81);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP, 44);
+ Assert.assertEquals(metrics.NOVEL_SNPS, 37);
+ Assert.assertEquals(metrics.PCT_DBSNP, 0.543210, 0.01);
+ Assert.assertEquals(metrics.DBSNP_TITV, 6.333333, 0.01);
+ Assert.assertEquals(metrics.NOVEL_TITV, 2.7, 0.01);
+ Assert.assertEquals(metrics.TOTAL_INDELS, 6);
+ Assert.assertEquals(metrics.NOVEL_INDELS, 3);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 3);
+ Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0.5, 0.01);
+ Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.0, 0.01);
+ Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 0.0, 0.01);
+ Assert.assertEquals(metrics.TOTAL_MULTIALLELIC_SNPS, 0.0, 0.01);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_MULTIALLELIC, 0, 0.01);
+ Assert.assertEquals(metrics.TOTAL_COMPLEX_INDELS, 1.0, 0.01);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_COMPLEX_INDELS, 0, 0.01);
+ Assert.assertEquals(metrics.SNP_REFERENCE_BIAS, 0.510204, 0.01);
+ Assert.assertEquals(metrics.NUM_SINGLETONS, 3);
+ });
Assert.assertEquals(detailMetrics.size(), 50, "Did not parse the desired number of detail metrics.");
}
+
+
+ @Test
+ public void testMetricsTinyGVCF() throws IOException { // runs CollectVariantCallingMetrics on a small GVCF and checks the summary and detail metrics it writes
+ final File dbSnpFile = new File(TEST_DATA_DIR, "mini.dbsnp.vcf");
+ final File vcfFile = new File(TEST_DATA_DIR, "mini_gvcf.vcf");
+
+ final File outFile = new File(TEST_DATA_DIR, "vcmetrics_tiny_gvcf");
+ final File summaryFile = new File(outFile+".variant_calling_summary_metrics");
+ final File detailFile = new File(outFile+".variant_calling_detail_metrics");
+
+ summaryFile.deleteOnExit();
+ detailFile.deleteOnExit();
+
+ final CollectVariantCallingMetrics program = new CollectVariantCallingMetrics();
+ program.INPUT = vcfFile;
+ program.DBSNP = dbSnpFile;
+ program.OUTPUT = outFile;
+ program.GVCF_INPUT = true;
+ Assert.assertEquals(program.doWork(), 0);
+
+ final MetricsFile<CollectVariantCallingMetrics.VariantCallingSummaryMetrics, Comparable<?>> summary = new MetricsFile<>();
+ try (final FileReader reader = new FileReader(summaryFile)) { summary.read(reader); } // close the reader instead of leaking the file handle
+
+ boolean parsedSummary = false;
+ for (final CollectVariantCallingMetrics.VariantCallingSummaryMetrics metrics : summary.getMetrics()) {
+ Assert.assertEquals(metrics.TOTAL_SNPS, 20);
+ Assert.assertEquals(metrics.NOVEL_SNPS, 19);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP, 1);
+ Assert.assertEquals(metrics.FILTERED_SNPS, 0);
+
+ Assert.assertEquals(metrics.PCT_DBSNP, 0.05, 0.001);
+ Assert.assertEquals(metrics.DBSNP_TITV, 0D, 0.01);
+ Assert.assertEquals(metrics.NOVEL_TITV, 12D/(19-12), 0.01);
+
+ Assert.assertEquals(metrics.TOTAL_INDELS, 7);
+ Assert.assertEquals(metrics.NOVEL_INDELS, 7);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 0);
+ Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO,3/4D,0.01);
+ Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0, 0.01);
+ Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0, 0.01);
+ Assert.assertEquals(metrics.NUM_SINGLETONS, 8);
+
+ parsedSummary = true; // proves at least one summary row was read
+ }
+
+ Assert.assertTrue(parsedSummary, "Did not parse summary metrics.");
+
+ final MetricsFile<CollectVariantCallingMetrics.VariantCallingDetailMetrics, Comparable<?>> detail = new MetricsFile<>();
+ try (final FileReader reader = new FileReader(detailFile)) { detail.read(reader); } // close the reader instead of leaking the file handle
+ final List<CollectVariantCallingMetrics.VariantCallingDetailMetrics> detailMetrics = detail.getMetrics();
+ detailMetrics.stream().filter(metrics -> metrics.SAMPLE_ALIAS.equals("HG00160")).forEach(metrics -> { // NOTE(review): if no row has this SAMPLE_ALIAS the assertions below never run - confirm the sample name in mini_gvcf.vcf
+ Assert.assertEquals(metrics.HET_HOMVAR_RATIO, .6, 0.0001);
+ Assert.assertEquals(metrics.TOTAL_SNPS, 20);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP, 1);
+ Assert.assertEquals(metrics.NOVEL_SNPS, 19);
+ Assert.assertEquals(metrics.PCT_DBSNP, 1D/20, 0.01);
+ Assert.assertEquals(metrics.DBSNP_TITV, 0D, 0.01);
+ Assert.assertEquals(metrics.NOVEL_TITV, 12D/(19-12), 0.01);
+ Assert.assertEquals(metrics.TOTAL_INDELS, 7);
+ Assert.assertEquals(metrics.NOVEL_INDELS, 7);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_INDELS, 0);
+ Assert.assertEquals(metrics.PCT_DBSNP_INDELS, 0, 0.01);
+ Assert.assertEquals(metrics.DBSNP_INS_DEL_RATIO, 0.0, 0.01);
+ Assert.assertEquals(metrics.NOVEL_INS_DEL_RATIO, 3/4D, 0.01);
+ Assert.assertEquals(metrics.TOTAL_MULTIALLELIC_SNPS, 0.0, 0.01);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_MULTIALLELIC, 0, 0.01);
+ Assert.assertEquals(metrics.TOTAL_COMPLEX_INDELS, 0, 0.01);
+ Assert.assertEquals(metrics.NUM_IN_DB_SNP_COMPLEX_INDELS, 0, 0.01);
+ Assert.assertEquals(metrics.NUM_SINGLETONS, 8);
+ });
+
+ Assert.assertEquals(detailMetrics.size(), 1, "Did not parse the expected number of detail metrics.");
+ }
}
diff --git a/src/tests/java/picard/vcf/LiftoverVcfTest.java b/src/tests/java/picard/vcf/LiftoverVcfTest.java
index f71cf55..0a691f9 100644
--- a/src/tests/java/picard/vcf/LiftoverVcfTest.java
+++ b/src/tests/java/picard/vcf/LiftoverVcfTest.java
@@ -5,6 +5,7 @@ import htsjdk.variant.variantcontext.*;
import htsjdk.variant.vcf.VCFFileReader;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
@@ -20,6 +21,7 @@ public class LiftoverVcfTest extends CommandLineProgramTest {
private static final File TEST_DATA_PATH = new File("testdata/picard/vcf/");
private static final File CHAIN_FILE = new File(TEST_DATA_PATH, "test.over.chain");
+ private static final File CHAIN_FILE_WITH_BAD_CONTIG = new File(TEST_DATA_PATH, "test.over.badContig.chain");
private static final File REFERENCE_FILE = new File(TEST_DATA_PATH, "dummy.reference.fasta");
private static final File OUTPUT_DATA_PATH = IOUtil.createTempDir("LiftoverVcfsTest", null);
@@ -92,4 +94,34 @@ public class LiftoverVcfTest extends CommandLineProgramTest {
Assert.assertEquals(expected.get(1), actual.get(1));
}
}
+
+ @DataProvider(name = "dataTestMissingContigInReference")
+ public Object[][] dataTestHaplotypeProbabilitiesFromSequenceAddToProbs() {
+ return new Object[][]{
+ {false, LiftoverVcf.EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE},
+ {true, 0}
+ };
+ }
+
+ @Test(dataProvider = "dataTestMissingContigInReference")
+ public void testMissingContigInReference(boolean warnOnMissingContext, int expectedReturnCode) {
+ final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf");
+ final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf");
+ final File input = new File(TEST_DATA_PATH, "testLiftoverUsingMissingContig.vcf");
+
+ liftOutputFile.deleteOnExit();
+ rejectOutputFile.deleteOnExit();
+
+ // Test using WMC option
+ final String[] argsWithWarnOnMissingContig = new String[]{
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + liftOutputFile.getAbsolutePath(),
+ "REJECT=" + rejectOutputFile.getAbsolutePath(),
+ "CHAIN=" + CHAIN_FILE_WITH_BAD_CONTIG,
+ "REFERENCE_SEQUENCE=" + REFERENCE_FILE,
+ "CREATE_INDEX=false",
+ "WMC=" + warnOnMissingContext
+ };
+ Assert.assertEquals(runPicardCommandLine(argsWithWarnOnMissingContig), expectedReturnCode);
+ }
}
diff --git a/src/tests/java/picard/vcf/TestFilterVcf.java b/src/tests/java/picard/vcf/TestFilterVcf.java
index 8629746..306488b 100644
--- a/src/tests/java/picard/vcf/TestFilterVcf.java
+++ b/src/tests/java/picard/vcf/TestFilterVcf.java
@@ -33,6 +33,7 @@ import picard.PicardException;
import java.io.File;
import java.util.Set;
+import java.util.SortedSet;
import java.util.TreeSet;
/**
@@ -42,6 +43,9 @@ public class TestFilterVcf {
private final File INPUT = new File("testdata/picard/vcf/filter/testFiltering.vcf");
private final File BAD_INPUT = new File("testdata/picard/vcf/filter/testFilteringNoSeqDictionary.vcf");
+ /** Returns a sorted copy of the supplied set, for safer comparison. */
+ <T extends Comparable> SortedSet<T> sorted(Set<T> in) { return new TreeSet<T>(in); }
+
/** Tests that all records get PASS set as their filter when extreme values are used for filtering. */
@Test public void testNoFiltering() throws Exception {
final File out = testFiltering(INPUT, ".vcf.gz", 0, 0, 0, Double.MAX_VALUE);
@@ -58,7 +62,7 @@ public class TestFilterVcf {
final Set<String> fails = CollectionUtil.makeSet("tf2", "rs28566954", "rs28548431");
final File out = testFiltering(INPUT, ".vcf.gz", 0.4, 0, 0, Double.MAX_VALUE);
final ListMap<String,String> filters = slurpFilters(out);
- Assert.assertEquals(filters.keySet(), fails, "Failed sites did not match expected set of failed sites.");
+ Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites.");
}
/** Tests that genotypes with DP < 18 are marked as failed, but not >= 18. */
@@ -66,7 +70,7 @@ public class TestFilterVcf {
final Set<String> fails = CollectionUtil.makeSet("rs71509448", "rs71628926", "rs13302979", "rs2710876");
final File out = testFiltering(INPUT, ".vcf.gz", 0, 18, 0, Double.MAX_VALUE);
final ListMap<String,String> filters = slurpFilters(out);
- Assert.assertEquals(filters.keySet(), fails, "Failed sites did not match expected set of failed sites.");
+ Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites.");
}
/** Tests that genotypes with DP < 18 are marked as failed, but not >= 18. */
@@ -74,7 +78,7 @@ public class TestFilterVcf {
final Set<String> fails = CollectionUtil.makeSet("rs71509448", "rs71628926", "rs13302979", "rs2710876");
final File out = testFiltering(INPUT, ".vcf", 0, 18, 0, Double.MAX_VALUE);
final ListMap<String,String> filters = slurpFilters(out);
- Assert.assertEquals(filters.keySet(), fails, "Failed sites did not match expected set of failed sites.");
+ Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites.");
}
/** Tests that genotypes with low GQ are filtered appropriately. */
@@ -94,7 +98,7 @@ public class TestFilterVcf {
{
final File out = testFiltering(INPUT, ".vcf.gz", 0, 0, 22, Double.MAX_VALUE);
final ListMap<String, String> filters = slurpFilters(out);
- Assert.assertEquals(filters.keySet(), fails, "Failed sites did not match expected set of failed sites.");
+ Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites.");
}
}
@@ -103,7 +107,7 @@ public class TestFilterVcf {
final Set<String> fails = CollectionUtil.makeSet("rs13303033", "rs28548431", "rs2799066");
final File out = testFiltering(INPUT, ".vcf.gz", 0, 0, 0, 5.0d);
final ListMap<String,String> filters = slurpFilters(out);
- Assert.assertEquals(filters.keySet(), fails, "Failed sites did not match expected set of failed sites.");
+ Assert.assertEquals(sorted(filters.keySet()), sorted(fails), "Failed sites did not match expected set of failed sites.");
}
@Test public void testCombinedFiltering() throws Exception {
diff --git a/testdata/picard/analysis/TheoreticalSensitivity/Solexa332667_BaseQ.histo b/testdata/picard/analysis/TheoreticalSensitivity/Solexa332667_BaseQ.histo
new file mode 100644
index 0000000..9e66e96
--- /dev/null
+++ b/testdata/picard/analysis/TheoreticalSensitivity/Solexa332667_BaseQ.histo
@@ -0,0 +1,31 @@
+0.128729716
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0
+0.013145387
+0.011969151
+0.008891578
+0.00357811
+0.005038972
+0.00957659
+0.021563097
+0.028143785
+0.054015738
+0.118509035
+0.212726541
+0.22215857
+0.131805004
+0.030148724
\ No newline at end of file
diff --git a/testdata/picard/analysis/TheoreticalSensitivity/Solexa332667_DepthDist.histo b/testdata/picard/analysis/TheoreticalSensitivity/Solexa332667_DepthDist.histo
new file mode 100644
index 0000000..264ee07
--- /dev/null
+++ b/testdata/picard/analysis/TheoreticalSensitivity/Solexa332667_DepthDist.histo
@@ -0,0 +1,501 @@
+0.002356597
+0.002640274
+0.0043077
+0.006385692
+0.008813331
+0.011347967
+0.013849727
+0.016188326
+0.018278904
+0.020077608
+0.02156548
+0.022769664
+0.023672605
+0.024313442
+0.024718161
+0.02490613
+0.024943139
+0.024829145
+0.024586913
+0.024231935
+0.023799478
+0.023306373
+0.022753084
+0.02216748
+0.021540106
+0.020898714
+0.020238736
+0.019569512
+0.018896692
+0.018232878
+0.017585959
+0.016935742
+0.016300313
+0.015694512
+0.015096179
+0.014500968
+0.013932179
+0.013379929
+0.012832538
+0.012315216
+0.01181144
+0.011332007
+0.010863665
+0.010419263
+0.009992089
+0.00958355
+0.009185056
+0.008803729
+0.008431886
+0.008076616
+0.007744843
+0.007418659
+0.007108604
+0.006813973
+0.006533209
+0.006265335
+0.006004816
+0.005759956
+0.005523625
+0.005291377
+0.005071465
+0.004862542
+0.004663565
+0.004472928
+0.004288363
+0.004110733
+0.003941365
+0.003779137
+0.003626283
+0.003479599
+0.003338644
+0.003205037
+0.003075364
+0.002946045
+0.002829273
+0.002715661
+0.002604348
+0.00249898
+0.002400247
+0.002306583
+0.002214805
+0.002127224
+0.002044846
+0.001961479
+0.001886052
+0.001811585
+0.001739836
+0.001671326
+0.001606725
+0.001546628
+0.001488002
+0.001429226
+0.001376025
+0.001323712
+0.001273998
+0.001223736
+0.001178873
+0.001135413
+0.001092118
+0.001051263
+0.001011255
+0.000973433
+0.000937819
+0.000904006
+0.000869498
+0.000836618
+0.000806609
+0.000776157
+0.000748288
+0.000722159
+0.000695742
+0.000671987
+0.000648738
+0.000624383
+0.000601933
+0.000581006
+0.000559767
+0.000539987
+0.000520752
+0.000502843
+0.00048467
+0.000468713
+0.000451395
+0.00043621
+0.000422185
+0.000407462
+0.000393738
+0.000379984
+0.000367142
+0.000354459
+0.00034352
+0.000331417
+0.000319844
+0.000309041
+0.000298988
+0.000289161
+0.000279341
+0.00027016
+0.000261663
+0.000252957
+0.000245015
+0.0002377
+0.000229615
+0.000222216
+0.000215118
+0.000208411
+0.00020212
+0.000195964
+0.00019026
+0.000183962
+0.000178106
+0.000172942
+0.000167399
+0.000162088
+0.000156634
+0.000151683
+0.000147019
+0.000142916
+0.000138027
+0.000134257
+0.000130533
+0.000125803
+0.000122447
+0.000119192
+0.000115369
+0.000112349
+0.000108777
+0.000105641
+0.000103043
+9.95547E-05
+9.65619E-05
+9.38056E-05
+9.13689E-05
+8.85E-05
+8.60669E-05
+8.41271E-05
+8.14831E-05
+7.9597E-05
+7.74795E-05
+7.50012E-05
+7.31928E-05
+7.13377E-05
+6.92747E-05
+6.72547E-05
+6.53498E-05
+6.36018E-05
+6.19117E-05
+6.04429E-05
+5.8858E-05
+5.73897E-05
+5.58521E-05
+5.46348E-05
+5.30256E-05
+5.14873E-05
+5.04558E-05
+4.89344E-05
+4.80134E-05
+4.68657E-05
+4.54454E-05
+4.43807E-05
+4.32072E-05
+4.23755E-05
+4.14093E-05
+4.0001E-05
+3.92637E-05
+3.81901E-05
+3.72476E-05
+3.63086E-05
+3.54529E-05
+3.44765E-05
+3.37056E-05
+3.29177E-05
+3.2079E-05
+3.10845E-05
+3.04704E-05
+2.9993E-05
+2.9143E-05
+2.84837E-05
+2.78381E-05
+2.73702E-05
+2.65541E-05
+2.57104E-05
+2.52178E-05
+2.4782E-05
+2.4294E-05
+2.36407E-05
+2.30799E-05
+2.27306E-05
+2.20272E-05
+2.16769E-05
+2.10691E-05
+2.08597E-05
+2.04324E-05
+2.00231E-05
+1.97734E-05
+1.91392E-05
+1.89651E-05
+1.86042E-05
+1.82178E-05
+1.76239E-05
+1.73007E-05
+1.69561E-05
+1.67244E-05
+1.63653E-05
+1.59101E-05
+1.56936E-05
+1.53694E-05
+1.52366E-05
+1.48323E-05
+1.43096E-05
+1.41987E-05
+1.38523E-05
+1.35988E-05
+1.34833E-05
+1.30645E-05
+1.28579E-05
+1.27021E-05
+1.23345E-05
+1.20456E-05
+1.18564E-05
+1.16974E-05
+1.1356E-05
+1.13019E-05
+1.1E-05
+1.07387E-05
+1.07959E-05
+1.05095E-05
+1.02619E-05
+1.0127E-05
+9.85511E-06
+9.62345E-06
+9.56518E-06
+9.2237E-06
+9.20286E-06
+9.08244E-06
+8.84548E-06
+8.73565E-06
+8.63642E-06
+8.33272E-06
+8.2236E-06
+8.10636E-06
+7.91778E-06
+7.85845E-06
+7.77052E-06
+7.61796E-06
+7.52367E-06
+7.45905E-06
+7.36405E-06
+7.22633E-06
+6.99926E-06
+7.02292E-06
+6.78278E-06
+6.7457E-06
+6.59986E-06
+6.39009E-06
+6.3071E-06
+6.19516E-06
+6.09769E-06
+5.98468E-06
+5.87945E-06
+5.80917E-06
+5.6206E-06
+5.59199E-06
+5.56586E-06
+5.4804E-06
+5.38894E-06
+5.3176E-06
+5.16469E-06
+5.18093E-06
+5.02661E-06
+5.02697E-06
+4.91149E-06
+4.91149E-06
+4.77341E-06
+4.63215E-06
+4.54952E-06
+4.54457E-06
+4.51138E-06
+4.38354E-06
+4.36836E-06
+4.30161E-06
+4.22039E-06
+4.23275E-06
+4.09714E-06
+4.06713E-06
+4.01239E-06
+3.89585E-06
+3.9068E-06
+3.85842E-06
+3.83511E-06
+3.73341E-06
+3.62817E-06
+3.57096E-06
+3.53459E-06
+3.54271E-06
+3.44736E-06
+3.45054E-06
+3.42194E-06
+3.31776E-06
+3.27115E-06
+3.25172E-06
+3.25843E-06
+3.12459E-06
+3.10976E-06
+3.11011E-06
+3.03384E-06
+3.03066E-06
+2.99499E-06
+2.97839E-06
+2.87386E-06
+2.85197E-06
+2.83078E-06
+2.83184E-06
+2.76898E-06
+2.72978E-06
+2.69553E-06
+2.66763E-06
+2.58641E-06
+2.62419E-06
+2.54085E-06
+2.47305E-06
+2.48823E-06
+2.47093E-06
+2.36887E-06
+2.35757E-06
+2.40595E-06
+2.33073E-06
+2.30213E-06
+2.27564E-06
+2.23397E-06
+2.21949E-06
+2.20678E-06
+2.16864E-06
+2.13191E-06
+2.12838E-06
+2.13191E-06
+2.03727E-06
+2.08106E-06
+2.00019E-06
+1.98501E-06
+1.96311E-06
+1.9783E-06
+1.95781E-06
+1.94793E-06
+1.84057E-06
+1.81409E-06
+1.83033E-06
+1.84587E-06
+1.84022E-06
+1.82962E-06
+1.76994E-06
+1.77736E-06
+1.76571E-06
+1.6979E-06
+1.67848E-06
+1.65553E-06
+1.62727E-06
+1.59832E-06
+1.68413E-06
+1.58984E-06
+1.57678E-06
+1.58066E-06
+1.55488E-06
+1.49485E-06
+1.52027E-06
+1.48708E-06
+1.51709E-06
+1.50615E-06
+1.43481E-06
+1.47119E-06
+1.4394E-06
+1.43728E-06
+1.39915E-06
+1.41822E-06
+1.41716E-06
+1.41857E-06
+1.37937E-06
+1.36701E-06
+1.34653E-06
+1.30627E-06
+1.34017E-06
+1.30521E-06
+1.29038E-06
+1.29073E-06
+1.28685E-06
+1.29921E-06
+1.27661E-06
+1.25718E-06
+1.23423E-06
+1.21587E-06
+1.1975E-06
+1.22116E-06
+1.18055E-06
+1.1809E-06
+1.16572E-06
+1.17631E-06
+1.14842E-06
+1.13005E-06
+1.14559E-06
+1.147E-06
+1.12793E-06
+1.1018E-06
+1.10569E-06
+1.10992E-06
+1.08626E-06
+1.06013E-06
+1.10321E-06
+1.0566E-06
+1.03258E-06
+1.03612E-06
+1.02588E-06
+1.01846E-06
+1.04177E-06
+1.01281E-06
+1.0121E-06
+1.00327E-06
+9.85617E-07
+9.72198E-07
+9.25583E-07
+9.25936E-07
+9.31587E-07
+9.18873E-07
+9.18167E-07
+8.91328E-07
+8.90975E-07
+8.83206E-07
+8.9486E-07
+8.83206E-07
+8.6449E-07
+8.83912E-07
+8.68021E-07
+8.28116E-07
+8.27763E-07
+8.50364E-07
+8.30588E-07
+8.13637E-07
+8.06928E-07
+8.2176E-07
+7.93861E-07
+8.30235E-07
+7.82208E-07
+7.64551E-07
+7.87152E-07
+8.04103E-07
+7.476E-07
+7.33121E-07
+7.60313E-07
+7.45834E-07
+7.44775E-07
+7.08754E-07
+7.15817E-07
+6.97807E-07
+7.06282E-07
+7.07695E-07
+6.89685E-07
+6.77325E-07
+7.13698E-07
+7.05223E-07
+7.36299E-07
+6.76972E-07
+0.000225321
\ No newline at end of file
diff --git a/testdata/picard/analysis/TheoreticalSensitivity/test_25103070136.targeted_pcr_metrics b/testdata/picard/analysis/TheoreticalSensitivity/test_25103070136.targeted_pcr_metrics
new file mode 100644
index 0000000..27650ee
--- /dev/null
+++ b/testdata/picard/analysis/TheoreticalSensitivity/test_25103070136.targeted_pcr_metrics
@@ -0,0 +1,1013 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.analysis.directed.CollectTargetedPcrMetrics
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Thu Dec 03 13:56:55 EST 2015
+
+## METRICS CLASS picard.analysis.directed.TargetedPcrMetrics
+CUSTOM_AMPLICON_SET GENOME_SIZE AMPLICON_TERRITORY TARGET_TERRITORY TOTAL_READS PF_READS PF_BASES PF_UNIQUE_READS PCT_PF_READS PCT_PF_UQ_READS PF_UQ_READS_ALIGNED PF_SELECTED_PAIRS PF_SELECTED_UNIQUE_PAIRS PCT_PF_UQ_READS_ALIGNED PF_UQ_BASES_ALIGNED ON_AMPLICON_BASES NEAR_AMPLICON_BASES OFF_AMPLICON_BASES ON_TARGET_BASES ON_TARGET_FROM_PAIR_BASES PCT_AMPLIFIED_BASES PCT_OFF_AMPLICON ON_AMPLICON_VS_SELECTED MEAN_AMPLICON_COVERAGE MEAN_TARGET_COVERAGE FOLD_ENRICHMENT ZERO_CVG_TARGETS_PCT F [...]
+MLNM_Jun_2013_REF15026592 3101976562 204964 326478 55111504 50241790 7536268500 50241790 0.911639 0.911639 49571880 24325502 24325502 0.986666 7213664307 4916562864 2198833012 98268431 7209267910 7171188501 0.986377 0.013623 0.690975 23987.445912 22676.144098 10314.937296 0.042895 3.784403 0.957097 0.952125 0.947843 0.94547 0.321379 15.943736 0.95621 14
+
+## HISTOGRAM java.lang.Integer
+coverage count baseq_count
+0 13516 0
+1 491 264525
+2 204 86580277
+3 312 277404
+4 131 480854
+5 502 4328106
+6 50 1959765
+7 150 1742175
+8 187 1418798
+9 87 2251156
+10 147 3362760
+11 187 2925281
+12 74 2551169
+13 250 3187872
+14 94 3456967
+15 201 3419962
+16 142 2911932
+17 241 3644493
+18 62 5491472
+19 0 5118530
+20 55 6309890
+21 135 7594204
+22 45 12370867
+23 55 16359799
+24 2 21236142
+25 8 44141627
+26 172 81854030
+27 72 137262614
+28 163 137789873
+29 68 206519451
+30 176 258452231
+31 11 350597877
+32 111 451804793
+33 22 590970788
+34 101 770523153
+35 165 825341429
+36 93 860482759
+37 64 890899318
+38 44 747917596
+39 24 365848699
+40 71 228284150
+41 61 55956002
+42 61 5377120
+43 198 0
+44 62 0
+45 37 0
+46 82 0
+47 123 0
+48 64 0
+49 283 0
+50 160 0
+51 12 0
+52 114 0
+53 71 0
+54 14 0
+55 23 0
+56 19 0
+57 30 0
+58 113 0
+59 83 0
+60 5 0
+61 110 0
+62 80 0
+63 15 0
+64 74 0
+65 48 0
+66 40 0
+67 37 0
+68 115 0
+69 15 0
+70 33 0
+71 15 0
+72 14 0
+73 22 0
+74 23 0
+75 15 0
+76 43 0
+77 73 0
+78 150 0
+79 24 0
+80 61 0
+81 1 0
+82 10 0
+83 21 0
+84 19 0
+85 109 0
+86 98 0
+87 91 0
+88 39 0
+89 63 0
+90 53 0
+91 142 0
+92 61 0
+93 43 0
+94 83 0
+95 208 0
+96 24 0
+97 35 0
+98 19 0
+99 66 0
+100 12 0
+101 29 0
+102 51 0
+103 20 0
+104 74 0
+105 8 0
+106 50 0
+107 19 0
+108 26 0
+109 17 0
+110 22 0
+111 29 0
+112 79 0
+113 83 0
+114 3 0
+115 53 0
+116 6 0
+117 6 0
+118 37 0
+119 4 0
+120 19 0
+121 29 0
+122 45 0
+123 7 0
+124 9 0
+125 9 0
+126 28 0
+127 52 0
+128 24 0
+129 90 0
+130 83 0
+131 60 0
+132 26 0
+133 7 0
+134 32 0
+135 46 0
+136 41 0
+137 90 0
+138 54 0
+139 93 0
+140 29 0
+141 15 0
+142 25 0
+143 11 0
+144 25 0
+145 19 0
+146 6 0
+147 16 0
+148 16 0
+149 20 0
+150 52 0
+151 62 0
+152 41 0
+153 101 0
+154 56 0
+155 21 0
+156 33 0
+157 40 0
+158 29 0
+159 37 0
+160 55 0
+161 104 0
+162 48 0
+163 42 0
+164 88 0
+165 21 0
+166 44 0
+167 21 0
+168 36 0
+169 35 0
+170 39 0
+171 27 0
+172 5 0
+173 4 0
+174 16 0
+175 11 0
+176 9 0
+177 31 0
+178 23 0
+179 12 0
+180 20 0
+181 3 0
+182 52 0
+183 11 0
+184 2 0
+185 9 0
+186 9 0
+187 5 0
+188 19 0
+189 18 0
+190 17 0
+191 23 0
+192 39 0
+193 5 0
+194 12 0
+195 11 0
+196 13 0
+197 10 0
+198 2 0
+199 6 0
+200 6 0
+201 9 0
+202 23 0
+203 27 0
+204 8 0
+205 6 0
+206 9 0
+207 6 0
+208 1 0
+209 7 0
+210 16 0
+211 3 0
+212 8 0
+213 8 0
+214 10 0
+215 5 0
+216 8 0
+217 2 0
+218 1 0
+219 4 0
+220 2 0
+221 0 0
+222 0 0
+223 0 0
+224 1 0
+225 1 0
+226 0 0
+227 2 0
+228 1 0
+229 6 0
+230 0 0
+231 3 0
+232 27 0
+233 4 0
+234 27 0
+235 60 0
+236 46 0
+237 13 0
+238 71 0
+239 13 0
+240 75 0
+241 13 0
+242 8 0
+243 4 0
+244 5 0
+245 16 0
+246 38 0
+247 43 0
+248 4 0
+249 5 0
+250 0 0
+251 1 0
+252 10 0
+253 24 0
+254 3 0
+255 6 0
+256 9 0
+257 1 0
+258 3 0
+259 10 0
+260 6 0
+261 6 0
+262 2 0
+263 10 0
+264 14 0
+265 30 0
+266 21 0
+267 20 0
+268 2 0
+269 7 0
+270 2 0
+271 8 0
+272 0 0
+273 11 0
+274 20 0
+275 6 0
+276 14 0
+277 28 0
+278 10 0
+279 25 0
+280 12 0
+281 98 0
+282 7 0
+283 10 0
+284 49 0
+285 44 0
+286 33 0
+287 90 0
+288 12 0
+289 6 0
+290 9 0
+291 14 0
+292 1 0
+293 1 0
+294 35 0
+295 75 0
+296 16 0
+297 4 0
+298 11 0
+299 14 0
+300 4 0
+301 11 0
+302 6 0
+303 5 0
+304 9 0
+305 0 0
+306 3 0
+307 14 0
+308 9 0
+309 5 0
+310 10 0
+311 2 0
+312 14 0
+313 47 0
+314 5 0
+315 7 0
+316 4 0
+317 4 0
+318 2 0
+319 12 0
+320 37 0
+321 22 0
+322 19 0
+323 2 0
+324 5 0
+325 26 0
+326 7 0
+327 7 0
+328 34 0
+329 39 0
+330 4 0
+331 2 0
+332 0 0
+333 1 0
+334 1 0
+335 3 0
+336 0 0
+337 1 0
+338 39 0
+339 12 0
+340 26 0
+341 17 0
+342 13 0
+343 1 0
+344 3 0
+345 8 0
+346 8 0
+347 13 0
+348 5 0
+349 36 0
+350 3 0
+351 1 0
+352 3 0
+353 4 0
+354 1 0
+355 2 0
+356 5 0
+357 16 0
+358 3 0
+359 0 0
+360 0 0
+361 6 0
+362 5 0
+363 7 0
+364 13 0
+365 9 0
+366 12 0
+367 5 0
+368 22 0
+369 9 0
+370 21 0
+371 4 0
+372 9 0
+373 13 0
+374 17 0
+375 15 0
+376 15 0
+377 18 0
+378 37 0
+379 14 0
+380 63 0
+381 39 0
+382 3 0
+383 20 0
+384 9 0
+385 9 0
+386 5 0
+387 43 0
+388 24 0
+389 28 0
+390 15 0
+391 16 0
+392 4 0
+393 16 0
+394 29 0
+395 42 0
+396 26 0
+397 26 0
+398 48 0
+399 12 0
+400 8 0
+401 16 0
+402 29 0
+403 25 0
+404 23 0
+405 55 0
+406 16 0
+407 12 0
+408 13 0
+409 28 0
+410 33 0
+411 9 0
+412 28 0
+413 15 0
+414 17 0
+415 53 0
+416 76 0
+417 11 0
+418 15 0
+419 32 0
+420 13 0
+421 24 0
+422 13 0
+423 8 0
+424 6 0
+425 3 0
+426 19 0
+427 3 0
+428 1 0
+429 5 0
+430 5 0
+431 17 0
+432 5 0
+433 41 0
+434 15 0
+435 25 0
+436 3 0
+437 12 0
+438 24 0
+439 27 0
+440 5 0
+441 6 0
+442 36 0
+443 8 0
+444 39 0
+445 15 0
+446 6 0
+447 49 0
+448 42 0
+449 19 0
+450 25 0
+451 33 0
+452 15 0
+453 3 0
+454 7 0
+455 17 0
+456 20 0
+457 6 0
+458 10 0
+459 17 0
+460 34 0
+461 15 0
+462 8 0
+463 13 0
+464 68 0
+465 6 0
+466 18 0
+467 14 0
+468 9 0
+469 3 0
+470 8 0
+471 4 0
+472 28 0
+473 18 0
+474 8 0
+475 80 0
+476 1 0
+477 5 0
+478 7 0
+479 35 0
+480 10 0
+481 6 0
+482 3 0
+483 15 0
+484 21 0
+485 6 0
+486 6 0
+487 2 0
+488 3 0
+489 4 0
+490 5 0
+491 2 0
+492 3 0
+493 3 0
+494 7 0
+495 7 0
+496 7 0
+497 1 0
+498 9 0
+499 6 0
+500 6 0
+501 6 0
+502 19 0
+503 9 0
+504 27 0
+505 28 0
+506 34 0
+507 49 0
+508 15 0
+509 11 0
+510 62 0
+511 34 0
+512 4 0
+513 5 0
+514 5 0
+515 6 0
+516 2 0
+517 2 0
+518 5 0
+519 2 0
+520 3 0
+521 4 0
+522 5 0
+523 20 0
+524 4 0
+525 10 0
+526 8 0
+527 8 0
+528 13 0
+529 9 0
+530 16 0
+531 11 0
+532 6 0
+533 33 0
+534 2 0
+535 7 0
+536 10 0
+537 5 0
+538 2 0
+539 3 0
+540 3 0
+541 8 0
+542 8 0
+543 17 0
+544 33 0
+545 24 0
+546 16 0
+547 11 0
+548 0 0
+549 2 0
+550 0 0
+551 3 0
+552 2 0
+553 4 0
+554 0 0
+555 16 0
+556 3 0
+557 3 0
+558 1 0
+559 5 0
+560 11 0
+561 8 0
+562 22 0
+563 14 0
+564 6 0
+565 13 0
+566 4 0
+567 11 0
+568 9 0
+569 8 0
+570 10 0
+571 24 0
+572 13 0
+573 16 0
+574 9 0
+575 7 0
+576 10 0
+577 12 0
+578 10 0
+579 8 0
+580 17 0
+581 36 0
+582 13 0
+583 39 0
+584 2 0
+585 2 0
+586 3 0
+587 4 0
+588 12 0
+589 8 0
+590 10 0
+591 15 0
+592 11 0
+593 18 0
+594 13 0
+595 10 0
+596 18 0
+597 18 0
+598 19 0
+599 14 0
+600 12 0
+601 30 0
+602 4 0
+603 5 0
+604 37 0
+605 4 0
+606 2 0
+607 7 0
+608 5 0
+609 17 0
+610 17 0
+611 25 0
+612 21 0
+613 20 0
+614 16 0
+615 12 0
+616 28 0
+617 6 0
+618 15 0
+619 16 0
+620 17 0
+621 14 0
+622 10 0
+623 55 0
+624 10 0
+625 6 0
+626 6 0
+627 12 0
+628 29 0
+629 26 0
+630 8 0
+631 19 0
+632 4 0
+633 3 0
+634 2 0
+635 32 0
+636 41 0
+637 15 0
+638 11 0
+639 14 0
+640 20 0
+641 6 0
+642 9 0
+643 6 0
+644 11 0
+645 5 0
+646 38 0
+647 12 0
+648 8 0
+649 3 0
+650 2 0
+651 43 0
+652 82 0
+653 2 0
+654 4 0
+655 5 0
+656 2 0
+657 7 0
+658 2 0
+659 3 0
+660 34 0
+661 3 0
+662 14 0
+663 11 0
+664 9 0
+665 19 0
+666 10 0
+667 22 0
+668 51 0
+669 15 0
+670 5 0
+671 7 0
+672 4 0
+673 3 0
+674 10 0
+675 11 0
+676 5 0
+677 1 0
+678 2 0
+679 4 0
+680 6 0
+681 7 0
+682 15 0
+683 5 0
+684 2 0
+685 27 0
+686 11 0
+687 8 0
+688 4 0
+689 4 0
+690 15 0
+691 5 0
+692 3 0
+693 3 0
+694 10 0
+695 10 0
+696 1 0
+697 5 0
+698 3 0
+699 1 0
+700 5 0
+701 3 0
+702 10 0
+703 3 0
+704 3 0
+705 3 0
+706 3 0
+707 6 0
+708 11 0
+709 11 0
+710 20 0
+711 32 0
+712 10 0
+713 9 0
+714 2 0
+715 2 0
+716 5 0
+717 5 0
+718 3 0
+719 5 0
+720 9 0
+721 4 0
+722 9 0
+723 13 0
+724 10 0
+725 15 0
+726 29 0
+727 39 0
+728 12 0
+729 36 0
+730 27 0
+731 49 0
+732 12 0
+733 4 0
+734 11 0
+735 19 0
+736 18 0
+737 4 0
+738 9 0
+739 35 0
+740 23 0
+741 7 0
+742 7 0
+743 8 0
+744 4 0
+745 5 0
+746 40 0
+747 3 0
+748 3 0
+749 13 0
+750 5 0
+751 3 0
+752 14 0
+753 7 0
+754 2 0
+755 4 0
+756 2 0
+757 5 0
+758 0 0
+759 14 0
+760 7 0
+761 6 0
+762 4 0
+763 5 0
+764 10 0
+765 12 0
+766 10 0
+767 4 0
+768 22 0
+769 32 0
+770 13 0
+771 15 0
+772 17 0
+773 6 0
+774 6 0
+775 7 0
+776 9 0
+777 18 0
+778 7 0
+779 22 0
+780 27 0
+781 15 0
+782 30 0
+783 7 0
+784 9 0
+785 20 0
+786 3 0
+787 4 0
+788 11 0
+789 4 0
+790 9 0
+791 9 0
+792 7 0
+793 2 0
+794 7 0
+795 2 0
+796 20 0
+797 10 0
+798 26 0
+799 24 0
+800 8 0
+801 9 0
+802 13 0
+803 27 0
+804 5 0
+805 13 0
+806 15 0
+807 6 0
+808 6 0
+809 2 0
+810 37 0
+811 2 0
+812 2 0
+813 4 0
+814 21 0
+815 6 0
+816 10 0
+817 5 0
+818 10 0
+819 9 0
+820 9 0
+821 11 0
+822 16 0
+823 14 0
+824 29 0
+825 17 0
+826 2 0
+827 3 0
+828 6 0
+829 2 0
+830 4 0
+831 1 0
+832 1 0
+833 5 0
+834 1 0
+835 1 0
+836 5 0
+837 1 0
+838 0 0
+839 3 0
+840 3 0
+841 6 0
+842 10 0
+843 5 0
+844 19 0
+845 5 0
+846 2 0
+847 20 0
+848 4 0
+849 6 0
+850 5 0
+851 3 0
+852 7 0
+853 9 0
+854 10 0
+855 5 0
+856 8 0
+857 6 0
+858 8 0
+859 8 0
+860 3 0
+861 8 0
+862 7 0
+863 3 0
+864 4 0
+865 5 0
+866 4 0
+867 5 0
+868 38 0
+869 1 0
+870 2 0
+871 3 0
+872 1 0
+873 0 0
+874 5 0
+875 4 0
+876 1 0
+877 3 0
+878 4 0
+879 6 0
+880 5 0
+881 10 0
+882 5 0
+883 6 0
+884 5 0
+885 9 0
+886 17 0
+887 14 0
+888 7 0
+889 3 0
+890 5 0
+891 11 0
+892 6 0
+893 6 0
+894 91 0
+895 13 0
+896 12 0
+897 9 0
+898 4 0
+899 7 0
+900 4 0
+901 9 0
+902 5 0
+903 5 0
+904 10 0
+905 33 0
+906 8 0
+907 8 0
+908 12 0
+909 7 0
+910 7 0
+911 11 0
+912 21 0
+913 4 0
+914 6 0
+915 10 0
+916 14 0
+917 6 0
+918 6 0
+919 7 0
+920 7 0
+921 32 0
+922 5 0
+923 5 0
+924 6 0
+925 1 0
+926 8 0
+927 3 0
+928 5 0
+929 3 0
+930 3 0
+931 8 0
+932 3 0
+933 36 0
+934 4 0
+935 4 0
+936 12 0
+937 0 0
+938 1 0
+939 7 0
+940 9 0
+941 21 0
+942 29 0
+943 13 0
+944 13 0
+945 11 0
+946 33 0
+947 4 0
+948 6 0
+949 48 0
+950 23 0
+951 6 0
+952 3 0
+953 4 0
+954 5 0
+955 3 0
+956 8 0
+957 6 0
+958 9 0
+959 5 0
+960 9 0
+961 6 0
+962 3 0
+963 7 0
+964 6 0
+965 12 0
+966 8 0
+967 5 0
+968 17 0
+969 30 0
+970 5 0
+971 9 0
+972 3 0
+973 6 0
+974 1 0
+975 12 0
+976 12 0
+977 1 0
+978 4 0
+979 4 0
+980 2 0
+981 6 0
+982 5 0
+983 2 0
+984 7 0
+985 2 0
+986 2 0
+987 2 0
+988 1 0
+989 2 0
+990 3 0
+991 6 0
+992 4 0
+993 3 0
+994 2 0
+995 3 0
+996 8 0
+997 5 0
+998 9 0
+999 13 0
+1000 291079 0
+
diff --git a/testdata/picard/analysis/TheoreticalSensitivity/test_NexPond-359781.hsMetrics b/testdata/picard/analysis/TheoreticalSensitivity/test_NexPond-359781.hsMetrics
new file mode 100644
index 0000000..1dc2d25
--- /dev/null
+++ b/testdata/picard/analysis/TheoreticalSensitivity/test_NexPond-359781.hsMetrics
@@ -0,0 +1,1013 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.analysis.directed.CalculateHsMetrics
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Thu Dec 03 13:58:35 EST 2015
+
+## METRICS CLASS picard.analysis.directed.HsMetrics
+BAIT_SET GENOME_SIZE BAIT_TERRITORY TARGET_TERRITORY BAIT_DESIGN_EFFICIENCY TOTAL_READS PF_READS PF_UNIQUE_READS PCT_PF_READS PCT_PF_UQ_READS PF_UQ_READS_ALIGNED PCT_PF_UQ_READS_ALIGNED PF_UQ_BASES_ALIGNED ON_BAIT_BASES NEAR_BAIT_BASES OFF_BAIT_BASES ON_TARGET_BASES PCT_SELECTED_BASES PCT_OFF_BAIT ON_BAIT_VS_SELECTED MEAN_BAIT_COVERAGE MEAN_TARGET_COVERAGE PCT_USABLE_BASES_ON_BAIT PCT_USABLE_BASES_ON_TARGET FOLD_ENRICHMENT ZERO_CVG_TARGETS_PCT FOLD_80_BASE_PENALTY PCT_TARGET_BASES_2X PCT [...]
+whole_exome_illumina_coding_v1 3101976562 28665628 37693246 1.314928 174464766 174464766 103793651 1 0.594926 93835960 0.904063 7061118624 3692561508 2442094561 926462555 4278932271 0.868794 0.131206 0.601918 128.814952 115.622534 0.278488 0.322711 56.588904 0.021594 2.408803 0.972666 0.953571 0.923559 0.882375 0.831861 0.775202 0.481278 62814838 4.298481 4.710999 5.240267 5.987469 7.164313 -1 7.510191 1.918392 0.968006 15
+
+## HISTOGRAM java.lang.Integer
+coverage count baseq_count
+0 919673 0
+1 110620 5018
+2 101178 2432605
+3 85083 50038
+4 87005 188212
+5 83331 386520
+6 88695 714864
+7 89455 1228643
+8 92059 2948220
+9 92979 5090201
+10 97157 4673584
+11 99047 3256965
+12 103143 4433519
+13 106798 3909398
+14 111385 2999384
+15 113867 2735852
+16 118549 2656600
+17 122652 3699582
+18 126326 4832306
+19 132310 5311969
+20 137384 6284419
+21 140943 8054146
+22 144532 10257202
+23 149242 12919039
+24 154010 17023983
+25 157233 21393773
+26 161570 28294058
+27 165843 52362120
+28 169009 120902803
+29 172582 261245428
+30 176174 450851980
+31 180601 574096440
+32 182797 711301560
+33 186345 661668517
+34 189570 620040737
+35 192993 482862968
+36 194724 187819618
+37 198505 0
+38 200020 0
+39 202305 0
+40 204973 0
+41 207156 0
+42 209176 0
+43 210954 0
+44 212871 0
+45 214280 0
+46 216460 0
+47 218927 0
+48 219788 0
+49 221098 0
+50 223681 0
+51 225261 0
+52 223855 0
+53 226153 0
+54 226779 0
+55 226340 0
+56 227597 0
+57 227809 0
+58 229952 0
+59 230104 0
+60 229537 0
+61 229208 0
+62 229779 0
+63 231647 0
+64 230097 0
+65 230839 0
+66 231354 0
+67 230111 0
+68 231207 0
+69 229502 0
+70 232115 0
+71 230375 0
+72 230232 0
+73 228423 0
+74 227378 0
+75 227891 0
+76 226324 0
+77 224671 0
+78 223957 0
+79 223304 0
+80 221950 0
+81 221654 0
+82 220093 0
+83 219246 0
+84 218621 0
+85 217087 0
+86 216262 0
+87 214950 0
+88 213821 0
+89 213004 0
+90 211915 0
+91 210523 0
+92 208795 0
+93 207208 0
+94 205784 0
+95 204720 0
+96 203630 0
+97 203424 0
+98 201312 0
+99 199469 0
+100 199037 0
+101 197398 0
+102 195701 0
+103 194353 0
+104 192460 0
+105 191096 0
+106 188788 0
+107 188762 0
+108 186235 0
+109 185718 0
+110 184151 0
+111 182639 0
+112 180819 0
+113 179253 0
+114 177219 0
+115 175953 0
+116 174671 0
+117 172800 0
+118 171730 0
+119 170181 0
+120 168354 0
+121 166976 0
+122 165824 0
+123 164705 0
+124 162479 0
+125 160810 0
+126 159510 0
+127 158059 0
+128 156810 0
+129 154541 0
+130 153962 0
+131 152343 0
+132 151255 0
+133 149112 0
+134 148782 0
+135 147180 0
+136 145018 0
+137 142886 0
+138 141770 0
+139 140395 0
+140 139631 0
+141 138042 0
+142 136018 0
+143 135028 0
+144 132516 0
+145 132082 0
+146 130855 0
+147 128269 0
+148 128055 0
+149 126204 0
+150 125068 0
+151 123596 0
+152 122277 0
+153 121261 0
+154 119256 0
+155 117465 0
+156 116089 0
+157 115333 0
+158 114244 0
+159 113307 0
+160 111401 0
+161 109900 0
+162 109120 0
+163 107615 0
+164 105289 0
+165 104453 0
+166 102626 0
+167 102159 0
+168 100607 0
+169 99254 0
+170 98935 0
+171 97931 0
+172 97129 0
+173 95481 0
+174 94100 0
+175 93205 0
+176 92166 0
+177 90676 0
+178 89589 0
+179 88772 0
+180 87885 0
+181 86286 0
+182 85618 0
+183 84382 0
+184 83592 0
+185 82665 0
+186 81804 0
+187 80782 0
+188 79764 0
+189 78536 0
+190 77804 0
+191 77069 0
+192 76604 0
+193 75282 0
+194 74004 0
+195 73011 0
+196 72622 0
+197 71735 0
+198 70447 0
+199 69954 0
+200 69110 0
+201 68095 0
+202 67640 0
+203 67045 0
+204 66326 0
+205 65506 0
+206 64125 0
+207 63645 0
+208 62667 0
+209 61828 0
+210 60889 0
+211 60500 0
+212 60046 0
+213 59125 0
+214 58689 0
+215 57551 0
+216 57316 0
+217 56168 0
+218 55879 0
+219 54828 0
+220 54303 0
+221 53560 0
+222 52646 0
+223 52035 0
+224 51746 0
+225 51092 0
+226 50573 0
+227 49834 0
+228 49265 0
+229 48629 0
+230 47571 0
+231 47399 0
+232 47209 0
+233 46154 0
+234 45376 0
+235 45065 0
+236 44452 0
+237 43680 0
+238 43347 0
+239 42745 0
+240 42180 0
+241 41488 0
+242 40803 0
+243 41227 0
+244 40206 0
+245 39936 0
+246 39415 0
+247 38640 0
+248 38386 0
+249 37685 0
+250 37282 0
+251 36878 0
+252 36064 0
+253 35587 0
+254 35527 0
+255 34917 0
+256 34627 0
+257 34127 0
+258 33753 0
+259 33492 0
+260 32484 0
+261 32425 0
+262 31736 0
+263 31427 0
+264 31183 0
+265 30498 0
+266 30436 0
+267 29833 0
+268 29584 0
+269 29280 0
+270 28974 0
+271 28333 0
+272 27812 0
+273 27367 0
+274 27321 0
+275 26606 0
+276 26453 0
+277 26463 0
+278 25900 0
+279 25789 0
+280 25479 0
+281 24897 0
+282 24621 0
+283 24283 0
+284 24450 0
+285 23761 0
+286 23668 0
+287 23202 0
+288 22848 0
+289 22319 0
+290 22198 0
+291 21864 0
+292 21719 0
+293 21434 0
+294 20907 0
+295 20308 0
+296 20348 0
+297 20208 0
+298 20044 0
+299 19503 0
+300 19645 0
+301 19410 0
+302 18916 0
+303 18372 0
+304 18337 0
+305 18176 0
+306 17845 0
+307 17705 0
+308 17644 0
+309 17358 0
+310 17361 0
+311 16753 0
+312 16545 0
+313 16462 0
+314 15984 0
+315 15745 0
+316 15468 0
+317 15340 0
+318 15144 0
+319 14956 0
+320 14632 0
+321 14623 0
+322 14524 0
+323 14079 0
+324 14089 0
+325 13785 0
+326 13543 0
+327 13474 0
+328 13182 0
+329 13037 0
+330 12926 0
+331 12718 0
+332 12345 0
+333 12264 0
+334 12138 0
+335 12011 0
+336 11676 0
+337 11586 0
+338 11524 0
+339 11175 0
+340 11113 0
+341 11193 0
+342 10938 0
+343 10749 0
+344 10528 0
+345 10535 0
+346 10512 0
+347 10130 0
+348 9842 0
+349 9938 0
+350 9685 0
+351 9575 0
+352 9506 0
+353 9148 0
+354 9374 0
+355 9055 0
+356 9047 0
+357 8794 0
+358 8735 0
+359 8515 0
+360 8344 0
+361 8039 0
+362 7988 0
+363 7823 0
+364 7715 0
+365 7486 0
+366 7518 0
+367 7436 0
+368 7250 0
+369 7341 0
+370 6971 0
+371 6994 0
+372 6901 0
+373 6714 0
+374 6502 0
+375 6592 0
+376 6526 0
+377 6321 0
+378 6233 0
+379 6111 0
+380 5953 0
+381 5912 0
+382 5798 0
+383 5778 0
+384 5663 0
+385 5586 0
+386 5522 0
+387 5370 0
+388 5219 0
+389 5425 0
+390 5250 0
+391 5192 0
+392 4989 0
+393 4944 0
+394 4852 0
+395 4617 0
+396 4739 0
+397 4492 0
+398 4518 0
+399 4518 0
+400 4319 0
+401 4184 0
+402 4030 0
+403 4145 0
+404 4080 0
+405 3973 0
+406 3879 0
+407 3842 0
+408 3748 0
+409 3718 0
+410 3661 0
+411 3609 0
+412 3454 0
+413 3450 0
+414 3457 0
+415 3319 0
+416 3379 0
+417 3320 0
+418 3277 0
+419 3212 0
+420 3185 0
+421 3154 0
+422 3012 0
+423 2981 0
+424 2934 0
+425 2924 0
+426 2796 0
+427 2713 0
+428 2731 0
+429 2554 0
+430 2589 0
+431 2565 0
+432 2531 0
+433 2402 0
+434 2349 0
+435 2433 0
+436 2373 0
+437 2276 0
+438 2316 0
+439 2226 0
+440 2153 0
+441 2212 0
+442 2226 0
+443 2067 0
+444 2053 0
+445 2065 0
+446 2011 0
+447 1918 0
+448 1908 0
+449 1831 0
+450 1805 0
+451 1723 0
+452 1783 0
+453 1766 0
+454 1696 0
+455 1648 0
+456 1624 0
+457 1572 0
+458 1604 0
+459 1540 0
+460 1588 0
+461 1526 0
+462 1520 0
+463 1449 0
+464 1457 0
+465 1476 0
+466 1417 0
+467 1341 0
+468 1250 0
+469 1288 0
+470 1211 0
+471 1212 0
+472 1244 0
+473 1233 0
+474 1190 0
+475 1149 0
+476 1043 0
+477 1097 0
+478 1082 0
+479 1069 0
+480 1046 0
+481 958 0
+482 958 0
+483 956 0
+484 899 0
+485 955 0
+486 876 0
+487 889 0
+488 878 0
+489 894 0
+490 800 0
+491 872 0
+492 816 0
+493 792 0
+494 754 0
+495 722 0
+496 712 0
+497 720 0
+498 725 0
+499 717 0
+500 680 0
+501 701 0
+502 689 0
+503 649 0
+504 636 0
+505 643 0
+506 605 0
+507 572 0
+508 635 0
+509 623 0
+510 646 0
+511 561 0
+512 541 0
+513 538 0
+514 578 0
+515 561 0
+516 511 0
+517 545 0
+518 548 0
+519 506 0
+520 482 0
+521 438 0
+522 414 0
+523 470 0
+524 455 0
+525 441 0
+526 446 0
+527 397 0
+528 434 0
+529 437 0
+530 410 0
+531 418 0
+532 414 0
+533 394 0
+534 415 0
+535 405 0
+536 353 0
+537 355 0
+538 347 0
+539 347 0
+540 305 0
+541 346 0
+542 302 0
+543 317 0
+544 316 0
+545 288 0
+546 305 0
+547 306 0
+548 315 0
+549 301 0
+550 263 0
+551 268 0
+552 261 0
+553 211 0
+554 222 0
+555 243 0
+556 229 0
+557 226 0
+558 256 0
+559 216 0
+560 226 0
+561 211 0
+562 204 0
+563 202 0
+564 219 0
+565 172 0
+566 209 0
+567 161 0
+568 211 0
+569 180 0
+570 184 0
+571 167 0
+572 181 0
+573 172 0
+574 161 0
+575 167 0
+576 167 0
+577 169 0
+578 165 0
+579 136 0
+580 156 0
+581 144 0
+582 178 0
+583 149 0
+584 132 0
+585 132 0
+586 146 0
+587 154 0
+588 136 0
+589 141 0
+590 135 0
+591 138 0
+592 124 0
+593 142 0
+594 127 0
+595 130 0
+596 119 0
+597 128 0
+598 118 0
+599 105 0
+600 126 0
+601 89 0
+602 107 0
+603 110 0
+604 106 0
+605 119 0
+606 117 0
+607 100 0
+608 91 0
+609 95 0
+610 84 0
+611 94 0
+612 103 0
+613 102 0
+614 100 0
+615 85 0
+616 89 0
+617 96 0
+618 105 0
+619 97 0
+620 90 0
+621 106 0
+622 92 0
+623 74 0
+624 91 0
+625 82 0
+626 73 0
+627 104 0
+628 95 0
+629 84 0
+630 69 0
+631 80 0
+632 86 0
+633 81 0
+634 76 0
+635 83 0
+636 76 0
+637 100 0
+638 69 0
+639 76 0
+640 79 0
+641 80 0
+642 86 0
+643 80 0
+644 69 0
+645 86 0
+646 73 0
+647 84 0
+648 89 0
+649 84 0
+650 71 0
+651 64 0
+652 68 0
+653 69 0
+654 65 0
+655 73 0
+656 52 0
+657 82 0
+658 63 0
+659 59 0
+660 50 0
+661 63 0
+662 58 0
+663 69 0
+664 58 0
+665 73 0
+666 50 0
+667 41 0
+668 41 0
+669 41 0
+670 73 0
+671 59 0
+672 62 0
+673 45 0
+674 45 0
+675 47 0
+676 52 0
+677 44 0
+678 43 0
+679 45 0
+680 49 0
+681 45 0
+682 43 0
+683 62 0
+684 45 0
+685 42 0
+686 43 0
+687 43 0
+688 29 0
+689 31 0
+690 52 0
+691 43 0
+692 36 0
+693 47 0
+694 45 0
+695 44 0
+696 49 0
+697 43 0
+698 46 0
+699 46 0
+700 40 0
+701 53 0
+702 47 0
+703 53 0
+704 49 0
+705 41 0
+706 31 0
+707 43 0
+708 39 0
+709 40 0
+710 36 0
+711 38 0
+712 25 0
+713 41 0
+714 48 0
+715 32 0
+716 35 0
+717 34 0
+718 43 0
+719 37 0
+720 45 0
+721 23 0
+722 33 0
+723 54 0
+724 48 0
+725 44 0
+726 37 0
+727 38 0
+728 42 0
+729 36 0
+730 32 0
+731 35 0
+732 38 0
+733 35 0
+734 35 0
+735 28 0
+736 29 0
+737 34 0
+738 41 0
+739 35 0
+740 24 0
+741 31 0
+742 30 0
+743 21 0
+744 23 0
+745 26 0
+746 28 0
+747 23 0
+748 28 0
+749 21 0
+750 28 0
+751 22 0
+752 34 0
+753 23 0
+754 14 0
+755 26 0
+756 34 0
+757 25 0
+758 31 0
+759 25 0
+760 19 0
+761 26 0
+762 23 0
+763 17 0
+764 25 0
+765 28 0
+766 21 0
+767 15 0
+768 21 0
+769 24 0
+770 25 0
+771 24 0
+772 18 0
+773 21 0
+774 20 0
+775 19 0
+776 25 0
+777 16 0
+778 22 0
+779 25 0
+780 19 0
+781 23 0
+782 17 0
+783 29 0
+784 22 0
+785 23 0
+786 30 0
+787 30 0
+788 18 0
+789 27 0
+790 36 0
+791 21 0
+792 21 0
+793 35 0
+794 26 0
+795 23 0
+796 30 0
+797 24 0
+798 31 0
+799 23 0
+800 23 0
+801 27 0
+802 25 0
+803 26 0
+804 24 0
+805 20 0
+806 13 0
+807 18 0
+808 17 0
+809 16 0
+810 18 0
+811 22 0
+812 18 0
+813 25 0
+814 28 0
+815 10 0
+816 23 0
+817 25 0
+818 13 0
+819 19 0
+820 18 0
+821 16 0
+822 13 0
+823 19 0
+824 17 0
+825 13 0
+826 27 0
+827 23 0
+828 25 0
+829 19 0
+830 20 0
+831 20 0
+832 21 0
+833 19 0
+834 15 0
+835 14 0
+836 16 0
+837 17 0
+838 20 0
+839 9 0
+840 21 0
+841 12 0
+842 20 0
+843 12 0
+844 21 0
+845 12 0
+846 20 0
+847 18 0
+848 16 0
+849 19 0
+850 22 0
+851 14 0
+852 27 0
+853 20 0
+854 19 0
+855 22 0
+856 11 0
+857 7 0
+858 18 0
+859 13 0
+860 24 0
+861 11 0
+862 14 0
+863 13 0
+864 18 0
+865 18 0
+866 22 0
+867 25 0
+868 10 0
+869 14 0
+870 13 0
+871 17 0
+872 19 0
+873 15 0
+874 19 0
+875 16 0
+876 13 0
+877 18 0
+878 16 0
+879 15 0
+880 11 0
+881 12 0
+882 13 0
+883 17 0
+884 13 0
+885 13 0
+886 14 0
+887 16 0
+888 10 0
+889 13 0
+890 12 0
+891 12 0
+892 18 0
+893 14 0
+894 10 0
+895 13 0
+896 13 0
+897 12 0
+898 14 0
+899 9 0
+900 17 0
+901 7 0
+902 13 0
+903 6 0
+904 10 0
+905 8 0
+906 13 0
+907 20 0
+908 16 0
+909 10 0
+910 16 0
+911 16 0
+912 11 0
+913 12 0
+914 15 0
+915 14 0
+916 10 0
+917 16 0
+918 13 0
+919 13 0
+920 11 0
+921 7 0
+922 11 0
+923 8 0
+924 12 0
+925 18 0
+926 13 0
+927 9 0
+928 5 0
+929 11 0
+930 13 0
+931 13 0
+932 9 0
+933 14 0
+934 13 0
+935 10 0
+936 8 0
+937 13 0
+938 13 0
+939 6 0
+940 11 0
+941 8 0
+942 11 0
+943 13 0
+944 11 0
+945 14 0
+946 13 0
+947 15 0
+948 6 0
+949 17 0
+950 8 0
+951 10 0
+952 12 0
+953 11 0
+954 11 0
+955 13 0
+956 11 0
+957 14 0
+958 13 0
+959 11 0
+960 8 0
+961 9 0
+962 6 0
+963 17 0
+964 11 0
+965 10 0
+966 15 0
+967 14 0
+968 12 0
+969 10 0
+970 15 0
+971 5 0
+972 11 0
+973 17 0
+974 11 0
+975 10 0
+976 11 0
+977 6 0
+978 7 0
+979 8 0
+980 8 0
+981 9 0
+982 11 0
+983 9 0
+984 9 0
+985 15 0
+986 11 0
+987 12 0
+988 13 0
+989 5 0
+990 22 0
+991 13 0
+992 11 0
+993 7 0
+994 8 0
+995 6 0
+996 10 0
+997 4 0
+998 6 0
+999 12 0
+1000 3769 0
+
diff --git a/testdata/picard/analysis/TheoreticalSensitivity/test_Solexa-316269_sampled.wgs_metrics b/testdata/picard/analysis/TheoreticalSensitivity/test_Solexa-316269_sampled.wgs_metrics
new file mode 100644
index 0000000..28932da
--- /dev/null
+++ b/testdata/picard/analysis/TheoreticalSensitivity/test_Solexa-316269_sampled.wgs_metrics
@@ -0,0 +1,263 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.analysis.CollectWgsMetricsFromSampledSites
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Fri Dec 04 16:40:10 EST 2015
+
+## METRICS CLASS picard.analysis.CollectWgsMetricsFromSampledSites$SampledWgsMetrics
+GENOME_TERRITORY MEAN_COVERAGE SD_COVERAGE MEDIAN_COVERAGE MAD_COVERAGE PCT_EXC_MAPQ PCT_EXC_DUPE PCT_EXC_UNPAIRED PCT_EXC_BASEQ PCT_EXC_OVERLAP PCT_EXC_CAPPED PCT_EXC_TOTAL PCT_5X PCT_10X PCT_15X PCT_20X PCT_25X PCT_30X PCT_40X PCT_50X PCT_60X PCT_70X PCT_80X PCT_90X PCT_100X HET_SNP_SENSITIVITY HET_SNP_Q
+12537 89.411023 15.888243 90 9 0.007874 0.0881 0.005697 0.119761 0.016499 0.000285 0.238216 0.995055 0.994097 0.993459 0.992981 0.992582 0.991705 0.988913 0.982053 0.967297 0.925261 0.79724 0.532504 0.235942 0.994971 23
+
+## HISTOGRAM java.lang.Integer
+coverage count baseq_count
+0 52 0
+1 6 0
+2 2 0
+3 0 0
+4 2 0
+5 2 0
+6 2 0
+7 2 0
+8 4 0
+9 2 0
+10 2 0
+11 2 0
+12 1 0
+13 1 0
+14 2 0
+15 1 0
+16 3 0
+17 0 0
+18 2 0
+19 0 0
+20 1 15343
+21 0 16643
+22 1 38608
+23 1 39355
+24 2 37916
+25 2 34107
+26 2 59004
+27 3 150470
+28 3 318206
+29 1 247261
+30 2 111650
+31 2 41249
+32 1 10249
+33 4 874
+34 4 11
+35 6 0
+36 6 0
+37 3 0
+38 2 0
+39 5 0
+40 6 0
+41 4 0
+42 7 0
+43 11 0
+44 9 0
+45 9 0
+46 15 0
+47 10 0
+48 8 0
+49 7 0
+50 12 0
+51 16 0
+52 17 0
+53 21 0
+54 17 0
+55 16 0
+56 16 0
+57 20 0
+58 29 0
+59 21 0
+60 26 0
+61 26 0
+62 27 0
+63 36 0
+64 49 0
+65 65 0
+66 65 0
+67 70 0
+68 75 0
+69 88 0
+70 94 0
+71 112 0
+72 99 0
+73 143 0
+74 159 0
+75 161 0
+76 164 0
+77 182 0
+78 241 0
+79 250 0
+80 235 0
+81 287 0
+82 310 0
+83 305 0
+84 340 0
+85 343 0
+86 326 0
+87 373 0
+88 408 0
+89 392 0
+90 416 0
+91 411 0
+92 403 0
+93 373 0
+94 384 0
+95 358 0
+96 398 0
+97 334 0
+98 312 0
+99 329 0
+100 288 0
+101 273 0
+102 211 0
+103 269 0
+104 212 0
+105 206 0
+106 201 0
+107 157 0
+108 150 0
+109 138 0
+110 127 0
+111 116 0
+112 97 0
+113 72 0
+114 67 0
+115 57 0
+116 66 0
+117 55 0
+118 29 0
+119 24 0
+120 24 0
+121 25 0
+122 13 0
+123 16 0
+124 10 0
+125 17 0
+126 5 0
+127 7 0
+128 6 0
+129 4 0
+130 2 0
+131 1 0
+132 2 0
+133 1 0
+134 1 0
+135 1 0
+136 0 0
+137 0 0
+138 0 0
+139 1 0
+140 0 0
+141 0 0
+142 0 0
+143 1 0
+144 1 0
+145 0 0
+146 0 0
+147 0 0
+148 0 0
+149 1 0
+150 0 0
+151 0 0
+152 0 0
+153 0 0
+154 0 0
+155 0 0
+156 0 0
+157 0 0
+158 0 0
+159 0 0
+160 0 0
+161 0 0
+162 0 0
+163 0 0
+164 0 0
+165 0 0
+166 0 0
+167 0 0
+168 0 0
+169 0 0
+170 0 0
+171 0 0
+172 1 0
+173 0 0
+174 0 0
+175 0 0
+176 0 0
+177 0 0
+178 0 0
+179 0 0
+180 0 0
+181 0 0
+182 0 0
+183 0 0
+184 0 0
+185 0 0
+186 0 0
+187 0 0
+188 0 0
+189 0 0
+190 0 0
+191 0 0
+192 0 0
+193 0 0
+194 0 0
+195 0 0
+196 0 0
+197 0 0
+198 0 0
+199 0 0
+200 0 0
+201 0 0
+202 0 0
+203 0 0
+204 0 0
+205 0 0
+206 0 0
+207 0 0
+208 1 0
+209 0 0
+210 0 0
+211 0 0
+212 0 0
+213 0 0
+214 0 0
+215 0 0
+216 0 0
+217 0 0
+218 0 0
+219 0 0
+220 0 0
+221 0 0
+222 0 0
+223 0 0
+224 0 0
+225 0 0
+226 0 0
+227 0 0
+228 0 0
+229 0 0
+230 0 0
+231 0 0
+232 0 0
+233 0 0
+234 0 0
+235 0 0
+236 0 0
+237 0 0
+238 0 0
+239 0 0
+240 0 0
+241 0 0
+242 0 0
+243 0 0
+244 0 0
+245 0 0
+246 0 0
+247 0 0
+248 0 0
+249 0 0
+250 2 0
+
diff --git a/testdata/picard/analysis/TheoreticalSensitivity/test_Solexa-332667.wgs_metrics b/testdata/picard/analysis/TheoreticalSensitivity/test_Solexa-332667.wgs_metrics
new file mode 100644
index 0000000..400cfa7
--- /dev/null
+++ b/testdata/picard/analysis/TheoreticalSensitivity/test_Solexa-332667.wgs_metrics
@@ -0,0 +1,263 @@
+## htsjdk.samtools.metrics.StringHeader
+# picard.analysis.CollectWgsMetrics
+## htsjdk.samtools.metrics.StringHeader
+# Started on: Thu Oct 15 16:14:51 EDT 2015
+
+## METRICS CLASS picard.analysis.CollectWgsMetrics$WgsMetrics
+GENOME_TERRITORY MEAN_COVERAGE SD_COVERAGE MEDIAN_COVERAGE MAD_COVERAGE PCT_EXC_MAPQ PCT_EXC_DUPE PCT_EXC_UNPAIRED PCT_EXC_BASEQ PCT_EXC_OVERLAP PCT_EXC_CAPPED PCT_EXC_TOTAL PCT_5X PCT_10X PCT_15X PCT_20X PCT_25X PCT_30X PCT_40X PCT_50X PCT_60X PCT_70X PCT_80X PCT_90X PCT_100X HET_SNP_SENSITIVITY
+2864957046 23.053724 19.649654 18 9 0.104938 0.034936 0.015472 0.104884 0.127448 0.010399 0.398076 0.926259 0.782435 0.612487 0.462918 0.345051 0.256229 0.142493 0.081214 0.04779 0.029087 0.018326 0.011941 0.00804 0.040266
+
+## HISTOGRAM java.lang.Integer
+coverage count baseq_count
+0 66919004 11508713712
+1 21193664 0
+2 29065112 0
+3 40697992 0
+4 53388046 0
+5 65436406 0
+6 75991039 0
+7 84514983 0
+8 90898371 0
+9 95210740 0
+10 97720309 0
+11 98671198 0
+12 98359378 0
+13 97090329 0
+14 95051645 0
+15 92363120 0
+16 89305139 0
+17 85907233 0
+18 82322496 0
+19 78611040 0
+20 74832421 278813071
+21 71119996 400527320
+22 67449581 779428206
+23 63876119 1761101994
+24 60404480 2244253687
+25 57055054 4231311074
+26 53850611 9428534484
+27 50763608 16921131555
+28 47797091 17554605384
+29 45004885 10167881851
+30 42345868 2280340444
+31 39824494 0
+32 37456731 0
+33 35221923 0
+34 33100322 0
+35 31105634 0
+36 29248760 0
+37 27466153 0
+38 25815384 0
+39 24263544 0
+40 22825608 0
+41 21442869 0
+42 20156329 0
+43 18950588 0
+44 17813070 0
+45 16749916 0
+46 15748724 0
+47 14823317 0
+48 13933205 0
+49 13118039 0
+50 12347439 0
+51 11622754 0
+52 10946579 0
+53 10312711 0
+54 9709990 0
+55 9151707 0
+56 8629397 0
+57 8133687 0
+58 7671117 0
+59 7234466 0
+60 6831139 0
+61 6448562 0
+62 6091512 0
+63 5759001 0
+64 5435170 0
+65 5137863 0
+66 4855154 0
+67 4584558 0
+68 4337206 0
+69 4103479 0
+70 3886496 0
+71 3674615 0
+72 3483616 0
+73 3297476 0
+74 3126721 0
+75 2960441 0
+76 2808046 0
+77 2665742 0
+78 2529316 0
+79 2397230 0
+80 2273727 0
+81 2162514 0
+82 2052841 0
+83 1950368 0
+84 1853787 0
+85 1762140 0
+86 1676451 0
+87 1594892 0
+88 1517190 0
+89 1446557 0
+90 1377952 0
+91 1310069 0
+92 1247419 0
+93 1188845 0
+94 1133870 0
+95 1078877 0
+96 1028193 0
+97 980714 0
+98 937595 0
+99 894220 0
+100 853571 0
+101 816382 0
+102 780450 0
+103 745105 0
+104 712139 0
+105 680502 0
+106 650781 0
+107 623339 0
+108 597317 0
+109 571797 0
+110 546995 0
+111 523234 0
+112 502471 0
+113 480997 0
+114 460046 0
+115 440454 0
+116 422241 0
+117 404024 0
+118 388898 0
+119 372979 0
+120 357493 0
+121 344459 0
+122 329416 0
+123 316246 0
+124 304547 0
+125 292495 0
+126 280720 0
+127 269782 0
+128 259723 0
+129 250260 0
+130 240314 0
+131 231739 0
+132 223262 0
+133 215207 0
+134 207331 0
+135 199753 0
+136 192547 0
+137 185905 0
+138 178846 0
+139 173598 0
+140 166698 0
+141 161737 0
+142 155200 0
+143 149496 0
+144 145154 0
+145 140446 0
+146 135068 0
+147 129774 0
+148 126005 0
+149 121285 0
+150 116866 0
+151 113897 0
+152 110319 0
+153 105413 0
+154 102568 0
+155 99437 0
+156 96265 0
+157 92704 0
+158 90681 0
+159 87444 0
+160 84085 0
+161 81764 0
+162 79351 0
+163 76953 0
+164 74623 0
+165 71909 0
+166 69920 0
+167 67777 0
+168 65973 0
+169 64097 0
+170 62659 0
+171 60449 0
+172 58798 0
+173 56994 0
+174 55160 0
+175 53516 0
+176 52139 0
+177 50780 0
+178 48988 0
+179 47751 0
+180 46721 0
+181 44745 0
+182 43541 0
+183 42690 0
+184 41550 0
+185 40539 0
+186 39131 0
+187 38362 0
+188 37475 0
+189 36335 0
+190 35078 0
+191 34355 0
+192 33629 0
+193 32559 0
+194 32142 0
+195 31072 0
+196 30404 0
+197 29260 0
+198 28873 0
+199 28031 0
+200 27616 0
+201 26912 0
+202 26205 0
+203 25670 0
+204 25180 0
+205 24275 0
+206 23784 0
+207 23090 0
+208 22445 0
+209 21896 0
+210 21374 0
+211 21032 0
+212 20268 0
+213 19913 0
+214 19536 0
+215 19054 0
+216 18634 0
+217 18292 0
+218 17965 0
+219 17551 0
+220 17077 0
+221 16777 0
+222 16528 0
+223 16174 0
+224 15755 0
+225 15263 0
+226 14901 0
+227 14695 0
+228 14215 0
+229 14042 0
+230 13851 0
+231 13416 0
+232 13093 0
+233 13013 0
+234 12604 0
+235 12551 0
+236 12213 0
+237 12047 0
+238 11508 0
+239 11444 0
+240 11103 0
+241 10983 0
+242 10824 0
+243 10489 0
+244 10347 0
+245 10338 0
+246 10164 0
+247 9819 0
+248 9474 0
+249 9388 0
+250 1125279 0
+
diff --git a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_detail_metrics b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_detail_metrics
index a154521..efd5cd0 100644
--- a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_detail_metrics
+++ b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_detail_metrics
@@ -5,197 +5,197 @@
## METRICS CLASS picard.analysis.artifacts.SequencingArtifactMetrics$BaitBiasDetailMetrics
SAMPLE_ALIAS LIBRARY REF_BASE ALT_BASE CONTEXT FWD_CXT_REF_BASES FWD_CXT_ALT_BASES REV_CXT_REF_BASES REV_CXT_ALT_BASES FWD_ERROR_RATE REV_ERROR_RATE ERROR_RATE QSCORE
+sample1 library1 A C AAA 1 0 0 0 0 0 0 100
+sample1 library1 A C AAC 0 0 0 0 0 0 0 100
+sample1 library1 A C AAG 3 0 4 0 0 0 0 100
sample1 library1 A C AAT 1 0 0 0 0 0 0 100
-sample1 library1 A C GAT 5 0 4 0 0 0 0 100
-sample1 library1 A C CAG 3 0 4 0 0 0 0 100
-sample1 library1 A C CAC 0 0 1 0 0 0 0 100
-sample1 library1 A C TAT 2 0 1 0 0 0 0 100
sample1 library1 A C CAA 2 0 0 0 0 0 0 100
+sample1 library1 A C CAC 0 0 1 0 0 0 0 100
+sample1 library1 A C CAG 3 0 4 0 0 0 0 100
+sample1 library1 A C CAT 3 0 5 0 0 0 0 100
+sample1 library1 A C GAA 2 0 4 0 0 0 0 100
+sample1 library1 A C GAC 0 0 2 0 0 0 0 100
+sample1 library1 A C GAG 0 0 1 0 0 0 0 100
+sample1 library1 A C GAT 5 0 4 0 0 0 0 100
sample1 library1 A C TAA 0 0 1 0 0 0 0 100
sample1 library1 A C TAC 4 0 3 0 0 0 0 100
-sample1 library1 A C AAG 3 0 4 0 0 0 0 100
-sample1 library1 A C GAG 0 0 1 0 0 0 0 100
-sample1 library1 A C AAA 1 0 0 0 0 0 0 100
-sample1 library1 A C AAC 0 0 0 0 0 0 0 100
-sample1 library1 A C GAC 0 0 2 0 0 0 0 100
-sample1 library1 A C GAA 2 0 4 0 0 0 0 100
-sample1 library1 A C CAT 3 0 5 0 0 0 0 100
sample1 library1 A C TAG 0 0 2 0 0 0 0 100
+sample1 library1 A C TAT 2 0 1 0 0 0 0 100
+sample1 library1 A G AAA 1 0 0 0 0 0 0 100
+sample1 library1 A G AAC 0 0 0 0 0 0 0 100
+sample1 library1 A G AAG 3 0 4 0 0 0 0 100
sample1 library1 A G AAT 1 0 0 0 0 0 0 100
-sample1 library1 A G GAT 5 0 4 0 0 0 0 100
-sample1 library1 A G CAG 3 0 4 0 0 0 0 100
-sample1 library1 A G CAC 0 0 1 0 0 0 0 100
-sample1 library1 A G TAT 2 0 1 0 0 0 0 100
sample1 library1 A G CAA 2 0 0 0 0 0 0 100
+sample1 library1 A G CAC 0 0 1 0 0 0 0 100
+sample1 library1 A G CAG 3 0 4 0 0 0 0 100
+sample1 library1 A G CAT 3 0 5 0 0 0 0 100
+sample1 library1 A G GAA 2 0 4 0 0 0 0 100
+sample1 library1 A G GAC 0 0 2 0 0 0 0 100
+sample1 library1 A G GAG 0 0 1 0 0 0 0 100
+sample1 library1 A G GAT 5 0 4 0 0 0 0 100
sample1 library1 A G TAA 0 0 1 0 0 0 0 100
sample1 library1 A G TAC 4 0 3 0 0 0 0 100
-sample1 library1 A G AAG 3 0 4 0 0 0 0 100
-sample1 library1 A G GAG 0 0 1 0 0 0 0 100
-sample1 library1 A G AAA 1 0 0 0 0 0 0 100
-sample1 library1 A G AAC 0 0 0 0 0 0 0 100
-sample1 library1 A G GAC 0 0 2 0 0 0 0 100
-sample1 library1 A G GAA 2 0 4 0 0 0 0 100
-sample1 library1 A G CAT 3 0 5 0 0 0 0 100
sample1 library1 A G TAG 0 0 2 0 0 0 0 100
+sample1 library1 A G TAT 2 0 1 0 0 0 0 100
+sample1 library1 A T AAA 1 0 0 0 0 0 0 100
+sample1 library1 A T AAC 0 0 0 0 0 0 0 100
+sample1 library1 A T AAG 3 0 4 0 0 0 0 100
sample1 library1 A T AAT 1 0 0 0 0 0 0 100
-sample1 library1 A T GAT 5 0 4 0 0 0 0 100
-sample1 library1 A T CAG 3 0 4 0 0 0 0 100
-sample1 library1 A T CAC 0 0 1 0 0 0 0 100
-sample1 library1 A T TAT 2 0 1 0 0 0 0 100
sample1 library1 A T CAA 2 0 0 0 0 0 0 100
+sample1 library1 A T CAC 0 0 1 0 0 0 0 100
+sample1 library1 A T CAG 3 0 4 0 0 0 0 100
+sample1 library1 A T CAT 3 0 5 0 0 0 0 100
+sample1 library1 A T GAA 2 0 4 0 0 0 0 100
+sample1 library1 A T GAC 0 0 2 0 0 0 0 100
+sample1 library1 A T GAG 0 0 1 0 0 0 0 100
+sample1 library1 A T GAT 5 0 4 0 0 0 0 100
sample1 library1 A T TAA 0 0 1 0 0 0 0 100
sample1 library1 A T TAC 4 0 3 0 0 0 0 100
-sample1 library1 A T AAG 3 0 4 0 0 0 0 100
-sample1 library1 A T GAG 0 0 1 0 0 0 0 100
-sample1 library1 A T AAA 1 0 0 0 0 0 0 100
-sample1 library1 A T AAC 0 0 0 0 0 0 0 100
-sample1 library1 A T GAC 0 0 2 0 0 0 0 100
-sample1 library1 A T GAA 2 0 4 0 0 0 0 100
-sample1 library1 A T CAT 3 0 5 0 0 0 0 100
sample1 library1 A T TAG 0 0 2 0 0 0 0 100
+sample1 library1 A T TAT 2 0 1 0 0 0 0 100
+sample1 library1 C A ACA 4 0 0 0 0 0 0 100
+sample1 library1 C A ACC 1 0 0 0 0 0 0 100
+sample1 library1 C A ACG 2 0 4 1 0 0.2 0 100
sample1 library1 C A ACT 0 0 0 1 0 1 0 100
-sample1 library1 C A GCT 4 0 3 0 0 0 0 100
sample1 library1 C A CCA 2 0 0 0 0 0 0 100
sample1 library1 C A CCC 1 1 0 0 0.5 0 0.5 3
-sample1 library1 C A TCT 0 1 1 0 1 0 1 0
-sample1 library1 C A ACA 4 0 0 0 0 0 0 100
sample1 library1 C A CCG 0 0 0 0 0 0 0 100
+sample1 library1 C A CCT 0 1 0 0 1 0 1 0
+sample1 library1 C A GCA 0 1 4 1 1 0.2 0.8 1
+sample1 library1 C A GCC 2 1 0 0 0.333333 0 0.333333 5
sample1 library1 C A GCG 1 0 4 1 0 0.2 0 100
-sample1 library1 C A ACC 1 0 0 0 0 0 0 100
-sample1 library1 C A TCG 2 0 3 1 0 0.25 0 100
-sample1 library1 C A ACG 2 0 4 1 0 0.2 0 100
+sample1 library1 C A GCT 4 0 3 0 0 0 0 100
sample1 library1 C A TCA 3 0 2 1 0 0.333333 0 100
-sample1 library1 C A GCC 2 1 0 0 0.333333 0 0.333333 5
sample1 library1 C A TCC 0 0 0 0 0 0 0 100
-sample1 library1 C A CCT 0 1 0 0 1 0 1 0
-sample1 library1 C A GCA 0 1 4 1 1 0.2 0.8 1
+sample1 library1 C A TCG 2 0 3 1 0 0.25 0 100
+sample1 library1 C A TCT 0 1 1 0 1 0 1 0
+sample1 library1 C G ACA 4 0 0 0 0 0 0 100
+sample1 library1 C G ACC 1 0 0 0 0 0 0 100
+sample1 library1 C G ACG 2 0 4 0 0 0 0 100
sample1 library1 C G ACT 0 0 0 0 0 0 0 100
-sample1 library1 C G GCT 4 0 3 0 0 0 0 100
sample1 library1 C G CCA 2 0 0 0 0 0 0 100
sample1 library1 C G CCC 1 0 0 0 0 0 0 100
-sample1 library1 C G TCT 0 0 1 0 0 0 0 100
-sample1 library1 C G ACA 4 0 0 0 0 0 0 100
sample1 library1 C G CCG 0 0 0 0 0 0 0 100
+sample1 library1 C G CCT 0 0 0 0 0 0 0 100
+sample1 library1 C G GCA 0 0 4 0 0 0 0 100
+sample1 library1 C G GCC 2 0 0 0 0 0 0 100
sample1 library1 C G GCG 1 0 4 0 0 0 0 100
-sample1 library1 C G ACC 1 0 0 0 0 0 0 100
-sample1 library1 C G TCG 2 0 3 0 0 0 0 100
-sample1 library1 C G ACG 2 0 4 0 0 0 0 100
+sample1 library1 C G GCT 4 0 3 0 0 0 0 100
sample1 library1 C G TCA 3 0 2 0 0 0 0 100
-sample1 library1 C G GCC 2 0 0 0 0 0 0 100
sample1 library1 C G TCC 0 0 0 0 0 0 0 100
-sample1 library1 C G CCT 0 0 0 0 0 0 0 100
-sample1 library1 C G GCA 0 0 4 0 0 0 0 100
+sample1 library1 C G TCG 2 0 3 0 0 0 0 100
+sample1 library1 C G TCT 0 0 1 0 0 0 0 100
+sample1 library1 C T ACA 4 0 0 0 0 0 0 100
+sample1 library1 C T ACC 1 0 0 0 0 0 0 100
+sample1 library1 C T ACG 2 0 4 0 0 0 0 100
sample1 library1 C T ACT 0 0 0 0 0 0 0 100
-sample1 library1 C T GCT 4 2 3 0 0.333333 0 0.333333 5
sample1 library1 C T CCA 2 0 0 0 0 0 0 100
sample1 library1 C T CCC 1 0 0 0 0 0 0 100
-sample1 library1 C T TCT 0 2 1 0 1 0 1 0
-sample1 library1 C T ACA 4 0 0 0 0 0 0 100
sample1 library1 C T CCG 0 2 0 0 1 0 1 0
+sample1 library1 C T CCT 0 2 0 0 1 0 1 0
+sample1 library1 C T GCA 0 0 4 0 0 0 0 100
+sample1 library1 C T GCC 2 0 0 0 0 0 0 100
sample1 library1 C T GCG 1 4 4 0 0.8 0 0.8 1
-sample1 library1 C T ACC 1 0 0 0 0 0 0 100
-sample1 library1 C T TCG 2 0 3 0 0 0 0 100
-sample1 library1 C T ACG 2 0 4 0 0 0 0 100
+sample1 library1 C T GCT 4 2 3 0 0.333333 0 0.333333 5
sample1 library1 C T TCA 3 0 2 0 0 0 0 100
-sample1 library1 C T GCC 2 0 0 0 0 0 0 100
sample1 library1 C T TCC 0 4 0 0 1 0 1 0
-sample1 library1 C T CCT 0 2 0 0 1 0 1 0
-sample1 library1 C T GCA 0 0 4 0 0 0 0 100
+sample1 library1 C T TCG 2 0 3 0 0 0 0 100
+sample1 library1 C T TCT 0 2 1 0 1 0 1 0
sample1 library1 G A AGA 1 0 0 2 0 1 0 100
+sample1 library1 G A AGC 3 0 4 2 0 0.333333 0 100
+sample1 library1 G A AGG 0 0 0 2 0 1 0 100
+sample1 library1 G A AGT 0 0 0 0 0 0 0 100
sample1 library1 G A CGA 3 0 2 0 0 0 0 100
-sample1 library1 G A GGA 0 0 0 4 0 1 0 100
sample1 library1 G A CGC 4 0 1 4 0 0.8 0 100
+sample1 library1 G A CGG 0 0 0 2 0 1 0 100
+sample1 library1 G A CGT 4 0 2 0 0 0 0 100
+sample1 library1 G A GGA 0 0 0 4 0 1 0 100
sample1 library1 G A GGC 0 0 2 0 0 0 0 100
-sample1 library1 G A AGC 3 0 4 2 0 0.333333 0 100
sample1 library1 G A GGG 0 0 1 0 0 0 0 100
-sample1 library1 G A TGG 0 0 2 0 0 0 0 100
-sample1 library1 G A TGC 4 0 0 0 0 0 0 100
-sample1 library1 G A CGT 4 0 2 0 0 0 0 100
-sample1 library1 G A TGA 2 0 3 0 0 0 0 100
sample1 library1 G A GGT 0 0 1 0 0 0 0 100
-sample1 library1 G A AGT 0 0 0 0 0 0 0 100
+sample1 library1 G A TGA 2 0 3 0 0 0 0 100
+sample1 library1 G A TGC 4 0 0 0 0 0 0 100
+sample1 library1 G A TGG 0 0 2 0 0 0 0 100
sample1 library1 G A TGT 0 0 4 0 0 0 0 100
-sample1 library1 G A AGG 0 0 0 2 0 1 0 100
-sample1 library1 G A CGG 0 0 0 2 0 1 0 100
sample1 library1 G C AGA 1 0 0 0 0 0 0 100
+sample1 library1 G C AGC 3 0 4 0 0 0 0 100
+sample1 library1 G C AGG 0 0 0 0 0 0 0 100
+sample1 library1 G C AGT 0 0 0 0 0 0 0 100
sample1 library1 G C CGA 3 0 2 0 0 0 0 100
-sample1 library1 G C GGA 0 0 0 0 0 0 0 100
sample1 library1 G C CGC 4 0 1 0 0 0 0 100
+sample1 library1 G C CGG 0 0 0 0 0 0 0 100
+sample1 library1 G C CGT 4 0 2 0 0 0 0 100
+sample1 library1 G C GGA 0 0 0 0 0 0 0 100
sample1 library1 G C GGC 0 0 2 0 0 0 0 100
-sample1 library1 G C AGC 3 0 4 0 0 0 0 100
sample1 library1 G C GGG 0 0 1 0 0 0 0 100
-sample1 library1 G C TGG 0 0 2 0 0 0 0 100
-sample1 library1 G C TGC 4 0 0 0 0 0 0 100
-sample1 library1 G C CGT 4 0 2 0 0 0 0 100
-sample1 library1 G C TGA 2 0 3 0 0 0 0 100
sample1 library1 G C GGT 0 0 1 0 0 0 0 100
-sample1 library1 G C AGT 0 0 0 0 0 0 0 100
+sample1 library1 G C TGA 2 0 3 0 0 0 0 100
+sample1 library1 G C TGC 4 0 0 0 0 0 0 100
+sample1 library1 G C TGG 0 0 2 0 0 0 0 100
sample1 library1 G C TGT 0 0 4 0 0 0 0 100
-sample1 library1 G C AGG 0 0 0 0 0 0 0 100
-sample1 library1 G C CGG 0 0 0 0 0 0 0 100
sample1 library1 G T AGA 1 0 0 1 0 1 0 100
+sample1 library1 G T AGC 3 0 4 0 0 0 0 100
+sample1 library1 G T AGG 0 0 0 1 0 1 0 100
+sample1 library1 G T AGT 0 1 0 0 1 0 1 0
sample1 library1 G T CGA 3 1 2 0 0.25 0 0.25 6
-sample1 library1 G T GGA 0 0 0 0 0 0 0 100
sample1 library1 G T CGC 4 1 1 0 0.2 0 0.2 7
+sample1 library1 G T CGG 0 0 0 0 0 0 0 100
+sample1 library1 G T CGT 4 1 2 0 0.2 0 0.2 7
+sample1 library1 G T GGA 0 0 0 0 0 0 0 100
sample1 library1 G T GGC 0 0 2 1 0 0.333333 0 100
-sample1 library1 G T AGC 3 0 4 0 0 0 0 100
sample1 library1 G T GGG 0 0 1 1 0 0.5 0 100
-sample1 library1 G T TGG 0 0 2 0 0 0 0 100
-sample1 library1 G T TGC 4 1 0 1 0.2 1 0 100
-sample1 library1 G T CGT 4 1 2 0 0.2 0 0.2 7
-sample1 library1 G T TGA 2 1 3 0 0.333333 0 0.333333 5
sample1 library1 G T GGT 0 0 1 0 0 0 0 100
-sample1 library1 G T AGT 0 1 0 0 1 0 1 0
+sample1 library1 G T TGA 2 1 3 0 0.333333 0 0.333333 5
+sample1 library1 G T TGC 4 1 0 1 0.2 1 0 100
+sample1 library1 G T TGG 0 0 2 0 0 0 0 100
sample1 library1 G T TGT 0 0 4 0 0 0 0 100
-sample1 library1 G T AGG 0 0 0 1 0 1 0 100
-sample1 library1 G T CGG 0 0 0 0 0 0 0 100
-sample1 library1 T A GTC 2 0 0 0 0 0 0 100
+sample1 library1 T A ATA 1 0 2 0 0 0 0 100
+sample1 library1 T A ATC 4 0 5 0 0 0 0 100
sample1 library1 T A ATG 5 0 3 0 0 0 0 100
-sample1 library1 T A TTC 4 0 2 0 0 0 0 100
+sample1 library1 T A ATT 0 0 1 0 0 0 0 100
+sample1 library1 T A CTA 2 0 0 0 0 0 0 100
+sample1 library1 T A CTC 1 0 0 0 0 0 0 100
+sample1 library1 T A CTG 4 0 3 0 0 0 0 100
+sample1 library1 T A CTT 4 0 3 0 0 0 0 100
sample1 library1 T A GTA 3 0 4 0 0 0 0 100
-sample1 library1 T A TTA 1 0 0 0 0 0 0 100
-sample1 library1 T A ATC 4 0 5 0 0 0 0 100
-sample1 library1 T A TTG 0 0 2 0 0 0 0 100
+sample1 library1 T A GTC 2 0 0 0 0 0 0 100
sample1 library1 T A GTG 1 0 0 0 0 0 0 100
sample1 library1 T A GTT 0 0 0 0 0 0 0 100
-sample1 library1 T A CTG 4 0 3 0 0 0 0 100
-sample1 library1 T A CTA 2 0 0 0 0 0 0 100
+sample1 library1 T A TTA 1 0 0 0 0 0 0 100
+sample1 library1 T A TTC 4 0 2 0 0 0 0 100
+sample1 library1 T A TTG 0 0 2 0 0 0 0 100
sample1 library1 T A TTT 0 0 1 0 0 0 0 100
-sample1 library1 T A CTC 1 0 0 0 0 0 0 100
-sample1 library1 T A ATA 1 0 2 0 0 0 0 100
-sample1 library1 T A ATT 0 0 1 0 0 0 0 100
-sample1 library1 T A CTT 4 0 3 0 0 0 0 100
-sample1 library1 T C GTC 2 0 0 0 0 0 0 100
+sample1 library1 T C ATA 1 0 2 0 0 0 0 100
+sample1 library1 T C ATC 4 0 5 0 0 0 0 100
sample1 library1 T C ATG 5 0 3 0 0 0 0 100
-sample1 library1 T C TTC 4 0 2 0 0 0 0 100
+sample1 library1 T C ATT 0 0 1 0 0 0 0 100
+sample1 library1 T C CTA 2 0 0 0 0 0 0 100
+sample1 library1 T C CTC 1 0 0 0 0 0 0 100
+sample1 library1 T C CTG 4 0 3 0 0 0 0 100
+sample1 library1 T C CTT 4 0 3 0 0 0 0 100
sample1 library1 T C GTA 3 0 4 0 0 0 0 100
-sample1 library1 T C TTA 1 0 0 0 0 0 0 100
-sample1 library1 T C ATC 4 0 5 0 0 0 0 100
-sample1 library1 T C TTG 0 0 2 0 0 0 0 100
+sample1 library1 T C GTC 2 0 0 0 0 0 0 100
sample1 library1 T C GTG 1 0 0 0 0 0 0 100
sample1 library1 T C GTT 0 0 0 0 0 0 0 100
-sample1 library1 T C CTG 4 0 3 0 0 0 0 100
-sample1 library1 T C CTA 2 0 0 0 0 0 0 100
+sample1 library1 T C TTA 1 0 0 0 0 0 0 100
+sample1 library1 T C TTC 4 0 2 0 0 0 0 100
+sample1 library1 T C TTG 0 0 2 0 0 0 0 100
sample1 library1 T C TTT 0 0 1 0 0 0 0 100
-sample1 library1 T C CTC 1 0 0 0 0 0 0 100
-sample1 library1 T C ATA 1 0 2 0 0 0 0 100
-sample1 library1 T C ATT 0 0 1 0 0 0 0 100
-sample1 library1 T C CTT 4 0 3 0 0 0 0 100
-sample1 library1 T G GTC 2 0 0 0 0 0 0 100
+sample1 library1 T G ATA 1 0 2 0 0 0 0 100
+sample1 library1 T G ATC 4 0 5 0 0 0 0 100
sample1 library1 T G ATG 5 0 3 0 0 0 0 100
-sample1 library1 T G TTC 4 0 2 0 0 0 0 100
+sample1 library1 T G ATT 0 0 1 0 0 0 0 100
+sample1 library1 T G CTA 2 0 0 0 0 0 0 100
+sample1 library1 T G CTC 1 0 0 0 0 0 0 100
+sample1 library1 T G CTG 4 0 3 0 0 0 0 100
+sample1 library1 T G CTT 4 0 3 0 0 0 0 100
sample1 library1 T G GTA 3 0 4 0 0 0 0 100
-sample1 library1 T G TTA 1 0 0 0 0 0 0 100
-sample1 library1 T G ATC 4 0 5 0 0 0 0 100
-sample1 library1 T G TTG 0 0 2 0 0 0 0 100
+sample1 library1 T G GTC 2 0 0 0 0 0 0 100
sample1 library1 T G GTG 1 0 0 0 0 0 0 100
sample1 library1 T G GTT 0 0 0 0 0 0 0 100
-sample1 library1 T G CTG 4 0 3 0 0 0 0 100
-sample1 library1 T G CTA 2 0 0 0 0 0 0 100
+sample1 library1 T G TTA 1 0 0 0 0 0 0 100
+sample1 library1 T G TTC 4 0 2 0 0 0 0 100
+sample1 library1 T G TTG 0 0 2 0 0 0 0 100
sample1 library1 T G TTT 0 0 1 0 0 0 0 100
-sample1 library1 T G CTC 1 0 0 0 0 0 0 100
-sample1 library1 T G ATA 1 0 2 0 0 0 0 100
-sample1 library1 T G ATT 0 0 1 0 0 0 0 100
-sample1 library1 T G CTT 4 0 3 0 0 0 0 100
diff --git a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_summary_metrics b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_summary_metrics
index 529a61e..3af6fe2 100644
--- a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_summary_metrics
+++ b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.bait_bias_summary_metrics
@@ -1,21 +1,21 @@
## htsjdk.samtools.metrics.StringHeader
-# picard.analysis.artifacts.CollectSequencingArtifactMetrics MINIMUM_INSERT_SIZE=30 MAXIMUM_INSERT_SIZE=30 CONTEXT_SIZE=1 INPUT=/Users/msooknah/Documents/gp_projects/picard-private/Picard-public/testdata/picard/analysis/CollectSequencingArtifactMetrics/test.sam OUTPUT=/Users/msooknah/Documents/scratch/picardgroup/oxog/unit/tmp_output/with_context REFERENCE_SEQUENCE=/Users/msooknah/Documents/gp_projects/picard-private/Picard-public/testdata/picard/analysis/CollectSequencingArtifactMetrics [...]
+# picard.analysis.artifacts.CollectSequencingArtifactMetrics MINIMUM_INSERT_SIZE=30 MAXIMUM_INSERT_SIZE=30 CONTEXT_SIZE=1 INPUT=/Users/farjoun/picard-private/Picard-public/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/test.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/artifactMetrics.9014093439364407247.tmp/with_context REFERENCE_SEQUENCE=/Users/farjoun/picard-private/Picard-public/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/tes [...]
## htsjdk.samtools.metrics.StringHeader
-# Started on: Thu Mar 19 17:31:03 EDT 2015
+# Started on: Fri Nov 27 18:08:05 EST 2015
## METRICS CLASS picard.analysis.artifacts.SequencingArtifactMetrics$BaitBiasSummaryMetrics
SAMPLE_ALIAS LIBRARY REF_BASE ALT_BASE TOTAL_QSCORE WORST_CXT WORST_CXT_QSCORE WORST_PRE_CXT WORST_PRE_CXT_QSCORE WORST_POST_CXT WORST_POST_CXT_QSCORE ARTIFACT_NAME
-sample1 library1 A C 100 AAT 100 GAN 100 NAG 100 NA
-sample1 library1 A G 100 AAT 100 GAN 100 NAG 100 NA
-sample1 library1 A T 100 AAT 100 GAN 100 NAG 100 NA
-sample1 library1 C A 100 TCT 0 CCN 4 NCC 5 Cref
-sample1 library1 C G 100 ACT 100 TCN 100 NCC 100 NA
-sample1 library1 C T 4 TCT 0 CCN 2 NCT 2 NA
-sample1 library1 G A 100 AGA 100 AGN 100 NGC 100 NA
-sample1 library1 G C 100 AGA 100 AGN 100 NGC 100 NA
+sample1 library1 A C 100 AAA 100 AAN 100 NAA 100 NA
+sample1 library1 A G 100 AAA 100 AAN 100 NAA 100 NA
+sample1 library1 A T 100 AAA 100 AAN 100 NAA 100 NA
+sample1 library1 C A 100 CCT 0 CCN 4 NCC 5 Cref
+sample1 library1 C G 100 ACA 100 ACN 100 NCA 100 NA
+sample1 library1 C T 4 CCG 0 CCN 2 NCT 2 NA
+sample1 library1 G A 100 AGA 100 AGN 100 NGA 100 NA
+sample1 library1 G C 100 AGA 100 AGN 100 NGA 100 NA
sample1 library1 G T 14 AGT 0 CGN 7 NGT 5 Gref
-sample1 library1 T A 100 GTC 100 CTN 100 NTC 100 NA
-sample1 library1 T C 100 GTC 100 CTN 100 NTC 100 NA
-sample1 library1 T G 100 GTC 100 CTN 100 NTC 100 NA
+sample1 library1 T A 100 ATA 100 ATN 100 NTA 100 NA
+sample1 library1 T C 100 ATA 100 ATN 100 NTA 100 NA
+sample1 library1 T G 100 ATA 100 ATN 100 NTA 100 NA
diff --git a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_detail_metrics b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_detail_metrics
index e70b6ed..3ddc734 100644
--- a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_detail_metrics
+++ b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_detail_metrics
@@ -5,197 +5,197 @@
## METRICS CLASS picard.analysis.artifacts.SequencingArtifactMetrics$PreAdapterDetailMetrics
SAMPLE_ALIAS LIBRARY REF_BASE ALT_BASE CONTEXT PRO_REF_BASES PRO_ALT_BASES CON_REF_BASES CON_ALT_BASES ERROR_RATE QSCORE
+sample1 library1 A C AAA 0 0 1 0 0 100
+sample1 library1 A C AAC 0 0 0 0 0 100
+sample1 library1 A C AAG 3 0 4 0 0 100
sample1 library1 A C AAT 0 0 1 0 0 100
-sample1 library1 A C GAT 4 0 5 0 0 100
-sample1 library1 A C CAG 4 0 3 0 0 100
-sample1 library1 A C CAC 0 0 1 0 0 100
-sample1 library1 A C TAT 2 0 1 0 0 100
sample1 library1 A C CAA 1 0 1 0 0 100
+sample1 library1 A C CAC 0 0 1 0 0 100
+sample1 library1 A C CAG 4 0 3 0 0 100
+sample1 library1 A C CAT 4 0 4 0 0 100
+sample1 library1 A C GAA 3 0 3 0 0 100
+sample1 library1 A C GAC 1 0 1 0 0 100
+sample1 library1 A C GAG 1 0 0 0 0 100
+sample1 library1 A C GAT 4 0 5 0 0 100
sample1 library1 A C TAA 1 0 0 0 0 100
sample1 library1 A C TAC 3 0 4 0 0 100
-sample1 library1 A C AAG 3 0 4 0 0 100
-sample1 library1 A C GAG 1 0 0 0 0 100
-sample1 library1 A C AAA 0 0 1 0 0 100
-sample1 library1 A C AAC 0 0 0 0 0 100
-sample1 library1 A C GAC 1 0 1 0 0 100
-sample1 library1 A C GAA 3 0 3 0 0 100
-sample1 library1 A C CAT 4 0 4 0 0 100
sample1 library1 A C TAG 1 0 1 0 0 100
+sample1 library1 A C TAT 2 0 1 0 0 100
+sample1 library1 A G AAA 0 0 1 0 0 100
+sample1 library1 A G AAC 0 0 0 0 0 100
+sample1 library1 A G AAG 3 0 4 0 0 100
sample1 library1 A G AAT 0 0 1 0 0 100
-sample1 library1 A G GAT 4 0 5 0 0 100
-sample1 library1 A G CAG 4 0 3 0 0 100
-sample1 library1 A G CAC 0 0 1 0 0 100
-sample1 library1 A G TAT 2 0 1 0 0 100
sample1 library1 A G CAA 1 0 1 0 0 100
+sample1 library1 A G CAC 0 0 1 0 0 100
+sample1 library1 A G CAG 4 0 3 0 0 100
+sample1 library1 A G CAT 4 0 4 0 0 100
+sample1 library1 A G GAA 3 0 3 0 0 100
+sample1 library1 A G GAC 1 0 1 0 0 100
+sample1 library1 A G GAG 1 0 0 0 0 100
+sample1 library1 A G GAT 4 0 5 0 0 100
sample1 library1 A G TAA 1 0 0 0 0 100
sample1 library1 A G TAC 3 0 4 0 0 100
-sample1 library1 A G AAG 3 0 4 0 0 100
-sample1 library1 A G GAG 1 0 0 0 0 100
-sample1 library1 A G AAA 0 0 1 0 0 100
-sample1 library1 A G AAC 0 0 0 0 0 100
-sample1 library1 A G GAC 1 0 1 0 0 100
-sample1 library1 A G GAA 3 0 3 0 0 100
-sample1 library1 A G CAT 4 0 4 0 0 100
sample1 library1 A G TAG 1 0 1 0 0 100
+sample1 library1 A G TAT 2 0 1 0 0 100
+sample1 library1 A T AAA 0 0 1 0 0 100
+sample1 library1 A T AAC 0 0 0 0 0 100
+sample1 library1 A T AAG 3 0 4 0 0 100
sample1 library1 A T AAT 0 0 1 0 0 100
-sample1 library1 A T GAT 4 0 5 0 0 100
-sample1 library1 A T CAG 4 0 3 0 0 100
-sample1 library1 A T CAC 0 0 1 0 0 100
-sample1 library1 A T TAT 2 0 1 0 0 100
sample1 library1 A T CAA 1 0 1 0 0 100
+sample1 library1 A T CAC 0 0 1 0 0 100
+sample1 library1 A T CAG 4 0 3 0 0 100
+sample1 library1 A T CAT 4 0 4 0 0 100
+sample1 library1 A T GAA 3 0 3 0 0 100
+sample1 library1 A T GAC 1 0 1 0 0 100
+sample1 library1 A T GAG 1 0 0 0 0 100
+sample1 library1 A T GAT 4 0 5 0 0 100
sample1 library1 A T TAA 1 0 0 0 0 100
sample1 library1 A T TAC 3 0 4 0 0 100
-sample1 library1 A T AAG 3 0 4 0 0 100
-sample1 library1 A T GAG 1 0 0 0 0 100
-sample1 library1 A T AAA 0 0 1 0 0 100
-sample1 library1 A T AAC 0 0 0 0 0 100
-sample1 library1 A T GAC 1 0 1 0 0 100
-sample1 library1 A T GAA 3 0 3 0 0 100
-sample1 library1 A T CAT 4 0 4 0 0 100
sample1 library1 A T TAG 1 0 1 0 0 100
+sample1 library1 A T TAT 2 0 1 0 0 100
+sample1 library1 C A ACA 4 0 0 0 0 100
+sample1 library1 C A ACC 1 0 0 0 0 100
+sample1 library1 C A ACG 3 0 3 1 0 100
sample1 library1 C A ACT 0 0 0 1 0 100
-sample1 library1 C A GCT 6 0 1 0 0 100
sample1 library1 C A CCA 2 0 0 0 0 100
sample1 library1 C A CCC 1 0 0 1 0 100
-sample1 library1 C A TCT 1 0 0 1 0 100
-sample1 library1 C A ACA 4 0 0 0 0 100
sample1 library1 C A CCG 0 0 0 0 0 100
+sample1 library1 C A CCT 0 0 0 1 0 100
+sample1 library1 C A GCA 1 0 3 2 0 100
+sample1 library1 C A GCC 2 0 0 1 0 100
sample1 library1 C A GCG 2 0 3 1 0 100
-sample1 library1 C A ACC 1 0 0 0 0 100
-sample1 library1 C A TCG 3 0 2 1 0 100
-sample1 library1 C A ACG 3 0 3 1 0 100
+sample1 library1 C A GCT 6 0 1 0 0 100
sample1 library1 C A TCA 3 0 2 1 0 100
-sample1 library1 C A GCC 2 0 0 1 0 100
sample1 library1 C A TCC 0 0 0 0 0 100
-sample1 library1 C A CCT 0 0 0 1 0 100
-sample1 library1 C A GCA 1 0 3 2 0 100
+sample1 library1 C A TCG 3 0 2 1 0 100
+sample1 library1 C A TCT 1 0 0 1 0 100
+sample1 library1 C G ACA 4 0 0 0 0 100
+sample1 library1 C G ACC 1 0 0 0 0 100
+sample1 library1 C G ACG 3 0 3 0 0 100
sample1 library1 C G ACT 0 0 0 0 0 100
-sample1 library1 C G GCT 6 0 1 0 0 100
sample1 library1 C G CCA 2 0 0 0 0 100
sample1 library1 C G CCC 1 0 0 0 0 100
-sample1 library1 C G TCT 1 0 0 0 0 100
-sample1 library1 C G ACA 4 0 0 0 0 100
sample1 library1 C G CCG 0 0 0 0 0 100
+sample1 library1 C G CCT 0 0 0 0 0 100
+sample1 library1 C G GCA 1 0 3 0 0 100
+sample1 library1 C G GCC 2 0 0 0 0 100
sample1 library1 C G GCG 2 0 3 0 0 100
-sample1 library1 C G ACC 1 0 0 0 0 100
-sample1 library1 C G TCG 3 0 2 0 0 100
-sample1 library1 C G ACG 3 0 3 0 0 100
+sample1 library1 C G GCT 6 0 1 0 0 100
sample1 library1 C G TCA 3 0 2 0 0 100
-sample1 library1 C G GCC 2 0 0 0 0 100
sample1 library1 C G TCC 0 0 0 0 0 100
-sample1 library1 C G CCT 0 0 0 0 0 100
-sample1 library1 C G GCA 1 0 3 0 0 100
+sample1 library1 C G TCG 3 0 2 0 0 100
+sample1 library1 C G TCT 1 0 0 0 0 100
+sample1 library1 C T ACA 4 0 0 0 0 100
+sample1 library1 C T ACC 1 0 0 0 0 100
+sample1 library1 C T ACG 3 0 3 0 0 100
sample1 library1 C T ACT 0 0 0 0 0 100
-sample1 library1 C T GCT 6 1 1 1 0 100
sample1 library1 C T CCA 2 0 0 0 0 100
sample1 library1 C T CCC 1 0 0 0 0 100
-sample1 library1 C T TCT 1 1 0 1 0 100
-sample1 library1 C T ACA 4 0 0 0 0 100
sample1 library1 C T CCG 0 1 0 1 0 100
+sample1 library1 C T CCT 0 1 0 1 0 100
+sample1 library1 C T GCA 1 0 3 0 0 100
+sample1 library1 C T GCC 2 0 0 0 0 100
sample1 library1 C T GCG 2 2 3 2 0 100
-sample1 library1 C T ACC 1 0 0 0 0 100
-sample1 library1 C T TCG 3 0 2 0 0 100
-sample1 library1 C T ACG 3 0 3 0 0 100
+sample1 library1 C T GCT 6 1 1 1 0 100
sample1 library1 C T TCA 3 0 2 0 0 100
-sample1 library1 C T GCC 2 0 0 0 0 100
sample1 library1 C T TCC 0 2 0 2 0 100
-sample1 library1 C T CCT 0 1 0 1 0 100
-sample1 library1 C T GCA 1 0 3 0 0 100
+sample1 library1 C T TCG 3 0 2 0 0 100
+sample1 library1 C T TCT 1 1 0 1 0 100
sample1 library1 G A AGA 0 1 1 1 0 100
+sample1 library1 G A AGC 1 1 6 1 0 100
+sample1 library1 G A AGG 0 1 0 1 0 100
+sample1 library1 G A AGT 0 0 0 0 0 100
sample1 library1 G A CGA 2 0 3 0 0 100
-sample1 library1 G A GGA 0 2 0 2 0 100
sample1 library1 G A CGC 3 2 2 2 0 100
+sample1 library1 G A CGG 0 1 0 1 0 100
+sample1 library1 G A CGT 3 0 3 0 0 100
+sample1 library1 G A GGA 0 2 0 2 0 100
sample1 library1 G A GGC 0 0 2 0 0 100
-sample1 library1 G A AGC 1 1 6 1 0 100
sample1 library1 G A GGG 0 0 1 0 0 100
-sample1 library1 G A TGG 0 0 2 0 0 100
-sample1 library1 G A TGC 3 0 1 0 0 100
-sample1 library1 G A CGT 3 0 3 0 0 100
-sample1 library1 G A TGA 2 0 3 0 0 100
sample1 library1 G A GGT 0 0 1 0 0 100
-sample1 library1 G A AGT 0 0 0 0 0 100
+sample1 library1 G A TGA 2 0 3 0 0 100
+sample1 library1 G A TGC 3 0 1 0 0 100
+sample1 library1 G A TGG 0 0 2 0 0 100
sample1 library1 G A TGT 0 0 4 0 0 100
-sample1 library1 G A AGG 0 1 0 1 0 100
-sample1 library1 G A CGG 0 1 0 1 0 100
sample1 library1 G C AGA 0 0 1 0 0 100
+sample1 library1 G C AGC 1 0 6 0 0 100
+sample1 library1 G C AGG 0 0 0 0 0 100
+sample1 library1 G C AGT 0 0 0 0 0 100
sample1 library1 G C CGA 2 0 3 0 0 100
-sample1 library1 G C GGA 0 0 0 0 0 100
sample1 library1 G C CGC 3 0 2 0 0 100
+sample1 library1 G C CGG 0 0 0 0 0 100
+sample1 library1 G C CGT 3 0 3 0 0 100
+sample1 library1 G C GGA 0 0 0 0 0 100
sample1 library1 G C GGC 0 0 2 0 0 100
-sample1 library1 G C AGC 1 0 6 0 0 100
sample1 library1 G C GGG 0 0 1 0 0 100
-sample1 library1 G C TGG 0 0 2 0 0 100
-sample1 library1 G C TGC 3 0 1 0 0 100
-sample1 library1 G C CGT 3 0 3 0 0 100
-sample1 library1 G C TGA 2 0 3 0 0 100
sample1 library1 G C GGT 0 0 1 0 0 100
-sample1 library1 G C AGT 0 0 0 0 0 100
+sample1 library1 G C TGA 2 0 3 0 0 100
+sample1 library1 G C TGC 3 0 1 0 0 100
+sample1 library1 G C TGG 0 0 2 0 0 100
sample1 library1 G C TGT 0 0 4 0 0 100
-sample1 library1 G C AGG 0 0 0 0 0 100
-sample1 library1 G C CGG 0 0 0 0 0 100
sample1 library1 G T AGA 0 1 1 0 0.5 3
+sample1 library1 G T AGC 1 0 6 0 0 100
+sample1 library1 G T AGG 0 1 0 0 1 0
+sample1 library1 G T AGT 0 1 0 0 1 0
sample1 library1 G T CGA 2 1 3 0 0.166667 8
-sample1 library1 G T GGA 0 0 0 0 0 100
sample1 library1 G T CGC 3 1 2 0 0.166667 8
+sample1 library1 G T CGG 0 0 0 0 0 100
+sample1 library1 G T CGT 3 1 3 0 0.142857 8
+sample1 library1 G T GGA 0 0 0 0 0 100
sample1 library1 G T GGC 0 1 2 0 0.333333 5
-sample1 library1 G T AGC 1 0 6 0 0 100
sample1 library1 G T GGG 0 1 1 0 0.5 3
-sample1 library1 G T TGG 0 0 2 0 0 100
-sample1 library1 G T TGC 3 2 1 0 0.333333 5
-sample1 library1 G T CGT 3 1 3 0 0.142857 8
-sample1 library1 G T TGA 2 1 3 0 0.166667 8
sample1 library1 G T GGT 0 0 1 0 0 100
-sample1 library1 G T AGT 0 1 0 0 1 0
+sample1 library1 G T TGA 2 1 3 0 0.166667 8
+sample1 library1 G T TGC 3 2 1 0 0.333333 5
+sample1 library1 G T TGG 0 0 2 0 0 100
sample1 library1 G T TGT 0 0 4 0 0 100
-sample1 library1 G T AGG 0 1 0 0 1 0
-sample1 library1 G T CGG 0 0 0 0 0 100
-sample1 library1 T A GTC 1 0 1 0 0 100
+sample1 library1 T A ATA 1 0 2 0 0 100
+sample1 library1 T A ATC 5 0 4 0 0 100
sample1 library1 T A ATG 4 0 4 0 0 100
-sample1 library1 T A TTC 3 0 3 0 0 100
+sample1 library1 T A ATT 1 0 0 0 0 100
+sample1 library1 T A CTA 1 0 1 0 0 100
+sample1 library1 T A CTC 0 0 1 0 0 100
+sample1 library1 T A CTG 3 0 4 0 0 100
+sample1 library1 T A CTT 4 0 3 0 0 100
sample1 library1 T A GTA 4 0 3 0 0 100
-sample1 library1 T A TTA 0 0 1 0 0 100
-sample1 library1 T A ATC 5 0 4 0 0 100
-sample1 library1 T A TTG 1 0 1 0 0 100
+sample1 library1 T A GTC 1 0 1 0 0 100
sample1 library1 T A GTG 1 0 0 0 0 100
sample1 library1 T A GTT 0 0 0 0 0 100
-sample1 library1 T A CTG 3 0 4 0 0 100
-sample1 library1 T A CTA 1 0 1 0 0 100
+sample1 library1 T A TTA 0 0 1 0 0 100
+sample1 library1 T A TTC 3 0 3 0 0 100
+sample1 library1 T A TTG 1 0 1 0 0 100
sample1 library1 T A TTT 1 0 0 0 0 100
-sample1 library1 T A CTC 0 0 1 0 0 100
-sample1 library1 T A ATA 1 0 2 0 0 100
-sample1 library1 T A ATT 1 0 0 0 0 100
-sample1 library1 T A CTT 4 0 3 0 0 100
-sample1 library1 T C GTC 1 0 1 0 0 100
+sample1 library1 T C ATA 1 0 2 0 0 100
+sample1 library1 T C ATC 5 0 4 0 0 100
sample1 library1 T C ATG 4 0 4 0 0 100
-sample1 library1 T C TTC 3 0 3 0 0 100
+sample1 library1 T C ATT 1 0 0 0 0 100
+sample1 library1 T C CTA 1 0 1 0 0 100
+sample1 library1 T C CTC 0 0 1 0 0 100
+sample1 library1 T C CTG 3 0 4 0 0 100
+sample1 library1 T C CTT 4 0 3 0 0 100
sample1 library1 T C GTA 4 0 3 0 0 100
-sample1 library1 T C TTA 0 0 1 0 0 100
-sample1 library1 T C ATC 5 0 4 0 0 100
-sample1 library1 T C TTG 1 0 1 0 0 100
+sample1 library1 T C GTC 1 0 1 0 0 100
sample1 library1 T C GTG 1 0 0 0 0 100
sample1 library1 T C GTT 0 0 0 0 0 100
-sample1 library1 T C CTG 3 0 4 0 0 100
-sample1 library1 T C CTA 1 0 1 0 0 100
+sample1 library1 T C TTA 0 0 1 0 0 100
+sample1 library1 T C TTC 3 0 3 0 0 100
+sample1 library1 T C TTG 1 0 1 0 0 100
sample1 library1 T C TTT 1 0 0 0 0 100
-sample1 library1 T C CTC 0 0 1 0 0 100
-sample1 library1 T C ATA 1 0 2 0 0 100
-sample1 library1 T C ATT 1 0 0 0 0 100
-sample1 library1 T C CTT 4 0 3 0 0 100
-sample1 library1 T G GTC 1 0 1 0 0 100
+sample1 library1 T G ATA 1 0 2 0 0 100
+sample1 library1 T G ATC 5 0 4 0 0 100
sample1 library1 T G ATG 4 0 4 0 0 100
-sample1 library1 T G TTC 3 0 3 0 0 100
+sample1 library1 T G ATT 1 0 0 0 0 100
+sample1 library1 T G CTA 1 0 1 0 0 100
+sample1 library1 T G CTC 0 0 1 0 0 100
+sample1 library1 T G CTG 3 0 4 0 0 100
+sample1 library1 T G CTT 4 0 3 0 0 100
sample1 library1 T G GTA 4 0 3 0 0 100
-sample1 library1 T G TTA 0 0 1 0 0 100
-sample1 library1 T G ATC 5 0 4 0 0 100
-sample1 library1 T G TTG 1 0 1 0 0 100
+sample1 library1 T G GTC 1 0 1 0 0 100
sample1 library1 T G GTG 1 0 0 0 0 100
sample1 library1 T G GTT 0 0 0 0 0 100
-sample1 library1 T G CTG 3 0 4 0 0 100
-sample1 library1 T G CTA 1 0 1 0 0 100
+sample1 library1 T G TTA 0 0 1 0 0 100
+sample1 library1 T G TTC 3 0 3 0 0 100
+sample1 library1 T G TTG 1 0 1 0 0 100
sample1 library1 T G TTT 1 0 0 0 0 100
-sample1 library1 T G CTC 0 0 1 0 0 100
-sample1 library1 T G ATA 1 0 2 0 0 100
-sample1 library1 T G ATT 1 0 0 0 0 100
-sample1 library1 T G CTT 4 0 3 0 0 100
diff --git a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_summary_metrics b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_summary_metrics
index 068011b..715f5e3 100644
--- a/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_summary_metrics
+++ b/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/ExpectedMetricsOutput/with_context.pre_adapter_summary_metrics
@@ -1,21 +1,21 @@
## htsjdk.samtools.metrics.StringHeader
-# picard.analysis.artifacts.CollectSequencingArtifactMetrics MINIMUM_INSERT_SIZE=30 MAXIMUM_INSERT_SIZE=30 CONTEXT_SIZE=1 INPUT=/Users/msooknah/Documents/gp_projects/picard-private/Picard-public/testdata/picard/analysis/CollectSequencingArtifactMetrics/test.sam OUTPUT=/Users/msooknah/Documents/scratch/picardgroup/oxog/unit/tmp_output/with_context REFERENCE_SEQUENCE=/Users/msooknah/Documents/gp_projects/picard-private/Picard-public/testdata/picard/analysis/CollectSequencingArtifactMetrics [...]
+# picard.analysis.artifacts.CollectSequencingArtifactMetrics MINIMUM_INSERT_SIZE=30 MAXIMUM_INSERT_SIZE=30 CONTEXT_SIZE=1 INPUT=/Users/farjoun/picard-private/Picard-public/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/test.sam OUTPUT=/var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/artifactMetrics.113467359995630975.tmp/with_context REFERENCE_SEQUENCE=/Users/farjoun/picard-private/Picard-public/testdata/picard/analysis/artifacts/CollectSequencingArtifactMetrics/test [...]
## htsjdk.samtools.metrics.StringHeader
-# Started on: Thu Mar 19 17:31:03 EDT 2015
+# Started on: Fri Nov 27 17:25:36 EST 2015
## METRICS CLASS picard.analysis.artifacts.SequencingArtifactMetrics$PreAdapterSummaryMetrics
SAMPLE_ALIAS LIBRARY REF_BASE ALT_BASE TOTAL_QSCORE WORST_CXT WORST_CXT_QSCORE WORST_PRE_CXT WORST_PRE_CXT_QSCORE WORST_POST_CXT WORST_POST_CXT_QSCORE ARTIFACT_NAME
-sample1 library1 A C 100 AAT 100 GAN 100 NAG 100 NA
-sample1 library1 A G 100 AAT 100 GAN 100 NAG 100 NA
-sample1 library1 A T 100 AAT 100 GAN 100 NAG 100 NA
-sample1 library1 C A 100 ACT 100 TCN 100 NCC 100 NA
-sample1 library1 C G 100 ACT 100 TCN 100 NCC 100 NA
-sample1 library1 C T 100 ACT 100 TCN 100 NCC 100 Deamination
-sample1 library1 G A 100 AGA 100 AGN 100 NGC 100 NA
-sample1 library1 G C 100 AGA 100 AGN 100 NGC 100 NA
-sample1 library1 G T 7 AGT 0 GGN 5 NGG 4 OxoG
-sample1 library1 T A 100 GTC 100 CTN 100 NTC 100 NA
-sample1 library1 T C 100 GTC 100 CTN 100 NTC 100 NA
-sample1 library1 T G 100 GTC 100 CTN 100 NTC 100 NA
+sample1 library1 A C 100 AAA 100 AAN 100 NAA 100 NA
+sample1 library1 A G 100 AAA 100 AAN 100 NAA 100 NA
+sample1 library1 A T 100 AAA 100 AAN 100 NAA 100 NA
+sample1 library1 C A 100 ACA 100 ACN 100 NCA 100 NA
+sample1 library1 C G 100 ACA 100 ACN 100 NCA 100 NA
+sample1 library1 C T 100 ACA 100 ACN 100 NCA 100 Deamination
+sample1 library1 G A 100 AGA 100 AGN 100 NGA 100 NA
+sample1 library1 G C 100 AGA 100 AGN 100 NGA 100 NA
+sample1 library1 G T 7 AGG 0 GGN 5 NGG 4 OxoG
+sample1 library1 T A 100 ATA 100 ATN 100 NTA 100 NA
+sample1 library1 T C 100 ATA 100 ATN 100 NTA 100 NA
+sample1 library1 T G 100 ATA 100 ATN 100 NTA 100 NA
diff --git a/testdata/picard/analysis/directed/CollectHsMetrics/chrM.interval_list b/testdata/picard/analysis/directed/CollectHsMetrics/chrM.interval_list
new file mode 100644
index 0000000..eb0fab5
--- /dev/null
+++ b/testdata/picard/analysis/directed/CollectHsMetrics/chrM.interval_list
@@ -0,0 +1,3 @@
+ at HD VN:1.5 GO:none SO:coordinate
+ at SQ SN:chrM LN:16571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb SP:Homo sapiens
+chrM 1 200 + interval-1
diff --git a/testdata/picard/analysis/directed/CollectHsMetrics/lowbaseq.sam b/testdata/picard/analysis/directed/CollectHsMetrics/lowbaseq.sam
new file mode 100644
index 0000000..2478ede
--- /dev/null
+++ b/testdata/picard/analysis/directed/CollectHsMetrics/lowbaseq.sam
@@ -0,0 +1,14 @@
+ at HD VN:1.5 GO:none SO:coordinate
+ at SQ SN:chrM LN:16571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb SP:Homo sapiens
+ at RG ID:1 PL:ILLUMINA SM:sample1
+ at RG ID:1.1 PL:ILLUMINA SM:sample2
+ at RG ID:1.1.2 PL:ILLUMINA SM:sample3
+ at PG ID:1 PN:A
+ at PG ID:1.1 PN:X
+ at PG ID:2 PN:B
+ at PG ID:2.3 PN:B PP:1
+ at PG ID:2.4 PN:B PP:1.1
+ at PG ID:3 PN:C PP:1
+ at PG ID:3.6 PN:Y PP:2.4
+FRAGMENT_6194MAAXX100108:2:55:3087:12679:BI 0 chrM 1 100 101M * 0 0 TCAAACATAGTCAAAGAGAGGGAGATTTCTGGATAATCACTTAAGCCCATGGTTAAACATAAATGCAAATATGTTAATGTTTACTGAATAACTTATCTGTG """"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""" RG:Z:1 E2:Z:CAGGGAGGGCGAGGGTGAGATTGTGGGGAGATGGGTGAGACGTTAAGACATTGATCCACATTTATACCCATAAGATTATGAATAATTTATTAGATATCTGT OQ:Z:CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCC at CCDCCDDCDACCC@@C [...]
+LOWBASEQ_6194MAAXX100108:2:55:3087:12679:BI 0 chrM 1 100 101M * 0 0 TCAAACATAGTCAAAGAGAGGGAGATTTCTGGATAATCACTTAAGCCCATGGTTAAACATAAATGCAAATATGTTAATGTTTACTGAATAACTTATCTGTG !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! RG:Z:1 E2:Z:CAGGGAGGGCGAGGGTGAGATTGTGGGGAGATGGGTGAGACGTTAAGACATTGATCCACATTTATACCCATAAGATTATGAATAATTTATTAGATATCTGT OQ:Z:CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCC at CCDCCDDCDACCC@@C [...]
diff --git a/testdata/picard/analysis/directed/CollectHsMetrics/lowmapq.sam b/testdata/picard/analysis/directed/CollectHsMetrics/lowmapq.sam
new file mode 100644
index 0000000..71b5f39
--- /dev/null
+++ b/testdata/picard/analysis/directed/CollectHsMetrics/lowmapq.sam
@@ -0,0 +1,14 @@
+ at HD VN:1.5 GO:none SO:coordinate
+ at SQ SN:chrM LN:16571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb SP:Homo sapiens
+ at RG ID:1 PL:ILLUMINA SM:sample1
+ at RG ID:1.1 PL:ILLUMINA SM:sample2
+ at RG ID:1.1.2 PL:ILLUMINA SM:sample3
+ at PG ID:1 PN:A
+ at PG ID:1.1 PN:X
+ at PG ID:2 PN:B
+ at PG ID:2.3 PN:B PP:1
+ at PG ID:2.4 PN:B PP:1.1
+ at PG ID:3 PN:C PP:1
+ at PG ID:3.6 PN:Y PP:2.4
+FRAGMENT_6194MAAXX100108:2:55:3087:12679:BI 0 chrM 1 100 101M * 0 0 TCAAACATAGTCAAAGAGAGGGAGATTTCTGGATAATCACTTAAGCCCATGGTTAAACATAAATGCAAATATGTTAATGTTTACTGAATAACTTATCTGTG 9<9 at A?A@?A=@BCCA at B@BAA at B@ABBACAA at A@C at ABACB@CBAA at BABA>BACCABAACDABABCCAA at B?BACAB>CCAABBACAAD?CB>?>?<9< RG:Z:1 E2:Z:CAGGGAGGGCGAGGGTGAGATTGTGGGGAGATGGGTGAGACGTTAAGACATTGATCCACATTTATACCCATAAGATTATGAATAATTTATTAGATATCTGT OQ:Z:CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCC at CCDCCDDCDACCC@@C [...]
+LOWMAPQ_6194MAAXX100108:2:55:3087:12679:BI 0 chrM 1 1 101M * 0 0 TCAAACATAGTCAAAGAGAGGGAGATTTCTGGATAATCACTTAAGCCCATGGTTAAACATAAATGCAAATATGTTAATGTTTACTGAATAACTTATCTGTG 9<9 at A?A@?A=@BCCA at B@BAA at B@ABBACAA at A@C at ABACB@CBAA at BABA>BACCABAACDABABCCAA at B?BACAB>CCAABBACAAD?CB>?>?<9< RG:Z:1 E2:Z:CAGGGAGGGCGAGGGTGAGATTGTGGGGAGATGGGTGAGACGTTAAGACATTGATCCACATTTATACCCATAAGATTATGAATAATTTATTAGATATCTGT OQ:Z:CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCC at CCDCCDDCDACCC@@C PG [...]
diff --git a/testdata/picard/analysis/directed/CollectHsMetrics/overlapping.sam b/testdata/picard/analysis/directed/CollectHsMetrics/overlapping.sam
new file mode 100644
index 0000000..df6ccfb
--- /dev/null
+++ b/testdata/picard/analysis/directed/CollectHsMetrics/overlapping.sam
@@ -0,0 +1,14 @@
+ at HD VN:1.5 GO:none SO:coordinate
+ at SQ SN:chrM LN:16571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb SP:Homo sapiens
+ at RG ID:1 PL:ILLUMINA SM:sample1
+ at RG ID:1.1 PL:ILLUMINA SM:sample2
+ at RG ID:1.1.2 PL:ILLUMINA SM:sample3
+ at PG ID:1 PN:A
+ at PG ID:1.1 PN:X
+ at PG ID:2 PN:B
+ at PG ID:2.3 PN:B PP:1
+ at PG ID:2.4 PN:B PP:1.1
+ at PG ID:3 PN:C PP:1
+ at PG ID:3.6 PN:Y PP:2.4
+OVERLAPPING_6194MAAXX100108:2:55:3087:12679:BI 67 chrM 1 100 101M = 1 360 TCAAACATAGTCAAAGAGAGGGAGATTTCTGGATAATCACTTAAGCCCATGGTTAAACATAAATGCAAATATGTTAATGTTTACTGAATAACTTATCTGTG 9<9 at A?A@?A=@BCCA at B@BAA at B@ABBACAA at A@C at ABACB@CBAA at BABA>BACCABAACDABABCCAA at B?BACAB>CCAABBACAAD?CB>?>?<9< RG:Z:1 E2:Z:CAGGGAGGGCGAGGGTGAGATTGTGGGGAGATGGGTGAGACGTTAAGACATTGATCCACATTTATACCCATAAGATTATGAATAATTTATTAGATATCTGT OQ:Z:CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCC at CCDCCDDCDA [...]
+OVERLAPPING_6194MAAXX100108:2:55:3087:12679:BI 131 chrM 1 100 101M = 1 360 TCAAACATAGTCAAAGAGAGGGAGATTTCTGGATAATCACTTAAGCCCATGGTTAAACATAAATGCAAATATGTTAATGTTTACTGAATAACTTATCTGTG 9<9 at A?A@?A=@BCCA at B@BAA at B@ABBACAA at A@C at ABACB@CBAA at BABA>BACCABAACDABABCCAA at B?BACAB>CCAABBACAAD?CB>?>?<9< RG:Z:1 E2:Z:CAGGGAGGGCGAGGGTGAGATTGTGGGGAGATGGGTGAGACGTTAAGACATTGATCCACATTTATACCCATAAGATTATGAATAATTTATTAGATATCTGT OQ:Z:CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCDCCCCCCCCCCCCCCCCCCBCCCCCBCCCCCC at CCDCCDDCD [...]
diff --git a/testdata/picard/fingerprint/haplotypeMap.txt b/testdata/picard/fingerprint/haplotypeMap.txt
new file mode 100644
index 0000000..994f5f8
--- /dev/null
+++ b/testdata/picard/fingerprint/haplotypeMap.txt
@@ -0,0 +1,73 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chrM LN:16571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d2ed829b8a1628d16cbeee88e88e39eb SP:Homo sapiens
+ at SQ SN:chr1 LN:247249719 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:9ebc6df9496613f373e73396d5b3b6b6 SP:Homo sapiens
+ at SQ SN:chr2 LN:242951149 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:b12c7373e3882120332983be99aeb18d SP:Homo sapiens
+ at SQ SN:chr3 LN:199501827 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:0e48ed7f305877f66e6fd4addbae2b9a SP:Homo sapiens
+ at SQ SN:chr4 LN:191273063 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:cf37020337904229dca8401907b626c2 SP:Homo sapiens
+ at SQ SN:chr5 LN:180857866 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:031c851664e31b2c17337fd6f9004858 SP:Homo sapiens
+ at SQ SN:chr6 LN:170899992 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:bfe8005c536131276d448ead33f1b583 SP:Homo sapiens
+ at SQ SN:chr7 LN:158821424 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:74239c5ceee3b28f0038123d958114cb SP:Homo sapiens
+ at SQ SN:chr8 LN:146274826 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:1eb00fe1ce26ce6701d2cd75c35b5ccb SP:Homo sapiens
+ at SQ SN:chr9 LN:140273252 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:ea244473e525dde0393d353ef94f974b SP:Homo sapiens
+ at SQ SN:chr10 LN:135374737 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:4ca41bf2d7d33578d2cd7ee9411e1533 SP:Homo sapiens
+ at SQ SN:chr11 LN:134452384 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:425ba5eb6c95b60bafbf2874493a56c3 SP:Homo sapiens
+ at SQ SN:chr12 LN:132349534 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d17d70060c56b4578fa570117bf19716 SP:Homo sapiens
+ at SQ SN:chr13 LN:114142980 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:c4f3084a20380a373bbbdb9ae30da587 SP:Homo sapiens
+ at SQ SN:chr14 LN:106368585 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:c1ff5d44683831e9c7c1db23f93fbb45 SP:Homo sapiens
+ at SQ SN:chr15 LN:100338915 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:5cd9622c459fe0a276b27f6ac06116d8 SP:Homo sapiens
+ at SQ SN:chr16 LN:88827254 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:3e81884229e8dc6b7f258169ec8da246 SP:Homo sapiens
+ at SQ SN:chr17 LN:78774742 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:2a5c95ed99c5298bb107f313c7044588 SP:Homo sapiens
+ at SQ SN:chr18 LN:76117153 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:3d11df432bcdc1407835d5ef2ce62634 SP:Homo sapiens
+ at SQ SN:chr19 LN:63811651 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:2f1a59077cfad51df907ac25723bff28 SP:Homo sapiens
+ at SQ SN:chr20 LN:62435964 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f126cdf8a6e0c7f379d618ff66beb2da SP:Homo sapiens
+ at SQ SN:chr21 LN:46944323 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f1b74b7f9f4cdbaeb6832ee86cb426c6 SP:Homo sapiens
+ at SQ SN:chr22 LN:49691432 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:2041e6a0c914b48dd537922cca63acb8 SP:Homo sapiens
+ at SQ SN:chrX LN:154913754 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d7e626c80ad172a4d7c95aadb94d9040 SP:Homo sapiens
+ at SQ SN:chrY LN:57772954 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:62f69d0e82a12af74bad85e2e4a8bd91 SP:Homo sapiens
+ at SQ SN:chr1_random LN:1663265 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:cc05cb1554258add2eb62e88c0746394 SP:Homo sapiens
+ at SQ SN:chr2_random LN:185571 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:18ceab9e4667a25c8a1f67869a4356ea SP:Homo sapiens
+ at SQ SN:chr3_random LN:749256 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:9cc571e918ac18afa0b2053262cadab6 SP:Homo sapiens
+ at SQ SN:chr4_random LN:842648 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:9cab2949ccf26ee0f69a875412c93740 SP:Homo sapiens
+ at SQ SN:chr5_random LN:143687 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:05926bdbff978d4a0906862eb3f773d0 SP:Homo sapiens
+ at SQ SN:chr6_random LN:1875562 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:d62eb2919ba7b9c1d382c011c5218094 SP:Homo sapiens
+ at SQ SN:chr7_random LN:549659 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:28ebfb89c858edbc4d71ff3f83d52231 SP:Homo sapiens
+ at SQ SN:chr8_random LN:943810 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:0ed5b088d843d6f6e6b181465b9e82ed SP:Homo sapiens
+ at SQ SN:chr9_random LN:1146434 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:1e3d2d2f141f0550fa28a8d0ed3fd1cf SP:Homo sapiens
+ at SQ SN:chr10_random LN:113275 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:50be2d2c6720dabeff497ffb53189daa SP:Homo sapiens
+ at SQ SN:chr11_random LN:215294 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:bfc93adc30c621d5c83eee3f0d841624 SP:Homo sapiens
+ at SQ SN:chr13_random LN:186858 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:563531689f3dbd691331fd6c5730a88b SP:Homo sapiens
+ at SQ SN:chr15_random LN:784346 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:bf885e99940d2d439d83eba791804a48 SP:Homo sapiens
+ at SQ SN:chr16_random LN:105485 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:dd06ea813a80b59d9c626b31faf6ae7f SP:Homo sapiens
+ at SQ SN:chr17_random LN:2617613 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:34d5e2005dffdfaaced1d34f60ed8fc2 SP:Homo sapiens
+ at SQ SN:chr18_random LN:4262 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f3814841f1939d3ca19072d9e89f3fd7 SP:Homo sapiens
+ at SQ SN:chr19_random LN:301858 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:420ce95da035386cc8c63094288c49e2 SP:Homo sapiens
+ at SQ SN:chr21_random LN:1679693 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:a7252115bfe5bb5525f34d039eecd096 SP:Homo sapiens
+ at SQ SN:chr22_random LN:257318 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:4f2d259b82f7647d3b668063cf18378b SP:Homo sapiens
+ at SQ SN:chrX_random LN:1719168 AS:HG18 UR:/seq/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta M5:f4d71e0758986c15e5455bf3e14e5d6f SP:Homo sapiens
+#CHROMOSOME POSITION NAME MAJOR_ALLELE MINOR_ALLELE MAF ANCHOR_SNP PANELS
+chr1 13900000 rs1 T C 0.159601 foo
+chr1 14600000 rs2 A G 0.223794 foo
+chr2 80000000 rs3 A G 0.437188 foo
+chr2 67000000 rs4 G A 0.447215 foo
+chr2 223500000 rs5 C A 0.605985 foo
+chr3 17000000 rs6 C T 0.623026 foo
+chr3 37600000 rs7 T C 0.740000 rs8 foo
+chr3 37600100 rs8 T C 0.736301
+chr3 153300000 rs9 T A 0.722731 foo
+chr4 26700000 rs10 T G 0.205686 foo
+chr5 88400000 rs11 C T 0.558528 foo
+chr6 79400000 rs12 T C 0.212500 rs13
+chr6 79400200 rs13 C A 0.258333
+chr6 79400600 rs14 A G 0.256803 rs13 foo
+chr6 153300000 rs15 T A 0.452579 foo
+chr7 86800000 rs16 A G 0.662917 foo
+chr8 71100000 rs17 A G 0.392887 foo
+chr10 129000000 rs18 G A 0.228188 foo
+chr13 34100000 rs19 T C 0.425957 foo
+chr14 95100000 rs20 A G 0.376667 foo
+chr15 45600000 rs21 C T 0.296296 foo
+chr16 49600000 rs22 G C 0.547421 foo
+chr17 20700000 rs23 A G 0.175000 foo
+chr17 40400000 rs24 A T 0.628333 foo
+chr22 31500000 rs25 A G 0.554863 foo
+chrX 24000000 rs26 T C 0.706000 foo
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAAAAA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AAAAGAAG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.1.fastq
new file mode 100644
index 0000000..a7b70ea
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 1:N:0:AACAATGG
+NTTACCAAGGTTTTCTGTTTAGTGA
++
+#1=DDFFFHHFHHJJJIHJIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 1:N:0:AACAATGG
+NATTCTGCCATATTGGTCCGACAGT
++
+#1=DDFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 1:N:0:AACAATGG
+CACAGGCTTCCACGGACTTAACGTC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 1:N:0:AACAATGG
+TGCCATCTGCTCTGGGAAGCACCAG
++
+1:=DDDDDFBC:DEFIFFFIEF at BE
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.2.fastq
new file mode 100644
index 0000000..df2b3d1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 2:N:0:AACAATGG
+ATCTGCTTCAGGTCGATCAGA
++
+FFFFHGHHHJJIGHIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 2:N:0:AACAATGG
+GTCCAGTGGTGCACTGAATGT
++
+FFFFHHHHHHIIJJJJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 2:N:0:AACAATGG
+AGTTGGCGGATGAAGCAGATA
++
+FFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 2:N:0:AACAATGG
+AGTGTTGTAATTTCGTCTTCT
++
+BDDDCCFCAACGGFFCBFFAE
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.barcode_1.fastq
new file mode 100644
index 0000000..82cdde4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 :N:0:AACAATGG
+AACAATGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 :N:0:AACAATGG
+AACAATGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 :N:0:AACAATGG
+AACAATGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 :N:0:AACAATGG
+AACAATGG
++
+@@?BBDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.index_1.fastq
new file mode 100644
index 0000000..3116bdd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACAATGG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 :N:0:AACAATGG
+TCCG
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 :N:0:AACAATGG
+ATCT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 :N:0:AACAATGG
+NATT
++
+#4=D
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 :N:0:AACAATGG
+TCGT
++
+?8?D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.1.fastq
new file mode 100644
index 0000000..1c1c29f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 1:N:0:AACGCATT
+GGGCGCCCCGTGAGGACCCAGTCCT
++
+ at C@FFADDFFCFCEHIIJIJJIEFC
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 1:Y:0:AACGCATT
+TTTTGGAAGAGACCTCAATTACTGT
++
+???DDDDD?:22AE:A2<3,AF?3A
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 1:N:0:AACGCATT
+NCGTCCTGGAAAACGGGGCGCGGCT
++
+#1=BDBDDFHHHHF at FHDHIGIIII
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 1:N:0:AACGCATT
+AAGTCACCTAATATCTTTTTTTTTT
++
+@@<??;?D?CFD,A4CDDHFBIIID
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 1:Y:0:AACGCATT
+ATAAAACATAGCAATATTTTCCTAT
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 1:N:0:AACGCATT
+NACTATCAGGATCGTGGCTATTTTG
++
+#1BDDFFFHHHHHJIJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 1:N:0:AACGCATT
+NCGAGTGCCTAGTGGGCCACTTTTG
++
+#4=DDBDFHHHHFHIJJJJIJJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.2.fastq
new file mode 100644
index 0000000..dddf65b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 2:N:0:AACGCATT
+TCCACTGGAACCACAGAACCC
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 2:Y:0:AACGCATT
+TAAGGTAATCCCCGCATGTGT
++
+4===AFFDFFGFDGFB at CFB:
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 2:N:0:AACGCATT
+ACCCTTGTGTCGAGGGCTGAC
++
+FFFFHHHHHJJJJJJJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 2:N:0:AACGCATT
+CACTACTGTGATTGTGCCACT
++
+FFFFGHHHHGIIIICEHCFGH
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 2:Y:0:AACGCATT
+GAGATCCTTGTTACATGCCCA
++
++A:DD?:ADEE@::C4:C<E:
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 2:N:0:AACGCATT
+CAGAACAGCTCCAGGTGCTCC
++
+FFFFHHHHHJJJJJJCGHIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 2:N:0:AACGCATT
+CTTCCATGGCCACCGTCCTGC
++
+FFFFHHHHHJJJIIGFIIJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.barcode_1.fastq
new file mode 100644
index 0000000..8fa98a2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 :N:0:AACGCATT
+AACGCATT
++
+ at CCFDFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 :Y:0:AACGCATT
+AACGCATT
++
+:?@B?@DD
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 :N:0:AACGCATT
+AACGCATT
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 :N:0:AACGCATT
+AACGCATT
++
+@@@FDDDD
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 :Y:0:AACGCATT
+AACGCATT
++
+88+AD@?8
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 :N:0:AACGCATT
+AACGCATT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 :N:0:AACGCATT
+AACGCATT
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.index_1.fastq
new file mode 100644
index 0000000..48729b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AACGCATT.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 :N:0:AACGCATT
+ATAT
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 :Y:0:AACGCATT
+TCTG
++
+1?1=
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 :N:0:AACGCATT
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 :N:0:AACGCATT
+TTCT
++
+ at C@F
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 :Y:0:AACGCATT
+ACTG
++
+??##
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 :N:0:AACGCATT
+AGAC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 :N:0:AACGCATT
+CCGA
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAAAATT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.1.fastq
new file mode 100644
index 0000000..5fc0cdd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 1:N:0:ACAGGTAT
+NGGTGCTTCATATCCCTCTAGAGGA
++
+#1=BDDFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 1:N:0:ACAGGTAT
+NAGAAGCCCCAGGAGGAAGACAGTC
++
+#1=DDFFFHHHHHHHJIIJJJJJGI
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 1:N:0:ACAGGTAT
+TCCTATTCGCCTACACAATTCTCCG
++
+CCCFFFFFHHHHHJJJJJJJHJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 1:N:0:ACAGGTAT
+NCAGAAGAAAGGGCCTTGTCGGAGG
++
+#1=DDDDDHHFHDGI at EEHG:?FA8
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.2.fastq
new file mode 100644
index 0000000..eb4468a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 2:N:0:ACAGGTAT
+GCTTACTTTGTAGCCTTCATC
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 2:N:0:ACAGGTAT
+CAGCGAGACTGGCAACTTAAA
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 2:N:0:ACAGGTAT
+TAGGATGAGGATGGATAGTAA
++
+DDFFHHHHHJHIIJHIIIHHJ
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 2:N:0:ACAGGTAT
+CTCTTCCGATCTGGAGAAAAA
++
+#####################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.barcode_1.fastq
new file mode 100644
index 0000000..22e52fa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 :N:0:ACAGGTAT
+ACAGGTAT
++
+CCCFFDDF
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 :N:0:ACAGGTAT
+ACAGGTAT
++
+CCCFFBDD
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 :N:0:ACAGGTAT
+ACAGGTAT
++
+CCCFFDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 :N:0:ACAGGTAT
+ACAGGTAT
++
+@@@BD=DD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.index_1.fastq
new file mode 100644
index 0000000..73165d1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGGTAT.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 :N:0:ACAGGTAT
+TTGC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 :N:0:ACAGGTAT
+ATAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 :N:0:ACAGGTAT
+NTGC
++
+#1=D
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 :N:0:ACAGGTAT
+TGTG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.1.fastq
new file mode 100644
index 0000000..29bc1af
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 1:N:0:ACAGTTGA
+NCTGCCGTGTCCTGACTTCTGGAAT
++
+#1:B?ADDACF<DCG;EG<FHH at CE
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 1:N:0:ACAGTTGA
+TTTTCTTGGCCTCTGTTTTTTTTTT
++
+BCCFDFFFHHFFHJIGIJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.2.fastq
new file mode 100644
index 0000000..776e78d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 2:N:0:ACAGTTGA
+ACATCGTTGAAGCACTGGATC
++
+DDDB<CFFHCHGDBHGIIIII
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 2:N:0:ACAGTTGA
+ATGACACTGCATTTTAAATAC
++
+DDDDHFFHHGGDFHFHIIHGG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.barcode_1.fastq
new file mode 100644
index 0000000..5c55767
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 :N:0:ACAGTTGA
+ACAGTTGA
++
+?@7DDDDA
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 :N:0:ACAGTTGA
+ACAGTTGA
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.index_1.fastq
new file mode 100644
index 0000000..71a0058
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACAGTTGA.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 :N:0:ACAGTTGA
+NGTC
++
+#11A
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 :N:0:ACAGTTGA
+AGGC
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACCAGTTG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACGAAATC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.1.fastq
new file mode 100644
index 0000000..0789790
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 1:N:0:ACTAAGAC
+CACCTATAATCCCAGCTACTCCAGA
++
+CCCFFFFFHHHHHJJJJJJIJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 1:Y:0:ACTAAGAC
+NTGAAATCTGGATAGGCTGGAGTTA
++
+#0-@@@###################
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 1:N:0:ACTAAGAC
+NGGCCCCACCCTCCTCCAGCACGTC
++
+#1=DDFFFHHHHHJJJJJJHIIHFH
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 1:Y:0:ACTAAGAC
+AGCAGAAGGGCAAAAGCTGGCTTGA
++
+9;<@:@###################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.2.fastq
new file mode 100644
index 0000000..53afa37
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 2:N:0:ACTAAGAC
+TTATATTTTTTTAGACATAGG
++
+FFFFGHHHHJJJJIGIIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 2:Y:0:ACTAAGAC
+TTTTTTTTTTTTTTTTTTTTT
++
+FFFFHGHHHJJIFDDDDDDDD
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 2:N:0:ACTAAGAC
+AAGGCTGCTAGCTGGCCAGAG
++
+@>??@@??@?????????>?@
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 2:Y:0:ACTAAGAC
+GTGGTAACTTTTCTGACACCT
++
+-9@;@?:8>?4:>?@######
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.barcode_1.fastq
new file mode 100644
index 0000000..e674b5f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 :N:0:ACTAAGAC
+ACTAAGAC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 :Y:0:ACTAAGAC
+ACTAAGAC
++
+>7+ at A7A7
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 :N:0:ACTAAGAC
+ACTAAGAC
++
+ at CCDFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 :Y:0:ACTAAGAC
+ACTAAGAC
++
+1+8?ADD8
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.index_1.fastq
new file mode 100644
index 0000000..ab66259
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTAAGAC.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 :N:0:ACTAAGAC
+ATTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 :Y:0:ACTAAGAC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 :N:0:ACTAAGAC
+NNGG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 :Y:0:ACTAAGAC
+TCTT
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTACC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.1.fastq
new file mode 100644
index 0000000..b474248
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 1:N:0:ACTGTATC
+NGAGACCATAGAGCGGATGCTTTCA
++
+#1=DDDFFHHGHGIJJIGIIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 1:N:0:ACTGTATC
+TTGGAACACAGCGGGAATCACAGCA
++
+CCCFFFFFHHHHHJIJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 1:N:0:ACTGTATC
+NCAAGTAGCAGTGTCACGCCTTAGC
++
+#1=DDBDDADFDDBEH at HC=CEGG@
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 1:N:0:ACTGTATC
+NAAGGTGAAGGCCGGCGCGCTCGCC
++
+#1=BDDDFFHHHHJGGGIGFIHIIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.2.fastq
new file mode 100644
index 0000000..5bc8925
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 2:N:0:ACTGTATC
+CGAACACACAAGAACTTTTTT
++
+FFFFHHHHHJJJJJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 2:N:0:ACTGTATC
+AGCAGCAACAGCAGAAACATG
++
+FFFFHHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 2:N:0:ACTGTATC
+TGAATCATTGGTGTCTGAAGA
++
+?=>=>>?##############
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 2:N:0:ACTGTATC
+TTCCGATCTTGTGCTCTTCCG
++
+FFFDHFHHGJJIIJIJJIHII
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.barcode_1.fastq
new file mode 100644
index 0000000..6a8b924
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 :N:0:ACTGTATC
+ACTGTATC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 :N:0:ACTGTATC
+ACTGTATC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 :N:0:ACTGTATC
+ACTGTATC
++
+=;7+22<A
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 :N:0:ACTGTATC
+ACTGTACC
++
+########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.index_1.fastq
new file mode 100644
index 0000000..a2d9e63
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ACTGTATC.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 :N:0:ACTGTATC
+GATA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 :N:0:ACTGTATC
+CAGC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 :N:0:ACTGTATC
+AGTC
++
+<5;?
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 :N:0:ACTGTATC
+GCTC
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGAAAAGA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.1.fastq
new file mode 100644
index 0000000..014fee9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 1:N:0:AGCATGGA
+CTCCCCCCGGGCTGAACCAGGGTAC
++
+CCCFFDDDDHDFHIIIIIIIII9DG
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 1:N:0:AGCATGGA
+AGAAGGGGAAAGCCTTCATCTTGGC
++
+BCBFFFFFHHHHHJJJJJIIFIJIJ
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 1:N:0:AGCATGGA
+NTGGTGGTCCATAGAGATTTGAAAC
++
+#1:4BD7DACF?FCA:4+<ACHIIH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.2.fastq
new file mode 100644
index 0000000..d9cae5e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 2:N:0:AGCATGGA
+GGACTCCCCTGGTTCTGGGCA
++
+DDBD?FHDFGIIIGIGHHIII
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 2:N:0:AGCATGGA
+TGCTCTTCCGATCTGATGGGC
++
+FFDD?FHHFGEHHIIDHIIII
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 2:N:0:AGCATGGA
+ATCTGACATCATGTTTGAAAG
++
+FFFDFFHDHIGBHHII<HEDB
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.barcode_1.fastq
new file mode 100644
index 0000000..75daae1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 :N:0:AGCATGGA
+AGCATGGA
++
+C@@DBFEF
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 :N:0:AGCATGGA
+AGCATGGA
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 :N:0:AGCATGGA
+AGCATGGA
++
+?:8A?3:B
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.index_1.fastq
new file mode 100644
index 0000000..30eb647
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGCATGGA.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 :N:0:AGCATGGA
+GGCT
++
+;?@D
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 :N:0:AGCATGGA
+CGTG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 :N:0:AGCATGGA
+CACC
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.1.fastq
new file mode 100644
index 0000000..eeb6b90
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 1:Y:0:AGGTAAGG
+CTTTGAAGACATTGTGAGATCTGTA
++
+<==A<42 at C+A4A?,2A@=4 at 7A??
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 1:N:0:AGGTAAGG
+NCCAGGTGTCTTCCCGGGCCCTGCC
++
+#1=DDFBDFHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 1:N:0:AGGTAAGG
+TCTCCATGTGAAACAAGCAAAAAGA
++
+CCCFFFFFHHHHGJJJIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 1:N:0:AGGTAAGG
+GTTATTGATAGGATACTGTACAAAC
++
+ at BCFFFFDHHHHFIJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.2.fastq
new file mode 100644
index 0000000..c86c74f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 2:Y:0:AGGTAAGG
+CTTCAGTAATTTTAGTACTGC
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 2:N:0:AGGTAAGG
+CAGGGAAGGGAAGGAAGGGTG
++
+DFDFHHHHHJIJIIDHHGICG
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 2:N:0:AGGTAAGG
+TTCACTTATGTATTTATGAAT
++
+DFFFHHHHHJHIIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 2:N:0:AGGTAAGG
+TCGGCCTTCCACTCTAGCATA
++
+FFFFFHHGHIJJJGJIIJHIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.barcode_1.fastq
new file mode 100644
index 0000000..19fae2f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 :Y:0:AGGTAAGG
+AGGTAAGG
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 :N:0:AGGTAAGG
+AGGTAAGG
++
+B at BDDFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 :N:0:AGGTAAGG
+AGGTAAGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 :N:0:AGGTAAGG
+AGGTAAGG
++
+@@CBDFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.index_1.fastq
new file mode 100644
index 0000000..0a71673
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTAAGG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 :Y:0:AGGTAAGG
+AGTT
++
+((0@
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 :N:0:AGGTAAGG
+NGGA
++
+#4=D
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 :N:0:AGGTAAGG
+TTGG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 :N:0:AGGTAAGG
+TCTC
++
+@@@F
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.1.fastq
new file mode 100644
index 0000000..dbfde57
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 1:N:0:AGGTCGCA
+GCTACTCAGTAGACAGTCCCACCCT
++
+@@CADDDDFCFHHIIIIGGIIGGGI
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 1:N:0:AGGTCGCA
+NGGCAGGTGCCCCCACTTGACTCTC
++
+#1?DFFFFGHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 1:N:0:AGGTCGCA
+AACCTGGCGCTAAACCATTCGTAGA
++
+CCCFFFFFHHHHHJJJJJJJJIJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 1:N:0:AGGTCGCA
+TTAGACCGTCGTGAGACAGGTTAGT
++
+ at CCFFFFFHHHHHJJJJJIIEHIJH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.2.fastq
new file mode 100644
index 0000000..e398ec4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 2:N:0:AGGTCGCA
+GAGGCGATTCCTAGGGGGTTG
++
+D8;@BH6DHD<FGGGEIGHIG
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 2:N:0:AGGTCGCA
+AGGCTGAACTTCTGAGCTGCT
++
+FFFFHHHGHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 2:N:0:AGGTCGCA
+ACCCTTGTGTCGAGGGCTGAC
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 2:N:0:AGGTCGCA
+ATACACCAAATGTCTGAACCT
++
+FFFFHHHHHJJJHIJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.barcode_1.fastq
new file mode 100644
index 0000000..7bc729d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 :N:0:AGGTCGCA
+AGGTCGCA
++
+@@@DDFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 :N:0:AGGTCGCA
+AGGTCGCA
++
+BCCDFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 :N:0:AGGTCGCA
+AGGTCGCA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 :N:0:AGGTCGCA
+AGGTCGCA
++
+BCCDFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.index_1.fastq
new file mode 100644
index 0000000..998a9b1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/AGGTCGCA.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 :N:0:AGGTCGCA
+ATGG
++
+8?=D
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 :N:0:AGGTCGCA
+GGCC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 :N:0:AGGTCGCA
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 :N:0:AGGTCGCA
+GCAC
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.1.fastq
new file mode 100644
index 0000000..26ed061
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 1:N:0:ATTATCAA
+ACGACAGACGTTCTTTCTTTGCTGC
++
+CCCFFFFFHHFHHJIJJJJJHIJJH
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 1:N:0:ATTATCAA
+NGGACATTGTAATCATTTCTTACAA
++
+#1=DD?DDHHHHHGGHIIIIIIIII
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 1:N:0:ATTATCAA
+ACAGTGTGGGAGGCAGACGAAGAGA
++
+@@@DDDDDFA:C at EGA?FD<FFHII
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 1:Y:0:ATTATCAA
+NTTTCTCTGGGCGCAAAGATGTTCA
++
+#07;8=8<<99(:=@@/@7>>6=?>
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 1:N:0:ATTATCAA
+CAAGTGTACAGGATTAGACTGGGTT
++
+BCCFDEBDHHHHHIJJJGIIIJJGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.2.fastq
new file mode 100644
index 0000000..3f2e0bc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 2:N:0:ATTATCAA
+TNNNNNNNNNNNNGNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 2:N:0:ATTATCAA
+AAGTCTTAATCAAAGATGATA
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 2:N:0:ATTATCAA
+AAGCCTGTGCTTTAAGGAAAA
++
+DBDBDF8DDCFH at GIE@@GGH
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 2:Y:0:ATTATCAA
+NNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 2:N:0:ATTATCAA
+AACTTTATTAAAGCAGTTAAA
++
+FFFFHDHHHGIIIJJJIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.barcode_1.fastq
new file mode 100644
index 0000000..bfe2de2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 :N:0:ATTATCAA
+ATTATCAA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 :N:0:ATTATCAA
+ATTATCAA
++
+C at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 :N:0:ATTATCAA
+ATTATCAA
++
+@@@DDDF?
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 :Y:0:ATTATCAA
+ATTATCAA
++
+;<;:BBDD
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 :N:0:ATTATCAA
+ATTATCAA
++
+@@@DDBDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.index_1.fastq
new file mode 100644
index 0000000..209b676
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTATCAA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 :N:0:ATTATCAA
+AGGC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 :N:0:ATTATCAA
+TTTA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 :N:0:ATTATCAA
+TTCC
++
+@@<A
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 :Y:0:ATTATCAA
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 :N:0:ATTATCAA
+AATA
++
+C at CF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.1.fastq
new file mode 100644
index 0000000..ce65a9e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 1:N:0:ATTCCTCT
+ACACCAACCACCCAACTATCTATAA
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 1:N:0:ATTCCTCT
+NAAAACTTGAGGATGCTATGCAAGC
++
+#1:B:ADDDDDDDEEAEBF9FFEBF
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 1:N:0:ATTCCTCT
+GGGGCTGAGACCTTTGCTGATGGTG
++
+@@@FFFFFHHHGHJJJJJIIIGICH
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 1:N:0:ATTCCTCT
+ATGGCAAAGTGGTGTCTGAGACCAA
++
+BCCFFFFFGHHHHHIIIJFHIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.2.fastq
new file mode 100644
index 0000000..acf5427
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 2:N:0:ATTCCTCT
+GGCTAGGGCATTTTTAATCTT
++
+FFDFHHHDFHJIJJIJGIIIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 2:N:0:ATTCCTCT
+NNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 2:N:0:ATTCCTCT
+TGCTCTTCCGATCTGGAGGGT
++
+FFFFHHHHHJJJJJJJJJJJ:
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 2:N:0:ATTCCTCT
+TTTATTGGCCTCCTGCTCCCC
++
+FFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.barcode_1.fastq
new file mode 100644
index 0000000..ea5fd93
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 :N:0:ATTCCTCT
+ATTCCTCT
++
+?@@ADEEF
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 :N:0:ATTCCTCT
+ATTCCTCT
++
+8??=BBBA
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 :N:0:ATTCCTCT
+ATTCCTCT
++
+B@@BDEFF
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 :N:0:ATTCCTCT
+ATTCCTCT
++
+?BBDDDFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.index_1.fastq
new file mode 100644
index 0000000..e0efd80
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/ATTCCTCT.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 :N:0:ATTCCTCT
+AGTG
++
+@@?D
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 :N:0:ATTCCTCT
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 :N:0:ATTCCTCT
+CGTG
++
+ at BBD
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 :N:0:ATTCCTCT
+ATCT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.1.fastq
new file mode 100644
index 0000000..c48bd97
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 1:N:0:CAACTCTC
+NCCCCAACATTCTAATTATGCCTCA
++
+#1:BDFFDHFFDFIJJJIIJIIIII
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 1:N:0:CAACTCTC
+GAAATGCATCTGTCTTAGAAACTGG
++
+??@=BDDDFDD<<,<2:C<F:FFEA
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 1:Y:0:CAACTCTC
+NGTCAAGGATGTTCGTCGTGGCAAC
++
+#1=BDDDDDDDDDID<AE?@<CEEE
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 1:N:0:CAACTCTC
+TATCCCCTCTAAGACGGACCTGGGT
++
+CCCFFFFFHHHHHJJIIIJJJJJJG
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 1:Y:0:CAACTCTC
+GTTCTCTGTCCCCAGGTCCTGTCTC
++
+===A7<7222<<=C=?+<7>@?ACB
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.2.fastq
new file mode 100644
index 0000000..33eba72
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 2:N:0:CAACTCTC
+TTTTTTTTTAACTTTGCAAAT
++
+DDDDHHHHFB at 9FHI@BFH@@
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 2:N:0:CAACTCTC
+AATTAGGACTTACCTGACATA
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 2:Y:0:CAACTCTC
+CACCTGAGCAGTGAAGCCAGC
++
+BDDDHD?FDBHI?AHGGGDFH
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 2:N:0:CAACTCTC
+AGTCTTAGCATTTACTTTCCC
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 2:Y:0:CAACTCTC
+TTTTTTTTTTTTTTTTTGGGC
++
+??@??@???????########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.barcode_1.fastq
new file mode 100644
index 0000000..052fd17
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 :N:0:CAACTCTC
+CAACTCTC
++
+@@@DDFDF
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 :N:0:CAACTCTC
+CAACTCTC
++
+??;=A:B=
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 :Y:0:CAACTCTC
+CAACTCTC
++
+=??BA?BD
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 :N:0:CAACTCTC
+CAACTCTC
++
+BC at DDFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 :Y:0:CAACTCTC
+CAACTCTC
++
+?@@1:DBD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.index_1.fastq
new file mode 100644
index 0000000..77d0e31
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAACTCTC.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 :N:0:CAACTCTC
+TTTT
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 :N:0:CAACTCTC
+AGGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 :Y:0:CAACTCTC
+TAAT
++
+@<@?
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 :N:0:CAACTCTC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 :Y:0:CAACTCTC
+TTTT
++
+<<<@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGAC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.1.fastq
new file mode 100644
index 0000000..3b0d15e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 1:N:0:CAATAGTC
+NAAAAAAAAAAAAAAAAAAAAAAAA
++
+#1BDFFFFHHHHHJJJJFDDDDDDD
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 1:N:0:CAATAGTC
+NTGCCCTTCGTCCTGGGAAACGGGG
++
+#1BDFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 1:N:0:CAATAGTC
+NTAGTGCTGGGCACTAAGTAATACC
++
+#4=DDDFFHHHHHJJJJJHIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 1:N:0:CAATAGTC
+GGATAATCCTATTTATTACCTCAGA
++
+BBBDDFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 1:N:0:CAATAGTC
+TTTCAGATTGGTCATTGTTAGTGTA
++
+??@BDDDEHBHADHHIIEHDHFHFF
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 1:Y:0:CAATAGTC
+AACACGGACAAAGGAGTCTAACACG
++
+<<<??8@@#################
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 1:N:0:CAATAGTC
+NCTATGCCGATCGGGTGTCCGCACT
++
+#1=DDDDDHHFHHIIEHHHBGHGII
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.2.fastq
new file mode 100644
index 0000000..07d86b6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 2:N:0:CAATAGTC
+TTTTTTTTTTTTTTTTTTTTT
++
+FFFFHHHHHJJJJHFDDDDDD
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 2:N:0:CAATAGTC
+ACCCTTGTGTCGAGGGCTGAC
++
+FFFFHHHHHIJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 2:N:0:CAATAGTC
+GGCACTGAGAATATATGGGTG
++
+FFFFHHHHHJJJJJJJJJJEG
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 2:N:0:CAATAGTC
+GGATGTGTTTAGGAGTGGGAC
++
+FFFFHHHHHIIJJHJFHIJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 2:N:0:CAATAGTC
+TTATTCATTTGTATGATCTTA
++
+FFFFHFFHFHIHIIJIJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 2:Y:0:CAATAGTC
+TTTCTGGGGACTAGTGAGGCG
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 2:N:0:CAATAGTC
+ACGGGGTCTCGCTATGTTGCC
++
+FFFFHHHHHJIIJJJJIJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.barcode_1.fastq
new file mode 100644
index 0000000..1b351d4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 :N:0:CAATAGTC
+CAATAGAC
++
+1>>7A###
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 :N:0:CAATAGTC
+CAATAGTC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 :N:0:CAATAGTC
+CAATAGTC
++
+CCCFFFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 :N:0:CAATAGTC
+CAATAGTC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 :N:0:CAATAGTC
+CAATAGTC
++
+ at CCFFDDE
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 :Y:0:CAATAGTC
+CAATAGTC
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 :N:0:CAATAGTC
+CAATAGTC
++
+@@@FDEBD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.index_1.fastq
new file mode 100644
index 0000000..2cb3531
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAATAGTC.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 :N:0:CAATAGTC
+TCTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 :N:0:CAATAGTC
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 :N:0:CAATAGTC
+GTCA
++
+CBCF
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 :N:0:CAATAGTC
+ATAC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 :N:0:CAATAGTC
+ATCT
++
+@@BF
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 :Y:0:CAATAGTC
+ATCG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 :N:0:CAATAGTC
+AGAG
++
+BCCD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGAT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.1.fastq
new file mode 100644
index 0000000..9053f51
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 1:N:0:CAGCGGTA
+TACCTGGTTGATCCTGCCAGTAGCA
++
+@@CFFFFDDHHGHJGGHIJJIHGBH
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 1:N:0:CAGCGGTA
+NCACTCATTTTCTTATGTGGGATAT
++
+#1=DDFDFHHHHHIJJIFHIIHHHI
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 1:N:0:CAGCGGTA
+ATCACCGCACTCATTTCCCGCTTCC
++
+CCCFFFFFHHHACEEGHIIBHIIII
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 1:N:0:CAGCGGTA
+NTGGACAACATGTTCGAGAGCTACA
++
+#1=BBDDDFFFFDGFGIG?F;HHFI
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 1:Y:0:CAGCGGTA
+TTAAAGAGGTTCAGGGATGCAGAGT
++
+#########################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.2.fastq
new file mode 100644
index 0000000..ee8578e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 2:N:0:CAGCGGTA
+CTGTACCGGCCGTGCGTACTT
++
+FFFDHHHFGIJJJJJJGHIGG
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 2:N:0:CAGCGGTA
+GAGAGCCAGTGGAGTTACGAC
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 2:N:0:CAGCGGTA
+GAGACAGAGAGGATCAGAAGT
++
+DDFDHHDFHEGFEGGIJIIIG
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 2:N:0:CAGCGGTA
+CCTCCAGCTTCAGCTTCTCCT
++
+DDFFHHHHHJHGGJIJJJEHH
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 2:Y:0:CAGCGGTA
+AAGCCTCTTTATCCTTGGCAT
++
+#####################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.barcode_1.fastq
new file mode 100644
index 0000000..b9b5bba
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 :N:0:CAGCGGTA
+CAGCGGTA
++
+ at C@FFFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 :N:0:CAGCGGTA
+CAGCGGTA
++
+C at CFFF@D
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 :N:0:CAGCGGTA
+CAGCGGTA
++
+B@@DFDDF
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 :N:0:CAGCGGTA
+CAGCGGTA
++
+@?@DDF@@
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 :Y:0:CAGCGGTA
+TAGCGGTA
++
+########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.index_1.fastq
new file mode 100644
index 0000000..7dbce20
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CAGCGGTA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 :N:0:CAGCGGTA
+TTCA
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 :N:0:CAGCGGTA
+TAAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 :N:0:CAGCGGTA
+NGGG
++
+#4=B
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 :N:0:CAGCGGTA
+TCCG
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 :Y:0:CAGCGGTA
+TTTG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.1.fastq
new file mode 100644
index 0000000..c27e9fd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 1:N:0:CCAACATT
+TTCTACCTCACCTTAGGGAGAAGAC
++
+@@@DDBDDD>F><C<4CG?EHGHIG
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 1:N:0:CCAACATT
+CCCCTGAGGACACCATCCCACTCCA
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 1:Y:0:CCAACATT
+GCTGACACAATCTCTTCCGCCTGGT
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 1:N:0:CCAACATT
+GGACACGGACAGGATTGACAGATTG
++
+BCBFFFFFHHHHHHIIJHIIIFHIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 1:N:0:CCAACATT
+NGGAAATCCAGAAAACATAGAAGAT
++
+#1=DDFFFHHHHHIJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 1:N:0:CCAACATT
+ACGCTCGGCTAATTTTTGTATTTTT
++
+ at CCFFFDFHHHHHIJJJJHIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 1:N:0:CCAACATT
+NCGACCATAAACGATGCCGACCGGC
++
+#4=DFFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.2.fastq
new file mode 100644
index 0000000..5f8d321
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 2:N:0:CCAACATT
+TNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 2:N:0:CCAACATT
+GCTGGGGAACATCCAGAAAGG
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 2:Y:0:CCAACATT
+AATATAGGAAATAGAAGCTAT
++
+=AAA,2?4>7C<<4<A+3<AB
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 2:N:0:CCAACATT
+GATCTAAGTTGGGGGACGCCG
++
+FDFFHHHHHJJJIJIIIGIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 2:N:0:CCAACATT
+CAAAATTTCATATGACTTAGC
++
+FFFFHHHHHJJIIIHICHIIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 2:N:0:CCAACATT
+CCCAGTACTTTGGGAGGCCAA
++
+FFFFHHHHHJJJJIJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 2:N:0:CCAACATT
+TTGGGAGGACAATGATGGAAA
++
+#####################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.barcode_1.fastq
new file mode 100644
index 0000000..569551b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 :N:0:CCAACATT
+CCAACATT
++
+?@;DD?BD
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 :N:0:CCAACATT
+CCAACATT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 :Y:0:CCAACATT
+CCAACATT
++
+########
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 :N:0:CCAACATT
+CCAACATT
++
+C at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 :N:0:CCAACATT
+CCAACATT
++
+C at CFFFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 :N:0:CCAACATT
+CCAACATT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 :N:0:CCAACATT
+CCAACATT
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.index_1.fastq
new file mode 100644
index 0000000..48995ed
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAACATT.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 :N:0:CCAACATT
+AGGC
++
+19;3
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 :N:0:CCAACATT
+AAGA
++
+BC at F
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 :Y:0:CCAACATT
+GACA
++
+=1=A
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 :N:0:CCAACATT
+ATCT
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 :N:0:CCAACATT
+GCAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 :N:0:CCAACATT
+TAAT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 :N:0:CCAACATT
+AATG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.1.fastq
new file mode 100644
index 0000000..c070e32
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 1:N:0:CCAGCACC
+TTTCTATTAGCTCTTAGTAAGATTA
++
+CCCFFFFFHHHHHJJJIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 1:N:0:CCAGCACC
+CCGATACGCTGAGTGTGGTTTGCGG
++
+CCCFFFFFHHHFHEGGHIHIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 1:N:0:CCAGCACC
+NCACATGGATGAGGAGAATGAGGAT
++
+#1=DDFFFFHHHHJHIGIHHHIJEH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.2.fastq
new file mode 100644
index 0000000..77fa2ab
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 2:N:0:CCAGCACC
+AGCTTTATTGGGGAGGGGGTG
++
+FFFFHHGHHJJJJGJJJJJDF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 2:N:0:CCAGCACC
+CTTGTCGATGAGGAACTTGGT
++
+FFFFDHHGHJIJJGHIIJJJH
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 2:N:0:CCAGCACC
+TGATCTCCTCCTTCTTGGCCT
++
+DDDDHHFHFEIIIIHHBAHBG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.barcode_1.fastq
new file mode 100644
index 0000000..07b0fae
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 :N:0:CCAGCACC
+CCAGCACC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 :N:0:CCAGCACC
+CCAGCACC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 :N:0:CCAGCACC
+CCAGCACC
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.index_1.fastq
new file mode 100644
index 0000000..aca921b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCAGCACC.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 :N:0:CCAGCACC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 :N:0:CCAGCACC
+TCTT
++
+@?@F
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 :N:0:CCAGCACC
+ATCT
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CCATGCGT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.1.fastq
new file mode 100644
index 0000000..ab2fd21
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 1:N:0:CGCCTTCC
+AGAAGACGAGGCTGAGAGTGACATC
++
+@@@FFFFFHHHDHJGHGHCHHJJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 1:N:0:CGCCTTCC
+NAGAAGCCTTTGCACCCTGGGAGGA
++
+#1=DDDFFHHHHHJJJJJJJJIIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.2.fastq
new file mode 100644
index 0000000..752129c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 2:N:0:CGCCTTCC
+TATAAGGCCCAGTCCAAGGAA
++
+FFFFHHHGGIJIGGIJFIJII
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 2:N:0:CGCCTTCC
+CCTTCCCATGCCACCAACTCG
++
+FFFFGHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.barcode_1.fastq
new file mode 100644
index 0000000..4c987f7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 :N:0:CGCCTTCC
+CGCCTTCC
++
+@@@DDFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 :N:0:CGCCTTCC
+CGCCTTCC
++
+C at BFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.index_1.fastq
new file mode 100644
index 0000000..59a8d15
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCCTTCC.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 :N:0:CGCCTTCC
+GTCA
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 :N:0:CGCCTTCC
+ACAT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.1.fastq
new file mode 100644
index 0000000..22dfb08
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 1:N:0:CGCTATGT
+CGTGGGGAACCTGGCGCTAAACCAT
++
+ at BBFFFFFHHHHHJJJJIJJJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 1:N:0:CGCTATGT
+GTTTATTGGGGCATTCCTTATCCCA
++
+@??DDDDBDHF>FCHGGGBFAAED9
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 1:N:0:CGCTATGT
+ACTTTCACCGCTACACGACCGGGGG
++
+CCCFFFFFHGFFHIIFIHJIGGII>
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 1:N:0:CGCTATGT
+NTTCTCGGCTGTCATGTGCAACATT
++
+#1=DDBDFHHHDFFBHGHGHIIJEH
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 1:N:0:CGCTATGT
+TGCCAGGAAGTGTTTTTTCTGGGTC
++
+ at CCFFEFFHHFFFGIJJJJJJJJGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.2.fastq
new file mode 100644
index 0000000..e437809
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 2:N:0:CGCTATGT
+ACCCTTGTGTCGAGGGCTGAC
++
+FFFFHHFHHIJJJIIIGIJIJ
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 2:N:0:CGCTATGT
+AAGTTGGGCTGACCTGACAGA
++
+DD;=FBFADBCGDEH?F;FCG
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 2:N:0:CGCTATGT
+GGCTCTAGAGGGGGTAGAGGG
++
+FFFFHHDFBHIIJJ1?FGHIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 2:N:0:CGCTATGT
+TCATTTCNNGCTTCTCTCTGT
++
+@??@=@>##22=;@??><@??
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 2:N:0:CGCTATGT
+TGAGTAATGGTTGAGAGGTGG
++
+DDFFFHHGHJHHGFIHHIFGI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.barcode_1.fastq
new file mode 100644
index 0000000..fbfd111
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 :N:0:CGCTATGT
+CGCTATGT
++
+@@@FFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 :N:0:CGCTATGT
+CGCTATGT
++
+@<@?B@;A
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 :N:0:CGCTATGT
+CGCTATGT
++
+@@BFFDDD
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 :N:0:CGCTATGT
+CGCTATGT
++
+@<?DD:B=
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 :N:0:CGCTATGT
+CGCTATGT
++
+CCCFFFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.index_1.fastq
new file mode 100644
index 0000000..6b28e4b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CGCTATGT.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 :N:0:CGCTATGT
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 :N:0:CGCTATGT
+AGGA
++
+@@<D
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 :N:0:CGCTATGT
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 :N:0:CGCTATGT
+NGCA
++
+#0;@
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 :N:0:CGCTATGT
+AGTG
++
+B@?D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.1.fastq
new file mode 100644
index 0000000..d0aacee
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 1:N:0:CTAACTCG
+NGTCTGGCCTGCACAGACATCCTAC
++
+#1=DDFFFHHHHHJJJIJJIJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 1:N:0:CTAACTCG
+TGGTTGATCCTGCCAGTAGCATATG
++
+@@@ADADDFHFFDBHE?G at HIIIEE
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 1:Y:0:CTAACTCG
+NAGATAAGAGTCCACACAGTTGAGT
++
+#11AAAAA<A?4=C=7?733<ACA3
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 1:N:0:CTAACTCG
+NAGGACATCGATAAAGGCGAGGTGT
++
+#1=DDFFFHHHHHJJJJJJJJJHHG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.2.fastq
new file mode 100644
index 0000000..8218978
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 2:N:0:CTAACTCG
+TTAAACCTGTTAGAACTTCTG
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 2:N:0:CTAACTCG
+AATGCACGCATCCCCCCCGCG
++
+FFFFGGHHHHJJJJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 2:Y:0:CTAACTCG
+TGGATCTTCTCTAACTTGTCA
++
+AAAAA+2AA?CB4@@ABB3?A
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 2:N:0:CTAACTCG
+GGCCGGTGCCGTCGGGCCCAA
++
+FFFFHHHHGJJIJJJJJJJIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.barcode_1.fastq
new file mode 100644
index 0000000..748b2b5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 :N:0:CTAACTCG
+CTAACTCG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 :N:0:CTAACTCG
+CTAACTCG
++
+@@CFDDFD
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 :Y:0:CTAACTCG
+CTAACTCG
++
+=++==ADB
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 :N:0:CTAACTCG
+CTAACTCG
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.index_1.fastq
new file mode 100644
index 0000000..3fd8771
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTAACTCG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 :N:0:CTAACTCG
+GTTC
++
+C@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 :N:0:CTAACTCG
+GATA
++
+C at CF
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 :Y:0:CTAACTCG
+ATGA
++
+>=><
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 :N:0:CTAACTCG
+TTGG
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.1.fastq
new file mode 100644
index 0000000..4ca668c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 1:N:0:CTATGCGT
+NAGAACTGGCGCTGCGGGATGAACC
++
+#1=BDFFFHHHHHJJJJJHIJIJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 1:N:0:CTATGCGT
+ATCTGCCTGGTTCGGCCCGCCTGCC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 1:N:0:CTATGCGT
+NTATAGTGGAGGCCGGAGCAGGAAC
++
+#1:DABADHHHFHIIIGGHGIIIII
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 1:Y:0:CTATGCGT
+NTAAAGTGTGAACAAGGAAGGTCAT
++
+#07>@<9=@################
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 1:N:0:CTATGCGT
+NACACTGCTGCAGATGACAAGCAGC
++
+#4BDFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 1:N:0:CTATGCGT
+ACGCCGCAAGTCAGAGCCCCCCAGA
++
+@@@DDDFFFFB:DBBEBEFDHBDDB
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 1:Y:0:CTATGCGT
+GAGAGAAGCACTCTTGAGCGGGATA
++
+0;(@((@)2@###############
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.2.fastq
new file mode 100644
index 0000000..7fbde82
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 2:N:0:CTATGCGT
+ACAACACCACCGCCCTCCCCC
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 2:N:0:CTATGCGT
+AAGGCCCGTGGGCCAGAGGTG
++
+FFFFHHHHHJJJJJJJJJJHI
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 2:N:0:CTATGCGT
+AGTAGTTCCCTGCTAAGGGAG
++
+BDBDADDDDIEID:AFFD:?8
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 2:Y:0:CTATGCGT
+GCTCTTCCGATCTGGAGGATG
++
+?A4A==A at 7A<?#########
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 2:N:0:CTATGCGT
+CGCTAGAACCAACTTATTCAT
++
+24=?@@?@?@@?@@@@@@?@@
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 2:N:0:CTATGCGT
+AGTGTTGGTGTGTTGACTGTT
++
+ADABCF;BF<AACGCHEBHC<
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 2:Y:0:CTATGCGT
+TTTTTTTTTTTTTTTTTTTTT
++
+FFFFGHHHHJJJFDDDDDDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.barcode_1.fastq
new file mode 100644
index 0000000..e684e0e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 :N:0:CTATGCGT
+CTATGCGT
++
+CCCFFFFD
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 :N:0:CTATGCGT
+CTATGCGT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 :N:0:CTATGCGT
+CCATGCGT
++
+??<DDA?D
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 :Y:0:CTATGCGT
+CTATGCGC
++
+;?=D####
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 :N:0:CTATGCGT
+CTATGCGT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 :N:0:CTATGCGT
+CTATGCGT
++
+@<@?D8 at D
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 :Y:0:CTATGCGT
+CTATGCGT
++
+=??B####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.index_1.fastq
new file mode 100644
index 0000000..6675abc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTATGCGT.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 :N:0:CTATGCGT
+ACAC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 :N:0:CTATGCGT
+GCTG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 :N:0:CTATGCGT
+TGGG
++
+???D
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 :Y:0:CTATGCGT
+GTGT
++
+=+=?
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 :N:0:CTATGCGT
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 :N:0:CTATGCGT
+AGCC
++
+@@;1
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 :Y:0:CTATGCGT
+TTTT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.1.fastq
new file mode 100644
index 0000000..4a7196d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 1:N:0:CTGCGGAT
+TTTCATCTTATTTCATTGGTTTATA
++
+CCCFFFFFHHHHHJIJJJJIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 1:N:0:CTGCGGAT
+NGTTTTAGGGGTGCGCAGGAGTCAA
++
+#11=A=DD?DF at D@CCGHIEFH at BG
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 1:N:0:CTGCGGAT
+TTTCTTCGCAGGATTTTTCTGAGCC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.2.fastq
new file mode 100644
index 0000000..738c4f7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 2:N:0:CTGCGGAT
+CTGACTCTACTCAGTAGATTA
++
+FFFFHHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 2:N:0:CTGCGGAT
+TTTCCACCTTGGTCACCTTCC
++
+DDFFHHHHHJEGGIHHIJGIH
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 2:N:0:CTGCGGAT
+TTCTAGGGGATTTAGCGGGGT
++
+FFFFHHHHHJJJJJJJJJJJD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.barcode_1.fastq
new file mode 100644
index 0000000..92dcd7d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 :N:0:CTGCGGAT
+CTGCGGAT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 :N:0:CTGCGGAT
+CTGCGGAT
++
+@@@FFFDA
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 :N:0:CTGCGGAT
+CAGCGGAT
++
+C at CFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.index_1.fastq
new file mode 100644
index 0000000..06a63f7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGCGGAT.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 :N:0:CTGCGGAT
+ATAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 :N:0:CTGCGGAT
+TCTC
++
+ at C@D
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 :N:0:CTGCGGAT
+GGAC
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.1.fastq
new file mode 100644
index 0000000..243a935
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.1.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 1:N:0:CTGTAATC
+CTAAACAGAGAGAAGGTTTCTCTTT
++
+CCCFFFFFHHHHHJJJFHIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 1:Y:0:CTGTAATC
+NTAAAGAGAAATCAAGAATACTATT
++
+#-4@?(@)@@###############
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 1:Y:0:CTGTAATC
+NTGATAAGGTGTTGCTATGTTACCC
++
+#1:D?DDDDA??2:<CC4:AEDF>?
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 1:N:0:CTGTAATC
+NAAACAAAACTGTAGAACTGTGTAT
++
+#1=DDFFFHHHHHJJIJJJIHHHJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 1:N:0:CTGTAATC
+TCGTTAAGTATATTCTTAGGTATTT
++
+CCCFFDFFFHFHHIIJJJJJFJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 1:N:0:CTGTAATC
+NTACTAAAGAAAAAGTTGAAGAACT
++
+#1=DDDFFHHHHHJJGHIJJJJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.2.fastq
new file mode 100644
index 0000000..37d5eff
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.2.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 2:N:0:CTGTAATC
+GGTGAAACCCTGTCTCTACTA
++
+FFDDHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 2:Y:0:CTGTAATC
+TTTTTTTNNTTTTTTTTTTTT
++
+@@@@@@?##0:????????=<
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 2:Y:0:CTGTAATC
+GCAGCTGGGTGCTGTGATGCA
++
+DDBB<DD8F<<CGG?AA?A<F
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 2:N:0:CTGTAATC
+NTCACACATAATTTTAAAATT
++
+#22@?@@??@@@@@??@@@@@
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 2:N:0:CTGTAATC
+ATCAGTAGCACCACTATACAC
++
+FFFFHHHHHJJJJJJIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 2:N:0:CTGTAATC
+ATTATTCTTCTGCCATAAGGT
++
+DFFFHGFHHIJJJJJGIGIJH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.barcode_1.fastq
new file mode 100644
index 0000000..152bcc2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.barcode_1.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 :N:0:CTGTAATC
+CTGTAATC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 :Y:0:CTGTAATC
+CTGTAATC
++
+1112 at A##
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 :Y:0:CTGTAATC
+CTGTAATC
++
+ at C<DD:B?
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 :N:0:CTGTAATC
+CTGTAATC
++
+C at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 :N:0:CTGTAATC
+CTGTAATC
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 :N:0:CTGTAATC
+CTGTAATC
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.index_1.fastq
new file mode 100644
index 0000000..606424d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/CTGTAATC.index_1.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 :N:0:CTGTAATC
+ACAT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 :Y:0:CTGTAATC
+NTTT
++
+#0;@
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 :Y:0:CTGTAATC
+GCAT
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 :N:0:CTGTAATC
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 :N:0:CTGTAATC
+ACCA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 :N:0:CTGTAATC
+AGGA
++
+B@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAAAAAA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAACGAT..index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.1.fastq
new file mode 100644
index 0000000..b55be00
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 1:N:0:GAAGGAAG
+CCCACCTTCCGGCGGCCGAAGACAC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 1:N:0:GAAGGAAG
+NTCCTGGGAAACGGGGCGCGGCTGG
++
+#4BDDDFFHHHHHIJIIJJJJJJIJ
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 1:N:0:GAAGGAAG
+TTGCTGCATGGGTTAATTGAGAATA
++
+CCCFFFFFHHHHFHHIIJJIJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.2.fastq
new file mode 100644
index 0000000..b767a87
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 2:N:0:GAAGGAAG
+GTTGGCTTTAACATCCACAAT
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 2:N:0:GAAGGAAG
+ACNCNTNNNNNNNGGNNTGNN
++
+@?###################
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 2:N:0:GAAGGAAG
+AGGTCAAAATCAGCAACAAGT
++
+FFFDHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.barcode_1.fastq
new file mode 100644
index 0000000..f38597a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 :N:0:GAAGGAAG
+GAAGGAAG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 :N:0:GAAGGAAG
+GAAGGAAG
++
+CCCFFDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 :N:0:GAAGGAAG
+GAAGGAAG
++
+ at B@FFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.index_1.fastq
new file mode 100644
index 0000000..f7afdfc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GAAGGAAG.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 :N:0:GAAGGAAG
+GCTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 :N:0:GAAGGAAG
+NNAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 :N:0:GAAGGAAG
+TACA
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.1.fastq
new file mode 100644
index 0000000..41fcefa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 1:N:0:GACCAGGA
+TTCCAGCATGCGGTTTAAGTAGGAT
++
+ at CCFDFDBDFBF:<CEBHAFHHICH
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 1:N:0:GACCAGGA
+GAGCAGATCGGAAGAGCACAGATCG
++
+@@@FFDDDHHHHHIJJBGGHJIHEG
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 1:N:0:GACCAGGA
+GCTGAGTCATGTAGTAAGCCTGTGC
++
+BB at FDDDFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 1:Y:0:GACCAGGA
+CACTAACTCCTGACCTCAAATAATC
++
+?7?=DD?DD+CDBE>E at EEF@+<CF
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 1:N:0:GACCAGGA
+AGCCGCGAGGTGCTGGCGGACTTCC
++
+:;1BDDDAA88A<?<E1C:D#####
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 1:Y:0:GACCAGGA
+NTAGATGACCAAAACTTGCAGGGCA
++
+#1:A<?@A+7A=?CBCCBCCBAAAA
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 1:N:0:GACCAGGA
+NTTCCCTCAGGATAGCTGGCGCTCT
++
+#1=DDFFFGHGHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.2.fastq
new file mode 100644
index 0000000..4dc59b5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 2:N:0:GACCAGGA
+GNNNNNNNNNNNNNNNNNNNN
++
+?####################
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 2:N:0:GACCAGGA
+TTCCGATCTGTGCTCTTCCGA
++
+FFFFDFHHFIJDGIGGHGIGH
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 2:N:0:GACCAGGA
+ACAACACCAAATGCTGCTAAG
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 2:Y:0:GACCAGGA
+TGCTCTTCCGATCTGCATACA
++
+AAA8AAAA<AAA)@CBA9>A#
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 2:N:0:GACCAGGA
+ATTATTTGCCTTGAAGTAAGC
++
+2<>>@>8@>8;@#########
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 2:Y:0:GACCAGGA
+CACTCTTCTGGGCATCCCCTG
++
+DEDFHHHHHIJIHHGHGGJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 2:N:0:GACCAGGA
+AGAATAGGTTGAGATCGTTTC
++
+FFDFHHFHDHIJJJJJJJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.barcode_1.fastq
new file mode 100644
index 0000000..3a43c99
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 :N:0:GACCAGGA
+GACCAGGA
++
+?@@FF;=B
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 :N:0:GACCAGGA
+GACCAGGA
++
+CC at DFFFD
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 :N:0:GACCAGGA
+GACCAGGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 :Y:0:GACCAGGA
+GACCAGGA
++
+?;@DFDFF
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 :N:0:GACCAGGA
+GACCAGGC
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 :Y:0:GACCAGGA
+GACCAGGA
++
+@@CDFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 :N:0:GACCAGGA
+GACCAGGA
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.index_1.fastq
new file mode 100644
index 0000000..c3347b2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGA.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 :N:0:GACCAGGA
+TCCG
++
+:<<?
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 :N:0:GACCAGGA
+GCTC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 :N:0:GACCAGGA
+ACTG
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 :Y:0:GACCAGGA
+CGTG
++
+===A
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 :N:0:GACCAGGA
+GGAG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 :Y:0:GACCAGGA
+TCAC
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 :N:0:GACCAGGA
+GTTG
++
+ at CCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCAGGC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.1.fastq
new file mode 100644
index 0000000..2e2beb8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 1:N:0:GACCGTTG
+GCACCGGAAGAGCACACAGATCGGA
++
+CCCFFFFDFHGHHJJIJIJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 1:N:0:GACCGTTG
+TGTATTCGAGAGATCAAAGAGAGAG
++
+@@=DDBDD?FFHHEIDBDFCEDBAF
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 1:N:0:GACCGTTG
+NTGAAAGATTTAGAGAGCTTACAAA
++
+#1=DDDDDHHHGHJJIIJJJJIJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 1:N:0:GACCGTTG
+NTGTAGTTTCTTTAGGCAAATTTGT
++
+#4=BDDDFHHHHHJJJJJJIIJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.2.fastq
new file mode 100644
index 0000000..8105399
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 2:N:0:GACCGTTG
+TTCCGATCTATCTGCTCGTCC
++
+34???3;@#############
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 2:N:0:GACCGTTG
+CTTCCGATCTTTTAGCAAAGC
++
+DBDDHFFHDGIGIIJJJGGGI
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 2:N:0:GACCGTTG
+TAAATTTTGCTTTTCTACAGC
++
+FFFFHHHHHJJJJIJIJJIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 2:N:0:GACCGTTG
+CACTTACGAAGCAAATACTTT
++
+DFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.barcode_1.fastq
new file mode 100644
index 0000000..fb6bf3f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 :N:0:GACCGTTG
+GACCGTTG
++
+ at CCFFDDF
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 :N:0:GACCGTTG
+GACCGTTG
++
+;@@DD=DD
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 :N:0:GACCGTTG
+GACCGTTG
++
+CCCFFDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 :N:0:GACCGTTG
+GACCGTTG
++
+B at CFFDFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.index_1.fastq
new file mode 100644
index 0000000..7ff093e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCGTTG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 :N:0:GACCGTTG
+GCTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 :N:0:GACCGTTG
+TGCT
++
+:?@D
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 :N:0:GACCGTTG
+GCTC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 :N:0:GACCGTTG
+NGTC
++
+#4=D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.1.fastq
new file mode 100644
index 0000000..1f6b7ec
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 1:N:0:GACCTAAC
+GGAAAAGACGGAAAGGTTCTATCTC
++
+ at C@DFFFDFHHHHJIJHHIJJJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.2.fastq
new file mode 100644
index 0000000..4a9d8e5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 2:N:0:GACCTAAC
+TACATATAACAAATGCAAAAA
++
+FFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.barcode_1.fastq
new file mode 100644
index 0000000..17c61fd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.barcode_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 :N:0:GACCTAAC
+GACCTAAC
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.index_1.fastq
new file mode 100644
index 0000000..ad9c545
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GACCTAAC.index_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 :N:0:GACCTAAC
+TGAA
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.1.fastq
new file mode 100644
index 0000000..eca4c3a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 1:N:0:GATATCCA
+AGTCCAGGCTGAGCCCAGGGAAGAA
++
+CCCFFFFFHHHHGJIJJIJJHIJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 1:N:0:GATATCCA
+NTTTCCATGGCCGTCACCTTTGGGT
++
+#4=DDFFFHHHHHJJJJJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 1:N:0:GATATCCA
+NGATCGGAAGAGCACACGTTTGACT
++
+#4=DAA=DDFHFHIIBFGHHIG>EG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.2.fastq
new file mode 100644
index 0000000..3c70cd4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 2:N:0:GATATCCA
+AAAAGACACAACAAGTCCAAC
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 2:N:0:GATATCCA
+ATTTGTCACCACTAGCCACCA
++
+@?@@@@@@@@@@?@@@@@@@?
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 2:N:0:GATATCCA
+TTCCGATCTAGGTAATAGCTA
++
+DFFFDCDDHFFFAFHDHIJGJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.barcode_1.fastq
new file mode 100644
index 0000000..d27976c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 :N:0:GATATCCA
+GATATCCA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 :N:0:GATATCCA
+GATATCCA
++
+B at BFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 :N:0:GATATCCA
+GATATCCA
++
+@@@:DDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.index_1.fastq
new file mode 100644
index 0000000..d4f21d5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GATATCCA.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 :N:0:GATATCCA
+AGGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 :N:0:GATATCCA
+NNAC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 :N:0:GATATCCA
+GCTC
++
+==?B
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.1.fastq
new file mode 100644
index 0000000..3b50e29
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 1:N:0:GCCGTCGA
+GTGGAGACCACCTCCGAGGCCTTGT
++
+BBCFFFFFHHHHHJJJIJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 1:N:0:GCCGTCGA
+TTTGGTGGAAATTTTTTGTTATGAT
++
+CCCFFBDBHFD?FBFHIIGGIC at EF
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 1:Y:0:GCCGTCGA
+AGGGGGATCCGCCGGGGGACCACAA
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 1:N:0:GCCGTCGA
+GTAGGCGCTCAGCAAATACTTGTCG
++
+@@@DDDD8?<CACEHHBBHDAAFH@
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 1:N:0:GCCGTCGA
+NCACACGCCACACGGAGCACACTTT
++
+#4=DDFFFHHHHHJJJJJJJJIIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.2.fastq
new file mode 100644
index 0000000..74c4477
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 2:N:0:GCCGTCGA
+ANANNNNNNNNNNGGACGACN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 2:N:0:GCCGTCGA
+TGAATGTCTGCACAGCCGCTT
++
+FFFDHHHHHJJJIIIJGHIJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 2:Y:0:GCCGTCGA
+TCTGGGCTGTCGACAGGTGTC
++
+FFFFHHHHGIJJJJJJIFHHI
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 2:N:0:GCCGTCGA
+CCAGCCTGCAGGCCCCGCGGC
++
+BAABDD?DDIID)A:3<EADD
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 2:N:0:GCCGTCGA
+CACCAAAATAATCAGAAGGCC
++
+FFFDBHGHHIGGIJFJJGGFH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.barcode_1.fastq
new file mode 100644
index 0000000..9b4fd3a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 :N:0:GCCGTCGA
+GCCGTCGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 :N:0:GCCGTCGA
+GCCGTCGA
++
+@@CDDDDF
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 :Y:0:GCCGTCGA
+GCCGTCGA
++
+BCCFDFFD
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 :N:0:GCCGTCGA
+GCCGTCGA
++
+?@<DDDD?
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 :N:0:GCCGTCGA
+GCCGTCGA
++
+@@CFDDFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.index_1.fastq
new file mode 100644
index 0000000..0d66262
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCGTCGA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 :N:0:GCCGTCGA
+GCGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 :N:0:GCCGTCGA
+CAAT
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 :Y:0:GCCGTCGA
+GTCA
++
+ at B@F
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 :N:0:GCCGTCGA
+CTTG
++
+???B
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 :N:0:GCCGTCGA
+ATTT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.1.fastq
new file mode 100644
index 0000000..3c181dd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 1:N:0:GCCTAGCC
+GGCGGAGGCAGCATTTCAGCTGTGA
++
+CCCFFDFFHHHHHIJJIGHHHJHHF
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 1:N:0:GCCTAGCC
+CTTGGGCGCATGGTGAGGGAGGGAG
++
+@@@FFDDFHDFH??CBEBHHIGDCD
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 1:N:0:GCCTAGCC
+TTCAAGGAATCGTCCTGCCTCAGCC
++
+BCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 1:N:0:GCCTAGCC
+NTGTAATCCCAGCTCTCAGGGAGGC
++
+#1=ADDDDDDDBBA?@AE?E at FE8;
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 1:Y:0:GCCTAGCC
+NTAAACAGCTTCTGCACAGCCAAAG
++
+#00@@?>=39>9;<412@?######
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.2.fastq
new file mode 100644
index 0000000..ac73b63
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 2:N:0:GCCTAGCC
+AAGTCGAGACAGAAGTGAGAA
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 2:N:0:GCCTAGCC
+TTCACTGGCAAAGACAGTCAC
++
+BEDDFHFHGIIICEHGDHBHE
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 2:N:0:GCCTAGCC
+ACTGCTTGAGTCCAGGAGTTC
++
+FDEFGHHHHIFGCHIJJJGGI
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 2:N:0:GCCTAGCC
+TTCCGATCTTTTTTTTAATTT
++
+DDDDFDHADEHGIGGED3?FD
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 2:Y:0:GCCTAGCC
+TGTTTGAGTTCCTTGTAGATT
++
+=@?>?@???@:>?@??>?;?<
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.barcode_1.fastq
new file mode 100644
index 0000000..9920e3b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 :N:0:GCCTAGCC
+GCCTAGCC
++
+B@@DFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 :N:0:GCCTAGCC
+GCCTAGCC
++
+?<@DFBBD
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 :N:0:GCCTAGCC
+GCCTAGCC
++
+BCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 :N:0:GCCTAGCC
+GCCTAGCC
++
+8?84B23?
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 :Y:0:GCCTAGCC
+GCCTAGCC
++
+########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.index_1.fastq
new file mode 100644
index 0000000..ee1c16c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GCCTAGCC.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 :N:0:GCCTAGCC
+ATGG
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 :N:0:GCCTAGCC
+TCAG
++
+C@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 :N:0:GCCTAGCC
+GAGG
++
+@@BF
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 :N:0:GCCTAGCC
+GCTC
++
+@@?D
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 :Y:0:GCCTAGCC
+NNTT
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.1.fastq
new file mode 100644
index 0000000..ad39be4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 1:N:0:GTAACATC
+TCCCCCTCCCTTTTGCGCACACACC
++
+@?@DDADDHDHBDH<EFHIIHG?HF
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 1:Y:0:GTAACATC
+TCACTAAACATCCAAACATCACTTT
++
+#########################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.2.fastq
new file mode 100644
index 0000000..c242dda
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 2:N:0:GTAACATC
+CCTTCAAGACAGAAGTGAGAA
++
+FDDEFHHFFE at FDHHAIAFHG
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 2:Y:0:GTAACATC
+TTTTTTTTTTTTTTTTTTTTT
++
+FFFFHHHHHJJJHFDDDDDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.barcode_1.fastq
new file mode 100644
index 0000000..e43e83f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 :N:0:GTAACATC
+GTAACATC
++
+@@?DFFDF
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 :Y:0:GTAACATC
+GTAACATC
++
+1+:A1A22
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.index_1.fastq
new file mode 100644
index 0000000..4629269
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTAACATC.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 :N:0:GTAACATC
+GCTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 :Y:0:GTAACATC
+CTTT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.1.fastq
new file mode 100644
index 0000000..b48b617
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 1:N:0:GTCCACAG
+TCCCTTACCATCAAATCAATTGNCC
++
+CCCFFFFFHHHHHJJJJJJJJJ#3A
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 1:N:0:GTCCACAG
+NCACCTCCTAGCCCCTCACTTCTGT
++
+#1=B;BDDHHHGFIIIIIIIIIGGG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.2.fastq
new file mode 100644
index 0000000..c567bf1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 2:N:0:GTCCACAG
+TNNNNNNNNNNNNNNNNNNNN
++
+?####################
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 2:N:0:GTCCACAG
+GTGCTCTTCCCGATCTGTATA
++
+F?DDFBHHHJJIIDHJIJJJH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.barcode_1.fastq
new file mode 100644
index 0000000..8a71cab
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 :N:0:GTCCACAG
+GTCCACAG
++
+ at BBFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 :N:0:GTCCACAG
+GTCCACAG
++
+CCCFFFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.index_1.fastq
new file mode 100644
index 0000000..0aa4bb7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/GTCCACAG.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 :N:0:GTCCACAG
+GACG
++
+<<<@
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 :N:0:GTCCACAG
+ACGT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.1.fastq
new file mode 100644
index 0000000..aef9f77
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.1.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 1:Y:0:
+NAATANNNNNNNNNNNNTNNNNNNN
++
+#0;@@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 1:Y:0:
+NCCAANGNNGGNNNNATGTAANNNN
++
+#4;@@#4##2<####43@@@@####
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 1:Y:0:
+NTGCCNGNGTTNCGNGGTCTTNNNN
++
+#4;@@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 1:Y:0:
+NCTAANGNACTNTGNGTGTGCNNNN
++
+#0;@@#4#3@@#3@#2<@@@@####
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 1:Y:0:
+NCTCCNGNTCANCANGTGGAGNNNN
++
+#0;?@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 1:N:0:
+GAAGTACGCCCTGCCCCTGGTTNGC
++
+?@@DAADAHHFHBEBEGGHG?####
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 1:Y:0:
+ACCTTGAGGAGAACATAAGAGCAAA
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 1:Y:0:
+GTTAGCACAGATATTGGATGAGTGA
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 1:Y:0:
+GCCAAAAAAAAGAACCAGCCCAAGG
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 1:Y:0:
+NATGCCCACCTCCCTCCTACGCACC
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 1:N:0:
+NAAGAGGGGTCAAGAGTTAAACTTA
++
+#1=DDFFFHFHHGIGHGHJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 1:N:0:
+ATGCAGCAGCTGCCACGGAGCACCA
++
+CC at FFDFDFHFHHGIDHEHIGJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 1:Y:0:
+TTGTTTTGGCTTATAATGACAAGAA
++
+;;8-2).2())(<6=@8;?4??>>?
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 1:N:0:
+NAATATAATTTGGAGACCCTTTGTT
++
+#1=DDDDDEDDDDIDDBB3ABAB##
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 1:Y:0:
+NGCGGGGAGCCGGGCGTGGAATGCG
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 1:N:0:
+AGCACGCTGCCGCGGGACCTGCCCA
++
+?@@AD at DDHFH?DGIIIIG at FGFBF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.2.fastq
new file mode 100644
index 0000000..26d3bb7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.2.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 2:Y:0:
+NNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 2:Y:0:
+NNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 2:Y:0:
+ANNNNNNNNNNNNNNNNNNNN
++
+@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 2:Y:0:
+ANNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 2:Y:0:
+GNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 2:N:0:
+GNNNNNNNNNNNNNNNNNNNN
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 2:Y:0:
+ACTGGGGAAGTTAGAGGAATG
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 2:Y:0:
+TTTTTATTTTTCTAAATACTT
++
+A####################
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 2:Y:0:
+TTTTTTTTTTTTTTTTTTTTT
++
+BDADF????FFEB>B6=BBBB
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 2:Y:0:
+ATAGTCACTGAAATGAATTCA
++
+>(2 at .22@@############
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 2:N:0:
+TGTCTTAGAAGGATGCTTCTC
++
+DDDEHHGHHJJJJJIJJIIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 2:N:0:
+TTCAGATCTAGGGGGAACAGC
++
+D?=DCAFFFHIIDG:EFHIII
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 2:Y:0:
+TTTTTTTTTTTTTTTTTTTTA
++
+@?@;5=?##############
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 2:N:0:
+TTCCCCCATTAAGAACAGCAA
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 2:Y:0:
+TATATCAACCAACACCTCTTC
++
+():94:9:???##########
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 2:N:0:
+ACCCTTGTGTCGAGGGCTGAC
++
+DFDFFDFHFGIIE1CGGHBGE
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.barcode_1.fastq
new file mode 100644
index 0000000..f9b35e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.barcode_1.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 :Y:0:
+NNNNNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 :Y:0:
+NNNNNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 :Y:0:
+NNNNNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 :Y:0:
+NNNANNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 :Y:0:
+NNNCNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 :N:0:
+GAACGATN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 :Y:0:
+ACAAAATT
++
+########
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 :Y:0:
+AAAAAAAA
++
+########
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 :Y:0:
+AGAAAAGA
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 :Y:0:
+ACGAAATC
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 :N:0:
+TACCGTCT
++
+1:?D####
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 :N:0:
+TCCGTCTA
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 :Y:0:
+GAAAAAAA
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 :N:0:
+AAAAGAAG
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 :Y:0:
+TATCTCGG
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 :N:0:
+ACCAGTTG
++
+ at C@DDDB?
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.index_1.fastq
new file mode 100644
index 0000000..e59bd66
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/N.index_1.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 :Y:0:
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 :Y:0:
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 :Y:0:
+NNGG
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 :Y:0:
+NNTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 :Y:0:
+NNGT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 :N:0:
+NCTT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 :Y:0:
+TAAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 :Y:0:
+TTTT
++
+===A
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 :Y:0:
+TTTT
++
+@;@1
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 :Y:0:
+NCTG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 :N:0:
+NGAA
++
+#1=B
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 :N:0:
+GCTC
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 :Y:0:
+TTTT
++
+9<<?
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 :N:0:
+ATCT
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 :Y:0:
+TGTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 :N:0:
+ACAA
++
+CC at F
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.1.fastq
new file mode 100644
index 0000000..11586af
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 1:N:0:TAAGCACA
+CATGCAGCGCAAGTAGGTCTACAAG
++
+@@;DFAFFHHHHAHEGHFDGGFABG
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 1:N:0:TAAGCACA
+NCACACACACACTCATTCACAGCTT
++
+#1=DDDFFHHHFHJJIJGGGIIGIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.2.fastq
new file mode 100644
index 0000000..6168946
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 2:N:0:TAAGCACA
+TGGGAGGGCGATGAGGACTAG
++
+DDDACC:FHHGIH<EGDDDFH
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 2:N:0:TAAGCACA
+ACAAAACACCAAAATAAAATA
++
+#####################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.barcode_1.fastq
new file mode 100644
index 0000000..c09def6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 :N:0:TAAGCACA
+TAAGCACA
++
+@@@FFADB
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 :N:0:TAAGCACA
+TAAGCACA
++
+@@CDDFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.index_1.fastq
new file mode 100644
index 0000000..0ad20a8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAAGCACA.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 :N:0:TAAGCACA
+GGGA
++
+8?@:
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 :N:0:TAAGCACA
+TTAG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TACCGTCT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TAGCGGTA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCAGCC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.1.fastq
new file mode 100644
index 0000000..db9ec5c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 1:N:0:TATCCAGG
+TTTGACAGTCTCTGAATGAGAANGG
++
+CCCFFFFFHHHHHJIIIJJJIJ#4A
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 1:N:0:TATCCAGG
+NTTTCAGTTCAGAGAACTGCAGAAT
++
+#1=DBDFDHHHHGJIJJJJJIIIJI
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 1:N:0:TATCCAGG
+TTTAAATGGGTAAGAAGCCCGGCTC
++
+ at BCDDFEFHHDHHJJJJJIJJIJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 1:N:0:TATCCAGG
+AGACAGAAGTACGGGAAGGCGAAGA
++
+@@@FFFFEHFHHHJJCGDHIIECD@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.2.fastq
new file mode 100644
index 0000000..08fdca5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 2:N:0:TATCCAGG
+GNNNNNNNNNNNNNNNNNNNN
++
+@####################
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 2:N:0:TATCCAGG
+TAAATTGGTCTTAGATGTTGC
++
+FFFFHHHHFGIJIIIJIJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 2:N:0:TATCCAGG
+TTAGCGGATTCCGACTTCCAT
++
+FFFDHHHHGIJJIGIGIJJGG
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 2:N:0:TATCCAGG
+TTTTGTTTCCTAGCTTGTCTT
++
+DFFFHHHHF4ACFHIJHHHGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.barcode_1.fastq
new file mode 100644
index 0000000..743cd5c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 :N:0:TATCCAGG
+TATCCAGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 :N:0:TATCCAGG
+TATCCAGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 :N:0:TATCCAGG
+TATCCAGG
++
+@@BFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 :N:0:TATCCAGG
+TATCCATG
++
+@@@BDDDF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.index_1.fastq
new file mode 100644
index 0000000..f5db088
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCAGG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 :N:0:TATCCAGG
+GTTT
++
+<<<@
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 :N:0:TATCCAGG
+TTCA
++
+CC at F
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 :N:0:TATCCAGG
+CTCC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 :N:0:TATCCAGG
+AGCT
++
+?@?D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCCATG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTCGG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.1.fastq
new file mode 100644
index 0000000..2635d7d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 1:N:0:TATCTGCC
+GAGACGGAGGCCAACGGGGGCCTGG
++
+@@CFFFFD8FDHFHIGIBG?@BCDG
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 1:N:0:TATCTGCC
+TTGCTTGTCTGTAAAGTATTTTATT
++
+ at C@DDFFDHHFHFHHIBGG>IHHII
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 1:N:0:TATCTGCC
+GAGAAAGAACATGACTACAGAGATG
++
+CCCFFFFFHHHHHJJJJJJJJJHJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 1:N:0:TATCTGCC
+GGCCCGTGGACGCCGCCGAAGAAGC
++
+CCCFFFFFHHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 1:N:0:TATCTGCC
+TGTTGATAGTCCTTCTTATCTTAGT
++
+???DB?==CC2<AC:CC<CFEF<FF
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 1:N:0:TATCTGCC
+NGCGGTAATTCCAGCTCCAATAGCG
++
+#1:BB2 at DHHFHHIIIIHHIIGHGG
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 1:N:0:TATCTGCC
+NCTGAAGAGGCCAAAGCGCCCTCCA
++
+#1=DDFFFHHHHHJJJJJJJJJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.2.fastq
new file mode 100644
index 0000000..dfd8101
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 2:N:0:TATCTGCC
+GAGTCTCCAACAGCCCCGTAC
++
+DDD?CCFHAIIIGGIIGE at EG
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 2:N:0:TATCTGCC
+TCTTCCGATCTTCAGGTTACC
++
+FFFFHHHHHJJJJJJJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 2:N:0:TATCTGCC
+GTTTTCTTTTACTGAAGTGTA
++
+FDFFHHHHHJJJJIHIJHHHJ
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 2:N:0:TATCTGCC
+TCCTCAGGCTCTCATCAGTTG
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 2:N:0:TATCTGCC
+AATGTAAAATAATAAAAAATG
++
+DDD;AF<DF<FFFFIIIFF@<
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 2:N:0:TATCTGCC
+AAGAGCCCGCATTGCCGAGAC
++
+AA###################
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 2:N:0:TATCTGCC
+GACAGGGGGATTTGGGCTGTG
++
+FFFFHHHHHHJJJHIJIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.barcode_1.fastq
new file mode 100644
index 0000000..f396a3e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 :N:0:TATCTGCC
+TATCAGCC
++
+?@@D;ADD
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 :N:0:TATCTGCC
+TATCTGCC
++
+ at B@FFEFF
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 :N:0:TATCTGCC
+TATCTGCC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 :N:0:TATCTGCC
+TATCTGCC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 :N:0:TATCTGCC
+TATCTGCC
++
+??<D?D83
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 :N:0:TATCTGCC
+TATCTGCC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 :N:0:TATCTGCC
+TATCTGCC
++
+CCCDF?DD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.index_1.fastq
new file mode 100644
index 0000000..8c2d0b7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TATCTGCC.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 :N:0:TATCTGCC
+GGCA
++
+=;?D
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 :N:0:TATCTGCC
+GTGC
++
+BBBF
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 :N:0:TATCTGCC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 :N:0:TATCTGCC
+TGGC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 :N:0:TATCTGCC
+GTAA
++
+?=?D
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 :N:0:TATCTGCC
+AAAA
++
+=<=;
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 :N:0:TATCTGCC
+GTCA
++
+BBCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCCGTCTA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.1.fastq
new file mode 100644
index 0000000..25c9e17
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 1:N:0:TCGCTAGA
+GGAGCGAGTCTGGGTCTCAGCCCCG
++
+CCCFFFFFHHHHHJGHIIIHJJJJI
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 1:N:0:TCGCTAGA
+TGTAAAGTATGCTGGCTCAGTGTAT
++
+BBBFDFFEHHHHHJJJJJJJIJHJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 1:N:0:TCGCTAGA
+NTCCCAGCGAACCCGCGTGCAACCT
++
+#1=DFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 1:N:0:TCGCTAGA
+NACAGGCGTGGAGGAGGCGGCGGCC
++
+#4=DDDFFHHHHHJIGJHFHHFFED
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 1:N:0:TCGCTAGA
+ATGAACAAAGGAAGAATTATGCACG
++
+?;?D;DDDF?;:+<<CFFCHE433A
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.2.fastq
new file mode 100644
index 0000000..3e867d2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 2:N:0:TCGCTAGA
+AAGTCTGGCTTATCACTCATC
++
+FFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 2:N:0:TCGCTAGA
+AAATCTATTTTTATGTAAAAA
++
+FFFFHHHHHJIGIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 2:N:0:TCGCTAGA
+GCAGGAGCCGGCGCAGGTGCA
++
+FFFFHHHHHJJJIJJJJGHIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 2:N:0:TCGCTAGA
+TGTGGAGGCGGTGGCGGGATC
++
+DDDDHHFHHII:?GGHIIB6?
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 2:N:0:TCGCTAGA
+AAAAGGTTGTCAAGCGTTAAA
++
+(<?##################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.barcode_1.fastq
new file mode 100644
index 0000000..77b869b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 :N:0:TCGCTAGA
+TCGCTAGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 :N:0:TCGCTAGA
+TCGCTAGA
++
+ at BCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 :N:0:TCGCTAGA
+TCGCTAGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 :N:0:TCGCTAGA
+TCGCTAGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 :N:0:TCGCTAGA
+TCGCTAGA
++
+;@<:AA at A
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.index_1.fastq
new file mode 100644
index 0000000..c038d75
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCGCTAGA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 :N:0:TCGCTAGA
+CGAC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 :N:0:TCGCTAGA
+GGGG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 :N:0:TCGCTAGA
+ATTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 :N:0:TCGCTAGA
+TTGG
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 :N:0:TCGCTAGA
+NGGA
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.1.fastq
new file mode 100644
index 0000000..2b9e8b0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 1:N:0:TCTGCAAG
+NGTTGGTGTCTTCATTTTATGTATA
++
+#1=DDFDFHHHHHJIJJJHIJHIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.2.fastq
new file mode 100644
index 0000000..abb2c2f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 2:N:0:TCTGCAAG
+GGAAGGCNNCAAAAAAAGAAA
++
+@@?@?<@##3<@@?@@?????
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.barcode_1.fastq
new file mode 100644
index 0000000..5f73cc8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.barcode_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 :N:0:TCTGCAAG
+TCTGCAAG
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.index_1.fastq
new file mode 100644
index 0000000..85773e9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TCTGCAAG.index_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 :N:0:TCTGCAAG
+NTCA
++
+#0;@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.1.fastq
new file mode 100644
index 0000000..45daf27
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 1:N:0:TGCAAGTA
+ATGGCAGGGCAGAGTTCTGATGAGT
++
+CCCFFFFFHHGGGIFHEIIGIIII?
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 1:N:0:TGCAAGTA
+GAGCAGGCAAGGAGGACTTCTTGTT
++
+CCCFFFFFGHHHHJJHHIJJJJJIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.2.fastq
new file mode 100644
index 0000000..a9608a3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 2:N:0:TGCAAGTA
+GGAAAAGAAGCACAAGTACAT
++
+FDFFHHHGHHGIIGJJEHHIG
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 2:N:0:TGCAAGTA
+GATAATGGTTCTTTTCCTCAC
++
+FFFFHHHHHJJJJJJJIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.barcode_1.fastq
new file mode 100644
index 0000000..c310ad9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 :N:0:TGCAAGTA
+TGCAAGTA
++
+@@CFFF?D
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 :N:0:TGCAAGTA
+TGCAAGTA
++
+CCCFFFEF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.index_1.fastq
new file mode 100644
index 0000000..29e1553
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCAAGTA.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 :N:0:TGCAAGTA
+GGAA
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 :N:0:TGCAAGTA
+GAGC
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.1.fastq
new file mode 100644
index 0000000..56c58e4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 1:N:0:TGCTGCTG
+NTCTCACTGTGAATTTGTGGTGGGC
++
+#1=DDFFFHHHHHJJJJGIJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 1:N:0:TGCTGCTG
+NAATGACATGTTTAAAGATGGACTC
++
+#1:BDDFFHHFHHGIJIJIIIIGII
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 1:N:0:TGCTGCTG
+AGAGGTGAAATTCTTGGACCGGCGC
++
+@@@DDDDDHFHHHDB:EFHHCAG?D
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 1:N:0:TGCTGCTG
+CCGAGAGAGTGAGAGCGCTCCTGGG
++
+CCCFFFFFHFHHHJJJJIJJJJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.2.fastq
new file mode 100644
index 0000000..0a688c7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 2:N:0:TGCTGCTG
+TNNNNNNNNNNNNNNNNNNNN
++
+@####################
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 2:N:0:TGCTGCTG
+TTTTTTGCTTTGTAGTTATAG
++
+FFFFHHHHHIIGIABCFFHBF
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 2:N:0:TGCTGCTG
+TTTATGGTCGGAACTACGACG
++
+FFFFHHHHHIJJJJJJJIJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 2:N:0:TGCTGCTG
+GAACTTCACCACCCAGAGGAA
++
+FFFFHHHHHJJJJJJIJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.barcode_1.fastq
new file mode 100644
index 0000000..1d013d1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 :N:0:TGCTGCTG
+TGCTGCTG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 :N:0:TGCTGCTG
+TGCTGCTG
++
+@@@FFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 :N:0:TGCTGCTG
+TGCTGCTG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 :N:0:TGCTGCTG
+TGCTGCTG
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.index_1.fastq
new file mode 100644
index 0000000..39ae92d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGCTGCTG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 :N:0:TGCTGCTG
+TTTC
++
+<<<@
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 :N:0:TGCTGCTG
+GATC
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 :N:0:TGCTGCTG
+ATCG
++
+BCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 :N:0:TGCTGCTG
+AATT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.1.fastq
new file mode 100644
index 0000000..17d82b7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 1:N:0:TGTAACTC
+TGTGTGTGTGGGTGTGTGTATATAT
++
+?@?DDFFFFFHH at GEFCCCHGIGJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.2.fastq
new file mode 100644
index 0000000..b3ef374
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 2:N:0:TGTAACTC
+CTCTTCCGATCTTGTGCTCTT
++
+FFFFHHHHHJJJJFHIHHIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.barcode_1.fastq
new file mode 100644
index 0000000..2ec05e5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.barcode_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 :N:0:TGTAACTC
+TGTAACTC
++
+@@@FFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.index_1.fastq
new file mode 100644
index 0000000..5dc8010
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAACTC.index_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 :N:0:TGTAACTC
+TGTG
++
+BC at D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.1.fastq
new file mode 100644
index 0000000..3eb756e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 1:N:0:TGTAATCA
+NATGACTATGGTAACTGAAAGAAAA
++
+#1:A1BDADBFFDFIIIEEHECACF
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 1:N:0:TGTAATCA
+NCCTCAATGAGCGGCACTATGGGGG
++
+#1=DDFFFHHHHGJJIJJGHIJGIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 1:N:0:TGTAATCA
+TATCCTCCCTACTATGCCTAGAAGG
++
+=?@DADEFHBHDFG>EFGDHGFGHD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.2.fastq
new file mode 100644
index 0000000..f05f8b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 2:N:0:TGTAATCA
+TCCTTTTTTGTTTTACTTTAA
++
+#####################
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 2:N:0:TGTAATCA
+AGAAAGGATGGTCGGGCTCCA
++
+FFFFGHFHHJIJJGJIBHJJG
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 2:N:0:TGTAATCA
+TTAGTTTTAGCATTGGAGTAG
++
+DDDDFHHHFGGHHIIIGGAGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.barcode_1.fastq
new file mode 100644
index 0000000..13972ab
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 :N:0:TGTAATCA
+TGTAATCA
++
+@@@DFDFD
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 :N:0:TGTAATCA
+TGTAATCA
++
+CC at FFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 :N:0:TGTAATCA
+TGTAATCA
++
+=?1AA:=D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.index_1.fastq
new file mode 100644
index 0000000..bf97040
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TGTAATCA.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 :N:0:TGTAATCA
+ACTT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 :N:0:TGTAATCA
+CTGT
++
+@@CD
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 :N:0:TGTAATCA
+ACGA
++
+@<??
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.1.fastq
new file mode 100644
index 0000000..b8f172c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 1:N:0:TTGTCTAT
+TCAAGCAGGAGCAGCTAAGTCCTAA
++
+CCCFFFFFHHHHHJJJJJJHIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 1:N:0:TTGTCTAT
+GTAAGAACTACCCTGGGTCCCCGTG
++
+@@BFFFFFHHHHHJJJJGIJJJJHI
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 1:N:0:TTGTCTAT
+NGGGAACCTGGCGCTAAACCATTCG
++
+#1=DFFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 1:N:0:TTGTCTAT
+NTTCCCCCCATGTAATTATTGTGAA
++
+#1=DDFFFHHHHHJJJJJJJJIJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.2.fastq
new file mode 100644
index 0000000..4361176
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 2:N:0:TTGTCTAT
+TATCCACTCCTTCCACTTTGG
++
+FFFFHHHHHJJIJJJJJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 2:N:0:TTGTCTAT
+GTTTCAGAATTGTGGCCCCAT
++
+FDEFHHHHHJJJGHIJJJJJI
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 2:N:0:TTGTCTAT
+ACCCTTGTGTCGAGGGCTGAC
++
+FFFFHHGHHJJJJIIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 2:N:0:TTGTCTAT
+ATTTTGCCTATGTCCAACAAG
++
+FFFFGHHHHJIJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.barcode_1.fastq
new file mode 100644
index 0000000..e53ec2d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 :N:0:TTGTCTAT
+TTGTCTAT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 :N:0:TTGTCTAT
+TTGTCTAT
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 :N:0:TTGTCTAT
+TTGTCTAT
++
+B at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 :N:0:TTGTCTAT
+TTGTCTAT
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.index_1.fastq
new file mode 100644
index 0000000..4c8dea2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/TTGTCTAT.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 :N:0:TTGTCTAT
+ATCT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 :N:0:TTGTCTAT
+AGAA
++
+B at BF
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 :N:0:TTGTCTAT
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 :N:0:TTGTCTAT
+AGTT
++
+BCBF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M/mp_barcode.params b/testdata/picard/illumina/25T8B25T/fastq_with_4M/mp_barcode.params
new file mode 100644
index 0000000..ae21f31
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M/mp_barcode.params
@@ -0,0 +1,62 @@
+BARCODE_1 SAMPLE_ALIAS LIBRARY_NAME
+AAAAGAAG SA_AAAAGAAG LN_AAAAGAAG
+AACAATGG SA_AACAATGG LN_AACAATGG
+AACGCATT SA_AACGCATT LN_AACGCATT
+ACAAAATT SA_ACAAAATT LN_ACAAAATT
+ACAGGTAT SA_ACAGGTAT LN_ACAGGTAT
+ACAGTTGA SA_ACAGTTGA LN_ACAGTTGA
+ACCAGTTG SA_ACCAGTTG LN_ACCAGTTG
+ACGAAATC SA_ACGAAATC LN_ACGAAATC
+ACTAAGAC SA_ACTAAGAC LN_ACTAAGAC
+ACTGTACC SA_ACTGTACC LN_ACTGTACC
+ACTGTATC SA_ACTGTATC LN_ACTGTATC
+AGAAAAGA SA_AGAAAAGA LN_AGAAAAGA
+AGCATGGA SA_AGCATGGA LN_AGCATGGA
+AGGTAAGG SA_AGGTAAGG LN_AGGTAAGG
+AGGTCGCA SA_AGGTCGCA LN_AGGTCGCA
+ATTATCAA SA_ATTATCAA LN_ATTATCAA
+ATTCCTCT SA_ATTCCTCT LN_ATTCCTCT
+CAACTCTC SA_CAACTCTC LN_CAACTCTC
+CAATAGAC SA_CAATAGAC LN_CAATAGAC
+CAATAGTC SA_CAATAGTC LN_CAATAGTC
+CAGCGGAT SA_CAGCGGAT LN_CAGCGGAT
+CAGCGGTA SA_CAGCGGTA LN_CAGCGGTA
+CCAACATT SA_CCAACATT LN_CCAACATT
+CCAGCACC SA_CCAGCACC LN_CCAGCACC
+CCATGCGT SA_CCATGCGT LN_CCATGCGT
+CGCCTTCC SA_CGCCTTCC LN_CGCCTTCC
+CGCTATGT SA_CGCTATGT LN_CGCTATGT
+CTAACTCG SA_CTAACTCG LN_CTAACTCG
+CTATGCGC SA_CTATGCGC LN_CTATGCGC
+CTATGCGT SA_CTATGCGT LN_CTATGCGT
+CTGCGGAT SA_CTGCGGAT LN_CTGCGGAT
+CTGTAATC SA_CTGTAATC LN_CTGTAATC
+GAAAAAAA SA_GAAAAAAA LN_GAAAAAAA
+GAACGAT. SA_GAACGAT. LN_GAACGAT.
+GAAGGAAG SA_GAAGGAAG LN_GAAGGAAG
+GACCAGGA SA_GACCAGGA LN_GACCAGGA
+GACCAGGC SA_GACCAGGC LN_GACCAGGC
+GACCGTTG SA_GACCGTTG LN_GACCGTTG
+GACCTAAC SA_GACCTAAC LN_GACCTAAC
+GATATCCA SA_GATATCCA LN_GATATCCA
+GCCGTCGA SA_GCCGTCGA LN_GCCGTCGA
+GCCTAGCC SA_GCCTAGCC LN_GCCTAGCC
+GTAACATC SA_GTAACATC LN_GTAACATC
+GTCCACAG SA_GTCCACAG LN_GTCCACAG
+TAAGCACA SA_TAAGCACA LN_TAAGCACA
+TACCGTCT SA_TACCGTCT LN_TACCGTCT
+TAGCGGTA SA_TAGCGGTA LN_TAGCGGTA
+TATCAGCC SA_TATCAGCC LN_TATCAGCC
+TATCCAGG SA_TATCCAGG LN_TATCCAGG
+TATCCATG SA_TATCCATG LN_TATCCATG
+TATCTCGG SA_TATCTCGG LN_TATCTCGG
+TATCTGCC SA_TATCTGCC LN_TATCTGCC
+TCCGTCTA SA_TCCGTCTA LN_TCCGTCTA
+TCGCTAGA SA_TCGCTAGA LN_TCGCTAGA
+TCTGCAAG SA_TCTGCAAG LN_TCTGCAAG
+TGCAAGTA SA_TGCAAGTA LN_TGCAAGTA
+TGCTGCTG SA_TGCTGCTG LN_TGCTGCTG
+TGTAACTC SA_TGTAACTC LN_TGTAACTC
+TGTAATCA SA_TGTAATCA LN_TGTAATCA
+TTGTCTAT SA_TTGTCTAT LN_TTGTCTAT
+N SA_N LN_N
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AAAAGAAG.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.1.fastq
new file mode 100644
index 0000000..a7b70ea
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 1:N:0:AACAATGG
+NTTACCAAGGTTTTCTGTTTAGTGA
++
+#1=DDFFFHHFHHJJJIHJIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 1:N:0:AACAATGG
+NATTCTGCCATATTGGTCCGACAGT
++
+#1=DDFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 1:N:0:AACAATGG
+CACAGGCTTCCACGGACTTAACGTC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 1:N:0:AACAATGG
+TGCCATCTGCTCTGGGAAGCACCAG
++
+1:=DDDDDFBC:DEFIFFFIEF at BE
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.2.fastq
new file mode 100644
index 0000000..9a050b2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 2:N:0:AACAATGG
+GCTTCAGGTCGATCAGA
++
+HGHHHJJIGHIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 2:N:0:AACAATGG
+AGTGGTGCACTGAATGT
++
+HHHHHHIIJJJJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 2:N:0:AACAATGG
+GGCGGATGAAGCAGATA
++
+HHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 2:N:0:AACAATGG
+TTGTAATTTCGTCTTCT
++
+CCFCAACGGFFCBFFAE
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.barcode_1.fastq
new file mode 100644
index 0000000..82cdde4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 :N:0:AACAATGG
+AACAATGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 :N:0:AACAATGG
+AACAATGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 :N:0:AACAATGG
+AACAATGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 :N:0:AACAATGG
+AACAATGG
++
+@@?BBDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.index_1.fastq
new file mode 100644
index 0000000..3855f97
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 1:N:0:AACAATGG
+TCCG
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 1:N:0:AACAATGG
+ATCT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 1:N:0:AACAATGG
+NATT
++
+#4=D
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 1:N:0:AACAATGG
+TCGT
++
+?8?D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.index_2.fastq
new file mode 100644
index 0000000..bea8aa1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACAATGG.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1138:2141 2:N:0:AACAATGG
+ATCT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1206:2126 2:N:0:AACAATGG
+GTCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1077:2139 2:N:0:AACAATGG
+AGTT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1112:2245 2:N:0:AACAATGG
+AGTG
++
+BDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.1.fastq
new file mode 100644
index 0000000..1c1c29f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 1:N:0:AACGCATT
+GGGCGCCCCGTGAGGACCCAGTCCT
++
+ at C@FFADDFFCFCEHIIJIJJIEFC
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 1:Y:0:AACGCATT
+TTTTGGAAGAGACCTCAATTACTGT
++
+???DDDDD?:22AE:A2<3,AF?3A
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 1:N:0:AACGCATT
+NCGTCCTGGAAAACGGGGCGCGGCT
++
+#1=BDBDDFHHHHF at FHDHIGIIII
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 1:N:0:AACGCATT
+AAGTCACCTAATATCTTTTTTTTTT
++
+@@<??;?D?CFD,A4CDDHFBIIID
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 1:Y:0:AACGCATT
+ATAAAACATAGCAATATTTTCCTAT
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 1:N:0:AACGCATT
+NACTATCAGGATCGTGGCTATTTTG
++
+#1BDDFFFHHHHHJIJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 1:N:0:AACGCATT
+NCGAGTGCCTAGTGGGCCACTTTTG
++
+#4=DDBDFHHHHFHIJJJJIJJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.2.fastq
new file mode 100644
index 0000000..ed16a91
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 2:N:0:AACGCATT
+CTGGAACCACAGAACCC
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 2:Y:0:AACGCATT
+GTAATCCCCGCATGTGT
++
+AFFDFFGFDGFB at CFB:
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 2:N:0:AACGCATT
+TTGTGTCGAGGGCTGAC
++
+HHHHHJJJJJJJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 2:N:0:AACGCATT
+ACTGTGATTGTGCCACT
++
+GHHHHGIIIICEHCFGH
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 2:Y:0:AACGCATT
+TCCTTGTTACATGCCCA
++
+D?:ADEE@::C4:C<E:
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 2:N:0:AACGCATT
+ACAGCTCCAGGTGCTCC
++
+HHHHHJJJJJJCGHIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 2:N:0:AACGCATT
+CATGGCCACCGTCCTGC
++
+HHHHHJJJIIGFIIJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.barcode_1.fastq
new file mode 100644
index 0000000..8fa98a2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 :N:0:AACGCATT
+AACGCATT
++
+ at CCFDFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 :Y:0:AACGCATT
+AACGCATT
++
+:?@B?@DD
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 :N:0:AACGCATT
+AACGCATT
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 :N:0:AACGCATT
+AACGCATT
++
+@@@FDDDD
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 :Y:0:AACGCATT
+AACGCATT
++
+88+AD@?8
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 :N:0:AACGCATT
+AACGCATT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 :N:0:AACGCATT
+AACGCATT
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.index_1.fastq
new file mode 100644
index 0000000..cfaf9b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 1:N:0:AACGCATT
+ATAT
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 1:Y:0:AACGCATT
+TCTG
++
+1?1=
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 1:N:0:AACGCATT
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 1:N:0:AACGCATT
+TTCT
++
+ at C@F
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 1:Y:0:AACGCATT
+ACTG
++
+??##
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 1:N:0:AACGCATT
+AGAC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 1:N:0:AACGCATT
+CCGA
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.index_2.fastq
new file mode 100644
index 0000000..a3aad40
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AACGCATT.index_2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1197:2200 2:N:0:AACGCATT
+TCCA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1308:2153 2:Y:0:AACGCATT
+TAAG
++
+4===
+ at machine1:HiMom:abcdeACXX:1:1101:1452:2132 2:N:0:AACGCATT
+ACCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1150:2161 2:N:0:AACGCATT
+CACT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1240:2197 2:Y:0:AACGCATT
+GAGA
++
++A:D
+ at machine1:HiMom:abcdeACXX:1:2101:1336:2109 2:N:0:AACGCATT
+CAGA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1427:2081 2:N:0:AACGCATT
+CTTC
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAAAATT.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.1.fastq
new file mode 100644
index 0000000..5fc0cdd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 1:N:0:ACAGGTAT
+NGGTGCTTCATATCCCTCTAGAGGA
++
+#1=BDDFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 1:N:0:ACAGGTAT
+NAGAAGCCCCAGGAGGAAGACAGTC
++
+#1=DDFFFHHHHHHHJIIJJJJJGI
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 1:N:0:ACAGGTAT
+TCCTATTCGCCTACACAATTCTCCG
++
+CCCFFFFFHHHHHJJJJJJJHJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 1:N:0:ACAGGTAT
+NCAGAAGAAAGGGCCTTGTCGGAGG
++
+#1=DDDDDHHFHDGI at EEHG:?FA8
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.2.fastq
new file mode 100644
index 0000000..bfee694
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 2:N:0:ACAGGTAT
+ACTTTGTAGCCTTCATC
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 2:N:0:ACAGGTAT
+GAGACTGGCAACTTAAA
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 2:N:0:ACAGGTAT
+ATGAGGATGGATAGTAA
++
+HHHHHJHIIJHIIIHHJ
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 2:N:0:ACAGGTAT
+TCCGATCTGGAGAAAAA
++
+=@###############
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.barcode_1.fastq
new file mode 100644
index 0000000..22e52fa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 :N:0:ACAGGTAT
+ACAGGTAT
++
+CCCFFDDF
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 :N:0:ACAGGTAT
+ACAGGTAT
++
+CCCFFBDD
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 :N:0:ACAGGTAT
+ACAGGTAT
++
+CCCFFDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 :N:0:ACAGGTAT
+ACAGGTAT
++
+@@@BD=DD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.index_1.fastq
new file mode 100644
index 0000000..7f5332e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 1:N:0:ACAGGTAT
+TTGC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 1:N:0:ACAGGTAT
+ATAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 1:N:0:ACAGGTAT
+NTGC
++
+#1=D
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 1:N:0:ACAGGTAT
+TGTG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.index_2.fastq
new file mode 100644
index 0000000..049df11
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGGTAT.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1236:2121 2:N:0:ACAGGTAT
+GCTT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1341:2116 2:N:0:ACAGGTAT
+CAGC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1063:2206 2:N:0:ACAGGTAT
+TAGG
++
+DDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1325:2083 2:N:0:ACAGGTAT
+CTCT
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.1.fastq
new file mode 100644
index 0000000..29bc1af
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 1:N:0:ACAGTTGA
+NCTGCCGTGTCCTGACTTCTGGAAT
++
+#1:B?ADDACF<DCG;EG<FHH at CE
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 1:N:0:ACAGTTGA
+TTTTCTTGGCCTCTGTTTTTTTTTT
++
+BCCFDFFFHHFFHJIGIJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.2.fastq
new file mode 100644
index 0000000..83d8edf
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 2:N:0:ACAGTTGA
+CGTTGAAGCACTGGATC
++
+<CFFHCHGDBHGIIIII
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 2:N:0:ACAGTTGA
+CACTGCATTTTAAATAC
++
+HFFHHGGDFHFHIIHGG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.barcode_1.fastq
new file mode 100644
index 0000000..5c55767
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 :N:0:ACAGTTGA
+ACAGTTGA
++
+?@7DDDDA
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 :N:0:ACAGTTGA
+ACAGTTGA
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.index_1.fastq
new file mode 100644
index 0000000..3931539
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 1:N:0:ACAGTTGA
+NGTC
++
+#11A
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 1:N:0:ACAGTTGA
+AGGC
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.index_2.fastq
new file mode 100644
index 0000000..e245ca2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACAGTTGA.index_2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1048:2238 2:N:0:ACAGTTGA
+ACAT
++
+DDDB
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2193 2:N:0:ACAGTTGA
+ATGA
++
+DDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACCAGTTG.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACGAAATC.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.1.fastq
new file mode 100644
index 0000000..0789790
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 1:N:0:ACTAAGAC
+CACCTATAATCCCAGCTACTCCAGA
++
+CCCFFFFFHHHHHJJJJJJIJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 1:Y:0:ACTAAGAC
+NTGAAATCTGGATAGGCTGGAGTTA
++
+#0-@@@###################
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 1:N:0:ACTAAGAC
+NGGCCCCACCCTCCTCCAGCACGTC
++
+#1=DDFFFHHHHHJJJJJJHIIHFH
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 1:Y:0:ACTAAGAC
+AGCAGAAGGGCAAAAGCTGGCTTGA
++
+9;<@:@###################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.2.fastq
new file mode 100644
index 0000000..82538cd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 2:N:0:ACTAAGAC
+ATTTTTTTAGACATAGG
++
+GHHHHJJJJIGIIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 2:Y:0:ACTAAGAC
+TTTTTTTTTTTTTTTTT
++
+HGHHHJJIFDDDDDDDD
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 2:N:0:ACTAAGAC
+CTGCTAGCTGGCCAGAG
++
+@@??@?????????>?@
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 2:Y:0:ACTAAGAC
+TAACTTTTCTGACACCT
++
+@?:8>?4:>?@######
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.barcode_1.fastq
new file mode 100644
index 0000000..e674b5f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 :N:0:ACTAAGAC
+ACTAAGAC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 :Y:0:ACTAAGAC
+ACTAAGAC
++
+>7+ at A7A7
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 :N:0:ACTAAGAC
+ACTAAGAC
++
+ at CCDFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 :Y:0:ACTAAGAC
+ACTAAGAC
++
+1+8?ADD8
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.index_1.fastq
new file mode 100644
index 0000000..0b539bf
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 1:N:0:ACTAAGAC
+ATTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 1:Y:0:ACTAAGAC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 1:N:0:ACTAAGAC
+NNGG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 1:Y:0:ACTAAGAC
+TCTT
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.index_2.fastq
new file mode 100644
index 0000000..2ee536f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTAAGAC.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1259:2152 2:N:0:ACTAAGAC
+TTAT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1261:2127 2:Y:0:ACTAAGAC
+TTTT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1021:2209 2:N:0:ACTAAGAC
+AAGG
++
+@>??
+ at machine1:HiMom:abcdeACXX:1:2101:1262:2128 2:Y:0:ACTAAGAC
+GTGG
++
+-9@;
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTACC.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.1.fastq
new file mode 100644
index 0000000..b474248
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 1:N:0:ACTGTATC
+NGAGACCATAGAGCGGATGCTTTCA
++
+#1=DDDFFHHGHGIJJIGIIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 1:N:0:ACTGTATC
+TTGGAACACAGCGGGAATCACAGCA
++
+CCCFFFFFHHHHHJIJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 1:N:0:ACTGTATC
+NCAAGTAGCAGTGTCACGCCTTAGC
++
+#1=DDBDDADFDDBEH at HC=CEGG@
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 1:N:0:ACTGTATC
+NAAGGTGAAGGCCGGCGCGCTCGCC
++
+#1=BDDDFFHHHHJGGGIGFIHIIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.2.fastq
new file mode 100644
index 0000000..3ac3ac7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 2:N:0:ACTGTATC
+CACACAAGAACTTTTTT
++
+HHHHHJJJJJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 2:N:0:ACTGTATC
+GCAACAGCAGAAACATG
++
+HHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 2:N:0:ACTGTATC
+TCATTGGTGTCTGAAGA
++
+>>?##############
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 2:N:0:ACTGTATC
+GATCTTGTGCTCTTCCG
++
+HFHHGJJIIJIJJIHII
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.barcode_1.fastq
new file mode 100644
index 0000000..6a8b924
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 :N:0:ACTGTATC
+ACTGTATC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 :N:0:ACTGTATC
+ACTGTATC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 :N:0:ACTGTATC
+ACTGTATC
++
+=;7+22<A
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 :N:0:ACTGTATC
+ACTGTACC
++
+########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.index_1.fastq
new file mode 100644
index 0000000..cc4cab3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 1:N:0:ACTGTATC
+GATA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 1:N:0:ACTGTATC
+CAGC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 1:N:0:ACTGTATC
+AGTC
++
+<5;?
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 1:N:0:ACTGTATC
+GCTC
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.index_2.fastq
new file mode 100644
index 0000000..4398cce
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ACTGTATC.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1458:2109 2:N:0:ACTGTATC
+CGAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1105:2131 2:N:0:ACTGTATC
+AGCA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1349:2084 2:N:0:ACTGTATC
+TGAA
++
+?=>=
+ at machine1:HiMom:abcdeACXX:1:2101:1365:2094 2:N:0:ACTGTATC
+TTCC
++
+FFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGAAAAGA.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.1.fastq
new file mode 100644
index 0000000..014fee9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 1:N:0:AGCATGGA
+CTCCCCCCGGGCTGAACCAGGGTAC
++
+CCCFFDDDDHDFHIIIIIIIII9DG
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 1:N:0:AGCATGGA
+AGAAGGGGAAAGCCTTCATCTTGGC
++
+BCBFFFFFHHHHHJJJJJIIFIJIJ
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 1:N:0:AGCATGGA
+NTGGTGGTCCATAGAGATTTGAAAC
++
+#1:4BD7DACF?FCA:4+<ACHIIH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.2.fastq
new file mode 100644
index 0000000..42b0c4c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 2:N:0:AGCATGGA
+TCCCCTGGTTCTGGGCA
++
+?FHDFGIIIGIGHHIII
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 2:N:0:AGCATGGA
+CTTCCGATCTGATGGGC
++
+?FHHFGEHHIIDHIIII
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 2:N:0:AGCATGGA
+GACATCATGTTTGAAAG
++
+FFHDHIGBHHII<HEDB
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.barcode_1.fastq
new file mode 100644
index 0000000..75daae1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 :N:0:AGCATGGA
+AGCATGGA
++
+C@@DBFEF
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 :N:0:AGCATGGA
+AGCATGGA
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 :N:0:AGCATGGA
+AGCATGGA
++
+?:8A?3:B
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.index_1.fastq
new file mode 100644
index 0000000..725fc1a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 1:N:0:AGCATGGA
+GGCT
++
+;?@D
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 1:N:0:AGCATGGA
+CGTG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 1:N:0:AGCATGGA
+CACC
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.index_2.fastq
new file mode 100644
index 0000000..6a42f3b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGCATGGA.index_2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1406:2222 2:N:0:AGCATGGA
+GGAC
++
+DDBD
+ at machine1:HiMom:abcdeACXX:1:1201:1291:2158 2:N:0:AGCATGGA
+TGCT
++
+FFDD
+ at machine1:HiMom:abcdeACXX:1:2101:1370:2116 2:N:0:AGCATGGA
+ATCT
++
+FFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.1.fastq
new file mode 100644
index 0000000..eeb6b90
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 1:Y:0:AGGTAAGG
+CTTTGAAGACATTGTGAGATCTGTA
++
+<==A<42 at C+A4A?,2A@=4 at 7A??
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 1:N:0:AGGTAAGG
+NCCAGGTGTCTTCCCGGGCCCTGCC
++
+#1=DDFBDFHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 1:N:0:AGGTAAGG
+TCTCCATGTGAAACAAGCAAAAAGA
++
+CCCFFFFFHHHHGJJJIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 1:N:0:AGGTAAGG
+GTTATTGATAGGATACTGTACAAAC
++
+ at BCFFFFDHHHHFIJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.2.fastq
new file mode 100644
index 0000000..08c5393
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 2:Y:0:AGGTAAGG
+AGTAATTTTAGTACTGC
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 2:N:0:AGGTAAGG
+GAAGGGAAGGAAGGGTG
++
+HHHHHJIJIIDHHGICG
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 2:N:0:AGGTAAGG
+CTTATGTATTTATGAAT
++
+HHHHHJHIIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 2:N:0:AGGTAAGG
+CCTTCCACTCTAGCATA
++
+FHHGHIJJJGJIIJHIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.barcode_1.fastq
new file mode 100644
index 0000000..19fae2f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 :Y:0:AGGTAAGG
+AGGTAAGG
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 :N:0:AGGTAAGG
+AGGTAAGG
++
+B at BDDFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 :N:0:AGGTAAGG
+AGGTAAGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 :N:0:AGGTAAGG
+AGGTAAGG
++
+@@CBDFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.index_1.fastq
new file mode 100644
index 0000000..87b99e1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 1:Y:0:AGGTAAGG
+AGTT
++
+((0@
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 1:N:0:AGGTAAGG
+NGGA
++
+#4=D
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 1:N:0:AGGTAAGG
+TTGG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 1:N:0:AGGTAAGG
+TCTC
++
+@@@F
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.index_2.fastq
new file mode 100644
index 0000000..767933a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTAAGG.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1263:2236 2:Y:0:AGGTAAGG
+CTTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1054:2162 2:N:0:AGGTAAGG
+CAGG
++
+DFDF
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2203 2:N:0:AGGTAAGG
+TTCA
++
+DFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1249:2231 2:N:0:AGGTAAGG
+TCGG
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.1.fastq
new file mode 100644
index 0000000..dbfde57
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 1:N:0:AGGTCGCA
+GCTACTCAGTAGACAGTCCCACCCT
++
+@@CADDDDFCFHHIIIIGGIIGGGI
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 1:N:0:AGGTCGCA
+NGGCAGGTGCCCCCACTTGACTCTC
++
+#1?DFFFFGHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 1:N:0:AGGTCGCA
+AACCTGGCGCTAAACCATTCGTAGA
++
+CCCFFFFFHHHHHJJJJJJJJIJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 1:N:0:AGGTCGCA
+TTAGACCGTCGTGAGACAGGTTAGT
++
+ at CCFFFFFHHHHHJJJJJIIEHIJH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.2.fastq
new file mode 100644
index 0000000..63d7afe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 2:N:0:AGGTCGCA
+CGATTCCTAGGGGGTTG
++
+BH6DHD<FGGGEIGHIG
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 2:N:0:AGGTCGCA
+TGAACTTCTGAGCTGCT
++
+HHHGHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 2:N:0:AGGTCGCA
+TTGTGTCGAGGGCTGAC
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 2:N:0:AGGTCGCA
+ACCAAATGTCTGAACCT
++
+HHHHHJJJHIJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.barcode_1.fastq
new file mode 100644
index 0000000..7bc729d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 :N:0:AGGTCGCA
+AGGTCGCA
++
+@@@DDFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 :N:0:AGGTCGCA
+AGGTCGCA
++
+BCCDFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 :N:0:AGGTCGCA
+AGGTCGCA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 :N:0:AGGTCGCA
+AGGTCGCA
++
+BCCDFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.index_1.fastq
new file mode 100644
index 0000000..ffa65f6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 1:N:0:AGGTCGCA
+ATGG
++
+8?=D
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 1:N:0:AGGTCGCA
+GGCC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 1:N:0:AGGTCGCA
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 1:N:0:AGGTCGCA
+GCAC
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.index_2.fastq
new file mode 100644
index 0000000..2ba6630
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/AGGTCGCA.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1150:2228 2:N:0:AGGTCGCA
+GAGG
++
+D8;@
+ at machine1:HiMom:abcdeACXX:1:1101:1491:2120 2:N:0:AGGTCGCA
+AGGC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1190:2194 2:N:0:AGGTCGCA
+ACCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1188:2195 2:N:0:AGGTCGCA
+ATAC
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.1.fastq
new file mode 100644
index 0000000..26ed061
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 1:N:0:ATTATCAA
+ACGACAGACGTTCTTTCTTTGCTGC
++
+CCCFFFFFHHFHHJIJJJJJHIJJH
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 1:N:0:ATTATCAA
+NGGACATTGTAATCATTTCTTACAA
++
+#1=DD?DDHHHHHGGHIIIIIIIII
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 1:N:0:ATTATCAA
+ACAGTGTGGGAGGCAGACGAAGAGA
++
+@@@DDDDDFA:C at EGA?FD<FFHII
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 1:Y:0:ATTATCAA
+NTTTCTCTGGGCGCAAAGATGTTCA
++
+#07;8=8<<99(:=@@/@7>>6=?>
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 1:N:0:ATTATCAA
+CAAGTGTACAGGATTAGACTGGGTT
++
+BCCFDEBDHHHHHIJJJGIIIJJGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.2.fastq
new file mode 100644
index 0000000..40871a0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 2:N:0:ATTATCAA
+NNNNNNNNNGNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 2:N:0:ATTATCAA
+CTTAATCAAAGATGATA
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 2:N:0:ATTATCAA
+CTGTGCTTTAAGGAAAA
++
+DF8DDCFH at GIE@@GGH
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 2:Y:0:ATTATCAA
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 2:N:0:ATTATCAA
+TTATTAAAGCAGTTAAA
++
+HDHHHGIIIJJJIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.barcode_1.fastq
new file mode 100644
index 0000000..bfe2de2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 :N:0:ATTATCAA
+ATTATCAA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 :N:0:ATTATCAA
+ATTATCAA
++
+C at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 :N:0:ATTATCAA
+ATTATCAA
++
+@@@DDDF?
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 :Y:0:ATTATCAA
+ATTATCAA
++
+;<;:BBDD
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 :N:0:ATTATCAA
+ATTATCAA
++
+@@@DDBDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.index_1.fastq
new file mode 100644
index 0000000..d002355
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 1:N:0:ATTATCAA
+AGGC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 1:N:0:ATTATCAA
+TTTA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 1:N:0:ATTATCAA
+TTCC
++
+@@<A
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 1:Y:0:ATTATCAA
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 1:N:0:ATTATCAA
+AATA
++
+C at CF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.index_2.fastq
new file mode 100644
index 0000000..80a8f1c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTATCAA.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1100:2207 2:N:0:ATTATCAA
+TNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1157:2135 2:N:0:ATTATCAA
+AAGT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1269:2170 2:N:0:ATTATCAA
+AAGC
++
+DBDB
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2217 2:Y:0:ATTATCAA
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1118:2198 2:N:0:ATTATCAA
+AACT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.1.fastq
new file mode 100644
index 0000000..ce65a9e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 1:N:0:ATTCCTCT
+ACACCAACCACCCAACTATCTATAA
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 1:N:0:ATTCCTCT
+NAAAACTTGAGGATGCTATGCAAGC
++
+#1:B:ADDDDDDDEEAEBF9FFEBF
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 1:N:0:ATTCCTCT
+GGGGCTGAGACCTTTGCTGATGGTG
++
+@@@FFFFFHHHGHJJJJJIIIGICH
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 1:N:0:ATTCCTCT
+ATGGCAAAGTGGTGTCTGAGACCAA
++
+BCCFFFFFGHHHHHIIIJFHIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.2.fastq
new file mode 100644
index 0000000..075007b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 2:N:0:ATTCCTCT
+AGGGCATTTTTAATCTT
++
+HHHDFHJIJJIJGIIIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 2:N:0:ATTCCTCT
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 2:N:0:ATTCCTCT
+CTTCCGATCTGGAGGGT
++
+HHHHHJJJJJJJJJJJ:
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 2:N:0:ATTCCTCT
+TTGGCCTCCTGCTCCCC
++
+HHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.barcode_1.fastq
new file mode 100644
index 0000000..ea5fd93
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 :N:0:ATTCCTCT
+ATTCCTCT
++
+?@@ADEEF
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 :N:0:ATTCCTCT
+ATTCCTCT
++
+8??=BBBA
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 :N:0:ATTCCTCT
+ATTCCTCT
++
+B@@BDEFF
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 :N:0:ATTCCTCT
+ATTCCTCT
++
+?BBDDDFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.index_1.fastq
new file mode 100644
index 0000000..099b5a7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 1:N:0:ATTCCTCT
+AGTG
++
+@@?D
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 1:N:0:ATTCCTCT
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 1:N:0:ATTCCTCT
+CGTG
++
+ at BBD
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 1:N:0:ATTCCTCT
+ATCT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.index_2.fastq
new file mode 100644
index 0000000..35c6a49
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/ATTCCTCT.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1309:2210 2:N:0:ATTCCTCT
+GGCT
++
+FFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1018:2133 2:N:0:ATTCCTCT
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1073:2225 2:N:0:ATTCCTCT
+TGCT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1242:2207 2:N:0:ATTCCTCT
+TTTA
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.1.fastq
new file mode 100644
index 0000000..c48bd97
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 1:N:0:CAACTCTC
+NCCCCAACATTCTAATTATGCCTCA
++
+#1:BDFFDHFFDFIJJJIIJIIIII
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 1:N:0:CAACTCTC
+GAAATGCATCTGTCTTAGAAACTGG
++
+??@=BDDDFDD<<,<2:C<F:FFEA
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 1:Y:0:CAACTCTC
+NGTCAAGGATGTTCGTCGTGGCAAC
++
+#1=BDDDDDDDDDID<AE?@<CEEE
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 1:N:0:CAACTCTC
+TATCCCCTCTAAGACGGACCTGGGT
++
+CCCFFFFFHHHHHJJIIIJJJJJJG
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 1:Y:0:CAACTCTC
+GTTCTCTGTCCCCAGGTCCTGTCTC
++
+===A7<7222<<=C=?+<7>@?ACB
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.2.fastq
new file mode 100644
index 0000000..8ec96bf
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 2:N:0:CAACTCTC
+TTTTTAACTTTGCAAAT
++
+HHHHFB at 9FHI@BFH@@
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 2:N:0:CAACTCTC
+AGGACTTACCTGACATA
++
+28?##############
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 2:Y:0:CAACTCTC
+TGAGCAGTGAAGCCAGC
++
+HD?FDBHI?AHGGGDFH
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 2:N:0:CAACTCTC
+TTAGCATTTACTTTCCC
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 2:Y:0:CAACTCTC
+TTTTTTTTTTTTTGGGC
++
+?@???????########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.barcode_1.fastq
new file mode 100644
index 0000000..052fd17
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 :N:0:CAACTCTC
+CAACTCTC
++
+@@@DDFDF
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 :N:0:CAACTCTC
+CAACTCTC
++
+??;=A:B=
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 :Y:0:CAACTCTC
+CAACTCTC
++
+=??BA?BD
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 :N:0:CAACTCTC
+CAACTCTC
++
+BC at DDFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 :Y:0:CAACTCTC
+CAACTCTC
++
+?@@1:DBD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.index_1.fastq
new file mode 100644
index 0000000..db76cb0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 1:N:0:CAACTCTC
+TTTT
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 1:N:0:CAACTCTC
+AGGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 1:Y:0:CAACTCTC
+TAAT
++
+@<@?
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 1:N:0:CAACTCTC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 1:Y:0:CAACTCTC
+TTTT
++
+<<<@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.index_2.fastq
new file mode 100644
index 0000000..8e278ba
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAACTCTC.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1140:2120 2:N:0:CAACTCTC
+TTTT
++
+DDDD
+ at machine1:HiMom:abcdeACXX:1:1101:1328:2225 2:N:0:CAACTCTC
+AATT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1127:2112 2:Y:0:CAACTCTC
+CACC
++
+BDDD
+ at machine1:HiMom:abcdeACXX:1:1201:1452:2143 2:N:0:CAACTCTC
+AGTC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2146 2:Y:0:CAACTCTC
+TTTT
++
+??@?
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGAC.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.1.fastq
new file mode 100644
index 0000000..3b0d15e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 1:N:0:CAATAGTC
+NAAAAAAAAAAAAAAAAAAAAAAAA
++
+#1BDFFFFHHHHHJJJJFDDDDDDD
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 1:N:0:CAATAGTC
+NTGCCCTTCGTCCTGGGAAACGGGG
++
+#1BDFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 1:N:0:CAATAGTC
+NTAGTGCTGGGCACTAAGTAATACC
++
+#4=DDDFFHHHHHJJJJJHIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 1:N:0:CAATAGTC
+GGATAATCCTATTTATTACCTCAGA
++
+BBBDDFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 1:N:0:CAATAGTC
+TTTCAGATTGGTCATTGTTAGTGTA
++
+??@BDDDEHBHADHHIIEHDHFHFF
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 1:Y:0:CAATAGTC
+AACACGGACAAAGGAGTCTAACACG
++
+<<<??8@@#################
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 1:N:0:CAATAGTC
+NCTATGCCGATCGGGTGTCCGCACT
++
+#1=DDDDDHHFHHIIEHHHBGHGII
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.2.fastq
new file mode 100644
index 0000000..ca281a6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 2:N:0:CAATAGTC
+TTTTTTTTTTTTTTTTT
++
+HHHHHJJJJHFDDDDDD
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 2:N:0:CAATAGTC
+TTGTGTCGAGGGCTGAC
++
+HHHHHIJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 2:N:0:CAATAGTC
+CTGAGAATATATGGGTG
++
+HHHHHJJJJJJJJJJEG
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 2:N:0:CAATAGTC
+GTGTTTAGGAGTGGGAC
++
+HHHHHIIJJHJFHIJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 2:N:0:CAATAGTC
+TCATTTGTATGATCTTA
++
+HFFHFHIHIIJIJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 2:Y:0:CAATAGTC
+TGGGGACTAGTGAGGCG
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 2:N:0:CAATAGTC
+GGTCTCGCTATGTTGCC
++
+HHHHHJIIJJJJIJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.barcode_1.fastq
new file mode 100644
index 0000000..1b351d4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 :N:0:CAATAGTC
+CAATAGAC
++
+1>>7A###
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 :N:0:CAATAGTC
+CAATAGTC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 :N:0:CAATAGTC
+CAATAGTC
++
+CCCFFFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 :N:0:CAATAGTC
+CAATAGTC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 :N:0:CAATAGTC
+CAATAGTC
++
+ at CCFFDDE
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 :Y:0:CAATAGTC
+CAATAGTC
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 :N:0:CAATAGTC
+CAATAGTC
++
+@@@FDEBD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.index_1.fastq
new file mode 100644
index 0000000..3fdca1a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 1:N:0:CAATAGTC
+TCTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 1:N:0:CAATAGTC
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 1:N:0:CAATAGTC
+GTCA
++
+CBCF
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 1:N:0:CAATAGTC
+ATAC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 1:N:0:CAATAGTC
+ATCT
++
+@@BF
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 1:Y:0:CAATAGTC
+ATCG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 1:N:0:CAATAGTC
+AGAG
++
+BCCD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.index_2.fastq
new file mode 100644
index 0000000..a3d0e19
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAATAGTC.index_2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1316:2126 2:N:0:CAATAGTC
+TTTT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1399:2128 2:N:0:CAATAGTC
+ACCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1054:2151 2:N:0:CAATAGTC
+GGCA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1345:2181 2:N:0:CAATAGTC
+GGAT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2184 2:N:0:CAATAGTC
+TTAT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1172:2152 2:Y:0:CAATAGTC
+TTTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1491:2093 2:N:0:CAATAGTC
+ACGG
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGAT.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.1.fastq
new file mode 100644
index 0000000..9053f51
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 1:N:0:CAGCGGTA
+TACCTGGTTGATCCTGCCAGTAGCA
++
+@@CFFFFDDHHGHJGGHIJJIHGBH
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 1:N:0:CAGCGGTA
+NCACTCATTTTCTTATGTGGGATAT
++
+#1=DDFDFHHHHHIJJIFHIIHHHI
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 1:N:0:CAGCGGTA
+ATCACCGCACTCATTTCCCGCTTCC
++
+CCCFFFFFHHHACEEGHIIBHIIII
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 1:N:0:CAGCGGTA
+NTGGACAACATGTTCGAGAGCTACA
++
+#1=BBDDDFFFFDGFGIG?F;HHFI
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 1:Y:0:CAGCGGTA
+TTAAAGAGGTTCAGGGATGCAGAGT
++
+#########################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.2.fastq
new file mode 100644
index 0000000..1cd3f9e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 2:N:0:CAGCGGTA
+ACCGGCCGTGCGTACTT
++
+HHHFGIJJJJJJGHIGG
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 2:N:0:CAGCGGTA
+GCCAGTGGAGTTACGAC
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 2:N:0:CAGCGGTA
+CAGAGAGGATCAGAAGT
++
+HHDFHEGFEGGIJIIIG
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 2:N:0:CAGCGGTA
+CAGCTTCAGCTTCTCCT
++
+HHHHHJHGGJIJJJEHH
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 2:Y:0:CAGCGGTA
+CTCTTTATCCTTGGCAT
++
+#################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.barcode_1.fastq
new file mode 100644
index 0000000..b9b5bba
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 :N:0:CAGCGGTA
+CAGCGGTA
++
+ at C@FFFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 :N:0:CAGCGGTA
+CAGCGGTA
++
+C at CFFF@D
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 :N:0:CAGCGGTA
+CAGCGGTA
++
+B@@DFDDF
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 :N:0:CAGCGGTA
+CAGCGGTA
++
+@?@DDF@@
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 :Y:0:CAGCGGTA
+TAGCGGTA
++
+########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.index_1.fastq
new file mode 100644
index 0000000..ed92c06
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 1:N:0:CAGCGGTA
+TTCA
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 1:N:0:CAGCGGTA
+TAAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 1:N:0:CAGCGGTA
+NGGG
++
+#4=B
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 1:N:0:CAGCGGTA
+TCCG
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 1:Y:0:CAGCGGTA
+TTTG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.index_2.fastq
new file mode 100644
index 0000000..3e1f6e4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CAGCGGTA.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1420:2213 2:N:0:CAGCGGTA
+CTGT
++
+FFFD
+ at machine1:HiMom:abcdeACXX:1:1201:1364:2113 2:N:0:CAGCGGTA
+GAGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1072:2170 2:N:0:CAGCGGTA
+GAGA
++
+DDFD
+ at machine1:HiMom:abcdeACXX:1:2101:1123:2095 2:N:0:CAGCGGTA
+CCTC
++
+DDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2236 2:Y:0:CAGCGGTA
+AAGC
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.1.fastq
new file mode 100644
index 0000000..c27e9fd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 1:N:0:CCAACATT
+TTCTACCTCACCTTAGGGAGAAGAC
++
+@@@DDBDDD>F><C<4CG?EHGHIG
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 1:N:0:CCAACATT
+CCCCTGAGGACACCATCCCACTCCA
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 1:Y:0:CCAACATT
+GCTGACACAATCTCTTCCGCCTGGT
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 1:N:0:CCAACATT
+GGACACGGACAGGATTGACAGATTG
++
+BCBFFFFFHHHHHHIIJHIIIFHIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 1:N:0:CCAACATT
+NGGAAATCCAGAAAACATAGAAGAT
++
+#1=DDFFFHHHHHIJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 1:N:0:CCAACATT
+ACGCTCGGCTAATTTTTGTATTTTT
++
+ at CCFFFDFHHHHHIJJJJHIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 1:N:0:CCAACATT
+NCGACCATAAACGATGCCGACCGGC
++
+#4=DFFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.2.fastq
new file mode 100644
index 0000000..dde9369
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 2:N:0:CCAACATT
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 2:N:0:CCAACATT
+GGGAACATCCAGAAAGG
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 2:Y:0:CCAACATT
+TAGGAAATAGAAGCTAT
++
+,2?4>7C<<4<A+3<AB
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 2:N:0:CCAACATT
+TAAGTTGGGGGACGCCG
++
+HHHHHJJJIJIIIGIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 2:N:0:CCAACATT
+ATTTCATATGACTTAGC
++
+HHHHHJJIIIHICHIIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 2:N:0:CCAACATT
+GTACTTTGGGAGGCCAA
++
+HHHHHJJJJIJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 2:N:0:CCAACATT
+GAGGACAATGATGGAAA
++
+#################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.barcode_1.fastq
new file mode 100644
index 0000000..569551b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 :N:0:CCAACATT
+CCAACATT
++
+?@;DD?BD
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 :N:0:CCAACATT
+CCAACATT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 :Y:0:CCAACATT
+CCAACATT
++
+########
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 :N:0:CCAACATT
+CCAACATT
++
+C at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 :N:0:CCAACATT
+CCAACATT
++
+C at CFFFDF
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 :N:0:CCAACATT
+CCAACATT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 :N:0:CCAACATT
+CCAACATT
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.index_1.fastq
new file mode 100644
index 0000000..a4ec899
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 1:N:0:CCAACATT
+AGGC
++
+19;3
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 1:N:0:CCAACATT
+AAGA
++
+BC at F
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 1:Y:0:CCAACATT
+GACA
++
+=1=A
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 1:N:0:CCAACATT
+ATCT
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 1:N:0:CCAACATT
+GCAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 1:N:0:CCAACATT
+TAAT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 1:N:0:CCAACATT
+AATG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.index_2.fastq
new file mode 100644
index 0000000..86908ed
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAACATT.index_2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1083:2193 2:N:0:CCAACATT
+TNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1175:2197 2:N:0:CCAACATT
+GCTG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1138:2227 2:Y:0:CCAACATT
+AATA
++
+=AAA
+ at machine1:HiMom:abcdeACXX:1:1201:1260:2165 2:N:0:CCAACATT
+GATC
++
+FDFF
+ at machine1:HiMom:abcdeACXX:1:1201:1281:2133 2:N:0:CCAACATT
+CAAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1331:2162 2:N:0:CCAACATT
+CCCA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1186:2093 2:N:0:CCAACATT
+TTGG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.1.fastq
new file mode 100644
index 0000000..c070e32
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 1:N:0:CCAGCACC
+TTTCTATTAGCTCTTAGTAAGATTA
++
+CCCFFFFFHHHHHJJJIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 1:N:0:CCAGCACC
+CCGATACGCTGAGTGTGGTTTGCGG
++
+CCCFFFFFHHHFHEGGHIHIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 1:N:0:CCAGCACC
+NCACATGGATGAGGAGAATGAGGAT
++
+#1=DDFFFFHHHHJHIGIHHHIJEH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.2.fastq
new file mode 100644
index 0000000..7d54746
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 2:N:0:CCAGCACC
+TTATTGGGGAGGGGGTG
++
+HHGHHJJJJGJJJJJDF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 2:N:0:CCAGCACC
+TCGATGAGGAACTTGGT
++
+DHHGHJIJJGHIIJJJH
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 2:N:0:CCAGCACC
+CTCCTCCTTCTTGGCCT
++
+HHFHFEIIIIHHBAHBG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.barcode_1.fastq
new file mode 100644
index 0000000..07b0fae
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 :N:0:CCAGCACC
+CCAGCACC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 :N:0:CCAGCACC
+CCAGCACC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 :N:0:CCAGCACC
+CCAGCACC
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.index_1.fastq
new file mode 100644
index 0000000..93f948c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 1:N:0:CCAGCACC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 1:N:0:CCAGCACC
+TCTT
++
+@?@F
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 1:N:0:CCAGCACC
+ATCT
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.index_2.fastq
new file mode 100644
index 0000000..76fb07f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCAGCACC.index_2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1212:2230 2:N:0:CCAGCACC
+AGCT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1204:2228 2:N:0:CCAGCACC
+CTTG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1100:2085 2:N:0:CCAGCACC
+TGAT
++
+DDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CCATGCGT.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.1.fastq
new file mode 100644
index 0000000..ab2fd21
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 1:N:0:CGCCTTCC
+AGAAGACGAGGCTGAGAGTGACATC
++
+@@@FFFFFHHHDHJGHGHCHHJJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 1:N:0:CGCCTTCC
+NAGAAGCCTTTGCACCCTGGGAGGA
++
+#1=DDDFFHHHHHJJJJJJJJIIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.2.fastq
new file mode 100644
index 0000000..dd8a775
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 2:N:0:CGCCTTCC
+AGGCCCAGTCCAAGGAA
++
+HHHGGIJIGGIJFIJII
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 2:N:0:CGCCTTCC
+CCCATGCCACCAACTCG
++
+GHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.barcode_1.fastq
new file mode 100644
index 0000000..4c987f7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 :N:0:CGCCTTCC
+CGCCTTCC
++
+@@@DDFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 :N:0:CGCCTTCC
+CGCCTTCC
++
+C at BFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.index_1.fastq
new file mode 100644
index 0000000..dcf59a1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 1:N:0:CGCCTTCC
+GTCA
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 1:N:0:CGCCTTCC
+ACAT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.index_2.fastq
new file mode 100644
index 0000000..41910fc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCCTTCC.index_2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1122:2227 2:N:0:CGCCTTCC
+TATA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1160:2109 2:N:0:CGCCTTCC
+CCTT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.1.fastq
new file mode 100644
index 0000000..22dfb08
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 1:N:0:CGCTATGT
+CGTGGGGAACCTGGCGCTAAACCAT
++
+ at BBFFFFFHHHHHJJJJIJJJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 1:N:0:CGCTATGT
+GTTTATTGGGGCATTCCTTATCCCA
++
+@??DDDDBDHF>FCHGGGBFAAED9
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 1:N:0:CGCTATGT
+ACTTTCACCGCTACACGACCGGGGG
++
+CCCFFFFFHGFFHIIFIHJIGGII>
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 1:N:0:CGCTATGT
+NTTCTCGGCTGTCATGTGCAACATT
++
+#1=DDBDFHHHDFFBHGHGHIIJEH
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 1:N:0:CGCTATGT
+TGCCAGGAAGTGTTTTTTCTGGGTC
++
+ at CCFFEFFHHFFFGIJJJJJJJJGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.2.fastq
new file mode 100644
index 0000000..3d5200f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 2:N:0:CGCTATGT
+TTGTGTCGAGGGCTGAC
++
+HHFHHIJJJIIIGIJIJ
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 2:N:0:CGCTATGT
+TGGGCTGACCTGACAGA
++
+FBFADBCGDEH?F;FCG
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 2:N:0:CGCTATGT
+CTAGAGGGGGTAGAGGG
++
+HHDFBHIIJJ1?FGHIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 2:N:0:CGCTATGT
+TTCNNGCTTCTCTCTGT
++
+=@>##22=;@??><@??
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 2:N:0:CGCTATGT
+TAATGGTTGAGAGGTGG
++
+FHHGHJHHGFIHHIFGI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.barcode_1.fastq
new file mode 100644
index 0000000..fbfd111
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 :N:0:CGCTATGT
+CGCTATGT
++
+@@@FFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 :N:0:CGCTATGT
+CGCTATGT
++
+@<@?B@;A
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 :N:0:CGCTATGT
+CGCTATGT
++
+@@BFFDDD
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 :N:0:CGCTATGT
+CGCTATGT
++
+@<?DD:B=
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 :N:0:CGCTATGT
+CGCTATGT
++
+CCCFFFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.index_1.fastq
new file mode 100644
index 0000000..31a06ae
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 1:N:0:CGCTATGT
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 1:N:0:CGCTATGT
+AGGA
++
+@@<D
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 1:N:0:CGCTATGT
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 1:N:0:CGCTATGT
+NGCA
++
+#0;@
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 1:N:0:CGCTATGT
+AGTG
++
+B@?D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.index_2.fastq
new file mode 100644
index 0000000..5b39e9d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CGCTATGT.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1291:2150 2:N:0:CGCTATGT
+ACCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1314:2233 2:N:0:CGCTATGT
+AAGT
++
+DD;=
+ at machine1:HiMom:abcdeACXX:1:1101:1441:2148 2:N:0:CGCTATGT
+GGCT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1043:2246 2:N:0:CGCTATGT
+TCAT
++
+@??@
+ at machine1:HiMom:abcdeACXX:1:1201:1134:2144 2:N:0:CGCTATGT
+TGAG
++
+DDFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.1.fastq
new file mode 100644
index 0000000..d0aacee
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 1:N:0:CTAACTCG
+NGTCTGGCCTGCACAGACATCCTAC
++
+#1=DDFFFHHHHHJJJIJJIJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 1:N:0:CTAACTCG
+TGGTTGATCCTGCCAGTAGCATATG
++
+@@@ADADDFHFFDBHE?G at HIIIEE
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 1:Y:0:CTAACTCG
+NAGATAAGAGTCCACACAGTTGAGT
++
+#11AAAAA<A?4=C=7?733<ACA3
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 1:N:0:CTAACTCG
+NAGGACATCGATAAAGGCGAGGTGT
++
+#1=DDFFFHHHHHJJJJJJJJJHHG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.2.fastq
new file mode 100644
index 0000000..37f4081
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 2:N:0:CTAACTCG
+ACCTGTTAGAACTTCTG
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 2:N:0:CTAACTCG
+CACGCATCCCCCCCGCG
++
+GGHHHHJJJJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 2:Y:0:CTAACTCG
+TCTTCTCTAACTTGTCA
++
+A+2AA?CB4@@ABB3?A
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 2:N:0:CTAACTCG
+GGTGCCGTCGGGCCCAA
++
+HHHHGJJIJJJJJJJIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.barcode_1.fastq
new file mode 100644
index 0000000..748b2b5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 :N:0:CTAACTCG
+CTAACTCG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 :N:0:CTAACTCG
+CTAACTCG
++
+@@CFDDFD
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 :Y:0:CTAACTCG
+CTAACTCG
++
+=++==ADB
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 :N:0:CTAACTCG
+CTAACTCG
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.index_1.fastq
new file mode 100644
index 0000000..1fda985
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 1:N:0:CTAACTCG
+GTTC
++
+C@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 1:N:0:CTAACTCG
+GATA
++
+C at CF
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 1:Y:0:CTAACTCG
+ATGA
++
+>=><
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 1:N:0:CTAACTCG
+TTGG
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.index_2.fastq
new file mode 100644
index 0000000..8f94d1e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTAACTCG.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1363:2138 2:N:0:CTAACTCG
+TTAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1393:2143 2:N:0:CTAACTCG
+AATG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1273:2119 2:Y:0:CTAACTCG
+TGGA
++
+AAAA
+ at machine1:HiMom:abcdeACXX:1:2101:1414:2098 2:N:0:CTAACTCG
+GGCC
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGC.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.1.fastq
new file mode 100644
index 0000000..4ca668c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 1:N:0:CTATGCGT
+NAGAACTGGCGCTGCGGGATGAACC
++
+#1=BDFFFHHHHHJJJJJHIJIJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 1:N:0:CTATGCGT
+ATCTGCCTGGTTCGGCCCGCCTGCC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 1:N:0:CTATGCGT
+NTATAGTGGAGGCCGGAGCAGGAAC
++
+#1:DABADHHHFHIIIGGHGIIIII
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 1:Y:0:CTATGCGT
+NTAAAGTGTGAACAAGGAAGGTCAT
++
+#07>@<9=@################
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 1:N:0:CTATGCGT
+NACACTGCTGCAGATGACAAGCAGC
++
+#4BDFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 1:N:0:CTATGCGT
+ACGCCGCAAGTCAGAGCCCCCCAGA
++
+@@@DDDFFFFB:DBBEBEFDHBDDB
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 1:Y:0:CTATGCGT
+GAGAGAAGCACTCTTGAGCGGGATA
++
+0;(@((@)2@###############
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.2.fastq
new file mode 100644
index 0000000..052376c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 2:N:0:CTATGCGT
+CACCACCGCCCTCCCCC
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 2:N:0:CTATGCGT
+CCCGTGGGCCAGAGGTG
++
+HHHHHJJJJJJJJJJHI
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 2:N:0:CTATGCGT
+GTTCCCTGCTAAGGGAG
++
+ADDDDIEID:AFFD:?8
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 2:Y:0:CTATGCGT
+TTCCGATCTGGAGGATG
++
+==A at 7A<?#########
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 2:N:0:CTATGCGT
+AGAACCAACTTATTCAT
++
+@@?@?@@?@@@@@@?@@
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 2:N:0:CTATGCGT
+TTGGTGTGTTGACTGTT
++
+CF;BF<AACGCHEBHC<
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 2:Y:0:CTATGCGT
+TTTTTTTTTTTTTTTTT
++
+GHHHHJJJFDDDDDDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.barcode_1.fastq
new file mode 100644
index 0000000..e684e0e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 :N:0:CTATGCGT
+CTATGCGT
++
+CCCFFFFD
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 :N:0:CTATGCGT
+CTATGCGT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 :N:0:CTATGCGT
+CCATGCGT
++
+??<DDA?D
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 :Y:0:CTATGCGT
+CTATGCGC
++
+;?=D####
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 :N:0:CTATGCGT
+CTATGCGT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 :N:0:CTATGCGT
+CTATGCGT
++
+@<@?D8 at D
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 :Y:0:CTATGCGT
+CTATGCGT
++
+=??B####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.index_1.fastq
new file mode 100644
index 0000000..1c364e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 1:N:0:CTATGCGT
+ACAC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 1:N:0:CTATGCGT
+GCTG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 1:N:0:CTATGCGT
+TGGG
++
+???D
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 1:Y:0:CTATGCGT
+GTGT
++
+=+=?
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 1:N:0:CTATGCGT
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 1:N:0:CTATGCGT
+AGCC
++
+@@;1
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 1:Y:0:CTATGCGT
+TTTT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.index_2.fastq
new file mode 100644
index 0000000..adf58e1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTATGCGT.index_2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1083:2121 2:N:0:CTATGCGT
+ACAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1185:2143 2:N:0:CTATGCGT
+AAGG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1219:2115 2:N:0:CTATGCGT
+AGTA
++
+BDBD
+ at machine1:HiMom:abcdeACXX:1:1201:1472:2121 2:Y:0:CTATGCGT
+GCTC
++
+?A4A
+ at machine1:HiMom:abcdeACXX:1:2101:1013:2146 2:N:0:CTATGCGT
+CGCT
++
+24=?
+ at machine1:HiMom:abcdeACXX:1:2101:1231:2208 2:N:0:CTATGCGT
+AGTG
++
+ADAB
+ at machine1:HiMom:abcdeACXX:1:2101:1233:2133 2:Y:0:CTATGCGT
+TTTT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.1.fastq
new file mode 100644
index 0000000..4a7196d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 1:N:0:CTGCGGAT
+TTTCATCTTATTTCATTGGTTTATA
++
+CCCFFFFFHHHHHJIJJJJIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 1:N:0:CTGCGGAT
+NGTTTTAGGGGTGCGCAGGAGTCAA
++
+#11=A=DD?DF at D@CCGHIEFH at BG
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 1:N:0:CTGCGGAT
+TTTCTTCGCAGGATTTTTCTGAGCC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.2.fastq
new file mode 100644
index 0000000..8073e73
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 2:N:0:CTGCGGAT
+CTCTACTCAGTAGATTA
++
+HHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 2:N:0:CTGCGGAT
+CACCTTGGTCACCTTCC
++
+HHHHHJEGGIHHIJGIH
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 2:N:0:CTGCGGAT
+AGGGGATTTAGCGGGGT
++
+HHHHHJJJJJJJJJJJD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.barcode_1.fastq
new file mode 100644
index 0000000..92dcd7d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 :N:0:CTGCGGAT
+CTGCGGAT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 :N:0:CTGCGGAT
+CTGCGGAT
++
+@@@FFFDA
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 :N:0:CTGCGGAT
+CAGCGGAT
++
+C at CFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.index_1.fastq
new file mode 100644
index 0000000..de00e13
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 1:N:0:CTGCGGAT
+ATAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 1:N:0:CTGCGGAT
+TCTC
++
+ at C@D
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 1:N:0:CTGCGGAT
+GGAC
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.index_2.fastq
new file mode 100644
index 0000000..044d5da
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGCGGAT.index_2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:2101:1102:2221 2:N:0:CTGCGGAT
+CTGA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1126:2082 2:N:0:CTGCGGAT
+TTTC
++
+DDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1216:2172 2:N:0:CTGCGGAT
+TTCT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.1.fastq
new file mode 100644
index 0000000..243a935
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.1.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 1:N:0:CTGTAATC
+CTAAACAGAGAGAAGGTTTCTCTTT
++
+CCCFFFFFHHHHHJJJFHIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 1:Y:0:CTGTAATC
+NTAAAGAGAAATCAAGAATACTATT
++
+#-4@?(@)@@###############
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 1:Y:0:CTGTAATC
+NTGATAAGGTGTTGCTATGTTACCC
++
+#1:D?DDDDA??2:<CC4:AEDF>?
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 1:N:0:CTGTAATC
+NAAACAAAACTGTAGAACTGTGTAT
++
+#1=DDFFFHHHHHJJIJJJIHHHJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 1:N:0:CTGTAATC
+TCGTTAAGTATATTCTTAGGTATTT
++
+CCCFFDFFFHFHHIIJJJJJFJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 1:N:0:CTGTAATC
+NTACTAAAGAAAAAGTTGAAGAACT
++
+#1=DDDFFHHHHHJJGHIJJJJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.2.fastq
new file mode 100644
index 0000000..d2df457
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.2.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 2:N:0:CTGTAATC
+AAACCCTGTCTCTACTA
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 2:Y:0:CTGTAATC
+TTTNNTTTTTTTTTTTT
++
+@@?##0:????????=<
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 2:Y:0:CTGTAATC
+CTGGGTGCTGTGATGCA
++
+<DD8F<<CGG?AA?A<F
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 2:N:0:CTGTAATC
+CACATAATTTTAAAATT
++
+?@@??@@@@@??@@@@@
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 2:N:0:CTGTAATC
+GTAGCACCACTATACAC
++
+HHHHHJJJJJJIJJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 2:N:0:CTGTAATC
+TTCTTCTGCCATAAGGT
++
+HGFHHIJJJJJGIGIJH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.barcode_1.fastq
new file mode 100644
index 0000000..152bcc2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.barcode_1.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 :N:0:CTGTAATC
+CTGTAATC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 :Y:0:CTGTAATC
+CTGTAATC
++
+1112 at A##
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 :Y:0:CTGTAATC
+CTGTAATC
++
+ at C<DD:B?
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 :N:0:CTGTAATC
+CTGTAATC
++
+C at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 :N:0:CTGTAATC
+CTGTAATC
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 :N:0:CTGTAATC
+CTGTAATC
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.index_1.fastq
new file mode 100644
index 0000000..ee750ea
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.index_1.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 1:N:0:CTGTAATC
+ACAT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 1:Y:0:CTGTAATC
+NTTT
++
+#0;@
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 1:Y:0:CTGTAATC
+GCAT
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 1:N:0:CTGTAATC
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 1:N:0:CTGTAATC
+ACCA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 1:N:0:CTGTAATC
+AGGA
++
+B@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.index_2.fastq
new file mode 100644
index 0000000..f4bfd31
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/CTGTAATC.index_2.fastq
@@ -0,0 +1,24 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1403:2194 2:N:0:CTGTAATC
+GGTG
++
+FFDD
+ at machine1:HiMom:abcdeACXX:1:1201:1045:2105 2:Y:0:CTGTAATC
+TTTT
++
+@@@@
+ at machine1:HiMom:abcdeACXX:1:1201:1483:2126 2:Y:0:CTGTAATC
+GCAG
++
+DDBB
+ at machine1:HiMom:abcdeACXX:1:2101:1011:2102 2:N:0:CTGTAATC
+NTCA
++
+#22@
+ at machine1:HiMom:abcdeACXX:1:2101:1245:2154 2:N:0:CTGTAATC
+ATCA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1386:2105 2:N:0:CTGTAATC
+ATTA
++
+DFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAAAAAA.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAACGAT..index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.1.fastq
new file mode 100644
index 0000000..b55be00
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 1:N:0:GAAGGAAG
+CCCACCTTCCGGCGGCCGAAGACAC
++
+CCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 1:N:0:GAAGGAAG
+NTCCTGGGAAACGGGGCGCGGCTGG
++
+#4BDDDFFHHHHHIJIIJJJJJJIJ
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 1:N:0:GAAGGAAG
+TTGCTGCATGGGTTAATTGAGAATA
++
+CCCFFFFFHHHHFHHIIJJIJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.2.fastq
new file mode 100644
index 0000000..e1b8447
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 2:N:0:GAAGGAAG
+GCTTTAACATCCACAAT
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 2:N:0:GAAGGAAG
+NTNNNNNNNGGNNTGNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 2:N:0:GAAGGAAG
+CAAAATCAGCAACAAGT
++
+HHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.barcode_1.fastq
new file mode 100644
index 0000000..f38597a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 :N:0:GAAGGAAG
+GAAGGAAG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 :N:0:GAAGGAAG
+GAAGGAAG
++
+CCCFFDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 :N:0:GAAGGAAG
+GAAGGAAG
++
+ at B@FFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.index_1.fastq
new file mode 100644
index 0000000..d04f8ad
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 1:N:0:GAAGGAAG
+GCTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 1:N:0:GAAGGAAG
+NNAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 1:N:0:GAAGGAAG
+TACA
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.index_2.fastq
new file mode 100644
index 0000000..de86038
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GAAGGAAG.index_2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1338:2175 2:N:0:GAAGGAAG
+GTTG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1028:2202 2:N:0:GAAGGAAG
+ACNC
++
+@?##
+ at machine1:HiMom:abcdeACXX:1:2101:1084:2188 2:N:0:GAAGGAAG
+AGGT
++
+FFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.1.fastq
new file mode 100644
index 0000000..41fcefa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 1:N:0:GACCAGGA
+TTCCAGCATGCGGTTTAAGTAGGAT
++
+ at CCFDFDBDFBF:<CEBHAFHHICH
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 1:N:0:GACCAGGA
+GAGCAGATCGGAAGAGCACAGATCG
++
+@@@FFDDDHHHHHIJJBGGHJIHEG
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 1:N:0:GACCAGGA
+GCTGAGTCATGTAGTAAGCCTGTGC
++
+BB at FDDDFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 1:Y:0:GACCAGGA
+CACTAACTCCTGACCTCAAATAATC
++
+?7?=DD?DD+CDBE>E at EEF@+<CF
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 1:N:0:GACCAGGA
+AGCCGCGAGGTGCTGGCGGACTTCC
++
+:;1BDDDAA88A<?<E1C:D#####
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 1:Y:0:GACCAGGA
+NTAGATGACCAAAACTTGCAGGGCA
++
+#1:A<?@A+7A=?CBCCBCCBAAAA
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 1:N:0:GACCAGGA
+NTTCCCTCAGGATAGCTGGCGCTCT
++
+#1=DDFFFGHGHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.2.fastq
new file mode 100644
index 0000000..b4216dc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 2:N:0:GACCAGGA
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 2:N:0:GACCAGGA
+GATCTGTGCTCTTCCGA
++
+DFHHFIJDGIGGHGIGH
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 2:N:0:GACCAGGA
+CACCAAATGCTGCTAAG
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 2:Y:0:GACCAGGA
+CTTCCGATCTGCATACA
++
+AAAA<AAA)@CBA9>A#
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 2:N:0:GACCAGGA
+TTTGCCTTGAAGTAAGC
++
+@>8@>8;@#########
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 2:Y:0:GACCAGGA
+CTTCTGGGCATCCCCTG
++
+HHHHHIJIHHGHGGJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 2:N:0:GACCAGGA
+TAGGTTGAGATCGTTTC
++
+HHFHDHIJJJJJJJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.barcode_1.fastq
new file mode 100644
index 0000000..3a43c99
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 :N:0:GACCAGGA
+GACCAGGA
++
+?@@FF;=B
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 :N:0:GACCAGGA
+GACCAGGA
++
+CC at DFFFD
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 :N:0:GACCAGGA
+GACCAGGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 :Y:0:GACCAGGA
+GACCAGGA
++
+?;@DFDFF
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 :N:0:GACCAGGA
+GACCAGGC
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 :Y:0:GACCAGGA
+GACCAGGA
++
+@@CDFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 :N:0:GACCAGGA
+GACCAGGA
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.index_1.fastq
new file mode 100644
index 0000000..6e1c765
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 1:N:0:GACCAGGA
+TCCG
++
+:<<?
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 1:N:0:GACCAGGA
+GCTC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 1:N:0:GACCAGGA
+ACTG
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 1:Y:0:GACCAGGA
+CGTG
++
+===A
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 1:N:0:GACCAGGA
+GGAG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 1:Y:0:GACCAGGA
+TCAC
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 1:N:0:GACCAGGA
+GTTG
++
+ at CCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.index_2.fastq
new file mode 100644
index 0000000..46e88a4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGA.index_2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1089:2172 2:N:0:GACCAGGA
+GNNN
++
+?###
+ at machine1:HiMom:abcdeACXX:1:1101:1347:2149 2:N:0:GACCAGGA
+TTCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1095:2146 2:N:0:GACCAGGA
+ACAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1123:2161 2:Y:0:GACCAGGA
+TGCT
++
+AAA8
+ at machine1:HiMom:abcdeACXX:1:1201:1439:2156 2:N:0:GACCAGGA
+ATTA
++
+2<>>
+ at machine1:HiMom:abcdeACXX:1:2101:1207:2084 2:Y:0:GACCAGGA
+CACT
++
+DEDF
+ at machine1:HiMom:abcdeACXX:1:2101:1312:2105 2:N:0:GACCAGGA
+AGAA
++
+FFDF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCAGGC.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.1.fastq
new file mode 100644
index 0000000..2e2beb8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 1:N:0:GACCGTTG
+GCACCGGAAGAGCACACAGATCGGA
++
+CCCFFFFDFHGHHJJIJIJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 1:N:0:GACCGTTG
+TGTATTCGAGAGATCAAAGAGAGAG
++
+@@=DDBDD?FFHHEIDBDFCEDBAF
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 1:N:0:GACCGTTG
+NTGAAAGATTTAGAGAGCTTACAAA
++
+#1=DDDDDHHHGHJJIIJJJJIJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 1:N:0:GACCGTTG
+NTGTAGTTTCTTTAGGCAAATTTGT
++
+#4=BDDDFHHHHHJJJJJJIIJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.2.fastq
new file mode 100644
index 0000000..7fc881f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 2:N:0:GACCGTTG
+GATCTATCTGCTCGTCC
++
+?3;@#############
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 2:N:0:GACCGTTG
+CGATCTTTTAGCAAAGC
++
+HFFHDGIGIIJJJGGGI
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 2:N:0:GACCGTTG
+TTTTGCTTTTCTACAGC
++
+HHHHHJJJJIJIJJIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 2:N:0:GACCGTTG
+TACGAAGCAAATACTTT
++
+HHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.barcode_1.fastq
new file mode 100644
index 0000000..fb6bf3f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 :N:0:GACCGTTG
+GACCGTTG
++
+ at CCFFDDF
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 :N:0:GACCGTTG
+GACCGTTG
++
+;@@DD=DD
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 :N:0:GACCGTTG
+GACCGTTG
++
+CCCFFDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 :N:0:GACCGTTG
+GACCGTTG
++
+B at CFFDFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.index_1.fastq
new file mode 100644
index 0000000..0c72c4e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 1:N:0:GACCGTTG
+GCTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 1:N:0:GACCGTTG
+TGCT
++
+:?@D
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 1:N:0:GACCGTTG
+GCTC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 1:N:0:GACCGTTG
+NGTC
++
+#4=D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.index_2.fastq
new file mode 100644
index 0000000..866e2c1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCGTTG.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1218:2200 2:N:0:GACCGTTG
+TTCC
++
+34??
+ at machine1:HiMom:abcdeACXX:1:1101:1257:2223 2:N:0:GACCGTTG
+CTTC
++
+DBDD
+ at machine1:HiMom:abcdeACXX:1:1201:1180:2119 2:N:0:GACCGTTG
+TAAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1036:2087 2:N:0:GACCGTTG
+CACT
++
+DFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.1.fastq
new file mode 100644
index 0000000..1f6b7ec
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 1:N:0:GACCTAAC
+GGAAAAGACGGAAAGGTTCTATCTC
++
+ at C@DFFFDFHHHHJIJHHIJJJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.2.fastq
new file mode 100644
index 0000000..df8b3c4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 2:N:0:GACCTAAC
+TATAACAAATGCAAAAA
++
+HHHHHJJJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.barcode_1.fastq
new file mode 100644
index 0000000..17c61fd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.barcode_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 :N:0:GACCTAAC
+GACCTAAC
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.index_1.fastq
new file mode 100644
index 0000000..9d13cbe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.index_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 1:N:0:GACCTAAC
+TGAA
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.index_2.fastq
new file mode 100644
index 0000000..be3a170
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GACCTAAC.index_2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1302:2244 2:N:0:GACCTAAC
+TACA
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.1.fastq
new file mode 100644
index 0000000..eca4c3a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 1:N:0:GATATCCA
+AGTCCAGGCTGAGCCCAGGGAAGAA
++
+CCCFFFFFHHHHGJIJJIJJHIJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 1:N:0:GATATCCA
+NTTTCCATGGCCGTCACCTTTGGGT
++
+#4=DDFFFHHHHHJJJJJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 1:N:0:GATATCCA
+NGATCGGAAGAGCACACGTTTGACT
++
+#4=DAA=DDFHFHIIBFGHHIG>EG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.2.fastq
new file mode 100644
index 0000000..7183ea3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 2:N:0:GATATCCA
+GACACAACAAGTCCAAC
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 2:N:0:GATATCCA
+GTCACCACTAGCCACCA
++
+@@@@@@@@?@@@@@@@?
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 2:N:0:GATATCCA
+GATCTAGGTAATAGCTA
++
+DCDDHFFFAFHDHIJGJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.barcode_1.fastq
new file mode 100644
index 0000000..d27976c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 :N:0:GATATCCA
+GATATCCA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 :N:0:GATATCCA
+GATATCCA
++
+B at BFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 :N:0:GATATCCA
+GATATCCA
++
+@@@:DDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.index_1.fastq
new file mode 100644
index 0000000..1727869
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 1:N:0:GATATCCA
+AGGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 1:N:0:GATATCCA
+NNAC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 1:N:0:GATATCCA
+GCTC
++
+==?B
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.index_2.fastq
new file mode 100644
index 0000000..469593d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GATATCCA.index_2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1460:2176 2:N:0:GATATCCA
+AAAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1031:2163 2:N:0:GATATCCA
+ATTT
++
+@?@@
+ at machine1:HiMom:abcdeACXX:1:2101:1226:2088 2:N:0:GATATCCA
+TTCC
++
+DFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.1.fastq
new file mode 100644
index 0000000..3b50e29
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 1:N:0:GCCGTCGA
+GTGGAGACCACCTCCGAGGCCTTGT
++
+BBCFFFFFHHHHHJJJIJJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 1:N:0:GCCGTCGA
+TTTGGTGGAAATTTTTTGTTATGAT
++
+CCCFFBDBHFD?FBFHIIGGIC at EF
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 1:Y:0:GCCGTCGA
+AGGGGGATCCGCCGGGGGACCACAA
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 1:N:0:GCCGTCGA
+GTAGGCGCTCAGCAAATACTTGTCG
++
+@@@DDDD8?<CACEHHBBHDAAFH@
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 1:N:0:GCCGTCGA
+NCACACGCCACACGGAGCACACTTT
++
+#4=DDFFFHHHHHJJJJJJJJIIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.2.fastq
new file mode 100644
index 0000000..5ee33ce
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 2:N:0:GCCGTCGA
+NNNNNNNNNGGACGACN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 2:N:0:GCCGTCGA
+TGTCTGCACAGCCGCTT
++
+HHHHHJJJIIIJGHIJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 2:Y:0:GCCGTCGA
+GGCTGTCGACAGGTGTC
++
+HHHHGIJJJJJJIFHHI
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 2:N:0:GCCGTCGA
+CCTGCAGGCCCCGCGGC
++
+DD?DDIID)A:3<EADD
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 2:N:0:GCCGTCGA
+AAAATAATCAGAAGGCC
++
+BHGHHIGGIJFJJGGFH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.barcode_1.fastq
new file mode 100644
index 0000000..9b4fd3a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 :N:0:GCCGTCGA
+GCCGTCGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 :N:0:GCCGTCGA
+GCCGTCGA
++
+@@CDDDDF
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 :Y:0:GCCGTCGA
+GCCGTCGA
++
+BCCFDFFD
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 :N:0:GCCGTCGA
+GCCGTCGA
++
+?@<DDDD?
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 :N:0:GCCGTCGA
+GCCGTCGA
++
+@@CFDDFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.index_1.fastq
new file mode 100644
index 0000000..b2b153b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 1:N:0:GCCGTCGA
+GCGA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 1:N:0:GCCGTCGA
+CAAT
++
+@@@F
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 1:Y:0:GCCGTCGA
+GTCA
++
+ at B@F
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 1:N:0:GCCGTCGA
+CTTG
++
+???B
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 1:N:0:GCCGTCGA
+ATTT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.index_2.fastq
new file mode 100644
index 0000000..c6d802e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCGTCGA.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1111:2148 2:N:0:GCCGTCGA
+ANAN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1221:2143 2:N:0:GCCGTCGA
+TGAA
++
+FFFD
+ at machine1:HiMom:abcdeACXX:1:1101:1327:2200 2:Y:0:GCCGTCGA
+TCTG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1122:2136 2:N:0:GCCGTCGA
+CCAG
++
+BAAB
+ at machine1:HiMom:abcdeACXX:1:2101:1459:2083 2:N:0:GCCGTCGA
+CACC
++
+FFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.1.fastq
new file mode 100644
index 0000000..3c181dd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 1:N:0:GCCTAGCC
+GGCGGAGGCAGCATTTCAGCTGTGA
++
+CCCFFDFFHHHHHIJJIGHHHJHHF
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 1:N:0:GCCTAGCC
+CTTGGGCGCATGGTGAGGGAGGGAG
++
+@@@FFDDFHDFH??CBEBHHIGDCD
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 1:N:0:GCCTAGCC
+TTCAAGGAATCGTCCTGCCTCAGCC
++
+BCCFFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 1:N:0:GCCTAGCC
+NTGTAATCCCAGCTCTCAGGGAGGC
++
+#1=ADDDDDDDBBA?@AE?E at FE8;
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 1:Y:0:GCCTAGCC
+NTAAACAGCTTCTGCACAGCCAAAG
++
+#00@@?>=39>9;<412@?######
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.2.fastq
new file mode 100644
index 0000000..5a33fec
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 2:N:0:GCCTAGCC
+CGAGACAGAAGTGAGAA
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 2:N:0:GCCTAGCC
+CTGGCAAAGACAGTCAC
++
+FHFHGIIICEHGDHBHE
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 2:N:0:GCCTAGCC
+CTTGAGTCCAGGAGTTC
++
+GHHHHIFGCHIJJJGGI
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 2:N:0:GCCTAGCC
+GATCTTTTTTTTAATTT
++
+FDHADEHGIGGED3?FD
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 2:Y:0:GCCTAGCC
+TGAGTTCCTTGTAGATT
++
+?@???@:>?@??>?;?<
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.barcode_1.fastq
new file mode 100644
index 0000000..9920e3b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 :N:0:GCCTAGCC
+GCCTAGCC
++
+B@@DFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 :N:0:GCCTAGCC
+GCCTAGCC
++
+?<@DFBBD
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 :N:0:GCCTAGCC
+GCCTAGCC
++
+BCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 :N:0:GCCTAGCC
+GCCTAGCC
++
+8?84B23?
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 :Y:0:GCCTAGCC
+GCCTAGCC
++
+########
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.index_1.fastq
new file mode 100644
index 0000000..02e424d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 1:N:0:GCCTAGCC
+ATGG
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 1:N:0:GCCTAGCC
+TCAG
++
+C@@F
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 1:N:0:GCCTAGCC
+GAGG
++
+@@BF
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 1:N:0:GCCTAGCC
+GCTC
++
+@@?D
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 1:Y:0:GCCTAGCC
+NNTT
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.index_2.fastq
new file mode 100644
index 0000000..2fc4afb
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GCCTAGCC.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1165:2239 2:N:0:GCCTAGCC
+AAGT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1290:2225 2:N:0:GCCTAGCC
+TTCA
++
+BEDD
+ at machine1:HiMom:abcdeACXX:1:1201:1280:2179 2:N:0:GCCTAGCC
+ACTG
++
+FDEF
+ at machine1:HiMom:abcdeACXX:1:1201:1300:2137 2:N:0:GCCTAGCC
+TTCC
++
+DDDD
+ at machine1:HiMom:abcdeACXX:1:2101:1023:2237 2:Y:0:GCCTAGCC
+TGTT
++
+=@?>
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.1.fastq
new file mode 100644
index 0000000..ad39be4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 1:N:0:GTAACATC
+TCCCCCTCCCTTTTGCGCACACACC
++
+@?@DDADDHDHBDH<EFHIIHG?HF
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 1:Y:0:GTAACATC
+TCACTAAACATCCAAACATCACTTT
++
+#########################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.2.fastq
new file mode 100644
index 0000000..52cac7b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 2:N:0:GTAACATC
+CAAGACAGAAGTGAGAA
++
+FHHFFE at FDHHAIAFHG
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 2:Y:0:GTAACATC
+TTTTTTTTTTTTTTTTT
++
+HHHHHJJJHFDDDDDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.barcode_1.fastq
new file mode 100644
index 0000000..e43e83f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 :N:0:GTAACATC
+GTAACATC
++
+@@?DFFDF
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 :Y:0:GTAACATC
+GTAACATC
++
+1+:A1A22
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.index_1.fastq
new file mode 100644
index 0000000..e50d5b2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 1:N:0:GTAACATC
+GCTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 1:Y:0:GTAACATC
+CTTT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.index_2.fastq
new file mode 100644
index 0000000..8cf353d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTAACATC.index_2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1188:2237 2:N:0:GTAACATC
+CCTT
++
+FDDE
+ at machine1:HiMom:abcdeACXX:1:2101:1208:2231 2:Y:0:GTAACATC
+TTTT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.1.fastq
new file mode 100644
index 0000000..b48b617
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 1:N:0:GTCCACAG
+TCCCTTACCATCAAATCAATTGNCC
++
+CCCFFFFFHHHHHJJJJJJJJJ#3A
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 1:N:0:GTCCACAG
+NCACCTCCTAGCCCCTCACTTCTGT
++
+#1=B;BDDHHHGFIIIIIIIIIGGG
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.2.fastq
new file mode 100644
index 0000000..c77599a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 2:N:0:GTCCACAG
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 2:N:0:GTCCACAG
+TCTTCCCGATCTGTATA
++
+FBHHHJJIIDHJIJJJH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.barcode_1.fastq
new file mode 100644
index 0000000..8a71cab
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 :N:0:GTCCACAG
+GTCCACAG
++
+ at BBFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 :N:0:GTCCACAG
+GTCCACAG
++
+CCCFFFFD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.index_1.fastq
new file mode 100644
index 0000000..67ca633
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 1:N:0:GTCCACAG
+GACG
++
+<<<@
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 1:N:0:GTCCACAG
+ACGT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.index_2.fastq
new file mode 100644
index 0000000..832fb48
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/GTCCACAG.index_2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1069:2159 2:N:0:GTCCACAG
+TNNN
++
+?###
+ at machine1:HiMom:abcdeACXX:1:1201:1486:2109 2:N:0:GTCCACAG
+GTGC
++
+F?DD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.1.fastq
new file mode 100644
index 0000000..aef9f77
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.1.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 1:Y:0:
+NAATANNNNNNNNNNNNTNNNNNNN
++
+#0;@@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 1:Y:0:
+NCCAANGNNGGNNNNATGTAANNNN
++
+#4;@@#4##2<####43@@@@####
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 1:Y:0:
+NTGCCNGNGTTNCGNGGTCTTNNNN
++
+#4;@@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 1:Y:0:
+NCTAANGNACTNTGNGTGTGCNNNN
++
+#0;@@#4#3@@#3@#2<@@@@####
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 1:Y:0:
+NCTCCNGNTCANCANGTGGAGNNNN
++
+#0;?@####################
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 1:N:0:
+GAAGTACGCCCTGCCCCTGGTTNGC
++
+?@@DAADAHHFHBEBEGGHG?####
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 1:Y:0:
+ACCTTGAGGAGAACATAAGAGCAAA
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 1:Y:0:
+GTTAGCACAGATATTGGATGAGTGA
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 1:Y:0:
+GCCAAAAAAAAGAACCAGCCCAAGG
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 1:Y:0:
+NATGCCCACCTCCCTCCTACGCACC
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 1:N:0:
+NAAGAGGGGTCAAGAGTTAAACTTA
++
+#1=DDFFFHFHHGIGHGHJJJJJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 1:N:0:
+ATGCAGCAGCTGCCACGGAGCACCA
++
+CC at FFDFDFHFHHGIDHEHIGJJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 1:Y:0:
+TTGTTTTGGCTTATAATGACAAGAA
++
+;;8-2).2())(<6=@8;?4??>>?
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 1:N:0:
+NAATATAATTTGGAGACCCTTTGTT
++
+#1=DDDDDEDDDDIDDBB3ABAB##
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 1:Y:0:
+NGCGGGGAGCCGGGCGTGGAATGCG
++
+#########################
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 1:N:0:
+AGCACGCTGCCGCGGGACCTGCCCA
++
+?@@AD at DDHFH?DGIIIIG at FGFBF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.2.fastq
new file mode 100644
index 0000000..bf99cda
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.2.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 2:Y:0:
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 2:Y:0:
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 2:Y:0:
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 2:Y:0:
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 2:Y:0:
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 2:N:0:
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 2:Y:0:
+GGGAAGTTAGAGGAATG
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 2:Y:0:
+TATTTTTCTAAATACTT
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 2:Y:0:
+TTTTTTTTTTTTTTTTT
++
+F????FFEB>B6=BBBB
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 2:Y:0:
+TCACTGAAATGAATTCA
++
+.22@@############
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 2:N:0:
+TTAGAAGGATGCTTCTC
++
+HHGHHJJJJJIJJIIJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 2:N:0:
+GATCTAGGGGGAACAGC
++
+CAFFFHIIDG:EFHIII
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 2:Y:0:
+TTTTTTTTTTTTTTTTA
++
+5=?##############
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 2:N:0:
+CCCATTAAGAACAGCAA
++
+#################
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 2:Y:0:
+TCAACCAACACCTCTTC
++
+4:9:???##########
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 2:N:0:
+TTGTGTCGAGGGCTGAC
++
+FDFHFGIIE1CGGHBGE
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.barcode_1.fastq
new file mode 100644
index 0000000..f9b35e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.barcode_1.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 :Y:0:
+NNNNNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 :Y:0:
+NNNNNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 :Y:0:
+NNNNNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 :Y:0:
+NNNANNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 :Y:0:
+NNNCNNNN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 :N:0:
+GAACGATN
++
+########
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 :Y:0:
+ACAAAATT
++
+########
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 :Y:0:
+AAAAAAAA
++
+########
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 :Y:0:
+AGAAAAGA
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 :Y:0:
+ACGAAATC
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 :N:0:
+TACCGTCT
++
+1:?D####
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 :N:0:
+TCCGTCTA
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 :Y:0:
+GAAAAAAA
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 :N:0:
+AAAAGAAG
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 :Y:0:
+TATCTCGG
++
+########
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 :N:0:
+ACCAGTTG
++
+ at C@DDDB?
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.index_1.fastq
new file mode 100644
index 0000000..1600f1c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.index_1.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 1:Y:0:
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 1:Y:0:
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 1:Y:0:
+NNGG
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 1:Y:0:
+NNTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 1:Y:0:
+NNGT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 1:N:0:
+NCTT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 1:Y:0:
+TAAA
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 1:Y:0:
+TTTT
++
+===A
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 1:Y:0:
+TTTT
++
+@;@1
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 1:Y:0:
+NCTG
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 1:N:0:
+NGAA
++
+#1=B
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 1:N:0:
+GCTC
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 1:Y:0:
+TTTT
++
+9<<?
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 1:N:0:
+ATCT
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 1:Y:0:
+TGTC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 1:N:0:
+ACAA
++
+CC at F
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.index_2.fastq
new file mode 100644
index 0000000..3313043
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/N.index_2.fastq
@@ -0,0 +1,64 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1031:2224 2:Y:0:
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1039:2147 2:Y:0:
+NNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1046:2175 2:Y:0:
+ANNN
++
+@###
+ at machine1:HiMom:abcdeACXX:1:1101:1047:2122 2:Y:0:
+ANNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1048:2197 2:Y:0:
+GNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1065:2193 2:N:0:
+GNNN
++
+####
+ at machine1:HiMom:abcdeACXX:1:1101:1162:2207 2:Y:0:
+ACTG
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1159:2179 2:Y:0:
+TTTT
++
+A70<
+ at machine1:HiMom:abcdeACXX:1:1201:1414:2174 2:Y:0:
+TTTT
++
+BDAD
+ at machine1:HiMom:abcdeACXX:1:2101:1040:2208 2:Y:0:
+ATAG
++
+>(2@
+ at machine1:HiMom:abcdeACXX:1:2101:1059:2083 2:N:0:
+TGTC
++
+DDDE
+ at machine1:HiMom:abcdeACXX:1:2101:1143:2137 2:N:0:
+TTCA
++
+D?=D
+ at machine1:HiMom:abcdeACXX:1:2101:1151:2182 2:Y:0:
+TTTT
++
+@?@;
+ at machine1:HiMom:abcdeACXX:1:2101:1215:2110 2:N:0:
+TTCC
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1285:2105 2:Y:0:
+TATA
++
+####
+ at machine1:HiMom:abcdeACXX:1:2101:1450:2134 2:N:0:
+ACCC
++
+DFDF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.1.fastq
new file mode 100644
index 0000000..11586af
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 1:N:0:TAAGCACA
+CATGCAGCGCAAGTAGGTCTACAAG
++
+@@;DFAFFHHHHAHEGHFDGGFABG
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 1:N:0:TAAGCACA
+NCACACACACACTCATTCACAGCTT
++
+#1=DDDFFHHHFHJJIJGGGIIGIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.2.fastq
new file mode 100644
index 0000000..79f852e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 2:N:0:TAAGCACA
+AGGGCGATGAGGACTAG
++
+CC:FHHGIH<EGDDDFH
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 2:N:0:TAAGCACA
+AACACCAAAATAAAATA
++
+#################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.barcode_1.fastq
new file mode 100644
index 0000000..c09def6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 :N:0:TAAGCACA
+TAAGCACA
++
+@@@FFADB
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 :N:0:TAAGCACA
+TAAGCACA
++
+@@CDDFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.index_1.fastq
new file mode 100644
index 0000000..499c550
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 1:N:0:TAAGCACA
+GGGA
++
+8?@:
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 1:N:0:TAAGCACA
+TTAG
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.index_2.fastq
new file mode 100644
index 0000000..cf7827a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAAGCACA.index_2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1064:2239 2:N:0:TAAGCACA
+TGGG
++
+DDDA
+ at machine1:HiMom:abcdeACXX:1:2101:1258:2092 2:N:0:TAAGCACA
+ACAA
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TACCGTCT.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TAGCGGTA.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCAGCC.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.1.fastq
new file mode 100644
index 0000000..db9ec5c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 1:N:0:TATCCAGG
+TTTGACAGTCTCTGAATGAGAANGG
++
+CCCFFFFFHHHHHJIIIJJJIJ#4A
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 1:N:0:TATCCAGG
+NTTTCAGTTCAGAGAACTGCAGAAT
++
+#1=DBDFDHHHHGJIJJJJJIIIJI
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 1:N:0:TATCCAGG
+TTTAAATGGGTAAGAAGCCCGGCTC
++
+ at BCDDFEFHHDHHJJJJJIJJIJJJ
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 1:N:0:TATCCAGG
+AGACAGAAGTACGGGAAGGCGAAGA
++
+@@@FFFFEHFHHHJJCGDHIIECD@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.2.fastq
new file mode 100644
index 0000000..badb490
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 2:N:0:TATCCAGG
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 2:N:0:TATCCAGG
+TTGGTCTTAGATGTTGC
++
+HHHHFGIJIIIJIJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 2:N:0:TATCCAGG
+CGGATTCCGACTTCCAT
++
+HHHHGIJJIGIGIJJGG
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 2:N:0:TATCCAGG
+GTTTCCTAGCTTGTCTT
++
+HHHHF4ACFHIJHHHGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.barcode_1.fastq
new file mode 100644
index 0000000..743cd5c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 :N:0:TATCCAGG
+TATCCAGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 :N:0:TATCCAGG
+TATCCAGG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 :N:0:TATCCAGG
+TATCCAGG
++
+@@BFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 :N:0:TATCCAGG
+TATCCATG
++
+@@@BDDDF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.index_1.fastq
new file mode 100644
index 0000000..6c9d1d0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 1:N:0:TATCCAGG
+GTTT
++
+<<<@
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 1:N:0:TATCCAGG
+TTCA
++
+CC at F
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 1:N:0:TATCCAGG
+CTCC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 1:N:0:TATCCAGG
+AGCT
++
+?@?D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.index_2.fastq
new file mode 100644
index 0000000..dd198fe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCAGG.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1071:2233 2:N:0:TATCCAGG
+GNNN
++
+@###
+ at machine1:HiMom:abcdeACXX:1:1201:1140:2125 2:N:0:TATCCAGG
+TAAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1236:2187 2:N:0:TATCCAGG
+TTAG
++
+FFFD
+ at machine1:HiMom:abcdeACXX:1:2101:1133:2239 2:N:0:TATCCAGG
+TTTT
++
+DFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCCATG.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTCGG.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.1.fastq
new file mode 100644
index 0000000..2635d7d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 1:N:0:TATCTGCC
+GAGACGGAGGCCAACGGGGGCCTGG
++
+@@CFFFFD8FDHFHIGIBG?@BCDG
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 1:N:0:TATCTGCC
+TTGCTTGTCTGTAAAGTATTTTATT
++
+ at C@DDFFDHHFHFHHIBGG>IHHII
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 1:N:0:TATCTGCC
+GAGAAAGAACATGACTACAGAGATG
++
+CCCFFFFFHHHHHJJJJJJJJJHJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 1:N:0:TATCTGCC
+GGCCCGTGGACGCCGCCGAAGAAGC
++
+CCCFFFFFHHHHHJJJJJIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 1:N:0:TATCTGCC
+TGTTGATAGTCCTTCTTATCTTAGT
++
+???DB?==CC2<AC:CC<CFEF<FF
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 1:N:0:TATCTGCC
+NGCGGTAATTCCAGCTCCAATAGCG
++
+#1:BB2 at DHHFHHIIIIHHIIGHGG
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 1:N:0:TATCTGCC
+NCTGAAGAGGCCAAAGCGCCCTCCA
++
+#1=DDFFFHHHHHJJJJJJJJJJJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.2.fastq
new file mode 100644
index 0000000..1c33a1c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 2:N:0:TATCTGCC
+CTCCAACAGCCCCGTAC
++
+CCFHAIIIGGIIGE at EG
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 2:N:0:TATCTGCC
+CCGATCTTCAGGTTACC
++
+HHHHHJJJJJJJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 2:N:0:TATCTGCC
+TCTTTTACTGAAGTGTA
++
+HHHHHJJJJIHIJHHHJ
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 2:N:0:TATCTGCC
+CAGGCTCTCATCAGTTG
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 2:N:0:TATCTGCC
+TAAAATAATAAAAAATG
++
+AF<DF<FFFFIIIFF@<
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 2:N:0:TATCTGCC
+GCCCGCATTGCCGAGAC
++
+2<=;)<<):=@@#####
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 2:N:0:TATCTGCC
+GGGGGATTTGGGCTGTG
++
+HHHHHHJJJHIJIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.barcode_1.fastq
new file mode 100644
index 0000000..f396a3e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.barcode_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 :N:0:TATCTGCC
+TATCAGCC
++
+?@@D;ADD
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 :N:0:TATCTGCC
+TATCTGCC
++
+ at B@FFEFF
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 :N:0:TATCTGCC
+TATCTGCC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 :N:0:TATCTGCC
+TATCTGCC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 :N:0:TATCTGCC
+TATCTGCC
++
+??<D?D83
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 :N:0:TATCTGCC
+TATCTGCC
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 :N:0:TATCTGCC
+TATCTGCC
++
+CCCDF?DD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.index_1.fastq
new file mode 100644
index 0000000..71ebc54
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.index_1.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 1:N:0:TATCTGCC
+GGCA
++
+=;?D
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 1:N:0:TATCTGCC
+GTGC
++
+BBBF
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 1:N:0:TATCTGCC
+TTTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 1:N:0:TATCTGCC
+TGGC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 1:N:0:TATCTGCC
+GTAA
++
+?=?D
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 1:N:0:TATCTGCC
+AAAA
++
+=<=;
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 1:N:0:TATCTGCC
+GTCA
++
+BBCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.index_2.fastq
new file mode 100644
index 0000000..5a31fee
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TATCTGCC.index_2.fastq
@@ -0,0 +1,28 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1267:2209 2:N:0:TATCTGCC
+GAGT
++
+DDD?
+ at machine1:HiMom:abcdeACXX:1:1101:1353:2226 2:N:0:TATCTGCC
+TCTT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1435:2194 2:N:0:TATCTGCC
+GTTT
++
+FDFF
+ at machine1:HiMom:abcdeACXX:1:1201:1084:2204 2:N:0:TATCTGCC
+TCCT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1142:2242 2:N:0:TATCTGCC
+AATG
++
+DDD;
+ at machine1:HiMom:abcdeACXX:1:1201:1187:2100 2:N:0:TATCTGCC
+AAGA
++
+AA##
+ at machine1:HiMom:abcdeACXX:1:1201:1392:2109 2:N:0:TATCTGCC
+GACA
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.barcode_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.index_1.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCCGTCTA.index_2.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.1.fastq
new file mode 100644
index 0000000..25c9e17
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 1:N:0:TCGCTAGA
+GGAGCGAGTCTGGGTCTCAGCCCCG
++
+CCCFFFFFHHHHHJGHIIIHJJJJI
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 1:N:0:TCGCTAGA
+TGTAAAGTATGCTGGCTCAGTGTAT
++
+BBBFDFFEHHHHHJJJJJJJIJHJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 1:N:0:TCGCTAGA
+NTCCCAGCGAACCCGCGTGCAACCT
++
+#1=DFFFFHHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 1:N:0:TCGCTAGA
+NACAGGCGTGGAGGAGGCGGCGGCC
++
+#4=DDDFFHHHHHJIGJHFHHFFED
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 1:N:0:TCGCTAGA
+ATGAACAAAGGAAGAATTATGCACG
++
+?;?D;DDDF?;:+<<CFFCHE433A
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.2.fastq
new file mode 100644
index 0000000..146b62a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 2:N:0:TCGCTAGA
+CTGGCTTATCACTCATC
++
+HHHHHJJJJJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 2:N:0:TCGCTAGA
+CTATTTTTATGTAAAAA
++
+HHHHHJIGIJJJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 2:N:0:TCGCTAGA
+GAGCCGGCGCAGGTGCA
++
+HHHHHJJJIJJJJGHIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 2:N:0:TCGCTAGA
+GAGGCGGTGGCGGGATC
++
+HHFHHII:?GGHIIB6?
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 2:N:0:TCGCTAGA
+GGTTGTCAAGCGTTAAA
++
+#################
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.barcode_1.fastq
new file mode 100644
index 0000000..77b869b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.barcode_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 :N:0:TCGCTAGA
+TCGCTAGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 :N:0:TCGCTAGA
+TCGCTAGA
++
+ at BCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 :N:0:TCGCTAGA
+TCGCTAGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 :N:0:TCGCTAGA
+TCGCTAGA
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 :N:0:TCGCTAGA
+TCGCTAGA
++
+;@<:AA at A
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.index_1.fastq
new file mode 100644
index 0000000..f3b696d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.index_1.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 1:N:0:TCGCTAGA
+CGAC
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 1:N:0:TCGCTAGA
+GGGG
++
+ at CCF
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 1:N:0:TCGCTAGA
+ATTT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 1:N:0:TCGCTAGA
+TTGG
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 1:N:0:TCGCTAGA
+NGGA
++
+####
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.index_2.fastq
new file mode 100644
index 0000000..aea4d94
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCGCTAGA.index_2.fastq
@@ -0,0 +1,20 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1143:2192 2:N:0:TCGCTAGA
+AAGT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1101:1479:2221 2:N:0:TCGCTAGA
+AAAT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1312:2112 2:N:0:TCGCTAGA
+GCAG
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1416:2128 2:N:0:TCGCTAGA
+TGTG
++
+DDDD
+ at machine1:HiMom:abcdeACXX:1:2101:1064:2242 2:N:0:TCGCTAGA
+AAAA
++
+(<?<
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.1.fastq
new file mode 100644
index 0000000..2b9e8b0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 1:N:0:TCTGCAAG
+NGTTGGTGTCTTCATTTTATGTATA
++
+#1=DDFDFHHHHHJIJJJHIJHIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.2.fastq
new file mode 100644
index 0000000..5ddea06
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 2:N:0:TCTGCAAG
+GGCNNCAAAAAAAGAAA
++
+?<@##3<@@?@@?????
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.barcode_1.fastq
new file mode 100644
index 0000000..5f73cc8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.barcode_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 :N:0:TCTGCAAG
+TCTGCAAG
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.index_1.fastq
new file mode 100644
index 0000000..80392dc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.index_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 1:N:0:TCTGCAAG
+NTCA
++
+#0;@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.index_2.fastq
new file mode 100644
index 0000000..43602f0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TCTGCAAG.index_2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1042:2174 2:N:0:TCTGCAAG
+GGAA
++
+@@?@
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.1.fastq
new file mode 100644
index 0000000..45daf27
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 1:N:0:TGCAAGTA
+ATGGCAGGGCAGAGTTCTGATGAGT
++
+CCCFFFFFHHGGGIFHEIIGIIII?
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 1:N:0:TGCAAGTA
+GAGCAGGCAAGGAGGACTTCTTGTT
++
+CCCFFFFFGHHHHJJHHIJJJJJIJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.2.fastq
new file mode 100644
index 0000000..46b8689
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 2:N:0:TGCAAGTA
+AAGAAGCACAAGTACAT
++
+HHHGHHGIIGJJEHHIG
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 2:N:0:TGCAAGTA
+ATGGTTCTTTTCCTCAC
++
+HHHHHJJJJJJJIJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.barcode_1.fastq
new file mode 100644
index 0000000..c310ad9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.barcode_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 :N:0:TGCAAGTA
+TGCAAGTA
++
+@@CFFF?D
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 :N:0:TGCAAGTA
+TGCAAGTA
++
+CCCFFFEF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.index_1.fastq
new file mode 100644
index 0000000..27d0f82
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.index_1.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 1:N:0:TGCAAGTA
+GGAA
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 1:N:0:TGCAAGTA
+GAGC
++
+@@@D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.index_2.fastq
new file mode 100644
index 0000000..14603ed
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCAAGTA.index_2.fastq
@@ -0,0 +1,8 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1242:2170 2:N:0:TGCAAGTA
+GGAA
++
+FDFF
+ at machine1:HiMom:abcdeACXX:1:2101:1163:2222 2:N:0:TGCAAGTA
+GATA
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.1.fastq
new file mode 100644
index 0000000..56c58e4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 1:N:0:TGCTGCTG
+NTCTCACTGTGAATTTGTGGTGGGC
++
+#1=DDFFFHHHHHJJJJGIJIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 1:N:0:TGCTGCTG
+NAATGACATGTTTAAAGATGGACTC
++
+#1:BDDFFHHFHHGIJIJIIIIGII
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 1:N:0:TGCTGCTG
+AGAGGTGAAATTCTTGGACCGGCGC
++
+@@@DDDDDHFHHHDB:EFHHCAG?D
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 1:N:0:TGCTGCTG
+CCGAGAGAGTGAGAGCGCTCCTGGG
++
+CCCFFFFFHFHHHJJJJIJJJJIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.2.fastq
new file mode 100644
index 0000000..367a935
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 2:N:0:TGCTGCTG
+NNNNNNNNNNNNNNNNN
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 2:N:0:TGCTGCTG
+TTGCTTTGTAGTTATAG
++
+HHHHHIIGIABCFFHBF
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 2:N:0:TGCTGCTG
+TGGTCGGAACTACGACG
++
+HHHHHIJJJJJJJIJJI
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 2:N:0:TGCTGCTG
+TTCACCACCCAGAGGAA
++
+HHHHHJJJJJJIJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.barcode_1.fastq
new file mode 100644
index 0000000..1d013d1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 :N:0:TGCTGCTG
+TGCTGCTG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 :N:0:TGCTGCTG
+TGCTGCTG
++
+@@@FFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 :N:0:TGCTGCTG
+TGCTGCTG
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 :N:0:TGCTGCTG
+TGCTGCTG
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.index_1.fastq
new file mode 100644
index 0000000..ad17481
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 1:N:0:TGCTGCTG
+TTTC
++
+<<<@
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 1:N:0:TGCTGCTG
+GATC
++
+@@@D
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 1:N:0:TGCTGCTG
+ATCG
++
+BCCF
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 1:N:0:TGCTGCTG
+AATT
++
+CCCF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.index_2.fastq
new file mode 100644
index 0000000..59102b0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGCTGCTG.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1084:2136 2:N:0:TGCTGCTG
+TNNN
++
+@###
+ at machine1:HiMom:abcdeACXX:1:1201:1285:2100 2:N:0:TGCTGCTG
+TTTT
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1162:2139 2:N:0:TGCTGCTG
+TTTA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:2101:1195:2150 2:N:0:TGCTGCTG
+GAAC
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.1.fastq
new file mode 100644
index 0000000..17d82b7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 1:N:0:TGTAACTC
+TGTGTGTGTGGGTGTGTGTATATAT
++
+?@?DDFFFFFHH at GEFCCCHGIGJI
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.2.fastq
new file mode 100644
index 0000000..f271cf8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 2:N:0:TGTAACTC
+TCCGATCTTGTGCTCTT
++
+HHHHHJJJJFHIHHIJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.barcode_1.fastq
new file mode 100644
index 0000000..2ec05e5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.barcode_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 :N:0:TGTAACTC
+TGTAACTC
++
+@@@FFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.index_1.fastq
new file mode 100644
index 0000000..f2dda48
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.index_1.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 1:N:0:TGTAACTC
+TGTG
++
+BC at D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.index_2.fastq
new file mode 100644
index 0000000..a3e8a3f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAACTC.index_2.fastq
@@ -0,0 +1,4 @@
+ at machine1:HiMom:abcdeACXX:1:1201:1421:2154 2:N:0:TGTAACTC
+CTCT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.1.fastq
new file mode 100644
index 0000000..3eb756e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 1:N:0:TGTAATCA
+NATGACTATGGTAACTGAAAGAAAA
++
+#1:A1BDADBFFDFIIIEEHECACF
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 1:N:0:TGTAATCA
+NCCTCAATGAGCGGCACTATGGGGG
++
+#1=DDFFFHHHHGJJIJJGHIJGIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 1:N:0:TGTAATCA
+TATCCTCCCTACTATGCCTAGAAGG
++
+=?@DADEFHBHDFG>EFGDHGFGHD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.2.fastq
new file mode 100644
index 0000000..c7da377
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 2:N:0:TGTAATCA
+TTTTTGTTTTACTTTAA
++
+#################
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 2:N:0:TGTAATCA
+AGGATGGTCGGGCTCCA
++
+GHFHHJIJJGJIBHJJG
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 2:N:0:TGTAATCA
+TTTTAGCATTGGAGTAG
++
+FHHHFGGHHIIIGGAGH
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.barcode_1.fastq
new file mode 100644
index 0000000..13972ab
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.barcode_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 :N:0:TGTAATCA
+TGTAATCA
++
+@@@DFDFD
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 :N:0:TGTAATCA
+TGTAATCA
++
+CC at FFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 :N:0:TGTAATCA
+TGTAATCA
++
+=?1AA:=D
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.index_1.fastq
new file mode 100644
index 0000000..61f0750
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.index_1.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 1:N:0:TGTAATCA
+ACTT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 1:N:0:TGTAATCA
+CTGT
++
+@@CD
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 1:N:0:TGTAATCA
+ACGA
++
+@<??
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.index_2.fastq
new file mode 100644
index 0000000..5001709
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TGTAATCA.index_2.fastq
@@ -0,0 +1,12 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1419:2119 2:N:0:TGTAATCA
+TCCT
++
+####
+ at machine1:HiMom:abcdeACXX:1:1201:1208:2132 2:N:0:TGTAATCA
+AGAA
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1344:2147 2:N:0:TGTAATCA
+TTAG
++
+DDDD
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.1.fastq
new file mode 100644
index 0000000..b8f172c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 1:N:0:TTGTCTAT
+TCAAGCAGGAGCAGCTAAGTCCTAA
++
+CCCFFFFFHHHHHJJJJJJHIJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 1:N:0:TTGTCTAT
+GTAAGAACTACCCTGGGTCCCCGTG
++
+@@BFFFFFHHHHHJJJJGIJJJJHI
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 1:N:0:TTGTCTAT
+NGGGAACCTGGCGCTAAACCATTCG
++
+#1=DFFFFHHHHHJJJJJJJJJIJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 1:N:0:TTGTCTAT
+NTTCCCCCCATGTAATTATTGTGAA
++
+#1=DDFFFHHHHHJJJJJJJJIJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.2.fastq
new file mode 100644
index 0000000..7eec7b8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 2:N:0:TTGTCTAT
+CACTCCTTCCACTTTGG
++
+HHHHHJJIJJJJJJJIJ
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 2:N:0:TTGTCTAT
+CAGAATTGTGGCCCCAT
++
+HHHHHJJJGHIJJJJJI
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 2:N:0:TTGTCTAT
+TTGTGTCGAGGGCTGAC
++
+HHGHHJJJJIIJJJJJJ
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 2:N:0:TTGTCTAT
+TGCCTATGTCCAACAAG
++
+GHHHHJIJJJJJJJJJJ
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.barcode_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.barcode_1.fastq
new file mode 100644
index 0000000..e53ec2d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.barcode_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 :N:0:TTGTCTAT
+TTGTCTAT
++
+CCCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 :N:0:TTGTCTAT
+TTGTCTAT
++
+ at CCFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 :N:0:TTGTCTAT
+TTGTCTAT
++
+B at CFFFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 :N:0:TTGTCTAT
+TTGTCTAT
++
+CCCFFFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.index_1.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.index_1.fastq
new file mode 100644
index 0000000..363b984
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.index_1.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 1:N:0:TTGTCTAT
+ATCT
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 1:N:0:TTGTCTAT
+AGAA
++
+B at BF
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 1:N:0:TTGTCTAT
+ACAA
++
+CCCF
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 1:N:0:TTGTCTAT
+AGTT
++
+BCBF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.index_2.fastq b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.index_2.fastq
new file mode 100644
index 0000000..ee76d11
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/TTGTCTAT.index_2.fastq
@@ -0,0 +1,16 @@
+ at machine1:HiMom:abcdeACXX:1:1101:1219:2164 2:N:0:TTGTCTAT
+TATC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1103:2184 2:N:0:TTGTCTAT
+GTTT
++
+FDEF
+ at machine1:HiMom:abcdeACXX:1:1201:1107:2109 2:N:0:TTGTCTAT
+ACCC
++
+FFFF
+ at machine1:HiMom:abcdeACXX:1:1201:1252:2141 2:N:0:TTGTCTAT
+ATTT
++
+FFFF
diff --git a/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/mp_barcode.params b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/mp_barcode.params
new file mode 100644
index 0000000..3d2058b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/fastq_with_4M4M/mp_barcode.params
@@ -0,0 +1,62 @@
+BARCODE_1 SAMPLE_ALIAS LIBRARY_NAME OUTPUT_PREFIX
+AAAAGAAG SA_AAAAGAAG LN_AAAAGAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AAAAGAAG
+AACAATGG SA_AACAATGG LN_AACAATGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AACAATGG
+AACGCATT SA_AACGCATT LN_AACGCATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AACGCATT
+ACAAAATT SA_ACAAAATT LN_ACAAAATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACAAAATT
+ACAGGTAT SA_ACAGGTAT LN_ACAGGTAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACAGGTAT
+ACAGTTGA SA_ACAGTTGA LN_ACAGTTGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACAGTTGA
+ACCAGTTG SA_ACCAGTTG LN_ACCAGTTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACCAGTTG
+ACGAAATC SA_ACGAAATC LN_ACGAAATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACGAAATC
+ACTAAGAC SA_ACTAAGAC LN_ACTAAGAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACTAAGAC
+ACTGTACC SA_ACTGTACC LN_ACTGTACC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACTGTACC
+ACTGTATC SA_ACTGTATC LN_ACTGTATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ACTGTATC
+AGAAAAGA SA_AGAAAAGA LN_AGAAAAGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AGAAAAGA
+AGCATGGA SA_AGCATGGA LN_AGCATGGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AGCATGGA
+AGGTAAGG SA_AGGTAAGG LN_AGGTAAGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AGGTAAGG
+AGGTCGCA SA_AGGTCGCA LN_AGGTCGCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/AGGTCGCA
+ATTATCAA SA_ATTATCAA LN_ATTATCAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ATTATCAA
+ATTCCTCT SA_ATTCCTCT LN_ATTCCTCT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/ATTCCTCT
+CAACTCTC SA_CAACTCTC LN_CAACTCTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CAACTCTC
+CAATAGAC SA_CAATAGAC LN_CAATAGAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CAATAGAC
+CAATAGTC SA_CAATAGTC LN_CAATAGTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CAATAGTC
+CAGCGGAT SA_CAGCGGAT LN_CAGCGGAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CAGCGGAT
+CAGCGGTA SA_CAGCGGTA LN_CAGCGGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CAGCGGTA
+CCAACATT SA_CCAACATT LN_CCAACATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CCAACATT
+CCAGCACC SA_CCAGCACC LN_CCAGCACC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CCAGCACC
+CCATGCGT SA_CCATGCGT LN_CCATGCGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CCATGCGT
+CGCCTTCC SA_CGCCTTCC LN_CGCCTTCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CGCCTTCC
+CGCTATGT SA_CGCTATGT LN_CGCTATGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CGCTATGT
+CTAACTCG SA_CTAACTCG LN_CTAACTCG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CTAACTCG
+CTATGCGC SA_CTATGCGC LN_CTATGCGC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CTATGCGC
+CTATGCGT SA_CTATGCGT LN_CTATGCGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CTATGCGT
+CTGCGGAT SA_CTGCGGAT LN_CTGCGGAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CTGCGGAT
+CTGTAATC SA_CTGTAATC LN_CTGTAATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/CTGTAATC
+GAAAAAAA SA_GAAAAAAA LN_GAAAAAAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GAAAAAAA
+GAACGAT. SA_GAACGAT. LN_GAACGAT. /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GAACGAT.
+GAAGGAAG SA_GAAGGAAG LN_GAAGGAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GAAGGAAG
+GACCAGGA SA_GACCAGGA LN_GACCAGGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GACCAGGA
+GACCAGGC SA_GACCAGGC LN_GACCAGGC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GACCAGGC
+GACCGTTG SA_GACCGTTG LN_GACCGTTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GACCGTTG
+GACCTAAC SA_GACCTAAC LN_GACCTAAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GACCTAAC
+GATATCCA SA_GATATCCA LN_GATATCCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GATATCCA
+GCCGTCGA SA_GCCGTCGA LN_GCCGTCGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GCCGTCGA
+GCCTAGCC SA_GCCTAGCC LN_GCCTAGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GCCTAGCC
+GTAACATC SA_GTAACATC LN_GTAACATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GTAACATC
+GTCCACAG SA_GTCCACAG LN_GTCCACAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/GTCCACAG
+TAAGCACA SA_TAAGCACA LN_TAAGCACA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TAAGCACA
+TACCGTCT SA_TACCGTCT LN_TACCGTCT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TACCGTCT
+TAGCGGTA SA_TAGCGGTA LN_TAGCGGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TAGCGGTA
+TATCAGCC SA_TATCAGCC LN_TATCAGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TATCAGCC
+TATCCAGG SA_TATCCAGG LN_TATCCAGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TATCCAGG
+TATCCATG SA_TATCCATG LN_TATCCATG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TATCCATG
+TATCTCGG SA_TATCTCGG LN_TATCTCGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TATCTCGG
+TATCTGCC SA_TATCTGCC LN_TATCTGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TATCTGCC
+TCCGTCTA SA_TCCGTCTA LN_TCCGTCTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TCCGTCTA
+TCGCTAGA SA_TCGCTAGA LN_TCGCTAGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TCGCTAGA
+TCTGCAAG SA_TCTGCAAG LN_TCTGCAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TCTGCAAG
+TGCAAGTA SA_TGCAAGTA LN_TGCAAGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TGCAAGTA
+TGCTGCTG SA_TGCTGCTG LN_TGCTGCTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TGCTGCTG
+TGTAACTC SA_TGTAACTC LN_TGTAACTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TGTAACTC
+TGTAATCA SA_TGTAATCA LN_TGTAATCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TGTAATCA
+TTGTCTAT SA_TTGTCTAT LN_TTGTCTAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/TTGTCTAT
+N SA_N LN_N /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcodeWithTwoIndexes.5894075407482771321.dir/N
diff --git a/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithMolecularIndex4M4M.sam b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithMolecularIndex4M4M.sam
new file mode 100644
index 0000000..3b70d87
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithMolecularIndex4M4M.sam
@@ -0,0 +1,182 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:HiDad LB:Hello, World PL:illumina PU:HiMom.1 CN:BI
+HiMom:1:1101:1031:2224 516 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1039:2147 516 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1046:2175 516 * 0 0 * * 0 0 ..GGA.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1047:2122 516 * 0 0 * * 0 0 ..TCA.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNANNNN
+HiMom:1:1101:1048:2197 516 * 0 0 * * 0 0 ..GTG.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNCNNNN
+HiMom:1:1101:1065:2193 4 * 0 0 * * 0 0 .CTTG.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:GAACGATN
+HiMom:1:1101:1069:2159 4 * 0 0 * * 0 0 GACGT.................... <<<@?#################### RG:Z:HiMom.1 QX:Z:@BBFFFFF RX:Z:GTCCACAG
+HiMom:1:1101:1071:2233 4 * 0 0 * * 0 0 GTTTG.................... <<<@@#################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1101:1083:2193 4 * 0 0 * * 0 0 AGGCT.................... ######################### RG:Z:HiMom.1 QX:Z:?@;DD?BD RX:Z:CCAACATT
+HiMom:1:1101:1084:2136 4 * 0 0 * * 0 0 TTTCT.................... <<<@@#################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:1101:1089:2172 4 * 0 0 * * 0 0 TCCGG.................... :<<??#################### RG:Z:HiMom.1 QX:Z:?@@FF;=B RX:Z:GACCAGGA
+HiMom:1:1101:1100:2207 4 * 0 0 * * 0 0 AGGCT............G....... ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1111:2148 4 * 0 0 * * 0 0 GCGAA.A..........GGACGAC. ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCCGTCGA
+HiMom:1:1101:1138:2141 4 * 0 0 * * 0 0 TCCGATCTGCTTCAGGTCGATCAGA CCCFFFFFHGHHHJJIGHIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1140:2120 4 * 0 0 * * 0 0 TTTTTTTTTTTTTAACTTTGCAAAT @@@DDDDDHHHHFB at 9FHI@BFH@@ RG:Z:HiMom.1 QX:Z:@@@DDFDF RX:Z:CAACTCTC
+HiMom:1:1101:1143:2192 4 * 0 0 * * 0 0 CGACAAGTCTGGCTTATCACTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1150:2228 4 * 0 0 * * 0 0 ATGGGAGGCGATTCCTAGGGGGTTG 8?=DD8;@BH6DHD<FGGGEIGHIG RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:AGGTCGCA
+HiMom:1:1101:1157:2135 4 * 0 0 * * 0 0 TTTAAAGTCTTAATCAAAGATGATA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1162:2207 516 * 0 0 * * 0 0 TAAAACTGGGGAAGTTAGAGGAATG ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:ACAAAATT
+HiMom:1:1101:1165:2239 4 * 0 0 * * 0 0 ATGGAAGTCGAGACAGAAGTGAGAA ######################### RG:Z:HiMom.1 QX:Z:B@@DFFFF RX:Z:GCCTAGCC
+HiMom:1:1101:1175:2197 4 * 0 0 * * 0 0 AAGAGCTGGGGAACATCCAGAAAGG BC at FFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1101:1188:2237 4 * 0 0 * * 0 0 GCTTCCTTCAAGACAGAAGTGAGAA CCCFFDDEFHHFFE at FDHHAIAFHG RG:Z:HiMom.1 QX:Z:@@?DFFDF RX:Z:GTAACATC
+HiMom:1:1101:1197:2200 4 * 0 0 * * 0 0 ATATTCCACTGGAACCACAGAACCC @@@FFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFDFFF RX:Z:AACGCATT
+HiMom:1:1101:1206:2126 4 * 0 0 * * 0 0 ATCTGTCCAGTGGTGCACTGAATGT CCCFFFFFHHHHHHIIJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1212:2230 4 * 0 0 * * 0 0 TTTTAGCTTTATTGGGGAGGGGGTG CCCFFFFFHHGHHJJJJGJJJJJDF RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1101:1218:2200 4 * 0 0 * * 0 0 GCTCTTCCGATCTATCTGCTCGTCC (-(=34???3;@############# RG:Z:HiMom.1 QX:Z:@CCFFDDF RX:Z:GACCGTTG
+HiMom:1:1101:1219:2164 4 * 0 0 * * 0 0 ATCTTATCCACTCCTTCCACTTTGG CCCFFFFFHHHHHJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1101:1221:2143 4 * 0 0 * * 0 0 CAATTGAATGTCTGCACAGCCGCTT @@@FFFFDHHHHHJJJIIIJGHIJJ RG:Z:HiMom.1 QX:Z:@@CDDDDF RX:Z:GCCGTCGA
+HiMom:1:1101:1236:2121 4 * 0 0 * * 0 0 TTGCGCTTACTTTGTAGCCTTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFDDF RX:Z:ACAGGTAT
+HiMom:1:1101:1242:2170 4 * 0 0 * * 0 0 GGAAGGAAAAGAAGCACAAGTACAT @@@DFDFFHHHGHHGIIGJJEHHIG RG:Z:HiMom.1 QX:Z:@@CFFF?D RX:Z:TGCAAGTA
+HiMom:1:1101:1257:2223 4 * 0 0 * * 0 0 TGCTCTTCCGATCTTTTAGCAAAGC :?@DDBDDHFFHDGIGIIJJJGGGI RG:Z:HiMom.1 QX:Z:;@@DD=DD RX:Z:GACCGTTG
+HiMom:1:1101:1259:2152 4 * 0 0 * * 0 0 ATTTTTATATTTTTTTAGACATAGG CCCFFFFFGHHHHJJJJIGIIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTAAGAC
+HiMom:1:1101:1261:2127 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHGHHHJJIFDDDDDDDD RG:Z:HiMom.1 QX:Z:>7+ at A7A7 RX:Z:ACTAAGAC
+HiMom:1:1101:1263:2236 516 * 0 0 * * 0 0 AGTTCTTCAGTAATTTTAGTACTGC ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGTAAGG
+HiMom:1:1101:1267:2209 4 * 0 0 * * 0 0 GGCAGAGTCTCCAACAGCCCCGTAC =;?DDDD?CCFHAIIIGGIIGE at EG RG:Z:HiMom.1 QX:Z:?@@D;ADD RX:Z:TATCAGCC
+HiMom:1:1101:1269:2170 4 * 0 0 * * 0 0 TTCCAAGCCTGTGCTTTAAGGAAAA @@<ADBDBDF8DDCFH at GIE@@GGH RG:Z:HiMom.1 QX:Z:@@@DDDF? RX:Z:ATTATCAA
+HiMom:1:1101:1290:2225 4 * 0 0 * * 0 0 TCAGTTCACTGGCAAAGACAGTCAC C@@FBEDDFHFHGIIICEHGDHBHE RG:Z:HiMom.1 QX:Z:?<@DFBBD RX:Z:GCCTAGCC
+HiMom:1:1101:1291:2150 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHFHHIJJJIIIGIJIJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:CGCTATGT
+HiMom:1:1101:1302:2244 4 * 0 0 * * 0 0 TGAATACATATAACAAATGCAAAAA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCTAAC
+HiMom:1:1101:1308:2153 516 * 0 0 * * 0 0 TCTGTAAGGTAATCCCCGCATGTGT 1?1=4===AFFDFFGFDGFB at CFB: RG:Z:HiMom.1 QX:Z::?@B?@DD RX:Z:AACGCATT
+HiMom:1:1101:1309:2210 4 * 0 0 * * 0 0 AGTGGGCTAGGGCATTTTTAATCTT @@?DFFDFHHHDFHJIJJIJGIIIJ RG:Z:HiMom.1 QX:Z:?@@ADEEF RX:Z:ATTCCTCT
+HiMom:1:1101:1314:2233 4 * 0 0 * * 0 0 AGGAAAGTTGGGCTGACCTGACAGA @@<DDD;=FBFADBCGDEH?F;FCG RG:Z:HiMom.1 QX:Z:@<@?B@;A RX:Z:CGCTATGT
+HiMom:1:1101:1316:2126 4 * 0 0 * * 0 0 TCTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJJHFDDDDDD RG:Z:HiMom.1 QX:Z:1>>7A### RX:Z:CAATAGAC
+HiMom:1:1101:1327:2200 516 * 0 0 * * 0 0 GTCATCTGGGCTGTCGACAGGTGTC @B at FFFFFHHHHGIJJJJJJIFHHI RG:Z:HiMom.1 QX:Z:BCCFDFFD RX:Z:GCCGTCGA
+HiMom:1:1101:1328:2225 4 * 0 0 * * 0 0 AGGAAATTAGGACTTACCTGACATA ######################### RG:Z:HiMom.1 QX:Z:??;=A:B= RX:Z:CAACTCTC
+HiMom:1:1101:1338:2175 4 * 0 0 * * 0 0 GCTTGTTGGCTTTAACATCCACAAT CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GAAGGAAG
+HiMom:1:1101:1347:2149 4 * 0 0 * * 0 0 GCTCTTCCGATCTGTGCTCTTCCGA CCCFFFFFDFHHFIJDGIGGHGIGH RG:Z:HiMom.1 QX:Z:CC at DFFFD RX:Z:GACCAGGA
+HiMom:1:1101:1353:2226 4 * 0 0 * * 0 0 GTGCTCTTCCGATCTTCAGGTTACC BBBFFFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:@B at FFEFF RX:Z:TATCTGCC
+HiMom:1:1101:1363:2138 4 * 0 0 * * 0 0 GTTCTTAAACCTGTTAGAACTTCTG C@@FFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:1101:1399:2128 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHIJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1101:1403:2194 4 * 0 0 * * 0 0 ACATGGTGAAACCCTGTCTCTACTA CCCFFFDDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:1101:1406:2222 4 * 0 0 * * 0 0 GGCTGGACTCCCCTGGTTCTGGGCA ;?@DDDBD?FHDFGIIIGIGHHIII RG:Z:HiMom.1 QX:Z:C@@DBFEF RX:Z:AGCATGGA
+HiMom:1:1101:1419:2119 4 * 0 0 * * 0 0 ACTTTCCTTTTTTGTTTTACTTTAA ######################### RG:Z:HiMom.1 QX:Z:@@@DFDFD RX:Z:TGTAATCA
+HiMom:1:1101:1420:2213 4 * 0 0 * * 0 0 TTCACTGTACCGGCCGTGCGTACTT @CCFFFFDHHHFGIJJJJJJGHIGG RG:Z:HiMom.1 QX:Z:@C at FFFDF RX:Z:CAGCGGTA
+HiMom:1:1101:1435:2194 4 * 0 0 * * 0 0 TTTTGTTTTCTTTTACTGAAGTGTA CCCFFDFFHHHHHJJJJIHIJHHHJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1101:1441:2148 4 * 0 0 * * 0 0 TTTTGGCTCTAGAGGGGGTAGAGGG CCCFFFFFHHDFBHIIJJ1?FGHIJ RG:Z:HiMom.1 QX:Z:@@BFFDDD RX:Z:CGCTATGT
+HiMom:1:1101:1452:2132 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AACGCATT
+HiMom:1:1101:1460:2176 4 * 0 0 * * 0 0 AGGAAAAAGACACAACAAGTCCAAC ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GATATCCA
+HiMom:1:1101:1479:2221 4 * 0 0 * * 0 0 GGGGAAATCTATTTTTATGTAAAAA @CCFFFFFHHHHHJIGIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@BCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1491:2120 4 * 0 0 * * 0 0 GGCCAGGCTGAACTTCTGAGCTGCT CCCFFFFFHHHGHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1018:2133 4 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:8??=BBBA RX:Z:ATTCCTCT
+HiMom:1:1201:1018:2217 516 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:;<;:BBDD RX:Z:ATTATCAA
+HiMom:1:1201:1028:2202 4 * 0 0 * * 0 0 ..AAAC.C.T.......GG..TG.. ##42@?################### RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GAAGGAAG
+HiMom:1:1201:1042:2174 4 * 0 0 * * 0 0 .TCAGGAAGGC..CAAAAAAAGAAA #0;@@@?@?<@##3<@@?@@????? RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCTGCAAG
+HiMom:1:1201:1043:2246 4 * 0 0 * * 0 0 .GCATCATTTC..GCTTCTCTCTGT #0;@@??@=@>##22=;@??><@?? RG:Z:HiMom.1 QX:Z:@<?DD:B= RX:Z:CGCTATGT
+HiMom:1:1201:1045:2105 516 * 0 0 * * 0 0 .TTTTTTTTTT..TTTTTTTTTTTT #0;@@@@@@@?##0:????????=< RG:Z:HiMom.1 QX:Z:1112 at A## RX:Z:CTGTAATC
+HiMom:1:1201:1054:2151 4 * 0 0 * * 0 0 GTCAGGCACTGAGAATATATGGGTG CBCFFFFFHHHHHJJJJJJJJJJEG RG:Z:HiMom.1 QX:Z:CCCFFFDF RX:Z:CAATAGTC
+HiMom:1:1201:1064:2239 4 * 0 0 * * 0 0 GGGATGGGAGGGCGATGAGGACTAG 8?@:DDDACC:FHHGIH<EGDDDFH RG:Z:HiMom.1 QX:Z:@@@FFADB RX:Z:TAAGCACA
+HiMom:1:1201:1073:2225 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGGAGGGT @BBDFFFFHHHHHJJJJJJJJJJJ: RG:Z:HiMom.1 QX:Z:B@@BDEFF RX:Z:ATTCCTCT
+HiMom:1:1201:1083:2121 4 * 0 0 * * 0 0 ACACACAACACCACCGCCCTCCCCC ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CTATGCGT
+HiMom:1:1201:1084:2204 4 * 0 0 * * 0 0 TGGCTCCTCAGGCTCTCATCAGTTG CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1095:2146 4 * 0 0 * * 0 0 ACTGACAACACCAAATGCTGCTAAG CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:1201:1103:2184 4 * 0 0 * * 0 0 AGAAGTTTCAGAATTGTGGCCCCAT B at BFFDEFHHHHHJJJGHIJJJJJI RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1107:2109 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHGHHJJJJIIJJJJJJ RG:Z:HiMom.1 QX:Z:B at CFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1118:2198 4 * 0 0 * * 0 0 AATAAACTTTATTAAAGCAGTTAAA C at CFFFFFHDHHHGIIIJJJIJJJJ RG:Z:HiMom.1 QX:Z:@@@DDBDD RX:Z:ATTATCAA
+HiMom:1:1201:1122:2227 4 * 0 0 * * 0 0 GTCATATAAGGCCCAGTCCAAGGAA @@@FFFFFHHHGGIJIGGIJFIJII RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1123:2161 516 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGCATACA ===AAAA8AAAA<AAA)@CBA9>A# RG:Z:HiMom.1 QX:Z:?;@DFDFF RX:Z:GACCAGGA
+HiMom:1:1201:1127:2112 516 * 0 0 * * 0 0 TAATCACCTGAGCAGTGAAGCCAGC @<@?BDDDHD?FDBHI?AHGGGDFH RG:Z:HiMom.1 QX:Z:=??BA?BD RX:Z:CAACTCTC
+HiMom:1:1201:1134:2144 4 * 0 0 * * 0 0 AGTGTGAGTAATGGTTGAGAGGTGG B@?DDDFFFHHGHJHHGFIHHIFGI RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CGCTATGT
+HiMom:1:1201:1138:2227 516 * 0 0 * * 0 0 GACAAATATAGGAAATAGAAGCTAT =1=A=AAA,2?4>7C<<4<A+3<AB RG:Z:HiMom.1 QX:Z:######## RX:Z:CCAACATT
+HiMom:1:1201:1140:2125 4 * 0 0 * * 0 0 TTCATAAATTGGTCTTAGATGTTGC CC at FFFFFHHHHFGIJIIIJIJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1142:2242 4 * 0 0 * * 0 0 GTAAAATGTAAAATAATAAAAAATG ?=?DDDD;AF<DF<FFFFIIIFF@< RG:Z:HiMom.1 QX:Z:??<D?D83 RX:Z:TATCTGCC
+HiMom:1:1201:1150:2161 4 * 0 0 * * 0 0 TTCTCACTACTGTGATTGTGCCACT @C at FFFFFGHHHHGIIIICEHCFGH RG:Z:HiMom.1 QX:Z:@@@FDDDD RX:Z:AACGCATT
+HiMom:1:1201:1159:2179 516 * 0 0 * * 0 0 TTTTTTTTTATTTTTCTAAATACTT ===AA#################### RG:Z:HiMom.1 QX:Z:####+<0? RX:Z:AAAAAAAA
+HiMom:1:1201:1160:2109 4 * 0 0 * * 0 0 ACATCCTTCCCATGCCACCAACTCG CCCFFFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:C at BFFFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1180:2119 4 * 0 0 * * 0 0 GCTCTAAATTTTGCTTTTCTACAGC CCCFFFFFHHHHHJJJJIJIJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GACCGTTG
+HiMom:1:1201:1185:2143 4 * 0 0 * * 0 0 GCTGAAGGCCCGTGGGCCAGAGGTG @CCFFFFFHHHHHJJJJJJJJJJHI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:1201:1187:2100 4 * 0 0 * * 0 0 AAAAAAGAGCCCGCATTGCCGAGAC =<=;AA################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1190:2194 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1204:2228 4 * 0 0 * * 0 0 TCTTCTTGTCGATGAGGAACTTGGT @?@FFFFFDHHGHJIJJGHIIJJJH RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1201:1208:2132 4 * 0 0 * * 0 0 CTGTAGAAAGGATGGTCGGGCTCCA @@CDFFFFGHFHHJIJJGJIBHJJG RG:Z:HiMom.1 QX:Z:CC at FFFFF RX:Z:TGTAATCA
+HiMom:1:1201:1219:2115 4 * 0 0 * * 0 0 TGGGAGTAGTTCCCTGCTAAGGGAG ???DBDBDADDDDIEID:AFFD:?8 RG:Z:HiMom.1 QX:Z:??<DDA?D RX:Z:CCATGCGT
+HiMom:1:1201:1236:2187 4 * 0 0 * * 0 0 CTCCTTAGCGGATTCCGACTTCCAT CCCFFFFDHHHHGIJJIGIGIJJGG RG:Z:HiMom.1 QX:Z:@@BFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1242:2207 4 * 0 0 * * 0 0 ATCTTTTATTGGCCTCCTGCTCCCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:?BBDDDFF RX:Z:ATTCCTCT
+HiMom:1:1201:1252:2141 4 * 0 0 * * 0 0 AGTTATTTTGCCTATGTCCAACAAG BCBFFFFFGHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1260:2165 4 * 0 0 * * 0 0 ATCTGATCTAAGTTGGGGGACGCCG @@@FFDFFHHHHHJJJIJIIIGIJJ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1280:2179 4 * 0 0 * * 0 0 GAGGACTGCTTGAGTCCAGGAGTTC @@BFFDEFGHHHHIFGCHIJJJGGI RG:Z:HiMom.1 QX:Z:BCCFFFFF RX:Z:GCCTAGCC
+HiMom:1:1201:1281:2133 4 * 0 0 * * 0 0 GCAACAAAATTTCATATGACTTAGC CCCFFFFFHHHHHJJIIIHICHIIJ RG:Z:HiMom.1 QX:Z:C at CFFFDF RX:Z:CCAACATT
+HiMom:1:1201:1285:2100 4 * 0 0 * * 0 0 GATCTTTTTTGCTTTGTAGTTATAG @@@DFFFFHHHHHIIGIABCFFHBF RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGCTGCTG
+HiMom:1:1201:1291:2158 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGATGGGC @CCFFFDD?FHHFGEHHIIDHIIII RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AGCATGGA
+HiMom:1:1201:1300:2137 4 * 0 0 * * 0 0 GCTCTTCCGATCTTTTTTTTAATTT @@?DDDDDFDHADEHGIGGED3?FD RG:Z:HiMom.1 QX:Z:8?84B23? RX:Z:GCCTAGCC
+HiMom:1:1201:1312:2112 4 * 0 0 * * 0 0 ATTTGCAGGAGCCGGCGCAGGTGCA CCCFFFFFHHHHHJJJIJJJJGHIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1331:2162 4 * 0 0 * * 0 0 TAATCCCAGTACTTTGGGAGGCCAA CCCFFFFFHHHHHJJJJIJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1341:2116 4 * 0 0 * * 0 0 ATAACAGCGAGACTGGCAACTTAAA ######################### RG:Z:HiMom.1 QX:Z:CCCFFBDD RX:Z:ACAGGTAT
+HiMom:1:1201:1344:2147 4 * 0 0 * * 0 0 ACGATTAGTTTTAGCATTGGAGTAG @<??DDDDFHHHFGGHHIIIGGAGH RG:Z:HiMom.1 QX:Z:=?1AA:=D RX:Z:TGTAATCA
+HiMom:1:1201:1345:2181 4 * 0 0 * * 0 0 ATACGGATGTGTTTAGGAGTGGGAC CCCFFFFFHHHHHIIJJHJFHIJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1201:1364:2113 4 * 0 0 * * 0 0 TAAAGAGAGCCAGTGGAGTTACGAC ######################### RG:Z:HiMom.1 QX:Z:C at CFFF@D RX:Z:CAGCGGTA
+HiMom:1:1201:1392:2109 4 * 0 0 * * 0 0 GTCAGACAGGGGGATTTGGGCTGTG BBCFFFFFHHHHHHJJJHIJIJJJJ RG:Z:HiMom.1 QX:Z:CCCDF?DD RX:Z:TATCTGCC
+HiMom:1:1201:1392:2184 4 * 0 0 * * 0 0 ATCTTTATTCATTTGTATGATCTTA @@BFFFFFHFFHFHIHIIJIJJJJI RG:Z:HiMom.1 QX:Z:@CCFFDDE RX:Z:CAATAGTC
+HiMom:1:1201:1393:2143 4 * 0 0 * * 0 0 GATAAATGCACGCATCCCCCCCGCG C at CFFFFFGGHHHHJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:CTAACTCG
+HiMom:1:1201:1414:2174 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT @;@1BDADF????FFEB>B6=BBBB RG:Z:HiMom.1 QX:Z:######## RX:Z:AGAAAAGA
+HiMom:1:1201:1416:2128 4 * 0 0 * * 0 0 TTGGTGTGGAGGCGGTGGCGGGATC @@@DDDDDHHFHHII:?GGHIIB6? RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1421:2154 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTTGTGCTCTT BC at DFFFFHHHHHJJJJFHIHHIJJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGTAACTC
+HiMom:1:1201:1439:2156 4 * 0 0 * * 0 0 GGAGATTATTTGCCTTGAAGTAAGC -;(22<>>@>8@>8;@######### RG:Z:HiMom.1 QX:Z:1;;=#### RX:Z:GACCAGGC
+HiMom:1:1201:1452:2143 4 * 0 0 * * 0 0 TTTTAGTCTTAGCATTTACTTTCCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BC at DDFFF RX:Z:CAACTCTC
+HiMom:1:1201:1458:2109 4 * 0 0 * * 0 0 GATACGAACACACAAGAACTTTTTT CCCFFFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:1201:1472:2121 516 * 0 0 * * 0 0 GTGTGCTCTTCCGATCTGGAGGATG =+=??A4A==A at 7A<?######### RG:Z:HiMom.1 QX:Z:;?=D#### RX:Z:CTATGCGC
+HiMom:1:1201:1483:2126 516 * 0 0 * * 0 0 GCATGCAGCTGGGTGCTGTGATGCA @@@DDDBB<DD8F<<CGG?AA?A<F RG:Z:HiMom.1 QX:Z:@C<DD:B? RX:Z:CTGTAATC
+HiMom:1:1201:1486:2109 4 * 0 0 * * 0 0 ACGTGTGCTCTTCCCGATCTGTATA CCCFF?DDFBHHHJJIIDHJIJJJH RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:GTCCACAG
+HiMom:1:1201:1486:2146 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTGGGC <<<@??@??@???????######## RG:Z:HiMom.1 QX:Z:?@@1:DBD RX:Z:CAACTCTC
+HiMom:1:2101:1011:2102 4 * 0 0 * * 0 0 .....TCACACATAATTTTAAAATT #####22@?@@??@@@@@??@@@@@ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1013:2146 4 * 0 0 * * 0 0 ....CGCTAGAACCAACTTATTCAT ####24=?@@?@?@@?@@@@@@?@@ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:2101:1021:2209 4 * 0 0 * * 0 0 ..GGAAGGCTGCTAGCTGGCCAGAG ##08@>??@@??@?????????>?@ RG:Z:HiMom.1 QX:Z:@CCDFFFF RX:Z:ACTAAGAC
+HiMom:1:2101:1023:2237 516 * 0 0 * * 0 0 ..TTTGTTTGAGTTCCTTGTAGATT ##0:=@?>?@???@:>?@??>?;?< RG:Z:HiMom.1 QX:Z:=:1<#### RX:Z:GCCTAGCC
+HiMom:1:2101:1031:2163 4 * 0 0 * * 0 0 ..ACATTTGTCACCACTAGCCACCA ##0<@?@@@@@@@@@@?@@@@@@@? RG:Z:HiMom.1 QX:Z:B at BFFFFF RX:Z:GATATCCA
+HiMom:1:2101:1036:2087 4 * 0 0 * * 0 0 .GTCCACTTACGAAGCAAATACTTT #4=DDFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:B at CFFDFF RX:Z:GACCGTTG
+HiMom:1:2101:1040:2208 516 * 0 0 * * 0 0 .CTGATAGTCACTGAAATGAATTCA #-0=>(2 at .22@@############ RG:Z:HiMom.1 QX:Z:######## RX:Z:ACGAAATC
+HiMom:1:2101:1048:2238 4 * 0 0 * * 0 0 .GTCACATCGTTGAAGCACTGGATC #11ADDDB<CFFHCHGDBHGIIIII RG:Z:HiMom.1 QX:Z:?@7DDDDA RX:Z:ACAGTTGA
+HiMom:1:2101:1054:2162 4 * 0 0 * * 0 0 .GGACAGGGAAGGGAAGGAAGGGTG #4=DDFDFHHHHHJIJIIDHHGICG RG:Z:HiMom.1 QX:Z:B at BDDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1059:2083 4 * 0 0 * * 0 0 .GAATGTCTTAGAAGGATGCTTCTC #1=BDDDEHHGHHJJJJJIJJIIJJ RG:Z:HiMom.1 QX:Z:1:?D#### RX:Z:TACCGTCT
+HiMom:1:2101:1063:2206 4 * 0 0 * * 0 0 .TGCTAGGATGAGGATGGATAGTAA #1=DDDFFHHHHHJHIIJHIIIHHJ RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:ACAGGTAT
+HiMom:1:2101:1064:2242 4 * 0 0 * * 0 0 .GGAAAAAGGTTGTCAAGCGTTAAA ######################### RG:Z:HiMom.1 QX:Z:;@<:AA at A RX:Z:TCGCTAGA
+HiMom:1:2101:1072:2170 4 * 0 0 * * 0 0 .GGGGAGACAGAGAGGATCAGAAGT #4=BDDFDHHDFHEGFEGGIJIIIG RG:Z:HiMom.1 QX:Z:B@@DFDDF RX:Z:CAGCGGTA
+HiMom:1:2101:1077:2139 4 * 0 0 * * 0 0 .ATTAGTTGGCGGATGAAGCAGATA #4=DFFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:2101:1084:2188 4 * 0 0 * * 0 0 TACAAGGTCAAAATCAGCAACAAGT CCCFFFFDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@B at FFFFF RX:Z:GAAGGAAG
+HiMom:1:2101:1100:2085 4 * 0 0 * * 0 0 ATCTTGATCTCCTCCTTCTTGGCCT @@@DDDDDHHFHFEIIIIHHBAHBG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:2101:1102:2221 4 * 0 0 * * 0 0 ATAACTGACTCTACTCAGTAGATTA CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGCGGAT
+HiMom:1:2101:1105:2131 4 * 0 0 * * 0 0 CAGCAGCAGCAACAGCAGAAACATG CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:2101:1112:2245 4 * 0 0 * * 0 0 TCGTAGTGTTGTAATTTCGTCTTCT ?8?DBDDDCCFCAACGGFFCBFFAE RG:Z:HiMom.1 QX:Z:@@?BBDDD RX:Z:AACAATGG
+HiMom:1:2101:1122:2136 4 * 0 0 * * 0 0 CTTGCCAGCCTGCAGGCCCCGCGGC ???BBAABDD?DDIID)A:3<EADD RG:Z:HiMom.1 QX:Z:?@<DDDD? RX:Z:GCCGTCGA
+HiMom:1:2101:1123:2095 4 * 0 0 * * 0 0 TCCGCCTCCAGCTTCAGCTTCTCCT @@@FDDFFHHHHHJHGGJIJJJEHH RG:Z:HiMom.1 QX:Z:@?@DDF@@ RX:Z:CAGCGGTA
+HiMom:1:2101:1126:2082 4 * 0 0 * * 0 0 TCTCTTTCCACCTTGGTCACCTTCC @C at DDDFFHHHHHJEGGIHHIJGIH RG:Z:HiMom.1 QX:Z:@@@FFFDA RX:Z:CTGCGGAT
+HiMom:1:2101:1133:2239 4 * 0 0 * * 0 0 AGCTTTTTGTTTCCTAGCTTGTCTT ?@?DDFFFHHHHF4ACFHIJHHHGH RG:Z:HiMom.1 QX:Z:@@@BDDDF RX:Z:TATCCATG
+HiMom:1:2101:1143:2137 4 * 0 0 * * 0 0 GCTCTTCAGATCTAGGGGGAACAGC @@@DD?=DCAFFFHIIDG:EFHIII RG:Z:HiMom.1 QX:Z:######## RX:Z:TCCGTCTA
+HiMom:1:2101:1151:2182 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTA 9<<?@?@;5=?############## RG:Z:HiMom.1 QX:Z:######## RX:Z:GAAAAAAA
+HiMom:1:2101:1151:2236 516 * 0 0 * * 0 0 TTTGAAGCCTCTTTATCCTTGGCAT ######################### RG:Z:HiMom.1 QX:Z:####(- at 5 RX:Z:TAGCGGTA
+HiMom:1:2101:1162:2139 4 * 0 0 * * 0 0 ATCGTTTATGGTCGGAACTACGACG BCCFFFFFHHHHHIJJJJJJJIJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1163:2203 4 * 0 0 * * 0 0 TTGGTTCACTTATGTATTTATGAAT @CCFDFFFHHHHHJHIIJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1163:2222 4 * 0 0 * * 0 0 GAGCGATAATGGTTCTTTTCCTCAC @@@DFFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFEF RX:Z:TGCAAGTA
+HiMom:1:2101:1172:2152 516 * 0 0 * * 0 0 ATCGTTTCTGGGGACTAGTGAGGCG ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:CAATAGTC
+HiMom:1:2101:1186:2093 4 * 0 0 * * 0 0 AATGTTGGGAGGACAATGATGGAAA ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:2101:1188:2195 4 * 0 0 * * 0 0 GCACATACACCAAATGTCTGAACCT CCCFFFFFHHHHHJJJHIJJJJJJJ RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:2101:1195:2150 4 * 0 0 * * 0 0 AATTGAACTTCACCACCCAGAGGAA CCCFFFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1207:2084 516 * 0 0 * * 0 0 TCACCACTCTTCTGGGCATCCCCTG @@@DDEDFHHHHHIJIHHGHGGJJJ RG:Z:HiMom.1 QX:Z:@@CDFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1208:2231 516 * 0 0 * * 0 0 CTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJHFDDDDDDD RG:Z:HiMom.1 QX:Z:1+:A1A22 RX:Z:GTAACATC
+HiMom:1:2101:1215:2110 4 * 0 0 * * 0 0 ATCTTTCCCCCATTAAGAACAGCAA ######################### RG:Z:HiMom.1 QX:Z:1:7<#### RX:Z:AAAAGAAG
+HiMom:1:2101:1216:2172 4 * 0 0 * * 0 0 GGACTTCTAGGGGATTTAGCGGGGT CCCFFFFFHHHHHJJJJJJJJJJJD RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CAGCGGAT
+HiMom:1:2101:1216:2193 4 * 0 0 * * 0 0 AGGCATGACACTGCATTTTAAATAC @@@DDDDDHFFHHGGDFHFHIIHGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAGTTGA
+HiMom:1:2101:1226:2088 4 * 0 0 * * 0 0 GCTCTTCCGATCTAGGTAATAGCTA ==?BDFFFDCDDHFFFAFHDHIJGJ RG:Z:HiMom.1 QX:Z:@@@:DDDD RX:Z:GATATCCA
+HiMom:1:2101:1231:2208 4 * 0 0 * * 0 0 AGCCAGTGTTGGTGTGTTGACTGTT @@;1ADABCF;BF<AACGCHEBHC< RG:Z:HiMom.1 QX:Z:@<@?D8 at D RX:Z:CTATGCGT
+HiMom:1:2101:1233:2133 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFGHHHHJJJFDDDDDDDD RG:Z:HiMom.1 QX:Z:=??B#### RX:Z:CTATGCGT
+HiMom:1:2101:1240:2197 516 * 0 0 * * 0 0 ACTGGAGATCCTTGTTACATGCCCA ??+++A:DD?:ADEE@::C4:C<E: RG:Z:HiMom.1 QX:Z:88+AD@?8 RX:Z:AACGCATT
+HiMom:1:2101:1245:2154 4 * 0 0 * * 0 0 ACCAATCAGTAGCACCACTATACAC CCCFFFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1249:2231 4 * 0 0 * * 0 0 TCTCTCGGCCTTCCACTCTAGCATA @@@FFFFFFHHGHIJJJGJIIJHIJ RG:Z:HiMom.1 QX:Z:@@CBDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1258:2092 4 * 0 0 * * 0 0 TTAGACAAAACACCAAAATAAAATA ######################### RG:Z:HiMom.1 QX:Z:@@CDDFFF RX:Z:TAAGCACA
+HiMom:1:2101:1262:2128 516 * 0 0 * * 0 0 TCTTGTGGTAACTTTTCTGACACCT -(---9@;@?:8>?4:>?@###### RG:Z:HiMom.1 QX:Z:1+8?ADD8 RX:Z:ACTAAGAC
+HiMom:1:2101:1273:2119 516 * 0 0 * * 0 0 ATGATGGATCTTCTCTAACTTGTCA >=><AAAAA+2AA?CB4@@ABB3?A RG:Z:HiMom.1 QX:Z:####=ADB RX:Z:CTAACTCG
+HiMom:1:2101:1285:2105 516 * 0 0 * * 0 0 TGTCTATATCAACCAACACCTCTTC -(0(():94:9:???########## RG:Z:HiMom.1 QX:Z:######## RX:Z:TATCTCGG
+HiMom:1:2101:1312:2105 4 * 0 0 * * 0 0 GTTGAGAATAGGTTGAGATCGTTTC @CCFFFDFHHFHDHIJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1325:2083 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTGGAGAAAAA ######################### RG:Z:HiMom.1 QX:Z:@@@BD=DD RX:Z:ACAGGTAT
+HiMom:1:2101:1336:2109 4 * 0 0 * * 0 0 AGACCAGAACAGCTCCAGGTGCTCC CCCFFFFFHHHHHJJJJJJCGHIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1349:2084 4 * 0 0 * * 0 0 AGTCTGAATCATTGGTGTCTGAAGA <5;??=>=>>?############## RG:Z:HiMom.1 QX:Z:####22<A RX:Z:ACTGTATC
+HiMom:1:2101:1365:2094 4 * 0 0 * * 0 0 GCTCTTCCGATCTTGTGCTCTTCCG CCCFFFFDHFHHGJJIIJIJJIHII RG:Z:HiMom.1 QX:Z:######## RX:Z:ACTGTACC
+HiMom:1:2101:1370:2116 4 * 0 0 * * 0 0 CACCATCTGACATCATGTTTGAAAG @@@DFFFDFFHDHIGBHHII<HEDB RG:Z:HiMom.1 QX:Z:?:8A?3:B RX:Z:AGCATGGA
+HiMom:1:2101:1386:2105 4 * 0 0 * * 0 0 AGGAATTATTCTTCTGCCATAAGGT B@@DDFFFHGFHHIJJJJJGIGIJH RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1414:2098 4 * 0 0 * * 0 0 TTGGGGCCGGTGCCGTCGGGCCCAA CCCFFFFFHHHHGJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:2101:1427:2081 4 * 0 0 * * 0 0 CCGACTTCCATGGCCACCGTCCTGC CCCFFFFFHHHHHJJJIIGFIIJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1450:2134 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CC at FDFDFFDFHFGIIE1CGGHBGE RG:Z:HiMom.1 QX:Z:@C at DDDB? RX:Z:ACCAGTTG
+HiMom:1:2101:1459:2083 4 * 0 0 * * 0 0 ATTTCACCAAAATAATCAGAAGGCC CCCFFFFDBHGHHIGGIJFJJGGFH RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:GCCGTCGA
+HiMom:1:2101:1491:2093 4 * 0 0 * * 0 0 AGAGACGGGGTCTCGCTATGTTGCC BCCDFFFFHHHHHJIIJJJJIJIJJ RG:Z:HiMom.1 QX:Z:@@@FDEBD RX:Z:CAATAGTC
diff --git a/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithMolecularIndex8M.sam b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithMolecularIndex8M.sam
new file mode 100644
index 0000000..06e33db
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithMolecularIndex8M.sam
@@ -0,0 +1,182 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:HiDad LB:Hello, World PL:illumina PU:HiMom.1 CN:BI
+HiMom:1:1101:1031:2224 516 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1039:2147 516 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1046:2175 516 * 0 0 * * 0 0 ..GGA.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1047:2122 516 * 0 0 * * 0 0 ..TCA.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNANNNN
+HiMom:1:1101:1048:2197 516 * 0 0 * * 0 0 ..GTG.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNCNNNN
+HiMom:1:1101:1065:2193 4 * 0 0 * * 0 0 .CTTG.................... ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:GAACGATN
+HiMom:1:1101:1069:2159 4 * 0 0 * * 0 0 GACGT.................... <<<@?#################### RG:Z:HiMom.1 QX:Z:@BBFFFFF RX:Z:GTCCACAG
+HiMom:1:1101:1071:2233 4 * 0 0 * * 0 0 GTTTG.................... <<<@@#################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1101:1083:2193 4 * 0 0 * * 0 0 AGGCT.................... ######################### RG:Z:HiMom.1 QX:Z:?@;DD?BD RX:Z:CCAACATT
+HiMom:1:1101:1084:2136 4 * 0 0 * * 0 0 TTTCT.................... <<<@@#################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:1101:1089:2172 4 * 0 0 * * 0 0 TCCGG.................... :<<??#################### RG:Z:HiMom.1 QX:Z:?@@FF;=B RX:Z:GACCAGGA
+HiMom:1:1101:1100:2207 4 * 0 0 * * 0 0 AGGCT............G....... ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1111:2148 4 * 0 0 * * 0 0 GCGAA.A..........GGACGAC. ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCCGTCGA
+HiMom:1:1101:1138:2141 4 * 0 0 * * 0 0 TCCGATCTGCTTCAGGTCGATCAGA CCCFFFFFHGHHHJJIGHIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1140:2120 4 * 0 0 * * 0 0 TTTTTTTTTTTTTAACTTTGCAAAT @@@DDDDDHHHHFB at 9FHI@BFH@@ RG:Z:HiMom.1 QX:Z:@@@DDFDF RX:Z:CAACTCTC
+HiMom:1:1101:1143:2192 4 * 0 0 * * 0 0 CGACAAGTCTGGCTTATCACTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1150:2228 4 * 0 0 * * 0 0 ATGGGAGGCGATTCCTAGGGGGTTG 8?=DD8;@BH6DHD<FGGGEIGHIG RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:AGGTCGCA
+HiMom:1:1101:1157:2135 4 * 0 0 * * 0 0 TTTAAAGTCTTAATCAAAGATGATA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1162:2207 516 * 0 0 * * 0 0 TAAAACTGGGGAAGTTAGAGGAATG ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:ACAAAATT
+HiMom:1:1101:1165:2239 4 * 0 0 * * 0 0 ATGGAAGTCGAGACAGAAGTGAGAA ######################### RG:Z:HiMom.1 QX:Z:B@@DFFFF RX:Z:GCCTAGCC
+HiMom:1:1101:1175:2197 4 * 0 0 * * 0 0 AAGAGCTGGGGAACATCCAGAAAGG BC at FFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1101:1188:2237 4 * 0 0 * * 0 0 GCTTCCTTCAAGACAGAAGTGAGAA CCCFFDDEFHHFFE at FDHHAIAFHG RG:Z:HiMom.1 QX:Z:@@?DFFDF RX:Z:GTAACATC
+HiMom:1:1101:1197:2200 4 * 0 0 * * 0 0 ATATTCCACTGGAACCACAGAACCC @@@FFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFDFFF RX:Z:AACGCATT
+HiMom:1:1101:1206:2126 4 * 0 0 * * 0 0 ATCTGTCCAGTGGTGCACTGAATGT CCCFFFFFHHHHHHIIJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1212:2230 4 * 0 0 * * 0 0 TTTTAGCTTTATTGGGGAGGGGGTG CCCFFFFFHHGHHJJJJGJJJJJDF RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1101:1218:2200 4 * 0 0 * * 0 0 GCTCTTCCGATCTATCTGCTCGTCC (-(=34???3;@############# RG:Z:HiMom.1 QX:Z:@CCFFDDF RX:Z:GACCGTTG
+HiMom:1:1101:1219:2164 4 * 0 0 * * 0 0 ATCTTATCCACTCCTTCCACTTTGG CCCFFFFFHHHHHJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1101:1221:2143 4 * 0 0 * * 0 0 CAATTGAATGTCTGCACAGCCGCTT @@@FFFFDHHHHHJJJIIIJGHIJJ RG:Z:HiMom.1 QX:Z:@@CDDDDF RX:Z:GCCGTCGA
+HiMom:1:1101:1236:2121 4 * 0 0 * * 0 0 TTGCGCTTACTTTGTAGCCTTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFDDF RX:Z:ACAGGTAT
+HiMom:1:1101:1242:2170 4 * 0 0 * * 0 0 GGAAGGAAAAGAAGCACAAGTACAT @@@DFDFFHHHGHHGIIGJJEHHIG RG:Z:HiMom.1 QX:Z:@@CFFF?D RX:Z:TGCAAGTA
+HiMom:1:1101:1257:2223 4 * 0 0 * * 0 0 TGCTCTTCCGATCTTTTAGCAAAGC :?@DDBDDHFFHDGIGIIJJJGGGI RG:Z:HiMom.1 QX:Z:;@@DD=DD RX:Z:GACCGTTG
+HiMom:1:1101:1259:2152 4 * 0 0 * * 0 0 ATTTTTATATTTTTTTAGACATAGG CCCFFFFFGHHHHJJJJIGIIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTAAGAC
+HiMom:1:1101:1261:2127 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHGHHHJJIFDDDDDDDD RG:Z:HiMom.1 QX:Z:>7+ at A7A7 RX:Z:ACTAAGAC
+HiMom:1:1101:1263:2236 516 * 0 0 * * 0 0 AGTTCTTCAGTAATTTTAGTACTGC ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGTAAGG
+HiMom:1:1101:1267:2209 4 * 0 0 * * 0 0 GGCAGAGTCTCCAACAGCCCCGTAC =;?DDDD?CCFHAIIIGGIIGE at EG RG:Z:HiMom.1 QX:Z:?@@D;ADD RX:Z:TATCAGCC
+HiMom:1:1101:1269:2170 4 * 0 0 * * 0 0 TTCCAAGCCTGTGCTTTAAGGAAAA @@<ADBDBDF8DDCFH at GIE@@GGH RG:Z:HiMom.1 QX:Z:@@@DDDF? RX:Z:ATTATCAA
+HiMom:1:1101:1290:2225 4 * 0 0 * * 0 0 TCAGTTCACTGGCAAAGACAGTCAC C@@FBEDDFHFHGIIICEHGDHBHE RG:Z:HiMom.1 QX:Z:?<@DFBBD RX:Z:GCCTAGCC
+HiMom:1:1101:1291:2150 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHFHHIJJJIIIGIJIJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:CGCTATGT
+HiMom:1:1101:1302:2244 4 * 0 0 * * 0 0 TGAATACATATAACAAATGCAAAAA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCTAAC
+HiMom:1:1101:1308:2153 516 * 0 0 * * 0 0 TCTGTAAGGTAATCCCCGCATGTGT 1?1=4===AFFDFFGFDGFB at CFB: RG:Z:HiMom.1 QX:Z::?@B?@DD RX:Z:AACGCATT
+HiMom:1:1101:1309:2210 4 * 0 0 * * 0 0 AGTGGGCTAGGGCATTTTTAATCTT @@?DFFDFHHHDFHJIJJIJGIIIJ RG:Z:HiMom.1 QX:Z:?@@ADEEF RX:Z:ATTCCTCT
+HiMom:1:1101:1314:2233 4 * 0 0 * * 0 0 AGGAAAGTTGGGCTGACCTGACAGA @@<DDD;=FBFADBCGDEH?F;FCG RG:Z:HiMom.1 QX:Z:@<@?B@;A RX:Z:CGCTATGT
+HiMom:1:1101:1316:2126 4 * 0 0 * * 0 0 TCTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJJHFDDDDDD RG:Z:HiMom.1 QX:Z:1>>7A### RX:Z:CAATAGAC
+HiMom:1:1101:1327:2200 516 * 0 0 * * 0 0 GTCATCTGGGCTGTCGACAGGTGTC @B at FFFFFHHHHGIJJJJJJIFHHI RG:Z:HiMom.1 QX:Z:BCCFDFFD RX:Z:GCCGTCGA
+HiMom:1:1101:1328:2225 4 * 0 0 * * 0 0 AGGAAATTAGGACTTACCTGACATA ######################### RG:Z:HiMom.1 QX:Z:??;=A:B= RX:Z:CAACTCTC
+HiMom:1:1101:1338:2175 4 * 0 0 * * 0 0 GCTTGTTGGCTTTAACATCCACAAT CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GAAGGAAG
+HiMom:1:1101:1347:2149 4 * 0 0 * * 0 0 GCTCTTCCGATCTGTGCTCTTCCGA CCCFFFFFDFHHFIJDGIGGHGIGH RG:Z:HiMom.1 QX:Z:CC at DFFFD RX:Z:GACCAGGA
+HiMom:1:1101:1353:2226 4 * 0 0 * * 0 0 GTGCTCTTCCGATCTTCAGGTTACC BBBFFFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:@B at FFEFF RX:Z:TATCTGCC
+HiMom:1:1101:1363:2138 4 * 0 0 * * 0 0 GTTCTTAAACCTGTTAGAACTTCTG C@@FFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:1101:1399:2128 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHIJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1101:1403:2194 4 * 0 0 * * 0 0 ACATGGTGAAACCCTGTCTCTACTA CCCFFFDDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:1101:1406:2222 4 * 0 0 * * 0 0 GGCTGGACTCCCCTGGTTCTGGGCA ;?@DDDBD?FHDFGIIIGIGHHIII RG:Z:HiMom.1 QX:Z:C@@DBFEF RX:Z:AGCATGGA
+HiMom:1:1101:1419:2119 4 * 0 0 * * 0 0 ACTTTCCTTTTTTGTTTTACTTTAA ######################### RG:Z:HiMom.1 QX:Z:@@@DFDFD RX:Z:TGTAATCA
+HiMom:1:1101:1420:2213 4 * 0 0 * * 0 0 TTCACTGTACCGGCCGTGCGTACTT @CCFFFFDHHHFGIJJJJJJGHIGG RG:Z:HiMom.1 QX:Z:@C at FFFDF RX:Z:CAGCGGTA
+HiMom:1:1101:1435:2194 4 * 0 0 * * 0 0 TTTTGTTTTCTTTTACTGAAGTGTA CCCFFDFFHHHHHJJJJIHIJHHHJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1101:1441:2148 4 * 0 0 * * 0 0 TTTTGGCTCTAGAGGGGGTAGAGGG CCCFFFFFHHDFBHIIJJ1?FGHIJ RG:Z:HiMom.1 QX:Z:@@BFFDDD RX:Z:CGCTATGT
+HiMom:1:1101:1452:2132 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AACGCATT
+HiMom:1:1101:1460:2176 4 * 0 0 * * 0 0 AGGAAAAAGACACAACAAGTCCAAC ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GATATCCA
+HiMom:1:1101:1479:2221 4 * 0 0 * * 0 0 GGGGAAATCTATTTTTATGTAAAAA @CCFFFFFHHHHHJIGIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@BCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1491:2120 4 * 0 0 * * 0 0 GGCCAGGCTGAACTTCTGAGCTGCT CCCFFFFFHHHGHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1018:2133 4 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:8??=BBBA RX:Z:ATTCCTCT
+HiMom:1:1201:1018:2217 516 * 0 0 * * 0 0 ......................... ######################### RG:Z:HiMom.1 XN:i:1 QX:Z:;<;:BBDD RX:Z:ATTATCAA
+HiMom:1:1201:1028:2202 4 * 0 0 * * 0 0 ..AAAC.C.T.......GG..TG.. ##42@?################### RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GAAGGAAG
+HiMom:1:1201:1042:2174 4 * 0 0 * * 0 0 .TCAGGAAGGC..CAAAAAAAGAAA #0;@@@?@?<@##3<@@?@@????? RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCTGCAAG
+HiMom:1:1201:1043:2246 4 * 0 0 * * 0 0 .GCATCATTTC..GCTTCTCTCTGT #0;@@??@=@>##22=;@??><@?? RG:Z:HiMom.1 QX:Z:@<?DD:B= RX:Z:CGCTATGT
+HiMom:1:1201:1045:2105 516 * 0 0 * * 0 0 .TTTTTTTTTT..TTTTTTTTTTTT #0;@@@@@@@?##0:????????=< RG:Z:HiMom.1 QX:Z:1112 at A## RX:Z:CTGTAATC
+HiMom:1:1201:1054:2151 4 * 0 0 * * 0 0 GTCAGGCACTGAGAATATATGGGTG CBCFFFFFHHHHHJJJJJJJJJJEG RG:Z:HiMom.1 QX:Z:CCCFFFDF RX:Z:CAATAGTC
+HiMom:1:1201:1064:2239 4 * 0 0 * * 0 0 GGGATGGGAGGGCGATGAGGACTAG 8?@:DDDACC:FHHGIH<EGDDDFH RG:Z:HiMom.1 QX:Z:@@@FFADB RX:Z:TAAGCACA
+HiMom:1:1201:1073:2225 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGGAGGGT @BBDFFFFHHHHHJJJJJJJJJJJ: RG:Z:HiMom.1 QX:Z:B@@BDEFF RX:Z:ATTCCTCT
+HiMom:1:1201:1083:2121 4 * 0 0 * * 0 0 ACACACAACACCACCGCCCTCCCCC ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CTATGCGT
+HiMom:1:1201:1084:2204 4 * 0 0 * * 0 0 TGGCTCCTCAGGCTCTCATCAGTTG CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1095:2146 4 * 0 0 * * 0 0 ACTGACAACACCAAATGCTGCTAAG CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:1201:1103:2184 4 * 0 0 * * 0 0 AGAAGTTTCAGAATTGTGGCCCCAT B at BFFDEFHHHHHJJJGHIJJJJJI RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1107:2109 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHGHHJJJJIIJJJJJJ RG:Z:HiMom.1 QX:Z:B at CFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1118:2198 4 * 0 0 * * 0 0 AATAAACTTTATTAAAGCAGTTAAA C at CFFFFFHDHHHGIIIJJJIJJJJ RG:Z:HiMom.1 QX:Z:@@@DDBDD RX:Z:ATTATCAA
+HiMom:1:1201:1122:2227 4 * 0 0 * * 0 0 GTCATATAAGGCCCAGTCCAAGGAA @@@FFFFFHHHGGIJIGGIJFIJII RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1123:2161 516 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGCATACA ===AAAA8AAAA<AAA)@CBA9>A# RG:Z:HiMom.1 QX:Z:?;@DFDFF RX:Z:GACCAGGA
+HiMom:1:1201:1127:2112 516 * 0 0 * * 0 0 TAATCACCTGAGCAGTGAAGCCAGC @<@?BDDDHD?FDBHI?AHGGGDFH RG:Z:HiMom.1 QX:Z:=??BA?BD RX:Z:CAACTCTC
+HiMom:1:1201:1134:2144 4 * 0 0 * * 0 0 AGTGTGAGTAATGGTTGAGAGGTGG B@?DDDFFFHHGHJHHGFIHHIFGI RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CGCTATGT
+HiMom:1:1201:1138:2227 516 * 0 0 * * 0 0 GACAAATATAGGAAATAGAAGCTAT =1=A=AAA,2?4>7C<<4<A+3<AB RG:Z:HiMom.1 QX:Z:######## RX:Z:CCAACATT
+HiMom:1:1201:1140:2125 4 * 0 0 * * 0 0 TTCATAAATTGGTCTTAGATGTTGC CC at FFFFFHHHHFGIJIIIJIJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1142:2242 4 * 0 0 * * 0 0 GTAAAATGTAAAATAATAAAAAATG ?=?DDDD;AF<DF<FFFFIIIFF@< RG:Z:HiMom.1 QX:Z:??<D?D83 RX:Z:TATCTGCC
+HiMom:1:1201:1150:2161 4 * 0 0 * * 0 0 TTCTCACTACTGTGATTGTGCCACT @C at FFFFFGHHHHGIIIICEHCFGH RG:Z:HiMom.1 QX:Z:@@@FDDDD RX:Z:AACGCATT
+HiMom:1:1201:1159:2179 516 * 0 0 * * 0 0 TTTTTTTTTATTTTTCTAAATACTT ===AA#################### RG:Z:HiMom.1 QX:Z:######## RX:Z:AAAAAAAA
+HiMom:1:1201:1160:2109 4 * 0 0 * * 0 0 ACATCCTTCCCATGCCACCAACTCG CCCFFFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:C at BFFFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1180:2119 4 * 0 0 * * 0 0 GCTCTAAATTTTGCTTTTCTACAGC CCCFFFFFHHHHHJJJJIJIJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GACCGTTG
+HiMom:1:1201:1185:2143 4 * 0 0 * * 0 0 GCTGAAGGCCCGTGGGCCAGAGGTG @CCFFFFFHHHHHJJJJJJJJJJHI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:1201:1187:2100 4 * 0 0 * * 0 0 AAAAAAGAGCCCGCATTGCCGAGAC =<=;AA################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1190:2194 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1204:2228 4 * 0 0 * * 0 0 TCTTCTTGTCGATGAGGAACTTGGT @?@FFFFFDHHGHJIJJGHIIJJJH RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1201:1208:2132 4 * 0 0 * * 0 0 CTGTAGAAAGGATGGTCGGGCTCCA @@CDFFFFGHFHHJIJJGJIBHJJG RG:Z:HiMom.1 QX:Z:CC at FFFFF RX:Z:TGTAATCA
+HiMom:1:1201:1219:2115 4 * 0 0 * * 0 0 TGGGAGTAGTTCCCTGCTAAGGGAG ???DBDBDADDDDIEID:AFFD:?8 RG:Z:HiMom.1 QX:Z:??<DDA?D RX:Z:CCATGCGT
+HiMom:1:1201:1236:2187 4 * 0 0 * * 0 0 CTCCTTAGCGGATTCCGACTTCCAT CCCFFFFDHHHHGIJJIGIGIJJGG RG:Z:HiMom.1 QX:Z:@@BFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1242:2207 4 * 0 0 * * 0 0 ATCTTTTATTGGCCTCCTGCTCCCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:?BBDDDFF RX:Z:ATTCCTCT
+HiMom:1:1201:1252:2141 4 * 0 0 * * 0 0 AGTTATTTTGCCTATGTCCAACAAG BCBFFFFFGHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1260:2165 4 * 0 0 * * 0 0 ATCTGATCTAAGTTGGGGGACGCCG @@@FFDFFHHHHHJJJIJIIIGIJJ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1280:2179 4 * 0 0 * * 0 0 GAGGACTGCTTGAGTCCAGGAGTTC @@BFFDEFGHHHHIFGCHIJJJGGI RG:Z:HiMom.1 QX:Z:BCCFFFFF RX:Z:GCCTAGCC
+HiMom:1:1201:1281:2133 4 * 0 0 * * 0 0 GCAACAAAATTTCATATGACTTAGC CCCFFFFFHHHHHJJIIIHICHIIJ RG:Z:HiMom.1 QX:Z:C at CFFFDF RX:Z:CCAACATT
+HiMom:1:1201:1285:2100 4 * 0 0 * * 0 0 GATCTTTTTTGCTTTGTAGTTATAG @@@DFFFFHHHHHIIGIABCFFHBF RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGCTGCTG
+HiMom:1:1201:1291:2158 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGATGGGC @CCFFFDD?FHHFGEHHIIDHIIII RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AGCATGGA
+HiMom:1:1201:1300:2137 4 * 0 0 * * 0 0 GCTCTTCCGATCTTTTTTTTAATTT @@?DDDDDFDHADEHGIGGED3?FD RG:Z:HiMom.1 QX:Z:8?84B23? RX:Z:GCCTAGCC
+HiMom:1:1201:1312:2112 4 * 0 0 * * 0 0 ATTTGCAGGAGCCGGCGCAGGTGCA CCCFFFFFHHHHHJJJIJJJJGHIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1331:2162 4 * 0 0 * * 0 0 TAATCCCAGTACTTTGGGAGGCCAA CCCFFFFFHHHHHJJJJIJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1341:2116 4 * 0 0 * * 0 0 ATAACAGCGAGACTGGCAACTTAAA ######################### RG:Z:HiMom.1 QX:Z:CCCFFBDD RX:Z:ACAGGTAT
+HiMom:1:1201:1344:2147 4 * 0 0 * * 0 0 ACGATTAGTTTTAGCATTGGAGTAG @<??DDDDFHHHFGGHHIIIGGAGH RG:Z:HiMom.1 QX:Z:=?1AA:=D RX:Z:TGTAATCA
+HiMom:1:1201:1345:2181 4 * 0 0 * * 0 0 ATACGGATGTGTTTAGGAGTGGGAC CCCFFFFFHHHHHIIJJHJFHIJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1201:1364:2113 4 * 0 0 * * 0 0 TAAAGAGAGCCAGTGGAGTTACGAC ######################### RG:Z:HiMom.1 QX:Z:C at CFFF@D RX:Z:CAGCGGTA
+HiMom:1:1201:1392:2109 4 * 0 0 * * 0 0 GTCAGACAGGGGGATTTGGGCTGTG BBCFFFFFHHHHHHJJJHIJIJJJJ RG:Z:HiMom.1 QX:Z:CCCDF?DD RX:Z:TATCTGCC
+HiMom:1:1201:1392:2184 4 * 0 0 * * 0 0 ATCTTTATTCATTTGTATGATCTTA @@BFFFFFHFFHFHIHIIJIJJJJI RG:Z:HiMom.1 QX:Z:@CCFFDDE RX:Z:CAATAGTC
+HiMom:1:1201:1393:2143 4 * 0 0 * * 0 0 GATAAATGCACGCATCCCCCCCGCG C at CFFFFFGGHHHHJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:CTAACTCG
+HiMom:1:1201:1414:2174 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT @;@1BDADF????FFEB>B6=BBBB RG:Z:HiMom.1 QX:Z:######## RX:Z:AGAAAAGA
+HiMom:1:1201:1416:2128 4 * 0 0 * * 0 0 TTGGTGTGGAGGCGGTGGCGGGATC @@@DDDDDHHFHHII:?GGHIIB6? RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1421:2154 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTTGTGCTCTT BC at DFFFFHHHHHJJJJFHIHHIJJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGTAACTC
+HiMom:1:1201:1439:2156 4 * 0 0 * * 0 0 GGAGATTATTTGCCTTGAAGTAAGC -;(22<>>@>8@>8;@######### RG:Z:HiMom.1 QX:Z:######## RX:Z:GACCAGGC
+HiMom:1:1201:1452:2143 4 * 0 0 * * 0 0 TTTTAGTCTTAGCATTTACTTTCCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BC at DDFFF RX:Z:CAACTCTC
+HiMom:1:1201:1458:2109 4 * 0 0 * * 0 0 GATACGAACACACAAGAACTTTTTT CCCFFFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:1201:1472:2121 516 * 0 0 * * 0 0 GTGTGCTCTTCCGATCTGGAGGATG =+=??A4A==A at 7A<?######### RG:Z:HiMom.1 QX:Z:;?=D#### RX:Z:CTATGCGC
+HiMom:1:1201:1483:2126 516 * 0 0 * * 0 0 GCATGCAGCTGGGTGCTGTGATGCA @@@DDDBB<DD8F<<CGG?AA?A<F RG:Z:HiMom.1 QX:Z:@C<DD:B? RX:Z:CTGTAATC
+HiMom:1:1201:1486:2109 4 * 0 0 * * 0 0 ACGTGTGCTCTTCCCGATCTGTATA CCCFF?DDFBHHHJJIIDHJIJJJH RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:GTCCACAG
+HiMom:1:1201:1486:2146 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTGGGC <<<@??@??@???????######## RG:Z:HiMom.1 QX:Z:?@@1:DBD RX:Z:CAACTCTC
+HiMom:1:2101:1011:2102 4 * 0 0 * * 0 0 .....TCACACATAATTTTAAAATT #####22@?@@??@@@@@??@@@@@ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1013:2146 4 * 0 0 * * 0 0 ....CGCTAGAACCAACTTATTCAT ####24=?@@?@?@@?@@@@@@?@@ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:2101:1021:2209 4 * 0 0 * * 0 0 ..GGAAGGCTGCTAGCTGGCCAGAG ##08@>??@@??@?????????>?@ RG:Z:HiMom.1 QX:Z:@CCDFFFF RX:Z:ACTAAGAC
+HiMom:1:2101:1023:2237 516 * 0 0 * * 0 0 ..TTTGTTTGAGTTCCTTGTAGATT ##0:=@?>?@???@:>?@??>?;?< RG:Z:HiMom.1 QX:Z:######## RX:Z:GCCTAGCC
+HiMom:1:2101:1031:2163 4 * 0 0 * * 0 0 ..ACATTTGTCACCACTAGCCACCA ##0<@?@@@@@@@@@@?@@@@@@@? RG:Z:HiMom.1 QX:Z:B at BFFFFF RX:Z:GATATCCA
+HiMom:1:2101:1036:2087 4 * 0 0 * * 0 0 .GTCCACTTACGAAGCAAATACTTT #4=DDFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:B at CFFDFF RX:Z:GACCGTTG
+HiMom:1:2101:1040:2208 516 * 0 0 * * 0 0 .CTGATAGTCACTGAAATGAATTCA #-0=>(2 at .22@@############ RG:Z:HiMom.1 QX:Z:######## RX:Z:ACGAAATC
+HiMom:1:2101:1048:2238 4 * 0 0 * * 0 0 .GTCACATCGTTGAAGCACTGGATC #11ADDDB<CFFHCHGDBHGIIIII RG:Z:HiMom.1 QX:Z:?@7DDDDA RX:Z:ACAGTTGA
+HiMom:1:2101:1054:2162 4 * 0 0 * * 0 0 .GGACAGGGAAGGGAAGGAAGGGTG #4=DDFDFHHHHHJIJIIDHHGICG RG:Z:HiMom.1 QX:Z:B at BDDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1059:2083 4 * 0 0 * * 0 0 .GAATGTCTTAGAAGGATGCTTCTC #1=BDDDEHHGHHJJJJJIJJIIJJ RG:Z:HiMom.1 QX:Z:1:?D#### RX:Z:TACCGTCT
+HiMom:1:2101:1063:2206 4 * 0 0 * * 0 0 .TGCTAGGATGAGGATGGATAGTAA #1=DDDFFHHHHHJHIIJHIIIHHJ RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:ACAGGTAT
+HiMom:1:2101:1064:2242 4 * 0 0 * * 0 0 .GGAAAAAGGTTGTCAAGCGTTAAA ######################### RG:Z:HiMom.1 QX:Z:;@<:AA at A RX:Z:TCGCTAGA
+HiMom:1:2101:1072:2170 4 * 0 0 * * 0 0 .GGGGAGACAGAGAGGATCAGAAGT #4=BDDFDHHDFHEGFEGGIJIIIG RG:Z:HiMom.1 QX:Z:B@@DFDDF RX:Z:CAGCGGTA
+HiMom:1:2101:1077:2139 4 * 0 0 * * 0 0 .ATTAGTTGGCGGATGAAGCAGATA #4=DFFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:2101:1084:2188 4 * 0 0 * * 0 0 TACAAGGTCAAAATCAGCAACAAGT CCCFFFFDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@B at FFFFF RX:Z:GAAGGAAG
+HiMom:1:2101:1100:2085 4 * 0 0 * * 0 0 ATCTTGATCTCCTCCTTCTTGGCCT @@@DDDDDHHFHFEIIIIHHBAHBG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:2101:1102:2221 4 * 0 0 * * 0 0 ATAACTGACTCTACTCAGTAGATTA CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGCGGAT
+HiMom:1:2101:1105:2131 4 * 0 0 * * 0 0 CAGCAGCAGCAACAGCAGAAACATG CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:2101:1112:2245 4 * 0 0 * * 0 0 TCGTAGTGTTGTAATTTCGTCTTCT ?8?DBDDDCCFCAACGGFFCBFFAE RG:Z:HiMom.1 QX:Z:@@?BBDDD RX:Z:AACAATGG
+HiMom:1:2101:1122:2136 4 * 0 0 * * 0 0 CTTGCCAGCCTGCAGGCCCCGCGGC ???BBAABDD?DDIID)A:3<EADD RG:Z:HiMom.1 QX:Z:?@<DDDD? RX:Z:GCCGTCGA
+HiMom:1:2101:1123:2095 4 * 0 0 * * 0 0 TCCGCCTCCAGCTTCAGCTTCTCCT @@@FDDFFHHHHHJHGGJIJJJEHH RG:Z:HiMom.1 QX:Z:@?@DDF@@ RX:Z:CAGCGGTA
+HiMom:1:2101:1126:2082 4 * 0 0 * * 0 0 TCTCTTTCCACCTTGGTCACCTTCC @C at DDDFFHHHHHJEGGIHHIJGIH RG:Z:HiMom.1 QX:Z:@@@FFFDA RX:Z:CTGCGGAT
+HiMom:1:2101:1133:2239 4 * 0 0 * * 0 0 AGCTTTTTGTTTCCTAGCTTGTCTT ?@?DDFFFHHHHF4ACFHIJHHHGH RG:Z:HiMom.1 QX:Z:@@@BDDDF RX:Z:TATCCATG
+HiMom:1:2101:1143:2137 4 * 0 0 * * 0 0 GCTCTTCAGATCTAGGGGGAACAGC @@@DD?=DCAFFFHIIDG:EFHIII RG:Z:HiMom.1 QX:Z:######## RX:Z:TCCGTCTA
+HiMom:1:2101:1151:2182 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTA 9<<?@?@;5=?############## RG:Z:HiMom.1 QX:Z:######## RX:Z:GAAAAAAA
+HiMom:1:2101:1151:2236 516 * 0 0 * * 0 0 TTTGAAGCCTCTTTATCCTTGGCAT ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:TAGCGGTA
+HiMom:1:2101:1162:2139 4 * 0 0 * * 0 0 ATCGTTTATGGTCGGAACTACGACG BCCFFFFFHHHHHIJJJJJJJIJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1163:2203 4 * 0 0 * * 0 0 TTGGTTCACTTATGTATTTATGAAT @CCFDFFFHHHHHJHIIJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1163:2222 4 * 0 0 * * 0 0 GAGCGATAATGGTTCTTTTCCTCAC @@@DFFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFEF RX:Z:TGCAAGTA
+HiMom:1:2101:1172:2152 516 * 0 0 * * 0 0 ATCGTTTCTGGGGACTAGTGAGGCG ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:CAATAGTC
+HiMom:1:2101:1186:2093 4 * 0 0 * * 0 0 AATGTTGGGAGGACAATGATGGAAA ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:2101:1188:2195 4 * 0 0 * * 0 0 GCACATACACCAAATGTCTGAACCT CCCFFFFFHHHHHJJJHIJJJJJJJ RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:2101:1195:2150 4 * 0 0 * * 0 0 AATTGAACTTCACCACCCAGAGGAA CCCFFFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1207:2084 516 * 0 0 * * 0 0 TCACCACTCTTCTGGGCATCCCCTG @@@DDEDFHHHHHIJIHHGHGGJJJ RG:Z:HiMom.1 QX:Z:@@CDFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1208:2231 516 * 0 0 * * 0 0 CTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJHFDDDDDDD RG:Z:HiMom.1 QX:Z:1+:A1A22 RX:Z:GTAACATC
+HiMom:1:2101:1215:2110 4 * 0 0 * * 0 0 ATCTTTCCCCCATTAAGAACAGCAA ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:AAAAGAAG
+HiMom:1:2101:1216:2172 4 * 0 0 * * 0 0 GGACTTCTAGGGGATTTAGCGGGGT CCCFFFFFHHHHHJJJJJJJJJJJD RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CAGCGGAT
+HiMom:1:2101:1216:2193 4 * 0 0 * * 0 0 AGGCATGACACTGCATTTTAAATAC @@@DDDDDHFFHHGGDFHFHIIHGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAGTTGA
+HiMom:1:2101:1226:2088 4 * 0 0 * * 0 0 GCTCTTCCGATCTAGGTAATAGCTA ==?BDFFFDCDDHFFFAFHDHIJGJ RG:Z:HiMom.1 QX:Z:@@@:DDDD RX:Z:GATATCCA
+HiMom:1:2101:1231:2208 4 * 0 0 * * 0 0 AGCCAGTGTTGGTGTGTTGACTGTT @@;1ADABCF;BF<AACGCHEBHC< RG:Z:HiMom.1 QX:Z:@<@?D8 at D RX:Z:CTATGCGT
+HiMom:1:2101:1233:2133 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFGHHHHJJJFDDDDDDDD RG:Z:HiMom.1 QX:Z:=??B#### RX:Z:CTATGCGT
+HiMom:1:2101:1240:2197 516 * 0 0 * * 0 0 ACTGGAGATCCTTGTTACATGCCCA ??+++A:DD?:ADEE@::C4:C<E: RG:Z:HiMom.1 QX:Z:88+AD@?8 RX:Z:AACGCATT
+HiMom:1:2101:1245:2154 4 * 0 0 * * 0 0 ACCAATCAGTAGCACCACTATACAC CCCFFFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1249:2231 4 * 0 0 * * 0 0 TCTCTCGGCCTTCCACTCTAGCATA @@@FFFFFFHHGHIJJJGJIIJHIJ RG:Z:HiMom.1 QX:Z:@@CBDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1258:2092 4 * 0 0 * * 0 0 TTAGACAAAACACCAAAATAAAATA ######################### RG:Z:HiMom.1 QX:Z:@@CDDFFF RX:Z:TAAGCACA
+HiMom:1:2101:1262:2128 516 * 0 0 * * 0 0 TCTTGTGGTAACTTTTCTGACACCT -(---9@;@?:8>?4:>?@###### RG:Z:HiMom.1 QX:Z:1+8?ADD8 RX:Z:ACTAAGAC
+HiMom:1:2101:1273:2119 516 * 0 0 * * 0 0 ATGATGGATCTTCTCTAACTTGTCA >=><AAAAA+2AA?CB4@@ABB3?A RG:Z:HiMom.1 QX:Z:=++==ADB RX:Z:CTAACTCG
+HiMom:1:2101:1285:2105 516 * 0 0 * * 0 0 TGTCTATATCAACCAACACCTCTTC -(0(():94:9:???########## RG:Z:HiMom.1 QX:Z:######## RX:Z:TATCTCGG
+HiMom:1:2101:1312:2105 4 * 0 0 * * 0 0 GTTGAGAATAGGTTGAGATCGTTTC @CCFFFDFHHFHDHIJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1325:2083 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTGGAGAAAAA ######################### RG:Z:HiMom.1 QX:Z:@@@BD=DD RX:Z:ACAGGTAT
+HiMom:1:2101:1336:2109 4 * 0 0 * * 0 0 AGACCAGAACAGCTCCAGGTGCTCC CCCFFFFFHHHHHJJJJJJCGHIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1349:2084 4 * 0 0 * * 0 0 AGTCTGAATCATTGGTGTCTGAAGA <5;??=>=>>?############## RG:Z:HiMom.1 QX:Z:=;7+22<A RX:Z:ACTGTATC
+HiMom:1:2101:1365:2094 4 * 0 0 * * 0 0 GCTCTTCCGATCTTGTGCTCTTCCG CCCFFFFDHFHHGJJIIJIJJIHII RG:Z:HiMom.1 QX:Z:######## RX:Z:ACTGTACC
+HiMom:1:2101:1370:2116 4 * 0 0 * * 0 0 CACCATCTGACATCATGTTTGAAAG @@@DFFFDFFHDHIGBHHII<HEDB RG:Z:HiMom.1 QX:Z:?:8A?3:B RX:Z:AGCATGGA
+HiMom:1:2101:1386:2105 4 * 0 0 * * 0 0 AGGAATTATTCTTCTGCCATAAGGT B@@DDFFFHGFHHIJJJJJGIGIJH RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1414:2098 4 * 0 0 * * 0 0 TTGGGGCCGGTGCCGTCGGGCCCAA CCCFFFFFHHHHGJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:2101:1427:2081 4 * 0 0 * * 0 0 CCGACTTCCATGGCCACCGTCCTGC CCCFFFFFHHHHHJJJIIGFIIJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1450:2134 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CC at FDFDFFDFHFGIIE1CGGHBGE RG:Z:HiMom.1 QX:Z:@C at DDDB? RX:Z:ACCAGTTG
+HiMom:1:2101:1459:2083 4 * 0 0 * * 0 0 ATTTCACCAAAATAATCAGAAGGCC CCCFFFFDBHGHHIGGIJFJJGGFH RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:GCCGTCGA
+HiMom:1:2101:1491:2093 4 * 0 0 * * 0 0 AGAGACGGGGTCTCGCTATGTTGCC BCCDFFFFHHHHHJIIJJJJIJIJJ RG:Z:HiMom.1 QX:Z:@@@FDEBD RX:Z:CAATAGTC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AAAAAAAA.sam
new file mode 100644
index 0000000..4882d35
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAAAAA LB:LN_AAAAAAAA PL:illumina PU:HiMom.1.AAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AAAAGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AAAAGAAG.sam
new file mode 100644
index 0000000..6446f79
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AAAAGAAG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAGAAG LB:LN_AAAAGAAG PL:illumina PU:HiMom.1.AAAAGAAG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AACAATGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AACAATGG.sam
new file mode 100644
index 0000000..3acf7e6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AACAATGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACAATGG LB:LN_AACAATGG PL:illumina PU:HiMom.1.AACAATGG CN:BI
+HiMom:1:1101:1138:2141 77 * 0 0 * * 0 0 .TTACCAAGGTTTTCTGTTTAGTGA #1=DDFFFHHFHHJJJIHJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TCCG
+HiMom:1:1101:1138:2141 141 * 0 0 * * 0 0 ATCTGCTTCAGGTCGATCAGA FFFFHGHHHJJIGHIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TCCG
+HiMom:1:1101:1206:2126 77 * 0 0 * * 0 0 .ATTCTGCCATATTGGTCCGACAGT #1=DDFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATCT
+HiMom:1:1101:1206:2126 141 * 0 0 * * 0 0 GTCCAGTGGTGCACTGAATGT FFFFHHHHHHIIJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATCT
+HiMom:1:2101:1077:2139 77 * 0 0 * * 0 0 CACAGGCTTCCACGGACTTAACGTC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:#4=D RX:Z:NATT
+HiMom:1:2101:1077:2139 141 * 0 0 * * 0 0 AGTTGGCGGATGAAGCAGATA FFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:#4=D RX:Z:NATT
+HiMom:1:2101:1112:2245 77 * 0 0 * * 0 0 TGCCATCTGCTCTGGGAAGCACCAG 1:=DDDDDFBC:DEFIFFFIEF at BE RG:Z:HiMom.1 QX:Z:?8?D RX:Z:TCGT
+HiMom:1:2101:1112:2245 141 * 0 0 * * 0 0 AGTGTTGTAATTTCGTCTTCT BDDDCCFCAACGGFFCBFFAE RG:Z:HiMom.1 QX:Z:?8?D RX:Z:TCGT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AACGCATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AACGCATT.sam
new file mode 100644
index 0000000..c309567
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AACGCATT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACGCATT LB:LN_AACGCATT PL:illumina PU:HiMom.1.AACGCATT CN:BI
+HiMom:1:1101:1197:2200 77 * 0 0 * * 0 0 GGGCGCCCCGTGAGGACCCAGTCCT @C at FFADDFFCFCEHIIJIJJIEFC RG:Z:HiMom.1 QX:Z:@@@F RX:Z:ATAT
+HiMom:1:1101:1197:2200 141 * 0 0 * * 0 0 TCCACTGGAACCACAGAACCC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:ATAT
+HiMom:1:1101:1308:2153 589 * 0 0 * * 0 0 TTTTGGAAGAGACCTCAATTACTGT ???DDDDD?:22AE:A2<3,AF?3A RG:Z:HiMom.1 QX:Z:1?1= RX:Z:TCTG
+HiMom:1:1101:1308:2153 653 * 0 0 * * 0 0 TAAGGTAATCCCCGCATGTGT 4===AFFDFFGFDGFB at CFB: RG:Z:HiMom.1 QX:Z:1?1= RX:Z:TCTG
+HiMom:1:1101:1452:2132 77 * 0 0 * * 0 0 .CGTCCTGGAAAACGGGGCGCGGCT #1=BDBDDFHHHHF at FHDHIGIIII RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1101:1452:2132 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1201:1150:2161 77 * 0 0 * * 0 0 AAGTCACCTAATATCTTTTTTTTTT @@<??;?D?CFD,A4CDDHFBIIID RG:Z:HiMom.1 QX:Z:@C at F RX:Z:TTCT
+HiMom:1:1201:1150:2161 141 * 0 0 * * 0 0 CACTACTGTGATTGTGCCACT FFFFGHHHHGIIIICEHCFGH RG:Z:HiMom.1 QX:Z:@C at F RX:Z:TTCT
+HiMom:1:2101:1240:2197 589 * 0 0 * * 0 0 ATAAAACATAGCAATATTTTCCTAT ######################### RG:Z:HiMom.1 QX:Z:??## RX:Z:ACTG
+HiMom:1:2101:1240:2197 653 * 0 0 * * 0 0 GAGATCCTTGTTACATGCCCA +A:DD?:ADEE@::C4:C<E: RG:Z:HiMom.1 QX:Z:??## RX:Z:ACTG
+HiMom:1:2101:1336:2109 77 * 0 0 * * 0 0 .ACTATCAGGATCGTGGCTATTTTG #1BDDFFFHHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:AGAC
+HiMom:1:2101:1336:2109 141 * 0 0 * * 0 0 CAGAACAGCTCCAGGTGCTCC FFFFHHHHHJJJJJJCGHIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:AGAC
+HiMom:1:2101:1427:2081 77 * 0 0 * * 0 0 .CGAGTGCCTAGTGGGCCACTTTTG #4=DDBDFHHHHFHIJJJJIJJJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CCGA
+HiMom:1:2101:1427:2081 141 * 0 0 * * 0 0 CTTCCATGGCCACCGTCCTGC FFFFHHHHHJJJIIGFIIJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CCGA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAAAATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAAAATT.sam
new file mode 100644
index 0000000..3cfd422
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAAAATT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAAAATT LB:LN_ACAAAATT PL:illumina PU:HiMom.1.ACAAAATT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAGGTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAGGTAT.sam
new file mode 100644
index 0000000..a729d56
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAGGTAT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGGTAT LB:LN_ACAGGTAT PL:illumina PU:HiMom.1.ACAGGTAT CN:BI
+HiMom:1:1101:1236:2121 77 * 0 0 * * 0 0 .GGTGCTTCATATCCCTCTAGAGGA #1=BDDFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTGC
+HiMom:1:1101:1236:2121 141 * 0 0 * * 0 0 GCTTACTTTGTAGCCTTCATC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTGC
+HiMom:1:1201:1341:2116 77 * 0 0 * * 0 0 .AGAAGCCCCAGGAGGAAGACAGTC #1=DDFFFHHHHHHHJIIJJJJJGI RG:Z:HiMom.1 QX:Z:#### RX:Z:ATAA
+HiMom:1:1201:1341:2116 141 * 0 0 * * 0 0 CAGCGAGACTGGCAACTTAAA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:ATAA
+HiMom:1:2101:1063:2206 77 * 0 0 * * 0 0 TCCTATTCGCCTACACAATTCTCCG CCCFFFFFHHHHHJJJJJJJHJJJJ RG:Z:HiMom.1 QX:Z:#1=D RX:Z:NTGC
+HiMom:1:2101:1063:2206 141 * 0 0 * * 0 0 TAGGATGAGGATGGATAGTAA DDFFHHHHHJHIIJHIIIHHJ RG:Z:HiMom.1 QX:Z:#1=D RX:Z:NTGC
+HiMom:1:2101:1325:2083 77 * 0 0 * * 0 0 .CAGAAGAAAGGGCCTTGTCGGAGG #1=DDDDDHHFHDGI at EEHG:?FA8 RG:Z:HiMom.1 QX:Z:#### RX:Z:TGTG
+HiMom:1:2101:1325:2083 141 * 0 0 * * 0 0 CTCTTCCGATCTGGAGAAAAA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:TGTG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAGTTGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAGTTGA.sam
new file mode 100644
index 0000000..904a5dd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACAGTTGA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGTTGA LB:LN_ACAGTTGA PL:illumina PU:HiMom.1.ACAGTTGA CN:BI
+HiMom:1:2101:1048:2238 77 * 0 0 * * 0 0 .CTGCCGTGTCCTGACTTCTGGAAT #1:B?ADDACF<DCG;EG<FHH at CE RG:Z:HiMom.1 QX:Z:#11A RX:Z:NGTC
+HiMom:1:2101:1048:2238 141 * 0 0 * * 0 0 ACATCGTTGAAGCACTGGATC DDDB<CFFHCHGDBHGIIIII RG:Z:HiMom.1 QX:Z:#11A RX:Z:NGTC
+HiMom:1:2101:1216:2193 77 * 0 0 * * 0 0 TTTTCTTGGCCTCTGTTTTTTTTTT BCCFDFFFHHFFHJIGIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@@D RX:Z:AGGC
+HiMom:1:2101:1216:2193 141 * 0 0 * * 0 0 ATGACACTGCATTTTAAATAC DDDDHFFHHGGDFHFHIIHGG RG:Z:HiMom.1 QX:Z:@@@D RX:Z:AGGC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACCAGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACCAGTTG.sam
new file mode 100644
index 0000000..0df9571
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACCAGTTG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACCAGTTG LB:LN_ACCAGTTG PL:illumina PU:HiMom.1.ACCAGTTG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACGAAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACGAAATC.sam
new file mode 100644
index 0000000..1fbb7b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACGAAATC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACGAAATC LB:LN_ACGAAATC PL:illumina PU:HiMom.1.ACGAAATC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTAAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTAAGAC.sam
new file mode 100644
index 0000000..223b00c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTAAGAC.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTAAGAC LB:LN_ACTAAGAC PL:illumina PU:HiMom.1.ACTAAGAC CN:BI
+HiMom:1:1101:1259:2152 77 * 0 0 * * 0 0 CACCTATAATCCCAGCTACTCCAGA CCCFFFFFHHHHHJJJJJJIJJJIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATTT
+HiMom:1:1101:1259:2152 141 * 0 0 * * 0 0 TTATATTTTTTTAGACATAGG FFFFGHHHHJJJJIGIIJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATTT
+HiMom:1:1101:1261:2127 589 * 0 0 * * 0 0 .TGAAATCTGGATAGGCTGGAGTTA #0-@@@################### RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1101:1261:2127 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFHGHHHJJIFDDDDDDDD RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:2101:1021:2209 77 * 0 0 * * 0 0 .GGCCCCACCCTCCTCCAGCACGTC #1=DDFFFHHHHHJJJJJJHIIHFH RG:Z:HiMom.1 QX:Z:#### RX:Z:NNGG
+HiMom:1:2101:1021:2209 141 * 0 0 * * 0 0 AAGGCTGCTAGCTGGCCAGAG @>??@@??@?????????>?@ RG:Z:HiMom.1 QX:Z:#### RX:Z:NNGG
+HiMom:1:2101:1262:2128 589 * 0 0 * * 0 0 AGCAGAAGGGCAAAAGCTGGCTTGA 9;<@:@################### RG:Z:HiMom.1 QX:Z:#### RX:Z:TCTT
+HiMom:1:2101:1262:2128 653 * 0 0 * * 0 0 GTGGTAACTTTTCTGACACCT -9@;@?:8>?4:>?@###### RG:Z:HiMom.1 QX:Z:#### RX:Z:TCTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTGTACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTGTACC.sam
new file mode 100644
index 0000000..bcf9179
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTGTACC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTACC LB:LN_ACTGTACC PL:illumina PU:HiMom.1.ACTGTACC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTGTATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTGTATC.sam
new file mode 100644
index 0000000..f6d2429
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ACTGTATC.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTATC LB:LN_ACTGTATC PL:illumina PU:HiMom.1.ACTGTATC CN:BI
+HiMom:1:1201:1458:2109 77 * 0 0 * * 0 0 .GAGACCATAGAGCGGATGCTTTCA #1=DDDFFHHGHGIJJIGIIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GATA
+HiMom:1:1201:1458:2109 141 * 0 0 * * 0 0 CGAACACACAAGAACTTTTTT FFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GATA
+HiMom:1:2101:1105:2131 77 * 0 0 * * 0 0 TTGGAACACAGCGGGAATCACAGCA CCCFFFFFHHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CAGC
+HiMom:1:2101:1105:2131 141 * 0 0 * * 0 0 AGCAGCAACAGCAGAAACATG FFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CAGC
+HiMom:1:2101:1349:2084 77 * 0 0 * * 0 0 .CAAGTAGCAGTGTCACGCCTTAGC #1=DDBDDADFDDBEH at HC=CEGG@ RG:Z:HiMom.1 QX:Z:<5;? RX:Z:AGTC
+HiMom:1:2101:1349:2084 141 * 0 0 * * 0 0 TGAATCATTGGTGTCTGAAGA ?=>=>>?############## RG:Z:HiMom.1 QX:Z:<5;? RX:Z:AGTC
+HiMom:1:2101:1365:2094 77 * 0 0 * * 0 0 .AAGGTGAAGGCCGGCGCGCTCGCC #1=BDDDFFHHHHJGGGIGFIHIIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTC
+HiMom:1:2101:1365:2094 141 * 0 0 * * 0 0 TTCCGATCTTGTGCTCTTCCG FFFDHFHHGJJIIJIJJIHII RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AGAAAAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGAAAAGA.sam
new file mode 100644
index 0000000..d40aec2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGAAAAGA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGAAAAGA LB:LN_AGAAAAGA PL:illumina PU:HiMom.1.AGAAAAGA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AGCATGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGCATGGA.sam
new file mode 100644
index 0000000..62400d9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGCATGGA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGCATGGA LB:LN_AGCATGGA PL:illumina PU:HiMom.1.AGCATGGA CN:BI
+HiMom:1:1101:1406:2222 77 * 0 0 * * 0 0 CTCCCCCCGGGCTGAACCAGGGTAC CCCFFDDDDHDFHIIIIIIIII9DG RG:Z:HiMom.1 QX:Z:;?@D RX:Z:GGCT
+HiMom:1:1101:1406:2222 141 * 0 0 * * 0 0 GGACTCCCCTGGTTCTGGGCA DDBD?FHDFGIIIGIGHHIII RG:Z:HiMom.1 QX:Z:;?@D RX:Z:GGCT
+HiMom:1:1201:1291:2158 77 * 0 0 * * 0 0 AGAAGGGGAAAGCCTTCATCTTGGC BCBFFFFFHHHHHJJJJJIIFIJIJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:CGTG
+HiMom:1:1201:1291:2158 141 * 0 0 * * 0 0 TGCTCTTCCGATCTGATGGGC FFDD?FHHFGEHHIIDHIIII RG:Z:HiMom.1 QX:Z:@CCF RX:Z:CGTG
+HiMom:1:2101:1370:2116 77 * 0 0 * * 0 0 .TGGTGGTCCATAGAGATTTGAAAC #1:4BD7DACF?FCA:4+<ACHIIH RG:Z:HiMom.1 QX:Z:@@@D RX:Z:CACC
+HiMom:1:2101:1370:2116 141 * 0 0 * * 0 0 ATCTGACATCATGTTTGAAAG FFFDFFHDHIGBHHII<HEDB RG:Z:HiMom.1 QX:Z:@@@D RX:Z:CACC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AGGTAAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGGTAAGG.sam
new file mode 100644
index 0000000..8a9dcc2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGGTAAGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTAAGG LB:LN_AGGTAAGG PL:illumina PU:HiMom.1.AGGTAAGG CN:BI
+HiMom:1:1101:1263:2236 589 * 0 0 * * 0 0 CTTTGAAGACATTGTGAGATCTGTA <==A<42 at C+A4A?,2A@=4 at 7A?? RG:Z:HiMom.1 QX:Z:((0@ RX:Z:AGTT
+HiMom:1:1101:1263:2236 653 * 0 0 * * 0 0 CTTCAGTAATTTTAGTACTGC ##################### RG:Z:HiMom.1 QX:Z:((0@ RX:Z:AGTT
+HiMom:1:2101:1054:2162 77 * 0 0 * * 0 0 .CCAGGTGTCTTCCCGGGCCCTGCC #1=DDFBDFHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:#4=D RX:Z:NGGA
+HiMom:1:2101:1054:2162 141 * 0 0 * * 0 0 CAGGGAAGGGAAGGAAGGGTG DFDFHHHHHJIJIIDHHGICG RG:Z:HiMom.1 QX:Z:#4=D RX:Z:NGGA
+HiMom:1:2101:1163:2203 77 * 0 0 * * 0 0 TCTCCATGTGAAACAAGCAAAAAGA CCCFFFFFHHHHGJJJIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:TTGG
+HiMom:1:2101:1163:2203 141 * 0 0 * * 0 0 TTCACTTATGTATTTATGAAT DFFFHHHHHJHIIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:TTGG
+HiMom:1:2101:1249:2231 77 * 0 0 * * 0 0 GTTATTGATAGGATACTGTACAAAC @BCFFFFDHHHHFIJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:TCTC
+HiMom:1:2101:1249:2231 141 * 0 0 * * 0 0 TCGGCCTTCCACTCTAGCATA FFFFFHHGHIJJJGJIIJHIJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:TCTC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/AGGTCGCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGGTCGCA.sam
new file mode 100644
index 0000000..76eae3a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/AGGTCGCA.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTCGCA LB:LN_AGGTCGCA PL:illumina PU:HiMom.1.AGGTCGCA CN:BI
+HiMom:1:1101:1150:2228 77 * 0 0 * * 0 0 GCTACTCAGTAGACAGTCCCACCCT @@CADDDDFCFHHIIIIGGIIGGGI RG:Z:HiMom.1 QX:Z:8?=D RX:Z:ATGG
+HiMom:1:1101:1150:2228 141 * 0 0 * * 0 0 GAGGCGATTCCTAGGGGGTTG D8;@BH6DHD<FGGGEIGHIG RG:Z:HiMom.1 QX:Z:8?=D RX:Z:ATGG
+HiMom:1:1101:1491:2120 77 * 0 0 * * 0 0 .GGCAGGTGCCCCCACTTGACTCTC #1?DFFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GGCC
+HiMom:1:1101:1491:2120 141 * 0 0 * * 0 0 AGGCTGAACTTCTGAGCTGCT FFFFHHHGHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GGCC
+HiMom:1:1201:1190:2194 77 * 0 0 * * 0 0 AACCTGGCGCTAAACCATTCGTAGA CCCFFFFFHHHHHJJJJJJJJIJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1201:1190:2194 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:2101:1188:2195 77 * 0 0 * * 0 0 TTAGACCGTCGTGAGACAGGTTAGT @CCFFFFFHHHHHJJJJJIIEHIJH RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCAC
+HiMom:1:2101:1188:2195 141 * 0 0 * * 0 0 ATACACCAAATGTCTGAACCT FFFFHHHHHJJJHIJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCAC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ATTATCAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ATTATCAA.sam
new file mode 100644
index 0000000..0bbf905
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ATTATCAA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTATCAA LB:LN_ATTATCAA PL:illumina PU:HiMom.1.ATTATCAA CN:BI
+HiMom:1:1101:1100:2207 77 * 0 0 * * 0 0 ACGACAGACGTTCTTTCTTTGCTGC CCCFFFFFHHFHHJIJJJJJHIJJH RG:Z:HiMom.1 QX:Z:#### RX:Z:AGGC
+HiMom:1:1101:1100:2207 141 * 0 0 * * 0 0 T............G....... ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:AGGC
+HiMom:1:1101:1157:2135 77 * 0 0 * * 0 0 .GGACATTGTAATCATTTCTTACAA #1=DD?DDHHHHHGGHIIIIIIIII RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTA
+HiMom:1:1101:1157:2135 141 * 0 0 * * 0 0 AAGTCTTAATCAAAGATGATA FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTA
+HiMom:1:1101:1269:2170 77 * 0 0 * * 0 0 ACAGTGTGGGAGGCAGACGAAGAGA @@@DDDDDFA:C at EGA?FD<FFHII RG:Z:HiMom.1 QX:Z:@@<A RX:Z:TTCC
+HiMom:1:1101:1269:2170 141 * 0 0 * * 0 0 AAGCCTGTGCTTTAAGGAAAA DBDBDF8DDCFH at GIE@@GGH RG:Z:HiMom.1 QX:Z:@@<A RX:Z:TTCC
+HiMom:1:1201:1018:2217 589 * 0 0 * * 0 0 .TTTCTCTGGGCGCAAAGATGTTCA #07;8=8<<99(:=@@/@7>>6=?> RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1201:1018:2217 653 * 0 0 * * 0 0 ..................... ##################### RG:Z:HiMom.1 XN:i:1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1201:1118:2198 77 * 0 0 * * 0 0 CAAGTGTACAGGATTAGACTGGGTT BCCFDEBDHHHHHIJJJGIIIJJGH RG:Z:HiMom.1 QX:Z:C at CF RX:Z:AATA
+HiMom:1:1201:1118:2198 141 * 0 0 * * 0 0 AACTTTATTAAAGCAGTTAAA FFFFHDHHHGIIIJJJIJJJJ RG:Z:HiMom.1 QX:Z:C at CF RX:Z:AATA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/ATTCCTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/ATTCCTCT.sam
new file mode 100644
index 0000000..856cd22
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/ATTCCTCT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTCCTCT LB:LN_ATTCCTCT PL:illumina PU:HiMom.1.ATTCCTCT CN:BI
+HiMom:1:1101:1309:2210 77 * 0 0 * * 0 0 ACACCAACCACCCAACTATCTATAA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@?D RX:Z:AGTG
+HiMom:1:1101:1309:2210 141 * 0 0 * * 0 0 GGCTAGGGCATTTTTAATCTT FFDFHHHDFHJIJJIJGIIIJ RG:Z:HiMom.1 QX:Z:@@?D RX:Z:AGTG
+HiMom:1:1201:1018:2133 77 * 0 0 * * 0 0 .AAAACTTGAGGATGCTATGCAAGC #1:B:ADDDDDDDEEAEBF9FFEBF RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1201:1018:2133 141 * 0 0 * * 0 0 ..................... ##################### RG:Z:HiMom.1 XN:i:1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1201:1073:2225 77 * 0 0 * * 0 0 GGGGCTGAGACCTTTGCTGATGGTG @@@FFFFFHHHGHJJJJJIIIGICH RG:Z:HiMom.1 QX:Z:@BBD RX:Z:CGTG
+HiMom:1:1201:1073:2225 141 * 0 0 * * 0 0 TGCTCTTCCGATCTGGAGGGT FFFFHHHHHJJJJJJJJJJJ: RG:Z:HiMom.1 QX:Z:@BBD RX:Z:CGTG
+HiMom:1:1201:1242:2207 77 * 0 0 * * 0 0 ATGGCAAAGTGGTGTCTGAGACCAA BCCFFFFFGHHHHHIIIJFHIJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATCT
+HiMom:1:1201:1242:2207 141 * 0 0 * * 0 0 TTTATTGGCCTCCTGCTCCCC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CAACTCTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAACTCTC.sam
new file mode 100644
index 0000000..02764d3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAACTCTC.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAACTCTC LB:LN_CAACTCTC PL:illumina PU:HiMom.1.CAACTCTC CN:BI
+HiMom:1:1101:1140:2120 77 * 0 0 * * 0 0 .CCCCAACATTCTAATTATGCCTCA #1:BDFFDHFFDFIJJJIIJIIIII RG:Z:HiMom.1 QX:Z:@@@D RX:Z:TTTT
+HiMom:1:1101:1140:2120 141 * 0 0 * * 0 0 TTTTTTTTTAACTTTGCAAAT DDDDHHHHFB at 9FHI@BFH@@ RG:Z:HiMom.1 QX:Z:@@@D RX:Z:TTTT
+HiMom:1:1101:1328:2225 77 * 0 0 * * 0 0 GAAATGCATCTGTCTTAGAAACTGG ??@=BDDDFDD<<,<2:C<F:FFEA RG:Z:HiMom.1 QX:Z:#### RX:Z:AGGA
+HiMom:1:1101:1328:2225 141 * 0 0 * * 0 0 AATTAGGACTTACCTGACATA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:AGGA
+HiMom:1:1201:1127:2112 589 * 0 0 * * 0 0 .GTCAAGGATGTTCGTCGTGGCAAC #1=BDDDDDDDDDID<AE?@<CEEE RG:Z:HiMom.1 QX:Z:@<@? RX:Z:TAAT
+HiMom:1:1201:1127:2112 653 * 0 0 * * 0 0 CACCTGAGCAGTGAAGCCAGC BDDDHD?FDBHI?AHGGGDFH RG:Z:HiMom.1 QX:Z:@<@? RX:Z:TAAT
+HiMom:1:1201:1452:2143 77 * 0 0 * * 0 0 TATCCCCTCTAAGACGGACCTGGGT CCCFFFFFHHHHHJJIIIJJJJJJG RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1201:1452:2143 141 * 0 0 * * 0 0 AGTCTTAGCATTTACTTTCCC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1201:1486:2146 589 * 0 0 * * 0 0 GTTCTCTGTCCCCAGGTCCTGTCTC ===A7<7222<<=C=?+<7>@?ACB RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:TTTT
+HiMom:1:1201:1486:2146 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTGGGC ??@??@???????######## RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:TTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CAATAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAATAGAC.sam
new file mode 100644
index 0000000..f800d53
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAATAGAC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGAC LB:LN_CAATAGAC PL:illumina PU:HiMom.1.CAATAGAC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CAATAGTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAATAGTC.sam
new file mode 100644
index 0000000..ec89815
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAATAGTC.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGTC LB:LN_CAATAGTC PL:illumina PU:HiMom.1.CAATAGTC CN:BI
+HiMom:1:1101:1316:2126 77 * 0 0 * * 0 0 .AAAAAAAAAAAAAAAAAAAAAAAA #1BDFFFFHHHHHJJJJFDDDDDDD RG:Z:HiMom.1 XN:i:1 QX:Z:CCCF RX:Z:TCTT
+HiMom:1:1101:1316:2126 141 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFHHHHHJJJJHFDDDDDD RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TCTT
+HiMom:1:1101:1399:2128 77 * 0 0 * * 0 0 .TGCCCTTCGTCCTGGGAAACGGGG #1BDFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1101:1399:2128 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHHHHIJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1201:1054:2151 77 * 0 0 * * 0 0 .TAGTGCTGGGCACTAAGTAATACC #4=DDDFFHHHHHJJJJJHIJJJJJ RG:Z:HiMom.1 QX:Z:CBCF RX:Z:GTCA
+HiMom:1:1201:1054:2151 141 * 0 0 * * 0 0 GGCACTGAGAATATATGGGTG FFFFHHHHHJJJJJJJJJJEG RG:Z:HiMom.1 QX:Z:CBCF RX:Z:GTCA
+HiMom:1:1201:1345:2181 77 * 0 0 * * 0 0 GGATAATCCTATTTATTACCTCAGA BBBDDFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATAC
+HiMom:1:1201:1345:2181 141 * 0 0 * * 0 0 GGATGTGTTTAGGAGTGGGAC FFFFHHHHHIIJJHJFHIJIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATAC
+HiMom:1:1201:1392:2184 77 * 0 0 * * 0 0 TTTCAGATTGGTCATTGTTAGTGTA ??@BDDDEHBHADHHIIEHDHFHFF RG:Z:HiMom.1 QX:Z:@@BF RX:Z:ATCT
+HiMom:1:1201:1392:2184 141 * 0 0 * * 0 0 TTATTCATTTGTATGATCTTA FFFFHFFHFHIHIIJIJJJJI RG:Z:HiMom.1 QX:Z:@@BF RX:Z:ATCT
+HiMom:1:2101:1172:2152 589 * 0 0 * * 0 0 AACACGGACAAAGGAGTCTAACACG <<<??8@@################# RG:Z:HiMom.1 QX:Z:#### RX:Z:ATCG
+HiMom:1:2101:1172:2152 653 * 0 0 * * 0 0 TTTCTGGGGACTAGTGAGGCG ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:ATCG
+HiMom:1:2101:1491:2093 77 * 0 0 * * 0 0 .CTATGCCGATCGGGTGTCCGCACT #1=DDDDDHHFHHIIEHHHBGHGII RG:Z:HiMom.1 QX:Z:BCCD RX:Z:AGAG
+HiMom:1:2101:1491:2093 141 * 0 0 * * 0 0 ACGGGGTCTCGCTATGTTGCC FFFFHHHHHJIIJJJJIJIJJ RG:Z:HiMom.1 QX:Z:BCCD RX:Z:AGAG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CAGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAGCGGAT.sam
new file mode 100644
index 0000000..c9d3cfe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAGCGGAT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGAT LB:LN_CAGCGGAT PL:illumina PU:HiMom.1.CAGCGGAT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAGCGGTA.sam
new file mode 100644
index 0000000..6ac0c2b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CAGCGGTA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGTA LB:LN_CAGCGGTA PL:illumina PU:HiMom.1.CAGCGGTA CN:BI
+HiMom:1:1101:1420:2213 77 * 0 0 * * 0 0 TACCTGGTTGATCCTGCCAGTAGCA @@CFFFFDDHHGHJGGHIJJIHGBH RG:Z:HiMom.1 QX:Z:@CCF RX:Z:TTCA
+HiMom:1:1101:1420:2213 141 * 0 0 * * 0 0 CTGTACCGGCCGTGCGTACTT FFFDHHHFGIJJJJJJGHIGG RG:Z:HiMom.1 QX:Z:@CCF RX:Z:TTCA
+HiMom:1:1201:1364:2113 77 * 0 0 * * 0 0 .CACTCATTTTCTTATGTGGGATAT #1=DDFDFHHHHHIJJIFHIIHHHI RG:Z:HiMom.1 QX:Z:#### RX:Z:TAAA
+HiMom:1:1201:1364:2113 141 * 0 0 * * 0 0 GAGAGCCAGTGGAGTTACGAC ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:TAAA
+HiMom:1:2101:1072:2170 77 * 0 0 * * 0 0 ATCACCGCACTCATTTCCCGCTTCC CCCFFFFFHHHACEEGHIIBHIIII RG:Z:HiMom.1 QX:Z:#4=B RX:Z:NGGG
+HiMom:1:2101:1072:2170 141 * 0 0 * * 0 0 GAGACAGAGAGGATCAGAAGT DDFDHHDFHEGFEGGIJIIIG RG:Z:HiMom.1 QX:Z:#4=B RX:Z:NGGG
+HiMom:1:2101:1123:2095 77 * 0 0 * * 0 0 .TGGACAACATGTTCGAGAGCTACA #1=BBDDDFFFFDGFGIG?F;HHFI RG:Z:HiMom.1 QX:Z:@@@F RX:Z:TCCG
+HiMom:1:2101:1123:2095 141 * 0 0 * * 0 0 CCTCCAGCTTCAGCTTCTCCT DDFFHHHHHJHGGJIJJJEHH RG:Z:HiMom.1 QX:Z:@@@F RX:Z:TCCG
+HiMom:1:2101:1151:2236 589 * 0 0 * * 0 0 TTAAAGAGGTTCAGGGATGCAGAGT ######################### RG:Z:HiMom.1 QX:Z:#### RX:Z:TTTG
+HiMom:1:2101:1151:2236 653 * 0 0 * * 0 0 AAGCCTCTTTATCCTTGGCAT ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:TTTG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CCAACATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CCAACATT.sam
new file mode 100644
index 0000000..596982f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CCAACATT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAACATT LB:LN_CCAACATT PL:illumina PU:HiMom.1.CCAACATT CN:BI
+HiMom:1:1101:1083:2193 77 * 0 0 * * 0 0 TTCTACCTCACCTTAGGGAGAAGAC @@@DDBDDD>F><C<4CG?EHGHIG RG:Z:HiMom.1 QX:Z:19;3 RX:Z:AGGC
+HiMom:1:1101:1083:2193 141 * 0 0 * * 0 0 T.................... ##################### RG:Z:HiMom.1 QX:Z:19;3 RX:Z:AGGC
+HiMom:1:1101:1175:2197 77 * 0 0 * * 0 0 CCCCTGAGGACACCATCCCACTCCA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BC at F RX:Z:AAGA
+HiMom:1:1101:1175:2197 141 * 0 0 * * 0 0 GCTGGGGAACATCCAGAAAGG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BC at F RX:Z:AAGA
+HiMom:1:1201:1138:2227 589 * 0 0 * * 0 0 GCTGACACAATCTCTTCCGCCTGGT ######################### RG:Z:HiMom.1 QX:Z:=1=A RX:Z:GACA
+HiMom:1:1201:1138:2227 653 * 0 0 * * 0 0 AATATAGGAAATAGAAGCTAT =AAA,2?4>7C<<4<A+3<AB RG:Z:HiMom.1 QX:Z:=1=A RX:Z:GACA
+HiMom:1:1201:1260:2165 77 * 0 0 * * 0 0 GGACACGGACAGGATTGACAGATTG BCBFFFFFHHHHHHIIJHIIIFHIJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:ATCT
+HiMom:1:1201:1260:2165 141 * 0 0 * * 0 0 GATCTAAGTTGGGGGACGCCG FDFFHHHHHJJJIJIIIGIJJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:ATCT
+HiMom:1:1201:1281:2133 77 * 0 0 * * 0 0 .GGAAATCCAGAAAACATAGAAGAT #1=DDFFFHHHHHIJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCAA
+HiMom:1:1201:1281:2133 141 * 0 0 * * 0 0 CAAAATTTCATATGACTTAGC FFFFHHHHHJJIIIHICHIIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCAA
+HiMom:1:1201:1331:2162 77 * 0 0 * * 0 0 ACGCTCGGCTAATTTTTGTATTTTT @CCFFFDFHHHHHIJJJJHIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TAAT
+HiMom:1:1201:1331:2162 141 * 0 0 * * 0 0 CCCAGTACTTTGGGAGGCCAA FFFFHHHHHJJJJIJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TAAT
+HiMom:1:2101:1186:2093 77 * 0 0 * * 0 0 .CGACCATAAACGATGCCGACCGGC #4=DFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:#### RX:Z:AATG
+HiMom:1:2101:1186:2093 141 * 0 0 * * 0 0 TTGGGAGGACAATGATGGAAA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:AATG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CCAGCACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CCAGCACC.sam
new file mode 100644
index 0000000..dc6d359
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CCAGCACC.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAGCACC LB:LN_CCAGCACC PL:illumina PU:HiMom.1.CCAGCACC CN:BI
+HiMom:1:1101:1212:2230 77 * 0 0 * * 0 0 TTTCTATTAGCTCTTAGTAAGATTA CCCFFFFFHHHHHJJJIJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1101:1212:2230 141 * 0 0 * * 0 0 AGCTTTATTGGGGAGGGGGTG FFFFHHGHHJJJJGJJJJJDF RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1201:1204:2228 77 * 0 0 * * 0 0 CCGATACGCTGAGTGTGGTTTGCGG CCCFFFFFHHHFHEGGHIHIJJJJJ RG:Z:HiMom.1 QX:Z:@?@F RX:Z:TCTT
+HiMom:1:1201:1204:2228 141 * 0 0 * * 0 0 CTTGTCGATGAGGAACTTGGT FFFFDHHGHJIJJGHIIJJJH RG:Z:HiMom.1 QX:Z:@?@F RX:Z:TCTT
+HiMom:1:2101:1100:2085 77 * 0 0 * * 0 0 .CACATGGATGAGGAGAATGAGGAT #1=DDFFFFHHHHJHIGIHHHIJEH RG:Z:HiMom.1 QX:Z:@@@D RX:Z:ATCT
+HiMom:1:2101:1100:2085 141 * 0 0 * * 0 0 TGATCTCCTCCTTCTTGGCCT DDDDHHFHFEIIIIHHBAHBG RG:Z:HiMom.1 QX:Z:@@@D RX:Z:ATCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CCATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CCATGCGT.sam
new file mode 100644
index 0000000..29ae6d9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CCATGCGT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCATGCGT LB:LN_CCATGCGT PL:illumina PU:HiMom.1.CCATGCGT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CGCCTTCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CGCCTTCC.sam
new file mode 100644
index 0000000..78e3f4f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CGCCTTCC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCCTTCC LB:LN_CGCCTTCC PL:illumina PU:HiMom.1.CGCCTTCC CN:BI
+HiMom:1:1201:1122:2227 77 * 0 0 * * 0 0 AGAAGACGAGGCTGAGAGTGACATC @@@FFFFFHHHDHJGHGHCHHJJIJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:GTCA
+HiMom:1:1201:1122:2227 141 * 0 0 * * 0 0 TATAAGGCCCAGTCCAAGGAA FFFFHHHGGIJIGGIJFIJII RG:Z:HiMom.1 QX:Z:@@@F RX:Z:GTCA
+HiMom:1:1201:1160:2109 77 * 0 0 * * 0 0 .AGAAGCCTTTGCACCCTGGGAGGA #1=DDDFFHHHHHJJJJJJJJIIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAT
+HiMom:1:1201:1160:2109 141 * 0 0 * * 0 0 CCTTCCCATGCCACCAACTCG FFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CGCTATGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CGCTATGT.sam
new file mode 100644
index 0000000..59c4e1a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CGCTATGT.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCTATGT LB:LN_CGCTATGT PL:illumina PU:HiMom.1.CGCTATGT CN:BI
+HiMom:1:1101:1291:2150 77 * 0 0 * * 0 0 CGTGGGGAACCTGGCGCTAAACCAT @BBFFFFFHHHHHJJJJIJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1101:1291:2150 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHFHHIJJJIIIGIJIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1101:1314:2233 77 * 0 0 * * 0 0 GTTTATTGGGGCATTCCTTATCCCA @??DDDDBDHF>FCHGGGBFAAED9 RG:Z:HiMom.1 QX:Z:@@<D RX:Z:AGGA
+HiMom:1:1101:1314:2233 141 * 0 0 * * 0 0 AAGTTGGGCTGACCTGACAGA DD;=FBFADBCGDEH?F;FCG RG:Z:HiMom.1 QX:Z:@@<D RX:Z:AGGA
+HiMom:1:1101:1441:2148 77 * 0 0 * * 0 0 ACTTTCACCGCTACACGACCGGGGG CCCFFFFFHGFFHIIFIHJIGGII> RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1101:1441:2148 141 * 0 0 * * 0 0 GGCTCTAGAGGGGGTAGAGGG FFFFHHDFBHIIJJ1?FGHIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1201:1043:2246 77 * 0 0 * * 0 0 .TTCTCGGCTGTCATGTGCAACATT #1=DDBDFHHHDFFBHGHGHIIJEH RG:Z:HiMom.1 QX:Z:#0;@ RX:Z:NGCA
+HiMom:1:1201:1043:2246 141 * 0 0 * * 0 0 TCATTTC..GCTTCTCTCTGT @??@=@>##22=;@??><@?? RG:Z:HiMom.1 QX:Z:#0;@ RX:Z:NGCA
+HiMom:1:1201:1134:2144 77 * 0 0 * * 0 0 TGCCAGGAAGTGTTTTTTCTGGGTC @CCFFEFFHHFFFGIJJJJJJJJGH RG:Z:HiMom.1 QX:Z:B@?D RX:Z:AGTG
+HiMom:1:1201:1134:2144 141 * 0 0 * * 0 0 TGAGTAATGGTTGAGAGGTGG DDFFFHHGHJHHGFIHHIFGI RG:Z:HiMom.1 QX:Z:B@?D RX:Z:AGTG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CTAACTCG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTAACTCG.sam
new file mode 100644
index 0000000..182aae0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTAACTCG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTAACTCG LB:LN_CTAACTCG PL:illumina PU:HiMom.1.CTAACTCG CN:BI
+HiMom:1:1101:1363:2138 77 * 0 0 * * 0 0 .GTCTGGCCTGCACAGACATCCTAC #1=DDFFFHHHHHJJJIJJIJJJIJ RG:Z:HiMom.1 QX:Z:C@@F RX:Z:GTTC
+HiMom:1:1101:1363:2138 141 * 0 0 * * 0 0 TTAAACCTGTTAGAACTTCTG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:C@@F RX:Z:GTTC
+HiMom:1:1201:1393:2143 77 * 0 0 * * 0 0 TGGTTGATCCTGCCAGTAGCATATG @@@ADADDFHFFDBHE?G at HIIIEE RG:Z:HiMom.1 QX:Z:C at CF RX:Z:GATA
+HiMom:1:1201:1393:2143 141 * 0 0 * * 0 0 AATGCACGCATCCCCCCCGCG FFFFGGHHHHJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:C at CF RX:Z:GATA
+HiMom:1:2101:1273:2119 589 * 0 0 * * 0 0 .AGATAAGAGTCCACACAGTTGAGT #11AAAAA<A?4=C=7?733<ACA3 RG:Z:HiMom.1 QX:Z:>=>< RX:Z:ATGA
+HiMom:1:2101:1273:2119 653 * 0 0 * * 0 0 TGGATCTTCTCTAACTTGTCA AAAAA+2AA?CB4@@ABB3?A RG:Z:HiMom.1 QX:Z:>=>< RX:Z:ATGA
+HiMom:1:2101:1414:2098 77 * 0 0 * * 0 0 .AGGACATCGATAAAGGCGAGGTGT #1=DDFFFHHHHHJJJJJJJJJHHG RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTGG
+HiMom:1:2101:1414:2098 141 * 0 0 * * 0 0 GGCCGGTGCCGTCGGGCCCAA FFFFHHHHGJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTGG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CTATGCGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTATGCGC.sam
new file mode 100644
index 0000000..2a5a21c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTATGCGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGC LB:LN_CTATGCGC PL:illumina PU:HiMom.1.CTATGCGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CTATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTATGCGT.sam
new file mode 100644
index 0000000..6fa5308
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTATGCGT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGT LB:LN_CTATGCGT PL:illumina PU:HiMom.1.CTATGCGT CN:BI
+HiMom:1:1201:1083:2121 77 * 0 0 * * 0 0 .AGAACTGGCGCTGCGGGATGAACC #1=BDFFFHHHHHJJJJJHIJIJJJ RG:Z:HiMom.1 QX:Z:#### RX:Z:ACAC
+HiMom:1:1201:1083:2121 141 * 0 0 * * 0 0 ACAACACCACCGCCCTCCCCC ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:ACAC
+HiMom:1:1201:1185:2143 77 * 0 0 * * 0 0 ATCTGCCTGGTTCGGCCCGCCTGCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:GCTG
+HiMom:1:1201:1185:2143 141 * 0 0 * * 0 0 AAGGCCCGTGGGCCAGAGGTG FFFFHHHHHJJJJJJJJJJHI RG:Z:HiMom.1 QX:Z:@CCF RX:Z:GCTG
+HiMom:1:1201:1219:2115 77 * 0 0 * * 0 0 .TATAGTGGAGGCCGGAGCAGGAAC #1:DABADHHHFHIIIGGHGIIIII RG:Z:HiMom.1 QX:Z:???D RX:Z:TGGG
+HiMom:1:1201:1219:2115 141 * 0 0 * * 0 0 AGTAGTTCCCTGCTAAGGGAG BDBDADDDDIEID:AFFD:?8 RG:Z:HiMom.1 QX:Z:???D RX:Z:TGGG
+HiMom:1:1201:1472:2121 589 * 0 0 * * 0 0 .TAAAGTGTGAACAAGGAAGGTCAT #07>@<9=@################ RG:Z:HiMom.1 QX:Z:=+=? RX:Z:GTGT
+HiMom:1:1201:1472:2121 653 * 0 0 * * 0 0 GCTCTTCCGATCTGGAGGATG ?A4A==A at 7A<?######### RG:Z:HiMom.1 QX:Z:=+=? RX:Z:GTGT
+HiMom:1:2101:1013:2146 77 * 0 0 * * 0 0 .ACACTGCTGCAGATGACAAGCAGC #4BDFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:2101:1013:2146 141 * 0 0 * * 0 0 CGCTAGAACCAACTTATTCAT 24=?@@?@?@@?@@@@@@?@@ RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:2101:1231:2208 77 * 0 0 * * 0 0 ACGCCGCAAGTCAGAGCCCCCCAGA @@@DDDFFFFB:DBBEBEFDHBDDB RG:Z:HiMom.1 QX:Z:@@;1 RX:Z:AGCC
+HiMom:1:2101:1231:2208 141 * 0 0 * * 0 0 AGTGTTGGTGTGTTGACTGTT ADABCF;BF<AACGCHEBHC< RG:Z:HiMom.1 QX:Z:@@;1 RX:Z:AGCC
+HiMom:1:2101:1233:2133 589 * 0 0 * * 0 0 GAGAGAAGCACTCTTGAGCGGGATA 0;(@((@)2@############### RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:2101:1233:2133 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFGHHHHJJJFDDDDDDDD RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CTGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTGCGGAT.sam
new file mode 100644
index 0000000..33898ee
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTGCGGAT.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGCGGAT LB:LN_CTGCGGAT PL:illumina PU:HiMom.1.CTGCGGAT CN:BI
+HiMom:1:2101:1102:2221 77 * 0 0 * * 0 0 TTTCATCTTATTTCATTGGTTTATA CCCFFFFFHHHHHJIJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATAA
+HiMom:1:2101:1102:2221 141 * 0 0 * * 0 0 CTGACTCTACTCAGTAGATTA FFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATAA
+HiMom:1:2101:1126:2082 77 * 0 0 * * 0 0 .GTTTTAGGGGTGCGCAGGAGTCAA #11=A=DD?DF at D@CCGHIEFH at BG RG:Z:HiMom.1 QX:Z:@C at D RX:Z:TCTC
+HiMom:1:2101:1126:2082 141 * 0 0 * * 0 0 TTTCCACCTTGGTCACCTTCC DDFFHHHHHJEGGIHHIJGIH RG:Z:HiMom.1 QX:Z:@C at D RX:Z:TCTC
+HiMom:1:2101:1216:2172 77 * 0 0 * * 0 0 TTTCTTCGCAGGATTTTTCTGAGCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GGAC
+HiMom:1:2101:1216:2172 141 * 0 0 * * 0 0 TTCTAGGGGATTTAGCGGGGT FFFFHHHHHJJJJJJJJJJJD RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GGAC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/CTGTAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTGTAATC.sam
new file mode 100644
index 0000000..1f6b492
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/CTGTAATC.sam
@@ -0,0 +1,14 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGTAATC LB:LN_CTGTAATC PL:illumina PU:HiMom.1.CTGTAATC CN:BI
+HiMom:1:1101:1403:2194 77 * 0 0 * * 0 0 CTAAACAGAGAGAAGGTTTCTCTTT CCCFFFFFHHHHHJJJFHIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAT
+HiMom:1:1101:1403:2194 141 * 0 0 * * 0 0 GGTGAAACCCTGTCTCTACTA FFDDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAT
+HiMom:1:1201:1045:2105 589 * 0 0 * * 0 0 .TAAAGAGAAATCAAGAATACTATT #-4@?(@)@@############### RG:Z:HiMom.1 QX:Z:#0;@ RX:Z:NTTT
+HiMom:1:1201:1045:2105 653 * 0 0 * * 0 0 TTTTTTT..TTTTTTTTTTTT @@@@@@?##0:????????=< RG:Z:HiMom.1 QX:Z:#0;@ RX:Z:NTTT
+HiMom:1:1201:1483:2126 589 * 0 0 * * 0 0 .TGATAAGGTGTTGCTATGTTACCC #1:D?DDDDA??2:<CC4:AEDF>? RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GCAT
+HiMom:1:1201:1483:2126 653 * 0 0 * * 0 0 GCAGCTGGGTGCTGTGATGCA DDBB<DD8F<<CGG?AA?A<F RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GCAT
+HiMom:1:2101:1011:2102 77 * 0 0 * * 0 0 .AAACAAAACTGTAGAACTGTGTAT #1=DDFFFHHHHHJJIJJJIHHHJJ RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:2101:1011:2102 141 * 0 0 * * 0 0 .TCACACATAATTTTAAAATT #22@?@@??@@@@@??@@@@@ RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:2101:1245:2154 77 * 0 0 * * 0 0 TCGTTAAGTATATTCTTAGGTATTT CCCFFDFFFHFHHIIJJJJJFJJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACCA
+HiMom:1:2101:1245:2154 141 * 0 0 * * 0 0 ATCAGTAGCACCACTATACAC FFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACCA
+HiMom:1:2101:1386:2105 77 * 0 0 * * 0 0 .TACTAAAGAAAAAGTTGAAGAACT #1=DDDFFHHHHHJJGHIJJJJIJJ RG:Z:HiMom.1 QX:Z:B@@D RX:Z:AGGA
+HiMom:1:2101:1386:2105 141 * 0 0 * * 0 0 ATTATTCTTCTGCCATAAGGT DFFFHGFHHIJJJJJGIGIJH RG:Z:HiMom.1 QX:Z:B@@D RX:Z:AGGA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GAAAAAAA.sam
new file mode 100644
index 0000000..0bfb1f9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAAAAAA LB:LN_GAAAAAAA PL:illumina PU:HiMom.1.GAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GAACGAT..sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GAACGAT..sam
new file mode 100644
index 0000000..1636a99
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GAACGAT..sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAACGAT. LB:LN_GAACGAT. PL:illumina PU:HiMom.1.GAACGAT. CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GAAGGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GAAGGAAG.sam
new file mode 100644
index 0000000..6e2cdda
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GAAGGAAG.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAGGAAG LB:LN_GAAGGAAG PL:illumina PU:HiMom.1.GAAGGAAG CN:BI
+HiMom:1:1101:1338:2175 77 * 0 0 * * 0 0 CCCACCTTCCGGCGGCCGAAGACAC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTT
+HiMom:1:1101:1338:2175 141 * 0 0 * * 0 0 GTTGGCTTTAACATCCACAAT FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTT
+HiMom:1:1201:1028:2202 77 * 0 0 * * 0 0 .TCCTGGGAAACGGGGCGCGGCTGG #4BDDDFFHHHHHIJIIJJJJJJIJ RG:Z:HiMom.1 QX:Z:#### RX:Z:NNAA
+HiMom:1:1201:1028:2202 141 * 0 0 * * 0 0 AC.C.T.......GG..TG.. @?################### RG:Z:HiMom.1 QX:Z:#### RX:Z:NNAA
+HiMom:1:2101:1084:2188 77 * 0 0 * * 0 0 TTGCTGCATGGGTTAATTGAGAATA CCCFFFFFHHHHFHHIIJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TACA
+HiMom:1:2101:1084:2188 141 * 0 0 * * 0 0 AGGTCAAAATCAGCAACAAGT FFFDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TACA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCAGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCAGGA.sam
new file mode 100644
index 0000000..9463a4a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCAGGA.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGA LB:LN_GACCAGGA PL:illumina PU:HiMom.1.GACCAGGA CN:BI
+HiMom:1:1101:1089:2172 77 * 0 0 * * 0 0 TTCCAGCATGCGGTTTAAGTAGGAT @CCFDFDBDFBF:<CEBHAFHHICH RG:Z:HiMom.1 QX:Z::<<? RX:Z:TCCG
+HiMom:1:1101:1089:2172 141 * 0 0 * * 0 0 G.................... ?#################### RG:Z:HiMom.1 QX:Z::<<? RX:Z:TCCG
+HiMom:1:1101:1347:2149 77 * 0 0 * * 0 0 GAGCAGATCGGAAGAGCACAGATCG @@@FFDDDHHHHHIJJBGGHJIHEG RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTC
+HiMom:1:1101:1347:2149 141 * 0 0 * * 0 0 TTCCGATCTGTGCTCTTCCGA FFFFDFHHFIJDGIGGHGIGH RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTC
+HiMom:1:1201:1095:2146 77 * 0 0 * * 0 0 GCTGAGTCATGTAGTAAGCCTGTGC BB at FDDDFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACTG
+HiMom:1:1201:1095:2146 141 * 0 0 * * 0 0 ACAACACCAAATGCTGCTAAG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACTG
+HiMom:1:1201:1123:2161 589 * 0 0 * * 0 0 CACTAACTCCTGACCTCAAATAATC ?7?=DD?DD+CDBE>E at EEF@+<CF RG:Z:HiMom.1 QX:Z:===A RX:Z:CGTG
+HiMom:1:1201:1123:2161 653 * 0 0 * * 0 0 TGCTCTTCCGATCTGCATACA AAA8AAAA<AAA)@CBA9>A# RG:Z:HiMom.1 QX:Z:===A RX:Z:CGTG
+HiMom:1:1201:1439:2156 77 * 0 0 * * 0 0 AGCCGCGAGGTGCTGGCGGACTTCC :;1BDDDAA88A<?<E1C:D##### RG:Z:HiMom.1 QX:Z:#### RX:Z:GGAG
+HiMom:1:1201:1439:2156 141 * 0 0 * * 0 0 ATTATTTGCCTTGAAGTAAGC 2<>>@>8@>8;@######### RG:Z:HiMom.1 QX:Z:#### RX:Z:GGAG
+HiMom:1:2101:1207:2084 589 * 0 0 * * 0 0 .TAGATGACCAAAACTTGCAGGGCA #1:A<?@A+7A=?CBCCBCCBAAAA RG:Z:HiMom.1 QX:Z:@@@D RX:Z:TCAC
+HiMom:1:2101:1207:2084 653 * 0 0 * * 0 0 CACTCTTCTGGGCATCCCCTG DEDFHHHHHIJIHHGHGGJJJ RG:Z:HiMom.1 QX:Z:@@@D RX:Z:TCAC
+HiMom:1:2101:1312:2105 77 * 0 0 * * 0 0 .TTCCCTCAGGATAGCTGGCGCTCT #1=DDFFFGHGHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:GTTG
+HiMom:1:2101:1312:2105 141 * 0 0 * * 0 0 AGAATAGGTTGAGATCGTTTC FFDFHHFHDHIJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:GTTG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCAGGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCAGGC.sam
new file mode 100644
index 0000000..71b9f20
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCAGGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGC LB:LN_GACCAGGC PL:illumina PU:HiMom.1.GACCAGGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCGTTG.sam
new file mode 100644
index 0000000..939f2de
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCGTTG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCGTTG LB:LN_GACCGTTG PL:illumina PU:HiMom.1.GACCGTTG CN:BI
+HiMom:1:1101:1218:2200 77 * 0 0 * * 0 0 GCACCGGAAGAGCACACAGATCGGA CCCFFFFDFHGHHJJIJIJJJJJJI RG:Z:HiMom.1 QX:Z:#### RX:Z:GCTC
+HiMom:1:1101:1218:2200 141 * 0 0 * * 0 0 TTCCGATCTATCTGCTCGTCC 34???3;@############# RG:Z:HiMom.1 QX:Z:#### RX:Z:GCTC
+HiMom:1:1101:1257:2223 77 * 0 0 * * 0 0 TGTATTCGAGAGATCAAAGAGAGAG @@=DDBDD?FFHHEIDBDFCEDBAF RG:Z:HiMom.1 QX:Z::?@D RX:Z:TGCT
+HiMom:1:1101:1257:2223 141 * 0 0 * * 0 0 CTTCCGATCTTTTAGCAAAGC DBDDHFFHDGIGIIJJJGGGI RG:Z:HiMom.1 QX:Z::?@D RX:Z:TGCT
+HiMom:1:1201:1180:2119 77 * 0 0 * * 0 0 .TGAAAGATTTAGAGAGCTTACAAA #1=DDDDDHHHGHJJIIJJJJIJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTC
+HiMom:1:1201:1180:2119 141 * 0 0 * * 0 0 TAAATTTTGCTTTTCTACAGC FFFFHHHHHJJJJIJIJJIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTC
+HiMom:1:2101:1036:2087 77 * 0 0 * * 0 0 .TGTAGTTTCTTTAGGCAAATTTGT #4=BDDDFHHHHHJJJJJJIIJJJI RG:Z:HiMom.1 QX:Z:#4=D RX:Z:NGTC
+HiMom:1:2101:1036:2087 141 * 0 0 * * 0 0 CACTTACGAAGCAAATACTTT DFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:#4=D RX:Z:NGTC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCTAAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCTAAC.sam
new file mode 100644
index 0000000..c4856e6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GACCTAAC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCTAAC LB:LN_GACCTAAC PL:illumina PU:HiMom.1.GACCTAAC CN:BI
+HiMom:1:1101:1302:2244 77 * 0 0 * * 0 0 GGAAAAGACGGAAAGGTTCTATCTC @C at DFFFDFHHHHJIJHHIJJJJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TGAA
+HiMom:1:1101:1302:2244 141 * 0 0 * * 0 0 TACATATAACAAATGCAAAAA FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TGAA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GATATCCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GATATCCA.sam
new file mode 100644
index 0000000..e2b0f40
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GATATCCA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GATATCCA LB:LN_GATATCCA PL:illumina PU:HiMom.1.GATATCCA CN:BI
+HiMom:1:1101:1460:2176 77 * 0 0 * * 0 0 AGTCCAGGCTGAGCCCAGGGAAGAA CCCFFFFFHHHHGJIJJIJJHIJJI RG:Z:HiMom.1 QX:Z:#### RX:Z:AGGA
+HiMom:1:1101:1460:2176 141 * 0 0 * * 0 0 AAAAGACACAACAAGTCCAAC ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:AGGA
+HiMom:1:2101:1031:2163 77 * 0 0 * * 0 0 .TTTCCATGGCCGTCACCTTTGGGT #4=DDFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:#### RX:Z:NNAC
+HiMom:1:2101:1031:2163 141 * 0 0 * * 0 0 ATTTGTCACCACTAGCCACCA @?@@@@@@@@@@?@@@@@@@? RG:Z:HiMom.1 QX:Z:#### RX:Z:NNAC
+HiMom:1:2101:1226:2088 77 * 0 0 * * 0 0 .GATCGGAAGAGCACACGTTTGACT #4=DAA=DDFHFHIIBFGHHIG>EG RG:Z:HiMom.1 QX:Z:==?B RX:Z:GCTC
+HiMom:1:2101:1226:2088 141 * 0 0 * * 0 0 TTCCGATCTAGGTAATAGCTA DFFFDCDDHFFFAFHDHIJGJ RG:Z:HiMom.1 QX:Z:==?B RX:Z:GCTC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GCCGTCGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GCCGTCGA.sam
new file mode 100644
index 0000000..01fc03b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GCCGTCGA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCGTCGA LB:LN_GCCGTCGA PL:illumina PU:HiMom.1.GCCGTCGA CN:BI
+HiMom:1:1101:1111:2148 77 * 0 0 * * 0 0 GTGGAGACCACCTCCGAGGCCTTGT BBCFFFFFHHHHHJJJIJJJJJJJI RG:Z:HiMom.1 QX:Z:#### RX:Z:GCGA
+HiMom:1:1101:1111:2148 141 * 0 0 * * 0 0 A.A..........GGACGAC. ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:GCGA
+HiMom:1:1101:1221:2143 77 * 0 0 * * 0 0 TTTGGTGGAAATTTTTTGTTATGAT CCCFFBDBHFD?FBFHIIGGIC at EF RG:Z:HiMom.1 QX:Z:@@@F RX:Z:CAAT
+HiMom:1:1101:1221:2143 141 * 0 0 * * 0 0 TGAATGTCTGCACAGCCGCTT FFFDHHHHHJJJIIIJGHIJJ RG:Z:HiMom.1 QX:Z:@@@F RX:Z:CAAT
+HiMom:1:1101:1327:2200 589 * 0 0 * * 0 0 AGGGGGATCCGCCGGGGGACCACAA ######################### RG:Z:HiMom.1 QX:Z:@B at F RX:Z:GTCA
+HiMom:1:1101:1327:2200 653 * 0 0 * * 0 0 TCTGGGCTGTCGACAGGTGTC FFFFHHHHGIJJJJJJIFHHI RG:Z:HiMom.1 QX:Z:@B at F RX:Z:GTCA
+HiMom:1:2101:1122:2136 77 * 0 0 * * 0 0 GTAGGCGCTCAGCAAATACTTGTCG @@@DDDD8?<CACEHHBBHDAAFH@ RG:Z:HiMom.1 QX:Z:???B RX:Z:CTTG
+HiMom:1:2101:1122:2136 141 * 0 0 * * 0 0 CCAGCCTGCAGGCCCCGCGGC BAABDD?DDIID)A:3<EADD RG:Z:HiMom.1 QX:Z:???B RX:Z:CTTG
+HiMom:1:2101:1459:2083 77 * 0 0 * * 0 0 .CACACGCCACACGGAGCACACTTT #4=DDFFFHHHHHJJJJJJJJIIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATTT
+HiMom:1:2101:1459:2083 141 * 0 0 * * 0 0 CACCAAAATAATCAGAAGGCC FFFDBHGHHIGGIJFJJGGFH RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GCCTAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GCCTAGCC.sam
new file mode 100644
index 0000000..d54ca9b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GCCTAGCC.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCTAGCC LB:LN_GCCTAGCC PL:illumina PU:HiMom.1.GCCTAGCC CN:BI
+HiMom:1:1101:1165:2239 77 * 0 0 * * 0 0 GGCGGAGGCAGCATTTCAGCTGTGA CCCFFDFFHHHHHIJJIGHHHJHHF RG:Z:HiMom.1 QX:Z:#### RX:Z:ATGG
+HiMom:1:1101:1165:2239 141 * 0 0 * * 0 0 AAGTCGAGACAGAAGTGAGAA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:ATGG
+HiMom:1:1101:1290:2225 77 * 0 0 * * 0 0 CTTGGGCGCATGGTGAGGGAGGGAG @@@FFDDFHDFH??CBEBHHIGDCD RG:Z:HiMom.1 QX:Z:C@@F RX:Z:TCAG
+HiMom:1:1101:1290:2225 141 * 0 0 * * 0 0 TTCACTGGCAAAGACAGTCAC BEDDFHFHGIIICEHGDHBHE RG:Z:HiMom.1 QX:Z:C@@F RX:Z:TCAG
+HiMom:1:1201:1280:2179 77 * 0 0 * * 0 0 TTCAAGGAATCGTCCTGCCTCAGCC BCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@BF RX:Z:GAGG
+HiMom:1:1201:1280:2179 141 * 0 0 * * 0 0 ACTGCTTGAGTCCAGGAGTTC FDEFGHHHHIFGCHIJJJGGI RG:Z:HiMom.1 QX:Z:@@BF RX:Z:GAGG
+HiMom:1:1201:1300:2137 77 * 0 0 * * 0 0 .TGTAATCCCAGCTCTCAGGGAGGC #1=ADDDDDDDBBA?@AE?E at FE8; RG:Z:HiMom.1 QX:Z:@@?D RX:Z:GCTC
+HiMom:1:1201:1300:2137 141 * 0 0 * * 0 0 TTCCGATCTTTTTTTTAATTT DDDDFDHADEHGIGGED3?FD RG:Z:HiMom.1 QX:Z:@@?D RX:Z:GCTC
+HiMom:1:2101:1023:2237 589 * 0 0 * * 0 0 .TAAACAGCTTCTGCACAGCCAAAG #00@@?>=39>9;<412@?###### RG:Z:HiMom.1 QX:Z:#### RX:Z:NNTT
+HiMom:1:2101:1023:2237 653 * 0 0 * * 0 0 TGTTTGAGTTCCTTGTAGATT =@?>?@???@:>?@??>?;?< RG:Z:HiMom.1 QX:Z:#### RX:Z:NNTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GTAACATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GTAACATC.sam
new file mode 100644
index 0000000..18273b7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GTAACATC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTAACATC LB:LN_GTAACATC PL:illumina PU:HiMom.1.GTAACATC CN:BI
+HiMom:1:1101:1188:2237 77 * 0 0 * * 0 0 TCCCCCTCCCTTTTGCGCACACACC @?@DDADDHDHBDH<EFHIIHG?HF RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTT
+HiMom:1:1101:1188:2237 141 * 0 0 * * 0 0 CCTTCAAGACAGAAGTGAGAA FDDEFHHFFE at FDHHAIAFHG RG:Z:HiMom.1 QX:Z:CCCF RX:Z:GCTT
+HiMom:1:2101:1208:2231 589 * 0 0 * * 0 0 TCACTAAACATCCAAACATCACTTT ######################### RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CTTT
+HiMom:1:2101:1208:2231 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFHHHHHJJJHFDDDDDDD RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/GTCCACAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/GTCCACAG.sam
new file mode 100644
index 0000000..9c36fad
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/GTCCACAG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTCCACAG LB:LN_GTCCACAG PL:illumina PU:HiMom.1.GTCCACAG CN:BI
+HiMom:1:1101:1069:2159 77 * 0 0 * * 0 0 TCCCTTACCATCAAATCAATTG.CC CCCFFFFFHHHHHJJJJJJJJJ#3A RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:GACG
+HiMom:1:1101:1069:2159 141 * 0 0 * * 0 0 T.................... ?#################### RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:GACG
+HiMom:1:1201:1486:2109 77 * 0 0 * * 0 0 .CACCTCCTAGCCCCTCACTTCTGT #1=B;BDDHHHGFIIIIIIIIIGGG RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACGT
+HiMom:1:1201:1486:2109 141 * 0 0 * * 0 0 GTGCTCTTCCCGATCTGTATA F?DDFBHHHJJIIDHJIJJJH RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACGT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/N.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/N.sam
new file mode 100644
index 0000000..e1344fe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/N.sam
@@ -0,0 +1,34 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_N LB:LN_N PL:illumina PU:HiMom.1.N CN:BI
+HiMom:1:1101:1031:2224 589 * 0 0 * * 0 0 .AATA............T....... #0;@@#################### BC:Z:NNNNNNNN RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1101:1031:2224 653 * 0 0 * * 0 0 ..................... ##################### BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1101:1039:2147 589 * 0 0 * * 0 0 .CCAA.G..GG....ATGTAA.... #4;@@#4##2<####43@@@@#### BC:Z:NNNNNNNN RG:Z:HiMom.1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1101:1039:2147 653 * 0 0 * * 0 0 ..................... ##################### BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:#### RX:Z:NNNN
+HiMom:1:1101:1046:2175 589 * 0 0 * * 0 0 .TGCC.G.GTT.CG.GGTCTT.... #4;@@#################### BC:Z:NNNNNNNN RG:Z:HiMom.1 QX:Z:#### RX:Z:NNGG
+HiMom:1:1101:1046:2175 653 * 0 0 * * 0 0 A.................... @#################### BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:#### RX:Z:NNGG
+HiMom:1:1101:1047:2122 589 * 0 0 * * 0 0 .CTAA.G.ACT.TG.GTGTGC.... #0;@@#4#3@@#3@#2<@@@@#### BC:Z:NNNANNNN RG:Z:HiMom.1 QX:Z:#### RX:Z:NNTC
+HiMom:1:1101:1047:2122 653 * 0 0 * * 0 0 A.................... ##################### BC:Z:NNNANNNN RG:Z:HiMom.1 XN:i:1 QX:Z:#### RX:Z:NNTC
+HiMom:1:1101:1048:2197 589 * 0 0 * * 0 0 .CTCC.G.TCA.CA.GTGGAG.... #0;?@#################### BC:Z:NNNCNNNN RG:Z:HiMom.1 QX:Z:#### RX:Z:NNGT
+HiMom:1:1101:1048:2197 653 * 0 0 * * 0 0 G.................... ##################### BC:Z:NNNCNNNN RG:Z:HiMom.1 QX:Z:#### RX:Z:NNGT
+HiMom:1:1101:1065:2193 77 * 0 0 * * 0 0 GAAGTACGCCCTGCCCCTGGTT.GC ?@@DAADAHHFHBEBEGGHG?#### BC:Z:GAACGATN RG:Z:HiMom.1 QX:Z:#### RX:Z:NCTT
+HiMom:1:1101:1065:2193 141 * 0 0 * * 0 0 G.................... ##################### BC:Z:GAACGATN RG:Z:HiMom.1 QX:Z:#### RX:Z:NCTT
+HiMom:1:1101:1162:2207 589 * 0 0 * * 0 0 ACCTTGAGGAGAACATAAGAGCAAA ######################### BC:Z:ACAAAATT RG:Z:HiMom.1 QX:Z:#### RX:Z:TAAA
+HiMom:1:1101:1162:2207 653 * 0 0 * * 0 0 ACTGGGGAAGTTAGAGGAATG ##################### BC:Z:ACAAAATT RG:Z:HiMom.1 QX:Z:#### RX:Z:TAAA
+HiMom:1:1201:1159:2179 589 * 0 0 * * 0 0 GTTAGCACAGATATTGGATGAGTGA ######################### BC:Z:AAAAAAAA RG:Z:HiMom.1 QX:Z:===A RX:Z:TTTT
+HiMom:1:1201:1159:2179 653 * 0 0 * * 0 0 TTTTTATTTTTCTAAATACTT A#################### BC:Z:AAAAAAAA RG:Z:HiMom.1 QX:Z:===A RX:Z:TTTT
+HiMom:1:1201:1414:2174 589 * 0 0 * * 0 0 GCCAAAAAAAAGAACCAGCCCAAGG ######################### BC:Z:AGAAAAGA RG:Z:HiMom.1 QX:Z:@;@1 RX:Z:TTTT
+HiMom:1:1201:1414:2174 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT BDADF????FFEB>B6=BBBB BC:Z:AGAAAAGA RG:Z:HiMom.1 QX:Z:@;@1 RX:Z:TTTT
+HiMom:1:2101:1040:2208 589 * 0 0 * * 0 0 .ATGCCCACCTCCCTCCTACGCACC ######################### BC:Z:ACGAAATC RG:Z:HiMom.1 QX:Z:#### RX:Z:NCTG
+HiMom:1:2101:1040:2208 653 * 0 0 * * 0 0 ATAGTCACTGAAATGAATTCA >(2 at .22@@############ BC:Z:ACGAAATC RG:Z:HiMom.1 QX:Z:#### RX:Z:NCTG
+HiMom:1:2101:1059:2083 77 * 0 0 * * 0 0 .AAGAGGGGTCAAGAGTTAAACTTA #1=DDFFFHFHHGIGHGHJJJJJJI BC:Z:TACCGTCT RG:Z:HiMom.1 QX:Z:#1=B RX:Z:NGAA
+HiMom:1:2101:1059:2083 141 * 0 0 * * 0 0 TGTCTTAGAAGGATGCTTCTC DDDEHHGHHJJJJJIJJIIJJ BC:Z:TACCGTCT RG:Z:HiMom.1 QX:Z:#1=B RX:Z:NGAA
+HiMom:1:2101:1143:2137 77 * 0 0 * * 0 0 ATGCAGCAGCTGCCACGGAGCACCA CC at FFDFDFHFHHGIDHEHIGJJJJ BC:Z:TCCGTCTA RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GCTC
+HiMom:1:2101:1143:2137 141 * 0 0 * * 0 0 TTCAGATCTAGGGGGAACAGC D?=DCAFFFHIIDG:EFHIII BC:Z:TCCGTCTA RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GCTC
+HiMom:1:2101:1151:2182 589 * 0 0 * * 0 0 TTGTTTTGGCTTATAATGACAAGAA ;;8-2).2())(<6=@8;?4??>>? BC:Z:GAAAAAAA RG:Z:HiMom.1 QX:Z:9<<? RX:Z:TTTT
+HiMom:1:2101:1151:2182 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTA @?@;5=?############## BC:Z:GAAAAAAA RG:Z:HiMom.1 QX:Z:9<<? RX:Z:TTTT
+HiMom:1:2101:1215:2110 77 * 0 0 * * 0 0 .AATATAATTTGGAGACCCTTTGTT #1=DDDDDEDDDDIDDBB3ABAB## BC:Z:AAAAGAAG RG:Z:HiMom.1 QX:Z:#### RX:Z:ATCT
+HiMom:1:2101:1215:2110 141 * 0 0 * * 0 0 TTCCCCCATTAAGAACAGCAA ##################### BC:Z:AAAAGAAG RG:Z:HiMom.1 QX:Z:#### RX:Z:ATCT
+HiMom:1:2101:1285:2105 589 * 0 0 * * 0 0 .GCGGGGAGCCGGGCGTGGAATGCG ######################### BC:Z:TATCTCGG RG:Z:HiMom.1 QX:Z:#### RX:Z:TGTC
+HiMom:1:2101:1285:2105 653 * 0 0 * * 0 0 TATATCAACCAACACCTCTTC ():94:9:???########## BC:Z:TATCTCGG RG:Z:HiMom.1 QX:Z:#### RX:Z:TGTC
+HiMom:1:2101:1450:2134 77 * 0 0 * * 0 0 AGCACGCTGCCGCGGGACCTGCCCA ?@@AD at DDHFH?DGIIIIG at FGFBF BC:Z:ACCAGTTG RG:Z:HiMom.1 QX:Z:CC at F RX:Z:ACAA
+HiMom:1:2101:1450:2134 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC DFDFFDFHFGIIE1CGGHBGE BC:Z:ACCAGTTG RG:Z:HiMom.1 QX:Z:CC at F RX:Z:ACAA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TAAGCACA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TAAGCACA.sam
new file mode 100644
index 0000000..c7c3ce5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TAAGCACA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAAGCACA LB:LN_TAAGCACA PL:illumina PU:HiMom.1.TAAGCACA CN:BI
+HiMom:1:1201:1064:2239 77 * 0 0 * * 0 0 CATGCAGCGCAAGTAGGTCTACAAG @@;DFAFFHHHHAHEGHFDGGFABG RG:Z:HiMom.1 QX:Z:8?@: RX:Z:GGGA
+HiMom:1:1201:1064:2239 141 * 0 0 * * 0 0 TGGGAGGGCGATGAGGACTAG DDDACC:FHHGIH<EGDDDFH RG:Z:HiMom.1 QX:Z:8?@: RX:Z:GGGA
+HiMom:1:2101:1258:2092 77 * 0 0 * * 0 0 .CACACACACACTCATTCACAGCTT #1=DDDFFHHHFHJJIJGGGIIGIJ RG:Z:HiMom.1 QX:Z:#### RX:Z:TTAG
+HiMom:1:2101:1258:2092 141 * 0 0 * * 0 0 ACAAAACACCAAAATAAAATA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:TTAG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TACCGTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TACCGTCT.sam
new file mode 100644
index 0000000..04cf526
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TACCGTCT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TACCGTCT LB:LN_TACCGTCT PL:illumina PU:HiMom.1.TACCGTCT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TAGCGGTA.sam
new file mode 100644
index 0000000..3455237
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TAGCGGTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAGCGGTA LB:LN_TAGCGGTA PL:illumina PU:HiMom.1.TAGCGGTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCAGCC.sam
new file mode 100644
index 0000000..5f7c0e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCAGCC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCAGCC LB:LN_TATCAGCC PL:illumina PU:HiMom.1.TATCAGCC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCCAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCCAGG.sam
new file mode 100644
index 0000000..40d47fe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCCAGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCAGG LB:LN_TATCCAGG PL:illumina PU:HiMom.1.TATCCAGG CN:BI
+HiMom:1:1101:1071:2233 77 * 0 0 * * 0 0 TTTGACAGTCTCTGAATGAGAA.GG CCCFFFFFHHHHHJIIIJJJIJ#4A RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:GTTT
+HiMom:1:1101:1071:2233 141 * 0 0 * * 0 0 G.................... @#################### RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:GTTT
+HiMom:1:1201:1140:2125 77 * 0 0 * * 0 0 .TTTCAGTTCAGAGAACTGCAGAAT #1=DBDFDHHHHGJIJJJJJIIIJI RG:Z:HiMom.1 QX:Z:CC at F RX:Z:TTCA
+HiMom:1:1201:1140:2125 141 * 0 0 * * 0 0 TAAATTGGTCTTAGATGTTGC FFFFHHHHFGIJIIIJIJIJJ RG:Z:HiMom.1 QX:Z:CC at F RX:Z:TTCA
+HiMom:1:1201:1236:2187 77 * 0 0 * * 0 0 TTTAAATGGGTAAGAAGCCCGGCTC @BCDDFEFHHDHHJJJJJIJJIJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CTCC
+HiMom:1:1201:1236:2187 141 * 0 0 * * 0 0 TTAGCGGATTCCGACTTCCAT FFFDHHHHGIJJIGIGIJJGG RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CTCC
+HiMom:1:2101:1133:2239 77 * 0 0 * * 0 0 AGACAGAAGTACGGGAAGGCGAAGA @@@FFFFEHFHHHJJCGDHIIECD@ RG:Z:HiMom.1 QX:Z:?@?D RX:Z:AGCT
+HiMom:1:2101:1133:2239 141 * 0 0 * * 0 0 TTTTGTTTCCTAGCTTGTCTT DFFFHHHHF4ACFHIJHHHGH RG:Z:HiMom.1 QX:Z:?@?D RX:Z:AGCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCCATG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCCATG.sam
new file mode 100644
index 0000000..a5373e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCCATG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCATG LB:LN_TATCCATG PL:illumina PU:HiMom.1.TATCCATG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCTCGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCTCGG.sam
new file mode 100644
index 0000000..04096c5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCTCGG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTCGG LB:LN_TATCTCGG PL:illumina PU:HiMom.1.TATCTCGG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCTGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCTGCC.sam
new file mode 100644
index 0000000..d30a97b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TATCTGCC.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTGCC LB:LN_TATCTGCC PL:illumina PU:HiMom.1.TATCTGCC CN:BI
+HiMom:1:1101:1267:2209 77 * 0 0 * * 0 0 GAGACGGAGGCCAACGGGGGCCTGG @@CFFFFD8FDHFHIGIBG?@BCDG RG:Z:HiMom.1 QX:Z:=;?D RX:Z:GGCA
+HiMom:1:1101:1267:2209 141 * 0 0 * * 0 0 GAGTCTCCAACAGCCCCGTAC DDD?CCFHAIIIGGIIGE at EG RG:Z:HiMom.1 QX:Z:=;?D RX:Z:GGCA
+HiMom:1:1101:1353:2226 77 * 0 0 * * 0 0 TTGCTTGTCTGTAAAGTATTTTATT @C at DDFFDHHFHFHHIBGG>IHHII RG:Z:HiMom.1 QX:Z:BBBF RX:Z:GTGC
+HiMom:1:1101:1353:2226 141 * 0 0 * * 0 0 TCTTCCGATCTTCAGGTTACC FFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:BBBF RX:Z:GTGC
+HiMom:1:1101:1435:2194 77 * 0 0 * * 0 0 GAGAAAGAACATGACTACAGAGATG CCCFFFFFHHHHHJJJJJJJJJHJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1101:1435:2194 141 * 0 0 * * 0 0 GTTTTCTTTTACTGAAGTGTA FDFFHHHHHJJJJIHIJHHHJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TTTT
+HiMom:1:1201:1084:2204 77 * 0 0 * * 0 0 GGCCCGTGGACGCCGCCGAAGAAGC CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TGGC
+HiMom:1:1201:1084:2204 141 * 0 0 * * 0 0 TCCTCAGGCTCTCATCAGTTG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:TGGC
+HiMom:1:1201:1142:2242 77 * 0 0 * * 0 0 TGTTGATAGTCCTTCTTATCTTAGT ???DB?==CC2<AC:CC<CFEF<FF RG:Z:HiMom.1 QX:Z:?=?D RX:Z:GTAA
+HiMom:1:1201:1142:2242 141 * 0 0 * * 0 0 AATGTAAAATAATAAAAAATG DDD;AF<DF<FFFFIIIFF@< RG:Z:HiMom.1 QX:Z:?=?D RX:Z:GTAA
+HiMom:1:1201:1187:2100 77 * 0 0 * * 0 0 .GCGGTAATTCCAGCTCCAATAGCG #1:BB2 at DHHFHHIIIIHHIIGHGG RG:Z:HiMom.1 QX:Z:=<=; RX:Z:AAAA
+HiMom:1:1201:1187:2100 141 * 0 0 * * 0 0 AAGAGCCCGCATTGCCGAGAC AA################### RG:Z:HiMom.1 QX:Z:=<=; RX:Z:AAAA
+HiMom:1:1201:1392:2109 77 * 0 0 * * 0 0 .CTGAAGAGGCCAAAGCGCCCTCCA #1=DDFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:BBCF RX:Z:GTCA
+HiMom:1:1201:1392:2109 141 * 0 0 * * 0 0 GACAGGGGGATTTGGGCTGTG FFFFHHHHHHJJJHIJIJJJJ RG:Z:HiMom.1 QX:Z:BBCF RX:Z:GTCA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TCCGTCTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TCCGTCTA.sam
new file mode 100644
index 0000000..8f5dbc6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TCCGTCTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCCGTCTA LB:LN_TCCGTCTA PL:illumina PU:HiMom.1.TCCGTCTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TCGCTAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TCGCTAGA.sam
new file mode 100644
index 0000000..c17cc57
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TCGCTAGA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCGCTAGA LB:LN_TCGCTAGA PL:illumina PU:HiMom.1.TCGCTAGA CN:BI
+HiMom:1:1101:1143:2192 77 * 0 0 * * 0 0 GGAGCGAGTCTGGGTCTCAGCCCCG CCCFFFFFHHHHHJGHIIIHJJJJI RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CGAC
+HiMom:1:1101:1143:2192 141 * 0 0 * * 0 0 AAGTCTGGCTTATCACTCATC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:CGAC
+HiMom:1:1101:1479:2221 77 * 0 0 * * 0 0 TGTAAAGTATGCTGGCTCAGTGTAT BBBFDFFEHHHHHJJJJJJJIJHJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:GGGG
+HiMom:1:1101:1479:2221 141 * 0 0 * * 0 0 AAATCTATTTTTATGTAAAAA FFFFHHHHHJIGIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCF RX:Z:GGGG
+HiMom:1:1201:1312:2112 77 * 0 0 * * 0 0 .TCCCAGCGAACCCGCGTGCAACCT #1=DFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATTT
+HiMom:1:1201:1312:2112 141 * 0 0 * * 0 0 GCAGGAGCCGGCGCAGGTGCA FFFFHHHHHJJJIJJJJGHIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATTT
+HiMom:1:1201:1416:2128 77 * 0 0 * * 0 0 .ACAGGCGTGGAGGAGGCGGCGGCC #4=DDDFFHHHHHJIGJHFHHFFED RG:Z:HiMom.1 QX:Z:@@@D RX:Z:TTGG
+HiMom:1:1201:1416:2128 141 * 0 0 * * 0 0 TGTGGAGGCGGTGGCGGGATC DDDDHHFHHII:?GGHIIB6? RG:Z:HiMom.1 QX:Z:@@@D RX:Z:TTGG
+HiMom:1:2101:1064:2242 77 * 0 0 * * 0 0 ATGAACAAAGGAAGAATTATGCACG ?;?D;DDDF?;:+<<CFFCHE433A RG:Z:HiMom.1 QX:Z:#### RX:Z:NGGA
+HiMom:1:2101:1064:2242 141 * 0 0 * * 0 0 AAAAGGTTGTCAAGCGTTAAA (<?################## RG:Z:HiMom.1 QX:Z:#### RX:Z:NGGA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TCTGCAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TCTGCAAG.sam
new file mode 100644
index 0000000..035b643
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TCTGCAAG.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCTGCAAG LB:LN_TCTGCAAG PL:illumina PU:HiMom.1.TCTGCAAG CN:BI
+HiMom:1:1201:1042:2174 77 * 0 0 * * 0 0 .GTTGGTGTCTTCATTTTATGTATA #1=DDFDFHHHHHJIJJJHIJHIJJ RG:Z:HiMom.1 QX:Z:#0;@ RX:Z:NTCA
+HiMom:1:1201:1042:2174 141 * 0 0 * * 0 0 GGAAGGC..CAAAAAAAGAAA @@?@?<@##3<@@?@@????? RG:Z:HiMom.1 QX:Z:#0;@ RX:Z:NTCA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TGCAAGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGCAAGTA.sam
new file mode 100644
index 0000000..81bf39a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGCAAGTA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCAAGTA LB:LN_TGCAAGTA PL:illumina PU:HiMom.1.TGCAAGTA CN:BI
+HiMom:1:1101:1242:2170 77 * 0 0 * * 0 0 ATGGCAGGGCAGAGTTCTGATGAGT CCCFFFFFHHGGGIFHEIIGIIII? RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GGAA
+HiMom:1:1101:1242:2170 141 * 0 0 * * 0 0 GGAAAAGAAGCACAAGTACAT FDFFHHHGHHGIIGJJEHHIG RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GGAA
+HiMom:1:2101:1163:2222 77 * 0 0 * * 0 0 GAGCAGGCAAGGAGGACTTCTTGTT CCCFFFFFGHHHHJJHHIJJJJJIJ RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GAGC
+HiMom:1:2101:1163:2222 141 * 0 0 * * 0 0 GATAATGGTTCTTTTCCTCAC FFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GAGC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TGCTGCTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGCTGCTG.sam
new file mode 100644
index 0000000..b80a1bb
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGCTGCTG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCTGCTG LB:LN_TGCTGCTG PL:illumina PU:HiMom.1.TGCTGCTG CN:BI
+HiMom:1:1101:1084:2136 77 * 0 0 * * 0 0 .TCTCACTGTGAATTTGTGGTGGGC #1=DDFFFHHHHHJJJJGIJIJJJJ RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:TTTC
+HiMom:1:1101:1084:2136 141 * 0 0 * * 0 0 T.................... @#################### RG:Z:HiMom.1 QX:Z:<<<@ RX:Z:TTTC
+HiMom:1:1201:1285:2100 77 * 0 0 * * 0 0 .AATGACATGTTTAAAGATGGACTC #1:BDDFFHHFHHGIJIJIIIIGII RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GATC
+HiMom:1:1201:1285:2100 141 * 0 0 * * 0 0 TTTTTTGCTTTGTAGTTATAG FFFFHHHHHIIGIABCFFHBF RG:Z:HiMom.1 QX:Z:@@@D RX:Z:GATC
+HiMom:1:2101:1162:2139 77 * 0 0 * * 0 0 AGAGGTGAAATTCTTGGACCGGCGC @@@DDDDDHFHHHDB:EFHHCAG?D RG:Z:HiMom.1 QX:Z:BCCF RX:Z:ATCG
+HiMom:1:2101:1162:2139 141 * 0 0 * * 0 0 TTTATGGTCGGAACTACGACG FFFFHHHHHIJJJJJJJIJJI RG:Z:HiMom.1 QX:Z:BCCF RX:Z:ATCG
+HiMom:1:2101:1195:2150 77 * 0 0 * * 0 0 CCGAGAGAGTGAGAGCGCTCCTGGG CCCFFFFFHFHHHJJJJIJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:AATT
+HiMom:1:2101:1195:2150 141 * 0 0 * * 0 0 GAACTTCACCACCCAGAGGAA FFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:AATT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TGTAACTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGTAACTC.sam
new file mode 100644
index 0000000..8378cf9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGTAACTC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAACTC LB:LN_TGTAACTC PL:illumina PU:HiMom.1.TGTAACTC CN:BI
+HiMom:1:1201:1421:2154 77 * 0 0 * * 0 0 TGTGTGTGTGGGTGTGTGTATATAT ?@?DDFFFFFHH at GEFCCCHGIGJI RG:Z:HiMom.1 QX:Z:BC at D RX:Z:TGTG
+HiMom:1:1201:1421:2154 141 * 0 0 * * 0 0 CTCTTCCGATCTTGTGCTCTT FFFFHHHHHJJJJFHIHHIJJ RG:Z:HiMom.1 QX:Z:BC at D RX:Z:TGTG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TGTAATCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGTAATCA.sam
new file mode 100644
index 0000000..77ab6f7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TGTAATCA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAATCA LB:LN_TGTAATCA PL:illumina PU:HiMom.1.TGTAATCA CN:BI
+HiMom:1:1101:1419:2119 77 * 0 0 * * 0 0 .ATGACTATGGTAACTGAAAGAAAA #1:A1BDADBFFDFIIIEEHECACF RG:Z:HiMom.1 QX:Z:#### RX:Z:ACTT
+HiMom:1:1101:1419:2119 141 * 0 0 * * 0 0 TCCTTTTTTGTTTTACTTTAA ##################### RG:Z:HiMom.1 QX:Z:#### RX:Z:ACTT
+HiMom:1:1201:1208:2132 77 * 0 0 * * 0 0 .CCTCAATGAGCGGCACTATGGGGG #1=DDFFFHHHHGJJIJJGHIJGIJ RG:Z:HiMom.1 QX:Z:@@CD RX:Z:CTGT
+HiMom:1:1201:1208:2132 141 * 0 0 * * 0 0 AGAAAGGATGGTCGGGCTCCA FFFFGHFHHJIJJGJIBHJJG RG:Z:HiMom.1 QX:Z:@@CD RX:Z:CTGT
+HiMom:1:1201:1344:2147 77 * 0 0 * * 0 0 TATCCTCCCTACTATGCCTAGAAGG =?@DADEFHBHDFG>EFGDHGFGHD RG:Z:HiMom.1 QX:Z:@<?? RX:Z:ACGA
+HiMom:1:1201:1344:2147 141 * 0 0 * * 0 0 TTAGTTTTAGCATTGGAGTAG DDDDFHHHFGGHHIIIGGAGH RG:Z:HiMom.1 QX:Z:@<?? RX:Z:ACGA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/TTGTCTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/TTGTCTAT.sam
new file mode 100644
index 0000000..4cf0a92
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/TTGTCTAT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TTGTCTAT LB:LN_TTGTCTAT PL:illumina PU:HiMom.1.TTGTCTAT CN:BI
+HiMom:1:1101:1219:2164 77 * 0 0 * * 0 0 TCAAGCAGGAGCAGCTAAGTCCTAA CCCFFFFFHHHHHJJJJJJHIJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATCT
+HiMom:1:1101:1219:2164 141 * 0 0 * * 0 0 TATCCACTCCTTCCACTTTGG FFFFHHHHHJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ATCT
+HiMom:1:1201:1103:2184 77 * 0 0 * * 0 0 GTAAGAACTACCCTGGGTCCCCGTG @@BFFFFFHHHHHJJJJGIJJJJHI RG:Z:HiMom.1 QX:Z:B at BF RX:Z:AGAA
+HiMom:1:1201:1103:2184 141 * 0 0 * * 0 0 GTTTCAGAATTGTGGCCCCAT FDEFHHHHHJJJGHIJJJJJI RG:Z:HiMom.1 QX:Z:B at BF RX:Z:AGAA
+HiMom:1:1201:1107:2109 77 * 0 0 * * 0 0 .GGGAACCTGGCGCTAAACCATTCG #1=DFFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1201:1107:2109 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHGHHJJJJIIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCF RX:Z:ACAA
+HiMom:1:1201:1252:2141 77 * 0 0 * * 0 0 .TTCCCCCCATGTAATTATTGTGAA #1=DDFFFHHHHHJJJJJJJJIJJJ RG:Z:HiMom.1 QX:Z:BCBF RX:Z:AGTT
+HiMom:1:1201:1252:2141 141 * 0 0 * * 0 0 ATTTTGCCTATGTCCAACAAG FFFFGHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BCBF RX:Z:AGTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/barcode.params b/testdata/picard/illumina/25T8B25T/sams_with_4M/barcode.params
new file mode 100644
index 0000000..6cd8a77
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/barcode.params
@@ -0,0 +1,63 @@
+BARCODE SAMPLE_ALIAS LIBRARY_NAME
+AAAAAAAA SA_AAAAAAAA LN_AAAAAAAA
+AAAAGAAG SA_AAAAGAAG LN_AAAAGAAG
+AACAATGG SA_AACAATGG LN_AACAATGG
+AACGCATT SA_AACGCATT LN_AACGCATT
+ACAAAATT SA_ACAAAATT LN_ACAAAATT
+ACAGGTAT SA_ACAGGTAT LN_ACAGGTAT
+ACAGTTGA SA_ACAGTTGA LN_ACAGTTGA
+ACCAGTTG SA_ACCAGTTG LN_ACCAGTTG
+ACGAAATC SA_ACGAAATC LN_ACGAAATC
+ACTAAGAC SA_ACTAAGAC LN_ACTAAGAC
+ACTGTACC SA_ACTGTACC LN_ACTGTACC
+ACTGTATC SA_ACTGTATC LN_ACTGTATC
+AGAAAAGA SA_AGAAAAGA LN_AGAAAAGA
+AGCATGGA SA_AGCATGGA LN_AGCATGGA
+AGGTAAGG SA_AGGTAAGG LN_AGGTAAGG
+AGGTCGCA SA_AGGTCGCA LN_AGGTCGCA
+ATTATCAA SA_ATTATCAA LN_ATTATCAA
+ATTCCTCT SA_ATTCCTCT LN_ATTCCTCT
+CAACTCTC SA_CAACTCTC LN_CAACTCTC
+CAATAGAC SA_CAATAGAC LN_CAATAGAC
+CAATAGTC SA_CAATAGTC LN_CAATAGTC
+CAGCGGAT SA_CAGCGGAT LN_CAGCGGAT
+CAGCGGTA SA_CAGCGGTA LN_CAGCGGTA
+CCAACATT SA_CCAACATT LN_CCAACATT
+CCAGCACC SA_CCAGCACC LN_CCAGCACC
+CCATGCGT SA_CCATGCGT LN_CCATGCGT
+CGCCTTCC SA_CGCCTTCC LN_CGCCTTCC
+CGCTATGT SA_CGCTATGT LN_CGCTATGT
+CTAACTCG SA_CTAACTCG LN_CTAACTCG
+CTATGCGC SA_CTATGCGC LN_CTATGCGC
+CTATGCGT SA_CTATGCGT LN_CTATGCGT
+CTGCGGAT SA_CTGCGGAT LN_CTGCGGAT
+CTGTAATC SA_CTGTAATC LN_CTGTAATC
+GAAAAAAA SA_GAAAAAAA LN_GAAAAAAA
+GAACGAT. SA_GAACGAT. LN_GAACGAT.
+GAAGGAAG SA_GAAGGAAG LN_GAAGGAAG
+GACCAGGA SA_GACCAGGA LN_GACCAGGA
+GACCAGGC SA_GACCAGGC LN_GACCAGGC
+GACCGTTG SA_GACCGTTG LN_GACCGTTG
+GACCTAAC SA_GACCTAAC LN_GACCTAAC
+GATATCCA SA_GATATCCA LN_GATATCCA
+GCCGTCGA SA_GCCGTCGA LN_GCCGTCGA
+GCCTAGCC SA_GCCTAGCC LN_GCCTAGCC
+GTAACATC SA_GTAACATC LN_GTAACATC
+GTCCACAG SA_GTCCACAG LN_GTCCACAG
+TAAGCACA SA_TAAGCACA LN_TAAGCACA
+TACCGTCT SA_TACCGTCT LN_TACCGTCT
+TAGCGGTA SA_TAGCGGTA LN_TAGCGGTA
+TATCAGCC SA_TATCAGCC LN_TATCAGCC
+TATCCAGG SA_TATCCAGG LN_TATCCAGG
+TATCCATG SA_TATCCATG LN_TATCCATG
+TATCTCGG SA_TATCTCGG LN_TATCTCGG
+TATCTGCC SA_TATCTGCC LN_TATCTGCC
+TCCGTCTA SA_TCCGTCTA LN_TCCGTCTA
+TCGCTAGA SA_TCGCTAGA LN_TCGCTAGA
+TCTGCAAG SA_TCTGCAAG LN_TCTGCAAG
+TGCAAGTA SA_TGCAAGTA LN_TGCAAGTA
+TGCTGCTG SA_TGCTGCTG LN_TGCTGCTG
+TGTAACTC SA_TGTAACTC LN_TGTAACTC
+TGTAATCA SA_TGTAATCA LN_TGTAATCA
+TTGTCTAT SA_TTGTCTAT LN_TTGTCTAT
+N SA_N LN_N
\ No newline at end of file
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/final/nonBarcoded.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/final/nonBarcoded.sam
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/final/nonBarcodedWithMolecularIndex4M4M.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/final/nonBarcodedWithMolecularIndex4M4M.sam
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/final/nonBarcodedWithMolecularIndex8M.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/final/nonBarcodedWithMolecularIndex8M.sam
new file mode 100644
index 0000000..e69de29
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam
new file mode 100644
index 0000000..3a6bae0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AAAAAAAA LB:LN_AAAAAAAA PL:illumina PU:HiMomN1NAAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam.bak
new file mode 100644
index 0000000..4882d35
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAAAAA.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAAAAA LB:LN_AAAAAAAA PL:illumina PU:HiMom.1.AAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam
new file mode 100644
index 0000000..3471b15
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AAAAGAAG LB:LN_AAAAGAAG PL:illumina PU:HiMomN1NAAAAGAAG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam.bak
new file mode 100644
index 0000000..6446f79
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AAAAGAAG.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAGAAG LB:LN_AAAAGAAG PL:illumina PU:HiMom.1.AAAAGAAG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACAATGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACAATGG.sam
new file mode 100644
index 0000000..de87b88
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACAATGG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AACAATGG LB:LN_AACAATGG PL:illumina PU:HiMomN1NAACAATGG CN:BI
+HiMom:1:1101:1138:2141 4 * 0 0 * * 0 0 TCCG CCCF RG:Z:HiMomN1
+HiMom:1:1101:1206:2126 4 * 0 0 * * 0 0 ATCT CCCF RG:Z:HiMomN1
+HiMom:1:2101:1077:2139 4 * 0 0 * * 0 0 NATT #4=D RG:Z:HiMomN1
+HiMom:1:2101:1112:2245 4 * 0 0 * * 0 0 TCGT ?8?D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACAATGG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACAATGG.sam.bak
new file mode 100644
index 0000000..d8ae59e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACAATGG.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACAATGG LB:LN_AACAATGG PL:illumina PU:HiMom.1.AACAATGG CN:BI
+HiMom:1:1101:1138:2141 4 * 0 0 * * 0 0 TCCG CCCF RG:Z:HiMom.1
+HiMom:1:1101:1206:2126 4 * 0 0 * * 0 0 ATCT CCCF RG:Z:HiMom.1
+HiMom:1:2101:1077:2139 4 * 0 0 * * 0 0 .ATT #4=D RG:Z:HiMom.1
+HiMom:1:2101:1112:2245 4 * 0 0 * * 0 0 TCGT ?8?D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACGCATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACGCATT.sam
new file mode 100644
index 0000000..62ba43b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACGCATT.sam
@@ -0,0 +1,9 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AACGCATT LB:LN_AACGCATT PL:illumina PU:HiMomN1NAACGCATT CN:BI
+HiMom:1:1101:1197:2200 4 * 0 0 * * 0 0 ATAT @@@F RG:Z:HiMomN1
+HiMom:1:1101:1308:2153 516 * 0 0 * * 0 0 TCTG 1?1= RG:Z:HiMomN1
+HiMom:1:1101:1452:2132 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMomN1
+HiMom:1:1201:1150:2161 4 * 0 0 * * 0 0 TTCT @C at F RG:Z:HiMomN1
+HiMom:1:2101:1240:2197 516 * 0 0 * * 0 0 ACTG ??## RG:Z:HiMomN1
+HiMom:1:2101:1336:2109 4 * 0 0 * * 0 0 AGAC CCCF RG:Z:HiMomN1
+HiMom:1:2101:1427:2081 4 * 0 0 * * 0 0 CCGA CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACGCATT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACGCATT.sam.bak
new file mode 100644
index 0000000..0abc311
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AACGCATT.sam.bak
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACGCATT LB:LN_AACGCATT PL:illumina PU:HiMom.1.AACGCATT CN:BI
+HiMom:1:1101:1197:2200 4 * 0 0 * * 0 0 ATAT @@@F RG:Z:HiMom.1
+HiMom:1:1101:1308:2153 516 * 0 0 * * 0 0 TCTG 1?1= RG:Z:HiMom.1
+HiMom:1:1101:1452:2132 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMom.1
+HiMom:1:1201:1150:2161 4 * 0 0 * * 0 0 TTCT @C at F RG:Z:HiMom.1
+HiMom:1:2101:1240:2197 516 * 0 0 * * 0 0 ACTG ??## RG:Z:HiMom.1
+HiMom:1:2101:1336:2109 4 * 0 0 * * 0 0 AGAC CCCF RG:Z:HiMom.1
+HiMom:1:2101:1427:2081 4 * 0 0 * * 0 0 CCGA CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAAAATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAAAATT.sam
new file mode 100644
index 0000000..1827e25
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAAAATT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACAAAATT LB:LN_ACAAAATT PL:illumina PU:HiMomN1NACAAAATT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAAAATT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAAAATT.sam.bak
new file mode 100644
index 0000000..3cfd422
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAAAATT.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAAAATT LB:LN_ACAAAATT PL:illumina PU:HiMom.1.ACAAAATT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam
new file mode 100644
index 0000000..dbd3a57
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACAGGTAT LB:LN_ACAGGTAT PL:illumina PU:HiMomN1NACAGGTAT CN:BI
+HiMom:1:1101:1236:2121 4 * 0 0 * * 0 0 TTGC CCCF RG:Z:HiMomN1
+HiMom:1:1201:1341:2116 4 * 0 0 * * 0 0 ATAA #### RG:Z:HiMomN1
+HiMom:1:2101:1063:2206 4 * 0 0 * * 0 0 NTGC #1=D RG:Z:HiMomN1
+HiMom:1:2101:1325:2083 4 * 0 0 * * 0 0 TGTG #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam.bak
new file mode 100644
index 0000000..fe5bf02
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGGTAT.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGGTAT LB:LN_ACAGGTAT PL:illumina PU:HiMom.1.ACAGGTAT CN:BI
+HiMom:1:1101:1236:2121 4 * 0 0 * * 0 0 TTGC CCCF RG:Z:HiMom.1
+HiMom:1:1201:1341:2116 4 * 0 0 * * 0 0 ATAA #### RG:Z:HiMom.1
+HiMom:1:2101:1063:2206 4 * 0 0 * * 0 0 .TGC #1=D RG:Z:HiMom.1
+HiMom:1:2101:1325:2083 4 * 0 0 * * 0 0 TGTG #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam
new file mode 100644
index 0000000..599dbe1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACAGTTGA LB:LN_ACAGTTGA PL:illumina PU:HiMomN1NACAGTTGA CN:BI
+HiMom:1:2101:1048:2238 4 * 0 0 * * 0 0 NGTC #11A RG:Z:HiMomN1
+HiMom:1:2101:1216:2193 4 * 0 0 * * 0 0 AGGC @@@D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam.bak
new file mode 100644
index 0000000..f30b883
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACAGTTGA.sam.bak
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGTTGA LB:LN_ACAGTTGA PL:illumina PU:HiMom.1.ACAGTTGA CN:BI
+HiMom:1:2101:1048:2238 4 * 0 0 * * 0 0 .GTC #11A RG:Z:HiMom.1
+HiMom:1:2101:1216:2193 4 * 0 0 * * 0 0 AGGC @@@D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam
new file mode 100644
index 0000000..681b9f6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACCAGTTG LB:LN_ACCAGTTG PL:illumina PU:HiMomN1NACCAGTTG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam.bak
new file mode 100644
index 0000000..0df9571
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACCAGTTG.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACCAGTTG LB:LN_ACCAGTTG PL:illumina PU:HiMom.1.ACCAGTTG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACGAAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACGAAATC.sam
new file mode 100644
index 0000000..acd2bd7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACGAAATC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACGAAATC LB:LN_ACGAAATC PL:illumina PU:HiMomN1NACGAAATC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACGAAATC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACGAAATC.sam.bak
new file mode 100644
index 0000000..1fbb7b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACGAAATC.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACGAAATC LB:LN_ACGAAATC PL:illumina PU:HiMom.1.ACGAAATC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam
new file mode 100644
index 0000000..c87a5d8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACTAAGAC LB:LN_ACTAAGAC PL:illumina PU:HiMomN1NACTAAGAC CN:BI
+HiMom:1:1101:1259:2152 4 * 0 0 * * 0 0 ATTT CCCF RG:Z:HiMomN1
+HiMom:1:1101:1261:2127 516 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMomN1
+HiMom:1:2101:1021:2209 4 * 0 0 * * 0 0 NNGG #### RG:Z:HiMomN1
+HiMom:1:2101:1262:2128 516 * 0 0 * * 0 0 TCTT #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam.bak
new file mode 100644
index 0000000..5553231
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTAAGAC.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTAAGAC LB:LN_ACTAAGAC PL:illumina PU:HiMom.1.ACTAAGAC CN:BI
+HiMom:1:1101:1259:2152 4 * 0 0 * * 0 0 ATTT CCCF RG:Z:HiMom.1
+HiMom:1:1101:1261:2127 516 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMom.1
+HiMom:1:2101:1021:2209 4 * 0 0 * * 0 0 ..GG #### RG:Z:HiMom.1
+HiMom:1:2101:1262:2128 516 * 0 0 * * 0 0 TCTT #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTACC.sam
new file mode 100644
index 0000000..8249cb0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTACC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACTGTACC LB:LN_ACTGTACC PL:illumina PU:HiMomN1NACTGTACC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTACC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTACC.sam.bak
new file mode 100644
index 0000000..bcf9179
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTACC.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTACC LB:LN_ACTGTACC PL:illumina PU:HiMom.1.ACTGTACC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTATC.sam
new file mode 100644
index 0000000..9fc0585
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTATC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ACTGTATC LB:LN_ACTGTATC PL:illumina PU:HiMomN1NACTGTATC CN:BI
+HiMom:1:1201:1458:2109 4 * 0 0 * * 0 0 GATA CCCF RG:Z:HiMomN1
+HiMom:1:2101:1105:2131 4 * 0 0 * * 0 0 CAGC CCCF RG:Z:HiMomN1
+HiMom:1:2101:1349:2084 4 * 0 0 * * 0 0 AGTC <5;? RG:Z:HiMomN1
+HiMom:1:2101:1365:2094 4 * 0 0 * * 0 0 GCTC CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTATC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTATC.sam.bak
new file mode 100644
index 0000000..0abd96c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ACTGTATC.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTATC LB:LN_ACTGTATC PL:illumina PU:HiMom.1.ACTGTATC CN:BI
+HiMom:1:1201:1458:2109 4 * 0 0 * * 0 0 GATA CCCF RG:Z:HiMom.1
+HiMom:1:2101:1105:2131 4 * 0 0 * * 0 0 CAGC CCCF RG:Z:HiMom.1
+HiMom:1:2101:1349:2084 4 * 0 0 * * 0 0 AGTC <5;? RG:Z:HiMom.1
+HiMom:1:2101:1365:2094 4 * 0 0 * * 0 0 GCTC CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam
new file mode 100644
index 0000000..f78e1f7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AGAAAAGA LB:LN_AGAAAAGA PL:illumina PU:HiMomN1NAGAAAAGA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam.bak
new file mode 100644
index 0000000..d40aec2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGAAAAGA.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGAAAAGA LB:LN_AGAAAAGA PL:illumina PU:HiMom.1.AGAAAAGA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGCATGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGCATGGA.sam
new file mode 100644
index 0000000..a7696b9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGCATGGA.sam
@@ -0,0 +1,5 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AGCATGGA LB:LN_AGCATGGA PL:illumina PU:HiMomN1NAGCATGGA CN:BI
+HiMom:1:1101:1406:2222 4 * 0 0 * * 0 0 GGCT ;?@D RG:Z:HiMomN1
+HiMom:1:1201:1291:2158 4 * 0 0 * * 0 0 CGTG @CCF RG:Z:HiMomN1
+HiMom:1:2101:1370:2116 4 * 0 0 * * 0 0 CACC @@@D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGCATGGA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGCATGGA.sam.bak
new file mode 100644
index 0000000..3ad5fb2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGCATGGA.sam.bak
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGCATGGA LB:LN_AGCATGGA PL:illumina PU:HiMom.1.AGCATGGA CN:BI
+HiMom:1:1101:1406:2222 4 * 0 0 * * 0 0 GGCT ;?@D RG:Z:HiMom.1
+HiMom:1:1201:1291:2158 4 * 0 0 * * 0 0 CGTG @CCF RG:Z:HiMom.1
+HiMom:1:2101:1370:2116 4 * 0 0 * * 0 0 CACC @@@D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam
new file mode 100644
index 0000000..09d356d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AGGTAAGG LB:LN_AGGTAAGG PL:illumina PU:HiMomN1NAGGTAAGG CN:BI
+HiMom:1:1101:1263:2236 516 * 0 0 * * 0 0 AGTT ((0@ RG:Z:HiMomN1
+HiMom:1:2101:1054:2162 4 * 0 0 * * 0 0 NGGA #4=D RG:Z:HiMomN1
+HiMom:1:2101:1163:2203 4 * 0 0 * * 0 0 TTGG @CCF RG:Z:HiMomN1
+HiMom:1:2101:1249:2231 4 * 0 0 * * 0 0 TCTC @@@F RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam.bak
new file mode 100644
index 0000000..a49b502
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTAAGG.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTAAGG LB:LN_AGGTAAGG PL:illumina PU:HiMom.1.AGGTAAGG CN:BI
+HiMom:1:1101:1263:2236 516 * 0 0 * * 0 0 AGTT ((0@ RG:Z:HiMom.1
+HiMom:1:2101:1054:2162 4 * 0 0 * * 0 0 .GGA #4=D RG:Z:HiMom.1
+HiMom:1:2101:1163:2203 4 * 0 0 * * 0 0 TTGG @CCF RG:Z:HiMom.1
+HiMom:1:2101:1249:2231 4 * 0 0 * * 0 0 TCTC @@@F RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam
new file mode 100644
index 0000000..879abab
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_AGGTCGCA LB:LN_AGGTCGCA PL:illumina PU:HiMomN1NAGGTCGCA CN:BI
+HiMom:1:1101:1150:2228 4 * 0 0 * * 0 0 ATGG 8?=D RG:Z:HiMomN1
+HiMom:1:1101:1491:2120 4 * 0 0 * * 0 0 GGCC CCCF RG:Z:HiMomN1
+HiMom:1:1201:1190:2194 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMomN1
+HiMom:1:2101:1188:2195 4 * 0 0 * * 0 0 GCAC CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam.bak
new file mode 100644
index 0000000..7938ca9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/AGGTCGCA.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTCGCA LB:LN_AGGTCGCA PL:illumina PU:HiMom.1.AGGTCGCA CN:BI
+HiMom:1:1101:1150:2228 4 * 0 0 * * 0 0 ATGG 8?=D RG:Z:HiMom.1
+HiMom:1:1101:1491:2120 4 * 0 0 * * 0 0 GGCC CCCF RG:Z:HiMom.1
+HiMom:1:1201:1190:2194 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMom.1
+HiMom:1:2101:1188:2195 4 * 0 0 * * 0 0 GCAC CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTATCAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTATCAA.sam
new file mode 100644
index 0000000..2d4b0fa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTATCAA.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ATTATCAA LB:LN_ATTATCAA PL:illumina PU:HiMomN1NATTATCAA CN:BI
+HiMom:1:1101:1100:2207 4 * 0 0 * * 0 0 AGGC #### RG:Z:HiMomN1
+HiMom:1:1101:1157:2135 4 * 0 0 * * 0 0 TTTA CCCF RG:Z:HiMomN1
+HiMom:1:1101:1269:2170 4 * 0 0 * * 0 0 TTCC @@<A RG:Z:HiMomN1
+HiMom:1:1201:1018:2217 516 * 0 0 * * 0 0 NNNN #### RG:Z:HiMomN1 XN:i:1
+HiMom:1:1201:1118:2198 4 * 0 0 * * 0 0 AATA C at CF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTATCAA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTATCAA.sam.bak
new file mode 100644
index 0000000..06af99c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTATCAA.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTATCAA LB:LN_ATTATCAA PL:illumina PU:HiMom.1.ATTATCAA CN:BI
+HiMom:1:1101:1100:2207 4 * 0 0 * * 0 0 AGGC #### RG:Z:HiMom.1
+HiMom:1:1101:1157:2135 4 * 0 0 * * 0 0 TTTA CCCF RG:Z:HiMom.1
+HiMom:1:1101:1269:2170 4 * 0 0 * * 0 0 TTCC @@<A RG:Z:HiMom.1
+HiMom:1:1201:1018:2217 516 * 0 0 * * 0 0 .... #### RG:Z:HiMom.1 XN:i:1
+HiMom:1:1201:1118:2198 4 * 0 0 * * 0 0 AATA C at CF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam
new file mode 100644
index 0000000..cbfb1cf
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_ATTCCTCT LB:LN_ATTCCTCT PL:illumina PU:HiMomN1NATTCCTCT CN:BI
+HiMom:1:1101:1309:2210 4 * 0 0 * * 0 0 AGTG @@?D RG:Z:HiMomN1
+HiMom:1:1201:1018:2133 4 * 0 0 * * 0 0 NNNN #### RG:Z:HiMomN1 XN:i:1
+HiMom:1:1201:1073:2225 4 * 0 0 * * 0 0 CGTG @BBD RG:Z:HiMomN1
+HiMom:1:1201:1242:2207 4 * 0 0 * * 0 0 ATCT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam.bak
new file mode 100644
index 0000000..ac10e3d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/ATTCCTCT.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTCCTCT LB:LN_ATTCCTCT PL:illumina PU:HiMom.1.ATTCCTCT CN:BI
+HiMom:1:1101:1309:2210 4 * 0 0 * * 0 0 AGTG @@?D RG:Z:HiMom.1
+HiMom:1:1201:1018:2133 4 * 0 0 * * 0 0 .... #### RG:Z:HiMom.1 XN:i:1
+HiMom:1:1201:1073:2225 4 * 0 0 * * 0 0 CGTG @BBD RG:Z:HiMom.1
+HiMom:1:1201:1242:2207 4 * 0 0 * * 0 0 ATCT CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAACTCTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAACTCTC.sam
new file mode 100644
index 0000000..0bb3733
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAACTCTC.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CAACTCTC LB:LN_CAACTCTC PL:illumina PU:HiMomN1NCAACTCTC CN:BI
+HiMom:1:1101:1140:2120 4 * 0 0 * * 0 0 TTTT @@@D RG:Z:HiMomN1
+HiMom:1:1101:1328:2225 4 * 0 0 * * 0 0 AGGA #### RG:Z:HiMomN1
+HiMom:1:1201:1127:2112 516 * 0 0 * * 0 0 TAAT @<@? RG:Z:HiMomN1
+HiMom:1:1201:1452:2143 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1486:2146 516 * 0 0 * * 0 0 TTTT <<<@ RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAACTCTC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAACTCTC.sam.bak
new file mode 100644
index 0000000..a43c57d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAACTCTC.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAACTCTC LB:LN_CAACTCTC PL:illumina PU:HiMom.1.CAACTCTC CN:BI
+HiMom:1:1101:1140:2120 4 * 0 0 * * 0 0 TTTT @@@D RG:Z:HiMom.1
+HiMom:1:1101:1328:2225 4 * 0 0 * * 0 0 AGGA #### RG:Z:HiMom.1
+HiMom:1:1201:1127:2112 516 * 0 0 * * 0 0 TAAT @<@? RG:Z:HiMom.1
+HiMom:1:1201:1452:2143 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1486:2146 516 * 0 0 * * 0 0 TTTT <<<@ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGAC.sam
new file mode 100644
index 0000000..3ca277e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGAC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CAATAGAC LB:LN_CAATAGAC PL:illumina PU:HiMomN1NCAATAGAC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGAC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGAC.sam.bak
new file mode 100644
index 0000000..f800d53
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGAC.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGAC LB:LN_CAATAGAC PL:illumina PU:HiMom.1.CAATAGAC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGTC.sam
new file mode 100644
index 0000000..3a8aecc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGTC.sam
@@ -0,0 +1,9 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CAATAGTC LB:LN_CAATAGTC PL:illumina PU:HiMomN1NCAATAGTC CN:BI
+HiMom:1:1101:1316:2126 4 * 0 0 * * 0 0 TCTT CCCF RG:Z:HiMomN1
+HiMom:1:1101:1399:2128 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMomN1
+HiMom:1:1201:1054:2151 4 * 0 0 * * 0 0 GTCA CBCF RG:Z:HiMomN1
+HiMom:1:1201:1345:2181 4 * 0 0 * * 0 0 ATAC CCCF RG:Z:HiMomN1
+HiMom:1:1201:1392:2184 4 * 0 0 * * 0 0 ATCT @@BF RG:Z:HiMomN1
+HiMom:1:2101:1172:2152 516 * 0 0 * * 0 0 ATCG #### RG:Z:HiMomN1
+HiMom:1:2101:1491:2093 4 * 0 0 * * 0 0 AGAG BCCD RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGTC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGTC.sam.bak
new file mode 100644
index 0000000..d2c03cd
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAATAGTC.sam.bak
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGTC LB:LN_CAATAGTC PL:illumina PU:HiMom.1.CAATAGTC CN:BI
+HiMom:1:1101:1316:2126 4 * 0 0 * * 0 0 TCTT CCCF RG:Z:HiMom.1
+HiMom:1:1101:1399:2128 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMom.1
+HiMom:1:1201:1054:2151 4 * 0 0 * * 0 0 GTCA CBCF RG:Z:HiMom.1
+HiMom:1:1201:1345:2181 4 * 0 0 * * 0 0 ATAC CCCF RG:Z:HiMom.1
+HiMom:1:1201:1392:2184 4 * 0 0 * * 0 0 ATCT @@BF RG:Z:HiMom.1
+HiMom:1:2101:1172:2152 516 * 0 0 * * 0 0 ATCG #### RG:Z:HiMom.1
+HiMom:1:2101:1491:2093 4 * 0 0 * * 0 0 AGAG BCCD RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam
new file mode 100644
index 0000000..ad90c77
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CAGCGGAT LB:LN_CAGCGGAT PL:illumina PU:HiMomN1NCAGCGGAT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam.bak
new file mode 100644
index 0000000..c9d3cfe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGAT.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGAT LB:LN_CAGCGGAT PL:illumina PU:HiMom.1.CAGCGGAT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam
new file mode 100644
index 0000000..c89713d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CAGCGGTA LB:LN_CAGCGGTA PL:illumina PU:HiMomN1NCAGCGGTA CN:BI
+HiMom:1:1101:1420:2213 4 * 0 0 * * 0 0 TTCA @CCF RG:Z:HiMomN1
+HiMom:1:1201:1364:2113 4 * 0 0 * * 0 0 TAAA #### RG:Z:HiMomN1
+HiMom:1:2101:1072:2170 4 * 0 0 * * 0 0 NGGG #4=B RG:Z:HiMomN1
+HiMom:1:2101:1123:2095 4 * 0 0 * * 0 0 TCCG @@@F RG:Z:HiMomN1
+HiMom:1:2101:1151:2236 516 * 0 0 * * 0 0 TTTG #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam.bak
new file mode 100644
index 0000000..516b931
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CAGCGGTA.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGTA LB:LN_CAGCGGTA PL:illumina PU:HiMom.1.CAGCGGTA CN:BI
+HiMom:1:1101:1420:2213 4 * 0 0 * * 0 0 TTCA @CCF RG:Z:HiMom.1
+HiMom:1:1201:1364:2113 4 * 0 0 * * 0 0 TAAA #### RG:Z:HiMom.1
+HiMom:1:2101:1072:2170 4 * 0 0 * * 0 0 .GGG #4=B RG:Z:HiMom.1
+HiMom:1:2101:1123:2095 4 * 0 0 * * 0 0 TCCG @@@F RG:Z:HiMom.1
+HiMom:1:2101:1151:2236 516 * 0 0 * * 0 0 TTTG #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAACATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAACATT.sam
new file mode 100644
index 0000000..23ab6a7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAACATT.sam
@@ -0,0 +1,9 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CCAACATT LB:LN_CCAACATT PL:illumina PU:HiMomN1NCCAACATT CN:BI
+HiMom:1:1101:1083:2193 4 * 0 0 * * 0 0 AGGC 19;3 RG:Z:HiMomN1
+HiMom:1:1101:1175:2197 4 * 0 0 * * 0 0 AAGA BC at F RG:Z:HiMomN1
+HiMom:1:1201:1138:2227 516 * 0 0 * * 0 0 GACA =1=A RG:Z:HiMomN1
+HiMom:1:1201:1260:2165 4 * 0 0 * * 0 0 ATCT @@@F RG:Z:HiMomN1
+HiMom:1:1201:1281:2133 4 * 0 0 * * 0 0 GCAA CCCF RG:Z:HiMomN1
+HiMom:1:1201:1331:2162 4 * 0 0 * * 0 0 TAAT CCCF RG:Z:HiMomN1
+HiMom:1:2101:1186:2093 4 * 0 0 * * 0 0 AATG #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAACATT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAACATT.sam.bak
new file mode 100644
index 0000000..448e66a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAACATT.sam.bak
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAACATT LB:LN_CCAACATT PL:illumina PU:HiMom.1.CCAACATT CN:BI
+HiMom:1:1101:1083:2193 4 * 0 0 * * 0 0 AGGC 19;3 RG:Z:HiMom.1
+HiMom:1:1101:1175:2197 4 * 0 0 * * 0 0 AAGA BC at F RG:Z:HiMom.1
+HiMom:1:1201:1138:2227 516 * 0 0 * * 0 0 GACA =1=A RG:Z:HiMom.1
+HiMom:1:1201:1260:2165 4 * 0 0 * * 0 0 ATCT @@@F RG:Z:HiMom.1
+HiMom:1:1201:1281:2133 4 * 0 0 * * 0 0 GCAA CCCF RG:Z:HiMom.1
+HiMom:1:1201:1331:2162 4 * 0 0 * * 0 0 TAAT CCCF RG:Z:HiMom.1
+HiMom:1:2101:1186:2093 4 * 0 0 * * 0 0 AATG #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAGCACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAGCACC.sam
new file mode 100644
index 0000000..ce0040e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAGCACC.sam
@@ -0,0 +1,5 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CCAGCACC LB:LN_CCAGCACC PL:illumina PU:HiMomN1NCCAGCACC CN:BI
+HiMom:1:1101:1212:2230 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1204:2228 4 * 0 0 * * 0 0 TCTT @?@F RG:Z:HiMomN1
+HiMom:1:2101:1100:2085 4 * 0 0 * * 0 0 ATCT @@@D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAGCACC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAGCACC.sam.bak
new file mode 100644
index 0000000..94b5235
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCAGCACC.sam.bak
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAGCACC LB:LN_CCAGCACC PL:illumina PU:HiMom.1.CCAGCACC CN:BI
+HiMom:1:1101:1212:2230 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1204:2228 4 * 0 0 * * 0 0 TCTT @?@F RG:Z:HiMom.1
+HiMom:1:2101:1100:2085 4 * 0 0 * * 0 0 ATCT @@@D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCATGCGT.sam
new file mode 100644
index 0000000..27db005
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCATGCGT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CCATGCGT LB:LN_CCATGCGT PL:illumina PU:HiMomN1NCCATGCGT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCATGCGT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCATGCGT.sam.bak
new file mode 100644
index 0000000..29ae6d9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CCATGCGT.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCATGCGT LB:LN_CCATGCGT PL:illumina PU:HiMom.1.CCATGCGT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam
new file mode 100644
index 0000000..e25ff8c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CGCCTTCC LB:LN_CGCCTTCC PL:illumina PU:HiMomN1NCGCCTTCC CN:BI
+HiMom:1:1201:1122:2227 4 * 0 0 * * 0 0 GTCA @@@F RG:Z:HiMomN1
+HiMom:1:1201:1160:2109 4 * 0 0 * * 0 0 ACAT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam.bak
new file mode 100644
index 0000000..e25ff8c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCCTTCC.sam.bak
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CGCCTTCC LB:LN_CGCCTTCC PL:illumina PU:HiMomN1NCGCCTTCC CN:BI
+HiMom:1:1201:1122:2227 4 * 0 0 * * 0 0 GTCA @@@F RG:Z:HiMomN1
+HiMom:1:1201:1160:2109 4 * 0 0 * * 0 0 ACAT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCTATGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCTATGT.sam
new file mode 100644
index 0000000..32fd597
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCTATGT.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CGCTATGT LB:LN_CGCTATGT PL:illumina PU:HiMomN1NCGCTATGT CN:BI
+HiMom:1:1101:1291:2150 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMomN1
+HiMom:1:1101:1314:2233 4 * 0 0 * * 0 0 AGGA @@<D RG:Z:HiMomN1
+HiMom:1:1101:1441:2148 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1043:2246 4 * 0 0 * * 0 0 NGCA #0;@ RG:Z:HiMomN1
+HiMom:1:1201:1134:2144 4 * 0 0 * * 0 0 AGTG B@?D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCTATGT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCTATGT.sam.bak
new file mode 100644
index 0000000..310ff53
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CGCTATGT.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCTATGT LB:LN_CGCTATGT PL:illumina PU:HiMom.1.CGCTATGT CN:BI
+HiMom:1:1101:1291:2150 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMom.1
+HiMom:1:1101:1314:2233 4 * 0 0 * * 0 0 AGGA @@<D RG:Z:HiMom.1
+HiMom:1:1101:1441:2148 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1043:2246 4 * 0 0 * * 0 0 .GCA #0;@ RG:Z:HiMom.1
+HiMom:1:1201:1134:2144 4 * 0 0 * * 0 0 AGTG B@?D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTAACTCG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTAACTCG.sam
new file mode 100644
index 0000000..f9ae045
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTAACTCG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CTAACTCG LB:LN_CTAACTCG PL:illumina PU:HiMomN1NCTAACTCG CN:BI
+HiMom:1:1101:1363:2138 4 * 0 0 * * 0 0 GTTC C@@F RG:Z:HiMomN1
+HiMom:1:1201:1393:2143 4 * 0 0 * * 0 0 GATA C at CF RG:Z:HiMomN1
+HiMom:1:2101:1273:2119 516 * 0 0 * * 0 0 ATGA >=>< RG:Z:HiMomN1
+HiMom:1:2101:1414:2098 4 * 0 0 * * 0 0 TTGG CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTAACTCG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTAACTCG.sam.bak
new file mode 100644
index 0000000..bc5af8a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTAACTCG.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTAACTCG LB:LN_CTAACTCG PL:illumina PU:HiMom.1.CTAACTCG CN:BI
+HiMom:1:1101:1363:2138 4 * 0 0 * * 0 0 GTTC C@@F RG:Z:HiMom.1
+HiMom:1:1201:1393:2143 4 * 0 0 * * 0 0 GATA C at CF RG:Z:HiMom.1
+HiMom:1:2101:1273:2119 516 * 0 0 * * 0 0 ATGA >=>< RG:Z:HiMom.1
+HiMom:1:2101:1414:2098 4 * 0 0 * * 0 0 TTGG CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGC.sam
new file mode 100644
index 0000000..397e598
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CTATGCGC LB:LN_CTATGCGC PL:illumina PU:HiMomN1NCTATGCGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGC.sam.bak
new file mode 100644
index 0000000..2a5a21c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGC.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGC LB:LN_CTATGCGC PL:illumina PU:HiMom.1.CTATGCGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGT.sam
new file mode 100644
index 0000000..e3ed157
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGT.sam
@@ -0,0 +1,9 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CTATGCGT LB:LN_CTATGCGT PL:illumina PU:HiMomN1NCTATGCGT CN:BI
+HiMom:1:1201:1083:2121 4 * 0 0 * * 0 0 ACAC #### RG:Z:HiMomN1
+HiMom:1:1201:1185:2143 4 * 0 0 * * 0 0 GCTG @CCF RG:Z:HiMomN1
+HiMom:1:1201:1219:2115 4 * 0 0 * * 0 0 TGGG ???D RG:Z:HiMomN1
+HiMom:1:1201:1472:2121 516 * 0 0 * * 0 0 GTGT =+=? RG:Z:HiMomN1
+HiMom:1:2101:1013:2146 4 * 0 0 * * 0 0 NNNN #### RG:Z:HiMomN1 XN:i:1
+HiMom:1:2101:1231:2208 4 * 0 0 * * 0 0 AGCC @@;1 RG:Z:HiMomN1
+HiMom:1:2101:1233:2133 516 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGT.sam.bak
new file mode 100644
index 0000000..b822db4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTATGCGT.sam.bak
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGT LB:LN_CTATGCGT PL:illumina PU:HiMom.1.CTATGCGT CN:BI
+HiMom:1:1201:1083:2121 4 * 0 0 * * 0 0 ACAC #### RG:Z:HiMom.1
+HiMom:1:1201:1185:2143 4 * 0 0 * * 0 0 GCTG @CCF RG:Z:HiMom.1
+HiMom:1:1201:1219:2115 4 * 0 0 * * 0 0 TGGG ???D RG:Z:HiMom.1
+HiMom:1:1201:1472:2121 516 * 0 0 * * 0 0 GTGT =+=? RG:Z:HiMom.1
+HiMom:1:2101:1013:2146 4 * 0 0 * * 0 0 .... #### RG:Z:HiMom.1 XN:i:1
+HiMom:1:2101:1231:2208 4 * 0 0 * * 0 0 AGCC @@;1 RG:Z:HiMom.1
+HiMom:1:2101:1233:2133 516 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam
new file mode 100644
index 0000000..d370684
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam
@@ -0,0 +1,5 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CTGCGGAT LB:LN_CTGCGGAT PL:illumina PU:HiMomN1NCTGCGGAT CN:BI
+HiMom:1:2101:1102:2221 4 * 0 0 * * 0 0 ATAA CCCF RG:Z:HiMomN1
+HiMom:1:2101:1126:2082 4 * 0 0 * * 0 0 TCTC @C at D RG:Z:HiMomN1
+HiMom:1:2101:1216:2172 4 * 0 0 * * 0 0 GGAC CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam.bak
new file mode 100644
index 0000000..57c9ab3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGCGGAT.sam.bak
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGCGGAT LB:LN_CTGCGGAT PL:illumina PU:HiMom.1.CTGCGGAT CN:BI
+HiMom:1:2101:1102:2221 4 * 0 0 * * 0 0 ATAA CCCF RG:Z:HiMom.1
+HiMom:1:2101:1126:2082 4 * 0 0 * * 0 0 TCTC @C at D RG:Z:HiMom.1
+HiMom:1:2101:1216:2172 4 * 0 0 * * 0 0 GGAC CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGTAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGTAATC.sam
new file mode 100644
index 0000000..3b34100
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGTAATC.sam
@@ -0,0 +1,8 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_CTGTAATC LB:LN_CTGTAATC PL:illumina PU:HiMomN1NCTGTAATC CN:BI
+HiMom:1:1101:1403:2194 4 * 0 0 * * 0 0 ACAT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1045:2105 516 * 0 0 * * 0 0 NTTT #0;@ RG:Z:HiMomN1
+HiMom:1:1201:1483:2126 516 * 0 0 * * 0 0 GCAT @@@D RG:Z:HiMomN1
+HiMom:1:2101:1011:2102 4 * 0 0 * * 0 0 NNNN #### RG:Z:HiMomN1 XN:i:1
+HiMom:1:2101:1245:2154 4 * 0 0 * * 0 0 ACCA CCCF RG:Z:HiMomN1
+HiMom:1:2101:1386:2105 4 * 0 0 * * 0 0 AGGA B@@D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGTAATC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGTAATC.sam.bak
new file mode 100644
index 0000000..25ea2e1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/CTGTAATC.sam.bak
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGTAATC LB:LN_CTGTAATC PL:illumina PU:HiMom.1.CTGTAATC CN:BI
+HiMom:1:1101:1403:2194 4 * 0 0 * * 0 0 ACAT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1045:2105 516 * 0 0 * * 0 0 .TTT #0;@ RG:Z:HiMom.1
+HiMom:1:1201:1483:2126 516 * 0 0 * * 0 0 GCAT @@@D RG:Z:HiMom.1
+HiMom:1:2101:1011:2102 4 * 0 0 * * 0 0 .... #### RG:Z:HiMom.1 XN:i:1
+HiMom:1:2101:1245:2154 4 * 0 0 * * 0 0 ACCA CCCF RG:Z:HiMom.1
+HiMom:1:2101:1386:2105 4 * 0 0 * * 0 0 AGGA B@@D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam
new file mode 100644
index 0000000..f32f7c4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GAAAAAAA LB:LN_GAAAAAAA PL:illumina PU:HiMomN1NGAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam.bak
new file mode 100644
index 0000000..0bfb1f9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAAAAAA.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAAAAAA LB:LN_GAAAAAAA PL:illumina PU:HiMom.1.GAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAACGAT..sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAACGAT..sam
new file mode 100644
index 0000000..adf1f45
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAACGAT..sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GAACGATN LB:LN_GAACGATN PL:illumina PU:HiMomN1NGAACGATN CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAACGAT..sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAACGAT..sam.bak
new file mode 100644
index 0000000..1636a99
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAACGAT..sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAACGAT. LB:LN_GAACGAT. PL:illumina PU:HiMom.1.GAACGAT. CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam
new file mode 100644
index 0000000..fed7ab3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam
@@ -0,0 +1,5 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GAAGGAAG LB:LN_GAAGGAAG PL:illumina PU:HiMomN1NGAAGGAAG CN:BI
+HiMom:1:1101:1338:2175 4 * 0 0 * * 0 0 GCTT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1028:2202 4 * 0 0 * * 0 0 NNAA #### RG:Z:HiMomN1 XN:i:1
+HiMom:1:2101:1084:2188 4 * 0 0 * * 0 0 TACA CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam.bak
new file mode 100644
index 0000000..cd12272
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GAAGGAAG.sam.bak
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAGGAAG LB:LN_GAAGGAAG PL:illumina PU:HiMom.1.GAAGGAAG CN:BI
+HiMom:1:1101:1338:2175 4 * 0 0 * * 0 0 GCTT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1028:2202 4 * 0 0 * * 0 0 ..AA #### RG:Z:HiMom.1 XN:i:1
+HiMom:1:2101:1084:2188 4 * 0 0 * * 0 0 TACA CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGA.sam
new file mode 100644
index 0000000..5c40ca1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGA.sam
@@ -0,0 +1,9 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GACCAGGA LB:LN_GACCAGGA PL:illumina PU:HiMomN1NGACCAGGA CN:BI
+HiMom:1:1101:1089:2172 4 * 0 0 * * 0 0 TCCG :<<? RG:Z:HiMomN1
+HiMom:1:1101:1347:2149 4 * 0 0 * * 0 0 GCTC CCCF RG:Z:HiMomN1
+HiMom:1:1201:1095:2146 4 * 0 0 * * 0 0 ACTG CCCF RG:Z:HiMomN1
+HiMom:1:1201:1123:2161 516 * 0 0 * * 0 0 CGTG ===A RG:Z:HiMomN1
+HiMom:1:1201:1439:2156 4 * 0 0 * * 0 0 GGAG #### RG:Z:HiMomN1
+HiMom:1:2101:1207:2084 516 * 0 0 * * 0 0 TCAC @@@D RG:Z:HiMomN1
+HiMom:1:2101:1312:2105 4 * 0 0 * * 0 0 GTTG @CCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGA.sam.bak
new file mode 100644
index 0000000..fbd1e80
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGA.sam.bak
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGA LB:LN_GACCAGGA PL:illumina PU:HiMom.1.GACCAGGA CN:BI
+HiMom:1:1101:1089:2172 4 * 0 0 * * 0 0 TCCG :<<? RG:Z:HiMom.1
+HiMom:1:1101:1347:2149 4 * 0 0 * * 0 0 GCTC CCCF RG:Z:HiMom.1
+HiMom:1:1201:1095:2146 4 * 0 0 * * 0 0 ACTG CCCF RG:Z:HiMom.1
+HiMom:1:1201:1123:2161 516 * 0 0 * * 0 0 CGTG ===A RG:Z:HiMom.1
+HiMom:1:1201:1439:2156 4 * 0 0 * * 0 0 GGAG #### RG:Z:HiMom.1
+HiMom:1:2101:1207:2084 516 * 0 0 * * 0 0 TCAC @@@D RG:Z:HiMom.1
+HiMom:1:2101:1312:2105 4 * 0 0 * * 0 0 GTTG @CCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGC.sam
new file mode 100644
index 0000000..265b243
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GACCAGGC LB:LN_GACCAGGC PL:illumina PU:HiMomN1NGACCAGGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGC.sam.bak
new file mode 100644
index 0000000..71b9f20
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCAGGC.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGC LB:LN_GACCAGGC PL:illumina PU:HiMom.1.GACCAGGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCGTTG.sam
new file mode 100644
index 0000000..2428095
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCGTTG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GACCGTTG LB:LN_GACCGTTG PL:illumina PU:HiMomN1NGACCGTTG CN:BI
+HiMom:1:1101:1218:2200 4 * 0 0 * * 0 0 GCTC #### RG:Z:HiMomN1
+HiMom:1:1101:1257:2223 4 * 0 0 * * 0 0 TGCT :?@D RG:Z:HiMomN1
+HiMom:1:1201:1180:2119 4 * 0 0 * * 0 0 GCTC CCCF RG:Z:HiMomN1
+HiMom:1:2101:1036:2087 4 * 0 0 * * 0 0 NGTC #4=D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCGTTG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCGTTG.sam.bak
new file mode 100644
index 0000000..ddc4c9c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCGTTG.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCGTTG LB:LN_GACCGTTG PL:illumina PU:HiMom.1.GACCGTTG CN:BI
+HiMom:1:1101:1218:2200 4 * 0 0 * * 0 0 GCTC #### RG:Z:HiMom.1
+HiMom:1:1101:1257:2223 4 * 0 0 * * 0 0 TGCT :?@D RG:Z:HiMom.1
+HiMom:1:1201:1180:2119 4 * 0 0 * * 0 0 GCTC CCCF RG:Z:HiMom.1
+HiMom:1:2101:1036:2087 4 * 0 0 * * 0 0 .GTC #4=D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCTAAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCTAAC.sam
new file mode 100644
index 0000000..2afb00e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCTAAC.sam
@@ -0,0 +1,3 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GACCTAAC LB:LN_GACCTAAC PL:illumina PU:HiMomN1NGACCTAAC CN:BI
+HiMom:1:1101:1302:2244 4 * 0 0 * * 0 0 TGAA CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCTAAC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCTAAC.sam.bak
new file mode 100644
index 0000000..f849dd7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GACCTAAC.sam.bak
@@ -0,0 +1,3 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCTAAC LB:LN_GACCTAAC PL:illumina PU:HiMom.1.GACCTAAC CN:BI
+HiMom:1:1101:1302:2244 4 * 0 0 * * 0 0 TGAA CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GATATCCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GATATCCA.sam
new file mode 100644
index 0000000..a0fd7e2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GATATCCA.sam
@@ -0,0 +1,5 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GATATCCA LB:LN_GATATCCA PL:illumina PU:HiMomN1NGATATCCA CN:BI
+HiMom:1:1101:1460:2176 4 * 0 0 * * 0 0 AGGA #### RG:Z:HiMomN1
+HiMom:1:2101:1031:2163 4 * 0 0 * * 0 0 NNAC #### RG:Z:HiMomN1
+HiMom:1:2101:1226:2088 4 * 0 0 * * 0 0 GCTC ==?B RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GATATCCA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GATATCCA.sam.bak
new file mode 100644
index 0000000..dc7404e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GATATCCA.sam.bak
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GATATCCA LB:LN_GATATCCA PL:illumina PU:HiMom.1.GATATCCA CN:BI
+HiMom:1:1101:1460:2176 4 * 0 0 * * 0 0 AGGA #### RG:Z:HiMom.1
+HiMom:1:2101:1031:2163 4 * 0 0 * * 0 0 ..AC #### RG:Z:HiMom.1
+HiMom:1:2101:1226:2088 4 * 0 0 * * 0 0 GCTC ==?B RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam
new file mode 100644
index 0000000..ad1d411
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GCCGTCGA LB:LN_GCCGTCGA PL:illumina PU:HiMomN1NGCCGTCGA CN:BI
+HiMom:1:1101:1111:2148 4 * 0 0 * * 0 0 GCGA #### RG:Z:HiMomN1
+HiMom:1:1101:1221:2143 4 * 0 0 * * 0 0 CAAT @@@F RG:Z:HiMomN1
+HiMom:1:1101:1327:2200 516 * 0 0 * * 0 0 GTCA @B at F RG:Z:HiMomN1
+HiMom:1:2101:1122:2136 4 * 0 0 * * 0 0 CTTG ???B RG:Z:HiMomN1
+HiMom:1:2101:1459:2083 4 * 0 0 * * 0 0 ATTT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam.bak
new file mode 100644
index 0000000..c7bcab6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCGTCGA.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCGTCGA LB:LN_GCCGTCGA PL:illumina PU:HiMom.1.GCCGTCGA CN:BI
+HiMom:1:1101:1111:2148 4 * 0 0 * * 0 0 GCGA #### RG:Z:HiMom.1
+HiMom:1:1101:1221:2143 4 * 0 0 * * 0 0 CAAT @@@F RG:Z:HiMom.1
+HiMom:1:1101:1327:2200 516 * 0 0 * * 0 0 GTCA @B at F RG:Z:HiMom.1
+HiMom:1:2101:1122:2136 4 * 0 0 * * 0 0 CTTG ???B RG:Z:HiMom.1
+HiMom:1:2101:1459:2083 4 * 0 0 * * 0 0 ATTT CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam
new file mode 100644
index 0000000..1c85d49
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GCCTAGCC LB:LN_GCCTAGCC PL:illumina PU:HiMomN1NGCCTAGCC CN:BI
+HiMom:1:1101:1165:2239 4 * 0 0 * * 0 0 ATGG #### RG:Z:HiMomN1
+HiMom:1:1101:1290:2225 4 * 0 0 * * 0 0 TCAG C@@F RG:Z:HiMomN1
+HiMom:1:1201:1280:2179 4 * 0 0 * * 0 0 GAGG @@BF RG:Z:HiMomN1
+HiMom:1:1201:1300:2137 4 * 0 0 * * 0 0 GCTC @@?D RG:Z:HiMomN1
+HiMom:1:2101:1023:2237 516 * 0 0 * * 0 0 NNTT #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam.bak
new file mode 100644
index 0000000..76a906a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GCCTAGCC.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCTAGCC LB:LN_GCCTAGCC PL:illumina PU:HiMom.1.GCCTAGCC CN:BI
+HiMom:1:1101:1165:2239 4 * 0 0 * * 0 0 ATGG #### RG:Z:HiMom.1
+HiMom:1:1101:1290:2225 4 * 0 0 * * 0 0 TCAG C@@F RG:Z:HiMom.1
+HiMom:1:1201:1280:2179 4 * 0 0 * * 0 0 GAGG @@BF RG:Z:HiMom.1
+HiMom:1:1201:1300:2137 4 * 0 0 * * 0 0 GCTC @@?D RG:Z:HiMom.1
+HiMom:1:2101:1023:2237 516 * 0 0 * * 0 0 ..TT #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTAACATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTAACATC.sam
new file mode 100644
index 0000000..147bdfb
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTAACATC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GTAACATC LB:LN_GTAACATC PL:illumina PU:HiMomN1NGTAACATC CN:BI
+HiMom:1:1101:1188:2237 4 * 0 0 * * 0 0 GCTT CCCF RG:Z:HiMomN1
+HiMom:1:2101:1208:2231 516 * 0 0 * * 0 0 CTTT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTAACATC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTAACATC.sam.bak
new file mode 100644
index 0000000..2456527
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTAACATC.sam.bak
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTAACATC LB:LN_GTAACATC PL:illumina PU:HiMom.1.GTAACATC CN:BI
+HiMom:1:1101:1188:2237 4 * 0 0 * * 0 0 GCTT CCCF RG:Z:HiMom.1
+HiMom:1:2101:1208:2231 516 * 0 0 * * 0 0 CTTT CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTCCACAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTCCACAG.sam
new file mode 100644
index 0000000..31a41b8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTCCACAG.sam
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_GTCCACAG LB:LN_GTCCACAG PL:illumina PU:HiMomN1NGTCCACAG CN:BI
+HiMom:1:1101:1069:2159 4 * 0 0 * * 0 0 GACG <<<@ RG:Z:HiMomN1
+HiMom:1:1201:1486:2109 4 * 0 0 * * 0 0 ACGT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTCCACAG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTCCACAG.sam.bak
new file mode 100644
index 0000000..da5c91f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/GTCCACAG.sam.bak
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTCCACAG LB:LN_GTCCACAG PL:illumina PU:HiMom.1.GTCCACAG CN:BI
+HiMom:1:1101:1069:2159 4 * 0 0 * * 0 0 GACG <<<@ RG:Z:HiMom.1
+HiMom:1:1201:1486:2109 4 * 0 0 * * 0 0 ACGT CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/N.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/N.sam
new file mode 100644
index 0000000..61092ed
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/N.sam
@@ -0,0 +1,18 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_N LB:LN_N PL:illumina PU:HiMomN1NN CN:BI
+HiMom:1:1101:1031:2224 516 * 0 0 * * 0 0 NNNN #### BC:Z:NNNNNNNN RG:Z:HiMomN1 XN:i:1
+HiMom:1:1101:1039:2147 516 * 0 0 * * 0 0 NNNN #### BC:Z:NNNNNNNN RG:Z:HiMomN1 XN:i:1
+HiMom:1:1101:1046:2175 516 * 0 0 * * 0 0 NNGG #### BC:Z:NNNNNNNN RG:Z:HiMomN1
+HiMom:1:1101:1047:2122 516 * 0 0 * * 0 0 NNTC #### BC:Z:NNNANNNN RG:Z:HiMomN1
+HiMom:1:1101:1048:2197 516 * 0 0 * * 0 0 NNGT #### BC:Z:NNNCNNNN RG:Z:HiMomN1
+HiMom:1:1101:1065:2193 4 * 0 0 * * 0 0 NCTT #### BC:Z:GAACGATN RG:Z:HiMomN1
+HiMom:1:1101:1162:2207 516 * 0 0 * * 0 0 TAAA #### BC:Z:ACAAAATT RG:Z:HiMomN1
+HiMom:1:1201:1159:2179 516 * 0 0 * * 0 0 TTTT ===A BC:Z:AAAAAAAA RG:Z:HiMomN1
+HiMom:1:1201:1414:2174 516 * 0 0 * * 0 0 TTTT @;@1 BC:Z:AGAAAAGA RG:Z:HiMomN1
+HiMom:1:2101:1040:2208 516 * 0 0 * * 0 0 NCTG #### BC:Z:ACGAAATC RG:Z:HiMomN1
+HiMom:1:2101:1059:2083 4 * 0 0 * * 0 0 NGAA #1=B BC:Z:TACCGTCT RG:Z:HiMomN1
+HiMom:1:2101:1143:2137 4 * 0 0 * * 0 0 GCTC @@@D BC:Z:TCCGTCTA RG:Z:HiMomN1
+HiMom:1:2101:1151:2182 516 * 0 0 * * 0 0 TTTT 9<<? BC:Z:GAAAAAAA RG:Z:HiMomN1
+HiMom:1:2101:1215:2110 4 * 0 0 * * 0 0 ATCT #### BC:Z:AAAAGAAG RG:Z:HiMomN1
+HiMom:1:2101:1285:2105 516 * 0 0 * * 0 0 TGTC #### BC:Z:TATCTCGG RG:Z:HiMomN1
+HiMom:1:2101:1450:2134 4 * 0 0 * * 0 0 ACAA CC at F BC:Z:ACCAGTTG RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/N.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/N.sam.bak
new file mode 100644
index 0000000..bfdd2ea
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/N.sam.bak
@@ -0,0 +1,18 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_N LB:LN_N PL:illumina PU:HiMom.1.N CN:BI
+HiMom:1:1101:1031:2224 516 * 0 0 * * 0 0 .... #### BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1
+HiMom:1:1101:1039:2147 516 * 0 0 * * 0 0 .... #### BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1
+HiMom:1:1101:1046:2175 516 * 0 0 * * 0 0 ..GG #### BC:Z:NNNNNNNN RG:Z:HiMom.1
+HiMom:1:1101:1047:2122 516 * 0 0 * * 0 0 ..TC #### BC:Z:NNNANNNN RG:Z:HiMom.1
+HiMom:1:1101:1048:2197 516 * 0 0 * * 0 0 ..GT #### BC:Z:NNNCNNNN RG:Z:HiMom.1
+HiMom:1:1101:1065:2193 4 * 0 0 * * 0 0 .CTT #### BC:Z:GAACGATN RG:Z:HiMom.1
+HiMom:1:1101:1162:2207 516 * 0 0 * * 0 0 TAAA #### BC:Z:ACAAAATT RG:Z:HiMom.1
+HiMom:1:1201:1159:2179 516 * 0 0 * * 0 0 TTTT ===A BC:Z:AAAAAAAA RG:Z:HiMom.1
+HiMom:1:1201:1414:2174 516 * 0 0 * * 0 0 TTTT @;@1 BC:Z:AGAAAAGA RG:Z:HiMom.1
+HiMom:1:2101:1040:2208 516 * 0 0 * * 0 0 .CTG #### BC:Z:ACGAAATC RG:Z:HiMom.1
+HiMom:1:2101:1059:2083 4 * 0 0 * * 0 0 .GAA #1=B BC:Z:TACCGTCT RG:Z:HiMom.1
+HiMom:1:2101:1143:2137 4 * 0 0 * * 0 0 GCTC @@@D BC:Z:TCCGTCTA RG:Z:HiMom.1
+HiMom:1:2101:1151:2182 516 * 0 0 * * 0 0 TTTT 9<<? BC:Z:GAAAAAAA RG:Z:HiMom.1
+HiMom:1:2101:1215:2110 4 * 0 0 * * 0 0 ATCT #### BC:Z:AAAAGAAG RG:Z:HiMom.1
+HiMom:1:2101:1285:2105 516 * 0 0 * * 0 0 TGTC #### BC:Z:TATCTCGG RG:Z:HiMom.1
+HiMom:1:2101:1450:2134 4 * 0 0 * * 0 0 ACAA CC at F BC:Z:ACCAGTTG RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAAGCACA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAAGCACA.sam
new file mode 100644
index 0000000..9fc12a6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAAGCACA.sam
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TAAGCACA LB:LN_TAAGCACA PL:illumina PU:HiMomN1NTAAGCACA CN:BI
+HiMom:1:1201:1064:2239 4 * 0 0 * * 0 0 GGGA 8?@: RG:Z:HiMomN1
+HiMom:1:2101:1258:2092 4 * 0 0 * * 0 0 TTAG #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAAGCACA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAAGCACA.sam.bak
new file mode 100644
index 0000000..436fa57
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAAGCACA.sam.bak
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAAGCACA LB:LN_TAAGCACA PL:illumina PU:HiMom.1.TAAGCACA CN:BI
+HiMom:1:1201:1064:2239 4 * 0 0 * * 0 0 GGGA 8?@: RG:Z:HiMom.1
+HiMom:1:2101:1258:2092 4 * 0 0 * * 0 0 TTAG #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TACCGTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TACCGTCT.sam
new file mode 100644
index 0000000..cfe6a67
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TACCGTCT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TACCGTCT LB:LN_TACCGTCT PL:illumina PU:HiMomN1NTACCGTCT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TACCGTCT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TACCGTCT.sam.bak
new file mode 100644
index 0000000..04cf526
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TACCGTCT.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TACCGTCT LB:LN_TACCGTCT PL:illumina PU:HiMom.1.TACCGTCT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam
new file mode 100644
index 0000000..63d50a9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TAGCGGTA LB:LN_TAGCGGTA PL:illumina PU:HiMomN1NTAGCGGTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam.bak
new file mode 100644
index 0000000..3455237
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TAGCGGTA.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAGCGGTA LB:LN_TAGCGGTA PL:illumina PU:HiMom.1.TAGCGGTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCAGCC.sam
new file mode 100644
index 0000000..384054c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCAGCC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TATCAGCC LB:LN_TATCAGCC PL:illumina PU:HiMomN1NTATCAGCC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCAGCC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCAGCC.sam.bak
new file mode 100644
index 0000000..5f7c0e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCAGCC.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCAGCC LB:LN_TATCAGCC PL:illumina PU:HiMom.1.TATCAGCC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCAGG.sam
new file mode 100644
index 0000000..7d7ad06
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCAGG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TATCCAGG LB:LN_TATCCAGG PL:illumina PU:HiMomN1NTATCCAGG CN:BI
+HiMom:1:1101:1071:2233 4 * 0 0 * * 0 0 GTTT <<<@ RG:Z:HiMomN1
+HiMom:1:1201:1140:2125 4 * 0 0 * * 0 0 TTCA CC at F RG:Z:HiMomN1
+HiMom:1:1201:1236:2187 4 * 0 0 * * 0 0 CTCC CCCF RG:Z:HiMomN1
+HiMom:1:2101:1133:2239 4 * 0 0 * * 0 0 AGCT ?@?D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCAGG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCAGG.sam.bak
new file mode 100644
index 0000000..d5753c0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCAGG.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCAGG LB:LN_TATCCAGG PL:illumina PU:HiMom.1.TATCCAGG CN:BI
+HiMom:1:1101:1071:2233 4 * 0 0 * * 0 0 GTTT <<<@ RG:Z:HiMom.1
+HiMom:1:1201:1140:2125 4 * 0 0 * * 0 0 TTCA CC at F RG:Z:HiMom.1
+HiMom:1:1201:1236:2187 4 * 0 0 * * 0 0 CTCC CCCF RG:Z:HiMom.1
+HiMom:1:2101:1133:2239 4 * 0 0 * * 0 0 AGCT ?@?D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCATG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCATG.sam
new file mode 100644
index 0000000..978f4b7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCATG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TATCCATG LB:LN_TATCCATG PL:illumina PU:HiMomN1NTATCCATG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCATG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCATG.sam.bak
new file mode 100644
index 0000000..a5373e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCCATG.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCATG LB:LN_TATCCATG PL:illumina PU:HiMom.1.TATCCATG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTCGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTCGG.sam
new file mode 100644
index 0000000..ee630bc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTCGG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TATCTCGG LB:LN_TATCTCGG PL:illumina PU:HiMomN1NTATCTCGG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTCGG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTCGG.sam.bak
new file mode 100644
index 0000000..04096c5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTCGG.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTCGG LB:LN_TATCTCGG PL:illumina PU:HiMom.1.TATCTCGG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTGCC.sam
new file mode 100644
index 0000000..ba6d255
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTGCC.sam
@@ -0,0 +1,9 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TATCTGCC LB:LN_TATCTGCC PL:illumina PU:HiMomN1NTATCTGCC CN:BI
+HiMom:1:1101:1267:2209 4 * 0 0 * * 0 0 GGCA =;?D RG:Z:HiMomN1
+HiMom:1:1101:1353:2226 4 * 0 0 * * 0 0 GTGC BBBF RG:Z:HiMomN1
+HiMom:1:1101:1435:2194 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1084:2204 4 * 0 0 * * 0 0 TGGC CCCF RG:Z:HiMomN1
+HiMom:1:1201:1142:2242 4 * 0 0 * * 0 0 GTAA ?=?D RG:Z:HiMomN1
+HiMom:1:1201:1187:2100 4 * 0 0 * * 0 0 AAAA =<=; RG:Z:HiMomN1 XN:i:1
+HiMom:1:1201:1392:2109 4 * 0 0 * * 0 0 GTCA BBCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTGCC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTGCC.sam.bak
new file mode 100644
index 0000000..b66efde
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TATCTGCC.sam.bak
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTGCC LB:LN_TATCTGCC PL:illumina PU:HiMom.1.TATCTGCC CN:BI
+HiMom:1:1101:1267:2209 4 * 0 0 * * 0 0 GGCA =;?D RG:Z:HiMom.1
+HiMom:1:1101:1353:2226 4 * 0 0 * * 0 0 GTGC BBBF RG:Z:HiMom.1
+HiMom:1:1101:1435:2194 4 * 0 0 * * 0 0 TTTT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1084:2204 4 * 0 0 * * 0 0 TGGC CCCF RG:Z:HiMom.1
+HiMom:1:1201:1142:2242 4 * 0 0 * * 0 0 GTAA ?=?D RG:Z:HiMom.1
+HiMom:1:1201:1187:2100 4 * 0 0 * * 0 0 AAAA =<=; RG:Z:HiMom.1 XN:i:1
+HiMom:1:1201:1392:2109 4 * 0 0 * * 0 0 GTCA BBCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam
new file mode 100644
index 0000000..5abd14d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TCCGTCTA LB:LN_TCCGTCTA PL:illumina PU:HiMomN1NTCCGTCTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam.bak
new file mode 100644
index 0000000..8f5dbc6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCCGTCTA.sam.bak
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCCGTCTA LB:LN_TCCGTCTA PL:illumina PU:HiMom.1.TCCGTCTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam
new file mode 100644
index 0000000..7a73b5c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam
@@ -0,0 +1,7 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TCGCTAGA LB:LN_TCGCTAGA PL:illumina PU:HiMomN1NTCGCTAGA CN:BI
+HiMom:1:1101:1143:2192 4 * 0 0 * * 0 0 CGAC CCCF RG:Z:HiMomN1
+HiMom:1:1101:1479:2221 4 * 0 0 * * 0 0 GGGG @CCF RG:Z:HiMomN1
+HiMom:1:1201:1312:2112 4 * 0 0 * * 0 0 ATTT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1416:2128 4 * 0 0 * * 0 0 TTGG @@@D RG:Z:HiMomN1
+HiMom:1:2101:1064:2242 4 * 0 0 * * 0 0 NGGA #### RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam.bak
new file mode 100644
index 0000000..9918fb0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCGCTAGA.sam.bak
@@ -0,0 +1,7 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCGCTAGA LB:LN_TCGCTAGA PL:illumina PU:HiMom.1.TCGCTAGA CN:BI
+HiMom:1:1101:1143:2192 4 * 0 0 * * 0 0 CGAC CCCF RG:Z:HiMom.1
+HiMom:1:1101:1479:2221 4 * 0 0 * * 0 0 GGGG @CCF RG:Z:HiMom.1
+HiMom:1:1201:1312:2112 4 * 0 0 * * 0 0 ATTT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1416:2128 4 * 0 0 * * 0 0 TTGG @@@D RG:Z:HiMom.1
+HiMom:1:2101:1064:2242 4 * 0 0 * * 0 0 .GGA #### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam
new file mode 100644
index 0000000..65d29cc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam
@@ -0,0 +1,3 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TCTGCAAG LB:LN_TCTGCAAG PL:illumina PU:HiMomN1NTCTGCAAG CN:BI
+HiMom:1:1201:1042:2174 4 * 0 0 * * 0 0 NTCA #0;@ RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam.bak
new file mode 100644
index 0000000..2c2caf0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TCTGCAAG.sam.bak
@@ -0,0 +1,3 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCTGCAAG LB:LN_TCTGCAAG PL:illumina PU:HiMom.1.TCTGCAAG CN:BI
+HiMom:1:1201:1042:2174 4 * 0 0 * * 0 0 .TCA #0;@ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam
new file mode 100644
index 0000000..6008215
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam
@@ -0,0 +1,4 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TGCAAGTA LB:LN_TGCAAGTA PL:illumina PU:HiMomN1NTGCAAGTA CN:BI
+HiMom:1:1101:1242:2170 4 * 0 0 * * 0 0 GGAA @@@D RG:Z:HiMomN1
+HiMom:1:2101:1163:2222 4 * 0 0 * * 0 0 GAGC @@@D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam.bak
new file mode 100644
index 0000000..c8e5e0b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCAAGTA.sam.bak
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCAAGTA LB:LN_TGCAAGTA PL:illumina PU:HiMom.1.TGCAAGTA CN:BI
+HiMom:1:1101:1242:2170 4 * 0 0 * * 0 0 GGAA @@@D RG:Z:HiMom.1
+HiMom:1:2101:1163:2222 4 * 0 0 * * 0 0 GAGC @@@D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam
new file mode 100644
index 0000000..cb5f323
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TGCTGCTG LB:LN_TGCTGCTG PL:illumina PU:HiMomN1NTGCTGCTG CN:BI
+HiMom:1:1101:1084:2136 4 * 0 0 * * 0 0 TTTC <<<@ RG:Z:HiMomN1
+HiMom:1:1201:1285:2100 4 * 0 0 * * 0 0 GATC @@@D RG:Z:HiMomN1
+HiMom:1:2101:1162:2139 4 * 0 0 * * 0 0 ATCG BCCF RG:Z:HiMomN1
+HiMom:1:2101:1195:2150 4 * 0 0 * * 0 0 AATT CCCF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam.bak
new file mode 100644
index 0000000..077ddf5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGCTGCTG.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCTGCTG LB:LN_TGCTGCTG PL:illumina PU:HiMom.1.TGCTGCTG CN:BI
+HiMom:1:1101:1084:2136 4 * 0 0 * * 0 0 TTTC <<<@ RG:Z:HiMom.1
+HiMom:1:1201:1285:2100 4 * 0 0 * * 0 0 GATC @@@D RG:Z:HiMom.1
+HiMom:1:2101:1162:2139 4 * 0 0 * * 0 0 ATCG BCCF RG:Z:HiMom.1
+HiMom:1:2101:1195:2150 4 * 0 0 * * 0 0 AATT CCCF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAACTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAACTC.sam
new file mode 100644
index 0000000..c62f4c9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAACTC.sam
@@ -0,0 +1,3 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TGTAACTC LB:LN_TGTAACTC PL:illumina PU:HiMomN1NTGTAACTC CN:BI
+HiMom:1:1201:1421:2154 4 * 0 0 * * 0 0 TGTG BC at D RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAACTC.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAACTC.sam.bak
new file mode 100644
index 0000000..9034a76
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAACTC.sam.bak
@@ -0,0 +1,3 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAACTC LB:LN_TGTAACTC PL:illumina PU:HiMom.1.TGTAACTC CN:BI
+HiMom:1:1201:1421:2154 4 * 0 0 * * 0 0 TGTG BC at D RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAATCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAATCA.sam
new file mode 100644
index 0000000..88ded6f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAATCA.sam
@@ -0,0 +1,5 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TGTAATCA LB:LN_TGTAATCA PL:illumina PU:HiMomN1NTGTAATCA CN:BI
+HiMom:1:1101:1419:2119 4 * 0 0 * * 0 0 ACTT #### RG:Z:HiMomN1
+HiMom:1:1201:1208:2132 4 * 0 0 * * 0 0 CTGT @@CD RG:Z:HiMomN1
+HiMom:1:1201:1344:2147 4 * 0 0 * * 0 0 ACGA @<?? RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAATCA.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAATCA.sam.bak
new file mode 100644
index 0000000..8880ffa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TGTAATCA.sam.bak
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAATCA LB:LN_TGTAATCA PL:illumina PU:HiMom.1.TGTAATCA CN:BI
+HiMom:1:1101:1419:2119 4 * 0 0 * * 0 0 ACTT #### RG:Z:HiMom.1
+HiMom:1:1201:1208:2132 4 * 0 0 * * 0 0 CTGT @@CD RG:Z:HiMom.1
+HiMom:1:1201:1344:2147 4 * 0 0 * * 0 0 ACGA @<?? RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam
new file mode 100644
index 0000000..f353f64
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam
@@ -0,0 +1,6 @@
+ at HD VN:1N5 SO:queryname
+ at RG ID:HiMomN1 SM:SA_TTGTCTAT LB:LN_TTGTCTAT PL:illumina PU:HiMomN1NTTGTCTAT CN:BI
+HiMom:1:1101:1219:2164 4 * 0 0 * * 0 0 ATCT CCCF RG:Z:HiMomN1
+HiMom:1:1201:1103:2184 4 * 0 0 * * 0 0 AGAA B at BF RG:Z:HiMomN1
+HiMom:1:1201:1107:2109 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMomN1
+HiMom:1:1201:1252:2141 4 * 0 0 * * 0 0 AGTT BCBF RG:Z:HiMomN1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam.bak b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam.bak
new file mode 100644
index 0000000..fbf1f1b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/TTGTCTAT.sam.bak
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TTGTCTAT LB:LN_TTGTCTAT PL:illumina PU:HiMom.1.TTGTCTAT CN:BI
+HiMom:1:1101:1219:2164 4 * 0 0 * * 0 0 ATCT CCCF RG:Z:HiMom.1
+HiMom:1:1201:1103:2184 4 * 0 0 * * 0 0 AGAA B at BF RG:Z:HiMom.1
+HiMom:1:1201:1107:2109 4 * 0 0 * * 0 0 ACAA CCCF RG:Z:HiMom.1
+HiMom:1:1201:1252:2141 4 * 0 0 * * 0 0 AGTT BCBF RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/barcode.params b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/barcode.params
new file mode 100644
index 0000000..67054bc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/indicies/barcode.params
@@ -0,0 +1,63 @@
+BARCODE SAMPLE_ALIAS LIBRARY_NAME OUTPUT
+AAAAAAAA SA_AAAAAAAA LN_AAAAAAAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AAAAAAAA.sam
+AAAAGAAG SA_AAAAGAAG LN_AAAAGAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AAAAGAAG.sam
+AACAATGG SA_AACAATGG LN_AACAATGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AACAATGG.sam
+AACGCATT SA_AACGCATT LN_AACGCATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AACGCATT.sam
+ACAAAATT SA_ACAAAATT LN_ACAAAATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACAAAATT.sam
+ACAGGTAT SA_ACAGGTAT LN_ACAGGTAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACAGGTAT.sam
+ACAGTTGA SA_ACAGTTGA LN_ACAGTTGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACAGTTGA.sam
+ACCAGTTG SA_ACCAGTTG LN_ACCAGTTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACCAGTTG.sam
+ACGAAATC SA_ACGAAATC LN_ACGAAATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACGAAATC.sam
+ACTAAGAC SA_ACTAAGAC LN_ACTAAGAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACTAAGAC.sam
+ACTGTACC SA_ACTGTACC LN_ACTGTACC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACTGTACC.sam
+ACTGTATC SA_ACTGTATC LN_ACTGTATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ACTGTATC.sam
+AGAAAAGA SA_AGAAAAGA LN_AGAAAAGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AGAAAAGA.sam
+AGCATGGA SA_AGCATGGA LN_AGCATGGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AGCATGGA.sam
+AGGTAAGG SA_AGGTAAGG LN_AGGTAAGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AGGTAAGG.sam
+AGGTCGCA SA_AGGTCGCA LN_AGGTCGCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/AGGTCGCA.sam
+ATTATCAA SA_ATTATCAA LN_ATTATCAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ATTATCAA.sam
+ATTCCTCT SA_ATTCCTCT LN_ATTCCTCT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/ATTCCTCT.sam
+CAACTCTC SA_CAACTCTC LN_CAACTCTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CAACTCTC.sam
+CAATAGAC SA_CAATAGAC LN_CAATAGAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CAATAGAC.sam
+CAATAGTC SA_CAATAGTC LN_CAATAGTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CAATAGTC.sam
+CAGCGGAT SA_CAGCGGAT LN_CAGCGGAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CAGCGGAT.sam
+CAGCGGTA SA_CAGCGGTA LN_CAGCGGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CAGCGGTA.sam
+CCAACATT SA_CCAACATT LN_CCAACATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CCAACATT.sam
+CCAGCACC SA_CCAGCACC LN_CCAGCACC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CCAGCACC.sam
+CCATGCGT SA_CCATGCGT LN_CCATGCGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CCATGCGT.sam
+CGCCTTCC SA_CGCCTTCC LN_CGCCTTCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CGCCTTCC.sam
+CGCTATGT SA_CGCTATGT LN_CGCTATGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CGCTATGT.sam
+CTAACTCG SA_CTAACTCG LN_CTAACTCG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CTAACTCG.sam
+CTATGCGC SA_CTATGCGC LN_CTATGCGC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CTATGCGC.sam
+CTATGCGT SA_CTATGCGT LN_CTATGCGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CTATGCGT.sam
+CTGCGGAT SA_CTGCGGAT LN_CTGCGGAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CTGCGGAT.sam
+CTGTAATC SA_CTGTAATC LN_CTGTAATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/CTGTAATC.sam
+GAAAAAAA SA_GAAAAAAA LN_GAAAAAAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GAAAAAAA.sam
+GAACGAT. SA_GAACGAT. LN_GAACGAT. /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GAACGAT..sam
+GAAGGAAG SA_GAAGGAAG LN_GAAGGAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GAAGGAAG.sam
+GACCAGGA SA_GACCAGGA LN_GACCAGGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GACCAGGA.sam
+GACCAGGC SA_GACCAGGC LN_GACCAGGC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GACCAGGC.sam
+GACCGTTG SA_GACCGTTG LN_GACCGTTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GACCGTTG.sam
+GACCTAAC SA_GACCTAAC LN_GACCTAAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GACCTAAC.sam
+GATATCCA SA_GATATCCA LN_GATATCCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GATATCCA.sam
+GCCGTCGA SA_GCCGTCGA LN_GCCGTCGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GCCGTCGA.sam
+GCCTAGCC SA_GCCTAGCC LN_GCCTAGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GCCTAGCC.sam
+GTAACATC SA_GTAACATC LN_GTAACATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GTAACATC.sam
+GTCCACAG SA_GTCCACAG LN_GTCCACAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/GTCCACAG.sam
+TAAGCACA SA_TAAGCACA LN_TAAGCACA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TAAGCACA.sam
+TACCGTCT SA_TACCGTCT LN_TACCGTCT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TACCGTCT.sam
+TAGCGGTA SA_TAGCGGTA LN_TAGCGGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TAGCGGTA.sam
+TATCAGCC SA_TATCAGCC LN_TATCAGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TATCAGCC.sam
+TATCCAGG SA_TATCCAGG LN_TATCCAGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TATCCAGG.sam
+TATCCATG SA_TATCCATG LN_TATCCATG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TATCCATG.sam
+TATCTCGG SA_TATCTCGG LN_TATCTCGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TATCTCGG.sam
+TATCTGCC SA_TATCTGCC LN_TATCTGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TATCTGCC.sam
+TCCGTCTA SA_TCCGTCTA LN_TCCGTCTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TCCGTCTA.sam
+TCGCTAGA SA_TCGCTAGA LN_TCGCTAGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TCGCTAGA.sam
+TCTGCAAG SA_TCTGCAAG LN_TCTGCAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TCTGCAAG.sam
+TGCAAGTA SA_TGCAAGTA LN_TGCAAGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TGCAAGTA.sam
+TGCTGCTG SA_TGCTGCTG LN_TGCTGCTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TGCTGCTG.sam
+TGTAACTC SA_TGTAACTC LN_TGTAACTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TGTAACTC.sam
+TGTAATCA SA_TGTAATCA LN_TGTAATCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TGTAATCA.sam
+TTGTCTAT SA_TTGTCTAT LN_TTGTCTAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/TTGTCTAT.sam
+N SA_N LN_N /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode.607284330065160420.dir/N.sam
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/multiplexed_positive_rgtags.params b/testdata/picard/illumina/25T8B25T/sams_with_4M/multiplexed_positive_rgtags.params
new file mode 100644
index 0000000..8046f3b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/multiplexed_positive_rgtags.params
@@ -0,0 +1,63 @@
+BARCODE SAMPLE_ALIAS LIBRARY_NAME
+AAAAAAAA SA_AAAAAAAA LN_AAAAAAAA
+AAAAGAAG SA_AAAAGAAG LN_AAAAGAAG
+AACAATGG SA_AACAATGG LN_AACAATGG
+AACGCATT SA_AACGCATT LN_AACGCATT
+ACAAAATT SA_ACAAAATT LN_ACAAAATT
+ACAGGTAT SA_ACAGGTAT LN_ACAGGTAT
+ACAGTTGA SA_ACAGTTGA LN_ACAGTTGA
+ACCAGTTG SA_ACCAGTTG LN_ACCAGTTG
+ACGAAATC SA_ACGAAATC LN_ACGAAATC
+ACTAAGAC SA_ACTAAGAC LN_ACTAAGAC
+ACTGTACC SA_ACTGTACC LN_ACTGTACC
+ACTGTATC SA_ACTGTATC LN_ACTGTATC
+AGAAAAGA SA_AGAAAAGA LN_AGAAAAGA
+AGCATGGA SA_AGCATGGA LN_AGCATGGA
+AGGTAAGG SA_AGGTAAGG LN_AGGTAAGG
+AGGTCGCA SA_AGGTCGCA LN_AGGTCGCA
+ATTATCAA SA_ATTATCAA LN_ATTATCAA
+ATTCCTCT SA_ATTCCTCT LN_ATTCCTCT
+CAACTCTC SA_CAACTCTC LN_CAACTCTC
+CAATAGAC SA_CAATAGAC LN_CAATAGAC
+CAATAGTC SA_CAATAGTC LN_CAATAGTC
+CAGCGGAT SA_CAGCGGAT LN_CAGCGGAT
+CAGCGGTA SA_CAGCGGTA LN_CAGCGGTA
+CCAACATT SA_CCAACATT LN_CCAACATT
+CCAGCACC SA_CCAGCACC LN_CCAGCACC
+CCATGCGT SA_CCATGCGT LN_CCATGCGT
+CGCCTTCC SA_CGCCTTCC LN_CGCCTTCC
+CGCTATGT SA_CGCTATGT LN_CGCTATGT
+CTAACTCG SA_CTAACTCG LN_CTAACTCG
+CTATGCGC SA_CTATGCGC LN_CTATGCGC
+CTATGCGT SA_CTATGCGT LN_CTATGCGT
+CTGCGGAT SA_CTGCGGAT LN_CTGCGGAT
+CTGTAATC SA_CTGTAATC LN_CTGTAATC
+GAAAAAAA SA_GAAAAAAA LN_GAAAAAAA
+GAACGAT. SA_GAACGAT. LN_GAACGAT.
+GAAGGAAG SA_GAAGGAAG LN_GAAGGAAG
+GACCAGGA SA_GACCAGGA LN_GACCAGGA
+GACCAGGC SA_GACCAGGC LN_GACCAGGC
+GACCGTTG SA_GACCGTTG LN_GACCGTTG
+GACCTAAC SA_GACCTAAC LN_GACCTAAC
+GATATCCA SA_GATATCCA LN_GATATCCA
+GCCGTCGA SA_GCCGTCGA LN_GCCGTCGA
+GCCTAGCC SA_GCCTAGCC LN_GCCTAGCC
+GTAACATC SA_GTAACATC LN_GTAACATC
+GTCCACAG SA_GTCCACAG LN_GTCCACAG
+TAAGCACA SA_TAAGCACA LN_TAAGCACA
+TACCGTCT SA_TACCGTCT LN_TACCGTCT
+TAGCGGTA SA_TAGCGGTA LN_TAGCGGTA
+TATCAGCC SA_TATCAGCC LN_TATCAGCC
+TATCCAGG SA_TATCCAGG LN_TATCCAGG
+TATCCATG SA_TATCCATG LN_TATCCATG
+TATCTCGG SA_TATCTCGG LN_TATCTCGG
+TATCTGCC SA_TATCTGCC LN_TATCTGCC
+TCCGTCTA SA_TCCGTCTA LN_TCCGTCTA
+TCGCTAGA SA_TCGCTAGA LN_TCGCTAGA
+TCTGCAAG SA_TCTGCAAG LN_TCTGCAAG
+TGCAAGTA SA_TGCAAGTA LN_TGCAAGTA
+TGCTGCTG SA_TGCTGCTG LN_TGCTGCTG
+TGTAACTC SA_TGTAACTC LN_TGTAACTC
+TGTAATCA SA_TGTAATCA LN_TGTAATCA
+TTGTCTAT SA_TTGTCTAT LN_TTGTCTAT
+N SA_N LN_N
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AAAAAAAA.sam
new file mode 100644
index 0000000..4882d35
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAAAAA LB:LN_AAAAAAAA PL:illumina PU:HiMom.1.AAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AAAAGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AAAAGAAG.sam
new file mode 100644
index 0000000..6446f79
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AAAAGAAG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAGAAG LB:LN_AAAAGAAG PL:illumina PU:HiMom.1.AAAAGAAG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AACAATGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AACAATGG.sam
new file mode 100644
index 0000000..03a9eea
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AACAATGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACAATGG LB:LN_AACAATGG PL:illumina PU:HiMom.1.AACAATGG CN:BI
+HiMom:1:1101:1138:2141 77 * 0 0 * * 0 0 .TTACCAAGGTTTTCTGTTTAGTGA #1=DDFFFHHFHHJJJIHJIJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1138:2141 141 * 0 0 * * 0 0 ATCTGCTTCAGGTCGATCAGA FFFFHGHHHJJIGHIJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1206:2126 77 * 0 0 * * 0 0 .ATTCTGCCATATTGGTCCGACAGT #1=DDFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1
+HiMom:1:1101:1206:2126 141 * 0 0 * * 0 0 GTCCAGTGGTGCACTGAATGT FFFFHHHHHHIIJJJJIJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1077:2139 77 * 0 0 * * 0 0 CACAGGCTTCCACGGACTTAACGTC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1077:2139 141 * 0 0 * * 0 0 AGTTGGCGGATGAAGCAGATA FFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1
+HiMom:1:2101:1112:2245 77 * 0 0 * * 0 0 TGCCATCTGCTCTGGGAAGCACCAG 1:=DDDDDFBC:DEFIFFFIEF at BE RG:Z:HiMom.1
+HiMom:1:2101:1112:2245 141 * 0 0 * * 0 0 AGTGTTGTAATTTCGTCTTCT BDDDCCFCAACGGFFCBFFAE RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AACGCATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AACGCATT.sam
new file mode 100644
index 0000000..c221bbe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AACGCATT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACGCATT LB:LN_AACGCATT PL:illumina PU:HiMom.1.AACGCATT CN:BI
+HiMom:1:1101:1197:2200 77 * 0 0 * * 0 0 GGGCGCCCCGTGAGGACCCAGTCCT @C at FFADDFFCFCEHIIJIJJIEFC RG:Z:HiMom.1
+HiMom:1:1101:1197:2200 141 * 0 0 * * 0 0 TCCACTGGAACCACAGAACCC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1308:2153 589 * 0 0 * * 0 0 TTTTGGAAGAGACCTCAATTACTGT ???DDDDD?:22AE:A2<3,AF?3A RG:Z:HiMom.1
+HiMom:1:1101:1308:2153 653 * 0 0 * * 0 0 TAAGGTAATCCCCGCATGTGT 4===AFFDFFGFDGFB at CFB: RG:Z:HiMom.1
+HiMom:1:1101:1452:2132 77 * 0 0 * * 0 0 .CGTCCTGGAAAACGGGGCGCGGCT #1=BDBDDFHHHHF at FHDHIGIIII RG:Z:HiMom.1
+HiMom:1:1101:1452:2132 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1150:2161 77 * 0 0 * * 0 0 AAGTCACCTAATATCTTTTTTTTTT @@<??;?D?CFD,A4CDDHFBIIID RG:Z:HiMom.1
+HiMom:1:1201:1150:2161 141 * 0 0 * * 0 0 CACTACTGTGATTGTGCCACT FFFFGHHHHGIIIICEHCFGH RG:Z:HiMom.1
+HiMom:1:2101:1240:2197 589 * 0 0 * * 0 0 ATAAAACATAGCAATATTTTCCTAT ######################### RG:Z:HiMom.1
+HiMom:1:2101:1240:2197 653 * 0 0 * * 0 0 GAGATCCTTGTTACATGCCCA +A:DD?:ADEE@::C4:C<E: RG:Z:HiMom.1
+HiMom:1:2101:1336:2109 77 * 0 0 * * 0 0 .ACTATCAGGATCGTGGCTATTTTG #1BDDFFFHHHHHJIJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1336:2109 141 * 0 0 * * 0 0 CAGAACAGCTCCAGGTGCTCC FFFFHHHHHJJJJJJCGHIJJ RG:Z:HiMom.1
+HiMom:1:2101:1427:2081 77 * 0 0 * * 0 0 .CGAGTGCCTAGTGGGCCACTTTTG #4=DDBDFHHHHFHIJJJJIJJJJI RG:Z:HiMom.1
+HiMom:1:2101:1427:2081 141 * 0 0 * * 0 0 CTTCCATGGCCACCGTCCTGC FFFFHHHHHJJJIIGFIIJJI RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAAAATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAAAATT.sam
new file mode 100644
index 0000000..3cfd422
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAAAATT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAAAATT LB:LN_ACAAAATT PL:illumina PU:HiMom.1.ACAAAATT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAGGTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAGGTAT.sam
new file mode 100644
index 0000000..cd68ad6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAGGTAT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGGTAT LB:LN_ACAGGTAT PL:illumina PU:HiMom.1.ACAGGTAT CN:BI
+HiMom:1:1101:1236:2121 77 * 0 0 * * 0 0 .GGTGCTTCATATCCCTCTAGAGGA #1=BDDFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1236:2121 141 * 0 0 * * 0 0 GCTTACTTTGTAGCCTTCATC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1341:2116 77 * 0 0 * * 0 0 .AGAAGCCCCAGGAGGAAGACAGTC #1=DDFFFHHHHHHHJIIJJJJJGI RG:Z:HiMom.1
+HiMom:1:1201:1341:2116 141 * 0 0 * * 0 0 CAGCGAGACTGGCAACTTAAA ##################### RG:Z:HiMom.1
+HiMom:1:2101:1063:2206 77 * 0 0 * * 0 0 TCCTATTCGCCTACACAATTCTCCG CCCFFFFFHHHHHJJJJJJJHJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1063:2206 141 * 0 0 * * 0 0 TAGGATGAGGATGGATAGTAA DDFFHHHHHJHIIJHIIIHHJ RG:Z:HiMom.1
+HiMom:1:2101:1325:2083 77 * 0 0 * * 0 0 .CAGAAGAAAGGGCCTTGTCGGAGG #1=DDDDDHHFHDGI at EEHG:?FA8 RG:Z:HiMom.1
+HiMom:1:2101:1325:2083 141 * 0 0 * * 0 0 CTCTTCCGATCTGGAGAAAAA ##################### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAGTTGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAGTTGA.sam
new file mode 100644
index 0000000..1cd317e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACAGTTGA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGTTGA LB:LN_ACAGTTGA PL:illumina PU:HiMom.1.ACAGTTGA CN:BI
+HiMom:1:2101:1048:2238 77 * 0 0 * * 0 0 .CTGCCGTGTCCTGACTTCTGGAAT #1:B?ADDACF<DCG;EG<FHH at CE RG:Z:HiMom.1
+HiMom:1:2101:1048:2238 141 * 0 0 * * 0 0 ACATCGTTGAAGCACTGGATC DDDB<CFFHCHGDBHGIIIII RG:Z:HiMom.1
+HiMom:1:2101:1216:2193 77 * 0 0 * * 0 0 TTTTCTTGGCCTCTGTTTTTTTTTT BCCFDFFFHHFFHJIGIJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1216:2193 141 * 0 0 * * 0 0 ATGACACTGCATTTTAAATAC DDDDHFFHHGGDFHFHIIHGG RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACCAGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACCAGTTG.sam
new file mode 100644
index 0000000..0df9571
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACCAGTTG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACCAGTTG LB:LN_ACCAGTTG PL:illumina PU:HiMom.1.ACCAGTTG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACGAAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACGAAATC.sam
new file mode 100644
index 0000000..1fbb7b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACGAAATC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACGAAATC LB:LN_ACGAAATC PL:illumina PU:HiMom.1.ACGAAATC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTAAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTAAGAC.sam
new file mode 100644
index 0000000..10e74a4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTAAGAC.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTAAGAC LB:LN_ACTAAGAC PL:illumina PU:HiMom.1.ACTAAGAC CN:BI
+HiMom:1:1101:1259:2152 77 * 0 0 * * 0 0 CACCTATAATCCCAGCTACTCCAGA CCCFFFFFHHHHHJJJJJJIJJJIJ RG:Z:HiMom.1
+HiMom:1:1101:1259:2152 141 * 0 0 * * 0 0 TTATATTTTTTTAGACATAGG FFFFGHHHHJJJJIGIIJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1261:2127 589 * 0 0 * * 0 0 .TGAAATCTGGATAGGCTGGAGTTA #0-@@@################### RG:Z:HiMom.1
+HiMom:1:1101:1261:2127 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFHGHHHJJIFDDDDDDDD RG:Z:HiMom.1
+HiMom:1:2101:1021:2209 77 * 0 0 * * 0 0 .GGCCCCACCCTCCTCCAGCACGTC #1=DDFFFHHHHHJJJJJJHIIHFH RG:Z:HiMom.1
+HiMom:1:2101:1021:2209 141 * 0 0 * * 0 0 AAGGCTGCTAGCTGGCCAGAG @>??@@??@?????????>?@ RG:Z:HiMom.1
+HiMom:1:2101:1262:2128 589 * 0 0 * * 0 0 AGCAGAAGGGCAAAAGCTGGCTTGA 9;<@:@################### RG:Z:HiMom.1
+HiMom:1:2101:1262:2128 653 * 0 0 * * 0 0 GTGGTAACTTTTCTGACACCT -9@;@?:8>?4:>?@###### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTGTACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTGTACC.sam
new file mode 100644
index 0000000..bcf9179
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTGTACC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTACC LB:LN_ACTGTACC PL:illumina PU:HiMom.1.ACTGTACC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTGTATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTGTATC.sam
new file mode 100644
index 0000000..8ec6a1f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ACTGTATC.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTATC LB:LN_ACTGTATC PL:illumina PU:HiMom.1.ACTGTATC CN:BI
+HiMom:1:1201:1458:2109 77 * 0 0 * * 0 0 .GAGACCATAGAGCGGATGCTTTCA #1=DDDFFHHGHGIJJIGIIJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1458:2109 141 * 0 0 * * 0 0 CGAACACACAAGAACTTTTTT FFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1
+HiMom:1:2101:1105:2131 77 * 0 0 * * 0 0 TTGGAACACAGCGGGAATCACAGCA CCCFFFFFHHHHHJIJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1105:2131 141 * 0 0 * * 0 0 AGCAGCAACAGCAGAAACATG FFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1349:2084 77 * 0 0 * * 0 0 .CAAGTAGCAGTGTCACGCCTTAGC #1=DDBDDADFDDBEH at HC=CEGG@ RG:Z:HiMom.1
+HiMom:1:2101:1349:2084 141 * 0 0 * * 0 0 TGAATCATTGGTGTCTGAAGA ?=>=>>?############## RG:Z:HiMom.1
+HiMom:1:2101:1365:2094 77 * 0 0 * * 0 0 .AAGGTGAAGGCCGGCGCGCTCGCC #1=BDDDFFHHHHJGGGIGFIHIIJ RG:Z:HiMom.1
+HiMom:1:2101:1365:2094 141 * 0 0 * * 0 0 TTCCGATCTTGTGCTCTTCCG FFFDHFHHGJJIIJIJJIHII RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGAAAAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGAAAAGA.sam
new file mode 100644
index 0000000..d40aec2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGAAAAGA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGAAAAGA LB:LN_AGAAAAGA PL:illumina PU:HiMom.1.AGAAAAGA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGCATGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGCATGGA.sam
new file mode 100644
index 0000000..deb98e8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGCATGGA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGCATGGA LB:LN_AGCATGGA PL:illumina PU:HiMom.1.AGCATGGA CN:BI
+HiMom:1:1101:1406:2222 77 * 0 0 * * 0 0 CTCCCCCCGGGCTGAACCAGGGTAC CCCFFDDDDHDFHIIIIIIIII9DG RG:Z:HiMom.1
+HiMom:1:1101:1406:2222 141 * 0 0 * * 0 0 GGACTCCCCTGGTTCTGGGCA DDBD?FHDFGIIIGIGHHIII RG:Z:HiMom.1
+HiMom:1:1201:1291:2158 77 * 0 0 * * 0 0 AGAAGGGGAAAGCCTTCATCTTGGC BCBFFFFFHHHHHJJJJJIIFIJIJ RG:Z:HiMom.1
+HiMom:1:1201:1291:2158 141 * 0 0 * * 0 0 TGCTCTTCCGATCTGATGGGC FFDD?FHHFGEHHIIDHIIII RG:Z:HiMom.1
+HiMom:1:2101:1370:2116 77 * 0 0 * * 0 0 .TGGTGGTCCATAGAGATTTGAAAC #1:4BD7DACF?FCA:4+<ACHIIH RG:Z:HiMom.1
+HiMom:1:2101:1370:2116 141 * 0 0 * * 0 0 ATCTGACATCATGTTTGAAAG FFFDFFHDHIGBHHII<HEDB RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGGTAAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGGTAAGG.sam
new file mode 100644
index 0000000..f59d4e1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGGTAAGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTAAGG LB:LN_AGGTAAGG PL:illumina PU:HiMom.1.AGGTAAGG CN:BI
+HiMom:1:1101:1263:2236 589 * 0 0 * * 0 0 CTTTGAAGACATTGTGAGATCTGTA <==A<42 at C+A4A?,2A@=4 at 7A?? RG:Z:HiMom.1
+HiMom:1:1101:1263:2236 653 * 0 0 * * 0 0 CTTCAGTAATTTTAGTACTGC ##################### RG:Z:HiMom.1
+HiMom:1:2101:1054:2162 77 * 0 0 * * 0 0 .CCAGGTGTCTTCCCGGGCCCTGCC #1=DDFBDFHHHHJJJJJIJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1054:2162 141 * 0 0 * * 0 0 CAGGGAAGGGAAGGAAGGGTG DFDFHHHHHJIJIIDHHGICG RG:Z:HiMom.1
+HiMom:1:2101:1163:2203 77 * 0 0 * * 0 0 TCTCCATGTGAAACAAGCAAAAAGA CCCFFFFFHHHHGJJJIJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1163:2203 141 * 0 0 * * 0 0 TTCACTTATGTATTTATGAAT DFFFHHHHHJHIIJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1249:2231 77 * 0 0 * * 0 0 GTTATTGATAGGATACTGTACAAAC @BCFFFFDHHHHFIJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1249:2231 141 * 0 0 * * 0 0 TCGGCCTTCCACTCTAGCATA FFFFFHHGHIJJJGJIIJHIJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGGTCGCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGGTCGCA.sam
new file mode 100644
index 0000000..e633ab1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/AGGTCGCA.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTCGCA LB:LN_AGGTCGCA PL:illumina PU:HiMom.1.AGGTCGCA CN:BI
+HiMom:1:1101:1150:2228 77 * 0 0 * * 0 0 GCTACTCAGTAGACAGTCCCACCCT @@CADDDDFCFHHIIIIGGIIGGGI RG:Z:HiMom.1
+HiMom:1:1101:1150:2228 141 * 0 0 * * 0 0 GAGGCGATTCCTAGGGGGTTG D8;@BH6DHD<FGGGEIGHIG RG:Z:HiMom.1
+HiMom:1:1101:1491:2120 77 * 0 0 * * 0 0 .GGCAGGTGCCCCCACTTGACTCTC #1?DFFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1491:2120 141 * 0 0 * * 0 0 AGGCTGAACTTCTGAGCTGCT FFFFHHHGHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1190:2194 77 * 0 0 * * 0 0 AACCTGGCGCTAAACCATTCGTAGA CCCFFFFFHHHHHJJJJJJJJIJJJ RG:Z:HiMom.1
+HiMom:1:1201:1190:2194 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1188:2195 77 * 0 0 * * 0 0 TTAGACCGTCGTGAGACAGGTTAGT @CCFFFFFHHHHHJJJJJIIEHIJH RG:Z:HiMom.1
+HiMom:1:2101:1188:2195 141 * 0 0 * * 0 0 ATACACCAAATGTCTGAACCT FFFFHHHHHJJJHIJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ATTATCAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ATTATCAA.sam
new file mode 100644
index 0000000..d1d5b5d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ATTATCAA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTATCAA LB:LN_ATTATCAA PL:illumina PU:HiMom.1.ATTATCAA CN:BI
+HiMom:1:1101:1100:2207 77 * 0 0 * * 0 0 ACGACAGACGTTCTTTCTTTGCTGC CCCFFFFFHHFHHJIJJJJJHIJJH RG:Z:HiMom.1
+HiMom:1:1101:1100:2207 141 * 0 0 * * 0 0 T............G....... ##################### RG:Z:HiMom.1
+HiMom:1:1101:1157:2135 77 * 0 0 * * 0 0 .GGACATTGTAATCATTTCTTACAA #1=DD?DDHHHHHGGHIIIIIIIII RG:Z:HiMom.1
+HiMom:1:1101:1157:2135 141 * 0 0 * * 0 0 AAGTCTTAATCAAAGATGATA FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1269:2170 77 * 0 0 * * 0 0 ACAGTGTGGGAGGCAGACGAAGAGA @@@DDDDDFA:C at EGA?FD<FFHII RG:Z:HiMom.1
+HiMom:1:1101:1269:2170 141 * 0 0 * * 0 0 AAGCCTGTGCTTTAAGGAAAA DBDBDF8DDCFH at GIE@@GGH RG:Z:HiMom.1
+HiMom:1:1201:1018:2217 589 * 0 0 * * 0 0 .TTTCTCTGGGCGCAAAGATGTTCA #07;8=8<<99(:=@@/@7>>6=?> RG:Z:HiMom.1
+HiMom:1:1201:1018:2217 653 * 0 0 * * 0 0 ..................... ##################### RG:Z:HiMom.1 XN:i:1
+HiMom:1:1201:1118:2198 77 * 0 0 * * 0 0 CAAGTGTACAGGATTAGACTGGGTT BCCFDEBDHHHHHIJJJGIIIJJGH RG:Z:HiMom.1
+HiMom:1:1201:1118:2198 141 * 0 0 * * 0 0 AACTTTATTAAAGCAGTTAAA FFFFHDHHHGIIIJJJIJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ATTCCTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ATTCCTCT.sam
new file mode 100644
index 0000000..7586745
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/ATTCCTCT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTCCTCT LB:LN_ATTCCTCT PL:illumina PU:HiMom.1.ATTCCTCT CN:BI
+HiMom:1:1101:1309:2210 77 * 0 0 * * 0 0 ACACCAACCACCCAACTATCTATAA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1309:2210 141 * 0 0 * * 0 0 GGCTAGGGCATTTTTAATCTT FFDFHHHDFHJIJJIJGIIIJ RG:Z:HiMom.1
+HiMom:1:1201:1018:2133 77 * 0 0 * * 0 0 .AAAACTTGAGGATGCTATGCAAGC #1:B:ADDDDDDDEEAEBF9FFEBF RG:Z:HiMom.1
+HiMom:1:1201:1018:2133 141 * 0 0 * * 0 0 ..................... ##################### RG:Z:HiMom.1 XN:i:1
+HiMom:1:1201:1073:2225 77 * 0 0 * * 0 0 GGGGCTGAGACCTTTGCTGATGGTG @@@FFFFFHHHGHJJJJJIIIGICH RG:Z:HiMom.1
+HiMom:1:1201:1073:2225 141 * 0 0 * * 0 0 TGCTCTTCCGATCTGGAGGGT FFFFHHHHHJJJJJJJJJJJ: RG:Z:HiMom.1
+HiMom:1:1201:1242:2207 77 * 0 0 * * 0 0 ATGGCAAAGTGGTGTCTGAGACCAA BCCFFFFFGHHHHHIIIJFHIJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1242:2207 141 * 0 0 * * 0 0 TTTATTGGCCTCCTGCTCCCC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAACTCTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAACTCTC.sam
new file mode 100644
index 0000000..3ebed9c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAACTCTC.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAACTCTC LB:LN_CAACTCTC PL:illumina PU:HiMom.1.CAACTCTC CN:BI
+HiMom:1:1101:1140:2120 77 * 0 0 * * 0 0 .CCCCAACATTCTAATTATGCCTCA #1:BDFFDHFFDFIJJJIIJIIIII RG:Z:HiMom.1
+HiMom:1:1101:1140:2120 141 * 0 0 * * 0 0 TTTTTTTTTAACTTTGCAAAT DDDDHHHHFB at 9FHI@BFH@@ RG:Z:HiMom.1
+HiMom:1:1101:1328:2225 77 * 0 0 * * 0 0 GAAATGCATCTGTCTTAGAAACTGG ??@=BDDDFDD<<,<2:C<F:FFEA RG:Z:HiMom.1
+HiMom:1:1101:1328:2225 141 * 0 0 * * 0 0 AATTAGGACTTACCTGACATA ##################### RG:Z:HiMom.1
+HiMom:1:1201:1127:2112 589 * 0 0 * * 0 0 .GTCAAGGATGTTCGTCGTGGCAAC #1=BDDDDDDDDDID<AE?@<CEEE RG:Z:HiMom.1
+HiMom:1:1201:1127:2112 653 * 0 0 * * 0 0 CACCTGAGCAGTGAAGCCAGC BDDDHD?FDBHI?AHGGGDFH RG:Z:HiMom.1
+HiMom:1:1201:1452:2143 77 * 0 0 * * 0 0 TATCCCCTCTAAGACGGACCTGGGT CCCFFFFFHHHHHJJIIIJJJJJJG RG:Z:HiMom.1
+HiMom:1:1201:1452:2143 141 * 0 0 * * 0 0 AGTCTTAGCATTTACTTTCCC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1486:2146 589 * 0 0 * * 0 0 GTTCTCTGTCCCCAGGTCCTGTCTC ===A7<7222<<=C=?+<7>@?ACB RG:Z:HiMom.1
+HiMom:1:1201:1486:2146 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTGGGC ??@??@???????######## RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAATAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAATAGAC.sam
new file mode 100644
index 0000000..f800d53
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAATAGAC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGAC LB:LN_CAATAGAC PL:illumina PU:HiMom.1.CAATAGAC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAATAGTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAATAGTC.sam
new file mode 100644
index 0000000..b38652a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAATAGTC.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGTC LB:LN_CAATAGTC PL:illumina PU:HiMom.1.CAATAGTC CN:BI
+HiMom:1:1101:1316:2126 77 * 0 0 * * 0 0 .AAAAAAAAAAAAAAAAAAAAAAAA #1BDFFFFHHHHHJJJJFDDDDDDD RG:Z:HiMom.1 XN:i:1
+HiMom:1:1101:1316:2126 141 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFHHHHHJJJJHFDDDDDD RG:Z:HiMom.1
+HiMom:1:1101:1399:2128 77 * 0 0 * * 0 0 .TGCCCTTCGTCCTGGGAAACGGGG #1BDFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1399:2128 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHHHHIJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1054:2151 77 * 0 0 * * 0 0 .TAGTGCTGGGCACTAAGTAATACC #4=DDDFFHHHHHJJJJJHIJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1054:2151 141 * 0 0 * * 0 0 GGCACTGAGAATATATGGGTG FFFFHHHHHJJJJJJJJJJEG RG:Z:HiMom.1
+HiMom:1:1201:1345:2181 77 * 0 0 * * 0 0 GGATAATCCTATTTATTACCTCAGA BBBDDFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1
+HiMom:1:1201:1345:2181 141 * 0 0 * * 0 0 GGATGTGTTTAGGAGTGGGAC FFFFHHHHHIIJJHJFHIJIJ RG:Z:HiMom.1
+HiMom:1:1201:1392:2184 77 * 0 0 * * 0 0 TTTCAGATTGGTCATTGTTAGTGTA ??@BDDDEHBHADHHIIEHDHFHFF RG:Z:HiMom.1
+HiMom:1:1201:1392:2184 141 * 0 0 * * 0 0 TTATTCATTTGTATGATCTTA FFFFHFFHFHIHIIJIJJJJI RG:Z:HiMom.1
+HiMom:1:2101:1172:2152 589 * 0 0 * * 0 0 AACACGGACAAAGGAGTCTAACACG <<<??8@@################# RG:Z:HiMom.1
+HiMom:1:2101:1172:2152 653 * 0 0 * * 0 0 TTTCTGGGGACTAGTGAGGCG ##################### RG:Z:HiMom.1
+HiMom:1:2101:1491:2093 77 * 0 0 * * 0 0 .CTATGCCGATCGGGTGTCCGCACT #1=DDDDDHHFHHIIEHHHBGHGII RG:Z:HiMom.1
+HiMom:1:2101:1491:2093 141 * 0 0 * * 0 0 ACGGGGTCTCGCTATGTTGCC FFFFHHHHHJIIJJJJIJIJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAGCGGAT.sam
new file mode 100644
index 0000000..c9d3cfe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAGCGGAT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGAT LB:LN_CAGCGGAT PL:illumina PU:HiMom.1.CAGCGGAT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAGCGGTA.sam
new file mode 100644
index 0000000..66858bc
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CAGCGGTA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGTA LB:LN_CAGCGGTA PL:illumina PU:HiMom.1.CAGCGGTA CN:BI
+HiMom:1:1101:1420:2213 77 * 0 0 * * 0 0 TACCTGGTTGATCCTGCCAGTAGCA @@CFFFFDDHHGHJGGHIJJIHGBH RG:Z:HiMom.1
+HiMom:1:1101:1420:2213 141 * 0 0 * * 0 0 CTGTACCGGCCGTGCGTACTT FFFDHHHFGIJJJJJJGHIGG RG:Z:HiMom.1
+HiMom:1:1201:1364:2113 77 * 0 0 * * 0 0 .CACTCATTTTCTTATGTGGGATAT #1=DDFDFHHHHHIJJIFHIIHHHI RG:Z:HiMom.1
+HiMom:1:1201:1364:2113 141 * 0 0 * * 0 0 GAGAGCCAGTGGAGTTACGAC ##################### RG:Z:HiMom.1
+HiMom:1:2101:1072:2170 77 * 0 0 * * 0 0 ATCACCGCACTCATTTCCCGCTTCC CCCFFFFFHHHACEEGHIIBHIIII RG:Z:HiMom.1
+HiMom:1:2101:1072:2170 141 * 0 0 * * 0 0 GAGACAGAGAGGATCAGAAGT DDFDHHDFHEGFEGGIJIIIG RG:Z:HiMom.1
+HiMom:1:2101:1123:2095 77 * 0 0 * * 0 0 .TGGACAACATGTTCGAGAGCTACA #1=BBDDDFFFFDGFGIG?F;HHFI RG:Z:HiMom.1
+HiMom:1:2101:1123:2095 141 * 0 0 * * 0 0 CCTCCAGCTTCAGCTTCTCCT DDFFHHHHHJHGGJIJJJEHH RG:Z:HiMom.1
+HiMom:1:2101:1151:2236 589 * 0 0 * * 0 0 TTAAAGAGGTTCAGGGATGCAGAGT ######################### RG:Z:HiMom.1
+HiMom:1:2101:1151:2236 653 * 0 0 * * 0 0 AAGCCTCTTTATCCTTGGCAT ##################### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCAACATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCAACATT.sam
new file mode 100644
index 0000000..6920fc9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCAACATT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAACATT LB:LN_CCAACATT PL:illumina PU:HiMom.1.CCAACATT CN:BI
+HiMom:1:1101:1083:2193 77 * 0 0 * * 0 0 TTCTACCTCACCTTAGGGAGAAGAC @@@DDBDDD>F><C<4CG?EHGHIG RG:Z:HiMom.1
+HiMom:1:1101:1083:2193 141 * 0 0 * * 0 0 T.................... ##################### RG:Z:HiMom.1
+HiMom:1:1101:1175:2197 77 * 0 0 * * 0 0 CCCCTGAGGACACCATCCCACTCCA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1175:2197 141 * 0 0 * * 0 0 GCTGGGGAACATCCAGAAAGG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1138:2227 589 * 0 0 * * 0 0 GCTGACACAATCTCTTCCGCCTGGT ######################### RG:Z:HiMom.1
+HiMom:1:1201:1138:2227 653 * 0 0 * * 0 0 AATATAGGAAATAGAAGCTAT =AAA,2?4>7C<<4<A+3<AB RG:Z:HiMom.1
+HiMom:1:1201:1260:2165 77 * 0 0 * * 0 0 GGACACGGACAGGATTGACAGATTG BCBFFFFFHHHHHHIIJHIIIFHIJ RG:Z:HiMom.1
+HiMom:1:1201:1260:2165 141 * 0 0 * * 0 0 GATCTAAGTTGGGGGACGCCG FDFFHHHHHJJJIJIIIGIJJ RG:Z:HiMom.1
+HiMom:1:1201:1281:2133 77 * 0 0 * * 0 0 .GGAAATCCAGAAAACATAGAAGAT #1=DDFFFHHHHHIJJJJJJJJIJJ RG:Z:HiMom.1
+HiMom:1:1201:1281:2133 141 * 0 0 * * 0 0 CAAAATTTCATATGACTTAGC FFFFHHHHHJJIIIHICHIIJ RG:Z:HiMom.1
+HiMom:1:1201:1331:2162 77 * 0 0 * * 0 0 ACGCTCGGCTAATTTTTGTATTTTT @CCFFFDFHHHHHIJJJJHIJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1331:2162 141 * 0 0 * * 0 0 CCCAGTACTTTGGGAGGCCAA FFFFHHHHHJJJJIJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1186:2093 77 * 0 0 * * 0 0 .CGACCATAAACGATGCCGACCGGC #4=DFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1186:2093 141 * 0 0 * * 0 0 TTGGGAGGACAATGATGGAAA ##################### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCAGCACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCAGCACC.sam
new file mode 100644
index 0000000..491fec1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCAGCACC.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAGCACC LB:LN_CCAGCACC PL:illumina PU:HiMom.1.CCAGCACC CN:BI
+HiMom:1:1101:1212:2230 77 * 0 0 * * 0 0 TTTCTATTAGCTCTTAGTAAGATTA CCCFFFFFHHHHHJJJIJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1212:2230 141 * 0 0 * * 0 0 AGCTTTATTGGGGAGGGGGTG FFFFHHGHHJJJJGJJJJJDF RG:Z:HiMom.1
+HiMom:1:1201:1204:2228 77 * 0 0 * * 0 0 CCGATACGCTGAGTGTGGTTTGCGG CCCFFFFFHHHFHEGGHIHIJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1204:2228 141 * 0 0 * * 0 0 CTTGTCGATGAGGAACTTGGT FFFFDHHGHJIJJGHIIJJJH RG:Z:HiMom.1
+HiMom:1:2101:1100:2085 77 * 0 0 * * 0 0 .CACATGGATGAGGAGAATGAGGAT #1=DDFFFFHHHHJHIGIHHHIJEH RG:Z:HiMom.1
+HiMom:1:2101:1100:2085 141 * 0 0 * * 0 0 TGATCTCCTCCTTCTTGGCCT DDDDHHFHFEIIIIHHBAHBG RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCATGCGT.sam
new file mode 100644
index 0000000..29ae6d9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CCATGCGT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCATGCGT LB:LN_CCATGCGT PL:illumina PU:HiMom.1.CCATGCGT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CGCCTTCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CGCCTTCC.sam
new file mode 100644
index 0000000..4c34132
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CGCCTTCC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCCTTCC LB:LN_CGCCTTCC PL:illumina PU:HiMom.1.CGCCTTCC CN:BI
+HiMom:1:1201:1122:2227 77 * 0 0 * * 0 0 AGAAGACGAGGCTGAGAGTGACATC @@@FFFFFHHHDHJGHGHCHHJJIJ RG:Z:HiMom.1
+HiMom:1:1201:1122:2227 141 * 0 0 * * 0 0 TATAAGGCCCAGTCCAAGGAA FFFFHHHGGIJIGGIJFIJII RG:Z:HiMom.1
+HiMom:1:1201:1160:2109 77 * 0 0 * * 0 0 .AGAAGCCTTTGCACCCTGGGAGGA #1=DDDFFHHHHHJJJJJJJJIIJJ RG:Z:HiMom.1
+HiMom:1:1201:1160:2109 141 * 0 0 * * 0 0 CCTTCCCATGCCACCAACTCG FFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CGCTATGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CGCTATGT.sam
new file mode 100644
index 0000000..24fd5f0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CGCTATGT.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCTATGT LB:LN_CGCTATGT PL:illumina PU:HiMom.1.CGCTATGT CN:BI
+HiMom:1:1101:1291:2150 77 * 0 0 * * 0 0 CGTGGGGAACCTGGCGCTAAACCAT @BBFFFFFHHHHHJJJJIJJJJJIJ RG:Z:HiMom.1
+HiMom:1:1101:1291:2150 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHFHHIJJJIIIGIJIJ RG:Z:HiMom.1
+HiMom:1:1101:1314:2233 77 * 0 0 * * 0 0 GTTTATTGGGGCATTCCTTATCCCA @??DDDDBDHF>FCHGGGBFAAED9 RG:Z:HiMom.1
+HiMom:1:1101:1314:2233 141 * 0 0 * * 0 0 AAGTTGGGCTGACCTGACAGA DD;=FBFADBCGDEH?F;FCG RG:Z:HiMom.1
+HiMom:1:1101:1441:2148 77 * 0 0 * * 0 0 ACTTTCACCGCTACACGACCGGGGG CCCFFFFFHGFFHIIFIHJIGGII> RG:Z:HiMom.1
+HiMom:1:1101:1441:2148 141 * 0 0 * * 0 0 GGCTCTAGAGGGGGTAGAGGG FFFFHHDFBHIIJJ1?FGHIJ RG:Z:HiMom.1
+HiMom:1:1201:1043:2246 77 * 0 0 * * 0 0 .TTCTCGGCTGTCATGTGCAACATT #1=DDBDFHHHDFFBHGHGHIIJEH RG:Z:HiMom.1
+HiMom:1:1201:1043:2246 141 * 0 0 * * 0 0 TCATTTC..GCTTCTCTCTGT @??@=@>##22=;@??><@?? RG:Z:HiMom.1
+HiMom:1:1201:1134:2144 77 * 0 0 * * 0 0 TGCCAGGAAGTGTTTTTTCTGGGTC @CCFFEFFHHFFFGIJJJJJJJJGH RG:Z:HiMom.1
+HiMom:1:1201:1134:2144 141 * 0 0 * * 0 0 TGAGTAATGGTTGAGAGGTGG DDFFFHHGHJHHGFIHHIFGI RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTAACTCG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTAACTCG.sam
new file mode 100644
index 0000000..ef8e179
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTAACTCG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTAACTCG LB:LN_CTAACTCG PL:illumina PU:HiMom.1.CTAACTCG CN:BI
+HiMom:1:1101:1363:2138 77 * 0 0 * * 0 0 .GTCTGGCCTGCACAGACATCCTAC #1=DDFFFHHHHHJJJIJJIJJJIJ RG:Z:HiMom.1
+HiMom:1:1101:1363:2138 141 * 0 0 * * 0 0 TTAAACCTGTTAGAACTTCTG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1393:2143 77 * 0 0 * * 0 0 TGGTTGATCCTGCCAGTAGCATATG @@@ADADDFHFFDBHE?G at HIIIEE RG:Z:HiMom.1
+HiMom:1:1201:1393:2143 141 * 0 0 * * 0 0 AATGCACGCATCCCCCCCGCG FFFFGGHHHHJJJJJJJJJJI RG:Z:HiMom.1
+HiMom:1:2101:1273:2119 589 * 0 0 * * 0 0 .AGATAAGAGTCCACACAGTTGAGT #11AAAAA<A?4=C=7?733<ACA3 RG:Z:HiMom.1
+HiMom:1:2101:1273:2119 653 * 0 0 * * 0 0 TGGATCTTCTCTAACTTGTCA AAAAA+2AA?CB4@@ABB3?A RG:Z:HiMom.1
+HiMom:1:2101:1414:2098 77 * 0 0 * * 0 0 .AGGACATCGATAAAGGCGAGGTGT #1=DDFFFHHHHHJJJJJJJJJHHG RG:Z:HiMom.1
+HiMom:1:2101:1414:2098 141 * 0 0 * * 0 0 GGCCGGTGCCGTCGGGCCCAA FFFFHHHHGJJIJJJJJJJIJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTATGCGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTATGCGC.sam
new file mode 100644
index 0000000..2a5a21c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTATGCGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGC LB:LN_CTATGCGC PL:illumina PU:HiMom.1.CTATGCGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTATGCGT.sam
new file mode 100644
index 0000000..4cc9d7b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTATGCGT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGT LB:LN_CTATGCGT PL:illumina PU:HiMom.1.CTATGCGT CN:BI
+HiMom:1:1201:1083:2121 77 * 0 0 * * 0 0 .AGAACTGGCGCTGCGGGATGAACC #1=BDFFFHHHHHJJJJJHIJIJJJ RG:Z:HiMom.1
+HiMom:1:1201:1083:2121 141 * 0 0 * * 0 0 ACAACACCACCGCCCTCCCCC ##################### RG:Z:HiMom.1
+HiMom:1:1201:1185:2143 77 * 0 0 * * 0 0 ATCTGCCTGGTTCGGCCCGCCTGCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1185:2143 141 * 0 0 * * 0 0 AAGGCCCGTGGGCCAGAGGTG FFFFHHHHHJJJJJJJJJJHI RG:Z:HiMom.1
+HiMom:1:1201:1219:2115 77 * 0 0 * * 0 0 .TATAGTGGAGGCCGGAGCAGGAAC #1:DABADHHHFHIIIGGHGIIIII RG:Z:HiMom.1
+HiMom:1:1201:1219:2115 141 * 0 0 * * 0 0 AGTAGTTCCCTGCTAAGGGAG BDBDADDDDIEID:AFFD:?8 RG:Z:HiMom.1
+HiMom:1:1201:1472:2121 589 * 0 0 * * 0 0 .TAAAGTGTGAACAAGGAAGGTCAT #07>@<9=@################ RG:Z:HiMom.1
+HiMom:1:1201:1472:2121 653 * 0 0 * * 0 0 GCTCTTCCGATCTGGAGGATG ?A4A==A at 7A<?######### RG:Z:HiMom.1
+HiMom:1:2101:1013:2146 77 * 0 0 * * 0 0 .ACACTGCTGCAGATGACAAGCAGC #4BDFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1013:2146 141 * 0 0 * * 0 0 CGCTAGAACCAACTTATTCAT 24=?@@?@?@@?@@@@@@?@@ RG:Z:HiMom.1
+HiMom:1:2101:1231:2208 77 * 0 0 * * 0 0 ACGCCGCAAGTCAGAGCCCCCCAGA @@@DDDFFFFB:DBBEBEFDHBDDB RG:Z:HiMom.1
+HiMom:1:2101:1231:2208 141 * 0 0 * * 0 0 AGTGTTGGTGTGTTGACTGTT ADABCF;BF<AACGCHEBHC< RG:Z:HiMom.1
+HiMom:1:2101:1233:2133 589 * 0 0 * * 0 0 GAGAGAAGCACTCTTGAGCGGGATA 0;(@((@)2@############### RG:Z:HiMom.1
+HiMom:1:2101:1233:2133 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFGHHHHJJJFDDDDDDDD RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTGCGGAT.sam
new file mode 100644
index 0000000..5d02dc6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTGCGGAT.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGCGGAT LB:LN_CTGCGGAT PL:illumina PU:HiMom.1.CTGCGGAT CN:BI
+HiMom:1:2101:1102:2221 77 * 0 0 * * 0 0 TTTCATCTTATTTCATTGGTTTATA CCCFFFFFHHHHHJIJJJJIJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1102:2221 141 * 0 0 * * 0 0 CTGACTCTACTCAGTAGATTA FFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1126:2082 77 * 0 0 * * 0 0 .GTTTTAGGGGTGCGCAGGAGTCAA #11=A=DD?DF at D@CCGHIEFH at BG RG:Z:HiMom.1
+HiMom:1:2101:1126:2082 141 * 0 0 * * 0 0 TTTCCACCTTGGTCACCTTCC DDFFHHHHHJEGGIHHIJGIH RG:Z:HiMom.1
+HiMom:1:2101:1216:2172 77 * 0 0 * * 0 0 TTTCTTCGCAGGATTTTTCTGAGCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1216:2172 141 * 0 0 * * 0 0 TTCTAGGGGATTTAGCGGGGT FFFFHHHHHJJJJJJJJJJJD RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTGTAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTGTAATC.sam
new file mode 100644
index 0000000..80425df
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/CTGTAATC.sam
@@ -0,0 +1,14 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGTAATC LB:LN_CTGTAATC PL:illumina PU:HiMom.1.CTGTAATC CN:BI
+HiMom:1:1101:1403:2194 77 * 0 0 * * 0 0 CTAAACAGAGAGAAGGTTTCTCTTT CCCFFFFFHHHHHJJJFHIJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1403:2194 141 * 0 0 * * 0 0 GGTGAAACCCTGTCTCTACTA FFDDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1045:2105 589 * 0 0 * * 0 0 .TAAAGAGAAATCAAGAATACTATT #-4@?(@)@@############### RG:Z:HiMom.1
+HiMom:1:1201:1045:2105 653 * 0 0 * * 0 0 TTTTTTT..TTTTTTTTTTTT @@@@@@?##0:????????=< RG:Z:HiMom.1
+HiMom:1:1201:1483:2126 589 * 0 0 * * 0 0 .TGATAAGGTGTTGCTATGTTACCC #1:D?DDDDA??2:<CC4:AEDF>? RG:Z:HiMom.1
+HiMom:1:1201:1483:2126 653 * 0 0 * * 0 0 GCAGCTGGGTGCTGTGATGCA DDBB<DD8F<<CGG?AA?A<F RG:Z:HiMom.1
+HiMom:1:2101:1011:2102 77 * 0 0 * * 0 0 .AAACAAAACTGTAGAACTGTGTAT #1=DDFFFHHHHHJJIJJJIHHHJJ RG:Z:HiMom.1
+HiMom:1:2101:1011:2102 141 * 0 0 * * 0 0 .TCACACATAATTTTAAAATT #22@?@@??@@@@@??@@@@@ RG:Z:HiMom.1
+HiMom:1:2101:1245:2154 77 * 0 0 * * 0 0 TCGTTAAGTATATTCTTAGGTATTT CCCFFDFFFHFHHIIJJJJJFJJJI RG:Z:HiMom.1
+HiMom:1:2101:1245:2154 141 * 0 0 * * 0 0 ATCAGTAGCACCACTATACAC FFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1386:2105 77 * 0 0 * * 0 0 .TACTAAAGAAAAAGTTGAAGAACT #1=DDDFFHHHHHJJGHIJJJJIJJ RG:Z:HiMom.1
+HiMom:1:2101:1386:2105 141 * 0 0 * * 0 0 ATTATTCTTCTGCCATAAGGT DFFFHGFHHIJJJJJGIGIJH RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAAAAAAA.sam
new file mode 100644
index 0000000..0bfb1f9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAAAAAA LB:LN_GAAAAAAA PL:illumina PU:HiMom.1.GAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAACGAT..sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAACGAT..sam
new file mode 100644
index 0000000..1636a99
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAACGAT..sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAACGAT. LB:LN_GAACGAT. PL:illumina PU:HiMom.1.GAACGAT. CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAAGGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAAGGAAG.sam
new file mode 100644
index 0000000..6fe8e66
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GAAGGAAG.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAGGAAG LB:LN_GAAGGAAG PL:illumina PU:HiMom.1.GAAGGAAG CN:BI
+HiMom:1:1101:1338:2175 77 * 0 0 * * 0 0 CCCACCTTCCGGCGGCCGAAGACAC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1338:2175 141 * 0 0 * * 0 0 GTTGGCTTTAACATCCACAAT FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1028:2202 77 * 0 0 * * 0 0 .TCCTGGGAAACGGGGCGCGGCTGG #4BDDDFFHHHHHIJIIJJJJJJIJ RG:Z:HiMom.1
+HiMom:1:1201:1028:2202 141 * 0 0 * * 0 0 AC.C.T.......GG..TG.. @?################### RG:Z:HiMom.1
+HiMom:1:2101:1084:2188 77 * 0 0 * * 0 0 TTGCTGCATGGGTTAATTGAGAATA CCCFFFFFHHHHFHHIIJJIJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1084:2188 141 * 0 0 * * 0 0 AGGTCAAAATCAGCAACAAGT FFFDHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCAGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCAGGA.sam
new file mode 100644
index 0000000..c497480
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCAGGA.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGA LB:LN_GACCAGGA PL:illumina PU:HiMom.1.GACCAGGA CN:BI
+HiMom:1:1101:1089:2172 77 * 0 0 * * 0 0 TTCCAGCATGCGGTTTAAGTAGGAT @CCFDFDBDFBF:<CEBHAFHHICH RG:Z:HiMom.1
+HiMom:1:1101:1089:2172 141 * 0 0 * * 0 0 G.................... ?#################### RG:Z:HiMom.1
+HiMom:1:1101:1347:2149 77 * 0 0 * * 0 0 GAGCAGATCGGAAGAGCACAGATCG @@@FFDDDHHHHHIJJBGGHJIHEG RG:Z:HiMom.1
+HiMom:1:1101:1347:2149 141 * 0 0 * * 0 0 TTCCGATCTGTGCTCTTCCGA FFFFDFHHFIJDGIGGHGIGH RG:Z:HiMom.1
+HiMom:1:1201:1095:2146 77 * 0 0 * * 0 0 GCTGAGTCATGTAGTAAGCCTGTGC BB at FDDDFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1095:2146 141 * 0 0 * * 0 0 ACAACACCAAATGCTGCTAAG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1123:2161 589 * 0 0 * * 0 0 CACTAACTCCTGACCTCAAATAATC ?7?=DD?DD+CDBE>E at EEF@+<CF RG:Z:HiMom.1
+HiMom:1:1201:1123:2161 653 * 0 0 * * 0 0 TGCTCTTCCGATCTGCATACA AAA8AAAA<AAA)@CBA9>A# RG:Z:HiMom.1
+HiMom:1:1201:1439:2156 77 * 0 0 * * 0 0 AGCCGCGAGGTGCTGGCGGACTTCC :;1BDDDAA88A<?<E1C:D##### RG:Z:HiMom.1
+HiMom:1:1201:1439:2156 141 * 0 0 * * 0 0 ATTATTTGCCTTGAAGTAAGC 2<>>@>8@>8;@######### RG:Z:HiMom.1
+HiMom:1:2101:1207:2084 589 * 0 0 * * 0 0 .TAGATGACCAAAACTTGCAGGGCA #1:A<?@A+7A=?CBCCBCCBAAAA RG:Z:HiMom.1
+HiMom:1:2101:1207:2084 653 * 0 0 * * 0 0 CACTCTTCTGGGCATCCCCTG DEDFHHHHHIJIHHGHGGJJJ RG:Z:HiMom.1
+HiMom:1:2101:1312:2105 77 * 0 0 * * 0 0 .TTCCCTCAGGATAGCTGGCGCTCT #1=DDFFFGHGHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:2101:1312:2105 141 * 0 0 * * 0 0 AGAATAGGTTGAGATCGTTTC FFDFHHFHDHIJJJJJJJIJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCAGGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCAGGC.sam
new file mode 100644
index 0000000..71b9f20
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCAGGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGC LB:LN_GACCAGGC PL:illumina PU:HiMom.1.GACCAGGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCGTTG.sam
new file mode 100644
index 0000000..f991548
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCGTTG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCGTTG LB:LN_GACCGTTG PL:illumina PU:HiMom.1.GACCGTTG CN:BI
+HiMom:1:1101:1218:2200 77 * 0 0 * * 0 0 GCACCGGAAGAGCACACAGATCGGA CCCFFFFDFHGHHJJIJIJJJJJJI RG:Z:HiMom.1
+HiMom:1:1101:1218:2200 141 * 0 0 * * 0 0 TTCCGATCTATCTGCTCGTCC 34???3;@############# RG:Z:HiMom.1
+HiMom:1:1101:1257:2223 77 * 0 0 * * 0 0 TGTATTCGAGAGATCAAAGAGAGAG @@=DDBDD?FFHHEIDBDFCEDBAF RG:Z:HiMom.1
+HiMom:1:1101:1257:2223 141 * 0 0 * * 0 0 CTTCCGATCTTTTAGCAAAGC DBDDHFFHDGIGIIJJJGGGI RG:Z:HiMom.1
+HiMom:1:1201:1180:2119 77 * 0 0 * * 0 0 .TGAAAGATTTAGAGAGCTTACAAA #1=DDDDDHHHGHJJIIJJJJIJJI RG:Z:HiMom.1
+HiMom:1:1201:1180:2119 141 * 0 0 * * 0 0 TAAATTTTGCTTTTCTACAGC FFFFHHHHHJJJJIJIJJIJJ RG:Z:HiMom.1
+HiMom:1:2101:1036:2087 77 * 0 0 * * 0 0 .TGTAGTTTCTTTAGGCAAATTTGT #4=BDDDFHHHHHJJJJJJIIJJJI RG:Z:HiMom.1
+HiMom:1:2101:1036:2087 141 * 0 0 * * 0 0 CACTTACGAAGCAAATACTTT DFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCTAAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCTAAC.sam
new file mode 100644
index 0000000..bf277d8
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GACCTAAC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCTAAC LB:LN_GACCTAAC PL:illumina PU:HiMom.1.GACCTAAC CN:BI
+HiMom:1:1101:1302:2244 77 * 0 0 * * 0 0 GGAAAAGACGGAAAGGTTCTATCTC @C at DFFFDFHHHHJIJHHIJJJJJI RG:Z:HiMom.1
+HiMom:1:1101:1302:2244 141 * 0 0 * * 0 0 TACATATAACAAATGCAAAAA FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GATATCCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GATATCCA.sam
new file mode 100644
index 0000000..63d3af6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GATATCCA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GATATCCA LB:LN_GATATCCA PL:illumina PU:HiMom.1.GATATCCA CN:BI
+HiMom:1:1101:1460:2176 77 * 0 0 * * 0 0 AGTCCAGGCTGAGCCCAGGGAAGAA CCCFFFFFHHHHGJIJJIJJHIJJI RG:Z:HiMom.1
+HiMom:1:1101:1460:2176 141 * 0 0 * * 0 0 AAAAGACACAACAAGTCCAAC ##################### RG:Z:HiMom.1
+HiMom:1:2101:1031:2163 77 * 0 0 * * 0 0 .TTTCCATGGCCGTCACCTTTGGGT #4=DDFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1
+HiMom:1:2101:1031:2163 141 * 0 0 * * 0 0 ATTTGTCACCACTAGCCACCA @?@@@@@@@@@@?@@@@@@@? RG:Z:HiMom.1
+HiMom:1:2101:1226:2088 77 * 0 0 * * 0 0 .GATCGGAAGAGCACACGTTTGACT #4=DAA=DDFHFHIIBFGHHIG>EG RG:Z:HiMom.1
+HiMom:1:2101:1226:2088 141 * 0 0 * * 0 0 TTCCGATCTAGGTAATAGCTA DFFFDCDDHFFFAFHDHIJGJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GCCGTCGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GCCGTCGA.sam
new file mode 100644
index 0000000..acdca9a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GCCGTCGA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCGTCGA LB:LN_GCCGTCGA PL:illumina PU:HiMom.1.GCCGTCGA CN:BI
+HiMom:1:1101:1111:2148 77 * 0 0 * * 0 0 GTGGAGACCACCTCCGAGGCCTTGT BBCFFFFFHHHHHJJJIJJJJJJJI RG:Z:HiMom.1
+HiMom:1:1101:1111:2148 141 * 0 0 * * 0 0 A.A..........GGACGAC. ##################### RG:Z:HiMom.1
+HiMom:1:1101:1221:2143 77 * 0 0 * * 0 0 TTTGGTGGAAATTTTTTGTTATGAT CCCFFBDBHFD?FBFHIIGGIC at EF RG:Z:HiMom.1
+HiMom:1:1101:1221:2143 141 * 0 0 * * 0 0 TGAATGTCTGCACAGCCGCTT FFFDHHHHHJJJIIIJGHIJJ RG:Z:HiMom.1
+HiMom:1:1101:1327:2200 589 * 0 0 * * 0 0 AGGGGGATCCGCCGGGGGACCACAA ######################### RG:Z:HiMom.1
+HiMom:1:1101:1327:2200 653 * 0 0 * * 0 0 TCTGGGCTGTCGACAGGTGTC FFFFHHHHGIJJJJJJIFHHI RG:Z:HiMom.1
+HiMom:1:2101:1122:2136 77 * 0 0 * * 0 0 GTAGGCGCTCAGCAAATACTTGTCG @@@DDDD8?<CACEHHBBHDAAFH@ RG:Z:HiMom.1
+HiMom:1:2101:1122:2136 141 * 0 0 * * 0 0 CCAGCCTGCAGGCCCCGCGGC BAABDD?DDIID)A:3<EADD RG:Z:HiMom.1
+HiMom:1:2101:1459:2083 77 * 0 0 * * 0 0 .CACACGCCACACGGAGCACACTTT #4=DDFFFHHHHHJJJJJJJJIIJJ RG:Z:HiMom.1
+HiMom:1:2101:1459:2083 141 * 0 0 * * 0 0 CACCAAAATAATCAGAAGGCC FFFDBHGHHIGGIJFJJGGFH RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GCCTAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GCCTAGCC.sam
new file mode 100644
index 0000000..61ecc6f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GCCTAGCC.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCTAGCC LB:LN_GCCTAGCC PL:illumina PU:HiMom.1.GCCTAGCC CN:BI
+HiMom:1:1101:1165:2239 77 * 0 0 * * 0 0 GGCGGAGGCAGCATTTCAGCTGTGA CCCFFDFFHHHHHIJJIGHHHJHHF RG:Z:HiMom.1
+HiMom:1:1101:1165:2239 141 * 0 0 * * 0 0 AAGTCGAGACAGAAGTGAGAA ##################### RG:Z:HiMom.1
+HiMom:1:1101:1290:2225 77 * 0 0 * * 0 0 CTTGGGCGCATGGTGAGGGAGGGAG @@@FFDDFHDFH??CBEBHHIGDCD RG:Z:HiMom.1
+HiMom:1:1101:1290:2225 141 * 0 0 * * 0 0 TTCACTGGCAAAGACAGTCAC BEDDFHFHGIIICEHGDHBHE RG:Z:HiMom.1
+HiMom:1:1201:1280:2179 77 * 0 0 * * 0 0 TTCAAGGAATCGTCCTGCCTCAGCC BCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1280:2179 141 * 0 0 * * 0 0 ACTGCTTGAGTCCAGGAGTTC FDEFGHHHHIFGCHIJJJGGI RG:Z:HiMom.1
+HiMom:1:1201:1300:2137 77 * 0 0 * * 0 0 .TGTAATCCCAGCTCTCAGGGAGGC #1=ADDDDDDDBBA?@AE?E at FE8; RG:Z:HiMom.1
+HiMom:1:1201:1300:2137 141 * 0 0 * * 0 0 TTCCGATCTTTTTTTTAATTT DDDDFDHADEHGIGGED3?FD RG:Z:HiMom.1
+HiMom:1:2101:1023:2237 589 * 0 0 * * 0 0 .TAAACAGCTTCTGCACAGCCAAAG #00@@?>=39>9;<412@?###### RG:Z:HiMom.1
+HiMom:1:2101:1023:2237 653 * 0 0 * * 0 0 TGTTTGAGTTCCTTGTAGATT =@?>?@???@:>?@??>?;?< RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GTAACATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GTAACATC.sam
new file mode 100644
index 0000000..bd83c8a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GTAACATC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTAACATC LB:LN_GTAACATC PL:illumina PU:HiMom.1.GTAACATC CN:BI
+HiMom:1:1101:1188:2237 77 * 0 0 * * 0 0 TCCCCCTCCCTTTTGCGCACACACC @?@DDADDHDHBDH<EFHIIHG?HF RG:Z:HiMom.1
+HiMom:1:1101:1188:2237 141 * 0 0 * * 0 0 CCTTCAAGACAGAAGTGAGAA FDDEFHHFFE at FDHHAIAFHG RG:Z:HiMom.1
+HiMom:1:2101:1208:2231 589 * 0 0 * * 0 0 TCACTAAACATCCAAACATCACTTT ######################### RG:Z:HiMom.1
+HiMom:1:2101:1208:2231 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTT FFFFHHHHHJJJHFDDDDDDD RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GTCCACAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GTCCACAG.sam
new file mode 100644
index 0000000..2489bba
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/GTCCACAG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTCCACAG LB:LN_GTCCACAG PL:illumina PU:HiMom.1.GTCCACAG CN:BI
+HiMom:1:1101:1069:2159 77 * 0 0 * * 0 0 TCCCTTACCATCAAATCAATTG.CC CCCFFFFFHHHHHJJJJJJJJJ#3A RG:Z:HiMom.1
+HiMom:1:1101:1069:2159 141 * 0 0 * * 0 0 T.................... ?#################### RG:Z:HiMom.1
+HiMom:1:1201:1486:2109 77 * 0 0 * * 0 0 .CACCTCCTAGCCCCTCACTTCTGT #1=B;BDDHHHGFIIIIIIIIIGGG RG:Z:HiMom.1
+HiMom:1:1201:1486:2109 141 * 0 0 * * 0 0 GTGCTCTTCCCGATCTGTATA F?DDFBHHHJJIIDHJIJJJH RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TAAGCACA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TAAGCACA.sam
new file mode 100644
index 0000000..499fd58
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TAAGCACA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAAGCACA LB:LN_TAAGCACA PL:illumina PU:HiMom.1.TAAGCACA CN:BI
+HiMom:1:1201:1064:2239 77 * 0 0 * * 0 0 CATGCAGCGCAAGTAGGTCTACAAG @@;DFAFFHHHHAHEGHFDGGFABG RG:Z:HiMom.1
+HiMom:1:1201:1064:2239 141 * 0 0 * * 0 0 TGGGAGGGCGATGAGGACTAG DDDACC:FHHGIH<EGDDDFH RG:Z:HiMom.1
+HiMom:1:2101:1258:2092 77 * 0 0 * * 0 0 .CACACACACACTCATTCACAGCTT #1=DDDFFHHHFHJJIJGGGIIGIJ RG:Z:HiMom.1
+HiMom:1:2101:1258:2092 141 * 0 0 * * 0 0 ACAAAACACCAAAATAAAATA ##################### RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TACCGTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TACCGTCT.sam
new file mode 100644
index 0000000..04cf526
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TACCGTCT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TACCGTCT LB:LN_TACCGTCT PL:illumina PU:HiMom.1.TACCGTCT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TAGCGGTA.sam
new file mode 100644
index 0000000..3455237
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TAGCGGTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAGCGGTA LB:LN_TAGCGGTA PL:illumina PU:HiMom.1.TAGCGGTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCAGCC.sam
new file mode 100644
index 0000000..5f7c0e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCAGCC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCAGCC LB:LN_TATCAGCC PL:illumina PU:HiMom.1.TATCAGCC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCCAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCCAGG.sam
new file mode 100644
index 0000000..a99b64d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCCAGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCAGG LB:LN_TATCCAGG PL:illumina PU:HiMom.1.TATCCAGG CN:BI
+HiMom:1:1101:1071:2233 77 * 0 0 * * 0 0 TTTGACAGTCTCTGAATGAGAA.GG CCCFFFFFHHHHHJIIIJJJIJ#4A RG:Z:HiMom.1
+HiMom:1:1101:1071:2233 141 * 0 0 * * 0 0 G.................... @#################### RG:Z:HiMom.1
+HiMom:1:1201:1140:2125 77 * 0 0 * * 0 0 .TTTCAGTTCAGAGAACTGCAGAAT #1=DBDFDHHHHGJIJJJJJIIIJI RG:Z:HiMom.1
+HiMom:1:1201:1140:2125 141 * 0 0 * * 0 0 TAAATTGGTCTTAGATGTTGC FFFFHHHHFGIJIIIJIJIJJ RG:Z:HiMom.1
+HiMom:1:1201:1236:2187 77 * 0 0 * * 0 0 TTTAAATGGGTAAGAAGCCCGGCTC @BCDDFEFHHDHHJJJJJIJJIJJJ RG:Z:HiMom.1
+HiMom:1:1201:1236:2187 141 * 0 0 * * 0 0 TTAGCGGATTCCGACTTCCAT FFFDHHHHGIJJIGIGIJJGG RG:Z:HiMom.1
+HiMom:1:2101:1133:2239 77 * 0 0 * * 0 0 AGACAGAAGTACGGGAAGGCGAAGA @@@FFFFEHFHHHJJCGDHIIECD@ RG:Z:HiMom.1
+HiMom:1:2101:1133:2239 141 * 0 0 * * 0 0 TTTTGTTTCCTAGCTTGTCTT DFFFHHHHF4ACFHIJHHHGH RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCCATG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCCATG.sam
new file mode 100644
index 0000000..a5373e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCCATG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCATG LB:LN_TATCCATG PL:illumina PU:HiMom.1.TATCCATG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCTCGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCTCGG.sam
new file mode 100644
index 0000000..04096c5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCTCGG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTCGG LB:LN_TATCTCGG PL:illumina PU:HiMom.1.TATCTCGG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCTGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCTGCC.sam
new file mode 100644
index 0000000..993abfa
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TATCTGCC.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTGCC LB:LN_TATCTGCC PL:illumina PU:HiMom.1.TATCTGCC CN:BI
+HiMom:1:1101:1267:2209 77 * 0 0 * * 0 0 GAGACGGAGGCCAACGGGGGCCTGG @@CFFFFD8FDHFHIGIBG?@BCDG RG:Z:HiMom.1
+HiMom:1:1101:1267:2209 141 * 0 0 * * 0 0 GAGTCTCCAACAGCCCCGTAC DDD?CCFHAIIIGGIIGE at EG RG:Z:HiMom.1
+HiMom:1:1101:1353:2226 77 * 0 0 * * 0 0 TTGCTTGTCTGTAAAGTATTTTATT @C at DDFFDHHFHFHHIBGG>IHHII RG:Z:HiMom.1
+HiMom:1:1101:1353:2226 141 * 0 0 * * 0 0 TCTTCCGATCTTCAGGTTACC FFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1435:2194 77 * 0 0 * * 0 0 GAGAAAGAACATGACTACAGAGATG CCCFFFFFHHHHHJJJJJJJJJHJJ RG:Z:HiMom.1
+HiMom:1:1101:1435:2194 141 * 0 0 * * 0 0 GTTTTCTTTTACTGAAGTGTA FDFFHHHHHJJJJIHIJHHHJ RG:Z:HiMom.1
+HiMom:1:1201:1084:2204 77 * 0 0 * * 0 0 GGCCCGTGGACGCCGCCGAAGAAGC CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1084:2204 141 * 0 0 * * 0 0 TCCTCAGGCTCTCATCAGTTG FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1142:2242 77 * 0 0 * * 0 0 TGTTGATAGTCCTTCTTATCTTAGT ???DB?==CC2<AC:CC<CFEF<FF RG:Z:HiMom.1
+HiMom:1:1201:1142:2242 141 * 0 0 * * 0 0 AATGTAAAATAATAAAAAATG DDD;AF<DF<FFFFIIIFF@< RG:Z:HiMom.1
+HiMom:1:1201:1187:2100 77 * 0 0 * * 0 0 .GCGGTAATTCCAGCTCCAATAGCG #1:BB2 at DHHFHHIIIIHHIIGHGG RG:Z:HiMom.1
+HiMom:1:1201:1187:2100 141 * 0 0 * * 0 0 AAGAGCCCGCATTGCCGAGAC AA################### RG:Z:HiMom.1
+HiMom:1:1201:1392:2109 77 * 0 0 * * 0 0 .CTGAAGAGGCCAAAGCGCCCTCCA #1=DDFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1
+HiMom:1:1201:1392:2109 141 * 0 0 * * 0 0 GACAGGGGGATTTGGGCTGTG FFFFHHHHHHJJJHIJIJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCCGTCTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCCGTCTA.sam
new file mode 100644
index 0000000..8f5dbc6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCCGTCTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCCGTCTA LB:LN_TCCGTCTA PL:illumina PU:HiMom.1.TCCGTCTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCGCTAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCGCTAGA.sam
new file mode 100644
index 0000000..b3ab8a7
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCGCTAGA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCGCTAGA LB:LN_TCGCTAGA PL:illumina PU:HiMom.1.TCGCTAGA CN:BI
+HiMom:1:1101:1143:2192 77 * 0 0 * * 0 0 GGAGCGAGTCTGGGTCTCAGCCCCG CCCFFFFFHHHHHJGHIIIHJJJJI RG:Z:HiMom.1
+HiMom:1:1101:1143:2192 141 * 0 0 * * 0 0 AAGTCTGGCTTATCACTCATC FFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1479:2221 77 * 0 0 * * 0 0 TGTAAAGTATGCTGGCTCAGTGTAT BBBFDFFEHHHHHJJJJJJJIJHJJ RG:Z:HiMom.1
+HiMom:1:1101:1479:2221 141 * 0 0 * * 0 0 AAATCTATTTTTATGTAAAAA FFFFHHHHHJIGIJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1312:2112 77 * 0 0 * * 0 0 .TCCCAGCGAACCCGCGTGCAACCT #1=DFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1312:2112 141 * 0 0 * * 0 0 GCAGGAGCCGGCGCAGGTGCA FFFFHHHHHJJJIJJJJGHIJ RG:Z:HiMom.1
+HiMom:1:1201:1416:2128 77 * 0 0 * * 0 0 .ACAGGCGTGGAGGAGGCGGCGGCC #4=DDDFFHHHHHJIGJHFHHFFED RG:Z:HiMom.1
+HiMom:1:1201:1416:2128 141 * 0 0 * * 0 0 TGTGGAGGCGGTGGCGGGATC DDDDHHFHHII:?GGHIIB6? RG:Z:HiMom.1
+HiMom:1:2101:1064:2242 77 * 0 0 * * 0 0 ATGAACAAAGGAAGAATTATGCACG ?;?D;DDDF?;:+<<CFFCHE433A RG:Z:HiMom.1
+HiMom:1:2101:1064:2242 141 * 0 0 * * 0 0 AAAAGGTTGTCAAGCGTTAAA (<?################## RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCTGCAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCTGCAAG.sam
new file mode 100644
index 0000000..2d5ab39
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TCTGCAAG.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCTGCAAG LB:LN_TCTGCAAG PL:illumina PU:HiMom.1.TCTGCAAG CN:BI
+HiMom:1:1201:1042:2174 77 * 0 0 * * 0 0 .GTTGGTGTCTTCATTTTATGTATA #1=DDFDFHHHHHJIJJJHIJHIJJ RG:Z:HiMom.1
+HiMom:1:1201:1042:2174 141 * 0 0 * * 0 0 GGAAGGC..CAAAAAAAGAAA @@?@?<@##3<@@?@@????? RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGCAAGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGCAAGTA.sam
new file mode 100644
index 0000000..53e3f84
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGCAAGTA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCAAGTA LB:LN_TGCAAGTA PL:illumina PU:HiMom.1.TGCAAGTA CN:BI
+HiMom:1:1101:1242:2170 77 * 0 0 * * 0 0 ATGGCAGGGCAGAGTTCTGATGAGT CCCFFFFFHHGGGIFHEIIGIIII? RG:Z:HiMom.1
+HiMom:1:1101:1242:2170 141 * 0 0 * * 0 0 GGAAAAGAAGCACAAGTACAT FDFFHHHGHHGIIGJJEHHIG RG:Z:HiMom.1
+HiMom:1:2101:1163:2222 77 * 0 0 * * 0 0 GAGCAGGCAAGGAGGACTTCTTGTT CCCFFFFFGHHHHJJHHIJJJJJIJ RG:Z:HiMom.1
+HiMom:1:2101:1163:2222 141 * 0 0 * * 0 0 GATAATGGTTCTTTTCCTCAC FFFFHHHHHJJJJJJJIJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGCTGCTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGCTGCTG.sam
new file mode 100644
index 0000000..b9dc051
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGCTGCTG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCTGCTG LB:LN_TGCTGCTG PL:illumina PU:HiMom.1.TGCTGCTG CN:BI
+HiMom:1:1101:1084:2136 77 * 0 0 * * 0 0 .TCTCACTGTGAATTTGTGGTGGGC #1=DDFFFHHHHHJJJJGIJIJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1084:2136 141 * 0 0 * * 0 0 T.................... @#################### RG:Z:HiMom.1
+HiMom:1:1201:1285:2100 77 * 0 0 * * 0 0 .AATGACATGTTTAAAGATGGACTC #1:BDDFFHHFHHGIJIJIIIIGII RG:Z:HiMom.1
+HiMom:1:1201:1285:2100 141 * 0 0 * * 0 0 TTTTTTGCTTTGTAGTTATAG FFFFHHHHHIIGIABCFFHBF RG:Z:HiMom.1
+HiMom:1:2101:1162:2139 77 * 0 0 * * 0 0 AGAGGTGAAATTCTTGGACCGGCGC @@@DDDDDHFHHHDB:EFHHCAG?D RG:Z:HiMom.1
+HiMom:1:2101:1162:2139 141 * 0 0 * * 0 0 TTTATGGTCGGAACTACGACG FFFFHHHHHIJJJJJJJIJJI RG:Z:HiMom.1
+HiMom:1:2101:1195:2150 77 * 0 0 * * 0 0 CCGAGAGAGTGAGAGCGCTCCTGGG CCCFFFFFHFHHHJJJJIJJJJIJJ RG:Z:HiMom.1
+HiMom:1:2101:1195:2150 141 * 0 0 * * 0 0 GAACTTCACCACCCAGAGGAA FFFFHHHHHJJJJJJIJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGTAACTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGTAACTC.sam
new file mode 100644
index 0000000..817ba74
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGTAACTC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAACTC LB:LN_TGTAACTC PL:illumina PU:HiMom.1.TGTAACTC CN:BI
+HiMom:1:1201:1421:2154 77 * 0 0 * * 0 0 TGTGTGTGTGGGTGTGTGTATATAT ?@?DDFFFFFHH at GEFCCCHGIGJI RG:Z:HiMom.1
+HiMom:1:1201:1421:2154 141 * 0 0 * * 0 0 CTCTTCCGATCTTGTGCTCTT FFFFHHHHHJJJJFHIHHIJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGTAATCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGTAATCA.sam
new file mode 100644
index 0000000..7c977f3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TGTAATCA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAATCA LB:LN_TGTAATCA PL:illumina PU:HiMom.1.TGTAATCA CN:BI
+HiMom:1:1101:1419:2119 77 * 0 0 * * 0 0 .ATGACTATGGTAACTGAAAGAAAA #1:A1BDADBFFDFIIIEEHECACF RG:Z:HiMom.1
+HiMom:1:1101:1419:2119 141 * 0 0 * * 0 0 TCCTTTTTTGTTTTACTTTAA ##################### RG:Z:HiMom.1
+HiMom:1:1201:1208:2132 77 * 0 0 * * 0 0 .CCTCAATGAGCGGCACTATGGGGG #1=DDFFFHHHHGJJIJJGHIJGIJ RG:Z:HiMom.1
+HiMom:1:1201:1208:2132 141 * 0 0 * * 0 0 AGAAAGGATGGTCGGGCTCCA FFFFGHFHHJIJJGJIBHJJG RG:Z:HiMom.1
+HiMom:1:1201:1344:2147 77 * 0 0 * * 0 0 TATCCTCCCTACTATGCCTAGAAGG =?@DADEFHBHDFG>EFGDHGFGHD RG:Z:HiMom.1
+HiMom:1:1201:1344:2147 141 * 0 0 * * 0 0 TTAGTTTTAGCATTGGAGTAG DDDDFHHHFGGHHIIIGGAGH RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TTGTCTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TTGTCTAT.sam
new file mode 100644
index 0000000..a122979
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M/templates/TTGTCTAT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TTGTCTAT LB:LN_TTGTCTAT PL:illumina PU:HiMom.1.TTGTCTAT CN:BI
+HiMom:1:1101:1219:2164 77 * 0 0 * * 0 0 TCAAGCAGGAGCAGCTAAGTCCTAA CCCFFFFFHHHHHJJJJJJHIJJJJ RG:Z:HiMom.1
+HiMom:1:1101:1219:2164 141 * 0 0 * * 0 0 TATCCACTCCTTCCACTTTGG FFFFHHHHHJJIJJJJJJJIJ RG:Z:HiMom.1
+HiMom:1:1201:1103:2184 77 * 0 0 * * 0 0 GTAAGAACTACCCTGGGTCCCCGTG @@BFFFFFHHHHHJJJJGIJJJJHI RG:Z:HiMom.1
+HiMom:1:1201:1103:2184 141 * 0 0 * * 0 0 GTTTCAGAATTGTGGCCCCAT FDEFHHHHHJJJGHIJJJJJI RG:Z:HiMom.1
+HiMom:1:1201:1107:2109 77 * 0 0 * * 0 0 .GGGAACCTGGCGCTAAACCATTCG #1=DFFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1
+HiMom:1:1201:1107:2109 141 * 0 0 * * 0 0 ACCCTTGTGTCGAGGGCTGAC FFFFHHGHHJJJJIIJJJJJJ RG:Z:HiMom.1
+HiMom:1:1201:1252:2141 77 * 0 0 * * 0 0 .TTCCCCCCATGTAATTATTGTGAA #1=DDFFFHHHHHJJJJJJJJIJJJ RG:Z:HiMom.1
+HiMom:1:1201:1252:2141 141 * 0 0 * * 0 0 ATTTTGCCTATGTCCAACAAG FFFFGHHHHJIJJJJJJJJJJ RG:Z:HiMom.1
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AAAAAAAA.sam
new file mode 100644
index 0000000..4882d35
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAAAAA LB:LN_AAAAAAAA PL:illumina PU:HiMom.1.AAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AAAAGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AAAAGAAG.sam
new file mode 100644
index 0000000..6446f79
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AAAAGAAG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AAAAGAAG LB:LN_AAAAGAAG PL:illumina PU:HiMom.1.AAAAGAAG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AACAATGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AACAATGG.sam
new file mode 100644
index 0000000..9c5c143
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AACAATGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACAATGG LB:LN_AACAATGG PL:illumina PU:HiMom.1.AACAATGG CN:BI
+HiMom:1:1101:1138:2141 77 * 0 0 * * 0 0 .TTACCAAGGTTTTCTGTTTAGTGA #1=DDFFFHHFHHJJJIHJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCCGATCT
+HiMom:1:1101:1138:2141 141 * 0 0 * * 0 0 GCTTCAGGTCGATCAGA HGHHHJJIGHIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCCGATCT
+HiMom:1:1101:1206:2126 77 * 0 0 * * 0 0 .ATTCTGCCATATTGGTCCGACAGT #1=DDFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATCTGTCC
+HiMom:1:1101:1206:2126 141 * 0 0 * * 0 0 AGTGGTGCACTGAATGT HHHHHHIIJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATCTGTCC
+HiMom:1:2101:1077:2139 77 * 0 0 * * 0 0 CACAGGCTTCCACGGACTTAACGTC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:#4=DFFFF RX:Z:NATTAGTT
+HiMom:1:2101:1077:2139 141 * 0 0 * * 0 0 GGCGGATGAAGCAGATA HHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:#4=DFFFF RX:Z:NATTAGTT
+HiMom:1:2101:1112:2245 77 * 0 0 * * 0 0 TGCCATCTGCTCTGGGAAGCACCAG 1:=DDDDDFBC:DEFIFFFIEF at BE RG:Z:HiMom.1 QX:Z:?8?DBDDD RX:Z:TCGTAGTG
+HiMom:1:2101:1112:2245 141 * 0 0 * * 0 0 TTGTAATTTCGTCTTCT CCFCAACGGFFCBFFAE RG:Z:HiMom.1 QX:Z:?8?DBDDD RX:Z:TCGTAGTG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AACGCATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AACGCATT.sam
new file mode 100644
index 0000000..ab11663
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AACGCATT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AACGCATT LB:LN_AACGCATT PL:illumina PU:HiMom.1.AACGCATT CN:BI
+HiMom:1:1101:1197:2200 77 * 0 0 * * 0 0 GGGCGCCCCGTGAGGACCCAGTCCT @C at FFADDFFCFCEHIIJIJJIEFC RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:ATATTCCA
+HiMom:1:1101:1197:2200 141 * 0 0 * * 0 0 CTGGAACCACAGAACCC HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:ATATTCCA
+HiMom:1:1101:1308:2153 589 * 0 0 * * 0 0 TTTTGGAAGAGACCTCAATTACTGT ???DDDDD?:22AE:A2<3,AF?3A RG:Z:HiMom.1 QX:Z:1?1=4=== RX:Z:TCTGTAAG
+HiMom:1:1101:1308:2153 653 * 0 0 * * 0 0 GTAATCCCCGCATGTGT AFFDFFGFDGFB at CFB: RG:Z:HiMom.1 QX:Z:1?1=4=== RX:Z:TCTGTAAG
+HiMom:1:1101:1452:2132 77 * 0 0 * * 0 0 .CGTCCTGGAAAACGGGGCGCGGCT #1=BDBDDFHHHHF at FHDHIGIIII RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1101:1452:2132 141 * 0 0 * * 0 0 TTGTGTCGAGGGCTGAC HHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1201:1150:2161 77 * 0 0 * * 0 0 AAGTCACCTAATATCTTTTTTTTTT @@<??;?D?CFD,A4CDDHFBIIID RG:Z:HiMom.1 QX:Z:@C at FFFFF RX:Z:TTCTCACT
+HiMom:1:1201:1150:2161 141 * 0 0 * * 0 0 ACTGTGATTGTGCCACT GHHHHGIIIICEHCFGH RG:Z:HiMom.1 QX:Z:@C at FFFFF RX:Z:TTCTCACT
+HiMom:1:2101:1240:2197 589 * 0 0 * * 0 0 ATAAAACATAGCAATATTTTCCTAT ######################### RG:Z:HiMom.1 QX:Z:??##+A:D RX:Z:ACTGGAGA
+HiMom:1:2101:1240:2197 653 * 0 0 * * 0 0 TCCTTGTTACATGCCCA D?:ADEE@::C4:C<E: RG:Z:HiMom.1 QX:Z:??##+A:D RX:Z:ACTGGAGA
+HiMom:1:2101:1336:2109 77 * 0 0 * * 0 0 .ACTATCAGGATCGTGGCTATTTTG #1BDDFFFHHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGACCAGA
+HiMom:1:2101:1336:2109 141 * 0 0 * * 0 0 ACAGCTCCAGGTGCTCC HHHHHJJJJJJCGHIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGACCAGA
+HiMom:1:2101:1427:2081 77 * 0 0 * * 0 0 .CGAGTGCCTAGTGGGCCACTTTTG #4=DDBDFHHHHFHIJJJJIJJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCGACTTC
+HiMom:1:2101:1427:2081 141 * 0 0 * * 0 0 CATGGCCACCGTCCTGC HHHHHJJJIIGFIIJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCGACTTC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAAAATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAAAATT.sam
new file mode 100644
index 0000000..3cfd422
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAAAATT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAAAATT LB:LN_ACAAAATT PL:illumina PU:HiMom.1.ACAAAATT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAGGTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAGGTAT.sam
new file mode 100644
index 0000000..27822e1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAGGTAT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGGTAT LB:LN_ACAGGTAT PL:illumina PU:HiMom.1.ACAGGTAT CN:BI
+HiMom:1:1101:1236:2121 77 * 0 0 * * 0 0 .GGTGCTTCATATCCCTCTAGAGGA #1=BDDFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGCGCTT
+HiMom:1:1101:1236:2121 141 * 0 0 * * 0 0 ACTTTGTAGCCTTCATC HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGCGCTT
+HiMom:1:1201:1341:2116 77 * 0 0 * * 0 0 .AGAAGCCCCAGGAGGAAGACAGTC #1=DDFFFHHHHHHHJIIJJJJJGI RG:Z:HiMom.1 QX:Z:######## RX:Z:ATAACAGC
+HiMom:1:1201:1341:2116 141 * 0 0 * * 0 0 GAGACTGGCAACTTAAA ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:ATAACAGC
+HiMom:1:2101:1063:2206 77 * 0 0 * * 0 0 TCCTATTCGCCTACACAATTCTCCG CCCFFFFFHHHHHJJJJJJJHJJJJ RG:Z:HiMom.1 QX:Z:#1=DDDFF RX:Z:NTGCTAGG
+HiMom:1:2101:1063:2206 141 * 0 0 * * 0 0 ATGAGGATGGATAGTAA HHHHHJHIIJHIIIHHJ RG:Z:HiMom.1 QX:Z:#1=DDDFF RX:Z:NTGCTAGG
+HiMom:1:2101:1325:2083 77 * 0 0 * * 0 0 .CAGAAGAAAGGGCCTTGTCGGAGG #1=DDDDDHHFHDGI at EEHG:?FA8 RG:Z:HiMom.1 QX:Z:######## RX:Z:TGTGCTCT
+HiMom:1:2101:1325:2083 141 * 0 0 * * 0 0 TCCGATCTGGAGAAAAA =@############### RG:Z:HiMom.1 QX:Z:######## RX:Z:TGTGCTCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAGTTGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAGTTGA.sam
new file mode 100644
index 0000000..89464a6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACAGTTGA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACAGTTGA LB:LN_ACAGTTGA PL:illumina PU:HiMom.1.ACAGTTGA CN:BI
+HiMom:1:2101:1048:2238 77 * 0 0 * * 0 0 .CTGCCGTGTCCTGACTTCTGGAAT #1:B?ADDACF<DCG;EG<FHH at CE RG:Z:HiMom.1 QX:Z:#11ADDDB RX:Z:NGTCACAT
+HiMom:1:2101:1048:2238 141 * 0 0 * * 0 0 CGTTGAAGCACTGGATC <CFFHCHGDBHGIIIII RG:Z:HiMom.1 QX:Z:#11ADDDB RX:Z:NGTCACAT
+HiMom:1:2101:1216:2193 77 * 0 0 * * 0 0 TTTTCTTGGCCTCTGTTTTTTTTTT BCCFDFFFHHFFHJIGIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:AGGCATGA
+HiMom:1:2101:1216:2193 141 * 0 0 * * 0 0 CACTGCATTTTAAATAC HFFHHGGDFHFHIIHGG RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:AGGCATGA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACCAGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACCAGTTG.sam
new file mode 100644
index 0000000..0df9571
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACCAGTTG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACCAGTTG LB:LN_ACCAGTTG PL:illumina PU:HiMom.1.ACCAGTTG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACGAAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACGAAATC.sam
new file mode 100644
index 0000000..1fbb7b4
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACGAAATC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACGAAATC LB:LN_ACGAAATC PL:illumina PU:HiMom.1.ACGAAATC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTAAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTAAGAC.sam
new file mode 100644
index 0000000..1321f50
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTAAGAC.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTAAGAC LB:LN_ACTAAGAC PL:illumina PU:HiMom.1.ACTAAGAC CN:BI
+HiMom:1:1101:1259:2152 77 * 0 0 * * 0 0 CACCTATAATCCCAGCTACTCCAGA CCCFFFFFHHHHHJJJJJJIJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTTTTAT
+HiMom:1:1101:1259:2152 141 * 0 0 * * 0 0 ATTTTTTTAGACATAGG GHHHHJJJJIGIIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTTTTAT
+HiMom:1:1101:1261:2127 589 * 0 0 * * 0 0 .TGAAATCTGGATAGGCTGGAGTTA #0-@@@################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTTTTT
+HiMom:1:1101:1261:2127 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTT HGHHHJJIFDDDDDDDD RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTTTTT
+HiMom:1:2101:1021:2209 77 * 0 0 * * 0 0 .GGCCCCACCCTCCTCCAGCACGTC #1=DDFFFHHHHHJJJJJJHIIHFH RG:Z:HiMom.1 QX:Z:####@>?? RX:Z:NNGGAAGG
+HiMom:1:2101:1021:2209 141 * 0 0 * * 0 0 CTGCTAGCTGGCCAGAG @@??@?????????>?@ RG:Z:HiMom.1 QX:Z:####@>?? RX:Z:NNGGAAGG
+HiMom:1:2101:1262:2128 589 * 0 0 * * 0 0 AGCAGAAGGGCAAAAGCTGGCTTGA 9;<@:@################### RG:Z:HiMom.1 QX:Z:####-9@; RX:Z:TCTTGTGG
+HiMom:1:2101:1262:2128 653 * 0 0 * * 0 0 TAACTTTTCTGACACCT @?:8>?4:>?@###### RG:Z:HiMom.1 QX:Z:####-9@; RX:Z:TCTTGTGG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTGTACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTGTACC.sam
new file mode 100644
index 0000000..bcf9179
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTGTACC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTACC LB:LN_ACTGTACC PL:illumina PU:HiMom.1.ACTGTACC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTGTATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTGTATC.sam
new file mode 100644
index 0000000..dc583f2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ACTGTATC.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ACTGTATC LB:LN_ACTGTATC PL:illumina PU:HiMom.1.ACTGTATC CN:BI
+HiMom:1:1201:1458:2109 77 * 0 0 * * 0 0 .GAGACCATAGAGCGGATGCTTTCA #1=DDDFFHHGHGIJJIGIIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GATACGAA
+HiMom:1:1201:1458:2109 141 * 0 0 * * 0 0 CACACAAGAACTTTTTT HHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GATACGAA
+HiMom:1:2101:1105:2131 77 * 0 0 * * 0 0 TTGGAACACAGCGGGAATCACAGCA CCCFFFFFHHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAGCAGCA
+HiMom:1:2101:1105:2131 141 * 0 0 * * 0 0 GCAACAGCAGAAACATG HHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAGCAGCA
+HiMom:1:2101:1349:2084 77 * 0 0 * * 0 0 .CAAGTAGCAGTGTCACGCCTTAGC #1=DDBDDADFDDBEH at HC=CEGG@ RG:Z:HiMom.1 QX:Z:<5;??=>= RX:Z:AGTCTGAA
+HiMom:1:2101:1349:2084 141 * 0 0 * * 0 0 TCATTGGTGTCTGAAGA >>?############## RG:Z:HiMom.1 QX:Z:<5;??=>= RX:Z:AGTCTGAA
+HiMom:1:2101:1365:2094 77 * 0 0 * * 0 0 .AAGGTGAAGGCCGGCGCGCTCGCC #1=BDDDFFHHHHJGGGIGFIHIIJ RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:GCTCTTCC
+HiMom:1:2101:1365:2094 141 * 0 0 * * 0 0 GATCTTGTGCTCTTCCG HFHHGJJIIJIJJIHII RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:GCTCTTCC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGAAAAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGAAAAGA.sam
new file mode 100644
index 0000000..d40aec2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGAAAAGA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGAAAAGA LB:LN_AGAAAAGA PL:illumina PU:HiMom.1.AGAAAAGA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGCATGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGCATGGA.sam
new file mode 100644
index 0000000..e77d45d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGCATGGA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGCATGGA LB:LN_AGCATGGA PL:illumina PU:HiMom.1.AGCATGGA CN:BI
+HiMom:1:1101:1406:2222 77 * 0 0 * * 0 0 CTCCCCCCGGGCTGAACCAGGGTAC CCCFFDDDDHDFHIIIIIIIII9DG RG:Z:HiMom.1 QX:Z:;?@DDDBD RX:Z:GGCTGGAC
+HiMom:1:1101:1406:2222 141 * 0 0 * * 0 0 TCCCCTGGTTCTGGGCA ?FHDFGIIIGIGHHIII RG:Z:HiMom.1 QX:Z:;?@DDDBD RX:Z:GGCTGGAC
+HiMom:1:1201:1291:2158 77 * 0 0 * * 0 0 AGAAGGGGAAAGCCTTCATCTTGGC BCBFFFFFHHHHHJJJJJIIFIJIJ RG:Z:HiMom.1 QX:Z:@CCFFFDD RX:Z:CGTGTGCT
+HiMom:1:1201:1291:2158 141 * 0 0 * * 0 0 CTTCCGATCTGATGGGC ?FHHFGEHHIIDHIIII RG:Z:HiMom.1 QX:Z:@CCFFFDD RX:Z:CGTGTGCT
+HiMom:1:2101:1370:2116 77 * 0 0 * * 0 0 .TGGTGGTCCATAGAGATTTGAAAC #1:4BD7DACF?FCA:4+<ACHIIH RG:Z:HiMom.1 QX:Z:@@@DFFFD RX:Z:CACCATCT
+HiMom:1:2101:1370:2116 141 * 0 0 * * 0 0 GACATCATGTTTGAAAG FFHDHIGBHHII<HEDB RG:Z:HiMom.1 QX:Z:@@@DFFFD RX:Z:CACCATCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGGTAAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGGTAAGG.sam
new file mode 100644
index 0000000..3baedd3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGGTAAGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTAAGG LB:LN_AGGTAAGG PL:illumina PU:HiMom.1.AGGTAAGG CN:BI
+HiMom:1:1101:1263:2236 589 * 0 0 * * 0 0 CTTTGAAGACATTGTGAGATCTGTA <==A<42 at C+A4A?,2A@=4 at 7A?? RG:Z:HiMom.1 QX:Z:((0@#### RX:Z:AGTTCTTC
+HiMom:1:1101:1263:2236 653 * 0 0 * * 0 0 AGTAATTTTAGTACTGC ################# RG:Z:HiMom.1 QX:Z:((0@#### RX:Z:AGTTCTTC
+HiMom:1:2101:1054:2162 77 * 0 0 * * 0 0 .CCAGGTGTCTTCCCGGGCCCTGCC #1=DDFBDFHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:#4=DDFDF RX:Z:NGGACAGG
+HiMom:1:2101:1054:2162 141 * 0 0 * * 0 0 GAAGGGAAGGAAGGGTG HHHHHJIJIIDHHGICG RG:Z:HiMom.1 QX:Z:#4=DDFDF RX:Z:NGGACAGG
+HiMom:1:2101:1163:2203 77 * 0 0 * * 0 0 TCTCCATGTGAAACAAGCAAAAAGA CCCFFFFFHHHHGJJJIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFDFFF RX:Z:TTGGTTCA
+HiMom:1:2101:1163:2203 141 * 0 0 * * 0 0 CTTATGTATTTATGAAT HHHHHJHIIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFDFFF RX:Z:TTGGTTCA
+HiMom:1:2101:1249:2231 77 * 0 0 * * 0 0 GTTATTGATAGGATACTGTACAAAC @BCFFFFDHHHHFIJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TCTCTCGG
+HiMom:1:2101:1249:2231 141 * 0 0 * * 0 0 CCTTCCACTCTAGCATA FHHGHIJJJGJIIJHIJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TCTCTCGG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGGTCGCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGGTCGCA.sam
new file mode 100644
index 0000000..889a28c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/AGGTCGCA.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_AGGTCGCA LB:LN_AGGTCGCA PL:illumina PU:HiMom.1.AGGTCGCA CN:BI
+HiMom:1:1101:1150:2228 77 * 0 0 * * 0 0 GCTACTCAGTAGACAGTCCCACCCT @@CADDDDFCFHHIIIIGGIIGGGI RG:Z:HiMom.1 QX:Z:8?=DD8;@ RX:Z:ATGGGAGG
+HiMom:1:1101:1150:2228 141 * 0 0 * * 0 0 CGATTCCTAGGGGGTTG BH6DHD<FGGGEIGHIG RG:Z:HiMom.1 QX:Z:8?=DD8;@ RX:Z:ATGGGAGG
+HiMom:1:1101:1491:2120 77 * 0 0 * * 0 0 .GGCAGGTGCCCCCACTTGACTCTC #1?DFFFFGHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GGCCAGGC
+HiMom:1:1101:1491:2120 141 * 0 0 * * 0 0 TGAACTTCTGAGCTGCT HHHGHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GGCCAGGC
+HiMom:1:1201:1190:2194 77 * 0 0 * * 0 0 AACCTGGCGCTAAACCATTCGTAGA CCCFFFFFHHHHHJJJJJJJJIJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1201:1190:2194 141 * 0 0 * * 0 0 TTGTGTCGAGGGCTGAC HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:2101:1188:2195 77 * 0 0 * * 0 0 TTAGACCGTCGTGAGACAGGTTAGT @CCFFFFFHHHHHJJJJJIIEHIJH RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCACATAC
+HiMom:1:2101:1188:2195 141 * 0 0 * * 0 0 ACCAAATGTCTGAACCT HHHHHJJJHIJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCACATAC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ATTATCAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ATTATCAA.sam
new file mode 100644
index 0000000..d9e04b1
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ATTATCAA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTATCAA LB:LN_ATTATCAA PL:illumina PU:HiMom.1.ATTATCAA CN:BI
+HiMom:1:1101:1100:2207 77 * 0 0 * * 0 0 ACGACAGACGTTCTTTCTTTGCTGC CCCFFFFFHHFHHJIJJJJJHIJJH RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGCTNNN
+HiMom:1:1101:1100:2207 141 * 0 0 * * 0 0 .........G....... ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGCTNNN
+HiMom:1:1101:1157:2135 77 * 0 0 * * 0 0 .GGACATTGTAATCATTTCTTACAA #1=DD?DDHHHHHGGHIIIIIIIII RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTAAAGT
+HiMom:1:1101:1157:2135 141 * 0 0 * * 0 0 CTTAATCAAAGATGATA HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTAAAGT
+HiMom:1:1101:1269:2170 77 * 0 0 * * 0 0 ACAGTGTGGGAGGCAGACGAAGAGA @@@DDDDDFA:C at EGA?FD<FFHII RG:Z:HiMom.1 QX:Z:@@<ADBDB RX:Z:TTCCAAGC
+HiMom:1:1101:1269:2170 141 * 0 0 * * 0 0 CTGTGCTTTAAGGAAAA DF8DDCFH at GIE@@GGH RG:Z:HiMom.1 QX:Z:@@<ADBDB RX:Z:TTCCAAGC
+HiMom:1:1201:1018:2217 589 * 0 0 * * 0 0 .TTTCTCTGGGCGCAAAGATGTTCA #07;8=8<<99(:=@@/@7>>6=?> RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1201:1018:2217 653 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1201:1118:2198 77 * 0 0 * * 0 0 CAAGTGTACAGGATTAGACTGGGTT BCCFDEBDHHHHHIJJJGIIIJJGH RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:AATAAACT
+HiMom:1:1201:1118:2198 141 * 0 0 * * 0 0 TTATTAAAGCAGTTAAA HDHHHGIIIJJJIJJJJ RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:AATAAACT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ATTCCTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ATTCCTCT.sam
new file mode 100644
index 0000000..56dafa3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/ATTCCTCT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_ATTCCTCT LB:LN_ATTCCTCT PL:illumina PU:HiMom.1.ATTCCTCT CN:BI
+HiMom:1:1101:1309:2210 77 * 0 0 * * 0 0 ACACCAACCACCCAACTATCTATAA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@?DFFDF RX:Z:AGTGGGCT
+HiMom:1:1101:1309:2210 141 * 0 0 * * 0 0 AGGGCATTTTTAATCTT HHHDFHJIJJIJGIIIJ RG:Z:HiMom.1 QX:Z:@@?DFFDF RX:Z:AGTGGGCT
+HiMom:1:1201:1018:2133 77 * 0 0 * * 0 0 .AAAACTTGAGGATGCTATGCAAGC #1:B:ADDDDDDDEEAEBF9FFEBF RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1201:1018:2133 141 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1201:1073:2225 77 * 0 0 * * 0 0 GGGGCTGAGACCTTTGCTGATGGTG @@@FFFFFHHHGHJJJJJIIIGICH RG:Z:HiMom.1 QX:Z:@BBDFFFF RX:Z:CGTGTGCT
+HiMom:1:1201:1073:2225 141 * 0 0 * * 0 0 CTTCCGATCTGGAGGGT HHHHHJJJJJJJJJJJ: RG:Z:HiMom.1 QX:Z:@BBDFFFF RX:Z:CGTGTGCT
+HiMom:1:1201:1242:2207 77 * 0 0 * * 0 0 ATGGCAAAGTGGTGTCTGAGACCAA BCCFFFFFGHHHHHIIIJFHIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATCTTTTA
+HiMom:1:1201:1242:2207 141 * 0 0 * * 0 0 TTGGCCTCCTGCTCCCC HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATCTTTTA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAACTCTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAACTCTC.sam
new file mode 100644
index 0000000..b094170
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAACTCTC.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAACTCTC LB:LN_CAACTCTC PL:illumina PU:HiMom.1.CAACTCTC CN:BI
+HiMom:1:1101:1140:2120 77 * 0 0 * * 0 0 .CCCCAACATTCTAATTATGCCTCA #1:BDFFDHFFDFIJJJIIJIIIII RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:TTTTTTTT
+HiMom:1:1101:1140:2120 141 * 0 0 * * 0 0 TTTTTAACTTTGCAAAT HHHHFB at 9FHI@BFH@@ RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:TTTTTTTT
+HiMom:1:1101:1328:2225 77 * 0 0 * * 0 0 GAAATGCATCTGTCTTAGAAACTGG ??@=BDDDFDD<<,<2:C<F:FFEA RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGAAATT
+HiMom:1:1101:1328:2225 141 * 0 0 * * 0 0 AGGACTTACCTGACATA 28?############## RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGAAATT
+HiMom:1:1201:1127:2112 589 * 0 0 * * 0 0 .GTCAAGGATGTTCGTCGTGGCAAC #1=BDDDDDDDDDID<AE?@<CEEE RG:Z:HiMom.1 QX:Z:@<@?BDDD RX:Z:TAATCACC
+HiMom:1:1201:1127:2112 653 * 0 0 * * 0 0 TGAGCAGTGAAGCCAGC HD?FDBHI?AHGGGDFH RG:Z:HiMom.1 QX:Z:@<@?BDDD RX:Z:TAATCACC
+HiMom:1:1201:1452:2143 77 * 0 0 * * 0 0 TATCCCCTCTAAGACGGACCTGGGT CCCFFFFFHHHHHJJIIIJJJJJJG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTAGTC
+HiMom:1:1201:1452:2143 141 * 0 0 * * 0 0 TTAGCATTTACTTTCCC HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTAGTC
+HiMom:1:1201:1486:2146 589 * 0 0 * * 0 0 GTTCTCTGTCCCCAGGTCCTGTCTC ===A7<7222<<=C=?+<7>@?ACB RG:Z:HiMom.1 QX:Z:<<<@??@? RX:Z:TTTTTTTT
+HiMom:1:1201:1486:2146 653 * 0 0 * * 0 0 TTTTTTTTTTTTTGGGC ?@???????######## RG:Z:HiMom.1 QX:Z:<<<@??@? RX:Z:TTTTTTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAATAGAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAATAGAC.sam
new file mode 100644
index 0000000..f800d53
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAATAGAC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGAC LB:LN_CAATAGAC PL:illumina PU:HiMom.1.CAATAGAC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAATAGTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAATAGTC.sam
new file mode 100644
index 0000000..93a3709
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAATAGTC.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAATAGTC LB:LN_CAATAGTC PL:illumina PU:HiMom.1.CAATAGTC CN:BI
+HiMom:1:1101:1316:2126 77 * 0 0 * * 0 0 .AAAAAAAAAAAAAAAAAAAAAAAA #1BDFFFFHHHHHJJJJFDDDDDDD RG:Z:HiMom.1 XN:i:1 QX:Z:CCCFFFFF RX:Z:TCTTTTTT
+HiMom:1:1101:1316:2126 141 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTT HHHHHJJJJHFDDDDDD RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCTTTTTT
+HiMom:1:1101:1399:2128 77 * 0 0 * * 0 0 .TGCCCTTCGTCCTGGGAAACGGGG #1BDFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1101:1399:2128 141 * 0 0 * * 0 0 TTGTGTCGAGGGCTGAC HHHHHIJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1201:1054:2151 77 * 0 0 * * 0 0 .TAGTGCTGGGCACTAAGTAATACC #4=DDDFFHHHHHJJJJJHIJJJJJ RG:Z:HiMom.1 QX:Z:CBCFFFFF RX:Z:GTCAGGCA
+HiMom:1:1201:1054:2151 141 * 0 0 * * 0 0 CTGAGAATATATGGGTG HHHHHJJJJJJJJJJEG RG:Z:HiMom.1 QX:Z:CBCFFFFF RX:Z:GTCAGGCA
+HiMom:1:1201:1345:2181 77 * 0 0 * * 0 0 GGATAATCCTATTTATTACCTCAGA BBBDDFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATACGGAT
+HiMom:1:1201:1345:2181 141 * 0 0 * * 0 0 GTGTTTAGGAGTGGGAC HHHHHIIJJHJFHIJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATACGGAT
+HiMom:1:1201:1392:2184 77 * 0 0 * * 0 0 TTTCAGATTGGTCATTGTTAGTGTA ??@BDDDEHBHADHHIIEHDHFHFF RG:Z:HiMom.1 QX:Z:@@BFFFFF RX:Z:ATCTTTAT
+HiMom:1:1201:1392:2184 141 * 0 0 * * 0 0 TCATTTGTATGATCTTA HFFHFHIHIIJIJJJJI RG:Z:HiMom.1 QX:Z:@@BFFFFF RX:Z:ATCTTTAT
+HiMom:1:2101:1172:2152 589 * 0 0 * * 0 0 AACACGGACAAAGGAGTCTAACACG <<<??8@@################# RG:Z:HiMom.1 QX:Z:######## RX:Z:ATCGTTTC
+HiMom:1:2101:1172:2152 653 * 0 0 * * 0 0 TGGGGACTAGTGAGGCG ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:ATCGTTTC
+HiMom:1:2101:1491:2093 77 * 0 0 * * 0 0 .CTATGCCGATCGGGTGTCCGCACT #1=DDDDDHHFHHIIEHHHBGHGII RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGAGACGG
+HiMom:1:2101:1491:2093 141 * 0 0 * * 0 0 GGTCTCGCTATGTTGCC HHHHHJIIJJJJIJIJJ RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGAGACGG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAGCGGAT.sam
new file mode 100644
index 0000000..c9d3cfe
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAGCGGAT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGAT LB:LN_CAGCGGAT PL:illumina PU:HiMom.1.CAGCGGAT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAGCGGTA.sam
new file mode 100644
index 0000000..74b232e
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CAGCGGTA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CAGCGGTA LB:LN_CAGCGGTA PL:illumina PU:HiMom.1.CAGCGGTA CN:BI
+HiMom:1:1101:1420:2213 77 * 0 0 * * 0 0 TACCTGGTTGATCCTGCCAGTAGCA @@CFFFFDDHHGHJGGHIJJIHGBH RG:Z:HiMom.1 QX:Z:@CCFFFFD RX:Z:TTCACTGT
+HiMom:1:1101:1420:2213 141 * 0 0 * * 0 0 ACCGGCCGTGCGTACTT HHHFGIJJJJJJGHIGG RG:Z:HiMom.1 QX:Z:@CCFFFFD RX:Z:TTCACTGT
+HiMom:1:1201:1364:2113 77 * 0 0 * * 0 0 .CACTCATTTTCTTATGTGGGATAT #1=DDFDFHHHHHIJJIFHIIHHHI RG:Z:HiMom.1 QX:Z:######## RX:Z:TAAAGAGA
+HiMom:1:1201:1364:2113 141 * 0 0 * * 0 0 GCCAGTGGAGTTACGAC ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:TAAAGAGA
+HiMom:1:2101:1072:2170 77 * 0 0 * * 0 0 ATCACCGCACTCATTTCCCGCTTCC CCCFFFFFHHHACEEGHIIBHIIII RG:Z:HiMom.1 QX:Z:#4=BDDFD RX:Z:NGGGGAGA
+HiMom:1:2101:1072:2170 141 * 0 0 * * 0 0 CAGAGAGGATCAGAAGT HHDFHEGFEGGIJIIIG RG:Z:HiMom.1 QX:Z:#4=BDDFD RX:Z:NGGGGAGA
+HiMom:1:2101:1123:2095 77 * 0 0 * * 0 0 .TGGACAACATGTTCGAGAGCTACA #1=BBDDDFFFFDGFGIG?F;HHFI RG:Z:HiMom.1 QX:Z:@@@FDDFF RX:Z:TCCGCCTC
+HiMom:1:2101:1123:2095 141 * 0 0 * * 0 0 CAGCTTCAGCTTCTCCT HHHHHJHGGJIJJJEHH RG:Z:HiMom.1 QX:Z:@@@FDDFF RX:Z:TCCGCCTC
+HiMom:1:2101:1151:2236 589 * 0 0 * * 0 0 TTAAAGAGGTTCAGGGATGCAGAGT ######################### RG:Z:HiMom.1 QX:Z:######## RX:Z:TTTGAAGC
+HiMom:1:2101:1151:2236 653 * 0 0 * * 0 0 CTCTTTATCCTTGGCAT ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:TTTGAAGC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCAACATT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCAACATT.sam
new file mode 100644
index 0000000..d84bd9b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCAACATT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAACATT LB:LN_CCAACATT PL:illumina PU:HiMom.1.CCAACATT CN:BI
+HiMom:1:1101:1083:2193 77 * 0 0 * * 0 0 TTCTACCTCACCTTAGGGAGAAGAC @@@DDBDDD>F><C<4CG?EHGHIG RG:Z:HiMom.1 QX:Z:19;3#### RX:Z:AGGCTNNN
+HiMom:1:1101:1083:2193 141 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z:19;3#### RX:Z:AGGCTNNN
+HiMom:1:1101:1175:2197 77 * 0 0 * * 0 0 CCCCTGAGGACACCATCCCACTCCA CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BC at FFFFF RX:Z:AAGAGCTG
+HiMom:1:1101:1175:2197 141 * 0 0 * * 0 0 GGGAACATCCAGAAAGG HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BC at FFFFF RX:Z:AAGAGCTG
+HiMom:1:1201:1138:2227 589 * 0 0 * * 0 0 GCTGACACAATCTCTTCCGCCTGGT ######################### RG:Z:HiMom.1 QX:Z:=1=A=AAA RX:Z:GACAAATA
+HiMom:1:1201:1138:2227 653 * 0 0 * * 0 0 TAGGAAATAGAAGCTAT ,2?4>7C<<4<A+3<AB RG:Z:HiMom.1 QX:Z:=1=A=AAA RX:Z:GACAAATA
+HiMom:1:1201:1260:2165 77 * 0 0 * * 0 0 GGACACGGACAGGATTGACAGATTG BCBFFFFFHHHHHHIIJHIIIFHIJ RG:Z:HiMom.1 QX:Z:@@@FFDFF RX:Z:ATCTGATC
+HiMom:1:1201:1260:2165 141 * 0 0 * * 0 0 TAAGTTGGGGGACGCCG HHHHHJJJIJIIIGIJJ RG:Z:HiMom.1 QX:Z:@@@FFDFF RX:Z:ATCTGATC
+HiMom:1:1201:1281:2133 77 * 0 0 * * 0 0 .GGAAATCCAGAAAACATAGAAGAT #1=DDFFFHHHHHIJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCAACAAA
+HiMom:1:1201:1281:2133 141 * 0 0 * * 0 0 ATTTCATATGACTTAGC HHHHHJJIIIHICHIIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCAACAAA
+HiMom:1:1201:1331:2162 77 * 0 0 * * 0 0 ACGCTCGGCTAATTTTTGTATTTTT @CCFFFDFHHHHHIJJJJHIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TAATCCCA
+HiMom:1:1201:1331:2162 141 * 0 0 * * 0 0 GTACTTTGGGAGGCCAA HHHHHJJJJIJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TAATCCCA
+HiMom:1:2101:1186:2093 77 * 0 0 * * 0 0 .CGACCATAAACGATGCCGACCGGC #4=DFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:######## RX:Z:AATGTTGG
+HiMom:1:2101:1186:2093 141 * 0 0 * * 0 0 GAGGACAATGATGGAAA ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:AATGTTGG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCAGCACC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCAGCACC.sam
new file mode 100644
index 0000000..0350e1b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCAGCACC.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCAGCACC LB:LN_CCAGCACC PL:illumina PU:HiMom.1.CCAGCACC CN:BI
+HiMom:1:1101:1212:2230 77 * 0 0 * * 0 0 TTTCTATTAGCTCTTAGTAAGATTA CCCFFFFFHHHHHJJJIJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTAGCT
+HiMom:1:1101:1212:2230 141 * 0 0 * * 0 0 TTATTGGGGAGGGGGTG HHGHHJJJJGJJJJJDF RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTAGCT
+HiMom:1:1201:1204:2228 77 * 0 0 * * 0 0 CCGATACGCTGAGTGTGGTTTGCGG CCCFFFFFHHHFHEGGHIHIJJJJJ RG:Z:HiMom.1 QX:Z:@?@FFFFF RX:Z:TCTTCTTG
+HiMom:1:1201:1204:2228 141 * 0 0 * * 0 0 TCGATGAGGAACTTGGT DHHGHJIJJGHIIJJJH RG:Z:HiMom.1 QX:Z:@?@FFFFF RX:Z:TCTTCTTG
+HiMom:1:2101:1100:2085 77 * 0 0 * * 0 0 .CACATGGATGAGGAGAATGAGGAT #1=DDFFFFHHHHJHIGIHHHIJEH RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:ATCTTGAT
+HiMom:1:2101:1100:2085 141 * 0 0 * * 0 0 CTCCTCCTTCTTGGCCT HHFHFEIIIIHHBAHBG RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:ATCTTGAT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCATGCGT.sam
new file mode 100644
index 0000000..29ae6d9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CCATGCGT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CCATGCGT LB:LN_CCATGCGT PL:illumina PU:HiMom.1.CCATGCGT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CGCCTTCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CGCCTTCC.sam
new file mode 100644
index 0000000..5f2717b
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CGCCTTCC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCCTTCC LB:LN_CGCCTTCC PL:illumina PU:HiMom.1.CGCCTTCC CN:BI
+HiMom:1:1201:1122:2227 77 * 0 0 * * 0 0 AGAAGACGAGGCTGAGAGTGACATC @@@FFFFFHHHDHJGHGHCHHJJIJ RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:GTCATATA
+HiMom:1:1201:1122:2227 141 * 0 0 * * 0 0 AGGCCCAGTCCAAGGAA HHHGGIJIGGIJFIJII RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:GTCATATA
+HiMom:1:1201:1160:2109 77 * 0 0 * * 0 0 .AGAAGCCTTTGCACCCTGGGAGGA #1=DDDFFHHHHHJJJJJJJJIIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACATCCTT
+HiMom:1:1201:1160:2109 141 * 0 0 * * 0 0 CCCATGCCACCAACTCG GHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACATCCTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CGCTATGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CGCTATGT.sam
new file mode 100644
index 0000000..78b5e75
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CGCTATGT.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CGCTATGT LB:LN_CGCTATGT PL:illumina PU:HiMom.1.CGCTATGT CN:BI
+HiMom:1:1101:1291:2150 77 * 0 0 * * 0 0 CGTGGGGAACCTGGCGCTAAACCAT @BBFFFFFHHHHHJJJJIJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1101:1291:2150 141 * 0 0 * * 0 0 TTGTGTCGAGGGCTGAC HHFHHIJJJIIIGIJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1101:1314:2233 77 * 0 0 * * 0 0 GTTTATTGGGGCATTCCTTATCCCA @??DDDDBDHF>FCHGGGBFAAED9 RG:Z:HiMom.1 QX:Z:@@<DDD;= RX:Z:AGGAAAGT
+HiMom:1:1101:1314:2233 141 * 0 0 * * 0 0 TGGGCTGACCTGACAGA FBFADBCGDEH?F;FCG RG:Z:HiMom.1 QX:Z:@@<DDD;= RX:Z:AGGAAAGT
+HiMom:1:1101:1441:2148 77 * 0 0 * * 0 0 ACTTTCACCGCTACACGACCGGGGG CCCFFFFFHGFFHIIFIHJIGGII> RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTGGCT
+HiMom:1:1101:1441:2148 141 * 0 0 * * 0 0 CTAGAGGGGGTAGAGGG HHDFBHIIJJ1?FGHIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTGGCT
+HiMom:1:1201:1043:2246 77 * 0 0 * * 0 0 .TTCTCGGCTGTCATGTGCAACATT #1=DDBDFHHHDFFBHGHGHIIJEH RG:Z:HiMom.1 QX:Z:#0;@@??@ RX:Z:NGCATCAT
+HiMom:1:1201:1043:2246 141 * 0 0 * * 0 0 TTC..GCTTCTCTCTGT =@>##22=;@??><@?? RG:Z:HiMom.1 QX:Z:#0;@@??@ RX:Z:NGCATCAT
+HiMom:1:1201:1134:2144 77 * 0 0 * * 0 0 TGCCAGGAAGTGTTTTTTCTGGGTC @CCFFEFFHHFFFGIJJJJJJJJGH RG:Z:HiMom.1 QX:Z:B@?DDDFF RX:Z:AGTGTGAG
+HiMom:1:1201:1134:2144 141 * 0 0 * * 0 0 TAATGGTTGAGAGGTGG FHHGHJHHGFIHHIFGI RG:Z:HiMom.1 QX:Z:B@?DDDFF RX:Z:AGTGTGAG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTAACTCG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTAACTCG.sam
new file mode 100644
index 0000000..894ad0d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTAACTCG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTAACTCG LB:LN_CTAACTCG PL:illumina PU:HiMom.1.CTAACTCG CN:BI
+HiMom:1:1101:1363:2138 77 * 0 0 * * 0 0 .GTCTGGCCTGCACAGACATCCTAC #1=DDFFFHHHHHJJJIJJIJJJIJ RG:Z:HiMom.1 QX:Z:C@@FFFFF RX:Z:GTTCTTAA
+HiMom:1:1101:1363:2138 141 * 0 0 * * 0 0 ACCTGTTAGAACTTCTG HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:C@@FFFFF RX:Z:GTTCTTAA
+HiMom:1:1201:1393:2143 77 * 0 0 * * 0 0 TGGTTGATCCTGCCAGTAGCATATG @@@ADADDFHFFDBHE?G at HIIIEE RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:GATAAATG
+HiMom:1:1201:1393:2143 141 * 0 0 * * 0 0 CACGCATCCCCCCCGCG GGHHHHJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:GATAAATG
+HiMom:1:2101:1273:2119 589 * 0 0 * * 0 0 .AGATAAGAGTCCACACAGTTGAGT #11AAAAA<A?4=C=7?733<ACA3 RG:Z:HiMom.1 QX:Z:>=><AAAA RX:Z:ATGATGGA
+HiMom:1:2101:1273:2119 653 * 0 0 * * 0 0 TCTTCTCTAACTTGTCA A+2AA?CB4@@ABB3?A RG:Z:HiMom.1 QX:Z:>=><AAAA RX:Z:ATGATGGA
+HiMom:1:2101:1414:2098 77 * 0 0 * * 0 0 .AGGACATCGATAAAGGCGAGGTGT #1=DDFFFHHHHHJJJJJJJJJHHG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGGGGCC
+HiMom:1:2101:1414:2098 141 * 0 0 * * 0 0 GGTGCCGTCGGGCCCAA HHHHGJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGGGGCC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTATGCGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTATGCGC.sam
new file mode 100644
index 0000000..2a5a21c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTATGCGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGC LB:LN_CTATGCGC PL:illumina PU:HiMom.1.CTATGCGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTATGCGT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTATGCGT.sam
new file mode 100644
index 0000000..8cc90ea
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTATGCGT.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTATGCGT LB:LN_CTATGCGT PL:illumina PU:HiMom.1.CTATGCGT CN:BI
+HiMom:1:1201:1083:2121 77 * 0 0 * * 0 0 .AGAACTGGCGCTGCGGGATGAACC #1=BDFFFHHHHHJJJJJHIJIJJJ RG:Z:HiMom.1 QX:Z:######## RX:Z:ACACACAA
+HiMom:1:1201:1083:2121 141 * 0 0 * * 0 0 CACCACCGCCCTCCCCC ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:ACACACAA
+HiMom:1:1201:1185:2143 77 * 0 0 * * 0 0 ATCTGCCTGGTTCGGCCCGCCTGCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:GCTGAAGG
+HiMom:1:1201:1185:2143 141 * 0 0 * * 0 0 CCCGTGGGCCAGAGGTG HHHHHJJJJJJJJJJHI RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:GCTGAAGG
+HiMom:1:1201:1219:2115 77 * 0 0 * * 0 0 .TATAGTGGAGGCCGGAGCAGGAAC #1:DABADHHHFHIIIGGHGIIIII RG:Z:HiMom.1 QX:Z:???DBDBD RX:Z:TGGGAGTA
+HiMom:1:1201:1219:2115 141 * 0 0 * * 0 0 GTTCCCTGCTAAGGGAG ADDDDIEID:AFFD:?8 RG:Z:HiMom.1 QX:Z:???DBDBD RX:Z:TGGGAGTA
+HiMom:1:1201:1472:2121 589 * 0 0 * * 0 0 .TAAAGTGTGAACAAGGAAGGTCAT #07>@<9=@################ RG:Z:HiMom.1 QX:Z:=+=??A4A RX:Z:GTGTGCTC
+HiMom:1:1201:1472:2121 653 * 0 0 * * 0 0 TTCCGATCTGGAGGATG ==A at 7A<?######### RG:Z:HiMom.1 QX:Z:=+=??A4A RX:Z:GTGTGCTC
+HiMom:1:2101:1013:2146 77 * 0 0 * * 0 0 .ACACTGCTGCAGATGACAAGCAGC #4BDFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:####24=? RX:Z:NNNNCGCT
+HiMom:1:2101:1013:2146 141 * 0 0 * * 0 0 AGAACCAACTTATTCAT @@?@?@@?@@@@@@?@@ RG:Z:HiMom.1 QX:Z:####24=? RX:Z:NNNNCGCT
+HiMom:1:2101:1231:2208 77 * 0 0 * * 0 0 ACGCCGCAAGTCAGAGCCCCCCAGA @@@DDDFFFFB:DBBEBEFDHBDDB RG:Z:HiMom.1 QX:Z:@@;1ADAB RX:Z:AGCCAGTG
+HiMom:1:2101:1231:2208 141 * 0 0 * * 0 0 TTGGTGTGTTGACTGTT CF;BF<AACGCHEBHC< RG:Z:HiMom.1 QX:Z:@@;1ADAB RX:Z:AGCCAGTG
+HiMom:1:2101:1233:2133 589 * 0 0 * * 0 0 GAGAGAAGCACTCTTGAGCGGGATA 0;(@((@)2@############### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTTTTT
+HiMom:1:2101:1233:2133 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTT GHHHHJJJFDDDDDDDD RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTTTTTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTGCGGAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTGCGGAT.sam
new file mode 100644
index 0000000..3cbf3af
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTGCGGAT.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGCGGAT LB:LN_CTGCGGAT PL:illumina PU:HiMom.1.CTGCGGAT CN:BI
+HiMom:1:2101:1102:2221 77 * 0 0 * * 0 0 TTTCATCTTATTTCATTGGTTTATA CCCFFFFFHHHHHJIJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATAACTGA
+HiMom:1:2101:1102:2221 141 * 0 0 * * 0 0 CTCTACTCAGTAGATTA HHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATAACTGA
+HiMom:1:2101:1126:2082 77 * 0 0 * * 0 0 .GTTTTAGGGGTGCGCAGGAGTCAA #11=A=DD?DF at D@CCGHIEFH at BG RG:Z:HiMom.1 QX:Z:@C at DDDFF RX:Z:TCTCTTTC
+HiMom:1:2101:1126:2082 141 * 0 0 * * 0 0 CACCTTGGTCACCTTCC HHHHHJEGGIHHIJGIH RG:Z:HiMom.1 QX:Z:@C at DDDFF RX:Z:TCTCTTTC
+HiMom:1:2101:1216:2172 77 * 0 0 * * 0 0 TTTCTTCGCAGGATTTTTCTGAGCC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GGACTTCT
+HiMom:1:2101:1216:2172 141 * 0 0 * * 0 0 AGGGGATTTAGCGGGGT HHHHHJJJJJJJJJJJD RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GGACTTCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTGTAATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTGTAATC.sam
new file mode 100644
index 0000000..e61e68f
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/CTGTAATC.sam
@@ -0,0 +1,14 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_CTGTAATC LB:LN_CTGTAATC PL:illumina PU:HiMom.1.CTGTAATC CN:BI
+HiMom:1:1101:1403:2194 77 * 0 0 * * 0 0 CTAAACAGAGAGAAGGTTTCTCTTT CCCFFFFFHHHHHJJJFHIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFDD RX:Z:ACATGGTG
+HiMom:1:1101:1403:2194 141 * 0 0 * * 0 0 AAACCCTGTCTCTACTA HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFDD RX:Z:ACATGGTG
+HiMom:1:1201:1045:2105 589 * 0 0 * * 0 0 .TAAAGAGAAATCAAGAATACTATT #-4@?(@)@@############### RG:Z:HiMom.1 QX:Z:#0;@@@@@ RX:Z:NTTTTTTT
+HiMom:1:1201:1045:2105 653 * 0 0 * * 0 0 TTT..TTTTTTTTTTTT @@?##0:????????=< RG:Z:HiMom.1 QX:Z:#0;@@@@@ RX:Z:NTTTTTTT
+HiMom:1:1201:1483:2126 589 * 0 0 * * 0 0 .TGATAAGGTGTTGCTATGTTACCC #1:D?DDDDA??2:<CC4:AEDF>? RG:Z:HiMom.1 QX:Z:@@@DDDBB RX:Z:GCATGCAG
+HiMom:1:1201:1483:2126 653 * 0 0 * * 0 0 CTGGGTGCTGTGATGCA <DD8F<<CGG?AA?A<F RG:Z:HiMom.1 QX:Z:@@@DDDBB RX:Z:GCATGCAG
+HiMom:1:2101:1011:2102 77 * 0 0 * * 0 0 .AAACAAAACTGTAGAACTGTGTAT #1=DDFFFHHHHHJJIJJJIHHHJJ RG:Z:HiMom.1 QX:Z:#####22@ RX:Z:NNNNNTCA
+HiMom:1:2101:1011:2102 141 * 0 0 * * 0 0 CACATAATTTTAAAATT ?@@??@@@@@??@@@@@ RG:Z:HiMom.1 QX:Z:#####22@ RX:Z:NNNNNTCA
+HiMom:1:2101:1245:2154 77 * 0 0 * * 0 0 TCGTTAAGTATATTCTTAGGTATTT CCCFFDFFFHFHHIIJJJJJFJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACCAATCA
+HiMom:1:2101:1245:2154 141 * 0 0 * * 0 0 GTAGCACCACTATACAC HHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACCAATCA
+HiMom:1:2101:1386:2105 77 * 0 0 * * 0 0 .TACTAAAGAAAAAGTTGAAGAACT #1=DDDFFHHHHHJJGHIJJJJIJJ RG:Z:HiMom.1 QX:Z:B@@DDFFF RX:Z:AGGAATTA
+HiMom:1:2101:1386:2105 141 * 0 0 * * 0 0 TTCTTCTGCCATAAGGT HGFHHIJJJJJGIGIJH RG:Z:HiMom.1 QX:Z:B@@DDFFF RX:Z:AGGAATTA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAAAAAAA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAAAAAAA.sam
new file mode 100644
index 0000000..0bfb1f9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAAAAAAA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAAAAAA LB:LN_GAAAAAAA PL:illumina PU:HiMom.1.GAAAAAAA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAACGAT..sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAACGAT..sam
new file mode 100644
index 0000000..1636a99
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAACGAT..sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAACGAT. LB:LN_GAACGAT. PL:illumina PU:HiMom.1.GAACGAT. CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAAGGAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAAGGAAG.sam
new file mode 100644
index 0000000..da4b7bf
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GAAGGAAG.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GAAGGAAG LB:LN_GAAGGAAG PL:illumina PU:HiMom.1.GAAGGAAG CN:BI
+HiMom:1:1101:1338:2175 77 * 0 0 * * 0 0 CCCACCTTCCGGCGGCCGAAGACAC CCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCTTGTTG
+HiMom:1:1101:1338:2175 141 * 0 0 * * 0 0 GCTTTAACATCCACAAT HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCTTGTTG
+HiMom:1:1201:1028:2202 77 * 0 0 * * 0 0 .TCCTGGGAAACGGGGCGCGGCTGG #4BDDDFFHHHHHIJIIJJJJJJIJ RG:Z:HiMom.1 QX:Z:####@?## RX:Z:NNAAACNC
+HiMom:1:1201:1028:2202 141 * 0 0 * * 0 0 .T.......GG..TG.. ################# RG:Z:HiMom.1 QX:Z:####@?## RX:Z:NNAAACNC
+HiMom:1:2101:1084:2188 77 * 0 0 * * 0 0 TTGCTGCATGGGTTAATTGAGAATA CCCFFFFFHHHHFHHIIJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:TACAAGGT
+HiMom:1:2101:1084:2188 141 * 0 0 * * 0 0 CAAAATCAGCAACAAGT HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:TACAAGGT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCAGGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCAGGA.sam
new file mode 100644
index 0000000..71791ff
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCAGGA.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGA LB:LN_GACCAGGA PL:illumina PU:HiMom.1.GACCAGGA CN:BI
+HiMom:1:1101:1089:2172 77 * 0 0 * * 0 0 TTCCAGCATGCGGTTTAAGTAGGAT @CCFDFDBDFBF:<CEBHAFHHICH RG:Z:HiMom.1 QX:Z::<<??### RX:Z:TCCGGNNN
+HiMom:1:1101:1089:2172 141 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z::<<??### RX:Z:TCCGGNNN
+HiMom:1:1101:1347:2149 77 * 0 0 * * 0 0 GAGCAGATCGGAAGAGCACAGATCG @@@FFDDDHHHHHIJJBGGHJIHEG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCTCTTCC
+HiMom:1:1101:1347:2149 141 * 0 0 * * 0 0 GATCTGTGCTCTTCCGA DFHHFIJDGIGGHGIGH RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCTCTTCC
+HiMom:1:1201:1095:2146 77 * 0 0 * * 0 0 GCTGAGTCATGTAGTAAGCCTGTGC BB at FDDDFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGACAA
+HiMom:1:1201:1095:2146 141 * 0 0 * * 0 0 CACCAAATGCTGCTAAG HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGACAA
+HiMom:1:1201:1123:2161 589 * 0 0 * * 0 0 CACTAACTCCTGACCTCAAATAATC ?7?=DD?DD+CDBE>E at EEF@+<CF RG:Z:HiMom.1 QX:Z:===AAAA8 RX:Z:CGTGTGCT
+HiMom:1:1201:1123:2161 653 * 0 0 * * 0 0 CTTCCGATCTGCATACA AAAA<AAA)@CBA9>A# RG:Z:HiMom.1 QX:Z:===AAAA8 RX:Z:CGTGTGCT
+HiMom:1:1201:1439:2156 77 * 0 0 * * 0 0 AGCCGCGAGGTGCTGGCGGACTTCC :;1BDDDAA88A<?<E1C:D##### RG:Z:HiMom.1 QX:Z:####2<>> RX:Z:GGAGATTA
+HiMom:1:1201:1439:2156 141 * 0 0 * * 0 0 TTTGCCTTGAAGTAAGC @>8@>8;@######### RG:Z:HiMom.1 QX:Z:####2<>> RX:Z:GGAGATTA
+HiMom:1:2101:1207:2084 589 * 0 0 * * 0 0 .TAGATGACCAAAACTTGCAGGGCA #1:A<?@A+7A=?CBCCBCCBAAAA RG:Z:HiMom.1 QX:Z:@@@DDEDF RX:Z:TCACCACT
+HiMom:1:2101:1207:2084 653 * 0 0 * * 0 0 CTTCTGGGCATCCCCTG HHHHHIJIHHGHGGJJJ RG:Z:HiMom.1 QX:Z:@@@DDEDF RX:Z:TCACCACT
+HiMom:1:2101:1312:2105 77 * 0 0 * * 0 0 .TTCCCTCAGGATAGCTGGCGCTCT #1=DDFFFGHGHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFDF RX:Z:GTTGAGAA
+HiMom:1:2101:1312:2105 141 * 0 0 * * 0 0 TAGGTTGAGATCGTTTC HHFHDHIJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:@CCFFFDF RX:Z:GTTGAGAA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCAGGC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCAGGC.sam
new file mode 100644
index 0000000..71b9f20
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCAGGC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCAGGC LB:LN_GACCAGGC PL:illumina PU:HiMom.1.GACCAGGC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCGTTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCGTTG.sam
new file mode 100644
index 0000000..a00ca6a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCGTTG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCGTTG LB:LN_GACCGTTG PL:illumina PU:HiMom.1.GACCGTTG CN:BI
+HiMom:1:1101:1218:2200 77 * 0 0 * * 0 0 GCACCGGAAGAGCACACAGATCGGA CCCFFFFDFHGHHJJIJIJJJJJJI RG:Z:HiMom.1 QX:Z:####34?? RX:Z:GCTCTTCC
+HiMom:1:1101:1218:2200 141 * 0 0 * * 0 0 GATCTATCTGCTCGTCC ?3;@############# RG:Z:HiMom.1 QX:Z:####34?? RX:Z:GCTCTTCC
+HiMom:1:1101:1257:2223 77 * 0 0 * * 0 0 TGTATTCGAGAGATCAAAGAGAGAG @@=DDBDD?FFHHEIDBDFCEDBAF RG:Z:HiMom.1 QX:Z::?@DDBDD RX:Z:TGCTCTTC
+HiMom:1:1101:1257:2223 141 * 0 0 * * 0 0 CGATCTTTTAGCAAAGC HFFHDGIGIIJJJGGGI RG:Z:HiMom.1 QX:Z::?@DDBDD RX:Z:TGCTCTTC
+HiMom:1:1201:1180:2119 77 * 0 0 * * 0 0 .TGAAAGATTTAGAGAGCTTACAAA #1=DDDDDHHHGHJJIIJJJJIJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCTCTAAA
+HiMom:1:1201:1180:2119 141 * 0 0 * * 0 0 TTTTGCTTTTCTACAGC HHHHHJJJJIJIJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCTCTAAA
+HiMom:1:2101:1036:2087 77 * 0 0 * * 0 0 .TGTAGTTTCTTTAGGCAAATTTGT #4=BDDDFHHHHHJJJJJJIIJJJI RG:Z:HiMom.1 QX:Z:#4=DDFFF RX:Z:NGTCCACT
+HiMom:1:2101:1036:2087 141 * 0 0 * * 0 0 TACGAAGCAAATACTTT HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:#4=DDFFF RX:Z:NGTCCACT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCTAAC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCTAAC.sam
new file mode 100644
index 0000000..4dced28
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GACCTAAC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GACCTAAC LB:LN_GACCTAAC PL:illumina PU:HiMom.1.GACCTAAC CN:BI
+HiMom:1:1101:1302:2244 77 * 0 0 * * 0 0 GGAAAAGACGGAAAGGTTCTATCTC @C at DFFFDFHHHHJIJHHIJJJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGAATACA
+HiMom:1:1101:1302:2244 141 * 0 0 * * 0 0 TATAACAAATGCAAAAA HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGAATACA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GATATCCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GATATCCA.sam
new file mode 100644
index 0000000..1bb3adf
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GATATCCA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GATATCCA LB:LN_GATATCCA PL:illumina PU:HiMom.1.GATATCCA CN:BI
+HiMom:1:1101:1460:2176 77 * 0 0 * * 0 0 AGTCCAGGCTGAGCCCAGGGAAGAA CCCFFFFFHHHHGJIJJIJJHIJJI RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGAAAAA
+HiMom:1:1101:1460:2176 141 * 0 0 * * 0 0 GACACAACAAGTCCAAC ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGAAAAA
+HiMom:1:2101:1031:2163 77 * 0 0 * * 0 0 .TTTCCATGGCCGTCACCTTTGGGT #4=DDFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:####@?@@ RX:Z:NNACATTT
+HiMom:1:2101:1031:2163 141 * 0 0 * * 0 0 GTCACCACTAGCCACCA @@@@@@@@?@@@@@@@? RG:Z:HiMom.1 QX:Z:####@?@@ RX:Z:NNACATTT
+HiMom:1:2101:1226:2088 77 * 0 0 * * 0 0 .GATCGGAAGAGCACACGTTTGACT #4=DAA=DDFHFHIIBFGHHIG>EG RG:Z:HiMom.1 QX:Z:==?BDFFF RX:Z:GCTCTTCC
+HiMom:1:2101:1226:2088 141 * 0 0 * * 0 0 GATCTAGGTAATAGCTA DCDDHFFFAFHDHIJGJ RG:Z:HiMom.1 QX:Z:==?BDFFF RX:Z:GCTCTTCC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GCCGTCGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GCCGTCGA.sam
new file mode 100644
index 0000000..235189a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GCCGTCGA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCGTCGA LB:LN_GCCGTCGA PL:illumina PU:HiMom.1.GCCGTCGA CN:BI
+HiMom:1:1101:1111:2148 77 * 0 0 * * 0 0 GTGGAGACCACCTCCGAGGCCTTGT BBCFFFFFHHHHHJJJIJJJJJJJI RG:Z:HiMom.1 QX:Z:######## RX:Z:GCGAANAN
+HiMom:1:1101:1111:2148 141 * 0 0 * * 0 0 .........GGACGAC. ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:GCGAANAN
+HiMom:1:1101:1221:2143 77 * 0 0 * * 0 0 TTTGGTGGAAATTTTTTGTTATGAT CCCFFBDBHFD?FBFHIIGGIC at EF RG:Z:HiMom.1 QX:Z:@@@FFFFD RX:Z:CAATTGAA
+HiMom:1:1101:1221:2143 141 * 0 0 * * 0 0 TGTCTGCACAGCCGCTT HHHHHJJJIIIJGHIJJ RG:Z:HiMom.1 QX:Z:@@@FFFFD RX:Z:CAATTGAA
+HiMom:1:1101:1327:2200 589 * 0 0 * * 0 0 AGGGGGATCCGCCGGGGGACCACAA ######################### RG:Z:HiMom.1 QX:Z:@B at FFFFF RX:Z:GTCATCTG
+HiMom:1:1101:1327:2200 653 * 0 0 * * 0 0 GGCTGTCGACAGGTGTC HHHHGIJJJJJJIFHHI RG:Z:HiMom.1 QX:Z:@B at FFFFF RX:Z:GTCATCTG
+HiMom:1:2101:1122:2136 77 * 0 0 * * 0 0 GTAGGCGCTCAGCAAATACTTGTCG @@@DDDD8?<CACEHHBBHDAAFH@ RG:Z:HiMom.1 QX:Z:???BBAAB RX:Z:CTTGCCAG
+HiMom:1:2101:1122:2136 141 * 0 0 * * 0 0 CCTGCAGGCCCCGCGGC DD?DDIID)A:3<EADD RG:Z:HiMom.1 QX:Z:???BBAAB RX:Z:CTTGCCAG
+HiMom:1:2101:1459:2083 77 * 0 0 * * 0 0 .CACACGCCACACGGAGCACACTTT #4=DDFFFHHHHHJJJJJJJJIIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:ATTTCACC
+HiMom:1:2101:1459:2083 141 * 0 0 * * 0 0 AAAATAATCAGAAGGCC BHGHHIGGIJFJJGGFH RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:ATTTCACC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GCCTAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GCCTAGCC.sam
new file mode 100644
index 0000000..6b286a2
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GCCTAGCC.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GCCTAGCC LB:LN_GCCTAGCC PL:illumina PU:HiMom.1.GCCTAGCC CN:BI
+HiMom:1:1101:1165:2239 77 * 0 0 * * 0 0 GGCGGAGGCAGCATTTCAGCTGTGA CCCFFDFFHHHHHIJJIGHHHJHHF RG:Z:HiMom.1 QX:Z:######## RX:Z:ATGGAAGT
+HiMom:1:1101:1165:2239 141 * 0 0 * * 0 0 CGAGACAGAAGTGAGAA ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:ATGGAAGT
+HiMom:1:1101:1290:2225 77 * 0 0 * * 0 0 CTTGGGCGCATGGTGAGGGAGGGAG @@@FFDDFHDFH??CBEBHHIGDCD RG:Z:HiMom.1 QX:Z:C@@FBEDD RX:Z:TCAGTTCA
+HiMom:1:1101:1290:2225 141 * 0 0 * * 0 0 CTGGCAAAGACAGTCAC FHFHGIIICEHGDHBHE RG:Z:HiMom.1 QX:Z:C@@FBEDD RX:Z:TCAGTTCA
+HiMom:1:1201:1280:2179 77 * 0 0 * * 0 0 TTCAAGGAATCGTCCTGCCTCAGCC BCCFFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:@@BFFDEF RX:Z:GAGGACTG
+HiMom:1:1201:1280:2179 141 * 0 0 * * 0 0 CTTGAGTCCAGGAGTTC GHHHHIFGCHIJJJGGI RG:Z:HiMom.1 QX:Z:@@BFFDEF RX:Z:GAGGACTG
+HiMom:1:1201:1300:2137 77 * 0 0 * * 0 0 .TGTAATCCCAGCTCTCAGGGAGGC #1=ADDDDDDDBBA?@AE?E at FE8; RG:Z:HiMom.1 QX:Z:@@?DDDDD RX:Z:GCTCTTCC
+HiMom:1:1201:1300:2137 141 * 0 0 * * 0 0 GATCTTTTTTTTAATTT FDHADEHGIGGED3?FD RG:Z:HiMom.1 QX:Z:@@?DDDDD RX:Z:GCTCTTCC
+HiMom:1:2101:1023:2237 589 * 0 0 * * 0 0 .TAAACAGCTTCTGCACAGCCAAAG #00@@?>=39>9;<412@?###### RG:Z:HiMom.1 QX:Z:####=@?> RX:Z:NNTTTGTT
+HiMom:1:2101:1023:2237 653 * 0 0 * * 0 0 TGAGTTCCTTGTAGATT ?@???@:>?@??>?;?< RG:Z:HiMom.1 QX:Z:####=@?> RX:Z:NNTTTGTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GTAACATC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GTAACATC.sam
new file mode 100644
index 0000000..fd117a0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GTAACATC.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTAACATC LB:LN_GTAACATC PL:illumina PU:HiMom.1.GTAACATC CN:BI
+HiMom:1:1101:1188:2237 77 * 0 0 * * 0 0 TCCCCCTCCCTTTTGCGCACACACC @?@DDADDHDHBDH<EFHIIHG?HF RG:Z:HiMom.1 QX:Z:CCCFFDDE RX:Z:GCTTCCTT
+HiMom:1:1101:1188:2237 141 * 0 0 * * 0 0 CAAGACAGAAGTGAGAA FHHFFE at FDHHAIAFHG RG:Z:HiMom.1 QX:Z:CCCFFDDE RX:Z:GCTTCCTT
+HiMom:1:2101:1208:2231 589 * 0 0 * * 0 0 TCACTAAACATCCAAACATCACTTT ######################### RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTTTTTTT
+HiMom:1:2101:1208:2231 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTT HHHHHJJJHFDDDDDDD RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTTTTTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GTCCACAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GTCCACAG.sam
new file mode 100644
index 0000000..d61115a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/GTCCACAG.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_GTCCACAG LB:LN_GTCCACAG PL:illumina PU:HiMom.1.GTCCACAG CN:BI
+HiMom:1:1101:1069:2159 77 * 0 0 * * 0 0 TCCCTTACCATCAAATCAATTG.CC CCCFFFFFHHHHHJJJJJJJJJ#3A RG:Z:HiMom.1 QX:Z:<<<@?### RX:Z:GACGTNNN
+HiMom:1:1101:1069:2159 141 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z:<<<@?### RX:Z:GACGTNNN
+HiMom:1:1201:1486:2109 77 * 0 0 * * 0 0 .CACCTCCTAGCCCCTCACTTCTGT #1=B;BDDHHHGFIIIIIIIIIGGG RG:Z:HiMom.1 QX:Z:CCCFF?DD RX:Z:ACGTGTGC
+HiMom:1:1201:1486:2109 141 * 0 0 * * 0 0 TCTTCCCGATCTGTATA FBHHHJJIIDHJIJJJH RG:Z:HiMom.1 QX:Z:CCCFF?DD RX:Z:ACGTGTGC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/N.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/N.sam
new file mode 100644
index 0000000..21f945c
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/N.sam
@@ -0,0 +1,34 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_N LB:LN_N PL:illumina PU:HiMom.1.N CN:BI
+HiMom:1:1101:1031:2224 589 * 0 0 * * 0 0 .AATA............T....... #0;@@#################### BC:Z:NNNNNNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1031:2224 653 * 0 0 * * 0 0 ................. ################# BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1039:2147 589 * 0 0 * * 0 0 .CCAA.G..GG....ATGTAA.... #4;@@#4##2<####43@@@@#### BC:Z:NNNNNNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1039:2147 653 * 0 0 * * 0 0 ................. ################# BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1046:2175 589 * 0 0 * * 0 0 .TGCC.G.GTT.CG.GGTCTT.... #4;@@#################### BC:Z:NNNNNNNN RG:Z:HiMom.1 QX:Z:####@### RX:Z:NNGGANNN
+HiMom:1:1101:1046:2175 653 * 0 0 * * 0 0 ................. ################# BC:Z:NNNNNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:####@### RX:Z:NNGGANNN
+HiMom:1:1101:1047:2122 589 * 0 0 * * 0 0 .CTAA.G.ACT.TG.GTGTGC.... #0;@@#4#3@@#3@#2<@@@@#### BC:Z:NNNANNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNTCANNN
+HiMom:1:1101:1047:2122 653 * 0 0 * * 0 0 ................. ################# BC:Z:NNNANNNN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNTCANNN
+HiMom:1:1101:1048:2197 589 * 0 0 * * 0 0 .CTCC.G.TCA.CA.GTGGAG.... #0;?@#################### BC:Z:NNNCNNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNGTGNNN
+HiMom:1:1101:1048:2197 653 * 0 0 * * 0 0 ................. ################# BC:Z:NNNCNNNN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNGTGNNN
+HiMom:1:1101:1065:2193 77 * 0 0 * * 0 0 GAAGTACGCCCTGCCCCTGGTT.GC ?@@DAADAHHFHBEBEGGHG?#### BC:Z:GAACGATN RG:Z:HiMom.1 QX:Z:######## RX:Z:NCTTGNNN
+HiMom:1:1101:1065:2193 141 * 0 0 * * 0 0 ................. ################# BC:Z:GAACGATN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NCTTGNNN
+HiMom:1:1101:1162:2207 589 * 0 0 * * 0 0 ACCTTGAGGAGAACATAAGAGCAAA ######################### BC:Z:ACAAAATT RG:Z:HiMom.1 QX:Z:######## RX:Z:TAAAACTG
+HiMom:1:1101:1162:2207 653 * 0 0 * * 0 0 GGGAAGTTAGAGGAATG ################# BC:Z:ACAAAATT RG:Z:HiMom.1 QX:Z:######## RX:Z:TAAAACTG
+HiMom:1:1201:1159:2179 589 * 0 0 * * 0 0 GTTAGCACAGATATTGGATGAGTGA ######################### BC:Z:AAAAAAAA RG:Z:HiMom.1 QX:Z:===AA70< RX:Z:TTTTTTTT
+HiMom:1:1201:1159:2179 653 * 0 0 * * 0 0 TATTTTTCTAAATACTT ################# BC:Z:AAAAAAAA RG:Z:HiMom.1 QX:Z:===AA70< RX:Z:TTTTTTTT
+HiMom:1:1201:1414:2174 589 * 0 0 * * 0 0 GCCAAAAAAAAGAACCAGCCCAAGG ######################### BC:Z:AGAAAAGA RG:Z:HiMom.1 QX:Z:@;@1BDAD RX:Z:TTTTTTTT
+HiMom:1:1201:1414:2174 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTT F????FFEB>B6=BBBB BC:Z:AGAAAAGA RG:Z:HiMom.1 QX:Z:@;@1BDAD RX:Z:TTTTTTTT
+HiMom:1:2101:1040:2208 589 * 0 0 * * 0 0 .ATGCCCACCTCCCTCCTACGCACC ######################### BC:Z:ACGAAATC RG:Z:HiMom.1 QX:Z:####>(2@ RX:Z:NCTGATAG
+HiMom:1:2101:1040:2208 653 * 0 0 * * 0 0 TCACTGAAATGAATTCA .22@@############ BC:Z:ACGAAATC RG:Z:HiMom.1 QX:Z:####>(2@ RX:Z:NCTGATAG
+HiMom:1:2101:1059:2083 77 * 0 0 * * 0 0 .AAGAGGGGTCAAGAGTTAAACTTA #1=DDFFFHFHHGIGHGHJJJJJJI BC:Z:TACCGTCT RG:Z:HiMom.1 QX:Z:#1=BDDDE RX:Z:NGAATGTC
+HiMom:1:2101:1059:2083 141 * 0 0 * * 0 0 TTAGAAGGATGCTTCTC HHGHHJJJJJIJJIIJJ BC:Z:TACCGTCT RG:Z:HiMom.1 QX:Z:#1=BDDDE RX:Z:NGAATGTC
+HiMom:1:2101:1143:2137 77 * 0 0 * * 0 0 ATGCAGCAGCTGCCACGGAGCACCA CC at FFDFDFHFHHGIDHEHIGJJJJ BC:Z:TCCGTCTA RG:Z:HiMom.1 QX:Z:@@@DD?=D RX:Z:GCTCTTCA
+HiMom:1:2101:1143:2137 141 * 0 0 * * 0 0 GATCTAGGGGGAACAGC CAFFFHIIDG:EFHIII BC:Z:TCCGTCTA RG:Z:HiMom.1 QX:Z:@@@DD?=D RX:Z:GCTCTTCA
+HiMom:1:2101:1151:2182 589 * 0 0 * * 0 0 TTGTTTTGGCTTATAATGACAAGAA ;;8-2).2())(<6=@8;?4??>>? BC:Z:GAAAAAAA RG:Z:HiMom.1 QX:Z:9<<?@?@; RX:Z:TTTTTTTT
+HiMom:1:2101:1151:2182 653 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTA 5=?############## BC:Z:GAAAAAAA RG:Z:HiMom.1 QX:Z:9<<?@?@; RX:Z:TTTTTTTT
+HiMom:1:2101:1215:2110 77 * 0 0 * * 0 0 .AATATAATTTGGAGACCCTTTGTT #1=DDDDDEDDDDIDDBB3ABAB## BC:Z:AAAAGAAG RG:Z:HiMom.1 QX:Z:######## RX:Z:ATCTTTCC
+HiMom:1:2101:1215:2110 141 * 0 0 * * 0 0 CCCATTAAGAACAGCAA ################# BC:Z:AAAAGAAG RG:Z:HiMom.1 QX:Z:######## RX:Z:ATCTTTCC
+HiMom:1:2101:1285:2105 589 * 0 0 * * 0 0 .GCGGGGAGCCGGGCGTGGAATGCG ######################### BC:Z:TATCTCGG RG:Z:HiMom.1 QX:Z:######## RX:Z:TGTCTATA
+HiMom:1:2101:1285:2105 653 * 0 0 * * 0 0 TCAACCAACACCTCTTC 4:9:???########## BC:Z:TATCTCGG RG:Z:HiMom.1 QX:Z:######## RX:Z:TGTCTATA
+HiMom:1:2101:1450:2134 77 * 0 0 * * 0 0 AGCACGCTGCCGCGGGACCTGCCCA ?@@AD at DDHFH?DGIIIIG at FGFBF BC:Z:ACCAGTTG RG:Z:HiMom.1 QX:Z:CC at FDFDF RX:Z:ACAAACCC
+HiMom:1:2101:1450:2134 141 * 0 0 * * 0 0 TTGTGTCGAGGGCTGAC FDFHFGIIE1CGGHBGE BC:Z:ACCAGTTG RG:Z:HiMom.1 QX:Z:CC at FDFDF RX:Z:ACAAACCC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TAAGCACA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TAAGCACA.sam
new file mode 100644
index 0000000..e730e63
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TAAGCACA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAAGCACA LB:LN_TAAGCACA PL:illumina PU:HiMom.1.TAAGCACA CN:BI
+HiMom:1:1201:1064:2239 77 * 0 0 * * 0 0 CATGCAGCGCAAGTAGGTCTACAAG @@;DFAFFHHHHAHEGHFDGGFABG RG:Z:HiMom.1 QX:Z:8?@:DDDA RX:Z:GGGATGGG
+HiMom:1:1201:1064:2239 141 * 0 0 * * 0 0 AGGGCGATGAGGACTAG CC:FHHGIH<EGDDDFH RG:Z:HiMom.1 QX:Z:8?@:DDDA RX:Z:GGGATGGG
+HiMom:1:2101:1258:2092 77 * 0 0 * * 0 0 .CACACACACACTCATTCACAGCTT #1=DDDFFHHHFHJJIJGGGIIGIJ RG:Z:HiMom.1 QX:Z:######## RX:Z:TTAGACAA
+HiMom:1:2101:1258:2092 141 * 0 0 * * 0 0 AACACCAAAATAAAATA ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:TTAGACAA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TACCGTCT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TACCGTCT.sam
new file mode 100644
index 0000000..04cf526
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TACCGTCT.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TACCGTCT LB:LN_TACCGTCT PL:illumina PU:HiMom.1.TACCGTCT CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TAGCGGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TAGCGGTA.sam
new file mode 100644
index 0000000..3455237
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TAGCGGTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TAGCGGTA LB:LN_TAGCGGTA PL:illumina PU:HiMom.1.TAGCGGTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCAGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCAGCC.sam
new file mode 100644
index 0000000..5f7c0e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCAGCC.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCAGCC LB:LN_TATCAGCC PL:illumina PU:HiMom.1.TATCAGCC CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCCAGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCCAGG.sam
new file mode 100644
index 0000000..215a4b5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCCAGG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCAGG LB:LN_TATCCAGG PL:illumina PU:HiMom.1.TATCCAGG CN:BI
+HiMom:1:1101:1071:2233 77 * 0 0 * * 0 0 TTTGACAGTCTCTGAATGAGAA.GG CCCFFFFFHHHHHJIIIJJJIJ#4A RG:Z:HiMom.1 QX:Z:<<<@@### RX:Z:GTTTGNNN
+HiMom:1:1101:1071:2233 141 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z:<<<@@### RX:Z:GTTTGNNN
+HiMom:1:1201:1140:2125 77 * 0 0 * * 0 0 .TTTCAGTTCAGAGAACTGCAGAAT #1=DBDFDHHHHGJIJJJJJIIIJI RG:Z:HiMom.1 QX:Z:CC at FFFFF RX:Z:TTCATAAA
+HiMom:1:1201:1140:2125 141 * 0 0 * * 0 0 TTGGTCTTAGATGTTGC HHHHFGIJIIIJIJIJJ RG:Z:HiMom.1 QX:Z:CC at FFFFF RX:Z:TTCATAAA
+HiMom:1:1201:1236:2187 77 * 0 0 * * 0 0 TTTAAATGGGTAAGAAGCCCGGCTC @BCDDFEFHHDHHJJJJJIJJIJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CTCCTTAG
+HiMom:1:1201:1236:2187 141 * 0 0 * * 0 0 CGGATTCCGACTTCCAT HHHHGIJJIGIGIJJGG RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CTCCTTAG
+HiMom:1:2101:1133:2239 77 * 0 0 * * 0 0 AGACAGAAGTACGGGAAGGCGAAGA @@@FFFFEHFHHHJJCGDHIIECD@ RG:Z:HiMom.1 QX:Z:?@?DDFFF RX:Z:AGCTTTTT
+HiMom:1:2101:1133:2239 141 * 0 0 * * 0 0 GTTTCCTAGCTTGTCTT HHHHF4ACFHIJHHHGH RG:Z:HiMom.1 QX:Z:?@?DDFFF RX:Z:AGCTTTTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCCATG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCCATG.sam
new file mode 100644
index 0000000..a5373e3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCCATG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCCATG LB:LN_TATCCATG PL:illumina PU:HiMom.1.TATCCATG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCTCGG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCTCGG.sam
new file mode 100644
index 0000000..04096c5
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCTCGG.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTCGG LB:LN_TATCTCGG PL:illumina PU:HiMom.1.TATCTCGG CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCTGCC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCTGCC.sam
new file mode 100644
index 0000000..10f206d
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TATCTGCC.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TATCTGCC LB:LN_TATCTGCC PL:illumina PU:HiMom.1.TATCTGCC CN:BI
+HiMom:1:1101:1267:2209 77 * 0 0 * * 0 0 GAGACGGAGGCCAACGGGGGCCTGG @@CFFFFD8FDHFHIGIBG?@BCDG RG:Z:HiMom.1 QX:Z:=;?DDDD? RX:Z:GGCAGAGT
+HiMom:1:1101:1267:2209 141 * 0 0 * * 0 0 CTCCAACAGCCCCGTAC CCFHAIIIGGIIGE at EG RG:Z:HiMom.1 QX:Z:=;?DDDD? RX:Z:GGCAGAGT
+HiMom:1:1101:1353:2226 77 * 0 0 * * 0 0 TTGCTTGTCTGTAAAGTATTTTATT @C at DDFFDHHFHFHHIBGG>IHHII RG:Z:HiMom.1 QX:Z:BBBFFFFF RX:Z:GTGCTCTT
+HiMom:1:1101:1353:2226 141 * 0 0 * * 0 0 CCGATCTTCAGGTTACC HHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:BBBFFFFF RX:Z:GTGCTCTT
+HiMom:1:1101:1435:2194 77 * 0 0 * * 0 0 GAGAAAGAACATGACTACAGAGATG CCCFFFFFHHHHHJJJJJJJJJHJJ RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:TTTTGTTT
+HiMom:1:1101:1435:2194 141 * 0 0 * * 0 0 TCTTTTACTGAAGTGTA HHHHHJJJJIHIJHHHJ RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:TTTTGTTT
+HiMom:1:1201:1084:2204 77 * 0 0 * * 0 0 GGCCCGTGGACGCCGCCGAAGAAGC CCCFFFFFHHHHHJJJJJIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGGCTCCT
+HiMom:1:1201:1084:2204 141 * 0 0 * * 0 0 CAGGCTCTCATCAGTTG HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGGCTCCT
+HiMom:1:1201:1142:2242 77 * 0 0 * * 0 0 TGTTGATAGTCCTTCTTATCTTAGT ???DB?==CC2<AC:CC<CFEF<FF RG:Z:HiMom.1 QX:Z:?=?DDDD; RX:Z:GTAAAATG
+HiMom:1:1201:1142:2242 141 * 0 0 * * 0 0 TAAAATAATAAAAAATG AF<DF<FFFFIIIFF@< RG:Z:HiMom.1 QX:Z:?=?DDDD; RX:Z:GTAAAATG
+HiMom:1:1201:1187:2100 77 * 0 0 * * 0 0 .GCGGTAATTCCAGCTCCAATAGCG #1:BB2 at DHHFHHIIIIHHIIGHGG RG:Z:HiMom.1 QX:Z:=<=;AA## RX:Z:AAAAAAGA
+HiMom:1:1201:1187:2100 141 * 0 0 * * 0 0 GCCCGCATTGCCGAGAC 2<=;)<<):=@@##### RG:Z:HiMom.1 QX:Z:=<=;AA## RX:Z:AAAAAAGA
+HiMom:1:1201:1392:2109 77 * 0 0 * * 0 0 .CTGAAGAGGCCAAAGCGCCCTCCA #1=DDFFFHHHHHJJJJJJJJJJJI RG:Z:HiMom.1 QX:Z:BBCFFFFF RX:Z:GTCAGACA
+HiMom:1:1201:1392:2109 141 * 0 0 * * 0 0 GGGGGATTTGGGCTGTG HHHHHHJJJHIJIJJJJ RG:Z:HiMom.1 QX:Z:BBCFFFFF RX:Z:GTCAGACA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCCGTCTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCCGTCTA.sam
new file mode 100644
index 0000000..8f5dbc6
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCCGTCTA.sam
@@ -0,0 +1,2 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCCGTCTA LB:LN_TCCGTCTA PL:illumina PU:HiMom.1.TCCGTCTA CN:BI
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCGCTAGA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCGCTAGA.sam
new file mode 100644
index 0000000..f4c706a
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCGCTAGA.sam
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCGCTAGA LB:LN_TCGCTAGA PL:illumina PU:HiMom.1.TCGCTAGA CN:BI
+HiMom:1:1101:1143:2192 77 * 0 0 * * 0 0 GGAGCGAGTCTGGGTCTCAGCCCCG CCCFFFFFHHHHHJGHIIIHJJJJI RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CGACAAGT
+HiMom:1:1101:1143:2192 141 * 0 0 * * 0 0 CTGGCTTATCACTCATC HHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CGACAAGT
+HiMom:1:1101:1479:2221 77 * 0 0 * * 0 0 TGTAAAGTATGCTGGCTCAGTGTAT BBBFDFFEHHHHHJJJJJJJIJHJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:GGGGAAAT
+HiMom:1:1101:1479:2221 141 * 0 0 * * 0 0 CTATTTTTATGTAAAAA HHHHHJIGIJJJJJJJJ RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:GGGGAAAT
+HiMom:1:1201:1312:2112 77 * 0 0 * * 0 0 .TCCCAGCGAACCCGCGTGCAACCT #1=DFFFFHHHHHJJJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTTGCAG
+HiMom:1:1201:1312:2112 141 * 0 0 * * 0 0 GAGCCGGCGCAGGTGCA HHHHHJJJIJJJJGHIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTTGCAG
+HiMom:1:1201:1416:2128 77 * 0 0 * * 0 0 .ACAGGCGTGGAGGAGGCGGCGGCC #4=DDDFFHHHHHJIGJHFHHFFED RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:TTGGTGTG
+HiMom:1:1201:1416:2128 141 * 0 0 * * 0 0 GAGGCGGTGGCGGGATC HHFHHII:?GGHIIB6? RG:Z:HiMom.1 QX:Z:@@@DDDDD RX:Z:TTGGTGTG
+HiMom:1:2101:1064:2242 77 * 0 0 * * 0 0 ATGAACAAAGGAAGAATTATGCACG ?;?D;DDDF?;:+<<CFFCHE433A RG:Z:HiMom.1 QX:Z:####(<?< RX:Z:NGGAAAAA
+HiMom:1:2101:1064:2242 141 * 0 0 * * 0 0 GGTTGTCAAGCGTTAAA ################# RG:Z:HiMom.1 QX:Z:####(<?< RX:Z:NGGAAAAA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCTGCAAG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCTGCAAG.sam
new file mode 100644
index 0000000..4b75b98
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TCTGCAAG.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TCTGCAAG LB:LN_TCTGCAAG PL:illumina PU:HiMom.1.TCTGCAAG CN:BI
+HiMom:1:1201:1042:2174 77 * 0 0 * * 0 0 .GTTGGTGTCTTCATTTTATGTATA #1=DDFDFHHHHHJIJJJHIJHIJJ RG:Z:HiMom.1 QX:Z:#0;@@@?@ RX:Z:NTCAGGAA
+HiMom:1:1201:1042:2174 141 * 0 0 * * 0 0 GGC..CAAAAAAAGAAA ?<@##3<@@?@@????? RG:Z:HiMom.1 QX:Z:#0;@@@?@ RX:Z:NTCAGGAA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGCAAGTA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGCAAGTA.sam
new file mode 100644
index 0000000..e5d43f9
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGCAAGTA.sam
@@ -0,0 +1,6 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCAAGTA LB:LN_TGCAAGTA PL:illumina PU:HiMom.1.TGCAAGTA CN:BI
+HiMom:1:1101:1242:2170 77 * 0 0 * * 0 0 ATGGCAGGGCAGAGTTCTGATGAGT CCCFFFFFHHGGGIFHEIIGIIII? RG:Z:HiMom.1 QX:Z:@@@DFDFF RX:Z:GGAAGGAA
+HiMom:1:1101:1242:2170 141 * 0 0 * * 0 0 AAGAAGCACAAGTACAT HHHGHHGIIGJJEHHIG RG:Z:HiMom.1 QX:Z:@@@DFDFF RX:Z:GGAAGGAA
+HiMom:1:2101:1163:2222 77 * 0 0 * * 0 0 GAGCAGGCAAGGAGGACTTCTTGTT CCCFFFFFGHHHHJJHHIJJJJJIJ RG:Z:HiMom.1 QX:Z:@@@DFFFF RX:Z:GAGCGATA
+HiMom:1:2101:1163:2222 141 * 0 0 * * 0 0 ATGGTTCTTTTCCTCAC HHHHHJJJJJJJIJJJJ RG:Z:HiMom.1 QX:Z:@@@DFFFF RX:Z:GAGCGATA
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGCTGCTG.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGCTGCTG.sam
new file mode 100644
index 0000000..c6a0d35
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGCTGCTG.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGCTGCTG LB:LN_TGCTGCTG PL:illumina PU:HiMom.1.TGCTGCTG CN:BI
+HiMom:1:1101:1084:2136 77 * 0 0 * * 0 0 .TCTCACTGTGAATTTGTGGTGGGC #1=DDFFFHHHHHJJJJGIJIJJJJ RG:Z:HiMom.1 QX:Z:<<<@@### RX:Z:TTTCTNNN
+HiMom:1:1101:1084:2136 141 * 0 0 * * 0 0 ................. ################# RG:Z:HiMom.1 XN:i:1 QX:Z:<<<@@### RX:Z:TTTCTNNN
+HiMom:1:1201:1285:2100 77 * 0 0 * * 0 0 .AATGACATGTTTAAAGATGGACTC #1:BDDFFHHFHHGIJIJIIIIGII RG:Z:HiMom.1 QX:Z:@@@DFFFF RX:Z:GATCTTTT
+HiMom:1:1201:1285:2100 141 * 0 0 * * 0 0 TTGCTTTGTAGTTATAG HHHHHIIGIABCFFHBF RG:Z:HiMom.1 QX:Z:@@@DFFFF RX:Z:GATCTTTT
+HiMom:1:2101:1162:2139 77 * 0 0 * * 0 0 AGAGGTGAAATTCTTGGACCGGCGC @@@DDDDDHFHHHDB:EFHHCAG?D RG:Z:HiMom.1 QX:Z:BCCFFFFF RX:Z:ATCGTTTA
+HiMom:1:2101:1162:2139 141 * 0 0 * * 0 0 TGGTCGGAACTACGACG HHHHHIJJJJJJJIJJI RG:Z:HiMom.1 QX:Z:BCCFFFFF RX:Z:ATCGTTTA
+HiMom:1:2101:1195:2150 77 * 0 0 * * 0 0 CCGAGAGAGTGAGAGCGCTCCTGGG CCCFFFFFHFHHHJJJJIJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AATTGAAC
+HiMom:1:2101:1195:2150 141 * 0 0 * * 0 0 TTCACCACCCAGAGGAA HHHHHJJJJJJIJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AATTGAAC
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGTAACTC.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGTAACTC.sam
new file mode 100644
index 0000000..9fb57b3
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGTAACTC.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAACTC LB:LN_TGTAACTC PL:illumina PU:HiMom.1.TGTAACTC CN:BI
+HiMom:1:1201:1421:2154 77 * 0 0 * * 0 0 TGTGTGTGTGGGTGTGTGTATATAT ?@?DDFFFFFHH at GEFCCCHGIGJI RG:Z:HiMom.1 QX:Z:BC at DFFFF RX:Z:TGTGCTCT
+HiMom:1:1201:1421:2154 141 * 0 0 * * 0 0 TCCGATCTTGTGCTCTT HHHHHJJJJFHIHHIJJ RG:Z:HiMom.1 QX:Z:BC at DFFFF RX:Z:TGTGCTCT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGTAATCA.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGTAATCA.sam
new file mode 100644
index 0000000..cf56622
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TGTAATCA.sam
@@ -0,0 +1,8 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TGTAATCA LB:LN_TGTAATCA PL:illumina PU:HiMom.1.TGTAATCA CN:BI
+HiMom:1:1101:1419:2119 77 * 0 0 * * 0 0 .ATGACTATGGTAACTGAAAGAAAA #1:A1BDADBFFDFIIIEEHECACF RG:Z:HiMom.1 QX:Z:######## RX:Z:ACTTTCCT
+HiMom:1:1101:1419:2119 141 * 0 0 * * 0 0 TTTTTGTTTTACTTTAA ################# RG:Z:HiMom.1 QX:Z:######## RX:Z:ACTTTCCT
+HiMom:1:1201:1208:2132 77 * 0 0 * * 0 0 .CCTCAATGAGCGGCACTATGGGGG #1=DDFFFHHHHGJJIJJGHIJGIJ RG:Z:HiMom.1 QX:Z:@@CDFFFF RX:Z:CTGTAGAA
+HiMom:1:1201:1208:2132 141 * 0 0 * * 0 0 AGGATGGTCGGGCTCCA GHFHHJIJJGJIBHJJG RG:Z:HiMom.1 QX:Z:@@CDFFFF RX:Z:CTGTAGAA
+HiMom:1:1201:1344:2147 77 * 0 0 * * 0 0 TATCCTCCCTACTATGCCTAGAAGG =?@DADEFHBHDFG>EFGDHGFGHD RG:Z:HiMom.1 QX:Z:@<??DDDD RX:Z:ACGATTAG
+HiMom:1:1201:1344:2147 141 * 0 0 * * 0 0 TTTTAGCATTGGAGTAG FHHHFGGHHIIIGGAGH RG:Z:HiMom.1 QX:Z:@<??DDDD RX:Z:ACGATTAG
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TTGTCTAT.sam b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TTGTCTAT.sam
new file mode 100644
index 0000000..7d607b0
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/TTGTCTAT.sam
@@ -0,0 +1,10 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_TTGTCTAT LB:LN_TTGTCTAT PL:illumina PU:HiMom.1.TTGTCTAT CN:BI
+HiMom:1:1101:1219:2164 77 * 0 0 * * 0 0 TCAAGCAGGAGCAGCTAAGTCCTAA CCCFFFFFHHHHHJJJJJJHIJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATCTTATC
+HiMom:1:1101:1219:2164 141 * 0 0 * * 0 0 CACTCCTTCCACTTTGG HHHHHJJIJJJJJJJIJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATCTTATC
+HiMom:1:1201:1103:2184 77 * 0 0 * * 0 0 GTAAGAACTACCCTGGGTCCCCGTG @@BFFFFFHHHHHJJJJGIJJJJHI RG:Z:HiMom.1 QX:Z:B at BFFDEF RX:Z:AGAAGTTT
+HiMom:1:1201:1103:2184 141 * 0 0 * * 0 0 CAGAATTGTGGCCCCAT HHHHHJJJGHIJJJJJI RG:Z:HiMom.1 QX:Z:B at BFFDEF RX:Z:AGAAGTTT
+HiMom:1:1201:1107:2109 77 * 0 0 * * 0 0 .GGGAACCTGGCGCTAAACCATTCG #1=DFFFFHHHHHJJJJJJJJJIJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1201:1107:2109 141 * 0 0 * * 0 0 TTGTGTCGAGGGCTGAC HHGHHJJJJIIJJJJJJ RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAAACCC
+HiMom:1:1201:1252:2141 77 * 0 0 * * 0 0 .TTCCCCCCATGTAATTATTGTGAA #1=DDFFFHHHHHJJJJJJJJIJJJ RG:Z:HiMom.1 QX:Z:BCBFFFFF RX:Z:AGTTATTT
+HiMom:1:1201:1252:2141 141 * 0 0 * * 0 0 TGCCTATGTCCAACAAG GHHHHJIJJJJJJJJJJ RG:Z:HiMom.1 QX:Z:BCBFFFFF RX:Z:AGTTATTT
diff --git a/testdata/picard/illumina/25T8B25T/sams_with_4M4M/barcode.params b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/barcode.params
new file mode 100644
index 0000000..21aa277
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams_with_4M4M/barcode.params
@@ -0,0 +1,63 @@
+BARCODE SAMPLE_ALIAS LIBRARY_NAME OUTPUT
+AAAAAAAA SA_AAAAAAAA LN_AAAAAAAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AAAAAAAA.sam
+AAAAGAAG SA_AAAAGAAG LN_AAAAGAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AAAAGAAG.sam
+AACAATGG SA_AACAATGG LN_AACAATGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AACAATGG.sam
+AACGCATT SA_AACGCATT LN_AACGCATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AACGCATT.sam
+ACAAAATT SA_ACAAAATT LN_ACAAAATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACAAAATT.sam
+ACAGGTAT SA_ACAGGTAT LN_ACAGGTAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACAGGTAT.sam
+ACAGTTGA SA_ACAGTTGA LN_ACAGTTGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACAGTTGA.sam
+ACCAGTTG SA_ACCAGTTG LN_ACCAGTTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACCAGTTG.sam
+ACGAAATC SA_ACGAAATC LN_ACGAAATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACGAAATC.sam
+ACTAAGAC SA_ACTAAGAC LN_ACTAAGAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACTAAGAC.sam
+ACTGTACC SA_ACTGTACC LN_ACTGTACC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACTGTACC.sam
+ACTGTATC SA_ACTGTATC LN_ACTGTATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ACTGTATC.sam
+AGAAAAGA SA_AGAAAAGA LN_AGAAAAGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AGAAAAGA.sam
+AGCATGGA SA_AGCATGGA LN_AGCATGGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AGCATGGA.sam
+AGGTAAGG SA_AGGTAAGG LN_AGGTAAGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AGGTAAGG.sam
+AGGTCGCA SA_AGGTCGCA LN_AGGTCGCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/AGGTCGCA.sam
+ATTATCAA SA_ATTATCAA LN_ATTATCAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ATTATCAA.sam
+ATTCCTCT SA_ATTCCTCT LN_ATTCCTCT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/ATTCCTCT.sam
+CAACTCTC SA_CAACTCTC LN_CAACTCTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CAACTCTC.sam
+CAATAGAC SA_CAATAGAC LN_CAATAGAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CAATAGAC.sam
+CAATAGTC SA_CAATAGTC LN_CAATAGTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CAATAGTC.sam
+CAGCGGAT SA_CAGCGGAT LN_CAGCGGAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CAGCGGAT.sam
+CAGCGGTA SA_CAGCGGTA LN_CAGCGGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CAGCGGTA.sam
+CCAACATT SA_CCAACATT LN_CCAACATT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CCAACATT.sam
+CCAGCACC SA_CCAGCACC LN_CCAGCACC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CCAGCACC.sam
+CCATGCGT SA_CCATGCGT LN_CCATGCGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CCATGCGT.sam
+CGCCTTCC SA_CGCCTTCC LN_CGCCTTCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CGCCTTCC.sam
+CGCTATGT SA_CGCTATGT LN_CGCTATGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CGCTATGT.sam
+CTAACTCG SA_CTAACTCG LN_CTAACTCG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CTAACTCG.sam
+CTATGCGC SA_CTATGCGC LN_CTATGCGC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CTATGCGC.sam
+CTATGCGT SA_CTATGCGT LN_CTATGCGT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CTATGCGT.sam
+CTGCGGAT SA_CTGCGGAT LN_CTGCGGAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CTGCGGAT.sam
+CTGTAATC SA_CTGTAATC LN_CTGTAATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/CTGTAATC.sam
+GAAAAAAA SA_GAAAAAAA LN_GAAAAAAA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GAAAAAAA.sam
+GAACGAT. SA_GAACGAT. LN_GAACGAT. /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GAACGAT..sam
+GAAGGAAG SA_GAAGGAAG LN_GAAGGAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GAAGGAAG.sam
+GACCAGGA SA_GACCAGGA LN_GACCAGGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GACCAGGA.sam
+GACCAGGC SA_GACCAGGC LN_GACCAGGC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GACCAGGC.sam
+GACCGTTG SA_GACCGTTG LN_GACCGTTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GACCGTTG.sam
+GACCTAAC SA_GACCTAAC LN_GACCTAAC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GACCTAAC.sam
+GATATCCA SA_GATATCCA LN_GATATCCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GATATCCA.sam
+GCCGTCGA SA_GCCGTCGA LN_GCCGTCGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GCCGTCGA.sam
+GCCTAGCC SA_GCCTAGCC LN_GCCTAGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GCCTAGCC.sam
+GTAACATC SA_GTAACATC LN_GTAACATC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GTAACATC.sam
+GTCCACAG SA_GTCCACAG LN_GTCCACAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/GTCCACAG.sam
+TAAGCACA SA_TAAGCACA LN_TAAGCACA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TAAGCACA.sam
+TACCGTCT SA_TACCGTCT LN_TACCGTCT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TACCGTCT.sam
+TAGCGGTA SA_TAGCGGTA LN_TAGCGGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TAGCGGTA.sam
+TATCAGCC SA_TATCAGCC LN_TATCAGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TATCAGCC.sam
+TATCCAGG SA_TATCCAGG LN_TATCCAGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TATCCAGG.sam
+TATCCATG SA_TATCCATG LN_TATCCATG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TATCCATG.sam
+TATCTCGG SA_TATCTCGG LN_TATCTCGG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TATCTCGG.sam
+TATCTGCC SA_TATCTGCC LN_TATCTGCC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TATCTGCC.sam
+TCCGTCTA SA_TCCGTCTA LN_TCCGTCTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TCCGTCTA.sam
+TCGCTAGA SA_TCGCTAGA LN_TCGCTAGA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TCGCTAGA.sam
+TCTGCAAG SA_TCTGCAAG LN_TCTGCAAG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TCTGCAAG.sam
+TGCAAGTA SA_TGCAAGTA LN_TGCAAGTA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TGCAAGTA.sam
+TGCTGCTG SA_TGCTGCTG LN_TGCTGCTG /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TGCTGCTG.sam
+TGTAACTC SA_TGTAACTC LN_TGTAACTC /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TGTAACTC.sam
+TGTAATCA SA_TGTAATCA LN_TGTAATCA /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TGTAATCA.sam
+TTGTCTAT SA_TTGTCTAT LN_TTGTCTAT /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/TTGTCTAT.sam
+N SA_N LN_N /var/folders/tc/hy9lszxd1dg9cf4bky51mrrd9k3s6g/T/multiplexedBarcode2.8715677092925684198.dir/N.sam
diff --git a/testdata/picard/illumina/25T8B8B25T/sams/N_with_molecular_index.sam b/testdata/picard/illumina/25T8B8B25T/sams/N_with_molecular_index.sam
new file mode 100644
index 0000000..ec805fb
--- /dev/null
+++ b/testdata/picard/illumina/25T8B8B25T/sams/N_with_molecular_index.sam
@@ -0,0 +1,40 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:SA_N LB:LN_N PL:illumina PU:HiMom.1.N-N CN:BI
+HiMom:1:1101:13743:1511 589 * 0 0 * * 0 0 TCCTTCTTTTCCCTCCCCTTTCC >11>111@@@3111A111ABA11 BC:Z:ATTATCAA-CACCATAC RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTCC
+HiMom:1:1101:13743:1511 653 * 0 0 * * 0 0 CCCTTCCCTCTTTTTTCCTTCCC 11111111 at 11131100133100 BC:Z:ATTATCAA-CACCATAC RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTCC
+HiMom:1:1101:13844:1536 589 * 0 0 * * 0 0 TTTCTTTTTTCTTTTCCCTTTAT 1111>3311111331311B1113 BC:Z:TCCCCGTT-CCAACTTC RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTTC
+HiMom:1:1101:13844:1536 653 * 0 0 * * 0 0 TTCTTTTTTTTTTTTCTTTTCCT 1113131111A############ BC:Z:TCCCCGTT-CCAACTTC RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTTC
+HiMom:1:1101:14281:1532 589 * 0 0 * * 0 0 CTTTTTTCCTTTTTTTCTTTTTT 1>>11111133 at 1A00013311A BC:Z:CCTTCTTT-TATCATTT RG:Z:HiMom.1 QX:Z:01>1 RX:Z:CTTT
+HiMom:1:1101:14281:1532 653 * 0 0 * * 0 0 TCTCTTTTTTTTTCTCCCTTCCT 11133331111000121001001 BC:Z:CCTTCTTT-TATCATTT RG:Z:HiMom.1 QX:Z:01>1 RX:Z:CTTT
+HiMom:1:1101:14340:1451 589 * 0 0 * * 0 0 TTTTTTCCTTCTCCCCTTTTTCC >1111111331111111111101 BC:Z:TTCCTCCT-CACCATCC RG:Z:HiMom.1 QX:Z:3A11 RX:Z:TTTC
+HiMom:1:1101:14340:1451 653 * 0 0 * * 0 0 CCTTCCTCCCCTCCTTCTTTTTC 11111111111111111113110 BC:Z:TTCCTCCT-CACCATCC RG:Z:HiMom.1 QX:Z:3A11 RX:Z:TTTC
+HiMom:1:1101:14809:1476 589 * 0 0 * * 0 0 TTTTTCCCTTTCTCTTTTCTTTT >1>>111111113333313AAA1 BC:Z:GCTATCCA-CCTCCCCC RG:Z:HiMom.1 QX:Z:31>1 RX:Z:CTTC
+HiMom:1:1101:14809:1476 653 * 0 0 * * 0 0 TCTTTTTCTTTTTTCTCCTTTCT 11133111133111113111313 BC:Z:GCTATCCA-CCTCCCCC RG:Z:HiMom.1 QX:Z:31>1 RX:Z:CTTC
+HiMom:1:1101:14852:1366 589 * 0 0 * * 0 0 TTCTCTCCCTCTTTTTTTCTTCT 111>1311111113A1AA01333 BC:Z:AGGTCGCA-TTCCTTTC RG:Z:HiMom.1 QX:Z:1311 RX:Z:TTTC
+HiMom:1:1101:14852:1366 653 * 0 0 * * 0 0 CTTTTTCTTTTCTTTTCTTTTCC 11131111331313313133131 BC:Z:AGGTCGCA-TTCCTTTC RG:Z:HiMom.1 QX:Z:1311 RX:Z:TTTC
+HiMom:1:1101:14927:1516 589 * 0 0 * * 0 0 CTCCTTTTCTCTTTTCCCCCTGT 11111 at 31313333131111111 BC:Z:GTCTGATG-TCACTTCC RG:Z:HiMom.1 QX:Z:1111 RX:Z:GCTT
+HiMom:1:1101:14927:1516 653 * 0 0 * * 0 0 CCTCCCCCTCCTCCCCCTCCCTC 111111>1111111100000A00 BC:Z:GTCTGATG-TCACTTCC RG:Z:HiMom.1 QX:Z:1111 RX:Z:GCTT
+HiMom:1:1101:14943:1353 589 * 0 0 * * 0 0 TTTTTTTTTCTTTTTTTTTTTTT 1>1>111100 at 33A100//A//A BC:Z:CTCCCTCT-TCCTTCTT RG:Z:HiMom.1 QX:Z:##11 RX:Z:TTCT
+HiMom:1:1101:14943:1353 653 * 0 0 * * 0 0 CCCTTTTTTTTTTCTTCCTCTCT 11111111110000112111101 BC:Z:CTCCCTCT-TCCTTCTT RG:Z:HiMom.1 QX:Z:##11 RX:Z:TTCT
+HiMom:1:1101:15334:1365 589 * 0 0 * * 0 0 TTTTTTTCTTCTCTTTCTTTTCT 111>111113331333333B131 BC:Z:TGTATCTC-TTACTCTT RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTTC
+HiMom:1:1101:15334:1365 653 * 0 0 * * 0 0 TCTCTTTCTTCTCTTTTTTTTTT 11111333333313331110000 BC:Z:TGTATCTC-TTACTCTT RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTTC
+HiMom:1:1101:15424:1475 589 * 0 0 * * 0 0 CTTTTTTCTTTTTCTTTTTTTTT 1>>11 at 11133111133110000 BC:Z:CCTCTTCT-CTCGCTTT RG:Z:HiMom.1 QX:Z:0111 RX:Z:CTTT
+HiMom:1:1101:15424:1475 653 * 0 0 * * 0 0 CCTTCTTTCCTTCTCTCTTTCTT 11131113313311131133333 BC:Z:CCTCTTCT-CTCGCTTT RG:Z:HiMom.1 QX:Z:0111 RX:Z:CTTT
+HiMom:1:1101:15736:1478 589 * 0 0 * * 0 0 CCTCTCTTTTTTCCTTTCCCTGC 1111111 at 3B1111A33111A10 BC:Z:ATTGTCTG-GATTCACA RG:Z:HiMom.1 QX:Z:0111 RX:Z:CTCT
+HiMom:1:1101:15736:1478 653 * 0 0 * * 0 0 CTTCCCTTTCCCCCTCTTCTTCT 1>111111111111A11113A33 BC:Z:ATTGTCTG-GATTCACA RG:Z:HiMom.1 QX:Z:0111 RX:Z:CTCT
+HiMom:1:1101:15785:1528 589 * 0 0 * * 0 0 CTTTTTTTTTTCTCTCTTTTTTT 11>111>100001221B###### BC:Z:CCCGATTT-TCTCTCCC RG:Z:HiMom.1 QX:Z:##11 RX:Z:TTTC
+HiMom:1:1101:15785:1528 653 * 0 0 * * 0 0 TTCTCTTTCTCTTTTTCTTTTCT 11131 at 3@3333331A1133131 BC:Z:CCCGATTT-TCTCTCCC RG:Z:HiMom.1 QX:Z:##11 RX:Z:TTTC
+HiMom:1:1101:15865:1540 589 * 0 0 * * 0 0 GCTTTCCTTGTTTTCTGTCCCCT 11111113311BB13A33A111B BC:Z:TTCCCCAG-TGCTTTTA RG:Z:HiMom.1 QX:Z:1011 RX:Z:TCCT
+HiMom:1:1101:15865:1540 653 * 0 0 * * 0 0 TTTTTATTTTCCTTTTGCATCCT 111111333 at 31333B1133301 BC:Z:TTCCCCAG-TGCTTTTA RG:Z:HiMom.1 QX:Z:1011 RX:Z:TCCT
+HiMom:1:1101:15987:1510 589 * 0 0 * * 0 0 CTTTTTTTTTTTCCCTTTTTACC ####################### BC:Z:CCTCTTTC-TTCTGCTC RG:Z:HiMom.1 QX:Z:1111 RX:Z:TTTT
+HiMom:1:1101:15987:1510 653 * 0 0 * * 0 0 CCCCTTCCCTTCTCTTTTCTTCT 1111>1111 at 1111333131333 BC:Z:CCTCTTTC-TTCTGCTC RG:Z:HiMom.1 QX:Z:1111 RX:Z:TTTT
+HiMom:1:1101:15998:1432 589 * 0 0 * * 0 0 TTCTTCCTCTTCTTCTTCTTCCT 111>131111113133133A311 BC:Z:CATGTTCG-CGCCTTTT RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTCT
+HiMom:1:1101:15998:1432 653 * 0 0 * * 0 0 TTTTTTCTTTTCTCCTCCTTTTT 111 at 111@3313A31A000131A BC:Z:CATGTTCG-CGCCTTTT RG:Z:HiMom.1 QX:Z:3311 RX:Z:TTCT
+HiMom:1:1101:16070:1386 589 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTCCTTCCT ####################### BC:Z:TGTCGGTT-TTTTGCTT RG:Z:HiMom.1 QX:Z:A211 RX:Z:TTTT
+HiMom:1:1101:16070:1386 653 * 0 0 * * 0 0 TTTTTTCTCTCTTTTTTTTTTTT 111111013311331A####### BC:Z:TGTCGGTT-TTTTGCTT RG:Z:HiMom.1 QX:Z:A211 RX:Z:TTTT
+HiMom:1:1101:16085:1467 589 * 0 0 * * 0 0 TTTTTTCCTTTTCTTTTCCTCTT 1>>11>113B31313A1313111 BC:Z:ACCTACTG-CTTCCCTT RG:Z:HiMom.1 QX:Z:1A11 RX:Z:CTTT
+HiMom:1:1101:16085:1467 653 * 0 0 * * 0 0 TTTTTTTTTTCCTCTTTTTTTCC ####################### BC:Z:ACCTACTG-CTTCCCTT RG:Z:HiMom.1 QX:Z:1A11 RX:Z:CTTT
+HiMom:1:1101:16098:1529 589 * 0 0 * * 0 0 TTTTTTTTTTCTCTCCCTTTCTC 1>1>>111000122100011122 BC:Z:TTCTCATC-TTCCTTCC RG:Z:HiMom.1 QX:Z:2211 RX:Z:TTCC
+HiMom:1:1101:16098:1529 653 * 0 0 * * 0 0 CTCTCCACTTTTCTTTCCTCCCT 111>1111111331333131110 BC:Z:TTCTCATC-TTCCTTCC RG:Z:HiMom.1 QX:Z:2211 RX:Z:TTCC
+HiMom:1:1101:16530:1425 589 * 0 0 * * 0 0 TTTTTTTTTTTTTTCTTTTTTTT 1111>>110000A########## BC:Z:CCTATTCT-TCGTTTTC RG:Z:HiMom.1 QX:Z:##11 RX:Z:TTTT
+HiMom:1:1101:16530:1425 653 * 0 0 * * 0 0 CTTTCCCTTTTCTTTTCCTTCTC 11133111111313313133111 BC:Z:CCTATTCT-TCGTTTTC RG:Z:HiMom.1 QX:Z:##11 RX:Z:TTTT
diff --git a/testdata/picard/illumina/mark_illumina_adapters_test.sam b/testdata/picard/illumina/mark_illumina_adapters_test.sam
new file mode 100755
index 0000000..efe6b01
--- /dev/null
+++ b/testdata/picard/illumina/mark_illumina_adapters_test.sam
@@ -0,0 +1,4 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:62A79.3 PL:illumina PU:62A79AAXX100907.3 LB:Solexa-41734 DT:2010-09-07T00:00:00-0400 SM:NA12878 CN:BI
+62A79AAXX100907:3:113:16808:6267 77 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT EE?E?EEECECEEE>>7 at B;+5+8;4(74<??,?################################################################### RG:Z:62A79.3
+62A79AAXX100907:3:113:16808:6267 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA GGEGGHFGHHHHHHHHHDHHGHEGHHHBHGGG28G:@########################################################### RG:Z:62A79.3
diff --git a/testdata/picard/reference/test.dict b/testdata/picard/reference/test.dict
new file mode 100644
index 0000000..d900d1c
--- /dev/null
+++ b/testdata/picard/reference/test.dict
@@ -0,0 +1,9 @@
+ at HD VN:1.5 SO:unsorted
+ at SQ SN:chr1 LN:101 M5:bd01f7e11515bb6beda8f7257902aa67 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr2 LN:101 M5:31c33e2155b3de5e2554b693c475b310 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr3 LN:101 M5:631593c6dd2048ae88dcce2bd505d295 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr4 LN:101 M5:c60cb92f1ee5b78053c92bdbfa19abf1 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr5 LN:101 M5:07ebc213c7611db0eacbb1590c3e9bda UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr6 LN:101 M5:7be2f5e7ee39e60a6c3b5b6a41178c6d UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr7 LN:202 M5:93763aaf6a455871c7d7a7718bff9ccf UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr8 LN:202 M5:d339678efce576d5546e88b49a487b63 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
diff --git a/testdata/picard/reference/test.fasta b/testdata/picard/reference/test.fasta
new file mode 100644
index 0000000..52779f5
--- /dev/null
+++ b/testdata/picard/reference/test.fasta
@@ -0,0 +1,36 @@
+>chr1
+TTCATGCTGAAGCCCTCTTACGATCGTACAGATGCAAATATTAACAAACC
+TTTAAGGGCAAAAAAAAAACAATACAATAATAGAGTACGTTAACACTCCA
+A
+>chr2
+CATCTCTACAAGCGCGTCCTACCAGACGCGCTTCCGATCTGAGAGCATAC
+TTTTCATTGGATTCCAGCACAACTCCATTTTTGATCCACTTGACACCTTT
+T
+>chr3
+CGTATGCGCTTTTTATGTCGCCCACAGTGCCTAGTATAGCCCCTGCTAAT
+AAAAAGAGATGAATACGTTTACTTAAAAAACTGAAACTAGGAATGTGCAA
+A
+>chr4
+CGTGATACCAACTCATGTTCACAGCCAAAGCCTGAAGCTGTCTATTATAT
+TTCTCAACCATAAACTTTTGCCTCAGGCATCCGCAGAATGGTTTGCAGCC
+C
+>chr5
+NTCTCATTTAAAAATGGTTATAAAAACATTTATGCTGAAAAGGTGAAGTT
+CATTAATGAACAGGCTGACTGTCTCACTATCGCGTTCGCAAGACGTTATC
+T
+>chr6
+NAATTGTTCTTAGTTTCTCGGTTTATGTGCTCTTCCAGGTGGGTAACACA
+ATAATGGCCTTCCAGATCGTAAGAGCGACGTGTGTTGCACCAGTGTCGAT
+C
+>chr7
+CAACAGAAGGGGGGATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAAGG
+TTTTCGGGTCCCCCCCCCATCCCGATTTCCTTCCGCAGCTTACCTCCCGA
+AACGCGGCATCCCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCA
+GCGCGTCGTGTAGGTCACTATGGTACATCTTGTCGTGCGGCCAGAGCATA
+CA
+>chr8
+CACATCGTGAATCTTACAATCTGCGGTTTCAGATGTGGAGCGATGTGTGA
+GAGATTGAGCAACTGATCTGAAAAGCAGACACAGCTATTCCTAAGATGAC
+CCCAGGTTCAAATGTGCAGCCCCTTTTGAGAGATTTTTTTTTTGGGCTGG
+AAAAAAGACACAGCTATTCCTAAGATGACAAGATCAGAAAAAAAGTCAAG
+CA
diff --git a/testdata/picard/reference/test.fasta.fai b/testdata/picard/reference/test.fasta.fai
new file mode 100644
index 0000000..b859cc8
--- /dev/null
+++ b/testdata/picard/reference/test.fasta.fai
@@ -0,0 +1,8 @@
+chr1 101 6 50 51
+chr2 101 116 50 51
+chr3 101 226 50 51
+chr4 101 336 50 51
+chr5 101 446 50 51
+chr6 101 556 50 51
+chr7 202 666 50 51
+chr8 202 879 50 51
diff --git a/testdata/picard/reference/test.intervals b/testdata/picard/reference/test.intervals
new file mode 100644
index 0000000..f45c196
--- /dev/null
+++ b/testdata/picard/reference/test.intervals
@@ -0,0 +1,12 @@
+ at HD VN:1.5 SO:unsorted
+ at SQ SN:chr1 LN:101 M5:bd01f7e11515bb6beda8f7257902aa67 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr2 LN:101 M5:31c33e2155b3de5e2554b693c475b310 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr3 LN:101 M5:631593c6dd2048ae88dcce2bd505d295 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr4 LN:101 M5:c60cb92f1ee5b78053c92bdbfa19abf1 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr5 LN:101 M5:07ebc213c7611db0eacbb1590c3e9bda UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr6 LN:101 M5:7be2f5e7ee39e60a6c3b5b6a41178c6d UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr7 LN:202 M5:93763aaf6a455871c7d7a7718bff9ccf UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+ at SQ SN:chr8 LN:202 M5:d339678efce576d5546e88b49a487b63 UR:file:/Users/ebanks/picard/testdata/picard/sam/summary_alignment_stats_test.fasta
+chr2 10 30 + .
+chr4 60 70 + .
+chr5 20 40 + .
diff --git a/testdata/picard/sam/CollectGcBiasMetrics/MSmallHeader.dict b/testdata/picard/sam/CollectGcBiasMetrics/MSmallHeader.dict
new file mode 100644
index 0000000..eea12d3
--- /dev/null
+++ b/testdata/picard/sam/CollectGcBiasMetrics/MSmallHeader.dict
@@ -0,0 +1,2 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chrM LN:16571 UR:chrM.reference.fasta
\ No newline at end of file
diff --git a/testdata/picard/sam/EstimateLibraryComplexity/dupes.sam b/testdata/picard/sam/EstimateLibraryComplexity/dupes.sam
new file mode 100644
index 0000000..4b30da6
--- /dev/null
+++ b/testdata/picard/sam/EstimateLibraryComplexity/dupes.sam
@@ -0,0 +1,16 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.1 SM:Hi,Mom! LB:mylib PL:ILLUMINA
+ at PG ID:MarkDuplicates PN:MarkDuplicates VN:1 CL:MarkDuplicates merge1.sam PP:bwa
+ at PG ID:bwa PN:bwa VN:1 CL:bwa aln
+C4A4WACXX140821:8:1112:2344:1984 83 chr7 1 255 101M = 302 201 CAACAGAAGCAGGAATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAAGATTATCGAATCAAAAAAAAATCCCGATTTCATTCCGCAGCTAACCTCCCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 83 chr7 1 255 101M = 302 201 CAACAGAAGCAGGAATCTGTGTTTGTGTTTCGGAAAAAAAGCTGAAAAGATTATCGAATCAAAAAAAAATCCCGATTTCATTCCGCAGCTAACCTCCCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1984 163 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 163 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
diff --git a/testdata/picard/sam/FixMateInformation/missingMate.sam b/testdata/picard/sam/FixMateInformation/missingMate.sam
new file mode 100644
index 0000000..f5e9700
--- /dev/null
+++ b/testdata/picard/sam/FixMateInformation/missingMate.sam
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:xx LN:100
+ at RG ID:grp1 PL:ILLUMINA LB:Library 1 DS:Group 1 SM:Sample
+a1 2179 xx 1 1 10M = 41 50 AAAAAAAAAA ********** MC:Z:10M RG:Z:grp1 NM:i:0 MQ:i:1
+a1 147 xx 41 1 10M = 1 -50 TTTTTTTTTT ********** MC:Z:10M RG:Z:grp1 NM:i:0 MQ:i:1
diff --git a/testdata/picard/sam/namesorted.test.sam b/testdata/picard/sam/namesorted.test.sam
index 645f140..d3c4209 100644
--- a/testdata/picard/sam/namesorted.test.sam
+++ b/testdata/picard/sam/namesorted.test.sam
@@ -25,9 +25,9 @@
@SQ SN:Y LN:59373566
@SQ SN:MT LN:16569
@RG ID:20FUK.1 PL:illumina PU:20FUKAAXX100202.1 LB:Solexa-18483 SM:NA12878 CN:BI
-readpair1 99 1 1811000 29 58M43S = 1811263 118 CGTGGTGGCAGGCACCTGTAGTCCCAGCTATTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGACCTTGCAGTGAGCCAAGATCGACCCA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA############################################ X0:i:4 X1:i:200 XC:i:58 MD:Z:58 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:0 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-readpair1 147 1 1811030 37 12S89M = 1811004 -118 GATCGTCACATCGTGTCTGCTCTCAGCCTGATGCAATACACAGTTTTAGCTGAAGTCTAAGAGGAAAATTCAGTCTCACATAGATATGCAGCTGGAAAAAG #############AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA X0:i:1 X1:i:0 XC:i:89 MD:Z:89 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@>YV[Z`X[aXQWXRBGUTX^V]^^VNXRXXSVVUHWQWVUZZX]WHVXUXRUUUV[VJ\XRPXZXTQWWWTVTT_X^_]XOXWWZZY[Z
-readpair2 99 1 1811000 29 58M43S3H = 1811263 118 CGTGGTGGCAGGCACCTGTAGTCCCAGCTATTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGACCTTGCAGTGAGCCAAGATCGACCCA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA############################################ X0:i:4 X1:i:200 XC:i:58 MD:Z:58 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:0 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-readpair2 147 1 1811030 37 4H12S89M = 1811004 -118 GATCGTCACATCGTGTCTGCTCTCAGCCTGATGCAATACACAGTTTTAGCTGAAGTCTAAGAGGAAAATTCAGTCTCACATAGATATGCAGCTGGAAAAAG #############AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA X0:i:1 X1:i:0 XC:i:89 MD:Z:89 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@>YV[Z`X[aXQWXRBGUTX^V]^^VNXRXXSVVUHWQWVUZZX]WHVXUXRUUUV[VJ\XRPXZXTQWWWTVTT_X^_]XOXWWZZY[Z
-readpair2Dup 1123 1 1811000 29 58M43S3H = 1811263 118 CGTGGTGGCAGGCACCTGTAGTCCCAGCTATTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGACCTTGCAGTGAGCCAAGATCGACCCA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA############################################ X0:i:4 X1:i:200 XC:i:58 MD:Z:58 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:0 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-readpair2Dup 1171 1 1811030 37 4H12S89M = 1811004 -118 GATCGTCACATCGTGTCTGCTCTCAGCCTGATGCAATACACAGTTTTAGCTGAAGTCTAAGAGGAAAATTCAGTCTCACATAGATATGCAGCTGGAAAAAG #############AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA X0:i:1 X1:i:0 XC:i:89 MD:Z:89 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@>YV[Z`X[aXQWXRBGUTX^V]^^VNXRXXSVVUHWQWVUZZX]WHVXUXRUUUV[VJ\XRPXZXTQWWWTVTT_X^_]XOXWWZZY[Z
+readpair1 99 1 1811000 29 58M43S = 1811263 118 CGTGGTGGCAGGCACCTGTAGTCCCAGCTATTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGACCTTGCAGTGAGCCAAGATCGACCCA AAAAAAAAAAAAAAAAAAAAAAAAAAAAA0000000000AAAAAAAAAAAAAAAAAA############################################ X0:i:4 X1:i:200 XC:i:58 MD:Z:58 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:0 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+readpair1 147 1 1811030 37 12S89M = 1811004 -118 GATCGTCACATCGTGTCTGCTCTCAGCCTGATGCAATACACAGTTTTAGCTGAAGTCTAAGAGGAAAATTCAGTCTCACATAGATATGCAGCTGGAAAAAG #############AAAAAAAAAAAA0000000000AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA X0:i:1 X1:i:0 XC:i:89 MD:Z:89 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@>YV[Z`X[aXQWXRBGUTX^V]^^VNXRXXSVVUHWQWVUZZX]WHVXUXRUUUV[VJ\XRPXZXTQWWWTVTT_X^_]XOXWWZZY[Z
+readpair2 99 1 1811000 15 58M43S3H = 1811263 118 CGTGGTGGCAGGCACCTGTAGTCCCAGCTATTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGACCTTGCAGTGAGCCAAGATCGACCCA AAAAAAAAAAAAAAAAAAAAAAAAAA00000AAAAAAAAAAAAAAAAAAAAAAAAAA############################################ X0:i:4 X1:i:200 XC:i:58 MD:Z:58 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:0 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+readpair2 147 1 1811030 37 4H12S89M = 1811004 -118 GATCGTCACATCGTGTCTGCTCTCAGCCTGATGCAATACACAGTTTTAGCTGAAGTCTAAGAGGAAAATTCAGTCTCACATAGATATGCAGCTGGAAAAAG #############AAAAAAAAAAAAA00000AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA X0:i:1 X1:i:0 XC:i:89 MD:Z:89 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@>YV[Z`X[aXQWXRBGUTX^V]^^VNXRXXSVVUHWQWVUZZX]WHVXUXRUUUV[VJ\XRPXZXTQWWWTVTT_X^_]XOXWWZZY[Z
+readpair2Dup 1123 1 1811000 29 58M43S3H = 1811263 118 CGTGGTGGCAGGCACCTGTAGTCCCAGCTATTCGGGAGGCTGAGGCAGGAGAATGGCGTGAACCCAGGAGGCGGACCTTGCAGTGAGCCAAGATCGACCCA AAAAAAAAAAAAAAAAAAAAA00000AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA############################################ X0:i:4 X1:i:200 XC:i:58 MD:Z:58 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:0 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+readpair2Dup 1171 1 1811030 37 4H12S89M = 1811004 -118 GATCGTCACATCGTGTCTGCTCTCAGCCTGATGCAATACACAGTTTTAGCTGAAGTCTAAGAGGAAAATTCAGTCTCACATAGATATGCAGCTGGAAAAAG #############AAAAAAAAAAAAAAAAAAAAAAAAAAAAA00000AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA X0:i:1 X1:i:0 XC:i:89 MD:Z:89 RG:Z:20FUK.1 XG:i:0 AM:i:0 NM:i:0 SM:i:37 XM:i:0 XO:i:0 BQ:Z:@@@@@@@@@@@@>YV[Z`X[aXQWXRBGUTX^V]^^VNXRXXSVVUHWQWVUZZX]WHVXUXRUUUV[VJ\XRPXZXTQWWWTVTT_X^_]XOXWWZZY[Z
diff --git a/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam b/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam
new file mode 100644
index 0000000..95143c9
--- /dev/null
+++ b/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam
@@ -0,0 +1,20 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:202
+ at SQ SN:chr8 LN:202
+ at RG ID:0 SM:Hi,Momma! LB:whatever PU:me PL:ILLUMINA
+SL-XAV:1:1:0:764#0/1 113 chr1 1 255 6M chr1 10 10 TTCATG &/,&-.
+SL-XAV:1:1:0:764#0/1 177 chr1 10 255 6M chr1 1 10 TTCATG &/,&-.
+SL-XAV:1:1:0:800#0/1 81 chr2 1 255 6M chr2 10 10 TTCATG &/,&-.
+SL-XAV:1:1:0:800#0/1 161 chr2 10 255 6M chr2 1 10 TTCATG &/,&-.
+SL-XAV:1:1:0:877#0/1 97 chr3 1 255 6M chr4 10 10 TTCATG &/,&-.
+SL-XAV:1:1:0:877#0/1 145 chr4 10 255 6M chr3 1 10 TTCATG &/,&-.
+SL-XAV:1:1:0:940#0/1 97 chr5 1 255 6M chr5 10 10 TTCATG &/,&-.
+SL-XAV:1:1:0:940#0/1 145 chr5 10 255 6M chr5 1 10 TTCATG &/,&-.
+SL-XAV:1:1:0:999#0/1 97 chr6 1 255 6M chr5 10 60 TTCATG &/,&-.
+SL-XAV:1:1:0:999#0/1 145 chr6 60 255 6M chr5 1 60 TTCATG &/,&-.
diff --git a/testdata/picard/util/interval_list_to_bed_test.bed b/testdata/picard/util/interval_list_to_bed_test.bed
new file mode 100644
index 0000000..06abec9
--- /dev/null
+++ b/testdata/picard/util/interval_list_to_bed_test.bed
@@ -0,0 +1,13 @@
+chr1 0 50 target_at_chrom_start 333 +
+chr1 99 110 target_1 333 +
+chr2 199 220 target_2 333 -
+chr3 299 330 target_3 333 +
+chr4 399 440 target_4 333 +
+chr5 499 550 target_5 333 -
+chr5 4999 5150 overlapping_1 333 +
+chr5 5099 5250 overlapping_2 333 -
+chr5 5199 5350 overlapping_3 333 -
+chr5 5299 5450 overlapping_4 333 +
+chr5 9999 10000 one_base_feature 333 +
+chr5 10000 10000 zero_base_feature 333 +
+chr5 999999 1000000 target_at_chrom_end 333 +
\ No newline at end of file
diff --git a/testdata/picard/util/interval_list_to_bed_test.interval_list b/testdata/picard/util/interval_list_to_bed_test.interval_list
new file mode 100644
index 0000000..4ca2229
--- /dev/null
+++ b/testdata/picard/util/interval_list_to_bed_test.interval_list
@@ -0,0 +1,19 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:chr1 LN:1000000
+ at SQ SN:chr2 LN:1000000
+ at SQ SN:chr3 LN:1000000
+ at SQ SN:chr4 LN:1000000
+ at SQ SN:chr5 LN:1000000
+chr1 1 50 + target_at_chrom_start
+chr1 100 110 + target_1
+chr2 200 220 - target_2
+chr3 300 330 + target_3
+chr4 400 440 + target_4
+chr5 500 550 - target_5
+chr5 5000 5150 + overlapping_1
+chr5 5100 5250 - overlapping_2
+chr5 5200 5350 - overlapping_3
+chr5 5300 5450 + overlapping_4
+chr5 10000 10000 + one_base_feature
+chr5 10001 10000 + zero_base_feature
+chr5 1000000 1000000 + target_at_chrom_end
\ No newline at end of file
diff --git a/testdata/picard/vcf/mini_gvcf.vcf b/testdata/picard/vcf/mini_gvcf.vcf
new file mode 100755
index 0000000..29f34f5
--- /dev/null
+++ b/testdata/picard/vcf/mini_gvcf.vcf
@@ -0,0 +1,290 @@
+##fileformat=VCFv4.1
+##ALT=<ID=NON_REF,Description="Represents any possible alternative allele at this location">
+##FILTER=<ID=LowQual,Description="Low quality">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=MIN_DP,Number=1,Type=Integer,Description="Minimum DP observed within the GVCF block">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=SB,Number=4,Type=Integer,Description="Per-sample component statistics which comprise the Fisher's Exact Test to detect strand bias.">
+##GATKCommandLine=<ID=HaplotypeCaller,Version=3.1-1-g07a4bf8,Date="Thu May 22 09:23:16 EDT 2014",Epoch=1400764996952,CommandLineOptions="analysis_type=HaplotypeCaller input_file=[NA12878.bam] showFullBamList=false read_buffer_size=null phone_home=AWS gatk_key=null tag=NA read_filter=[] intervals=[scattered.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=Homo_sapiens_assembly19.fasta nonDeterministicRandomSeed=false disab [...]
+##GVCFBlock=minGQ=0(inclusive),maxGQ=5(exclusive)
+##GVCFBlock=minGQ=20(inclusive),maxGQ=60(exclusive)
+##GVCFBlock=minGQ=5(inclusive),maxGQ=20(exclusive)
+##GVCFBlock=minGQ=60(inclusive),maxGQ=2147483647(exclusive)
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=ClippingRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref number of hard clipped bases">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##contig=<ID=1,length=249250621>
+##contig=<ID=2,length=243199373>
+##contig=<ID=3,length=198022430>
+##contig=<ID=4,length=191154276>
+##contig=<ID=5,length=180915260>
+##contig=<ID=6,length=171115067>
+##contig=<ID=7,length=159138663>
+##contig=<ID=8,length=146364022>
+##contig=<ID=9,length=141213431>
+##contig=<ID=10,length=135534747>
+##contig=<ID=11,length=135006516>
+##contig=<ID=12,length=133851895>
+##contig=<ID=13,length=115169878>
+##contig=<ID=14,length=107349540>
+##contig=<ID=15,length=102531392>
+##contig=<ID=16,length=90354753>
+##contig=<ID=17,length=81195210>
+##contig=<ID=18,length=78077248>
+##contig=<ID=19,length=59128983>
+##contig=<ID=20,length=63025520>
+##contig=<ID=21,length=48129895>
+##contig=<ID=22,length=51304566>
+##contig=<ID=X,length=155270560>
+##contig=<ID=Y,length=59373566>
+##contig=<ID=MT,length=16569>
+##contig=<ID=GL000207.1,length=4262>
+##contig=<ID=GL000226.1,length=15008>
+##contig=<ID=GL000229.1,length=19913>
+##contig=<ID=GL000231.1,length=27386>
+##contig=<ID=GL000210.1,length=27682>
+##contig=<ID=GL000239.1,length=33824>
+##contig=<ID=GL000235.1,length=34474>
+##contig=<ID=GL000201.1,length=36148>
+##contig=<ID=GL000247.1,length=36422>
+##contig=<ID=GL000245.1,length=36651>
+##contig=<ID=GL000197.1,length=37175>
+##contig=<ID=GL000203.1,length=37498>
+##contig=<ID=GL000246.1,length=38154>
+##contig=<ID=GL000249.1,length=38502>
+##contig=<ID=GL000196.1,length=38914>
+##contig=<ID=GL000248.1,length=39786>
+##contig=<ID=GL000244.1,length=39929>
+##contig=<ID=GL000238.1,length=39939>
+##contig=<ID=GL000202.1,length=40103>
+##contig=<ID=GL000234.1,length=40531>
+##contig=<ID=GL000232.1,length=40652>
+##contig=<ID=GL000206.1,length=41001>
+##contig=<ID=GL000240.1,length=41933>
+##contig=<ID=GL000236.1,length=41934>
+##contig=<ID=GL000241.1,length=42152>
+##contig=<ID=GL000243.1,length=43341>
+##contig=<ID=GL000242.1,length=43523>
+##contig=<ID=GL000230.1,length=43691>
+##contig=<ID=GL000237.1,length=45867>
+##contig=<ID=GL000233.1,length=45941>
+##contig=<ID=GL000204.1,length=81310>
+##contig=<ID=GL000198.1,length=90085>
+##contig=<ID=GL000208.1,length=92689>
+##contig=<ID=GL000191.1,length=106433>
+##contig=<ID=GL000227.1,length=128374>
+##contig=<ID=GL000228.1,length=129120>
+##contig=<ID=GL000214.1,length=137718>
+##contig=<ID=GL000221.1,length=155397>
+##contig=<ID=GL000209.1,length=159169>
+##contig=<ID=GL000218.1,length=161147>
+##contig=<ID=GL000220.1,length=161802>
+##contig=<ID=GL000213.1,length=164239>
+##contig=<ID=GL000211.1,length=166566>
+##contig=<ID=GL000199.1,length=169874>
+##contig=<ID=GL000217.1,length=172149>
+##contig=<ID=GL000216.1,length=172294>
+##contig=<ID=GL000215.1,length=172545>
+##contig=<ID=GL000205.1,length=174588>
+##contig=<ID=GL000219.1,length=179198>
+##contig=<ID=GL000224.1,length=179693>
+##contig=<ID=GL000223.1,length=180455>
+##contig=<ID=GL000195.1,length=182896>
+##contig=<ID=GL000212.1,length=186858>
+##contig=<ID=GL000222.1,length=186861>
+##contig=<ID=GL000200.1,length=187035>
+##contig=<ID=GL000193.1,length=189789>
+##contig=<ID=GL000194.1,length=191469>
+##contig=<ID=GL000225.1,length=211173>
+##contig=<ID=GL000192.1,length=547496>
+##contig=<ID=NC_007605,length=171823>
+##reference=file:Homo_sapiens_assembly19.fasta
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878
+1 14565 . C <NON_REF> . . END=14573 GT:DP:GQ:MIN_DP:PL 0/0:21:62:20:0,60,670
+1 14574 . A <NON_REF> . . END=14574 GT:DP:GQ:MIN_DP:PL 0/0:24:11:24:0,12,695
+1 14575 . C <NON_REF> . . END=14589 GT:DP:GQ:MIN_DP:PL 0/0:32:96:25:0,75,823
+1 14590 . G <NON_REF> . . END=14590 GT:DP:GQ:MIN_DP:PL 0/0:39:40:39:0,41,953
+1 14591 . T <NON_REF> . . END=14662 GT:DP:GQ:MIN_DP:PL 0/0:45:99:35:0,78,1009
+1 14663 . C <NON_REF> . . END=14666 GT:DP:GQ:MIN_DP:PL 0/0:35:57:33:0,57,855
+1 14667 . G <NON_REF> . . END=14672 GT:DP:GQ:MIN_DP:PL 0/0:34:60:34:0,60,900
+1 14673 . G C,<NON_REF> 541.77 . BaseQRankSum=-1.814;ClippingRankSum=2.599;DP=30;MLEAC=1,0;MLEAF=0.500,0.00;MQ=26.87;MQ0=0;MQRankSum=0.245;ReadPosRankSum=0.294 GT:AD:DP:GQ:PL:SB 0/1:7,23,0:30:99:570,0,212,591,281,872:4,3,23,0
+1 14674 . G <NON_REF> . . END=14676 GT:DP:GQ:MIN_DP:PL 0/0:29:57:28:0,54,810
+1 14677 . G A,<NON_REF> 0.01 . BaseQRankSum=-1.386;ClippingRankSum=-1.386;DP=26;MLEAC=1,0;MLEAF=0.500,0.00;MQ=25.65;MQ0=0;MQRankSum=0.107;ReadPosRankSum=-0.249 GT:AD:DP:GQ:PL:SB 0/1:22,4,0:26:3:3,0,1089,84,1100,1184:19,3,4,0
+1 14678 . A <NON_REF> . . END=14698 GT:DP:GQ:MIN_DP:PL 0/0:18:42:15:0,39,380
+1 14699 . C G,<NON_REF> 70.77 . BaseQRankSum=0.796;ClippingRankSum=-2.014;DP=18;MLEAC=1,0;MLEAF=0.500,0.00;MQ=27.49;MQ0=0;MQRankSum=-1.077;ReadPosRankSum=0.421 GT:AD:DP:GQ:PL:SB 0/1:12,6,0:18:99:99,0,295,135,313,448:8,4,5,1
+1 14700 . G <NON_REF> . . END=14721 GT:DP:GQ:MIN_DP:PL 0/0:18:51:17:0,48,484
+1 14722 . C <NON_REF> . . END=14723 GT:DP:GQ:MIN_DP:PL 0/0:21:60:21:0,60,900
+1 14724 . T <NON_REF> . . END=14726 GT:DP:GQ:MIN_DP:PL 0/0:21:54:21:0,54,810
+1 14727 . G <NON_REF> . . END=14738 GT:DP:GQ:MIN_DP:PL 0/0:23:63:22:0,60,727
+1 14739 . T <NON_REF> . . END=14739 GT:DP:GQ:MIN_DP:PL 0/0:22:53:22:0,54,705
+1 14740 . G <NON_REF> . . END=14747 GT:DP:GQ:MIN_DP:PL 0/0:23:60:22:0,60,900
+1 14748 . G <NON_REF> . . END=14785 GT:DP:GQ:MIN_DP:PL 0/0:20:48:16:0,36,467
+1 14786 . T <NON_REF> . . END=14814 GT:DP:GQ:MIN_DP:PL 0/0:30:90:23:0,60,731
+1 14815 . C <NON_REF> . . END=14815 GT:DP:GQ:MIN_DP:PL 0/0:40:34:40:0,34,1203
+1 14816 . T <NON_REF> . . END=14906 GT:DP:GQ:MIN_DP:PL 0/0:70:99:42:0,117,1755
+1 14907 . A G,<NON_REF> 2470.77 . BaseQRankSum=-1.273;ClippingRankSum=-1.440;DP=83;MLEAC=2,0;MLEAF=1.00,0.00;MQ=29.78;MQ0=0;MQRankSum=1.231;ReadPosRankSum=1.023 GT:AD:DP:GQ:PL:SB 1/1:1,82,0:83:99:2501,221,0,2504,247,2529:0,0,0,0
+1 14908 . G <NON_REF> . . END=14929 GT:DP:GQ:MIN_DP:PL 0/0:82:99:79:0,120,1800
+1 14930 . A G,<NON_REF> 2273.77 . DP=80;MLEAC=2,0;MLEAF=1.00,0.00;MQ=32.23;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,80,0:80:99:2307,240,0,2307,240,2307:0,0,0,0
+1 14931 . A <NON_REF> . . END=15014 GT:DP:GQ:MIN_DP:PL 0/0:62:99:33:0,81,1215
+1 15015 . G C,<NON_REF> 0.13 . BaseQRankSum=-0.484;ClippingRankSum=0.199;DP=32;MLEAC=1,0;MLEAF=0.500,0.00;MQ=28.42;MQ0=0;MQRankSum=-0.541;ReadPosRankSum=2.251 GT:AD:DP:GQ:PL:SB 0/1:28,4,0:32:13:13,0,710,97,722,819:21,7,0,4
+1 15016 . C <NON_REF> . . END=15044 GT:DP:GQ:MIN_DP:PL 0/0:32:84:30:0,72,1039
+1 15045 . C T,<NON_REF> 0.20 . BaseQRankSum=0.745;ClippingRankSum=2.675;DP=33;MLEAC=1,0;MLEAF=0.500,0.00;MQ=24.17;MQ0=0;MQRankSum=0.800;ReadPosRankSum=-1.738 GT:AD:DP:GQ:PL:SB 0/1:29,4,0:33:15:15,0,710,102,722,824:21,8,0,4
+1 15046 . G <NON_REF> . . END=15117 GT:DP:GQ:MIN_DP:PL 0/0:48:99:32:0,85,920
+1 15118 . A G,<NON_REF> 1103.77 . BaseQRankSum=0.587;ClippingRankSum=1.762;DP=62;MLEAC=1,0;MLEAF=0.500,0.00;MQ=25.39;MQ0=0;MQRankSum=1.858;ReadPosRankSum=0.362 GT:AD:DP:GQ:PL:SB 0/1:16,46,0:62:99:1132,0,235,1180,372,1552:6,10,17,29
+1 15119 . C <NON_REF> . . END=15210 GT:DP:GQ:MIN_DP:PL 0/0:69:99:58:0,120,1800
+1 15211 . T G,<NON_REF> 1768.77 . BaseQRankSum=0.735;ClippingRankSum=-0.157;DP=66;MLEAC=2,0;MLEAF=1.00,0.00;MQ=29.78;MQ0=0;MQRankSum=-1.365;ReadPosRankSum=-0.525 GT:AD:DP:GQ:PL:SB 1/1:1,65,0:66:99:1799,169,0,1802,194,1827:0,0,0,0
+1 15212 . G <NON_REF> . . END=15273 GT:DP:GQ:MIN_DP:PL 0/0:65:99:38:0,90,1350
+1 15274 . A T,<NON_REF> 1074.77 . DP=38;MLEAC=2,0;MLEAF=1.00,0.00;MQ=27.77;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,38,0:38:99:1108,114,0,1108,114,1108:0,0,0,0
+1 15275 . A <NON_REF> . . END=15290 GT:DP:GQ:MIN_DP:PL 0/0:35:87:30:0,60,900
+1 15291 . A <NON_REF> . . END=15324 GT:DP:GQ:MIN_DP:PL 0/0:22:48:19:0,39,585
+1 15325 . C <NON_REF> . . END=15598 GT:DP:GQ:MIN_DP:PL 0/0:45:99:25:0,60,900
+1 15599 . G <NON_REF> . . END=15599 GT:DP:GQ:MIN_DP:PL 0/0:23:57:23:0,57,855
+1 15600 . A <NON_REF> . . END=15716 GT:DP:GQ:MIN_DP:PL 0/0:30:81:25:0,60,777
+1 15717 . A <NON_REF> . . END=15717 GT:DP:GQ:MIN_DP:PL 0/0:24:56:24:0,57,694
+1 15718 . C <NON_REF> . . END=15793 GT:DP:GQ:MIN_DP:PL 0/0:28:72:23:0,61,754
+1 15794 . A <NON_REF> . . END=15794 GT:DP:GQ:MIN_DP:PL 0/0:27:57:27:0,57,697
+1 15795 . C <NON_REF> . . END=15814 GT:DP:GQ:MIN_DP:PL 0/0:29:81:28:0,63,877
+1 15815 . C <NON_REF> . . END=15815 GT:DP:GQ:MIN_DP:PL 0/0:28:48:28:0,48,852
+1 15816 . T <NON_REF> . . END=15816 GT:DP:GQ:MIN_DP:PL 0/0:28:72:28:0,72,1080
+1 15817 . G <NON_REF> . . END=15817 GT:DP:GQ:MIN_DP:PL 0/0:28:51:28:0,51,884
+1 881627 . G A,<NON_REF> 1304.77 . DP=40;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,39,0:39:99:1338,117,0,1338,117,1338:0,0,0,0
+1 881628 . G <NON_REF> . . END=882115 GT:DP:GQ:MIN_DP:PL 0/0:41:99:24:0,60,900
+1 882116 . C <NON_REF> . . END=882120 GT:DP:GQ:MIN_DP:PL 0/0:24:57:23:0,54,787
+1 882121 . C <NON_REF> . . END=882211 GT:DP:GQ:MIN_DP:PL 0/0:30:81:22:0,63,749
+1 882212 . T <NON_REF> . . END=882212 GT:DP:GQ:MIN_DP:PL 0/0:27:57:27:0,57,846
+1 882213 . G <NON_REF> . . END=882802 GT:DP:GQ:MIN_DP:PL 0/0:38:99:27:0,64,867
+1 882803 . A G,<NON_REF> 1485.77 . DP=45;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,45,0:45:99:1519,134,0,1519,134,1519:0,0,0,0
+1 882804 . T <NON_REF> . . END=883399 GT:DP:GQ:MIN_DP:PL 0/0:43:99:30:0,69,914
+1 883400 . A <NON_REF> . . END=883400 GT:DP:GQ:MIN_DP:PL 0/0:35:54:35:0,55,889
+1 883401 . C <NON_REF> . . END=883586 GT:DP:GQ:MIN_DP:PL 0/0:37:99:26:0,63,926
+1 883587 . T <NON_REF> . . END=883587 GT:DP:GQ:MIN_DP:PL 0/0:27:51:27:0,51,736
+1 883588 . C <NON_REF> . . END=883589 GT:DP:GQ:MIN_DP:PL 0/0:29:86:27:0,69,746
+1 883590 . T <NON_REF> . . END=883590 GT:DP:GQ:MIN_DP:PL 0/0:29:56:29:0,56,839
+1 883591 . A <NON_REF> . . END=883613 GT:DP:GQ:MIN_DP:PL 0/0:35:90:31:0,80,936
+1 883614 . T <NON_REF> . . END=883614 GT:DP:GQ:MIN_DP:PL 0/0:33:56:33:0,57,1017
+1 883615 . G <NON_REF> . . END=883624 GT:DP:GQ:MIN_DP:PL 0/0:34:87:32:0,66,1082
+1 883625 . A G,<NON_REF> 937.77 . DP=28;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,28,0:28:84:971,84,0,971,84,971:0,0,0,0
+1 883626 . G <NON_REF> . . END=883648 GT:DP:GQ:MIN_DP:PL 0/0:32:84:28:0,72,843
+1 883649 . G <NON_REF> . . END=883686 GT:DP:GQ:MIN_DP:PL 0/0:19:51:16:0,24,418
+1 883687 . C <NON_REF> . . END=883805 GT:DP:GQ:MIN_DP:PL 0/0:39:99:21:0,63,738
+1 883806 . A <NON_REF> . . END=883806 GT:DP:GQ:MIN_DP:PL 0/0:28:51:28:0,51,781
+1 883807 . C <NON_REF> . . END=883898 GT:DP:GQ:MIN_DP:PL 0/0:37:96:27:0,72,780
+1 883899 . T G,<NON_REF> 498.77 . BaseQRankSum=0.578;ClippingRankSum=-1.634;DP=35;MLEAC=1,0;MLEAF=0.500,0.00;MQ=60.00;MQ0=0;MQRankSum=-0.776;ReadPosRankSum=1.568 GT:AD:DP:GQ:PL:SB 0/1:17,18,0:35:99:527,0,481,579,535,1113:6,11,9,9
+1 883900 . G <NON_REF> . . END=884007 GT:DP:GQ:MIN_DP:PL 0/0:34:90:25:0,69,932
+1 884008 . C <NON_REF> . . END=884011 GT:DP:GQ:MIN_DP:PL 0/0:30:24:29:0,21,879
+1 884012 . C <NON_REF> . . END=884012 GT:DP:GQ:MIN_DP:PL 0/0:29:78:29:0,78,1170
+1 884013 . G <NON_REF> . . END=884017 GT:DP:GQ:MIN_DP:PL 0/0:35:0:30:0,0,700
+1 884018 . C <NON_REF> . . END=884018 GT:DP:GQ:MIN_DP:PL 0/0:37:96:37:0,96,1440
+1 884019 . A <NON_REF> . . END=884021 GT:DP:GQ:MIN_DP:PL 0/0:35:0:34:0,0,766
+1 884022 . C <NON_REF> . . END=884022 GT:DP:GQ:MIN_DP:PL 0/0:35:87:35:0,87,1305
+1 884023 . C <NON_REF> . . END=884024 GT:DP:GQ:MIN_DP:PL 0/0:35:0:35:0,0,645
+1 884025 . G <NON_REF> . . END=884025 GT:DP:GQ:MIN_DP:PL 0/0:35:90:35:0,90,1350
+1 884026 . G <NON_REF> . . END=884027 GT:DP:GQ:MIN_DP:PL 0/0:35:0:35:0,0,641
+1 884028 . C <NON_REF> . . END=884028 GT:DP:GQ:MIN_DP:PL 0/0:35:78:35:0,79,1132
+1 884029 . A <NON_REF> . . END=884033 GT:DP:GQ:MIN_DP:PL 0/0:36:0:35:0,0,507
+1 884034 . G <NON_REF> . . END=884034 GT:DP:GQ:MIN_DP:PL 0/0:36:99:36:0,99,1485
+1 884035 . G <NON_REF> . . END=884035 GT:DP:GQ:MIN_DP:PL 0/0:32:0:32:0,0,368
+1 884036 . C <NON_REF> . . END=884036 GT:DP:GQ:MIN_DP:PL 0/0:33:96:33:0,96,1440
+1 884037 . T <NON_REF> . . END=884037 GT:DP:GQ:MIN_DP:PL 0/0:34:0:34:0,0,275
+1 884038 . T <NON_REF> . . END=884038 GT:DP:GQ:MIN_DP:PL 0/0:36:99:36:0,105,1575
+1 884039 . C <NON_REF> . . END=884041 GT:DP:GQ:MIN_DP:PL 0/0:34:0:34:0,0,219
+1 884042 . C <NON_REF> . . END=884090 GT:DP:GQ:MIN_DP:PL 0/0:37:99:35:0,78,1115
+1 884091 . C CACCCTGGTCCCCCTGGTCCCTTTGGCCCTGCACCTGGCTGG,<NON_REF> 1297.73 . DP=41;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,37,0:37:87:1335,87,0,1358,111,1382:0,0,0,0
+1 884092 . A <NON_REF> . . END=884100 GT:DP:GQ:MIN_DP:PL 0/0:44:99:41:0,81,1215
+1 884101 . A <NON_REF> . . END=884101 GT:DP:GQ:MIN_DP:PL 0/0:46:0:46:0,0,998
+1 884102 . C <NON_REF> . . END=884110 GT:DP:GQ:MIN_DP:PL 0/0:45:99:42:0,105,1575
+1 884111 . T <NON_REF> . . END=884114 GT:DP:GQ:MIN_DP:PL 0/0:45:30:44:0,23,1250
+1 884115 . G <NON_REF> . . END=884233 GT:DP:GQ:MIN_DP:PL 0/0:33:87:28:0,67,859
+1 884234 . T <NON_REF> . . END=884234 GT:DP:GQ:MIN_DP:PL 0/0:30:49:30:0,50,862
+1 884235 . C <NON_REF> . . END=884275 GT:DP:GQ:MIN_DP:PL 0/0:31:81:26:0,63,939
+1 884276 . G <NON_REF> . . END=884277 GT:DP:GQ:MIN_DP:PL 0/0:29:54:28:0,46,860
+1 884278 . A <NON_REF> . . END=884278 GT:DP:GQ:MIN_DP:PL 0/0:27:66:27:0,66,990
+1 884279 . C <NON_REF> . . END=884279 GT:DP:GQ:MIN_DP:PL 0/0:27:51:27:0,51,828
+1 884280 . C <NON_REF> . . END=884281 GT:DP:GQ:MIN_DP:PL 0/0:27:63:25:0,63,945
+1 884282 . A <NON_REF> . . END=884282 GT:DP:GQ:MIN_DP:PL 0/0:26:57:26:0,57,822
+1 884283 . C <NON_REF> . . END=884284 GT:DP:GQ:MIN_DP:PL 0/0:26:66:23:0,60,763
+1 884285 . T <NON_REF> . . END=884285 GT:DP:GQ:MIN_DP:PL 0/0:26:56:26:0,56,780
+1 884286 . C <NON_REF> . . END=884287 GT:DP:GQ:MIN_DP:PL 0/0:27:72:25:0,62,844
+1 884288 . G <NON_REF> . . END=884288 GT:DP:GQ:MIN_DP:PL 0/0:26:58:26:0,58,812
+1 884289 . G <NON_REF> . . END=884291 GT:DP:GQ:MIN_DP:PL 0/0:27:63:26:0,63,836
+1 884292 . A <NON_REF> . . END=884292 GT:DP:GQ:MIN_DP:PL 0/0:29:58:29:0,58,890
+1 884293 . C <NON_REF> . . END=884293 GT:DP:GQ:MIN_DP:PL 0/0:31:73:31:0,73,982
+1 884294 . C <NON_REF> . . END=884294 GT:DP:GQ:MIN_DP:PL 0/0:31:38:31:0,38,949
+1 884295 . T <NON_REF> . . END=884390 GT:DP:GQ:MIN_DP:PL 0/0:32:87:30:0,60,900
+1 884391 . G <NON_REF> . . END=884417 GT:DP:GQ:MIN_DP:PL 0/0:31:36:26:0,24,360
+1 884418 . G <NON_REF> . . END=884421 GT:DP:GQ:MIN_DP:PL 0/0:28:6:27:0,6,90
+1 884422 . C <NON_REF> . . END=884425 GT:DP:GQ:MIN_DP:PL 0/0:28:3:27:0,0,0
+1 884426 . A AACAGCAAAG,<NON_REF> 790.73 . DP=25;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,19,0:19:51:829,51,0,836,57,842:0,0,0,0
+1 884427 . A <NON_REF> . . END=884427 GT:DP:GQ:MIN_DP:PL 0/0:28:0:28:0,0,0
+1 884428 . C <NON_REF> . . END=884435 GT:DP:GQ:MIN_DP:PL 0/0:26:66:23:0,60,888
+1 884436 . A <NON_REF> . . END=884436 GT:DP:GQ:MIN_DP:PL 0/0:22:49:22:0,50,719
+1 884437 . G <NON_REF> . . END=884437 GT:DP:GQ:MIN_DP:PL 0/0:23:10:23:0,11,648
+1 884438 . T <NON_REF> . . END=884438 GT:DP:GQ:MIN_DP:PL 0/0:23:31:23:0,31,659
+1 884439 . G <NON_REF> . . END=884440 GT:DP:GQ:MIN_DP:PL 0/0:25:66:22:0,60,900
+1 884441 . C <NON_REF> . . END=884441 GT:DP:GQ:MIN_DP:PL 0/0:26:42:26:0,42,831
+1 884442 . A <NON_REF> . . END=884442 GT:DP:GQ:MIN_DP:PL 0/0:26:69:26:0,69,1035
+1 884443 . G <NON_REF> . . END=884444 GT:DP:GQ:MIN_DP:PL 0/0:26:43:26:0,42,827
+1 884445 . C <NON_REF> . . END=884445 GT:DP:GQ:MIN_DP:PL 0/0:26:69:26:0,69,1035
+1 884446 . A <NON_REF> . . END=884446 GT:DP:GQ:MIN_DP:PL 0/0:25:57:25:0,57,855
+1 884447 . C <NON_REF> . . END=884451 GT:DP:GQ:MIN_DP:PL 0/0:30:72:27:0,60,900
+1 884452 . C <NON_REF> . . END=884452 GT:DP:GQ:MIN_DP:PL 0/0:31:56:31:0,57,832
+1 884453 . G <NON_REF> . . END=884476 GT:DP:GQ:MIN_DP:PL 0/0:30:78:27:0,60,775
+1 884477 . C <NON_REF> . . END=884477 GT:DP:GQ:MIN_DP:PL 0/0:32:50:32:0,51,913
+1 884478 . C <NON_REF> . . END=884485 GT:DP:GQ:MIN_DP:PL 0/0:30:78:28:0,71,797
+1 884486 . C <NON_REF> . . END=884486 GT:DP:GQ:MIN_DP:PL 0/0:28:55:28:0,55,827
+1 884487 . C <NON_REF> . . END=884487 GT:DP:GQ:MIN_DP:PL 0/0:30:71:30:0,71,874
+1 884488 . C <NON_REF> . . END=884488 GT:DP:GQ:MIN_DP:PL 0/0:29:56:29:0,56,843
+1 884489 . C <NON_REF> . . END=884509 GT:DP:GQ:MIN_DP:PL 0/0:29:78:25:0,66,703
+1 884510 . C <NON_REF> . . END=884510 GT:DP:GQ:MIN_DP:PL 0/0:29:59:29:0,59,907
+1 884511 . C <NON_REF> . . END=884522 GT:DP:GQ:MIN_DP:PL 0/0:34:78:31:0,60,900
+1 884523 . A <NON_REF> . . END=884537 GT:DP:GQ:MIN_DP:PL 0/0:39:48:37:0,24,360
+1 884538 . G <NON_REF> . . END=884546 GT:DP:GQ:MIN_DP:PL 0/0:40:9:39:0,6,90
+1 884547 . A <NON_REF> . . END=884550 GT:DP:GQ:MIN_DP:PL 0/0:39:0:38:0,0,0
+1 884551 . GAGAA G,<NON_REF> 1528.73 . DP=38;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.28;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,35,0:35:99:1571,108,0,1571,108,1571:0,0,0,0
+1 884556 . A <NON_REF> . . END=884556 GT:DP:GQ:MIN_DP:PL 0/0:36:0:36:0,0,0
+1 884557 . G <NON_REF> . . END=884766 GT:DP:GQ:MIN_DP:PL 0/0:42:99:34:0,71,1060
+1 884767 . G A,<NON_REF> 586.77 . BaseQRankSum=-0.407;ClippingRankSum=0.994;DP=51;MLEAC=1,0;MLEAF=0.500,0.00;MQ=60.00;MQ0=0;MQRankSum=-1.372;ReadPosRankSum=1.013 GT:AD:DP:GQ:PL:SB 0/1:28,23,0:51:99:615,0,805,700,873,1573:13,15,14,9
+1 884768 . G <NON_REF> . . END=884814 GT:DP:GQ:MIN_DP:PL 0/0:53:99:49:0,120,1800
+1 884815 . A G,<NON_REF> 1553.77 . DP=53;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,53,0:53:99:1587,159,0,1587,159,1587:0,0,0,0
+1 884816 . A <NON_REF> . . END=885675 GT:DP:GQ:MIN_DP:PL 0/0:46:99:36:0,76,1012
+1 885676 . C A,<NON_REF> 1617.77 . DP=49;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,49,0:49:99:1651,147,0,1651,147,1651:0,0,0,0
+1 885677 . A <NON_REF> . . END=885688 GT:DP:GQ:MIN_DP:PL 0/0:51:99:47:0,118,1568
+1 885689 . G A,<NON_REF> 2306.77 . DP=50;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,50,0:50:99:2340,163,0,2340,163,2340:0,0,0,0
+1 885690 . C <NON_REF> . . END=885698 GT:DP:GQ:MIN_DP:PL 0/0:51:99:49:0,120,1800
+1 885699 . A G,<NON_REF> 2306.77 . DP=51;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,51,0:51:99:2340,163,0,2340,163,2340:0,0,0,0
+1 885700 . C <NON_REF> . . END=886005 GT:DP:GQ:MIN_DP:PL 0/0:41:99:29:0,66,944
+1 886006 . T C,<NON_REF> 1149.77 . DP=34;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.32;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,34,0:34:99:1183,102,0,1183,102,1183:0,0,0,0
+1 886007 . A <NON_REF> . . END=886008 GT:DP:GQ:MIN_DP:PL 0/0:36:60:35:0,60,900
+1 886009 . A <NON_REF> . . END=886030 GT:DP:GQ:MIN_DP:PL 0/0:37:33:34:0,21,315
+1 886031 . A <NON_REF> . . END=886038 GT:DP:GQ:MIN_DP:PL 0/0:36:12:35:0,6,90
+1 886039 . G <NON_REF> . . END=886048 GT:DP:GQ:MIN_DP:PL 0/0:38:0:38:0,0,0
+1 886049 . ACAG A,<NON_REF> 1683.73 . DP=39;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.55;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,39,0:39:99:1726,119,0,1726,119,1726:0,0,0,0
+1 886053 . A <NON_REF> . . END=886053 GT:DP:GQ:MIN_DP:PL 0/0:41:0:41:0,0,0
+1 886054 . T <NON_REF> . . END=886114 GT:DP:GQ:MIN_DP:PL 0/0:39:99:31:0,75,1125
+1 886115 . C <NON_REF> . . END=886115 GT:DP:GQ:MIN_DP:PL 0/0:29:57:29:0,57,917
+1 886116 . A <NON_REF> . . END=886173 GT:DP:GQ:MIN_DP:PL 0/0:28:69:24:0,61,774
+1 886174 . C <NON_REF> . . END=886174 GT:DP:GQ:MIN_DP:PL 0/0:31:57:31:0,57,961
+1 886175 . A <NON_REF> . . END=886178 GT:DP:GQ:MIN_DP:PL 0/0:32:87:32:0,83,1029
+1 886179 . CA C,<NON_REF> 1442.73 . DP=29;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,29,0:29:99:1485,99,0,1485,99,1485:0,0,0,0
+1 886181 . C <NON_REF> . . END=886181 GT:DP:GQ:MIN_DP:PL 0/0:29:0:29:0,0,0
+1 886182 . TG T,<NON_REF> 1442.73 . DP=33;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,33,0:33:99:1485,99,0,1485,99,1485:0,0,0,0
+1 886184 . T <NON_REF> . . END=886185 GT:DP:GQ:MIN_DP:PL 0/0:33:0:32:0,0,0
+1 886186 . C CAG,<NON_REF> 1442.73 . DP=33;MLEAC=2,0;MLEAF=1.00,0.00;MQ=60.00;MQ0=0 GT:AD:DP:GQ:PL:SB 1/1:0,33,0:33:99:1485,99,0,1485,99,1485:0,0,0,0
+1 886187 . A <NON_REF> . . END=886187 GT:DP:GQ:MIN_DP:PL 0/0:36:0:36:0,0,0
+1 886188 . G <NON_REF> . . END=886253 GT:DP:GQ:MIN_DP:PL 0/0:36:96:31:0,65,995
+1 886254 . C <NON_REF> . . END=886254 GT:DP:GQ:MIN_DP:PL 0/0:35:42:35:0,42,1068
+1 886255 . A <NON_REF> . . END=886269 GT:DP:GQ:MIN_DP:PL 0/0:32:75:28:0,62,970
+1 886270 . C <NON_REF> . . END=886270 GT:DP:GQ:MIN_DP:PL 0/0:29:54:29:0,54,882
diff --git a/testdata/picard/vcf/mini_gvcf.vcf.idx b/testdata/picard/vcf/mini_gvcf.vcf.idx
new file mode 100644
index 0000000..d8a50c1
Binary files /dev/null and b/testdata/picard/vcf/mini_gvcf.vcf.idx differ
diff --git a/testdata/picard/vcf/test.over.badContig.chain b/testdata/picard/vcf/test.over.badContig.chain
new file mode 100644
index 0000000..99662e8
--- /dev/null
+++ b/testdata/picard/vcf/test.over.badContig.chain
@@ -0,0 +1,2 @@
+chain 540 missingContig 540 + 0 540 missingContig 540 - 0 540 2
+540
diff --git a/testdata/picard/vcf/testLiftoverUsingMissingContig.vcf b/testdata/picard/vcf/testLiftoverUsingMissingContig.vcf
new file mode 100644
index 0000000..a05da7a
--- /dev/null
+++ b/testdata/picard/vcf/testLiftoverUsingMissingContig.vcf
@@ -0,0 +1,4 @@
+##fileformat=VCFv4.1
+#CHROM POS ID REF ALT QUAL FILTER INFO
+chr1 1 . C T 15676.17 PASS .
+missingContig 61 . C G 724.43 PASS .
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/picard-tools.git
More information about the debian-med-commit
mailing list