[med-svn] [picard-tools] 02/08: Imported Upstream version 2.5.0-gradle+dfsg
Vincent Danjean
vdanjean at debian.org
Thu Jul 7 10:00:00 UTC 2016
This is an automated email from the git hooks/post-receive script.
vdanjean pushed a commit to branch master
in repository picard-tools.
commit 9175c40a512a95dcdd1a7a057887325e72cfe5c7
Author: Vincent Danjean <vdanjean at debian.org>
Date: Thu Jul 7 09:18:04 2016 +0200
Imported Upstream version 2.5.0-gradle+dfsg
---
.gitignore | 3 +-
.travis.yml | 14 +-
Picard-public.fbp | 18 -
Picard-public.iml | 54 --
Picard-public.ipr | 351 ----------
README.md | 2 +
build.gradle | 326 +++++++++
build.sbt | 140 ----
build.xml | 631 +----------------
gradle/wrapper/gradle-wrapper.properties | 6 +
gradlew | 164 +++++
.../analysis/directed/TargetedPcrMetrics.java | 148 ----
src/java/picard/sam/RevertSam.java | 414 -----------
src/{ => main}/java/picard/PicardException.java | 0
src/{ => main}/java/picard/Test.java | 0
.../java/picard/analysis/AdapterUtility.java | 4 +-
.../picard/analysis/AlignmentSummaryMetrics.java | 0
.../analysis/AlignmentSummaryMetricsCollector.java | 23 +-
.../analysis/BaseDistributionByCycleMetrics.java | 0
.../java/picard/analysis/ChimeraUtil.java | 0
.../analysis/CollectAlignmentSummaryMetrics.java | 28 +-
.../analysis/CollectBaseDistributionByCycle.java | 33 +-
.../java/picard/analysis/CollectGcBiasMetrics.java | 12 +-
.../picard/analysis/CollectInsertSizeMetrics.java | 16 +-
.../analysis/CollectJumpingLibraryMetrics.java | 62 +-
.../picard/analysis/CollectMultipleMetrics.java | 51 +-
.../java/picard/analysis/CollectOxoGMetrics.java | 10 +-
.../analysis/CollectQualityYieldMetrics.java | 3 +-
.../java/picard/analysis/CollectRawWgsMetrics.java | 14 +-
.../java/picard/analysis/CollectRnaSeqMetrics.java | 47 +-
.../java/picard/analysis/CollectRrbsMetrics.java | 55 +-
.../java/picard/analysis/CollectWgsMetrics.java | 322 +++++----
.../analysis/CollectWgsMetricsFromQuerySorted.java | 3 +
.../CollectWgsMetricsFromSampledSites.java | 8 +-
.../CollectWgsMetricsWithNonZeroCoverage.java | 172 +++++
.../java/picard/analysis/CompareMetrics.java | 0
.../analysis/FingerprintingDetailMetrics.java | 0
.../analysis/FingerprintingSummaryMetrics.java | 0
.../java/picard/analysis/GcBiasDetailMetrics.java | 2 +-
.../picard/analysis/GcBiasMetricsCollector.java | 0
.../java/picard/analysis/GcBiasSummaryMetrics.java | 14 +-
.../java/picard/analysis/GcBiasUtils.java | 0
.../java/picard/analysis/InsertSizeMetrics.java | 0
.../picard/analysis/JumpingLibraryMetrics.java | 10 +-
.../java/picard/analysis/MeanQualityByCycle.java | 3 +-
.../picard/analysis/MetricAccumulationLevel.java | 0
.../picard/analysis/QualityScoreDistribution.java | 16 +-
.../java/picard/analysis/RnaSeqMetrics.java | 44 +-
.../java/picard/analysis/RrbsCpgDetailMetrics.java | 2 +-
.../java/picard/analysis/RrbsMetrics.java | 0
.../java/picard/analysis/RrbsMetricsCollector.java | 0
.../java/picard/analysis/RrbsSummaryMetrics.java | 4 +-
.../java/picard/analysis/SinglePassSamProgram.java | 0
.../picard/analysis/TheoreticalSensitivity.java | 47 +-
.../picard/analysis/artifacts/ArtifactCounter.java | 3 +-
.../CollectSequencingArtifactMetrics.java | 64 +-
.../analysis/artifacts/ContextAccumulator.java | 0
.../artifacts/ConvertSequencingArtifactToOxoG.java | 47 +-
.../artifacts/SequencingArtifactMetrics.java | 0
.../java/picard/analysis/artifacts/Transition.java | 0
.../analysis/directed/CalculateHsMetrics.java | 0
.../picard/analysis/directed/CollectHsMetrics.java | 46 +-
.../analysis/directed/CollectTargetedMetrics.java | 8 +-
.../directed/CollectTargetedPcrMetrics.java | 47 +-
.../analysis/directed/HsMetricCollector.java | 0
.../java/picard/analysis/directed/HsMetrics.java | 63 +-
.../directed/InsertSizeMetricsCollector.java | 8 +-
.../analysis/directed/RnaSeqMetricsCollector.java | 0
.../analysis/directed/TargetMetricsCollector.java | 121 ++--
.../analysis/directed/TargetedPcrMetrics.java | 156 +++++
.../directed/TargetedPcrMetricsCollector.java | 0
.../CollectIndependentReplicateMetrics.java | 529 ++++++++++++++
.../replicates/IndependentReplicateMetric.java | 222 ++++++
.../analysis/replicates/MergeableMetricBase.java | 189 +++++
.../picard/annotation/AnnotationException.java | 0
src/{ => main}/java/picard/annotation/Gene.java | 0
.../picard/annotation/GeneAnnotationReader.java | 0
.../java/picard/annotation/LocusFunction.java | 0
.../java/picard/annotation/RefFlatReader.java | 0
.../java/picard/cmdline/ClassFinder.java | 0
.../java/picard/cmdline/CommandLineDefaults.java | 4 +-
.../picard/cmdline/CommandLineParseException.java | 0
.../java/picard/cmdline/CommandLineParser.java | 17 +-
.../CommandLineParserDefinitionException.java | 0
.../java/picard/cmdline/CommandLineProgram.java | 3 +-
.../picard/cmdline/CommandLineProgramGroup.java | 0
.../cmdline/CommandLineProgramProperties.java | 0
.../picard/cmdline/CreateHtmlDocForProgram.java | 0
.../cmdline/CreateHtmlDocForStandardOptions.java | 0
.../java/picard/cmdline/NestedOptions.java | 0
src/{ => main}/java/picard/cmdline/Option.java | 0
.../java/picard/cmdline/PicardCommandLine.java | 0
.../java/picard/cmdline/PositionalArguments.java | 0
.../picard/cmdline/StandardOptionDefinitions.java | 1 +
.../java/picard/cmdline/programgroups/Alpha.java | 0
.../java/picard/cmdline/programgroups/Fasta.java | 0
.../picard/cmdline/programgroups/Illumina.java | 0
.../picard/cmdline/programgroups/Intervals.java | 0
.../java/picard/cmdline/programgroups/Metrics.java | 0
.../java/picard/cmdline/programgroups/None.java | 0
.../picard/cmdline/programgroups/SamOrBam.java | 0
.../java/picard/cmdline/programgroups/Testing.java | 0
.../picard/cmdline/programgroups/VcfOrBcf.java | 0
src/{ => main}/java/picard/fastq/BamToBfq.java | 0
.../java/picard/fastq/BamToBfqWriter.java | 0
.../java/picard/fastq/Casava18ReadNameEncoder.java | 25 +-
.../java/picard/fastq/IlluminaReadNameEncoder.java | 0
.../java/picard/fastq/ReadNameEncoder.java | 0
.../picard/filter/CountingDuplicateFilter.java | 0
.../java/picard/filter/CountingFilter.java | 0
.../java/picard/filter/CountingMapQFilter.java | 0
.../java/picard/filter/CountingPairedFilter.java | 0
.../java/picard/fingerprint/CheckFingerprint.java | 0
.../CrosscheckReadGroupFingerprints.java | 0
.../java/picard/fingerprint/DiploidGenotype.java | 0
.../java/picard/fingerprint/DiploidHaplotype.java | 0
.../java/picard/fingerprint/Fingerprint.java | 0
.../picard/fingerprint/FingerprintChecker.java | 67 +-
.../picard/fingerprint/FingerprintResults.java | 0
.../java/picard/fingerprint/GenotypeReader.java | 0
.../java/picard/fingerprint/HaplotypeBlock.java | 0
.../java/picard/fingerprint/HaplotypeMap.java | 0
.../picard/fingerprint/HaplotypeProbabilities.java | 0
...otypeProbabilitiesFromContaminatorSequence.java | 0
.../HaplotypeProbabilitiesFromGenotype.java | 28 +-
...lotypeProbabilitiesFromGenotypeLikelihoods.java | 0
.../HaplotypeProbabilitiesFromSequence.java | 0
.../HaplotypeProbabilitiesUsingLogLikelihoods.java | 0
.../HaplotypeProbabilityOfNormalGivenTumor.java | 19 +-
.../java/picard/fingerprint/LocusResult.java | 0
.../java/picard/fingerprint/MatchResults.java | 0
src/{ => main}/java/picard/fingerprint/Snp.java | 0
.../picard/illumina/CheckIlluminaDirectory.java | 38 +-
.../picard/illumina/ClusterDataToSamConverter.java | 93 ++-
.../CollectIlluminaBasecallingMetrics.java | 57 +-
.../illumina/CollectIlluminaLaneMetrics.java | 24 +-
.../picard/illumina/ExtractIlluminaBarcodes.java | 62 +-
.../illumina/IlluminaBasecallingMetrics.java | 0
.../illumina/IlluminaBasecallsConverter.java | 6 +-
.../picard/illumina/IlluminaBasecallsToFastq.java | 60 +-
.../picard/illumina/IlluminaBasecallsToSam.java | 67 +-
.../java/picard/illumina/IlluminaLaneMetrics.java | 0
.../picard/illumina/IlluminaPhasingMetrics.java | 6 +-
.../illumina/LanePhasingMetricsCollector.java | 0
.../java/picard/illumina/MarkIlluminaAdapters.java | 21 +-
.../java/picard/illumina/parser/BarcodeParser.java | 0
.../java/picard/illumina/parser/BclData.java | 0
.../java/picard/illumina/parser/BclParser.java | 0
.../java/picard/illumina/parser/ClusterData.java | 0
.../parser/ClusterIntensityFileReader.java | 0
.../illumina/parser/CycleIlluminaFileMap.java | 0
.../java/picard/illumina/parser/FilterParser.java | 0
.../illumina/parser/FourChannelIntensityData.java | 0
.../java/picard/illumina/parser/IlluminaData.java | 0
.../illumina/parser/IlluminaDataProvider.java | 0
.../parser/IlluminaDataProviderFactory.java | 7 +-
.../picard/illumina/parser/IlluminaDataType.java | 0
.../picard/illumina/parser/IlluminaFileMap.java | 0
.../parser/IlluminaFileNotFoundException.java | 0
.../picard/illumina/parser/IlluminaFileUtil.java | 10 +-
.../illumina/parser/IlluminaMetricsCode.java | 0
.../picard/illumina/parser/IlluminaParser.java | 0
.../illumina/parser/IlluminaTextIterator.java | 0
.../picard/illumina/parser/IntensityChannel.java | 0
.../illumina/parser/MultiTileBclFileUtil.java | 0
.../picard/illumina/parser/MultiTileBclParser.java | 8 +-
.../picard/illumina/parser/MultiTileFileUtil.java | 0
.../illumina/parser/MultiTileFilterParser.java | 0
.../illumina/parser/MultiTileLocsParser.java | 0
.../picard/illumina/parser/MultiTileParser.java | 0
.../java/picard/illumina/parser/OutputMapping.java | 0
.../illumina/parser/ParameterizedFileUtil.java | 0
.../picard/illumina/parser/PerTileCycleParser.java | 16 +-
.../picard/illumina/parser/PerTileFileUtil.java | 0
.../java/picard/illumina/parser/PerTileParser.java | 0
.../illumina/parser/PerTilePerCycleFileUtil.java | 0
.../java/picard/illumina/parser/PosParser.java | 0
.../java/picard/illumina/parser/Range.java | 0
.../java/picard/illumina/parser/ReadData.java | 0
.../picard/illumina/parser/ReadDescriptor.java | 0
.../java/picard/illumina/parser/ReadStructure.java | 0
.../java/picard/illumina/parser/ReadType.java | 0
.../java/picard/illumina/parser/Tile.java | 5 +-
.../java/picard/illumina/parser/TileIndex.java | 0
.../picard/illumina/parser/TileMetricsUtil.java | 0
.../picard/illumina/parser/TilePhasingValue.java | 0
.../picard/illumina/parser/TileTemplateRead.java | 0
.../illumina/parser/fakers/BarcodeFileFaker.java | 0
.../illumina/parser/fakers/BciFileFaker.java | 0
.../illumina/parser/fakers/BclFileFaker.java | 0
.../illumina/parser/fakers/ClocsFileFaker.java | 0
.../picard/illumina/parser/fakers/FileFaker.java | 0
.../illumina/parser/fakers/FilterFileFaker.java | 0
.../illumina/parser/fakers/LocsFileFaker.java | 0
.../parser/fakers/MultiTileBclFileFaker.java | 0
.../parser/fakers/MultiTileLocsFileFaker.java | 0
.../illumina/parser/fakers/PosFileFaker.java | 0
.../AbstractIlluminaPositionFileReader.java | 0
.../illumina/parser/readers/BarcodeFileReader.java | 0
.../illumina/parser/readers/BclIndexReader.java | 0
.../readers/BclQualityEvaluationStrategy.java | 2 +-
.../picard/illumina/parser/readers/BclReader.java | 0
.../illumina/parser/readers/ClocsFileReader.java | 0
.../illumina/parser/readers/FilterFileReader.java | 0
.../illumina/parser/readers/LocsFileReader.java | 0
.../parser/readers/MMapBackedIteratorFactory.java | 0
.../illumina/parser/readers/PosFileReader.java | 0
.../parser/readers/TileMetricsOutReader.java | 0
.../quality/CollectHiSeqXPfFailMetrics.java | 78 ++-
.../java/picard/metrics/GcBiasMetrics.java | 0
.../java/picard/metrics/MultiLevelCollector.java | 0
.../java/picard/metrics/MultilevelMetrics.java | 0
.../picard/metrics/PerUnitMetricCollector.java | 0
.../java/picard/metrics/SAMRecordAndReference.java | 0
.../SAMRecordAndReferenceMultiLevelCollector.java | 0
.../metrics/SAMRecordMultiLevelCollector.java | 0
src/{ => main}/java/picard/pedigree/PedFile.java | 2 +-
src/{ => main}/java/picard/pedigree/Sex.java | 0
.../java/picard/reference/ExtractSequences.java | 0
.../java/picard/reference/NonNFastaSize.java | 0
.../java/picard/reference/NormalizeFasta.java | 0
.../java/picard/sam/AbstractAlignmentMerger.java | 29 +-
.../java/picard/sam/AddCommentsToBam.java | 0
.../java/picard/sam/AddOrReplaceReadGroups.java | 0
src/{ => main}/java/picard/sam/BamIndexStats.java | 0
.../sam/BestEndMapqPrimaryAlignmentStrategy.java | 0
.../BestMapqPrimaryAlignmentSelectionStrategy.java | 0
src/{ => main}/java/picard/sam/BuildBamIndex.java | 0
.../picard/sam/CalculateReadGroupChecksum.java | 0
.../java/picard/sam/CheckTerminatorBlock.java | 0
src/{ => main}/java/picard/sam/CleanSam.java | 0
src/{ => main}/java/picard/sam/CompareSAMs.java | 31 +-
.../java/picard/sam/CreateSequenceDictionary.java | 0
src/{ => main}/java/picard/sam/DownsampleSam.java | 0
.../java/picard/sam/DuplicationMetrics.java | 11 +-
...tFragmentPrimaryAlignmentSelectionStrategy.java | 0
src/{ => main}/java/picard/sam/FastqToSam.java | 0
src/{ => main}/java/picard/sam/FilterSamReads.java | 0
.../java/picard/sam/FixMateInformation.java | 0
src/{ => main}/java/picard/sam/GatherBamFiles.java | 0
src/{ => main}/java/picard/sam/HitsForInsert.java | 0
.../java/picard/sam/MergeBamAlignment.java | 0
src/{ => main}/java/picard/sam/MergeSamFiles.java | 0
...stDistantPrimaryAlignmentSelectionStrategy.java | 0
.../picard/sam/MultiHitAlignedReadIterator.java | 0
.../picard/sam/PositionBasedDownsampleSam.java | 0
.../sam/PrimaryAlignmentSelectionStrategy.java | 0
src/{ => main}/java/picard/sam/ReorderSam.java | 0
.../java/picard/sam/ReplaceSamHeader.java | 0
...RevertOriginalBaseQualitiesAndAddMateCigar.java | 0
src/main/java/picard/sam/RevertSam.java | 758 +++++++++++++++++++++
.../java/picard/sam/SamAlignmentMerger.java | 3 +
.../java/picard/sam/SamFormatConverter.java | 0
src/{ => main}/java/picard/sam/SamToFastq.java | 11 +-
.../java/picard/sam/SetNmAndUqTags.java} | 68 +-
src/{ => main}/java/picard/sam/SortSam.java | 14 +-
.../java/picard/sam/SplitSamByLibrary.java | 0
.../java/picard/sam/ValidateSamFile.java | 29 +-
src/{ => main}/java/picard/sam/ViewSam.java | 0
.../markduplicates/EstimateLibraryComplexity.java | 59 +-
.../picard/sam/markduplicates/MarkDuplicates.java | 217 ++++--
.../MarkDuplicatesWithMateCigar.java | 11 +-
.../MarkDuplicatesWithMateCigarIterator.java | 14 +-
.../AbstractMarkDuplicatesCommandLineProgram.java | 48 +-
...ctOpticalDuplicateFinderCommandLineProgram.java | 0
.../DiskBasedReadEndsForMarkDuplicatesMap.java | 0
.../markduplicates/util/LibraryIdGenerator.java | 12 +-
.../picard/sam/markduplicates/util/MarkQueue.java | 0
.../MemoryBasedReadEndsForMarkDuplicatesMap.java | 0
.../util/OpticalDuplicateFinder.java | 58 +-
.../util/PhysicalLocationForMateCigar.java | 0
.../util/PhysicalLocationForMateCigarSet.java | 0
.../picard/sam/markduplicates/util/ReadEnds.java | 0
.../util/ReadEndsForMarkDuplicates.java | 7 +-
.../util/ReadEndsForMarkDuplicatesCodec.java | 2 +-
.../util/ReadEndsForMarkDuplicatesMap.java | 0
.../ReadEndsForMarkDuplicatesWithBarcodes.java | 12 +
...ReadEndsForMarkDuplicatesWithBarcodesCodec.java | 0
.../markduplicates/util/ReadEndsForMateCigar.java | 0
...amRecordWithOrdinalAndSetDuplicateReadFlag.java | 0
.../java/picard/sam/util/PhysicalLocation.java | 0
.../java/picard/sam/util/PhysicalLocationInt.java | 0
.../picard/sam/util/PhysicalLocationShort.java | 0
.../java/picard/sam/util/ReadNameParser.java | 0
.../java/picard/util/AbstractInputParser.java | 0
src/{ => main}/java/picard/util/AdapterMarker.java | 0
src/{ => main}/java/picard/util/AdapterPair.java | 0
src/{ => main}/java/picard/util/AsyncIterator.java | 0
.../java/picard/util/AtomicIterator.java | 0
src/{ => main}/java/picard/util/BaitDesigner.java | 31 +-
.../java/picard/util/BasicInputParser.java | 0
.../java/picard/util/BedToIntervalList.java | 0
.../java/picard/util/CircularByteBuffer.java | 0
.../java/picard/util/ClippingUtility.java | 6 +-
.../java/picard/util/CsvInputParser.java | 0
.../java/picard/util/DbSnpBitSetUtil.java | 0
.../util/DelimitedTextFileWithHeaderIterator.java | 0
src/{ => main}/java/picard/util/FifoBuffer.java | 0
.../picard/util/FileChannelJDKBugWorkAround.java | 0
src/{ => main}/java/picard/util/IlluminaUtil.java | 0
.../java/picard/util/IntervalListScatterer.java | 0
.../java/picard/util/IntervalListToBed.java | 0
.../java/picard/util/IntervalListTools.java | 8 +-
src/{ => main}/java/picard/util/Iterators.java | 0
.../java/picard/util/LiftOverIntervalList.java | 0
src/{ => main}/java/picard/util/MathUtil.java | 0
src/{ => main}/java/picard/util/MetricsDoclet.java | 39 +-
.../util/QuerySortedReadPairIteratorUtil.java | 0
src/{ => main}/java/picard/util/RExecutor.java | 0
.../java/picard/util/ScatterIntervalsByNs.java | 45 +-
.../java/picard/util/TabbedInputParser.java | 0
.../util/TabbedTextFileWithHeaderParser.java | 9 +
.../java/picard/util/UnsignedTypeUtil.java | 0
src/{ => main}/java/picard/util/VariantType.java | 0
.../vcf/ByIntervalListVariantContextIterator.java | 0
.../java/picard/vcf/CallingMetricAccumulator.java | 0
.../picard/vcf/CollectVariantCallingMetrics.java | 4 +-
src/{ => main}/java/picard/vcf/GA4GHScheme.java | 0
.../picard/vcf/GA4GHSchemeWithMissingAsHomRef.java | 0
src/{ => main}/java/picard/vcf/GatherVcfs.java | 10 +-
.../java/picard/vcf/GenotypeConcordance.java | 238 +++----
.../vcf/GenotypeConcordanceContingencyMetrics.java | 2 +-
.../java/picard/vcf/GenotypeConcordanceCounts.java | 5 +-
.../vcf/GenotypeConcordanceDetailMetrics.java | 0
.../java/picard/vcf/GenotypeConcordanceScheme.java | 0
.../vcf/GenotypeConcordanceSchemeFactory.java | 0
.../picard/vcf/GenotypeConcordanceStateCodes.java | 0
.../java/picard/vcf/GenotypeConcordanceStates.java | 6 +-
.../vcf/GenotypeConcordanceSummaryMetrics.java | 2 +-
.../java/picard/vcf/GvcfMetricAccumulator.java | 0
src/{ => main}/java/picard/vcf/LiftoverVcf.java | 48 +-
.../java/picard/vcf/MakeSitesOnlyVcf.java | 0
src/{ => main}/java/picard/vcf/MergeVcfs.java | 3 +-
.../vcf/PairedVariantSubContextIterator.java | 119 ++++
.../java/picard/vcf/RenameSampleInVcf.java | 0
src/{ => main}/java/picard/vcf/SortVcf.java | 0
src/{ => main}/java/picard/vcf/SplitVcfs.java | 0
.../picard/vcf/UpdateVcfSequenceDictionary.java | 0
.../java/picard/vcf/VcfFormatConverter.java | 0
.../java/picard/vcf/VcfToIntervalList.java | 0
.../picard/vcf/filter/AlleleBalanceFilter.java | 0
.../java/picard/vcf/filter/DepthFilter.java | 0
.../vcf/filter/FilterApplyingVariantIterator.java | 0
.../java/picard/vcf/filter/FilterVcf.java | 0
.../java/picard/vcf/filter/FisherStrandFilter.java | 0
.../java/picard/vcf/filter/GenotypeFilter.java | 0
.../picard/vcf/filter/GenotypeQualityFilter.java | 0
.../java/picard/vcf/filter/QdFilter.java | 0
.../java/picard/vcf/filter/VariantFilter.java | 0
.../vcf/processor/VariantAccumulatorExecutor.java | 0
.../vcf/processor/VariantIteratorProducer.java | 0
.../picard/vcf/processor/VariantProcessor.java | 0
.../java/picard/vcf/processor/VcfFileSegment.java | 0
.../vcf/processor/VcfFileSegmentGenerator.java | 0
.../PredicateFilterDecoratingClosableIterator.java | 0
.../picard/analysis/baseDistributionByCycle.R | 0
.../resources}/picard/analysis/gcBias.R | 0
.../picard/analysis/insertSizeHistogram.R | 0
.../picard/analysis/meanQualityByCycle.R | 0
.../picard/analysis/qualityScoreDistribution.R | 0
.../resources}/picard/analysis/rnaSeqCoverage.R | 0
.../resources}/picard/analysis/rrbsQc.R | 0
src/main/resources/picard/analysis/wgsHistogram.R | 121 ++++
.../resources}/picard/docker_helper.sh | 0
src/main/resources/release_picard.sh | 290 ++++++++
src/scripts/release_picard.sh | 285 --------
.../CollectAlignmentSummaryMetricsTest.java | 0
.../picard/analysis/CollectGcBiasMetricsTest.java | 0
.../analysis/CollectInsertSizeMetricsTest.java | 0
.../analysis/CollectMultipleMetricsTest.java | 0
.../analysis/CollectQualityYieldMetricsTest.java | 0
.../picard/analysis/CollectRnaSeqMetricsTest.java | 0
.../CollectWgsMetricsFromQuerySortedTest.java | 2 +-
.../CollectWgsMetricsFromSampledSitesTest.java | 62 +-
.../picard/analysis/CollectWgsMetricsTest.java | 116 ++++
.../picard/analysis/MultiLevelCollectorTest.java | 0
.../analysis/TheoreticalSensitivityTest.java | 4 +-
.../CollectSequencingArtifactMetricsTest.java | 0
.../analysis/directed/CollectHsMetricsTest.java | 0
.../directed/CollectTargetedMetricsTest.java | 0
.../CollectIndependentReplicatesMetricTest.java | 222 ++++++
.../replicates/MergeableMetricBaseTest.java | 161 +++++
.../java/picard/cmdline/CommandLineParserTest.java | 25 +
.../picard/cmdline/CommandLineProgramTest.java | 0
.../java/picard/cmdline/PicardCommandLineTest.java | 0
.../picard/fingerprint/FingerprintCheckerTest.java | 78 +++
.../java/picard/fingerprint/HaplotypeMapTest.java | 0
.../fingerprint/HaplotypeProbabilitiesTest.java | 0
...HaplotypeProbabilityOfNormalGivenTumorTest.java | 55 ++
.../illumina/CheckIlluminaDirectoryTest.java | 0
.../CollectIlluminaBasecallingMetricsTest.java | 0
.../illumina/ExtractIlluminaBarcodesTest.java | 0
.../illumina/IlluminaBasecallsToFastqTest.java | 0
.../IlluminaBasecallsToSamAdapterClippingTest.java | 0
.../illumina/IlluminaBasecallsToSamTest.java | 126 +++-
.../illumina/IlluminaLaneMetricsCollectorTest.java | 0
.../java/picard/illumina/ReadStructureTest.java | 0
.../java/picard/illumina/parser/BclParserTest.java | 0
.../java/picard/illumina/parser/BinTdUtil.java | 0
.../illumina/parser/CycleIlluminaFileMapTest.java | 0
.../picard/illumina/parser/FilterParserTest.java | 0
.../parser/IlluminaDataProviderFactoryTest.java | 0
.../illumina/parser/IlluminaDataProviderTest.java | 0
.../illumina/parser/IlluminaFileUtilTest.java | 0
.../picard/illumina/parser/PerTileParserTest.java | 0
.../illumina/parser/PerTilePerCycleParserTest.java | 0
.../java/picard/illumina/parser/PosParserTest.java | 0
.../illumina/parser/fakers/BclFileFakerTest.java | 0
.../AbstractIlluminaPositionFileReaderTest.java | 0
.../illumina/parser/readers/BclReaderTest.java | 0
.../parser/readers/ClocsFileReaderTest.java | 0
.../parser/readers/FilterFileReaderTest.java | 0
.../parser/readers/IlluminaFileUtilTest.java | 0
.../parser/readers/LocsFileReaderTest.java | 0
.../readers/MMapBackedIteratorFactoryTest.java | 0
.../illumina/parser/readers/PosFileReaderTest.java | 0
.../picard/metrics/CollectRrbsMetricsTest.java | 0
.../java/picard/pedigree/PedFileTest.java | 0
.../java/picard/reference/NonNFastaSizeTest.java | 0
.../java/picard/sam/AddCommentsToBamTest.java | 0
.../java/picard/sam/CleanSamTest.java | 0
.../java/picard/sam/CompareSAMsTest.java | 4 +
.../picard/sam/CreateSequenceDictionaryTest.java | 0
.../java/picard/sam/FastqToSamTest.java | 0
.../java/picard/sam/FilterSamReadsTest.java | 0
.../java/picard/sam/FixMateInformationTest.java | 0
.../java/picard/sam/GatherBamFilesTest.java | 0
.../java/picard/sam/MergeBamAlignmentTest.java | 0
.../java/picard/sam/MergeSamFilesTest.java | 0
.../picard/sam/PositionBasedDownsampleSamTest.java | 0
src/test/java/picard/sam/RevertSamTest.java | 449 ++++++++++++
.../java/picard/sam/SamFileConverterTest.java | 0
.../java/picard/sam/SamToFastqTest.java | 0
src/test/java/picard/sam/SetNmAndUqTagsTest.java | 70 ++
.../java/picard/sam/SplitSamByLibraryTest.java | 0
.../java/picard/sam/ViewSamTest.java | 0
...stractMarkDuplicatesCommandLineProgramTest.java | 67 +-
...ractMarkDuplicatesCommandLineProgramTester.java | 28 +-
.../markduplicates/AsIsMarkDuplicatesTester.java | 68 ++
.../BySumOfBaseQAndInOriginalOrderMDTester.java} | 19 +-
.../EstimateLibraryComplexityTest.java | 24 +-
.../MarkDuplicateWithMissingBarcodeTest.java | 0
...MarkDuplicateWithMissingReadOneBarcodeTest.java | 0
...MarkDuplicateWithMissingReadTwoBarcodeTest.java | 0
.../MarkDuplicateWithMissingSampleBarcodeTest.java | 0
.../sam/markduplicates/MarkDuplicatesTest.java | 0
.../sam/markduplicates/MarkDuplicatesTester.java | 0
.../MarkDuplicatesWithMateCigarTest.java | 0
.../MarkDuplicatesWithMateCigarTester.java | 0
.../QuerySortedMarkDuplicatesTester.java} | 9 +-
.../SimpleMarkDuplicatesWithMateCigar.java | 35 +-
.../SimpleMarkDuplicatesWithMateCigarTest.java | 0
.../SimpleMarkDuplicatesWithMateCigarTester.java | 0
.../util/OpticalDuplicateFinderTest.java | 3 -
.../java/picard/sam/testers/CleanSamTester.java | 0
.../java/picard/sam/testers/SamFileTester.java | 116 ++--
.../java/picard/sam/testers/ValidateSamTester.java | 0
.../java/picard/sam/util/ReadNameParserTests.java | 0
.../java/picard/util/BedToIntervalListTest.java | 0
.../java/picard/util/ClippingUtilityTest.java | 0
.../DelimitedTextFileWithHeaderIteratorTest.java | 0
.../java/picard/util/FifoBufferTest.java | 0
.../util/FileChannelJDKBugWorkAroundTest.java | 0
.../java/picard/util/IlluminaUtilTest.java | 0
.../picard/util/IntervalListScattererTest.java | 0
.../java/picard/util/IntervalListToBedTest.java | 0
.../java/picard/util/MathUtilTest.java | 0
.../java/picard/util/MergingIteratorTest.java | 0
.../util/QuerySortedReadPairIteratorUtilTest.java | 0
.../java/picard/util/RExecutorTest.java | 0
.../java/picard/util/ScatterIntervalsByNsTest.java | 2 -
.../util/TabbedTextFileWithHeaderParserTest.java | 0
src/{ => test}/java/picard/util/TestNGUtil.java | 0
.../java/picard/util/TextFileParsersTest.java | 0
.../java/picard/util/UnsignedTypeUtilTest.java | 0
.../picard/vcf/AbstractVcfMergingClpTester.java | 0
.../picard/vcf/CallingMetricAccumulatorTest.java | 0
.../vcf/CollectVariantCallingMetricsTest.java | 0
.../vcf/GenotypeConcordanceGA4GHSchemeTest.java | 0
...otypeConcordanceGA4GHSchemeWithMissingTest.java | 0
.../java/picard/vcf/GenotypeConcordanceTest.java | 6 +-
.../java/picard/vcf/LiftoverVcfTest.java | 45 +-
.../java/picard/vcf/MergeVcfsTest.java | 0
.../java/picard/vcf/SortVcfsTest.java | 0
.../java/picard/vcf/SplitVcfsTest.java | 0
.../vcf/UpdateVcfSequenceDictionaryTest.java | 0
.../picard/vcf/VariantContextComparatorTest.java | 0
.../java/picard/vcf/VcfFormatConverterTest.java | 0
.../java/picard/vcf/filter/TestFilterVcf.java | 0
.../vcf/processor/AccumulatorExecutorTest.java | 0
.../picard/vcf/processor/ByWholeContigTest.java | 0
.../java/picard/vcf/processor/ThreadsafeTest.java | 0
.../vcf/processor/VcfFileSegmentGeneratorTest.java | 0
.../vcf/processor/WidthLimitingDecoratorTest.java | 0
src/{tests/scripts => test/resources}/failing.R | 0
src/{tests/scripts => test/resources}/passing.R | 0
src/{tests => test}/resources/testng.xml | 0
.../picard/fingerprint/FingerprintCheckerTest.java | 29 -
...HaplotypeProbabilityOfNormalGivenTumorTest.java | 56 --
src/tests/java/picard/sam/RevertSamTest.java | 180 -----
...nonBarcodedWithTagPerMolecularIndex2M2M2M2M.sam | 182 +++++
.../nonBarcodedWithTagPerMolecularIndex4M4M.sam | 182 +++++
testdata/picard/independent_replicates/aTriple.sam | 22 +
.../independent_replicates/aTripleWithUMIs.sam | 22 +
testdata/picard/independent_replicates/hets.vcf | 14 +
.../picard/independent_replicates/hets_pos20.vcf | 11 +
.../independent_replicates/hets_pos21_HOMREF_G.vcf | 11 +
.../hets_pos22_IncorrectAlleles.vcf | 12 +
.../independent_replicates/multipleContigs.sam | 42 ++
.../independent_replicates/multipleContigs.vcf | 15 +
.../independent_replicates/twoSamplesHet.vcf | 12 +
.../picard/independent_replicates/twopairs.sam | 21 +
.../independent_replicates/twopairsWithBadUMIs.sam | 18 +
.../independent_replicates/twopairsWithUMIs.sam | 18 +
.../twopairsWithUMIsMultipleOrientations.sam | 30 +
.../CompareSAMs/genomic_sorted_same_position.sam | 5 +
.../picard/sam/CompareSAMs/unmapped_second.sam | 2 +-
.../EstimateLibraryComplexity/dupes_with_sos.sam | 20 +
.../sameUnclipped5primeEndCoordinateSortedv1.sam | 28 +
.../sameUnclipped5primeEndCoordinateSortedv2.sam | 28 +
.../sameUnclipped5primeEndCoordinateSortedv3.sam | 28 +
.../sameUnclipped5primeEndCoordinateSortedv4.sam | 28 +
.../MarkDuplicates/sameUnclipped5primeEndv1.sam | 28 +
.../MarkDuplicates/sameUnclipped5primeEndv2.sam | 28 +
testdata/picard/sam/largeIntervals.interval_list | 10 +
.../sam/revert_sam_bad_header_output_map.txt | 3 +
.../sam/revert_sam_positive_test_output_map.txt | 4 +
....sam => revert_sam_sample_library_override.sam} | 0
testdata/picard/sam/revert_sam_single_end.sam | 5 +
.../picard/sam/revert_sam_valid_output_map.txt | 3 +
.../sam/summary_alignment_stats_test_chimeras.sam | 24 +-
testdata/picard/vcf/vcfFormatTest.bad_dict.vcf | 2 +-
532 files changed, 7561 insertions(+), 3528 deletions(-)
diff --git a/.gitignore b/.gitignore
index d0f6515..59a8559 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,4 +13,5 @@ htsjdk
target
report
jacoco.data
-
+.gradle
+build
diff --git a/.travis.yml b/.travis.yml
index e1d4ea6..7ccf32f 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,11 +1,15 @@
language: java
+before_cache:
+ - rm -f $HOME/.gradle/caches/modules-2/modules-2.lock
+cache:
+ directories:
+ - $HOME/.gradle/caches/
+ - $HOME/.gradle/wrapper/
+ - $HOME/.m2
jdk:
- oraclejdk8
before_install:
+ - wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
- sudo apt-get -qq update
- sudo apt-get install -y --no-install-recommends r-base-dev r-recommended qpdf
- - sed -i -e 's_git@github.com:_https://github.com/_' build.xml
-script:
- - ant -lib lib/ant clone-htsjdk
- - ant
- - ant test
+script: ./gradlew jacocoTestReport
diff --git a/Picard-public.fbp b/Picard-public.fbp
deleted file mode 100644
index 7ef8e55..0000000
--- a/Picard-public.fbp
+++ /dev/null
@@ -1,18 +0,0 @@
-<Project filename="<<unnamed project>>" projectName="Picard-public">
- <Jar>/Users/alecw/Documents/Picard/Picard-public/classes</Jar>
- <SrcDir>/Users/alecw/Documents/Picard/Picard-public/src/java</SrcDir>
- <SuppressionFilter>
- <LastVersion value="-1" relOp="NEQ"/>
- <Designation designation="NOT_A_BUG"/>
- <And>
- <Priority value="2"/>
- <Bug category="MALICIOUS_CODE"/>
- <Bug code="EI"/>
- </And>
- <And>
- <Priority value="2"/>
- <Bug category="MALICIOUS_CODE"/>
- <Bug code="EI2"/>
- </And>
- </SuppressionFilter>
-</Project>
diff --git a/Picard-public.iml b/Picard-public.iml
deleted file mode 100644
index 478d4c7..0000000
--- a/Picard-public.iml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<module relativePaths="true" type="JAVA_MODULE" version="4">
- <component name="FacetManager">
- <facet type="Python" name="Python">
- <configuration sdkName="" />
- </facet>
- </component>
- <component name="NewModuleRootManager" inherit-compiler-output="false">
- <output url="file://$MODULE_DIR$/intellij.classes" />
- <output-test url="file://$MODULE_DIR$/intellij.testclasses" />
- <exclude-output />
- <content url="file://$MODULE_DIR$">
- <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
- <sourceFolder url="file://$MODULE_DIR$/src/tests/java" isTestSource="true" />
- <sourceFolder url="file://$MODULE_DIR$/htsjdk/src/java" isTestSource="false" />
- <sourceFolder url="file://$MODULE_DIR$/htsjdk/src/tests/java" isTestSource="true" />
- <excludeFolder url="file://$MODULE_DIR$/.command_tmp" />
- <excludeFolder url="file://$MODULE_DIR$/classes" />
- </content>
- <orderEntry type="jdk" jdkName="1.6" jdkType="JavaSDK" />
- <orderEntry type="sourceFolder" forTests="false" />
- <orderEntry type="module-library">
- <library>
- <CLASSES>
- <root url="file://$MODULE_DIR$/htsjdk/lib" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- <jarDirectory url="file://$MODULE_DIR$/htsjdk/lib" recursive="false" />
- </library>
- </orderEntry>
- <orderEntry type="module-library">
- <library>
- <CLASSES>
- <root url="file://$MODULE_DIR$/htsjdk/lib/cofoja" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- <jarDirectory url="file://$MODULE_DIR$/htsjdk/lib/cofoja" recursive="false" />
- </library>
- </orderEntry>
- <orderEntry type="module-library">
- <library>
- <CLASSES>
- <root url="file://$MODULE_DIR$/htsjdk/lib/testng" />
- </CLASSES>
- <JAVADOC />
- <SOURCES />
- <jarDirectory url="file://$MODULE_DIR$/htsjdk/lib/testng" recursive="false" />
- </library>
- </orderEntry>
- </component>
-</module>
-
diff --git a/Picard-public.ipr b/Picard-public.ipr
deleted file mode 100644
index 69418df..0000000
--- a/Picard-public.ipr
+++ /dev/null
@@ -1,351 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<project version="4">
- <component name="AntConfiguration">
- <buildFile url="file://$PROJECT_DIR$/build.xml">
- <additionalClassPath>
- <entry dir="file://$PROJECT_DIR$/lib/ant" />
- </additionalClassPath>
- <maximumStackSize value="32" />
- <executeOn event="compositeTask" target="[clean,test]" presentableName="[clean,test]" />
- </buildFile>
- </component>
- <component name="BuildJarProjectSettings">
- <option name="BUILD_JARS_ON_MAKE" value="false" />
- </component>
- <component name="CompilerConfiguration">
- <option name="DEFAULT_COMPILER" value="Javac" />
- <resourceExtensions>
- <entry name=".+\.(properties|xml|html|dtd|tld)" />
- <entry name=".+\.(gif|png|jpeg|jpg)" />
- </resourceExtensions>
- <wildcardResourcePatterns>
- <entry name="?*.properties" />
- <entry name="?*.xml" />
- <entry name="?*.gif" />
- <entry name="?*.png" />
- <entry name="?*.jpeg" />
- <entry name="?*.jpg" />
- <entry name="?*.html" />
- <entry name="?*.dtd" />
- <entry name="?*.tld" />
- <entry name="?*.ftl" />
- </wildcardResourcePatterns>
- <annotationProcessing>
- <profile default="true" name="Default" enabled="false">
- <processorPath useClasspath="true" />
- </profile>
- </annotationProcessing>
- </component>
- <component name="CopyrightManager" default="" />
- <component name="DependencyValidationManager">
- <option name="SKIP_IMPORT_STATEMENTS" value="false" />
- </component>
- <component name="EclipseCompilerSettings">
- <option name="GENERATE_NO_WARNINGS" value="true" />
- <option name="DEPRECATION" value="false" />
- </component>
- <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
- <component name="EntryPointsManager">
- <entry_points version="2.0" />
- </component>
- <component name="InspectionProjectProfileManager">
- <profiles>
- <profile version="1.0" is_locked="false">
- <option name="myName" value="Project Default" />
- <option name="myLocal" value="false" />
- <inspection_tool class="Convert2Diamond" enabled="false" level="WARNING" enabled_by_default="false" />
- <inspection_tool class="FieldMayBeFinal" enabled="true" level="WARNING" enabled_by_default="true" />
- <inspection_tool class="JavaDoc" enabled="false" level="WARNING" enabled_by_default="false">
- <option name="TOP_LEVEL_CLASS_OPTIONS">
- <value>
- <option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
- <option name="REQUIRED_TAGS" value="" />
- </value>
- </option>
- <option name="INNER_CLASS_OPTIONS">
- <value>
- <option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
- <option name="REQUIRED_TAGS" value="" />
- </value>
- </option>
- <option name="METHOD_OPTIONS">
- <value>
- <option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
- <option name="REQUIRED_TAGS" value="@return@param@throws or @exception" />
- </value>
- </option>
- <option name="FIELD_OPTIONS">
- <value>
- <option name="ACCESS_JAVADOC_REQUIRED_FOR" value="none" />
- <option name="REQUIRED_TAGS" value="" />
- </value>
- </option>
- <option name="IGNORE_DEPRECATED" value="false" />
- <option name="IGNORE_JAVADOC_PERIOD" value="true" />
- <option name="IGNORE_DUPLICATED_THROWS" value="false" />
- <option name="IGNORE_POINT_TO_ITSELF" value="false" />
- <option name="myAdditionalJavadocTags" value="" />
- </inspection_tool>
- <inspection_tool class="LocalCanBeFinal" enabled="true" level="WARNING" enabled_by_default="true">
- <option name="REPORT_VARIABLES" value="true" />
- <option name="REPORT_PARAMETERS" value="true" />
- </inspection_tool>
- <inspection_tool class="SqlNoDataSourceInspection" enabled="false" level="WARNING" enabled_by_default="false" />
- <inspection_tool class="UnusedDeclaration" enabled="false" level="WARNING" enabled_by_default="false">
- <option name="ADD_MAINS_TO_ENTRIES" value="true" />
- <option name="ADD_APPLET_TO_ENTRIES" value="true" />
- <option name="ADD_SERVLET_TO_ENTRIES" value="true" />
- <option name="ADD_NONJAVA_TO_ENTRIES" value="true" />
- </inspection_tool>
- <inspection_tool class="groupsTestNG" enabled="true" level="WARNING" enabled_by_default="true">
- <option name="groups">
- <value>
- <list size="1">
- <item index="0" class="java.lang.String" itemvalue="unix" />
- </list>
- </value>
- </option>
- </inspection_tool>
- </profile>
- </profiles>
- <option name="PROJECT_PROFILE" value="Project Default" />
- <option name="USE_PROJECT_PROFILE" value="true" />
- <version value="1.0" />
- <list size="6">
- <item index="0" class="java.lang.String" itemvalue="SERVER PROBLEM" />
- <item index="1" class="java.lang.String" itemvalue="WEAK WARNING" />
- <item index="2" class="java.lang.String" itemvalue="INFO" />
- <item index="3" class="java.lang.String" itemvalue="TYPO" />
- <item index="4" class="java.lang.String" itemvalue="WARNING" />
- <item index="5" class="java.lang.String" itemvalue="ERROR" />
- </list>
- </component>
- <component name="JavadocGenerationManager">
- <option name="OUTPUT_DIRECTORY" />
- <option name="OPTION_SCOPE" value="protected" />
- <option name="OPTION_HIERARCHY" value="true" />
- <option name="OPTION_NAVIGATOR" value="true" />
- <option name="OPTION_INDEX" value="true" />
- <option name="OPTION_SEPARATE_INDEX" value="true" />
- <option name="OPTION_DOCUMENT_TAG_USE" value="false" />
- <option name="OPTION_DOCUMENT_TAG_AUTHOR" value="false" />
- <option name="OPTION_DOCUMENT_TAG_VERSION" value="false" />
- <option name="OPTION_DOCUMENT_TAG_DEPRECATED" value="true" />
- <option name="OPTION_DEPRECATED_LIST" value="true" />
- <option name="OTHER_OPTIONS" value="" />
- <option name="HEAP_SIZE" />
- <option name="LOCALE" />
- <option name="OPEN_IN_BROWSER" value="true" />
- </component>
- <component name="Palette2">
- <group name="Swing">
- <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
- </item>
- <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
- </item>
- <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
- </item>
- <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.png" removable="false" auto-create-binding="false" can-attach-label="true">
- <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
- </item>
- <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
- <initial-values>
- <property name="text" value="Button" />
- </initial-values>
- </item>
- <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
- <initial-values>
- <property name="text" value="RadioButton" />
- </initial-values>
- </item>
- <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
- <initial-values>
- <property name="text" value="CheckBox" />
- </initial-values>
- </item>
- <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
- <initial-values>
- <property name="text" value="Label" />
- </initial-values>
- </item>
- <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
- <preferred-size width="150" height="-1" />
- </default-constraints>
- </item>
- <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
- <preferred-size width="150" height="-1" />
- </default-constraints>
- </item>
- <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
- <preferred-size width="150" height="-1" />
- </default-constraints>
- </item>
- <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
- <preferred-size width="150" height="50" />
- </default-constraints>
- </item>
- <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
- <preferred-size width="150" height="50" />
- </default-constraints>
- </item>
- <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
- <preferred-size width="150" height="50" />
- </default-constraints>
- </item>
- <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
- </item>
- <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
- <preferred-size width="150" height="50" />
- </default-constraints>
- </item>
- <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
- <preferred-size width="150" height="50" />
- </default-constraints>
- </item>
- <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
- <preferred-size width="150" height="50" />
- </default-constraints>
- </item>
- <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
- <preferred-size width="200" height="200" />
- </default-constraints>
- </item>
- <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
- <preferred-size width="200" height="200" />
- </default-constraints>
- </item>
- <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.png" removable="false" auto-create-binding="true" can-attach-label="true">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
- </item>
- <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
- </item>
- <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
- </item>
- <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
- </item>
- <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
- <preferred-size width="-1" height="20" />
- </default-constraints>
- </item>
- <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.png" removable="false" auto-create-binding="false" can-attach-label="false">
- <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
- </item>
- <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.png" removable="false" auto-create-binding="true" can-attach-label="false">
- <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
- </item>
- </group>
- </component>
- <component name="ProjectCodeStyleSettingsManager">
- <option name="PER_PROJECT_SETTINGS">
- <value>
- <option name="GENERATE_FINAL_LOCALS" value="true" />
- <option name="GENERATE_FINAL_PARAMETERS" value="true" />
- <option name="CLASS_COUNT_TO_USE_IMPORT_ON_DEMAND" value="999" />
- <option name="RIGHT_MARGIN" value="140" />
- <option name="JD_DO_NOT_WRAP_ONE_LINE_COMMENTS" value="true" />
- <XML>
- <option name="XML_LEGACY_SETTINGS_IMPORTED" value="true" />
- </XML>
- <codeStyleSettings language="JAVA">
- <option name="KEEP_SIMPLE_METHODS_IN_ONE_LINE" value="true" />
- </codeStyleSettings>
- </value>
- </option>
- <option name="USE_PER_PROJECT_SETTINGS" value="true" />
- </component>
- <component name="ProjectDetails">
- <option name="projectName" value="Picard-public" />
- </component>
- <component name="ProjectDictionaryState">
- <dictionary name="jrose">
- <words>
- <w>ribosomal</w>
- </words>
- </dictionary>
- <dictionary name="mccowan">
- <words>
- <w>bgzipped</w>
- <w>codecs</w>
- <w>demultiplex</w>
- <w>demultiplexed</w>
- <w>eamss</w>
- <w>endian</w>
- <w>gzipped</w>
- <w>illumina's</w>
- <w>indexable</w>
- <w>inferer</w>
- <w>inferrer</w>
- <w>parsability</w>
- <w>phread</w>
- <w>seekable</w>
- <w>tabix</w>
- <w>tokenizes</w>
- <w>tribble</w>
- </words>
- </dictionary>
- </component>
- <component name="ProjectKey">
- <option name="state" value="https://picard.svn.sourceforge.net/svnroot/picard/trunk/Picard-public.ipr" />
- </component>
- <component name="ProjectModuleManager">
- <modules>
- <module fileurl="file://$PROJECT_DIR$/Picard-public.iml" filepath="$PROJECT_DIR$/Picard-public.iml" />
- </modules>
- </component>
- <component name="ProjectResources">
- <default-html-doctype>http://www.w3.org/1999/xhtml</default-html-doctype>
- </component>
- <component name="ProjectRootManager" version="2" languageLevel="JDK_1_6" assert-keyword="true" jdk-15="true" project-jdk-name="1.6" project-jdk-type="JavaSDK">
- <output url="file://$PROJECT_DIR$/out" />
- </component>
- <component name="SvnBranchConfigurationManager">
- <option name="myConfigurationMap">
- <map>
- <entry key="$PROJECT_DIR$">
- <value>
- <SvnBranchConfiguration>
- <option name="branchUrls">
- <list>
- <option value="https://picard.svn.sourceforge.net/svnroot/picard/branches" />
- <option value="https://picard.svn.sourceforge.net/svnroot/picard/tags" />
- </list>
- </option>
- <option name="trunkUrl" value="https://picard.svn.sourceforge.net/svnroot/picard/trunk" />
- </SvnBranchConfiguration>
- </value>
- </entry>
- </map>
- </option>
- <option name="myVersion" value="124" />
- <option name="mySupportsUserInfoFilter" value="true" />
- </component>
- <component name="VcsDirectoryMappings">
- <mapping directory="" vcs="" />
- <mapping directory="$PROJECT_DIR$" vcs="Git" />
- </component>
- <component name="WebServicesPlugin" addRequiredLibraries="true" />
-</project>
-
diff --git a/README.md b/README.md
index 9ef1182..e811d0c 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,8 @@ Enjoy!
java -jar dist/picard.jar
+**NOTE:** Picard expects the *latest tagged release* version of HTSJDK. It is *not* guaranteed to be able to build from older versions of HTSJDK nor from the latest state of the HTSJDK master branch. When you run `ant clone-htsjdk` the first time, Picard will fetch the appropriate tagged version. Subsequently, to update HTSJDK (if for example you run into build issues) you can do so manually by running `git checkout <tag>` within your HTSJDK clone, where `<tag>` is the latest release tag [...]
+
----
diff --git a/build.gradle b/build.gradle
new file mode 100644
index 0000000..6170a0e
--- /dev/null
+++ b/build.gradle
@@ -0,0 +1,326 @@
+import javax.tools.ToolProvider
+import org.ajoberstar.grgit.*
+
+buildscript {
+ repositories {
+ mavenCentral()
+ }
+}
+
+plugins {
+ id "java"
+ id 'maven'
+ id 'signing'
+ id 'jacoco'
+ id 'application'
+ id 'com.palantir.git-version' version '0.5.1'
+ id 'com.github.johnrengelman.shadow' version '1.2.3'
+ id "com.github.kt3k.coveralls" version '2.6.3'
+ id 'org.ajoberstar.grgit' version '1.4.2'
+ id 'org.ajoberstar.github-pages' version '1.4.2'
+}
+
+mainClassName = "picard.cmdline.PicardCommandLine"
+
+repositories {
+ mavenLocal()
+ mavenCentral()
+}
+
+jacocoTestReport {
+ dependsOn test
+ group = "Reporting"
+ description = "Generate Jacoco coverage reports after running tests."
+ additionalSourceDirs = files(sourceSets.main.allJava.srcDirs)
+
+ reports {
+ xml.enabled = true // coveralls plugin depends on xml format report
+ html.enabled = true
+ }
+}
+
+jacoco {
+ toolVersion = "0.7.5.201505241946"
+}
+
+dependencies {
+ compile 'com.google.guava:guava:15.0'
+ compile ('com.github.samtools:htsjdk:2.5.0')
+ //tools dependency for doclet requires sdk devel
+ compile(files(((URLClassLoader) ToolProvider.getSystemToolClassLoader()).getURLs()))
+ testCompile 'org.testng:testng:6.9.10'
+}
+
+sourceCompatibility = 1.8
+targetCompatibility = 1.8
+
+final isRelease = Boolean.getBoolean("release")
+final gitVersion = gitVersion().replaceAll(".dirty", "")
+version = isRelease ? gitVersion : gitVersion + "-SNAPSHOT"
+
+logger.info("build for version:" + version)
+group = 'com.github.broadinstitute'
+
+defaultTasks 'all'
+
+task all(dependsOn: ['jar', 'distZip', 'documentAll'])
+
+jar {
+ manifest {
+ attributes 'Main-Class': 'picard.cmdline.PicardCommandLine',
+ 'Implementation-Title': 'Picard',
+ 'Implementation-Vendor': 'Broad Institute',
+ 'Implementation-Version': version
+ }
+}
+
+import org.gradle.internal.os.OperatingSystem;
+
+// This is a hack to disable the java 8 default javadoc lint until we fix the html formatting
+if (JavaVersion.current().isJava8Compatible()) {
+ tasks.withType(Javadoc) {
+ options.addStringOption('Xdoclint:none', '-quiet')
+ }
+}
+
+// Shared settings for every Test task: TestNG group filtering, test JVM heap size, and console logging.
+tasks.withType(Test) {
+    outputs.upToDateWhen { false } // tests will always rerun
+    description = "Runs the unit tests"
+
+    useTestNG {
+        if (OperatingSystem.current().isUnix()) {
+            excludeGroups "slow", "broken"
+        } else {
+            // the "unix" group is additionally skipped when not running on a Unix OS
+            excludeGroups "slow", "broken", "unix"
+        }
+    }
+
+    // set heap size for the test JVM(s)
+    minHeapSize = "1G"
+    maxHeapSize = "2G"
+    if (System.env.CI == "true") { //if running under a CI output less into the logs
+        int count = 0
+
+        beforeTest { descriptor ->
+            count++
+            // Only read the counter when logging: a second increment inside the message made it run
+            // ahead of the number of tests actually seen, so progress lines fired every 99 tests.
+            if (count % 100 == 0) {
+                logger.lifecycle("Finished " + Integer.toString(count) + " tests")
+            }
+        }
+    } else {
+        // show standard out and standard error of the test JVM(s) on the console
+        testLogging.showStandardStreams = true
+        beforeTest { descriptor ->
+            logger.lifecycle("Running Test: " + descriptor)
+        }
+
+        // listen to standard out and standard error of the test JVM(s)
+        onOutput { descriptor, event ->
+            logger.lifecycle("Test: " + descriptor + " produced standard out/err: " + event.message )
+        }
+    }
+
+    // log skipped and failed tests, with full exception output
+    testLogging {
+        events "skipped", "failed"
+        exceptionFormat = "full"
+    }
+
+    // afterSuite is a listener hook of the Test task itself, so it is registered at task level
+    // rather than from inside the testLogging closure.
+    afterSuite { desc, result ->
+        if (!desc.parent) { // will match the outermost suite
+            println "Results: ${result.resultType} (${result.testCount} tests, ${result.successfulTestCount} successes, ${result.failedTestCount} failures, ${result.skippedTestCount} skipped)"
+        }
+    }
+}
+
+ext.htmlDir = new File("build/docs/html")
+ext.htmlDirInc = new File(htmlDir, "_includes")
+ext.commandClasses = ["picard.sam.AddCommentsToBam", "picard.sam.AddOrReplaceReadGroups", "picard.util.BaitDesigner", "picard.fastq.BamToBfq",
+ "picard.sam.BamIndexStats", "picard.util.BedToIntervalList", "picard.sam.BuildBamIndex", "picard.analysis.directed.CalculateHsMetrics",
+ "picard.sam.CalculateReadGroupChecksum", "picard.sam.CleanSam", "picard.analysis.CollectAlignmentSummaryMetrics",
+ "picard.analysis.CollectBaseDistributionByCycle", "picard.analysis.CollectGcBiasMetrics", "picard.illumina.quality.CollectHiSeqXPfFailMetrics",
+ "picard.analysis.directed.CollectHsMetrics", "picard.illumina.CollectIlluminaBasecallingMetrics", "picard.illumina.CollectIlluminaLaneMetrics",
+ "picard.analysis.CollectInsertSizeMetrics", "picard.analysis.CollectJumpingLibraryMetrics", "picard.analysis.CollectMultipleMetrics",
+ "picard.analysis.CollectOxoGMetrics", "picard.analysis.CollectQualityYieldMetrics", "picard.analysis.CollectRawWgsMetrics",
+ "picard.analysis.directed.CollectTargetedPcrMetrics", "picard.analysis.CollectRnaSeqMetrics", "picard.analysis.CollectRrbsMetrics",
+ "picard.analysis.artifacts.CollectSequencingArtifactMetrics", "picard.vcf.CollectVariantCallingMetrics", "picard.analysis.CollectWgsMetrics",
+ "picard.analysis.CollectWgsMetricsFromQuerySorted", "picard.analysis.CollectWgsMetricsFromSampledSites",
+ "picard.analysis.CollectWgsMetricsWithNonZeroCoverage", "picard.analysis.CompareMetrics", "picard.sam.CompareSAMs",
+ "picard.analysis.artifacts.ConvertSequencingArtifactToOxoG", "picard.sam.CreateSequenceDictionary", "picard.sam.DownsampleSam",
+ "picard.illumina.ExtractIlluminaBarcodes", "picard.sam.markduplicates.EstimateLibraryComplexity", "picard.sam.FastqToSam", "picard.util.FifoBuffer",
+ "picard.sam.FilterSamReads", "picard.vcf.filter.FilterVcf", "picard.sam.FixMateInformation", "picard.sam.GatherBamFiles", "picard.vcf.GatherVcfs",
+ "picard.vcf.GenotypeConcordance", "picard.illumina.IlluminaBasecallsToFastq", "picard.illumina.IlluminaBasecallsToSam", "picard.illumina.CheckIlluminaDirectory",
+ "picard.sam.CheckTerminatorBlock", "picard.util.IntervalListTools", "picard.util.LiftOverIntervalList", "picard.vcf.LiftoverVcf", "picard.vcf.MakeSitesOnlyVcf",
+ "picard.sam.markduplicates.MarkDuplicates", "picard.sam.markduplicates.MarkDuplicatesWithMateCigar", "picard.analysis.MeanQualityByCycle",
+ "picard.sam.MergeBamAlignment", "picard.sam.MergeSamFiles", "picard.vcf.MergeVcfs", "picard.reference.NormalizeFasta", "picard.sam.PositionBasedDownsampleSam",
+ "picard.reference.ExtractSequences", "picard.analysis.QualityScoreDistribution", "picard.vcf.RenameSampleInVcf", "picard.sam.ReorderSam",
+ "picard.sam.ReplaceSamHeader", "picard.sam.RevertSam", "picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar", "picard.sam.SamFormatConverter",
+ "picard.sam.SamToFastq", "picard.util.ScatterIntervalsByNs", "picard.sam.SortSam", "picard.vcf.SortVcf", "picard.sam.SplitSamByLibrary",
+ "picard.vcf.UpdateVcfSequenceDictionary", "picard.vcf.VcfFormatConverter", "picard.illumina.MarkIlluminaAdapters", "picard.vcf.SplitVcfs",
+ "picard.sam.ValidateSamFile", "picard.sam.ViewSam", "picard.vcf.VcfToIntervalList"]
+
+//generate documentation
+
+task documentAll(dependsOn: ['documentCommands', 'createMetricsDoc', 'documentStandardOptions']){
+ doFirst{
+ htmlDirInc.mkdirs()
+ }
+}
+
+task documentCommands {
+ def previousDocTask = null
+ def usageFile = new File(htmlDirInc, "command-line-usage.html")
+ def sidebarFile = new File(htmlDirInc, "command-line-sidebar.html")
+
+ commandClasses.each { mainClass ->
+ task "document_${mainClass}"(type: JavaExec) {
+ main ='picard.cmdline.CreateHtmlDocForProgram'
+ classpath = sourceSets.main.runtimeClasspath
+ args mainClass
+ def outputFile = new File(htmlDirInc, mainClass.substring(mainClass.lastIndexOf(".") + 1) + ".html")
+ doFirst {
+ htmlDirInc.mkdirs()
+ standardOutput = new FileOutputStream(outputFile)
+ }
+ outputs.file outputFile
+
+ if (previousDocTask != null) delegate.dependsOn previousDocTask
+ previousDocTask = delegate
+ documentCommands.dependsOn(delegate)
+ doLast {
+ usageFile.append("{% include ${mainClass.substring(mainClass.lastIndexOf(".") + 1) + ".html"} %}")
+ usageFile.append(System.getProperty("line.separator"))
+ sidebarFile.append("<li><a href=\"command-line-overview.html#${mainClass.substring(mainClass.lastIndexOf(".") + 1)}\">${mainClass.substring(mainClass.lastIndexOf(".") + 1)}</a>")
+ sidebarFile.append(System.getProperty("line.separator"))
+ }
+ }
+ }
+ outputs.dir htmlDirInc
+}
+
+task documentStandardOptions(type: JavaExec) {
+ main = 'picard.cmdline.CreateHtmlDocForStandardOptions'
+ classpath = sourceSets.main.runtimeClasspath
+ def standardOptionsFile = new File(htmlDirInc, "standard-options.html")
+ doFirst{
+ htmlDirInc.mkdirs()
+ standardOutput = new FileOutputStream(standardOptionsFile)
+ }
+ outputs.file standardOptionsFile
+ }
+
+// Runs the picard.util.MetricsDoclet doclet over the main Java sources and writes into htmlDirInc.
+// These settings must be applied in the configuration closure: when the task was declared with '<<'
+// the closure became a doLast action, so they were only assigned after the Javadoc action had run.
+task createMetricsDoc(dependsOn: classes, type: Javadoc) {
+    source = sourceSets.main.allJava
+    classpath = sourceSets.main.runtimeClasspath
+    destinationDir = htmlDirInc
+    options.doclet = 'picard.util.MetricsDoclet'
+    // the doclet class is loaded from the project's own runtime classpath
+    options.docletpath = sourceSets.main.runtimeClasspath.asType(List)
+}
+//end generate documentation
+
+task wrapper(type: Wrapper) {
+ description = "Regenerate the gradle wrapper"
+ gradleVersion = '2.13'
+}
+
+task javadocJar(type: Jar, dependsOn: documentAll) {
+ classifier = 'javadoc'
+ from 'build/docs/javadoc'
+}
+
+task sourcesJar(type: Jar) {
+ from sourceSets.main.allSource
+ classifier = 'sources'
+}
+
+/**
+ * This specifies what artifacts will be built and uploaded when performing a maven upload.
+ */
+artifacts {
+ archives jar
+ archives javadocJar
+ archives sourcesJar
+}
+
+/**
+ * Sign non-snapshot releases with our secret key. This should never need to be invoked directly.
+ */
+signing {
+ required { isRelease && gradle.taskGraph.hasTask("uploadArchives") }
+ sign configurations.archives
+}
+
+/**
+ * Upload a release to sonatype. You must be an authorized uploader and have your sonatype
+ * username and password information in your gradle properties file. See the readme for more info.
+ *
+ * For releasing to your local maven repo, use gradle install
+ */
+uploadArchives {
+ repositories {
+ mavenDeployer {
+ beforeDeployment { MavenDeployment deployment -> signing.signPom(deployment) }
+
+ repository(url: "https://oss.sonatype.org/service/local/staging/deploy/maven2/") {
+ authentication(userName: project.findProperty("sonatypeUsername"), password: project.findProperty("sonatypePassword"))
+ }
+
+ snapshotRepository(url: "https://artifactory.broadinstitute.org/artifactory/libs-snapshot-local/") {
+ authentication(userName: System.env.ARTIFACTORY_USERNAME, password: System.env.ARTIFACTORY_PASSWORD)
+ }
+
+ pom.project {
+ name 'Picard'
+ packaging 'jar'
+ description 'A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) data and formats such as SAM/BAM/CRAM and VCF.'
+ url 'http://broadinstitute.github.io/picard/'
+
+ developers {
+ developer {
+ id 'picard'
+ name 'Picard Team'
+ url 'http://broadinstitute.github.io/picard'
+ }
+ }
+
+ scm {
+ url 'git@github.com:broadinstitute/picard.git'
+ connection 'scm:git:git@github.com:broadinstitute/picard.git'
+ }
+
+ licenses {
+ license {
+ name 'MIT License'
+ url 'http://opensource.org/licenses/MIT'
+ distribution 'repo'
+ }
+ }
+ }
+ }
+ }
+ doFirst {
+ System.out.println("Uploading version $version")
+ }
+}
+
+//update static web docs
+task copyJavadoc(dependsOn: 'javadoc', type: Copy) {
+ from 'build/docs/javadoc'
+ into "$htmlDir/javadoc"
+}
+
+task updateGhPages(dependsOn: ['copyJavadoc', 'documentAll']){
+ outputs.dir htmlDir
+}
+
+updateGhPages.finalizedBy publishGhPages
+
+githubPages {
+ repoUri = 'git@github.com:broadinstitute/picard.git'
+ targetBranch = 'gh-pages'
+ deleteExistingFiles = false
+ pages {
+ from htmlDir
+ into '.'
+ }
+}
diff --git a/build.sbt b/build.sbt
deleted file mode 100644
index e66a6e7..0000000
--- a/build.sbt
+++ /dev/null
@@ -1,140 +0,0 @@
-import com.typesafe.sbt.SbtGit._
-import de.johoop.testngplugin.TestNGPlugin._
-import sbt.Package.ManifestAttributes
-
-name := "picard"
-
-version := "2.1.1"
-
-organization := "com.github.broadinstitute"
-
-javaSource in Compile := baseDirectory.value / "src/java"
-
-javaSource in Test := baseDirectory.value / "src/tests"
-
-unmanagedResourceDirectories in Test := Seq(baseDirectory.value / "src/scripts", baseDirectory.value / "testdata", baseDirectory.value / "src/tests/scripts")
-
-libraryDependencies ++= Seq(
- "com.github.samtools" % "htsjdk" % "2.1.1",
- ("com.google.cloud.genomics" % "gatk-tools-java" % "1.1" % "picardopt").
- exclude("org.mortbay.jetty", "servlet-api"),
- "org.testng" % "testng" % "6.8.8" % Test
-)
-
-
-testNGSettings
-
-testNGSuites := Seq("src/tests/resources/testng.xml")
-
-autoScalaLibrary := false
-
-publishMavenStyle := true
-
-publishArtifact in Test := false
-
-pomIncludeRepository := { _ => false }
-
-crossPaths := false
-
-javacOptions in (Compile,doc) ++= Seq("-Xdoclint:none")
-
-versionWithGit
-
-assemblyJarName := s"${name.value}-${version.value}.jar"
-
-val PicardOpt = config("picardopt") extend Compile
-
-val gitVersion = settingKey[String]("The picard head commit git hash.")
-
-gitVersion := git.gitHeadCommit.value.get
-
-unmanagedJars in Compile ~= { uj =>
- Seq(Attributed.blank(file(System.getProperty("java.home").dropRight(3) + "lib/tools.jar"))) ++ uj
-}
-
-test in assembly := {}
-
-packageOptions := Seq(ManifestAttributes(
- ("Implementation-Version", s"${version.value}(${gitVersion.value})"),
- ("Implementation-Vendor", "Broad Institute"),
- ("Main-Class", "picard.cmdline.PicardCommandLine"),
- ("Implementation-Title", "PICARD Tools")
-))
-
-publishTo := {
- val nexus = "https://oss.sonatype.org/"
- if (isSnapshot.value)
- Some("snapshots" at nexus + "content/repositories/snapshots")
- else
- Some("releases" at nexus + "service/local/staging/deploy/maven2")
-}
-
-assemblyMergeStrategy in assembly := {
- case x if Assembly.isConfigFile(x) =>
- MergeStrategy.concat
- case PathList(ps@_*) if (Assembly.isReadme(ps.last) || Assembly.isLicenseFile(ps.last)) =>
- MergeStrategy.rename
- case PathList("META-INF", xs@_*) =>
- xs map {
- _.toLowerCase
- } match {
- case ("manifest.mf" :: Nil) | ("index.list" :: Nil) | ("dependencies" :: Nil) =>
- MergeStrategy.discard
- case ps@(x :: xs) if ps.last.endsWith(".sf") || ps.last.endsWith(".dsa") =>
- MergeStrategy.discard
- case "plexus" :: xs =>
- MergeStrategy.discard
- case "spring.tooling" :: xs =>
- MergeStrategy.discard
- case "services" :: xs =>
- MergeStrategy.filterDistinctLines
- case ("spring.schemas" :: Nil) | ("spring.handlers" :: Nil) =>
- MergeStrategy.filterDistinctLines
- case _ => MergeStrategy.deduplicate
- }
- case "asm-license.txt" | "overview.html" =>
- MergeStrategy.discard
- case _ => MergeStrategy.deduplicate
-}
-
-assemblyExcludedJars in assembly := {
- val cp = (fullClasspath in assembly).value
- cp filter { jar =>
- jar.data.getName == "gatk-tools-java-picard-1.1.jar" || jar.data.getName == "tools.jar"
- }
-}
-
-val root = project.in(file(".")).
- configs(PicardOpt).
- settings(inConfig(PicardOpt)(
- Classpaths.configSettings ++ Defaults.configTasks ++ baseAssemblySettings ++ Seq(
- test in assembly := {},
- assemblyJarName := s"${name.value}-opt-${version.value}.jar",
- assemblyExcludedJars in assembly := {
- val cp = (fullClasspath in assembly).value
- cp filter { jar =>
- jar.data.getName == "guava-15.0.jar" || jar.data.getName == "tools.jar"
- }
- }
- )): _*)
-
-
-pomExtra := <url>http://samtools.github.io/htsjdk/</url>
- <licenses>
- <license>
- <name>MIT License</name>
- <url>http://opensource.org/licenses/MIT</url>
- <distribution>repo</distribution>
- </license>
- </licenses>
- <scm>
- <url>git@github.com:samtools/htsjdk.git</url>
- <connection>scm:git:git@github.com:samtools/htsjdk.git</connection>
- </scm>
- <developers>
- <developer>
- <id>picard</id>
- <name>Picard Team</name>
- <url>http://broadinstitute.github.io/picard/</url>
- </developer>
- </developers>
diff --git a/build.xml b/build.xml
old mode 100755
new mode 100644
index b985cd9..8fc6cfe
--- a/build.xml
+++ b/build.xml
@@ -1,620 +1,37 @@
-<?xml version="1.0"?>
-<!--
- ~ The MIT License
- ~
- ~ Copyright (c) 2009 The Broad Institute
- ~
- ~ Permission is hereby granted, free of charge, to any person obtaining a copy
- ~ of this software and associated documentation files (the "Software"), to deal
- ~ in the Software without restriction, including without limitation the rights
- ~ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- ~ copies of the Software, and to permit persons to whom the Software is
- ~ furnished to do so, subject to the following conditions:
- ~
- ~ The above copyright notice and this permission notice shall be included in
- ~ all copies or substantial portions of the Software.
- ~
- ~ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- ~ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- ~ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- ~ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- ~ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- ~ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- ~ THE SOFTWARE.
- -->
+<project name="picard" basedir="." default="all">
-<project name="picard-public" basedir="." default="all">
- <property name="src" value="src/java"/>
- <property name="src.scripts" value="src/scripts"/>
- <property name="src.test" value="src/tests"/>
- <property name="src.test.java" value="${src.test}/java"/>
- <property name="src.www" value="src/www"/>
- <property name="lib" value="lib"/>
- <property name="dist" value="dist"/>
- <property name="dist.tmp" value="dist/tmp"/>
- <property name="classes" value="classes"/>
- <property name="classes.test" value="testclasses"/>
- <property name="scripts" value="src/scripts"/>
- <property name="reports" value="dist/test"/>
- <property name="test.output" value="dist/test"/>
- <property name="htsjdk_lib_dir" value="${dist}/htsjdk_lib_dir"/>
- <property name="htsjdk_git_url" value="git at github.com:samtools/htsjdk.git"/>
- <!-- TODO: get this from the build.xml in htsjdk -->
- <property name="htsjdk-classes" value="htsjdk/classes"/>
+ <property name="gradle.executable" location="gradlew"/>
- <property name="javac.target" value="1.8"/>
- <property name="javac.debug" value="true"/>
+ <echo>
+ ANT IS DEPRECATED FOR BUILDING Picard
- <!-- Get GIT hash, if available, otherwise leave it blank. -->
- <exec executable="git" outputproperty="repository.revision" failifexecutionfails="true" errorproperty="">
- <arg value="log"/>
- <arg value="-1"/>
- <arg value="--pretty=format:%H_%at"/>
- </exec>
- <property name="repository.revision" value=""/>
- <property name="picard-version" value="2.1.1"/>
- <property name="command-line-html-dir" value="${dist}/html"/>
- <property name="testng.verbosity" value="2"/>
- <property name="test.debug.port" value="5005"/>
- <!-- override on the command line if desired -->
+ Please switch to using gradlew
- <property environment="env"/>
- <property name="htsjdk" value="htsjdk"/>
- <property name="htsjdk_src" value="${htsjdk}/src/java"/>
- <property name="jar_opt" value=".jar_opt"/>
- <condition property="isUnix">
- <os family="unix"/>
- </condition>
+ Examples:
+ compile picard or its tests
+ ./gradlew compileJava
+ ./gradlew compileTest
- <!-- Import JaCoCo Ant tasks -->
- <taskdef uri="antlib:org.jacoco.ant" resource="org/jacoco/ant/antlib.xml">
- <classpath path="lib/ant/jacocoant.jar" />
- </taskdef>
+ build a jar
+ ./gradlew jar
- <target name="set_excluded_test_groups_unix" if="isUnix">
- <property name="excludedTestGroups" value="slow, broken"/>
- </target>
- <target name="set_excluded_test_groups_non_unix" unless="isUnix">
- <property name="excludedTestGroups" value="slow, broken, unix"/>
- </target>
- <target name="set_excluded_test_groups" depends="set_excluded_test_groups_unix,set_excluded_test_groups_non_unix"/>
+ build a jar, along with source and document jars
+ ./gradlew build
- <!-- INIT -->
- <target name="init">
- <path id="classpath">
- <fileset dir="${htsjdk_lib_dir}">
- <include name="*.jar"/>
- <include name="**/*.jar"/>
- </fileset>
- <!-- for the specific HTSJDK library JARs -->
- <fileset dir="${htsjdk}/lib">
- <include name="*.jar"/>
- <include name="**/*.jar"/>
- </fileset>
- <fileset dir="${lib}">
- <include name="**/*.jar"/>
- </fileset>
- </path>
- <path id="metrics.classpath">
- <pathelement path="${classpath}"/>
- <pathelement location="${classes}"/>
- <pathelement location="${htsjdk}/classes"/>
- </path>
+ build a jar that packages all of picard's dependencies in a single jar
+ ./gradlew shadowJar
- </target>
+ run tests, or a single test, or run a test and wait for the debugger
+ ./gradlew test
+ ./gradlew test --tests "*ParserTest"
+ ./gradlew test --tests "*ParserTest" --debug-jvm
- <!-- CLEAN -->
- <target name="clean-local" description="Delete local build products but not nested project" depends="clean-jar-opt">
- <delete dir="${classes}"/>
- <delete dir="${classes.test}"/>
- <delete dir="${test.output}"/>
- <delete dir="${dist}"/>
- <delete dir="javadoc"/>
- </target>
+ clean the project directory
+ ./gradlew clean
- <target name="clean" description="Clean local build products and also nested project" depends="clean-local, clean-htsjdk"/>
+ see an exhaustive list of all available targets
+ ./gradlew tasks
+ </echo>
- <!-- HTS-JDK -->
- <target name="set-htsjdk-version">
- <!-- set the htsjdk version -->
- <ant antfile="build.xml" dir="${htsjdk}" target="write-version-property" inheritall="false"/>
- <loadfile property="htsjdk-version" srcFile="${htsjdk}/htsjdk.version.properties">
- <filterchain>
- <linecontains>
- <contains value="htsjdk-version="/>
- </linecontains>
- <tokenfilter>
- <replacestring from="htsjdk-version=" to=""/>
- </tokenfilter>
- <striplinebreaks/>
- </filterchain>
- </loadfile>
- </target>
-
- <target name="clone-htsjdk" description="Clone HTS-JDK sources from Sourceforge">
- <exec executable="git" failonerror="true">
- <arg value="clone"/>
- <arg value="${htsjdk_git_url}"/>
- <arg value="${htsjdk}"/>
- </exec>
- </target>
-
- <target name="compile-htsjdk" depends="set-htsjdk-version" description="Build HTS-JDK, and grab build jars">
- <ant antfile="build.xml" dir="${htsjdk}" target="clean" inheritall="false"/>
- <ant antfile="build.xml" dir="${htsjdk}" target="all" inheritall="false"/>
- <copy todir="${htsjdk_lib_dir}">
- <fileset dir="${htsjdk}/dist" includes="**/*-*.jar"/>
- <fileset dir="${htsjdk}/lib" includes="*.jar"/>
- </copy>
- <copy todir="${dist}">
- <fileset dir="${htsjdk}/dist" includes="**/*.jar" excludes="**/*-*.jar"/>
- </copy>
- <!-- set the htsjdk version -->
- <ant antfile="build.xml" dir="${htsjdk}" target="write-version-property" inheritall="false"/>
- </target>
-
- <target name="clean-htsjdk" description="Clean HTS-JDK">
- <ant antfile="build.xml" dir="${htsjdk}" target="clean" inheritall="false"/>
- </target>
-
- <target name="test-htsjdk" description="Test HTS-JDK">
- <ant antfile="build.xml" dir="${htsjdk}" target="test" inheritall="false"/>
- </target>
-
- <target name="compile-htsjdk-tests" description="Compile HTS-JDK Tests">
- <ant antfile="build.xml" dir="${htsjdk}" target="compile-tests" inheritall="false"/>
- </target>
-
- <!-- COMPILE -->
- <target name="compile" depends="compile-src, compile-tests"
- description="Compile files without cleaning">
- </target>
-
- <target name="compile-src" depends="compile-htsjdk, compile-picard" description="Compile files without cleaning"/>
-
- <target name="compile-picard" depends="init" description="Compile picard files without cleaning">
- <compile-src includes="picard/**/*.*" />
- </target>
-
- <target name="compile-tests" depends="compile-htsjdk-tests, compile-picard-tests" description="Compile test files without cleaning"/>
-
- <target name="compile-picard-tests" depends="init" description="Compile picard test files without cleaning">
- <compile-tests includes="picard/**/*.*"/>
- <copy todir="${classes.test}/">
- <fileset dir="${src.test}/scripts" includes="**/*"/>
- </copy>
- </target>
-
- <target name="build-timestamp">
- <tstamp>
- <format property="build.timestamp" pattern="yyyy.MM.dd hh.mm.ss" locale="en,US"/>
- </tstamp>
- <replace dir="${command-line-html-dir}" token="@builddate@" value="${build.timestamp}">
- <include name="**/*.html"/>
- </replace>
- </target>
-
- <!-- TEST -->
- <target name="test" depends="compile, set_excluded_test_groups" description="Run unit tests">
- <taskdef resource="testngtasks" classpathref="classpath"/>
- <jacoco:coverage destfile="jacoco.data" xmlns:jacoco="antlib:org.jacoco.ant">
- <testng suitename="picard-tests" classpathref="classpath" outputdir="${test.output}"
- failureproperty="tests.failed" excludedgroups="${excludedTestGroups}" workingDir="${basedir}"
- verbose="${testng.verbosity}">
- <classpath>
- <pathelement path="${classes}"/>
- <pathelement path="${classes.test}"/>
- <pathelement path="${scripts}"/>
- </classpath>
- <classfileset dir="${classes.test}">
- <include name="**/Test*.class"/>
- <include name="**/*Test.class"/>
- </classfileset>
- <jvmarg value="-Xmx2G"/>
- </testng>
- </jacoco:coverage>
-
- <junitreport todir="${dist}/test">
- <fileset dir="${test.output}">
- <include name="*.xml"/>
- </fileset>
- <report format="noframes" todir="${dist}/test" styledir="etc/test"/>
- </junitreport>
- <copy file="etc/test/testng.css" todir="${dist}/test" overwrite="true"/>
- <fail if="tests.failed" message="There were failed unit tests"/>
- </target>
-
- <target name="test-coverage-report" depends="test" description="Runs tests and creates an HTML code coverage report">
- <jacoco:report xmlns:jacoco="antlib:org.jacoco.ant">
- <executiondata>
- <file file="jacoco.data"/>
- </executiondata>
- <structure name="Picard">
- <classfiles>
- <fileset dir="classes"/>
- </classfiles>
- <sourcefiles encoding="UTF-8">
- <fileset dir="src"/>
- </sourcefiles>
- </structure>
- <html destdir="report"/>
- </jacoco:report>
- </target>
-
- <target name="single-test"
- depends="compile, compile-tests"
- description="Compile and run a single test.">
- <taskdef resource="testngtasks" classpathref="classpath"/>
- <fail unless="name" message="Please provide input test: -Dname=..."/>
-
- <condition property="debug.jvm.args" value="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=${test.debug.port}"
- else="">
- <isset property="test.debug"/>
- </condition>
-
- <testng suitename="picard-single-test" classpathref="classpath" outputdir="${test.output}"
- verbose="${testng.verbosity}">
- <jvmarg line="-Xmx512M ${debug.jvm.args}"/>
- <classpath>
- <pathelement path="${classes}"/>
- <pathelement path="${classes.test}"/>
- <pathelement path="${scripts}"/>
- </classpath>
- <classfileset dir="${classes.test}">
- <include name="**/${name}.class"/>
- </classfileset>
- </testng>
- </target>
-
- <target name="process-external-jars" depends="clean-jar-opt, maybe-add-gatk-tools-java">
- </target>
-
- <target name="clean-jar-opt">
- <delete dir="${jar_opt}"/>
- <mkdir dir="${jar_opt}"/>
- </target>
-
- <target name="maybe-add-gatk-tools-java" if="addGATKToolsJava">
- <mkdir dir="${jar_opt}"/>
- <unzip dest="${jar_opt}">
- <fileset dir="${lib}/gatk-tools-java">
- <include name="*.jar"/>
- </fileset>
- </unzip>
- </target>
-
- <target name="picard-jar" depends="compile, process-external-jars"
- description="Builds the main executable picard.jar">
- <mkdir dir="${dist}"/>
- <mkdir dir="${dist.tmp}"/>
- <unjar dest="${dist.tmp}">
- <fileset dir="${lib}">
- <exclude name="**/jacocoant.jar"/> <!-- must exclude this jar from packing into picard - this is only used for testing -->
- </fileset>
- <fileset dir="${htsjdk_lib_dir}">
- <include name="*.jar"/>
- </fileset>
- </unjar>
-
- <jar destfile="${dist}/picard.jar" compress="no">
- <fileset dir="${classes}" includes="picard/**/*.*, META-INF/**/*"/>
- <fileset dir="${src.scripts}" includes="**/*.R"/>
- <fileset dir="${htsjdk-classes}" includes ="${htsjdk}/*/**/*.*"/>
- <fileset dir="${dist.tmp}" includes="**/*"/>
- <fileset dir="${jar_opt}" includes="**/*"/>
-
- <manifest>
- <attribute name="Implementation-Version" value="${picard-version}(${repository.revision})"/>
- <attribute name="Implementation-Vendor" value="Broad Institute"/>
- <attribute name="Main-Class" value="picard.cmdline.PicardCommandLine"/>
- <attribute name="Implementation-Title" value="PICARD Tools"/>
- <attribute name="HTSJDK-Version" value="${htsjdk-version}"/>
- </manifest>
- </jar>
-
- <delete dir="${dist.tmp}"/>
- </target>
-
- <target name="picard-lib-jar" depends="compile"
- description="Builds the library: picard-lib.jar">
- <mkdir dir="${dist}"/>
- <jar destfile="${dist}/picard-lib.jar" compress="no">
- <fileset dir="${classes}" includes="picard/**/*.*"/>
- <fileset dir="${src.scripts}" includes="**/*.R"/>
- <manifest>
- <attribute name="Implementation-Version" value="${picard-version}(${repository.revision})"/>
- <attribute name="Implementation-Vendor" value="Broad Institute"/>
- <attribute name="Implementation-Title" value="PICARD Tools Library"/>
- </manifest>
- </jar>
- </target>
-
- <target name="javadoc" depends="init" description="Generates the project javadoc.">
- <javadoc
- destdir="javadoc/picard"
- packagenames="picard*"
- windowtitle="PICARD JDK API Documentation"
- doctitle="<h1>PICARD JDK API Documentation</h1>"
- author="true"
- protected="true"
- use="true"
- version="true"
- additionalparam="-Xdoclint:none -notimestamp"
- failonerror="true"
- excludepackagenames="htsjdk*">
- <classpath>
- <pathelement location="${java.home}/../lib/tools.jar"/>
- <fileset dir="${lib}">
- <include name="**/*.jar"/>
- </fileset>
- <fileset dir="${htsjdk}/dist">
- <include name="**/*.jar"/>
- </fileset>
- <fileset dir="${htsjdk}/lib">
- <include name="**/*.jar"/>
- </fileset>
- </classpath>
- <link href="http://java.sun.com/j2se/1.6.0/docs/api/"/>
- <fileset dir="${src}" defaultexcludes="yes">
- <include name="**/*.java"/>
- </fileset>
- </javadoc>
- <javadoc
- destdir="javadoc/htsjdk"
- packagenames="htsjdk*"
- windowtitle="HTSJDK API Documentation"
- doctitle="<h1>HTSJDK API Documentation</h1>"
- author="true"
- protected="true"
- use="true"
- version="true"
- additionalparam="-Xdoclint:none -notimestamp"
- failonerror="true"
- excludepackagenames="picard*">
- <classpath>
- <pathelement location="${java.home}/../lib/tools.jar"/>
- <fileset dir="${lib}">
- <include name="**/*.jar"/>
- </fileset>
- <fileset dir="${htsjdk}/lib">
- <include name="**/*.jar"/>
- </fileset>
- </classpath>
- <link href="http://java.sun.com/j2se/1.6.0/docs/api/"/>
- <fileset dir="${htsjdk_src}" defaultexcludes="yes">
- <include name="**/*.java"/>
- </fileset>
- </javadoc>
- <mkdir dir="${command-line-html-dir}"/>
- <javadoc doclet="picard.util.MetricsDoclet"
- docletpathref="metrics.classpath"
- classpathref="metrics.classpath"
- failonerror="true" verbose="true">
- <classpath>
- <pathelement location="${java.home}/../lib/tools.jar"/>
- <fileset dir="${lib}">
- <include name="**/*.jar"/>
- </fileset>
- <fileset dir="${htsjdk}/dist">
- <include name="**/*.jar"/>
- </fileset>
- </classpath>
- <fileset dir=".">
- <include name="${src}/**/*.java"/>
- <include name="${htsjdk_src}/**/*.java"/>
- </fileset>
- <arg line="-f ${command-line-html-dir}/picard-metric-definitions.html"/>
- </javadoc>
- </target>
-
- <target name="add-ga4gh-support">
- <property name="addGATKToolsJava" value="1"/>
- </target>
-
- <target name="package-commands-ga4gh" depends="add-ga4gh-support, compile, picard-jar" />
-
- <target name="package-commands" depends="compile, picard-jar">
- <delete dir="${command-line-html-dir}"/>
- <!-- If you don't want to generate on-line doc for a command, use package-command instead of document-command -->
- <document-command title="AddCommentsToBam" main-class="picard.sam.AddCommentsToBam"/>
- <document-command title="AddOrReplaceReadGroups" main-class="picard.sam.AddOrReplaceReadGroups"/>
- <document-command title="BaitDesigner" main-class="picard.util.BaitDesigner"/>
- <document-command title="BamToBfq" main-class="picard.fastq.BamToBfq"/>
- <document-command title="BamIndexStats" main-class="picard.sam.BamIndexStats"/>
- <document-command title="BedToIntervalList" main-class="picard.util.BedToIntervalList"/>
- <document-command title="BuildBamIndex" main-class="picard.sam.BuildBamIndex"/>
- <document-command title="CalculateHsMetrics" main-class="picard.analysis.directed.CalculateHsMetrics"/>
- <document-command title="CollectHsMetrics" main-class="picard.analysis.directed.CollectHsMetrics"/>
- <document-command title="CalculateReadGroupChecksum" main-class="picard.sam.CalculateReadGroupChecksum"/>
- <document-command title="CleanSam" main-class="picard.sam.CleanSam"/>
- <document-command title="CollectAlignmentSummaryMetrics" main-class="picard.analysis.CollectAlignmentSummaryMetrics"/>
- <document-command title="CollectBaseDistributionByCycle" main-class="picard.analysis.CollectBaseDistributionByCycle"/>
- <document-command title="CollectGcBiasMetrics" main-class="picard.analysis.CollectGcBiasMetrics"/>
- <document-command title="CollectHiSeqXPfFailMetrics" main-class="picard.illumina.quality.CollectHiSeqXPfFailMetrics"/>
- <document-command title="CollectHsMetrics" main-class="picard.analysis.directed.CollectHsMetrics"/>
- <document-command title="CollectIlluminaBasecallingMetrics" main-class="picard.illumina.CollectIlluminaBasecallingMetrics"/>
- <document-command title="CollectIlluminaLaneMetrics" main-class="picard.illumina.CollectIlluminaLaneMetrics"/>
- <document-command title="CollectInsertSizeMetrics" main-class="picard.analysis.CollectInsertSizeMetrics"/>
- <document-command title="CollectJumpingLibraryMetrics" main-class="picard.analysis.CollectJumpingLibraryMetrics"/>
- <document-command title="CollectMultipleMetrics" main-class="picard.analysis.CollectMultipleMetrics"/>
- <document-command title="CollectOxoGMetrics" main-class="picard.analysis.CollectOxoGMetrics"/>
- <document-command title="CollectQualityYieldMetrics" main-class="picard.analysis.CollectQualityYieldMetrics"/>
- <document-command title="CollectRawWgsMetrics" main-class="picard.analysis.CollectRawWgsMetrics"/>
- <document-command title="CollectTargetedPcrMetrics" main-class="picard.analysis.directed.CollectTargetedPcrMetrics"/>
- <document-command title="CollectRnaSeqMetrics" main-class="picard.analysis.CollectRnaSeqMetrics"/>
- <document-command title="CollectRrbsMetrics" main-class="picard.analysis.CollectRrbsMetrics"/>
- <document-command title="CollectSequencingArtifactMetrics" main-class="picard.analysis.artifacts.CollectSequencingArtifactMetrics"/>
- <document-command title="CollectVariantCallingMetrics" main-class="picard.vcf.CollectVariantCallingMetrics"/>
- <document-command title="CollectWgsMetrics" main-class="picard.analysis.CollectWgsMetrics"/>
- <document-command title="CollectWgsMetricsFromQuerySorted" main-class="picard.analysis.CollectWgsMetricsFromQuerySorted"/>
- <document-command title="CollectWgsMetricsFromSampledSites" main-class="picard.analysis.CollectWgsMetricsFromSampledSites"/>
- <document-command title="CompareMetrics" main-class="picard.analysis.CompareMetrics"/>
- <document-command title="CompareSAMs" main-class="picard.sam.CompareSAMs"/>
- <document-command title="ConvertSequencingArtifactToOxoG" main-class="picard.analysis.artifacts.ConvertSequencingArtifactToOxoG"/>
- <document-command title="CreateSequenceDictionary" main-class="picard.sam.CreateSequenceDictionary"/>
- <document-command title="DownsampleSam" main-class="picard.sam.DownsampleSam"/>
- <document-command title="ExtractIlluminaBarcodes" main-class="picard.illumina.ExtractIlluminaBarcodes"/>
- <document-command title="EstimateLibraryComplexity" main-class="picard.sam.markduplicates.EstimateLibraryComplexity"/>
- <document-command title="FastqToSam" main-class="picard.sam.FastqToSam"/>
- <document-command title="FifoBuffer" main-class="picard.util.FifoBuffer"/>
- <document-command title="FilterSamReads" main-class="picard.sam.FilterSamReads"/>
- <document-command title="FilterVcf" main-class="picard.vcf.filter.FilterVcf"/>
- <document-command title="FixMateInformation" main-class="picard.sam.FixMateInformation"/>
- <document-command title="GatherBamFiles" main-class="picard.sam.GatherBamFiles"/>
- <document-command title="GatherVcfs" main-class="picard.vcf.GatherVcfs"/>
- <document-command title="GenotypeConcordance" main-class="picard.vcf.GenotypeConcordance"/>
- <document-command title="IlluminaBasecallsToFastq" main-class="picard.illumina.IlluminaBasecallsToFastq"/>
- <document-command title="IlluminaBasecallsToSam" main-class="picard.illumina.IlluminaBasecallsToSam"/>
- <document-command title="CheckIlluminaDirectory" main-class="picard.illumina.CheckIlluminaDirectory"/>
- <document-command title="CheckTerminatorBlock" main-class="picard.sam.CheckTerminatorBlock"/>
- <document-command title="IntervalListTools" main-class="picard.util.IntervalListTools"/>
- <document-command title="LiftOverIntervalList" main-class="picard.util.LiftOverIntervalList"/>
- <document-command title="LiftoverVcf" main-class="picard.vcf.LiftoverVcf"/>
- <document-command title="MakeSitesOnlyVcf" main-class="picard.vcf.MakeSitesOnlyVcf"/>
- <document-command title="MarkDuplicates" main-class="picard.sam.markduplicates.MarkDuplicates"/>
- <document-command title="MarkDuplicatesWithMateCigar" main-class="picard.sam.markduplicates.MarkDuplicatesWithMateCigar"/>
- <document-command title="MeanQualityByCycle" main-class="picard.analysis.MeanQualityByCycle"/>
- <document-command title="MergeBamAlignment" main-class="picard.sam.MergeBamAlignment"/>
- <document-command title="MergeSamFiles" main-class="picard.sam.MergeSamFiles"/>
- <document-command title="MergeVcfs" main-class="picard.vcf.MergeVcfs"/>
- <document-command title="NormalizeFasta" main-class="picard.reference.NormalizeFasta"/>
- <document-command title="PositionBasedDownsampleSam" main-class="picard.sam.PositionBasedDownsampleSam"/>
- <document-command title="ExtractSequences" main-class="picard.reference.ExtractSequences"/>
- <document-command title="QualityScoreDistribution" main-class="picard.analysis.QualityScoreDistribution"/>
- <document-command title="RenameSampleInVcf" main-class="picard.vcf.RenameSampleInVcf"/>
- <document-command title="ReorderSam" main-class="picard.sam.ReorderSam"/>
- <document-command title="ReplaceSamHeader" main-class="picard.sam.ReplaceSamHeader"/>
- <document-command title="RevertSam" main-class="picard.sam.RevertSam"/>
- <document-command title="RevertOriginalBaseQualitiesAndAddMateCigar" main-class="picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar"/>
- <document-command title="SamFormatConverter" main-class="picard.sam.SamFormatConverter"/>
- <document-command title="SamToFastq" main-class="picard.sam.SamToFastq"/>
- <document-command title="ScatterIntervalsByNs" main-class="picard.util.ScatterIntervalsByNs"/>
- <document-command title="SortSam" main-class="picard.sam.SortSam"/>
- <document-command title="SortVcf" main-class="picard.vcf.SortVcf"/>
- <document-command title="SplitSamByLibrary" main-class="picard.sam.SplitSamByLibrary"/>
- <document-command title="UpdateVcfSequenceDictionary" main-class="picard.vcf.UpdateVcfSequenceDictionary"/>
- <document-command title="VcfFormatConverter" main-class="picard.vcf.VcfFormatConverter"/>
- <document-command title="MarkIlluminaAdapters" main-class="picard.illumina.MarkIlluminaAdapters"/>
- <document-command title="SplitVcfs" main-class="picard.vcf.SplitVcfs"/>
- <document-command title="ValidateSamFile" main-class="picard.sam.ValidateSamFile"/>
- <document-command title="ViewSam" main-class="picard.sam.ViewSam"/>
- <document-command title="VcfToIntervalList" main-class="picard.vcf.VcfToIntervalList"/>
- </target>
-
- <!-- ALL -->
- <target name="all" depends="compile, picard-lib-jar, package-commands, document-standard-options, build-timestamp"
- description="Default build target">
- <zip zipfile="${dist}/picard-tools-${picard-version}.zip">
- <zipfileset dir="${dist}" includes="*.jar" prefix="picard-tools-${picard-version}"/>
- <zipfileset dir="${htsjdk_lib_dir}" includes="htsjdk-*.jar" prefix="picard-tools-${picard-version}"/>
- <zipfileset dir="${lib}" includes="snappy*.jar"/>
- <!-- distribute libJniDeflater.so in same directory as jarfiles-->
- <zipfileset dir="${htsjdk}/lib/jni" includes="*" prefix="picard-tools-${picard-version}"/>
- </zip>
- </target>
-
- <!-- ************************************************************************************** -->
- <!-- ************************************************************************************** -->
- <!-- Beginning of taskdefs that are used elsewhere in the build file -->
- <!-- ************************************************************************************** -->
- <!-- ************************************************************************************** -->
-
- <target name="document-standard-options" depends="picard-jar">
- <sequential>
- <!-- For Sourceforge project website -->
- <mkdir dir="${command-line-html-dir}/program_usage"/>
-
- <!-- Generate HTML for each command -->
- <java classname="picard.cmdline.CreateHtmlDocForStandardOptions"
- output="${command-line-html-dir}/program_usage/standard-options.html"
- failonerror="true">
- <classpath>
- <path refid="classpath"/>
- <pathelement location="${classes}"/>
- </classpath>
- </java>
- </sequential>
- </target>
-
- <!-- generate HTML documentation for command-line program -->
- <macrodef name="document-command">
- <attribute name="main-class"/>
- <attribute name="title"/>
- <element name="filesets" optional="yes"/>
- <sequential>
- <!-- For Sourceforge project website -->
- <mkdir dir="${command-line-html-dir}/program_usage"/>
- <mkdir dir="${command-line-html-dir}/inc"/>
-
- <!-- Generate HTML for each command -->
- <java classname="picard.cmdline.CreateHtmlDocForProgram"
- output="${command-line-html-dir}/program_usage/@{title}.html"
- failonerror="true">
- <classpath>
- <path refid="classpath"/>
- <pathelement location="${classes}"/>
- </classpath>
- <arg value="@{main-class}"/>
- </java>
-
- <!-- Generate links to each command -->
- <echo file="${command-line-html-dir}/inc/command-line-sidebar.html" append="true"
- message="<li><a href="command-line-overview.html#@{title}">@{title}</a>${line.separator}"/>
-
- <!-- include usage for each command -->
- <echo file="${command-line-html-dir}/inc/command-line-usage.html" append="true"
- message="{% include @{title}.html %}${line.separator}"/>
- </sequential>
- </macrodef>
-
- <!-- Compile source files specified by includes, from source root. Can specifically
- include or exclude-->
- <macrodef name="compile-src">
- <attribute name="includes" default=""/>
- <attribute name="excludes" default=""/>
- <attribute name="destdir" default="${classes}"/>
- <attribute name="compile.classpath" default="classpath"/>
- <attribute name="compiler.args" default=""/>
- <sequential>
- <mkdir dir="${classes}"/>
- <!-- unset the sourcepath attribute in order to compile only files explicitly specified and disable javac's default searching mechanism -->
- <javac destdir="@{destdir}"
- optimize="${javac.opt}"
- debug="${javac.debug}"
- sourcepath=""
- srcdir="${src}"
- includes="@{includes}"
- excludes="@{excludes}"
- source="${javac.target}"
- target="${javac.target}">
- <classpath refid="@{compile.classpath}"/>
- <compilerarg line="@{compiler.args}"/>
- </javac>
- </sequential>
- </macrodef>
-
- <macrodef name="compile-tests">
- <attribute name="includes" default=""/>
- <attribute name="excludes" default=""/>
- <attribute name="compiler.args" default=""/>
-
- <sequential>
- <mkdir dir="${classes.test}"/>
- <javac destdir="${classes.test}"
- optimize="${javac.opt}"
- debug="${javac.debug}"
- srcdir="${src.test.java}"
- includes="@{includes}"
- excludes="@{excludes}"
- source="${javac.target}"
- target="${javac.target}">
- <classpath>
- <path refid="classpath"/>
- <pathelement location="${classes}"/>
- </classpath>
- <compilerarg line="@{compiler.args}"/>
- </javac>
- </sequential>
- </macrodef>
</project>
diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000..aad2b24
--- /dev/null
+++ b/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,6 @@
+#Fri May 13 14:00:35 EDT 2016
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-2.13-bin.zip
diff --git a/gradlew b/gradlew
new file mode 100755
index 0000000..27309d9
--- /dev/null
+++ b/gradlew
@@ -0,0 +1,164 @@
+#!/usr/bin/env bash
+
+##############################################################################
+##
+## Gradle start up script for UN*X
+##
+##############################################################################
+
+# Attempt to set APP_HOME
+# Resolve links: $0 may be a link
+PRG="$0"
+# Need this for relative symlinks.
+while [ -h "$PRG" ] ; do
+ ls=`ls -ld "$PRG"`
+ link=`expr "$ls" : '.*-> \(.*\)$'`
+ if expr "$link" : '/.*' > /dev/null; then
+ PRG="$link"
+ else
+ PRG=`dirname "$PRG"`"/$link"
+ fi
+done
+SAVED="`pwd`"
+cd "`dirname \"$PRG\"`/" >/dev/null
+APP_HOME="`pwd -P`"
+cd "$SAVED" >/dev/null
+
+APP_NAME="Gradle"
+APP_BASE_NAME=`basename "$0"`
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS=""
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD="maximum"
+
+warn ( ) {
+ echo "$*"
+}
+
+die ( ) {
+ echo
+ echo "$*"
+ echo
+ exit 1
+}
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "`uname`" in
+ CYGWIN* )
+ cygwin=true
+ ;;
+ Darwin* )
+ darwin=true
+ ;;
+ MINGW* )
+ msys=true
+ ;;
+ NONSTOP* )
+ nonstop=true
+ ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+ if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+ # IBM's JDK on AIX uses strange locations for the executables
+ JAVACMD="$JAVA_HOME/jre/sh/java"
+ else
+ JAVACMD="$JAVA_HOME/bin/java"
+ fi
+ if [ ! -x "$JAVACMD" ] ; then
+ die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+ fi
+else
+ JAVACMD="java"
+ which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+fi
+
+# Increase the maximum file descriptors if we can.
+if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then
+ MAX_FD_LIMIT=`ulimit -H -n`
+ if [ $? -eq 0 ] ; then
+ if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then
+ MAX_FD="$MAX_FD_LIMIT"
+ fi
+ ulimit -n $MAX_FD
+ if [ $? -ne 0 ] ; then
+ warn "Could not set maximum file descriptor limit: $MAX_FD"
+ fi
+ else
+ warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT"
+ fi
+fi
+
+# For Darwin, add options to specify how the application appears in the dock
+if $darwin; then
+ GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
+fi
+
+# For Cygwin, switch paths to Windows format before running java
+if $cygwin ; then
+ APP_HOME=`cygpath --path --mixed "$APP_HOME"`
+ CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`
+ JAVACMD=`cygpath --unix "$JAVACMD"`
+
+ # We build the pattern for arguments to be converted via cygpath
+ ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null`
+ SEP=""
+ for dir in $ROOTDIRSRAW ; do
+ ROOTDIRS="$ROOTDIRS$SEP$dir"
+ SEP="|"
+ done
+ OURCYGPATTERN="(^($ROOTDIRS))"
+ # Add a user-defined pattern to the cygpath arguments
+ if [ "$GRADLE_CYGPATTERN" != "" ] ; then
+ OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)"
+ fi
+ # Now convert the arguments - kludge to limit ourselves to /bin/sh
+ i=0
+ for arg in "$@" ; do
+ CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -`
+ CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option
+
+ if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition
+ eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"`
+ else
+ eval `echo args$i`="\"$arg\""
+ fi
+ i=$((i+1))
+ done
+ case $i in
+ (0) set -- ;;
+ (1) set -- "$args0" ;;
+ (2) set -- "$args0" "$args1" ;;
+ (3) set -- "$args0" "$args1" "$args2" ;;
+ (4) set -- "$args0" "$args1" "$args2" "$args3" ;;
+ (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
+ (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
+ (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
+ (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
+ (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
+ esac
+fi
+
+# Split up the JVM_OPTS And GRADLE_OPTS values into an array, following the shell quoting and substitution rules
+function splitJvmOpts() {
+ JVM_OPTS=("$@")
+}
+eval splitJvmOpts $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS
+JVM_OPTS[${#JVM_OPTS[*]}]="-Dorg.gradle.appname=$APP_BASE_NAME"
+
+exec "$JAVACMD" "${JVM_OPTS[@]}" -classpath "$CLASSPATH" org.gradle.wrapper.GradleWrapperMain "$@"
diff --git a/src/java/picard/analysis/directed/TargetedPcrMetrics.java b/src/java/picard/analysis/directed/TargetedPcrMetrics.java
deleted file mode 100644
index e4e560b..0000000
--- a/src/java/picard/analysis/directed/TargetedPcrMetrics.java
+++ /dev/null
@@ -1,148 +0,0 @@
-package picard.analysis.directed;
-
-import picard.metrics.MultilevelMetrics;
-
-/** Metrics class for targeted pcr runs such as TSCA runs */
-public class TargetedPcrMetrics extends MultilevelMetrics {
-
- /** The name of the amplicon set used in this metrics collection run */
- public String CUSTOM_AMPLICON_SET;
-
- /** The number of bases in the reference genome used for alignment. */
- public long GENOME_SIZE;
-
- /** The number of unique bases covered by the intervals of all amplicons in the amplicon set */
- public long AMPLICON_TERRITORY;
-
- /** The number of unique bases covered by the intervals of all targets that should be covered */
- public long TARGET_TERRITORY;
-
- /** The total number of reads in the SAM or BAM file examine. */
- public long TOTAL_READS;
-
- /** The number of reads that pass the vendor's filter. */
- public long PF_READS;
-
- /** THe number of bases in the SAM or BAM file to be examined */
- public long PF_BASES;
-
- /** The number of PF reads that are not marked as duplicates. */
- public long PF_UNIQUE_READS;
-
- /** PF reads / total reads. The percent of reads passing filter. */
- public double PCT_PF_READS;
-
- /** PF Unique Reads / Total Reads. */
- public double PCT_PF_UQ_READS;
-
- /** The number of PF unique reads that are aligned with mapping score > 0 to the reference genome. */
- public long PF_UQ_READS_ALIGNED;
-
- /** Tracks the number of read pairs that we see that are PF (used to calculate library size) */
- public long PF_SELECTED_PAIRS;
-
- /** Tracks the number of unique PF reads pairs we see (used to calc library size) */
- public long PF_SELECTED_UNIQUE_PAIRS;
-
- /** PF Reads Aligned / PF Reads. */
- public double PCT_PF_UQ_READS_ALIGNED;
-
- /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
- public long PF_BASES_ALIGNED;
-
- /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
- public long PF_UQ_BASES_ALIGNED;
-
- /** The number of PF aligned amplified that mapped to an amplified region of the genome. */
- public long ON_AMPLICON_BASES;
-
- /** The number of PF aligned bases that mapped to within a fixed interval of an amplified region, but not on a baited region. */
- public long NEAR_AMPLICON_BASES;
-
- /** The number of PF aligned bases that mapped to neither on or near an amplicon. */
- public long OFF_AMPLICON_BASES;
-
- /** The number of PF aligned bases that mapped to a targeted region of the genome. */
- public long ON_TARGET_BASES;
-
- /** The number of PF aligned bases that are mapped in pair to a targeted region of the genome. */
- public long ON_TARGET_FROM_PAIR_BASES;
-
- /** On+Near Amplicon Bases / PF Bases Aligned. */
- public double PCT_AMPLIFIED_BASES;
-
- /** The percentage of aligned PF bases that mapped neither on or near an amplicon. */
- public double PCT_OFF_AMPLICON;
-
- /** The percentage of on+near amplicon bases that are on as opposed to near. */
- public double ON_AMPLICON_VS_SELECTED;
-
- /** The mean coverage of all amplicons in the experiment. */
- public double MEAN_AMPLICON_COVERAGE;
-
- /** The mean coverage of targets. */
- public double MEAN_TARGET_COVERAGE;
-
- /** The median coverage of targets. */
- public double MEDIAN_TARGET_COVERAGE;
-
- /** The fold by which the amplicon region has been amplified above genomic background. */
- public double FOLD_ENRICHMENT;
-
- /** The fraction of targets that did not reach coverage=1 over any base. */
- public double ZERO_CVG_TARGETS_PCT;
-
- /** The fraction of aligned bases that were filtered out because they were in reads marked as duplicates. */
- public double PCT_EXC_DUPE;
-
- /** The fraction of aligned bases that were filtered out because they were in reads with low mapping quality. */
- public double PCT_EXC_MAPQ;
-
- /** The fraction of aligned bases that were filtered out because they were of low base quality. */
- public double PCT_EXC_BASEQ;
-
- /** The fraction of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads. */
- public double PCT_EXC_OVERLAP;
-
- /** The fraction of aligned bases that were filtered out because they did not align over a target base. */
- public double PCT_EXC_OFF_TARGET;
-
- /**
- * The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to
- * the mean coverage level in those targets.
- */
- public double FOLD_80_BASE_PENALTY;
-
- /** The percentage of all target bases achieving 1X or greater coverage. */
- public double PCT_TARGET_BASES_1X;
- /** The percentage of all target bases achieving 2X or greater coverage. */
- public double PCT_TARGET_BASES_2X;
- /** The percentage of all target bases achieving 10X or greater coverage. */
- public double PCT_TARGET_BASES_10X;
- /** The percentage of all target bases achieving 20X or greater coverage. */
- public double PCT_TARGET_BASES_20X;
- /** The percentage of all target bases achieving 30X or greater coverage. */
- public double PCT_TARGET_BASES_30X;
-
- /**
- * A measure of how undercovered <= 50% GC regions are relative to the mean. For each GC bin [0..50]
- * we calculate a = % of target territory, and b = % of aligned reads aligned to these targets.
- * AT DROPOUT is then abs(sum(a-b when a-b < 0)). E.g. if the value is 5% this implies that 5% of total
- * reads that should have mapped to GC<=50% regions mapped elsewhere.
- */
- public double AT_DROPOUT;
-
- /**
- * A measure of how undercovered >= 50% GC regions are relative to the mean. For each GC bin [50..100]
- * we calculate a = % of target territory, and b = % of aligned reads aligned to these targets.
- * GC DROPOUT is then abs(sum(a-b when a-b < 0)). E.g. if the value is 5% this implies that 5% of total
- * reads that should have mapped to GC>=50% regions mapped elsewhere.
- */
- public double GC_DROPOUT;
-
- /** The theoretical HET SNP sensitivity. */
- public double HET_SNP_SENSITIVITY;
-
- /** The Q Score of the theoretical HET SNP sensitivity. */
- public double HET_SNP_Q;
-}
diff --git a/src/java/picard/sam/RevertSam.java b/src/java/picard/sam/RevertSam.java
deleted file mode 100644
index 7e7d72e..0000000
--- a/src/java/picard/sam/RevertSam.java
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-
-package picard.sam;
-
-import htsjdk.samtools.BAMRecordCodec;
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMFileHeader.SortOrder;
-import htsjdk.samtools.SAMFileWriter;
-import htsjdk.samtools.SAMFileWriterFactory;
-import htsjdk.samtools.SAMReadGroupRecord;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SAMRecordQueryNameComparator;
-import htsjdk.samtools.SAMRecordUtil;
-import htsjdk.samtools.SAMTag;
-import htsjdk.samtools.SamReader;
-import htsjdk.samtools.SamReaderFactory;
-import htsjdk.samtools.filter.FilteringIterator;
-import htsjdk.samtools.filter.SamRecordFilter;
-import htsjdk.samtools.util.CloserUtil;
-import htsjdk.samtools.util.FastqQualityFormat;
-import htsjdk.samtools.util.IOUtil;
-import htsjdk.samtools.util.Log;
-import htsjdk.samtools.util.PeekableIterator;
-import htsjdk.samtools.util.ProgressLogger;
-import htsjdk.samtools.util.QualityEncodingDetector;
-import htsjdk.samtools.util.SolexaQualityConverter;
-import htsjdk.samtools.util.SortingCollection;
-import picard.PicardException;
-import picard.cmdline.CommandLineProgram;
-import picard.cmdline.CommandLineProgramProperties;
-import picard.cmdline.Option;
-import picard.cmdline.StandardOptionDefinitions;
-import picard.cmdline.programgroups.SamOrBam;
-
-import java.io.File;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-
-/**
- * Reverts a SAM file by optionally restoring original quality scores and by removing
- * all alignment information.
- */
- at CommandLineProgramProperties(
- usage = RevertSam.USAGE_SUMMARY + RevertSam.USAGE_DETAILS,
- usageShort = RevertSam.USAGE_SUMMARY,
- programGroup = SamOrBam.class
-)
-public class RevertSam extends CommandLineProgram {
- static final String USAGE_SUMMARY ="Reverts SAM or BAM files to a previous state. ";
- static final String USAGE_DETAILS ="This tool removes or restores certain properties of the SAM records, including alignment " +
- "information, which can be used to produce an unmapped BAM (uBAM) from a previously aligned BAM. It is also capable of " +
- "restoring the original quality scores of a BAM file that has already undergone base quality score recalibration (BQSR) if the" +
- "original qualities were retained." +
- "<h4>Usage example:</h4>" +
- "<pre>" +
- "java -jar picard.jar RevertSam \\<br />" +
- " I=input.bam \\<br />" +
- " O=reverted.bam" +
- "</pre>" +
- "<hr />";
- @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM/BAM file to revert the state of.")
- public File INPUT;
-
- @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output SAM/BAM file to create.")
- public File OUTPUT;
-
- @Option(shortName = "SO", doc = "The sort order to create the reverted output file with.")
- public SortOrder SORT_ORDER = SortOrder.queryname;
-
- @Option(shortName = StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME, doc = "True to restore original qualities from the OQ field to the QUAL field if available.")
- public boolean RESTORE_ORIGINAL_QUALITIES = true;
-
- @Option(doc = "Remove duplicate read flags from all reads. Note that if this is true and REMOVE_ALIGNMENT_INFORMATION==false, " +
- " the output may have the unusual but sometimes desirable trait of having unmapped reads that are marked as duplicates.")
- public boolean REMOVE_DUPLICATE_INFORMATION = true;
-
- @Option(doc = "Remove all alignment information from the file.")
- public boolean REMOVE_ALIGNMENT_INFORMATION = true;
-
- @Option(doc = "When removing alignment information, the set of optional tags to remove.")
- public List<String> ATTRIBUTE_TO_CLEAR = new ArrayList<String>() {{
- add(SAMTag.NM.name());
- add(SAMTag.UQ.name());
- add(SAMTag.PG.name());
- add(SAMTag.MD.name());
- add(SAMTag.MQ.name());
- add(SAMTag.SA.name()); // Supplementary alignment metadata
- add(SAMTag.MC.name()); // Mate Cigar
- add(SAMTag.AS.name());
- }};
-
- @Option(doc = "WARNING: This option is potentially destructive. If enabled will discard reads in order to produce " +
- "a consistent output BAM. Reads discarded include (but are not limited to) paired reads with missing " +
- "mates, duplicated records, records with mismatches in length of bases and qualities. This option can " +
- "only be enabled if the output sort order is queryname and will always cause sorting to occur.")
- public boolean SANITIZE = false;
-
- @Option(doc = "If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to sanitization then" +
- "the program will exit with an Exception instead of exiting cleanly. Output BAM will still be valid.")
- public double MAX_DISCARD_FRACTION = 0.01;
-
- @Option(doc = "The sample alias to use in the reverted output file. This will override the existing " +
- "sample alias in the file and is used only if all the read groups in the input file have the " +
- "same sample alias ", shortName = StandardOptionDefinitions.SAMPLE_ALIAS_SHORT_NAME, optional = true)
- public String SAMPLE_ALIAS;
-
- @Option(doc = "The library name to use in the reverted output file. This will override the existing " +
- "sample alias in the file and is used only if all the read groups in the input file have the " +
- "same sample alias ", shortName = StandardOptionDefinitions.LIBRARY_NAME_SHORT_NAME, optional = true)
- public String LIBRARY_NAME;
-
- private final static Log log = Log.getInstance(RevertSam.class);
-
- /** Default main method impl. */
- public static void main(final String[] args) {
- new RevertSam().instanceMainWithExit(args);
- }
-
- /**
- * Enforce that output ordering is queryname when sanitization is turned on since it requires a queryname sort.
- */
- @Override
- protected String[] customCommandLineValidation() {
- if (SANITIZE && SORT_ORDER != SortOrder.queryname) {
- return new String[]{"SORT_ORDER must be queryname when sanitization is enabled with SANITIZE=true."};
- }
-
- return null;
- }
-
- protected int doWork() {
- IOUtil.assertFileIsReadable(INPUT);
- IOUtil.assertFileIsWritable(OUTPUT);
-
- final boolean sanitizing = SANITIZE;
- final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
- final SAMFileHeader inHeader = in.getFileHeader();
-
- // If we are going to override SAMPLE_ALIAS or LIBRARY_NAME, make sure all the read
- // groups have the same values.
- final List<SAMReadGroupRecord> rgs = inHeader.getReadGroups();
- if (SAMPLE_ALIAS != null || LIBRARY_NAME != null) {
- boolean allSampleAliasesIdentical = true;
- boolean allLibraryNamesIdentical = true;
- for (int i = 1; i < rgs.size(); i++) {
- if (!rgs.get(0).getSample().equals(rgs.get(i).getSample())) {
- allSampleAliasesIdentical = false;
- }
- if (!rgs.get(0).getLibrary().equals(rgs.get(i).getLibrary())) {
- allLibraryNamesIdentical = false;
- }
- }
- if (SAMPLE_ALIAS != null && !allSampleAliasesIdentical) {
- throw new PicardException("Read groups have multiple values for sample. " +
- "A value for SAMPLE_ALIAS cannot be supplied.");
- }
- if (LIBRARY_NAME != null && !allLibraryNamesIdentical) {
- throw new PicardException("Read groups have multiple values for library name. " +
- "A value for library name cannot be supplied.");
- }
- }
-
- ////////////////////////////////////////////////////////////////////////////
- // Build the output writer with an appropriate header based on the options
- ////////////////////////////////////////////////////////////////////////////
- final boolean presorted = (inHeader.getSortOrder() == SORT_ORDER) || (SORT_ORDER == SortOrder.queryname && SANITIZE);
- final SAMFileHeader outHeader = new SAMFileHeader();
- for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
- if (SAMPLE_ALIAS != null) {
- rg.setSample(SAMPLE_ALIAS);
- }
- if (LIBRARY_NAME != null) {
- rg.setLibrary(LIBRARY_NAME);
- }
- outHeader.addReadGroup(rg);
- }
- outHeader.setSortOrder(SORT_ORDER);
- if (!REMOVE_ALIGNMENT_INFORMATION) {
- outHeader.setSequenceDictionary(inHeader.getSequenceDictionary());
- outHeader.setProgramRecords(inHeader.getProgramRecords());
- }
-
- final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, presorted, OUTPUT);
-
- ////////////////////////////////////////////////////////////////////////////
- // Build a sorting collection to use if we are sanitizing
- ////////////////////////////////////////////////////////////////////////////
- final SortingCollection<SAMRecord> sorter;
- if (sanitizing) {
- sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader), new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
- } else {
- sorter = null;
- }
-
- final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
- for (final SAMRecord rec : in) {
- // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
- if (rec.isSecondaryOrSupplementary()) continue;
-
- // log the progress before you revert because otherwise the "last read position" might not be accurate
- progress.record(rec);
-
- // Actually do the reverting of the remaining records
- revertSamRecord(rec);
-
- if (sanitizing) sorter.add(rec);
- else out.addAlignment(rec);
- }
-
- ////////////////////////////////////////////////////////////////////////////
- // Now if we're sanitizing, clean up the records and write them to the output
- ////////////////////////////////////////////////////////////////////////////
- if (!sanitizing) {
- out.close();
- } else {
-
- long total = 0, discarded = 0;
- final PeekableIterator<SAMRecord> iterator = new PeekableIterator<SAMRecord>(sorter.iterator());
- final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<SAMReadGroupRecord, FastqQualityFormat>();
-
- // Figure out the quality score encoding scheme for each read group.
- for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
- final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
- final SamRecordFilter filter = new SamRecordFilter() {
- public boolean filterOut(final SAMRecord rec) {
- return !rec.getReadGroup().getId().equals(rg.getId());
- }
-
- public boolean filterOut(final SAMRecord first, final SAMRecord second) {
- throw new UnsupportedOperationException();
- }
- };
- readGroupToFormat.put(rg, QualityEncodingDetector.detect(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, new FilteringIterator(reader.iterator(), filter), RESTORE_ORIGINAL_QUALITIES));
- CloserUtil.close(reader);
- }
- for (final SAMReadGroupRecord r : readGroupToFormat.keySet()) {
- log.info("Detected quality format for " + r.getReadGroupId() + ": " + readGroupToFormat.get(r));
- }
- if (readGroupToFormat.values().contains(FastqQualityFormat.Solexa)) {
- log.error("No quality score encoding conversion implemented for " + FastqQualityFormat.Solexa);
- return -1;
- }
-
-
- final ProgressLogger sanitizerProgress = new ProgressLogger(log, 1000000, "Sanitized");
-
- readNameLoop:
- while (iterator.hasNext()) {
- final List<SAMRecord> recs = fetchByReadName(iterator);
- total += recs.size();
-
- // Check that all the reads have bases and qualities of the same length
- for (final SAMRecord rec : recs) {
- if (rec.getReadBases().length != rec.getBaseQualities().length) {
- log.debug("Discarding " + recs.size() + " reads with name " + rec.getReadName() + " for mismatching bases and quals length.");
- discarded += recs.size();
- continue readNameLoop;
- }
- }
-
- // Check that if the first read is marked as unpaired that there is in fact only one read
- if (!recs.get(0).getReadPairedFlag() && recs.size() > 1) {
- log.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because they claim to be unpaired.");
- discarded += recs.size();
- continue readNameLoop;
- }
-
- // Check that if we have paired reads there is exactly one first of pair and one second of pair
- if (recs.get(0).getReadPairedFlag()) {
- int firsts = 0, seconds = 0, unpaired = 0;
- for (final SAMRecord rec : recs) {
- if (!rec.getReadPairedFlag()) ++unpaired;
- if (rec.getFirstOfPairFlag()) ++firsts;
- if (rec.getSecondOfPairFlag()) ++seconds;
- }
-
- if (unpaired > 0 || firsts != 1 || seconds != 1) {
- log.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because pairing information in corrupt.");
- discarded += recs.size();
- continue readNameLoop;
- }
- }
-
- // If we've made it this far spit the records into the output!
- for (final SAMRecord rec : recs) {
- // The only valid quality score encoding scheme is standard; if it's not standard, change it.
- final FastqQualityFormat recordFormat = readGroupToFormat.get(rec.getReadGroup());
- if (!recordFormat.equals(FastqQualityFormat.Standard)) {
- final byte[] quals = rec.getBaseQualities();
- for (int i = 0; i < quals.length; i++) {
- quals[i] -= SolexaQualityConverter.ILLUMINA_TO_PHRED_SUBTRAHEND;
- }
- rec.setBaseQualities(quals);
- }
- out.addAlignment(rec);
- sanitizerProgress.record(rec);
- }
- }
-
- out.close();
-
- final double discardRate = discarded / (double) total;
- final NumberFormat fmt = new DecimalFormat("0.000%");
- log.info("Discarded " + discarded + " out of " + total + " (" + fmt.format(discardRate) + ") reads in order to sanitize output.");
-
- if (discarded / (double) total > MAX_DISCARD_FRACTION) {
- throw new PicardException("Discarded " + fmt.format(discardRate) + " which is above MAX_DISCARD_FRACTION of " + fmt.format(MAX_DISCARD_FRACTION));
- }
- }
-
- CloserUtil.close(in);
- return 0;
- }
-
- /**
- * Generates a list by consuming from the iterator in order starting with the first available
- * read and continuing while subsequent reads share the same read name. If there are no reads
- * remaining returns an empty list.
- */
- private List<SAMRecord> fetchByReadName(final PeekableIterator<SAMRecord> iterator) {
- final List<SAMRecord> out = new LinkedList<SAMRecord>();
-
- if (iterator.hasNext()) {
- final SAMRecord first = iterator.next();
- out.add(first);
-
- while (iterator.hasNext() && iterator.peek().getReadName().equals(first.getReadName())) {
- out.add(iterator.next());
- }
- }
-
- return out;
- }
-
- /**
- * Takes an individual SAMRecord and applies the set of changes/reversions to it that
- * have been requested by program level options.
- */
- public void revertSamRecord(final SAMRecord rec) {
- if (RESTORE_ORIGINAL_QUALITIES) {
- final byte[] oq = rec.getOriginalBaseQualities();
- if (oq != null) {
- rec.setBaseQualities(oq);
- rec.setOriginalBaseQualities(null);
- }
- }
-
- if (REMOVE_DUPLICATE_INFORMATION) {
- rec.setDuplicateReadFlag(false);
- }
-
- if (REMOVE_ALIGNMENT_INFORMATION) {
- if (rec.getReadNegativeStrandFlag()) {
- SAMRecordUtil.reverseComplement(rec);
- rec.setReadNegativeStrandFlag(false);
- }
-
- // Remove all alignment based information about the read itself
- rec.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
- rec.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
- rec.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
- rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
-
- if (!rec.getReadUnmappedFlag()) {
- rec.setInferredInsertSize(0);
- rec.setNotPrimaryAlignmentFlag(false);
- rec.setProperPairFlag(false);
- rec.setReadUnmappedFlag(true);
-
- }
-
- // Then remove any mate flags and info related to alignment
- if (rec.getReadPairedFlag()) {
- rec.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
- rec.setMateNegativeStrandFlag(false);
- rec.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
- rec.setMateUnmappedFlag(true);
- }
-
- // And then remove any tags that are calculated from the alignment
- for (final String tag : ATTRIBUTE_TO_CLEAR) {
- rec.setAttribute(tag, null);
- }
- }
- }
-
-}
diff --git a/src/java/picard/PicardException.java b/src/main/java/picard/PicardException.java
similarity index 100%
rename from src/java/picard/PicardException.java
rename to src/main/java/picard/PicardException.java
diff --git a/src/java/picard/Test.java b/src/main/java/picard/Test.java
similarity index 100%
rename from src/java/picard/Test.java
rename to src/main/java/picard/Test.java
diff --git a/src/java/picard/analysis/AdapterUtility.java b/src/main/java/picard/analysis/AdapterUtility.java
similarity index 97%
rename from src/java/picard/analysis/AdapterUtility.java
rename to src/main/java/picard/analysis/AdapterUtility.java
index 9b9135b..b04a650 100644
--- a/src/java/picard/analysis/AdapterUtility.java
+++ b/src/main/java/picard/analysis/AdapterUtility.java
@@ -104,8 +104,8 @@ public class AdapterUtility {
int errors = 0;
for (int i=0; i<adapter.length; ++i) {
- if (read[i] != adapter[i]) {
- if (++errors > MAX_ADAPTER_ERRORS) break;
+ if (read[i] != adapter[i] && ++errors > MAX_ADAPTER_ERRORS) {
+ break;
}
}
diff --git a/src/java/picard/analysis/AlignmentSummaryMetrics.java b/src/main/java/picard/analysis/AlignmentSummaryMetrics.java
similarity index 100%
rename from src/java/picard/analysis/AlignmentSummaryMetrics.java
rename to src/main/java/picard/analysis/AlignmentSummaryMetrics.java
diff --git a/src/java/picard/analysis/AlignmentSummaryMetricsCollector.java b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java
similarity index 95%
rename from src/java/picard/analysis/AlignmentSummaryMetricsCollector.java
rename to src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java
index 43d78cc..6d9507c 100644
--- a/src/java/picard/analysis/AlignmentSummaryMetricsCollector.java
+++ b/src/main/java/picard/analysis/AlignmentSummaryMetricsCollector.java
@@ -205,7 +205,7 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
//Calculate BAD_CYCLES
metrics.BAD_CYCLES = 0;
- for (final Histogram<Integer>.Bin cycleBin : badCycleHistogram.values()) {
+ for (final Histogram.Bin<Integer> cycleBin : badCycleHistogram.values()) {
final double badCyclePercentage = cycleBin.getValue() / metrics.TOTAL_READS;
if (badCyclePercentage >= .8) {
metrics.BAD_CYCLES++;
@@ -306,17 +306,16 @@ public class AlignmentSummaryMetricsCollector extends SAMRecordAndReferenceMulti
final int readBaseIndex = readIndex + i;
boolean mismatch = !SequenceUtil.basesEqual(readBases[readBaseIndex], refBases[refIndex+i]);
boolean bisulfiteBase = false;
- if (mismatch && isBisulfiteSequenced) {
- if ( (record.getReadNegativeStrandFlag() &&
- (refBases[refIndex+i] == 'G' || refBases[refIndex+i] =='g') &&
- (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a'))
- || ((!record.getReadNegativeStrandFlag()) &&
- (refBases[refIndex+i] == 'C' || refBases[refIndex+i] == 'c') &&
- (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't') ) {
-
- bisulfiteBase = true;
- mismatch = false;
- }
+ if (mismatch && isBisulfiteSequenced &&
+ record.getReadNegativeStrandFlag() &&
+ (refBases[refIndex + i] == 'G' || refBases[refIndex + i] == 'g') &&
+ (readBases[readBaseIndex] == 'A' || readBases[readBaseIndex] == 'a')
+ || ((!record.getReadNegativeStrandFlag()) &&
+ (refBases[refIndex + i] == 'C' || refBases[refIndex + i] == 'c') &&
+ (readBases[readBaseIndex] == 'T') || readBases[readBaseIndex] == 't')) {
+
+ bisulfiteBase = true;
+ mismatch = false;
}
if(mismatch) mismatchCount++;
diff --git a/src/java/picard/analysis/BaseDistributionByCycleMetrics.java b/src/main/java/picard/analysis/BaseDistributionByCycleMetrics.java
similarity index 100%
rename from src/java/picard/analysis/BaseDistributionByCycleMetrics.java
rename to src/main/java/picard/analysis/BaseDistributionByCycleMetrics.java
diff --git a/src/java/picard/analysis/ChimeraUtil.java b/src/main/java/picard/analysis/ChimeraUtil.java
similarity index 100%
rename from src/java/picard/analysis/ChimeraUtil.java
rename to src/main/java/picard/analysis/ChimeraUtil.java
diff --git a/src/java/picard/analysis/CollectAlignmentSummaryMetrics.java b/src/main/java/picard/analysis/CollectAlignmentSummaryMetrics.java
similarity index 84%
rename from src/java/picard/analysis/CollectAlignmentSummaryMetrics.java
rename to src/main/java/picard/analysis/CollectAlignmentSummaryMetrics.java
index db41884..fec087d 100644
--- a/src/java/picard/analysis/CollectAlignmentSummaryMetrics.java
+++ b/src/main/java/picard/analysis/CollectAlignmentSummaryMetrics.java
@@ -78,10 +78,14 @@ import java.util.Set;
programGroup = Metrics.class
)
public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram {
- static final String USAGE_SUMMARY = "Produce a summary of alignment metrics from a SAM or BAM file";
- static final String USAGE_DETAILS = "Using read outputs from high throughput sequencing (HTS) technologies, this tool provides " +
- "metrics regarding the quality of read alignments to a reference sequence, as well as the proportion of the reads " +
- "that passed machine signal-to-noise threshold quality filters (Illumina)."+
+ static final String USAGE_SUMMARY = "<b>Produces a summary of alignment metrics from a SAM or BAM file.</b> ";
+ static final String USAGE_DETAILS = "This tool takes a SAM/BAM file input and produces metrics detailing the quality of the read " +
+ "alignments as well as the proportion of the reads that passed machine signal-to-noise threshold quality filters. " +
+ "Note that these quality filters are specific to Illumina data; for additional information, please see the corresponding " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>. </p>" +
+ "" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
+
"<h4>Usage example:</h4>" +
"<pre>" +
" java -jar picard.jar CollectAlignmentSummaryMetrics \\<br />" +
@@ -89,12 +93,12 @@ public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram {
" I=input.bam \\<br />" +
" O=output.txt" +
"</pre>"+
- "Please see <a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics'>" +
- "the AlignmentSummaryMetrics documentation</a> for detailed explanations of each metric. <br /> <br />" +
- "Additional information about Illumina's quality filters can be found in the following documents on the Illumina website: " +
- "<ul><li>http://support.illumina.com/content/dam/illumina-marketing/documents/products/technotes/hiseq-x-percent-pf-technical-note-770-2014-043.pdf</li> " +
- "<li>http://support.illumina.com/content/dam/illumina-support/documents/documentation/system_documentation/hiseqx/hiseq-x-system-guide-15050091-d.pdf</li></ul>" +
+
+ "<p>Please see the CollectAlignmentSummaryMetrics " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#AlignmentSummaryMetrics'>definitions</a> " +
+ "for a complete description of the metrics produced by this tool.</p>" +
"<hr />";
+
private static final Log log = Log.getInstance(CollectAlignmentSummaryMetrics.class);
@Option(doc="Paired-end reads above this insert size will be considered chimeric along with inter-chromosomal pairs.")
@@ -103,13 +107,13 @@ public class CollectAlignmentSummaryMetrics extends SinglePassSamProgram {
@Option(doc="Paired-end reads that do not have this expected orientation will be considered chimeric.")
public Set<PairOrientation> EXPECTED_PAIR_ORIENTATIONS = EnumSet.copyOf(ChimeraUtil.DEFAULT_EXPECTED_ORIENTATIONS);
- @Option(doc="List of adapter sequences to use when processing the alignment metrics")
+ @Option(doc="List of adapter sequences to use when processing the alignment metrics.")
public List<String> ADAPTER_SEQUENCE = AdapterUtility.DEFAULT_ADAPTER_SEQUENCE;
- @Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics. ")
+ @Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.")
public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
- @Option(shortName="BS", doc="Whether the SAM or BAM file consists of bisulfite sequenced reads. ")
+ @Option(shortName="BS", doc="Whether the SAM or BAM file consists of bisulfite sequenced reads.")
public boolean IS_BISULFITE_SEQUENCED = false;
//overridden to make it visible on the commandline and to change the doc.
diff --git a/src/java/picard/analysis/CollectBaseDistributionByCycle.java b/src/main/java/picard/analysis/CollectBaseDistributionByCycle.java
similarity index 95%
rename from src/java/picard/analysis/CollectBaseDistributionByCycle.java
rename to src/main/java/picard/analysis/CollectBaseDistributionByCycle.java
index 7a5af91..f60109b 100644
--- a/src/java/picard/analysis/CollectBaseDistributionByCycle.java
+++ b/src/main/java/picard/analysis/CollectBaseDistributionByCycle.java
@@ -56,30 +56,33 @@ public class CollectBaseDistributionByCycle extends SinglePassSamProgram {
"in order to enable assessment of systematic errors at specific positions in the reads.<br /><br />" +
"" +
"<h4>Interpretation notes</h4>" +
- "Increased numbers of miscalled bases will be reflected in base distribution changes and increases in the number of Ns. " +
- "In general, we expect that for any given cycle, or position within reads, the relative proportions of A, T, C and G " +
- "should reflect the AT:GC content of the organism's genome. Thus, for all four nucleotides, flattish lines would be " +
- "expected. Deviations from this expectation, for example a spike of A at a particular cycle (position within reads), " +
+ "Increased numbers of miscalled bases will be reflected in base distribution changes and increases in the number of Ns. "+
+ "In general, we expect that for any given cycle, or position within reads, the relative proportions of A, T, C and G "+
+ "should reflect the AT:GC content of the organism's genome. Thus, for all four nucleotides, flattish lines would be "+
+ "expected. Deviations from this expectation, for example a spike of A at a particular cycle (position within reads), "+
"would suggest a systematic sequencing error."+
"" +
"<h4>Note on quality trimming</h4>" +
- "In the past, many sequencing data processing workflows included discarding the low-quality tails of reads by applying " +
- "hard-clipping at some arbitrary base quality threshold value. This is no longer useful because most sophisticated " +
- "analysis tools (such as the GATK variant discovery tools) are quality-aware, meaning that they are able to take base " +
- "quality into account when weighing evidence provided by sequencing reads. Unnecessary clipping may interfere with other " +
- "quality control evaluations and may lower the quality of analysis results. For example, trimming reduces the " +
- "effectiveness of the Base Recalibration (BQSR) pre-processing step of the " +
- "<a href='https://www.broadinstitute.org/gatk/guide/best-practices'>GATK Best Practices for Variant Discovery</a>, " +
- "which aims to correct some types of systematic biases that affect the accuracy of base quality scores. " +
- "" +
- "<br /><h4>Usage example:</h4>" +
+ "In the past, many sequencing data processing workflows included discarding the low-quality tails of reads by applying "+
+ "hard-clipping at some arbitrary base quality threshold value. This is no longer useful because most sophisticated "+
+ "analysis tools (such as the GATK variant discovery tools) are quality-aware, meaning that they are able to take base "+
+ "quality into account when weighing evidence provided by sequencing reads. Unnecessary clipping may interfere with other "+
+ "quality control evaluations and may lower the quality of analysis results. For example, trimming reduces the effectiveness "+
+ "of the Base Recalibration (BQSR) pre-processing step of the "+
+ "<a href='https://www.broadinstitute.org/gatk/guide/best-practices'>GATK Best Practices for Variant Discovery</a>, "+
+ "which aims to correct some types of systematic biases that affect the accuracy of base quality scores."+
+
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>"+
+
+ "<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar CollectBaseDistributionByCycle \\<br />" +
" CHART=collect_base_dist_by_cycle.pdf \\<br />" +
" I=input.bam \\<br />" +
" O=output.txt" +
"</pre>" +
- "<hr />";
+ "<hr />"
+ ;
@Option(shortName = "CHART", doc = "A file (with .pdf extension) to write the chart to.")
public File CHART_OUTPUT;
diff --git a/src/java/picard/analysis/CollectGcBiasMetrics.java b/src/main/java/picard/analysis/CollectGcBiasMetrics.java
similarity index 95%
rename from src/java/picard/analysis/CollectGcBiasMetrics.java
rename to src/main/java/picard/analysis/CollectGcBiasMetrics.java
index 46008bb..a0ca80e 100644
--- a/src/java/picard/analysis/CollectGcBiasMetrics.java
+++ b/src/main/java/picard/analysis/CollectGcBiasMetrics.java
@@ -61,13 +61,13 @@ public class CollectGcBiasMetrics extends SinglePassSamProgram {
static final String USAGE_SUMMARY = "Collect metrics regarding GC bias. ";
static final String USAGE_DETAILS = "This tool collects information about the relative proportions of guanine (G) and cytosine (C)" +
" nucleotides in a sample. Regions of high and low G + C content have been shown to interfere with mapping/aligning," +
- " ultimately leading to fragmented genome assemblies and poor coverage in a phenomenon known as \"GC bias\". " +
+ " ultimately leading to fragmented genome assemblies and poor coverage in a phenomenon known as 'GC bias'. " +
"Detailed information on the effects of GC bias on the collection and analysis of sequencing data can be found at " +
"DOI: 10.1371/journal.pone.0062856/.<br /><br />" +
"" +
- "The GC bias statistics are always output in a detailed long-form version, but a summary can also be produced. Both the " +
- "detailed metrics and the summary metrics are output as tables (\".txt\" files) and an accompanying chart that plots the " +
- "data (\".pdf\" file). <br /><br /> " +
+ "<p>The GC bias statistics are always output in a detailed long-form version, but a summary can also be produced. Both the " +
+ "detailed metrics and the summary metrics are output as tables ('.txt' files) and an accompanying chart that plots the " +
+ "data ('.pdf' file). </p> " +
"" +
"<h4>Detailed metrics</h4>" +
"The table of detailed metrics includes GC percentages for each bin (GC), the percentage of WINDOWS corresponding to each " +
@@ -87,7 +87,7 @@ public class CollectGcBiasMetrics extends SinglePassSamProgram {
"produced in a run. In addition, the tool produces both AT_DROPOUT and GC_DROPOUT metrics, which indicate the percentage of " +
"misaligned reads that correlate with low (%-GC is < 50%) or high (%-GC is > 50%) GC content respectively. <br /><br />" +
"" +
- "The percentage of \"coverage\" or depth in a GC bin is calculated by dividing the number of reads of a particular GC content " +
+ "The percentage of 'coverage' or depth in a GC bin is calculated by dividing the number of reads of a particular GC content " +
"by the mean number of reads of all GC bins. A number of 1 represents mean coverage, a number less than 1 represents lower " +
"than mean coverage (e.g. 0.5 means half as much coverage as average) while a number greater than 1 represents higher than " +
"mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average). " +
@@ -97,7 +97,7 @@ public class CollectGcBiasMetrics extends SinglePassSamProgram {
"" +
"The chart output associated with this data table plots the NORMALIZED_COVERAGE, the distribution of WINDOWs corresponding " +
"to GC percentages, and base qualities corresponding to each %GC bin."+
- "" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
"<h4>Usage Example:</h4>"+
"<pre>" +
"java -jar picard.jar CollectGcBiasMetrics \\<br />"+
diff --git a/src/java/picard/analysis/CollectInsertSizeMetrics.java b/src/main/java/picard/analysis/CollectInsertSizeMetrics.java
similarity index 93%
rename from src/java/picard/analysis/CollectInsertSizeMetrics.java
rename to src/main/java/picard/analysis/CollectInsertSizeMetrics.java
index 4dccc0c..dffea1e 100644
--- a/src/java/picard/analysis/CollectInsertSizeMetrics.java
+++ b/src/main/java/picard/analysis/CollectInsertSizeMetrics.java
@@ -54,16 +54,18 @@ import java.util.Set;
)
public class CollectInsertSizeMetrics extends SinglePassSamProgram {
static final String USAGE_BRIEF = "Collect metrics about the insert size distribution of a paired-end library.";
- static final String USAGE_SUMMARY = "This tool provides useful metrics for validating library construction including " +
- "the insert size distribution and read orientation of paired-end libraries. <br /><br />" +
+ static final String USAGE_SUMMARY = "<p>This tool provides useful metrics for validating library construction including " +
+ "the insert size distribution and read orientation of paired-end libraries.</p>" +
"" +
"The expected proportions of these metrics vary depending on the type of library preparation used, resulting from " +
- "technical differences between pair-end libraries and mate-pair libraries. For a brief primer on paired-end sequencing and mate-pair reads, see <a href='http://gatkforums.broadinstitute.org/discussion/6327/paired-end-mate-pair'>the GATK Dictionary</a>" +
+ "technical differences between pair-end libraries and mate-pair libraries. For a brief primer on paired-end sequencing " +
+ "and mate-pair reads, see the " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6327'>GATK Dictionary</a>." +
"" +
- "<br /><br />The CollectInsertSizeMetrics tool outputs the percentages of read pairs in each of the three orientations " +
+ "<p>The CollectInsertSizeMetrics tool outputs the percentages of read pairs in each of the three orientations " +
"(FR, RF, and TANDEM) as a histogram. In addition, the insert size distribution is output as both a histogram " +
- "(.insert_size_Histogram.pdf) and as a data table (.insert_size_metrics.txt)." +
- ""+
+ "(.insert_size_Histogram.pdf) and as a data table (.insert_size_metrics.txt).</p>" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>"+
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar CollectInsertSizeMetrics \\<br />" +
@@ -75,7 +77,7 @@ public class CollectInsertSizeMetrics extends SinglePassSamProgram {
"Note: If processing a small file, set the minimum percentage option (M) to 0.5, otherwise an error may occur. "+
"<br /><br />" +
"Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#InsertSizeMetrics'>" +
- "the InsertSizeMetrics documentation</a> for further explanations of each metric." +
+ "InsertSizeMetrics</a> for detailed explanations of each metric." +
"<hr />";
private static final Log log = Log.getInstance(CollectInsertSizeMetrics.class);
diff --git a/src/java/picard/analysis/CollectJumpingLibraryMetrics.java b/src/main/java/picard/analysis/CollectJumpingLibraryMetrics.java
similarity index 80%
rename from src/java/picard/analysis/CollectJumpingLibraryMetrics.java
rename to src/main/java/picard/analysis/CollectJumpingLibraryMetrics.java
index 2b7a295..f719632 100644
--- a/src/java/picard/analysis/CollectJumpingLibraryMetrics.java
+++ b/src/main/java/picard/analysis/CollectJumpingLibraryMetrics.java
@@ -61,26 +61,28 @@ import java.util.List;
)
public class CollectJumpingLibraryMetrics extends CommandLineProgram {
static final String USAGE_SUMMARY = "Collect jumping library metrics. ";
- static final String USAGE_DETAILS = "This tool collects high-level metrics about the " +
- "presence of outward-facing (jumping) and inward-facing (non-jumping) read pairs within a SAM or BAM file.<br /><br />" +
- "For a brief primer on jumping libraries, see <a href='http://gatkforums.broadinstitute.org/discussion/6326/jumping-libraries'>" +
- "the GATK Dictionary</a>." +
- "<br /><br />." +
- "This program gets all data for computation from the first read in each pair in which the mapping quality (MQ) tag " +
- "is set with the mate's mapping quality. If the MQ tag is not set, then the program assumes that the mate's MQ is " +
- "greater than or equal to MINIMUM_MAPPING_QUALITY (default value is 0).<br /><br /> "+
- "All the output metrics files are structured text files." +
- "<br /><br />" +
- "<h4>Usage example:</h4>" +
- "<pre>" +
- "java -jar picard.jar CollectJumpingLibraryMetrics \\<br />" +
- " I=input.bam \\<br />" +
- " O=jumping_metrics.txt" +
- "</pre>" +
- "<hr />" +
- "" +
- "Please see <a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#JumpingLibraryMetrics'>" +
- "the JumpingLibraryMetrics documentation</a> for details and explanations of the output metrics.";
+ static final String USAGE_DETAILS = "<p>This tool collects high-level metrics about the " +
+"presence of outward-facing (jumping) and inward-facing (non-jumping) read pairs within a SAM or BAM file. " +
+"For a brief primer on jumping libraries, see the GATK "+
+"<a href='https://www.broadinstitute.org/gatk/guide/article?id=6326'>Dictionary</a>.</p>" +
+
+"<p>This program gets all data for computation from the first read in each pair in which the mapping quality (MQ) tag " +
+"is set with the mate's mapping quality. If the MQ tag is not set, then the program assumes that the mate's MQ is " +
+"greater than or equal to MINIMUM_MAPPING_QUALITY (default value is 0).</p> "+
+
+"<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
+
+"<h4>Usage example:</h4>" +
+"<pre>" +
+"java -jar picard.jar CollectJumpingLibraryMetrics \\<br />" +
+" I=input.bam \\<br />" +
+" O=jumping_metrics.txt" +
+"</pre>" +
+
+"Please see the output metrics documentation on "+
+"<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#JumpingLibraryMetrics'>JumpingLibraryMetrics</a> "+
+"for detailed explanations of the output metrics."+
+"<hr />";
// Usage and parameters
@@ -247,19 +249,15 @@ public class CollectJumpingLibraryMetrics extends CommandLineProgram {
break;
} else if (sam.getReadUnmappedFlag() || sam.getMateUnmappedFlag()) {
continue;
- } else {
- if ((sam.getAttribute(SAMTag.MQ.name()) == null ||
- sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) &&
- sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY &&
- sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() &&
- sam.getMateReferenceIndex().equals(sam.getReferenceIndex())) {
- if (SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
- histo.increment(Math.abs(sam.getInferredInsertSize()));
- sampled++;
- }
- }
+ } else if ((sam.getAttribute(SAMTag.MQ.name()) == null ||
+ sam.getIntegerAttribute(SAMTag.MQ.name()) >= MINIMUM_MAPPING_QUALITY) &&
+ sam.getMappingQuality() >= MINIMUM_MAPPING_QUALITY &&
+ sam.getMateNegativeStrandFlag() != sam.getReadNegativeStrandFlag() &&
+ sam.getMateReferenceIndex().equals(sam.getReferenceIndex()) &&
+ SamPairUtil.getPairOrientation(sam) == PairOrientation.RF) {
+ histo.increment(Math.abs(sam.getInferredInsertSize()));
+ sampled++;
}
-
}
CloserUtil.close(reader);
}
diff --git a/src/java/picard/analysis/CollectMultipleMetrics.java b/src/main/java/picard/analysis/CollectMultipleMetrics.java
similarity index 89%
rename from src/java/picard/analysis/CollectMultipleMetrics.java
rename to src/main/java/picard/analysis/CollectMultipleMetrics.java
index ab7fd11..4b53813 100644
--- a/src/java/picard/analysis/CollectMultipleMetrics.java
+++ b/src/main/java/picard/analysis/CollectMultipleMetrics.java
@@ -58,16 +58,18 @@ public class CollectMultipleMetrics extends CommandLineProgram {
*/
static final String USAGE_SUMMARY ="Collect multiple classes of metrics. ";
- static final String USAGE_DETAILS ="This \"meta-metrics\" tool runs one or more of the metrics collection modules at the same time to cut down " +
- "on the time spent reading in data from input files. Available modules include CollectAlignmentSummaryMetrics, " +
- "CollectInsertSizeMetrics, QualityScoreDistribution, MeanQualityByCycle, and CollectBaseDistributionByCycle. " +
- "The tool produces outputs of \".pdf\" and \".txt\" files for each module, except for the CollectAlignmentSummaryMetrics " +
- "module, which outputs only a \".txt\" file. Output files are named by specifying a base name (without any file extensions)." +
- "<br /><br />" +
+ static final String USAGE_DETAILS ="This 'meta-metrics' tool runs one or more of the metrics collection modules at the same" +
+ " time to cut down on the time spent reading in data from input files. Available modules include " +
+ "CollectAlignmentSummaryMetrics, CollectInsertSizeMetrics, QualityScoreDistribution, MeanQualityByCycle, " +
+ "and CollectBaseDistributionByCycle. The tool produces outputs of '.pdf' and '.txt' files for each module, except for the " +
+ "CollectAlignmentSummaryMetrics module, which outputs only a '.txt' file. Output files are named by specifying a base name " +
+ "(without any file extensions).<br /><br />" +
+ "" +
+ "<p>Currently all programs are run with default options and fixed output extensions, " +
+ "but this may become more flexible in future. Specifying a reference sequence file is required.</p>" +
+
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
"" +
- "Currently all programs are run with default options and fixed output extensions, " +
- "but this may become more flexible in future. Specifying a reference sequence file is required." +
- "<br />" +
"<h4>Usage example (all modules on by default):</h4>" +
"<pre>" +
"java -jar picard.jar CollectMultipleMetrics \\<br />" +
@@ -326,27 +328,29 @@ public class CollectMultipleMetrics extends CommandLineProgram {
private final Set<MetricAccumulationLevel> accumLevelDefault = CollectionUtil.makeSet(MetricAccumulationLevel.ALL_READS);
@Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics.")
- public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = new HashSet<MetricAccumulationLevel>(accumLevelDefault);
+ public Set<MetricAccumulationLevel> METRIC_ACCUMULATION_LEVEL = new HashSet<>(accumLevelDefault);
@Option(shortName = "EXT", doc="Append the given file extension to all metric file names (ex. OUTPUT.insert_size_metrics.EXT). None if null", optional=true)
public String FILE_EXTENSION = null;
- @Option(doc = "List of metrics programs to apply during the pass through the SAM file.")
- public List<Program> PROGRAM = CollectionUtil.makeList(Program.CollectAlignmentSummaryMetrics, Program.CollectBaseDistributionByCycle,
- Program.CollectInsertSizeMetrics, Program.MeanQualityByCycle, Program.QualityScoreDistribution);
+ @Option(doc = "Set of metrics programs to apply during the pass through the SAM file.")
+ public Set<Program> PROGRAM = new LinkedHashSet<>(Arrays.asList(Program.CollectAlignmentSummaryMetrics, Program.CollectBaseDistributionByCycle,
+ Program.CollectInsertSizeMetrics, Program.MeanQualityByCycle, Program.QualityScoreDistribution));
- @Option(doc = "An optional list of intervals to restrict analysis to.", optional = true)
+ @Option(doc = "An optional list of intervals to restrict analysis to. Only pertains to some of the PROGRAMs. Programs whose stand-alone CLP does not " +
+ "have an INTERVALS argument will silently ignore this argument.", optional = true)
public File INTERVALS;
- @Option(doc = "VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis.", optional = true)
+ @Option(doc = "VCF format dbSNP file, used to exclude regions around known polymorphisms from analysis " +
+ "by some PROGRAMs. PROGRAMs whose CLP doesn't allow for this argument will quietly ignore it.", optional = true)
public File DB_SNP;
/**
- * Contents of PROGRAM list is transferred to this list during command-line validation, so that an outside
+ * Contents of PROGRAM set is transferred to this set during command-line validation, so that an outside
* developer can invoke this class programmatically and provide alternative Programs to run by calling
* setProgramsToRun().
*/
- private List<ProgramInterface> programsToRun;
+ private Set<ProgramInterface> programsToRun;
private static final Log log = Log.getInstance(CollectMultipleMetrics.class);
@@ -360,7 +364,7 @@ public class CollectMultipleMetrics extends CommandLineProgram {
if (PROGRAM.isEmpty()) {
return new String[]{"No programs specified with PROGRAM"};
}
- programsToRun = new ArrayList<ProgramInterface>(PROGRAM);
+ programsToRun = new LinkedHashSet<>(PROGRAM);
return super.customCommandLineValidation();
}
@@ -369,8 +373,9 @@ public class CollectMultipleMetrics extends CommandLineProgram {
* Use this method when invoking CollectMultipleMetrics programmatically to run programs other than the ones
* available via enum. This must be called before doWork().
*/
- public void setProgramsToRun(final List<ProgramInterface> programsToRun) {
- this.programsToRun = programsToRun;
+ public void setProgramsToRun(final Collection<ProgramInterface> programsToRun) {
+ this.programsToRun.clear();
+ this.programsToRun.addAll(programsToRun);
}
@Override
@@ -379,9 +384,9 @@ public class CollectMultipleMetrics extends CommandLineProgram {
OUTPUT = OUTPUT.substring(0, OUTPUT.length() - 1);
}
- final List<SinglePassSamProgram> programs = new ArrayList<SinglePassSamProgram>();
- for (final ProgramInterface program : new HashSet<ProgramInterface>(programsToRun)) {
- if (program.needsReferenceSequence() && REFERENCE_SEQUENCE==null) {
+ final List<SinglePassSamProgram> programs = new ArrayList<>();
+ for (final ProgramInterface program : programsToRun) {
+ if (program.needsReferenceSequence() && REFERENCE_SEQUENCE == null) {
throw new PicardException("The " + program.toString() + " program needs a Reference Sequence, please set REFERENCE_SEQUENCE in the command line");
}
if (!accumLevelDefault.equals(METRIC_ACCUMULATION_LEVEL) && !program.supportsMetricAccumulationLevel()) {
diff --git a/src/java/picard/analysis/CollectOxoGMetrics.java b/src/main/java/picard/analysis/CollectOxoGMetrics.java
similarity index 98%
rename from src/java/picard/analysis/CollectOxoGMetrics.java
rename to src/main/java/picard/analysis/CollectOxoGMetrics.java
index 920b21f..5ff0878 100644
--- a/src/java/picard/analysis/CollectOxoGMetrics.java
+++ b/src/main/java/picard/analysis/CollectOxoGMetrics.java
@@ -77,9 +77,13 @@ public class CollectOxoGMetrics extends CommandLineProgram {
"the GATK Dictionary</a>." +
"<br /><br />" +
"This tool calculates the Phred-scaled probability that an alternate base call results from an oxidation artifact. This " +
- "probability score is based on base context, sequencing read orientation, and the characteristic low allelic frequency " +
- "(doi:10.1093/nar/gks1443). Lower probability values implicate artifacts resulting from 8-oxoguanine, while higher " +
- "probability values suggest that an alternate base call is due to either some other type of artifact or is a real variant." +
+ "probability score is based on base context, sequencing read orientation, and the characteristic low allelic frequency. " +
+ "Please see the following reference for an in-depth " +
+ "<a href='http://nar.oxfordjournals.org/content/early/2013/01/08/nar.gks1443'>discussion</a>" +
+ " of the OxoG error rate. " +
+ "<p>Lower probability values implicate artifacts resulting from 8-oxoguanine, while higher " +
+ "probability values suggest that an alternate base call is due to either some other type of artifact or is a " +
+ "real variant.</p>" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar CollectOxoGMetrics \\<br />" +
diff --git a/src/java/picard/analysis/CollectQualityYieldMetrics.java b/src/main/java/picard/analysis/CollectQualityYieldMetrics.java
similarity index 98%
rename from src/java/picard/analysis/CollectQualityYieldMetrics.java
rename to src/main/java/picard/analysis/CollectQualityYieldMetrics.java
index 6d80eb8..e2f9a58 100644
--- a/src/java/picard/analysis/CollectQualityYieldMetrics.java
+++ b/src/main/java/picard/analysis/CollectQualityYieldMetrics.java
@@ -58,7 +58,8 @@ public class CollectQualityYieldMetrics extends SinglePassSamProgram {
static final String USAGE_SUMMARY = "Collect metrics about reads that pass quality thresholds and Illumina-specific filters. ";
static final String USAGE_DETAILS = "This tool evaluates the overall quality of reads within a bam file containing one read group. " +
"The output indicates the total numbers of bases within a read group that pass a minimum base quality score threshold and " +
- "(in the case of Illumina data) pass Illumina quality filters as described in the <a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>. " +
+ "(in the case of Illumina data) pass Illumina quality filters as described in the " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>. " +
"<br />" +
"<h4>Note on base quality score options</h4>" +
"If the quality score of read bases has been modified in a previous data processing step such as " +
diff --git a/src/java/picard/analysis/CollectRawWgsMetrics.java b/src/main/java/picard/analysis/CollectRawWgsMetrics.java
similarity index 89%
rename from src/java/picard/analysis/CollectRawWgsMetrics.java
rename to src/main/java/picard/analysis/CollectRawWgsMetrics.java
index 1d81529..bde9b13 100644
--- a/src/java/picard/analysis/CollectRawWgsMetrics.java
+++ b/src/main/java/picard/analysis/CollectRawWgsMetrics.java
@@ -47,17 +47,17 @@ public class CollectRawWgsMetrics extends CollectWgsMetrics{
" minimal base- and mapping- quality filters as well as coverage (read-depth) levels. " +
"<br /><br /> " +
"The histogram output is optional and for a given run, displays two separate outputs on the y-axis while using a single set" +
- " of values for the x-axis. Specifically, the first column in the histogram table (x-axis) is labeled \"coverage\" and " +
+ " of values for the x-axis. Specifically, the first column in the histogram table (x-axis) is labeled 'coverage' and " +
"represents different possible coverage depths. However, it also represents the range of values for the base quality scores " +
- "and thus should probably be labeled \"sequence depth and base quality scores\". The second and third columns (y-axes) " +
- "correspond to the numbers of bases at a specific sequence depth \"count\" and the numbers of bases at a particular base " +
- "quality score \"baseq_count\" respectively." +
+ "and thus should probably be labeled 'sequence depth and base quality scores'. The second and third columns (y-axes) " +
+ "correspond to the numbers of bases at a specific sequence depth 'count' and the numbers of bases at a particular base " +
+ "quality score 'baseq_count' respectively." +
"<br /><br />" +
"Although similar to the CollectWgsMetrics tool, the default thresholds for CollectRawWgsMetrics are less stringent. " +
- "For example, the CollectRawWgsMetrics have base and mapping quality score thresholds set to \"3\" and \"0\" respectively, " +
- "while the CollectWgsMetrics tool has the default threshold values set to \"20\" (at time of writing). Nevertheless, both " +
+ "For example, the CollectRawWgsMetrics have base and mapping quality score thresholds set to '3' and '0' respectively, " +
+ "while the CollectWgsMetrics tool has the default threshold values set to '20' (at time of writing). Nevertheless, both " +
"tools enable the user to input specific threshold values." +
- "" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar CollectRawWgsMetrics \\<br />" +
diff --git a/src/java/picard/analysis/CollectRnaSeqMetrics.java b/src/main/java/picard/analysis/CollectRnaSeqMetrics.java
similarity index 68%
rename from src/java/picard/analysis/CollectRnaSeqMetrics.java
rename to src/main/java/picard/analysis/CollectRnaSeqMetrics.java
index 996105a..dfeb671 100644
--- a/src/java/picard/analysis/CollectRnaSeqMetrics.java
+++ b/src/main/java/picard/analysis/CollectRnaSeqMetrics.java
@@ -49,12 +49,49 @@ import java.util.List;
import java.util.Set;
@CommandLineProgramProperties(
- usage = "Collect metrics about the alignment of RNA to various functional classes of loci in the genome:" +
- "coding, intronic, UTR, intergenic, ribosomal. Also determines strand-specificity for strand-specific libraries.",
- usageShort = "Produces RNA alignment metrics for a SAM or BAM file",
+ usage = CollectRnaSeqMetrics.USAGE_SUMMARY + CollectRnaSeqMetrics.USAGE_DETAILS,
+ usageShort = CollectRnaSeqMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectRnaSeqMetrics extends SinglePassSamProgram {
+static final String USAGE_SUMMARY = "Produces RNA alignment metrics for a SAM or BAM file. ";
+static final String USAGE_DETAILS = "<p>This tool takes a SAM/BAM file containing the aligned reads from an RNAseq experiment "+
+"and produces metrics describing the distribution of the bases within the transcripts. It calculates the total numbers and the "+
+"fractions of nucleotides within specific genomic regions including untranslated regions (UTRs), introns, intergenic sequences "+
+"(between discrete genes), and peptide-coding sequences (exons). This tool also determines the numbers of bases that pass quality filters "+
+"that are specific to Illumina data (PF_BASES). For more information please see the corresponding GATK "+
+"<a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>Dictionary</a> entry.</p>" +
+
+"<p>Other metrics include the median coverage (depth), the ratios of 5 prime /3 prime-biases, and the numbers of reads with the "+
+"correct/incorrect strand designation. The 5 prime /3 prime-bias results from errors introduced by reverse transcriptase enzymes "+
+"during library construction, ultimately leading to the over-representation of either the 5 prime or 3 prime ends of transcripts. "+
+"Please see the CollectRnaSeqMetrics "+
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#RnaSeqMetrics'>definitions</a> "+
+"for details on how these biases are calculated. </p>" +
+
+"<p>The sequence input must be a valid SAM/BAM file containing RNAseq data aligned by an RNAseq-aware genome aligner such as "+
+"<a href='http://github.com/alexdobin/STAR'>STAR</a> or <a href='http://ccb.jhu.edu/software/tophat/index.shtml'>TopHat</a>. "+
+"The tool also requires a REF_FLAT file, a tab-delimited file containing information about the location of RNA transcripts, "+
+"exon start and stop sites, etc. For more information on the REF_FLAT format, see the following "+
+"<a href='http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat'>description</a>. "+
+"Build-specific REF_FLAT files can be obtained <a href='http://hgdownload.cse.ucsc.edu/goldenPath/'>here</a>.</p>"+
+
+"<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>"+
+"<h4>Usage example:</h4>"+
+"<pre>" +
+"java -jar picard.jar CollectRnaSeqMetrics \\<br />" +
+" I=input.bam \\<br />" +
+" O=output.RNA_Metrics \\<br />" +
+" REF_FLAT=ref_flat.txt \\<br />" +
+" STRAND=SECOND_READ_TRANSCRIPTION_STRAND \\<br />" +
+" RIBOSOMAL_INTERVALS=ribosomal.interval_list" +
+"</pre>" +
+"Please see the CollectRnaSeqMetrics " +
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#RnaSeqMetrics'>definitions</a> " +
+"for a complete description of the metrics produced by this tool." +
+"<hr />"
+;
+
private static final Log LOG = Log.getInstance(CollectRnaSeqMetrics.class);
@Option(doc="Gene annotations in refFlat form. Format described here: http://genome.ucsc.edu/goldenPath/gbdDescriptionsOld.html#RefFlat")
@@ -62,7 +99,7 @@ public class CollectRnaSeqMetrics extends SinglePassSamProgram {
@Option(doc="Location of rRNA sequences in genome, in interval_list format. " +
"If not specified no bases will be identified as being ribosomal. " +
- "Format described here: http://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/util/IntervalList.html", optional = true)
+ "Format described <a href=\"http://samtools.github.io/htsjdk/javadoc/htsjdk/htsjdk/samtools/util/IntervalList.html\">here</a>.", optional = true)
public File RIBOSOMAL_INTERVALS;
@Option(shortName = "STRAND", doc="For strand-specific library prep. " +
@@ -79,7 +116,7 @@ public class CollectRnaSeqMetrics extends SinglePassSamProgram {
"These reads are not counted as ")
public Set<String> IGNORE_SEQUENCE = new HashSet<String>();
- @Option(doc="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair by this must in order to be considered rRNA.")
+ @Option(doc="This percentage of the length of a fragment must overlap one of the ribosomal intervals for a read or read pair to be considered rRNA.")
public double RRNA_FRAGMENT_PERCENTAGE = 0.8;
@Option(shortName="LEVEL", doc="The level(s) at which to accumulate metrics. ")
diff --git a/src/java/picard/analysis/CollectRrbsMetrics.java b/src/main/java/picard/analysis/CollectRrbsMetrics.java
similarity index 78%
rename from src/java/picard/analysis/CollectRrbsMetrics.java
rename to src/main/java/picard/analysis/CollectRrbsMetrics.java
index db175c5..d698b91 100644
--- a/src/java/picard/analysis/CollectRrbsMetrics.java
+++ b/src/main/java/picard/analysis/CollectRrbsMetrics.java
@@ -63,41 +63,36 @@ import java.util.Set;
programGroup = Metrics.class
)
public class CollectRrbsMetrics extends CommandLineProgram {
- static final String USAGE_SUMMARY = "Collect metrics from reduced representation bisulfite sequencing (RRBS) data. ";
- static final String USAGE_DETAILS = "This tool collect metrics for RRBS data, based on the methylation status of cytosine (C) " +
- "bases in both CpG and non-CpG sites across all reads of a BAM/SAM file. For a brief primer on bisulfite sequencing and " +
- "cytosine methylation, see the " +
- "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6330'>GATK Dictionary</a>." +
- "<br /><br />" +
- "" +
- "Since cytosine methylation is not exclusive for CpG \"hotspots\", the CollectRrbsMetrics tool outputs a summary table " +
- "indicating the number of CpG and non-CpG cytosines as well as their conversion C -> T (+ strand) or G -> A (- strand) " +
- "rates. The tool also outputs the numbers of reads having no CpG sites, and the numbers of reads discarded from the " +
- "analysis due to inadequate size or excessive numbers of mismatches." +
- "<br /><br />" +
- "The tool also provides a table containing detailed information on CpG occurrence frequency, CpG conversion frequencies " +
- "[C -> T (+ strand) or G -> A (- strand)], and the specific locations of the CpG sites in the genome. The conversion " +
- "frequency helps determines the methylation status of a CpG site." +
- "<br /><br />" +
- "Finally, the tool provides graphical representation of four metrics in the form of a \".pdf\" document. These metrics " +
- "are the bisulfite conversion rate for CpG and non-CpG cytosines, a distribution of the numbers of CpG sites as a " +
- "function of CpG conversion rate, the distribution of CpG sites by read coverage, and the numbers of reads discarded due " +
- "to high numbers of mismatches or inadequate read size." +
- "" +
+ static final String USAGE_SUMMARY = "<b>Collects metrics from reduced representation bisulfite sequencing (Rrbs) data.</b> ";
+ static final String USAGE_DETAILS = "<p>This tool uses reduced representation bisulfite sequencing (Rrbs) data to determine cytosine " +
+ "methylation status across all reads of a genomic DNA sequence. For a primer on bisulfite sequencing and cytosine methylation, " +
+ "please see the corresponding <a href='https://www.broadinstitute.org/gatk/guide/article?id=6330'>GATK Dictionary entry</a>. </p>" +
+
+ "<p>Briefly, bisulfite reduction converts un-methylated cytosine (C) to uracil (U) bases. Methylated sites are not converted " +
+ "because they are resistant to bisulfite reduction. Subsequent to PCR amplification of the reaction products, bisulfite " +
+ "reduction manifests as [C -> T (+ strand) or G -> A (- strand)] base conversions. Thus, conversion rates" +
+ " can be calculated from the reads as follows: [CR = converted/(converted + unconverted)]. Since methylated cytosines are " +
+ "protected against Rrbs-mediated conversion, the methylation rate (MR) is as follows:" +
+ "[MR = unconverted/(converted + unconverted) = (1 - CR)].</p>" +
+
+ "<p>The CpG CollectRrbsMetrics tool outputs three files including summary and detail metrics tables as well as a PDF file containing " +
+ "four graphs. These graphs are derived from the summary table and include a comparison of conversion rates for both CpG and non-CpG sites, " +
+ "the distribution of total numbers of CpG sites as a function of the CpG conversion rates, the distribution of CpG sites by the level of " +
+ "read coverage (depth), and the numbers of reads discarded resulting from either exceeding the mismatch rate or size (too short). " +
+ "The detailed metrics table includes the coordinates of all of the CpG sites for the experiment as well as the conversion rates " +
+ "observed for each site.</p>" +
+
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar CollectRrbsMetrics \\<br />" +
+ " R=reference_sequence.fasta \\<br />" +
" I=input.bam \\<br />" +
- " M=rrbs_metrics \\<br />" +
- " R=reference_sequence.fasta" +
+ " M=basename_for_metrics_files" +
"</pre>" +
- "<hr />" +
- "" +
- "Please see " +
- "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsCpgDetailMetrics'>" +
- "the RrbsCpgDetailMetrics documentation</a> and the " +
- "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsSummaryMetrics'>" +
- "the RrbsSummaryMetrics documentation</a>for detailed explanations of the output metrics." +
+
+ "<p>Please see the CollectRrbsMetrics " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#RrbsCpgDetailMetrics'>definitions</a>" +
+ " for a complete description of both the detail and summary metrics produced by this tool.</p>" +
"<hr />";
// Path to R file for plotting purposes
diff --git a/src/java/picard/analysis/CollectWgsMetrics.java b/src/main/java/picard/analysis/CollectWgsMetrics.java
similarity index 53%
rename from src/java/picard/analysis/CollectWgsMetrics.java
rename to src/main/java/picard/analysis/CollectWgsMetrics.java
index e9708fb..0b76735 100644
--- a/src/java/picard/analysis/CollectWgsMetrics.java
+++ b/src/main/java/picard/analysis/CollectWgsMetrics.java
@@ -23,6 +23,7 @@
*/
package picard.analysis;
+import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.filter.SamRecordFilter;
@@ -31,12 +32,19 @@ import htsjdk.samtools.metrics.MetricBase;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.reference.ReferenceSequence;
import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
-import htsjdk.samtools.util.*;
+import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.IntervalList;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.QualityUtil;
+import htsjdk.samtools.util.SamLocusIterator;
+import htsjdk.samtools.util.SequenceUtil;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
-import picard.cmdline.programgroups.Metrics;
import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Metrics;
import picard.filter.CountingDuplicateFilter;
import picard.filter.CountingFilter;
import picard.filter.CountingMapQFilter;
@@ -47,7 +55,6 @@ import java.io.File;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
-import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
@@ -61,21 +68,24 @@ import java.util.stream.IntStream;
programGroup = Metrics.class
)
public class CollectWgsMetrics extends CommandLineProgram {
- static final String USAGE_SUMMARY = "Collect metrics about coverage and performance of whole genome sequencing (WGS) experiments.";
- static final String USAGE_DETAILS = "This tool collects metrics about the percentages of reads that pass base- and mapping- quality " +
- "filters as well as coverage (read-depth) levels. Both minimum base- and mapping-quality values as well as the maximum " +
- "read depths (coverage cap) are user defined." +
- "<h4>Usage Example:</h4>" +
- "<pre>" +
- "java -jar picard.jar CollectWgsMetrics \\<br /> " +
- " I=input.bam \\<br /> "+
- " O=collect_wgs_metrics.txt \\<br /> " +
- " R=reference_sequence.fasta " +
- "</pre>" +
- "Please see " +
- "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics'>" +
- "the WgsMetrics documentation</a>for detailed explanations of the output metrics." +
- "<hr />";
+static final String USAGE_SUMMARY = "Collect metrics about coverage and performance of whole genome sequencing (WGS) experiments.";
+static final String USAGE_DETAILS = "<p>This tool collects metrics about the fractions of reads that pass base- and mapping-quality "+
+"filters as well as coverage (read-depth) levels for WGS analyses. Both minimum base- and mapping-quality values as well as the maximum "+
+"read depths (coverage cap) are user defined.</p>" +
+
+"<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>"+
+"<h4>Usage Example:</h4>"+
+"<pre>" +
+"java -jar picard.jar CollectWgsMetrics \\<br /> " +
+" I=input.bam \\<br /> "+
+" O=collect_wgs_metrics.txt \\<br /> " +
+" R=reference_sequence.fasta " +
+"</pre>" +
+"Please see "+
+"<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetrics.WgsMetrics'>CollectWgsMetrics</a> "+
+"for detailed explanations of the output metrics." +
+"<hr />"
+;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input SAM or BAM file.")
public File INPUT;
@@ -89,7 +99,8 @@ public class CollectWgsMetrics extends CommandLineProgram {
@Option(shortName = "MQ", doc = "Minimum mapping quality for a read to contribute coverage.", overridable = true)
public int MINIMUM_MAPPING_QUALITY = 20;
- @Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage.", overridable = true)
+ @Option(shortName = "Q", doc = "Minimum base quality for a base to contribute coverage. N bases will be treated as having a base quality " +
+ "of negative infinity and will therefore be excluded from coverage regardless of the value of this parameter.", overridable = true)
public int MINIMUM_BASE_QUALITY = 20;
@Option(shortName = "CAP", doc = "Treat positions with coverage exceeding this value as if they had coverage at this value (but calculate the difference for PCT_EXC_CAPPED).", overridable = true)
@@ -110,11 +121,21 @@ public class CollectWgsMetrics extends CommandLineProgram {
@Option(doc="Sample Size used for Theoretical Het Sensitivity sampling. Default is 10000.", optional = true)
public int SAMPLE_SIZE=10000;
+ @Option(doc = "An interval list file that contains the positions to restrict the assessment. Please note that " +
+ "all bases of reads that overlap these intervals will be considered, even if some of those bases extend beyond the boundaries of " +
+ "the interval. The ideal use case for this argument is to use it to restrict the calculation to a subset of (whole) contigs. To " +
+ "restrict the calculation just to individual positions without overlap, please see CollectWgsMetricsFromSampledSites.",
+ optional = true, overridable = true)
+ public File INTERVALS = null;
+
+ private SAMFileHeader header = null;
+
private final Log log = Log.getInstance(CollectWgsMetrics.class);
private static final double LOG_ODDS_THRESHOLD = 3.0;
/** Metrics for evaluating the performance of whole genome sequencing experiments. */
public static class WgsMetrics extends MetricBase {
+
/** The number of non-N bases in the genome reference over which coverage will be evaluated. */
public long GENOME_TERRITORY;
/** The mean coverage in bases of the genome territory, after all filters are applied. */
@@ -186,6 +207,9 @@ public class CollectWgsMetrics extends CommandLineProgram {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
IOUtil.assertFileIsReadable(REFERENCE_SEQUENCE);
+ if (INTERVALS != null) {
+ IOUtil.assertFileIsReadable(INTERVALS);
+ }
// it doesn't make sense for the locus accumulation cap to be lower than the coverage cap
if (LOCUS_ACCUMULATION_CAP < COVERAGE_CAP) {
@@ -198,8 +222,9 @@ public class CollectWgsMetrics extends CommandLineProgram {
final ReferenceSequenceFileWalker refWalker = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
final SamLocusIterator iterator = getLocusIterator(in);
+ this.header = in.getFileHeader();
- final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>();
+ final List<SamRecordFilter> filters = new ArrayList<>();
final CountingFilter dupeFilter = new CountingDuplicateFilter();
final CountingFilter mapqFilter = new CountingMapQFilter(MINIMUM_MAPPING_QUALITY);
final CountingPairedFilter pairFilter = new CountingPairedFilter();
@@ -217,144 +242,195 @@ public class CollectWgsMetrics extends CommandLineProgram {
iterator.setIncludeNonPfReads(false);
iterator.setMaxReadsToAccumulatePerLocus(LOCUS_ACCUMULATION_CAP);
- final int coverageCap = COVERAGE_CAP;
- final long[] HistogramArray = new long[coverageCap + 1];
- final long[] baseQHistogramArray = new long[Byte.MAX_VALUE];
- // We need a separate Het Sens histogram for base quality because the original one excludes bases below baseQ 20
- final long[] baseQHetSensHistogram = new long[Byte.MAX_VALUE];
+ final WgsMetricsCollector collector = getCollector(COVERAGE_CAP);
+
final boolean usingStopAfter = STOP_AFTER > 0;
final long stopAfter = STOP_AFTER - 1;
long counter = 0;
- long basesExcludedByBaseq = 0;
- long basesExcludedByOverlap = 0;
- long basesExcludedByCapping = 0;
-
// Loop through all the loci
while (iterator.hasNext()) {
final SamLocusIterator.LocusInfo info = iterator.next();
+ final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
// Check that the reference is not N
- final ReferenceSequence ref = refWalker.get(info.getSequenceIndex());
final byte base = ref.getBases()[info.getPosition() - 1];
- if (base == 'N') continue;
+ if (SequenceUtil.isNoCall(base)) continue;
+
+ // add to the collector
+ collector.addInfo(info, ref);
+
+ // Record progress and perhaps stop
+ progress.record(info.getSequenceName(), info.getPosition());
+ if (usingStopAfter && ++counter > stopAfter) break;
+ }
+
+
+ final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
+ collector.addToMetricsFile(out, INCLUDE_BQ_HISTOGRAM, dupeFilter, mapqFilter, pairFilter);
+ out.write(OUTPUT);
+
+ return 0;
+ }
+
+ protected SAMFileHeader getSamFileHeader() {
+ return this.header;
+ }
+
+ protected WgsMetrics generateWgsMetrics() {
+ return new WgsMetrics();
+ }
+
+ /**
+ * If INTERVALS is specified, this will count bases beyond the interval list when the read overlaps the intervals and extends beyond the
+ * edge. Ideally INTERVALS should only include regions that have hard edges without reads that could extend beyond the boundary (such as a whole contig).
+ */
+ protected long getBasesExcludedBy(final CountingFilter filter) {
+ return filter.getFilteredBases();
+ }
+
+ protected SamLocusIterator getLocusIterator(final SamReader in) {
+ return (INTERVALS != null) ? new SamLocusIterator(in, IntervalList.fromFile(INTERVALS)) : new SamLocusIterator(in);
+ }
+
+ protected WgsMetricsCollector getCollector(final int coverageCap) {
+ return new WgsMetricsCollector(coverageCap);
+ }
+
+ protected class WgsMetricsCollector {
+
+ protected final long[] depthHistogramArray;
+ private final long[] baseQHistogramArray;
+
+ private long basesExcludedByBaseq = 0;
+ private long basesExcludedByOverlap = 0;
+ private long basesExcludedByCapping = 0;
+ protected final int coverageCap;
+
+ public WgsMetricsCollector(final int coverageCap) {
+ depthHistogramArray = new long[coverageCap + 1];
+ baseQHistogramArray = new long[Byte.MAX_VALUE];
+ this.coverageCap = coverageCap;
+ }
+
+ public void addInfo(final SamLocusIterator.LocusInfo info, final ReferenceSequence ref) {
// Figure out the coverage while not counting overlapping reads twice, and excluding various things
- final HashSet<String> readNames = new HashSet<String>(info.getRecordAndPositions().size());
+ final HashSet<String> readNames = new HashSet<>(info.getRecordAndPositions().size());
int pileupSize = 0;
- int pileupSizeForBaseQHetSens = 0;
for (final SamLocusIterator.RecordAndOffset recs : info.getRecordAndPositions()) {
- pileupSizeForBaseQHetSens++;
- if(pileupSizeForBaseQHetSens <= coverageCap) {
- baseQHetSensHistogram[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
- }
- if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY) { ++basesExcludedByBaseq; continue; }
+ if (recs.getBaseQuality() < MINIMUM_BASE_QUALITY ||
+ SequenceUtil.isNoCall(recs.getReadBase())) { ++basesExcludedByBaseq; continue; }
if (!readNames.add(recs.getRecord().getReadName())) { ++basesExcludedByOverlap; continue; }
pileupSize++;
if (pileupSize <= coverageCap) {
baseQHistogramArray[recs.getRecord().getBaseQualities()[recs.getOffset()]]++;
}
-
}
- final int depth = Math.min(readNames.size(), coverageCap);
- if (depth < readNames.size()) basesExcludedByCapping += readNames.size() - coverageCap;
- HistogramArray[depth]++;
-
- // Record progress and perhaps stop
- progress.record(info.getSequenceName(), info.getPosition());
- if (usingStopAfter && ++counter > stopAfter) break;
+ final int depth = Math.min(pileupSize, coverageCap);
+ if (depth < pileupSize) basesExcludedByCapping += pileupSize - coverageCap;
+ depthHistogramArray[depth]++;
}
- // Construct and write the outputs
- final Histogram<Integer> depthHistogram = new Histogram<Integer>("coverage", "count");
- for (int i = 0; i < HistogramArray.length; ++i) {
- depthHistogram.increment(i, HistogramArray[i]);
- }
+ public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file,
+ final boolean includeBQHistogram,
+ final CountingFilter dupeFilter,
+ final CountingFilter mapqFilter,
+ final CountingPairedFilter pairFilter) {
+ addMetricsToFile(file, dupeFilter, mapqFilter, pairFilter);
- // Construct and write the outputs
- final Histogram<Integer> baseQHistogram = new Histogram<Integer>("value", "baseq_count");
- for (int i=0; i<baseQHistogramArray.length; ++i) {
- baseQHistogram.increment(i, baseQHistogramArray[i]);
+ if (includeBQHistogram) {
+ addBaseQHistogram(file);
+ }
}
- // Construct and write the outputs
- final Histogram<Integer> baseQHetHistogram = new Histogram<Integer>("value", "baseq_count");
- final int BASEQ_MAX = 50;
- final Integer[] x = new Integer[BASEQ_MAX];
- IntStream.range(0, BASEQ_MAX).forEach(i -> x[i] = i);
- baseQHetHistogram.prefillBins(x);
-
- //Haplotype caller uses 17 as a baseQ cut off, so we are too. Everything below 17 is squashed into the '0' bin.
- final int BASEQ_MIN_CUTOFF = 17;
- for (int i=0; i<baseQHetSensHistogram.length; ++i) {
- baseQHetHistogram.increment( i < BASEQ_MIN_CUTOFF ? 0 : i, baseQHetSensHistogram[i]);
+ protected void addBaseQHistogram(final MetricsFile<WgsMetrics, Integer> file) {
+ file.addHistogram(getBaseQHistogram());
}
- final WgsMetrics metrics = generateWgsMetrics();
- metrics.GENOME_TERRITORY = (long) depthHistogram.getSumOfValues();
- metrics.MEAN_COVERAGE = depthHistogram.getMean();
- metrics.SD_COVERAGE = depthHistogram.getStandardDeviation();
- metrics.MEDIAN_COVERAGE = depthHistogram.getMedian();
- metrics.MAD_COVERAGE = depthHistogram.getMedianAbsoluteDeviation();
-
- final long basesExcludedByDupes = getBasesExcludedBy(dupeFilter);
- final long basesExcludedByMapq = getBasesExcludedBy(mapqFilter);
- final long basesExcludedByPairing = getBasesExcludedBy(pairFilter);
- final double total = depthHistogram.getSum();
- final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
- metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
- metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
- metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
- metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
- metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
- metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
- metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
-
- metrics.PCT_1X = MathUtil.sum(HistogramArray, 1, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_5X = MathUtil.sum(HistogramArray, 5, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_10X = MathUtil.sum(HistogramArray, 10, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_15X = MathUtil.sum(HistogramArray, 15, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_20X = MathUtil.sum(HistogramArray, 20, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_25X = MathUtil.sum(HistogramArray, 25, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_30X = MathUtil.sum(HistogramArray, 30, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_40X = MathUtil.sum(HistogramArray, 40, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_50X = MathUtil.sum(HistogramArray, 50, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_60X = MathUtil.sum(HistogramArray, 60, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_70X = MathUtil.sum(HistogramArray, 70, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_80X = MathUtil.sum(HistogramArray, 80, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_90X = MathUtil.sum(HistogramArray, 90, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
- metrics.PCT_100X = MathUtil.sum(HistogramArray, 100, HistogramArray.length) / (double) metrics.GENOME_TERRITORY;
-
- // Get Theoretical Het SNP Sensitivity
- final double [] depthDoubleArray = TheoreticalSensitivity.normalizeHistogram(depthHistogram);
- final double [] baseQDoubleArray = TheoreticalSensitivity.normalizeHistogram(baseQHetHistogram);
- metrics.HET_SNP_SENSITIVITY = TheoreticalSensitivity.hetSNPSensitivity(depthDoubleArray, baseQDoubleArray, SAMPLE_SIZE, LOG_ODDS_THRESHOLD);
- metrics.HET_SNP_Q = QualityUtil.getPhredScoreFromErrorProbability((1-metrics.HET_SNP_SENSITIVITY));
-
- final MetricsFile<WgsMetrics, Integer> out = getMetricsFile();
- out.addMetric(metrics);
- out.addHistogram(depthHistogram);
- if (INCLUDE_BQ_HISTOGRAM) {
- out.addHistogram(baseQHistogram);
+ protected void addMetricsToFile(final MetricsFile<WgsMetrics, Integer> file,
+ final CountingFilter dupeFilter,
+ final CountingFilter mapqFilter,
+ final CountingPairedFilter pairFilter) {
+ // get the depth histogram and metrics
+ final Histogram<Integer> depthHistogram = getDepthHistogram();
+ final WgsMetrics metrics = getMetrics(depthHistogram, dupeFilter, mapqFilter, pairFilter);
+
+ // add them to the file
+ file.addMetric(metrics);
+ file.addHistogram(depthHistogram);
}
- out.write(OUTPUT);
- return 0;
- }
+ protected Histogram<Integer> getDepthHistogram() {
+ return getHistogram(depthHistogramArray,"coverage", "count");
+ }
- protected WgsMetrics generateWgsMetrics() {
- return new WgsMetrics();
- }
+ protected Histogram<Integer> getBaseQHistogram() {
+ return getHistogram(baseQHistogramArray, "value", "baseq_count");
+ }
- protected long getBasesExcludedBy(final CountingFilter filter) {
- return filter.getFilteredBases();
- }
+ private Histogram<Integer> getHistogram(final long[] array, final String binLabel, final String valueLabel) {
+ final Histogram<Integer> histogram = new Histogram<>(binLabel, valueLabel);
+ for (int i = 0; i < array.length; ++i) {
+ histogram.increment(i, array[i]);
+ }
+ return histogram;
+ }
- protected SamLocusIterator getLocusIterator(final SamReader in) {
- return new SamLocusIterator(in);
+ protected WgsMetrics getMetrics(final Histogram<Integer> depthHistogram,
+ final CountingFilter dupeFilter,
+ final CountingFilter mapqFilter,
+ final CountingPairedFilter pairFilter) {
+
+ // the base q het histogram
+
+ final WgsMetrics metrics = generateWgsMetrics();
+ metrics.GENOME_TERRITORY = (long) depthHistogram.getSumOfValues();
+ metrics.MEAN_COVERAGE = depthHistogram.getMean();
+ metrics.SD_COVERAGE = depthHistogram.getStandardDeviation();
+ metrics.MEDIAN_COVERAGE = depthHistogram.getMedian();
+ metrics.MAD_COVERAGE = depthHistogram.getMedianAbsoluteDeviation();
+
+ final long basesExcludedByDupes = getBasesExcludedBy(dupeFilter);
+ final long basesExcludedByMapq = getBasesExcludedBy(mapqFilter);
+ final long basesExcludedByPairing = getBasesExcludedBy(pairFilter);
+ final double total = depthHistogram.getSum();
+ final double totalWithExcludes = total + basesExcludedByDupes + basesExcludedByMapq + basesExcludedByPairing + basesExcludedByBaseq + basesExcludedByOverlap + basesExcludedByCapping;
+
+ metrics.PCT_EXC_DUPE = basesExcludedByDupes / totalWithExcludes;
+ metrics.PCT_EXC_MAPQ = basesExcludedByMapq / totalWithExcludes;
+ metrics.PCT_EXC_UNPAIRED = basesExcludedByPairing / totalWithExcludes;
+ metrics.PCT_EXC_BASEQ = basesExcludedByBaseq / totalWithExcludes;
+ metrics.PCT_EXC_OVERLAP = basesExcludedByOverlap / totalWithExcludes;
+ metrics.PCT_EXC_CAPPED = basesExcludedByCapping / totalWithExcludes;
+ metrics.PCT_EXC_TOTAL = (totalWithExcludes - total) / totalWithExcludes;
+
+ metrics.PCT_1X = MathUtil.sum(depthHistogramArray, 1, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_5X = MathUtil.sum(depthHistogramArray, 5, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_10X = MathUtil.sum(depthHistogramArray, 10, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_15X = MathUtil.sum(depthHistogramArray, 15, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_20X = MathUtil.sum(depthHistogramArray, 20, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_25X = MathUtil.sum(depthHistogramArray, 25, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_30X = MathUtil.sum(depthHistogramArray, 30, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_40X = MathUtil.sum(depthHistogramArray, 40, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_50X = MathUtil.sum(depthHistogramArray, 50, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_60X = MathUtil.sum(depthHistogramArray, 60, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_70X = MathUtil.sum(depthHistogramArray, 70, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_80X = MathUtil.sum(depthHistogramArray, 80, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_90X = MathUtil.sum(depthHistogramArray, 90, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+ metrics.PCT_100X = MathUtil.sum(depthHistogramArray, 100, depthHistogramArray.length) / (double) metrics.GENOME_TERRITORY;
+
+ // Get Theoretical Het SNP Sensitivity
+ final double[] depthDoubleArray = TheoreticalSensitivity.normalizeHistogram(depthHistogram);
+ final double[] baseQDoubleArray = TheoreticalSensitivity.normalizeHistogram(getBaseQHistogram());
+ metrics.HET_SNP_SENSITIVITY = TheoreticalSensitivity.hetSNPSensitivity(depthDoubleArray, baseQDoubleArray, SAMPLE_SIZE, LOG_ODDS_THRESHOLD);
+ metrics.HET_SNP_Q = QualityUtil.getPhredScoreFromErrorProbability((1 - metrics.HET_SNP_SENSITIVITY));
+
+ return metrics;
+ }
}
}
diff --git a/src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java b/src/main/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
similarity index 99%
rename from src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
rename to src/main/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
index 42337bf..c147db0 100644
--- a/src/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
+++ b/src/main/java/picard/analysis/CollectWgsMetricsFromQuerySorted.java
@@ -40,8 +40,11 @@ import java.util.List;
/**
* Computes a number of metrics that are useful for evaluating coverage and performance of sequencing experiments.
*
+ * This tool is deprecated; please use CollectWgsMetrics instead.
+ *
* @author ebanks
*/
+@Deprecated
@CommandLineProgramProperties(
usage = "Computes a number of metrics that are useful for evaluating coverage and performance of " +
"sequencing experiments.",
diff --git a/src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java b/src/main/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
similarity index 91%
rename from src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
rename to src/main/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
index 478afa3..35c73f2 100644
--- a/src/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
+++ b/src/main/java/picard/analysis/CollectWgsMetricsFromSampledSites.java
@@ -39,20 +39,24 @@ import java.io.File;
* It is important that the sampled positions be chosen so that they are spread out at least further than a read's length apart;
* otherwise, you run the risk of double-counting reads in the metrics.
*
+ * This tool is deprecated; please use CollectWgsMetrics instead.
+ *
* @author ebanks
*/
+@Deprecated
@CommandLineProgramProperties(
usage = "Computes a number of metrics that are useful for evaluating coverage and performance of " +
"whole genome sequencing experiments, but only at a set of sampled positions. " +
"It is important that the sampled positions be chosen so that they are spread out " +
"at least further than a read's length apart; otherwise, you run the risk of double-counting " +
- "reads in the metrics.",
+ "reads in the metrics. If contig-sized intervals are needed, use INTERVALS argument in CollectWgsMetrics.",
usageShort = "Writes whole genome sequencing-related metrics for a SAM or BAM file",
programGroup = Metrics.class
)
+
public class CollectWgsMetricsFromSampledSites extends CollectWgsMetrics {
- @Option(shortName = "INTERVALS", doc = "An interval list file that contains the locations of the positions to assess.", optional = false)
+ @Option(doc = "An interval list file that contains the locations of the positions to assess.", optional = false)
public File INTERVALS = null;
public static void main(final String[] args) {
diff --git a/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java b/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java
new file mode 100644
index 0000000..a829cc5
--- /dev/null
+++ b/src/main/java/picard/analysis/CollectWgsMetricsWithNonZeroCoverage.java
@@ -0,0 +1,172 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 Nils Homer
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis;
+
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.StringUtil;
+import picard.PicardException;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.programgroups.Alpha;
+import picard.filter.CountingFilter;
+import picard.filter.CountingPairedFilter;
+import picard.util.RExecutor;
+
+import java.io.File;
+import java.util.List;
+
+/**
+ * CollectWgsMetrics variant that also reports metrics over sites with non-zero coverage and writes a chart to CHART_OUTPUT.
+ */
+@CommandLineProgramProperties(
+        usage = CollectWgsMetricsWithNonZeroCoverage.USAGE_SUMMARY + CollectWgsMetricsWithNonZeroCoverage.USAGE_DETAILS,
+        usageShort = CollectWgsMetricsWithNonZeroCoverage.USAGE_SUMMARY,
+        programGroup = Alpha.class
+)
+public class CollectWgsMetricsWithNonZeroCoverage extends CollectWgsMetrics {
+
+    static final String USAGE_SUMMARY = "Collect metrics about coverage and performance of whole genome sequencing (WGS) experiments. ";
+    static final String USAGE_DETAILS = "This tool collects metrics about the percentages of reads that pass base- and mapping- quality " +
+            "filters as well as coverage (read-depth) levels. Both minimum base- and mapping-quality values as well as the maximum " +
+            "read depths (coverage cap) are user defined. This extends CollectWgsMetrics by including metrics related only to sites " +
+            "with non-zero (>0) coverage." +
+            "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
+            "<h4>Usage Example:</h4>" +
+            "<pre>" +
+            "java -jar picard.jar CollectWgsMetricsWithNonZeroCoverage \\<br /> " +
+            " I=input.bam \\<br /> "+
+            " O=collect_wgs_metrics.txt \\<br /> " +
+            " CHART=collect_wgs_metrics.pdf \\<br /> " +
+            " R=reference_sequence.fasta " +
+            "</pre>" +
+            "Please see the " +
+            "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectWgsMetricsWithNonZeroCoverage.WgsMetricsWithNonZeroCoverage'>" +
+            "WgsMetricsWithNonZeroCoverage</a> documentation for detailed explanations of the output metrics." +
+            "<hr />";
+
+    @Option(shortName = "CHART", doc = "A file (with .pdf extension) to write the chart to.")
+    public File CHART_OUTPUT;
+
+    private final Log log = Log.getInstance(CollectWgsMetricsWithNonZeroCoverage.class);
+
+    // Created in doWork() before super.doWork() is called, returned by getCollector(), and queried again afterwards.
+    private WgsMetricsWithNonZeroCoverageCollector collector = null;
+
+    /** Metrics for evaluating the performance of whole genome sequencing experiments. */
+    public static class WgsMetricsWithNonZeroCoverage extends WgsMetrics {
+        public enum Category { WHOLE_GENOME, NON_ZERO_REGIONS }
+
+        /** One of either WHOLE_GENOME or NON_ZERO_REGIONS */
+        public Category CATEGORY;
+    }
+
+    /** Command-line entry point; runs this tool rather than the parent CollectWgsMetrics. */
+    public static void main(final String[] args) {
+        new CollectWgsMetricsWithNonZeroCoverage().instanceMainWithExit(args);
+    }
+
+    /** Creates the collector, delegates to the parent doWork(), then runs wgsHistogram.R unless the depth histogram is empty. */
+    @Override
+    protected int doWork() {
+        IOUtil.assertFileIsWritable(CHART_OUTPUT);
+        this.collector = new WgsMetricsWithNonZeroCoverageCollector(COVERAGE_CAP);
+
+        final List<SAMReadGroupRecord> readGroups = this.getSamFileHeader().getReadGroups();
+        final String plotSubtitle = (readGroups.size() == 1) ? StringUtil.asEmptyIfNull(readGroups.get(0).getLibrary()) : "";
+
+        super.doWork();
+
+        if (collector.areHistogramsEmpty()) {
+            log.warn("No valid bases found in input file. No plot will be produced.");
+        } else {
+            final int rResult = RExecutor.executeFromClasspath("picard/analysis/wgsHistogram.R",
+                    OUTPUT.getAbsolutePath(), CHART_OUTPUT.getAbsolutePath(), INPUT.getName(), plotSubtitle);
+            if (rResult != 0) {
+                throw new PicardException("R script wgsHistogram.R failed with return code " + rResult);
+            }
+        }
+        return 0;
+    }
+
+    /** Supplies the metrics subclass that carries the CATEGORY field. */
+    @Override
+    protected WgsMetricsWithNonZeroCoverage generateWgsMetrics() {
+        return new WgsMetricsWithNonZeroCoverage();
+    }
+
+    /** Returns the collector created in doWork(), asserting that it was built with the requested coverage cap. */
+    @Override
+    protected WgsMetricsCollector getCollector(final int coverageCap) {
+        assert(coverageCap == this.collector.coverageCap);
+        return this.collector;
+    }
+
+    /** Collector that adds two metrics to the file: one tagged WHOLE_GENOME and one tagged NON_ZERO_REGIONS. */
+    protected class WgsMetricsWithNonZeroCoverageCollector extends WgsMetricsCollector {
+        Histogram<Integer> depthHistogram = null;
+
+        public WgsMetricsWithNonZeroCoverageCollector(final int coverageCap) {
+            super(coverageCap);
+        }
+
+        /** Captures the depth histogram, adds both metrics to the file, and adds the base-quality histogram when requested. */
+        @Override
+        public void addToMetricsFile(final MetricsFile<WgsMetrics, Integer> file, final boolean includeBQHistogram,
+                                     final CountingFilter dupeFilter, final CountingFilter mapqFilter,
+                                     final CountingPairedFilter pairFilter) {
+            this.depthHistogram = getDepthHistogram();
+            final Histogram<Integer> depthHistogramNonZero = depthHistogramNonZero();
+
+            final WgsMetricsWithNonZeroCoverage metrics = (WgsMetricsWithNonZeroCoverage) getMetrics(depthHistogram, dupeFilter, mapqFilter, pairFilter);
+            final WgsMetricsWithNonZeroCoverage metricsNonZero = (WgsMetricsWithNonZeroCoverage) getMetrics(depthHistogramNonZero, dupeFilter, mapqFilter, pairFilter);
+            metrics.CATEGORY = WgsMetricsWithNonZeroCoverage.Category.WHOLE_GENOME;
+            metricsNonZero.CATEGORY = WgsMetricsWithNonZeroCoverage.Category.NON_ZERO_REGIONS;
+
+            file.addMetric(metrics);
+            file.addMetric(metricsNonZero);
+            if (includeBQHistogram) {
+                addBaseQHistogram(file);
+            }
+        }
+
+        /** Copies every bin of depthHistogramArray except bin 0 (the zero-coverage bin) into a new histogram. */
+        private Histogram<Integer> depthHistogramNonZero() {
+            final Histogram<Integer> nonZero = new Histogram<>("coverage", "count");
+            for (int i = 1; i < depthHistogramArray.length; ++i) {
+                nonZero.increment(i, depthHistogramArray[i]);
+            }
+            return nonZero;
+        }
+
+        /** True when no depth histogram has been captured yet (field still null) or the captured one has no entries. */
+        public boolean areHistogramsEmpty() {
+            return (null == depthHistogram || depthHistogram.isEmpty());
+        }
+    }
+}
\ No newline at end of file
diff --git a/src/java/picard/analysis/CompareMetrics.java b/src/main/java/picard/analysis/CompareMetrics.java
similarity index 100%
rename from src/java/picard/analysis/CompareMetrics.java
rename to src/main/java/picard/analysis/CompareMetrics.java
diff --git a/src/java/picard/analysis/FingerprintingDetailMetrics.java b/src/main/java/picard/analysis/FingerprintingDetailMetrics.java
similarity index 100%
rename from src/java/picard/analysis/FingerprintingDetailMetrics.java
rename to src/main/java/picard/analysis/FingerprintingDetailMetrics.java
diff --git a/src/java/picard/analysis/FingerprintingSummaryMetrics.java b/src/main/java/picard/analysis/FingerprintingSummaryMetrics.java
similarity index 100%
rename from src/java/picard/analysis/FingerprintingSummaryMetrics.java
rename to src/main/java/picard/analysis/FingerprintingSummaryMetrics.java
diff --git a/src/java/picard/analysis/GcBiasDetailMetrics.java b/src/main/java/picard/analysis/GcBiasDetailMetrics.java
similarity index 96%
rename from src/java/picard/analysis/GcBiasDetailMetrics.java
rename to src/main/java/picard/analysis/GcBiasDetailMetrics.java
index 281a23c..422886b 100644
--- a/src/java/picard/analysis/GcBiasDetailMetrics.java
+++ b/src/main/java/picard/analysis/GcBiasDetailMetrics.java
@@ -47,7 +47,7 @@ public class GcBiasDetailMetrics extends MultilevelMetrics {
public int MEAN_BASE_QUALITY;
/**
- * The ration of "coverage" in this GC bin vs. the mean coverage of all GC bins. A number of
+ * The ratio of "coverage" in this GC bin vs. the mean coverage of all GC bins. A number of
* 1 represents mean coverage, a number less than one represents lower than mean coverage (e.g. 0.5
* means half as much coverage as average) while a number greater than one represents higher than
* mean coverage (e.g. 3.1 means this GC bin has 3.1 times more reads per window than average).
diff --git a/src/java/picard/analysis/GcBiasMetricsCollector.java b/src/main/java/picard/analysis/GcBiasMetricsCollector.java
similarity index 100%
rename from src/java/picard/analysis/GcBiasMetricsCollector.java
rename to src/main/java/picard/analysis/GcBiasMetricsCollector.java
diff --git a/src/java/picard/analysis/GcBiasSummaryMetrics.java b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java
similarity index 83%
rename from src/java/picard/analysis/GcBiasSummaryMetrics.java
rename to src/main/java/picard/analysis/GcBiasSummaryMetrics.java
index 00eea88..0808776 100644
--- a/src/java/picard/analysis/GcBiasSummaryMetrics.java
+++ b/src/main/java/picard/analysis/GcBiasSummaryMetrics.java
@@ -56,11 +56,23 @@ public class GcBiasSummaryMetrics extends MultilevelMetrics {
public double GC_DROPOUT;
/**
- * Normalized coverage over each quintile of GC content.
+ * Normalized coverage over quintile of GC content ranging from 0 - 19.
*/
public double GC_NC_0_19;
+ /**
+ * Normalized coverage over each quintile of GC content ranging from 20 - 39.
+ */
public double GC_NC_20_39;
+ /**
+ * Normalized coverage over each quintile of GC content ranging from 40 - 59.
+ */
public double GC_NC_40_59;
+ /**
+ * Normalized coverage over each quintile of GC content ranging from 60 - 79.
+ */
public double GC_NC_60_79;
+ /**
+ * Normalized coverage over each quintile of GC content ranging from 80 - 100.
+ */
public double GC_NC_80_100;
}
diff --git a/src/java/picard/analysis/GcBiasUtils.java b/src/main/java/picard/analysis/GcBiasUtils.java
similarity index 100%
rename from src/java/picard/analysis/GcBiasUtils.java
rename to src/main/java/picard/analysis/GcBiasUtils.java
diff --git a/src/java/picard/analysis/InsertSizeMetrics.java b/src/main/java/picard/analysis/InsertSizeMetrics.java
similarity index 100%
rename from src/java/picard/analysis/InsertSizeMetrics.java
rename to src/main/java/picard/analysis/InsertSizeMetrics.java
diff --git a/src/java/picard/analysis/JumpingLibraryMetrics.java b/src/main/java/picard/analysis/JumpingLibraryMetrics.java
similarity index 85%
rename from src/java/picard/analysis/JumpingLibraryMetrics.java
rename to src/main/java/picard/analysis/JumpingLibraryMetrics.java
index de60c62..4e2e62c 100644
--- a/src/java/picard/analysis/JumpingLibraryMetrics.java
+++ b/src/main/java/picard/analysis/JumpingLibraryMetrics.java
@@ -46,7 +46,7 @@ public class JumpingLibraryMetrics extends MetricBase {
public long JUMP_DUPLICATE_PAIRS;
/**
- * The percentage of outward-facing pairs that are marked as duplicates
+ * The fraction of outward-facing pairs that are marked as duplicates
*/
public double JUMP_DUPLICATE_PCT;
@@ -76,7 +76,7 @@ public class JumpingLibraryMetrics extends MetricBase {
public long NONJUMP_DUPLICATE_PAIRS;
/**
- * The percentage of inward-facing pairs that are marked as duplicates
+ * The fraction of inward-facing pairs that are marked as duplicates
*/
public double NONJUMP_DUPLICATE_PCT;
@@ -107,19 +107,19 @@ public class JumpingLibraryMetrics extends MetricBase {
public long FRAGMENTS;
/**
- * The number of outward-facing pairs expressed as a percentage of the total of all outward facing pairs,
+ * The number of outward-facing pairs expressed as a fraction of the total of all outward facing pairs,
* inward-facing pairs, and chimeric pairs.
*/
public double PCT_JUMPS;
/**
- * The number of inward-facing pairs expressed as a percentage of the total of all outward facing pairs,
+ * The number of inward-facing pairs expressed as a fraction of the total of all outward facing pairs,
* inward-facing pairs, and chimeric pairs.
*/
public double PCT_NONJUMPS;
/**
- * The number of chimeric pairs expressed as a percentage of the total of all outward facing pairs,
+ * The number of chimeric pairs expressed as a fraction of the total of all outward facing pairs,
* inward-facing pairs, and chimeric pairs.
*/
public double PCT_CHIMERAS;
diff --git a/src/java/picard/analysis/MeanQualityByCycle.java b/src/main/java/picard/analysis/MeanQualityByCycle.java
similarity index 97%
rename from src/java/picard/analysis/MeanQualityByCycle.java
rename to src/main/java/picard/analysis/MeanQualityByCycle.java
index 15872b9..db46ded 100644
--- a/src/java/picard/analysis/MeanQualityByCycle.java
+++ b/src/main/java/picard/analysis/MeanQualityByCycle.java
@@ -59,12 +59,13 @@ import java.util.List;
public class MeanQualityByCycle extends SinglePassSamProgram {
static final String USAGE_SUMMARY = "Collect mean quality by cycle.";
static final String USAGE_DETAILS = "This tool generates a data table and chart of mean quality by cycle from a BAM file. It is " +
- "intended to be used on a single lane or read group's worth of data, but can be applied to merged BAMs if needed. " +
+ "intended to be used on a single lane or a read group's worth of data, but can be applied to merged BAMs if needed. " +
"<br /><br />" +
"This metric gives an overall snapshot of sequencing machine performance. For most types of sequencing data, the output " +
"is expected to show a slight reduction in overall base quality scores towards the end of each read. Spikes in quality within " +
"reads are not expected and may indicate that technical problems occurred during sequencing." +
"<br /><br />" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar MeanQualityByCycle \\<br />" +
diff --git a/src/java/picard/analysis/MetricAccumulationLevel.java b/src/main/java/picard/analysis/MetricAccumulationLevel.java
similarity index 100%
rename from src/java/picard/analysis/MetricAccumulationLevel.java
rename to src/main/java/picard/analysis/MetricAccumulationLevel.java
diff --git a/src/java/picard/analysis/QualityScoreDistribution.java b/src/main/java/picard/analysis/QualityScoreDistribution.java
similarity index 92%
rename from src/java/picard/analysis/QualityScoreDistribution.java
rename to src/main/java/picard/analysis/QualityScoreDistribution.java
index 7499e8b..360f372 100644
--- a/src/java/picard/analysis/QualityScoreDistribution.java
+++ b/src/main/java/picard/analysis/QualityScoreDistribution.java
@@ -54,16 +54,18 @@ import java.util.List;
)
public class QualityScoreDistribution extends SinglePassSamProgram {
static final String USAGE_SUMMARY = "Chart the distribution of quality scores. ";
- static final String USAGE_DETAILS = "This tool is used for determining the overall \"quality\" for a library in a given run. To " +
+ static final String USAGE_DETAILS = "<p>This tool is used for determining the overall 'quality' for a library in a given run. To " +
"that effect, it outputs a chart and tables indicating the range of quality scores and the total numbers of bases " +
"corresponding to those scores. Options include plotting the distribution of all of the reads, only the aligned reads, " +
- "or reads that have passed the Illumina Chastity filter thresholds as described <a href='http://gatkforums.broadinstitute.org/discussion/6329/pf-reads-illumina-chastity-filter'>here</a>." +
- "<br /> <br />" +
+ "or reads that have passed the Illumina Chastity filter thresholds as described " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>here</a>.</p>" +
+ "" +
"<h4>Note on base quality score options</h4>" +
- "If the quality score of read bases has been modified in a previous data processing step such as " +
- "<a href='http://gatkforums.broadinstitute.org/discussion/44/base-quality-score-recalibration-bqsr'>GATK Base Recalibration</a> " +
- "and an OQ tag is available, this tool can be set to plot the OQ value as well as the primary quality value for the evaluation. " +
- "<br />" +
+ "If the quality score of read bases has been modified in a previous data processing step such as GATK " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=44'>Base Recalibration</a> " +
+ "and an OQ tag is available, this tool can be set to plot the OQ value as well as the primary quality value for the " +
+ "evaluation. <br />" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
"<h4>Usage Example:</h4>" +
"<pre>" +
"java -jar picard.jar QualityScoreDistribution \\<br />" +
diff --git a/src/java/picard/analysis/RnaSeqMetrics.java b/src/main/java/picard/analysis/RnaSeqMetrics.java
similarity index 53%
rename from src/java/picard/analysis/RnaSeqMetrics.java
rename to src/main/java/picard/analysis/RnaSeqMetrics.java
index 0121040..07e299e 100644
--- a/src/java/picard/analysis/RnaSeqMetrics.java
+++ b/src/main/java/picard/analysis/RnaSeqMetrics.java
@@ -39,73 +39,73 @@ public class RnaSeqMetrics extends MultilevelMetrics {
*/
public long PF_ALIGNED_BASES;
- /** Number of bases in primary aligments that align to ribosomal sequence. */
+ /** Number of bases in primary alignments that align to ribosomal sequence. */
public Long RIBOSOMAL_BASES;
- /** Number of bases in primary aligments that align to a non-UTR coding base for some gene, and not ribosomal sequence. */
+ /** Number of bases in primary alignments that align to a non-UTR coding base for some gene, and not ribosomal sequence. */
public long CODING_BASES;
- /** Number of bases in primary aligments that align to a UTR base for some gene, and not a coding base. */
+ /** Number of bases in primary alignments that align to a UTR base for some gene, and not a coding base. */
public long UTR_BASES;
- /** Number of bases in primary aligments that align to an intronic base for some gene, and not a coding or UTR base. */
+ /** Number of bases in primary alignments that align to an intronic base for some gene, and not a coding or UTR base. */
public long INTRONIC_BASES;
- /** Number of bases in primary aligments that do not align to any gene. */
+ /** Number of bases in primary alignments that do not align to any gene. */
public long INTERGENIC_BASES;
-
/**
- * Number of primary alignments that map to a sequence specified on command-line as IGNORED_SEQUENCE. These are not
+ * Number of primary alignments that are mapped to a sequence specified on command-line as IGNORED_SEQUENCE. These are not
* counted in PF_ALIGNED_BASES, CORRECT_STRAND_READS, INCORRECT_STRAND_READS, or any of the base-counting metrics.
* These reads are counted in PF_BASES.
*/
public long IGNORED_READS;
- /** Number of aligned reads that map to the correct strand. 0 if library is not strand-specific. */
+ /** Number of aligned reads that are mapped to the correct strand. 0 if library is not strand-specific. */
public long CORRECT_STRAND_READS;
- /** Number of aligned reads that map to the incorrect strand. 0 if library is not strand-specific. */
+ /** Number of aligned reads that are mapped to the incorrect strand. 0 if library is not strand-specific. */
public long INCORRECT_STRAND_READS;
- /** RIBOSOMAL_BASES / PF_ALIGNED_BASES */
+ /** Fraction of PF_ALIGNED_BASES that mapped to regions encoding ribosomal RNA, RIBOSOMAL_BASES/PF_ALIGNED_BASES */
public Double PCT_RIBOSOMAL_BASES;
- /** CODING_BASES / PF_ALIGNED_BASES */
+ /** Fraction of PF_ALIGNED_BASES that mapped to protein coding regions of genes, CODING_BASES/PF_ALIGNED_BASES */
public double PCT_CODING_BASES;
- /** UTR_BASES / PF_ALIGNED_BASES */
+ /** Fraction of PF_ALIGNED_BASES that mapped to untranslated regions (UTR) of genes, UTR_BASES/PF_ALIGNED_BASES */
public double PCT_UTR_BASES;
- /** INTRONIC_BASES / PF_ALIGNED_BASES */
+ /** Fraction of PF_ALIGNED_BASES that correspond to gene introns, INTRONIC_BASES/PF_ALIGNED_BASES */
public double PCT_INTRONIC_BASES;
- /** INTERGENIC_BASES / PF_ALIGNED_BASES */
+ /** Fraction of PF_ALIGNED_BASES that mapped to intergenic regions of genomic DNA, INTERGENIC_BASES/PF_ALIGNED_BASES */
public double PCT_INTERGENIC_BASES;
- /** PCT_UTR_BASES + PCT_CODING_BASES */
+ /** Fraction of PF_ALIGNED_BASES that mapped to UTRs or coding regions of mRNA transcripts, PCT_UTR_BASES + PCT_CODING_BASES */
public double PCT_MRNA_BASES;
- /** The percentage of bases mapping to mRNA divided by the total number of PF bases. */
+ /** The fraction of bases mapping to mRNA divided by the total number of PF bases, (CODING_BASES + UTR_BASES)/PF_BASES. */
public double PCT_USABLE_BASES;
- /** CORRECT_STRAND_READS/(CORRECT_STRAND_READS + INCORRECT_STRAND_READS). 0 if library is not strand-specific. */
+ /** Fraction of reads corresponding to mRNA transcripts which map to the correct strand of a reference genome
+ = CORRECT_STRAND_READS/(CORRECT_STRAND_READS + INCORRECT_STRAND_READS). 0 if library is not strand-specific. */
public double PCT_CORRECT_STRAND_READS;
- /** The median CV of coverage of the 1000 most highly expressed transcripts. Ideal value = 0. */
+ /** The median coefficient of variation (CV) or stdev/mean for coverage values of the 1000 most highly expressed transcripts. Ideal value = 0. */
public double MEDIAN_CV_COVERAGE;
/**
- * The median 5 prime bias of the 1000 most highly expressed transcripts, where 5 prime bias is calculated per
- * transcript as: mean coverage of the 5' most 100 bases divided by the mean coverage of the whole transcript.
+ * The median 5 prime bias of the 1000 most highly expressed transcripts. The 5 prime bias is calculated per
+ * transcript as: mean coverage of the 5 prime-most 100 bases divided by the mean coverage of the whole transcript.
*/
public double MEDIAN_5PRIME_BIAS;
/**
* The median 3 prime bias of the 1000 most highly expressed transcripts, where 3 prime bias is calculated per
- * transcript as: mean coverage of the 3' most 100 bases divided by the mean coverage of the whole transcript.
+ * transcript as: mean coverage of the 3 prime-most 100 bases divided by the mean coverage of the whole transcript.
*/
public double MEDIAN_3PRIME_BIAS;
- /** The ratio of coverage at the 5' end of to the 3' end based on the 1000 most highly expressed transcripts. */
+ /** The ratio of coverage at the 5 prime end to the 3 prime end based on the 1000 most highly expressed transcripts. */
public double MEDIAN_5PRIME_TO_3PRIME_BIAS;
}
diff --git a/src/java/picard/analysis/RrbsCpgDetailMetrics.java b/src/main/java/picard/analysis/RrbsCpgDetailMetrics.java
similarity index 96%
rename from src/java/picard/analysis/RrbsCpgDetailMetrics.java
rename to src/main/java/picard/analysis/RrbsCpgDetailMetrics.java
index 9e4fd85..fa91c57 100644
--- a/src/java/picard/analysis/RrbsCpgDetailMetrics.java
+++ b/src/main/java/picard/analysis/RrbsCpgDetailMetrics.java
@@ -39,6 +39,6 @@ public final class RrbsCpgDetailMetrics extends MultilevelMetrics {
public Integer TOTAL_SITES;
/** Number of times this CpG site was converted (TG for + strand, CA for - strand) */
public Integer CONVERTED_SITES;
- /** TOTAL_BASES / CONVERTED_BASES */
+ /** CONVERTED_SITES / TOTAL_SITES (fraction) */
public Double PCT_CONVERTED;
}
diff --git a/src/java/picard/analysis/RrbsMetrics.java b/src/main/java/picard/analysis/RrbsMetrics.java
similarity index 100%
rename from src/java/picard/analysis/RrbsMetrics.java
rename to src/main/java/picard/analysis/RrbsMetrics.java
diff --git a/src/java/picard/analysis/RrbsMetricsCollector.java b/src/main/java/picard/analysis/RrbsMetricsCollector.java
similarity index 100%
rename from src/java/picard/analysis/RrbsMetricsCollector.java
rename to src/main/java/picard/analysis/RrbsMetricsCollector.java
diff --git a/src/java/picard/analysis/RrbsSummaryMetrics.java b/src/main/java/picard/analysis/RrbsSummaryMetrics.java
similarity index 95%
rename from src/java/picard/analysis/RrbsSummaryMetrics.java
rename to src/main/java/picard/analysis/RrbsSummaryMetrics.java
index 2454eb0..71b89c2 100644
--- a/src/java/picard/analysis/RrbsSummaryMetrics.java
+++ b/src/main/java/picard/analysis/RrbsSummaryMetrics.java
@@ -38,13 +38,13 @@ public final class RrbsSummaryMetrics extends MultilevelMetrics {
public Integer NON_CPG_BASES;
/** Number of times a non-CpG cytosine was converted (C->T for +, G->A for -) */
public Integer NON_CPG_CONVERTED_BASES;
- /** NON_CPG_BASES / NON_CPG_CONVERTED_BASES */
+ /** NON_CPG_CONVERTED_BASES / NON_CPG_BASES (fraction) */
public Double PCT_NON_CPG_BASES_CONVERTED;
/** Number of CpG sites encountered */
public Integer CPG_BASES_SEEN;
/** Number of CpG sites that were converted (TG for +, CA for -) */
public Integer CPG_BASES_CONVERTED;
- /** CPG_BASES_SEEN / CPG_BASES_CONVERTED */
+ /** CPG_BASES_CONVERTED / CPG_BASES_SEEN (fraction) */
public Double PCT_CPG_BASES_CONVERTED;
/** Mean coverage of CpG sites */
public Double MEAN_CPG_COVERAGE;
diff --git a/src/java/picard/analysis/SinglePassSamProgram.java b/src/main/java/picard/analysis/SinglePassSamProgram.java
similarity index 100%
rename from src/java/picard/analysis/SinglePassSamProgram.java
rename to src/main/java/picard/analysis/SinglePassSamProgram.java
diff --git a/src/java/picard/analysis/TheoreticalSensitivity.java b/src/main/java/picard/analysis/TheoreticalSensitivity.java
similarity index 80%
rename from src/java/picard/analysis/TheoreticalSensitivity.java
rename to src/main/java/picard/analysis/TheoreticalSensitivity.java
index 4e635da..48ebd9b 100644
--- a/src/java/picard/analysis/TheoreticalSensitivity.java
+++ b/src/main/java/picard/analysis/TheoreticalSensitivity.java
@@ -32,32 +32,47 @@ import picard.util.MathUtil;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
+import java.util.Random;
/**
* Created by David Benjamin on 5/13/15.
*/
public class TheoreticalSensitivity {
- /**
- * @param the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1
- * @param the probability of quality q is qualityDistribution[q] for q = 0, 1. . . Q
- * @param sample size is the number of random sums of quality scores for each m
- * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP,
- * for example 5 if the variant likelihood must be 10^5 times greater
- */
+
private static final Log log = Log.getInstance(TheoreticalSensitivity.class);
private static final int SAMPLING_MAX = 600; //prevent 'infinite' loops
private static final int MAX_CONSIDERED_DEPTH = 1000; //no point in looking any deeper than this, otherwise GC overhead is too high.
+ /**
+ * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1
+ * @param qualityDistribution the probability of quality q is qualityDistribution[q] for q = 0, 1. . . Q
+ * @param sampleSize sample size is the number of random sums of quality scores for each m
+ * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP,
+ * for example 5 if the variant likelihood must be 10^5 times greater
+ */
public static double hetSNPSensitivity(final double[] depthDistribution, final double[] qualityDistribution,
final int sampleSize, final double logOddsThreshold) {
+ return hetSNPSensitivity(depthDistribution, qualityDistribution, sampleSize, logOddsThreshold, true);
+ }
+
+ /**
+ * @param depthDistribution the probability of depth n is depthDistribution[n] for n = 0, 1. . . N - 1
+ * @param qualityDistribution the probability of quality q is qualityDistribution[q] for q = 0, 1. . . Q
+ * @param sampleSize sample size is the number of random sums of quality scores for each m
+ * @param logOddsThreshold is the log_10 of the likelihood ratio required to call a SNP,
+ * for example 5 if the variant likelihood must be 10^5 times greater.
+ * @param withLogging true to output log messages, false otherwise.
+ */
+ public static double hetSNPSensitivity(final double[] depthDistribution, final double[] qualityDistribution,
+ final int sampleSize, final double logOddsThreshold, final boolean withLogging) {
final int N = Math.min(depthDistribution.length, MAX_CONSIDERED_DEPTH + 1);
- log.info("Creating Roulette Wheel");
+ if (withLogging) log.info("Creating Roulette Wheel");
final RouletteWheel qualitySampler = new RouletteWheel(qualityDistribution);
//qualitySums[m] is a random sample of sums of m quality scores, for m = 0, 1, N - 1
- log.info("Calculating quality sums from quality sampler");
- final List<ArrayList<Integer>> qualitySums = qualitySampler.sampleCumulativeSums(N, sampleSize);
+ if (withLogging) log.info("Calculating quality sums from quality sampler");
+ final List<ArrayList<Integer>> qualitySums = qualitySampler.sampleCumulativeSums(N, sampleSize, withLogging);
//if a quality sum of m qualities exceeds the quality sum threshold for n total reads, a SNP is called
final ArrayList<Double> qualitySumThresholds = new ArrayList<>(N);
@@ -67,7 +82,7 @@ public class TheoreticalSensitivity {
//probabilityToExceedThreshold[m][n] is the probability that the sum of m quality score
//exceeds the nth quality sum threshold
- log.info("Calculating theoretical het sensitivity");
+ if (withLogging) log.info("Calculating theoretical het sensitivity");
final List<ArrayList<Double>> probabilityToExceedThreshold = proportionsAboveThresholds(qualitySums, qualitySumThresholds);
final List<ArrayList<Double>> altDepthDistribution = hetAltDepthDistribution(N);
double result = 0.0;
@@ -125,8 +140,10 @@ public class TheoreticalSensitivity {
final private List<Double> probabilities;
final private int N;
private int count = 0;
+ private Random rng;
RouletteWheel(final double[] weights) {
+ rng = new Random(51);
N = weights.length;
probabilities = new ArrayList<>();
@@ -143,9 +160,9 @@ public class TheoreticalSensitivity {
public int draw() {
while (true) {
- final int n = (int) (N * Math.random());
+ final int n = (int) (N * rng.nextDouble());
count++;
- if (Math.random() < probabilities.get(n)) {
+ if (rng.nextDouble() < probabilities.get(n)) {
count = 0;
return n;
} else if (count >= SAMPLING_MAX) {
@@ -156,7 +173,7 @@ public class TheoreticalSensitivity {
}
//get samples of sums of 0, 1, 2,. . . N - 1 draws
- public List<ArrayList<Integer>> sampleCumulativeSums(final int maxNumberOfSummands, final int sampleSize) {
+ public List<ArrayList<Integer>> sampleCumulativeSums(final int maxNumberOfSummands, final int sampleSize, final boolean withLogging) {
final List<ArrayList<Integer>> result = new ArrayList<>();
for (int m = 0; m < maxNumberOfSummands; m++) result.add(new ArrayList<>());
@@ -166,7 +183,7 @@ public class TheoreticalSensitivity {
result.get(m).add(cumulativeSum);
cumulativeSum += draw();
}
- if (iteration % 1000 == 0) {
+ if (withLogging && iteration % 1000 == 0) {
log.info(iteration + " sampling iterations completed");
}
}
diff --git a/src/java/picard/analysis/artifacts/ArtifactCounter.java b/src/main/java/picard/analysis/artifacts/ArtifactCounter.java
similarity index 99%
rename from src/java/picard/analysis/artifacts/ArtifactCounter.java
rename to src/main/java/picard/analysis/artifacts/ArtifactCounter.java
index dba5fb4..7e423fd 100644
--- a/src/java/picard/analysis/artifacts/ArtifactCounter.java
+++ b/src/main/java/picard/analysis/artifacts/ArtifactCounter.java
@@ -8,6 +8,7 @@ import picard.PicardException;
import picard.analysis.artifacts.SequencingArtifactMetrics.*;
import java.util.ArrayList;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -135,7 +136,7 @@ class ArtifactCounter {
*
*/
private Map<Transition, SummaryPair> getSummaryMetrics() {
- final Map<Transition, SummaryPair> summaryMetricsMap = new HashMap<Transition, SummaryPair>();
+ final Map<Transition, SummaryPair> summaryMetricsMap = new EnumMap<Transition, SummaryPair>(Transition.class);
// extract the detail metrics from each accumulator
final ListMap<Transition, DetailPair> fullMetrics = this.fullContextAccumulator.calculateMetrics(sampleAlias, library);
diff --git a/src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java b/src/main/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
similarity index 82%
rename from src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
rename to src/main/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
index 58dd32b..9bfc561 100644
--- a/src/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
+++ b/src/main/java/picard/analysis/artifacts/CollectSequencingArtifactMetrics.java
@@ -58,32 +58,44 @@ import static htsjdk.samtools.util.CodeUtil.getOrElse;
programGroup = Metrics.class
)
public class CollectSequencingArtifactMetrics extends SinglePassSamProgram {
- static final String USAGE_SUMMARY = "Collect metrics to quantify single-base sequencing artifacts.";
- static final String USAGE_DETAILS = "This tool examines two sources of sequencing errors resulting from hybrid selection protocols:" +
- " <a href='https://www.broadinstitute.org/gatk/guide/article?id=6333'>bait-bias</a> and " +
- "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6332'>" +
- "pre-adapter artifacts</a>. For a brief primer on these types of artifacts, see the corresponding GATK Dictionary entries." +
- "<br /><br />" +
- "This tool produces four files; summary and detail metrics files for both pre-adapter and bait-bias artifacts. The detailed " +
- "metrics show the error rates for each type of base substitution within every possible triplet base configuration. Error " +
- "rates associated with these substitutions are Phred-scaled and provided as quality scores, the lower the value, the more " +
- "likely it is that an alternate base call is due to an artifact. The summary metrics provide likelihood information on the " +
- "\"worst-case\" errors. <br />" +
- "" +
- "<h4>Usage example:</h4>" +
- "<pre>" +
- "java -jar picard.jar CollectSequencingArtifactMetrics \\<br />" +
- " I=input.bam \\<br />" +
- " O=artifact_metrics.txt \\<br />" +
- " R=reference_sequence.fasta" +
- "</pre>" +
- "" +
- "For additional information, please see " +
- "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterDetailMetrics'>the PreAdapterDetailMetrics documentation</a>, the " +
- "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterSummaryMetrics'>the PreAdapterSummaryMetrics documentation</a>, the " +
- "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasDetailMetrics'>the BaitBiasDetailMetrics documentation</a>, and the " +
- "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasSummaryMetrics'>the BaitBiasSummaryMetrics documentation</a>. " +
- "<hr />" ;
+static final String USAGE_SUMMARY = "Collect metrics to quantify single-base sequencing artifacts. ";
+static final String USAGE_DETAILS = "<p>This tool examines two sources of sequencing errors associated with hybrid selection "+
+"protocols. These errors are divided into two broad categories, pre-adapter and bait-bias. Pre-adapter errors can arise from "+
+"laboratory manipulations of a nucleic acid sample e.g. shearing and occur prior to the ligation of adapters for PCR "+
+"amplification (hence the name pre-adapter). </p>" +
+
+"<p>Bait-bias artifacts occur during or after the target selection step, and correlate with substitution rates that are "+
+"'biased', or higher for sites having one base on the reference/positive strand relative to sites having the complementary "+
+"base on that strand. For example, during the target selection step, a (G>T) artifact might result in a higher substitution "+
+"rate at sites with a G on the positive strand (and C on the negative), relative to sites with the flip (C positive)/(G negative)." +
+" This is known as the 'G-Ref' artifact. </p>" +
+"" +
+"<p>For additional information on these types of artifacts, please see the corresponding GATK dictionary entries on "+
+"<a href='https://www.broadinstitute.org/gatk/guide/article?id=6333'>bait-bias</a> and "+
+"<a href='https://www.broadinstitute.org/gatk/guide/article?id=6332'>pre-adapter artifacts</a>.</p>"+
+""+
+"<p>This tool produces four files; summary and detail metrics files for both pre-adapter and bait-bias artifacts. The detailed "+
+"metrics show the error rates for each type of base substitution within every possible triplet base configuration. Error " +
+"rates associated with these substitutions are Phred-scaled and provided as quality scores, the lower the value, the more " +
+"likely it is that an alternate base call is due to an artifact. The summary metrics provide likelihood information on the " +
+"'worst-case' errors. </p>" +
+""+
+"<h4>Usage example:</h4>" +
+"<pre>" +
+"java -jar picard.jar CollectSequencingArtifactMetrics \\<br />" +
+" I=input.bam \\<br />" +
+" O=artifact_metrics.txt \\<br />" +
+" R=reference_sequence.fasta" +
+"</pre>" +
+
+"Please see the metrics at the following links " +
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterDetailMetrics'>PreAdapterDetailMetrics</a>, "+
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.PreAdapterSummaryMetrics'>PreAdapterSummaryMetrics</a>, "+
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasDetailMetrics'>BaitBiasDetailMetrics</a>, and "+
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#SequencingArtifactMetrics.BaitBiasSummaryMetrics'>BaitBiasSummaryMetrics</a> "+
+"for complete descriptions of the output metrics produced by this tool. "+
+"<hr />"
+;
@Option(doc = "An optional list of intervals to restrict analysis to.", optional = true)
public File INTERVALS;
diff --git a/src/java/picard/analysis/artifacts/ContextAccumulator.java b/src/main/java/picard/analysis/artifacts/ContextAccumulator.java
similarity index 100%
rename from src/java/picard/analysis/artifacts/ContextAccumulator.java
rename to src/main/java/picard/analysis/artifacts/ContextAccumulator.java
diff --git a/src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java b/src/main/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
similarity index 81%
rename from src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
rename to src/main/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
index 4271d46..e65c04a 100644
--- a/src/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
+++ b/src/main/java/picard/analysis/artifacts/ConvertSequencingArtifactToOxoG.java
@@ -25,28 +25,31 @@ import java.util.Set;
programGroup = Metrics.class
)
public class ConvertSequencingArtifactToOxoG extends CommandLineProgram {
- static final String USAGE_SUMMARY = "Extract OxoG metrics from generalized artifacts metrics";
- static final String USAGE_DETAILS = "This tool extracts 8-oxoguanine (OxoG) artifact metrics from the output of " +
- "CollectSequencingArtifactsMetrics(a tool that provides detailed information on a variety of artifacts found in sequencing " +
- "libraries) and converts them to the CollectOxoGMetrics tool's output format. This conveniently eliminates the need to run " +
- "CollectOxoGMetrics if we already ran CollectSequencingArtifactsMetrics in our pipeline. See the documentation for " +
- "CollectSequencingArtifactsMetrics and CollectOxoGMetrics for additional information on these tools.<br /><br />." +
- "Note that only the base of the CollectSequencingArtifactsMetrics output file name is required for the file name input. " +
- "For example, if the file name is artifact_metrics.txt.bait_bias_detail_metrics" +
- " or artifact_metrics.txt.pre_adapter_detail_metrics, only the base of the file name \"artifact_metrics\" is " +
- "required on the command line for \"input\". " +
- "An output file called \"artifact_metrics.oxog_metrics\" will be generated automatically. " +
- "A reference sequence is also required.<br />"+
- "<h4>Usage example:</h4>" +
- "<pre>" +
- "java -jar picard.jar ConvertSequencingArtifactToOxoG \\<br />" +
- " I=artifact_metrics \\<br />" +
- " R=reference.fasta" +
- "</pre>" +
- "For detailed explanations of the output metrics, please see:" +
- "http://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectOxoGMetrics.CpcgMetrics" +
- "<hr />"
- ;
+static final String USAGE_SUMMARY = "Extract OxoG metrics from generalized artifacts metrics. ";
+static final String USAGE_DETAILS = "<p>This tool extracts 8-oxoguanine (OxoG) artifact metrics from the output of " +
+"CollectSequencingArtifactsMetrics (a tool that provides detailed information on a variety of artifacts found in sequencing " +
+"libraries) and converts them to the CollectOxoGMetrics tool's output format. This conveniently eliminates the need to run " +
+"CollectOxoGMetrics if we already ran CollectSequencingArtifactsMetrics in our pipeline. See the documentation for " +
+"<a href='http://broadinstitute.github.io/picard/command-line-overview.html#CollectSequencingArtifactsMetrics'>CollectSequencingArtifactsMetrics</a> "+
+"and <a href='http://broadinstitute.github.io/picard/command-line-overview.html#CollectOxoGMetrics'>CollectOxoGMetrics</a> "+
+"for additional information on these tools.</p>." +
+
+"<p>Note that only the base of the CollectSequencingArtifactsMetrics output file name is required for the (INPUT_BASE) "+
+"parameter. For example, if the file name is artifact_metrics.txt.bait_bias_detail_metrics or "+
+"artifact_metrics.txt.pre_adapter_detail_metrics, only the file name base 'artifact_metrics' is " +
+"required on the command line for this parameter. An output file called 'artifact_metrics.oxog_metrics' will be generated "+
+"automatically. Finally, to run this tool successfully, the REFERENCE_SEQUENCE must be provided.</p>"+
+"<h4>Usage example:</h4>" +
+"<pre>" +
+"java -jar picard.jar ConvertSequencingArtifactToOxoG \\<br />" +
+" I=artifact_metrics \\<br />" +
+" R=reference.fasta" +
+"</pre>" +
+"Please see the metrics definitions page at " +
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectOxoGMetrics.CpcgMetrics'>ConvertSequencingArtifactToOxoG</a> "+
+"for detailed descriptions of the output metrics produced by this tool."+
+"<hr />"
+;
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME,
doc = "Basename of the input artifact metrics file (output by CollectSequencingArtifactMetrics)")
public File INPUT_BASE;
diff --git a/src/java/picard/analysis/artifacts/SequencingArtifactMetrics.java b/src/main/java/picard/analysis/artifacts/SequencingArtifactMetrics.java
similarity index 100%
rename from src/java/picard/analysis/artifacts/SequencingArtifactMetrics.java
rename to src/main/java/picard/analysis/artifacts/SequencingArtifactMetrics.java
diff --git a/src/java/picard/analysis/artifacts/Transition.java b/src/main/java/picard/analysis/artifacts/Transition.java
similarity index 100%
rename from src/java/picard/analysis/artifacts/Transition.java
rename to src/main/java/picard/analysis/artifacts/Transition.java
diff --git a/src/java/picard/analysis/directed/CalculateHsMetrics.java b/src/main/java/picard/analysis/directed/CalculateHsMetrics.java
similarity index 100%
rename from src/java/picard/analysis/directed/CalculateHsMetrics.java
rename to src/main/java/picard/analysis/directed/CalculateHsMetrics.java
diff --git a/src/java/picard/analysis/directed/CollectHsMetrics.java b/src/main/java/picard/analysis/directed/CollectHsMetrics.java
similarity index 64%
rename from src/java/picard/analysis/directed/CollectHsMetrics.java
rename to src/main/java/picard/analysis/directed/CollectHsMetrics.java
index e89beb0..ba3905b 100644
--- a/src/java/picard/analysis/directed/CollectHsMetrics.java
+++ b/src/main/java/picard/analysis/directed/CollectHsMetrics.java
@@ -45,14 +45,48 @@ import java.util.TreeSet;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Collects a set of Hybrid Selection specific metrics from an aligned SAM" +
- "or BAM file. If a reference sequence is provided, AT/GC dropout metrics will " +
- "be calculated, and the PER_TARGET_COVERAGE option can be used to output GC and " +
- "mean coverage information for every target.",
- usageShort = "Collects Hybrid Selection-specific metrics for a SAM or BAM file",
+ usage = CollectHsMetrics.USAGE_SUMMARY + CollectHsMetrics.USAGE_DETAILS,
+ usageShort = CollectHsMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetricCollector> {
+static final String USAGE_SUMMARY = "Collects hybrid-selection (HS) metrics for a SAM or BAM file. ";
+static final String USAGE_DETAILS = "This tool takes a SAM/BAM file input and collects metrics that are specific for sequence "+
+"datasets generated through hybrid-selection. Hybrid-selection (HS) is the most commonly used technique to capture "+
+"exon-specific sequences for targeted sequencing experiments such as exome sequencing; for more information, please " +
+"see the corresponding <a href='http://www.broadinstitute.org/gatk/guide/article?id=6331'>GATK Dictionary entry</a>. </p> "+
+
+"<p>This tool requires an aligned SAM or BAM file as well as bait and target interval files in Picard interval_list format. " +
+"You should use the bait and interval files that correspond to the capture kit that was used to generate the capture " +
+"libraries for sequencing, which can generally be obtained from the kit manufacturer. If the baits and target " +
+"intervals are provided in BED format, you can convert them to the Picard interval_list format using Picard's " +
+"<a href='http://broadinstitute.github.io/picard/command-line-overview.html#BedToIntervalList'>BedToInterval</a> tool. </p>" +
+
+"<p>If a reference sequence is provided, this program will calculate both AT_DROPOUT and GC_DROPOUT metrics. Dropout " +
+"metrics are an attempt to measure the reduced representation of reads, in regions that deviate from 50% G/C content. " +
+"This reduction in the number of aligned reads is due to the increased numbers of errors associated with sequencing " +
+"regions with excessive or deficient numbers of G/C bases, ultimately leading to poor mapping efficiencies and low" +
+"coverage in the affected regions. </p>" +
+
+"<p>If you are interested in getting G/C content and mean sequence depth information for every target interval, use the " +
+"PER_TARGET_COVERAGE option. </p>" +
+
+"<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p> "+
+
+"<h4>Usage Example:</h4>"+
+"<pre>" +
+"java -jar picard.jar CollectHsMetrics \\<br />" +
+" I=input.bam \\<br />" +
+" O=hs_metrics.txt \\<br />" +
+" R=reference_sequence.fasta \\<br />" +
+" BAIT_INTERVALS=bait.interval_list \\<br />" +
+" TARGET_INTERVALS=target.interval_list" +
+"</pre> " +
+"<p>Please see " +
+"<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#HsMetrics'>CollectHsMetrics</a> for " +
+"detailed descriptions of the output metrics produced by this tool.</p>" +
+"<hr />"
+;
@Option(shortName = "BI", doc = "An interval list file that contains the locations of the baits used.", minElements=1)
public List<File> BAIT_INTERVALS;
@@ -106,4 +140,4 @@ public class CollectHsMetrics extends CollectTargetedMetrics<HsMetrics, HsMetric
return new HsMetricCollector(accumulationLevels, samRgRecords, refFile, perTargetCoverage, perBaseCoverage, targetIntervals, probeIntervals, probeSetName, nearProbeDistance,
MINIMUM_MAPPING_QUALITY, MINIMUM_BASE_QUALITY, CLIP_OVERLAPPING_READS, true, COVERAGE_CAP, SAMPLE_SIZE);
}
-}
\ No newline at end of file
+}
diff --git a/src/java/picard/analysis/directed/CollectTargetedMetrics.java b/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java
similarity index 96%
rename from src/java/picard/analysis/directed/CollectTargetedMetrics.java
rename to src/main/java/picard/analysis/directed/CollectTargetedMetrics.java
index d5e691e..5078071 100644
--- a/src/java/picard/analysis/directed/CollectTargetedMetrics.java
+++ b/src/main/java/picard/analysis/directed/CollectTargetedMetrics.java
@@ -26,13 +26,13 @@ import java.util.List;
import java.util.Set;
/**
- * Both CollectTargetedPCRMetrics and CalculateHybridSelection metrics share virtually identical program structures except
+ * <p>Both CollectTargetedPcrMetrics and CollectHsMetrics share virtually identical program structures except
* for the name of their targeting mechanisms (e.g. bait set or amplicon set). The shared behavior of these programs
- * is encapsulated in CollectTargetedMetrics which is then subclassed by CalculateHsMetrics and CollectTargetedPcrMetrics.
+ * is encapsulated in CollectTargetedMetrics which is then subclassed by CollectHsMetrics and CollectTargetedPcrMetrics.
* <p/>
- * This program verifies the input parameters to TargetMetricsCollector and converts all files to
+ * <p>This program verifies the input parameters to TargetMetricsCollector and converts all files to
* the format desired by TargetMetricsCollector. Then it instantiates a TargetMetricsCollector and
- * collects metric information for all reads in the INPUT sam file.
+ * collects metric information for all reads in the INPUT sam file.</p>
*/
public abstract class CollectTargetedMetrics<METRIC extends MultilevelMetrics, COLLECTOR extends TargetMetricsCollector<METRIC>> extends CommandLineProgram {
diff --git a/src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java b/src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
similarity index 61%
rename from src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
rename to src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
index c3bb273..8c5c7c0 100644
--- a/src/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
+++ b/src/main/java/picard/analysis/directed/CollectTargetedPcrMetrics.java
@@ -23,28 +23,31 @@ import java.util.Set;
)
public class CollectTargetedPcrMetrics extends CollectTargetedMetrics<TargetedPcrMetrics, TargetedPcrMetricsCollector> {
static final String USAGE_SUMMARY = "Calculate PCR-related metrics from targeted sequencing data. ";
- static final String USAGE_DETAILS = "This tool calculates a set of PCR-related metrics from an aligned SAM or " +
- "BAM file containing targeted sequencing data. It is appropriate for data produced with multiple small-target technologies " +
- "including exome sequencing an custom amplicon panels such as the Illumina " +
- "<a href='http://www.illumina.com/content/dam/illumina-marketing/documents/products/datasheets/datasheet_truseq_custom_amplicon.pdf'>TruSeq Custom Amplicon (TSCA)</a> kit. <br /><br />" +
- "If a reference sequence is provided, AT/GC dropout metrics will be calculated and the PER_TARGET_COVERAGE option can be " +
- "used to output GC content and mean coverage information for each target. The AT/GC dropout metrics indicate the degree of " +
- "inadequate coverage of a particular region based on its AT or GC content. The PER_TARGET_COVERAGE option can be used to " +
- "output GC content and mean sequence depth information for every target interval. <br /><br />" +
- "Please note that coverage depth at each locus should not exceed a limit of java MAX_SHORT ~32K. This is because " +
- "CollectTargetedPcrMetrics tool uses a short array to calculate coverage metrics." +
- "<h4>Usage Example</h4>" +
- "<pre>" +
- "java -jar picard.jar CollectTargetedPcrMetrics \\<br /> " +
- " I=input.bam \\<br /> " +
- " O=pcr_metrics.txt \\<br /> " +
- " R=reference_sequence.fasta \\<br /> " +
- " AMPLICON_INTERVALS=amplicon.interval_list \\<br /> " +
- " TARGET_INTERVALS=targets.interval_list " +
- "</pre>" +
- "For explanations of the output metrics, see " +
- "http://broadinstitute.github.io/picard/picard-metric-definitions.html#TargetedPcrMetrics" +
- "<hr />";
+ static final String USAGE_DETAILS = "<p>This tool calculates a set of PCR-related metrics from an aligned SAM or " +
+ "BAM file containing targeted sequencing data. It is appropriate for data produced with multiple small-target technologies " +
+ "including exome sequencing an custom amplicon panels such as the Illumina " +
+ "<a href='http://www.illumina.com/content/dam/illumina-marketing/documents/products/datasheets/datasheet_truseq_custom_amplicon.pdf'>" +
+ "TruSeq Custom Amplicon (TSCA)</a> kit.</p>" +
+ "" +
+ "<p>If a reference sequence is provided, AT/GC dropout metrics will be calculated and the PER_TARGET_COVERAGE option can be " +
+ "used to output GC content and mean coverage information for each target. The AT/GC dropout metrics indicate the degree of " +
+ "inadequate coverage of a particular region based on its AT or GC content. The PER_TARGET_COVERAGE option can be used to " +
+ "output GC content and mean sequence depth information for every target interval. </p>" +
+ "" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
+ "<h4>Usage Example</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectTargetedPcrMetrics \\<br /> " +
+ " I=input.bam \\<br /> " +
+ " O=pcr_metrics.txt \\<br /> " +
+ " R=reference_sequence.fasta \\<br /> " +
+ " AMPLICON_INTERVALS=amplicon.interval_list \\<br /> " +
+ " TARGET_INTERVALS=targets.interval_list " +
+ "</pre>" +
+ "Please see the metrics definitions page on " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#TargetedPcrMetrics'>TargetedPcrMetrics</a> " +
+ "for detailed explanations of the output metrics produced by this tool." +
+ "<hr />";
@Option(shortName = "AI", doc = "An interval list file that contains the locations of the baits used.")
public File AMPLICON_INTERVALS;
diff --git a/src/java/picard/analysis/directed/HsMetricCollector.java b/src/main/java/picard/analysis/directed/HsMetricCollector.java
similarity index 100%
rename from src/java/picard/analysis/directed/HsMetricCollector.java
rename to src/main/java/picard/analysis/directed/HsMetricCollector.java
diff --git a/src/java/picard/analysis/directed/HsMetrics.java b/src/main/java/picard/analysis/directed/HsMetrics.java
similarity index 76%
rename from src/java/picard/analysis/directed/HsMetrics.java
rename to src/main/java/picard/analysis/directed/HsMetrics.java
index 21efcc1..1e6abaa 100644
--- a/src/java/picard/analysis/directed/HsMetrics.java
+++ b/src/main/java/picard/analysis/directed/HsMetrics.java
@@ -55,76 +55,76 @@ public class HsMetrics extends MultilevelMetrics {
/** The number of bases in the reference genome used for alignment. */
public long GENOME_SIZE;
- /** The number of bases which have one or more baits on top of them. */
+ /** The number of bases which are localized to one or more baits. */
public long BAIT_TERRITORY;
- /** The unique number of target bases in the experiment where target is usually exons etc. */
+ /** The unique number of target bases in the experiment, where the target sequence is usually exons etc. */
public long TARGET_TERRITORY;
- /** Target terrirtoy / bait territory. 1 == perfectly efficient, 0.5 = half of baited bases are not target. */
+ /** The ratio of TARGET_TERRITORY/BAIT_TERRITORY. A value of 1 indicates a perfect design efficiency, while a value of 0.5 indicates that half of bases within the bait region are not within the target region. */
public double BAIT_DESIGN_EFFICIENCY;
- /** The total number of reads in the SAM or BAM file examine. */
+ /** The total number of reads in the SAM or BAM file examined. */
public long TOTAL_READS;
- /** The number of reads that pass the vendor's filter. */
+ /** The total number of reads that pass the vendor's filter. */
public long PF_READS;
/** The number of PF reads that are not marked as duplicates. */
public long PF_UNIQUE_READS;
- /** PF reads / total reads. The percent of reads passing filter. */
+ /** The fraction of reads passing the vendor's filter, PF_READS/TOTAL_READS. */
public double PCT_PF_READS;
- /** PF Unique Reads / Total Reads. */
+ /** The fraction of PF_UNIQUE_READS from the TOTAL_READS, PF_UNIQUE_READS/TOTAL_READS. */
public double PCT_PF_UQ_READS;
- /** The number of PF unique reads that are aligned with mapping score > 0 to the reference genome. */
+ /** The number of PF_UNIQUE_READS that aligned to the reference genome with a mapping score > 0. */
public long PF_UQ_READS_ALIGNED;
- /** PF Reads Aligned / PF Reads. */
+ /** The fraction of PF_UQ_READS_ALIGNED from the total number of PF reads. */
public double PCT_PF_UQ_READS_ALIGNED;
- /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
+ /** The number of PF unique bases that are aligned to the reference genome with mapping scores > 0. */
public long PF_BASES_ALIGNED;
- /** The number of bases in the PF aligned reads that are mapped to a reference base. Accounts for clipping and gaps. */
+ /** The number of bases in the PF_UQ_READS_ALIGNED reads. Accounts for clipping and gaps. */
public long PF_UQ_BASES_ALIGNED;
- /** The number of PF aligned bases that mapped to a baited region of the genome. */
+ /** The number of PF_BASES_ALIGNED that are mapped to the baited regions of the genome. */
public long ON_BAIT_BASES;
- /** The number of PF aligned bases that mapped to within a fixed interval of a baited region, but not on a baited region. */
+ /** The number of PF_BASES_ALIGNED that are mapped to within a fixed interval containing a baited region, but not within the baited section per se. */
public long NEAR_BAIT_BASES;
- /** The number of PF aligned bases that mapped to neither on or near a bait. */
+ /** The number of PF_BASES_ALIGNED that are mapped away from any baited region. */
public long OFF_BAIT_BASES;
- /** The number of PF aligned bases that mapped to a targeted region of the genome. */
+ /** The number of PF_BASES_ALIGNED that are mapped to a targeted region of the genome. */
public long ON_TARGET_BASES;
- /** On+Near Bait Bases / PF Bases Aligned. */
+ /** The fraction of PF_BASES_ALIGNED located on or near a baited region (ON_BAIT_BASES + NEAR_BAIT_BASES)/PF_BASES_ALIGNED. */
public double PCT_SELECTED_BASES;
- /** The percentage of aligned PF bases that mapped neither on or near a bait. */
+ /** The fraction of PF_BASES_ALIGNED that are mapped away from any baited region, OFF_BAIT_BASES/PF_BASES_ALIGNED. */
public double PCT_OFF_BAIT;
- /** The percentage of on+near bait bases that are on as opposed to near. */
+ /** The fraction of bases on or near baits that are covered by baits, ON_BAIT_BASES/(ON_BAIT_BASES + NEAR_BAIT_BASES). */
public double ON_BAIT_VS_SELECTED;
/** The mean coverage of all baits in the experiment. */
public double MEAN_BAIT_COVERAGE;
- /** The mean coverage of targets. */
+ /** The mean coverage of a target region. */
public double MEAN_TARGET_COVERAGE;
- /** The median coverage of targets. */
+ /** The median coverage of a target region. */
public double MEDIAN_TARGET_COVERAGE;
/** The number of aligned, de-duped, on-bait bases out of the PF bases available. */
public double PCT_USABLE_BASES_ON_BAIT;
- /** The number of aligned, de-duped, on-target bases out of the PF bases available. */
+ /** The number of aligned, de-duped, on-target bases out of all of the PF bases available. */
public double PCT_USABLE_BASES_ON_TARGET;
/** The fold by which the baited region has been amplified above genomic background. */
@@ -148,27 +148,24 @@ public class HsMetrics extends MultilevelMetrics {
/** The fraction of aligned bases that were filtered out because they did not align over a target base. */
public double PCT_EXC_OFF_TARGET;
- /**
- * The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to
- * the mean coverage level in those targets.
- */
+ /** The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to the mean coverage level in those targets.*/
public double FOLD_80_BASE_PENALTY;
- /** The percentage of all target bases achieving 1X or greater coverage. */
+ /** The fraction of all target bases achieving 1X or greater coverage. */
public double PCT_TARGET_BASES_1X;
- /** The percentage of all target bases achieving 2X or greater coverage. */
+ /** The fraction of all target bases achieving 2X or greater coverage. */
public double PCT_TARGET_BASES_2X;
- /** The percentage of all target bases achieving 10X or greater coverage. */
+ /** The fraction of all target bases achieving 10X or greater coverage. */
public double PCT_TARGET_BASES_10X;
- /** The percentage of all target bases achieving 20X or greater coverage. */
+ /** The fraction of all target bases achieving 20X or greater coverage. */
public double PCT_TARGET_BASES_20X;
- /** The percentage of all target bases achieving 30X or greater coverage. */
+ /** The fraction of all target bases achieving 30X or greater coverage. */
public double PCT_TARGET_BASES_30X;
- /** The percentage of all target bases achieving 40X or greater coverage. */
+ /** The fraction of all target bases achieving 40X or greater coverage. */
public double PCT_TARGET_BASES_40X;
- /** The percentage of all target bases achieving 50X or greater coverage. */
+ /** The fraction of all target bases achieving 50X or greater coverage. */
public double PCT_TARGET_BASES_50X;
- /** The percentage of all target bases achieving 100X or greater coverage. */
+ /** The fraction of all target bases achieving 100X or greater coverage. */
public double PCT_TARGET_BASES_100X;
/** The estimated number of unique molecules in the selected part of the library. */
diff --git a/src/java/picard/analysis/directed/InsertSizeMetricsCollector.java b/src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java
similarity index 97%
rename from src/java/picard/analysis/directed/InsertSizeMetricsCollector.java
rename to src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java
index fcc7c0a..5a2d3bf 100644
--- a/src/java/picard/analysis/directed/InsertSizeMetricsCollector.java
+++ b/src/main/java/picard/analysis/directed/InsertSizeMetricsCollector.java
@@ -18,7 +18,7 @@ import java.util.Map;
import java.util.Set;
/**
- * Collects InserSizeMetrics on the specified accumulationLevels using
+ * Collects InsertSizeMetrics on the specified accumulationLevels using
*/
public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMetrics, Integer, InsertSizeCollectorArgs> {
// When generating the Histogram, discard any data categories (out of FR, TANDEM, RF) that have fewer than this
@@ -26,7 +26,7 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
private final double minimumPct;
// Generate mean, sd and plots by trimming the data down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION.
- // This is done because insert size data typically includes enough anomolous values from chimeras and other
+ // This is done because insert size data typically includes enough anomalous values from chimeras and other
// artifacts to make the mean and sd grossly misleading regarding the real distribution.
private final double deviations;
@@ -150,11 +150,11 @@ public class InsertSizeMetricsCollector extends MultiLevelCollector<InsertSizeMe
double high = median;
while (low >= histogram.getMin() || high <= histogram.getMax()) {
- final Histogram<Integer>.Bin lowBin = histogram.get((int) low);
+ final Histogram.Bin<Integer> lowBin = histogram.get((int) low);
if (lowBin != null) covered += lowBin.getValue();
if (low != high) {
- final Histogram<Integer>.Bin highBin = histogram.get((int) high);
+ final Histogram.Bin<Integer> highBin = histogram.get((int) high);
if (highBin != null) covered += highBin.getValue();
}
diff --git a/src/java/picard/analysis/directed/RnaSeqMetricsCollector.java b/src/main/java/picard/analysis/directed/RnaSeqMetricsCollector.java
similarity index 100%
rename from src/java/picard/analysis/directed/RnaSeqMetricsCollector.java
rename to src/main/java/picard/analysis/directed/RnaSeqMetricsCollector.java
diff --git a/src/java/picard/analysis/directed/TargetMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java
similarity index 92%
rename from src/java/picard/analysis/directed/TargetMetricsCollector.java
rename to src/main/java/picard/analysis/directed/TargetMetricsCollector.java
index e318784..e5d16ff 100644
--- a/src/java/picard/analysis/directed/TargetMetricsCollector.java
+++ b/src/main/java/picard/analysis/directed/TargetMetricsCollector.java
@@ -467,25 +467,19 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
final Collection<Interval> probes = probeDetector.getOverlaps(read);
// Calculate the values we need for HS_LIBRARY_SIZE
- if (!record.getSupplementaryAlignmentFlag()) {
- if (record.getReadPairedFlag() && record.getFirstOfPairFlag() && !record.getReadUnmappedFlag() && !record.getMateUnmappedFlag()) {
- if (!probes.isEmpty()) {
- ++this.metrics.PF_SELECTED_PAIRS;
- if (!record.getDuplicateReadFlag()) ++this.metrics.PF_SELECTED_UNIQUE_PAIRS;
- }
- }
+ if (!record.getSupplementaryAlignmentFlag() &&
+ record.getReadPairedFlag() &&
+ record.getFirstOfPairFlag() &&
+ !record.getReadUnmappedFlag() &&
+ !record.getMateUnmappedFlag() &&
+ !probes.isEmpty()) {
+ ++this.metrics.PF_SELECTED_PAIRS;
+ if (!record.getDuplicateReadFlag()) ++this.metrics.PF_SELECTED_UNIQUE_PAIRS;
}
- ///////////////////////////////////////////////////////////////////
- // Duplicate reads can be totally ignored beyond this point
- ///////////////////////////////////////////////////////////////////
- if (record.getDuplicateReadFlag()) {
- this.metrics.PCT_EXC_DUPE += basesAlignedInRecord;
- return;
- }
-
- // Compute the bait-related metrics *before* applying the overlap clipping and
- // the map-q threshold, since those would skew the assay-related metrics
+ // Compute the bait-related metrics *before* applying the duplicate read
+ // filtering, overlap clipping and the map-q threshold, since those would
+ // skew the assay-related metrics
{
final int mappedBases = basesAlignedInRecord;
int onBaitBases = 0;
@@ -509,6 +503,14 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
}
///////////////////////////////////////////////////////////////////
+ // Duplicate reads can be totally ignored beyond this point
+ ///////////////////////////////////////////////////////////////////
+ if (record.getDuplicateReadFlag()) {
+ this.metrics.PCT_EXC_DUPE += basesAlignedInRecord;
+ return;
+ }
+
+ ///////////////////////////////////////////////////////////////////
// And lastly, ignore reads falling below the mapq threshold
///////////////////////////////////////////////////////////////////
if (this.mapQFilter.filterOut(record)) return;
@@ -522,6 +524,7 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
} else rec = record;
// Find the target overlaps
+ final Set<Interval> coveredTargets = new HashSet<>();
for (final AlignmentBlock block : rec.getAlignmentBlocks()) {
final int length = block.getLength(), refStart = block.getReferenceStart(), readStart = block.getReadStart();
@@ -536,7 +539,6 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
boolean isOnTarget = false;
for (final Interval target : targets) {
if (refPos >= target.getStart() && refPos <= target.getEnd()) {
- isOnTarget = true;
++this.metrics.ON_TARGET_BASES;
if (mappedInPair) ++this.metrics.ON_TARGET_FROM_PAIR_BASES;
@@ -544,6 +546,11 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
final Coverage coverage = this.coverageByTarget.get(target);
coverage.addBase(targetOffset);
baseQHistogramArray[baseQualities[offset]]++;
+ if (!coveredTargets.contains(target)) {
+ coverage.incrementReadCount();
+ coveredTargets.add(target);
+ }
+ isOnTarget = true;
}
}
@@ -700,7 +707,7 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
try {
if (perTargetOutput != null) {
out = new PrintWriter(perTargetOutput);
- out.println("chrom\tstart\tend\tlength\tname\t%gc\tmean_coverage\tnormalized_coverage\tmin_normalized_coverage\tmax_normalized_coverage\tmin_coverage\tmax_coverage\tpct_0x");
+ out.println("chrom\tstart\tend\tlength\tname\t%gc\tmean_coverage\tnormalized_coverage\tmin_normalized_coverage\tmax_normalized_coverage\tmin_coverage\tmax_coverage\tpct_0x\tread_count");
}
else {
out = null;
@@ -750,7 +757,8 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
fmt.format(max / this.metrics.MEAN_TARGET_COVERAGE) + "\t" +
fmt.format(min) + "\t" +
fmt.format(max) + "\t" +
- fmt.format(targetBasesAt0x / interval.length())
+ fmt.format(targetBasesAt0x / interval.length()) + "\t" +
+ fmt.format(cov.readCount)
);
}
}
@@ -798,6 +806,7 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
public static class Coverage {
private final Interval interval;
private final int[] depths;
+ public long readCount = 0;
/** Constructs a new coverage object for the provided mapping with the desired padding either side. */
public Coverage(final Interval i, final int padding) {
@@ -807,13 +816,16 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
/** Adds a single point of depth at the desired offset into the coverage array. */
public void addBase(final int offset) {
- if (offset >= 0 && offset < this.depths.length) {
- if (this.depths[offset] < Integer.MAX_VALUE) {
- this.depths[offset] += 1;
- }
+ if (offset >= 0 && offset < this.depths.length && this.depths[offset] < Integer.MAX_VALUE) {
+ this.depths[offset] += 1;
}
}
+ /** Increments the # of reads mapping to this target. */
+ public void incrementReadCount() {
+ this.readCount++;
+ }
+
/** Returns true if any base in the range has coverage of > 0 */
public boolean hasCoverage() {
// NB: if this is expensive, we could easily pre-compute this as we go along in addBase
@@ -845,7 +857,7 @@ public abstract class TargetMetricsCollector<METRIC_TYPE extends MultilevelMetri
* how well those regions were targeted.
*/
class TargetMetrics extends MultilevelMetrics {
- /** The name of the PROBE_SET (BAIT SET, AMPLICON SET, ...) used in this metrics collection run */
+ /** The name of the PROBE_SET (BAIT_SET, AMPLICON_SET, ...) used in this metrics collection run */
public String PROBE_SET;
/** The number of unique bases covered by the intervals of all probes in the probe set */
@@ -860,34 +872,35 @@ class TargetMetrics extends MultilevelMetrics {
/** The total number of reads in the SAM or BAM file examined. */
public long TOTAL_READS;
- /** The number of reads that pass the vendor's filter. */
+ /** The number of passing filter reads (PF). */
public long PF_READS;
- /** The number of bases in the SAM or BAM file to be examined */
+ /** The number of bases in the PF_READS of a SAM or BAM file */
public long PF_BASES;
- /** The number of PF reads that are not marked as duplicates. */
+ /** The number of PF_READS that are not marked as duplicates. */
public long PF_UNIQUE_READS;
/** Tracks the number of read pairs that we see that are PF (used to calculate library size) */
public long PF_SELECTED_PAIRS;
- /** Tracks the number of unique PF reads pairs we see (used to calc library size) */
+ /** Tracks the number of unique PF_SELECTED_PAIRS we see (used to calc library size) */
public long PF_SELECTED_UNIQUE_PAIRS;
- /** The number of PF unique reads that are aligned with mapping score > 0 to the reference genome. */
+ /** The number of PF_UNIQUE_READS that are aligned with mapping score > 0 to the reference genome. */
public long PF_UQ_READS_ALIGNED;
- /** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
+ /** The number of PF_BASES that are aligned with mapping score > 0 to the reference genome. */
public long PF_BASES_ALIGNED;
/** The number of PF unique bases that are aligned with mapping score > 0 to the reference genome. */
public long PF_UQ_BASES_ALIGNED;
- /** The number of PF aligned probed that mapped to a baited region of the genome. */
+ /** The number of PF aligned probed bases that mapped to a baited region of the genome. */
public long ON_PROBE_BASES;
- /** The number of PF aligned bases that mapped to within a fixed interval of a probed region, but not on a baited region. */
+ /** The number of PF aligned bases that mapped to within a fixed interval of a probed region, but not on a
+ * baited region. */
public long NEAR_PROBE_BASES;
/** The number of PF aligned bases that mapped to neither on or near a probe. */
@@ -901,28 +914,33 @@ class TargetMetrics extends MultilevelMetrics {
//metrics below here are derived after collection
- /** PF reads / total reads. The percent of reads passing filter. */
+ /** The fraction of reads passing filter, PF_READS/TOTAL_READS. */
public double PCT_PF_READS;
- /** PF Unique Reads / Total Reads. */
+ /** The fraction of unique reads passing filter, PF_UNIQUE_READS/TOTAL_READS. */
public double PCT_PF_UQ_READS;
- /** PF Reads Aligned / PF Reads. */
+ /** The fraction of unique reads passing filter that align to the reference,
+ * PF_UQ_READS_ALIGNED/PF_UNIQUE_READS. */
public double PCT_PF_UQ_READS_ALIGNED;
- /** On+Near Bait Bases / PF Bases Aligned. */
+ /** The fraction of bases that map on or near a probe (ON_PROBE_BASES + NEAR_PROBE_BASES)/(ON_PROBE_BASES +
+ * NEAR_PROBE_BASES + OFF_PROBE_BASES). */
public double PCT_SELECTED_BASES;
- /** The percentage of aligned PF bases that mapped neither on or near a probe. */
+ /** The fraction of aligned PF bases that mapped neither on nor near a probe, OFF_PROBE_BASES/(ON_PROBE_BASES +
+ * NEAR_PROBE_BASES + OFF_PROBE_BASES). */
public double PCT_OFF_PROBE;
- /** The percentage of on+near probe bases that are on as opposed to near. */
+ /** The fraction of on+near probe bases that are on as opposed to near, ON_PROBE_BASES/(ON_PROBE_BASES +
+ * NEAR_PROBE_BASES). */
public double ON_PROBE_VS_SELECTED;
- /** The mean coverage of all probes in the experiment. */
+ /** The mean coverage of all probes in the experiment, ON_PROBE_BASES/PROBE_TERRITORY. */
public double MEAN_PROBE_COVERAGE;
- /** The fold by which the probed region has been amplified above genomic background. */
+ /** The fold by which the probed region has been amplified above genomic background,
+ * (ON_PROBE_BASES/(ON_PROBE_BASES + NEAR_PROBE_BASES + OFF_PROBE_BASES))/(PROBE_TERRITORY/GENOME_SIZE) */
public double FOLD_ENRICHMENT;
/** The mean coverage of targets. */
@@ -943,7 +961,8 @@ class TargetMetrics extends MultilevelMetrics {
/** The fraction of aligned bases that were filtered out because they were of low base quality. */
public double PCT_EXC_BASEQ;
- /** The fraction of aligned bases that were filtered out because they were the second observation from an insert with overlapping reads. */
+ /** The fraction of aligned bases that were filtered out because they were the second observation from
+ * an insert with overlapping reads. */
public double PCT_EXC_OVERLAP;
/** The fraction of aligned bases that were filtered out because they did not align over a target base. */
@@ -955,21 +974,21 @@ class TargetMetrics extends MultilevelMetrics {
*/
public double FOLD_80_BASE_PENALTY;
- /** The percentage of all target bases achieving 1X or greater coverage. */
+ /** The fraction of all target bases achieving 1X or greater coverage. */
public double PCT_TARGET_BASES_1X;
- /** The percentage of all target bases achieving 2X or greater coverage. */
+ /** The fraction of all target bases achieving 2X or greater coverage. */
public double PCT_TARGET_BASES_2X;
- /** The percentage of all target bases achieving 10X or greater coverage. */
+ /** The fraction of all target bases achieving 10X or greater coverage. */
public double PCT_TARGET_BASES_10X;
- /** The percentage of all target bases achieving 20X or greater coverage. */
+ /** The fraction of all target bases achieving 20X or greater coverage. */
public double PCT_TARGET_BASES_20X;
- /** The percentage of all target bases achieving 30X or greater coverage. */
+ /** The fraction of all target bases achieving 30X or greater coverage. */
public double PCT_TARGET_BASES_30X;
- /** The percentage of all target bases achieving 40X or greater coverage. */
+ /** The fraction of all target bases achieving 40X or greater coverage. */
public double PCT_TARGET_BASES_40X;
- /** The percentage of all target bases achieving 50X or greater coverage. */
+ /** The fraction of all target bases achieving 50X or greater coverage. */
public double PCT_TARGET_BASES_50X;
- /** The percentage of all target bases achieving 100X or greater coverage. */
+ /** The fraction of all target bases achieving 100X or greater coverage. */
public double PCT_TARGET_BASES_100X;
/**
@@ -993,4 +1012,4 @@ class TargetMetrics extends MultilevelMetrics {
/** The Phred Scaled Q Score of the theoretical HET SNP sensitivity. */
public double HET_SNP_Q;
-}
\ No newline at end of file
+}
diff --git a/src/main/java/picard/analysis/directed/TargetedPcrMetrics.java b/src/main/java/picard/analysis/directed/TargetedPcrMetrics.java
new file mode 100644
index 0000000..96b580a
--- /dev/null
+++ b/src/main/java/picard/analysis/directed/TargetedPcrMetrics.java
@@ -0,0 +1,156 @@
+package picard.analysis.directed;
+
+import picard.metrics.MultilevelMetrics;
+
+/** Metrics class for the analysis of reads obtained from targeted pcr experiments e.g. the TruSeq Custom Amplicon
+ * (TSCA) kit (Illumina). */
+public class TargetedPcrMetrics extends MultilevelMetrics {
+
+ /** The name of the amplicon set used in this metrics collection run */
+ public String CUSTOM_AMPLICON_SET;
+
+ /** The number of bases in the reference genome used for alignment */
+ public long GENOME_SIZE;
+
+ /** The number of unique bases covered by the intervals of all amplicons in the amplicon set */
+ public long AMPLICON_TERRITORY;
+
+ /** The number of unique bases covered by the intervals of all targets that should be covered */
+ public long TARGET_TERRITORY;
+
+ /** The total number of reads in the SAM or BAM file examined */
+ public long TOTAL_READS;
+
+ /** The total number of reads passing filter (PF), where the filter(s) can be platform/vendor quality controls. */
+ public long PF_READS;
+
+ /** The total number of bases within the PF_READS of the SAM or BAM file to be examined */
+ public long PF_BASES;
+
+ /** The number of PF_READS that were not marked as sample or optical duplicates. */
+ public long PF_UNIQUE_READS;
+
+ /** The fraction of reads passing filter, PF_READS/TOTAL_READS. */
+ public double PCT_PF_READS;
+
+ /** The fraction of TOTAL_READS that are PF and are not marked as duplicates, PF_UNIQUE_READS/TOTAL_READS */
+ public double PCT_PF_UQ_READS;
+
+ /** The total number of PF_UNIQUE_READS that align to the reference genome with mapping scores > 0 */
+ public long PF_UQ_READS_ALIGNED;
+
+ /** Tracks the number of PF read pairs (used to calculate library size) */
+ public long PF_SELECTED_PAIRS;
+
+ /** Tracks the number of unique, PF, read pairs, observed (used to calculate library size) */
+ public long PF_SELECTED_UNIQUE_PAIRS;
+
+ /** Fraction of PF_READS that are unique and align to the reference genome, PF_UQ_READS_ALIGNED/PF_READS */
+ public double PCT_PF_UQ_READS_ALIGNED;
+
+ /** The number of bases from PF_READS that align to the reference genome with mapping score > 0 */
+ public long PF_BASES_ALIGNED;
+
+ /** The number of bases from PF_UNIQUE_READS that align to the reference genome and have a mapping score > 0 */
+ public long PF_UQ_BASES_ALIGNED;
+
+ /** The number of PF_BASES_ALIGNED that mapped to an amplified region of the genome. */
+ public long ON_AMPLICON_BASES;
+
+ /** The number of PF_BASES_ALIGNED that mapped to within a fixed interval of an amplified region, but not on an
+ * amplified region. */
+ public long NEAR_AMPLICON_BASES;
+
+ /** The number of PF_BASES_ALIGNED that mapped neither on nor near an amplicon. */
+ public long OFF_AMPLICON_BASES;
+
+ /** The number of PF_BASES_ALIGNED that mapped to a targeted region of the genome. */
+ public long ON_TARGET_BASES;
+
+ /** The number of bases from PF_SELECTED_UNIQUE_PAIRS that mapped to a targeted region of the genome. */
+ public long ON_TARGET_FROM_PAIR_BASES;
+
+ /** The fraction of PF_BASES_ALIGNED that mapped to or near an amplicon, (ON_AMPLICON_BASES +
+ * NEAR_AMPLICON_BASES)/PF_BASES_ALIGNED. */
+ public double PCT_AMPLIFIED_BASES;
+
+ /** The fraction of PF_BASES_ALIGNED that mapped neither onto nor near an amplicon,
+ * OFF_AMPLICON_BASES/PF_BASES_ALIGNED */
+ public double PCT_OFF_AMPLICON;
+
+ /**
+ * The fraction of bases mapping to regions on or near amplicons, which mapped directly to but not near
+ * amplicons, ON_AMPLICON_BASES/(NEAR_AMPLICON_BASES + ON_AMPLICON_BASES)
+ * */
+ public double ON_AMPLICON_VS_SELECTED;
+
+ /** The mean read coverage of all amplicon regions in the experiment. */
+ public double MEAN_AMPLICON_COVERAGE;
+
+ /** The mean read coverage of all target regions in an experiment. */
+ public double MEAN_TARGET_COVERAGE;
+
+ /** The median coverage of reads that mapped to target regions of an experiment. */
+ public double MEDIAN_TARGET_COVERAGE;
+
+ /** The fold by which the amplicon region has been amplified above genomic background. */
+ public double FOLD_ENRICHMENT;
+
+ /** The fraction of targets that did not reach coverage=1 over any base. */
+ public double ZERO_CVG_TARGETS_PCT;
+
+ /** The fraction of aligned bases that were filtered out because they were in reads marked as duplicates. */
+ public double PCT_EXC_DUPE;
+
+ /** The fraction of aligned bases that were filtered out because they were in reads with low mapping quality. */
+ public double PCT_EXC_MAPQ;
+
+ /** The fraction of aligned bases that were filtered out because they were of low base quality. */
+ public double PCT_EXC_BASEQ;
+
+ /** The fraction of aligned bases that were filtered out because they were the second observation from an
+ * insert with overlapping reads. */
+ public double PCT_EXC_OVERLAP;
+
+ /** The fraction of bases that were filtered out because they did not map to a base within a target region. */
+ public double PCT_EXC_OFF_TARGET;
+ /**
+ * The fold over-coverage necessary to raise 80% of bases in "non-zero-cvg" targets to
+ * the mean coverage level in those targets.
+ */
+ public double FOLD_80_BASE_PENALTY;
+
+ /** The fraction of all target bases achieving 1X or greater coverage. */
+ public double PCT_TARGET_BASES_1X;
+ /** The fraction of all target bases achieving 2X or greater coverage depth. */
+ public double PCT_TARGET_BASES_2X;
+ /** The fraction of all target bases achieving 10X or greater coverage depth. */
+ public double PCT_TARGET_BASES_10X;
+ /** The fraction of all target bases achieving 20X or greater coverage depth. */
+ public double PCT_TARGET_BASES_20X;
+ /** The fraction of all target bases achieving 30X or greater coverage depth. */
+ public double PCT_TARGET_BASES_30X;
+
+ /**
+ * A measure of how regions with low GC content (<= 50%), are undercovered relative to mean coverage.
+ * After binning the GC content [0..50], we calculate a = fraction of target territory, and b = fraction of
+ * aligned reads aligned to these targets for each bin. AT DROPOUT is then abs(sum(a-b when a-b < 0)).
+ * For example, if the AT_DROPOUT value is 5% this implies that 5% of total reads that
+ * should have mapped to GC<=50% regions, mapped elsewhere.
+ * */
+ public double AT_DROPOUT;
+
+ /**
+ * A measure of how regions of high GC content (>= 50% GC) are undercovered relative to the mean coverage
+ * value. For each GC bin [50..100], we calculate a = % of target territory, and b = % of aligned reads aligned
+ * to these targets. GC DROPOUT is then abs(sum(a-b when a-b < 0)). For example, if the value is 5%, this
+ * implies that 5% of total reads that should have mapped to GC>=50% regions, mapped elsewhere.
+ * */
+ public double GC_DROPOUT;
+
+ /** The theoretical HET SNP sensitivity. */
+ public double HET_SNP_SENSITIVITY;
+
+ /** The Q Score of the theoretical HET SNP sensitivity. */
+ public double HET_SNP_Q;
+}
diff --git a/src/java/picard/analysis/directed/TargetedPcrMetricsCollector.java b/src/main/java/picard/analysis/directed/TargetedPcrMetricsCollector.java
similarity index 100%
rename from src/java/picard/analysis/directed/TargetedPcrMetricsCollector.java
rename to src/main/java/picard/analysis/directed/TargetedPcrMetricsCollector.java
diff --git a/src/main/java/picard/analysis/replicates/CollectIndependentReplicateMetrics.java b/src/main/java/picard/analysis/replicates/CollectIndependentReplicateMetrics.java
new file mode 100644
index 0000000..e48d7d2
--- /dev/null
+++ b/src/main/java/picard/analysis/replicates/CollectIndependentReplicateMetrics.java
@@ -0,0 +1,529 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis.replicates;
+
+
+import htsjdk.samtools.DuplicateSet;
+import htsjdk.samtools.DuplicateSetIterator;
+import htsjdk.samtools.QueryInterval;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordIterator;
+import htsjdk.samtools.SAMUtils;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.filter.AggregateFilter;
+import htsjdk.samtools.filter.AlignedFilter;
+import htsjdk.samtools.filter.FilteringSamIterator;
+import htsjdk.samtools.filter.MappingQualityFilter;
+import htsjdk.samtools.filter.SamRecordFilter;
+import htsjdk.samtools.filter.SecondaryOrSupplementaryFilter;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.CollectionUtil;
+import htsjdk.samtools.util.ComparableTuple;
+import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.variant.variantcontext.Allele;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.filter.CompoundFilter;
+import htsjdk.variant.variantcontext.filter.FilteringVariantContextIterator;
+import htsjdk.variant.variantcontext.filter.GenotypeQualityFilter;
+import htsjdk.variant.variantcontext.filter.HeterozygosityFilter;
+import htsjdk.variant.variantcontext.filter.PassingVariantFilter;
+import htsjdk.variant.variantcontext.filter.SnpFilter;
+import htsjdk.variant.vcf.VCFContigHeaderLine;
+import htsjdk.variant.vcf.VCFFileReader;
+import htsjdk.variant.vcf.VCFHeader;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.Alpha;
+import picard.filter.CountingPairedFilter;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+/**
+ * A CLP that, given a BAM and a VCF with genotypes of the same sample, estimates the rate of independent replication of reads within the bam.
+ * That is, it estimates the fraction of the reads which look like duplicates (in the MarkDuplicates sense of the word) but are actually
+ * independent observations of the data. In the presence of Unique Molecular Identifiers (UMIs), various metrics are collected regarding the
+ * utility of the UMIs for the purpose of increasing coverage.
+ * <p>
+ * The estimation is based on duplicate-sets of size 2 and 3 and gives separate estimates from each. The assumption is that the duplication
+ * rate (biological or otherwise) is independent of the duplicate-set size. A significant difference between the two rates may be an indication that
+ * this assumption is incorrect.
+ * <p>
+ * The duplicate sets are found using the mate-cigar tag (MC) which is added by {@link picard.sam.MergeBamAlignment}, or {@link picard.sam.FixMateInformation}.
+ * This program will not work without the MC tag.
+ * <p>
+ * Explanation of the calculation behind the estimation can be found in the {@link IndependentReplicateMetric} class.
+ * <p>
+ * The calculation assumes a diploid organism (more accurately, it assumes that only two alleles can appear at a HET site and that
+ * these two alleles will appear with equal probability). It requires as input a VCF with genotypes for the sample in question.
+ *
+ * NOTE: This class is very much in alpha stage, and still under heavy development (feel free to join!)
+ *
+ *
+ * @author Yossi Farjoun
+ *
+ */
+
+@CommandLineProgramProperties(
+ usage = "Estimates the rate of independent replication rate of reads within a bam. \n" +
+ "That is, it estimates the fraction of the reads which would be marked as duplicates but " +
+ "are actually biological replicates, independent observations of the data. ",
+ usageShort = "Estimates the rate of independent replication of reads within a bam.",
+ programGroup = Alpha.class
+)
+public class CollectIndependentReplicateMetrics extends CommandLineProgram {
+ private static final int DOUBLETON_SIZE = 2, TRIPLETON_SIZE = 3;
+
+ @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "Input (indexed) BAM file.")
+ public File INPUT;
+
+ @Option(shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "Write metrics to this file")
+ public File OUTPUT;
+
+ @Option(shortName = "MO", doc = "Write the confusion matrix (of UMIs) to this file", optional = true)
+ public File MATRIX_OUTPUT;
+
+ @Option(shortName = "V", doc = "Input VCF file")
+ public File VCF;
+
+ @Option(shortName = "GQ", doc = "minimal value for the GQ field in the VCF to use variant site.", optional = true)
+ public Integer MINIMUM_GQ = 90;
+
+ @Option(shortName = "MQ", doc = "minimal value for the mapping quality of the reads to be used in the estimation.", optional = true)
+ public Integer MINIMUM_MQ = 40;
+
+ @Option(shortName = "BQ", doc = "minimal value for the base quality of a base to be used in the estimation.", optional = true)
+ public Integer MINIMUM_BQ = 17;
+
+ @Option(shortName = StandardOptionDefinitions.SAMPLE_ALIAS_SHORT_NAME,
+ doc = "Name of sample to look at in VCF. Can be omitted if VCF contains only one sample.", optional = true)
+ public String SAMPLE = null;
+
+ @Option(doc = "Number of sets to examine before stopping.", optional = true)
+ public Integer STOP_AFTER = 0;
+
+ @Option(doc = "Barcode SAM tag.", optional = true)
+ public String BARCODE_TAG = "RX";
+
+ @Option(doc = "Barcode Quality SAM tag.", optional = true)
+ public String BARCODE_BQ = "QX";
+
+ @Option(shortName = "MBQ", doc = "minimal value for the base quality of all the bases in a molecular barcode, for it to be used.", optional = true)
+ public Integer MINIMUM_BARCODE_BQ = 30;
+
+ private static final Log log = Log.getInstance(CollectIndependentReplicateMetrics.class);
+
+ @Override
+ protected int doWork() {
+
+ IOUtil.assertFileIsReadable(VCF);
+ IOUtil.assertFileIsReadable(INPUT);
+
+ IOUtil.assertFileIsWritable(OUTPUT);
+ if (MATRIX_OUTPUT != null) IOUtil.assertFileIsWritable(MATRIX_OUTPUT);
+
+ final VCFFileReader vcf = new VCFFileReader(VCF, false);
+
+ final VCFHeader vcfFileHeader = vcf.getFileHeader();
+ final List<String> samples = vcfFileHeader.getSampleNamesInOrder();
+
+ if (SAMPLE == null) {
+ if (samples.size() != 1) {
+ throw new IllegalArgumentException("When sample is null, VCF must have exactly 1 sample. found " + samples.size());
+ } else {
+ SAMPLE = samples.get(0);
+ log.info("No SAMPLE given, using sample from VCF: ", SAMPLE);
+ }
+ } else if (!samples.contains(SAMPLE)) {
+ throw new IllegalArgumentException("When sample is not null, VCF must contain supplied sample. Cannot find sample " + SAMPLE + " in vcf.");
+ }
+
+ final Histogram<ComparableTuple<String, String>> umiConfusionMatrix = new Histogram<>("ConfusionUMI", "Count");
+ final Histogram<ComparableTuple<String, String>> umiConfusionMatrixEditDistance = new Histogram<>("ConfusionUMI", "EditDistance");
+
+ final IndependentReplicateMetric metric = new IndependentReplicateMetric();
+
+ final Histogram<Byte> umiEditDistanceInDiffBiDups = new Histogram<>("editDistance", "diffAllelesCount");
+ final Histogram<Byte> umiEditDistanceInSameBiDups = new Histogram<>("editDistance", "sameAllelesCount");
+ final Histogram<Byte> alleleBalanceCount = new Histogram<>("alleleBalance", "alleleBalanceCount");
+
+ // get the intervals that correspond to het sites in the VCF
+ final SortedMap<QueryInterval, List<Allele>> intervalAlleleMap = getQueryIntervalsMap(VCF);
+ final Iterator<QueryInterval> queryIntervalIterator = intervalAlleleMap.keySet().iterator();
+
+ log.info("Found " + intervalAlleleMap.size() + " heterozygous sites in VCF.");
+
+ // get an iterator to reads that overlap the heterozygous sites
+ final SamReader in = SamReaderFactory.makeDefault().open(INPUT);
+
+ log.info("Querying BAM for sites.");
+
+ final SAMRecordIterator samRecordIterator = in.query(intervalAlleleMap.keySet().toArray(new QueryInterval[intervalAlleleMap.size()]), false);
+ final List<SamRecordFilter> samFilters = CollectionUtil.makeList(
+ new AlignedFilter(true),
+ new CountingPairedFilter(),
+ new SecondaryOrSupplementaryFilter(),
+ new MappingQualityFilter(MINIMUM_MQ)
+ );
+
+ final FilteringSamIterator filteredSamRecordIterator = new FilteringSamIterator(samRecordIterator, new AggregateFilter(samFilters));
+ log.info("Queried BAM, getting duplicate sets.");
+
+ // get duplicate iterator from iterator above
+ final DuplicateSetIterator duplicateSets = new DuplicateSetIterator(filteredSamRecordIterator, in.getFileHeader());
+
+ QueryInterval queryInterval = null;
+
+ log.info("Starting iteration on reads");
+ final ProgressLogger progress = new ProgressLogger(log, 10000000, "examined", "duplicate sets");
+
+ IndependentReplicateMetric locusData = new IndependentReplicateMetric();
+ boolean useLocus = true;
+ boolean newLocus = false;
+ int thirdAlleleInfos = 0;
+ Allele badAllele = null;
+ String offendingReadName = null;
+
+ set:
+ while (duplicateSets.hasNext()) {
+
+ final DuplicateSet set = duplicateSets.next();
+ final SAMRecord setRep = set.getRepresentative();
+ final QueryInterval setRepsInterval = queryIntervalFromSamRecord(setRep);
+
+ progress.record(setRep);
+ // if the current duplicate set no longer overlaps the query interval then null it (and handle it below)
+ // also move to the next variant if the previous variant is bad.
+ if (!useLocus || queryInterval != null && isCleanlyBefore(queryInterval, setRepsInterval)) {
+ if (!useLocus) {
+ metric.nThreeAllelesSites++;
+ if (++thirdAlleleInfos < 100) {
+ log.debug("Skipping a locus due to third allele: " + badAllele + " but expected " +
+ intervalAlleleMap.get(queryInterval) + " queryInterval " + queryInterval +
+ " offending read name is : " + offendingReadName);
+ }
+ }
+ queryInterval = null;
+ }
+
+ // Iterate until we find the query interval that contains the current duplicate set.
+
+ // Simply polling for the "next" query will not do since the next one might not be covered by any reads, or it may have been
+ // covered by past reads (if close enough to previous query interval)
+ while (queryIntervalIterator.hasNext() &&
+ (queryInterval == null || isCleanlyBefore(queryInterval, setRepsInterval))) {
+ // if we haven't seen either the reference or the alternate in the locus (subject to our stringent filters) do not use locus.
+ if (locusData.nReferenceReads == 0 || locusData.nAlternateReads == 0) {
+ useLocus = false;
+ log.debug("will not use this locus due to lack of evidence of het site.");
+ }
+ // Query interval didn't get killed by 3rd alleles and so we combine the results with the tally
+ if (useLocus && newLocus) {
+ metric.merge(locusData);
+ log.debug("merging metric. total nSites so far: " + metric.nSites);
+ //calculate allele balance with faux counts
+ final byte alleleBalance = (byte) Math.round(100D * (locusData.nAlternateReads + 0.5) / (locusData.nAlternateReads + locusData.nReferenceReads + 1));
+ alleleBalanceCount.increment(alleleBalance);
+ // we have merged now, no need to merge the old locus data or update the nSites until out of this while.
+ newLocus = false;
+ }
+ queryInterval = queryIntervalIterator.next();
+ locusData = new IndependentReplicateMetric();
+ locusData.nSites = 1;
+ useLocus = true;
+ }
+ // we have a new locus, next time we should perhaps merge
+ newLocus = true;
+
+ // shouldn't happen, but being safe.
+ if (queryInterval == null) break;
+
+ final int setSize = set.size();
+
+ locusData.nTotalReads += setSize;
+
+ if (setSize > 1) locusData.nDuplicateSets++;
+ if (setSize == DOUBLETON_SIZE) {
+ locusData.nExactlyDouble++;
+ } else if (setSize == TRIPLETON_SIZE) {
+ locusData.nExactlyTriple++;
+ } else if (setSize > TRIPLETON_SIZE) { // singletons are only counted in nTotalReads
+ locusData.nReadsInBigSets += setSize;
+ }
+
+ log.debug("set size is: " + setSize);
+ final List<Allele> allelesInVc = intervalAlleleMap.get(queryInterval);
+
+ log.debug("alleles in VC: " + allelesInVc);
+
+ int nRef = 0, nAlt = 0, nOther = 0;
+ for (final SAMRecord read : set.getRecords()) {
+
+ // getReadPositionAtReferencePosition gives 1-based offset
+ final int offset = read.getReadPositionAtReferencePosition(queryInterval.start) - 1;
+
+ if (offset == -1) {
+ // a labeled continue watch-out!
+ // This could be a deletion OR a clipped end. Get a new set.
+ log.debug("got offset -1, getting new set");
+ continue set;
+ }
+ // a labeled continue watch-out!
+ // need to move to the next set since this set has a low quality base-quality.
+
+ if (read.getBaseQualities()[offset] <= MINIMUM_BQ) {
+ log.debug("got low read quality, getting new set");
+ continue set;
+ }
+
+ final Allele allele = Allele.create(read.getReadBases()[offset]);
+
+ if (allelesInVc.get(0).basesMatch(allele)) {
+ nRef++;
+ } else if (allelesInVc.get(1).basesMatch(allele)) {
+ nAlt++;
+ } else {
+ nOther++;
+ // if other alleles were found, toss out the whole locus! (but read the reads first)
+ useLocus = false;
+ badAllele = allele;
+ offendingReadName = read.getReadName();
+ }
+ }
+ locusData.nAlternateReads += nAlt;
+ locusData.nReferenceReads += nRef;
+
+ if ( setSize == 1 || setSize > TRIPLETON_SIZE) continue;
+ // From here on there should only be 2 or 3 reads in the set
+
+ final SetClassification classification = classifySet(nRef, nAlt, nOther);
+
+ log.debug("Classification of set is: " + classification);
+ if (setSize == DOUBLETON_SIZE) {
+
+ final boolean useBarcodes = !set.getRecords().stream()
+ .map(read -> read.getStringAttribute(BARCODE_BQ))
+ .map(string -> string == null ? "" : string).map(string ->
+ {
+ final byte[] bytes = SAMUtils.fastqToPhred(string);
+ return IntStream.range(0, bytes.length).map(i -> bytes[i]).anyMatch(q -> q < MINIMUM_BARCODE_BQ);
+ }).anyMatch(a -> a);
+
+ log.debug("using barcodes?" + useBarcodes);
+
+ if(useBarcodes) locusData.nGoodBarcodes++; else locusData.nBadBarcodes++;
+
+ final List<String> barcodes = set.getRecords().stream()
+ .map(read -> read.getStringAttribute(BARCODE_TAG))
+ .map(string -> string == null ? "" : string).collect(Collectors.toList());
+
+ log.debug("found UMIs:" + barcodes);
+ final boolean hasMultipleOrientations = set.getRecords().stream()
+ .map(SAMRecord::getFirstOfPairFlag) //must be paired, due to filter on sam Iterator
+ .distinct().count() != 1;
+ log.debug("reads have multiple orientation?" + hasMultipleOrientations);
+
+ final byte editDistance = calculateEditDistance(barcodes.get(0), barcodes.get(1));
+
+ log.debug("Edit distance between umi: " + editDistance);
+
+ if (useBarcodes && editDistance != 0) {
+ if (hasMultipleOrientations) locusData.nMismatchingUMIsInContraOrientedBiDups++;
+ else locusData.nMismatchingUMIsInCoOrientedBiDups++;
+ }
+
+ // sanity check the number of distinct tags
+ if (classification == SetClassification.DIFFERENT_ALLELES) {
+ locusData.nDifferentAllelesBiDups++;
+ if (useBarcodes) {
+ umiEditDistanceInDiffBiDups.increment(editDistance);
+
+ if(editDistance == 0) locusData.nMatchingUMIsInDiffBiDups++; else locusData.nMismatchingUMIsInDiffBiDups++;
+ }
+
+ // we're going to toss out this locus.
+ } else if (classification == SetClassification.MISMATCHING_ALLELE) {
+ locusData.nMismatchingAllelesBiDups++;
+ } else { // the classification is either ALTERNATE_ALLELE or REFERENCE_ALLELE if we've reached here
+ if (classification == SetClassification.ALTERNATE_ALLELE) locusData.nAlternateAllelesBiDups++;
+ else locusData.nReferenceAllelesBiDups++;
+
+ if (useBarcodes) {
+
+ umiEditDistanceInSameBiDups.increment(editDistance);
+ final ComparableTuple<String, String> key = new ComparableTuple<>(barcodes.get(0), barcodes.get(1));
+ umiConfusionMatrix.increment(key);
+ if (!umiConfusionMatrixEditDistance.containsKey(key)) umiConfusionMatrixEditDistance.increment(key, editDistance);
+
+ if (editDistance == 0) locusData.nMatchingUMIsInSameBiDups++; else locusData.nMismatchingUMIsInSameBiDups++;
+ }
+ }
+ }
+ if (setSize == TRIPLETON_SIZE) {
+ switch (classification) {
+ case MISMATCHING_ALLELE:
+ locusData.nMismatchingAllelesTriDups++;
+ break;
+ case DIFFERENT_ALLELES:
+ locusData.nDifferentAllelesTriDups++;
+ break;
+ case ALTERNATE_ALLELE:
+ locusData.nAlternateAllelesTriDups++;
+ break;
+ case REFERENCE_ALLELE:
+ locusData.nReferenceAllelesTriDups++;
+ break;
+ default:
+ throw new IllegalStateException("Un possible!");
+ }
+ }
+ if (STOP_AFTER > 0 && progress.getCount() > STOP_AFTER) break;
+ }
+ if (useLocus && newLocus) {
+ metric.merge(locusData);
+ log.debug("Merged final metric. nSites:" + metric.nSites);
+ } else {
+ metric.nThreeAllelesSites++;
+ log.debug("didn't merge last metric, due to 3rd allele: nThreeAllelesSites =" + metric.nThreeAllelesSites);
+ }
+
+ log.info("Iteration done. Emitting metrics.");
+
+ // Emit metrics
+ final MetricsFile<IndependentReplicateMetric, Byte> metricsFile = getMetricsFile();
+
+ metric.calculateDerivedFields();
+ metricsFile.addMetric(metric);
+ metricsFile.addHistogram(alleleBalanceCount);
+ metricsFile.addHistogram(umiEditDistanceInDiffBiDups);
+ metricsFile.addHistogram(umiEditDistanceInSameBiDups);
+
+ metricsFile.write(OUTPUT);
+
+ final MetricsFile<?, ComparableTuple<String, String>> confusionMetrics = getMetricsFile();
+
+ if (MATRIX_OUTPUT != null) {
+ confusionMetrics.addHistogram(umiConfusionMatrix);
+ confusionMetrics.addHistogram(umiConfusionMatrixEditDistance);
+ confusionMetrics.write(MATRIX_OUTPUT);
+ }
+
+ return 0;
+ }
+
+ private enum SetClassification {
+ MISMATCHING_ALLELE, // at least one read carried a base counted as "other" (neither ref nor alt)
+ DIFFERENT_ALLELES, // no "other" bases, and both ref and alt bases were counted
+ REFERENCE_ALLELE, // no "other" bases, ref bases were counted and no alt bases
+ ALTERNATE_ALLELE // no "other" bases and no ref bases were counted
+ }
+
+ /**
+ * A small utility that tells whether one interval is cleanly before another, meaning that they do not overlap and
+ * the first is prior (in genomic order) to the second.
+ *
+ * @param lhs the "first" {@link QueryInterval}
+ * @param rhs the "second" {@link QueryInterval}
+ * @return true if the two intervals do not intersect _and_ the first is prior to the second in genomic order
+ */
+ private static boolean isCleanlyBefore(final QueryInterval lhs, final QueryInterval rhs) {
+ return !lhs.overlaps(rhs) && lhs.compareTo(rhs) < 0;
+ }
+
+ private static SetClassification classifySet(final int nRef, final int nAlt, final int nOther) {
+ // if we found any "other" alleles, this is a mismatching set
+ if (nOther != 0) return SetClassification.MISMATCHING_ALLELE;
+
+ // if we found both ref and alt alleles, this is a heterogeneous set
+ if (nAlt > 0 && nRef > 0) return SetClassification.DIFFERENT_ALLELES;
+
+ // if we found no reference alleles, this is an "alternate" set
+ if (nRef == 0) return SetClassification.ALTERNATE_ALLELE;
+
+ // if we found no alternate alleles, this is a "reference" set.
+ if (nAlt == 0) return SetClassification.REFERENCE_ALLELE;
+
+ throw new IllegalAccessError("shouldn't be here!");
+ }
+
+ private static QueryInterval queryIntervalFromSamRecord(final SAMRecord samRecord) { // interval built from the record's reference index, start and end
+ return new QueryInterval(samRecord.getReferenceIndex(), samRecord.getStart(), samRecord.getEnd());
+ }
+
+ /** Counts the positions at which two barcodes differ; the equal-length precondition is checked only by an assert. */
+ private static byte calculateEditDistance(final String lhs, final String rhs) {
+ assert(lhs.length()==rhs.length()); // NOTE(review): not evaluated unless assertions are enabled (-ea)
+ byte tmp = 0; // running count of mismatching positions
+ for (int i = 0; i < rhs.length(); ++i) { // bounded by rhs: a longer lhs has its extra characters ignored
+ if (rhs.charAt(i) != lhs.charAt(i)) ++tmp;
+ }
+ return tmp;
+ }
+
+ private SortedMap<QueryInterval, List<Allele>> getQueryIntervalsMap(final File vcf) {
+
+ final Map<String, Integer> contigIndexMap = new HashMap<>();
+ final VCFFileReader vcfReader = new VCFFileReader(vcf, false);
+
+ // We want to look at unfiltered SNP sites for which the sample is genotyped as a het
+ // with high quality.
+ final CompoundFilter compoundFilter = new CompoundFilter(true);
+ compoundFilter.add(new SnpFilter());
+ compoundFilter.add(new PassingVariantFilter());
+ compoundFilter.add(new GenotypeQualityFilter(MINIMUM_GQ, SAMPLE));
+ compoundFilter.add(new HeterozygosityFilter(true, SAMPLE));
+
+ final Iterator<VariantContext> hetIterator = new FilteringVariantContextIterator(vcfReader.iterator(), compoundFilter);
+
+ for (final VCFContigHeaderLine vcfContig : vcfReader.getFileHeader().getContigLines()) {
+ contigIndexMap.put(vcfContig.getID(), vcfContig.getContigIndex());
+ }
+
+ // return a TreeMap since the keys are comparable, and this will use their order in the iteration
+ final SortedMap<QueryInterval, List<Allele>> map = new TreeMap<>();
+
+ while (hetIterator.hasNext()) {
+ final VariantContext vc = hetIterator.next();
+ map.put(new QueryInterval(contigIndexMap.get(vc.getContig()), vc.getStart(), vc.getEnd()), vc.getGenotype(SAMPLE).getAlleles());
+ }
+
+ vcfReader.close();
+
+ return map;
+ }
+}
diff --git a/src/main/java/picard/analysis/replicates/IndependentReplicateMetric.java b/src/main/java/picard/analysis/replicates/IndependentReplicateMetric.java
new file mode 100644
index 0000000..06db4cc
--- /dev/null
+++ b/src/main/java/picard/analysis/replicates/IndependentReplicateMetric.java
@@ -0,0 +1,222 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.analysis.replicates;
+
+/**
+ * A class to store information relevant for biological rate estimation
+ *
+ * @author Yossi Farjoun
+ */
+public class IndependentReplicateMetric extends MergeableMetricBase {
+
+ // the count of sites used
+ @MergeByAdding
+ public Integer nSites = 0;
+ // the count of sites in which a third allele was found
+ @MergeByAdding
+ public Integer nThreeAllelesSites = 0;
+ // the total number of reads over the het sites
+ @MergeByAdding
+ public Integer nTotalReads = 0;
+ // the number of duplicate sets examined
+ @MergeByAdding
+ public Integer nDuplicateSets = 0;
+ // the number of sets of size exactly 3 found
+ @MergeByAdding
+ public Integer nExactlyTriple = 0;
+ // the number of sets of size exactly 2 found
+ @MergeByAdding
+ public Integer nExactlyDouble = 0;
+ // the number of reads in duplicate sets of size greater than 3
+ @MergeByAdding
+ public Integer nReadsInBigSets = 0;
+ // the number of doubletons where the two reads had different bases in the locus
+ @MergeByAdding
+ public Integer nDifferentAllelesBiDups = 0;
+ // the number of doubletons where the two reads matched the reference
+ @MergeByAdding
+ public Integer nReferenceAllelesBiDups = 0;
+ // the number of doubletons where the two reads matched the alternate
+ @MergeByAdding
+ public Integer nAlternateAllelesBiDups = 0;
+ // the number of tripletons in which both the reference and the alternate allele were seen
+ @MergeByAdding
+ public Integer nDifferentAllelesTriDups = 0;
+ // the number of doubletons where at least one of the reads didn't match either allele of the het site
+ @MergeByAdding
+ public Integer nMismatchingAllelesBiDups = 0;
+ // the number of tripletons where all the reads matched the reference
+ @MergeByAdding
+ public Integer nReferenceAllelesTriDups = 0;
+ // the number of tripletons where all the reads matched the alternate
+ @MergeByAdding
+ public Integer nAlternateAllelesTriDups = 0;
+ // the number of tripletons where at least one of the reads didn't match either allele of the het site
+ @MergeByAdding
+ public Integer nMismatchingAllelesTriDups = 0;
+ // the number of reference alleles in the reads
+ @MergeByAdding
+ public Integer nReferenceReads = 0;
+ // the number of alternate alleles in the reads
+ @MergeByAdding
+ public Integer nAlternateReads = 0;
+ // the number of Bi-sets with different UMIs whose reads come from different Alleles
+ @MergeByAdding
+ public Integer nMismatchingUMIsInDiffBiDups = 0;
+ // the number of Bi-sets with matching UMIs whose reads come from different Alleles
+ @MergeByAdding
+ public Integer nMatchingUMIsInDiffBiDups = 0;
+ // the number of Bi-sets with different UMIs whose reads come from the same Allele
+ @MergeByAdding
+ public Integer nMismatchingUMIsInSameBiDups = 0;
+ // the number of Bi-sets with matching UMIs whose reads come from the same Allele
+ @MergeByAdding
+ public Integer nMatchingUMIsInSameBiDups = 0;
+ // the number of bi-sets with mismatching UMIs and same orientation
+ @MergeByAdding
+ public Integer nMismatchingUMIsInCoOrientedBiDups = 0;
+ // the number of bi-sets with mismatching UMIs and opposite orientation
+ @MergeByAdding
+ public Integer nMismatchingUMIsInContraOrientedBiDups = 0;
+ // the number of sets where the UMIs had poor quality bases and were not used for any comparisons.
+ @MergeByAdding
+ public Integer nBadBarcodes = 0;
+ // the number of sets where the UMIs had good quality bases and were used for comparisons.
+ @MergeByAdding
+ public Integer nGoodBarcodes = 0;
+ // the rate of heterogeneity within doubleton sets
+ @NoMergingIsDerived
+ public Double biSiteHeterogeneityRate = 0.0;
+ // the rate of heterogeneity within tripleton sets
+ @NoMergingIsDerived
+ public Double triSiteHeterogeneityRate = 0.0;
+ // the rate of homogeneity within doubleton sets
+ @NoMergingIsDerived
+ public Double biSiteHomogeneityRate = 0.0;
+ // the rate of homogeneity within tripleton sets
+ @NoMergingIsDerived
+ public Double triSiteHomogeneityRate = 0.0;
+ // the biological duplication rate calculated from doubleton sets
+ @NoMergingIsDerived
+ public Double independentReplicationRateFromBiDups = 0.0;
+ // the biological duplication rate calculated from tripleton sets
+ @NoMergingIsDerived
+ public Double independentReplicationRateFromTriDups = 0.0;
+ // when the alleles are different, we know that this is a biological duplication, thus we expect nearly all
+ // the UMIs to be different (allowing for equality due to chance). So we expect this to be near 0.
+ @NoMergingIsDerived
+ public Double pSameUmiInIndependentBiDup = 0.0;
+ // when the UMIs mismatch, we expect about the same number of different alleles as the same (assuming
+ // that a different UMI implies a biological duplicate). Thus, this value should be near 0.5
+ @NoMergingIsDerived
+ public Double pSameAlleleWhenMismatchingUmi = 0.0;
+ // given the UMIs one can estimate the rate of biological duplication directly, as this would be the
+ // rate of having different UMIs in all duplicate sets. This is only a good estimate if the assumptions hold, for example if pSameUmiInIndependentBiDup is near 0.
+ @NoMergingIsDerived
+ public Double independentReplicationRateFromUmi = 0.0;
+ // an estimate of the duplication rate that is based on the duplicate sets we observed
+ @NoMergingIsDerived
+ public Double replicationRateFromReplicateSets = 0.0;
+
+ @Override
+ public void calculateDerivedFields() {
+ // In doubleton sets, the rate of different alleles over het sites is half the replication rate.
+ biSiteHeterogeneityRate = nDifferentAllelesBiDups / (double) (nDifferentAllelesBiDups + nAlternateAllelesBiDups + nReferenceAllelesBiDups);
+ biSiteHomogeneityRate = 1 - biSiteHeterogeneityRate;
+
+ this.independentReplicationRateFromBiDups = 2 * biSiteHeterogeneityRate;
+
+ // in tripleton sets, the calculation is a little more complicated....see below
+ triSiteHeterogeneityRate = nDifferentAllelesTriDups / (double) (nDifferentAllelesTriDups + nAlternateAllelesTriDups + nReferenceAllelesTriDups);
+ triSiteHomogeneityRate = 1 - triSiteHeterogeneityRate;
+ independentReplicationRateFromTriDups = 2 * (1 - Math.sqrt(triSiteHomogeneityRate));
+
+ // Some more metric collection here:
+ pSameUmiInIndependentBiDup = nMatchingUMIsInDiffBiDups / (double) (nMismatchingUMIsInDiffBiDups + nMatchingUMIsInDiffBiDups);
+ pSameAlleleWhenMismatchingUmi = nMismatchingUMIsInSameBiDups / (double) (nMismatchingUMIsInSameBiDups + nMismatchingUMIsInDiffBiDups);
+ independentReplicationRateFromUmi = (nMismatchingUMIsInDiffBiDups + nMismatchingUMIsInSameBiDups) / (double) nExactlyDouble;
+ // big sets are the duplicate sets that are neither doubletons nor tripletons; each set of size k contributes k - 1 replicates
+ final int numberOfBigSets = nDuplicateSets - nExactlyDouble - nExactlyTriple;
+ replicationRateFromReplicateSets = (nExactlyDouble + nExactlyTriple * 2 + nReadsInBigSets - numberOfBigSets) / (double) nTotalReads;
+ }
+}
+
+/*
+Explanation of calculation of independent replication rate from heterozygosity rate (within triplicate sets):
+
+We assume that there are two types of replication events:
+
+- those that are "independent", such that we just happen to get 2 fragments from the exact same region
+(These get a random allele so effectively change the allele with probability 0.5), and
+- those that are "artifactual" (do not change the allele)
+
+We skip sets that have unexpected alleles as they do not fit our model. In the following we use the term "duplicates" to indicate that
+the read-pairs would be marked as duplicates, not that they actually are technical duplicates.
+
+To reach a triplicate set, 2 replication events are required so there are the following options, assuming
+that the independent replication rate is x (thus the artifactual is 1-x). We assume a diploid organism with no bias towards either allele
+in heterozygous sites, so an independent replication will result in the other allele in half the cases:
+
+0 -> 0, 1 happens with probability x/2, therefore
+0 -> 0, 0 happens with probability 1-x/2
+
+(This is all that is required for calculating the independent replication rate from doubleton sets, which is much simpler.)
+
+Without loss of generality we assume that we "start" with allele 0 and that 1 is the other allele.
+
+Each of the resulting alleles ("First" or "second") can replicate (each with probability 0.5) so we get:
+
+from first row:
+0=>1 1 (0 replicate to a 1) with probability x/2 * 0.5 * x/2
+0=>0 1 (0 replicate to a 0) with probability x/2 * 0.5 * (1-x/2)
+=====subtotal = x/4
+
+0 1=>0 (1 replicate to a 0) with probability x/2 * 0.5 * x/2
+0 1=>1 (1 replicate to a 1) with probability x/2 * 0.5 * (1-x/2)
+=====subtotal = x/4
+
+from second row:
+0=>1 0 (first 0 replicate to a 1) with probability (1-x/2) * 0.5 * x/2
+0=>0 0 (first 0 replicate to a 0) with probability (1-x/2) * 0.5 * (1-x/2) <======= Homogeneous set
+=====subtotal = (1-x/2)/2
+
+0 0=>1 (second 0 replicate to a 1) with probability (1-x/2) * 0.5 * x/2
+0 0=>0 (second 0 replicate to a 0) with probability (1-x/2) * 0.5 * (1-x/2) <======= Homogeneous set
+=====subtotal = (1-x/2)/2
+
+
+total is x/2 (from first two sub-totals) + (1-x/2) (from last two sub-totals) = 1
+
+We differentiate between a heterogeneous result (with 0's and 1's in the set) and homogeneous results
+(all zeros, since we assumed WLOG that we start with 0)
+The probability of a homogeneous set is therefore the sum of the two only homogeneous results
+
+P(hom) = (1-x/2) * 0.5 * (1-x/2) + (1-x/2) * 0.5 * (1-x/2) = (1-x/2)^2 = y
+
+where y = P(hom) is the rate of homogeneity within triplicate sets.
+
+Solving for x we find that 2 * ( 1 - sqrt(y) ) = x.
+
+*/
diff --git a/src/main/java/picard/analysis/replicates/MergeableMetricBase.java b/src/main/java/picard/analysis/replicates/MergeableMetricBase.java
new file mode 100644
index 0000000..0f94d24
--- /dev/null
+++ b/src/main/java/picard/analysis/replicates/MergeableMetricBase.java
@@ -0,0 +1,189 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.analysis.replicates;
+
+import htsjdk.samtools.metrics.MetricBase;
+
+import java.lang.annotation.Annotation;
+import java.lang.annotation.ElementType;
+import java.lang.annotation.Retention;
+import java.lang.annotation.RetentionPolicy;
+import java.lang.annotation.Target;
+import java.lang.reflect.Field;
+
+/**
+ * An extension of MetricBase that knows how to merge-by-adding fields that are appropriately annotated. It also provides an interface
+ * for calculating derived fields (and an annotation that informs that said fields are derived). Finally, it also allows for an annotation
+ * that suggests that a field will be used as an ID and thus merging will simply require that these fields are equal.
+ *
+ * merge-by-adding is only enabled for the following types: int, Integer, float, Float, double, Double, short, Short, long, Long, byte, Byte.
+ * Overflow will be detected (for the short, and byte types) and an exception thrown.
+ *
+ * @author Yossi Farjoun
+ */
+public class MergeableMetricBase extends MetricBase {
+ /** Marks a field that merge() combines by adding the values of the two instances. */
+ @Retention(RetentionPolicy.RUNTIME)
+ @Target(ElementType.FIELD)
+ protected @interface MergeByAdding {}
+ /** Marks a field whose value must be equal in both instances for them to be merged. */
+ @Retention(RetentionPolicy.RUNTIME)
+ @Target(ElementType.FIELD)
+ protected @interface MergeByAssertEquals {}
+ /** Marks a field that merge() leaves untouched because it is derived from the other fields. */
+ @Retention(RetentionPolicy.RUNTIME)
+ @Target(ElementType.FIELD)
+ protected @interface NoMergingIsDerived {}
+
+ /** Checks if this instance can be merged with another.
+ *
+ * Other must have all the fields that this instance has, and
+ * the fields that are annotated as MergeByAssertEquals must contain the same value.
+ *
+ * @param other metric that will be merged into this one.
+ * @return true if the other metric can be merged into this one.
+ */
+ public boolean canMerge(final MergeableMetricBase other) {
+
+ try {
+ for (final Field field : this.getClass().getDeclaredFields()) {
+ if (field.isSynthetic()) continue;
+
+ // try to get the field from other; this throws if the other instance doesn't have the field
+ field.get(other);
+
+ final Annotation[] equalAnnotations = field.getAnnotationsByType(MergeByAssertEquals.class);
+ if (equalAnnotations.length != 0) {
+ if (!field.get(this).equals(field.get(other))) {
+ return false;
+ }
+ }
+ }
+ } catch (final Exception e) {
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Merges another MergeableMetricBase into this one if possible.
+ *
+ * @param other another MergeableMetricBase instance to merge, must be of the same class as this.
+ * @return true if the other metric was merged into this one, false if it could not be merged.
+ */
+ public boolean mergeIfCan(final MergeableMetricBase other) {
+
+ if(canMerge(other)) {
+ merge(other);
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
+
+ /**
+ * Merge another metric into this one, field by field, according to the annotation on each declared field.
+ * @param other metric to merge into this one.
+ * @throws IllegalStateException if a field is unannotated or inaccessible, or if a @MergeByAssertEquals field differs.
+ */
+ public void merge(final MergeableMetricBase other) {
+
+ for (final Field field : this.getClass().getDeclaredFields()) {
+ if(field.isSynthetic()) continue;
+
+ if (field.getAnnotationsByType(MergeByAdding.class).length +
+ field.getAnnotationsByType(MergeByAssertEquals.class).length +
+ field.getAnnotationsByType(NoMergingIsDerived.class).length == 0) {
+ throw new IllegalStateException("All fields of this class must be annotated with @MergeByAdding, @NoMergingIsDerived, or @MergeByAssertEquals. " +
+ "Field " + field.getName() + " isn't annotated.");
+ }
+
+ final Annotation[] summableAnnotations = field.getAnnotationsByType(MergeByAdding.class);
+ if (summableAnnotations.length != 0) {
+ try {
+ if (field.getType() == Integer.class) {
+ field.set(this, (Integer) field.get(this) + (Integer) field.get(other));
+ } else if (field.getType() == int.class) {
+ field.set(this, (int) field.get(this) + (int) field.get(other));
+ } else if (field.getType() == Float.class) {
+ field.set(this, (Float) field.get(this) + (Float) field.get(other));
+ } else if (field.getType() == float.class) {
+ field.set(this, (float) field.get(this) + (float) field.get(other));
+ } else if (field.getType() == Double.class) {
+ field.set(this, (Double) field.get(this) + (Double) field.get(other));
+ } else if (field.getType() == double.class) {
+ field.set(this, (double) field.get(this) + (double) field.get(other));
+ } else if (field.getType() == Long.class) {
+ field.set(this, (Long) field.get(this) + (Long) field.get(other));
+ } else if (field.getType() == long.class) {
+ field.set(this, (long) field.get(this) + (long) field.get(other));
+ } else if (field.getType() == Byte.class) {
+ final Integer result = (Byte) field.get(this) + (Byte) field.get(other);
+ if (result > Byte.MAX_VALUE || result < Byte.MIN_VALUE) // the sum is computed as an int, so both directions are detectable
+ throw new IllegalArgumentException("Overflow detected in adding " + field.get(this) + " to " + field.get(other));
+ field.set(this, (byte) (int) result);
+ } else if (field.getType() == byte.class) {
+ final int result = (byte) field.get(this) + (byte) field.get(other);
+ if (result > Byte.MAX_VALUE || result < Byte.MIN_VALUE)
+ throw new IllegalArgumentException("Overflow detected in adding " + field.get(this) + " to " + field.get(other));
+ field.set(this, (byte) result);
+ } else if (field.getType() == Short.class) {
+ final Integer result = (Short) field.get(this) + (Short) field.get(other);
+ if (result > Short.MAX_VALUE || result < Short.MIN_VALUE)
+ throw new IllegalArgumentException("Overflow detected in adding " + field.get(this) + " to " + field.get(other));
+ field.set(this, (Short) (short) (int) result);
+ } else if (field.getType() == short.class) {
+ final Integer result = (short) field.get(this) + (short) field.get(other);
+ if (result > Short.MAX_VALUE || result < Short.MIN_VALUE)
+ throw new IllegalArgumentException("Overflow detected in adding " + field.get(this) + " to " + field.get(other));
+ field.set(this, (short) (int) result);
+ } else
+ throw new IllegalArgumentException("I don't know how to MergeByAdding type " + field.getType().getCanonicalName() +
+ " of field " + field.getName() + ", please teach me!");
+ } catch (final IllegalAccessException e) {
+ throw new IllegalStateException("Could not access field " + field.getName() + " while merging; the merge is incomplete.", e);
+ }
+ }
+
+ final Annotation[] equalAnnotations = field.getAnnotationsByType(MergeByAssertEquals.class);
+ if (equalAnnotations.length != 0) {
+ try {
+ if (!field.get(this).equals(field.get(other))) {
+ throw new IllegalStateException("Field " + field.getName() +
+ " is annotated as @MergeByAssertEquals, but found two different values: " + field.get(this) + " & " + field.get(other));
+ }
+ } catch (final IllegalAccessException e) {
+ throw new IllegalStateException("Could not access field " + field.getName() + " while checking equality for merging.", e);
+ }
+ }
+ }
+ }
+
+ /**
+ * Placeholder method that will calculate the derived fields from the other ones. Classes that are derived from non-trivial base classes
+ * should consider calling super.calculateDerivedFields() as well.
+ */
+ public void calculateDerivedFields(){}
+}
diff --git a/src/java/picard/annotation/AnnotationException.java b/src/main/java/picard/annotation/AnnotationException.java
similarity index 100%
rename from src/java/picard/annotation/AnnotationException.java
rename to src/main/java/picard/annotation/AnnotationException.java
diff --git a/src/java/picard/annotation/Gene.java b/src/main/java/picard/annotation/Gene.java
similarity index 100%
rename from src/java/picard/annotation/Gene.java
rename to src/main/java/picard/annotation/Gene.java
diff --git a/src/java/picard/annotation/GeneAnnotationReader.java b/src/main/java/picard/annotation/GeneAnnotationReader.java
similarity index 100%
rename from src/java/picard/annotation/GeneAnnotationReader.java
rename to src/main/java/picard/annotation/GeneAnnotationReader.java
diff --git a/src/java/picard/annotation/LocusFunction.java b/src/main/java/picard/annotation/LocusFunction.java
similarity index 100%
rename from src/java/picard/annotation/LocusFunction.java
rename to src/main/java/picard/annotation/LocusFunction.java
diff --git a/src/java/picard/annotation/RefFlatReader.java b/src/main/java/picard/annotation/RefFlatReader.java
similarity index 100%
rename from src/java/picard/annotation/RefFlatReader.java
rename to src/main/java/picard/annotation/RefFlatReader.java
diff --git a/src/java/picard/cmdline/ClassFinder.java b/src/main/java/picard/cmdline/ClassFinder.java
similarity index 100%
rename from src/java/picard/cmdline/ClassFinder.java
rename to src/main/java/picard/cmdline/ClassFinder.java
diff --git a/src/java/picard/cmdline/CommandLineDefaults.java b/src/main/java/picard/cmdline/CommandLineDefaults.java
similarity index 91%
rename from src/java/picard/cmdline/CommandLineDefaults.java
rename to src/main/java/picard/cmdline/CommandLineDefaults.java
index f9ebe79..1e9e54a 100644
--- a/src/java/picard/cmdline/CommandLineDefaults.java
+++ b/src/main/java/picard/cmdline/CommandLineDefaults.java
@@ -28,13 +28,13 @@ public class CommandLineDefaults {
/** Gets a boolean system property, prefixed with "picard.cmdline." using the default if the property does not exist. */
private static boolean getBooleanProperty(final String name, final boolean def) {
- final String value = getStringProperty(name, new Boolean(def).toString());
+ final String value = getStringProperty(name, String.valueOf(def));
return Boolean.parseBoolean(value);
}
/** Gets an int system property, prefixed with "picard.cmdline." using the default if the property does not exist. */
private static int getIntProperty(final String name, final int def) {
- final String value = getStringProperty(name, new Integer(def).toString());
+ final String value = getStringProperty(name, String.valueOf(def));
return Integer.parseInt(value);
}
diff --git a/src/java/picard/cmdline/CommandLineParseException.java b/src/main/java/picard/cmdline/CommandLineParseException.java
similarity index 100%
rename from src/java/picard/cmdline/CommandLineParseException.java
rename to src/main/java/picard/cmdline/CommandLineParseException.java
diff --git a/src/java/picard/cmdline/CommandLineParser.java b/src/main/java/picard/cmdline/CommandLineParser.java
similarity index 99%
rename from src/java/picard/cmdline/CommandLineParser.java
rename to src/main/java/picard/cmdline/CommandLineParser.java
index ee90b44..eb2732f 100644
--- a/src/java/picard/cmdline/CommandLineParser.java
+++ b/src/main/java/picard/cmdline/CommandLineParser.java
@@ -486,13 +486,10 @@ public class CommandLineParser {
}
final String[] pair = arg.split("=", 2);
- if (pair.length == 2 && pair[1].isEmpty()) {
-
- if (i < args.length - 1) {
+ if (pair.length == 2) {
+ if (pair[1].isEmpty() && i < args.length - 1) {
pair[1] = args[++i];
}
- }
- if (pair.length == 2) {
if (!parseOption(pair[0], pair[1], false)) {
messageStream.println();
usage(messageStream, true);
@@ -675,11 +672,9 @@ public class CommandLineParser {
this.optionsThatCannotBeOverridden.add(optionDefinition.name);
}
- if (!optionDefinition.isCollection) {
- if (optionDefinition.hasBeenSet && !optionDefinition.hasBeenSetFromOptionsFile) {
- messageStream.println("ERROR: Option '" + key + "' cannot be specified more than once.");
- return false;
- }
+ if (!optionDefinition.isCollection && optionDefinition.hasBeenSet && !optionDefinition.hasBeenSetFromOptionsFile) {
+ messageStream.println("ERROR: Option '" + key + "' cannot be specified more than once.");
+ return false;
}
final Object value;
try {
@@ -979,7 +974,7 @@ public class CommandLineParser {
if (!optionDefinition.overridable && optionMap.containsKey(optionDefinition.name)) {
throw new CommandLineParserDefinitionException(optionDefinition.name + " has already been used.");
}
- if (!optionDefinition.shortName.isEmpty()) {
+ if (!optionDefinition.shortName.isEmpty() && !optionDefinition.shortName.equals(optionDefinition.name)) {
if (optionMap.containsKey(optionDefinition.shortName)) {
if (!optionDefinition.overridable) {
throw new CommandLineParserDefinitionException(optionDefinition.shortName +
diff --git a/src/java/picard/cmdline/CommandLineParserDefinitionException.java b/src/main/java/picard/cmdline/CommandLineParserDefinitionException.java
similarity index 100%
rename from src/java/picard/cmdline/CommandLineParserDefinitionException.java
rename to src/main/java/picard/cmdline/CommandLineParserDefinitionException.java
diff --git a/src/java/picard/cmdline/CommandLineProgram.java b/src/main/java/picard/cmdline/CommandLineProgram.java
similarity index 99%
rename from src/java/picard/cmdline/CommandLineProgram.java
rename to src/main/java/picard/cmdline/CommandLineProgram.java
index 231e4d9..02a5965 100644
--- a/src/java/picard/cmdline/CommandLineProgram.java
+++ b/src/main/java/picard/cmdline/CommandLineProgram.java
@@ -198,8 +198,7 @@ public abstract class CommandLineProgram {
" on " + System.getProperty("os.name") + " " + System.getProperty("os.version") +
" " + System.getProperty("os.arch") + "; " + System.getProperty("java.vm.name") +
" " + System.getProperty("java.runtime.version") +
- "; Picard version: " + commandLineParser.getVersion() +
- " " + (DeflaterFactory.usingIntelDeflater()? "IntelDeflater": "JdkDeflater"));
+ "; Picard version: " + commandLineParser.getVersion());
}
catch (Exception e) { /* Unpossible! */ }
}
diff --git a/src/java/picard/cmdline/CommandLineProgramGroup.java b/src/main/java/picard/cmdline/CommandLineProgramGroup.java
similarity index 100%
rename from src/java/picard/cmdline/CommandLineProgramGroup.java
rename to src/main/java/picard/cmdline/CommandLineProgramGroup.java
diff --git a/src/java/picard/cmdline/CommandLineProgramProperties.java b/src/main/java/picard/cmdline/CommandLineProgramProperties.java
similarity index 100%
rename from src/java/picard/cmdline/CommandLineProgramProperties.java
rename to src/main/java/picard/cmdline/CommandLineProgramProperties.java
diff --git a/src/java/picard/cmdline/CreateHtmlDocForProgram.java b/src/main/java/picard/cmdline/CreateHtmlDocForProgram.java
similarity index 100%
rename from src/java/picard/cmdline/CreateHtmlDocForProgram.java
rename to src/main/java/picard/cmdline/CreateHtmlDocForProgram.java
diff --git a/src/java/picard/cmdline/CreateHtmlDocForStandardOptions.java b/src/main/java/picard/cmdline/CreateHtmlDocForStandardOptions.java
similarity index 100%
rename from src/java/picard/cmdline/CreateHtmlDocForStandardOptions.java
rename to src/main/java/picard/cmdline/CreateHtmlDocForStandardOptions.java
diff --git a/src/java/picard/cmdline/NestedOptions.java b/src/main/java/picard/cmdline/NestedOptions.java
similarity index 100%
rename from src/java/picard/cmdline/NestedOptions.java
rename to src/main/java/picard/cmdline/NestedOptions.java
diff --git a/src/java/picard/cmdline/Option.java b/src/main/java/picard/cmdline/Option.java
similarity index 100%
rename from src/java/picard/cmdline/Option.java
rename to src/main/java/picard/cmdline/Option.java
diff --git a/src/java/picard/cmdline/PicardCommandLine.java b/src/main/java/picard/cmdline/PicardCommandLine.java
similarity index 100%
rename from src/java/picard/cmdline/PicardCommandLine.java
rename to src/main/java/picard/cmdline/PicardCommandLine.java
diff --git a/src/java/picard/cmdline/PositionalArguments.java b/src/main/java/picard/cmdline/PositionalArguments.java
similarity index 100%
rename from src/java/picard/cmdline/PositionalArguments.java
rename to src/main/java/picard/cmdline/PositionalArguments.java
diff --git a/src/java/picard/cmdline/StandardOptionDefinitions.java b/src/main/java/picard/cmdline/StandardOptionDefinitions.java
similarity index 97%
rename from src/java/picard/cmdline/StandardOptionDefinitions.java
rename to src/main/java/picard/cmdline/StandardOptionDefinitions.java
index 4620800..f906990 100644
--- a/src/java/picard/cmdline/StandardOptionDefinitions.java
+++ b/src/main/java/picard/cmdline/StandardOptionDefinitions.java
@@ -38,6 +38,7 @@ public class StandardOptionDefinitions {
public static final String SEQUENCE_DICTIONARY_SHORT_NAME = "SD";
public static final String METRICS_FILE_SHORT_NAME = "M";
public static final String ASSUME_SORTED_SHORT_NAME = "AS";
+ public static final String ASSUME_SORT_ORDER_SHORT_NAME = "ASO";
public static final String PF_READS_ONLY_SHORT_NAME = "PF";
public static final String MINIMUM_MAPPING_QUALITY_SHORT_NAME = "MQ";
public static final String READ_GROUP_ID_SHORT_NAME = "RG";
diff --git a/src/java/picard/cmdline/programgroups/Alpha.java b/src/main/java/picard/cmdline/programgroups/Alpha.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/Alpha.java
rename to src/main/java/picard/cmdline/programgroups/Alpha.java
diff --git a/src/java/picard/cmdline/programgroups/Fasta.java b/src/main/java/picard/cmdline/programgroups/Fasta.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/Fasta.java
rename to src/main/java/picard/cmdline/programgroups/Fasta.java
diff --git a/src/java/picard/cmdline/programgroups/Illumina.java b/src/main/java/picard/cmdline/programgroups/Illumina.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/Illumina.java
rename to src/main/java/picard/cmdline/programgroups/Illumina.java
diff --git a/src/java/picard/cmdline/programgroups/Intervals.java b/src/main/java/picard/cmdline/programgroups/Intervals.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/Intervals.java
rename to src/main/java/picard/cmdline/programgroups/Intervals.java
diff --git a/src/java/picard/cmdline/programgroups/Metrics.java b/src/main/java/picard/cmdline/programgroups/Metrics.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/Metrics.java
rename to src/main/java/picard/cmdline/programgroups/Metrics.java
diff --git a/src/java/picard/cmdline/programgroups/None.java b/src/main/java/picard/cmdline/programgroups/None.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/None.java
rename to src/main/java/picard/cmdline/programgroups/None.java
diff --git a/src/java/picard/cmdline/programgroups/SamOrBam.java b/src/main/java/picard/cmdline/programgroups/SamOrBam.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/SamOrBam.java
rename to src/main/java/picard/cmdline/programgroups/SamOrBam.java
diff --git a/src/java/picard/cmdline/programgroups/Testing.java b/src/main/java/picard/cmdline/programgroups/Testing.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/Testing.java
rename to src/main/java/picard/cmdline/programgroups/Testing.java
diff --git a/src/java/picard/cmdline/programgroups/VcfOrBcf.java b/src/main/java/picard/cmdline/programgroups/VcfOrBcf.java
similarity index 100%
rename from src/java/picard/cmdline/programgroups/VcfOrBcf.java
rename to src/main/java/picard/cmdline/programgroups/VcfOrBcf.java
diff --git a/src/java/picard/fastq/BamToBfq.java b/src/main/java/picard/fastq/BamToBfq.java
similarity index 100%
rename from src/java/picard/fastq/BamToBfq.java
rename to src/main/java/picard/fastq/BamToBfq.java
diff --git a/src/java/picard/fastq/BamToBfqWriter.java b/src/main/java/picard/fastq/BamToBfqWriter.java
similarity index 100%
rename from src/java/picard/fastq/BamToBfqWriter.java
rename to src/main/java/picard/fastq/BamToBfqWriter.java
diff --git a/src/java/picard/fastq/Casava18ReadNameEncoder.java b/src/main/java/picard/fastq/Casava18ReadNameEncoder.java
similarity index 59%
rename from src/java/picard/fastq/Casava18ReadNameEncoder.java
rename to src/main/java/picard/fastq/Casava18ReadNameEncoder.java
index eacae48..439ca1a 100644
--- a/src/java/picard/fastq/Casava18ReadNameEncoder.java
+++ b/src/main/java/picard/fastq/Casava18ReadNameEncoder.java
@@ -28,19 +28,16 @@ public class Casava18ReadNameEncoder implements ReadNameEncoder {
@Override
public String generateReadName(final ClusterData cluster, final Integer pairNumber) {
- return String.format(
- "%s:%s:%s:%d:%d:%d:%d %s:%s:%d:%s",
- instrumentName,
- runId,
- flowcellId,
- cluster.getLane(),
- cluster.getTile(),
- cluster.getX(),
- cluster.getY(),
- StringUtil.asEmptyIfNull(pairNumber),
- IsFilteredLabel.get(cluster.isPf()),
- CONTROL_FIELD_VALUE,
- StringUtil.asEmptyIfNull(cluster.getMatchedBarcode())
- );
+ return new StringBuilder().append(instrumentName).append(":")
+ .append(runId).append(":")
+ .append(flowcellId).append(":")
+ .append(cluster.getLane()).append(":")
+ .append(cluster.getTile()).append(":")
+ .append(cluster.getX()).append(":")
+ .append(cluster.getY()).append(" ")
+ .append(StringUtil.asEmptyIfNull(pairNumber)).append(":")
+ .append(IsFilteredLabel.get(cluster.isPf())).append(":")
+ .append(CONTROL_FIELD_VALUE).append(":")
+ .append(StringUtil.asEmptyIfNull(cluster.getMatchedBarcode())).toString();
}
}
diff --git a/src/java/picard/fastq/IlluminaReadNameEncoder.java b/src/main/java/picard/fastq/IlluminaReadNameEncoder.java
similarity index 100%
rename from src/java/picard/fastq/IlluminaReadNameEncoder.java
rename to src/main/java/picard/fastq/IlluminaReadNameEncoder.java
diff --git a/src/java/picard/fastq/ReadNameEncoder.java b/src/main/java/picard/fastq/ReadNameEncoder.java
similarity index 100%
rename from src/java/picard/fastq/ReadNameEncoder.java
rename to src/main/java/picard/fastq/ReadNameEncoder.java
diff --git a/src/java/picard/filter/CountingDuplicateFilter.java b/src/main/java/picard/filter/CountingDuplicateFilter.java
similarity index 100%
rename from src/java/picard/filter/CountingDuplicateFilter.java
rename to src/main/java/picard/filter/CountingDuplicateFilter.java
diff --git a/src/java/picard/filter/CountingFilter.java b/src/main/java/picard/filter/CountingFilter.java
similarity index 100%
rename from src/java/picard/filter/CountingFilter.java
rename to src/main/java/picard/filter/CountingFilter.java
diff --git a/src/java/picard/filter/CountingMapQFilter.java b/src/main/java/picard/filter/CountingMapQFilter.java
similarity index 100%
rename from src/java/picard/filter/CountingMapQFilter.java
rename to src/main/java/picard/filter/CountingMapQFilter.java
diff --git a/src/java/picard/filter/CountingPairedFilter.java b/src/main/java/picard/filter/CountingPairedFilter.java
similarity index 100%
rename from src/java/picard/filter/CountingPairedFilter.java
rename to src/main/java/picard/filter/CountingPairedFilter.java
diff --git a/src/java/picard/fingerprint/CheckFingerprint.java b/src/main/java/picard/fingerprint/CheckFingerprint.java
similarity index 100%
rename from src/java/picard/fingerprint/CheckFingerprint.java
rename to src/main/java/picard/fingerprint/CheckFingerprint.java
diff --git a/src/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java b/src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
similarity index 100%
rename from src/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
rename to src/main/java/picard/fingerprint/CrosscheckReadGroupFingerprints.java
diff --git a/src/java/picard/fingerprint/DiploidGenotype.java b/src/main/java/picard/fingerprint/DiploidGenotype.java
similarity index 100%
rename from src/java/picard/fingerprint/DiploidGenotype.java
rename to src/main/java/picard/fingerprint/DiploidGenotype.java
diff --git a/src/java/picard/fingerprint/DiploidHaplotype.java b/src/main/java/picard/fingerprint/DiploidHaplotype.java
similarity index 100%
rename from src/java/picard/fingerprint/DiploidHaplotype.java
rename to src/main/java/picard/fingerprint/DiploidHaplotype.java
diff --git a/src/java/picard/fingerprint/Fingerprint.java b/src/main/java/picard/fingerprint/Fingerprint.java
similarity index 100%
rename from src/java/picard/fingerprint/Fingerprint.java
rename to src/main/java/picard/fingerprint/Fingerprint.java
diff --git a/src/java/picard/fingerprint/FingerprintChecker.java b/src/main/java/picard/fingerprint/FingerprintChecker.java
similarity index 93%
rename from src/java/picard/fingerprint/FingerprintChecker.java
rename to src/main/java/picard/fingerprint/FingerprintChecker.java
index 9b5edff..2cc6fba 100644
--- a/src/java/picard/fingerprint/FingerprintChecker.java
+++ b/src/main/java/picard/fingerprint/FingerprintChecker.java
@@ -138,10 +138,10 @@ public class FingerprintChecker {
SequenceUtil.assertSequenceDictionariesEqual(this.haplotypes.getHeader().getSequenceDictionary(),
iterator.getSequenceDictionary());
- final Map<String, Fingerprint> fingerprints = new HashMap<String, Fingerprint>();
+ final Map<String, Fingerprint> fingerprints = new HashMap<>();
Set<String> samples = null;
if (specificSample != null) {
- samples = new HashSet<String>();
+ samples = new HashSet<>();
samples.add(specificSample);
}
@@ -264,9 +264,7 @@ public class FingerprintChecker {
}
}
- intervals.sort();
- intervals.unique();
- return intervals;
+ return intervals.uniqued();
}
/**
@@ -288,12 +286,12 @@ public class FingerprintChecker {
// sequence data where the duplicate marking may have been overly aggressive, and there is useful
// non-redundant data in the reads marked as "duplicates'.
if (this.allowDuplicateReads) {
- final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>(1);
+ final List<SamRecordFilter> filters = new ArrayList<>(1);
filters.add(new NotPrimaryAlignmentFilter());
iterator.setSamFilters(filters);
}
- final Map<SAMReadGroupRecord, Fingerprint> fingerprintsByReadGroup = new HashMap<SAMReadGroupRecord, Fingerprint>();
+ final Map<SAMReadGroupRecord, Fingerprint> fingerprintsByReadGroup = new HashMap<>();
final List<SAMReadGroupRecord> rgs = in.getFileHeader().getReadGroups();
for (final SAMReadGroupRecord rg : rgs) {
@@ -364,7 +362,7 @@ public class FingerprintChecker {
// sequence data where the duplicate marking may have been overly aggressive, and there is useful
// non-redundant data in the reads marked as "duplicates'.
if (this.allowDuplicateReads) {
- final List<SamRecordFilter> filters = new ArrayList<SamRecordFilter>(1);
+ final List<SamRecordFilter> filters = new ArrayList<>(1);
filters.add(new NotPrimaryAlignmentFilter());
iterator.setSamFilters(filters);
}
@@ -460,23 +458,21 @@ public class FingerprintChecker {
final AtomicInteger filesRead = new AtomicInteger(0);
final ExecutorService executor = Executors.newFixedThreadPool(threads);
final IntervalList intervals = this.haplotypes.getIntervalList();
- final Map<SAMReadGroupRecord, Fingerprint> retval = new ConcurrentHashMap<SAMReadGroupRecord, Fingerprint>();
+ final Map<SAMReadGroupRecord, Fingerprint> retval = new ConcurrentHashMap<>();
for (final File f : files) {
- executor.submit(new Runnable() {
- @Override public void run() {
- retval.putAll(fingerprintSamFile(f, intervals));
+ executor.submit(() -> {
+ retval.putAll(fingerprintSamFile(f, intervals));
- if (filesRead.incrementAndGet() % 100 == 0) {
- log.info("Processed " + filesRead.get() + " out of " + files.size());
- }
+ if (filesRead.incrementAndGet() % 100 == 0) {
+ log.info("Processed " + filesRead.get() + " out of " + files.size());
}
});
}
executor.shutdown();
try { executor.awaitTermination(waitTime, waitTimeUnit); }
- catch (InterruptedException ie) { log.warn(ie, "Interrupted while waiting for executor to terminate."); }
+ catch (final InterruptedException ie) { log.warn(ie, "Interrupted while waiting for executor to terminate."); }
return retval;
}
@@ -486,7 +482,7 @@ public class FingerprintChecker {
* by samples and totals up the probabilities.
*/
static public SortedMap<String, Fingerprint> mergeFingerprintsBySample(final Collection<Fingerprint> inputs) {
- final SortedMap<String, Fingerprint> sampleFps = new TreeMap<String, Fingerprint>();
+ final SortedMap<String, Fingerprint> sampleFps = new TreeMap<>();
for (final Fingerprint fp : inputs) {
Fingerprint sampleFp = sampleFps.get(fp.getSample());
if (sampleFp == null) {
@@ -515,7 +511,7 @@ public class FingerprintChecker {
final String specificSample,
final boolean ignoreReadGroups) {
// Load the fingerprint genotypes
- final List<Fingerprint> expectedFingerprints = new LinkedList<Fingerprint>();
+ final List<Fingerprint> expectedFingerprints = new LinkedList<>();
for (final File f : genotypeFiles) {
expectedFingerprints.addAll(loadFingerprints(f, specificSample).values());
}
@@ -524,7 +520,7 @@ public class FingerprintChecker {
throw new IllegalStateException("Could not find any fingerprints in: " + genotypeFiles);
}
- final List<FingerprintResults> resultsList = new ArrayList<FingerprintResults>();
+ final List<FingerprintResults> resultsList = new ArrayList<>();
final IntervalList intervals = getLociToGenotype(expectedFingerprints);
// Fingerprint the SAM files and calculate the results
@@ -533,7 +529,7 @@ public class FingerprintChecker {
if (ignoreReadGroups) {
final Fingerprint combinedFp = new Fingerprint(specificSample, f, null);
- for (final Fingerprint observedFp : fingerprintsByReadGroup.values()) combinedFp.merge(observedFp);
+ fingerprintsByReadGroup.values().forEach(combinedFp::merge);
final FingerprintResults results = new FingerprintResults(f, specificSample);
for (final Fingerprint expectedFp : expectedFingerprints) {
@@ -571,7 +567,7 @@ public class FingerprintChecker {
* lExpectedSample is Max(actualpExpectedSample, minPExpected).
*/
public static MatchResults calculateMatchResults(final Fingerprint observedFp, final Fingerprint expectedFp, final double minPExpected, final double pLoH) {
- final List<LocusResult> locusResults = new ArrayList<LocusResult>();
+ final List<LocusResult> locusResults = new ArrayList<>();
double llThisSample = 0;
double llOtherSample = 0;
@@ -586,23 +582,32 @@ public class FingerprintChecker {
final HaplotypeProbabilities probs1 = observedFp.get(haplotypeBlock);
if (probs1 == null) continue;
- final HaplotypeProbabilityOfNormalGivenTumor normalizedProbs1 = new HaplotypeProbabilityOfNormalGivenTumor(probs1, pLoH);
- final HaplotypeProbabilityOfNormalGivenTumor normalizedProbs2 = new HaplotypeProbabilityOfNormalGivenTumor(probs2, pLoH);
+ final HaplotypeProbabilityOfNormalGivenTumor prob1AssumingDataFromTumor = new HaplotypeProbabilityOfNormalGivenTumor(probs1, pLoH);
+ final HaplotypeProbabilityOfNormalGivenTumor prob2AssumingDataFromTumor = new HaplotypeProbabilityOfNormalGivenTumor(probs2, pLoH);
// If one is from genotype data we'd like to report the output relative
// to the genotyped SNP instead of against a random SNP from the haplotype
final Snp snp = probs2.getRepresentativeSnp();
final DiploidGenotype externalGenotype = probs2.getMostLikelyGenotype(snp);
final LocusResult lr = new LocusResult(snp,
- externalGenotype,
- probs1.getMostLikelyGenotype(snp),
- probs1.getObsAllele1(),
- probs1.getObsAllele2(),
- probs1.getLodMostProbableGenotype(),
- probs1.shiftedLogEvidenceProbabilityGivenOtherEvidence(probs2),
+ externalGenotype,
+ probs1.getMostLikelyGenotype(snp),
+ probs1.getObsAllele1(),
+ probs1.getObsAllele2(),
+ probs1.getLodMostProbableGenotype(),
+ // expected sample log-likelihood
+ probs1.shiftedLogEvidenceProbabilityGivenOtherEvidence(probs2),
+ // random sample log-likelihood
probs1.shiftedLogEvidenceProbability(),
- probs2.shiftedLogEvidenceProbabilityGivenOtherEvidence(normalizedProbs1)-probs2.shiftedLogEvidenceProbability(),
- probs1.shiftedLogEvidenceProbabilityGivenOtherEvidence(normalizedProbs2)-probs1.shiftedLogEvidenceProbability());
+
+ // probs1 is tumor probs2 is normal, correct sample lod
+ prob1AssumingDataFromTumor.shiftedLogEvidenceProbabilityGivenOtherEvidence(probs2) -
+ prob1AssumingDataFromTumor.shiftedLogEvidenceProbability(),
+ // probs1 is normal probs2 is tumor, correct sample lod
+ probs1.shiftedLogEvidenceProbabilityGivenOtherEvidence(prob2AssumingDataFromTumor) -
+ probs1.shiftedLogEvidenceProbability());
+
+
locusResults.add(lr);
if (probs1.hasEvidence() && probs2.hasEvidence()) {
diff --git a/src/java/picard/fingerprint/FingerprintResults.java b/src/main/java/picard/fingerprint/FingerprintResults.java
similarity index 100%
rename from src/java/picard/fingerprint/FingerprintResults.java
rename to src/main/java/picard/fingerprint/FingerprintResults.java
diff --git a/src/java/picard/fingerprint/GenotypeReader.java b/src/main/java/picard/fingerprint/GenotypeReader.java
similarity index 100%
rename from src/java/picard/fingerprint/GenotypeReader.java
rename to src/main/java/picard/fingerprint/GenotypeReader.java
diff --git a/src/java/picard/fingerprint/HaplotypeBlock.java b/src/main/java/picard/fingerprint/HaplotypeBlock.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeBlock.java
rename to src/main/java/picard/fingerprint/HaplotypeBlock.java
diff --git a/src/java/picard/fingerprint/HaplotypeMap.java b/src/main/java/picard/fingerprint/HaplotypeMap.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeMap.java
rename to src/main/java/picard/fingerprint/HaplotypeMap.java
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilities.java b/src/main/java/picard/fingerprint/HaplotypeProbabilities.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeProbabilities.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilities.java
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java b/src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromContaminatorSequence.java
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java b/src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
similarity index 75%
rename from src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
index d6d0231..854c5a3 100644
--- a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
+++ b/src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotype.java
@@ -24,8 +24,6 @@
package picard.fingerprint;
-import static picard.util.MathUtil.*;
-
/**
* Represents a set of HaplotypeProbabilities that were derived from a single SNP
* genotype at a point in time.
@@ -54,19 +52,19 @@ public class HaplotypeProbabilitiesFromGenotype extends HaplotypeProbabilities {
return likelihoods;
}
- @Override
- public void merge(final HaplotypeProbabilities other) {
- if (!this.getHaplotype().equals(other.getHaplotype())) {
- throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " +
- getHaplotype() + ", " + other.getHaplotype());
- }
+ @Override
+ public void merge(final HaplotypeProbabilities other) {
+ if (!this.getHaplotype().equals(other.getHaplotype())) {
+ throw new IllegalArgumentException("Mismatched haplotypes in call to HaplotypeProbabilities.merge(): " +
+ getHaplotype() + ", " + other.getHaplotype());
+ }
- if (! (other instanceof HaplotypeProbabilitiesFromGenotype)) {
- throw new IllegalArgumentException("Can only merge HaplotypeProbabilities of same class.");
- }
+ if (!(other instanceof HaplotypeProbabilitiesFromGenotype)) {
+ throw new IllegalArgumentException("Can only merge HaplotypeProbabilities of same class.");
+ }
- this.likelihoods[0] = this.likelihoods[0] * other.getLikelihoods()[0];
- this.likelihoods[1] = this.likelihoods[1] * other.getLikelihoods()[1];
- this.likelihoods[2] = this.likelihoods[2] * other.getLikelihoods()[2];
- }
+ this.likelihoods[0] = this.likelihoods[0] * other.getLikelihoods()[0];
+ this.likelihoods[1] = this.likelihoods[1] * other.getLikelihoods()[1];
+ this.likelihoods[2] = this.likelihoods[2] * other.getLikelihoods()[2];
+ }
}
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java b/src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromGenotypeLikelihoods.java
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java b/src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilitiesFromSequence.java
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java b/src/main/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java
similarity index 100%
rename from src/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilitiesUsingLogLikelihoods.java
diff --git a/src/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java b/src/main/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
similarity index 83%
rename from src/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
rename to src/main/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
index 2c6f90a..7a64a07 100644
--- a/src/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
+++ b/src/main/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumor.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2010 The Broad Institute
+ * Copyright (c) 2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -47,7 +47,7 @@ public class HaplotypeProbabilityOfNormalGivenTumor extends HaplotypeProbabiliti
//This is P(g_t|g_n)
//tumor genotype are the columns.
{1, 0, 0}, //normal is hom_ref => tumor must be the same
- {pLoH / 2, 1 - pLoH, pLoH / 2}, //normal is het => tumor might transit
+ {pLoH / 2, 1 - pLoH, pLoH / 2}, //normal is het => tumor may have transitioned
{0, 0, 1}}; //normal is hom_var => tumor must be the same
}
@@ -55,18 +55,23 @@ public class HaplotypeProbabilityOfNormalGivenTumor extends HaplotypeProbabiliti
// data given a particular _normal_ genotype, however, the likelihood as given is that where the
// genotype is of the tumor (if that's what the data was measuring)
- // P(D_t|g_n) = \sum_{g_n} P(D_t|g_t,g_n) = \sum P(D_t|g_t) P(g_t|g_n) = hpOfTumor.getLikelihoods() * transitionMatrix
+ // P(D_t|g_n) = \sum_{g_t} P(D_t, g_t|g_n)
+ // = \sum_{g_t} P(D_t|g_t, g_n) P(g_t|g_n)
+ // = \sum_{g_t} P(D_t|g_t) P(g_t|g_n)
+ // = transitionMatrix * hpOfTumor.getLikelihoods()
+ // where the * operator is understood as a matrix-vector product (rows indexed by g_n, columns by g_t).
@Override
public double[] getLikelihoods() {
- final double[] asTumorLikelihoods = new double[3];
- final double[] asNormalLikelihoods = hpOfTumor.getLikelihoods();
+ final double[] normalHaplotypeLikelihoods = new double[3];
+ final double[] tumorHaplotypeLikelihoods = hpOfTumor.getLikelihoods();
for (final Genotype g_n : Genotype.values()) {
+ normalHaplotypeLikelihoods[g_n.v] = 0D;
for (final Genotype g_t : Genotype.values()) {
- asTumorLikelihoods[g_t.v] += asNormalLikelihoods[g_n.v] * transitionMatrix[g_n.v][g_t.v];
+ normalHaplotypeLikelihoods[g_n.v] += tumorHaplotypeLikelihoods[g_t.v] * transitionMatrix[g_n.v][g_t.v];
}
}
- return asTumorLikelihoods;
+ return normalHaplotypeLikelihoods;
}
@Override
diff --git a/src/java/picard/fingerprint/LocusResult.java b/src/main/java/picard/fingerprint/LocusResult.java
similarity index 100%
rename from src/java/picard/fingerprint/LocusResult.java
rename to src/main/java/picard/fingerprint/LocusResult.java
diff --git a/src/java/picard/fingerprint/MatchResults.java b/src/main/java/picard/fingerprint/MatchResults.java
similarity index 100%
rename from src/java/picard/fingerprint/MatchResults.java
rename to src/main/java/picard/fingerprint/MatchResults.java
diff --git a/src/java/picard/fingerprint/Snp.java b/src/main/java/picard/fingerprint/Snp.java
similarity index 100%
rename from src/java/picard/fingerprint/Snp.java
rename to src/main/java/picard/fingerprint/Snp.java
diff --git a/src/java/picard/illumina/CheckIlluminaDirectory.java b/src/main/java/picard/illumina/CheckIlluminaDirectory.java
similarity index 84%
rename from src/java/picard/illumina/CheckIlluminaDirectory.java
rename to src/main/java/picard/illumina/CheckIlluminaDirectory.java
index bc34da6..3f33dcd 100644
--- a/src/java/picard/illumina/CheckIlluminaDirectory.java
+++ b/src/main/java/picard/illumina/CheckIlluminaDirectory.java
@@ -30,13 +30,28 @@ import java.util.TreeSet;
* specified data type. If NO data type is specified then the default data types used by IlluminaBasecallsToSam are used.
*/
@CommandLineProgramProperties(
- usage = "Check that the files to provide the data specified by DATA_TYPES are available, exist, and are reasonably sized for every tile/cycle. " +
- "Reasonably sized means non-zero sized for files that exist per tile and equal size for binary files that exist per cycle/per tile. " +
- "CheckIlluminaDirectory DOES NOT check that the individual records in a file are well-formed.",
- usageShort = "Asserts the validity of the data in the specified Illumina basecalling data",
+ usage = CheckIlluminaDirectory.USAGE_SUMMARY + CheckIlluminaDirectory.USAGE_DETAILS,
+ usageShort = CheckIlluminaDirectory.USAGE_SUMMARY,
programGroup = Illumina.class
)
public class CheckIlluminaDirectory extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Asserts the validity for specified Illumina basecalling data. ";
+ static final String USAGE_DETAILS = "<p>This tool will check that the basecall directory and the internal files are available, exist, " +
+ "and are reasonably sized for every tile and cycle. Reasonably sized means non-zero sized for files that exist per tile and " +
+ "equal size for binary files that exist per cycle or per tile. If DATA_TYPES {Position, BaseCalls, QualityScores, PF," +
+ " or Barcodes} are not specified, then the default data types used by IlluminaBasecallsToSam are used. " +
+ "CheckIlluminaDirectory DOES NOT check that the individual records in a file are well-formed.</p>" +
+ "" +
+ "<h4>Usage example:</h4> " +
+ "<pre>" +
+ "java -jar picard.jar CheckIlluminaDirectory \\<br />" +
+ " BASECALLS_DIR=/BaseCalls/ \\<br />" +
+ " READ_STRUCTURE=25T8B25T \\<br />" +
+ " LANES=1 \\<br />" +
+ " DATA_TYPES=BaseCalls " +
+ "</pre>" +
+ "<hr />"
+ ;
private static final Log log = Log.getInstance(CheckIlluminaDirectory.class);
// The following attributes define the command-line arguments
@@ -44,16 +59,19 @@ public class CheckIlluminaDirectory extends CommandLineProgram {
@Option(doc = "The basecalls output directory. ", shortName = "B")
public File BASECALLS_DIR;
- @Option(doc = "The data types that should be checked for each tile/cycle. If no values are provided then the data types checked are those " +
- "required by IlluminaBaseCallsToSam (which is a superset of those used in ExtractIlluminaBarcodes). These data types vary slightly depending on " +
- "whether or not the run is barcoded so READ_STRUCTURE should be the same as that which will be passed to IlluminaBasecallsToSam. If this option " +
- "is left unspecified then both ExtractIlluminaBarcodes and IlluminaBaseCallsToSam should complete successfully UNLESS the " +
+ @Option(doc = "The data types that should be checked for each tile/cycle. If no values are provided then the data types checked are " +
+ "those required by IlluminaBaseCallsToSam (which is a superset of those used in ExtractIlluminaBarcodes). These data types vary " +
+ "slightly depending on whether or not the run is barcoded so READ_STRUCTURE should be the same as that which will be passed to " +
+ "IlluminaBasecallsToSam. " +
+ "If this option is left unspecified then both ExtractIlluminaBarcodes and IlluminaBaseCallsToSam should complete successfully " +
+ "UNLESS the " +
"individual records of the files themselves are spurious.",
shortName = "DT", optional = true)
public final Set<IlluminaDataType> DATA_TYPES = new TreeSet<IlluminaDataType>();
- @Option(doc = ReadStructure.PARAMETER_DOC + " Note: If you want to check whether or not a future IlluminaBasecallsToSam or ExtractIlluminaBarcodes " +
- "run will fail then be sure to use the exact same READ_STRUCTURE that you would pass to these programs for this run.",
+ @Option(doc = ReadStructure.PARAMETER_DOC + " Note: If you want to check whether or not a future IlluminaBasecallsToSam or " +
+ "ExtractIlluminaBarcodes run will fail then be sure to use the exact same READ_STRUCTURE that you would pass to these programs " +
+ "for this run.",
shortName = "RS")
public String READ_STRUCTURE;
diff --git a/src/java/picard/illumina/ClusterDataToSamConverter.java b/src/main/java/picard/illumina/ClusterDataToSamConverter.java
similarity index 65%
rename from src/java/picard/illumina/ClusterDataToSamConverter.java
rename to src/main/java/picard/illumina/ClusterDataToSamConverter.java
index 5e1ab07..4355622 100644
--- a/src/java/picard/illumina/ClusterDataToSamConverter.java
+++ b/src/main/java/picard/illumina/ClusterDataToSamConverter.java
@@ -29,6 +29,7 @@ import htsjdk.samtools.SAMTag;
import htsjdk.samtools.SAMUtils;
import htsjdk.samtools.filter.SamRecordFilter;
import htsjdk.samtools.filter.SolexaNoiseFilter;
+import picard.PicardException;
import picard.fastq.IlluminaReadNameEncoder;
import picard.fastq.ReadNameEncoder;
import picard.illumina.parser.ClusterData;
@@ -38,6 +39,8 @@ import picard.util.AdapterMarker;
import picard.util.AdapterPair;
import picard.util.IlluminaUtil;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
/**
@@ -51,7 +54,6 @@ public class ClusterDataToSamConverter implements
IlluminaBasecallsConverter.ClusterDataConverter<IlluminaBasecallsToSam.SAMRecordsForCluster> {
- private final String runBarcode;
private final String readGroupId;
private final SamRecordFilter filters = new SolexaNoiseFilter();
private final boolean isPairedEnd;
@@ -65,6 +67,15 @@ public class ClusterDataToSamConverter implements
private final int outputRecordsPerCluster;
private final ReadNameEncoder readNameEncoder;
+ // TODO: add RX and QX to the list of SAMTags and change this. initial discussion
+ // TODO: here:
+ // TODO: - https://github.com/broadinstitute/picard/issues/287
+ // TODO: - HTS-spec issue: https://github.com/samtools/hts-specs/issues/109
+ // TODO: - https://github.com/samtools/hts-specs/pull/119
+ private String molecularIndexTag = "RX";
+ private String molecularIndexQualityTag = "QX";
+ private List<String> tagPerMolecularIndex = Collections.emptyList();
+
/**
* Constructor
*
@@ -78,7 +89,6 @@ public class ClusterDataToSamConverter implements
final String readGroupId,
final ReadStructure readStructure,
final List<IlluminaUtil.IlluminaAdapterPair> adapters) {
- this.runBarcode = runBarcode;
this.readGroupId = readGroupId;
this.readNameEncoder = new IlluminaReadNameEncoder(runBarcode);
@@ -101,10 +111,42 @@ public class ClusterDataToSamConverter implements
}
/**
+ * Sets the SAM tag to use to store the molecular index bases. If multiple molecular indexes exist, it will concatenate them
+ * and store them in this tag.
+ */
+ public ClusterDataToSamConverter withMolecularIndexTag(final String molecularIndexTag) {
+ if (molecularIndexTag == null) throw new IllegalArgumentException("Molecular index tag was null");
+ this.molecularIndexTag = molecularIndexTag;
+ return this;
+ }
+
+ /**
+ * Sets the SAM tag to use to store the molecular index base qualities. If multiple molecular indexes exist, it will concatenate them
+ * and store them in this tag.
+ */
+ public ClusterDataToSamConverter withMolecularIndexQualityTag(final String molecularIndexQualityTag) {
+ if (molecularIndexQualityTag == null) throw new IllegalArgumentException("Molecular index quality tag was null");
+ this.molecularIndexQualityTag = molecularIndexQualityTag;
+ return this;
+ }
+
+ /**
+ * Sets the SAM tags to use to store the bases of each molecular index, one tag per index. These tags are written whenever
+ * molecular indexes are present and this list is non-empty. The number of tags must equal the number of molecular indexes
+ * found; if the counts differ, a PicardException is thrown when the SAM record is created.
+ */
+ public ClusterDataToSamConverter withTagPerMolecularIndex(final List<String> tagPerMolecularIndex) {
+ if (tagPerMolecularIndex == null) throw new IllegalArgumentException("Null given for tagPerMolecularIndex");
+ this.tagPerMolecularIndex = tagPerMolecularIndex;
+ return this;
+ }
+
+ /**
* Creates a new SAM record from the basecall data
*/
private SAMRecord createSamRecord(final ReadData readData, final String readName, final boolean isPf, final boolean firstOfPair,
- final String unmatchedBarcode, final String molecularIndex, final String molecularIndexQ) {
+ final String unmatchedBarcode,
+ final List<String> molecularIndexes, final List<String> molecularIndexQualities) {
final SAMRecord sam = new SAMRecord(null);
sam.setReadName(readName);
sam.setReadBases(readData.getBases());
@@ -134,12 +176,21 @@ public class ClusterDataToSamConverter implements
sam.setAttribute(SAMTag.BC.name(), unmatchedBarcode);
}
- if (molecularIndex != null) {
- //TODO: add RX and QX to the list of SAMTags and change this. initial discussion
- //TODO: here: https://github.com/broadinstitute/picard/issues/287
- //TODO: HTS-spec issue: https://github.com/samtools/hts-specs/issues/109
- sam.setAttribute("RX", molecularIndex);
- sam.setAttribute("QX", molecularIndexQ);
+ if (!molecularIndexes.isEmpty()) {
+ if (!this.molecularIndexTag.isEmpty()) {
+ sam.setAttribute(this.molecularIndexTag, String.join("", molecularIndexes));
+ }
+ if (!this.molecularIndexQualityTag.isEmpty()) {
+ sam.setAttribute(this.molecularIndexQualityTag, String.join("", molecularIndexQualities));
+ }
+ if (!this.tagPerMolecularIndex.isEmpty()) {
+ if (tagPerMolecularIndex.size() != molecularIndexes.size()) {
+ throw new PicardException("Found " + molecularIndexes.size() + " molecular indexes but only " + tagPerMolecularIndex.size() + " SAM tags given.");
+ }
+ for (int i = 0; i < this.tagPerMolecularIndex.size(); i++) {
+ sam.setAttribute(this.tagPerMolecularIndex.get(i), molecularIndexes.get(i));
+ }
+ }
}
return sam;
@@ -163,33 +214,29 @@ public class ClusterDataToSamConverter implements
unmatchedBarcode = IlluminaUtil.barcodeSeqsToString(barcode).replace('.', 'N'); //TODO: This has a separator, where as in other places we do not use a separator
}
- final String joinedMolecularIndex ;
- final String joinedMolecularIndexQ ;
+ final List<String> molecularIndexes;
+ final List<String> molecularIndexQualities;
if (hasMolecularBarcode) {
- final StringBuilder joinedMolecularIndexQBuilder = new StringBuilder();
- final byte[][] molecularIndex = new byte[molecularBarcodeIndices.length][];
- final byte[][] molecularIndexQ = new byte[molecularBarcodeIndices.length][];
+ molecularIndexes = new ArrayList<>();
+ molecularIndexQualities = new ArrayList<>();
for (int i = 0; i < molecularBarcodeIndices.length; i++) {
- molecularIndex[i] = cluster.getRead(molecularBarcodeIndices[i]).getBases();
- molecularIndexQ[i] = cluster.getRead(molecularBarcodeIndices[i]).getQualities();
- joinedMolecularIndexQBuilder.append(SAMUtils.phredToFastq(molecularIndexQ[i]));
+ molecularIndexes.add(new String(cluster.getRead(molecularBarcodeIndices[i]).getBases()).replace('.', 'N'));
+ molecularIndexQualities.add(SAMUtils.phredToFastq(cluster.getRead(molecularBarcodeIndices[i]).getQualities()));
}
- joinedMolecularIndex = IlluminaUtil.byteArrayToString(molecularIndex, "").replace('.', 'N');
- joinedMolecularIndexQ = joinedMolecularIndexQBuilder.toString();
} else {
- joinedMolecularIndex = null;
- joinedMolecularIndexQ = null;
+ molecularIndexes = Collections.emptyList();
+ molecularIndexQualities = Collections.emptyList();
}
final SAMRecord firstOfPair = createSamRecord(
- cluster.getRead(templateIndices[0]), readName, cluster.isPf(), true, unmatchedBarcode, joinedMolecularIndex, joinedMolecularIndexQ);
+ cluster.getRead(templateIndices[0]), readName, cluster.isPf(), true, unmatchedBarcode, molecularIndexes, molecularIndexQualities);
ret.records[0] = firstOfPair;
SAMRecord secondOfPair = null;
if(isPairedEnd) {
secondOfPair = createSamRecord(
- cluster.getRead(templateIndices[1]), readName, cluster.isPf(), false, unmatchedBarcode, joinedMolecularIndex, joinedMolecularIndexQ);
+ cluster.getRead(templateIndices[1]), readName, cluster.isPf(), false, unmatchedBarcode, molecularIndexes, molecularIndexQualities);
ret.records[1] = secondOfPair;
}
diff --git a/src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java b/src/main/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
similarity index 82%
rename from src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
rename to src/main/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
index 51df55e..21a4028 100644
--- a/src/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
+++ b/src/main/java/picard/illumina/CollectIlluminaBasecallingMetrics.java
@@ -43,25 +43,54 @@ import java.util.SortedMap;
import java.util.TreeMap;
/***
- * A Command line tool to collect Illumina Basecalling metrics for a sequencing run
- * Requires a Lane and an input file of Barcodes to expect.
- * Outputs metrics:
- * * Mean Clusters Per Tile
- * * Standard Deviation of Clusters Per Tile
- * * Mean Pf Clusters Per Tile
- * * Standard Deviation of Pf Clusters Per Tile
- * * Mean Percentage of Pf Clusters Per Tile
- * * Standard Deviation of Percentage of Pf Clusters Per Tile
- */
+ - * A Command line tool to collect Illumina Basecalling metrics for a sequencing run
+ - * Requires a Lane and an input file of Barcodes to expect.
+ - * Outputs metrics:
+ - * * Mean Clusters Per Tile
+ - * * Standard Deviation of Clusters Per Tile
+ - * * Mean Pf Clusters Per Tile
+ - * * Standard Deviation of Pf Clusters Per Tile
+ - * * Mean Percentage of Pf Clusters Per Tile
+ - * * Standard Deviation of Percentage of Pf Clusters Per Tile
+ - */
+
@CommandLineProgramProperties(
- usage = CollectIlluminaBasecallingMetrics.USAGE,
- usageShort = CollectIlluminaBasecallingMetrics.USAGE,
+ usage = CollectIlluminaBasecallingMetrics.USAGE_SUMMARY + CollectIlluminaBasecallingMetrics.USAGE_DETAILS,
+ usageShort = CollectIlluminaBasecallingMetrics.USAGE_SUMMARY,
programGroup = Illumina.class
)
public class CollectIlluminaBasecallingMetrics extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Collects Illumina Basecalling metrics for a sequencing run. ";
+ static final String USAGE_DETAILS = "<p>This tool will produce per-barcode and per-lane basecall metrics for each sequencing run. " +
+ "Mean values for each metric are determined using data from all of the tiles. This tool requires the following data, LANE(#), " +
+ "BASECALLS_DIR, READ_STRUCTURE, and an input file listing the sample barcodes. " +
+ "Program will provide metrics including: the total numbers of bases, reads, and clusters, as well as the fractions of each " +
+ "bases, reads, and clusters that passed Illumina quality filters (PF) both per barcode and per lane. " +
+ "For additional information on Illumina's PF quality metric, please see the corresponding " +
+ "<a href='https://www.broadinstitute.org/gatk/guide/article?id=6329'>GATK Dictionary entry</a>.</p> " +
+ "<p>The input barcode_list.txt file is a file containing all of the sample and molecular barcodes and can be obtained from the " +
+ "<a href='http://broadinstitute.github.io/picard/command-line-overview.html#ExtractIlluminaBarcodes'>ExtractIlluminaBarcodes</a> " +
+ "tool. </p>" +
+ "" +
+ "Note: Metrics labeled as percentages are actually expressed as fractions! " +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectIlluminaBasecallingMetrics \\<br />" +
+ " BASECALLS_DIR=/BaseCalls/ \\<br />" +
+ " LANE=001 \\<br />" +
+ " READ_STRUCTURE=25T8B25T \\<br />" +
+ " INPUT=barcode_list.txt " +
+ "</pre>" +
+
+ "<p>Please see the CollectIlluminaBasecallingMetrics " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#IlluminaBasecallingMetrics'>definitions</a> " +
+ "for a complete description of the metrics produced by this tool. </p>" +
+
+ "<hr />"
+ ;
//Command Line Arguments
- static final String USAGE = "Given an Illumina basecalling and a lane, produces per-lane-barcode basecalling metrics";
-
+
@Option(doc="The Illumina basecalls output directory from which data are read", shortName="B")
public File BASECALLS_DIR;
diff --git a/src/java/picard/illumina/CollectIlluminaLaneMetrics.java b/src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java
similarity index 83%
rename from src/java/picard/illumina/CollectIlluminaLaneMetrics.java
rename to src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java
index 99e0ef8..e2b86d7 100644
--- a/src/java/picard/illumina/CollectIlluminaLaneMetrics.java
+++ b/src/main/java/picard/illumina/CollectIlluminaLaneMetrics.java
@@ -49,13 +49,29 @@ import java.util.stream.Collectors;
*/
@CommandLineProgramProperties(
- usage = CollectIlluminaLaneMetrics.USAGE,
- usageShort = CollectIlluminaLaneMetrics.USAGE,
+ usage = CollectIlluminaLaneMetrics.USAGE_SUMMARY + CollectIlluminaLaneMetrics.USAGE_DETAILS,
+ usageShort = CollectIlluminaLaneMetrics.USAGE_SUMMARY,
programGroup = Illumina.class
)
public class CollectIlluminaLaneMetrics extends CommandLineProgram {
- static final String USAGE = "Collects Illumina lane metrics for the given basecalling analysis directory";
-
+ static final String USAGE_SUMMARY = "Collects Illumina lane metrics for the given BaseCalling analysis directory. ";
+ static final String USAGE_DETAILS = "This tool produces quality control metrics on cluster density for each lane of an Illumina flowcell." +
+ " This tool takes Illumina TileMetrics data and places them into directories containing lane- and phasing-level metrics. " +
+ "In this context, phasing refers to the fraction of molecules that fall behind or jump ahead (prephasing) during a read cycle." +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar CollectIlluminaLaneMetrics \\<br />" +
+ " RUN_DIR=test_run \\<br />" +
+ " OUTPUT_DIRECTORY=Lane_output_metrics \\<br />" +
+ " OUTPUT_PREFIX=experiment1 \\<br />" +
+ " READ_STRUCTURE=25T8B25T " +
+ "</pre>" +
+ "<p>Please see the CollectIlluminaLaneMetrics " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectIlluminaLaneMetrics'>definitions</a> " +
+ "for a complete description of the metrics produced by this tool.</p>" +
+ "<hr />"
+ ;
@Option(doc = "The Illumina run directory of the run for which the lane metrics are to be generated")
public File RUN_DIRECTORY;
diff --git a/src/java/picard/illumina/ExtractIlluminaBarcodes.java b/src/main/java/picard/illumina/ExtractIlluminaBarcodes.java
similarity index 90%
rename from src/java/picard/illumina/ExtractIlluminaBarcodes.java
rename to src/main/java/picard/illumina/ExtractIlluminaBarcodes.java
index 94074c3..9a3c65e 100644
--- a/src/java/picard/illumina/ExtractIlluminaBarcodes.java
+++ b/src/main/java/picard/illumina/ExtractIlluminaBarcodes.java
@@ -73,19 +73,55 @@ import java.util.concurrent.TimeUnit;
* @author jburke at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = "Determine the sample barcode for each read in an Illumina lane.\n" +
- "For each tile, a file is written to the basecalls directory of the form s_<lane>_<tile>_barcode.txt. " +
- "An output file contains a line for each read in the tile, aligned with the regular basecall output. \n" +
- "The output file contains the following tab-separated columns: \n" +
- " * read subsequence at barcode position\n" +
- " * Y or N indicating if there was a barcode match\n" +
- " * matched barcode sequence\n" +
- "Note 1: that the order of specification of barcodes can cause arbitrary differences in output for poorly matching barcodes.\n" +
- "Note 2: molecular barcodes (M in the read structure) are not the barcode being extracted here and will be ignored here.\n\n",
- usageShort = "Tool to determine the barcode for each read in an Illumina lane",
+
+ usage = ExtractIlluminaBarcodes.USAGE_SUMMARY + ExtractIlluminaBarcodes.USAGE_DETAILS,
+ usageShort = ExtractIlluminaBarcodes.USAGE_SUMMARY,
programGroup = Illumina.class
)
public class ExtractIlluminaBarcodes extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Tool determines the barcode for each read in an Illumina lane. ";
+ static final String USAGE_DETAILS = "<p>This tool determines the numbers of reads containing barcode-matching sequences and provides " +
+ "statistics on the quality of these barcode matches.</p> " +
+ "<p>Illumina sequences can contain at least two types of barcodes, sample and molecular (index). Sample barcodes " +
+ "(B in the read structure) are used to demultiplex pooled samples while index barcodes (M in the read structure) are used " +
+ "to differentiate multiple reads of a template when carrying out paired-end sequencing. Note that this tool only extracts " +
+ "sample (B) and not molecular barcodes (M).</p>" +
+ "" +
+ "<p>Barcodes can be provided in the form of a list (BARCODE_FILE) or a string representing the barcode (BARCODE). " +
+ "The BARCODE_FILE contains multiple fields including 'barcode_sequence_1', 'barcode_sequence_2' (optional), " +
+ "'barcode_name', and 'library_name'. In contrast, the BARCODE argument is used for runs with reads containing a single " +
+ "barcode (nonmultiplexed) and can be added directly as a string of text e.g. BARCODE=CAATAGCG.</p>" +
+ "" +
+ "<p>Data is output per lane/tile within the BaseCalls directory with the file name format of 's_{lane}_{tile}_barcode.txt'. " +
+ "These files contain the following tab-separated columns:" +
+ "<ul> " +
+ "<li>Read subsequence at barcode position</li>" +
+ "<li>Y or N indicating if there was a barcode match</li>" +
+ "<li>Matched barcode sequence (empty if read did not match one of the barcodes)</li> " +
+ "</ul>" +
+ "If there is no match but we're close to the threshold of calling it a match, we output the barcode that would have been " +
+ "matched but in lower case. Threshold values can be adjusted to accommodate barcode sequence mismatches from the reads." +
+ " The metrics file produced by the ExtractIlluminaBarcodes program indicates the number of matches (and mismatches)" +
+ " between the barcode reads and the actual barcodes. These metrics are provided both per-barcode and per lane and can be " +
+ "found in the BaseCalls directory.</p>" +
+ "<p>For poorly matching barcodes, the order of specification of barcodes can cause arbitrary output differences.</p>" +
+ "" +
+ "<h4>Usage example:</h4> " +
+ "<pre>" +
+ "java -jar picard.jar ExtractIlluminaBarcodes \\<br />" +
+ " BASECALLS_DIR=/BaseCalls/ \\<br />" +
+ " LANE=1 \\<br />" +
+ " READ_STRUCTURE=25T8B25T \\<br />" +
+ " BARCODE_FILE=barcodes.txt \\<br />" +
+ " METRICS_FILE=metrics_output.txt " +
+ "</pre>" +
+ "" +
+ "Please see the ExtractIlluminaBarcodes.BarcodeMetric " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#ExtractIlluminaBarcodes.BarcodeMetric'>definitions</a> " +
+ "for a complete description of the metrics produced by this tool.</p>" +
+ "" +
+ "<hr />"
+ ;
// The following attributes define the command-line arguments
@@ -418,7 +454,9 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
* do not match a barcode.
*/
public String BARCODE;
+ /** The barcode name. */
public String BARCODE_NAME = "";
+ /** The name of the library */
public String LIBRARY_NAME = "";
/** The total number of reads matching the barcode. */
public int READS = 0;
@@ -432,7 +470,7 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
public int ONE_MISMATCH_MATCHES = 0;
/** The number of PF reads matching this barcode that matched with 1 error or no-call. */
public int PF_ONE_MISMATCH_MATCHES = 0;
- /** The percentage of all reads in the lane that matched to this barcode. */
+ /** The fraction of all reads in the lane that matched to this barcode. */
public double PCT_MATCHES = 0d;
/**
* The rate of all reads matching this barcode to all reads matching the most prevelant barcode. For the
@@ -442,7 +480,7 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
* in representation between barcodes.
*/
public double RATIO_THIS_BARCODE_TO_BEST_BARCODE_PCT = 0d;
- /** The percentage of PF reads in the lane that matched to this barcode. */
+ /** The fraction of PF reads in the lane that matched to this barcode. */
public double PF_PCT_MATCHES = 0d;
/**
diff --git a/src/java/picard/illumina/IlluminaBasecallingMetrics.java b/src/main/java/picard/illumina/IlluminaBasecallingMetrics.java
similarity index 100%
rename from src/java/picard/illumina/IlluminaBasecallingMetrics.java
rename to src/main/java/picard/illumina/IlluminaBasecallingMetrics.java
diff --git a/src/java/picard/illumina/IlluminaBasecallsConverter.java b/src/main/java/picard/illumina/IlluminaBasecallsConverter.java
similarity index 99%
rename from src/java/picard/illumina/IlluminaBasecallsConverter.java
rename to src/main/java/picard/illumina/IlluminaBasecallsConverter.java
index cdf3d19..2be747f 100644
--- a/src/java/picard/illumina/IlluminaBasecallsConverter.java
+++ b/src/main/java/picard/illumina/IlluminaBasecallsConverter.java
@@ -107,10 +107,8 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
if (s2.startsWith(s1)) {
return 1;
}
- } else if (s2.length() < s1.length()) {
- if (s1.startsWith(s2)) {
- return -1;
- }
+ } else if (s2.length() < s1.length() && s1.startsWith(s2)) {
+ return -1;
}
return s1.compareTo(s2);
}
diff --git a/src/java/picard/illumina/IlluminaBasecallsToFastq.java b/src/main/java/picard/illumina/IlluminaBasecallsToFastq.java
similarity index 87%
rename from src/java/picard/illumina/IlluminaBasecallsToFastq.java
rename to src/main/java/picard/illumina/IlluminaBasecallsToFastq.java
index d0141e7..87a0079 100644
--- a/src/java/picard/illumina/IlluminaBasecallsToFastq.java
+++ b/src/main/java/picard/illumina/IlluminaBasecallsToFastq.java
@@ -68,15 +68,52 @@ import java.util.Map;
import java.util.Set;
@CommandLineProgramProperties(
- usage = "Generate fastq file(s) from data in an Illumina basecalls output directory.\n" +
- "Separate fastq file(s) are created for each template read, and for each barcode read, in the basecalls.\n" +
- "Template fastqs have extensions like .<number>.fastq, where <number> is the number of the template read,\n" +
- "starting with 1. Barcode fastqs have extensions like .barcode_<number>.fastq, where <number> is the number\n" +
- "of the barcode read, starting with 1.",
- usageShort = "Generate fastq file(s) from data in an Illumina basecalls output directory",
- programGroup = Illumina.class
+ usage = IlluminaBasecallsToFastq.USAGE_SUMMARY + IlluminaBasecallsToFastq.USAGE_DETAILS,
+ usageShort = IlluminaBasecallsToFastq.USAGE_SUMMARY,
+ programGroup = Illumina.class
)
public class IlluminaBasecallsToFastq extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Generate FASTQ file(s) from Illumina basecall read data. ";
+ static final String USAGE_DETAILS = "<p>This tool generates FASTQ files from data in an Illumina BaseCalls output directory. " +
+ "Separate FASTQ files are created for each template, barcode, and index (molecular barcode) read. Briefly, the template reads " +
+ "are the target sequence of your experiment, the barcode sequence reads facilitate sample demultiplexing, and the index reads " +
+ "help mitigate instrument phasing errors. For additional information on the read types, please see the following " +
+ "reference <a href='http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3245947/'>here</a>.</p>" +
+ "" +
+ "<p>In the absence of sample pooling (multiplexing) and/or barcodes, then an OUTPUT_PREFIX (file directory) must be " +
+ "provided as the sample identifier. For multiplexed samples, a MULTIPLEX_PARAMS file must be specified. " +
+ "The MULTIPLEX_PARAMS file contains the list of sample barcodes used to sort template, barcode, and index reads. " +
+ "It is essentially the same as the BARCODE_FILE used in the " +
+ "<a href='http://broadinstitute.github.io/picard/command-line-overview.html#ExtractIlluminaBarcodes'>ExtractIlluminaBarcodes</a> " +
+ "tool.</p> "+
+ "" +
+ "<p>Files from this tool use the following naming format: {prefix}.{type}_{number}.fastq with the {prefix} indicating the sample " +
+ "barcode, the {type} indicating the types of reads e.g. index, barcode, or blank (if it contains a template read). " +
+ "The {number} indicates the read number, either first (1) or second (2) for paired-end sequencing. </p> " +
+
+"<h4>Usage examples:</h4>" +
+"<pre>" +
+"Example 1: Sample(s) with either no barcode or barcoded without multiplexing <br />" +
+"java -jar picard.jar IlluminaBasecallsToFastq \\<br />"+
+" READ_STRUCTURE=25T8B25T \\<br />"+
+" BASECALLS_DIR=basecallDirectory \\<br />"+
+" LANE=001 \\<br />"+
+" OUTPUT_PREFIX=noBarcode.1 \\<br />"+
+" RUN_BARCODE=run15 \\<br />"+
+" FLOWCELL_BARCODE=abcdeACXX <br /><br />" +
+
+"Example 2: Multiplexed samples <br />" +
+"java -jar picard.jar IlluminaBasecallsToFastq \\<br />"+
+" READ_STRUCTURE=25T8B25T \\<br />"+
+" BASECALLS_DIR=basecallDirectory \\<br />"+
+" LANE=001 \\<br />"+
+" MULTIPLEX_PARAMS=demultiplexed_output.txt \\<br />"+
+" RUN_BARCODE=run15 \\<br />"+
+" FLOWCELL_BARCODE=abcdeACXX <br />" +
+"</pre>"+
+"<p>The FLOWCELL_BARCODE is required if emitting Casava 1.8-style read name headers.</p>" +
+ "<hr />"
+;
// The following attributes define the command-line arguments
@Option(doc = "The basecalls directory. ", shortName = "B")
@@ -88,9 +125,8 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
@Option(doc = "Lane number. ", shortName = StandardOptionDefinitions.LANE_SHORT_NAME)
public Integer LANE;
- @Option(doc = "The prefix for output fastqs. Extensions as described above are appended. Use this option for " +
- "a non-barcoded run, or for a barcoded run in which it is not desired to demultiplex reads into separate " +
- "files by barcode.",
+ @Option(doc = "The prefix for output FASTQs. Extensions as described above are appended. Use this option for a non-barcoded run, or" +
+ " for a barcoded run in which it is not desired to demultiplex reads into separate files by barcode.",
shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME,
mutex = {"MULTIPLEX_PARAMS"})
public File OUTPUT_PREFIX;
@@ -107,7 +143,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
@Option(doc = ReadStructure.PARAMETER_DOC, shortName = "RS")
public String READ_STRUCTURE;
- @Option(doc = "Tab-separated file for creating all output fastqs demultiplexed by barcode for a lane with single " +
+ @Option(doc = "Tab-separated file for creating all output FASTQs demultiplexed by barcode for a lane with single " +
"IlluminaBasecallsToFastq invocation. The columns are OUTPUT_PREFIX, and BARCODE_1, BARCODE_2 ... BARCODE_X " +
"where X = number of barcodes per cluster (optional). Row with BARCODE_1 set to 'N' is used to specify " +
"an output_prefix for no barcode match.",
@@ -154,7 +190,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
public boolean INCLUDE_NON_PF_READS = true;
@Option(doc="Whether to ignore reads whose barcodes are not found in MULTIPLEX_PARAMS. Useful when outputting " +
- "fastqs for only a subset of the barcodes in a lane.", shortName="INGORE_UNEXPECTED")
+ "FASTQs for only a subset of the barcodes in a lane.", shortName="INGORE_UNEXPECTED")
public boolean IGNORE_UNEXPECTED_BARCODES = false;
@Option(doc="The read name header formatting to emit. Casava1.8 formatting has additional information beyond Illumina, including: " +
diff --git a/src/java/picard/illumina/IlluminaBasecallsToSam.java b/src/main/java/picard/illumina/IlluminaBasecallsToSam.java
similarity index 87%
rename from src/java/picard/illumina/IlluminaBasecallsToSam.java
rename to src/main/java/picard/illumina/IlluminaBasecallsToSam.java
index 68dd57d..2d2dcf1 100644
--- a/src/java/picard/illumina/IlluminaBasecallsToSam.java
+++ b/src/main/java/picard/illumina/IlluminaBasecallsToSam.java
@@ -44,6 +44,7 @@ import picard.cmdline.Option;
import picard.cmdline.programgroups.Illumina;
import picard.cmdline.StandardOptionDefinitions;
import picard.illumina.parser.ReadStructure;
+import picard.illumina.parser.ReadType;
import picard.illumina.parser.readers.BclQualityEvaluationStrategy;
import picard.util.IlluminaUtil;
import picard.util.IlluminaUtil.IlluminaAdapterPair;
@@ -54,6 +55,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
@@ -93,11 +95,42 @@ import java.util.Set;
* @author mccowan at broadinstitute.org
*/
@CommandLineProgramProperties(
- usage = IlluminaBasecallsToSam.USAGE,
- usageShort = IlluminaBasecallsToSam.USAGE,
+ usage = IlluminaBasecallsToSam.USAGE_SUMMARY + IlluminaBasecallsToSam.USAGE_DETAILS,
+ usageShort = IlluminaBasecallsToSam.USAGE_SUMMARY,
programGroup = Illumina.class
)
public class IlluminaBasecallsToSam extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Transforms raw Illumina sequencing data into an unmapped SAM or BAM file." ;
+ static final String USAGE_DETAILS = "<p>The IlluminaBasecallsToSam program collects, demultiplexes, and sorts reads across all " +
+ "of the tiles of a lane via barcode to produce an unmapped SAM/BAM file. An unmapped BAM file is often referred to as a uBAM. " +
+ "All barcode, sample, and library data is provided in the LIBRARY_PARAMS file. Note, this LIBRARY_PARAMS file " +
+ "should be formatted according to the specifications indicated below. The following is an example of a properly" +
+ " formatted LIBRARY_PARAMS file:</p>" +
+ "BARCODE_1\tOUTPUT\tSAMPLE_ALIAS\tLIBRARY_NAME\n" +
+ "AAAAAAAA\tSA_AAAAAAAA.bam\tSA_AAAAAAAA\tLN_AAAAAAAA\n" +
+ "AAAAGAAG\tSA_AAAAGAAG.bam\tSA_AAAAGAAG\tLN_AAAAGAAG\n" +
+ "AACAATGG\tSA_AACAATGG.bam\tSA_AACAATGG\tLN_AACAATGG\n" +
+ "N\tSA_non_indexed.bam\tSA_non_indexed\tLN_NNNNNNNN\n " +
+ "" +
+ "<p>The BARCODES_DIR file is produced by the " +
+ "<a href='http://broadinstitute.github.io/picard/command-line-overview.html#ExtractIlluminaBarcodes'>ExtractIlluminaBarcodes</a> " +
+ "tool for each lane of a flow cell.</p> " +
+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "" +
+ "java -jar picard.jar IlluminaBasecallsToSam \\<br />" +
+ " BASECALLS_DIR=/BaseCalls/ \\<br />" +
+ " LANE=001 \\<br />" +
+ " READ_STRUCTURE=25T8B25T \\<br />" +
+ " RUN_BARCODE=run15 \\<br />" +
+ " IGNORE_UNEXPECTED_BARCODES=true \\<br />" +
+ " LIBRARY_PARAMS=library.params " +
+ "</pre>" +
+ "<hr />"
+ ;
+
+
// The following attributes define the command-line arguments
public static final String USAGE = "Generate a SAM or BAM file from data in an Illumina basecalls output directory";
@@ -206,6 +239,16 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
"BAMs for only a subset of the barcodes in a lane.", shortName="INGORE_UNEXPECTED")
public boolean IGNORE_UNEXPECTED_BARCODES = false;
+ @Option(doc="The tag to use to store any molecular indexes. If more than one molecular index is found, they will be concatenated and stored here.", optional=true)
+ public String MOLECULAR_INDEX_TAG = "RX";
+
+ @Option(doc="The tag to use to store any molecular index base qualities. If more than one molecular index is found, their qualities will be concatenated and stored here " +
+ "(i.e. the number of \"M\" operators in the READ_STRUCTURE)", optional=true)
+ public String MOLECULAR_INDEX_BASE_QUALITY_TAG = "QX";
+
+ @Option(doc="The list of tags to store each molecular index. The number of tags should match the number of molecular indexes.", optional=true)
+ public List<String> TAG_PER_MOLECULAR_INDEX;
+
private final Map<String, SAMFileWriterWrapper> barcodeSamWriterMap = new HashMap<String, SAMFileWriterWrapper>();
private ReadStructure readStructure;
IlluminaBasecallsConverter<SAMRecordsForCluster> basecallsConverter;
@@ -239,8 +282,6 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
populateWritersFromLibraryParams();
}
- readStructure = new ReadStructure(READ_STRUCTURE);
-
final int numOutputRecords = readStructure.templates.length();
basecallsConverter = new IlluminaBasecallsConverter<SAMRecordsForCluster>(BASECALLS_DIR, BARCODES_DIR, LANE, readStructure,
@@ -255,7 +296,10 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
* data which may be different from the input read structure (specifically if there are skips).
*/
final ClusterDataToSamConverter converter = new ClusterDataToSamConverter(RUN_BARCODE, READ_GROUP_ID,
- basecallsConverter.getFactory().getOutputReadStructure(), ADAPTERS_TO_CHECK);
+ basecallsConverter.getFactory().getOutputReadStructure(), ADAPTERS_TO_CHECK)
+ .withMolecularIndexTag(MOLECULAR_INDEX_TAG)
+ .withMolecularIndexQualityTag(MOLECULAR_INDEX_BASE_QUALITY_TAG)
+ .withTagPerMolecularIndex(TAG_PER_MOLECULAR_INDEX);
basecallsConverter.setConverter(converter);
}
@@ -437,16 +481,19 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
final ArrayList<String> messages = new ArrayList<String>();
readStructure = new ReadStructure(READ_STRUCTURE);
- if (!readStructure.sampleBarcodes.isEmpty()) {
- if (LIBRARY_PARAMS == null) {
- messages.add("BARCODE_PARAMS or LIBRARY_PARAMS is missing. If READ_STRUCTURE contains a B (barcode)" +
- " then either LIBRARY_PARAMS or BARCODE_PARAMS(deprecated) must be provided!");
- }
+ if (!readStructure.sampleBarcodes.isEmpty() && LIBRARY_PARAMS == null) {
+ messages.add("BARCODE_PARAMS or LIBRARY_PARAMS is missing. If READ_STRUCTURE contains a B (barcode)" +
+ " then either LIBRARY_PARAMS or BARCODE_PARAMS(deprecated) must be provided!");
}
if (READ_GROUP_ID == null) {
READ_GROUP_ID = RUN_BARCODE.substring(0, 5) + "." + LANE;
}
+
+ if (!TAG_PER_MOLECULAR_INDEX.isEmpty() && TAG_PER_MOLECULAR_INDEX.size() != readStructure.molecularBarcode.length()) {
+ messages.add("The number of tags given in TAG_PER_MOLECULAR_INDEX does not match the number of molecular indexes in READ_STRUCTURE");
+ }
+
if (messages.isEmpty()) {
return null;
}
diff --git a/src/java/picard/illumina/IlluminaLaneMetrics.java b/src/main/java/picard/illumina/IlluminaLaneMetrics.java
similarity index 100%
rename from src/java/picard/illumina/IlluminaLaneMetrics.java
rename to src/main/java/picard/illumina/IlluminaLaneMetrics.java
diff --git a/src/java/picard/illumina/IlluminaPhasingMetrics.java b/src/main/java/picard/illumina/IlluminaPhasingMetrics.java
similarity index 88%
rename from src/java/picard/illumina/IlluminaPhasingMetrics.java
rename to src/main/java/picard/illumina/IlluminaPhasingMetrics.java
index 57c16dc..b56f26c 100644
--- a/src/java/picard/illumina/IlluminaPhasingMetrics.java
+++ b/src/main/java/picard/illumina/IlluminaPhasingMetrics.java
@@ -34,17 +34,21 @@ import java.util.Collection;
/**
* Metrics for Illumina Basecalling that stores median phasing and prephasing percentages on a per-template-read, per-lane basis.
+ * Phasing refers to the fraction of molecules that fall behind or jump ahead (prephasing) during a read cycle.
* For each lane/template read # (i.e. FIRST, SECOND) combination we will store the median values of both the phasing and prephasing
* values for every tile in that lane/template read pair.
*
* @author jgentry
*/
public class IlluminaPhasingMetrics extends MetricBase {
+ /** Illumina flowcell lane number */
public long LANE;
+ /** Defines an Illumina template read number (first or second) */
public String TYPE_NAME;
+ /** Median phasing value across all tiles in a lane, applied to the first and second template reads */
public double PHASING_APPLIED;
+ /** Median pre-phasing value across all tiles in a lane, applied to the first and second template reads */
public double PREPHASING_APPLIED;
-
/** Calculate the median phasing & prephasing values for a lane's tiles and create the appropriate IlluminaPhasingMetrics for them */
public static Collection<IlluminaPhasingMetrics> getPhasingMetricsForTiles(final long lane, final Collection<Tile> tilesForLane) {
final LanePhasingMetricsCollector lanePhasingMetricsCollector = new LanePhasingMetricsCollector(tilesForLane);
diff --git a/src/java/picard/illumina/LanePhasingMetricsCollector.java b/src/main/java/picard/illumina/LanePhasingMetricsCollector.java
similarity index 100%
rename from src/java/picard/illumina/LanePhasingMetricsCollector.java
rename to src/main/java/picard/illumina/LanePhasingMetricsCollector.java
diff --git a/src/java/picard/illumina/MarkIlluminaAdapters.java b/src/main/java/picard/illumina/MarkIlluminaAdapters.java
similarity index 93%
rename from src/java/picard/illumina/MarkIlluminaAdapters.java
rename to src/main/java/picard/illumina/MarkIlluminaAdapters.java
index bc1e904..993c5fe 100644
--- a/src/java/picard/illumina/MarkIlluminaAdapters.java
+++ b/src/main/java/picard/illumina/MarkIlluminaAdapters.java
@@ -64,15 +64,26 @@ import static picard.util.IlluminaUtil.IlluminaAdapterPair;
* @author Tim Fennell (adapted by mborkan at broadinstitute.org)
*/
@CommandLineProgramProperties(
- usage = "Reads a SAM or BAM file and rewrites it with new adapter-trimming tags.\n" +
- "Clear any existing adapter-trimming tags (XT:i:).\n" +
- "Only works for unaligned files in query-name order.\n"+
- "Note: This is a utility program and will not be run in the pipeline.\n",
- usageShort = "Reads a SAM or BAM file and rewrites it with new adapter-trimming tags",
+
+ usage = MarkIlluminaAdapters.USAGE_SUMMARY + MarkIlluminaAdapters.USAGE_DETAILS,
+ usageShort = MarkIlluminaAdapters.USAGE_SUMMARY,
programGroup = Illumina.class
)
public class MarkIlluminaAdapters extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Reads a SAM or BAM file and rewrites it with new adapter-trimming tags. ";
+ static final String USAGE_DETAILS = "<p>This tool clears any existing adapter-trimming tags (XT:i:) in the optional tag region of " +
+ "a SAM file. The SAM/BAM file must be sorted by query name.</p> "+
+ "<p>Outputs a metrics file histogram showing counts of bases_clipped per read.</p>" +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar MarkIlluminaAdapters \\<br /> " +
+ "INPUT=input.sam \\<br />" +
+ "METRICS=metrics.txt " +
+ "</pre>" +
+ "<hr />"
+ ;
// The following attributes define the command-line arguments
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
diff --git a/src/java/picard/illumina/parser/BarcodeParser.java b/src/main/java/picard/illumina/parser/BarcodeParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/BarcodeParser.java
rename to src/main/java/picard/illumina/parser/BarcodeParser.java
diff --git a/src/java/picard/illumina/parser/BclData.java b/src/main/java/picard/illumina/parser/BclData.java
similarity index 100%
rename from src/java/picard/illumina/parser/BclData.java
rename to src/main/java/picard/illumina/parser/BclData.java
diff --git a/src/java/picard/illumina/parser/BclParser.java b/src/main/java/picard/illumina/parser/BclParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/BclParser.java
rename to src/main/java/picard/illumina/parser/BclParser.java
diff --git a/src/java/picard/illumina/parser/ClusterData.java b/src/main/java/picard/illumina/parser/ClusterData.java
similarity index 100%
rename from src/java/picard/illumina/parser/ClusterData.java
rename to src/main/java/picard/illumina/parser/ClusterData.java
diff --git a/src/java/picard/illumina/parser/ClusterIntensityFileReader.java b/src/main/java/picard/illumina/parser/ClusterIntensityFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/ClusterIntensityFileReader.java
rename to src/main/java/picard/illumina/parser/ClusterIntensityFileReader.java
diff --git a/src/java/picard/illumina/parser/CycleIlluminaFileMap.java b/src/main/java/picard/illumina/parser/CycleIlluminaFileMap.java
similarity index 100%
rename from src/java/picard/illumina/parser/CycleIlluminaFileMap.java
rename to src/main/java/picard/illumina/parser/CycleIlluminaFileMap.java
diff --git a/src/java/picard/illumina/parser/FilterParser.java b/src/main/java/picard/illumina/parser/FilterParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/FilterParser.java
rename to src/main/java/picard/illumina/parser/FilterParser.java
diff --git a/src/java/picard/illumina/parser/FourChannelIntensityData.java b/src/main/java/picard/illumina/parser/FourChannelIntensityData.java
similarity index 100%
rename from src/java/picard/illumina/parser/FourChannelIntensityData.java
rename to src/main/java/picard/illumina/parser/FourChannelIntensityData.java
diff --git a/src/java/picard/illumina/parser/IlluminaData.java b/src/main/java/picard/illumina/parser/IlluminaData.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaData.java
rename to src/main/java/picard/illumina/parser/IlluminaData.java
diff --git a/src/java/picard/illumina/parser/IlluminaDataProvider.java b/src/main/java/picard/illumina/parser/IlluminaDataProvider.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaDataProvider.java
rename to src/main/java/picard/illumina/parser/IlluminaDataProvider.java
diff --git a/src/java/picard/illumina/parser/IlluminaDataProviderFactory.java b/src/main/java/picard/illumina/parser/IlluminaDataProviderFactory.java
similarity index 98%
rename from src/java/picard/illumina/parser/IlluminaDataProviderFactory.java
rename to src/main/java/picard/illumina/parser/IlluminaDataProviderFactory.java
index cad0242..9842d75 100644
--- a/src/java/picard/illumina/parser/IlluminaDataProviderFactory.java
+++ b/src/main/java/picard/illumina/parser/IlluminaDataProviderFactory.java
@@ -34,6 +34,7 @@ import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -67,7 +68,7 @@ public class IlluminaDataProviderFactory {
* We try to prefer data types that will be the fastest to parse/smallest in memory
* NOTE: In the code below, if Qseq is chosen to provide for ANY data type then it is used for ALL its data types (since we'll have to parse the entire line for each Qseq anyways)
*/
- private static final Map<IlluminaDataType, List<SupportedIlluminaFormat>> DATA_TYPE_TO_PREFERRED_FORMATS = new HashMap<IlluminaDataType, List<SupportedIlluminaFormat>>();
+ private static final Map<IlluminaDataType, List<SupportedIlluminaFormat>> DATA_TYPE_TO_PREFERRED_FORMATS = new EnumMap<IlluminaDataType, List<SupportedIlluminaFormat>>(IlluminaDataType.class);
static {
/** For types found in Qseq, we prefer the NON-Qseq file formats first. However, if we end up using Qseqs then we use Qseqs for EVERY type it provides,
@@ -266,8 +267,8 @@ public class IlluminaDataProviderFactory {
public static Map<SupportedIlluminaFormat, Set<IlluminaDataType>> determineFormats(final Set<IlluminaDataType> requestedDataTypes, final IlluminaFileUtil fileUtil) {
//For predictable ordering and uniqueness only, put the requestedDataTypes into a treeSet
final SortedSet<IlluminaDataType> toSupport = new TreeSet<IlluminaDataType>(requestedDataTypes);
- final Map<SupportedIlluminaFormat, Set<IlluminaDataType>> fileTypeToDataTypes = new HashMap<SupportedIlluminaFormat, Set<IlluminaDataType>>();
- final Map<IlluminaDataType, SupportedIlluminaFormat> dataTypeToFormat = new HashMap<IlluminaDataType, SupportedIlluminaFormat>();
+ final Map<SupportedIlluminaFormat, Set<IlluminaDataType>> fileTypeToDataTypes = new EnumMap<SupportedIlluminaFormat, Set<IlluminaDataType>>(SupportedIlluminaFormat.class);
+ final Map<IlluminaDataType, SupportedIlluminaFormat> dataTypeToFormat = new EnumMap<IlluminaDataType, SupportedIlluminaFormat>(IlluminaDataType.class);
for (final IlluminaDataType ts : toSupport) {
final SupportedIlluminaFormat preferredFormat = findPreferredAvailableFormat(ts, fileUtil);
diff --git a/src/java/picard/illumina/parser/IlluminaDataType.java b/src/main/java/picard/illumina/parser/IlluminaDataType.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaDataType.java
rename to src/main/java/picard/illumina/parser/IlluminaDataType.java
diff --git a/src/java/picard/illumina/parser/IlluminaFileMap.java b/src/main/java/picard/illumina/parser/IlluminaFileMap.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaFileMap.java
rename to src/main/java/picard/illumina/parser/IlluminaFileMap.java
diff --git a/src/java/picard/illumina/parser/IlluminaFileNotFoundException.java b/src/main/java/picard/illumina/parser/IlluminaFileNotFoundException.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaFileNotFoundException.java
rename to src/main/java/picard/illumina/parser/IlluminaFileNotFoundException.java
diff --git a/src/java/picard/illumina/parser/IlluminaFileUtil.java b/src/main/java/picard/illumina/parser/IlluminaFileUtil.java
similarity index 97%
rename from src/java/picard/illumina/parser/IlluminaFileUtil.java
rename to src/main/java/picard/illumina/parser/IlluminaFileUtil.java
index 51cd573..69f92a7 100644
--- a/src/java/picard/illumina/parser/IlluminaFileUtil.java
+++ b/src/main/java/picard/illumina/parser/IlluminaFileUtil.java
@@ -36,6 +36,7 @@ import picard.illumina.parser.readers.TileMetricsOutReader;
import java.io.File;
import java.util.ArrayList;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@@ -76,7 +77,7 @@ public class IlluminaFileUtil {
private final int lane;
private final File tileMetricsOut;
- private final Map<SupportedIlluminaFormat, ParameterizedFileUtil> utils = new HashMap<SupportedIlluminaFormat, ParameterizedFileUtil>();
+ private final Map<SupportedIlluminaFormat, ParameterizedFileUtil> utils = new EnumMap<SupportedIlluminaFormat, ParameterizedFileUtil>(SupportedIlluminaFormat.class);
public IlluminaFileUtil(final File basecallDir, final int lane) {
this(basecallDir, null, lane);
@@ -174,10 +175,9 @@ public class IlluminaFileUtil {
while (tileMetrics.hasNext()) {
final TileMetricsOutReader.IlluminaTileMetrics tileMetric = tileMetrics.next();
- if (tileMetric.getLaneNumber() == lane) {
- if (!expectedTiles.contains(tileMetric.getTileNumber())) {
- expectedTiles.add(tileMetric.getTileNumber());
- }
+ if (tileMetric.getLaneNumber() == lane &&
+ !expectedTiles.contains(tileMetric.getTileNumber())) {
+ expectedTiles.add(tileMetric.getTileNumber());
}
}
diff --git a/src/java/picard/illumina/parser/IlluminaMetricsCode.java b/src/main/java/picard/illumina/parser/IlluminaMetricsCode.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaMetricsCode.java
rename to src/main/java/picard/illumina/parser/IlluminaMetricsCode.java
diff --git a/src/java/picard/illumina/parser/IlluminaParser.java b/src/main/java/picard/illumina/parser/IlluminaParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaParser.java
rename to src/main/java/picard/illumina/parser/IlluminaParser.java
diff --git a/src/java/picard/illumina/parser/IlluminaTextIterator.java b/src/main/java/picard/illumina/parser/IlluminaTextIterator.java
similarity index 100%
rename from src/java/picard/illumina/parser/IlluminaTextIterator.java
rename to src/main/java/picard/illumina/parser/IlluminaTextIterator.java
diff --git a/src/java/picard/illumina/parser/IntensityChannel.java b/src/main/java/picard/illumina/parser/IntensityChannel.java
similarity index 100%
rename from src/java/picard/illumina/parser/IntensityChannel.java
rename to src/main/java/picard/illumina/parser/IntensityChannel.java
diff --git a/src/java/picard/illumina/parser/MultiTileBclFileUtil.java b/src/main/java/picard/illumina/parser/MultiTileBclFileUtil.java
similarity index 100%
rename from src/java/picard/illumina/parser/MultiTileBclFileUtil.java
rename to src/main/java/picard/illumina/parser/MultiTileBclFileUtil.java
diff --git a/src/java/picard/illumina/parser/MultiTileBclParser.java b/src/main/java/picard/illumina/parser/MultiTileBclParser.java
similarity index 94%
rename from src/java/picard/illumina/parser/MultiTileBclParser.java
rename to src/main/java/picard/illumina/parser/MultiTileBclParser.java
index c39f18a..4ebe9b9 100644
--- a/src/java/picard/illumina/parser/MultiTileBclParser.java
+++ b/src/main/java/picard/illumina/parser/MultiTileBclParser.java
@@ -66,6 +66,12 @@ public class MultiTileBclParser extends BclParser {
}
@Override
+ protected CycleFilesParser<BclData> makeCycleFileParser(final List<File> files, final CycleFilesParser<BclData> cycleFilesParser) {
+ // NB: do not close the underlying reader like the parent does just yet.
+ return makeCycleFileParser(files);
+ }
+
+ @Override
protected CycleFilesParser<BclData> makeCycleFileParser(final List<File> files) {
if (cycleFileParser == null) {
cycleFileParser = new MultiTileBclDataCycleFileParser(files, currentTile);
@@ -97,7 +103,7 @@ public class MultiTileBclParser extends BclParser {
@Override
public void close() {
- //underlyingIterator.close();
+ underlyingIterator.close();
}
@Override
diff --git a/src/java/picard/illumina/parser/MultiTileFileUtil.java b/src/main/java/picard/illumina/parser/MultiTileFileUtil.java
similarity index 100%
rename from src/java/picard/illumina/parser/MultiTileFileUtil.java
rename to src/main/java/picard/illumina/parser/MultiTileFileUtil.java
diff --git a/src/java/picard/illumina/parser/MultiTileFilterParser.java b/src/main/java/picard/illumina/parser/MultiTileFilterParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/MultiTileFilterParser.java
rename to src/main/java/picard/illumina/parser/MultiTileFilterParser.java
diff --git a/src/java/picard/illumina/parser/MultiTileLocsParser.java b/src/main/java/picard/illumina/parser/MultiTileLocsParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/MultiTileLocsParser.java
rename to src/main/java/picard/illumina/parser/MultiTileLocsParser.java
diff --git a/src/java/picard/illumina/parser/MultiTileParser.java b/src/main/java/picard/illumina/parser/MultiTileParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/MultiTileParser.java
rename to src/main/java/picard/illumina/parser/MultiTileParser.java
diff --git a/src/java/picard/illumina/parser/OutputMapping.java b/src/main/java/picard/illumina/parser/OutputMapping.java
similarity index 100%
rename from src/java/picard/illumina/parser/OutputMapping.java
rename to src/main/java/picard/illumina/parser/OutputMapping.java
diff --git a/src/java/picard/illumina/parser/ParameterizedFileUtil.java b/src/main/java/picard/illumina/parser/ParameterizedFileUtil.java
similarity index 100%
rename from src/java/picard/illumina/parser/ParameterizedFileUtil.java
rename to src/main/java/picard/illumina/parser/ParameterizedFileUtil.java
diff --git a/src/java/picard/illumina/parser/PerTileCycleParser.java b/src/main/java/picard/illumina/parser/PerTileCycleParser.java
similarity index 92%
rename from src/java/picard/illumina/parser/PerTileCycleParser.java
rename to src/main/java/picard/illumina/parser/PerTileCycleParser.java
index 7cd5960..450a796 100644
--- a/src/java/picard/illumina/parser/PerTileCycleParser.java
+++ b/src/main/java/picard/illumina/parser/PerTileCycleParser.java
@@ -86,11 +86,17 @@ abstract class PerTileCycleParser<ILLUMINA_DATA extends IlluminaData> implements
}
/**
- * For a given cycle, return a CycleFilesParser.
+ * For a given cycle, return a CycleFilesParser. It will close the cycleFilesParser if not null.
*
- * @param file The file to parse
+ * @param file The file to parse
+ * @param cycleFilesParser The previous cycle file parser, or null if there is none.
* @return A CycleFilesParser that will populate the correct position in the IlluminaData object with that cycle's data.
*/
+ protected CycleFilesParser<ILLUMINA_DATA> makeCycleFileParser(final List<File> file, final CycleFilesParser<ILLUMINA_DATA> cycleFilesParser) {
+ if (cycleFilesParser != null) cycleFilesParser.close();
+ return makeCycleFileParser(file);
+ }
+
protected abstract CycleFilesParser<ILLUMINA_DATA> makeCycleFileParser(final List<File> file);
public abstract void initialize();
@@ -118,17 +124,13 @@ abstract class PerTileCycleParser<ILLUMINA_DATA extends IlluminaData> implements
public void seekToTile(final int tile) {
currentTile = tile;
- if(cycleFilesParser != null) {
- cycleFilesParser.close();
- }
-
int totalCycles = 0;
final List<File> tileFiles = new ArrayList<File>();
for (final Map.Entry<Integer, IlluminaFileMap> entry : cyclesToTileFiles.entrySet()) {
tileFiles.add(entry.getValue().get(currentTile));
++totalCycles;
}
- cycleFilesParser = makeCycleFileParser(tileFiles);
+ cycleFilesParser = makeCycleFileParser(tileFiles, cycleFilesParser);
if (totalCycles != outputMapping.getTotalOutputCycles()) {
throw new PicardException("Number of cycle OUTPUT files found (" + totalCycles + ") does not equal the number expected (" + outputMapping.getTotalOutputCycles() + ")");
diff --git a/src/java/picard/illumina/parser/PerTileFileUtil.java b/src/main/java/picard/illumina/parser/PerTileFileUtil.java
similarity index 100%
rename from src/java/picard/illumina/parser/PerTileFileUtil.java
rename to src/main/java/picard/illumina/parser/PerTileFileUtil.java
diff --git a/src/java/picard/illumina/parser/PerTileParser.java b/src/main/java/picard/illumina/parser/PerTileParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/PerTileParser.java
rename to src/main/java/picard/illumina/parser/PerTileParser.java
diff --git a/src/java/picard/illumina/parser/PerTilePerCycleFileUtil.java b/src/main/java/picard/illumina/parser/PerTilePerCycleFileUtil.java
similarity index 100%
rename from src/java/picard/illumina/parser/PerTilePerCycleFileUtil.java
rename to src/main/java/picard/illumina/parser/PerTilePerCycleFileUtil.java
diff --git a/src/java/picard/illumina/parser/PosParser.java b/src/main/java/picard/illumina/parser/PosParser.java
similarity index 100%
rename from src/java/picard/illumina/parser/PosParser.java
rename to src/main/java/picard/illumina/parser/PosParser.java
diff --git a/src/java/picard/illumina/parser/Range.java b/src/main/java/picard/illumina/parser/Range.java
similarity index 100%
rename from src/java/picard/illumina/parser/Range.java
rename to src/main/java/picard/illumina/parser/Range.java
diff --git a/src/java/picard/illumina/parser/ReadData.java b/src/main/java/picard/illumina/parser/ReadData.java
similarity index 100%
rename from src/java/picard/illumina/parser/ReadData.java
rename to src/main/java/picard/illumina/parser/ReadData.java
diff --git a/src/java/picard/illumina/parser/ReadDescriptor.java b/src/main/java/picard/illumina/parser/ReadDescriptor.java
similarity index 100%
rename from src/java/picard/illumina/parser/ReadDescriptor.java
rename to src/main/java/picard/illumina/parser/ReadDescriptor.java
diff --git a/src/java/picard/illumina/parser/ReadStructure.java b/src/main/java/picard/illumina/parser/ReadStructure.java
similarity index 100%
rename from src/java/picard/illumina/parser/ReadStructure.java
rename to src/main/java/picard/illumina/parser/ReadStructure.java
diff --git a/src/java/picard/illumina/parser/ReadType.java b/src/main/java/picard/illumina/parser/ReadType.java
similarity index 100%
rename from src/java/picard/illumina/parser/ReadType.java
rename to src/main/java/picard/illumina/parser/ReadType.java
diff --git a/src/java/picard/illumina/parser/Tile.java b/src/main/java/picard/illumina/parser/Tile.java
similarity index 94%
rename from src/java/picard/illumina/parser/Tile.java
rename to src/main/java/picard/illumina/parser/Tile.java
index 763f459..2763003 100644
--- a/src/java/picard/illumina/parser/Tile.java
+++ b/src/main/java/picard/illumina/parser/Tile.java
@@ -29,6 +29,7 @@ import htsjdk.samtools.util.CollectionUtil;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
@@ -58,8 +59,8 @@ public class Tile {
final Collection<TilePhasingValue> phasingValues = ensureSoleTilePhasingValuesPerRead(Arrays.asList(tilePhasingValues));
- final Map<TileTemplateRead, Float> phasingMap = new HashMap<TileTemplateRead, Float>();
- final Map<TileTemplateRead, Float> prePhasingMap = new HashMap<TileTemplateRead, Float>();
+ final Map<TileTemplateRead, Float> phasingMap = new EnumMap<TileTemplateRead, Float>(TileTemplateRead.class);
+ final Map<TileTemplateRead, Float> prePhasingMap = new EnumMap<TileTemplateRead, Float>(TileTemplateRead.class);
/** For each of the TileReads, assign their phasing & prephasing values to the respective maps, which we will
* use later to calculate the medians
diff --git a/src/java/picard/illumina/parser/TileIndex.java b/src/main/java/picard/illumina/parser/TileIndex.java
similarity index 100%
rename from src/java/picard/illumina/parser/TileIndex.java
rename to src/main/java/picard/illumina/parser/TileIndex.java
diff --git a/src/java/picard/illumina/parser/TileMetricsUtil.java b/src/main/java/picard/illumina/parser/TileMetricsUtil.java
similarity index 100%
rename from src/java/picard/illumina/parser/TileMetricsUtil.java
rename to src/main/java/picard/illumina/parser/TileMetricsUtil.java
diff --git a/src/java/picard/illumina/parser/TilePhasingValue.java b/src/main/java/picard/illumina/parser/TilePhasingValue.java
similarity index 100%
rename from src/java/picard/illumina/parser/TilePhasingValue.java
rename to src/main/java/picard/illumina/parser/TilePhasingValue.java
diff --git a/src/java/picard/illumina/parser/TileTemplateRead.java b/src/main/java/picard/illumina/parser/TileTemplateRead.java
similarity index 100%
rename from src/java/picard/illumina/parser/TileTemplateRead.java
rename to src/main/java/picard/illumina/parser/TileTemplateRead.java
diff --git a/src/java/picard/illumina/parser/fakers/BarcodeFileFaker.java b/src/main/java/picard/illumina/parser/fakers/BarcodeFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/BarcodeFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/BarcodeFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/BciFileFaker.java b/src/main/java/picard/illumina/parser/fakers/BciFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/BciFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/BciFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/BclFileFaker.java b/src/main/java/picard/illumina/parser/fakers/BclFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/BclFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/BclFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/ClocsFileFaker.java b/src/main/java/picard/illumina/parser/fakers/ClocsFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/ClocsFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/ClocsFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/FileFaker.java b/src/main/java/picard/illumina/parser/fakers/FileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/FileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/FileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/FilterFileFaker.java b/src/main/java/picard/illumina/parser/fakers/FilterFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/FilterFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/FilterFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/LocsFileFaker.java b/src/main/java/picard/illumina/parser/fakers/LocsFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/LocsFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/LocsFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/MultiTileBclFileFaker.java b/src/main/java/picard/illumina/parser/fakers/MultiTileBclFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/MultiTileBclFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/MultiTileBclFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java b/src/main/java/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java
diff --git a/src/java/picard/illumina/parser/fakers/PosFileFaker.java b/src/main/java/picard/illumina/parser/fakers/PosFileFaker.java
similarity index 100%
rename from src/java/picard/illumina/parser/fakers/PosFileFaker.java
rename to src/main/java/picard/illumina/parser/fakers/PosFileFaker.java
diff --git a/src/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReader.java b/src/main/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReader.java
rename to src/main/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReader.java
diff --git a/src/java/picard/illumina/parser/readers/BarcodeFileReader.java b/src/main/java/picard/illumina/parser/readers/BarcodeFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/BarcodeFileReader.java
rename to src/main/java/picard/illumina/parser/readers/BarcodeFileReader.java
diff --git a/src/java/picard/illumina/parser/readers/BclIndexReader.java b/src/main/java/picard/illumina/parser/readers/BclIndexReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/BclIndexReader.java
rename to src/main/java/picard/illumina/parser/readers/BclIndexReader.java
diff --git a/src/java/picard/illumina/parser/readers/BclQualityEvaluationStrategy.java b/src/main/java/picard/illumina/parser/readers/BclQualityEvaluationStrategy.java
similarity index 98%
rename from src/java/picard/illumina/parser/readers/BclQualityEvaluationStrategy.java
rename to src/main/java/picard/illumina/parser/readers/BclQualityEvaluationStrategy.java
index 9b448f1..fa9eef5 100644
--- a/src/java/picard/illumina/parser/readers/BclQualityEvaluationStrategy.java
+++ b/src/main/java/picard/illumina/parser/readers/BclQualityEvaluationStrategy.java
@@ -32,7 +32,7 @@ public class BclQualityEvaluationStrategy {
private final Map<Byte, AtomicInteger> qualityCountMap = Collections.synchronizedMap(new CollectionUtil.DefaultingMap<Byte, AtomicInteger>(
new CollectionUtil.DefaultingMap.Factory<AtomicInteger, Byte>() {
@Override
- public AtomicInteger make(final Byte _) {
+ public AtomicInteger make(final Byte key) {
return new AtomicInteger(0);
}
}, true));
diff --git a/src/java/picard/illumina/parser/readers/BclReader.java b/src/main/java/picard/illumina/parser/readers/BclReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/BclReader.java
rename to src/main/java/picard/illumina/parser/readers/BclReader.java
diff --git a/src/java/picard/illumina/parser/readers/ClocsFileReader.java b/src/main/java/picard/illumina/parser/readers/ClocsFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/ClocsFileReader.java
rename to src/main/java/picard/illumina/parser/readers/ClocsFileReader.java
diff --git a/src/java/picard/illumina/parser/readers/FilterFileReader.java b/src/main/java/picard/illumina/parser/readers/FilterFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/FilterFileReader.java
rename to src/main/java/picard/illumina/parser/readers/FilterFileReader.java
diff --git a/src/java/picard/illumina/parser/readers/LocsFileReader.java b/src/main/java/picard/illumina/parser/readers/LocsFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/LocsFileReader.java
rename to src/main/java/picard/illumina/parser/readers/LocsFileReader.java
diff --git a/src/java/picard/illumina/parser/readers/MMapBackedIteratorFactory.java b/src/main/java/picard/illumina/parser/readers/MMapBackedIteratorFactory.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/MMapBackedIteratorFactory.java
rename to src/main/java/picard/illumina/parser/readers/MMapBackedIteratorFactory.java
diff --git a/src/java/picard/illumina/parser/readers/PosFileReader.java b/src/main/java/picard/illumina/parser/readers/PosFileReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/PosFileReader.java
rename to src/main/java/picard/illumina/parser/readers/PosFileReader.java
diff --git a/src/java/picard/illumina/parser/readers/TileMetricsOutReader.java b/src/main/java/picard/illumina/parser/readers/TileMetricsOutReader.java
similarity index 100%
rename from src/java/picard/illumina/parser/readers/TileMetricsOutReader.java
rename to src/main/java/picard/illumina/parser/readers/TileMetricsOutReader.java
diff --git a/src/java/picard/illumina/quality/CollectHiSeqXPfFailMetrics.java b/src/main/java/picard/illumina/quality/CollectHiSeqXPfFailMetrics.java
similarity index 82%
rename from src/java/picard/illumina/quality/CollectHiSeqXPfFailMetrics.java
rename to src/main/java/picard/illumina/quality/CollectHiSeqXPfFailMetrics.java
index cacc0fa..77eb60c 100644
--- a/src/java/picard/illumina/quality/CollectHiSeqXPfFailMetrics.java
+++ b/src/main/java/picard/illumina/quality/CollectHiSeqXPfFailMetrics.java
@@ -58,12 +58,73 @@ import java.util.concurrent.TimeUnit;
* @author Yossi Farjoun
*/
@CommandLineProgramProperties(
- usage = "Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories. The classification is based on a heuristic that was derived by looking at a few titration experiments.",
- usageShort = "Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into various categories.",
+ usage = CollectHiSeqXPfFailMetrics.USAGE_SUMMARY + CollectHiSeqXPfFailMetrics.USAGE_DETAILS,
+ usageShort = CollectHiSeqXPfFailMetrics.USAGE_SUMMARY,
programGroup = Metrics.class
)
public class CollectHiSeqXPfFailMetrics extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Classify PF-Failing reads in a HiSeqX Illumina Basecalling directory into " +
+ "various categories.";
+ static final String USAGE_DETAILS = "<p>This tool categorizes the reads that did not pass filter " +
+ "(PF-Failing) into four groups. These groups are based on a heuristic that was derived by looking at a" +
+ " few titration experiments. </p>" +
+ "" +
+ "<p>After examining the called bases from the first 24 cycles of each read, the PF-Failed reads " +
+ "are grouped into the following four categories: " +
+ "<ul>" +
+ "<li>MISALIGNED - The first 24 basecalls of a read are uncalled (numNs~24). " +
+ " These types of reads appear to be flow cell artifacts because reads were only found near tile boundaries " +
+ "and were concentration (library) independent</li> " +
+
+ "<li>EMPTY - All 24 bases are called (numNs~0) but the number of bases with quality scores" +
+ " greater than two is less than or equal to eight (numQGtTwo<=8). These reads were location independent" +
+ " within the tiles and were inversely proportional to the library concentration</li>" +
+
+ "<li>POLYCLONAL - All 24 bases were called and numQGtTwo>=12, were independent of their location" +
+ " with the tiles, and were directly proportional to the library concentration. These reads are likely" +
+ " the result of PCR artifacts </li>" +
+
+ "<li>UNKNOWN - The remaining reads that are PF-Failing but did not fit into any of the groups " +
+ "listed above</li>" +
+ "</ul></p> "+
+ "" +
+ "<p>The tool defaults to the SUMMARY output which indicates the number of PF-Failed reads per tile and" +
+ " groups them into the categories described above accordingly.</p> " +
+ "<p>A DETAILED metrics option is also available that subdivides the SUMMARY outputs by the x- y- position" +
+ " of these reads within each tile. To obtain the DETAILED metric table, you must add the " +
+ "PROB_EXPLICIT_READS option to your command line and set the value between 0 and 1. This value represents" +
+ " the fractional probability of PF-Failed reads to send to output. For example, if PROB_EXPLICIT_READS=0, " +
+ "then no metrics will be output. If PROB_EXPLICIT_READS=1, then it will " +
+ "provide detailed metrics for all (100%) of the reads. It follows that setting the " +
+ "PROB_EXPLICIT_READS=0.5, will provide detailed metrics for half of the PF-Failed reads.</p> "+
+
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
+ "" +
+ "<h4>Usage example: (SUMMARY Metrics)</h4> " +
+ "<pre>" +
+ "java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />" +
+ " BASECALLS_DIR=/BaseCalls/ \\<br />" +
+ " OUTPUT=/metrics/ \\<br />" +
+ " LANE=001" +
+ "</pre>" +
+ "<h4>Usage example: (DETAILED Metrics)</h4>" +
+ "<pre>"+
+ "java -jar picard.jar CollectHiSeqXPfFailMetrics \\<br />" +
+ " BASECALLS_DIR=/BaseCalls/ \\<br />" +
+ " OUTPUT=/Detail_metrics/ \\<br />" +
+ " LANE=001 \\<br />" +
+ " PROB_EXPLICIT_READS=1" +
+ "</pre>" +
+ "" +
+ "Please see our documentation on the " +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailSummaryMetric'>SUMMARY</a>" +
+ " and " +
+ "" +
+ "<a href='https://broadinstitute.github.io/picard/picard-metric-definitions.html#CollectHiSeqXPfFailMetrics.PFFailDetailedMetric'>DETAILED</a> " +
+ "metrics for comprehensive explanations of the outputs produced by this tool." +
+ "<hr />";
+
@Option(doc = "The Illumina basecalls directory. ", shortName = "B")
public File BASECALLS_DIR;
@@ -365,19 +426,19 @@ public class CollectHiSeqXPfFailMetrics extends CommandLineProgram {
/** a metric class for describing FP failing reads from an Illumina HiSeqX lane * */
public static class PFFailDetailedMetric extends MetricBase {
- // The Tile that is described by this metric.
+ /** The Tile that is described by this metric */
public Integer TILE;
- //The X coordinate of the read within the tile
+ /** The X coordinate of the read within the tile */
public int X;
- //The Y coordinate of the read within the tile
+ /** The Y coordinate of the read within the tile */
public int Y;
- //The number of Ns found in this read.
+ /** The number of Ns found in this read */
public int NUM_N;
- //The number of Quality scores greater than 2 found in this read
+ /** The number of Quality scores greater than 2 found in this read */
public int NUM_Q_GT_TWO;
/**
@@ -443,7 +504,8 @@ public class CollectHiSeqXPfFailMetrics extends CommandLineProgram {
/** The fraction of non-PF reads in this tile that have not been classified (as fraction of all non-PF reads). */
public double PCT_PF_FAIL_UNKNOWN = 0.0;
- // constructor takes a String for tile since we want to have one instance with tile="All". This tile will contain the summary of all the tiles
+ //Constructor takes a String for tile since we want to have one instance with tile="All". This column will contain the
+ // summary of all the tiles
public PFFailSummaryMetric(final String tile) {
TILE = tile;
}
diff --git a/src/java/picard/metrics/GcBiasMetrics.java b/src/main/java/picard/metrics/GcBiasMetrics.java
similarity index 100%
rename from src/java/picard/metrics/GcBiasMetrics.java
rename to src/main/java/picard/metrics/GcBiasMetrics.java
diff --git a/src/java/picard/metrics/MultiLevelCollector.java b/src/main/java/picard/metrics/MultiLevelCollector.java
similarity index 100%
rename from src/java/picard/metrics/MultiLevelCollector.java
rename to src/main/java/picard/metrics/MultiLevelCollector.java
diff --git a/src/java/picard/metrics/MultilevelMetrics.java b/src/main/java/picard/metrics/MultilevelMetrics.java
similarity index 100%
rename from src/java/picard/metrics/MultilevelMetrics.java
rename to src/main/java/picard/metrics/MultilevelMetrics.java
diff --git a/src/java/picard/metrics/PerUnitMetricCollector.java b/src/main/java/picard/metrics/PerUnitMetricCollector.java
similarity index 100%
rename from src/java/picard/metrics/PerUnitMetricCollector.java
rename to src/main/java/picard/metrics/PerUnitMetricCollector.java
diff --git a/src/java/picard/metrics/SAMRecordAndReference.java b/src/main/java/picard/metrics/SAMRecordAndReference.java
similarity index 100%
rename from src/java/picard/metrics/SAMRecordAndReference.java
rename to src/main/java/picard/metrics/SAMRecordAndReference.java
diff --git a/src/java/picard/metrics/SAMRecordAndReferenceMultiLevelCollector.java b/src/main/java/picard/metrics/SAMRecordAndReferenceMultiLevelCollector.java
similarity index 100%
rename from src/java/picard/metrics/SAMRecordAndReferenceMultiLevelCollector.java
rename to src/main/java/picard/metrics/SAMRecordAndReferenceMultiLevelCollector.java
diff --git a/src/java/picard/metrics/SAMRecordMultiLevelCollector.java b/src/main/java/picard/metrics/SAMRecordMultiLevelCollector.java
similarity index 100%
rename from src/java/picard/metrics/SAMRecordMultiLevelCollector.java
rename to src/main/java/picard/metrics/SAMRecordMultiLevelCollector.java
diff --git a/src/java/picard/pedigree/PedFile.java b/src/main/java/picard/pedigree/PedFile.java
similarity index 99%
rename from src/java/picard/pedigree/PedFile.java
rename to src/main/java/picard/pedigree/PedFile.java
index 7ebcf28..5133b46 100644
--- a/src/java/picard/pedigree/PedFile.java
+++ b/src/main/java/picard/pedigree/PedFile.java
@@ -26,7 +26,7 @@ public class PedFile extends TreeMap<String, PedFile.PedTrio> {
private final String delimiterString; // A textual representation of the delimiter, for output purposes
// These two are really for PedTrio, but they can't be static in there and need to be accessed outside of PedFile
- public static final Number NO_PHENO = new Integer(-9);
+ public static final Number NO_PHENO = Integer.valueOf(-9);
public static final Sex UNKNOWN_SEX = Sex.Unknown;
public PedFile(final boolean isTabMode) {
diff --git a/src/java/picard/pedigree/Sex.java b/src/main/java/picard/pedigree/Sex.java
similarity index 100%
rename from src/java/picard/pedigree/Sex.java
rename to src/main/java/picard/pedigree/Sex.java
diff --git a/src/java/picard/reference/ExtractSequences.java b/src/main/java/picard/reference/ExtractSequences.java
similarity index 100%
rename from src/java/picard/reference/ExtractSequences.java
rename to src/main/java/picard/reference/ExtractSequences.java
diff --git a/src/java/picard/reference/NonNFastaSize.java b/src/main/java/picard/reference/NonNFastaSize.java
similarity index 100%
rename from src/java/picard/reference/NonNFastaSize.java
rename to src/main/java/picard/reference/NonNFastaSize.java
diff --git a/src/java/picard/reference/NormalizeFasta.java b/src/main/java/picard/reference/NormalizeFasta.java
similarity index 100%
rename from src/java/picard/reference/NormalizeFasta.java
rename to src/main/java/picard/reference/NormalizeFasta.java
diff --git a/src/java/picard/sam/AbstractAlignmentMerger.java b/src/main/java/picard/sam/AbstractAlignmentMerger.java
similarity index 97%
rename from src/java/picard/sam/AbstractAlignmentMerger.java
rename to src/main/java/picard/sam/AbstractAlignmentMerger.java
index 137d463..1797d52 100644
--- a/src/java/picard/sam/AbstractAlignmentMerger.java
+++ b/src/main/java/picard/sam/AbstractAlignmentMerger.java
@@ -476,15 +476,8 @@ public abstract class AbstractAlignmentMerger {
final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");
for (final SAMRecord rec : sink.sorter) {
- if (!rec.getReadUnmappedFlag()) {
- if (refSeq != null) {
- final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
- rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));
-
- if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
- rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
- }
- }
+ if (!rec.getReadUnmappedFlag() && refSeq != null) {
+ fixNMandUQ(rec, refSeq, bisulfiteSequence);
}
writer.addAlignment(rec);
finalProgress.record(rec);
@@ -497,6 +490,24 @@ public abstract class AbstractAlignmentMerger {
log.info("Wrote " + aligned + " alignment records and " + (alignedReadsOnly ? 0 : unmapped) + " unmapped reads.");
}
+ /** Calculates and sets the NM tag and, if the record has base qualities, the UQ tag from the record and the reference.
+ *
+ * @param record the record to be fixed; its reference name is used to look up the reference bases
+ * @param refSeqWalker a ReferenceSequenceFileWalker that supplies the sequence dictionary and the reference bases
+ * @param isBisulfiteSequence a flag indicating whether the sequence came from bisulfite-sequencing; it is passed to both
+ * the NM and the UQ calculation.
+ *
+ * No return value, modifies the provided record. UQ is not set when the base qualities are SAMRecord.NULL_QUALS.
+ */
+ public static void fixNMandUQ(final SAMRecord record, final ReferenceSequenceFileWalker refSeqWalker, final boolean isBisulfiteSequence) {
+ final byte[] referenceBases = refSeqWalker.get(refSeqWalker.getSequenceDictionary().getSequenceIndex(record.getReferenceName())).getBases();
+ record.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(record, referenceBases, 0, isBisulfiteSequence));
+
+ if (record.getBaseQualities() != SAMRecord.NULL_QUALS) { // UQ is only written when the record carries base qualities
+ record.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(record, referenceBases, 0, isBisulfiteSequence));
+ }
+ }
+
/**
* Add record if it is primary or optionally secondary.
*/
diff --git a/src/java/picard/sam/AddCommentsToBam.java b/src/main/java/picard/sam/AddCommentsToBam.java
similarity index 100%
rename from src/java/picard/sam/AddCommentsToBam.java
rename to src/main/java/picard/sam/AddCommentsToBam.java
diff --git a/src/java/picard/sam/AddOrReplaceReadGroups.java b/src/main/java/picard/sam/AddOrReplaceReadGroups.java
similarity index 100%
rename from src/java/picard/sam/AddOrReplaceReadGroups.java
rename to src/main/java/picard/sam/AddOrReplaceReadGroups.java
diff --git a/src/java/picard/sam/BamIndexStats.java b/src/main/java/picard/sam/BamIndexStats.java
similarity index 100%
rename from src/java/picard/sam/BamIndexStats.java
rename to src/main/java/picard/sam/BamIndexStats.java
diff --git a/src/java/picard/sam/BestEndMapqPrimaryAlignmentStrategy.java b/src/main/java/picard/sam/BestEndMapqPrimaryAlignmentStrategy.java
similarity index 100%
rename from src/java/picard/sam/BestEndMapqPrimaryAlignmentStrategy.java
rename to src/main/java/picard/sam/BestEndMapqPrimaryAlignmentStrategy.java
diff --git a/src/java/picard/sam/BestMapqPrimaryAlignmentSelectionStrategy.java b/src/main/java/picard/sam/BestMapqPrimaryAlignmentSelectionStrategy.java
similarity index 100%
rename from src/java/picard/sam/BestMapqPrimaryAlignmentSelectionStrategy.java
rename to src/main/java/picard/sam/BestMapqPrimaryAlignmentSelectionStrategy.java
diff --git a/src/java/picard/sam/BuildBamIndex.java b/src/main/java/picard/sam/BuildBamIndex.java
similarity index 100%
rename from src/java/picard/sam/BuildBamIndex.java
rename to src/main/java/picard/sam/BuildBamIndex.java
diff --git a/src/java/picard/sam/CalculateReadGroupChecksum.java b/src/main/java/picard/sam/CalculateReadGroupChecksum.java
similarity index 100%
rename from src/java/picard/sam/CalculateReadGroupChecksum.java
rename to src/main/java/picard/sam/CalculateReadGroupChecksum.java
diff --git a/src/java/picard/sam/CheckTerminatorBlock.java b/src/main/java/picard/sam/CheckTerminatorBlock.java
similarity index 100%
rename from src/java/picard/sam/CheckTerminatorBlock.java
rename to src/main/java/picard/sam/CheckTerminatorBlock.java
diff --git a/src/java/picard/sam/CleanSam.java b/src/main/java/picard/sam/CleanSam.java
similarity index 100%
rename from src/java/picard/sam/CleanSam.java
rename to src/main/java/picard/sam/CleanSam.java
diff --git a/src/java/picard/sam/CompareSAMs.java b/src/main/java/picard/sam/CompareSAMs.java
similarity index 95%
rename from src/java/picard/sam/CompareSAMs.java
rename to src/main/java/picard/sam/CompareSAMs.java
index a45d17b..6991bd7 100644
--- a/src/java/picard/sam/CompareSAMs.java
+++ b/src/main/java/picard/sam/CompareSAMs.java
@@ -161,7 +161,7 @@ public class CompareSAMs extends CommandLineProgram {
// any of the saved right reads.
for (; itLeft.hasCurrent(); itLeft.advance()) {
final SAMRecord left = itLeft.getCurrent();
- final SAMRecord right = rightUnmatched.remove(left.getReadName());
+ final SAMRecord right = rightUnmatched.remove(getKeyForRecord(left));
if (right == null) {
++missingRight;
} else {
@@ -174,11 +174,11 @@ public class CompareSAMs extends CommandLineProgram {
// reads from the left that has the same coordinate.
final SAMRecord left = itLeft.getCurrent();
final Map<String, SAMRecord> leftCurrentCoordinate = new HashMap<String, SAMRecord>();
- leftCurrentCoordinate.put(left.getReadName(), left);
+ leftCurrentCoordinate.put(getKeyForRecord(left), left);
while (itLeft.advance()) {
final SAMRecord nextLeft = itLeft.getCurrent();
if (compareAlignmentCoordinates(left, nextLeft) == 0) {
- leftCurrentCoordinate.put(nextLeft.getReadName(), nextLeft);
+ leftCurrentCoordinate.put(getKeyForRecord(nextLeft), nextLeft);
} else {
break;
}
@@ -186,7 +186,7 @@ public class CompareSAMs extends CommandLineProgram {
// Advance the right iterator until it is >= the left reads that have just been grabbed
while (itRight.hasCurrent() && compareAlignmentCoordinates(left, itRight.getCurrent()) > 0) {
final SAMRecord right = itRight.getCurrent();
- rightUnmatched.put(right.getReadName(), right);
+ rightUnmatched.put(getKeyForRecord(right), right);
itRight.advance();
}
// For each right read that has the same coordinate as the current left reads,
@@ -194,24 +194,24 @@ public class CompareSAMs extends CommandLineProgram {
// save the right read for later.
for (; itRight.hasCurrent() && compareAlignmentCoordinates(left, itRight.getCurrent()) == 0; itRight.advance()) {
final SAMRecord right = itRight.getCurrent();
- final SAMRecord matchingLeft = leftCurrentCoordinate.remove(right.getReadName());
+ final SAMRecord matchingLeft = leftCurrentCoordinate.remove(getKeyForRecord(right));
if (matchingLeft != null) {
ret = tallyAlignmentRecords(matchingLeft, right) && ret;
} else {
- rightUnmatched.put(right.getReadName(), right);
+ rightUnmatched.put(getKeyForRecord(right), right);
}
}
// Anything left in leftCurrentCoordinate has not been matched
for (final SAMRecord samRecord : leftCurrentCoordinate.values()) {
- leftUnmatched.put(samRecord.getReadName(), samRecord);
+ leftUnmatched.put(getKeyForRecord(samRecord), samRecord);
}
}
// The left iterator has been exhausted. See if any of the remaining right reads
// match any of the saved left reads.
for (; itRight.hasCurrent(); itRight.advance()) {
final SAMRecord right = itRight.getCurrent();
- final SAMRecord left = leftUnmatched.remove(right.getReadName());
+ final SAMRecord left = leftUnmatched.remove(getKeyForRecord(right));
if (left != null) {
tallyAlignmentRecords(left, right);
} else {
@@ -222,9 +222,9 @@ public class CompareSAMs extends CommandLineProgram {
// Look up reads that were unmatched from left, and see if they are in rightUnmatched.
// If found, remove from rightUnmatched and tally.
for (final Map.Entry<String, SAMRecord> leftEntry : leftUnmatched.entrySet()) {
- final String readName = leftEntry.getKey();
+ final String key = leftEntry.getKey();
final SAMRecord left = leftEntry.getValue();
- final SAMRecord right = rightUnmatched.remove(readName);
+ final SAMRecord right = rightUnmatched.remove(key);
if (right == null) {
++missingRight;
continue;
@@ -235,10 +235,8 @@ public class CompareSAMs extends CommandLineProgram {
// Any elements remaining in rightUnmatched are guaranteed not to be in leftUnmatched.
missingLeft += rightUnmatched.size();
- if (ret) {
- if (missingLeft > 0 || missingRight > 0 || mappingsDiffer > 0 || unmappedLeft > 0 || unmappedRight > 0) {
- ret = false;
- }
+ if (ret && (missingLeft > 0 || missingRight > 0 || mappingsDiffer > 0 || unmappedLeft > 0 || unmappedRight > 0)) {
+ ret = false;
}
return ret;
}
@@ -501,6 +499,11 @@ public class CompareSAMs extends CommandLineProgram {
reportDifference(o1.toString(), o2.toString(), label);
}
+ private String getKeyForRecord(final SAMRecord record) { // Builds the map key used when matching records: read name plus end of pair
+ final boolean isSecondOfPair = record.getReadPairedFlag() && record.getSecondOfPairFlag(); // unpaired reads are keyed as "first"
+ return record.getReadName() + "-" + (isSecondOfPair ? "second" : "first");
+ }
+
public int getMappingsMatch() {
return mappingsMatch;
}
diff --git a/src/java/picard/sam/CreateSequenceDictionary.java b/src/main/java/picard/sam/CreateSequenceDictionary.java
similarity index 100%
rename from src/java/picard/sam/CreateSequenceDictionary.java
rename to src/main/java/picard/sam/CreateSequenceDictionary.java
diff --git a/src/java/picard/sam/DownsampleSam.java b/src/main/java/picard/sam/DownsampleSam.java
similarity index 100%
rename from src/java/picard/sam/DownsampleSam.java
rename to src/main/java/picard/sam/DownsampleSam.java
diff --git a/src/java/picard/sam/DuplicationMetrics.java b/src/main/java/picard/sam/DuplicationMetrics.java
similarity index 95%
rename from src/java/picard/sam/DuplicationMetrics.java
rename to src/main/java/picard/sam/DuplicationMetrics.java
index 441d983..7544031 100644
--- a/src/java/picard/sam/DuplicationMetrics.java
+++ b/src/main/java/picard/sam/DuplicationMetrics.java
@@ -41,10 +41,13 @@ public class DuplicationMetrics extends MetricBase {
*/
public long UNPAIRED_READS_EXAMINED;
- /** The number of mapped read pairs examined. */
+ /** The number of mapped read pairs examined. (Primary, non-supplemental) */
public long READ_PAIRS_EXAMINED;
- /** The total number of unmapped reads examined. */
+ /** The number of reads that were either secondary or supplementary */
+ public long SECONDARY_OR_SUPPLEMENTARY_RDS;
+
+ /** The total number of unmapped reads examined. (Primary, non-supplemental) */
public long UNMAPPED_READS;
/** The number of fragments that were marked as duplicates. */
@@ -59,7 +62,7 @@ public class DuplicationMetrics extends MetricBase {
*/
public long READ_PAIR_OPTICAL_DUPLICATES;
- /** The percentage of mapped sequence that is marked as duplicate. */
+ /** The fraction of mapped sequence that is marked as duplicate. */
public Double PERCENT_DUPLICATION;
/** The estimated number of unique molecules in the library based on PE duplication. */
@@ -174,7 +177,7 @@ public class DuplicationMetrics extends MetricBase {
System.out.println();
System.out.println("X Seq\tX Unique");
- for (Histogram<Double>.Bin bin : m.calculateRoiHistogram().values()) {
+ for (Histogram.Bin<Double> bin : m.calculateRoiHistogram().values()) {
System.out.println(bin.getId() + "\t" + bin.getValue());
}
diff --git a/src/java/picard/sam/EarliestFragmentPrimaryAlignmentSelectionStrategy.java b/src/main/java/picard/sam/EarliestFragmentPrimaryAlignmentSelectionStrategy.java
similarity index 100%
rename from src/java/picard/sam/EarliestFragmentPrimaryAlignmentSelectionStrategy.java
rename to src/main/java/picard/sam/EarliestFragmentPrimaryAlignmentSelectionStrategy.java
diff --git a/src/java/picard/sam/FastqToSam.java b/src/main/java/picard/sam/FastqToSam.java
similarity index 100%
rename from src/java/picard/sam/FastqToSam.java
rename to src/main/java/picard/sam/FastqToSam.java
diff --git a/src/java/picard/sam/FilterSamReads.java b/src/main/java/picard/sam/FilterSamReads.java
similarity index 100%
rename from src/java/picard/sam/FilterSamReads.java
rename to src/main/java/picard/sam/FilterSamReads.java
diff --git a/src/java/picard/sam/FixMateInformation.java b/src/main/java/picard/sam/FixMateInformation.java
similarity index 100%
rename from src/java/picard/sam/FixMateInformation.java
rename to src/main/java/picard/sam/FixMateInformation.java
diff --git a/src/java/picard/sam/GatherBamFiles.java b/src/main/java/picard/sam/GatherBamFiles.java
similarity index 100%
rename from src/java/picard/sam/GatherBamFiles.java
rename to src/main/java/picard/sam/GatherBamFiles.java
diff --git a/src/java/picard/sam/HitsForInsert.java b/src/main/java/picard/sam/HitsForInsert.java
similarity index 100%
rename from src/java/picard/sam/HitsForInsert.java
rename to src/main/java/picard/sam/HitsForInsert.java
diff --git a/src/java/picard/sam/MergeBamAlignment.java b/src/main/java/picard/sam/MergeBamAlignment.java
similarity index 100%
rename from src/java/picard/sam/MergeBamAlignment.java
rename to src/main/java/picard/sam/MergeBamAlignment.java
diff --git a/src/java/picard/sam/MergeSamFiles.java b/src/main/java/picard/sam/MergeSamFiles.java
similarity index 100%
rename from src/java/picard/sam/MergeSamFiles.java
rename to src/main/java/picard/sam/MergeSamFiles.java
diff --git a/src/java/picard/sam/MostDistantPrimaryAlignmentSelectionStrategy.java b/src/main/java/picard/sam/MostDistantPrimaryAlignmentSelectionStrategy.java
similarity index 100%
rename from src/java/picard/sam/MostDistantPrimaryAlignmentSelectionStrategy.java
rename to src/main/java/picard/sam/MostDistantPrimaryAlignmentSelectionStrategy.java
diff --git a/src/java/picard/sam/MultiHitAlignedReadIterator.java b/src/main/java/picard/sam/MultiHitAlignedReadIterator.java
similarity index 100%
rename from src/java/picard/sam/MultiHitAlignedReadIterator.java
rename to src/main/java/picard/sam/MultiHitAlignedReadIterator.java
diff --git a/src/java/picard/sam/PositionBasedDownsampleSam.java b/src/main/java/picard/sam/PositionBasedDownsampleSam.java
similarity index 100%
rename from src/java/picard/sam/PositionBasedDownsampleSam.java
rename to src/main/java/picard/sam/PositionBasedDownsampleSam.java
diff --git a/src/java/picard/sam/PrimaryAlignmentSelectionStrategy.java b/src/main/java/picard/sam/PrimaryAlignmentSelectionStrategy.java
similarity index 100%
rename from src/java/picard/sam/PrimaryAlignmentSelectionStrategy.java
rename to src/main/java/picard/sam/PrimaryAlignmentSelectionStrategy.java
diff --git a/src/java/picard/sam/ReorderSam.java b/src/main/java/picard/sam/ReorderSam.java
similarity index 100%
rename from src/java/picard/sam/ReorderSam.java
rename to src/main/java/picard/sam/ReorderSam.java
diff --git a/src/java/picard/sam/ReplaceSamHeader.java b/src/main/java/picard/sam/ReplaceSamHeader.java
similarity index 100%
rename from src/java/picard/sam/ReplaceSamHeader.java
rename to src/main/java/picard/sam/ReplaceSamHeader.java
diff --git a/src/java/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java b/src/main/java/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
similarity index 100%
rename from src/java/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
rename to src/main/java/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
diff --git a/src/main/java/picard/sam/RevertSam.java b/src/main/java/picard/sam/RevertSam.java
new file mode 100644
index 0000000..c2d0820
--- /dev/null
+++ b/src/main/java/picard/sam/RevertSam.java
@@ -0,0 +1,758 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package picard.sam;
+
+import htsjdk.samtools.BAMRecordCodec;
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMFileHeader.SortOrder;
+import htsjdk.samtools.SAMFileWriter;
+import htsjdk.samtools.SAMFileWriterFactory;
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMRecordQueryNameComparator;
+import htsjdk.samtools.SAMRecordUtil;
+import htsjdk.samtools.SAMTag;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.filter.FilteringIterator;
+import htsjdk.samtools.filter.SamRecordFilter;
+import htsjdk.samtools.util.CloserUtil;
+import htsjdk.samtools.util.FastqQualityFormat;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.PeekableIterator;
+import htsjdk.samtools.util.ProgressLogger;
+import htsjdk.samtools.util.QualityEncodingDetector;
+import htsjdk.samtools.util.SolexaQualityConverter;
+import htsjdk.samtools.util.SortingCollection;
+import picard.PicardException;
+import picard.cmdline.CommandLineProgram;
+import picard.cmdline.CommandLineProgramProperties;
+import picard.cmdline.Option;
+import picard.cmdline.StandardOptionDefinitions;
+import picard.cmdline.programgroups.SamOrBam;
+import picard.util.TabbedTextFileWithHeaderParser;
+
+import java.io.File;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.text.DecimalFormat;
+import java.text.NumberFormat;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Reverts a SAM file by optionally restoring original quality scores and by removing
+ * all alignment information.
+ */
+@CommandLineProgramProperties(
+ usage = RevertSam.USAGE_SUMMARY + RevertSam.USAGE_DETAILS,
+ usageShort = RevertSam.USAGE_SUMMARY,
+ programGroup = SamOrBam.class
+)
+public class RevertSam extends CommandLineProgram {
+ // Usage text referenced by the CommandLineProgramProperties annotation on this class.
+ // USAGE_DETAILS embeds HTML markup (<h4>, <pre>, <br />, <hr />), so adjacent fragments only need a
+ // separating space where they continue a sentence.
+ static final String USAGE_SUMMARY = "Reverts SAM or BAM files to a previous state. ";
+ static final String USAGE_DETAILS = "This tool removes or restores certain properties of the SAM records, including alignment " +
+         "information, which can be used to produce an unmapped BAM (uBAM) from a previously aligned BAM. It is also capable of " +
+         "restoring the original quality scores of a BAM file that has already undergone base quality score recalibration (BQSR) if the " +
+         "original qualities were retained." +
+         "<h4>Example with single output:</h4>" +
+         "<pre>" +
+         "java -jar picard.jar RevertSam \\<br />" +
+         " I=input.bam \\<br />" +
+         " O=reverted.bam" +
+         "</pre>" +
+         "Output format is BAM by default, or SAM or CRAM if the input path ends with '.sam' or '.cram', respectively." +
+         "<h4>Example outputting by read group with output map:</h4>" +
+         "<pre>" +
+         "java -jar picard.jar RevertSam \\<br />" +
+         " I=input.bam \\<br />" +
+         " OUTPUT_BY_READGROUP=true \\<br />" +
+         " OUTPUT_MAP=reverted_bam_paths.tsv" +
+         "</pre>" +
+         "Will output a BAM/SAM file per read group. By default, all outputs will be in BAM format. " +
+         "However, a SAM file will be produced instead for any read group mapped in OUTPUT_MAP to a path ending with '.sam'. " +
+         "A CRAM file will be produced for any read group mapped to a path ending with '.cram'. " +
+         "<h4>Example outputting by read group without output map:</h4>" +
+         "<pre>" +
+         "java -jar picard.jar RevertSam \\<br />" +
+         " I=input.bam \\<br />" +
+         " OUTPUT_BY_READGROUP=true \\<br />" +
+         " O=/write/reverted/read/group/bams/in/this/dir" +
+         "</pre>" +
+         "Will output a BAM/SAM file per read group. By default, all outputs will be in BAM format. " +
+         "However, outputs will be in SAM format if the input path ends with '.sam', or CRAM format if it ends with '.cram'." +
+         "<hr />";
+ // Command-line options. The doc strings are user-facing help text.
+ @Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "The input SAM/BAM file to revert the state of.")
+ public File INPUT;
+
+ @Option(mutex = {"OUTPUT_MAP"}, shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc = "The output SAM/BAM file to create, or an output directory if OUTPUT_BY_READGROUP is true.")
+ public File OUTPUT;
+
+ @Option(mutex = {"OUTPUT"}, shortName = "OM", doc = "Tab separated file with two columns, READ_GROUP_ID and OUTPUT, providing file mapping only used if OUTPUT_BY_READGROUP is true.")
+ public File OUTPUT_MAP;
+
+ @Option(shortName = "OBR", doc = "When true, outputs each read group in a separate file.")
+ public boolean OUTPUT_BY_READGROUP = false;
+
+ @Option(shortName = "SO", doc = "The sort order to create the reverted output file with.")
+ public SortOrder SORT_ORDER = SortOrder.queryname;
+
+ @Option(shortName = StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME, doc = "True to restore original qualities from the OQ field to the QUAL field if available.")
+ public boolean RESTORE_ORIGINAL_QUALITIES = true;
+
+ // The duplicate flag survives only when this option is false, and reads become unmapped only when
+ // REMOVE_ALIGNMENT_INFORMATION is true (see revertSamRecord), so that is the combination described.
+ @Option(doc = "Remove duplicate read flags from all reads. Note that if this is false and REMOVE_ALIGNMENT_INFORMATION==true, " +
+         "the output may have the unusual but sometimes desirable trait of having unmapped reads that are marked as duplicates.")
+ public boolean REMOVE_DUPLICATE_INFORMATION = true;
+
+ @Option(doc = "Remove all alignment information from the file.")
+ public boolean REMOVE_ALIGNMENT_INFORMATION = true;
+
+ @Option(doc = "When removing alignment information, the set of optional tags to remove.")
+ public List<String> ATTRIBUTE_TO_CLEAR = new ArrayList<String>() {{
+     add(SAMTag.NM.name());
+     add(SAMTag.UQ.name());
+     add(SAMTag.PG.name());
+     add(SAMTag.MD.name());
+     add(SAMTag.MQ.name());
+     add(SAMTag.SA.name()); // Supplementary alignment metadata
+     add(SAMTag.MC.name()); // Mate Cigar
+     add(SAMTag.AS.name());
+ }};
+
+ @Option(doc = "WARNING: This option is potentially destructive. If enabled will discard reads in order to produce " +
+         "a consistent output BAM. Reads discarded include (but are not limited to) paired reads with missing " +
+         "mates, duplicated records, records with mismatches in length of bases and qualities. This option can " +
+         "only be enabled if the output sort order is queryname and will always cause sorting to occur.")
+ public boolean SANITIZE = false;
+
+ @Option(doc = "If SANITIZE=true and higher than MAX_DISCARD_FRACTION reads are discarded due to sanitization then " +
+         "the program will exit with an Exception instead of exiting cleanly. Output BAM will still be valid.")
+ public double MAX_DISCARD_FRACTION = 0.01;
+
+ @Option(doc = "The sample alias to use in the reverted output file. This will override the existing " +
+         "sample alias in the file and is used only if all the read groups in the input file have the " +
+         "same sample alias ", shortName = StandardOptionDefinitions.SAMPLE_ALIAS_SHORT_NAME, optional = true)
+ public String SAMPLE_ALIAS;
+
+ @Option(doc = "The library name to use in the reverted output file. This will override the existing " +
+         "library name in the file and is used only if all the read groups in the input file have the " +
+         "same library name ", shortName = StandardOptionDefinitions.LIBRARY_NAME_SHORT_NAME, optional = true)
+ public String LIBRARY_NAME;
+
+ private final static Log log = Log.getInstance(RevertSam.class);
+
+ /**
+  * Command-line entry point: creates a RevertSam instance and hands the arguments to
+  * instanceMainWithExit.
+  */
+ public static void main(final String[] args) {
+     final RevertSam program = new RevertSam();
+     program.instanceMainWithExit(args);
+ }
+
+ /**
+  * Validates the option combination before any work is done: SORT_ORDER must be compatible with
+  * SANITIZE, and OUTPUT / OUTPUT_MAP must fit the chosen OUTPUT_BY_READGROUP mode.
+  *
+  * @return the collected error messages, or null when no problem was found
+  */
+ @Override
+ protected String[] customCommandLineValidation() {
+     final List<String> problems = new ArrayList<String>();
+     ValidationUtil.validateSanitizeSortOrder(SANITIZE, SORT_ORDER, problems);
+     ValidationUtil.validateOutputParams(OUTPUT_BY_READGROUP, OUTPUT, OUTPUT_MAP, problems);
+
+     if (problems.isEmpty()) {
+         return null;
+     }
+     return problems.toArray(new String[problems.size()]);
+ }
+
+ /**
+  * Reverts INPUT according to the program options and writes the result to OUTPUT, or to one file
+  * per read group when OUTPUT_BY_READGROUP is set. Secondary and supplementary records are dropped.
+  * When SANITIZE is set the reverted records are first collected in a queryname sorter and only
+  * written after the sanitization pass.
+  *
+  * @return 0 on success; -1 if determining the per-read-group quality encoding fails while sanitizing
+  * @throws PicardException if the fraction of reads discarded by sanitization exceeds MAX_DISCARD_FRACTION
+  */
+ @Override
+ protected int doWork() {
+     IOUtil.assertFileIsReadable(INPUT);
+     ValidationUtil.assertWritable(OUTPUT, OUTPUT_BY_READGROUP);
+
+     final boolean sanitizing = SANITIZE;
+     final SamReader in = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).validationStringency(VALIDATION_STRINGENCY).open(INPUT);
+     try {
+         final SAMFileHeader inHeader = in.getFileHeader();
+         ValidationUtil.validateHeaderOverrides(inHeader, SAMPLE_ALIAS, LIBRARY_NAME);
+
+         ////////////////////////////////////////////////////////////////////////////
+         // Build the output writer with an appropriate header based on the options
+         ////////////////////////////////////////////////////////////////////////////
+         final boolean presorted = isPresorted(inHeader, SORT_ORDER, sanitizing);
+         if (SAMPLE_ALIAS != null) overwriteSample(inHeader.getReadGroups(), SAMPLE_ALIAS);
+         if (LIBRARY_NAME != null) overwriteLibrary(inHeader.getReadGroups(), LIBRARY_NAME);
+         final SAMFileHeader singleOutHeader = createOutHeader(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
+         inHeader.getReadGroups().forEach(singleOutHeader::addReadGroup);
+
+         final Map<String, File> outputMap;
+         final Map<String, SAMFileHeader> headerMap;
+         if (OUTPUT_BY_READGROUP) {
+             final String defaultExtension = getDefaultExtension(INPUT.toString());
+             outputMap = createOutputMap(OUTPUT_MAP, OUTPUT, defaultExtension, inHeader.getReadGroups());
+             ValidationUtil.assertAllReadGroupsMapped(outputMap, inHeader.getReadGroups());
+             headerMap = createHeaderMap(inHeader, SORT_ORDER, REMOVE_ALIGNMENT_INFORMATION);
+         } else {
+             outputMap = null;
+             headerMap = null;
+         }
+
+         final SAMFileWriterFactory factory = new SAMFileWriterFactory();
+         final RevertSamWriter out = new RevertSamWriter(OUTPUT_BY_READGROUP, headerMap, outputMap, singleOutHeader, OUTPUT, presorted, factory, REFERENCE_SEQUENCE);
+
+         ////////////////////////////////////////////////////////////////////////////
+         // Build a sorting collection to use if we are sanitizing
+         ////////////////////////////////////////////////////////////////////////////
+         final RevertSamSorter sorter;
+         if (sanitizing) sorter = new RevertSamSorter(OUTPUT_BY_READGROUP, headerMap, singleOutHeader, MAX_RECORDS_IN_RAM);
+         else sorter = null;
+
+         final ProgressLogger progress = new ProgressLogger(log, 1000000, "Reverted");
+         for (final SAMRecord rec : in) {
+             // Weed out non-primary and supplemental read as we don't want duplicates in the reverted file!
+             if (rec.isSecondaryOrSupplementary()) continue;
+
+             // log the progress before you revert because otherwise the "last read position" might not be accurate
+             progress.record(rec);
+
+             // Actually do the reverting of the remaining records
+             revertSamRecord(rec);
+
+             if (sanitizing) sorter.add(rec);
+             else out.addAlignment(rec);
+         }
+
+         ////////////////////////////////////////////////////////////////////////////
+         // Now if we're sanitizing, clean up the records and write them to the output
+         ////////////////////////////////////////////////////////////////////////////
+         if (!sanitizing) {
+             out.close();
+         } else {
+             final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat;
+             try {
+                 readGroupToFormat = createReadGroupFormatMap(inHeader, REFERENCE_SEQUENCE, VALIDATION_STRINGENCY, INPUT, RESTORE_ORIGINAL_QUALITIES);
+             } catch (final PicardException e) {
+                 log.error(e.getMessage());
+                 // Nothing has been written yet on this path; still release the writers before bailing out.
+                 out.close();
+                 return -1;
+             }
+
+             final long[] sanitizeResults = sanitize(readGroupToFormat, sorter, out);
+             final long discarded = sanitizeResults[0];
+             final long total = sanitizeResults[1];
+             out.close();
+
+             // Guard the division: with no reads at all the rate would otherwise be NaN.
+             final double discardRate = (total == 0) ? 0d : discarded / (double) total;
+             final NumberFormat fmt = new DecimalFormat("0.000%");
+             log.info("Discarded " + discarded + " out of " + total + " (" + fmt.format(discardRate) + ") reads in order to sanitize output.");
+
+             if (discardRate > MAX_DISCARD_FRACTION) {
+                 throw new PicardException("Discarded " + fmt.format(discardRate) + " which is above MAX_DISCARD_FRACTION of " + fmt.format(MAX_DISCARD_FRACTION));
+             }
+         }
+
+         return 0;
+     } finally {
+         // Release the input on every exit path, including the early return and exceptions.
+         CloserUtil.close(in);
+     }
+ }
+
+ /**
+  * Chooses the extension for per-read-group output files from the input path.
+  *
+  * @param input the input path as a string
+  * @return ".sam" or ".cram" when the input ends with that suffix, otherwise ".bam"
+  */
+ static String getDefaultExtension(final String input) {
+     for (final String extension : new String[]{".sam", ".cram"}) {
+         if (input.endsWith(extension)) {
+             return extension;
+         }
+     }
+     return ".bam";
+ }
+
+ /**
+  * Decides whether the writer may treat incoming records as already sorted: true when the input
+  * header already declares the requested order, or when sanitizing into queryname order.
+  */
+ private boolean isPresorted(final SAMFileHeader inHeader, final SortOrder sortOrder, final boolean sanitizing) {
+     if (inHeader.getSortOrder() == sortOrder) {
+         return true;
+     }
+     return sanitizing && sortOrder == SortOrder.queryname;
+ }
+
+ /**
+  * Applies the reversions selected by the program options to a single record, in place:
+  * restoring original qualities, clearing the duplicate flag, and stripping alignment information.
+  *
+  * @param rec the record to modify
+  */
+ public void revertSamRecord(final SAMRecord rec) {
+     if (RESTORE_ORIGINAL_QUALITIES) {
+         final byte[] originalQuals = rec.getOriginalBaseQualities();
+         if (originalQuals != null) {
+             rec.setBaseQualities(originalQuals);
+             rec.setOriginalBaseQualities(null);
+         }
+     }
+
+     if (REMOVE_DUPLICATE_INFORMATION) {
+         rec.setDuplicateReadFlag(false);
+     }
+
+     if (!REMOVE_ALIGNMENT_INFORMATION) {
+         return;
+     }
+
+     // Put reverse-strand reads back into their as-sequenced orientation.
+     if (rec.getReadNegativeStrandFlag()) {
+         SAMRecordUtil.reverseComplement(rec);
+         rec.setReadNegativeStrandFlag(false);
+     }
+
+     // Alignment details of the read itself
+     rec.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+     rec.setAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
+     rec.setCigarString(SAMRecord.NO_ALIGNMENT_CIGAR);
+     rec.setMappingQuality(SAMRecord.NO_MAPPING_QUALITY);
+
+     rec.setInferredInsertSize(0);
+     rec.setNotPrimaryAlignmentFlag(false);
+     rec.setProperPairFlag(false);
+     rec.setReadUnmappedFlag(true);
+
+     // Alignment details of the mate; the mate is flagged unmapped only when the read is paired
+     rec.setMateAlignmentStart(SAMRecord.NO_ALIGNMENT_START);
+     rec.setMateNegativeStrandFlag(false);
+     rec.setMateReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
+     rec.setMateUnmappedFlag(rec.getReadPairedFlag());
+
+     // Finally drop the configured tags
+     for (final String tag : ATTRIBUTE_TO_CLEAR) {
+         rec.setAttribute(tag, null);
+     }
+ }
+
+ /**
+  * Walks the sorted records one read name at a time, discards whole name groups that are
+  * inconsistent, and writes the remaining records to the output. A group is discarded when any
+  * record has differing base and quality lengths, when an unpaired read shares its name with other
+  * reads, or when a paired group does not consist of exactly one first and one second of pair.
+  * Records whose read group was detected with a non-standard quality encoding have their
+  * qualities shifted before being written.
+  *
+  * @param readGroupToFormat detected quality encoding per read group
+  * @param sorter            source of the sorted records
+  * @param out               destination for the records that survive
+  * @return a two element array holding {discarded, total} read counts
+  */
+ private long[] sanitize(final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat, final RevertSamSorter sorter, final RevertSamWriter out) {
+
+     long total = 0, discarded = 0;
+     final ProgressLogger sanitizerProgress = new ProgressLogger(log, 1000000, "Sanitized");
+
+     final List<PeekableIterator<SAMRecord>> iterators = sorter.iterators();
+
+     for (final PeekableIterator<SAMRecord> iterator : iterators) {
+         readNameLoop:
+         while (iterator.hasNext()) {
+             final List<SAMRecord> recs = fetchByReadName(iterator);
+             total += recs.size();
+
+             // Check that all the reads have bases and qualities of the same length
+             for (final SAMRecord rec : recs) {
+                 if (rec.getReadBases().length != rec.getBaseQualities().length) {
+                     log.debug("Discarding " + recs.size() + " reads with name " + rec.getReadName() + " for mismatching bases and quals length.");
+                     discarded += recs.size();
+                     continue readNameLoop;
+                 }
+             }
+
+             // Check that if the first read is marked as unpaired that there is in fact only one read
+             if (!recs.get(0).getReadPairedFlag() && recs.size() > 1) {
+                 log.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because they claim to be unpaired.");
+                 discarded += recs.size();
+                 continue readNameLoop;
+             }
+
+             // Check that if we have paired reads there is exactly one first of pair and one second of pair
+             if (recs.get(0).getReadPairedFlag()) {
+                 int firsts = 0, seconds = 0, unpaired = 0;
+                 for (final SAMRecord rec : recs) {
+                     if (!rec.getReadPairedFlag()) {
+                         // NOTE(review): htsjdk's first/second-of-pair getters reject unpaired reads, so
+                         // only count the record here; the group is then discarded below instead of
+                         // aborting the whole run.
+                         ++unpaired;
+                         continue;
+                     }
+                     if (rec.getFirstOfPairFlag()) ++firsts;
+                     if (rec.getSecondOfPairFlag()) ++seconds;
+                 }
+
+                 if (unpaired > 0 || firsts != 1 || seconds != 1) {
+                     log.debug("Discarding " + recs.size() + " reads with name " + recs.get(0).getReadName() + " because pairing information is corrupt.");
+                     discarded += recs.size();
+                     continue readNameLoop;
+                 }
+             }
+
+             // If we've made it this far spit the records into the output!
+             for (final SAMRecord rec : recs) {
+                 // The only valid quality score encoding scheme is standard; if it's not standard, change it.
+                 final FastqQualityFormat recordFormat = readGroupToFormat.get(rec.getReadGroup());
+                 if (!recordFormat.equals(FastqQualityFormat.Standard)) {
+                     final byte[] quals = rec.getBaseQualities();
+                     for (int i = 0; i < quals.length; i++) {
+                         quals[i] -= SolexaQualityConverter.ILLUMINA_TO_PHRED_SUBTRAHEND;
+                     }
+                     rec.setBaseQualities(quals);
+                 }
+                 out.addAlignment(rec);
+                 sanitizerProgress.record(rec);
+             }
+         }
+     }
+     return new long[]{discarded, total};
+ }
+
+ /**
+  * Consumes the next run of records sharing one read name from the iterator. The run starts with
+  * the next available record and extends while the following record has the same name.
+  *
+  * @return the records of that run, or an empty list when the iterator is exhausted
+  */
+ private List<SAMRecord> fetchByReadName(final PeekableIterator<SAMRecord> iterator) {
+     final List<SAMRecord> sameName = new ArrayList<SAMRecord>();
+     if (!iterator.hasNext()) {
+         return sameName;
+     }
+
+     final SAMRecord head = iterator.next();
+     sameName.add(head);
+     while (iterator.hasNext()) {
+         if (!iterator.peek().getReadName().equals(head.getReadName())) {
+             break;
+         }
+         sameName.add(iterator.next());
+     }
+     return sameName;
+ }
+
+ /** Sets the sample of every given read group to {@code sampleAlias}. */
+ private void overwriteSample(final List<SAMReadGroupRecord> readGroups, final String sampleAlias) {
+     for (final SAMReadGroupRecord readGroup : readGroups) {
+         readGroup.setSample(sampleAlias);
+     }
+ }
+
+ /** Sets the library of every given read group to {@code libraryName}. */
+ private void overwriteLibrary(final List<SAMReadGroupRecord> readGroups, final String libraryName) {
+     for (final SAMReadGroupRecord readGroup : readGroups) {
+         readGroup.setLibrary(libraryName);
+     }
+ }
+
+ /**
+  * Builds the read-group-id to output-file mapping: read from {@code outputMapFile} when one is
+  * given, otherwise derived from the read group ids, {@code outputDir} and {@code defaultExtension}.
+  */
+ static Map<String, File> createOutputMap(
+         final File outputMapFile,
+         final File outputDir,
+         final String defaultExtension,
+         final List<SAMReadGroupRecord> readGroups) {
+
+     if (outputMapFile == null) {
+         return createOutputMap(readGroups, outputDir, defaultExtension);
+     }
+     return createOutputMapFromFile(outputMapFile);
+ }
+
+ /**
+  * Reads the read-group-id to output-file mapping from a tab separated file, using its
+  * READ_GROUP_ID and OUTPUT columns. The parser is closed even if reading a row fails.
+  *
+  * @param outputMapFile the tab separated mapping file
+  * @return map from read group id to output file
+  */
+ private static Map<String, File> createOutputMapFromFile(final File outputMapFile) {
+     final Map<String, File> outputMap = new HashMap<String, File>();
+     final TabbedTextFileWithHeaderParser parser = new TabbedTextFileWithHeaderParser(outputMapFile);
+     try {
+         for (final TabbedTextFileWithHeaderParser.Row row : parser) {
+             final String id = row.getField("READ_GROUP_ID");
+             final String output = row.getField("OUTPUT");
+             final File outputPath = new File(output);
+             outputMap.put(id, outputPath);
+         }
+     } finally {
+         // Do not leak the underlying file handle when a row cannot be read.
+         CloserUtil.close(parser);
+     }
+     return outputMap;
+ }
+
+ /**
+  * Derives one output file per read group, named by the read group id followed by
+  * {@code extension} and located in {@code outputDir}.
+  */
+ private static Map<String, File> createOutputMap(final List<SAMReadGroupRecord> readGroups, final File outputDir, final String extension) {
+     final Map<String, File> filesById = new HashMap<String, File>();
+     final String directory = outputDir.toString();
+     for (final SAMReadGroupRecord readGroup : readGroups) {
+         final String id = readGroup.getId();
+         final Path target = Paths.get(directory, id + extension);
+         filesById.put(id, target.toFile());
+     }
+     return filesById;
+ }
+
+ /**
+  * Creates one output header per read group of the input header. Each header is built by
+  * createOutHeader and holds only that single read group; the map is keyed by read group id.
+  */
+ private Map<String, SAMFileHeader> createHeaderMap(
+         final SAMFileHeader inHeader,
+         final SortOrder sortOrder,
+         final boolean removeAlignmentInformation) {
+
+     final Map<String, SAMFileHeader> headersById = new HashMap<String, SAMFileHeader>();
+     for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
+         final SAMFileHeader perGroupHeader = createOutHeader(inHeader, sortOrder, removeAlignmentInformation);
+         perGroupHeader.addReadGroup(rg);
+         headersById.put(rg.getId(), perGroupHeader);
+     }
+     return headersById;
+ }
+
+ /**
+  * Creates a fresh output header with the requested sort order. The sequence dictionary and
+  * program records of the input header are carried over only when alignment information is kept.
+  * Read groups are not added here.
+  */
+ private SAMFileHeader createOutHeader(
+         final SAMFileHeader inHeader,
+         final SAMFileHeader.SortOrder sortOrder,
+         final boolean removeAlignmentInformation) {
+
+     final SAMFileHeader result = new SAMFileHeader();
+     result.setSortOrder(sortOrder);
+     if (removeAlignmentInformation) {
+         return result;
+     }
+     result.setSequenceDictionary(inHeader.getSequenceDictionary());
+     result.setProgramRecords(inHeader.getProgramRecords());
+     return result;
+ }
+
+ /**
+  * Detects the quality score encoding of every read group in the input header. The input is
+  * re-opened once per read group and filtered down to that group's records for detection; each
+  * reader is closed even if detection fails.
+  *
+  * @return map from read group to its detected quality format
+  * @throws PicardException if any read group is detected as Solexa encoded, for which no conversion is implemented
+  */
+ private Map<SAMReadGroupRecord, FastqQualityFormat> createReadGroupFormatMap(
+         final SAMFileHeader inHeader,
+         final File referenceSequence,
+         final ValidationStringency validationStringency,
+         final File input,
+         final boolean restoreOriginalQualities) {
+
+     final Map<SAMReadGroupRecord, FastqQualityFormat> readGroupToFormat = new HashMap<SAMReadGroupRecord, FastqQualityFormat>();
+
+     // Figure out the quality score encoding scheme for each read group.
+     for (final SAMReadGroupRecord rg : inHeader.getReadGroups()) {
+         final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceSequence).validationStringency(validationStringency).open(input);
+         try {
+             final SamRecordFilter filter = new SamRecordFilter() {
+                 @Override
+                 public boolean filterOut(final SAMRecord rec) {
+                     return !rec.getReadGroup().getId().equals(rg.getId());
+                 }
+
+                 @Override
+                 public boolean filterOut(final SAMRecord first, final SAMRecord second) {
+                     throw new UnsupportedOperationException();
+                 }
+             };
+             readGroupToFormat.put(rg, QualityEncodingDetector.detect(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, new FilteringIterator(reader.iterator(), filter), restoreOriginalQualities));
+         } finally {
+             // Do not leak the per-read-group reader when detection throws.
+             CloserUtil.close(reader);
+         }
+     }
+     for (final Map.Entry<SAMReadGroupRecord, FastqQualityFormat> entry : readGroupToFormat.entrySet()) {
+         log.info("Detected quality format for " + entry.getKey().getReadGroupId() + ": " + entry.getValue());
+     }
+     if (readGroupToFormat.containsValue(FastqQualityFormat.Solexa)) {
+         throw new PicardException("No quality score encoding conversion implemented for " + FastqQualityFormat.Solexa);
+     }
+
+     return readGroupToFormat;
+ }
+
+ /**
+  * Output side of RevertSam. Holds one writer per read group id when OUTPUT_BY_READGROUP=true,
+  * and a single writer otherwise; records are routed accordingly.
+  */
+ private static class RevertSamWriter {
+     private final Map<String, SAMFileWriter> writerMap = new HashMap<String, SAMFileWriter>();
+     private final SAMFileWriter singleWriter;
+     private final boolean outputByReadGroup;
+
+     /**
+      * Opens the writers. In per-read-group mode one writer is created for every entry of
+      * {@code outputMap}, using the header registered under the same id in {@code headerMap}.
+      */
+     RevertSamWriter(
+             final boolean outputByReadGroup,
+             final Map<String, SAMFileHeader> headerMap,
+             final Map<String, File> outputMap,
+             final SAMFileHeader singleOutHeader,
+             final File singleOutput,
+             final boolean presorted,
+             final SAMFileWriterFactory factory,
+             final File referenceFasta) {
+
+         this.outputByReadGroup = outputByReadGroup;
+         if (!outputByReadGroup) {
+             singleWriter = factory.makeWriter(singleOutHeader, presorted, singleOutput, referenceFasta);
+         } else {
+             singleWriter = null;
+             for (final Map.Entry<String, File> target : outputMap.entrySet()) {
+                 final String id = target.getKey();
+                 final SAMFileWriter perGroupWriter = factory.makeWriter(headerMap.get(id), presorted, target.getValue(), referenceFasta);
+                 writerMap.put(id, perGroupWriter);
+             }
+         }
+     }
+
+     /** Writes the record to the writer of its read group, or to the single writer. */
+     void addAlignment(final SAMRecord rec) {
+         final SAMFileWriter target = outputByReadGroup
+                 ? writerMap.get(rec.getReadGroup().getId())
+                 : singleWriter;
+         target.addAlignment(rec);
+     }
+
+     /** Closes every writer that was opened. */
+     void close() {
+         if (!outputByReadGroup) {
+             singleWriter.close();
+             return;
+         }
+         for (final SAMFileWriter perGroupWriter : writerMap.values()) {
+             perGroupWriter.close();
+         }
+     }
+ }
+
+ /**
+  * Sorting side of RevertSam. Holds one queryname sorting collection per read group id when
+  * OUTPUT_BY_READGROUP=true, and a single sorting collection otherwise.
+  */
+ private static class RevertSamSorter {
+     private final Map<String, SortingCollection<SAMRecord>> sorterMap = new HashMap<String, SortingCollection<SAMRecord>>();
+     private final SortingCollection<SAMRecord> singleSorter;
+     private final boolean outputByReadGroup;
+
+     /**
+      * Creates the sorting collections. In per-read-group mode one collection is created for every
+      * entry of {@code headerMap}, each with a codec built on that entry's header.
+      */
+     RevertSamSorter(
+             final boolean outputByReadGroup,
+             final Map<String, SAMFileHeader> headerMap,
+             final SAMFileHeader singleOutHeader,
+             final int maxRecordsInRam) {
+
+         this.outputByReadGroup = outputByReadGroup;
+         if (!outputByReadGroup) {
+             singleSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(singleOutHeader), new SAMRecordQueryNameComparator(), maxRecordsInRam);
+         } else {
+             for (final Map.Entry<String, SAMFileHeader> headerEntry : headerMap.entrySet()) {
+                 final SortingCollection<SAMRecord> perGroupSorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(headerEntry.getValue()), new SAMRecordQueryNameComparator(), maxRecordsInRam);
+                 sorterMap.put(headerEntry.getKey(), perGroupSorter);
+             }
+             singleSorter = null;
+         }
+     }
+
+     /** Adds the record to the collection of its read group, or to the single collection. */
+     void add(final SAMRecord rec) {
+         final SortingCollection<SAMRecord> target = outputByReadGroup
+                 ? sorterMap.get(rec.getReadGroup().getId())
+                 : singleSorter;
+         target.add(rec);
+     }
+
+     /** Returns one peekable iterator per sorting collection held by this sorter. */
+     List<PeekableIterator<SAMRecord>> iterators() {
+         final List<PeekableIterator<SAMRecord>> result = new ArrayList<PeekableIterator<SAMRecord>>();
+         if (!outputByReadGroup) {
+             result.add(new PeekableIterator<SAMRecord>(singleSorter.iterator()));
+             return result;
+         }
+         for (final SortingCollection<SAMRecord> perGroupSorter : sorterMap.values()) {
+             result.add(new PeekableIterator<SAMRecord>(perGroupSorter.iterator()));
+         }
+         return result;
+     }
+ }
+
+ /**
+ * Methods used for validating parameters to RevertSam.
+ */
+ static class ValidationUtil {
+
+ static void validateSanitizeSortOrder(final boolean sanitize, final SAMFileHeader.SortOrder sortOrder, final List<String> errors) {
+ if (sanitize && sortOrder != SAMFileHeader.SortOrder.queryname) {
+ errors.add("SORT_ORDER must be queryname when sanitization is enabled with SANITIZE=true.");
+ }
+ }
+
+ /** Dispatches to the output validation that matches the OUTPUT_BY_READGROUP mode. */
+ static void validateOutputParams(final boolean outputByReadGroup, final File output, final File outputMap, final List<String> errors) {
+     if (!outputByReadGroup) {
+         validateOutputParamsNotByReadGroup(output, outputMap, errors);
+         return;
+     }
+     validateOutputParamsByReadGroup(output, outputMap, errors);
+ }
+
+ /**
+  * Validates OUTPUT / OUTPUT_MAP for per-read-group output. When OUTPUT is given it must be a
+  * directory and OUTPUT_MAP is not inspected. Otherwise OUTPUT_MAP must be given, be readable and
+  * start with the READ_GROUP_ID and OUTPUT columns. Problems are appended to {@code errors}.
+  */
+ static void validateOutputParamsByReadGroup(final File output, final File outputMap, final List<String> errors) {
+     if (output != null) {
+         if (!Files.isDirectory(output.toPath())) {
+             errors.add("When OUTPUT_BY_READGROUP=true and OUTPUT is provided, it must be a directory: " + output);
+         }
+         return;
+     }
+     // output is null if we reached here
+     if (outputMap == null) {
+         errors.add("Must provide either OUTPUT or OUTPUT_MAP when OUTPUT_BY_READGROUP=true.");
+         return;
+     }
+     if (!Files.isReadable(outputMap.toPath())) {
+         errors.add("Cannot read OUTPUT_MAP " + outputMap);
+         return;
+     }
+     final TabbedTextFileWithHeaderParser parser = new TabbedTextFileWithHeaderParser(outputMap);
+     try {
+         if (!ValidationUtil.isOutputMapHeaderValid(parser.columnLabelsList())) {
+             errors.add("Invalid header: " + outputMap + ". Must be a tab-separated file with READ_GROUP_ID as first column and OUTPUT as second column.");
+         }
+     } finally {
+         // The parser was only opened to inspect the header; release it again.
+         CloserUtil.close(parser);
+     }
+ }
+
+ /**
+  * Validates OUTPUT / OUTPUT_MAP for single-file output: OUTPUT_MAP must be absent and OUTPUT must
+  * be given and must not be a directory. Problems are appended to {@code errors}.
+  */
+ static void validateOutputParamsNotByReadGroup(final File output, final File outputMap, final List<String> errors) {
+     if (outputMap != null) {
+         errors.add("Cannot provide OUTPUT_MAP when OUTPUT_BY_READGROUP=false. Provide OUTPUT instead.");
+     }
+
+     if (output == null) {
+         errors.add("OUTPUT is required when OUTPUT_BY_READGROUP=false");
+     } else if (Files.isDirectory(output.toPath())) {
+         errors.add("OUTPUT " + output + " should not be a directory when OUTPUT_BY_READGROUP=false");
+     }
+ }
+
+ /**
+  * If we are going to override SAMPLE_ALIAS or LIBRARY_NAME, make sure all the read
+  * groups have the same values. Values are compared null-safely, so read groups that lack a
+  * sample or library no longer cause a NullPointerException.
+  *
+  * @param inHeader    header whose read groups are checked
+  * @param sampleAlias requested sample override, or null for none
+  * @param libraryName requested library override, or null for none
+  * @throws PicardException if an override is requested for a field whose values differ between read groups
+  */
+ static void validateHeaderOverrides(
+         final SAMFileHeader inHeader,
+         final String sampleAlias,
+         final String libraryName) {
+
+     final List<SAMReadGroupRecord> rgs = inHeader.getReadGroups();
+     if (sampleAlias == null && libraryName == null) {
+         return;
+     }
+
+     boolean allSampleAliasesIdentical = true;
+     boolean allLibraryNamesIdentical = true;
+     // Every read group is compared against the first one; fewer than two groups means nothing to compare.
+     for (int i = 1; i < rgs.size(); i++) {
+         if (!java.util.Objects.equals(rgs.get(0).getSample(), rgs.get(i).getSample())) {
+             allSampleAliasesIdentical = false;
+         }
+         if (!java.util.Objects.equals(rgs.get(0).getLibrary(), rgs.get(i).getLibrary())) {
+             allLibraryNamesIdentical = false;
+         }
+     }
+     if (sampleAlias != null && !allSampleAliasesIdentical) {
+         throw new PicardException("Read groups have multiple values for sample. " +
+                 "A value for SAMPLE_ALIAS cannot be supplied.");
+     }
+     if (libraryName != null && !allLibraryNamesIdentical) {
+         throw new PicardException("Read groups have multiple values for library name. " +
+                 "A value for library name cannot be supplied.");
+     }
+ }
+
+ /**
+  * Asserts that the output location can be written: as a file in single-output mode, or as a
+  * directory in per-read-group mode when OUTPUT is given (nothing is checked when it is null).
+  */
+ static void assertWritable(final File output, final boolean outputByReadGroup) {
+     if (!outputByReadGroup) {
+         IOUtil.assertFileIsWritable(output);
+         return;
+     }
+     if (output != null) {
+         IOUtil.assertDirectoryIsWritable(output);
+     }
+ }
+
+ /**
+  * Checks that every read group id has an entry in the output map.
+  *
+  * @throws PicardException for the first read group id without a mapped output file
+  */
+ static void assertAllReadGroupsMapped(final Map<String, File> outputMap, final List<SAMReadGroupRecord> readGroups) {
+     for (final SAMReadGroupRecord rg : readGroups) {
+         final String id = rg.getId();
+         if (outputMap.get(id) == null) {
+             throw new PicardException("Read group id " + id + " not found in OUTPUT_MAP " + outputMap);
+         }
+     }
+ }
+
+ /**
+  * Checks the column labels of an OUTPUT_MAP file.
+  *
+  * @param columnLabels the column labels in file order
+  * @return true when there are at least two columns and the first two are READ_GROUP_ID and OUTPUT
+  */
+ static boolean isOutputMapHeaderValid(final List<String> columnLabels) {
+     return columnLabels.size() >= 2
+             && "READ_GROUP_ID".equals(columnLabels.get(0))
+             && "OUTPUT".equals(columnLabels.get(1));
+ }
+ }
+}
diff --git a/src/java/picard/sam/SamAlignmentMerger.java b/src/main/java/picard/sam/SamAlignmentMerger.java
similarity index 98%
rename from src/java/picard/sam/SamAlignmentMerger.java
rename to src/main/java/picard/sam/SamAlignmentMerger.java
index 15a697f..a716c4b 100644
--- a/src/java/picard/sam/SamAlignmentMerger.java
+++ b/src/main/java/picard/sam/SamAlignmentMerger.java
@@ -67,6 +67,9 @@ public class SamAlignmentMerger extends AbstractAlignmentMerger {
* alignment. Alignments with more than this many gaps will be ignored.
* -1 means to allow any number of gaps.
* @param attributesToRetain attributes from the alignment record that should be
+ * retained when merging, overridden by attributesToRemove if they share
+ * common tags.
+ * @param attributesToRemove attributes from the alignment record that should be
* removed when merging. This overrides attributesToRetain if they share
* common tags.
* @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional.
diff --git a/src/java/picard/sam/SamFormatConverter.java b/src/main/java/picard/sam/SamFormatConverter.java
similarity index 100%
rename from src/java/picard/sam/SamFormatConverter.java
rename to src/main/java/picard/sam/SamFormatConverter.java
diff --git a/src/java/picard/sam/SamToFastq.java b/src/main/java/picard/sam/SamToFastq.java
similarity index 95%
rename from src/java/picard/sam/SamToFastq.java
rename to src/main/java/picard/sam/SamToFastq.java
index 8990c33..f577ed0 100755
--- a/src/java/picard/sam/SamToFastq.java
+++ b/src/main/java/picard/sam/SamToFastq.java
@@ -128,6 +128,11 @@ public class SamToFastq extends CommandLineProgram {
"clipped region.", optional = true)
public String CLIPPING_ACTION;
+ @Option(shortName = "CLIP_MIN", doc = "When performing clipping with the CLIPPING_ATTRIBUTE and CLIPPING_ACTION " +
+ "parameters, ensure that the resulting reads after clipping are at least CLIPPING_MIN_LENGTH bases long. " +
+ "If the original read is shorter than CLIPPING_MIN_LENGTH then the original read length will be maintained.")
+ public int CLIPPING_MIN_LENGTH = 0;
+
@Option(shortName = "R1_TRIM", doc = "The number of bases to trim from the beginning of read 1.")
public int READ1_TRIM = 0;
@@ -297,7 +302,11 @@ public class SamToFastq extends CommandLineProgram {
// If we're clipping, do the right thing to the bases or qualities
if (CLIPPING_ATTRIBUTE != null) {
- final Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE);
+ Integer clipPoint = (Integer) read.getAttribute(CLIPPING_ATTRIBUTE);
+ if (clipPoint != null && clipPoint < CLIPPING_MIN_LENGTH) {
+ clipPoint = Math.min(readString.length(), CLIPPING_MIN_LENGTH);
+ }
+
if (clipPoint != null) {
if (CLIPPING_ACTION.equalsIgnoreCase("X")) {
readString = clip(readString, clipPoint, null, !read.getReadNegativeStrandFlag());
diff --git a/src/java/picard/sam/SortSam.java b/src/main/java/picard/sam/SetNmAndUqTags.java
similarity index 51%
copy from src/java/picard/sam/SortSam.java
copy to src/main/java/picard/sam/SetNmAndUqTags.java
index 6dd1c41..8a1b9c5 100644
--- a/src/java/picard/sam/SortSam.java
+++ b/src/main/java/picard/sam/SetNmAndUqTags.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2009 The Broad Institute
+ * Copyright (c) 2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -23,12 +23,14 @@
*/
package picard.sam;
+import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMFileWriter;
import htsjdk.samtools.SAMFileWriterFactory;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.reference.ReferenceSequenceFileWalker;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.Log;
@@ -40,60 +42,70 @@ import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.SamOrBam;
import java.io.File;
+import java.util.stream.StreamSupport;
/**
- * @author alecw at broadinstitute.org
+ * @author Yossi Farjoun
*/
@CommandLineProgramProperties(
- usage = SortSam.USAGE_SUMMARY + SortSam.USAGE_DETAILS,
- usageShort = SortSam.USAGE_SUMMARY,
+ usage = SetNmAndUqTags.USAGE_SUMMARY + SetNmAndUqTags.USAGE_DETAILS,
+ usageShort = SetNmAndUqTags.USAGE_SUMMARY,
programGroup = SamOrBam.class
)
-public class SortSam extends CommandLineProgram {
- static final String USAGE_SUMMARY = "Sorts a SAM or BAM file. ";
- static final String USAGE_DETAILS = "This tool sorts the input SAM or BAM file by coordinate, queryname or some other property " +
- "of the SAMRecord. Input and output formats (SAM or BAM) are determined by the file extension." +
+public class SetNmAndUqTags extends CommandLineProgram {
+ static final String USAGE_SUMMARY = "Fixes the UQ and NM tags in a SAM file. ";
+ static final String USAGE_DETAILS = "This tool takes in a SAM or BAM file (sorted by coordinate) and calculates the NM and UQ tags by comparing with the reference."+
"<br />" +
+ "This may be needed when MergeBamAlignment was run with SORT_ORDER different from 'coordinate' and thus could not fix\n"+
+ "these tags then.<br />"+
"<h4>Usage example:</h4>" +
"<pre>" +
- "java -jar picard.jar SortSam \\<br />" +
- " I=input.bam \\<br />" +
- " O=sorted.bam \\<br />" +
- " SORT_ORDER=coordinate" +
+ "java -jar picard.jar SetNmAndUqTags \\<br />" +
+ " I=sorted.bam \\<br />" +
+ " O=fixed.bam \\<br />"+
"</pre>" +
"<hr />";
- @Option(doc = "The BAM or SAM file to sort.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
+ @Option(doc = "The BAM or SAM file to fix.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
- @Option(doc = "The sorted BAM or SAM output file. ", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME)
+ @Option(doc = "The fixed BAM or SAM output file. ", shortName = StandardOptionDefinitions.OUTPUT_SHORT_NAME)
public File OUTPUT;
- @Option(shortName = StandardOptionDefinitions.SORT_ORDER_SHORT_NAME, doc = "Sort order of output file")
- public SAMFileHeader.SortOrder SORT_ORDER;
+ @Option(doc = "Whether the file contains bisulfite sequence (used when calculating the NM tag).")
+ public boolean IS_BISULFITE_SEQUENCE = false;
- private final Log log = Log.getInstance(SortSam.class);
+ @Override
+ protected String[] customCommandLineValidation() {
+ if (REFERENCE_SEQUENCE == null) {
+ return new String[]{"Must have a non-null REFERENCE_SEQUENCE"};
+ }
+ return super.customCommandLineValidation();
+ }
+
+ private final Log log = Log.getInstance(SetNmAndUqTags.class);
public static void main(final String[] argv) {
- new SortSam().instanceMainWithExit(argv);
+ new SetNmAndUqTags().instanceMainWithExit(argv);
}
protected int doWork() {
IOUtil.assertFileIsReadable(INPUT);
IOUtil.assertFileIsWritable(OUTPUT);
final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(REFERENCE_SEQUENCE).open(INPUT);
- ;
- reader.getFileHeader().setSortOrder(SORT_ORDER);
- final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT);
- writer.setProgressLogger(
- new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
- final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");
- for (final SAMRecord rec : reader) {
- writer.addAlignment(rec);
- progress.record(rec);
+ if (reader.getFileHeader().getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
+ throw new SAMException("Input must be coordinate-sorted for this program to run. Found: " + reader.getFileHeader().getSortOrder());
}
- log.info("Finished reading inputs, merging and writing to output now.");
+ final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), true, OUTPUT);
+ writer.setProgressLogger(
+ new ProgressLogger(log, (int) 1e7, "Wrote", "records"));
+
+ final ReferenceSequenceFileWalker refSeq = new ReferenceSequenceFileWalker(REFERENCE_SEQUENCE);
+
+ StreamSupport.stream(reader.spliterator(),false)
+ .peek(rec->{if(!rec.getReadUnmappedFlag()) AbstractAlignmentMerger.fixNMandUQ(rec, refSeq, IS_BISULFITE_SEQUENCE);})
+ .forEach(writer::addAlignment);
CloserUtil.close(reader);
writer.close();
diff --git a/src/java/picard/sam/SortSam.java b/src/main/java/picard/sam/SortSam.java
similarity index 81%
rename from src/java/picard/sam/SortSam.java
rename to src/main/java/picard/sam/SortSam.java
index 6dd1c41..28058aa 100644
--- a/src/java/picard/sam/SortSam.java
+++ b/src/main/java/picard/sam/SortSam.java
@@ -51,9 +51,16 @@ import java.io.File;
)
public class SortSam extends CommandLineProgram {
static final String USAGE_SUMMARY = "Sorts a SAM or BAM file. ";
- static final String USAGE_DETAILS = "This tool sorts the input SAM or BAM file by coordinate, queryname or some other property " +
- "of the SAMRecord. Input and output formats (SAM or BAM) are determined by the file extension." +
- "<br />" +
+ static final String USAGE_DETAILS = "This tool sorts the input SAM or BAM file by coordinate, queryname (QNAME), or some other property " +
+ "of the SAM record. The SortOrder of a SAM/BAM file is found in the SAM file header tag @HD in the field labeled SO. " +
+ "" +
+ "<p>For a coordinate sorted SAM/BAM file, read alignments are sorted first by the reference sequence name (RNAME) field using the " +
+ "reference sequence dictionary (@SQ tag). Alignments within these subgroups are secondarily sorted using the left-most mapping " +
+ "position of the read (POS). Subsequent to this sorting scheme, alignments are listed arbitrarily.</p>" +
+ "" +
+ "For queryname-sorted alignments, all alignments are grouped using the queryname field but the alignments are not necessarily sorted within these groups. " +
+ "Reads having the same queryname are derived from the same template. " +
+
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar SortSam \\<br />" +
@@ -61,6 +68,7 @@ public class SortSam extends CommandLineProgram {
" O=sorted.bam \\<br />" +
" SORT_ORDER=coordinate" +
"</pre>" +
+
"<hr />";
@Option(doc = "The BAM or SAM file to sort.", shortName = StandardOptionDefinitions.INPUT_SHORT_NAME)
public File INPUT;
diff --git a/src/java/picard/sam/SplitSamByLibrary.java b/src/main/java/picard/sam/SplitSamByLibrary.java
similarity index 100%
rename from src/java/picard/sam/SplitSamByLibrary.java
rename to src/main/java/picard/sam/SplitSamByLibrary.java
diff --git a/src/java/picard/sam/ValidateSamFile.java b/src/main/java/picard/sam/ValidateSamFile.java
similarity index 81%
rename from src/java/picard/sam/ValidateSamFile.java
rename to src/main/java/picard/sam/ValidateSamFile.java
index 70cfc27..2c4f788 100644
--- a/src/java/picard/sam/ValidateSamFile.java
+++ b/src/main/java/picard/sam/ValidateSamFile.java
@@ -58,20 +58,29 @@ import java.util.List;
)
public class ValidateSamFile extends CommandLineProgram {
static final String USAGE_SUMMARY = "Validates a SAM or BAM file. ";
- static final String USAGE_DETAILS = "This tool reports on the validity of a SAM or BAM file relative to the SAM format specification " +
- "(see http://samtools.github.io/hts-specs/SAMv1.pdf), which is useful for troubleshooting errors encountered with other tools " +
- "that may be caused by improper formatting.<br /><br />" +
- "By default, the tool runs in VERBOSE mode and will exit after finding 100 errors and output them to the " +
- "console (stdout). It is often practical to start by running this tool with the SUMMARY mode option, which summarizes the " +
- "\"errors\" and \"warnings\". Consequently, specific validation warnings or errors that are of lesser concern can be ignored " +
- "using the IGNORE and/or IGNORE_WARNINGS arguments in order to focus on blocking errors. " +
- "<br />" +
+ static final String USAGE_DETAILS = "<p>This tool reports on the validity of a SAM or BAM file relative to the SAM format " +
+ "specification. This is useful for troubleshooting errors encountered with other tools that may be caused by improper " +
+ "formatting, faulty alignments, incorrect flag values, etc. </p> " +
+
+ "<p>By default, the tool runs in VERBOSE mode and will exit after finding 100 errors and output them to the console (stdout). " +
+ "Therefore, it is often more practical to run this tool initially using the MODE=SUMMARY option. This mode outputs a summary " +
+ "table listing the numbers of all 'errors' and 'warnings'.</p> "+
+
+ "<p>When fixing errors in your file, it is often useful to prioritize the severe validation errors and ignore the " +
+ "errors/warnings of lesser concern. This can be done using the IGNORE and/or IGNORE_WARNINGS arguments. For helpful " +
+ "suggestions on error prioritization, please follow this link to obtain additional documentation on <a href='https://www.broadinstitute.org/gatk/guide/article?id=7571'>ValidateSamFile</a>.</p>" +
+
+ "<p>After identifying and fixing your 'warnings/errors', we recommend that you rerun this tool to validate your SAM/BAM " +
+ "file prior to proceeding with your downstream analysis. This will verify that all problems in your file have been addressed.</p>" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar ValidateSamFile \\<br />" +
- " I=input.bam \\<br />" +
- " MODE=SUMMARY" +
+ " I=input.bam \\<br />" +
+ " MODE=SUMMARY" +
"</pre>" +
+ "<p>To obtain a complete list with descriptions of both 'ERROR' and 'WARNING' messages, please see our additional " +
+ " <a href='https://www.broadinstitute.org/gatk/guide/article?id=7571'>documentation</a> for this tool.</p>" +
+ ""+
"<hr />";
public enum Mode {VERBOSE, SUMMARY}
diff --git a/src/java/picard/sam/ViewSam.java b/src/main/java/picard/sam/ViewSam.java
similarity index 100%
rename from src/java/picard/sam/ViewSam.java
rename to src/main/java/picard/sam/ViewSam.java
diff --git a/src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java b/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
similarity index 90%
rename from src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
rename to src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
index 5201bdb..4bf41b0 100644
--- a/src/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
+++ b/src/main/java/picard/sam/markduplicates/EstimateLibraryComplexity.java
@@ -89,26 +89,26 @@ import static java.lang.Math.pow;
programGroup = Metrics.class
)
public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCommandLineProgram {
- static final String USAGE_SUMMARY = "Estimates library complexity. ";
- static final String USAGE_DETAILS = "This tool outputs quality metrics for a sequencing library preparation." +
- "<br /><br />Library complexity refers to the number of unique DNA fragments present in a given library. " +
- "Reductions in complexity compromise downstream analyses and can result from either PCR or optical duplicates." +
+ static final String USAGE_SUMMARY = "Estimates the numbers of unique molecules in a sequencing library. ";
+ static final String USAGE_DETAILS = "<p>This tool outputs quality metrics for a sequencing library preparation." +
+ "Library complexity refers to the number of unique DNA fragments present in a given library. Reductions in complexity " +
+ "resulting from PCR amplification during library preparation will ultimately compromise downstream analyses " +
+ "via an elevation in the number of duplicate reads. PCR-associated duplication artifacts can result from: inadequate amounts " +
+ "of starting material (genomic DNA, cDNA, etc.), losses during cleanups, and size selection issues. " +
+ "Duplicate reads can also arise from optical duplicates resulting from sequencing-machine optical sensor artifacts.</p> " +
+
+ "<p>This tool attempts to estimate library complexity from sequence of read pairs alone. Reads are sorted by the first N bases " +
+ "(5 by default) of the first read and then the first N bases of the second read of a pair. Read pairs are considered to " +
+ "be duplicates if they match each other with no gaps and an overall mismatch rate less than or equal to MAX_DIFF_RATE " +
+ "(0.03 by default). Reads of poor quality are filtered out to provide a more accurate estimate. The filtering removes reads" +
+ " with any poor quality bases as defined by a read's MIN_MEAN_QUALITY (20 is the default value) across either the first or " +
+ "second read. Unpaired reads are ignored in this computation.</p> " +
"" +
- "<br /><br />PCR-induced duplicates can result from: inadequate amounts of starting material (genomic DNA, cDNA, etc.), " +
- "losses during cleanups and size selection, and amplification-associated artifacts. These duplicates " +
- "are removed via the creation of a histogram consisting of the numbers of reads in duplicate set vs. the numbers " +
- "of duplicate sets. All bins that contain exactly one duplicate set are then removed from the histogram prior" +
- " to estimating library size.<br /> <br />" +
- "" +
- "Optical duplicates result from reads that have identical sequences and whose clusters are proximal to each other. " +
- "These are identified using both the READ_NAME_REGEX string and the values for the OPTICAL_DUPLICATE_PIXEL_DISTANCE.<br /> <br />" +
- "" +
- "Sequence identity is determined by scanning the first five (default value) bases of a read. Sequences are considered identical if" +
- " they match each other with no gaps, have an overall mismatch rate less than or equal to the MAX_DIFF_RATE" +
- " (0.03 is the default value), and the cluster proximity is less than the defined value (default is set at 100 pixels)." +
- "" +
- "<br /><br />Finally, the algorithm filters out poor quality reads defined as those with ambiguous base calls \"N\"s in the first 5 (default value) bases, " +
- "reads with a mean base quality score lower than MIN_MEAN_QUALITY (20 is the default value), or any unpaired reads. " +
+ "<p>The algorithm attempts to detect optical duplicates separately from PCR duplicates and excludes these in the calculation " +
+ "of library size. Also, since there is no alignment information used in this algorithm, an additional filter is applied to " +
+ "the data as follows. After examining all reads, a histogram is built in which the number of reads in a duplicate set is " +
+ "compared with the number of of duplicate sets. All bins that contain exactly one duplicate set are then removed from the " +
+ "histogram as outliers prior to the library size estimation. </p>" +
"<h4>Usage example:</h4>" +
"<pre>" +
@@ -116,6 +116,8 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
" I=input.bam \\<br />" +
" O=est_lib_complex_metrics.txt" +
"</pre>" +
+ "Please see the documentation for the companion " +
+ "<a href='https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates'>MarkDuplicates</a> tool." +
"<hr />";
@Option(shortName = StandardOptionDefinitions.INPUT_SHORT_NAME, doc = "One or more files to combine and " +
"estimate library complexity from. Reads can be mapped or unmapped.")
@@ -339,8 +341,9 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
/**
* Comparator that orders read pairs on the first N bases of both reads.
+ * There is no tie-breaking, so any sort is stable, not total.
*/
- class PairedReadComparator implements Comparator<PairedReadSequence> {
+ private class PairedReadComparator implements Comparator<PairedReadSequence> {
final int BASES = EstimateLibraryComplexity.this.MIN_IDENTICAL_BASES;
public int compare(final PairedReadSequence lhs, final PairedReadSequence rhs) {
@@ -356,7 +359,7 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
if (retval != 0) return retval;
}
- return System.identityHashCode(lhs) - System.identityHashCode(rhs);
+ return 0;
}
}
@@ -406,7 +409,6 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
log.info("Will store " + MAX_RECORDS_IN_RAM + " read pairs in memory before sorting.");
final List<SAMReadGroupRecord> readGroups = new ArrayList<SAMReadGroupRecord>();
- final int recordsRead = 0;
final SortingCollection<PairedReadSequence> sorter;
final boolean useBarcodes = (null != BARCODE_TAG || null != READ_ONE_BARCODE_TAG || null != READ_TWO_BARCODE_TAG);
@@ -436,6 +438,7 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
if (!rec.getFirstOfPairFlag() && !rec.getSecondOfPairFlag()) {
continue;
}
+ if (rec.isSecondaryOrSupplementary()) continue;
PairedReadSequence prs = pendingByName.remove(rec.getReadName());
if (prs == null) {
@@ -494,7 +497,7 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
int groupsProcessed = 0;
long lastLogTime = System.currentTimeMillis();
- final int meanGroupSize = Math.max(1, (recordsRead / 2) / (int) pow(4, MIN_IDENTICAL_BASES * 2));
+ final int meanGroupSize = (int) (Math.max(1, (progress.getCount() / 2) / (int) pow(4, MIN_IDENTICAL_BASES * 2)));
while (iterator.hasNext()) {
// Get the next group and split it apart by library
@@ -506,7 +509,7 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
"Mean=" + meanGroupSize + ", Actual=" + group.size() + ". Prefixes: " +
StringUtil.bytesToString(prs.read1, 0, MIN_IDENTICAL_BASES) +
" / " +
- StringUtil.bytesToString(prs.read1, 0, MIN_IDENTICAL_BASES));
+ StringUtil.bytesToString(prs.read2, 0, MIN_IDENTICAL_BASES));
} else {
final Map<String, List<PairedReadSequence>> sequencesByLibrary = splitByLibrary(group, readGroups);
@@ -620,14 +623,14 @@ public class EstimateLibraryComplexity extends AbstractOpticalDuplicateFinderCom
// The loop can start from MIN_IDENTICAL_BASES because we've already confirmed that
// at least those first few bases are identical when sorting.
for (int i = MIN_IDENTICAL_BASES; i < read1Length; ++i) {
- if (lhs.read1[i] != rhs.read1[i]) {
- if (++errors > maxErrors) return false;
+ if (lhs.read1[i] != rhs.read1[i] && ++errors > maxErrors) {
+ return false;
}
}
for (int i = MIN_IDENTICAL_BASES; i < read2Length; ++i) {
- if (lhs.read2[i] != rhs.read2[i]) {
- if (++errors > maxErrors) return false;
+ if (lhs.read2[i] != rhs.read2[i] && ++errors > maxErrors) {
+ return false;
}
}
diff --git a/src/java/picard/sam/markduplicates/MarkDuplicates.java b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java
similarity index 74%
rename from src/java/picard/sam/markduplicates/MarkDuplicates.java
rename to src/main/java/picard/sam/markduplicates/MarkDuplicates.java
index 1eaa37d..e343c9e 100644
--- a/src/java/picard/sam/markduplicates/MarkDuplicates.java
+++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java
@@ -24,6 +24,7 @@
package picard.sam.markduplicates;
+import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import picard.cmdline.programgroups.SamOrBam;
@@ -63,23 +64,55 @@ import java.util.*;
)
public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
static final String USAGE_SUMMARY = "Identifies duplicate reads. ";
- static final String USAGE_DETAILS =
- "This tool locates and tags duplicate reads (both PCR and optical/sequencing-driven) in a BAM or SAM file, where\n" +
- "duplicate reads are defined as originating from the same original fragment of DNA. Duplicates are identified as read\n" +
- "pairs having identical 5' positions (coordinate and strand) for both reads in a mate pair (and optinally, matching\n" +
- "unique molecular identifier reads; see BARCODE_TAG option). Optical, or more broadly Sequencing, duplicates are\n" +
- "duplicates that appear clustered together spatially during sequencing and can arise from optical/imagine-processing\n" +
- "artifacts or from bio-chemical processes during clonal amplification and sequencing; they are identified using the\n" +
- "READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options.\n" +
- "\n" +
- "The tool's main output is a new SAM or BAM file in which duplicates have been identified in the SAM flags field, or\n" +
- "optionally removed (see REMOVE_DUPLICATE and REMOVE_SEQUENCING_DUPLICATES), and optionally marked with a duplicate type\n" +
- "in the 'DT' optional attribute. In addition, it also outputs a metrics file containing the numbers of\n" +
- "READ_PAIRS_EXAMINED, UNMAPPED_READS, UNPAIRED_READS, UNPAIRED_READ DUPLICATES, READ_PAIR_DUPLICATES, and\n" +
- "READ_PAIR_OPTICAL_DUPLICATES.\n" +
- "\n" +
- "Usage example: java -jar picard.jar MarkDuplicates I=input.bam \\\n" +
- " O=marked_duplicates.bam M=marked_dup_metrics.txt\n";
+ static final String USAGE_DETAILS = "<p>This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are " +
+ "defined as originating from a single fragment of DNA. Duplicates can arise during sample preparation e.g. library " +
+ "construction using PCR. See also " +
+ "<a href='https://broadinstitute.github.io/picard/command-line-overview.html#EstimateLibraryComplexity'>EstimateLibraryComplexity</a>" +
+ " for additional notes on PCR duplication artifacts. Duplicate reads can also result from a single amplification cluster, " +
+ "incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument. These duplication artifacts are " +
+ "referred to as optical duplicates.</p>" +
+ "" +
+ "<p>The MarkDuplicates tool works by comparing sequences in the 5 prime positions of both reads and read-pairs in a SAM/BAM file. " +
+ "An BARCODE_TAG option is available to facilitate duplicate marking using molecular barcodes. After duplicate reads are" +
+ " collected, the tool differentiates the primary and duplicate reads using an algorithm that ranks reads by the sums " +
+ "of their base-quality scores (default method).</p> " +
+
+ "<p>The tool's main output is a new SAM or BAM file, in which duplicates have been identified in the SAM flags field for each" +
+ " read. Duplicates are marked with the hexadecimal value of 0x0400, which corresponds to a decimal value of 1024. " +
+ "If you are not familiar with this type of annotation, please see the following " +
+ "<a href='https://www.broadinstitute.org/gatk/blog?id=7019'>blog post</a> for additional information.</p>" +
+ "" +
+ "<p>Although the bitwise flag annotation indicates whether a read was marked as a duplicate, it does not identify the type of " +
+ "duplicate. To do this, a new tag called the duplicate type (DT) tag was recently added as an optional output in " +
+ "the 'optional field' section of a SAM/BAM file. Invoking the TAGGING_POLICY option," +
+ " you can instruct the program to mark all the duplicates (All), only the optical duplicates (OpticalOnly), or no " +
+ "duplicates (DontTag). This tool uses the READ_NAME_REGEX and the OPTICAL_DUPLICATE_PIXEL_DISTANCE options as the primary " +
+ "methods to identify and differentiate duplicate types. The records within the output of a SAM/BAM file will have values " +
+ "for the 'DT' tag (depending on the invoked TAGGING_POLICY), as either library/PCR-generated duplicates (LB), or " +
+ "sequencing-platform artifact duplicates (SQ).</p> "+
+
+ "<p>MarkDuplicates also produces a metrics file indicating the numbers of duplicates for both single- and paired-end reads.</p> "+
+
+ "<p>The program can take either coordinate-sorted or query-sorted inputs, however the behavior is slightly different. " +
+ "When the input is coordinate-sorted, unmapped mates of mapped records and supplementary/secondary alignments are not " +
+ "marked as duplicates. However, when the input is query-sorted (actually query-grouped), " +
+ "then unmapped mates and secondary/supplementary reads are not excluded from the duplication test and can be" +
+ " marked as duplicate reads.</p> " +
+
+ "<p>If desired, duplicates can be removed using the REMOVE_DUPLICATE and REMOVE_SEQUENCING_DUPLICATES options.</p>" +
+ "" +
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar MarkDuplicates \\<br />" +
+ " I=input.bam \\<br />" +
+ " O=marked_duplicates.bam \\<br />" +
+ " M=marked_dup_metrics.txt" +
+ "</pre>" +
+ "" +
+ "Please see " +
+ "<a href='http://broadinstitute.github.io/picard/picard-metric-definitions.html#DuplicationMetrics'>MarkDuplicates</a> " +
+ "for detailed explanations of the output metrics." +
+ "<hr />";
/** Enum used to control how duplicates are flagged in the DT optional tag on each read. */
public enum DuplicateTaggingPolicy { DontTag, OpticalOnly, All }
@@ -142,7 +175,9 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
private SortingCollection<ReadEndsForMarkDuplicates> fragSort;
private SortingLongCollection duplicateIndexes;
private SortingLongCollection opticalDuplicateIndexes;
+
private int numDuplicateIndices = 0;
+ static private final long NO_SUCH_INDEX = Long.MAX_VALUE; // needs to be large so that that >= test fails for query-sorted traversal
protected LibraryIdGenerator libraryIdGenerator = null; // this is initialized in buildSortedReadEndLists
@@ -196,10 +231,19 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
final SamHeaderAndIterator headerAndIterator = openInputs();
final SAMFileHeader header = headerAndIterator.header;
+ final SAMFileHeader.SortOrder sortOrder = header.getSortOrder();
final SAMFileHeader outputHeader = header.clone();
- outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
- for (final String comment : COMMENT) outputHeader.addComment(comment);
+
+
+ log.info("Reads are assumed to be ordered by: " + sortOrder);
+
+ if (sortOrder != SAMFileHeader.SortOrder.coordinate && sortOrder != SAMFileHeader.SortOrder.queryname) {
+ throw new PicardException("This program requires input that are either coordinate or query sorted. " +
+ "Found "+ sortOrder);
+ }
+
+ COMMENT.forEach(outputHeader::addComment);
// Key: previous PG ID on a SAM Record (or null). Value: New PG ID to replace it.
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
@@ -210,14 +254,17 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
// Now copy over the file while marking all the necessary indexes as duplicates
long recordInFileIndex = 0;
- long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : -1);
- long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : -1;
+ long nextOpticalDuplicateIndex = this.opticalDuplicateIndexes != null && this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX;
+ long nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX);
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Written");
final CloseableIterator<SAMRecord> iterator = headerAndIterator.iterator;
+ String duplicateQueryName = null;
+ String opticalDuplicateQueryName = null;
+
while (iterator.hasNext()) {
final SAMRecord rec = iterator.next();
- if (!rec.isSecondaryOrSupplementary()) {
+
final String library = LibraryIdGenerator.getLibraryName(header, rec);
DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library);
if (metrics == null) {
@@ -229,42 +276,65 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
// First bring the simple metrics up to date
if (rec.getReadUnmappedFlag()) {
++metrics.UNMAPPED_READS;
+ } else if(rec.isSecondaryOrSupplementary()) {
+ ++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS;
} else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
++metrics.UNPAIRED_READS_EXAMINED;
} else {
++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end
}
+ // Now try and figure out the next duplicate index (if going by coordinate; if going by query name, only do this
+ // if the query name has changed).
+ final boolean needNextDuplicateIndex = recordInFileIndex > nextDuplicateIndex &&
+ (sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(duplicateQueryName));
- if (recordInFileIndex == nextDuplicateIndex) {
- rec.setDuplicateReadFlag(true);
+ if (needNextDuplicateIndex) {
+ nextDuplicateIndex = (this.duplicateIndexes.hasNext() ? this.duplicateIndexes.next() : NO_SUCH_INDEX);
+ }
- // Update the duplication metrics
- if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
- ++metrics.UNPAIRED_READ_DUPLICATES;
- } else {
- ++metrics.READ_PAIR_DUPLICATES;// will need to be divided by 2 at the end
- }
+ final boolean isDuplicate = recordInFileIndex == nextDuplicateIndex ||
+ (sortOrder == SAMFileHeader.SortOrder.queryname &&
+ recordInFileIndex > nextDuplicateIndex && rec.getReadName().equals(duplicateQueryName));
- // Now try and figure out the next duplicate index
- if (this.duplicateIndexes.hasNext()) {
- nextDuplicateIndex = this.duplicateIndexes.next();
- } else {
- // Only happens once we've marked all the duplicates
- nextDuplicateIndex = -1;
+
+ if (isDuplicate) {
+ duplicateQueryName = rec.getReadName();
+ rec.setDuplicateReadFlag(true);
+
+ // only update duplicate counts for "decider" reads, not tag-along reads
+ if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) {
+ // Update the duplication metrics
+ if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) {
+ ++metrics.UNPAIRED_READ_DUPLICATES;
+ } else {
+ ++metrics.READ_PAIR_DUPLICATES;// will need to be divided by 2 at the end
+ }
}
} else {
rec.setDuplicateReadFlag(false);
}
- }
// Manage the flagging of optical/sequencing duplicates
- final boolean isOpticalDuplicate = (recordInFileIndex == nextOpticalDuplicateIndex);
- if (isOpticalDuplicate) nextOpticalDuplicateIndex = this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : -1;
+ final boolean needNextOpticalDuplicateIndex = recordInFileIndex > nextOpticalDuplicateIndex &&
+ (sortOrder == SAMFileHeader.SortOrder.coordinate || !rec.getReadName().equals(opticalDuplicateQueryName));
+
+ // Possibly figure out the next opticalDuplicate index (if going by coordinate, if going by query name, only do this
+ // if the query name has changed)
+ if (needNextOpticalDuplicateIndex) {
+ nextOpticalDuplicateIndex = (this.opticalDuplicateIndexes.hasNext() ? this.opticalDuplicateIndexes.next() : NO_SUCH_INDEX);
+ }
+
+ final boolean isOpticalDuplicate = sortOrder == SAMFileHeader.SortOrder.queryname &&
+ recordInFileIndex > nextOpticalDuplicateIndex &&
+ rec.getReadName().equals(opticalDuplicateQueryName) ||
+ recordInFileIndex == nextOpticalDuplicateIndex;
+
rec.setAttribute(DUPLICATE_TYPE_TAG, null);
if (this.TAGGING_POLICY != DuplicateTaggingPolicy.DontTag && rec.getDuplicateReadFlag()) {
if (isOpticalDuplicate) {
+ opticalDuplicateQueryName = rec.getReadName();
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.SEQUENCING.code());
} else if (this.TAGGING_POLICY == DuplicateTaggingPolicy.All) {
rec.setAttribute(DUPLICATE_TYPE_TAG, DuplicateType.LIBRARY.code());
@@ -312,7 +382,7 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
/**
* Goes through all the records in a file and generates a set of ReadEndsForMarkDuplicates objects that
* hold the necessary information (reference sequence, 5' read coordinate) to do
- * duplication, caching to disk as necssary to sort them.
+ * duplication, caching to disk as necessary to sort them.
*/
private void buildSortedReadEndLists(final boolean useBarcodes) {
final int sizeInBytes;
@@ -349,6 +419,7 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
TMP_DIR);
final SamHeaderAndIterator headerAndIterator = openInputs();
+ final SAMFileHeader.SortOrder assumedSortOrder = headerAndIterator.header.getSortOrder();
final SAMFileHeader header = headerAndIterator.header;
final ReadEndsForMarkDuplicatesMap tmp = new DiskBasedReadEndsForMarkDuplicatesMap(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP, diskCodec);
long index = 0;
@@ -359,6 +430,8 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
this.libraryIdGenerator = new LibraryIdGenerator(header);
}
+ String duplicateQueryName = null;
+ long duplicateIndex = NO_SUCH_INDEX;
while (iterator.hasNext()) {
final SAMRecord rec = iterator.next();
@@ -373,14 +446,22 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
pgIdsSeen.add(rec.getStringAttribute(SAMTag.PG.name()));
}
+ // If working in query-sorted order, need to keep the index of the first record with any given query name.
+ if(assumedSortOrder == SAMFileHeader.SortOrder.queryname && !rec.getReadName().equals(duplicateQueryName)) {
+ duplicateQueryName = rec.getReadName();
+ duplicateIndex = index;
+ }
+
if (rec.getReadUnmappedFlag()) {
- if (rec.getReferenceIndex() == -1) {
- // When we hit the unmapped reads with no coordinate, no reason to continue.
+ if (rec.getReferenceIndex() == -1 && assumedSortOrder == SAMFileHeader.SortOrder.coordinate) {
+ // When we hit the unmapped reads with no coordinate, no reason to continue (only in coordinate sort).
break;
}
// If this read is unmapped but sorted with the mapped reads, just skip it.
+
} else if (!rec.isSecondaryOrSupplementary()) {
- final ReadEndsForMarkDuplicates fragmentEnd = buildReadEnds(header, index, rec, useBarcodes);
+ final long indexForRead = assumedSortOrder == SAMFileHeader.SortOrder.queryname ? duplicateIndex : index;
+ final ReadEndsForMarkDuplicates fragmentEnd = buildReadEnds(header, indexForRead, rec, useBarcodes);
this.fragSort.add(fragmentEnd);
if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) {
@@ -389,11 +470,13 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
// See if we've already seen the first end or not
if (pairedEnds == null) {
- pairedEnds = buildReadEnds(header, index, rec, useBarcodes);
+ // at this point pairedEnds and fragmentEnd are the same, but we need to make
+ // a copy since pairedEnds will be modified when the mate comes along.
+ pairedEnds = fragmentEnd.clone();
tmp.put(pairedEnds.read2ReferenceIndex, key, pairedEnds);
} else {
- final int sequence = fragmentEnd.read1ReferenceIndex;
- final int coordinate = fragmentEnd.read1Coordinate;
+ final int matesRefIndex = fragmentEnd.read1ReferenceIndex;
+ final int matesCoordinate = fragmentEnd.read1Coordinate;
// Set orientationForOpticalDuplicates, which always goes by the first then the second end for the strands. NB: must do this
// before updating the orientation later.
@@ -407,21 +490,31 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
((ReadEndsForMarkDuplicatesWithBarcodes) pairedEnds).readTwoBarcode = getReadTwoBarcodeValue(rec);
}
- // If the second read is actually later, just add the second read data, else flip the reads
- if (sequence > pairedEnds.read1ReferenceIndex ||
- (sequence == pairedEnds.read1ReferenceIndex && coordinate >= pairedEnds.read1Coordinate)) {
- pairedEnds.read2ReferenceIndex = sequence;
- pairedEnds.read2Coordinate = coordinate;
- pairedEnds.read2IndexInFile = index;
+ // If the other read is actually later, simply add the other read's data as read2, else flip the reads
+ if (matesRefIndex > pairedEnds.read1ReferenceIndex ||
+ (matesRefIndex == pairedEnds.read1ReferenceIndex && matesCoordinate >= pairedEnds.read1Coordinate)) {
+ pairedEnds.read2ReferenceIndex = matesRefIndex;
+ pairedEnds.read2Coordinate = matesCoordinate;
+ pairedEnds.read2IndexInFile = indexForRead;
pairedEnds.orientation = ReadEnds.getOrientationByte(pairedEnds.orientation == ReadEnds.R,
rec.getReadNegativeStrandFlag());
+
+ // if the two read ends are in the same position, pointing in opposite directions,
+ // the orientation is undefined and the procedure above
+ // will depend on the order of the reads in the file.
+ // To avoid this, we set it explicitly (to FR):
+ if (pairedEnds.read2ReferenceIndex == pairedEnds.read1ReferenceIndex &&
+ pairedEnds.read2Coordinate == pairedEnds.read1Coordinate &&
+ pairedEnds.orientation == ReadEnds.RF) {
+ pairedEnds.orientation = ReadEnds.FR;
+ }
} else {
pairedEnds.read2ReferenceIndex = pairedEnds.read1ReferenceIndex;
pairedEnds.read2Coordinate = pairedEnds.read1Coordinate;
pairedEnds.read2IndexInFile = pairedEnds.read1IndexInFile;
- pairedEnds.read1ReferenceIndex = sequence;
- pairedEnds.read1Coordinate = coordinate;
- pairedEnds.read1IndexInFile = index;
+ pairedEnds.read1ReferenceIndex = matesRefIndex;
+ pairedEnds.read1Coordinate = matesCoordinate;
+ pairedEnds.read1IndexInFile = indexForRead;
pairedEnds.orientation = ReadEnds.getOrientationByte(rec.getReadNegativeStrandFlag(),
pairedEnds.orientation == ReadEnds.R);
}
@@ -521,16 +614,12 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
// First just do the pairs
log.info("Traversing read pair information and detecting duplicates.");
for (final ReadEndsForMarkDuplicates next : this.pairSort) {
- if (firstOfNextChunk == null) {
- firstOfNextChunk = next;
- nextChunk.add(firstOfNextChunk);
- } else if (areComparableForDuplicates(firstOfNextChunk, next, true, useBarcodes)) {
+ if (firstOfNextChunk != null && areComparableForDuplicates(firstOfNextChunk, next, true, useBarcodes)) {
nextChunk.add(next);
} else {
if (nextChunk.size() > 1) {
markDuplicatePairs(nextChunk);
}
-
nextChunk.clear();
nextChunk.add(next);
firstOfNextChunk = next;
@@ -545,6 +634,8 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
boolean containsPairs = false;
boolean containsFrags = false;
+ firstOfNextChunk = null;
+
for (final ReadEndsForMarkDuplicates next : this.fragSort) {
if (firstOfNextChunk != null && areComparableForDuplicates(firstOfNextChunk, next, false, useBarcodes)) {
nextChunk.add(next);
@@ -554,7 +645,6 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
if (nextChunk.size() > 1 && containsFrags) {
markDuplicateFragments(nextChunk, containsPairs);
}
-
nextChunk.clear();
nextChunk.add(next);
firstOfNextChunk = next;
@@ -626,7 +716,10 @@ public class MarkDuplicates extends AbstractMarkDuplicatesCommandLineProgram {
for (final ReadEndsForMarkDuplicates end : list) {
if (end != best) {
addIndexAsDuplicate(end.read1IndexInFile);
- addIndexAsDuplicate(end.read2IndexInFile);
+
+ // in query-sorted case, these will be the same.
+ // TODO: also in coordinate sorted, when one read is unmapped
+ if(end.read2IndexInFile != end.read1IndexInFile) addIndexAsDuplicate(end.read2IndexInFile);
if (end.isOpticalDuplicate && this.opticalDuplicateIndexes != null) {
this.opticalDuplicateIndexes.add(end.read1IndexInFile);
diff --git a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
similarity index 96%
rename from src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
rename to src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
index a5d8563..fb9aff2 100644
--- a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
+++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java
@@ -24,6 +24,7 @@
package picard.sam.markduplicates;
+import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.Option;
import htsjdk.samtools.util.Histogram;
@@ -75,7 +76,8 @@ public class MarkDuplicatesWithMateCigar extends AbstractMarkDuplicatesCommandLi
"Note also that this tool will not work with alignments that have large gaps or deletions, such as those from RNA-seq data. " +
"This is due to the need to buffer small genomic windows to ensure integrity of the duplicate marking, while large skips " +
"(ex. skipping introns) in the alignment records would force making that window very large, thus exhausting memory. <br />" +
-
+ "" +
+ "<p>Note: Metrics labeled as percentages are actually expressed as fractions!</p>" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar MarkDuplicatesWithMateCigar \\<br />" +
@@ -120,8 +122,11 @@ public class MarkDuplicatesWithMateCigar extends AbstractMarkDuplicatesCommandLi
// Create the output header
final SAMFileHeader outputHeader = header.clone();
- outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
- for (final String comment : COMMENT) outputHeader.addComment(comment);
+ if (outputHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
+ throw new PicardException("This program requires inputs in coordinate SortOrder");
+ }
+
+ COMMENT.forEach(outputHeader::addComment);
// Since this is one-pass, unlike MarkDuplicates, we cannot only chain together program
// group records we have seen, we have to assume all of them may be seen. We can perhaps
diff --git a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
similarity index 98%
rename from src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
rename to src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
index d4ce284..99ff13f 100644
--- a/src/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
+++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java
@@ -143,7 +143,7 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator {
// set up metrics
for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
- final String library = readGroup.getLibrary();
+ final String library = LibraryIdGenerator.getReadGroupLibraryName(readGroup);
DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library);
if (metrics == null) {
metrics = new DuplicationMetrics();
@@ -242,9 +242,12 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator {
!record.getMateUnmappedFlag() && null == SAMUtils.getMateCigar(record)) { // paired with one end unmapped and no mate cigar
// NB: we are not truly examining these records. Do we want to count them?
- if (!record.isSecondaryOrSupplementary()) {
+
+ final DuplicationMetrics metrics = getMetrics(record);
+ if (record.isSecondaryOrSupplementary()) {
+ ++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS;
+ } else {
// update metrics
- final DuplicationMetrics metrics = getMetrics(record);
if (record.getReadUnmappedFlag()) {
++metrics.UNMAPPED_READS;
} else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) {
@@ -433,11 +436,14 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator {
backingIteratorRecordIndex++; // Each record is has an index and is emitted in the same order. This helps that.
// We do not consider secondary, supplementary, or unmapped alignments for duplicate marking. We can thus mark that duplicate marking on them has been completed.
+ final DuplicationMetrics metrics = getMetrics(record);
if (record.isSecondaryOrSupplementary() || record.getReadUnmappedFlag()) {
outputBuffer.setResultState(samRecordWithOrdinal, false);
+ if(record.isSecondaryOrSupplementary()){
+ ++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS;
+ }
} else {
// Bring the simple metrics up to date
- final DuplicationMetrics metrics = getMetrics(record);
if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) {
++metrics.UNPAIRED_READS_EXAMINED;
} else {
diff --git a/src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
similarity index 89%
rename from src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
rename to src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
index 7b6adce..669767d 100644
--- a/src/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
+++ b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java
@@ -36,11 +36,11 @@ import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.Histogram;
+import htsjdk.samtools.util.Log;
import picard.PicardException;
import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.sam.DuplicationMetrics;
-import picard.sam.util.PhysicalLocation;
import java.io.File;
import java.util.ArrayList;
@@ -73,13 +73,19 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
@Option(doc = "If true do not write duplicates to the output file instead of writing them with appropriate flags set.")
public boolean REMOVE_DUPLICATES = false;
+ @Deprecated
@Option(shortName = StandardOptionDefinitions.ASSUME_SORTED_SHORT_NAME,
- doc = "If true, assume that the input file is coordinate sorted even if the header says otherwise.")
+ doc = "If true, assume that the input file is coordinate sorted even if the header says otherwise. " +
+ "Deprecated, used ASSUME_SORT_ORDER=coordinate instead.", mutex = {"ASSUME_SORT_ORDER"})
public boolean ASSUME_SORTED = false;
+ @Option(shortName = StandardOptionDefinitions.ASSUME_SORT_ORDER_SHORT_NAME,
+ doc = "If not null, assume that the input file has this order even if the header says otherwise.",
+ optional = true, mutex = {"ASSUME_SORTED"})
+ public SAMFileHeader.SortOrder ASSUME_SORT_ORDER = null;
+
@Option(shortName = "DS", doc = "The scoring strategy for choosing the non-duplicate among candidates.")
public ScoringStrategy DUPLICATE_SCORING_STRATEGY = ScoringStrategy.TOTAL_MAPPED_REFERENCE_LENGTH;
-
@Option(shortName = StandardOptionDefinitions.PROGRAM_RECORD_ID_SHORT_NAME,
doc = "The program record ID for the @PG record(s) created by this program. Set to null to disable " +
@@ -105,10 +111,11 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
@Option(shortName = "CO",
doc = "Comment(s) to include in the output file's header.",
optional = true)
- public List<String> COMMENT = new ArrayList<String>();
+ public List<String> COMMENT = new ArrayList<>();
/** The program groups that have been seen during the course of examining the input records. */
- protected final Set<String> pgIdsSeen = new HashSet<String>();
+ protected final Set<String> pgIdsSeen = new HashSet<>();
+
/**
* We have to re-chain the program groups based on this algorithm. This returns the map from existing program group ID
@@ -125,7 +132,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
if (PROGRAM_GROUP_COMMAND_LINE == null) {
PROGRAM_GROUP_COMMAND_LINE = this.getCommandLine();
}
- chainedPgIds = new HashMap<String, String>();
+ chainedPgIds = new HashMap<>();
for (final String existingId : this.pgIdsSeen) {
final String newPgId = pgIdGenerator.getNonCollidingId(PROGRAM_RECORD_ID);
chainedPgIds.put(existingId, newPgId);
@@ -164,7 +171,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
// Add the optical dupes to the metrics
final Short libraryId = libraryIds.get(libraryName);
if (libraryId != null) {
- final Histogram<Short>.Bin bin = opticalDuplicatesByLibraryId.get(libraryId);
+ final Histogram.Bin<Short> bin = opticalDuplicatesByLibraryId.get(libraryId);
if (bin != null) {
metrics.READ_PAIR_OPTICAL_DUPLICATES = (long) bin.getValue();
}
@@ -192,12 +199,12 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
}
/**
- * Since this may read it's inputs more than once this method does all the opening
+ * Since this may read its inputs more than once this method does all the opening
* and checking of the inputs.
*/
protected SamHeaderAndIterator openInputs() {
- final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(INPUT.size());
- final List<SamReader> readers = new ArrayList<SamReader>(INPUT.size());
+ final List<SAMFileHeader> headers = new ArrayList<>(INPUT.size());
+ final List<SamReader> readers = new ArrayList<>(INPUT.size());
for (final String input : INPUT) {
SamReader reader = SamReaderFactory.makeDefault()
@@ -205,19 +212,24 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
.open(SamInputResource.of(input));
final SAMFileHeader header = reader.getFileHeader();
- if (!ASSUME_SORTED && header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
- throw new PicardException("Input file " + input + " is not coordinate sorted.");
- }
-
headers.add(header);
readers.add(reader);
}
+ if (ASSUME_SORT_ORDER != null || ASSUME_SORTED) {
+ if (ASSUME_SORT_ORDER == null) {
+ ASSUME_SORT_ORDER = SAMFileHeader.SortOrder.coordinate;
+ ASSUME_SORTED = false; // to maintain the "mutex" regarding these two arguments.
+ }
+
+ //if we assume a particular order, then the output will have that order in the header
+ headers.get(0).setSortOrder(ASSUME_SORT_ORDER);
+ }
if (headers.size() == 1) {
return new SamHeaderAndIterator(headers.get(0), readers.get(0).iterator());
} else {
- final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false);
- final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, ASSUME_SORTED);
+ final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(headers.get(0).getSortOrder(), headers, false);
+ final MergingSamRecordIterator iterator = new MergingSamRecordIterator(headerMerger, readers, ASSUME_SORT_ORDER != null);
return new SamHeaderAndIterator(headerMerger.getMergedHeader(), iterator);
}
}
@@ -246,8 +258,8 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO
// Check if we need to partition since the orientations could have changed
if (hasFR && hasRF) { // need to track them independently
// Variables used for optical duplicate detection and tracking
- final List<ReadEnds> trackOpticalDuplicatesF = new ArrayList<ReadEnds>();
- final List<ReadEnds> trackOpticalDuplicatesR = new ArrayList<ReadEnds>();
+ final List<ReadEnds> trackOpticalDuplicatesF = new ArrayList<>();
+ final List<ReadEnds> trackOpticalDuplicatesR = new ArrayList<>();
// Split into two lists: first of pairs and second of pairs, since they must have orientation and same starting end
for (final ReadEnds end : ends) {
diff --git a/src/java/picard/sam/markduplicates/util/AbstractOpticalDuplicateFinderCommandLineProgram.java b/src/main/java/picard/sam/markduplicates/util/AbstractOpticalDuplicateFinderCommandLineProgram.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/AbstractOpticalDuplicateFinderCommandLineProgram.java
rename to src/main/java/picard/sam/markduplicates/util/AbstractOpticalDuplicateFinderCommandLineProgram.java
diff --git a/src/java/picard/sam/markduplicates/util/DiskBasedReadEndsForMarkDuplicatesMap.java b/src/main/java/picard/sam/markduplicates/util/DiskBasedReadEndsForMarkDuplicatesMap.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/DiskBasedReadEndsForMarkDuplicatesMap.java
rename to src/main/java/picard/sam/markduplicates/util/DiskBasedReadEndsForMarkDuplicatesMap.java
diff --git a/src/java/picard/sam/markduplicates/util/LibraryIdGenerator.java b/src/main/java/picard/sam/markduplicates/util/LibraryIdGenerator.java
similarity index 91%
rename from src/java/picard/sam/markduplicates/util/LibraryIdGenerator.java
rename to src/main/java/picard/sam/markduplicates/util/LibraryIdGenerator.java
index 518856c..d9f2c59 100644
--- a/src/java/picard/sam/markduplicates/util/LibraryIdGenerator.java
+++ b/src/main/java/picard/sam/markduplicates/util/LibraryIdGenerator.java
@@ -32,6 +32,7 @@ import picard.sam.DuplicationMetrics;
import java.util.HashMap;
import java.util.Map;
+import java.util.Optional;
/**
* A class to generate library Ids and keep duplication metrics by library IDs.
@@ -40,6 +41,8 @@ import java.util.Map;
*/
public class LibraryIdGenerator {
+ private static final String UNKNOWN_LIBRARY = "Unknown Library";
+
private final SAMFileHeader header;
private final Map<String, Short> libraryIds = new HashMap<String, Short>(); // from library string to library id
private short nextLibraryId = 1;
@@ -51,7 +54,7 @@ public class LibraryIdGenerator {
this.header = header;
for (final SAMReadGroupRecord readGroup : header.getReadGroups()) {
- final String library = readGroup.getLibrary();
+ final String library = LibraryIdGenerator.getReadGroupLibraryName(readGroup);
DuplicationMetrics metrics = metricsByLibrary.get(library);
if (metrics == null) {
metrics = new DuplicationMetrics();
@@ -67,6 +70,11 @@ public class LibraryIdGenerator {
public Histogram<Short> getOpticalDuplicatesByLibraryIdMap() { return this.opticalDuplicatesByLibraryId; }
+ public static String getReadGroupLibraryName(SAMReadGroupRecord readGroup) {
+ return Optional.ofNullable(readGroup.getLibrary())
+ .orElse(UNKNOWN_LIBRARY);
+ }
+
/**
* Gets the library name from the header for the record. If the RG tag is not present on
* the record, or the library isn't denoted on the read group, a constant string is
@@ -83,7 +91,7 @@ public class LibraryIdGenerator {
}
}
- return "Unknown Library";
+ return UNKNOWN_LIBRARY;
}
/** Get the library ID for the given SAM record. */
diff --git a/src/java/picard/sam/markduplicates/util/MarkQueue.java b/src/main/java/picard/sam/markduplicates/util/MarkQueue.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/MarkQueue.java
rename to src/main/java/picard/sam/markduplicates/util/MarkQueue.java
diff --git a/src/java/picard/sam/markduplicates/util/MemoryBasedReadEndsForMarkDuplicatesMap.java b/src/main/java/picard/sam/markduplicates/util/MemoryBasedReadEndsForMarkDuplicatesMap.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/MemoryBasedReadEndsForMarkDuplicatesMap.java
rename to src/main/java/picard/sam/markduplicates/util/MemoryBasedReadEndsForMarkDuplicatesMap.java
diff --git a/src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java b/src/main/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
similarity index 70%
rename from src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
rename to src/main/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
index 3c1c5c6..d122669 100644
--- a/src/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
+++ b/src/main/java/picard/sam/markduplicates/util/OpticalDuplicateFinder.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2014 The Broad Institute
+ * Copyright (c) 2014-2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -25,11 +25,10 @@
package picard.sam.markduplicates.util;
import htsjdk.samtools.util.Log;
+import htsjdk.samtools.util.ProgressLogger;
import picard.sam.util.PhysicalLocation;
import picard.sam.util.ReadNameParser;
-import java.util.Collections;
-import java.util.Comparator;
import java.util.List;
/**
@@ -43,9 +42,22 @@ public class OpticalDuplicateFinder extends ReadNameParser {
public int opticalDuplicatePixelDistance;
public static final int DEFAULT_OPTICAL_DUPLICATE_DISTANCE = 100;
+ public static final int DEFAULT_BIG_DUPLICATE_SET_SIZE = 1000;
/**
- * Uses the default duplicate distance {@value DEFAULT_OPTICAL_DUPLICATE_DISTANCE} and the default read name regex
+ * Sets the size of a set that is big enough to log progress about.
+ * Defaults to {@value OpticalDuplicateFinder#DEFAULT_BIG_DUPLICATE_SET_SIZE}
+ *
+ * @param bigDuplicateSetSize the size of a set that is big enough to log progress about
+ */
+ public void setBigDuplicateSetSize(final int bigDuplicateSetSize) {
+ this.bigDuplicateSetSize = bigDuplicateSetSize;
+ }
+
+ private int bigDuplicateSetSize = DEFAULT_BIG_DUPLICATE_SET_SIZE;
+
+ /**
+ * Uses the default duplicate distance {@value OpticalDuplicateFinder#DEFAULT_OPTICAL_DUPLICATE_DISTANCE} and the default read name regex
* {@link ReadNameParser#DEFAULT_READ_NAME_REGEX}.
*/
public OpticalDuplicateFinder() {
@@ -87,20 +99,50 @@ public class OpticalDuplicateFinder extends ReadNameParser {
final PhysicalLocation actualKeeper = keeperOrNull(list, keeper);
+ final Log log;
+ final ProgressLogger progressLoggerForKeeper, progressLoggerForRest;
+ final boolean logProgress = length > bigDuplicateSetSize;
+
+ if (logProgress) {
+ log = Log.getInstance(OpticalDuplicateFinder.class);
+ progressLoggerForKeeper = new ProgressLogger(log, 10000, "compared", "ReadEnds to keeper");
+ progressLoggerForRest = new ProgressLogger(log, 1000, "compared", "ReadEnds to others");
+
+ log.info("Large duplicate set. size = " + length);
+ log.debug("About to compare to keeper:" + actualKeeper);
+ } else {
+ log = null;
+ progressLoggerForKeeper = null;
+ progressLoggerForRest = null;
+ }
+
// First go through and compare all the reads to the keeper
if (actualKeeper != null) {
- for (int i=0; i<length; ++i) {
+ for (int i = 0; i < length; ++i) {
final PhysicalLocation other = list.get(i);
opticalDuplicateFlags[i] = closeEnough(actualKeeper, other, distance);
+
+ // The main point of adding this log and if statement (also below) is to work around a bug in the JVM
+ // which causes a deep exception (https://github.com/broadinstitute/picard/issues/472).
+ // It seems that this is related to https://bugs.openjdk.java.net/browse/JDK-8033717 which
+ // was closed due to non-reproducibility. We came across a bam file that evoked this error
+ // every time we tried to duplicate-mark it. The problem seemed to be a duplicate-set of size 500,000,
+ // and this loop seemed to kill the JVM for some reason. This logging statement (and the one in the
+ // loop below) solved the problem.
+ if (logProgress) progressLoggerForKeeper.record(String.format("%d", other.getReadGroup()), other.getX());
}
}
+ if (logProgress) log.debug("Done with comparing to keeper, now the rest.");
// Now go through and do each pairwise comparison not involving the actualKeeper
- for (int i=0; i<length; ++i) {
+ for (int i = 0; i < length; ++i) {
final PhysicalLocation lhs = list.get(i);
if (lhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
- for (int j =i+1; j<length; ++j) {
+ // logging here for same reason as above
+ if (logProgress) progressLoggerForRest.record(String.format("%d", lhs.getReadGroup()), lhs.getX());
+
+ for (int j = i + 1; j < length; ++j) {
final PhysicalLocation rhs = list.get(j);
if (rhs == actualKeeper) continue; // no comparisons to actualKeeper since those are all handled above
if (opticalDuplicateFlags[i] && opticalDuplicateFlags[j]) continue; // both already marked, no need to check
@@ -114,7 +156,7 @@ public class OpticalDuplicateFinder extends ReadNameParser {
}
}
}
-
+
return opticalDuplicateFlags;
}
diff --git a/src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java b/src/main/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java
rename to src/main/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigar.java
diff --git a/src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigarSet.java b/src/main/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigarSet.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigarSet.java
rename to src/main/java/picard/sam/markduplicates/util/PhysicalLocationForMateCigarSet.java
diff --git a/src/java/picard/sam/markduplicates/util/ReadEnds.java b/src/main/java/picard/sam/markduplicates/util/ReadEnds.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/ReadEnds.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEnds.java
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java
similarity index 93%
rename from src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java
index c55fc88..da50385 100644
--- a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java
+++ b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicates.java
@@ -29,7 +29,7 @@ package picard.sam.markduplicates.util;
*
* @author Nils Homer
*/
-public class ReadEndsForMarkDuplicates extends ReadEnds {
+public class ReadEndsForMarkDuplicates extends ReadEnds implements Cloneable {
/*
What do we need to store you ask? Well, we need to store:
- byte: orientation
@@ -71,4 +71,9 @@ public class ReadEndsForMarkDuplicates extends ReadEnds {
this.read1IndexInFile = read.read1IndexInFile;
this.read2IndexInFile = read.read2IndexInFile;
}
+
+ @Override
+ public ReadEndsForMarkDuplicates clone() {
+ return new ReadEndsForMarkDuplicates(this);
+ }
}
\ No newline at end of file
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
similarity index 98%
rename from src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
index d266de7..790f042 100644
--- a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
+++ b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesCodec.java
@@ -28,7 +28,7 @@ import picard.PicardException;
import java.io.*;
-/** Coded for ReadEnds that just outputs the primitive fields and reads them back. */
+/** Codec for ReadEnds that just outputs the primitive fields and reads them back. */
public class ReadEndsForMarkDuplicatesCodec implements SortingCollection.Codec<ReadEndsForMarkDuplicates> {
protected DataInputStream in;
protected DataOutputStream out;
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesMap.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesMap.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesMap.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesMap.java
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodes.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodes.java
similarity index 82%
rename from src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodes.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodes.java
index 09a2c8b..b161100 100644
--- a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodes.java
+++ b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodes.java
@@ -35,6 +35,18 @@ public class ReadEndsForMarkDuplicatesWithBarcodes extends ReadEndsForMarkDuplic
super(read);
}
+ public ReadEndsForMarkDuplicatesWithBarcodes(final ReadEndsForMarkDuplicatesWithBarcodes read) {
+ super(read);
+ barcode = read.barcode;
+ readOneBarcode = read.readOneBarcode;
+ readTwoBarcode = read.readTwoBarcode;
+ }
+
+ @Override
+ public ReadEndsForMarkDuplicatesWithBarcodes clone() {
+ return new ReadEndsForMarkDuplicatesWithBarcodes(this);
+ }
+
public static int getSizeOf() {
return ReadEndsForMarkDuplicates.getSizeOf() + (3 * 4);
}
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodesCodec.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodesCodec.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodesCodec.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEndsForMarkDuplicatesWithBarcodesCodec.java
diff --git a/src/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java
rename to src/main/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java
diff --git a/src/java/picard/sam/markduplicates/util/SamRecordWithOrdinalAndSetDuplicateReadFlag.java b/src/main/java/picard/sam/markduplicates/util/SamRecordWithOrdinalAndSetDuplicateReadFlag.java
similarity index 100%
rename from src/java/picard/sam/markduplicates/util/SamRecordWithOrdinalAndSetDuplicateReadFlag.java
rename to src/main/java/picard/sam/markduplicates/util/SamRecordWithOrdinalAndSetDuplicateReadFlag.java
diff --git a/src/java/picard/sam/util/PhysicalLocation.java b/src/main/java/picard/sam/util/PhysicalLocation.java
similarity index 100%
rename from src/java/picard/sam/util/PhysicalLocation.java
rename to src/main/java/picard/sam/util/PhysicalLocation.java
diff --git a/src/java/picard/sam/util/PhysicalLocationInt.java b/src/main/java/picard/sam/util/PhysicalLocationInt.java
similarity index 100%
rename from src/java/picard/sam/util/PhysicalLocationInt.java
rename to src/main/java/picard/sam/util/PhysicalLocationInt.java
diff --git a/src/java/picard/sam/util/PhysicalLocationShort.java b/src/main/java/picard/sam/util/PhysicalLocationShort.java
similarity index 100%
rename from src/java/picard/sam/util/PhysicalLocationShort.java
rename to src/main/java/picard/sam/util/PhysicalLocationShort.java
diff --git a/src/java/picard/sam/util/ReadNameParser.java b/src/main/java/picard/sam/util/ReadNameParser.java
similarity index 100%
rename from src/java/picard/sam/util/ReadNameParser.java
rename to src/main/java/picard/sam/util/ReadNameParser.java
diff --git a/src/java/picard/util/AbstractInputParser.java b/src/main/java/picard/util/AbstractInputParser.java
similarity index 100%
rename from src/java/picard/util/AbstractInputParser.java
rename to src/main/java/picard/util/AbstractInputParser.java
diff --git a/src/java/picard/util/AdapterMarker.java b/src/main/java/picard/util/AdapterMarker.java
similarity index 100%
rename from src/java/picard/util/AdapterMarker.java
rename to src/main/java/picard/util/AdapterMarker.java
diff --git a/src/java/picard/util/AdapterPair.java b/src/main/java/picard/util/AdapterPair.java
similarity index 100%
rename from src/java/picard/util/AdapterPair.java
rename to src/main/java/picard/util/AdapterPair.java
diff --git a/src/java/picard/util/AsyncIterator.java b/src/main/java/picard/util/AsyncIterator.java
similarity index 100%
rename from src/java/picard/util/AsyncIterator.java
rename to src/main/java/picard/util/AsyncIterator.java
diff --git a/src/java/picard/util/AtomicIterator.java b/src/main/java/picard/util/AtomicIterator.java
similarity index 100%
rename from src/java/picard/util/AtomicIterator.java
rename to src/main/java/picard/util/AtomicIterator.java
diff --git a/src/java/picard/util/BaitDesigner.java b/src/main/java/picard/util/BaitDesigner.java
similarity index 93%
rename from src/java/picard/util/BaitDesigner.java
rename to src/main/java/picard/util/BaitDesigner.java
index c20742f..895b43f 100644
--- a/src/java/picard/util/BaitDesigner.java
+++ b/src/main/java/picard/util/BaitDesigner.java
@@ -39,11 +39,34 @@ import java.util.regex.Pattern;
* @author Tim Fennell
*/
@CommandLineProgramProperties(
- usage = "Designs baits or oligos for hybrid selection reactions.",
- usageShort = "Designs baits or oligos for hybrid selection reactions.",
- programGroup = None.class
-)
+ usage = BaitDesigner.USAGE_SUMMARY + BaitDesigner.USAGE_DETAILS,
+ usageShort = BaitDesigner.USAGE_SUMMARY,
+ programGroup = None.class
+ )
public class BaitDesigner extends CommandLineProgram {
+static final String USAGE_SUMMARY = "<b>Designs oligonucleotide baits for hybrid selection reactions.</b> ";
+static final String USAGE_DETAILS = "<p>This tool is used to design custom bait sets for hybrid selection experiments. The following " +
+ "files are input into BaitDesigner: a (TARGET) interval list indicating the sequences of interest, e.g. exons with their " +
+ "respective coordinates, a reference sequence, and a unique identifier string (DESIGN_NAME). </p>" +
+
+ "<p>The tool will output interval_list files of both bait and target sequences as well as the actual bait sequences in " +
+ "FastA format. At least two baits are output for each target sequence, with greater numbers for larger intervals. Although " +
+ "the default values for both bait size (120 bases) and offsets (80 bases) are suitable for most applications, these values can " +
+ "be customized. Offsets represent the distance between sequential baits on a contiguous stretch of target DNA sequence. </p>"+
+
+ "<p>The tool will also output a pooled set of 55,000 (default) oligonucleotides representing all of the baits redundantly. " +
+ "This redundancy achieves a uniform concentration of oligonucleotides for synthesis by a vendor as well as equal numbers " +
+ "of each bait to prevent bias during the hybrid selection reaction. </p>" +
+
+ "<h4>Usage example:</h4>" +
+ "<pre>" +
+ "java -jar picard.jar BaitDesigner \\<br /> " +
+ " TARGET=targets.interval_list \\<br /> " +
+ " DESIGN_NAME=new_baits \\<br /> " +
+ " R=reference_sequence.fasta " +
+ "</pre> " +
+ "<hr />";
+
/**
* Subclass of Interval for representing Baits, that caches the bait sequence.
*/
diff --git a/src/java/picard/util/BasicInputParser.java b/src/main/java/picard/util/BasicInputParser.java
similarity index 100%
rename from src/java/picard/util/BasicInputParser.java
rename to src/main/java/picard/util/BasicInputParser.java
diff --git a/src/java/picard/util/BedToIntervalList.java b/src/main/java/picard/util/BedToIntervalList.java
similarity index 100%
rename from src/java/picard/util/BedToIntervalList.java
rename to src/main/java/picard/util/BedToIntervalList.java
diff --git a/src/java/picard/util/CircularByteBuffer.java b/src/main/java/picard/util/CircularByteBuffer.java
similarity index 100%
rename from src/java/picard/util/CircularByteBuffer.java
rename to src/main/java/picard/util/CircularByteBuffer.java
diff --git a/src/java/picard/util/ClippingUtility.java b/src/main/java/picard/util/ClippingUtility.java
similarity index 98%
rename from src/java/picard/util/ClippingUtility.java
rename to src/main/java/picard/util/ClippingUtility.java
index c1433d6..6fa40e9 100644
--- a/src/java/picard/util/ClippingUtility.java
+++ b/src/main/java/picard/util/ClippingUtility.java
@@ -264,8 +264,10 @@ public class ClippingUtility {
int mismatches = 0;
for (int i = 0; i < length; ++i) {
- if (!SequenceUtil.isNoCall(adapterSequence[i]) && !SequenceUtil.basesEqual(adapterSequence[i], read[start + i])) {
- if (++mismatches > mismatchesAllowed) continue READ_LOOP;
+ if (!SequenceUtil.isNoCall(adapterSequence[i]) &&
+ !SequenceUtil.basesEqual(adapterSequence[i], read[start + i]) &&
+ ++mismatches > mismatchesAllowed) {
+ continue READ_LOOP;
}
}
diff --git a/src/java/picard/util/CsvInputParser.java b/src/main/java/picard/util/CsvInputParser.java
similarity index 100%
rename from src/java/picard/util/CsvInputParser.java
rename to src/main/java/picard/util/CsvInputParser.java
diff --git a/src/java/picard/util/DbSnpBitSetUtil.java b/src/main/java/picard/util/DbSnpBitSetUtil.java
similarity index 100%
rename from src/java/picard/util/DbSnpBitSetUtil.java
rename to src/main/java/picard/util/DbSnpBitSetUtil.java
diff --git a/src/java/picard/util/DelimitedTextFileWithHeaderIterator.java b/src/main/java/picard/util/DelimitedTextFileWithHeaderIterator.java
similarity index 100%
rename from src/java/picard/util/DelimitedTextFileWithHeaderIterator.java
rename to src/main/java/picard/util/DelimitedTextFileWithHeaderIterator.java
diff --git a/src/java/picard/util/FifoBuffer.java b/src/main/java/picard/util/FifoBuffer.java
similarity index 100%
rename from src/java/picard/util/FifoBuffer.java
rename to src/main/java/picard/util/FifoBuffer.java
diff --git a/src/java/picard/util/FileChannelJDKBugWorkAround.java b/src/main/java/picard/util/FileChannelJDKBugWorkAround.java
similarity index 100%
rename from src/java/picard/util/FileChannelJDKBugWorkAround.java
rename to src/main/java/picard/util/FileChannelJDKBugWorkAround.java
diff --git a/src/java/picard/util/IlluminaUtil.java b/src/main/java/picard/util/IlluminaUtil.java
similarity index 100%
rename from src/java/picard/util/IlluminaUtil.java
rename to src/main/java/picard/util/IlluminaUtil.java
diff --git a/src/java/picard/util/IntervalListScatterer.java b/src/main/java/picard/util/IntervalListScatterer.java
similarity index 100%
rename from src/java/picard/util/IntervalListScatterer.java
rename to src/main/java/picard/util/IntervalListScatterer.java
diff --git a/src/java/picard/util/IntervalListToBed.java b/src/main/java/picard/util/IntervalListToBed.java
similarity index 100%
rename from src/java/picard/util/IntervalListToBed.java
rename to src/main/java/picard/util/IntervalListToBed.java
diff --git a/src/java/picard/util/IntervalListTools.java b/src/main/java/picard/util/IntervalListTools.java
similarity index 98%
rename from src/java/picard/util/IntervalListTools.java
rename to src/main/java/picard/util/IntervalListTools.java
index ec03bfd..f3aeca7 100644
--- a/src/java/picard/util/IntervalListTools.java
+++ b/src/main/java/picard/util/IntervalListTools.java
@@ -268,7 +268,7 @@ public class IntervalListTools extends CommandLineProgram {
long totalUniqueBaseCount = 0;
long intervalCount = 0;
for (final IntervalList finalInterval : resultIntervals) {
- totalUniqueBaseCount = finalInterval.getUniqueBaseCount();
+ totalUniqueBaseCount += finalInterval.getUniqueBaseCount();
intervalCount += finalInterval.size();
}
@@ -331,10 +331,8 @@ public class IntervalListTools extends CommandLineProgram {
}
private static void createDirectoryOrFail(final File directory) {
- if (!directory.exists()) {
- if (!directory.mkdir()) {
- throw new PicardException("Unable to create directory: " + directory.getAbsolutePath());
- }
+ if (!directory.exists() && !directory.mkdir()) {
+ throw new PicardException("Unable to create directory: " + directory.getAbsolutePath());
}
}
diff --git a/src/java/picard/util/Iterators.java b/src/main/java/picard/util/Iterators.java
similarity index 100%
rename from src/java/picard/util/Iterators.java
rename to src/main/java/picard/util/Iterators.java
diff --git a/src/java/picard/util/LiftOverIntervalList.java b/src/main/java/picard/util/LiftOverIntervalList.java
similarity index 100%
rename from src/java/picard/util/LiftOverIntervalList.java
rename to src/main/java/picard/util/LiftOverIntervalList.java
diff --git a/src/java/picard/util/MathUtil.java b/src/main/java/picard/util/MathUtil.java
similarity index 100%
rename from src/java/picard/util/MathUtil.java
rename to src/main/java/picard/util/MathUtil.java
diff --git a/src/java/picard/util/MetricsDoclet.java b/src/main/java/picard/util/MetricsDoclet.java
similarity index 85%
rename from src/java/picard/util/MetricsDoclet.java
rename to src/main/java/picard/util/MetricsDoclet.java
index b423d97..0e62d71 100644
--- a/src/java/picard/util/MetricsDoclet.java
+++ b/src/main/java/picard/util/MetricsDoclet.java
@@ -24,11 +24,8 @@
package picard.util;
-import com.sun.javadoc.ClassDoc;
-import com.sun.javadoc.Doc;
-import com.sun.javadoc.FieldDoc;
-import com.sun.javadoc.RootDoc;
-import com.sun.javadoc.Tag;
+import com.sun.javadoc.*;
+import com.sun.tools.doclets.standard.Standard;
import htsjdk.samtools.metrics.MetricBase;
import java.io.File;
@@ -40,13 +37,13 @@ import java.util.TreeMap;
/**
* Doclet for use with JavaDoc that will find all classes extending MetricBase and
* output information about the metrics definitions that go along with the classes.
- *
+ * <p>
* Takes a single parameter (-f file) to tell it where to output the resulting
* documentation file in HTML format.
*
* @author Tim Fennell
*/
-public class MetricsDoclet {
+public class MetricsDoclet extends Standard {
/**
* Entry point called by the javadoc command line tool. Loops over all the
* classes identifying metrics classes and then produces some basic information
@@ -57,7 +54,7 @@ public class MetricsDoclet {
*/
public static boolean start(final RootDoc root) {
// Build a set of metrics classes sorted by name
- final SortedMap<String,ClassDoc> metricsClasses = new TreeMap<String,ClassDoc>();
+ final SortedMap<String, ClassDoc> metricsClasses = new TreeMap<String, ClassDoc>();
for (final ClassDoc doc : root.classes()) {
if (isMetricsClass(doc)) {
System.out.println("Processing " + doc.qualifiedTypeName());
@@ -76,9 +73,13 @@ public class MetricsDoclet {
out.println("<ol>");
for (final ClassDoc doc : metricsClasses.values()) {
out.println("<li><a href=\"#" + doc.name() + "\">" + doc.name() + "</a>: " +
- firstSentence(doc) + "</li>");
+ firstSentence(doc) + "</li>");
}
out.println("</ol>");
+ out.println("<p>Note: Metrics labeled as percentages (with 'percent' in the full metric name or 'PCT' " +
+ "in the name given in the output file) are actually expressed as fractions. For example, " +
+ "'PCT_TARGET_BASES_20X = 0.85' should be interpreted as '85 percent of targeted bases are " +
+ "covered to 20X coverage or more'.</p>");
out.println("</section>");
// Now print out each class
@@ -134,11 +135,10 @@ public class MetricsDoclet {
*/
protected static PrintStream getOutput(final RootDoc root) {
for (final String[] arg : root.options()) {
- if (arg[0].equals("-f") && arg.length == 2) {
+ if (arg[0].equals("-d") && arg.length == 2) {
try {
- return new PrintStream(new File(arg[1]));
- }
- catch (FileNotFoundException fnfe) {
+ return new PrintStream(new File(arg[1], "picard-metric-definitions.html"));
+ } catch (FileNotFoundException fnfe) {
root.printError("Could not open destination file: " + arg[1]);
fnfe.printStackTrace();
return null;
@@ -146,22 +146,11 @@ public class MetricsDoclet {
}
}
- root.printError("Destination file parameter -f not supplied.");
+ root.printError("Destination file parameter -d not supplied.");
return null;
}
/**
- * Required method by the javadoc caller that returns the expected number of elements
- * for doclet specific command line arguments.
- */
- public static int optionLength(final String option) {
- if(option.equals("-f")) {
- return 2;
- }
- return 0;
- }
-
- /**
* Takes a Doc object and uses the firstSentenceTags() to recreate the first sentence
* text.
*/
diff --git a/src/java/picard/util/QuerySortedReadPairIteratorUtil.java b/src/main/java/picard/util/QuerySortedReadPairIteratorUtil.java
similarity index 100%
rename from src/java/picard/util/QuerySortedReadPairIteratorUtil.java
rename to src/main/java/picard/util/QuerySortedReadPairIteratorUtil.java
diff --git a/src/java/picard/util/RExecutor.java b/src/main/java/picard/util/RExecutor.java
similarity index 100%
rename from src/java/picard/util/RExecutor.java
rename to src/main/java/picard/util/RExecutor.java
diff --git a/src/java/picard/util/ScatterIntervalsByNs.java b/src/main/java/picard/util/ScatterIntervalsByNs.java
similarity index 83%
rename from src/java/picard/util/ScatterIntervalsByNs.java
rename to src/main/java/picard/util/ScatterIntervalsByNs.java
index 588b545..e405925 100644
--- a/src/java/picard/util/ScatterIntervalsByNs.java
+++ b/src/main/java/picard/util/ScatterIntervalsByNs.java
@@ -1,5 +1,6 @@
package picard.util;
+import htsjdk.samtools.util.SequenceUtil;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.programgroups.Intervals;
@@ -18,7 +19,10 @@ import htsjdk.samtools.SAMSequenceRecord;
import htsjdk.samtools.util.StringUtil;
import java.io.File;
-import java.lang.Boolean;import java.lang.Override;import java.lang.String;import java.util.Collections;
+import java.lang.Boolean;
+import java.lang.Override;
+import java.lang.String;
+import java.util.Collections;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
@@ -38,11 +42,11 @@ import java.util.Set;
programGroup = Intervals.class
)
public class ScatterIntervalsByNs extends CommandLineProgram {
- static final String USAGE_SUMMARY = "Writes an interval list based on splitting the reference by Ns. ";
- static final String USAGE_DETAILS = "This tool identifies positions in the reference where the basecalls are Ns and writes out an " +
- "interval list using the resulting coordinates (excluding the N bases). This can be used to create an interval list for " +
+ static final String USAGE_SUMMARY = "Writes an interval list based on splitting a reference by Ns. ";
+ static final String USAGE_DETAILS = "This tool identifies positions in a reference where the bases are 'no-calls' and writes out an " +
+ "interval-list using the resulting coordinates. This can be used to create an interval list for " +
"whole genome sequence (WGS) for e.g. scatter-gather purposes, as an alternative to using fixed-length intervals. The number " +
- "of contiguous Ns that can be tolerated before creating a break is adjustable from the command line.<br />" +
+ "of contiguous no-calls that can be tolerated before creating a break is adjustable from the command line.<br />" +
"<h4>Usage example:</h4>" +
"<pre>" +
"java -jar picard.jar ScatterIntervalsByNs \\<br />" +
@@ -64,7 +68,7 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
public int MAX_TO_MERGE = 1;
//not using an enum since Interval.name is a String, and am using that to define the type of the Interval
- static final String
+ private static final String
ACGTmer = "ACGTmer",
Nmer = "Nmer";
@@ -74,19 +78,19 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
ACGT(ACGTmer),
BOTH(Nmer, ACGTmer);
- private final Set acceptedTypes;
+ private final Set<String> acceptedTypes;
public Boolean accepts(final String string) {return acceptedTypes.contains(string);}
OutputType(final String... strings) {
- acceptedTypes = new HashSet<String>();
+ acceptedTypes = new HashSet<>();
Collections.addAll(acceptedTypes, strings);
}
}
private static final Log log = Log.getInstance(ScatterIntervalsByNs.class);
- final ProgressLogger locusProgress = new ProgressLogger(log, (int) 1e7, "examined", "loci");
- final ProgressLogger intervalProgress = new ProgressLogger(log, (int) 10, "found", "intervals");
+ private static final ProgressLogger locusProgress = new ProgressLogger(log, (int) 1e7, "examined", "loci");
+ private static final ProgressLogger intervalProgress = new ProgressLogger(log, (int) 10, "found", "intervals");
public static void main(final String[] args) {
new ScatterIntervalsByNs().instanceMainWithExit(args);
@@ -111,11 +115,7 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
final IntervalList outputIntervals = new IntervalList(intervals.getHeader().clone());
log.info(String.format("Collecting requested type of intervals (%s)", OUTPUT_TYPE));
- for (final Interval i : intervals.getIntervals()) {
- if (OUTPUT_TYPE.accepts(i.getName())) {
- outputIntervals.add(i);
- }
- }
+ intervals.getIntervals().stream().filter(i -> OUTPUT_TYPE.accepts(i.getName())).forEach(outputIntervals::add);
log.info("Writing Intervals.");
outputIntervals.write(OUTPUT);
@@ -130,8 +130,8 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
* Generate an interval list that alternates between Ns and ACGTs *
* ****************************************************************
*/
- public static IntervalList segregateReference(final ReferenceSequenceFile refFile, final int maxNmerToMerge) {
- final List<Interval> preliminaryIntervals = new LinkedList<Interval>();
+ static IntervalList segregateReference(final ReferenceSequenceFile refFile, final int maxNmerToMerge) {
+ final List<Interval> preliminaryIntervals = new LinkedList<>();
final SAMFileHeader header = new SAMFileHeader();
header.setSequenceDictionary(refFile.getSequenceDictionary());
header.setSortOrder(SAMFileHeader.SortOrder.coordinate);
@@ -143,11 +143,12 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
final byte[] bytes = ref.getBases();
StringUtil.toUpperCase(bytes);
- boolean nBlockIsOpen = (bytes[0] == 'N');
+ boolean nBlockIsOpen = SequenceUtil.isNoCall(bytes[0]);
int start = 0;
for (int i = 0; i < bytes.length; ++i) {
- final boolean currentBaseIsN = (bytes[i] == 'N');
+ locusProgress.record(rec.getSequenceName(), i);
+ final boolean currentBaseIsN = SequenceUtil.isNoCall(bytes[i]);
//create intervals when switching, i.e "nBlockIsOpen" disagrees with "currentBaseIsN"
if (nBlockIsOpen != currentBaseIsN) {
@@ -180,7 +181,7 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
{
// create the new ACGTmer interval
final Interval temp = new Interval(
- preliminaryIntervals.get(0).getSequence(),
+ preliminaryIntervals.get(0).getContig(),
preliminaryIntervals.get(0).getStart(),
preliminaryIntervals.get(2).getEnd(), false, ACGTmer);
@@ -191,7 +192,9 @@ public class ScatterIntervalsByNs extends CommandLineProgram {
//and replace them with the newly created one
preliminaryIntervals.add(0, temp);
} else { //if cannot merge top three intervals, transfer the top intervals to finalIntervals
- finalIntervals.add(preliminaryIntervals.remove(0));
+ final Interval remove = preliminaryIntervals.remove(0);
+ finalIntervals.add(remove);
+ intervalProgress.record(remove.getContig(),remove.getStart());
}
}
return finalIntervals;
diff --git a/src/java/picard/util/TabbedInputParser.java b/src/main/java/picard/util/TabbedInputParser.java
similarity index 100%
rename from src/java/picard/util/TabbedInputParser.java
rename to src/main/java/picard/util/TabbedInputParser.java
diff --git a/src/java/picard/util/TabbedTextFileWithHeaderParser.java b/src/main/java/picard/util/TabbedTextFileWithHeaderParser.java
similarity index 95%
rename from src/java/picard/util/TabbedTextFileWithHeaderParser.java
rename to src/main/java/picard/util/TabbedTextFileWithHeaderParser.java
index 655077d..7d35d85 100644
--- a/src/java/picard/util/TabbedTextFileWithHeaderParser.java
+++ b/src/main/java/picard/util/TabbedTextFileWithHeaderParser.java
@@ -27,9 +27,11 @@ import htsjdk.samtools.util.CloseableIterator;
import picard.PicardException;
import java.io.File;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
@@ -147,6 +149,13 @@ public class TabbedTextFileWithHeaderParser implements Iterable<TabbedTextFileWi
}
/**
+ * @return The column labels for this file as a List, in no particular order.
+ */
+ public List<String> columnLabelsList() {
+ return Collections.unmodifiableList(new ArrayList<String>(columnLabelIndices.keySet()));
+ }
+
+ /**
* Creates the iterator object. It is illegal to have more than one iterator extant
* on the same parser object.
*/
diff --git a/src/java/picard/util/UnsignedTypeUtil.java b/src/main/java/picard/util/UnsignedTypeUtil.java
similarity index 100%
rename from src/java/picard/util/UnsignedTypeUtil.java
rename to src/main/java/picard/util/UnsignedTypeUtil.java
diff --git a/src/java/picard/util/VariantType.java b/src/main/java/picard/util/VariantType.java
similarity index 100%
rename from src/java/picard/util/VariantType.java
rename to src/main/java/picard/util/VariantType.java
diff --git a/src/java/picard/vcf/ByIntervalListVariantContextIterator.java b/src/main/java/picard/vcf/ByIntervalListVariantContextIterator.java
similarity index 100%
rename from src/java/picard/vcf/ByIntervalListVariantContextIterator.java
rename to src/main/java/picard/vcf/ByIntervalListVariantContextIterator.java
diff --git a/src/java/picard/vcf/CallingMetricAccumulator.java b/src/main/java/picard/vcf/CallingMetricAccumulator.java
similarity index 100%
rename from src/java/picard/vcf/CallingMetricAccumulator.java
rename to src/main/java/picard/vcf/CallingMetricAccumulator.java
diff --git a/src/java/picard/vcf/CollectVariantCallingMetrics.java b/src/main/java/picard/vcf/CollectVariantCallingMetrics.java
similarity index 99%
rename from src/java/picard/vcf/CollectVariantCallingMetrics.java
rename to src/main/java/picard/vcf/CollectVariantCallingMetrics.java
index 1a709f2..33e0b93 100644
--- a/src/java/picard/vcf/CollectVariantCallingMetrics.java
+++ b/src/main/java/picard/vcf/CollectVariantCallingMetrics.java
@@ -149,7 +149,7 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
/** The number of SNPs that are also filtered */
public long FILTERED_SNPS;
- /** The percentage of high confidence SNPs in dbSNP */
+ /** The fraction of high confidence SNPs in dbSNP */
public float PCT_DBSNP;
/** The Transition/Transversion ratio of the SNP calls made at dbSNP sites */
@@ -167,7 +167,7 @@ public class CollectVariantCallingMetrics extends CommandLineProgram {
/** The number of indels that are also filtered */
public long FILTERED_INDELS;
- /** The percentage of high confidence Indels in dbSNP */
+ /** The fraction of high confidence Indels in dbSNP */
public float PCT_DBSNP_INDELS;
/** The number of high confidence Indels found in dbSNP */
diff --git a/src/java/picard/vcf/GA4GHScheme.java b/src/main/java/picard/vcf/GA4GHScheme.java
similarity index 100%
rename from src/java/picard/vcf/GA4GHScheme.java
rename to src/main/java/picard/vcf/GA4GHScheme.java
diff --git a/src/java/picard/vcf/GA4GHSchemeWithMissingAsHomRef.java b/src/main/java/picard/vcf/GA4GHSchemeWithMissingAsHomRef.java
similarity index 100%
rename from src/java/picard/vcf/GA4GHSchemeWithMissingAsHomRef.java
rename to src/main/java/picard/vcf/GA4GHSchemeWithMissingAsHomRef.java
diff --git a/src/java/picard/vcf/GatherVcfs.java b/src/main/java/picard/vcf/GatherVcfs.java
similarity index 96%
rename from src/java/picard/vcf/GatherVcfs.java
rename to src/main/java/picard/vcf/GatherVcfs.java
index 0334a40..1f6a4e2 100644
--- a/src/java/picard/vcf/GatherVcfs.java
+++ b/src/main/java/picard/vcf/GatherVcfs.java
@@ -124,11 +124,9 @@ public class GatherVcfs extends CommandLineProgram {
final CloseableIterator<VariantContext> variantIterator = in.iterator();
if (variantIterator.hasNext()) {
final VariantContext currentContext = variantIterator.next();
- if (lastContext != null) {
- if (comparator.compare(lastContext, currentContext) >= 0) {
- throw new IllegalArgumentException("First record in file " + f.getAbsolutePath() + " is not after first record in " +
- "previous file " + lastFile.getAbsolutePath());
- }
+ if (lastContext != null && comparator.compare(lastContext, currentContext) >= 0) {
+ throw new IllegalArgumentException("First record in file " + f.getAbsolutePath() + " is not after first record in " +
+ "previous file " + lastFile.getAbsolutePath());
}
lastContext = currentContext;
@@ -212,7 +210,7 @@ public class GatherVcfs extends CommandLineProgram {
final BlockCompressedInputStream blockIn = new BlockCompressedInputStream(in, false);
boolean lastByteNewline = true;
- while (in.available() > 0) {
+ while (blockIn.available() > 0) {
// Read a block - blockIn.available() is guaranteed to return the bytes remaining in the block that has been
// read, and since we haven't consumed any yet, that is the block size.
final int blockLength = blockIn.available();
diff --git a/src/java/picard/vcf/GenotypeConcordance.java b/src/main/java/picard/vcf/GenotypeConcordance.java
similarity index 80%
rename from src/java/picard/vcf/GenotypeConcordance.java
rename to src/main/java/picard/vcf/GenotypeConcordance.java
index 0b25b97..aa1eb85 100644
--- a/src/java/picard/vcf/GenotypeConcordance.java
+++ b/src/main/java/picard/vcf/GenotypeConcordance.java
@@ -28,13 +28,11 @@ import htsjdk.samtools.metrics.MetricsFile;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.samtools.util.Log;
-import htsjdk.samtools.util.PeekableIterator;
import htsjdk.samtools.util.ProgressLogger;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.tribble.Tribble;
import htsjdk.variant.variantcontext.Genotype;
import htsjdk.variant.variantcontext.VariantContext;
-import htsjdk.variant.variantcontext.VariantContextComparator;
import htsjdk.variant.vcf.VCFFileReader;
import picard.PicardException;
import picard.cmdline.CommandLineProgram;
@@ -43,6 +41,7 @@ import picard.cmdline.Option;
import picard.cmdline.StandardOptionDefinitions;
import picard.cmdline.programgroups.VcfOrBcf;
import picard.vcf.GenotypeConcordanceStates.*;
+import picard.vcf.PairedVariantSubContextIterator.VcfTuple;
import java.io.File;
import java.util.ArrayList;
@@ -50,6 +49,7 @@ import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Optional;
import static htsjdk.variant.variantcontext.VariantContext.Type.*;
@@ -192,7 +192,7 @@ public class GenotypeConcordance extends CommandLineProgram {
}
if (MISSING_SITES_HOM_REF) {
//If you are using this flag you must include a high confidence interval list where missing sites are hom_ref.
- if (!usingIntervals){
+ if (!usingIntervals) {
errors.add("You cannot use the MISSING_HOM option without also supplying an interval list over which missing " +
"sites are considered confident homozygous reference calls.");
}
@@ -286,69 +286,33 @@ public class GenotypeConcordance extends CommandLineProgram {
log.info("Starting iteration over variants.");
while (pairedIterator.hasNext()) {
- final VcTuple tuple = pairedIterator.next();
-
- final VariantContext.Type truthVariantContextType = tuple.truthVariantContext != null ? tuple.truthVariantContext.getType() : NO_VARIATION;
- final VariantContext.Type callVariantContextType = tuple.callVariantContext != null ? tuple.callVariantContext.getType() : NO_VARIATION;
-
- // A flag to keep track of whether we have been able to successfully classify the Truth/Call States.
- // Unclassified include MIXED/MNP/Symbolic...
- boolean stateClassified = false;
- final TruthAndCallStates truthAndCallStates = determineState(tuple.truthVariantContext, TRUTH_SAMPLE, tuple.callVariantContext, CALL_SAMPLE, MIN_GQ, MIN_DP);
- if (truthVariantContextType == SNP) {
- if ((callVariantContextType == SNP) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
- // Note. If truth is SNP and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
- snpCounter.increment(truthAndCallStates);
- stateClassified = true;
- }
- }
- else if (truthVariantContextType == INDEL) {
- // Note. If truth is Indel and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
- if ((callVariantContextType == INDEL) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
- indelCounter.increment(truthAndCallStates);
- stateClassified = true;
- }
- }
- else if (truthVariantContextType == MIXED) {
- // Note. If truth is MIXED and call is SNP, the event will be logged in the snpCounter, with column = MIXED
- if (callVariantContextType == SNP) {
- snpCounter.increment(truthAndCallStates);
- stateClassified = true;
- }
- // Note. If truth is MIXED and call is INDEL, the event will be logged in the snpCounter, with column = MIXED
- else if (callVariantContextType == INDEL) {
- indelCounter.increment(truthAndCallStates);
- stateClassified = true;
- }
- }
- else if (truthVariantContextType == NO_VARIATION) {
- if (callVariantContextType == SNP) {
- snpCounter.increment(truthAndCallStates);
- stateClassified = true;
- }
- else if (callVariantContextType == INDEL) {
- indelCounter.increment(truthAndCallStates);
- stateClassified = true;
- }
- }
+ final VcfTuple tuple = pairedIterator.next();
+ final VariantContext.Type truthVariantContextType = tuple.leftVariantContext.map(VariantContext::getType).orElse(NO_VARIATION);
+ final VariantContext.Type callVariantContextType = tuple.rightVariantContext.map(VariantContext::getType).orElse(NO_VARIATION);
+
+ final boolean stateClassified = classifyVariants(tuple.leftVariantContext, TRUTH_SAMPLE,
+ tuple.rightVariantContext, CALL_SAMPLE,
+ Optional.of(snpCounter), Optional.of(indelCounter),
+ MIN_GQ, MIN_DP);
+
if (!stateClassified) {
final String condition = truthVariantContextType + " " + callVariantContextType;
- Integer count = unClassifiedStatesMap.get(condition);
- if (count == null) count = 0;
- unClassifiedStatesMap.put(condition, ++count);
+ final Integer count = unClassifiedStatesMap.getOrDefault(condition, 0) + 1;
+ unClassifiedStatesMap.put(condition, count);
}
- final VariantContext variantContextForLogging = tuple.truthVariantContext != null ? tuple.truthVariantContext : tuple.callVariantContext;
+ //final VariantContext variantContextForLogging = tuple.leftVariantContext.orElseGet(tuple.rightVariantContext::get); // FIXME
+ final VariantContext variantContextForLogging = tuple.leftVariantContext.isPresent() ? tuple.leftVariantContext.get() : tuple.rightVariantContext.get();
progress.record(variantContextForLogging.getContig(), variantContextForLogging.getStart());
}
//snp counter add in X number of missing-missing hom ref's (truth and call state)
//missing missing is total interval size minus number of iterations in while loop
if (MISSING_SITES_HOM_REF) {
- //need to know size of intervals to add missing-missing sites for NIST schema.
- final long intervalBaseCount = intervals.getBaseCount();
- addMissingTruthAndMissingCallStates(snpCounter.getCounterSize(), intervalBaseCount, snpCounter);
- addMissingTruthAndMissingCallStates(indelCounter.getCounterSize(), intervalBaseCount, indelCounter);
+ // need to know size of region called over (intervals or whole genome) to add missing-missing sites for NIST schema.
+ final long baseCount = (intervals != null) ? intervals.getBaseCount() : truthReader.getFileHeader().getSequenceDictionary().getReferenceLength();
+ addMissingTruthAndMissingCallStates(snpCounter.getCounterSize(), baseCount, snpCounter);
+ addMissingTruthAndMissingCallStates(indelCounter.getCounterSize(), baseCount, indelCounter);
}
// Calculate and store the summary-level metrics
@@ -361,8 +325,8 @@ public class GenotypeConcordance extends CommandLineProgram {
// Calculate and store the detailed metrics for both SNP and indels
final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetrics = getMetricsFile();
- outputDetailMetricsFile(SNP, genotypeConcordanceDetailMetrics, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE);
- outputDetailMetricsFile(INDEL, genotypeConcordanceDetailMetrics, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE);
+ outputDetailMetricsFile(SNP, genotypeConcordanceDetailMetrics, snpCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF, OUTPUT_ALL_ROWS);
+ outputDetailMetricsFile(INDEL, genotypeConcordanceDetailMetrics, indelCounter, TRUTH_SAMPLE, CALL_SAMPLE, MISSING_SITES_HOM_REF, OUTPUT_ALL_ROWS);
genotypeConcordanceDetailMetrics.write(detailedMetricsFile);
// Calculate and score the contingency metrics
@@ -380,10 +344,86 @@ public class GenotypeConcordance extends CommandLineProgram {
return 0;
}
+ public static boolean classifyVariants(final Optional<VariantContext> truthContext,
+ final String truthSample,
+ final Optional<VariantContext> callContext,
+ final String callSample,
+ final int minGq, final int minDp) {
+ return classifyVariants(truthContext, truthSample, callContext, callSample, Optional.empty(), Optional.empty(), minGq, minDp);
+ }
+
+ /**
+ * Attempts to determine the concordance state given the truth and call variant contexts and optionally increments the genotype concordance
+ * count for the given variant type (SNP or INDEL). This will ignore cases where an indel was found in the truth and a SNP was found in
+ * the call, and vice versa. We typically fail to classify Mixed, Symbolic variants, or MNPs.
+ *
+ * @param truthContext A variant context representing truth
+ * @param truthSample The name of the truth sample
+ * @param callContext A variant context representing the call
+ * @param callSample The name of the call sample
+ * @param snpCounter optionally a place to increment the counts for SNP truth/call states
+ * @param indelCounter optionally a place to increment the counts for INDEL truth/call states
+ * @param minGq Threshold for filtering by genotype attribute GQ
+ * @param minDp Threshold for filtering by genotype attribute DP
+ * @return true if the concordance state could be classified.
+ */
+ public static boolean classifyVariants(final Optional<VariantContext> truthContext,
+ final String truthSample,
+ final Optional<VariantContext> callContext,
+ final String callSample,
+ final Optional<GenotypeConcordanceCounts> snpCounter,
+ final Optional<GenotypeConcordanceCounts> indelCounter,
+ final int minGq, final int minDp) {
+ final VariantContext.Type truthVariantContextType = truthContext.map(VariantContext::getType).orElse(NO_VARIATION);
+ final VariantContext.Type callVariantContextType = callContext.map(VariantContext::getType).orElse(NO_VARIATION);
+
+ // Determine the truth/call states first; the variant type checks below decide whether the pair can be classified.
+ // Unclassified include MIXED/MNP/Symbolic...
+ final TruthAndCallStates truthAndCallStates = determineState(truthContext.orElse(null), truthSample, callContext.orElse(null), callSample, minGq, minDp);
+ if (truthVariantContextType == SNP) {
+ if ((callVariantContextType == SNP) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
+ // Note. If truth is SNP and call is MIXED, the event will be logged in the snpCounter, with row = MIXED
+ snpCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
+ return true;
+ }
+ }
+ else if (truthVariantContextType == INDEL) {
+ // Note. If truth is Indel and call is MIXED, the event will be logged in the indelCounter, with row = MIXED
+ if ((callVariantContextType == INDEL) || (callVariantContextType == MIXED) || (callVariantContextType == NO_VARIATION)) {
+ indelCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
+ return true;
+ }
+ }
+ else if (truthVariantContextType == MIXED) {
+ // Note. If truth is MIXED and call is SNP, the event will be logged in the snpCounter, with column = MIXED
+ if (callVariantContextType == SNP) {
+ snpCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
+ return true;
+ }
+ // Note. If truth is MIXED and call is INDEL, the event will be logged in the indelCounter, with column = MIXED
+ else if (callVariantContextType == INDEL) {
+ indelCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
+ return true;
+ }
+ }
+ else if (truthVariantContextType == NO_VARIATION) {
+ if (callVariantContextType == SNP) {
+ snpCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
+ return true;
+ }
+ else if (callVariantContextType == INDEL) {
+ indelCounter.ifPresent(counter -> counter.increment(truthAndCallStates));
+ return true;
+ }
+ }
+ return false;
+ }
+
+
/**
* Method to add missing sites that are KNOWN to be HOM_REF in the case of the NIST truth data set.
*/
- private void addMissingTruthAndMissingCallStates(final double numVariants, final long intervalBaseCount, final GenotypeConcordanceCounts counter){
+ public static void addMissingTruthAndMissingCallStates(final double numVariants, final long intervalBaseCount, final GenotypeConcordanceCounts counter) {
final double countMissingMissing = intervalBaseCount-numVariants;
final TruthAndCallStates missingMissing = new TruthAndCallStates(TruthState.MISSING, CallState.MISSING);
counter.increment(missingMissing, countMissingMissing);
@@ -392,16 +432,17 @@ public class GenotypeConcordance extends CommandLineProgram {
/**
* Outputs the detailed statistics tables for SNP and Indel match categories.
**/
- private void outputDetailMetricsFile(final VariantContext.Type variantType, final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetricsFile,
- final GenotypeConcordanceCounts counter, final String truthSampleName, final String callSampleName) {
+ public static void outputDetailMetricsFile(final VariantContext.Type variantType, final MetricsFile<GenotypeConcordanceDetailMetrics,?> genotypeConcordanceDetailMetricsFile,
+ final GenotypeConcordanceCounts counter, final String truthSampleName, final String callSampleName,
+ final boolean missingSitesHomRef, final boolean outputAllRows) {
final GenotypeConcordanceSchemeFactory schemeFactory = new GenotypeConcordanceSchemeFactory();
- final GenotypeConcordanceScheme scheme = schemeFactory.getScheme(MISSING_SITES_HOM_REF);
+ final GenotypeConcordanceScheme scheme = schemeFactory.getScheme(missingSitesHomRef);
scheme.validateScheme();
for (final TruthState truthState : TruthState.values()) {
for (final CallState callState : CallState.values()) {
final long count = counter.getCount(truthState, callState);
final String contingencyValues = scheme.getContingencyStateString(truthState, callState);
- if (count > 0 || OUTPUT_ALL_ROWS) {
+ if (count > 0 || outputAllRows) {
final GenotypeConcordanceDetailMetrics detailMetrics = new GenotypeConcordanceDetailMetrics();
detailMetrics.VARIANT_TYPE = variantType;
detailMetrics.TRUTH_SAMPLE = truthSampleName;
@@ -433,7 +474,7 @@ public class GenotypeConcordance extends CommandLineProgram {
* @param minDp Threshold for filtering by genotype attribute DP
* @return TruthAndCallStates object containing the TruthState and CallState determined here.
*/
- final TruthAndCallStates determineState(final VariantContext truthContext, final String truthSample, final VariantContext callContext, final String callSample, final int minGq, final int minDp) {
+ final public static TruthAndCallStates determineState(final VariantContext truthContext, final String truthSample, final VariantContext callContext, final String callSample, final int minGq, final int minDp) {
TruthState truthState = null;
CallState callState = null;
@@ -596,7 +637,7 @@ public class GenotypeConcordance extends CommandLineProgram {
return new TruthAndCallStates(truthState, callState);
}
- final String getStringSuffix(final String longerString, final String shorterString, final String errorMsg) {
+ final static String getStringSuffix(final String longerString, final String shorterString, final String errorMsg) {
// Truth reference is shorter than call reference
if (!longerString.startsWith(shorterString)) {
throw new IllegalStateException(errorMsg);
@@ -623,73 +664,6 @@ class OrderedSet<T> extends ArrayList<T> {
}
}
-/** Little class to hold a pair of VariantContexts that are in sync with one another. */
-class VcTuple {
- public final VariantContext truthVariantContext;
- public final VariantContext callVariantContext;
-
- VcTuple(final VariantContext truthVariantContext, final VariantContext callVariantContext) {
- this.truthVariantContext = truthVariantContext;
- this.callVariantContext = callVariantContext;
- }
-}
-
-/** Iterator that takes a pair of iterators over VariantContexts and iterates over them in tandem. */
-class PairedVariantSubContextIterator implements Iterator<VcTuple> {
- private final PeekableIterator<VariantContext> truthIterator;
- private final String truthSample;
- private final PeekableIterator<VariantContext> callIterator;
- private final String callSample;
- private final VariantContextComparator comparator;
-
- PairedVariantSubContextIterator(final Iterator<VariantContext> truthIterator, final String truthSample,
- final Iterator<VariantContext> callIterator, final String callSample,
- final SAMSequenceDictionary dict) {
- this.truthIterator = new PeekableIterator<VariantContext>(truthIterator);
- this.truthSample = truthSample;
- this.callIterator = new PeekableIterator<VariantContext>(callIterator);
- this.callSample = callSample;
- this.comparator = new VariantContextComparator(dict);
- }
-
- @Override
- public boolean hasNext() {
- return this.truthIterator.hasNext() || this.callIterator.hasNext();
- }
-
- @Override
- public VcTuple next() {
- if (!hasNext()) throw new IllegalStateException("next() called while hasNext() is false.");
-
- final VariantContext truthVariantContext = this.truthIterator.hasNext() ? this.truthIterator.peek() : null;
- final VariantContext callVariantContext = this.callIterator.hasNext() ? this.callIterator.peek() : null;
-
- // If one or the other is null because there is no next, just return a one-sided tuple
- if (truthVariantContext == null) {
- return new VcTuple(null, this.callIterator.next().subContextFromSample(callSample));
- }
- else if (callVariantContext == null) {
- return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), null);
- }
-
- // Otherwise check the ordering and do the right thing
- final int ordering = this.comparator.compare(truthVariantContext, callVariantContext);
- if (ordering == 0) {
- return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), this.callIterator.next().subContextFromSample(callSample));
- }
- else if (ordering < 0) {
- return new VcTuple(this.truthIterator.next().subContextFromSample(truthSample), null);
- }
- else {
- return new VcTuple(null, this.callIterator.next().subContextFromSample(callSample));
- }
- }
-
- @Override public void remove() {
- throw new UnsupportedOperationException();
- }
-}
-
diff --git a/src/java/picard/vcf/GenotypeConcordanceContingencyMetrics.java b/src/main/java/picard/vcf/GenotypeConcordanceContingencyMetrics.java
similarity index 93%
rename from src/java/picard/vcf/GenotypeConcordanceContingencyMetrics.java
rename to src/main/java/picard/vcf/GenotypeConcordanceContingencyMetrics.java
index 6290b24..7c67680 100644
--- a/src/java/picard/vcf/GenotypeConcordanceContingencyMetrics.java
+++ b/src/main/java/picard/vcf/GenotypeConcordanceContingencyMetrics.java
@@ -16,7 +16,7 @@ public class GenotypeConcordanceContingencyMetrics extends MetricBase {
public GenotypeConcordanceContingencyMetrics() {
}
- GenotypeConcordanceContingencyMetrics(final VariantContext.Type variantType, final GenotypeConcordanceCounts concordanceCounts,
+ public GenotypeConcordanceContingencyMetrics(final VariantContext.Type variantType, final GenotypeConcordanceCounts concordanceCounts,
final String truthSample, final String callSample, final boolean missingSitesFlag) {
this.VARIANT_TYPE = variantType;
this.TRUTH_SAMPLE = truthSample;
diff --git a/src/java/picard/vcf/GenotypeConcordanceCounts.java b/src/main/java/picard/vcf/GenotypeConcordanceCounts.java
similarity index 98%
rename from src/java/picard/vcf/GenotypeConcordanceCounts.java
rename to src/main/java/picard/vcf/GenotypeConcordanceCounts.java
index 18d169f..cef0c36 100644
--- a/src/java/picard/vcf/GenotypeConcordanceCounts.java
+++ b/src/main/java/picard/vcf/GenotypeConcordanceCounts.java
@@ -2,6 +2,7 @@ package picard.vcf;
import java.util.Arrays;
import java.util.Collections;
+import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -231,7 +232,7 @@ public class GenotypeConcordanceCounts {
* Returns the count defined by the truth state set and call state set.
*/
public long getCount(final TruthAndCallStates truthAndCallStates) {
- final Histogram<TruthAndCallStates>.Bin bin = this.counter.get(truthAndCallStates);
+ final Histogram.Bin<TruthAndCallStates> bin = this.counter.get(truthAndCallStates);
return (bin == null ? 0L : (long) bin.getValue());
}
@@ -282,7 +283,7 @@ public class GenotypeConcordanceCounts {
public Map<ContingencyState, Long> getContingencyStateCounts(final GenotypeConcordanceScheme scheme) {
scheme.validateScheme();
- final Map<ContingencyState, Long> counts = new HashMap<ContingencyState, Long>();
+ final Map<ContingencyState, Long> counts = new EnumMap<ContingencyState, Long>(ContingencyState.class);
for (final ContingencyState contingencyState : ContingencyState.values()) {
counts.put(contingencyState, 0L);
}
diff --git a/src/java/picard/vcf/GenotypeConcordanceDetailMetrics.java b/src/main/java/picard/vcf/GenotypeConcordanceDetailMetrics.java
similarity index 100%
rename from src/java/picard/vcf/GenotypeConcordanceDetailMetrics.java
rename to src/main/java/picard/vcf/GenotypeConcordanceDetailMetrics.java
diff --git a/src/java/picard/vcf/GenotypeConcordanceScheme.java b/src/main/java/picard/vcf/GenotypeConcordanceScheme.java
similarity index 100%
rename from src/java/picard/vcf/GenotypeConcordanceScheme.java
rename to src/main/java/picard/vcf/GenotypeConcordanceScheme.java
diff --git a/src/java/picard/vcf/GenotypeConcordanceSchemeFactory.java b/src/main/java/picard/vcf/GenotypeConcordanceSchemeFactory.java
similarity index 100%
rename from src/java/picard/vcf/GenotypeConcordanceSchemeFactory.java
rename to src/main/java/picard/vcf/GenotypeConcordanceSchemeFactory.java
diff --git a/src/java/picard/vcf/GenotypeConcordanceStateCodes.java b/src/main/java/picard/vcf/GenotypeConcordanceStateCodes.java
similarity index 100%
rename from src/java/picard/vcf/GenotypeConcordanceStateCodes.java
rename to src/main/java/picard/vcf/GenotypeConcordanceStateCodes.java
diff --git a/src/java/picard/vcf/GenotypeConcordanceStates.java b/src/main/java/picard/vcf/GenotypeConcordanceStates.java
similarity index 97%
rename from src/java/picard/vcf/GenotypeConcordanceStates.java
rename to src/main/java/picard/vcf/GenotypeConcordanceStates.java
index 56a212c..eaa804f 100644
--- a/src/java/picard/vcf/GenotypeConcordanceStates.java
+++ b/src/main/java/picard/vcf/GenotypeConcordanceStates.java
@@ -67,7 +67,7 @@ public class GenotypeConcordanceStates {
* a reference sequence.
* The Enum constants must be in the same order as the truth state to allow for comparison.
*/
- enum CallState {
+ public enum CallState {
MISSING (MISSING_CODE.ordinal()),
HOM_REF (HOM_REF_CODE.ordinal()), // ref/ref, valid for all TruthStates
HET_REF_VAR1 (HET_REF_VAR1_CODE.ordinal()), // ref/var1, valid for all TruthStates
@@ -141,7 +141,7 @@ public class GenotypeConcordanceStates {
* NA denotes an invalid state that should not be reachable by the code.
* EMPTY denotes that no conclusion could be drawn from the data.
*/
- enum ContingencyState {
+ public enum ContingencyState {
TP,
FP,
TN,
@@ -153,7 +153,7 @@ public class GenotypeConcordanceStates {
/**
* A minute class to store the truth and call state respectively.
*/
- static class TruthAndCallStates implements Comparable<TruthAndCallStates>{
+ public static class TruthAndCallStates implements Comparable<TruthAndCallStates>{
public final TruthState truthState;
public final CallState callState;
diff --git a/src/java/picard/vcf/GenotypeConcordanceSummaryMetrics.java b/src/main/java/picard/vcf/GenotypeConcordanceSummaryMetrics.java
similarity index 96%
rename from src/java/picard/vcf/GenotypeConcordanceSummaryMetrics.java
rename to src/main/java/picard/vcf/GenotypeConcordanceSummaryMetrics.java
index f58b2ae..5dea725 100644
--- a/src/java/picard/vcf/GenotypeConcordanceSummaryMetrics.java
+++ b/src/main/java/picard/vcf/GenotypeConcordanceSummaryMetrics.java
@@ -15,7 +15,7 @@ public class GenotypeConcordanceSummaryMetrics extends MetricBase {
public GenotypeConcordanceSummaryMetrics() {
}
- GenotypeConcordanceSummaryMetrics(final VariantContext.Type variantType, final GenotypeConcordanceCounts concordanceCounts,
+ public GenotypeConcordanceSummaryMetrics(final VariantContext.Type variantType, final GenotypeConcordanceCounts concordanceCounts,
final String truthSample, final String callSample, final boolean missingSitesFlag) {
this.VARIANT_TYPE = variantType;
this.TRUTH_SAMPLE = truthSample;
diff --git a/src/java/picard/vcf/GvcfMetricAccumulator.java b/src/main/java/picard/vcf/GvcfMetricAccumulator.java
similarity index 100%
rename from src/java/picard/vcf/GvcfMetricAccumulator.java
rename to src/main/java/picard/vcf/GvcfMetricAccumulator.java
diff --git a/src/java/picard/vcf/LiftoverVcf.java b/src/main/java/picard/vcf/LiftoverVcf.java
similarity index 85%
rename from src/java/picard/vcf/LiftoverVcf.java
rename to src/main/java/picard/vcf/LiftoverVcf.java
index 1faa79f..4be6ab7 100644
--- a/src/java/picard/vcf/LiftoverVcf.java
+++ b/src/main/java/picard/vcf/LiftoverVcf.java
@@ -21,6 +21,8 @@ import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder;
import htsjdk.variant.vcf.VCFFileReader;
import htsjdk.variant.vcf.VCFFilterHeaderLine;
import htsjdk.variant.vcf.VCFHeader;
+import htsjdk.variant.vcf.VCFHeaderLineType;
+import htsjdk.variant.vcf.VCFInfoHeaderLine;
import htsjdk.variant.vcf.VCFRecordCodec;
import picard.cmdline.CommandLineProgram;
import picard.cmdline.CommandLineProgramProperties;
@@ -89,6 +91,16 @@ public class LiftoverVcf extends CommandLineProgram {
@Option(shortName = "WMC", doc = "Warn on missing contig.", optional = true)
public boolean WARN_ON_MISSING_CONTIG = false;
+ // Option on whether or not to write the original contig/position of the variant to the INFO field
+ @Option(doc = "Write the original contig/position for lifted variants to the INFO field.", optional = true)
+ public boolean WRITE_ORIGINAL_POSITION = false;
+
+ @Option(doc = "The minimum percent match required for a variant to be lifted.", optional = true)
+ public double LIFTOVER_MIN_MATCH = 1.0;
+
+ @Option(doc = "Allow INFO and FORMAT in the records that are not found in the header", optional = true)
+ public boolean ALLOW_MISSING_FIELDS_IN_HEADER = false;
+
// When a contig used in the chain is not in the reference, exit with this value instead of 0.
protected static int EXIT_CODE_WHEN_CONTIG_NOT_IN_REFERENCE = 1;
@@ -108,6 +120,18 @@ public class LiftoverVcf extends CommandLineProgram {
new VCFFilterHeaderLine(FILTER_MISMATCHING_REF_ALLELE, "Reference allele does not match reference genome sequence after liftover.")
);
+ /** Attribute used to store the name of the source contig/chromosome prior to liftover. */
+ public static final String ORIGINAL_CONTIG = "OriginalContig";
+
+ /** Attribute used to store the position of the variant on the source contig prior to liftover. */
+ public static final String ORIGINAL_START = "OriginalStart";
+
+ /** Metadata to be added to the Passing file. */
+ private static final List<VCFInfoHeaderLine> ATTRS = CollectionUtil.makeList(
+ new VCFInfoHeaderLine(ORIGINAL_CONTIG, 1, VCFHeaderLineType.String, "The name of the source contig/chromosome prior to liftover."),
+ new VCFInfoHeaderLine(ORIGINAL_START, 1, VCFHeaderLineType.String, "The position of the variant on the source contig prior to liftover.")
+ );
+
private final Log log = Log.getInstance(LiftoverVcf.class);
// Stock main method
@@ -143,13 +167,22 @@ public class LiftoverVcf extends CommandLineProgram {
final VCFHeader inHeader = in.getFileHeader();
final VCFHeader outHeader = new VCFHeader(inHeader);
outHeader.setSequenceDictionary(walker.getSequenceDictionary());
+ if (WRITE_ORIGINAL_POSITION) {
+ for (final VCFInfoHeaderLine line : ATTRS) outHeader.addMetaDataLine(line);
+ }
final VariantContextWriter out = new VariantContextWriterBuilder().setOption(Options.INDEX_ON_THE_FLY)
+ .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
.setOutputFile(OUTPUT).setReferenceDictionary(walker.getSequenceDictionary()).build();
out.writeHeader(outHeader);
- final VariantContextWriter rejects = new VariantContextWriterBuilder().setOutputFile(REJECT).unsetOption(Options.INDEX_ON_THE_FLY).build();
+ final VariantContextWriter rejects = new VariantContextWriterBuilder().setOutputFile(REJECT).unsetOption(Options.INDEX_ON_THE_FLY)
+ .modifyOption(Options.ALLOW_MISSING_FIELDS_IN_HEADER, ALLOW_MISSING_FIELDS_IN_HEADER)
+ .build();
final VCFHeader rejectHeader = new VCFHeader(in.getFileHeader());
for (final VCFFilterHeaderLine line : FILTERS) rejectHeader.addMetaDataLine(line);
+ if (WRITE_ORIGINAL_POSITION) {
+ for (final VCFInfoHeaderLine line : ATTRS) rejectHeader.addMetaDataLine(line);
+ }
rejects.writeHeader(rejectHeader);
@@ -161,7 +194,7 @@ public class LiftoverVcf extends CommandLineProgram {
log.info("Lifting variants over and sorting.");
final SortingCollection<VariantContext> sorter = SortingCollection.newInstance(VariantContext.class,
- new VCFRecordCodec(outHeader, VALIDATION_STRINGENCY != ValidationStringency.STRICT),
+ new VCFRecordCodec(outHeader, ALLOW_MISSING_FIELDS_IN_HEADER || VALIDATION_STRINGENCY != ValidationStringency.STRICT),
outHeader.getVCFRecordComparator(),
MAX_RECORDS_IN_RAM,
TMP_DIR);
@@ -173,14 +206,14 @@ public class LiftoverVcf extends CommandLineProgram {
for (final VariantContext ctx : in) {
++total;
final Interval source = new Interval(ctx.getContig(), ctx.getStart(), ctx.getEnd(), false, ctx.getContig() + ":" + ctx.getStart() + "-" + ctx.getEnd());
- final Interval target = liftOver.liftOver(source, 1.0);
+ final Interval target = liftOver.liftOver(source, LIFTOVER_MIN_MATCH);
// if the target is null OR (the target is reverse complemented AND the variant is an indel or mixed), then we cannot lift it over
if (target == null || (target.isNegativeStrand() && (ctx.isMixed() || ctx.isIndel()))) {
final String reason = (target == null) ? FILTER_NO_TARGET : FILTER_CANNOT_LIFTOVER_INDEL;
rejects.add(new VariantContextBuilder(ctx).filter(reason).make());
failedLiftover++;
- } else if (!refSeqs.containsValue(target.getContig())) {
+ } else if (!refSeqs.containsKey(target.getContig())) {
rejects.add(new VariantContextBuilder(ctx).filter(FILTER_NO_TARGET).make());
failedLiftover++;
@@ -217,6 +250,11 @@ public class LiftoverVcf extends CommandLineProgram {
builder.id(ctx.getID());
builder.attributes(ctx.getAttributes());
+
+ if (WRITE_ORIGINAL_POSITION) {
+ builder.attribute(ORIGINAL_CONTIG, source.getContig());
+ builder.attribute(ORIGINAL_START, source.getStart());
+ }
builder.genotypes(fixGenotypes(ctx.getGenotypes(), reverseComplementAlleleMap));
builder.filters(ctx.getFilters());
builder.log10PError(ctx.getLog10PError());
@@ -292,4 +330,4 @@ public class LiftoverVcf extends CommandLineProgram {
}
return fixedGenotypes;
}
-}
\ No newline at end of file
+}
diff --git a/src/java/picard/vcf/MakeSitesOnlyVcf.java b/src/main/java/picard/vcf/MakeSitesOnlyVcf.java
similarity index 100%
rename from src/java/picard/vcf/MakeSitesOnlyVcf.java
rename to src/main/java/picard/vcf/MakeSitesOnlyVcf.java
diff --git a/src/java/picard/vcf/MergeVcfs.java b/src/main/java/picard/vcf/MergeVcfs.java
similarity index 98%
rename from src/java/picard/vcf/MergeVcfs.java
rename to src/main/java/picard/vcf/MergeVcfs.java
index 7e5ec7c..f8294bf 100644
--- a/src/java/picard/vcf/MergeVcfs.java
+++ b/src/main/java/picard/vcf/MergeVcfs.java
@@ -138,8 +138,7 @@ public class MergeVcfs extends CommandLineProgram {
final VariantContextWriterBuilder builder = new VariantContextWriterBuilder()
.setOutputFile(OUTPUT)
- .setReferenceDictionary(sequenceDictionary)
- .clearOptions();
+ .setReferenceDictionary(sequenceDictionary);
if (CREATE_INDEX) {
builder.setOption(Options.INDEX_ON_THE_FLY);
}
diff --git a/src/main/java/picard/vcf/PairedVariantSubContextIterator.java b/src/main/java/picard/vcf/PairedVariantSubContextIterator.java
new file mode 100644
index 0000000..57b9291
--- /dev/null
+++ b/src/main/java/picard/vcf/PairedVariantSubContextIterator.java
@@ -0,0 +1,119 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2015-2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.vcf;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.util.PeekableIterator;
+import htsjdk.variant.variantcontext.VariantContext;
+import htsjdk.variant.variantcontext.VariantContextComparator;
+
+import java.util.Iterator;
+import java.util.Optional;
+
+/**
+ * An iterator that takes a pair of iterators over VariantContexts and iterates over them in tandem.
+ *
+ * A tuple will be returned with variant contexts for both contexts if present. Otherwise, the missing
+ * context at that site will be empty. The contexts will be returned in coordinate order.
+ *
+ * */
+public class PairedVariantSubContextIterator implements Iterator<PairedVariantSubContextIterator.VcfTuple> {
+ private final PeekableIterator<VariantContext> leftIterator;
+ private final String leftSample;
+ private final PeekableIterator<VariantContext> rightIterator;
+ private final String rightSample;
+ private final VariantContextComparator comparator;
+
+ public PairedVariantSubContextIterator(final Iterator<VariantContext> leftIterator, final String leftSample,
+ final Iterator<VariantContext> rightIterator, final String rightSample,
+ final SAMSequenceDictionary dict) {
+ this.leftIterator = new PeekableIterator<>(leftIterator);
+ this.leftSample = leftSample;
+ this.rightIterator = new PeekableIterator<>(rightIterator);
+ this.rightSample = rightSample;
+ this.comparator = new VariantContextComparator(dict);
+ }
+
+ @Override
+ public boolean hasNext() {
+ return this.leftIterator.hasNext() || this.rightIterator.hasNext();
+ }
+
+ @Override
+ public VcfTuple next() {
+ if (!hasNext()) throw new IllegalStateException("next() called while hasNext() is false.");
+
+ final Optional<VariantContext> leftVariantContext = this.leftIterator.hasNext() ? Optional.of(this.leftIterator.peek()) : Optional.empty();
+ final Optional<VariantContext> rightVariantContext = this.rightIterator.hasNext() ? Optional.of(this.rightIterator.peek()) : Optional.empty();
+
+ // If one or the other is missing because there is no next, just return a one-sided tuple
+ if (!leftVariantContext.isPresent() && !rightVariantContext.isPresent()) {
+ throw new IllegalStateException("BUG: Both contexts empty.");
+ }
+ else if (!leftVariantContext.isPresent()) {
+ return new VcfTuple(Optional.empty(), this.rightIterator.next().subContextFromSample(rightSample));
+ }
+ else if (!rightVariantContext.isPresent()) {
+ return new VcfTuple(this.leftIterator.next().subContextFromSample(leftSample), Optional.empty());
+ }
+ else { // Otherwise check the ordering and do the right thing
+ final int ordering = this.comparator.compare(leftVariantContext.get(), rightVariantContext.get());
+ if (ordering == 0) {
+ return new VcfTuple(this.leftIterator.next().subContextFromSample(leftSample), this.rightIterator.next().subContextFromSample(rightSample));
+ } else if (ordering < 0) {
+ return new VcfTuple(this.leftIterator.next().subContextFromSample(leftSample), Optional.empty());
+ } else {
+ return new VcfTuple(Optional.empty(), this.rightIterator.next().subContextFromSample(rightSample));
+ }
+ }
+ }
+
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+
+ /** Little class to hold a pair of VariantContexts that are in sync with one another. */
+ public static class VcfTuple {
+ public final Optional<VariantContext> leftVariantContext;
+ public final Optional<VariantContext> rightVariantContext;
+
+ private VcfTuple(final Optional<VariantContext> leftVariantContext, final Optional<VariantContext> rightVariantContext) {
+ this.leftVariantContext = leftVariantContext;
+ this.rightVariantContext = rightVariantContext;
+ }
+
+ VcfTuple(final VariantContext leftVariantContext, final VariantContext rightVariantContext) {
+ this(Optional.of(leftVariantContext), Optional.of(rightVariantContext));
+ }
+
+ VcfTuple(final Optional<VariantContext> leftVariantContext, final VariantContext rightVariantContext) {
+ this(leftVariantContext, Optional.of(rightVariantContext));
+ }
+
+ VcfTuple(final VariantContext leftVariantContext, final Optional<VariantContext> rightVariantContext) {
+ this(Optional.of(leftVariantContext), rightVariantContext);
+ }
+ }
+}
diff --git a/src/java/picard/vcf/RenameSampleInVcf.java b/src/main/java/picard/vcf/RenameSampleInVcf.java
similarity index 100%
rename from src/java/picard/vcf/RenameSampleInVcf.java
rename to src/main/java/picard/vcf/RenameSampleInVcf.java
diff --git a/src/java/picard/vcf/SortVcf.java b/src/main/java/picard/vcf/SortVcf.java
similarity index 100%
rename from src/java/picard/vcf/SortVcf.java
rename to src/main/java/picard/vcf/SortVcf.java
diff --git a/src/java/picard/vcf/SplitVcfs.java b/src/main/java/picard/vcf/SplitVcfs.java
similarity index 100%
rename from src/java/picard/vcf/SplitVcfs.java
rename to src/main/java/picard/vcf/SplitVcfs.java
diff --git a/src/java/picard/vcf/UpdateVcfSequenceDictionary.java b/src/main/java/picard/vcf/UpdateVcfSequenceDictionary.java
similarity index 100%
rename from src/java/picard/vcf/UpdateVcfSequenceDictionary.java
rename to src/main/java/picard/vcf/UpdateVcfSequenceDictionary.java
diff --git a/src/java/picard/vcf/VcfFormatConverter.java b/src/main/java/picard/vcf/VcfFormatConverter.java
similarity index 100%
rename from src/java/picard/vcf/VcfFormatConverter.java
rename to src/main/java/picard/vcf/VcfFormatConverter.java
diff --git a/src/java/picard/vcf/VcfToIntervalList.java b/src/main/java/picard/vcf/VcfToIntervalList.java
similarity index 100%
rename from src/java/picard/vcf/VcfToIntervalList.java
rename to src/main/java/picard/vcf/VcfToIntervalList.java
diff --git a/src/java/picard/vcf/filter/AlleleBalanceFilter.java b/src/main/java/picard/vcf/filter/AlleleBalanceFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/AlleleBalanceFilter.java
rename to src/main/java/picard/vcf/filter/AlleleBalanceFilter.java
diff --git a/src/java/picard/vcf/filter/DepthFilter.java b/src/main/java/picard/vcf/filter/DepthFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/DepthFilter.java
rename to src/main/java/picard/vcf/filter/DepthFilter.java
diff --git a/src/java/picard/vcf/filter/FilterApplyingVariantIterator.java b/src/main/java/picard/vcf/filter/FilterApplyingVariantIterator.java
similarity index 100%
rename from src/java/picard/vcf/filter/FilterApplyingVariantIterator.java
rename to src/main/java/picard/vcf/filter/FilterApplyingVariantIterator.java
diff --git a/src/java/picard/vcf/filter/FilterVcf.java b/src/main/java/picard/vcf/filter/FilterVcf.java
similarity index 100%
rename from src/java/picard/vcf/filter/FilterVcf.java
rename to src/main/java/picard/vcf/filter/FilterVcf.java
diff --git a/src/java/picard/vcf/filter/FisherStrandFilter.java b/src/main/java/picard/vcf/filter/FisherStrandFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/FisherStrandFilter.java
rename to src/main/java/picard/vcf/filter/FisherStrandFilter.java
diff --git a/src/java/picard/vcf/filter/GenotypeFilter.java b/src/main/java/picard/vcf/filter/GenotypeFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/GenotypeFilter.java
rename to src/main/java/picard/vcf/filter/GenotypeFilter.java
diff --git a/src/java/picard/vcf/filter/GenotypeQualityFilter.java b/src/main/java/picard/vcf/filter/GenotypeQualityFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/GenotypeQualityFilter.java
rename to src/main/java/picard/vcf/filter/GenotypeQualityFilter.java
diff --git a/src/java/picard/vcf/filter/QdFilter.java b/src/main/java/picard/vcf/filter/QdFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/QdFilter.java
rename to src/main/java/picard/vcf/filter/QdFilter.java
diff --git a/src/java/picard/vcf/filter/VariantFilter.java b/src/main/java/picard/vcf/filter/VariantFilter.java
similarity index 100%
rename from src/java/picard/vcf/filter/VariantFilter.java
rename to src/main/java/picard/vcf/filter/VariantFilter.java
diff --git a/src/java/picard/vcf/processor/VariantAccumulatorExecutor.java b/src/main/java/picard/vcf/processor/VariantAccumulatorExecutor.java
similarity index 100%
rename from src/java/picard/vcf/processor/VariantAccumulatorExecutor.java
rename to src/main/java/picard/vcf/processor/VariantAccumulatorExecutor.java
diff --git a/src/java/picard/vcf/processor/VariantIteratorProducer.java b/src/main/java/picard/vcf/processor/VariantIteratorProducer.java
similarity index 100%
rename from src/java/picard/vcf/processor/VariantIteratorProducer.java
rename to src/main/java/picard/vcf/processor/VariantIteratorProducer.java
diff --git a/src/java/picard/vcf/processor/VariantProcessor.java b/src/main/java/picard/vcf/processor/VariantProcessor.java
similarity index 100%
rename from src/java/picard/vcf/processor/VariantProcessor.java
rename to src/main/java/picard/vcf/processor/VariantProcessor.java
diff --git a/src/java/picard/vcf/processor/VcfFileSegment.java b/src/main/java/picard/vcf/processor/VcfFileSegment.java
similarity index 100%
rename from src/java/picard/vcf/processor/VcfFileSegment.java
rename to src/main/java/picard/vcf/processor/VcfFileSegment.java
diff --git a/src/java/picard/vcf/processor/VcfFileSegmentGenerator.java b/src/main/java/picard/vcf/processor/VcfFileSegmentGenerator.java
similarity index 100%
rename from src/java/picard/vcf/processor/VcfFileSegmentGenerator.java
rename to src/main/java/picard/vcf/processor/VcfFileSegmentGenerator.java
diff --git a/src/java/picard/vcf/processor/util/PredicateFilterDecoratingClosableIterator.java b/src/main/java/picard/vcf/processor/util/PredicateFilterDecoratingClosableIterator.java
similarity index 100%
rename from src/java/picard/vcf/processor/util/PredicateFilterDecoratingClosableIterator.java
rename to src/main/java/picard/vcf/processor/util/PredicateFilterDecoratingClosableIterator.java
diff --git a/src/scripts/picard/analysis/baseDistributionByCycle.R b/src/main/resources/picard/analysis/baseDistributionByCycle.R
similarity index 100%
rename from src/scripts/picard/analysis/baseDistributionByCycle.R
rename to src/main/resources/picard/analysis/baseDistributionByCycle.R
diff --git a/src/scripts/picard/analysis/gcBias.R b/src/main/resources/picard/analysis/gcBias.R
similarity index 100%
rename from src/scripts/picard/analysis/gcBias.R
rename to src/main/resources/picard/analysis/gcBias.R
diff --git a/src/scripts/picard/analysis/insertSizeHistogram.R b/src/main/resources/picard/analysis/insertSizeHistogram.R
similarity index 100%
rename from src/scripts/picard/analysis/insertSizeHistogram.R
rename to src/main/resources/picard/analysis/insertSizeHistogram.R
diff --git a/src/scripts/picard/analysis/meanQualityByCycle.R b/src/main/resources/picard/analysis/meanQualityByCycle.R
similarity index 100%
rename from src/scripts/picard/analysis/meanQualityByCycle.R
rename to src/main/resources/picard/analysis/meanQualityByCycle.R
diff --git a/src/scripts/picard/analysis/qualityScoreDistribution.R b/src/main/resources/picard/analysis/qualityScoreDistribution.R
similarity index 100%
rename from src/scripts/picard/analysis/qualityScoreDistribution.R
rename to src/main/resources/picard/analysis/qualityScoreDistribution.R
diff --git a/src/scripts/picard/analysis/rnaSeqCoverage.R b/src/main/resources/picard/analysis/rnaSeqCoverage.R
similarity index 100%
rename from src/scripts/picard/analysis/rnaSeqCoverage.R
rename to src/main/resources/picard/analysis/rnaSeqCoverage.R
diff --git a/src/scripts/picard/analysis/rrbsQc.R b/src/main/resources/picard/analysis/rrbsQc.R
similarity index 100%
rename from src/scripts/picard/analysis/rrbsQc.R
rename to src/main/resources/picard/analysis/rrbsQc.R
diff --git a/src/main/resources/picard/analysis/wgsHistogram.R b/src/main/resources/picard/analysis/wgsHistogram.R
new file mode 100644
index 0000000..6a05076
--- /dev/null
+++ b/src/main/resources/picard/analysis/wgsHistogram.R
@@ -0,0 +1,121 @@
+##
+# Copyright (c) 2016, Nils Homer
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+# POSSIBILITY OF SUCH DAMAGE.
+##
+
+# Script to generate a chart for the fraction of bases at a given mean fold coverage.
+# @author Nils Homer
+
+# Parse the arguments
+args <- commandArgs(trailing=T)
+metricsFile <- args[1]
+outputFile <- args[2]
+bamFile <- ifelse(length(args) < 3, NA, args[3])
+subtitle <- ifelse(length(args) < 4, "", args[4])
+
+# Figure out where the metrics and the histogram are in the file and parse them out
+startFinder <- scan(metricsFile, what="character", sep="\n", quiet=TRUE, blank.lines.skip=FALSE)
+
+firstBlankLine=0
+
+for (i in 1:length(startFinder))
+{
+ if (startFinder[i] == "") {
+ if (firstBlankLine==0) {
+ firstBlankLine=i+1
+ } else {
+ secondBlankLine=i+1
+ break
+ }
+ }
+}
+
+metrics <- read.table(metricsFile, header=T, nrows=2, sep="\t", skip=firstBlankLine)
+histogram <- read.table(metricsFile, header=T, sep="\t", skip=secondBlankLine)
+
+coverages = rbind(histogram$coverage, histogram$coverage)
+counts = rbind(histogram$count_WHOLE_GENOME, histogram$count_NON_ZERO_REGIONS)
+labels = c("Whole Genome", "Non-Zero Regions")
+colors = c("blue", "green")
+
+ymins = c();
+ymaxs = c();
+percentOfMeans = c()
+percentCovereds = c();
+
+for (i in 1:2) {
+ coverage = coverages[i,];
+ count = counts[i,];
+
+ coverage = coverage[!is.na(count)];
+ count = count[!is.na(count)];
+
+ meanCoverage = metrics$MEAN_COVERAGE[i];
+ percentOfMean <- coverage / meanCoverage; # x-axis
+ percentCovered <- rep(0, length(count)); # y-axis
+
+ # must do a cumulative sum of percentCovered
+ totalCount = sum(as.numeric(count));
+ for (j in 1:length(percentCovered)) {
+ percentCovered[j] = 100.0 * sum(as.numeric(count[j:length(percentCovered)])) / totalCount;
+ }
+
+ ymin = percentCovered[round(meanCoverage+1)]
+ ymax = min(100,max(percentCovered));
+
+ ymins = append(ymins, ymin);
+ ymaxs = append(ymaxs, ymax);
+ percentOfMeans = append(percentOfMeans, list(percentOfMean));
+ percentCovereds = append(percentCovereds, list(percentCovered));
+}
+
+ymin = min(ymins);
+ymax = max(ymaxs);
+
+# Then plot the histogram as a PDF
+pdf(outputFile);
+
+plot(x=c(0, 1.0),
+ y=c(ymin, ymax),
+ xlim=c(0, 1.0),
+ ylim=c(ymin, ymax),
+ type="n",
+ main=paste("WGS Base Coverage Plot", ifelse(is.na(bamFile),"",paste("\nin file",bamFile))," ",ifelse(subtitle == "","",paste("(",subtitle,")",sep="")),sep=""),
+ xlab="Fold Coverage of Mean",
+ ylab="% of Bases Covered");
+
+for (i in 1:2) {
+ label = labels[i];
+ color = colors[i]
+ percentOfMean = percentOfMeans[[i]];
+ percentCovered = percentCovereds[[i]];
+
+ lines(percentOfMean, percentCovered, col=color, lwd=5);
+}
+
+legend(x="topright", legend=labels, lwd=5, col=colors);
+
+dev.off()
+
diff --git a/src/scripts/picard/docker_helper.sh b/src/main/resources/picard/docker_helper.sh
similarity index 100%
rename from src/scripts/picard/docker_helper.sh
rename to src/main/resources/picard/docker_helper.sh
diff --git a/src/main/resources/release_picard.sh b/src/main/resources/release_picard.sh
new file mode 100755
index 0000000..78dabed
--- /dev/null
+++ b/src/main/resources/release_picard.sh
@@ -0,0 +1,290 @@
+#! /bin/bash
+
+# The MIT License
+#
+# Copyright (c) $today.year The Broad Institute
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+echo "This release script has been DEPRECATED. Please use the gradle build file for releases."
+#
+# PROGNAME=`basename $0`
+#
+# function usage () {
+# echo "USAGE: $PROGNAME <release-id>" >&2
+# echo "Tags Github Picard source, checks out and builds sources, uploads build results to Sourceforge.">&2
+# echo "-t <tmpdir> Build in <tmpdir>. Default: $TMPDIR." >&2
+# }
+#
+# function create_release () {
+# local token="$1";
+# local owner="$2";
+# local repo="$3";
+# local tag_name="$4";
+# local target_commitish="$5";
+# local name="$6";
+# local body="$7";
+# local draft="$8";
+# local prerelease="$9";
+#
+# local payload="\"tag_name\":\"$tag_name\"";
+# payload="$payload,\"target_commitish\":\"$target_commitish\"";
+# payload="$payload,\"name\":\"$name\"";
+# payload="$payload,\"body\":\"$body\"";
+# payload="$payload,\"draft\":$draft";
+# payload="$payload,\"prerelease\":$prerelease";
+# payload="{$payload}";
+#
+# RELEASE_RESPONSE=$(curl --fail -s -S -X POST \
+# https://api.github.com/repos/$owner/$repo/releases \
+# -A "create-release" \
+# -H "Accept: application/vnd.github.v3+json" \
+# -H "Content-Type: application/json" \
+# -H "Authorization: token $token" \
+# -d "$payload");
+#
+# # NB: we must set the RELEASE_GITHUB_ID as the ID in the returned json response
+# export RELEASE_GITHUB_ID=$(echo "$RELEASE_RESPONSE" | sed -e 's_",.*__g' -e 's_.*/__g')
+# }
+#
+# function upload_asset () {
+# local token="$1";
+# local owner="$2";
+# local repo="$3";
+# local name="$4";
+# local content_type="$5";
+# local file="$6";
+# local id="$7";
+#
+# curl --fail -s -S -X POST \
+# https://uploads.github.com/repos/$owner/$repo/releases/$id/assets?name=$name \
+# -A "upload-asset" \
+# -H "Accept: application/vnd.github.v3+json" \
+# -H "Content-Type: $content_type" \
+# -H "Authorization: token $token" \
+# --progress-bar \
+# --data-binary @"$file";
+# }
+#
+# function tag_exists() {
+# git tag | grep -q "$1$"
+# if test $? = 0
+# then return 0
+# else return 1
+# fi
+# }
+#
+# function remote_does_not_exist() {
+# git ls-remote $1 2>/dev/null 1>/dev/null
+# if test $? = 0
+# then return 1
+# else return 0
+# fi
+# }
+#
+# function remote_tag_does_not_exist() {
+# git ls-remote --tags $2 | grep -q "$1$";
+# if test $? = 0
+# then return 0
+# else return 1
+# fi
+# }
+#
+#
+# # This method called once for picard and once for htsjdk
+# function tag_it() {
+#
+# # tag must not exist
+# if tag_exists $RELEASE_ID
+# then echo "ERROR: Tag $RELEASE_ID locally already exists"
+# return 1
+# fi
+#
+# # remote must exist
+# if remote_does_not_exist $REMOTE
+# then echo "ERROR: Remote $REMOTE does not exist"
+# return 1
+# fi
+#
+# # tag at remote must not exist
+# if remote_tag_does_not_exist $RELEASE_ID $REMOTE
+# then echo "ERROR: Tag $RELEASE_ID at remote $REMOTE already exists"
+# return 1
+# fi
+#
+# # tag the branch locally then push to remote
+# echo Tagging master as $RELEASE_ID and pushing the tag to $REMOTE
+# # NB: we could use annotated tags in the future to store release notes, etc.
+# git tag $RELEASE_ID
+# git push $REMOTE $RELEASE_ID # TODO: should we check this return value in case someone made a tag since we last checked?
+# }
+#
+# set -e
+#
+# while getopts "ht:" options; do
+# case $options in
+# t ) TMPDIR=$OPTARG;;
+# h ) usage;;
+# \? ) usage
+# exit 1;;
+# * ) usage
+# exit 1;;
+# esac
+# done
+# shift $(($OPTIND - 1))
+#
+# if [ -z $GITHUB_USER_TOKEN ]
+# then echo "ERROR: environment variable GITHUB_USER_TOKEN must be set." >&2
+# usage
+# exit 1
+# fi
+#
+# if (( $# != 1 ))
+# then echo "ERROR: Incorrect number of arguments." >&2
+# usage
+# exit 1
+# fi
+#
+# if [[ x"$EDITOR" == x ]]
+# then echo "EDITOR environment variable must be set." >&2
+# exit 1
+# fi
+#
+# java_version=`java -version 2>&1 | fgrep -i version`
+#
+# PICARDGITROOT=git@github.com:broadinstitute/picard.git
+# REMOTE=origin
+# GHPAGES_BRANCH="gh-pages"
+#
+# RELEASE_ID=$1
+#
+# # Since releases are lexically sorted, need to filter in order to have 1.1xx be at the bottom.
+# PICARD_PREV_RELEASE_ID=`git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1`
+#
+# if [[ -e $TMPDIR/picard ]]
+# then echo "$TMPDIR/picard already exists. Please remove or specify a different TMPDIR." >&2
+# exit 1
+# fi
+# echo "Using TMPDIR: $TMPDIR";
+# cd $TMPDIR
+#
+# # clone
+# git clone $PICARDGITROOT picard
+# cd picard
+# ant clone-htsjdk
+# #by default clone will grab the latest release we need master
+# cd htsjdk
+# git checkout master
+# cd ..
+# ant clean # clean shouldn't be necessary, but no harm
+#
+# # Since releases are lexically sorted, need to filter in order to have 1.1xx be at the bottom.
+# PICARD_PREV_RELEASE_ID=`git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1`
+# HTSJDK_PREV_RELEASE_ID=$(cd htsjdk; git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1)
+#
+# # Tag in both repos
+# for sandbox in . htsjdk
+# do pushd $sandbox
+# tag_it || exit 1
+# popd
+# done
+#
+# ant -lib lib/ant test-htsjdk test
+#
+# ant -lib lib/ant clean all javadoc
+#
+# mkdir -p deploy/picard-tools/$RELEASE_ID
+#
+# git log --name-status ${PICARD_PREV_RELEASE_ID}..${RELEASE_ID} > deploy/picard-tools/$RELEASE_ID/README.txt
+#
+# (cd htsjdk; git log --name-status ${HTSJDK_PREV_RELEASE_ID}..${RELEASE_ID}) >> deploy/picard-tools/$RELEASE_ID/README.txt
+#
+# echo 'Edit release notes and exit editor when finished.'
+#
+# $EDITOR deploy/picard-tools/$RELEASE_ID/README.txt
+#
+# cp dist/picard-tools-$RELEASE_ID.zip deploy/picard-tools/$RELEASE_ID/
+#
+# # Make all files to be pushed to Sourceforge writable by group so that another Picard admin can overwrite them.
+#
+# chmod -R gu+rw javadoc deploy dist
+#
+# find javadoc deploy dist -type d -exec chmod g+s '{}' ';'
+#
+# # Move the javadoc directory to a temporary location
+# mv javadoc tmp_javadoc
+#
+# # Copy over javadoc for htsjdk since we are in the picard directory
+# # NB: need to move javadoc to a tmp directory since the javadoc
+# # directory in the gh-pages branch may already exist.
+# cd htsjdk
+# mkdir tmp_javadoc
+# cp -r ../tmp_javadoc/htsjdk tmp_javadoc/.
+# cd ../
+#
+# # Update the javadoc
+# for sandbox in . htsjdk
+# do pushd $sandbox
+# if [ "." == $sandbox ]; then
+# sandbox="picard";
+# fi
+# echo "Updating the javadoc for $sandbox"
+# # Checkout the gh-pages branch
+# git checkout -b $GHPAGES_BRANCH $REMOTE/$GHPAGES_BRANCH
+# # Copy over from the tmp javadoc directory
+# if [ ! -d javadoc ]; then
+# mkdir javadoc;
+# fi
+# rsync -avP --delete-after tmp_javadoc/* javadoc/.
+# # Remove the tmp directory as we no longer need it
+# rm -r tmp_javadoc
+# # Add the new javadoc files
+# find javadoc/$sandbox | xargs git add
+# # Commit!
+# git commit -a --allow-empty -m "Updating javadoc for release: $RELEASE_ID"
+# # NB: assumes the push will not fail
+# git push $REMOTE $GHPAGES_BRANCH
+# # Reset the repository to master
+# git checkout master
+# echo "Updated the javadoc for $sandbox"
+# popd
+# done
+#
+# # Publish a release and upload assets
+# echo "Creating a release on github for htsjdk and picard"
+# create_release $GITHUB_USER_TOKEN samtools htsjdk $RELEASE_ID "" $RELEASE_ID "Release $RELEASE_ID" "false" "false";
+# create_release $GITHUB_USER_TOKEN broadinstitute picard $RELEASE_ID "" $RELEASE_ID "Release $RELEASE_ID" "false" "false";
+# echo "Github release id: $RELEASE_GITHUB_ID"
+# echo "Updating the release zip and README.txt to github"
+# upload_asset $GITHUB_USER_TOKEN broadinstitute picard picard-tools-$RELEASE_ID.zip "application/zip" deploy/picard-tools/$RELEASE_ID/picard-tools-$RELEASE_ID.zip $RELEASE_GITHUB_ID;
+# upload_asset $GITHUB_USER_TOKEN broadinstitute picard README.txt "application/zip" deploy/picard-tools/$RELEASE_ID/README.txt $RELEASE_GITHUB_ID;
+#
+# # Update the website
+# echo "Updating the website"
+# # Assumes the gh-pages branch is already locally created
+# git checkout $GHPAGES_BRANCH;
+# cd dist/html
+# cp inc/*.html program_usage/*.html picard-metric-definitions.html ../../_includes/.
+# cd ../../
+# find _includes | xargs git add
+# git commit -a --allow-empty -m "Adding website files for $RELEASE_ID"
+# git push $REMOTE $GHPAGES_BRANCH
+#
+# # Move back to master just in case
+# git checkout master
+#
+# echo "Release was successful!"
diff --git a/src/scripts/release_picard.sh b/src/scripts/release_picard.sh
deleted file mode 100755
index 94654ec..0000000
--- a/src/scripts/release_picard.sh
+++ /dev/null
@@ -1,285 +0,0 @@
-#! /bin/bash
-
-# The MIT License
-#
-# Copyright (c) $today.year The Broad Institute
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-
-PROGNAME=`basename $0`
-
-function usage () {
- echo "USAGE: $PROGNAME <release-id>" >&2
- echo "Tags Github Picard source, checks out and builds sources, uploads build results to Sourceforge.">&2
- echo "-t <tmpdir> Build in <tmpdir>. Default: $TMPDIR." >&2
-}
-
-function create_release () {
- local token="$1";
- local owner="$2";
- local repo="$3";
- local tag_name="$4";
- local target_commitish="$5";
- local name="$6";
- local body="$7";
- local draft="$8";
- local prerelease="$9";
-
- local payload="\"tag_name\":\"$tag_name\"";
- payload="$payload,\"target_commitish\":\"$target_commitish\"";
- payload="$payload,\"name\":\"$name\"";
- payload="$payload,\"body\":\"$body\"";
- payload="$payload,\"draft\":$draft";
- payload="$payload,\"prerelease\":$prerelease";
- payload="{$payload}";
-
- RELEASE_RESPONSE=$(curl --fail -s -S -X POST \
- https://api.github.com/repos/$owner/$repo/releases \
- -A "create-release" \
- -H "Accept: application/vnd.github.v3+json" \
- -H "Content-Type: application/json" \
- -H "Authorization: token $token" \
- -d "$payload");
-
- # NB: we must set the RELEASE_GITHUB_ID as the ID in the returned json response
- export RELEASE_GITHUB_ID=$(echo "$RELEASE_RESPONSE" | sed -e 's_",.*__g' -e 's_.*/__g')
-}
-
-function upload_asset () {
- local token="$1";
- local owner="$2";
- local repo="$3";
- local name="$4";
- local content_type="$5";
- local file="$6";
- local id="$7";
-
- curl --fail -s -S -X POST \
- https://uploads.github.com/repos/$owner/$repo/releases/$id/assets?name=$name \
- -A "upload-asset" \
- -H "Accept: application/vnd.github.v3+json" \
- -H "Content-Type: $content_type" \
- -H "Authorization: token $token" \
- --progress-bar \
- --data-binary @"$file";
-}
-
-function tag_exists() {
- git tag | grep -q "$1$"
- if test $? = 0
- then return 0
- else return 1
- fi
-}
-
-function remote_does_not_exist() {
- git ls-remote $1 2>/dev/null 1>/dev/null
- if test $? = 0
- then return 1
- else return 0
- fi
-}
-
-function remote_tag_does_not_exist() {
- git ls-remote --tags $2 | grep -q "$1$";
- if test $? = 0
- then return 0
- else return 1
- fi
-}
-
-
-# This method called once for picard and once for htsjdk
-function tag_it() {
-
- # tag must not exist
- if tag_exists $RELEASE_ID
- then echo "ERROR: Tag $RELEASE_ID locally already exists"
- return 1
- fi
-
- # remote must exist
- if remote_does_not_exist $REMOTE
- then echo "ERROR: Remote $REMOTE does not exist"
- return 1
- fi
-
- # tag at remote must not exist
- if remote_tag_does_not_exist $RELEASE_ID $REMOTE
- then echo "ERROR: Tag $RELEASE_ID at remote $REMOTE already exists"
- return 1
- fi
-
- # tag the branch locally then push to remote
- echo Tagging master as $RELEASE_ID and pushing the tag to $REMOTE
- # NB: we could use annotated tags in the future to store release notes, etc.
- git tag $RELEASE_ID
- git push $REMOTE $RELEASE_ID # TODO: should we check this return value in case someone made a tag since we last checked?
-}
-
-set -e
-
-while getopts "ht:" options; do
- case $options in
- t ) TMPDIR=$OPTARG;;
- h ) usage;;
- \? ) usage
- exit 1;;
- * ) usage
- exit 1;;
- esac
-done
-shift $(($OPTIND - 1))
-
-if [ -z $GITHUB_USER_TOKEN ]
-then echo "ERROR: environment variable GITHUB_USER_TOKEN must be set." >&2
- usage
- exit 1
-fi
-
-if (( $# != 1 ))
-then echo "ERROR: Incorrect number of arguments." >&2
- usage
- exit 1
-fi
-
-if [[ x"$EDITOR" == x ]]
-then echo "EDITOR environment variable must be set." >&2
- exit 1
-fi
-
-java_version=`java -version 2>&1 | fgrep -i version`
-
-PICARDGITROOT=git@github.com:broadinstitute/picard.git
-REMOTE=origin
-GHPAGES_BRANCH="gh-pages"
-
-RELEASE_ID=$1
-
-# Since releases are lexically sorted, need to filter in order to have 1.1xx be at the bottom.
-PICARD_PREV_RELEASE_ID=`git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1`
-
-if [[ -e $TMPDIR/picard ]]
-then echo "$TMPDIR/picard already exists. Please remove or specify a different TMPDIR." >&2
- exit 1
-fi
-echo "Using TMPDIR: $TMPDIR";
-cd $TMPDIR
-
-# clone
-git clone $PICARDGITROOT picard
-cd picard
-ant clone-htsjdk
-ant clean # clean shouldn't be necessary, but no harm
-
-# Since releases are lexically sorted, need to filter in order to have 1.1xx be at the bottom.
-PICARD_PREV_RELEASE_ID=`git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1`
-HTSJDK_PREV_RELEASE_ID=$(cd htsjdk; git ls-remote --tags | grep -v "{}$" | awk '{print $2}' | sed -e "s_.*/__g" | egrep '[.]\d\d\d' | tail -1)
-
-# Tag in both repos
-for sandbox in . htsjdk
-do pushd $sandbox
- tag_it || exit 1
- popd
-done
-
-ant -lib lib/ant test-htsjdk test
-
-ant -lib lib/ant clean all javadoc
-
-mkdir -p deploy/picard-tools/$RELEASE_ID
-
-git log --name-status ${PICARD_PREV_RELEASE_ID}..${RELEASE_ID} > deploy/picard-tools/$RELEASE_ID/README.txt
-
-(cd htsjdk; git log --name-status ${HTSJDK_PREV_RELEASE_ID}..${RELEASE_ID}) >> deploy/picard-tools/$RELEASE_ID/README.txt
-
-echo 'Edit release notes and exit editor when finished.'
-
-$EDITOR deploy/picard-tools/$RELEASE_ID/README.txt
-
-cp dist/picard-tools-$RELEASE_ID.zip deploy/picard-tools/$RELEASE_ID/
-
-# Make all files to be pushed to Sourceforge writable by group so that another Picard admin can overwrite them.
-
-chmod -R gu+rw javadoc deploy dist
-
-find javadoc deploy dist -type d -exec chmod g+s '{}' ';'
-
-# Move the javadoc directory to a temporary location
-mv javadoc tmp_javadoc
-
-# Copy over javadoc for htsjdk since we are in the picard directory
-# NB: need to move javadoc to a tmp directory since the javadoc
-# directory in the gh-pages branch may already exist.
-cd htsjdk
-mkdir tmp_javadoc
-cp -r ../tmp_javadoc/htsjdk tmp_javadoc/.
-cd ../
-
-# Update the javadoc
-for sandbox in . htsjdk
-do pushd $sandbox
- if [ "." == $sandbox ]; then
- sandbox="picard";
- fi
- echo "Updating the javadoc for $sandbox"
- # Checkout the gh-pages branch
- git checkout -b $GHPAGES_BRANCH $REMOTE/$GHPAGES_BRANCH
- # Copy over from the tmp javadoc directory
- if [ ! -d javadoc ]; then
- mkdir javadoc;
- fi
- rsync -avP --delete-after tmp_javadoc/* javadoc/.
- # Remove the tmp directory as we no longer need it
- rm -r tmp_javadoc
- # Add the new javadoc files
- find javadoc/$sandbox | xargs git add
- # Commit!
- git commit -a --allow-empty -m "Updating javadoc for release: $RELEASE_ID"
- # NB: assumes the push will not fail
- git push $REMOTE $GHPAGES_BRANCH
- # Reset the repository to master
- git checkout master
- echo "Updated the javadoc for $sandbox"
- popd
-done
-
-# Publish a release and upload assets
-echo "Creating a release on github for htsjdk and picard"
-create_release $GITHUB_USER_TOKEN samtools htsjdk $RELEASE_ID "" $RELEASE_ID "Release $RELEASE_ID" "false" "false";
-create_release $GITHUB_USER_TOKEN broadinstitute picard $RELEASE_ID "" $RELEASE_ID "Release $RELEASE_ID" "false" "false";
-echo "Github release id: $RELEASE_GITHUB_ID"
-echo "Updating the release zip and README.txt to github"
-upload_asset $GITHUB_USER_TOKEN broadinstitute picard picard-tools-$RELEASE_ID.zip "application/zip" deploy/picard-tools/$RELEASE_ID/picard-tools-$RELEASE_ID.zip $RELEASE_GITHUB_ID;
-upload_asset $GITHUB_USER_TOKEN broadinstitute picard README.txt "application/zip" deploy/picard-tools/$RELEASE_ID/README.txt $RELEASE_GITHUB_ID;
-
-# Update the website
-echo "Updating the website"
-# Assumes the gh-pages branch is already locally created
-git checkout $GHPAGES_BRANCH;
-cd dist/html
-cp inc/*.html program_usage/*.html picard-metric-definitions.html ../../_includes/.
-cd ../../
-find _includes | xargs git add
-git commit -a --allow-empty -m "Adding website files for $RELEASE_ID"
-git push $REMOTE $GHPAGES_BRANCH
-
-# Move back to master just in case
-git checkout master
-
-echo "Release was successful!"
diff --git a/src/tests/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java b/src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java
rename to src/test/java/picard/analysis/CollectAlignmentSummaryMetricsTest.java
diff --git a/src/tests/java/picard/analysis/CollectGcBiasMetricsTest.java b/src/test/java/picard/analysis/CollectGcBiasMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/CollectGcBiasMetricsTest.java
rename to src/test/java/picard/analysis/CollectGcBiasMetricsTest.java
diff --git a/src/tests/java/picard/analysis/CollectInsertSizeMetricsTest.java b/src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/CollectInsertSizeMetricsTest.java
rename to src/test/java/picard/analysis/CollectInsertSizeMetricsTest.java
diff --git a/src/tests/java/picard/analysis/CollectMultipleMetricsTest.java b/src/test/java/picard/analysis/CollectMultipleMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/CollectMultipleMetricsTest.java
rename to src/test/java/picard/analysis/CollectMultipleMetricsTest.java
diff --git a/src/tests/java/picard/analysis/CollectQualityYieldMetricsTest.java b/src/test/java/picard/analysis/CollectQualityYieldMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/CollectQualityYieldMetricsTest.java
rename to src/test/java/picard/analysis/CollectQualityYieldMetricsTest.java
diff --git a/src/tests/java/picard/analysis/CollectRnaSeqMetricsTest.java b/src/test/java/picard/analysis/CollectRnaSeqMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/CollectRnaSeqMetricsTest.java
rename to src/test/java/picard/analysis/CollectRnaSeqMetricsTest.java
diff --git a/src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java b/src/test/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
similarity index 99%
rename from src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
rename to src/test/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
index d4ae1e4..1ad9cc7 100644
--- a/src/tests/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
+++ b/src/test/java/picard/analysis/CollectWgsMetricsFromQuerySortedTest.java
@@ -16,7 +16,7 @@ import java.util.List;
*
* @author Eric Banks
*/
-
+@Deprecated
public class CollectWgsMetricsFromQuerySortedTest extends CommandLineProgramTest {
private static final File TEST_DATA_DIR = new File("testdata/picard/sam");
diff --git a/src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java b/src/test/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
old mode 100755
new mode 100644
similarity index 60%
rename from src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
rename to src/test/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
index 2912bde..90a63e5
--- a/src/tests/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
+++ b/src/test/java/picard/analysis/CollectWgsMetricsFromSampledSitesTest.java
@@ -27,14 +27,20 @@ import htsjdk.samtools.metrics.MetricsFile;
import org.testng.Assert;
import org.testng.annotations.Test;
import picard.cmdline.CommandLineProgramTest;
+import picard.cmdline.PicardCommandLine;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
* Tests CollectWgsMetricsFromSampledSites
*/
+@Deprecated
public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTest {
private static final File TEST_DATA_DIR = new File("testdata/picard/sam/");
@@ -55,11 +61,12 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
"OUTPUT=" + outfile.getAbsolutePath(),
"REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
"INTERVALS=" + intervals.getAbsolutePath(),
+ "INCLUDE_BQ_HISTOGRAM=true",
"SAMPLE_SIZE=" + sampleSize
};
Assert.assertEquals(runPicardCommandLine(args), 0);
- final MetricsFile<CollectWgsMetricsFromSampledSites.SampledWgsMetrics, Comparable<?>> output = new MetricsFile<CollectWgsMetricsFromSampledSites.SampledWgsMetrics, Comparable<?>>();
+ final MetricsFile<CollectWgsMetricsFromSampledSites.SampledWgsMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
@@ -69,7 +76,7 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.181818); // 2 of 11
Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.090909); // 1 of 9
Assert.assertEquals(metrics.PCT_EXC_BASEQ, 0.090909); // 1 of 9
- Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.34655, .02);
+ Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.4955, .02);
}
}
@@ -90,7 +97,7 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
};
Assert.assertEquals(runPicardCommandLine(args), 0);
- final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>>();
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<>();
output.read(new FileReader(outfile));
for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
@@ -101,4 +108,53 @@ public class CollectWgsMetricsFromSampledSitesTest extends CommandLineProgramTes
Assert.assertEquals(metrics.HET_SNP_SENSITIVITY, 0.393802, .02);
}
}
+
+ /*
+ * Tests the same inputs for CollectWgsMetrics vs CollectWgsMetricsFromSampledSites in order to make sure the results are different.
+ */
+ @Test
+ public void testLargeIntervals() throws IOException {
+ final File input = new File(TEST_DATA_DIR, "forMetrics.sam");
+ final File outfile = File.createTempFile("test", ".wgs_metrics");
+ final File ref = new File(TEST_DATA_DIR, "merger.fasta");
+ final File intervals = new File(TEST_DATA_DIR, "largeIntervals.interval_list");
+ final int sampleSize = 1000;
+ outfile.deleteOnExit();
+
+ final Map<String, String> args = new HashMap<>(5);
+ args.put("INPUT", "INPUT=" + input.getAbsolutePath());
+ args.put("REFERENCE_SEQUENCE", "REFERENCE_SEQUENCE=" + ref.getAbsolutePath());
+ args.put("INTERVALS", "INTERVALS=" + intervals.getAbsolutePath());
+ args.put("SAMPLE_SIZE", "SAMPLE_SIZE=" + sampleSize);
+
+ args.put("OUTPUT", "OUTPUT=" + outfile.getAbsolutePath());
+ Assert.assertEquals(runPicardCommandLine(args.values().toArray(new String[]{})), 0);
+
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<>();
+ output.read(new FileReader(outfile));
+
+ final File collectWgsOutfile = File.createTempFile("collectWgsMetrics.test", ".wgs_metrics");
+ collectWgsOutfile.deleteOnExit();
+
+ /*
+ * Replace the output file in order to be able to run CollectWgsMetrics with a different output file but leave all
+ * other arguments the same.
+ */
+ args.put("OUTPUT", "OUTPUT=" + collectWgsOutfile.getAbsolutePath());
+
+ CollectWgsMetrics collectWgsMetrics = new CollectWgsMetrics();
+ collectWgsMetrics.instanceMain(args.values().toArray(new String[]{}));
+
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> collectWgsMetricsOutput = new MetricsFile<>();
+ collectWgsMetricsOutput.read(new FileReader(collectWgsOutfile));
+
+ for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
+ Assert.assertEquals(metrics.GENOME_TERRITORY, 404);
+ for (final CollectWgsMetrics.WgsMetrics collectWgsMetricsOut : collectWgsMetricsOutput.getMetrics()) {
+ Assert.assertNotEquals(metrics.PCT_EXC_MAPQ, collectWgsMetricsOut.PCT_EXC_MAPQ);
+ Assert.assertNotEquals(metrics.PCT_EXC_DUPE, collectWgsMetricsOut.PCT_EXC_DUPE);
+ Assert.assertNotEquals(metrics.PCT_EXC_UNPAIRED, collectWgsMetricsOut.PCT_EXC_UNPAIRED);
+ }
+ }
+ }
}
diff --git a/src/tests/java/picard/analysis/CollectWgsMetricsTest.java b/src/test/java/picard/analysis/CollectWgsMetricsTest.java
similarity index 58%
rename from src/tests/java/picard/analysis/CollectWgsMetricsTest.java
rename to src/test/java/picard/analysis/CollectWgsMetricsTest.java
index ccf444e..87d5f93 100644
--- a/src/tests/java/picard/analysis/CollectWgsMetricsTest.java
+++ b/src/test/java/picard/analysis/CollectWgsMetricsTest.java
@@ -8,6 +8,7 @@ import htsjdk.samtools.SAMReadGroupRecord;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.Histogram;
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
import org.testng.Assert;
import org.testng.annotations.BeforeTest;
@@ -171,4 +172,119 @@ public class CollectWgsMetricsTest extends CommandLineProgramTest {
outfile = File.createTempFile("testWgsMetrics", ".txt");
outfile.deleteOnExit();
}
+
+ @Test
+ public void testLargeIntervals() throws IOException {
+ final File input = new File(TEST_DIR, "forMetrics.sam");
+ final File outfile = File.createTempFile("test", ".wgs_metrics");
+ final File ref = new File(TEST_DIR, "merger.fasta");
+ final File intervals = new File(TEST_DIR, "largeIntervals.interval_list");
+ final int sampleSize = 1000;
+ outfile.deleteOnExit();
+ final String[] args = new String[] {
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ "REFERENCE_SEQUENCE=" + ref.getAbsolutePath(),
+ "INTERVALS=" + intervals.getAbsolutePath(),
+ "SAMPLE_SIZE=" + sampleSize
+ };
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Comparable<?>> output = new MetricsFile<>();
+ output.read(new FileReader(outfile));
+
+ for (final CollectWgsMetrics.WgsMetrics metrics : output.getMetrics()) {
+ Assert.assertEquals(metrics.GENOME_TERRITORY, 404);
+ Assert.assertEquals(metrics.PCT_EXC_MAPQ, 0.271403);
+ Assert.assertEquals(metrics.PCT_EXC_DUPE, 0.182149);
+ Assert.assertEquals(metrics.PCT_EXC_UNPAIRED, 0.091075);
+ }
+ }
+
+ @Test
+ public void testExclusions() throws IOException {
+ final File reference = new File("testdata/picard/sam/merger.fasta");
+ final File tempSamFile = File.createTempFile("CollectWgsMetrics", ".bam", TEST_DIR);
+ tempSamFile.deleteOnExit();
+
+ final SAMFileHeader header = new SAMFileHeader();
+
+ //Check that dictionary file is readable and then set header dictionary
+ try {
+ header.setSequenceDictionary(SAMSequenceDictionaryExtractor.extractDictionary(reference));
+ header.setSortOrder(SAMFileHeader.SortOrder.unsorted);
+ } catch (final SAMException e) {
+ e.printStackTrace();
+ }
+
+ //Set readGroupRecord
+ final SAMReadGroupRecord readGroupRecord = new SAMReadGroupRecord(READ_GROUP_ID);
+ readGroupRecord.setSample(SAMPLE);
+ readGroupRecord.setPlatform(PLATFORM);
+ readGroupRecord.setLibrary(LIBRARY);
+ readGroupRecord.setPlatformUnit(READ_GROUP_ID);
+ header.addReadGroup(readGroupRecord);
+
+ //Add to setBuilder
+ final SAMRecordSetBuilder setBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true, 100);
+ setBuilder.setReadGroup(readGroupRecord);
+ setBuilder.setUseNmFlag(true);
+ setBuilder.setHeader(header);
+
+ setBuilder.setReadLength(10);
+
+ int expectedSingltonCoverage = 0;
+
+ expectedSingltonCoverage += 13;
+ setBuilder.addPair("overlappingReads", 0, 2, 5, false, false, "10M", "10M", true, false, 30);
+
+ expectedSingltonCoverage += 2 * 5; // 5 bases for each mate are good (see AAA!!!AA!! below).
+ setBuilder.addPair("poorQualityReads", 1, 2, 20, false, false, "10M", "10M", true, false, -1);
+
+ for(int i = 1; i < 5; i++) {
+ setBuilder.addPair("deepStack-" + i, 2, 2, 20, false, false, "10M", "10M", true, false, 30);
+ }
+
+ // modify quality of reads
+ setBuilder.getRecords().stream()
+ .filter(samRecord -> samRecord.getReadName().equals("poorQualityReads"))
+ .forEach(record -> record.setBaseQualityString("AAA!!!AA!!"));
+
+ setBuilder.getSamReader();
+
+ // Write SAM file
+ final SAMFileWriter writer = new SAMFileWriterFactory()
+ .setCreateIndex(true).makeBAMWriter(header, false, tempSamFile);
+
+ for (final SAMRecord record : setBuilder) {
+ writer.addAlignment(record);
+ }
+ writer.close();
+
+ // create output files for tests
+ final File outfile = File.createTempFile("testWgsMetrics", ".txt");
+ outfile.deleteOnExit();
+
+ final String[] args = new String[] {
+ "INPUT=" + tempSamFile.getAbsolutePath(),
+ "OUTPUT=" + outfile.getAbsolutePath(),
+ "REFERENCE_SEQUENCE=" + reference.getAbsolutePath(),
+ "INCLUDE_BQ_HISTOGRAM=true",
+ "COVERAGE_CAP=3"
+ };
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+
+ final MetricsFile<CollectWgsMetrics.WgsMetrics, Integer> output = new MetricsFile<>();
+ output.read(new FileReader(outfile));
+ final CollectWgsMetrics.WgsMetrics metrics = output.getMetrics().get(0);
+
+ final Histogram<Integer> depthHistogram = output.getAllHistograms().get(0);
+ final Histogram<Integer> baseQHistogram = output.getAllHistograms().get(1);
+
+ Assert.assertEquals((long) depthHistogram.getSumOfValues(), metrics.GENOME_TERRITORY);
+ Assert.assertEquals(baseQHistogram.getSumOfValues(), depthHistogram.getSum());
+ Assert.assertEquals((long) depthHistogram.get(1).getValue(), expectedSingltonCoverage);
+ Assert.assertEquals((long) depthHistogram.get(3).getValue(), 2*10);
+
+ }
}
\ No newline at end of file
diff --git a/src/tests/java/picard/analysis/MultiLevelCollectorTest.java b/src/test/java/picard/analysis/MultiLevelCollectorTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/MultiLevelCollectorTest.java
rename to src/test/java/picard/analysis/MultiLevelCollectorTest.java
diff --git a/src/tests/java/picard/analysis/TheoreticalSensitivityTest.java b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java
similarity index 99%
rename from src/tests/java/picard/analysis/TheoreticalSensitivityTest.java
rename to src/test/java/picard/analysis/TheoreticalSensitivityTest.java
index 5f28768..4fa9dd2 100644
--- a/src/tests/java/picard/analysis/TheoreticalSensitivityTest.java
+++ b/src/test/java/picard/analysis/TheoreticalSensitivityTest.java
@@ -55,7 +55,7 @@ public class TheoreticalSensitivityTest {
for (int n = 0; n < 10; n++) Assert.assertEquals(deterministicWheel.draw(), 1);
//test the sums of this deterministic wheel: a sum of n 1's equals n
- final List<ArrayList<Integer>> deterministicSums = deterministicWheel.sampleCumulativeSums(10, 1);
+ final List<ArrayList<Integer>> deterministicSums = deterministicWheel.sampleCumulativeSums(10, 1, false);
for (int n = 0; n < 10; n++) Assert.assertEquals(deterministicSums.get(n).get(0), (Integer) n);
}
@@ -119,7 +119,7 @@ public class TheoreticalSensitivityTest {
final double sigma = Math.sqrt(numSummands) * sigmaSingleDraw;
//test the sums of this deterministic wheel: a sum of n 1's equals n
- final List<ArrayList<Integer>> sums = wheel.sampleCumulativeSums(numSummands, sampleSize);
+ final List<ArrayList<Integer>> sums = wheel.sampleCumulativeSums(numSummands, sampleSize, false);
//we only want the last set of sums, those with numSummands summands
sums.subList(0, sums.size() - 1).clear();
diff --git a/src/tests/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java b/src/test/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java
rename to src/test/java/picard/analysis/artifacts/CollectSequencingArtifactMetricsTest.java
diff --git a/src/tests/java/picard/analysis/directed/CollectHsMetricsTest.java b/src/test/java/picard/analysis/directed/CollectHsMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/directed/CollectHsMetricsTest.java
rename to src/test/java/picard/analysis/directed/CollectHsMetricsTest.java
diff --git a/src/tests/java/picard/analysis/directed/CollectTargetedMetricsTest.java b/src/test/java/picard/analysis/directed/CollectTargetedMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/analysis/directed/CollectTargetedMetricsTest.java
rename to src/test/java/picard/analysis/directed/CollectTargetedMetricsTest.java
diff --git a/src/test/java/picard/analysis/replicates/CollectIndependentReplicatesMetricTest.java b/src/test/java/picard/analysis/replicates/CollectIndependentReplicatesMetricTest.java
new file mode 100644
index 0000000..94e2014
--- /dev/null
+++ b/src/test/java/picard/analysis/replicates/CollectIndependentReplicatesMetricTest.java
@@ -0,0 +1,222 @@
+package picard.analysis.replicates;
+
+import com.google.common.collect.ImmutableMap;
+import htsjdk.samtools.metrics.MetricsFile;
+import htsjdk.samtools.util.IOUtil;
+import htsjdk.samtools.util.TestUtil;
+import org.testng.Assert;
+import org.testng.annotations.AfterTest;
+import org.testng.annotations.BeforeTest;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.sam.MergeSamFiles;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.testng.Assert.assertEquals;
+
+/**
+ * Created by farjoun on 6/24/15.
+ */
+public class CollectIndependentReplicatesMetricTest {
+ private final static File testdir = new File("testdata/picard/independent_replicates");
+ private final static File bamOutDir = IOUtil.createTempDir("convertSamToBam", "dir");
+
+ private final static Map<String, String> sams = new ImmutableMap.Builder<String, String>()
+ .put("twoPairs", "twopairs.sam")
+ .put("twoPairsWithUMIs", "twopairsWithUMIs.sam")
+ .put("twoPairsWithBadUMIs", "twopairsWithBadUMIs.sam")
+ .put("aTriple", "aTriple.sam")
+ .put("aTripleWithUMIs", "aTripleWithUMIs.sam")
+ .put("multipleContigs", "multipleContigs.sam")
+ .put("twopairsWithUMIsMultipleOrientations","twopairsWithUMIsMultipleOrientations.sam").build();
+
+ private final static Map<String, File> bams = new HashMap<>(sams.size());
+
+ @BeforeTest
+ public void prepareBams() throws IOException {
+
+ sams.keySet().stream().forEach(key -> {
+ try {
+ bams.put(key, convertSamToBam(sams.get(key)));
+ bams.get(key).deleteOnExit();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ });
+
+ bamOutDir.deleteOnExit();
+ }
+
+ @AfterTest
+ public void tearDown() {
+ TestUtil.recursiveDelete(bamOutDir);
+ }
+
+ @DataProvider(name = "simpleTests")
+ public Iterator<Object[]> simpleTestsData() {
+ final List<Object[]> tests = new ArrayList<>(3);
+ {
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nSites", 3);
+ map.put("nDuplicateSets", 3);
+ map.put("nMismatchingUMIsInCoOrientedBiDups",2);
+ map.put("nMismatchingUMIsInContraOrientedBiDups",1);
+
+ tests.add(new Object[]{"multipleContigs.vcf", "twopairsWithUMIsMultipleOrientations", map});
+ }
+ {
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nSites", 1);
+ map.put("nDuplicateSets", 1);
+ map.put("nDifferentAllelesTriDups", 0);
+ map.put("nDifferentAllelesBiDups", 1);
+ map.put("nReferenceAllelesBiDups", 0);
+ map.put("nAlternateAllelesBiDups", 0);
+ map.put("biSiteHeterogeneityRate", 1.0);
+ map.put("biSiteHomogeneityRate", 0.0);
+ map.put("nAlternateReads", 1);
+ map.put("nReferenceReads", 1);
+
+ tests.add(new Object[]{"hets.vcf", "twoPairs", map});
+ }
+ {// this tests the GQ cutoff
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nDifferentAllelesBiDups", 1);
+ map.put("biSiteHeterogeneityRate", 1.0);
+ map.put("biSiteHomogeneityRate", 0.0);
+ map.put("nDifferentAllelesTriDups", 0);
+ map.put("nSites", 1);
+
+ tests.add(new Object[]{"twoSamplesHet.vcf", "twoPairs", map});
+ }
+ {
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nExactlyTriple", 1);
+ map.put("nExactlyDouble", 1);
+ map.put("nDuplicateSets", 2);
+ map.put("nAlternateReads", 2);
+ map.put("nReferenceReads", 3);
+
+ tests.add(new Object[]{"hets.vcf", "aTriple", map});
+ }
+ {
+ final Map<String, Object> map = new HashMap<>();
+
+ map.put("nSites", 4);
+ map.put("nTotalReads", 20);
+ map.put("nDuplicateSets", 8);
+
+ tests.add(new Object[]{"multipleContigs.vcf", "multipleContigs", map});
+ }
+ {
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nAlternateAllelesTriDups", 1);
+ map.put("nMismatchingAllelesBiDups", 0); //we remove sites that have mismatching alleles, so this should be zero.
+
+ tests.add(new Object[]{"hets_pos20.vcf", "aTriple", map});
+ }
+ tests.add(new Object[]{"hets_pos21_HOMREF_G.vcf", "aTriple", Collections.singletonMap("nReferenceAllelesTriDups", 1)});
+ tests.add(new Object[]{"hets_pos20.vcf", "twoPairs", Collections.singletonMap("nAlternateAllelesBiDups", 1)});
+ tests.add(new Object[]{"hets_pos21_HOMREF_G.vcf", "twoPairs", Collections.singletonMap("nReferenceAllelesBiDups", 1)});
+ {
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nSites", 1);
+ map.put("nThreeAllelesSites", 1);
+ map.put("nAlternateAllelesTriDups", 0);
+ map.put("nMismatchingAllelesBiDups", 0); //we remove sites that have mismatching alleles, so this should be zero.
+
+ tests.add(new Object[]{"hets_pos22_IncorrectAlleles.vcf", "twoPairs", map});
+ }
+ //This tests the BQ cutoff
+ tests.add(new Object[]{"hets_pos22_IncorrectAlleles.vcf", "aTriple", Collections.singletonMap("nMismatchingAllelesTriDups", 0)});
+
+ {// tests for UMIs
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nSites", 1);
+ map.put("nMismatchingUMIsInDiffBiDups", 1);
+ map.put("nMatchingUMIsInDiffBiDups", 0);
+ map.put("nMismatchingUMIsInSameBiDups", 0);
+ map.put("nMatchingUMIsInSameBiDups", 0);
+ map.put("nGoodBarcodes",1);
+ map.put("nBadBarcodes",0);
+
+
+ tests.add(new Object[]{"hets.vcf", "twoPairsWithUMIs", map});
+ }
+ {// tests for UMIs
+ final Map<String, Object> map = new LinkedHashMap<>();
+
+ map.put("nSites", 1);
+ map.put("nMismatchingUMIsInDiffBiDups", 0);
+ map.put("nMatchingUMIsInDiffBiDups", 0);
+ map.put("nMismatchingUMIsInSameBiDups", 0);
+ map.put("nMatchingUMIsInSameBiDups", 0);
+ map.put("nGoodBarcodes",0);
+ map.put("nBadBarcodes",1);
+
+ tests.add(new Object[]{"hets.vcf", "twoPairsWithBadUMIs", map});
+ }
+ return tests.iterator();
+ }
+
+ @Test(dataProvider = "simpleTests")
+ public void simpleTest(final String vcf, final String bam, final Map<String, Object> fieldValueMap) throws IOException, NoSuchFieldException, IllegalAccessException {
+
+ final CollectIndependentReplicateMetrics est = new CollectIndependentReplicateMetrics();
+ est.INPUT = bams.get(bam);
+ est.VCF = new File(testdir, vcf);
+ est.OUTPUT = IOUtil.newTempFile("singleHet", ".duplication_metric", new File[]{bamOutDir});
+ est.MATRIX_OUTPUT = IOUtil.newTempFile("singleHet", ".duplication_matrix", new File[]{bamOutDir});
+ est.SAMPLE = "SAMPLE1";
+
+ est.OUTPUT.deleteOnExit();
+ est.MATRIX_OUTPUT.deleteOnExit();
+
+ est.doWork();
+
+ final MetricsFile<IndependentReplicateMetric, Integer> retval = new MetricsFile<>();
+ retval.read(new FileReader(est.OUTPUT));
+
+ for (final Map.Entry<String, Object> fieldValue : fieldValueMap.entrySet()) {
+ final String field = fieldValue.getKey();
+ final Object expectedValue = fieldValue.getValue();
+ final Field o = IndependentReplicateMetric.class.getField(field);
+ assertEquals(o.get(retval.getMetrics().get(0)), expectedValue, field);
+ }
+ }
+
+ /**
+ * Converts a sam-file to a bam-file changing the extension from .sam to .bam
+ *
+ */
+ private static File convertSamToBam(final String sam) throws IOException {
+ final MergeSamFiles msf = new MergeSamFiles();
+ final File bam = new File(bamOutDir, sam.replaceAll("sam$", "bam"));
+ final int returnCode = msf.instanceMain(
+ new String[]{
+ "INPUT=" + (new File(testdir, sam).getAbsolutePath()),
+ "CREATE_INDEX=true",
+ "OUTPUT=" + bam.getAbsolutePath()});
+ Assert.assertEquals(returnCode, 0);
+
+ return bam;
+ }
+}
\ No newline at end of file
diff --git a/src/test/java/picard/analysis/replicates/MergeableMetricBaseTest.java b/src/test/java/picard/analysis/replicates/MergeableMetricBaseTest.java
new file mode 100644
index 0000000..bdcc8ac
--- /dev/null
+++ b/src/test/java/picard/analysis/replicates/MergeableMetricBaseTest.java
@@ -0,0 +1,161 @@
+package picard.analysis.replicates;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+public class MergeableMetricBaseTest {
+
+ class TestMergeableMetric extends MergeableMetricBase {
+ @MergeByAdding
+ Integer boxedInt = 1;
+ @MergeByAdding
+ int unboxedInt = 2;
+
+ @MergeByAdding
+ Double boxedDouble = 3D;
+ @MergeByAdding
+ double unboxedDouble = 4D;
+
+ @MergeByAdding
+ Long boxedLong = 5L;
+ @MergeByAdding
+ long unboxedLong = 6L;
+
+ @MergeByAdding
+ Float boxedFloat = 7F;
+ @MergeByAdding
+ float unboxedFloat = 8F;
+
+ @MergeByAdding
+ Short boxedShort = 9;
+ @MergeByAdding
+ short unboxedShort = 10;
+
+ @MergeByAdding
+ Byte boxedByte = 11;
+ @MergeByAdding
+ byte unboxedByte = 12;
+
+ @MergeByAssertEquals
+ String mustBeEqualString = "hello";
+
+ @MergeByAssertEquals
+ Double mustBeEqualDouble = 0.5;
+
+ @MergeByAssertEquals
+ boolean mustBeEqualUnboxedBoolean = false;
+
+ @NoMergingIsDerived
+ double ratioIntValues;
+
+ @Override
+ public void calculateDerivedFields() {
+ ratioIntValues = boxedInt / (double) unboxedInt;
+ }
+ }
+
+ @Test
+ public void testMerging() {
+ final TestMergeableMetric metric1 = new TestMergeableMetric(), metric2 = new TestMergeableMetric();
+ metric1.merge(metric2);
+
+ Assert.assertEquals(metric1.boxedInt, (Integer) 2);
+ Assert.assertEquals(metric1.unboxedInt, 4);
+
+ Assert.assertEquals(metric1.boxedDouble, 6D);
+ Assert.assertEquals(metric1.unboxedDouble, 8D);
+
+ Assert.assertEquals(metric1.boxedLong, (Long) 10L);
+ Assert.assertEquals(metric1.unboxedLong, 12L);
+
+ Assert.assertEquals(metric1.boxedFloat, 14F);
+ Assert.assertEquals(metric1.unboxedFloat, 16F);
+
+ Assert.assertEquals(metric1.boxedShort, (Short) (short) 18);
+ Assert.assertEquals(metric1.unboxedShort, (short) 20);
+
+ Assert.assertEquals(metric1.boxedByte, (Byte) (byte) 22);
+ Assert.assertEquals(metric1.unboxedByte, 24);
+
+ Assert.assertEquals(metric1.mustBeEqualDouble, metric2.mustBeEqualDouble);
+ Assert.assertEquals(metric1.mustBeEqualString, metric2.mustBeEqualString);
+ Assert.assertEquals(metric1.mustBeEqualUnboxedBoolean, metric2.mustBeEqualUnboxedBoolean);
+
+ metric1.calculateDerivedFields();
+
+ Assert.assertEquals(metric1.ratioIntValues, 0.5D);
+ }
+
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testMergingUnequalString() {
+
+ final TestMergeableMetric metric1 = new TestMergeableMetric(), metric2 = new TestMergeableMetric();
+ metric1.mustBeEqualString = "goodbye";
+
+ Assert.assertFalse(metric1.canMerge(metric2));
+ metric1.merge(metric2);
+ }
+
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testMergingUnequalDouble() {
+
+ final TestMergeableMetric metric1 = new TestMergeableMetric(), metric2 = new TestMergeableMetric();
+ metric1.mustBeEqualDouble = 1D;
+
+ Assert.assertFalse(metric1.canMerge(metric2));
+ metric1.merge(metric2);
+ }
+
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testMergingUnequalBoolean() {
+
+ final TestMergeableMetric metric1 = new TestMergeableMetric(), metric2 = new TestMergeableMetric();
+ metric1.mustBeEqualUnboxedBoolean = true;
+
+ Assert.assertFalse(metric1.canMerge(metric2));
+ metric1.merge(metric2);
+ }
+
+ private class TestMergeableMericIllegal extends MergeableMetricBase {
+ Integer undecorated = 0;
+ }
+
+ @Test(expectedExceptions = IllegalStateException.class)
+ public void testIllegalClass() {
+ final TestMergeableMericIllegal illegal1 = new TestMergeableMericIllegal(), illegal2 = new TestMergeableMericIllegal();
+
+ illegal1.merge(illegal2);
+ }
+
+ private class TestDerivedMergableMetric extends TestMergeableMetric {
+ @MergeByAdding
+ Integer anotherBoxed = 1;
+ }
+
+ @Test
+ public void TestMergingDerivedClass() {
+ final TestMergeableMetric instance1 = new TestMergeableMetric();
+ final TestDerivedMergableMetric instance2 = new TestDerivedMergableMetric();
+
+ instance1.merge(instance2);
+ }
+
+ @Test(expectedExceptions = IllegalArgumentException.class)
+ public void TestMergingSuperClass() {
+ final TestMergeableMetric instance1 = new TestMergeableMetric();
+ final TestDerivedMergableMetric instance2 = new TestDerivedMergableMetric();
+
+ instance2.merge(instance1);
+ }
+
+ @Test
+ public void TestCanMerge() {
+ final TestMergeableMetric instance1 = new TestMergeableMetric();
+ instance1.unboxedInt=1;
+ final TestDerivedMergableMetric instance2 = new TestDerivedMergableMetric();
+ instance2.unboxedInt=2;
+
+ instance1.merge(instance2);
+ Assert.assertEquals(instance1.unboxedInt, 3);
+ }
+}
diff --git a/src/tests/java/picard/cmdline/CommandLineParserTest.java b/src/test/java/picard/cmdline/CommandLineParserTest.java
similarity index 97%
rename from src/tests/java/picard/cmdline/CommandLineParserTest.java
rename to src/test/java/picard/cmdline/CommandLineParserTest.java
index 4a78ff3..4690cbc 100644
--- a/src/tests/java/picard/cmdline/CommandLineParserTest.java
+++ b/src/test/java/picard/cmdline/CommandLineParserTest.java
@@ -121,6 +121,13 @@ public class CommandLineParserTest {
public String frob;
}
+ class OptionsWithSameShortName { // fixture for testForIdenticalShortName
+ @Option(shortName = "SAME_SHORT_NAME", overridable = true, optional = true) // short name identical to the field name
+ public String SAME_SHORT_NAME;
+ @Option(shortName = "SOMETHING_ELSE", overridable = true, optional = true) // short name differs from the field name
+ public String DIFF_SHORT_NAME;
+ }
+
class MutexOptions {
@Option(mutex = {"M", "N", "Y", "Z"})
public String A;
@@ -166,6 +173,24 @@ public class CommandLineParserTest {
clp.usage(System.out, true);
}
+ /**
+ * If an option's short name is identical to its long name, the argument must still appear in the reported command line.
+ */
+ @Test
+ public void testForIdenticalShortName() {
+ final String[] args = { // both options are supplied via their short names
+ "SAME_SHORT_NAME=FOO",
+ "SOMETHING_ELSE=BAR"
+ };
+ final OptionsWithSameShortName fo = new OptionsWithSameShortName();
+ final CommandLineParser clp = new CommandLineParser(fo);
+ clp.parseOptions(System.err, args); // return value is not checked here; only the rendered command line is asserted
+ final String commandLine = clp.getCommandLine();
+ Assert.assertTrue(commandLine.contains("DIFF_SHORT_NAME")); // the option passed as SOMETHING_ELSE shows up under its field name
+ Assert.assertTrue(commandLine.contains("SAME_SHORT_NAME")); // the option whose short and field names coincide is not dropped
+ }
+
+
@Test
public void testPositive() {
final String[] args = {
diff --git a/src/tests/java/picard/cmdline/CommandLineProgramTest.java b/src/test/java/picard/cmdline/CommandLineProgramTest.java
similarity index 100%
rename from src/tests/java/picard/cmdline/CommandLineProgramTest.java
rename to src/test/java/picard/cmdline/CommandLineProgramTest.java
diff --git a/src/tests/java/picard/cmdline/PicardCommandLineTest.java b/src/test/java/picard/cmdline/PicardCommandLineTest.java
similarity index 100%
rename from src/tests/java/picard/cmdline/PicardCommandLineTest.java
rename to src/test/java/picard/cmdline/PicardCommandLineTest.java
diff --git a/src/test/java/picard/fingerprint/FingerprintCheckerTest.java b/src/test/java/picard/fingerprint/FingerprintCheckerTest.java
new file mode 100644
index 0000000..119e530
--- /dev/null
+++ b/src/test/java/picard/fingerprint/FingerprintCheckerTest.java
@@ -0,0 +1,78 @@
+package picard.fingerprint;
+
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Tests for {@code FingerprintChecker.randomSublist} and {@code FingerprintChecker.calculateMatchResults}. Created by farjoun on 8/27/15.
+ */
+public class FingerprintCheckerTest {
+
+ private final double maf = 0.4; // passed to both the Snp and the HaplotypeBlock constructors below
+ private final Snp snp = new Snp("test", "chr1", 1, (byte) 'A', (byte) 'C', maf, Collections.singletonList("dummy"));
+ private final HaplotypeBlock hb = new HaplotypeBlock(maf);
+
+ @BeforeClass
+ public void setup() { // adds the single test SNP to the haplotype block once, before the tests of this class run
+ hb.addSnp(snp);
+ }
+
+ @Test
+ public void testRandomSublist() throws Exception { // checks randomSublist for n equal to, above, and below the list size
+
+ final List<Integer> list = new ArrayList<>();
+ list.add(1);
+ list.add(2);
+ list.add(3);
+
+ Assert.assertEquals(list, FingerprintChecker.randomSublist(list, 3)); // n == size: result equals the input list
+ Assert.assertEquals(list, FingerprintChecker.randomSublist(list, 4)); // n > size: result still equals the input list
+ Assert.assertEquals(FingerprintChecker.randomSublist(list, 2).size(), 2); // n < size: only the size is asserted, not the content
+ }
+
+ @DataProvider(name = "pLoH")
+ public Iterator<Object[]> pLohData() { // supplies the pLoH values 1/40, 2/40, ..., 19/40 (0.025 to 0.475)
+ final List<Object[]> listOfDoubles = new ArrayList<>();
+
+ for (int i = 1; i < 20; i++) {
+ listOfDoubles.add(new Object[]{i / 40D});
+ }
+ return listOfDoubles.iterator();
+ }
+
+ @Test(dataProvider = "pLoH")
+ public void testMatchResults(final double pLoH) { // run once per pLoH value from pLohData()
+
+ final Fingerprint fpObserved = new Fingerprint("test", null, "noop");
+ final Fingerprint fpExpected = new Fingerprint("test", null, "noop");
+
+ final HaplotypeProbabilities hpHet = new HaplotypeProbabilitiesFromGenotype(snp, hb, 0.0001, 1.0, 0.0001); // middle (het) likelihood dominates
+ final HaplotypeProbabilities hpHomRef = new HaplotypeProbabilitiesFromGenotype(snp, hb, 1.0, 0.00001, 0.000000001); // first (hom) likelihood dominates
+
+ // Expected is a het
+ fpExpected.add(hpHet);
+
+ // Observed is a hom, so possible scenario is that observed is tumor, and expected is normal
+ fpObserved.add(hpHomRef);
+
+ // get match results for the supplied pLoH (the third argument, 0.01, is fixed for every run)
+ final MatchResults mr = FingerprintChecker.calculateMatchResults(fpObserved, fpExpected, 0.01, pLoH);
+
+ // make sure that it's more likely to be the same sample, if the observed is "tumor" and the expected is "normal"
+ Assert.assertTrue(mr.getLodTN() > mr.getLOD());
+
+ // make sure that the regular LOD is negative (we're comparing a HET to a HOM)
+ Assert.assertTrue(mr.getLOD() < 0);
+
+ // make sure that it's more likely to be tumor/normal rather than normal/tumor
+ // (a hom normal isn't expected to be measured as a het in the tumor)
+ Assert.assertTrue(mr.getLodTN() > mr.getLodNT());
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/picard/fingerprint/HaplotypeMapTest.java b/src/test/java/picard/fingerprint/HaplotypeMapTest.java
similarity index 100%
rename from src/tests/java/picard/fingerprint/HaplotypeMapTest.java
rename to src/test/java/picard/fingerprint/HaplotypeMapTest.java
diff --git a/src/tests/java/picard/fingerprint/HaplotypeProbabilitiesTest.java b/src/test/java/picard/fingerprint/HaplotypeProbabilitiesTest.java
similarity index 100%
rename from src/tests/java/picard/fingerprint/HaplotypeProbabilitiesTest.java
rename to src/test/java/picard/fingerprint/HaplotypeProbabilitiesTest.java
diff --git a/src/test/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java b/src/test/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java
new file mode 100644
index 0000000..a926487
--- /dev/null
+++ b/src/test/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java
@@ -0,0 +1,55 @@
+package picard.fingerprint;
+
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.util.TestNGUtil;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Data-driven test of the likelihoods reported by {@code HaplotypeProbabilityOfNormalGivenTumor}. Created by farjoun on 5/29/15.
+ */
+public class HaplotypeProbabilityOfNormalGivenTumorTest {
+
+ private final double maf = 0.4; // passed to both the Snp and the HaplotypeBlock constructors below
+ private final Snp snp = new Snp("test", "chr1", 1, (byte) 'A', (byte) 'C', maf, Collections.singletonList("dummy"));
+ private final HaplotypeBlock hb = new HaplotypeBlock(maf);
+
+ @DataProvider(name = "testGetLikelihoodsData")
+ public Iterator<Object[]> testGetLikelihoodsData() { // each row: {pLoH, tumor likelihoods[3], expected normal likelihoods[3]}
+ final List<Object[]> testData = new ArrayList<>();
+
+ //make sure that giving 0 pLoH doesn't change the underlying likelihoods:
+ testData.add(new Object[]{0.0, new double[]{1, 0, 0}, new double[]{1, 0, 0}});
+ testData.add(new Object[]{0.0, new double[]{0, 1, 0}, new double[]{0, 1, 0}});
+ testData.add(new Object[]{0.0, new double[]{0, 0, 1}, new double[]{0, 0, 1}});
+ testData.add(new Object[]{0.0, new double[]{0, 0.4, 0.6}, new double[]{0, 0.4, 0.6}});
+ testData.add(new Object[]{0.0, new double[]{0.3, 0.7, 0}, new double[]{0.3, 0.7, 0}});
+
+ final double pLoh = 0.1;
+ //see that non zero pLoH changes the likelihood of a HET site as expected:
+ testData.add(new Object[]{pLoh, new double[]{0, 1, 0}, new double[]{0, 1 - pLoh, 0}});
+
+ //HOMs will change a little: the middle (het) entry becomes pLoh/2 times the total hom likelihood
+ testData.add(new Object[]{pLoh, new double[]{1, 0, 0}, new double[]{1, pLoh/2, 0}});
+ testData.add(new Object[]{pLoh, new double[]{0, 0, 1}, new double[]{0, pLoh/2, 1}});
+ testData.add(new Object[]{pLoh, new double[]{.3, 0, .7}, new double[]{.3, pLoh/2, .7}});
+
+ // check that the calculation is linear: 0.5 * (1 - pLoh) + 0.5 * (pLoh/2) == 0.5 * (1 - pLoh/2)
+ testData.add(new Object[]{pLoh, new double[]{0, 0.5, 0.5}, new double[]{0, 0.5 * (1 - pLoh/2), 0.5}});
+ testData.add(new Object[]{pLoh, new double[]{0.5, 0.5, 0}, new double[]{0.5, 0.5 * (1 - pLoh/2), 0}});
+
+ return testData.iterator();
+ }
+
+ @Test(dataProvider = "testGetLikelihoodsData")
+ public void testGetLikelihoods(final double pLoH, final double[] tumorLikelihood, final double[] normalLikelihood) throws Exception {
+ final HaplotypeProbabilities hp = new HaplotypeProbabilitiesFromGenotype(snp, hb, tumorLikelihood[0], tumorLikelihood[1], tumorLikelihood[2]);
+ final HaplotypeProbabilities hpTumor = new HaplotypeProbabilityOfNormalGivenTumor(hp, pLoH); // wraps the tumor likelihoods; despite the variable name this is the normal-given-tumor model
+
+ TestNGUtil.assertEqualDoubleArrays(hpTumor.getLikelihoods(), normalLikelihood, 0.0001); // last argument 0.0001 is presumably the comparison tolerance - confirm in TestNGUtil
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/picard/illumina/CheckIlluminaDirectoryTest.java b/src/test/java/picard/illumina/CheckIlluminaDirectoryTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/CheckIlluminaDirectoryTest.java
rename to src/test/java/picard/illumina/CheckIlluminaDirectoryTest.java
diff --git a/src/tests/java/picard/illumina/CollectIlluminaBasecallingMetricsTest.java b/src/test/java/picard/illumina/CollectIlluminaBasecallingMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/CollectIlluminaBasecallingMetricsTest.java
rename to src/test/java/picard/illumina/CollectIlluminaBasecallingMetricsTest.java
diff --git a/src/tests/java/picard/illumina/ExtractIlluminaBarcodesTest.java b/src/test/java/picard/illumina/ExtractIlluminaBarcodesTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/ExtractIlluminaBarcodesTest.java
rename to src/test/java/picard/illumina/ExtractIlluminaBarcodesTest.java
diff --git a/src/tests/java/picard/illumina/IlluminaBasecallsToFastqTest.java b/src/test/java/picard/illumina/IlluminaBasecallsToFastqTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/IlluminaBasecallsToFastqTest.java
rename to src/test/java/picard/illumina/IlluminaBasecallsToFastqTest.java
diff --git a/src/tests/java/picard/illumina/IlluminaBasecallsToSamAdapterClippingTest.java b/src/test/java/picard/illumina/IlluminaBasecallsToSamAdapterClippingTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/IlluminaBasecallsToSamAdapterClippingTest.java
rename to src/test/java/picard/illumina/IlluminaBasecallsToSamAdapterClippingTest.java
diff --git a/src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java b/src/test/java/picard/illumina/IlluminaBasecallsToSamTest.java
similarity index 65%
rename from src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java
rename to src/test/java/picard/illumina/IlluminaBasecallsToSamTest.java
index 0d9fe17..63d4e79 100644
--- a/src/tests/java/picard/illumina/IlluminaBasecallsToSamTest.java
+++ b/src/test/java/picard/illumina/IlluminaBasecallsToSamTest.java
@@ -64,14 +64,13 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
Assert.assertTrue(IlluminaBasecallsConverter.TILE_NUMBER_COMPARATOR.compare(10, 10) == 0, "");
}
-
@Test
public void testNonBarcoded() throws Exception {
final File outputBam = File.createTempFile("nonBarcoded.", ".sam");
outputBam.deleteOnExit();
final int lane = 1;
- runPicardCommandLine(new String[]{
+ Assert.assertEquals(runPicardCommandLine(new String[]{
"BASECALLS_DIR=" + BASECALLS_DIR,
"LANE=" + lane,
"READ_STRUCTURE=25S8S25T",
@@ -79,17 +78,17 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
"RUN_BARCODE=HiMom",
"SAMPLE_ALIAS=HiDad",
"LIBRARY_NAME=Hello, World"
- });
+ }), 0);
IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcoded.sam"));
}
@Test
- public void testNonBarcodedWithMoleclarIndex() throws Exception {
+ public void testNonBarcodedWithMolecularIndex() throws Exception {
final File outputBam = File.createTempFile("nonBarcodedWithMI.", ".sam");
outputBam.deleteOnExit();
final int lane = 1;
- runPicardCommandLine(new String[]{
+ Assert.assertEquals(runPicardCommandLine(new String[]{
"BASECALLS_DIR=" + BASECALLS_DIR,
"LANE=" + lane,
"READ_STRUCTURE=25S8M25T",
@@ -97,7 +96,26 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
"RUN_BARCODE=HiMom",
"SAMPLE_ALIAS=HiDad",
"LIBRARY_NAME=Hello, World"
- });
+ }), 0);
+ IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithMolecularIndex8M.sam"));
+ }
+
+ @Test
+ public void testNonBarcodedWithTagPerMolecularIndexIsNUll() throws Exception { // TAG_PER_MOLECULAR_INDEX=null must yield the same SAM as the plain 8M run
+ final File outputBam = File.createTempFile("nonBarcodedWithMI.", ".sam");
+ outputBam.deleteOnExit();
+ final int lane = 1;
+
+ Assert.assertEquals(runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S8M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World",
+ "TAG_PER_MOLECULAR_INDEX=null"
+ }), 0); // exit code 0: the run is expected to succeed
IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithMolecularIndex8M.sam"));
}
@@ -107,7 +125,7 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
outputBam.deleteOnExit();
final int lane = 1;
- runPicardCommandLine(new String[]{
+ Assert.assertEquals(runPicardCommandLine(new String[]{
"BASECALLS_DIR=" + BASECALLS_DIR,
"LANE=" + lane,
"READ_STRUCTURE=25S4M4M25T",
@@ -115,9 +133,97 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
"RUN_BARCODE=HiMom",
"SAMPLE_ALIAS=HiDad",
"LIBRARY_NAME=Hello, World"
- });
+ }), 0);
IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithMolecularIndex4M4M.sam"));
+ }
+
+ // This *should* store molecular indexes individually in ZA and ZB (two tags for the two M segments of 25S4M4M25T)
+ @Test
+ public void testNonBarcodedWithTagPerMolecularIndexDual() throws Exception {
+ final File outputBam = File.createTempFile("nonBarcodedWithDualMI.", ".sam");
+ outputBam.deleteOnExit();
+ final int lane = 1;
+
+ Assert.assertEquals(runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S4M4M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World",
+ "TAG_PER_MOLECULAR_INDEX=ZA",
+ "TAG_PER_MOLECULAR_INDEX=ZB"
+
+ }), 0); // exit code 0: the run is expected to succeed
+ IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithTagPerMolecularIndex4M4M.sam"));
+ }
+
+ // Too many tags: three TAG_PER_MOLECULAR_INDEX values for the two M segments of 25S4M4M25T
+ @Test
+ public void testNonBarcodedWithTagPerMolecularIndexDualTooManyTags() throws Exception {
+ final File outputBam = File.createTempFile("nonBarcodedWithDualMI.", ".sam");
+ outputBam.deleteOnExit();
+ final int lane = 1;
+
+ Assert.assertEquals(runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S4M4M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World",
+ "TAG_PER_MOLECULAR_INDEX=ZA",
+ "TAG_PER_MOLECULAR_INDEX=ZB",
+ "TAG_PER_MOLECULAR_INDEX=ZC"
+ }), 1); // exit code 1: the run is expected to fail; no output file is compared
+ }
+
+ // Too few tags: three TAG_PER_MOLECULAR_INDEX values for the four M segments of 25S2M2M2M2M25T
+ @Test
+ public void testNonBarcodedWithTagPerMolecularIndexDualTooFewTags() throws Exception {
+ final File outputBam = File.createTempFile("nonBarcodedWithDualMI.", ".sam");
+ outputBam.deleteOnExit();
+ final int lane = 1;
+
+ Assert.assertEquals(runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S2M2M2M2M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World",
+ "TAG_PER_MOLECULAR_INDEX=ZA",
+ "TAG_PER_MOLECULAR_INDEX=ZB",
+ "TAG_PER_MOLECULAR_INDEX=ZC"
+
+ }), 1); // exit code 1: the run is expected to fail; no output file is compared
+ }
+
+ // Just the right number of tags: four TAG_PER_MOLECULAR_INDEX values for the four M segments of 25S2M2M2M2M25T
+ @Test
+ public void testNonBarcodedWithTagPerMolecularIndexDualFourMolecularIndexes() throws Exception {
+ final File outputBam = File.createTempFile("nonBarcodedWithDualMI.", ".sam");
+ outputBam.deleteOnExit(); // clean up the temp file on JVM exit, as every sibling test does
+ final int lane = 1;
+
+ Assert.assertEquals(runPicardCommandLine(new String[]{
+ "BASECALLS_DIR=" + BASECALLS_DIR,
+ "LANE=" + lane,
+ "READ_STRUCTURE=25S2M2M2M2M25T",
+ "OUTPUT=" + outputBam,
+ "RUN_BARCODE=HiMom",
+ "SAMPLE_ALIAS=HiDad",
+ "LIBRARY_NAME=Hello, World",
+ "TAG_PER_MOLECULAR_INDEX=ZA",
+ "TAG_PER_MOLECULAR_INDEX=ZB",
+ "TAG_PER_MOLECULAR_INDEX=ZC",
+ "TAG_PER_MOLECULAR_INDEX=ZD"
+ }), 0); // exit code 0: the run is expected to succeed
+ IOUtil.assertFilesEqual(outputBam, new File(TEST_DATA_DIR, "nonBarcodedWithTagPerMolecularIndex2M2M2M2M.sam"));
}
@Test
@@ -203,13 +309,13 @@ public class IlluminaBasecallsToSamTest extends CommandLineProgramTest {
writer.close();
reader.close();
- runPicardCommandLine(new String[]{
+ Assert.assertEquals(runPicardCommandLine(new String[]{
"BASECALLS_DIR=" + baseCallsDir,
"LANE=" + lane,
"RUN_BARCODE=HiMom",
"READ_STRUCTURE=" + readStructure,
"LIBRARY_PARAMS=" + libraryParams
- });
+ }), 0);
for (final File outputSam : samFiles) {
IOUtil.assertFilesEqual(outputSam, new File(testDataDir, outputSam.getName()));
diff --git a/src/tests/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java b/src/test/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java
rename to src/test/java/picard/illumina/IlluminaLaneMetricsCollectorTest.java
diff --git a/src/tests/java/picard/illumina/ReadStructureTest.java b/src/test/java/picard/illumina/ReadStructureTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/ReadStructureTest.java
rename to src/test/java/picard/illumina/ReadStructureTest.java
diff --git a/src/tests/java/picard/illumina/parser/BclParserTest.java b/src/test/java/picard/illumina/parser/BclParserTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/BclParserTest.java
rename to src/test/java/picard/illumina/parser/BclParserTest.java
diff --git a/src/tests/java/picard/illumina/parser/BinTdUtil.java b/src/test/java/picard/illumina/parser/BinTdUtil.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/BinTdUtil.java
rename to src/test/java/picard/illumina/parser/BinTdUtil.java
diff --git a/src/tests/java/picard/illumina/parser/CycleIlluminaFileMapTest.java b/src/test/java/picard/illumina/parser/CycleIlluminaFileMapTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/CycleIlluminaFileMapTest.java
rename to src/test/java/picard/illumina/parser/CycleIlluminaFileMapTest.java
diff --git a/src/tests/java/picard/illumina/parser/FilterParserTest.java b/src/test/java/picard/illumina/parser/FilterParserTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/FilterParserTest.java
rename to src/test/java/picard/illumina/parser/FilterParserTest.java
diff --git a/src/tests/java/picard/illumina/parser/IlluminaDataProviderFactoryTest.java b/src/test/java/picard/illumina/parser/IlluminaDataProviderFactoryTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/IlluminaDataProviderFactoryTest.java
rename to src/test/java/picard/illumina/parser/IlluminaDataProviderFactoryTest.java
diff --git a/src/tests/java/picard/illumina/parser/IlluminaDataProviderTest.java b/src/test/java/picard/illumina/parser/IlluminaDataProviderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/IlluminaDataProviderTest.java
rename to src/test/java/picard/illumina/parser/IlluminaDataProviderTest.java
diff --git a/src/tests/java/picard/illumina/parser/IlluminaFileUtilTest.java b/src/test/java/picard/illumina/parser/IlluminaFileUtilTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/IlluminaFileUtilTest.java
rename to src/test/java/picard/illumina/parser/IlluminaFileUtilTest.java
diff --git a/src/tests/java/picard/illumina/parser/PerTileParserTest.java b/src/test/java/picard/illumina/parser/PerTileParserTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/PerTileParserTest.java
rename to src/test/java/picard/illumina/parser/PerTileParserTest.java
diff --git a/src/tests/java/picard/illumina/parser/PerTilePerCycleParserTest.java b/src/test/java/picard/illumina/parser/PerTilePerCycleParserTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/PerTilePerCycleParserTest.java
rename to src/test/java/picard/illumina/parser/PerTilePerCycleParserTest.java
diff --git a/src/tests/java/picard/illumina/parser/PosParserTest.java b/src/test/java/picard/illumina/parser/PosParserTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/PosParserTest.java
rename to src/test/java/picard/illumina/parser/PosParserTest.java
diff --git a/src/tests/java/picard/illumina/parser/fakers/BclFileFakerTest.java b/src/test/java/picard/illumina/parser/fakers/BclFileFakerTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/fakers/BclFileFakerTest.java
rename to src/test/java/picard/illumina/parser/fakers/BclFileFakerTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReaderTest.java b/src/test/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReaderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReaderTest.java
rename to src/test/java/picard/illumina/parser/readers/AbstractIlluminaPositionFileReaderTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/BclReaderTest.java b/src/test/java/picard/illumina/parser/readers/BclReaderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/BclReaderTest.java
rename to src/test/java/picard/illumina/parser/readers/BclReaderTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/ClocsFileReaderTest.java b/src/test/java/picard/illumina/parser/readers/ClocsFileReaderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/ClocsFileReaderTest.java
rename to src/test/java/picard/illumina/parser/readers/ClocsFileReaderTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/FilterFileReaderTest.java b/src/test/java/picard/illumina/parser/readers/FilterFileReaderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/FilterFileReaderTest.java
rename to src/test/java/picard/illumina/parser/readers/FilterFileReaderTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/IlluminaFileUtilTest.java b/src/test/java/picard/illumina/parser/readers/IlluminaFileUtilTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/IlluminaFileUtilTest.java
rename to src/test/java/picard/illumina/parser/readers/IlluminaFileUtilTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/LocsFileReaderTest.java b/src/test/java/picard/illumina/parser/readers/LocsFileReaderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/LocsFileReaderTest.java
rename to src/test/java/picard/illumina/parser/readers/LocsFileReaderTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/MMapBackedIteratorFactoryTest.java b/src/test/java/picard/illumina/parser/readers/MMapBackedIteratorFactoryTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/MMapBackedIteratorFactoryTest.java
rename to src/test/java/picard/illumina/parser/readers/MMapBackedIteratorFactoryTest.java
diff --git a/src/tests/java/picard/illumina/parser/readers/PosFileReaderTest.java b/src/test/java/picard/illumina/parser/readers/PosFileReaderTest.java
similarity index 100%
rename from src/tests/java/picard/illumina/parser/readers/PosFileReaderTest.java
rename to src/test/java/picard/illumina/parser/readers/PosFileReaderTest.java
diff --git a/src/tests/java/picard/metrics/CollectRrbsMetricsTest.java b/src/test/java/picard/metrics/CollectRrbsMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/metrics/CollectRrbsMetricsTest.java
rename to src/test/java/picard/metrics/CollectRrbsMetricsTest.java
diff --git a/src/tests/java/picard/pedigree/PedFileTest.java b/src/test/java/picard/pedigree/PedFileTest.java
similarity index 100%
rename from src/tests/java/picard/pedigree/PedFileTest.java
rename to src/test/java/picard/pedigree/PedFileTest.java
diff --git a/src/tests/java/picard/reference/NonNFastaSizeTest.java b/src/test/java/picard/reference/NonNFastaSizeTest.java
similarity index 100%
rename from src/tests/java/picard/reference/NonNFastaSizeTest.java
rename to src/test/java/picard/reference/NonNFastaSizeTest.java
diff --git a/src/tests/java/picard/sam/AddCommentsToBamTest.java b/src/test/java/picard/sam/AddCommentsToBamTest.java
similarity index 100%
rename from src/tests/java/picard/sam/AddCommentsToBamTest.java
rename to src/test/java/picard/sam/AddCommentsToBamTest.java
diff --git a/src/tests/java/picard/sam/CleanSamTest.java b/src/test/java/picard/sam/CleanSamTest.java
similarity index 100%
rename from src/tests/java/picard/sam/CleanSamTest.java
rename to src/test/java/picard/sam/CleanSamTest.java
diff --git a/src/tests/java/picard/sam/CompareSAMsTest.java b/src/test/java/picard/sam/CompareSAMsTest.java
similarity index 96%
rename from src/tests/java/picard/sam/CompareSAMsTest.java
rename to src/test/java/picard/sam/CompareSAMsTest.java
index 4a51ae0..1c07aec 100644
--- a/src/tests/java/picard/sam/CompareSAMsTest.java
+++ b/src/test/java/picard/sam/CompareSAMsTest.java
@@ -114,6 +114,10 @@ public class CompareSAMsTest extends CommandLineProgramTest {
}
@Test
+ public void testGroupWithSameCoordinateSamePosition() { // compares genomic_sorted_same_position.sam against itself
+ testHelper("genomic_sorted_same_position.sam", "genomic_sorted_same_position.sam", 2, 0, 0, 0, 0, 0, 0, true); // presumably 2 matching records, no differences, files reported equal - confirm against testHelper's parameter order
+ }
+ @Test
public void testGroupWithSameCoordinateAndNoMatchInOther() {
testHelper("group_same_coord.sam", "diff_coords.sam", 0, 5, 0, 0, 0, 0, 0, false);
}
diff --git a/src/tests/java/picard/sam/CreateSequenceDictionaryTest.java b/src/test/java/picard/sam/CreateSequenceDictionaryTest.java
similarity index 100%
rename from src/tests/java/picard/sam/CreateSequenceDictionaryTest.java
rename to src/test/java/picard/sam/CreateSequenceDictionaryTest.java
diff --git a/src/tests/java/picard/sam/FastqToSamTest.java b/src/test/java/picard/sam/FastqToSamTest.java
similarity index 100%
rename from src/tests/java/picard/sam/FastqToSamTest.java
rename to src/test/java/picard/sam/FastqToSamTest.java
diff --git a/src/tests/java/picard/sam/FilterSamReadsTest.java b/src/test/java/picard/sam/FilterSamReadsTest.java
similarity index 100%
rename from src/tests/java/picard/sam/FilterSamReadsTest.java
rename to src/test/java/picard/sam/FilterSamReadsTest.java
diff --git a/src/tests/java/picard/sam/FixMateInformationTest.java b/src/test/java/picard/sam/FixMateInformationTest.java
similarity index 100%
rename from src/tests/java/picard/sam/FixMateInformationTest.java
rename to src/test/java/picard/sam/FixMateInformationTest.java
diff --git a/src/tests/java/picard/sam/GatherBamFilesTest.java b/src/test/java/picard/sam/GatherBamFilesTest.java
similarity index 100%
rename from src/tests/java/picard/sam/GatherBamFilesTest.java
rename to src/test/java/picard/sam/GatherBamFilesTest.java
diff --git a/src/tests/java/picard/sam/MergeBamAlignmentTest.java b/src/test/java/picard/sam/MergeBamAlignmentTest.java
similarity index 100%
rename from src/tests/java/picard/sam/MergeBamAlignmentTest.java
rename to src/test/java/picard/sam/MergeBamAlignmentTest.java
diff --git a/src/tests/java/picard/sam/MergeSamFilesTest.java b/src/test/java/picard/sam/MergeSamFilesTest.java
similarity index 100%
rename from src/tests/java/picard/sam/MergeSamFilesTest.java
rename to src/test/java/picard/sam/MergeSamFilesTest.java
diff --git a/src/tests/java/picard/sam/PositionBasedDownsampleSamTest.java b/src/test/java/picard/sam/PositionBasedDownsampleSamTest.java
similarity index 100%
rename from src/tests/java/picard/sam/PositionBasedDownsampleSamTest.java
rename to src/test/java/picard/sam/PositionBasedDownsampleSamTest.java
diff --git a/src/test/java/picard/sam/RevertSamTest.java b/src/test/java/picard/sam/RevertSamTest.java
new file mode 100755
index 0000000..d14046d
--- /dev/null
+++ b/src/test/java/picard/sam/RevertSamTest.java
@@ -0,0 +1,449 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.sam;
+
+import htsjdk.samtools.SAMFileHeader;
+import htsjdk.samtools.SAMReadGroupRecord;
+import htsjdk.samtools.SAMRecord;
+import htsjdk.samtools.SAMTag;
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.ValidationStringency;
+import htsjdk.samtools.util.CloserUtil;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import picard.PicardException;
+import picard.cmdline.CommandLineProgramTest;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: ktibbett
+ * Date: Jul 20, 2010
+ * Time: 10:27:58 AM
+ * To change this template use File | Settings | File Templates.
+ */
+public class RevertSamTest extends CommandLineProgramTest {
+ // SAM inputs handed to the tool through INPUT=.
+ private static final String basicSamToRevert = "testdata/picard/sam/revert_sam_basic.sam";
+ private static final String sampleLibraryOverrideSam = "testdata/picard/sam/revert_sam_sample_library_override.sam";
+ // OUTPUT_MAP fixtures: one the validator tests expect to be accepted, one they expect to be
+ // unreadable (presumably absent on disk), and one they expect to be rejected for its header.
+ private static final File validOutputMap = new File("testdata/picard/sam/revert_sam_valid_output_map.txt");
+ private static final File nonExistentOutputMap = new File("testdata/picard/sam/revert_sam_does_not_exist.txt");
+ private static final File badHeaderOutputMap = new File("testdata/picard/sam/revert_sam_bad_header_output_map.txt");
+ // Directory-valued OUTPUT used by the validator tests.
+ private static final File samTestData = new File("testdata/picard/sam");
+ // File-valued OUTPUT used by the non-read-group validator test.
+ private static final File writablePath = new File("testdata/picard/sam/revert_sam_writable.bam");
+ // Reference passed as REFERENCE_SEQUENCE and to the SamReaderFactory when outputs are read back.
+ private static final File referenceFasta = new File("testdata/picard/reference/test.fasta");
+ // Input for the single-end revert test.
+ private static final String singleEndSamToRevert = "testdata/picard/sam/revert_sam_single_end.sam";
+
+ private static final String revertedQualities =
+ "11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111";
+
+ private static final String unmappedRead = "both_reads_present_only_first_aligns/2";
+
+    /** Returns the simple class name of {@link RevertSam}, the tool these tests exercise. */
+    public String getCommandLineProgramName() {
+        final String toolName = RevertSam.class.getSimpleName();
+        return toolName;
+    }
+
+ @Test(dataProvider="positiveTestData")
+ public void basicPositiveTests(final SAMFileHeader.SortOrder so, final boolean removeDuplicates, final boolean removeAlignmentInfo,
+ final boolean restoreOriginalQualities, final boolean outputByReadGroup, final String sample, final String library,
+ final List<String> attributesToClear) throws Exception {
+
+ final File output;
+ File output0 = null;
+ File output1 = null;
+ File output2 = null;
+ if (outputByReadGroup) {
+ output = Files.createTempDirectory("picardRevertSamTest").toFile();
+ output0 = Paths.get(output.toString(), "0.sam").toFile();
+ output1 = Paths.get(output.toString(), "1.sam").toFile();
+ output2 = Paths.get(output.toString(), "2.sam").toFile();
+ } else {
+ output = File.createTempFile("reverted", ".sam");
+ }
+ output.deleteOnExit();
+ final RevertSam reverter = new RevertSam();
+ int argSize = 5 + (so != null ? 1 : 0) + attributesToClear.size() + (sample != null ? 1 : 0) + (library != null ? 1 : 0);
+ if (outputByReadGroup) {
+ argSize++;
+ }
+ final String args[] = new String[argSize];
+ int index = 0;
+ args[index++] = "INPUT=" + basicSamToRevert;
+ args[index++] = "OUTPUT=" + output.getAbsolutePath();
+ if (outputByReadGroup) {
+ args[index++] = "OUTPUT_BY_READGROUP=" + outputByReadGroup;
+ }
+ if (so != null) {
+ args[index++] = "SORT_ORDER=" + so.name();
+ }
+ args[index++] = "REMOVE_DUPLICATE_INFORMATION=" + removeDuplicates;
+ args[index++] = "REMOVE_ALIGNMENT_INFORMATION=" + removeAlignmentInfo;
+ args[index++] = "RESTORE_ORIGINAL_QUALITIES=" + restoreOriginalQualities;
+ if (sample != null) {
+ args[index++] = "SAMPLE_ALIAS=" + sample;
+ }
+ if (library != null) {
+ args[index++] = "LIBRARY_NAME=" + library;
+ }
+ for (final String attr : attributesToClear) {
+ args[index++] = "ATTRIBUTE_TO_CLEAR=" + attr;
+ }
+
+ runPicardCommandLine(args);
+
+ if (outputByReadGroup) {
+ verifyPositiveResults(output0, reverter, removeDuplicates, removeAlignmentInfo, restoreOriginalQualities, outputByReadGroup, "0", 2, sample, library);
+ verifyPositiveResults(output1, reverter, removeDuplicates, removeAlignmentInfo, restoreOriginalQualities, outputByReadGroup, "1", 4, sample, library);
+ verifyPositiveResults(output2, reverter, removeDuplicates, removeAlignmentInfo, restoreOriginalQualities, outputByReadGroup, "2", 2, sample, library);
+ } else {
+ verifyPositiveResults(output, reverter, removeDuplicates, removeAlignmentInfo, restoreOriginalQualities, outputByReadGroup, null, 8, sample, library);
+ }
+ }
+
+    /**
+     * Runs RevertSam with OUTPUT_BY_READGROUP=true and an OUTPUT_MAP that routes read groups
+     * 0, 1 and 2 to a SAM, a CRAM and a BAM file respectively, then verifies each output.
+     */
+    @Test
+    public void testOutputByReadGroupWithOutputMap() throws Exception {
+        final File outputDir = Files.createTempDirectory("tmpPicardTest").toFile();
+        // deleteOnExit removes entries in reverse registration order, so registering the directory
+        // before its files (registered later, when each output is verified) lets it be removed last.
+        outputDir.deleteOnExit();
+
+        final String outputPath0 = outputDir + "/my_rg0.sam";
+        final String outputPath1 = outputDir + "/rg1.cram";
+        final String outputPath2 = outputDir + "/my_rg2.bam";
+
+        // Create the output map; register cleanup first so the file goes away even if writing fails.
+        final File outputMapFile = Files.createTempFile("picardRevertSamTestOutputMap", ".txt").toFile();
+        outputMapFile.deleteOnExit();
+        try (final PrintWriter mapWriter = new PrintWriter(outputMapFile)) {
+            mapWriter.println("READ_GROUP_ID\tOUTPUT");
+            mapWriter.println("0\t" + outputPath0);
+            mapWriter.println("2\t" + outputPath2);
+            mapWriter.println("1\t" + outputPath1);
+        }
+        System.out.println("outputFile: " + outputPath0);
+        System.out.println("outputFile: " + outputPath1);
+        System.out.println("outputFile: " + outputPath2);
+
+        final RevertSam reverter = new RevertSam();
+
+        final String[] args = {
+                "INPUT=" + basicSamToRevert,
+                "OUTPUT_BY_READGROUP=true",
+                "OUTPUT_MAP=" + outputMapFile,
+                "REFERENCE_SEQUENCE=" + referenceFasta,
+                "SORT_ORDER=" + SAMFileHeader.SortOrder.queryname.name(),
+                "REMOVE_DUPLICATE_INFORMATION=" + true,
+                "REMOVE_ALIGNMENT_INFORMATION=" + true,
+                "RESTORE_ORIGINAL_QUALITIES=" + true,
+                "SAMPLE_ALIAS=" + "test_sample_1",
+                "LIBRARY_NAME=" + "test_library_1",
+                "ATTRIBUTE_TO_CLEAR=" + SAMTag.NM.name()
+        };
+
+        runPicardCommandLine(args);
+
+        final File output0 = new File(outputPath0);
+        final File output1 = new File(outputPath1);
+        final File output2 = new File(outputPath2);
+        verifyPositiveResults(output0, reverter, true, true, true, true, "0", 2, "test_sample_1", "test_library_1");
+        verifyPositiveResults(output1, reverter, true, true, true, true, "1", 4, "test_sample_1", "test_library_1");
+        verifyPositiveResults(output2, reverter, true, true, true, true, "2", 2, "test_sample_1", "test_library_1");
+    }
+
+    /** Reverts the single-end input with default options and requires the result to pass strict validation. */
+    @Test
+    public void testSingleEnd() throws Exception {
+        final File reverted = File.createTempFile("single_end_reverted", ".sam");
+        reverted.deleteOnExit();
+
+        runPicardCommandLine(new String[] {
+                "INPUT=" + singleEndSamToRevert,
+                "OUTPUT=" + reverted.getAbsolutePath()
+        });
+
+        final ValidateSamFile samValidator = new ValidateSamFile();
+        samValidator.INPUT = reverted;
+        samValidator.VALIDATION_STRINGENCY = ValidationStringency.STRICT;
+        samValidator.MODE = ValidateSamFile.Mode.VERBOSE;
+
+        final int exitCode = samValidator.doWork();
+        Assert.assertEquals(exitCode, 0, "Validation of reverted single-end sample failed.");
+    }
+
+    /**
+     * Reads {@code outputFile} back and checks its header and records against the options the
+     * tool was run with. The file is registered for deletion on JVM exit.
+     *
+     * @param outputFile               SAM/BAM/CRAM file written by the tool
+     * @param reverter                 instance whose ATTRIBUTE_TO_CLEAR list is consulted
+     * @param removeDuplicates         when true, no record may carry the duplicate flag
+     * @param removeAlignmentInfo      when true, every record must be unmapped and no program records may remain
+     * @param restoreOriginalQualities when true, base qualities must equal {@code revertedQualities}
+     *                                 (except for the read named by {@code unmappedRead})
+     * @param outputByReadGroup        when true, the header must hold exactly one read group with id {@code readGroupId}
+     * @param readGroupId              expected read group id; only checked when {@code outputByReadGroup} is true
+     * @param numReadsExpected         exact number of records expected in the file
+     * @param sample                   expected sample for every read group, or null to expect "Hi,Mom!"
+     * @param library                  expected library for every read group, or null to expect "my-library"
+     */
+    private void verifyPositiveResults(
+            final File outputFile,
+            final RevertSam reverter,
+            final boolean removeDuplicates,
+            final boolean removeAlignmentInfo,
+            final boolean restoreOriginalQualities,
+            final boolean outputByReadGroup,
+            final String readGroupId,
+            final int numReadsExpected,
+            final String sample,
+            final String library) {
+
+        outputFile.deleteOnExit();
+        final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(referenceFasta).open(outputFile);
+        // try/finally so the reader is released even when an assertion below fails.
+        try {
+            final SAMFileHeader header = reader.getFileHeader();
+            Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.queryname);
+            Assert.assertEquals(header.getProgramRecords().size(), removeAlignmentInfo ? 0 : 1);
+
+            final List<SAMReadGroupRecord> readGroups = header.getReadGroups();
+            if (outputByReadGroup) {
+                Assert.assertEquals(readGroups.size(), 1);
+                Assert.assertEquals(readGroups.get(0).getId(), readGroupId);
+            }
+
+            // Without an override the read groups are expected to keep these values.
+            final String expectedSample = sample != null ? sample : "Hi,Mom!";
+            final String expectedLibrary = library != null ? library : "my-library";
+            for (final SAMReadGroupRecord rg : readGroups) {
+                Assert.assertEquals(rg.getSample(), expectedSample);
+                Assert.assertEquals(rg.getLibrary(), expectedLibrary);
+            }
+
+            int numReads = 0;
+            for (final SAMRecord rec : reader) {
+                numReads++;
+                if (removeDuplicates) {
+                    Assert.assertFalse(rec.getDuplicateReadFlag(),
+                            "Duplicates should have been removed: " + rec.getReadName());
+                }
+
+                if (removeAlignmentInfo) {
+                    Assert.assertTrue(rec.getReadUnmappedFlag(),
+                            "Alignment info should have been removed: " + rec.getReadName());
+                }
+
+                final String readId = rec.getReadName() + "/" + (rec.getFirstOfPairFlag() ? "1" : "2");
+                if (restoreOriginalQualities && !unmappedRead.equals(readId)) {
+                    Assert.assertEquals(rec.getBaseQualityString(), revertedQualities);
+                } else {
+                    // Value comparison: assertNotSame only compares references and would pass
+                    // even if the qualities had been reverted.
+                    Assert.assertNotEquals(rec.getBaseQualityString(), revertedQualities);
+                }
+
+                // NOTE(review): the callers in this class pass a freshly constructed RevertSam that
+                // never parsed the command line, so this only sees its default ATTRIBUTE_TO_CLEAR
+                // values, not the ATTRIBUTE_TO_CLEAR arguments given to the run -- confirm intent.
+                for (final SAMRecord.SAMTagAndValue attr : rec.getAttributes()) {
+                    final boolean alignmentTag = attr.tag.equals("PG") || attr.tag.equals("NM") || attr.tag.equals("MQ");
+                    if (removeAlignmentInfo || !alignmentTag) {
+                        Assert.assertFalse(reverter.ATTRIBUTE_TO_CLEAR.contains(attr.tag),
+                                attr.tag + " should have been cleared.");
+                    }
+                }
+            }
+            Assert.assertEquals(numReads, numReadsExpected);
+        } finally {
+            CloserUtil.close(reader);
+        }
+    }
+
+    /**
+     * Rows for {@code basicPositiveTests}. Columns: sort order, removeDuplicates, removeAlignmentInfo,
+     * restoreOriginalQualities, outputByReadGroup, sample, library, attributesToClear.
+     */
+    @DataProvider(name="positiveTestData")
+    public Object[][] getPostitiveTestData() {
+        final List<String> noAttributes = Collections.<String>emptyList();
+        final Object[] byReadGroupDefaults = {null, true, true, true, true, null, null, noAttributes};
+        final Object[] querynameWithSample = {SAMFileHeader.SortOrder.queryname, true, true, true, false, "Hey,Dad!", null, Arrays.asList("XT")};
+        final Object[] sampleAndLibrary = {null, false, true, false, false, "Hey,Dad!", "NewLibraryName", Arrays.asList("XT")};
+        final Object[] nothingReverted = {null, false, false, false, false, null, null, noAttributes};
+        return new Object[][] {byReadGroupDefaults, querynameWithSample, sampleAndLibrary, nothingReverted};
+    }
+
+    /**
+     * Runs RevertSam on the override input with a SAMPLE_ALIAS and/or LIBRARY_NAME and expects a
+     * {@link PicardException}. A null {@code sample} or {@code library} omits that argument.
+     */
+    @Test(dataProvider="overrideTestData", expectedExceptions = {PicardException.class})
+    public void testSampleLibraryOverride(final String sample, final String library) throws Exception {
+
+        final File output = File.createTempFile("bad", ".sam");
+        output.deleteOnExit();
+
+        // A list avoids hand-computing the array size from the optional arguments.
+        final List<String> args = new ArrayList<String>();
+        args.add("INPUT=" + sampleLibraryOverrideSam);
+        args.add("OUTPUT=" + output.getAbsolutePath());
+        if (sample != null) {
+            args.add("SAMPLE_ALIAS=" + sample);
+        }
+        if (library != null) {
+            args.add("LIBRARY_NAME=" + library);
+        }
+        runPicardCommandLine(args.toArray(new String[args.size()]));
+        Assert.fail("Negative test should have thrown an exception and didn't");
+    }
+
+    /** Rows for {@code testSampleLibraryOverride}: (sample, library), with null meaning "not supplied". */
+    @DataProvider(name="overrideTestData")
+    public Object[][] getNegativeTestData() {
+        final Object[] sampleOnly = {"NewSample", null};
+        final Object[] libraryOnly = {null, "NewLibrary"};
+        final Object[] sampleAndLibrary = {"NewSample", "NewLibrary"};
+        return new Object[][] {sampleOnly, libraryOnly, sampleAndLibrary};
+    }
+
+    /** Supplying both OUTPUT and OUTPUT_MAP together with OUTPUT_BY_READGROUP must yield exit code 1. */
+    @Test
+    public void testMutexOutputMapVsOutput() throws Exception {
+        final File outputDir = Files.createTempDirectory("picardRevertSamTest").toFile();
+        outputDir.deleteOnExit();
+
+        final String[] args = {
+                "INPUT=" + basicSamToRevert,
+                "OUTPUT_BY_READGROUP=true",
+                "OUTPUT=" + outputDir,
+                "OUTPUT_MAP=" + validOutputMap
+        };
+        final int returnCode = runPicardCommandLine(args);
+        Assert.assertEquals(returnCode, 1);
+    }
+
+    /** A valid output map with no OUTPUT must produce no validation errors in by-read-group mode. */
+    @Test
+    public void testValidateOutputParamsByReadGroupMapValid() {
+        final List<String> problems = new ArrayList<String>();
+        RevertSam.ValidationUtil.validateOutputParamsByReadGroup(null, validOutputMap, problems);
+        final int problemCount = problems.size();
+        Assert.assertEquals(problemCount, 0);
+    }
+
+ @Test
+ public void testValidateOutputParamsByReadGroupMissingMap() {
+ final List<String> errors = new ArrayList<String>();
+ RevertSam.ValidationUtil.validateOutputParamsByReadGroup(null, nonExistentOutputMap, errors);
+ Assert.assertEquals(errors.size(), 1);
+ Assert.assertEquals(errors.get(0).contains("Cannot read"), true);
+ }
+
+ @Test
+ public void testValidateOutputParamsByReadGroupBadHeaderMap() {
+ final List<String> errors = new ArrayList<String>();
+ RevertSam.ValidationUtil.validateOutputParamsByReadGroup(null, badHeaderOutputMap, errors);
+ Assert.assertEquals(errors.size(), 1);
+ Assert.assertEquals(errors.get(0).contains("Invalid header"), true);
+ }
+
+ @Test
+ public void testValidateOutputParamsByReadGroupNoMapOrDir() {
+ final List<String> errors = new ArrayList<String>();
+ RevertSam.ValidationUtil.validateOutputParamsByReadGroup(null, null, errors);
+ Assert.assertEquals(errors.size(), 1);
+ Assert.assertEquals(errors.get(0).contains("Must provide either"), true);
+ }
+
+    /** A directory-valued OUTPUT with no map must produce no validation errors in by-read-group mode. */
+    @Test
+    public void testValidateOutputParamsByReadGroupDirValid() {
+        final List<String> problems = new ArrayList<String>();
+        RevertSam.ValidationUtil.validateOutputParamsByReadGroup(samTestData, null, problems);
+        final int problemCount = problems.size();
+        Assert.assertEquals(problemCount, 0);
+    }
+
+    /** A file-valued OUTPUT with no map must produce no validation errors when not splitting by read group. */
+    @Test
+    public void testValidateOutputParamsNotByReadGroupValid() {
+        final List<String> problems = new ArrayList<String>();
+        RevertSam.ValidationUtil.validateOutputParamsNotByReadGroup(writablePath, null, problems);
+        final int problemCount = problems.size();
+        Assert.assertEquals(problemCount, 0);
+    }
+
+ @Test
+ public void testValidateOutputParamsNotByReadGroupNoOutput() {
+ final List<String> errors = new ArrayList<String>();
+ RevertSam.ValidationUtil.validateOutputParamsNotByReadGroup(null, null, errors);
+ Assert.assertEquals(errors.size(), 1);
+ Assert.assertEquals(errors.get(0).contains("OUTPUT is required"), true);
+ }
+
+ @Test
+ public void testValidateOutputParamsNotByReadGroupMap() {
+ final List<String> errors = new ArrayList<String>();
+ RevertSam.ValidationUtil.validateOutputParamsNotByReadGroup(null, validOutputMap, errors);
+ Assert.assertEquals(errors.size(), 2);
+ Assert.assertEquals(errors.get(0).contains("Cannot provide OUTPUT_MAP"), true);
+ Assert.assertEquals(errors.get(1).contains("OUTPUT is required"), true);
+ }
+
+ @Test
+ public void testValidateOutputParamsNotByReadGroupDir() {
+ final List<String> errors = new ArrayList<String>();
+ RevertSam.ValidationUtil.validateOutputParamsNotByReadGroup(samTestData, null, errors);
+ Assert.assertEquals(errors.size(), 1);
+ Assert.assertEquals(errors.get(0).contains("should not be a directory"), true);
+ }
+
+    /** assertAllReadGroupsMapped must not throw when every read group passed in has an entry in the map. */
+    @Test
+    public void testAssertAllReadGroupsMappedSuccess() {
+        final Map<String, File> mapping = new HashMap<String, File>();
+        mapping.put("rg1", new File("/foo/bar/rg1.bam"));
+        mapping.put("rg2", new File("/foo/bar/rg2.bam"));
+
+        final SAMReadGroupRecord first = new SAMReadGroupRecord("rg1");
+        final SAMReadGroupRecord second = new SAMReadGroupRecord("rg2");
+
+        // Both groups together, then each on its own.
+        RevertSam.ValidationUtil.assertAllReadGroupsMapped(mapping, Arrays.asList(first, second));
+        RevertSam.ValidationUtil.assertAllReadGroupsMapped(mapping, Arrays.asList(first));
+        RevertSam.ValidationUtil.assertAllReadGroupsMapped(mapping, Arrays.asList(second));
+    }
+
+    /** assertAllReadGroupsMapped must throw a {@link PicardException} when a read group (rg3) has no map entry. */
+    @Test(expectedExceptions = {PicardException.class})
+    public void testAssertAllReadGroupsMappedFailure() {
+        final Map<String, File> mapping = new HashMap<String, File>();
+        mapping.put("rg1", new File("/foo/bar/rg1.bam"));
+        mapping.put("rg2", new File("/foo/bar/rg2.bam"));
+
+        final SAMReadGroupRecord first = new SAMReadGroupRecord("rg1");
+        final SAMReadGroupRecord second = new SAMReadGroupRecord("rg2");
+        final SAMReadGroupRecord unmapped = new SAMReadGroupRecord("rg3");
+
+        RevertSam.ValidationUtil.assertAllReadGroupsMapped(mapping, Arrays.asList(first, second, unmapped));
+    }
+
+    /**
+     * isOutputMapHeaderValid must accept the two-column header READ_GROUP_ID, OUTPUT and reject
+     * a header with only OUTPUT as well as an empty header.
+     */
+    @Test
+    public void testIsOutputMapHeaderValid() {
+        Assert.assertTrue(RevertSam.ValidationUtil.isOutputMapHeaderValid(Arrays.asList("READ_GROUP_ID", "OUTPUT")));
+
+        Assert.assertFalse(RevertSam.ValidationUtil.isOutputMapHeaderValid(Arrays.asList("OUTPUT")));
+
+        // Typed empty list instead of the raw Collections.EMPTY_LIST to avoid an unchecked conversion.
+        Assert.assertFalse(RevertSam.ValidationUtil.isOutputMapHeaderValid(Collections.<String>emptyList()));
+    }
+
+    /** Without a map file, createOutputMap must place each read group at outputDir/id + extension. */
+    @Test
+    public void testFilePathsWithoutMapFile() {
+        final List<SAMReadGroupRecord> groups = Arrays.asList(
+                new SAMReadGroupRecord("rg1"),
+                new SAMReadGroupRecord("rg2"));
+
+        final Map<String, File> byReadGroup = RevertSam.createOutputMap(null, new File("/foo/bar"), ".bam", groups);
+
+        Assert.assertEquals(byReadGroup.get("rg1"), new File("/foo/bar/rg1.bam"));
+        Assert.assertEquals(byReadGroup.get("rg2"), new File("/foo/bar/rg2.bam"));
+    }
+
+    /** With a map file, createOutputMap must return the paths listed in that file for rg1 and rg2. */
+    @Test
+    public void testFilePathsWithMapFile() {
+        final Map<String, File> byReadGroup = RevertSam.createOutputMap(validOutputMap, null, ".bam", Collections.emptyList());
+        final File expectedRg1 = new File("/path/to/my_rg_1.ubam");
+        final File expectedRg2 = new File("/path/to/my_rg_2.ubam");
+        Assert.assertEquals(byReadGroup.get("rg1"), expectedRg1);
+        Assert.assertEquals(byReadGroup.get("rg2"), expectedRg2);
+    }
+
+ @Test
+ public void testGetDefaultExtension() {
+ Assert.assertEquals(RevertSam.getDefaultExtension("this.is.a.sam"), ".sam");
+ Assert.assertEquals(RevertSam.getDefaultExtension("this.is.a.cram"), ".cram");
+ Assert.assertEquals(RevertSam.getDefaultExtension("this.is.a.bam"), ".bam");
+ Assert.assertEquals(RevertSam.getDefaultExtension("foo"), ".bam");
+ }
+}
diff --git a/src/tests/java/picard/sam/SamFileConverterTest.java b/src/test/java/picard/sam/SamFileConverterTest.java
similarity index 100%
rename from src/tests/java/picard/sam/SamFileConverterTest.java
rename to src/test/java/picard/sam/SamFileConverterTest.java
diff --git a/src/tests/java/picard/sam/SamToFastqTest.java b/src/test/java/picard/sam/SamToFastqTest.java
similarity index 100%
rename from src/tests/java/picard/sam/SamToFastqTest.java
rename to src/test/java/picard/sam/SamToFastqTest.java
diff --git a/src/test/java/picard/sam/SetNmAndUqTagsTest.java b/src/test/java/picard/sam/SetNmAndUqTagsTest.java
new file mode 100644
index 0000000..899e8ba
--- /dev/null
+++ b/src/test/java/picard/sam/SetNmAndUqTagsTest.java
@@ -0,0 +1,70 @@
+package picard.sam;
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Runs each input through SortSam (coordinate order), SetNmAndUqTags and ValidateSamFile in turn
+ * and requires every step to exit with 0.
+ */
+public class SetNmAndUqTagsTest {
+
+    private static final File fasta = new File("testdata/picard/sam/merger.fasta");
+
+    /** Rows of (input SAM, reference FASTA) for {@link #TestValidSort}. */
+    @DataProvider(name="filesToFix")
+    Object[][] TestValidSortData() {
+        // NOTE(review): the queryname-sorted row used to appear twice, verbatim. The duplicate only
+        // re-ran the same case, so it is dropped; if a third, different input was intended, add it here.
+        return new Object[][]{
+                new Object[]{new File("testdata/picard/sam/aligned.sam"), fasta},
+                new Object[]{new File("testdata/picard/sam/aligned_queryname_sorted.sam"), fasta},
+        };
+    }
+
+    /**
+     * Sorts {@code input}, fixes its tags against {@code reference}, and validates the result.
+     * All intermediate files are temporary and deleted on JVM exit.
+     */
+    @Test(dataProvider = "filesToFix")
+    public void TestValidSort(final File input, final File reference) throws IOException {
+        final File sortOutput = File.createTempFile("Sort", ".bam");
+        sortOutput.deleteOnExit();
+        final File fixOutput = File.createTempFile("Fix", ".bam");
+        fixOutput.deleteOnExit();
+        final File validateOutput = File.createTempFile("Sort", ".validation_report");
+        validateOutput.deleteOnExit();
+
+        sort(input, sortOutput);
+        fixFile(sortOutput, fixOutput, reference);
+        validate(fixOutput, validateOutput, reference);
+    }
+
+    /** Runs ValidateSamFile in VERBOSE mode on {@code input}, writing its report to {@code output}. */
+    private void validate(final File input, final File output, final File reference) {
+        final String[] args = new String[] {
+                "INPUT="+input,
+                "OUTPUT="+output,
+                "MODE=VERBOSE",
+                "REFERENCE_SEQUENCE="+reference };
+
+        final ValidateSamFile validateSam = new ValidateSamFile();
+        Assert.assertEquals(validateSam.instanceMain(args), 0, "validate did not succeed");
+    }
+
+    /** Runs SortSam with SORT_ORDER=coordinate from {@code input} to {@code output}. */
+    private void sort(final File input, final File output) {
+        final String[] args = new String[] {
+                "INPUT=" + input,
+                "OUTPUT=" + output,
+                "SORT_ORDER=coordinate"
+        };
+
+        final SortSam sortSam = new SortSam();
+        Assert.assertEquals(sortSam.instanceMain(args), 0, "Sort did not succeed");
+    }
+
+    /** Runs SetNmAndUqTags from {@code input} to {@code output} against {@code reference}. */
+    private void fixFile(final File input, final File output, final File reference) {
+        final String[] args = new String[] {
+                "INPUT="+input,
+                "OUTPUT="+output,
+                "REFERENCE_SEQUENCE="+reference };
+
+        final SetNmAndUqTags setNmAndUqTags = new SetNmAndUqTags();
+        Assert.assertEquals(setNmAndUqTags.instanceMain(args), 0, "Fix did not succeed");
+    }
+}
diff --git a/src/tests/java/picard/sam/SplitSamByLibraryTest.java b/src/test/java/picard/sam/SplitSamByLibraryTest.java
similarity index 100%
rename from src/tests/java/picard/sam/SplitSamByLibraryTest.java
rename to src/test/java/picard/sam/SplitSamByLibraryTest.java
diff --git a/src/tests/java/picard/sam/ViewSamTest.java b/src/test/java/picard/sam/ViewSamTest.java
similarity index 100%
rename from src/tests/java/picard/sam/ViewSamTest.java
rename to src/test/java/picard/sam/ViewSamTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java
similarity index 84%
rename from src/tests/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java
rename to src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java
index 9b5d65d..192ab00 100644
--- a/src/tests/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java
+++ b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java
@@ -28,6 +28,7 @@ import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.util.CloseableIterator;
import htsjdk.samtools.util.CloserUtil;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
@@ -42,6 +43,10 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
protected final static int DEFAULT_BASE_QUALITY = 10;
+    /**
+     * Overridable hook; this base implementation answers {@code false}. The tests in this class
+     * combine it with {@code !fragLikeFirst} to form a boolean passed to {@code addMappedFragment}
+     * for the additional secondary/supplementary fragment (presumably its expected duplicate flag).
+     */
+    protected boolean markSecondaryAndSupplementaryRecordsLikeTheCanonical() {
+        return false;
+    }
+
+    /**
+     * Overridable hook; this base implementation answers {@code false}. The tests in this class pass
+     * it to {@code addMatePair} in the slot belonging to the unmapped end of a pair (presumably that
+     * end's expected duplicate flag).
+     */
+    protected boolean markUnmappedRecordsLikeTheirMates() {
+        return false;
+    }
+
@Test
public void testSingleUnmappedFragment() {
final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
@@ -191,6 +196,27 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
tester.runTest();
}
+    /**
+     * Five mate pairs at the same location plus one extra fragment that shares a read name with
+     * either the first pair or the third, flagged secondary and/or supplementary per the data row.
+     * Two optical duplicates are expected.
+     */
+    @Test(dataProvider = "secondarySupplementaryData")
+    public void testManyOpticalDuplicateClusterOneEndSamePositionOneCluster(final Boolean additionalFragSecondary, final Boolean additionalFragSupplementary, final Boolean fragLikeFirst) {
+        final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
+        tester.setExpectedOpticalDuplicate(2);
+
+        // canonical
+        tester.addMatePair("RUNID:7:2205:17939:39728", 1, 485328, 485312, false, false, false, false, "55M46S", "30S71M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
+        // library
+        tester.addMatePair("RUNID:7:2205:27949:39745", 1, 485328, 485328, false, false, true, true, "55M46S", "46S55M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
+        // optical (of canonical)
+        tester.addMatePair("RUNID:7:2205:17949:39745", 1, 485328, 485328, false, false, true, true, "55M46S", "46S55M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
+
+        // non-canonical: named after the first pair or after the optical duplicate above
+        final String fragName = fragLikeFirst ? "RUNID:7:2205:17939:39728" : "RUNID:7:2205:17949:39745";
+        final boolean fragFlag = markSecondaryAndSupplementaryRecordsLikeTheCanonical() && !fragLikeFirst;
+        tester.addMappedFragment(fragName, 1, 400, fragFlag, null, null, additionalFragSecondary, additionalFragSupplementary, DEFAULT_BASE_QUALITY);
+
+        // library
+        tester.addMatePair("RUNID:7:2205:37949:39745", 1, 485328, 485328, false, false, true, true, "55M46S", "46S55M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
+        // optical of canonical
+        tester.addMatePair("RUNID:7:2205:17959:39735", 1, 485328, 485328, false, false, true, true, "55M46S", "46S55M", false, true, false, false, false, DEFAULT_BASE_QUALITY);
+
+        tester.runTest();
+    }
+
@Test
public void testTwoMappedPairsAndMappedSecondaryFragment() {
final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
@@ -282,7 +308,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
@Test
public void testMappedPairAndMatePairSecondUnmapped() {
final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
- tester.addMatePair(1, 10040, 10040, false, true, true, false, "76M", null, false, false, false, false, false, DEFAULT_BASE_QUALITY); // second a duplicate,
+ tester.addMatePair(1, 10040, 10040, false, true, true, markUnmappedRecordsLikeTheirMates(), "76M", null, false, false, false, false, false, DEFAULT_BASE_QUALITY); // second a duplicate,
// second end unmapped
tester.addMappedPair(1, 10189, 10040, false, false, "41S35M", "65M11S", true, false, false, DEFAULT_BASE_QUALITY); // mapped OK
tester.runTest();
@@ -291,7 +317,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
@Test
public void testMappedPairAndMatePairFirstUnmapped() {
final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
- tester.addMatePair(1, 10040, 10040, true, false, false, true, null, "76M", false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate,
+ tester.addMatePair(1, 10040, 10040, true, false, markUnmappedRecordsLikeTheirMates(), true, null, "76M", false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate,
// first end unmapped
tester.addMappedPair(1, 10189, 10040, false, false, "41S35M", "65M11S", true, false, false, DEFAULT_BASE_QUALITY); // mapped OK
tester.runTest();
@@ -312,7 +338,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
@Test
public void testMappedPairAndMappedFragmentAndMatePairSecondUnmapped() {
final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
- tester.addMatePair(1, 10040, 10040, false, true, true, false, "76M", null, false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate,
+ tester.addMatePair(1, 10040, 10040, false, true, true, markUnmappedRecordsLikeTheirMates(), "76M", null, false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate,
// second end unmapped
tester.addMappedPair(1, 10189, 10040, false, false, "41S35M", "65M11S", true, false, false, DEFAULT_BASE_QUALITY); // mapped OK
tester.addMappedFragment(1, 10040, true, DEFAULT_BASE_QUALITY); // duplicate
@@ -322,7 +348,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
@Test
public void testMappedPairAndMappedFragmentAndMatePairFirstUnmapped() {
final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
- tester.addMatePair(1, 10040, 10040, true, false, false, true, null, "76M", false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate,
+ tester.addMatePair(1, 10040, 10040, true, false, markUnmappedRecordsLikeTheirMates(), true, null, "76M", false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate,
// first end unmapped
tester.addMappedPair(1, 10189, 10040, false, false, "41S35M", "65M11S", true, false, false, DEFAULT_BASE_QUALITY); // mapped OK
tester.addMappedFragment(1, 10040, true, DEFAULT_BASE_QUALITY); // duplicate
@@ -540,4 +566,37 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest {
tester.addMatePair("RUNID:1:1:16020:13352", 2, 41212324, 41212319, false, false, true, true, "33S35M", "28S40M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
tester.runTest();
}
+
+    /**
+     * Rows of (additionalFragSecondary, additionalFragSupplementary, fragLikeFirst): every
+     * secondary/supplementary combination except neither, once with fragLikeFirst true and once false.
+     */
+    @DataProvider(name = "secondarySupplementaryData")
+    public Object[][] secondarySupplementaryData() {
+        final Object[][] rows = {
+                new Object[] {true, true, true},
+                new Object[] {true, false, true},
+                new Object[] {false, true, true},
+                new Object[] {true, true, false},
+                new Object[] {true, false, false},
+                new Object[] {false, true, false}
+        };
+        return rows;
+    }
+
+    /**
+     * Two mate pairs plus one extra fragment named after either the first or the second pair,
+     * flagged secondary and/or supplementary per the data row. One optical duplicate is expected.
+     */
+    @Test(dataProvider = "secondarySupplementaryData")
+    public void testTwoMappedPairsWithSupplementaryReads(final Boolean additionalFragSecondary, final Boolean additionalFragSupplementary, final Boolean fragLikeFirst) {
+        final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
+        tester.setExpectedOpticalDuplicate(1);
+
+        tester.addMatePair("RUNID:1:1:15993:13361", 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
+        tester.addMatePair("RUNID:1:1:16020:13352", 2, 41212324, 41212319, false, false, true, true, "33S35M", "28S40M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
+
+        final String fragName = fragLikeFirst ? "RUNID:1:1:15993:13361" : "RUNID:1:1:16020:13352";
+        final boolean fragFlag = markSecondaryAndSupplementaryRecordsLikeTheCanonical() && !fragLikeFirst;
+        tester.addMappedFragment(fragName, 1, 400, fragFlag, null, null, additionalFragSecondary, additionalFragSupplementary, DEFAULT_BASE_QUALITY);
+
+        tester.runTest();
+    }
+
+    /**
+     * Two mate pairs plus one extra fragment named after either the first or the second pair,
+     * flagged secondary and/or supplementary per the data row. One optical duplicate is expected.
+     *
+     * NOTE(review): the records added here are the same as in
+     * testTwoMappedPairsWithSupplementaryReads, so nothing is placed "after canonical" as the
+     * name suggests -- confirm whether different positions or ordering were intended.
+     */
+    @Test(dataProvider = "secondarySupplementaryData")
+    public void testTwoMappedPairsWithSupplementaryReadsAfterCanonical(final Boolean additionalFragSecondary, final Boolean additionalFragSupplementary, final Boolean fragLikeFirst) {
+        final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester();
+        tester.setExpectedOpticalDuplicate(1);
+
+        tester.addMatePair("RUNID:1:1:15993:13361", 2, 41212324, 41212310, false, false, false, false, "33S35M", "19S49M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
+        tester.addMatePair("RUNID:1:1:16020:13352", 2, 41212324, 41212319, false, false, true, true, "33S35M", "28S40M", true, true, false, false, false, DEFAULT_BASE_QUALITY);
+
+        final String fragName = fragLikeFirst ? "RUNID:1:1:15993:13361" : "RUNID:1:1:16020:13352";
+        final boolean fragFlag = markSecondaryAndSupplementaryRecordsLikeTheCanonical() && !fragLikeFirst;
+        tester.addMappedFragment(fragName, 1, 400, fragFlag, null, null, additionalFragSecondary, additionalFragSupplementary, DEFAULT_BASE_QUALITY);
+
+        tester.runTest();
+    }
+
}
diff --git a/src/tests/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java
similarity index 88%
rename from src/tests/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java
rename to src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java
index cea6fcf..0e1f2f0 100644
--- a/src/tests/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java
+++ b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java
@@ -25,6 +25,7 @@
package picard.sam.markduplicates;
import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy;
+import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.SAMRecordSetBuilder;
import htsjdk.samtools.SamReader;
@@ -52,8 +53,12 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
final private File metricsFile;
final DuplicationMetrics expectedMetrics;
- public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy) {
- super(50, true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, duplicateScoringStrategy);
+ public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy, SAMFileHeader.SortOrder sortOrder) {
+ this(duplicateScoringStrategy, sortOrder, true);
+ }
+
+ public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy, SAMFileHeader.SortOrder sortOrder, boolean recordNeedSorting) {
+ super(50, true, SAMRecordSetBuilder.DEFAULT_CHROMOSOME_LENGTH, duplicateScoringStrategy, sortOrder, recordNeedSorting);
expectedMetrics = new DuplicationMetrics();
expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES = 0;
@@ -63,6 +68,9 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
addArg("DUPLICATE_SCORING_STRATEGY=" + duplicateScoringStrategy.name());
}
+ public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy) {
+ this(duplicateScoringStrategy, SAMFileHeader.SortOrder.coordinate);
+ }
public AbstractMarkDuplicatesCommandLineProgramTester() {
this(SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY);
@@ -81,7 +89,9 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
final CloseableIterator<SAMRecord> inputRecordIterator = this.getRecordIterator();
while (inputRecordIterator.hasNext()) {
final SAMRecord record = inputRecordIterator.next();
- if (!record.isSecondaryOrSupplementary()) {
+ if (record.isSecondaryOrSupplementary()) {
+ ++expectedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS;
+ } else {
final String key = samRecordToDuplicatesFlagsKey(record);
if (!this.duplicateFlags.containsKey(key)) {
System.err.println("DOES NOT CONTAIN KEY: " + key);
@@ -91,12 +101,10 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
// First bring the simple metricsFile up to date
if (record.getReadUnmappedFlag()) {
++expectedMetrics.UNMAPPED_READS;
- }
- else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) {
+ } else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) {
++expectedMetrics.UNPAIRED_READS_EXAMINED;
if (isDuplicate) ++expectedMetrics.UNPAIRED_READ_DUPLICATES;
- }
- else {
+ } else {
++expectedMetrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end
if (isDuplicate) ++expectedMetrics.READ_PAIR_DUPLICATES; // will need to be divided by 2 at the end
}
@@ -150,8 +158,8 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
catch (final FileNotFoundException ex) {
System.err.println("Metrics file not found: " + ex);
}
- // NB: Test writes an initial metrics line with a null entry for LIBRARY and 0 values for all metrics. Why?
- final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(metricsOutput.getMetrics().size() - 1);
+ Assert.assertEquals(metricsOutput.getMetrics().size(), 1);
+ final DuplicationMetrics observedMetrics = metricsOutput.getMetrics().get(0);
Assert.assertEquals(observedMetrics.UNPAIRED_READS_EXAMINED, expectedMetrics.UNPAIRED_READS_EXAMINED, "UNPAIRED_READS_EXAMINED does not match expected");
Assert.assertEquals(observedMetrics.READ_PAIRS_EXAMINED, expectedMetrics.READ_PAIRS_EXAMINED, "READ_PAIRS_EXAMINED does not match expected");
Assert.assertEquals(observedMetrics.UNMAPPED_READS, expectedMetrics.UNMAPPED_READS, "UNMAPPED_READS does not match expected");
@@ -160,6 +168,7 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
Assert.assertEquals(observedMetrics.READ_PAIR_OPTICAL_DUPLICATES, expectedMetrics.READ_PAIR_OPTICAL_DUPLICATES, "READ_PAIR_OPTICAL_DUPLICATES does not match expected");
Assert.assertEquals(observedMetrics.PERCENT_DUPLICATION, expectedMetrics.PERCENT_DUPLICATION, "PERCENT_DUPLICATION does not match expected");
Assert.assertEquals(observedMetrics.ESTIMATED_LIBRARY_SIZE, expectedMetrics.ESTIMATED_LIBRARY_SIZE, "ESTIMATED_LIBRARY_SIZE does not match expected");
+ Assert.assertEquals(observedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS, expectedMetrics.SECONDARY_OR_SUPPLEMENTARY_RDS, "SECONDARY_OR_SUPPLEMENTARY_RDS does not match expected");
} finally {
TestUtil.recursiveDelete(getOutputDir());
}
@@ -167,4 +176,3 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam
abstract protected CommandLineProgram getProgram();
}
-
diff --git a/src/test/java/picard/sam/markduplicates/AsIsMarkDuplicatesTester.java b/src/test/java/picard/sam/markduplicates/AsIsMarkDuplicatesTester.java
new file mode 100644
index 0000000..6996104
--- /dev/null
+++ b/src/test/java/picard/sam/markduplicates/AsIsMarkDuplicatesTester.java
@@ -0,0 +1,68 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2016 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package picard.sam.markduplicates;
+
+import htsjdk.samtools.SamReader;
+import htsjdk.samtools.SamReaderFactory;
+import htsjdk.samtools.util.CloserUtil;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+
+/**
+ * Tests a few hand-built SAM files as they are.
+ */
+public class AsIsMarkDuplicatesTester {
+
+ @DataProvider
+ public Object[][] testSameUnclipped5PrimeOppositeStrandData() {
+ final File TEST_DIR = new File("testdata/picard/sam/MarkDuplicates");
+ return new Object[][]{
+ new Object[]{new File(TEST_DIR, "sameUnclipped5primeEndv1.sam")},
+ new Object[]{new File(TEST_DIR, "sameUnclipped5primeEndv2.sam")},
+ new Object[]{new File(TEST_DIR, "sameUnclipped5primeEndCoordinateSortedv1.sam")},
+ new Object[]{new File(TEST_DIR, "sameUnclipped5primeEndCoordinateSortedv2.sam")},
+ new Object[]{new File(TEST_DIR, "sameUnclipped5primeEndCoordinateSortedv3.sam")},
+ new Object[]{new File(TEST_DIR, "sameUnclipped5primeEndCoordinateSortedv4.sam")}
+ };
+ }
+
+ /** Runs MarkDuplicates on a hand-built SAM file as-is: each record's existing duplicate flag is the expected result. */
+ @Test(dataProvider = "testSameUnclipped5PrimeOppositeStrandData")
+ public void testSameUnclipped5PrimeOppositeStrand(final File input) {
+ final AbstractMarkDuplicatesCommandLineProgramTester tester = new BySumOfBaseQAndInOriginalOrderMDTester();
+ final SamReader reader = SamReaderFactory.makeDefault().open(input);
+ try {
+ tester.setHeader(reader.getFileHeader());
+ reader.iterator().stream().forEach(tester::addRecord);
+ } finally {
+ CloserUtil.close(reader); // always release the file, even if addRecord's duplicate-key assertion fails
+ }
+ tester.setExpectedOpticalDuplicate(0);
+ tester.runTest();
+ }
+}
+
+
diff --git a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java b/src/test/java/picard/sam/markduplicates/BySumOfBaseQAndInOriginalOrderMDTester.java
similarity index 70%
copy from src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java
copy to src/test/java/picard/sam/markduplicates/BySumOfBaseQAndInOriginalOrderMDTester.java
index 53ef71f..6dfe244 100644
--- a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java
+++ b/src/test/java/picard/sam/markduplicates/BySumOfBaseQAndInOriginalOrderMDTester.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2015 The Broad Institute
+ * Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -25,21 +25,20 @@
package picard.sam.markduplicates;
import htsjdk.samtools.DuplicateScoringStrategy;
+import htsjdk.samtools.SAMFileHeader;
import picard.cmdline.CommandLineProgram;
/**
- * @author nhomer
+ * Created by farjoun on 5/25/16.
*/
-public class SimpleMarkDuplicatesWithMateCigarTester extends AbstractMarkDuplicatesCommandLineProgramTester {
+public class BySumOfBaseQAndInOriginalOrderMDTester extends AbstractMarkDuplicatesCommandLineProgramTester {
- public SimpleMarkDuplicatesWithMateCigarTester() {
- // NB: to be equivalent to MarkDuplicates we need to use SUM_OF_BASE_QUALITIES
- super(DuplicateScoringStrategy.ScoringStrategy.TOTAL_MAPPED_REFERENCE_LENGTH);
-
- addArg("MAX_RECORDS_IN_RAM=1000");
+ public BySumOfBaseQAndInOriginalOrderMDTester() {
+ super(DuplicateScoringStrategy.ScoringStrategy.SUM_OF_BASE_QUALITIES, SAMFileHeader.SortOrder.unsorted, false);
}
@Override
- protected CommandLineProgram getProgram() { return new SimpleMarkDuplicatesWithMateCigar(); }
+ protected CommandLineProgram getProgram() {
+ return new MarkDuplicates();
+ }
}
-
diff --git a/src/tests/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java b/src/test/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
similarity index 84%
rename from src/tests/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
rename to src/test/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
index bcaadc4..7dd28d7 100644
--- a/src/tests/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
+++ b/src/test/java/picard/sam/markduplicates/EstimateLibraryComplexityTest.java
@@ -58,7 +58,23 @@ public class EstimateLibraryComplexityTest extends CommandLineProgramTest {
final File output = File.createTempFile("estimateLibraryComplexity",".els_metrics");
output.deleteOnExit();
- final List<String> args =new ArrayList<String>();
+ final List<String> args = new ArrayList<>();
+ args.add("INPUT=" + input.getAbsolutePath());
+ args.add("OUTPUT=" + output.getAbsolutePath());
+ args.add("MIN_GROUP_COUNT=1");
+
+ Assert.assertEquals(runPicardCommandLine(args), 0);
+ examineMetricsFile(output, 2, 2);
+ }
+
+ /** Finds duplicates as expected ignoring secondary and supplementary records. */
+ @Test
+ public void testSimpleDuplicatesWithSecondaryAndSupplementaryRecords() throws IOException {
+ final File input = new File(TEST_DATA_DIR, "dupes_with_sos.sam");
+ final File output = File.createTempFile("estimateLibraryComplexity",".els_metrics");
+ output.deleteOnExit();
+
+ final List<String> args = new ArrayList<>();
args.add("INPUT=" + input.getAbsolutePath());
args.add("OUTPUT=" + output.getAbsolutePath());
args.add("MIN_GROUP_COUNT=1");
@@ -74,7 +90,7 @@ public class EstimateLibraryComplexityTest extends CommandLineProgramTest {
final File output = File.createTempFile("estimateLibraryComplexity",".els_metrics");
output.deleteOnExit();
- final List<String> args =new ArrayList<String>();
+ final List<String> args = new ArrayList<>();
args.add("INPUT=" + input.getAbsolutePath());
args.add("OUTPUT=" + output.getAbsolutePath());
args.add("MAX_DIFF_RATE=0.0");
@@ -91,7 +107,7 @@ public class EstimateLibraryComplexityTest extends CommandLineProgramTest {
final File output = File.createTempFile("estimateLibraryComplexity",".els_metrics");
output.deleteOnExit();
- final List<String> args =new ArrayList<String>();
+ final List<String> args = new ArrayList<>();
args.add("INPUT=" + input.getAbsolutePath());
args.add("OUTPUT=" + output.getAbsolutePath());
args.add("MAX_DIFF_RATE=0.0");
@@ -111,7 +127,7 @@ public class EstimateLibraryComplexityTest extends CommandLineProgramTest {
final File output = File.createTempFile("estimateLibraryComplexity",".els_metrics");
output.deleteOnExit();
- final List<String> args =new ArrayList<String>();
+ final List<String> args = new ArrayList<>();
args.add("INPUT=" + input.getAbsolutePath());
args.add("OUTPUT=" + output.getAbsolutePath());
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadOneBarcodeTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingSampleBarcodeTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicatesTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicatesTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicatesTest.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicatesTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicatesTester.java b/src/test/java/picard/sam/markduplicates/MarkDuplicatesTester.java
similarity index 100%
copy from src/tests/java/picard/sam/markduplicates/MarkDuplicatesTester.java
copy to src/test/java/picard/sam/markduplicates/MarkDuplicatesTester.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTester.java b/src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTester.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTester.java
rename to src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTester.java
diff --git a/src/tests/java/picard/sam/markduplicates/MarkDuplicatesTester.java b/src/test/java/picard/sam/markduplicates/QuerySortedMarkDuplicatesTester.java
similarity index 85%
rename from src/tests/java/picard/sam/markduplicates/MarkDuplicatesTester.java
rename to src/test/java/picard/sam/markduplicates/QuerySortedMarkDuplicatesTester.java
index 67cfc82..d7508b9 100644
--- a/src/tests/java/picard/sam/markduplicates/MarkDuplicatesTester.java
+++ b/src/test/java/picard/sam/markduplicates/QuerySortedMarkDuplicatesTester.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2014 The Broad Institute
+ * Copyright (c) 2016 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -25,16 +25,17 @@
package picard.sam.markduplicates;
import htsjdk.samtools.DuplicateScoringStrategy;
+import htsjdk.samtools.SAMFileHeader;
import picard.cmdline.CommandLineProgram;
/**
* This class is an extension of AbstractMarkDuplicatesCommandLineProgramTester used to test MarkDuplicates with SAM files generated on the fly.
* This performs the underlying tests defined by classes such as see AbstractMarkDuplicatesCommandLineProgramTest and MarkDuplicatesTest.
*/
-public class MarkDuplicatesTester extends AbstractMarkDuplicatesCommandLineProgramTester {
+public class QuerySortedMarkDuplicatesTester extends AbstractMarkDuplicatesCommandLineProgramTester {
- public MarkDuplicatesTester() {
- super(DuplicateScoringStrategy.ScoringStrategy.TOTAL_MAPPED_REFERENCE_LENGTH);
+ public QuerySortedMarkDuplicatesTester() {
+ super(DuplicateScoringStrategy.ScoringStrategy.TOTAL_MAPPED_REFERENCE_LENGTH, SAMFileHeader.SortOrder.queryname);
}
@Override
diff --git a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java b/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
similarity index 91%
rename from src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
rename to src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
index 0dde3eb..006dd33 100644
--- a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
+++ b/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java
@@ -36,6 +36,7 @@ import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IterableAdapter;
import htsjdk.samtools.util.Log;
import htsjdk.samtools.util.ProgressLogger;
+import picard.PicardException;
import picard.cmdline.CommandLineProgramProperties;
import picard.cmdline.programgroups.Testing;
import picard.sam.DuplicationMetrics;
@@ -101,8 +102,12 @@ public class SimpleMarkDuplicatesWithMateCigar extends MarkDuplicates {
// Create the output header
final SAMFileHeader outputHeader = header.clone();
- outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
- for (final String comment : COMMENT) outputHeader.addComment(comment);
+
+ if (outputHeader.getSortOrder() != SAMFileHeader.SortOrder.coordinate) {
+ throw new PicardException("This program requires inputs in coordinate SortOrder");
+ }
+
+ COMMENT.forEach(outputHeader::addComment);
// Key: previous PG ID on a SAM Record (or null). Value: New PG ID to replace it.
final Map<String, String> chainedPgIds = getChainedPgIds(outputHeader);
@@ -127,24 +132,28 @@ public class SimpleMarkDuplicatesWithMateCigar extends MarkDuplicates {
libraryIdGenerator = new LibraryIdGenerator(headerAndIterator.header);
- for (final DuplicateSet duplicateSet : new IterableAdapter<DuplicateSet>(iterator)) {
+ for (final DuplicateSet duplicateSet : new IterableAdapter<>(iterator)) {
final SAMRecord representative = duplicateSet.getRepresentative();
final boolean doOpticalDuplicateTracking = (this.READ_NAME_REGEX != null) &&
isPairedAndBothMapped(representative) &&
representative.getFirstOfPairFlag();
- final Set<String> duplicateReadEndsSeen = new HashSet<String>();
+ final Set<String> duplicateReadEndsSeen = new HashSet<>();
- final List<ReadEnds> duplicateReadEnds = new ArrayList<ReadEnds>();
+ final List<ReadEnds> duplicateReadEnds = new ArrayList<>();
for (final SAMRecord record : duplicateSet.getRecords()) {
- if (!record.isSecondaryOrSupplementary()) {
- final String library = LibraryIdGenerator.getLibraryName(header, record);
- DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library);
- if (metrics == null) {
- metrics = new DuplicationMetrics();
- metrics.LIBRARY = library;
- libraryIdGenerator.addMetricsByLibrary(library, metrics);
- }
+ // get the metrics for the library of this read (creating a new one if needed)
+ final String library = LibraryIdGenerator.getLibraryName(header, record);
+ DuplicationMetrics metrics = libraryIdGenerator.getMetricsByLibrary(library);
+ if (metrics == null) {
+ metrics = new DuplicationMetrics();
+ metrics.LIBRARY = library;
+ libraryIdGenerator.addMetricsByLibrary(library, metrics);
+ }
+
+ if (record.isSecondaryOrSupplementary()) {
+ ++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS;
+ } else {
// First bring the simple metrics up to date
if (record.getReadUnmappedFlag()) {
diff --git a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java b/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java
rename to src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java
diff --git a/src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java b/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java
similarity index 100%
rename from src/tests/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java
rename to src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTester.java
diff --git a/src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java b/src/test/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
similarity index 98%
rename from src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
rename to src/test/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
index 1adf4f9..6d0dc58 100644
--- a/src/tests/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
+++ b/src/test/java/picard/sam/markduplicates/util/OpticalDuplicateFinderTest.java
@@ -1,13 +1,10 @@
package picard.sam.markduplicates.util;
-import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.Log;
-import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
import picard.sam.util.PhysicalLocation;
import picard.sam.util.PhysicalLocationInt;
-import picard.sam.util.PhysicalLocationShort;
import picard.sam.util.ReadNameParser;
import java.util.ArrayList;
diff --git a/src/tests/java/picard/sam/testers/CleanSamTester.java b/src/test/java/picard/sam/testers/CleanSamTester.java
similarity index 100%
rename from src/tests/java/picard/sam/testers/CleanSamTester.java
rename to src/test/java/picard/sam/testers/CleanSamTester.java
diff --git a/src/tests/java/picard/sam/testers/SamFileTester.java b/src/test/java/picard/sam/testers/SamFileTester.java
similarity index 71%
rename from src/tests/java/picard/sam/testers/SamFileTester.java
rename to src/test/java/picard/sam/testers/SamFileTester.java
index 5a2b1d4..bd83ba7 100644
--- a/src/tests/java/picard/sam/testers/SamFileTester.java
+++ b/src/test/java/picard/sam/testers/SamFileTester.java
@@ -23,23 +23,30 @@ import java.util.Map;
*/
public abstract class SamFileTester extends CommandLineProgramTest {
- public static final String TEST_DATA_BASE_DIR = "testdata/picard/sam/";
private final SAMRecordSetBuilder samRecordSetBuilder;
- protected final Map<String, Boolean> duplicateFlags = new HashMap<String, Boolean>();
+ protected final Map<String, Boolean> duplicateFlags = new HashMap<>();
private File outputDir;
private File output;
private int readNameCounter = 0;
private boolean noMateCigars = false;
private boolean deleteOnExit = true;
- private final ArrayList<String> args = new ArrayList<String>();
+ private final ArrayList<String> args = new ArrayList<>();
- public SamFileTester(final int readLength, final boolean deleteOnExit, final int defaultChromosomeLength, final ScoringStrategy duplicateScoringStrategy) {
+ public SamFileTester(final int readLength, final boolean deleteOnExit, final int defaultChromosomeLength, final ScoringStrategy duplicateScoringStrategy, final SAMFileHeader.SortOrder sortOrder, boolean recordsNeedSorting) {
this.deleteOnExit = deleteOnExit;
- this.samRecordSetBuilder = new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate, true, defaultChromosomeLength, duplicateScoringStrategy);
+ this.samRecordSetBuilder = new SAMRecordSetBuilder(recordsNeedSorting, sortOrder, true, defaultChromosomeLength, duplicateScoringStrategy);
samRecordSetBuilder.setReadLength(readLength);
setOutputDir();
}
+ public SamFileTester(final int readLength, final boolean deleteOnExit, final int defaultChromosomeLength, final ScoringStrategy duplicateScoringStrategy, final SAMFileHeader.SortOrder sortOrder) {
+ this(readLength, deleteOnExit, defaultChromosomeLength, duplicateScoringStrategy, sortOrder, true); // forward the requested sort order (was hard-coded to coordinate); records are sorted by default
+ }
+
+ public SamFileTester(final int readLength, final boolean deleteOnExit, final int defaultChromosomeLength, final ScoringStrategy duplicateScoringStrategy) {
+ this(readLength, deleteOnExit, defaultChromosomeLength, duplicateScoringStrategy, SAMFileHeader.SortOrder.coordinate);
+ }
+
public SamFileTester(final int readLength, final boolean deleteOnExit, final int defaultChromosomeLength) {
this(readLength, deleteOnExit, defaultChromosomeLength, SAMRecordSetBuilder.DEFAULT_DUPLICATE_SCORING_STRATEGY);
}
@@ -53,7 +60,9 @@ public abstract class SamFileTester extends CommandLineProgramTest {
}
public void addRecord(final SAMRecord record) {
- this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record), record.getDuplicateReadFlag());
+ final String key = samRecordToDuplicatesFlagsKey(record);
+ Assert.assertFalse(this.duplicateFlags.containsKey(key));
+ this.duplicateFlags.put(key, record.getDuplicateReadFlag());
this.samRecordSetBuilder.addRecord(record);
}
@@ -102,18 +111,30 @@ public abstract class SamFileTester extends CommandLineProgramTest {
}
protected String samRecordToDuplicatesFlagsKey(final SAMRecord record) {
- String readName = record.getReadName()
- + "-"
- + record.getReadPairedFlag()
- + "-";
+ final StringBuilder nameBuilder = new StringBuilder();
+ nameBuilder.append(record.getReadName());
+ nameBuilder.append("-");
+
+ if (record.getReadUnmappedFlag()) {
+ nameBuilder.append("Unmapped");
+ } else {
+ nameBuilder.append(record.getContig())
+ .append("-")
+ .append(record.getAlignmentStart());
+ }
+ nameBuilder.append("-")
+ .append(record.getReadPairedFlag())
+ .append("-").append(record.getNotPrimaryAlignmentFlag())
+ .append("-");
+
if (record.getReadPairedFlag()) {
- readName += record.getFirstOfPairFlag()
- + "-"
- + record.getSecondOfPairFlag();
+ nameBuilder.append(record.getFirstOfPairFlag())
+ .append("-")
+ .append(record.getSecondOfPairFlag());
} else {
- readName += "false-false";
+ nameBuilder.append("false-false");
}
- return readName;
+ return nameBuilder.toString();
}
// Below are a bunch of utility methods for adding records to the SAMRecordSetBuilder
@@ -153,6 +174,12 @@ public abstract class SamFileTester extends CommandLineProgramTest {
addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore, false);
}
+ public void addMappedFragment(final String readName, final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar,
+ final String qualityString, final boolean isSecondary, final boolean isSupplementary,
+ final int defaultQualityScore) {
+ addFragment(readName, referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore, isSecondary, isSupplementary);
+ }
+
public void addMappedPair(final int referenceSequenceIndex,
final int alignmentStart1,
final int alignmentStart2,
@@ -192,20 +219,20 @@ public abstract class SamFileTester extends CommandLineProgramTest {
}
public void addMatePair(final int referenceSequenceIndex,
- final int alignmentStart1,
- final int alignmentStart2,
- final boolean record1Unmapped,
- final boolean record2Unmapped,
- final boolean isDuplicate1,
- final boolean isDuplicate2,
- final String cigar1,
- final String cigar2,
- final boolean strand1,
- final boolean strand2,
- final boolean firstOnly,
- final boolean record1NonPrimary,
- final boolean record2NonPrimary,
- final int defaultQualityScore) {
+ final int alignmentStart1,
+ final int alignmentStart2,
+ final boolean record1Unmapped,
+ final boolean record2Unmapped,
+ final boolean isDuplicate1,
+ final boolean isDuplicate2,
+ final String cigar1,
+ final String cigar2,
+ final boolean strand1,
+ final boolean strand2,
+ final boolean firstOnly,
+ final boolean record1NonPrimary,
+ final boolean record2NonPrimary,
+ final int defaultQualityScore) {
addMatePair("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped,
isDuplicate1, isDuplicate2, cigar1, cigar2, strand1, strand2, firstOnly, record1NonPrimary, record2NonPrimary,
defaultQualityScore);
@@ -213,10 +240,19 @@ public abstract class SamFileTester extends CommandLineProgramTest {
private void addFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar,
final String qualityString, final int defaultQualityScore, final boolean isSecondary) {
- final SAMRecord record = samRecordSetBuilder.addFrag("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart, false,
- recordUnmapped, cigar, qualityString, defaultQualityScore, isSecondary);
+ addFragment("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart, recordUnmapped, isDuplicate, cigar,
+ qualityString, defaultQualityScore, isSecondary, false);
+ }
- this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record), isDuplicate);
+ private void addFragment(final String readName, final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar,
+ final String qualityString, final int defaultQualityScore, final boolean isSecondary, final boolean isSupplementary) {
+
+ final SAMRecord record = samRecordSetBuilder.addFrag(readName, referenceSequenceIndex, alignmentStart, false,
+ recordUnmapped, cigar, qualityString, defaultQualityScore, isSecondary, isSupplementary);
+
+ final String key = samRecordToDuplicatesFlagsKey(record);
+ Assert.assertFalse(this.duplicateFlags.containsKey(key));
+ this.duplicateFlags.put(key, isDuplicate);
}
public void addMatePair(final String readName,
@@ -251,8 +287,13 @@ public abstract class SamFileTester extends CommandLineProgramTest {
samRecordSetBuilder.getRecords().remove(record2);
}
- this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record1), isDuplicate1);
- this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record2), isDuplicate2);
+ final String key1 = samRecordToDuplicatesFlagsKey(record1);
+ Assert.assertFalse(this.duplicateFlags.containsKey(key1));
+ this.duplicateFlags.put(key1, isDuplicate1);
+
+ final String key2 = samRecordToDuplicatesFlagsKey(record2);
+ Assert.assertFalse(this.duplicateFlags.containsKey(key2));
+ this.duplicateFlags.put(key2, isDuplicate2);
}
public void addMatePair(final String readName,
@@ -271,7 +312,7 @@ public abstract class SamFileTester extends CommandLineProgramTest {
final boolean record1NonPrimary,
final boolean record2NonPrimary,
final int defaultQuality) {
- addMatePair(readName, referenceSequenceIndex,referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped,
+ addMatePair(readName, referenceSequenceIndex, referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped,
isDuplicate1, isDuplicate2, cigar1, cigar2, strand1, strand2, firstOnly, record1NonPrimary, record2NonPrimary, defaultQuality);
}
@@ -294,9 +335,7 @@ public abstract class SamFileTester extends CommandLineProgramTest {
// Create the input file
final File input = new File(outputDir, "input.sam");
final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(samRecordSetBuilder.getHeader(), true, input);
- for (final SAMRecord record : samRecordSetBuilder.getRecords()) {
- writer.addAlignment(record);
- }
+ samRecordSetBuilder.getRecords().forEach(writer::addAlignment);
writer.close();
return input;
}
@@ -304,4 +343,5 @@ public abstract class SamFileTester extends CommandLineProgramTest {
public SamReader getInput() {
return samRecordSetBuilder.getSamReader();
}
+
}
\ No newline at end of file
diff --git a/src/tests/java/picard/sam/testers/ValidateSamTester.java b/src/test/java/picard/sam/testers/ValidateSamTester.java
similarity index 100%
rename from src/tests/java/picard/sam/testers/ValidateSamTester.java
rename to src/test/java/picard/sam/testers/ValidateSamTester.java
diff --git a/src/tests/java/picard/sam/util/ReadNameParserTests.java b/src/test/java/picard/sam/util/ReadNameParserTests.java
similarity index 100%
rename from src/tests/java/picard/sam/util/ReadNameParserTests.java
rename to src/test/java/picard/sam/util/ReadNameParserTests.java
diff --git a/src/tests/java/picard/util/BedToIntervalListTest.java b/src/test/java/picard/util/BedToIntervalListTest.java
similarity index 100%
rename from src/tests/java/picard/util/BedToIntervalListTest.java
rename to src/test/java/picard/util/BedToIntervalListTest.java
diff --git a/src/tests/java/picard/util/ClippingUtilityTest.java b/src/test/java/picard/util/ClippingUtilityTest.java
similarity index 100%
rename from src/tests/java/picard/util/ClippingUtilityTest.java
rename to src/test/java/picard/util/ClippingUtilityTest.java
diff --git a/src/tests/java/picard/util/DelimitedTextFileWithHeaderIteratorTest.java b/src/test/java/picard/util/DelimitedTextFileWithHeaderIteratorTest.java
similarity index 100%
rename from src/tests/java/picard/util/DelimitedTextFileWithHeaderIteratorTest.java
rename to src/test/java/picard/util/DelimitedTextFileWithHeaderIteratorTest.java
diff --git a/src/tests/java/picard/util/FifoBufferTest.java b/src/test/java/picard/util/FifoBufferTest.java
similarity index 100%
rename from src/tests/java/picard/util/FifoBufferTest.java
rename to src/test/java/picard/util/FifoBufferTest.java
diff --git a/src/tests/java/picard/util/FileChannelJDKBugWorkAroundTest.java b/src/test/java/picard/util/FileChannelJDKBugWorkAroundTest.java
similarity index 100%
rename from src/tests/java/picard/util/FileChannelJDKBugWorkAroundTest.java
rename to src/test/java/picard/util/FileChannelJDKBugWorkAroundTest.java
diff --git a/src/tests/java/picard/util/IlluminaUtilTest.java b/src/test/java/picard/util/IlluminaUtilTest.java
similarity index 100%
rename from src/tests/java/picard/util/IlluminaUtilTest.java
rename to src/test/java/picard/util/IlluminaUtilTest.java
diff --git a/src/tests/java/picard/util/IntervalListScattererTest.java b/src/test/java/picard/util/IntervalListScattererTest.java
similarity index 100%
rename from src/tests/java/picard/util/IntervalListScattererTest.java
rename to src/test/java/picard/util/IntervalListScattererTest.java
diff --git a/src/tests/java/picard/util/IntervalListToBedTest.java b/src/test/java/picard/util/IntervalListToBedTest.java
similarity index 100%
rename from src/tests/java/picard/util/IntervalListToBedTest.java
rename to src/test/java/picard/util/IntervalListToBedTest.java
diff --git a/src/tests/java/picard/util/MathUtilTest.java b/src/test/java/picard/util/MathUtilTest.java
similarity index 100%
rename from src/tests/java/picard/util/MathUtilTest.java
rename to src/test/java/picard/util/MathUtilTest.java
diff --git a/src/tests/java/picard/util/MergingIteratorTest.java b/src/test/java/picard/util/MergingIteratorTest.java
similarity index 100%
rename from src/tests/java/picard/util/MergingIteratorTest.java
rename to src/test/java/picard/util/MergingIteratorTest.java
diff --git a/src/tests/java/picard/util/QuerySortedReadPairIteratorUtilTest.java b/src/test/java/picard/util/QuerySortedReadPairIteratorUtilTest.java
similarity index 100%
rename from src/tests/java/picard/util/QuerySortedReadPairIteratorUtilTest.java
rename to src/test/java/picard/util/QuerySortedReadPairIteratorUtilTest.java
diff --git a/src/tests/java/picard/util/RExecutorTest.java b/src/test/java/picard/util/RExecutorTest.java
similarity index 100%
rename from src/tests/java/picard/util/RExecutorTest.java
rename to src/test/java/picard/util/RExecutorTest.java
diff --git a/src/tests/java/picard/util/ScatterIntervalsByNsTest.java b/src/test/java/picard/util/ScatterIntervalsByNsTest.java
similarity index 99%
rename from src/tests/java/picard/util/ScatterIntervalsByNsTest.java
rename to src/test/java/picard/util/ScatterIntervalsByNsTest.java
index 39f4201..320f573 100644
--- a/src/tests/java/picard/util/ScatterIntervalsByNsTest.java
+++ b/src/test/java/picard/util/ScatterIntervalsByNsTest.java
@@ -98,8 +98,6 @@ public class ScatterIntervalsByNsTest {
new Interval("fake1", 18, 18),
new Interval("fake1", 19, 19)
)}
-
-
};
}
diff --git a/src/tests/java/picard/util/TabbedTextFileWithHeaderParserTest.java b/src/test/java/picard/util/TabbedTextFileWithHeaderParserTest.java
similarity index 100%
rename from src/tests/java/picard/util/TabbedTextFileWithHeaderParserTest.java
rename to src/test/java/picard/util/TabbedTextFileWithHeaderParserTest.java
diff --git a/src/java/picard/util/TestNGUtil.java b/src/test/java/picard/util/TestNGUtil.java
similarity index 100%
rename from src/java/picard/util/TestNGUtil.java
rename to src/test/java/picard/util/TestNGUtil.java
diff --git a/src/tests/java/picard/util/TextFileParsersTest.java b/src/test/java/picard/util/TextFileParsersTest.java
similarity index 100%
rename from src/tests/java/picard/util/TextFileParsersTest.java
rename to src/test/java/picard/util/TextFileParsersTest.java
diff --git a/src/tests/java/picard/util/UnsignedTypeUtilTest.java b/src/test/java/picard/util/UnsignedTypeUtilTest.java
similarity index 100%
rename from src/tests/java/picard/util/UnsignedTypeUtilTest.java
rename to src/test/java/picard/util/UnsignedTypeUtilTest.java
diff --git a/src/tests/java/picard/vcf/AbstractVcfMergingClpTester.java b/src/test/java/picard/vcf/AbstractVcfMergingClpTester.java
similarity index 100%
rename from src/tests/java/picard/vcf/AbstractVcfMergingClpTester.java
rename to src/test/java/picard/vcf/AbstractVcfMergingClpTester.java
diff --git a/src/tests/java/picard/vcf/CallingMetricAccumulatorTest.java b/src/test/java/picard/vcf/CallingMetricAccumulatorTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/CallingMetricAccumulatorTest.java
rename to src/test/java/picard/vcf/CallingMetricAccumulatorTest.java
diff --git a/src/tests/java/picard/vcf/CollectVariantCallingMetricsTest.java b/src/test/java/picard/vcf/CollectVariantCallingMetricsTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/CollectVariantCallingMetricsTest.java
rename to src/test/java/picard/vcf/CollectVariantCallingMetricsTest.java
diff --git a/src/tests/java/picard/vcf/GenotypeConcordanceGA4GHSchemeTest.java b/src/test/java/picard/vcf/GenotypeConcordanceGA4GHSchemeTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/GenotypeConcordanceGA4GHSchemeTest.java
rename to src/test/java/picard/vcf/GenotypeConcordanceGA4GHSchemeTest.java
diff --git a/src/tests/java/picard/vcf/GenotypeConcordanceGA4GHSchemeWithMissingTest.java b/src/test/java/picard/vcf/GenotypeConcordanceGA4GHSchemeWithMissingTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/GenotypeConcordanceGA4GHSchemeWithMissingTest.java
rename to src/test/java/picard/vcf/GenotypeConcordanceGA4GHSchemeWithMissingTest.java
diff --git a/src/tests/java/picard/vcf/GenotypeConcordanceTest.java b/src/test/java/picard/vcf/GenotypeConcordanceTest.java
similarity index 99%
rename from src/tests/java/picard/vcf/GenotypeConcordanceTest.java
rename to src/test/java/picard/vcf/GenotypeConcordanceTest.java
index 608f0a9..83a1623 100644
--- a/src/tests/java/picard/vcf/GenotypeConcordanceTest.java
+++ b/src/test/java/picard/vcf/GenotypeConcordanceTest.java
@@ -469,11 +469,7 @@ public class GenotypeConcordanceTest {
private void testGenotypeConcordanceDetermineState(final VariantContext truthVariantContext, final TruthState expectedTruthState,
final VariantContext callVariantContext, final CallState expectedCallState,
final int minGq, final int minDp) {
- final GenotypeConcordance genotypeConcordance = new GenotypeConcordance();
- genotypeConcordance.TRUTH_SAMPLE = TRUTH_SAMPLE_NAME;
- genotypeConcordance.CALL_SAMPLE = CALL_SAMPLE_NAME;
-
- final TruthAndCallStates truthAndCallStates = genotypeConcordance.determineState(truthVariantContext, TRUTH_SAMPLE_NAME,
+ final TruthAndCallStates truthAndCallStates = GenotypeConcordance.determineState(truthVariantContext, TRUTH_SAMPLE_NAME,
callVariantContext, CALL_SAMPLE_NAME, minGq, minDp);
Assert.assertEquals(truthAndCallStates.truthState, expectedTruthState);
Assert.assertEquals(truthAndCallStates.callState, expectedCallState);
diff --git a/src/tests/java/picard/vcf/LiftoverVcfTest.java b/src/test/java/picard/vcf/LiftoverVcfTest.java
similarity index 75%
rename from src/tests/java/picard/vcf/LiftoverVcfTest.java
rename to src/test/java/picard/vcf/LiftoverVcfTest.java
index 0a691f9..758967e 100644
--- a/src/tests/java/picard/vcf/LiftoverVcfTest.java
+++ b/src/test/java/picard/vcf/LiftoverVcfTest.java
@@ -62,7 +62,7 @@ public class LiftoverVcfTest extends CommandLineProgramTest {
for (final VariantContext inputContext : rejectReader) {
counter++;
}
- Assert.assertEquals(counter, 2, "the wrong number of rejected indels faile the liftover");
+ Assert.assertEquals(counter, 2, "the wrong number of rejected indels failed the liftover");
}
@Test
@@ -124,4 +124,47 @@ public class LiftoverVcfTest extends CommandLineProgramTest {
};
Assert.assertEquals(runPicardCommandLine(argsWithWarnOnMissingContig), expectedReturnCode);
}
+
+ @DataProvider(name = "dataTestWriteOriginalPosition")
+ public Object[][] dataTestWriteOriginalPosition() {
+ return new Object[][]{
+ {false},
+ {true}
+ };
+ }
+
+ @Test(dataProvider = "dataTestWriteOriginalPosition")
+ public void testWriteOriginalPosition(boolean shouldWriteOriginalPosition) {
+ final File liftOutputFile = new File(OUTPUT_DATA_PATH, "lift-delete-me.vcf");
+ final File rejectOutputFile = new File(OUTPUT_DATA_PATH, "reject-delete-me.vcf");
+ final File input = new File(TEST_DATA_PATH, "testLiftover.vcf");
+
+ liftOutputFile.deleteOnExit();
+ rejectOutputFile.deleteOnExit();
+
+ final String[] args = new String[]{
+ "INPUT=" + input.getAbsolutePath(),
+ "OUTPUT=" + liftOutputFile.getAbsolutePath(),
+ "REJECT=" + rejectOutputFile.getAbsolutePath(),
+ "CHAIN=" + CHAIN_FILE,
+ "REFERENCE_SEQUENCE=" + REFERENCE_FILE,
+ "CREATE_INDEX=false",
+ "WRITE_ORIGINAL_POSITION=" + shouldWriteOriginalPosition
+ };
+
+ runPicardCommandLine(args);
+
+ try (VCFFileReader liftReader = new VCFFileReader(liftOutputFile, false)) {
+ for (VariantContext vc : liftReader) {
+ if (shouldWriteOriginalPosition) {
+ Assert.assertNotNull(vc.getAttribute(LiftoverVcf.ORIGINAL_CONTIG));
+ Assert.assertNotNull(vc.getAttribute(LiftoverVcf.ORIGINAL_START));
+ }
+ else {
+ Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_CONTIG));
+ Assert.assertFalse(vc.hasAttribute(LiftoverVcf.ORIGINAL_START));
+ }
+ }
+ }
+ }
}
diff --git a/src/tests/java/picard/vcf/MergeVcfsTest.java b/src/test/java/picard/vcf/MergeVcfsTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/MergeVcfsTest.java
rename to src/test/java/picard/vcf/MergeVcfsTest.java
diff --git a/src/tests/java/picard/vcf/SortVcfsTest.java b/src/test/java/picard/vcf/SortVcfsTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/SortVcfsTest.java
rename to src/test/java/picard/vcf/SortVcfsTest.java
diff --git a/src/tests/java/picard/vcf/SplitVcfsTest.java b/src/test/java/picard/vcf/SplitVcfsTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/SplitVcfsTest.java
rename to src/test/java/picard/vcf/SplitVcfsTest.java
diff --git a/src/tests/java/picard/vcf/UpdateVcfSequenceDictionaryTest.java b/src/test/java/picard/vcf/UpdateVcfSequenceDictionaryTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/UpdateVcfSequenceDictionaryTest.java
rename to src/test/java/picard/vcf/UpdateVcfSequenceDictionaryTest.java
diff --git a/src/tests/java/picard/vcf/VariantContextComparatorTest.java b/src/test/java/picard/vcf/VariantContextComparatorTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/VariantContextComparatorTest.java
rename to src/test/java/picard/vcf/VariantContextComparatorTest.java
diff --git a/src/tests/java/picard/vcf/VcfFormatConverterTest.java b/src/test/java/picard/vcf/VcfFormatConverterTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/VcfFormatConverterTest.java
rename to src/test/java/picard/vcf/VcfFormatConverterTest.java
diff --git a/src/tests/java/picard/vcf/filter/TestFilterVcf.java b/src/test/java/picard/vcf/filter/TestFilterVcf.java
similarity index 100%
rename from src/tests/java/picard/vcf/filter/TestFilterVcf.java
rename to src/test/java/picard/vcf/filter/TestFilterVcf.java
diff --git a/src/tests/java/picard/vcf/processor/AccumulatorExecutorTest.java b/src/test/java/picard/vcf/processor/AccumulatorExecutorTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/processor/AccumulatorExecutorTest.java
rename to src/test/java/picard/vcf/processor/AccumulatorExecutorTest.java
diff --git a/src/tests/java/picard/vcf/processor/ByWholeContigTest.java b/src/test/java/picard/vcf/processor/ByWholeContigTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/processor/ByWholeContigTest.java
rename to src/test/java/picard/vcf/processor/ByWholeContigTest.java
diff --git a/src/tests/java/picard/vcf/processor/ThreadsafeTest.java b/src/test/java/picard/vcf/processor/ThreadsafeTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/processor/ThreadsafeTest.java
rename to src/test/java/picard/vcf/processor/ThreadsafeTest.java
diff --git a/src/tests/java/picard/vcf/processor/VcfFileSegmentGeneratorTest.java b/src/test/java/picard/vcf/processor/VcfFileSegmentGeneratorTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/processor/VcfFileSegmentGeneratorTest.java
rename to src/test/java/picard/vcf/processor/VcfFileSegmentGeneratorTest.java
diff --git a/src/tests/java/picard/vcf/processor/WidthLimitingDecoratorTest.java b/src/test/java/picard/vcf/processor/WidthLimitingDecoratorTest.java
similarity index 100%
rename from src/tests/java/picard/vcf/processor/WidthLimitingDecoratorTest.java
rename to src/test/java/picard/vcf/processor/WidthLimitingDecoratorTest.java
diff --git a/src/tests/scripts/failing.R b/src/test/resources/failing.R
similarity index 100%
rename from src/tests/scripts/failing.R
rename to src/test/resources/failing.R
diff --git a/src/tests/scripts/passing.R b/src/test/resources/passing.R
similarity index 100%
rename from src/tests/scripts/passing.R
rename to src/test/resources/passing.R
diff --git a/src/tests/resources/testng.xml b/src/test/resources/testng.xml
similarity index 100%
rename from src/tests/resources/testng.xml
rename to src/test/resources/testng.xml
diff --git a/src/tests/java/picard/fingerprint/FingerprintCheckerTest.java b/src/tests/java/picard/fingerprint/FingerprintCheckerTest.java
deleted file mode 100644
index 831a422..0000000
--- a/src/tests/java/picard/fingerprint/FingerprintCheckerTest.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package picard.fingerprint;
-
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.testng.Assert.*;
-
-/**
- * Created by farjoun on 8/27/15.
- */
-public class FingerprintCheckerTest {
-
- @Test
- public void testRandomSublist() throws Exception {
-
- List<Integer> list = new ArrayList<>();
- list.add(1);
- list.add(2);
- list.add(3);
-
- Assert.assertEquals(list, FingerprintChecker.randomSublist(list, 3));
- Assert.assertEquals(list, FingerprintChecker.randomSublist(list, 4));
-
- Assert.assertEquals(FingerprintChecker.randomSublist(list, 2).size(), 2);
- }
-}
\ No newline at end of file
diff --git a/src/tests/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java b/src/tests/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java
deleted file mode 100644
index 53a9880..0000000
--- a/src/tests/java/picard/fingerprint/HaplotypeProbabilityOfNormalGivenTumorTest.java
+++ /dev/null
@@ -1,56 +0,0 @@
-package picard.fingerprint;
-
-import picard.util.TestNGUtil;
-import org.testng.Assert;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
-
-/**
- * Created by farjoun on 5/29/15.
- */
-public class HaplotypeProbabilityOfNormalGivenTumorTest {
-
- private double maf = 0.4;
- private Snp snp = new Snp("test", "chr1", 1, (byte) 'A', (byte) 'C', maf, Collections.singletonList("dummy"));
- private HaplotypeBlock hb = new HaplotypeBlock(maf);
-
- @DataProvider(name = "testGetLikelihoodsData")
- public Iterator<Object[]> testGetLikelihoodsData() {
- List<Object[]> testData = new ArrayList<>();
-
- //make sure that giving 0 pLoH doesn't change the underlying likelihoods:
- testData.add(new Object[]{0.0, new double[]{1, 0, 0}, new double[]{1, 0, 0}});
- testData.add(new Object[]{0.0, new double[]{0, 1, 0}, new double[]{0, 1, 0}});
- testData.add(new Object[]{0.0, new double[]{0, 0, 1}, new double[]{0, 0, 1}});
- testData.add(new Object[]{0.0, new double[]{0, 0.4, 0.6}, new double[]{0, 0.4, 0.6}});
- testData.add(new Object[]{0.0, new double[]{0.3, 0.7, 0}, new double[]{0.3, 0.7, 0}});
-
- //make sure that pLoH will not affect HOM likelihoods:
- testData.add(new Object[]{0.1, new double[]{1, 0, 0}, new double[]{1, 0, 0}});
- testData.add(new Object[]{0.2, new double[]{0, 0, 1}, new double[]{0, 0, 1}});
- testData.add(new Object[]{0.3, new double[]{.3, 0, .7}, new double[]{.3, 0, .7}});
-
- //see that non zero pLoH changes the likelihood of a HET site as expected:
- testData.add(new Object[]{0.1, new double[]{0, 1, 0}, new double[]{.1/2, 1-0.1, .1/2}});
- testData.add(new Object[]{0.1, new double[]{0, .5, .5}, new double[]{0.5*0.1*0.5, 0.5*(1-0.1), 0.5*1+0.5*0.1/2}});
- testData.add(new Object[]{0.1, new double[]{0.5, 0.5, 0}, new double[]{.5+0.5*0.1*0.5, 0.5*(1-0.1), 0.5*0.1*0.5}});
-
- return testData.iterator();
- }
-
- @Test(dataProvider = "testGetLikelihoodsData")
- public void testGetLikelihoods(double pLoH, double[] underlyingLikelihood, double[] tumorLikelihood) throws Exception {
- HaplotypeProbabilities hp = new HaplotypeProbabilitiesFromGenotype(snp, hb, underlyingLikelihood[0], underlyingLikelihood[1], underlyingLikelihood[2]);
-
- HaplotypeProbabilities hpTumor = new HaplotypeProbabilityOfNormalGivenTumor(hp, pLoH);
-
- TestNGUtil.assertEqualDoubleArrays(hpTumor.getLikelihoods(), tumorLikelihood, 0.0001);
-
- }
-}
\ No newline at end of file
diff --git a/src/tests/java/picard/sam/RevertSamTest.java b/src/tests/java/picard/sam/RevertSamTest.java
deleted file mode 100755
index 76ae2f3..0000000
--- a/src/tests/java/picard/sam/RevertSamTest.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package picard.sam;
-
-import htsjdk.samtools.SAMFileHeader;
-import htsjdk.samtools.SAMReadGroupRecord;
-import htsjdk.samtools.SAMRecord;
-import htsjdk.samtools.SamReader;
-import htsjdk.samtools.SamReaderFactory;
-import htsjdk.samtools.util.CloserUtil;
-import org.testng.Assert;
-import org.testng.annotations.DataProvider;
-import org.testng.annotations.Test;
-import picard.PicardException;
-import picard.cmdline.CommandLineProgramTest;
-
-import java.io.File;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-
-/**
- * Created by IntelliJ IDEA.
- * User: ktibbett
- * Date: Jul 20, 2010
- * Time: 10:27:58 AM
- * To change this template use File | Settings | File Templates.
- */
-public class RevertSamTest extends CommandLineProgramTest {
- public static final String basicSamToRevert = "testdata/picard/sam/revert_sam_basic.sam";
- public static final String negativeTestSamToRevert = "testdata/picard/sam/revert_sam_negative.sam";
-
- private static final String revertedQualities =
- "11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111";
-
- private static final String unmappedRead = "both_reads_present_only_first_aligns/2";
-
- public String getCommandLineProgramName() {
- return RevertSam.class.getSimpleName();
- }
-
- @Test(dataProvider="positiveTestData")
- public void basicPositiveTests(final SAMFileHeader.SortOrder so, final boolean removeDuplicates, final boolean removeAlignmentInfo,
- final boolean restoreOriginalQualities, final String sample, final String library,
- final List<String> attributesToClear) throws Exception {
-
- final File output = File.createTempFile("reverted", ".sam");
- output.deleteOnExit();
- final RevertSam reverter = new RevertSam();
- final String args[] = new String[5 + (so != null ? 1 : 0) + attributesToClear.size() + (sample != null ? 1 : 0) + (library != null ? 1 : 0)];
- int index = 0;
- args[index++] = "INPUT=" + basicSamToRevert;
- args[index++] = "OUTPUT=" + output.getAbsolutePath();
- if (so != null) {
- args[index++] = "SORT_ORDER=" + so.name();
- }
- args[index++] = "REMOVE_DUPLICATE_INFORMATION=" + removeDuplicates;
- args[index++] = "REMOVE_ALIGNMENT_INFORMATION=" + removeAlignmentInfo;
- args[index++] = "RESTORE_ORIGINAL_QUALITIES=" + restoreOriginalQualities;
- if (sample != null) {
- args[index++] = "SAMPLE_ALIAS=" + sample;
- }
- if (library != null) {
- args[index++] = "LIBRARY_NAME=" + library;
- }
- for (final String attr : attributesToClear) {
- args[index++] = "ATTRIBUTE_TO_CLEAR=" + attr;
- }
- runPicardCommandLine(args);
-
- final SamReader reader = SamReaderFactory.makeDefault().open(output);
- final SAMFileHeader header = reader.getFileHeader();
- Assert.assertEquals(header.getSortOrder(), SAMFileHeader.SortOrder.queryname);
- Assert.assertEquals(header.getProgramRecords().size(), removeAlignmentInfo ? 0 : 1);
- for (final SAMReadGroupRecord rg : header.getReadGroups()) {
- if (sample != null) {
- Assert.assertEquals(rg.getSample(), sample);
- }
- else {
- Assert.assertEquals(rg.getSample(), "Hi,Mom!");
- }
- if (library != null) {
- Assert.assertEquals(rg.getLibrary(), library);
- }
- else {
- Assert.assertEquals(rg.getLibrary(), "my-library");
- }
- }
- for (final SAMRecord rec : reader) {
- if (removeDuplicates) {
- Assert.assertFalse(rec.getDuplicateReadFlag(),
- "Duplicates should have been removed: " + rec.getReadName());
- }
-
- if (removeAlignmentInfo) {
- Assert.assertTrue(rec.getReadUnmappedFlag(),
- "Alignment info should have been removed: " + rec.getReadName());
- }
-
- if (restoreOriginalQualities && !unmappedRead.equals(
- rec.getReadName() + "/" + (rec.getFirstOfPairFlag() ? "1" : "2"))) {
-
- Assert.assertEquals(rec.getBaseQualityString(), revertedQualities);
- } else {
- Assert.assertNotSame(rec.getBaseQualityString(), revertedQualities);
- }
-
- for (final SAMRecord.SAMTagAndValue attr : rec.getAttributes()) {
- if (removeAlignmentInfo || (!attr.tag.equals("PG") && !attr.tag.equals("NM")
- && !attr.tag.equals("MQ"))) {
- Assert.assertFalse(reverter.ATTRIBUTE_TO_CLEAR.contains(attr.tag),
- attr.tag + " should have been cleared.");
- }
- }
- }
- CloserUtil.close(reader);
- }
-
-
- @DataProvider(name="positiveTestData")
- public Object[][] getPostitiveTestData() {
- return new Object[][] {
- {null, true, true, true, null, null, Collections.EMPTY_LIST},
- {SAMFileHeader.SortOrder.queryname, true, true, true, "Hey,Dad!", null, Arrays.asList("XT")},
- {null, false, true, false, "Hey,Dad!", "NewLibraryName", Arrays.asList("XT")},
- {null, false, false, false, null, null, Collections.EMPTY_LIST}
- };
- }
-
-
- @Test(dataProvider="negativeTestData", expectedExceptions = {PicardException.class})
- public void basicNegativeTest(final String sample, final String library) throws Exception {
-
- final File output = File.createTempFile("bad", ".sam");
- output.deleteOnExit();
- final RevertSam reverter = new RevertSam();
- final String args[] = new String[2 + (sample != null ? 1 : 0) + (library != null ? 1 : 0)];
- int index = 0;
- args[index++] = "INPUT=" + negativeTestSamToRevert;
- args[index++] = "OUTPUT=" + output.getAbsolutePath();
- if (sample != null) {
- args[index++] = "SAMPLE_ALIAS=" + sample;
- }
- if (library != null) {
- args[index++] = "LIBRARY_NAME=" + library;
- }
- runPicardCommandLine(args);
- Assert.fail("Negative test should have thrown an exception and didn't");
- }
-
- @DataProvider(name="negativeTestData")
- public Object[][] getNegativeTestData() {
- return new Object[][] {
- {"NewSample", null},
- {null, "NewLibrary"},
- {"NewSample", "NewLibrary"}
- };
- }
-}
diff --git a/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithTagPerMolecularIndex2M2M2M2M.sam b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithTagPerMolecularIndex2M2M2M2M.sam
new file mode 100644
index 0000000..7ea7a64
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithTagPerMolecularIndex2M2M2M2M.sam
@@ -0,0 +1,182 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:HiDad LB:Hello, World PL:illumina PU:HiMom.1 CN:BI
+HiMom:1:1101:1031:2224 516 * 0 0 * * 0 0 ......................... ######################### ZA:Z:NN ZB:Z:NN ZC:Z:NN ZD:Z:NN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1039:2147 516 * 0 0 * * 0 0 ......................... ######################### ZA:Z:NN ZB:Z:NN ZC:Z:NN ZD:Z:NN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1046:2175 516 * 0 0 * * 0 0 ..GGA.................... ######################### ZA:Z:NN ZB:Z:NN ZC:Z:NN ZD:Z:NN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1047:2122 516 * 0 0 * * 0 0 ..TCA.................... ######################### ZA:Z:NN ZB:Z:NA ZC:Z:NN ZD:Z:NN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNANNNN
+HiMom:1:1101:1048:2197 516 * 0 0 * * 0 0 ..GTG.................... ######################### ZA:Z:NN ZB:Z:NC ZC:Z:NN ZD:Z:NN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNCNNNN
+HiMom:1:1101:1065:2193 4 * 0 0 * * 0 0 .CTTG.................... ######################### ZA:Z:GA ZB:Z:AC ZC:Z:GA ZD:Z:TN RG:Z:HiMom.1 QX:Z:######## RX:Z:GAACGATN
+HiMom:1:1101:1069:2159 4 * 0 0 * * 0 0 GACGT.................... <<<@?#################### ZA:Z:GT ZB:Z:CC ZC:Z:AC ZD:Z:AG RG:Z:HiMom.1 QX:Z:@BBFFFFF RX:Z:GTCCACAG
+HiMom:1:1101:1071:2233 4 * 0 0 * * 0 0 GTTTG.................... <<<@@#################### ZA:Z:TA ZB:Z:TC ZC:Z:CA ZD:Z:GG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1101:1083:2193 4 * 0 0 * * 0 0 AGGCT.................... ######################### ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:?@;DD?BD RX:Z:CCAACATT
+HiMom:1:1101:1084:2136 4 * 0 0 * * 0 0 TTTCT.................... <<<@@#################### ZA:Z:TG ZB:Z:CT ZC:Z:GC ZD:Z:TG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:1101:1089:2172 4 * 0 0 * * 0 0 TCCGG.................... :<<??#################### ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GA RG:Z:HiMom.1 QX:Z:?@@FF;=B RX:Z:GACCAGGA
+HiMom:1:1101:1100:2207 4 * 0 0 * * 0 0 AGGCT............G....... ######################### ZA:Z:AT ZB:Z:TA ZC:Z:TC ZD:Z:AA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1111:2148 4 * 0 0 * * 0 0 GCGAA.A..........GGACGAC. ######################### ZA:Z:GC ZB:Z:CG ZC:Z:TC ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCCGTCGA
+HiMom:1:1101:1138:2141 4 * 0 0 * * 0 0 TCCGATCTGCTTCAGGTCGATCAGA CCCFFFFFHGHHHJJIGHIJJJJJJ ZA:Z:AA ZB:Z:CA ZC:Z:AT ZD:Z:GG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1140:2120 4 * 0 0 * * 0 0 TTTTTTTTTTTTTAACTTTGCAAAT @@@DDDDDHHHHFB at 9FHI@BFH@@ ZA:Z:CA ZB:Z:AC ZC:Z:TC ZD:Z:TC RG:Z:HiMom.1 QX:Z:@@@DDFDF RX:Z:CAACTCTC
+HiMom:1:1101:1143:2192 4 * 0 0 * * 0 0 CGACAAGTCTGGCTTATCACTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:TC ZB:Z:GC ZC:Z:TA ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1150:2228 4 * 0 0 * * 0 0 ATGGGAGGCGATTCCTAGGGGGTTG 8?=DD8;@BH6DHD<FGGGEIGHIG ZA:Z:AG ZB:Z:GT ZC:Z:CG ZD:Z:CA RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:AGGTCGCA
+HiMom:1:1101:1157:2135 4 * 0 0 * * 0 0 TTTAAAGTCTTAATCAAAGATGATA CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AT ZB:Z:TA ZC:Z:TC ZD:Z:AA RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1162:2207 516 * 0 0 * * 0 0 TAAAACTGGGGAAGTTAGAGGAATG ######################### ZA:Z:AC ZB:Z:AA ZC:Z:AA ZD:Z:TT RG:Z:HiMom.1 QX:Z:####2<## RX:Z:ACAAAATT
+HiMom:1:1101:1165:2239 4 * 0 0 * * 0 0 ATGGAAGTCGAGACAGAAGTGAGAA ######################### ZA:Z:GC ZB:Z:CT ZC:Z:AG ZD:Z:CC RG:Z:HiMom.1 QX:Z:B@@DFFFF RX:Z:GCCTAGCC
+HiMom:1:1101:1175:2197 4 * 0 0 * * 0 0 AAGAGCTGGGGAACATCCAGAAAGG BC at FFFFFHHHHHJJJJJJJJJJJJ ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1101:1188:2237 4 * 0 0 * * 0 0 GCTTCCTTCAAGACAGAAGTGAGAA CCCFFDDEFHHFFE at FDHHAIAFHG ZA:Z:GT ZB:Z:AA ZC:Z:CA ZD:Z:TC RG:Z:HiMom.1 QX:Z:@@?DFFDF RX:Z:GTAACATC
+HiMom:1:1101:1197:2200 4 * 0 0 * * 0 0 ATATTCCACTGGAACCACAGAACCC @@@FFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:@CCFDFFF RX:Z:AACGCATT
+HiMom:1:1101:1206:2126 4 * 0 0 * * 0 0 ATCTGTCCAGTGGTGCACTGAATGT CCCFFFFFHHHHHHIIJJJJIJJJJ ZA:Z:AA ZB:Z:CA ZC:Z:AT ZD:Z:GG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1212:2230 4 * 0 0 * * 0 0 TTTTAGCTTTATTGGGGAGGGGGTG CCCFFFFFHHGHHJJJJGJJJJJDF ZA:Z:CC ZB:Z:AG ZC:Z:CA ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1101:1218:2200 4 * 0 0 * * 0 0 GCTCTTCCGATCTATCTGCTCGTCC (-(=34???3;@############# ZA:Z:GA ZB:Z:CC ZC:Z:GT ZD:Z:TG RG:Z:HiMom.1 QX:Z:@CCFFDDF RX:Z:GACCGTTG
+HiMom:1:1101:1219:2164 4 * 0 0 * * 0 0 ATCTTATCCACTCCTTCCACTTTGG CCCFFFFFHHHHHJJIJJJJJJJIJ ZA:Z:TT ZB:Z:GT ZC:Z:CT ZD:Z:AT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1101:1221:2143 4 * 0 0 * * 0 0 CAATTGAATGTCTGCACAGCCGCTT @@@FFFFDHHHHHJJJIIIJGHIJJ ZA:Z:GC ZB:Z:CG ZC:Z:TC ZD:Z:GA RG:Z:HiMom.1 QX:Z:@@CDDDDF RX:Z:GCCGTCGA
+HiMom:1:1101:1236:2121 4 * 0 0 * * 0 0 TTGCGCTTACTTTGTAGCCTTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AC ZB:Z:AG ZC:Z:GT ZD:Z:AT RG:Z:HiMom.1 QX:Z:CCCFFDDF RX:Z:ACAGGTAT
+HiMom:1:1101:1242:2170 4 * 0 0 * * 0 0 GGAAGGAAAAGAAGCACAAGTACAT @@@DFDFFHHHGHHGIIGJJEHHIG ZA:Z:TG ZB:Z:CA ZC:Z:AG ZD:Z:TA RG:Z:HiMom.1 QX:Z:@@CFFF?D RX:Z:TGCAAGTA
+HiMom:1:1101:1257:2223 4 * 0 0 * * 0 0 TGCTCTTCCGATCTTTTAGCAAAGC :?@DDBDDHFFHDGIGIIJJJGGGI ZA:Z:GA ZB:Z:CC ZC:Z:GT ZD:Z:TG RG:Z:HiMom.1 QX:Z:;@@DD=DD RX:Z:GACCGTTG
+HiMom:1:1101:1259:2152 4 * 0 0 * * 0 0 ATTTTTATATTTTTTTAGACATAGG CCCFFFFFGHHHHJJJJIGIIJJJJ ZA:Z:AC ZB:Z:TA ZC:Z:AG ZD:Z:AC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTAAGAC
+HiMom:1:1101:1261:2127 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHGHHHJJIFDDDDDDDD ZA:Z:AC ZB:Z:TA ZC:Z:AG ZD:Z:AC RG:Z:HiMom.1 QX:Z:>7+ at A7A7 RX:Z:ACTAAGAC
+HiMom:1:1101:1263:2236 516 * 0 0 * * 0 0 AGTTCTTCAGTAATTTTAGTACTGC ######################### ZA:Z:AG ZB:Z:GT ZC:Z:AA ZD:Z:GG RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGTAAGG
+HiMom:1:1101:1267:2209 4 * 0 0 * * 0 0 GGCAGAGTCTCCAACAGCCCCGTAC =;?DDDD?CCFHAIIIGGIIGE at EG ZA:Z:TA ZB:Z:TC ZC:Z:AG ZD:Z:CC RG:Z:HiMom.1 QX:Z:?@@D;ADD RX:Z:TATCAGCC
+HiMom:1:1101:1269:2170 4 * 0 0 * * 0 0 TTCCAAGCCTGTGCTTTAAGGAAAA @@<ADBDBDF8DDCFH at GIE@@GGH ZA:Z:AT ZB:Z:TA ZC:Z:TC ZD:Z:AA RG:Z:HiMom.1 QX:Z:@@@DDDF? RX:Z:ATTATCAA
+HiMom:1:1101:1290:2225 4 * 0 0 * * 0 0 TCAGTTCACTGGCAAAGACAGTCAC C@@FBEDDFHFHGIIICEHGDHBHE ZA:Z:GC ZB:Z:CT ZC:Z:AG ZD:Z:CC RG:Z:HiMom.1 QX:Z:?<@DFBBD RX:Z:GCCTAGCC
+HiMom:1:1101:1291:2150 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHFHHIJJJIIIGIJIJ ZA:Z:CG ZB:Z:CT ZC:Z:AT ZD:Z:GT RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:CGCTATGT
+HiMom:1:1101:1302:2244 4 * 0 0 * * 0 0 TGAATACATATAACAAATGCAAAAA CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:GA ZB:Z:CC ZC:Z:TA ZD:Z:AC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCTAAC
+HiMom:1:1101:1308:2153 516 * 0 0 * * 0 0 TCTGTAAGGTAATCCCCGCATGTGT 1?1=4===AFFDFFGFDGFB at CFB: ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z::?@B?@DD RX:Z:AACGCATT
+HiMom:1:1101:1309:2210 4 * 0 0 * * 0 0 AGTGGGCTAGGGCATTTTTAATCTT @@?DFFDFHHHDFHJIJJIJGIIIJ ZA:Z:AT ZB:Z:TC ZC:Z:CT ZD:Z:CT RG:Z:HiMom.1 QX:Z:?@@ADEEF RX:Z:ATTCCTCT
+HiMom:1:1101:1314:2233 4 * 0 0 * * 0 0 AGGAAAGTTGGGCTGACCTGACAGA @@<DDD;=FBFADBCGDEH?F;FCG ZA:Z:CG ZB:Z:CT ZC:Z:AT ZD:Z:GT RG:Z:HiMom.1 QX:Z:@<@?B@;A RX:Z:CGCTATGT
+HiMom:1:1101:1316:2126 4 * 0 0 * * 0 0 TCTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJJHFDDDDDD ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:AC RG:Z:HiMom.1 QX:Z:1>>7A7## RX:Z:CAATAGAC
+HiMom:1:1101:1327:2200 516 * 0 0 * * 0 0 GTCATCTGGGCTGTCGACAGGTGTC @B at FFFFFHHHHGIJJJJJJIFHHI ZA:Z:GC ZB:Z:CG ZC:Z:TC ZD:Z:GA RG:Z:HiMom.1 QX:Z:BCCFDFFD RX:Z:GCCGTCGA
+HiMom:1:1101:1328:2225 4 * 0 0 * * 0 0 AGGAAATTAGGACTTACCTGACATA ######################### ZA:Z:CA ZB:Z:AC ZC:Z:TC ZD:Z:TC RG:Z:HiMom.1 QX:Z:??;=A:B= RX:Z:CAACTCTC
+HiMom:1:1101:1338:2175 4 * 0 0 * * 0 0 GCTTGTTGGCTTTAACATCCACAAT CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:GA ZB:Z:AG ZC:Z:GA ZD:Z:AG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GAAGGAAG
+HiMom:1:1101:1347:2149 4 * 0 0 * * 0 0 GCTCTTCCGATCTGTGCTCTTCCGA CCCFFFFFDFHHFIJDGIGGHGIGH ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GA RG:Z:HiMom.1 QX:Z:CC at DFFFD RX:Z:GACCAGGA
+HiMom:1:1101:1353:2226 4 * 0 0 * * 0 0 GTGCTCTTCCGATCTTCAGGTTACC BBBFFFFFHHHHHJJJJJJJIJJJJ ZA:Z:TA ZB:Z:TC ZC:Z:TG ZD:Z:CC RG:Z:HiMom.1 QX:Z:@B at FFEFF RX:Z:TATCTGCC
+HiMom:1:1101:1363:2138 4 * 0 0 * * 0 0 GTTCTTAAACCTGTTAGAACTTCTG C@@FFFFFHHHHHJJJJJJJJJJJJ ZA:Z:CT ZB:Z:AA ZC:Z:CT ZD:Z:CG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:1101:1399:2128 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHIJJJJJJJJJJJ ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1101:1403:2194 4 * 0 0 * * 0 0 ACATGGTGAAACCCTGTCTCTACTA CCCFFFDDHHHHHJJJJJJJJJJJJ ZA:Z:CT ZB:Z:GT ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:1101:1406:2222 4 * 0 0 * * 0 0 GGCTGGACTCCCCTGGTTCTGGGCA ;?@DDDBD?FHDFGIIIGIGHHIII ZA:Z:AG ZB:Z:CA ZC:Z:TG ZD:Z:GA RG:Z:HiMom.1 QX:Z:C@@DBFEF RX:Z:AGCATGGA
+HiMom:1:1101:1419:2119 4 * 0 0 * * 0 0 ACTTTCCTTTTTTGTTTTACTTTAA ######################### ZA:Z:TG ZB:Z:TA ZC:Z:AT ZD:Z:CA RG:Z:HiMom.1 QX:Z:@@@DFDFD RX:Z:TGTAATCA
+HiMom:1:1101:1420:2213 4 * 0 0 * * 0 0 TTCACTGTACCGGCCGTGCGTACTT @CCFFFFDHHHFGIJJJJJJGHIGG ZA:Z:CA ZB:Z:GC ZC:Z:GG ZD:Z:TA RG:Z:HiMom.1 QX:Z:@C at FFFDF RX:Z:CAGCGGTA
+HiMom:1:1101:1435:2194 4 * 0 0 * * 0 0 TTTTGTTTTCTTTTACTGAAGTGTA CCCFFDFFHHHHHJJJJIHIJHHHJ ZA:Z:TA ZB:Z:TC ZC:Z:TG ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1101:1441:2148 4 * 0 0 * * 0 0 TTTTGGCTCTAGAGGGGGTAGAGGG CCCFFFFFHHDFBHIIJJ1?FGHIJ ZA:Z:CG ZB:Z:CT ZC:Z:AT ZD:Z:GT RG:Z:HiMom.1 QX:Z:@@BFFDDD RX:Z:CGCTATGT
+HiMom:1:1101:1452:2132 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJIJJJJ ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AACGCATT
+HiMom:1:1101:1460:2176 4 * 0 0 * * 0 0 AGGAAAAAGACACAACAAGTCCAAC ######################### ZA:Z:GA ZB:Z:TA ZC:Z:TC ZD:Z:CA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GATATCCA
+HiMom:1:1101:1479:2221 4 * 0 0 * * 0 0 GGGGAAATCTATTTTTATGTAAAAA @CCFFFFFHHHHHJIGIJJJJJJJJ ZA:Z:TC ZB:Z:GC ZC:Z:TA ZD:Z:GA RG:Z:HiMom.1 QX:Z:@BCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1491:2120 4 * 0 0 * * 0 0 GGCCAGGCTGAACTTCTGAGCTGCT CCCFFFFFHHHGHJJJJJJJJJJJJ ZA:Z:AG ZB:Z:GT ZC:Z:CG ZD:Z:CA RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1018:2133 4 * 0 0 * * 0 0 ......................... ######################### ZA:Z:AT ZB:Z:TC ZC:Z:CT ZD:Z:CT RG:Z:HiMom.1 XN:i:1 QX:Z:8??=BBBA RX:Z:ATTCCTCT
+HiMom:1:1201:1018:2217 516 * 0 0 * * 0 0 ......................... ######################### ZA:Z:AT ZB:Z:TA ZC:Z:TC ZD:Z:AA RG:Z:HiMom.1 XN:i:1 QX:Z:;<;:BBDD RX:Z:ATTATCAA
+HiMom:1:1201:1028:2202 4 * 0 0 * * 0 0 ..AAAC.C.T.......GG..TG.. ##42@?################### ZA:Z:GA ZB:Z:AG ZC:Z:GA ZD:Z:AG RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GAAGGAAG
+HiMom:1:1201:1042:2174 4 * 0 0 * * 0 0 .TCAGGAAGGC..CAAAAAAAGAAA #0;@@@?@?<@##3<@@?@@????? ZA:Z:TC ZB:Z:TG ZC:Z:CA ZD:Z:AG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCTGCAAG
+HiMom:1:1201:1043:2246 4 * 0 0 * * 0 0 .GCATCATTTC..GCTTCTCTCTGT #0;@@??@=@>##22=;@??><@?? ZA:Z:CG ZB:Z:CT ZC:Z:AT ZD:Z:GT RG:Z:HiMom.1 QX:Z:@<?DD:B= RX:Z:CGCTATGT
+HiMom:1:1201:1045:2105 516 * 0 0 * * 0 0 .TTTTTTTTTT..TTTTTTTTTTTT #0;@@@@@@@?##0:????????=< ZA:Z:CT ZB:Z:GT ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z:1112 at A## RX:Z:CTGTAATC
+HiMom:1:1201:1054:2151 4 * 0 0 * * 0 0 GTCAGGCACTGAGAATATATGGGTG CBCFFFFFHHHHHJJJJJJJJJJEG ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFDF RX:Z:CAATAGTC
+HiMom:1:1201:1064:2239 4 * 0 0 * * 0 0 GGGATGGGAGGGCGATGAGGACTAG 8?@:DDDACC:FHHGIH<EGDDDFH ZA:Z:TA ZB:Z:AG ZC:Z:CA ZD:Z:CA RG:Z:HiMom.1 QX:Z:@@@FFADB RX:Z:TAAGCACA
+HiMom:1:1201:1073:2225 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGGAGGGT @BBDFFFFHHHHHJJJJJJJJJJJ: ZA:Z:AT ZB:Z:TC ZC:Z:CT ZD:Z:CT RG:Z:HiMom.1 QX:Z:B@@BDEFF RX:Z:ATTCCTCT
+HiMom:1:1201:1083:2121 4 * 0 0 * * 0 0 ACACACAACACCACCGCCCTCCCCC ######################### ZA:Z:CT ZB:Z:AT ZC:Z:GC ZD:Z:GT RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CTATGCGT
+HiMom:1:1201:1084:2204 4 * 0 0 * * 0 0 TGGCTCCTCAGGCTCTCATCAGTTG CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:TA ZB:Z:TC ZC:Z:TG ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1095:2146 4 * 0 0 * * 0 0 ACTGACAACACCAAATGCTGCTAAG CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:1201:1103:2184 4 * 0 0 * * 0 0 AGAAGTTTCAGAATTGTGGCCCCAT B at BFFDEFHHHHHJJJGHIJJJJJI ZA:Z:TT ZB:Z:GT ZC:Z:CT ZD:Z:AT RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1107:2109 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHGHHJJJJIIJJJJJJ ZA:Z:TT ZB:Z:GT ZC:Z:CT ZD:Z:AT RG:Z:HiMom.1 QX:Z:B at CFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1118:2198 4 * 0 0 * * 0 0 AATAAACTTTATTAAAGCAGTTAAA C at CFFFFFHDHHHGIIIJJJIJJJJ ZA:Z:AT ZB:Z:TA ZC:Z:TC ZD:Z:AA RG:Z:HiMom.1 QX:Z:@@@DDBDD RX:Z:ATTATCAA
+HiMom:1:1201:1122:2227 4 * 0 0 * * 0 0 GTCATATAAGGCCCAGTCCAAGGAA @@@FFFFFHHHGGIJIGGIJFIJII ZA:Z:CG ZB:Z:CC ZC:Z:TT ZD:Z:CC RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1123:2161 516 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGCATACA ===AAAA8AAAA<AAA)@CBA9>A# ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GA RG:Z:HiMom.1 QX:Z:?;@DFDFF RX:Z:GACCAGGA
+HiMom:1:1201:1127:2112 516 * 0 0 * * 0 0 TAATCACCTGAGCAGTGAAGCCAGC @<@?BDDDHD?FDBHI?AHGGGDFH ZA:Z:CA ZB:Z:AC ZC:Z:TC ZD:Z:TC RG:Z:HiMom.1 QX:Z:=??BA?BD RX:Z:CAACTCTC
+HiMom:1:1201:1134:2144 4 * 0 0 * * 0 0 AGTGTGAGTAATGGTTGAGAGGTGG B@?DDDFFFHHGHJHHGFIHHIFGI ZA:Z:CG ZB:Z:CT ZC:Z:AT ZD:Z:GT RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CGCTATGT
+HiMom:1:1201:1138:2227 516 * 0 0 * * 0 0 GACAAATATAGGAAATAGAAGCTAT =1=A=AAA,2?4>7C<<4<A+3<AB ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:==###### RX:Z:CCAACATT
+HiMom:1:1201:1140:2125 4 * 0 0 * * 0 0 TTCATAAATTGGTCTTAGATGTTGC CC at FFFFFHHHHFGIJIIIJIJIJJ ZA:Z:TA ZB:Z:TC ZC:Z:CA ZD:Z:GG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1142:2242 4 * 0 0 * * 0 0 GTAAAATGTAAAATAATAAAAAATG ?=?DDDD;AF<DF<FFFFIIIFF@< ZA:Z:TA ZB:Z:TC ZC:Z:TG ZD:Z:CC RG:Z:HiMom.1 QX:Z:??<D?D83 RX:Z:TATCTGCC
+HiMom:1:1201:1150:2161 4 * 0 0 * * 0 0 TTCTCACTACTGTGATTGTGCCACT @C at FFFFFGHHHHGIIIICEHCFGH ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:@@@FDDDD RX:Z:AACGCATT
+HiMom:1:1201:1159:2179 516 * 0 0 * * 0 0 TTTTTTTTTATTTTTCTAAATACTT ===AA#################### ZA:Z:AA ZB:Z:AA ZC:Z:AA ZD:Z:AA RG:Z:HiMom.1 QX:Z:######0? RX:Z:AAAAAAAA
+HiMom:1:1201:1160:2109 4 * 0 0 * * 0 0 ACATCCTTCCCATGCCACCAACTCG CCCFFFFFGHHHHJJJJJJJJJJJJ ZA:Z:CG ZB:Z:CC ZC:Z:TT ZD:Z:CC RG:Z:HiMom.1 QX:Z:C at BFFFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1180:2119 4 * 0 0 * * 0 0 GCTCTAAATTTTGCTTTTCTACAGC CCCFFFFFHHHHHJJJJIJIJJIJJ ZA:Z:GA ZB:Z:CC ZC:Z:GT ZD:Z:TG RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GACCGTTG
+HiMom:1:1201:1185:2143 4 * 0 0 * * 0 0 GCTGAAGGCCCGTGGGCCAGAGGTG @CCFFFFFHHHHHJJJJJJJJJJHI ZA:Z:CT ZB:Z:AT ZC:Z:GC ZD:Z:GT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:1201:1187:2100 4 * 0 0 * * 0 0 AAAAAAGAGCCCGCATTGCCGAGAC =<=;AA################### ZA:Z:TA ZB:Z:TC ZC:Z:TG ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1190:2194 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AG ZB:Z:GT ZC:Z:CG ZD:Z:CA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1204:2228 4 * 0 0 * * 0 0 TCTTCTTGTCGATGAGGAACTTGGT @?@FFFFFDHHGHJIJJGHIIJJJH ZA:Z:CC ZB:Z:AG ZC:Z:CA ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1201:1208:2132 4 * 0 0 * * 0 0 CTGTAGAAAGGATGGTCGGGCTCCA @@CDFFFFGHFHHJIJJGJIBHJJG ZA:Z:TG ZB:Z:TA ZC:Z:AT ZD:Z:CA RG:Z:HiMom.1 QX:Z:CC at FFFFF RX:Z:TGTAATCA
+HiMom:1:1201:1219:2115 4 * 0 0 * * 0 0 TGGGAGTAGTTCCCTGCTAAGGGAG ???DBDBDADDDDIEID:AFFD:?8 ZA:Z:CC ZB:Z:AT ZC:Z:GC ZD:Z:GT RG:Z:HiMom.1 QX:Z:??<DDA?D RX:Z:CCATGCGT
+HiMom:1:1201:1236:2187 4 * 0 0 * * 0 0 CTCCTTAGCGGATTCCGACTTCCAT CCCFFFFDHHHHGIJJIGIGIJJGG ZA:Z:TA ZB:Z:TC ZC:Z:CA ZD:Z:GG RG:Z:HiMom.1 QX:Z:@@BFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1242:2207 4 * 0 0 * * 0 0 ATCTTTTATTGGCCTCCTGCTCCCC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AT ZB:Z:TC ZC:Z:CT ZD:Z:CT RG:Z:HiMom.1 QX:Z:?BBDDDFF RX:Z:ATTCCTCT
+HiMom:1:1201:1252:2141 4 * 0 0 * * 0 0 AGTTATTTTGCCTATGTCCAACAAG BCBFFFFFGHHHHJIJJJJJJJJJJ ZA:Z:TT ZB:Z:GT ZC:Z:CT ZD:Z:AT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1260:2165 4 * 0 0 * * 0 0 ATCTGATCTAAGTTGGGGGACGCCG @@@FFDFFHHHHHJJJIJIIIGIJJ ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1280:2179 4 * 0 0 * * 0 0 GAGGACTGCTTGAGTCCAGGAGTTC @@BFFDEFGHHHHIFGCHIJJJGGI ZA:Z:GC ZB:Z:CT ZC:Z:AG ZD:Z:CC RG:Z:HiMom.1 QX:Z:BCCFFFFF RX:Z:GCCTAGCC
+HiMom:1:1201:1281:2133 4 * 0 0 * * 0 0 GCAACAAAATTTCATATGACTTAGC CCCFFFFFHHHHHJJIIIHICHIIJ ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:C at CFFFDF RX:Z:CCAACATT
+HiMom:1:1201:1285:2100 4 * 0 0 * * 0 0 GATCTTTTTTGCTTTGTAGTTATAG @@@DFFFFHHHHHIIGIABCFFHBF ZA:Z:TG ZB:Z:CT ZC:Z:GC ZD:Z:TG RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGCTGCTG
+HiMom:1:1201:1291:2158 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGATGGGC @CCFFFDD?FHHFGEHHIIDHIIII ZA:Z:AG ZB:Z:CA ZC:Z:TG ZD:Z:GA RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AGCATGGA
+HiMom:1:1201:1300:2137 4 * 0 0 * * 0 0 GCTCTTCCGATCTTTTTTTTAATTT @@?DDDDDFDHADEHGIGGED3?FD ZA:Z:GC ZB:Z:CT ZC:Z:AG ZD:Z:CC RG:Z:HiMom.1 QX:Z:8?84B23? RX:Z:GCCTAGCC
+HiMom:1:1201:1312:2112 4 * 0 0 * * 0 0 ATTTGCAGGAGCCGGCGCAGGTGCA CCCFFFFFHHHHHJJJIJJJJGHIJ ZA:Z:TC ZB:Z:GC ZC:Z:TA ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1331:2162 4 * 0 0 * * 0 0 TAATCCCAGTACTTTGGGAGGCCAA CCCFFFFFHHHHHJJJJIJJJJJJJ ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1341:2116 4 * 0 0 * * 0 0 ATAACAGCGAGACTGGCAACTTAAA ######################### ZA:Z:AC ZB:Z:AG ZC:Z:GT ZD:Z:AT RG:Z:HiMom.1 QX:Z:CCCFFBDD RX:Z:ACAGGTAT
+HiMom:1:1201:1344:2147 4 * 0 0 * * 0 0 ACGATTAGTTTTAGCATTGGAGTAG @<??DDDDFHHHFGGHHIIIGGAGH ZA:Z:TG ZB:Z:TA ZC:Z:AT ZD:Z:CA RG:Z:HiMom.1 QX:Z:=?1AA:=D RX:Z:TGTAATCA
+HiMom:1:1201:1345:2181 4 * 0 0 * * 0 0 ATACGGATGTGTTTAGGAGTGGGAC CCCFFFFFHHHHHIIJJHJFHIJIJ ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1201:1364:2113 4 * 0 0 * * 0 0 TAAAGAGAGCCAGTGGAGTTACGAC ######################### ZA:Z:CA ZB:Z:GC ZC:Z:GG ZD:Z:TA RG:Z:HiMom.1 QX:Z:C at CFFF@D RX:Z:CAGCGGTA
+HiMom:1:1201:1392:2109 4 * 0 0 * * 0 0 GTCAGACAGGGGGATTTGGGCTGTG BBCFFFFFHHHHHHJJJHIJIJJJJ ZA:Z:TA ZB:Z:TC ZC:Z:TG ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCDF?DD RX:Z:TATCTGCC
+HiMom:1:1201:1392:2184 4 * 0 0 * * 0 0 ATCTTTATTCATTTGTATGATCTTA @@BFFFFFHFFHFHIHIIJIJJJJI ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:TC RG:Z:HiMom.1 QX:Z:@CCFFDDE RX:Z:CAATAGTC
+HiMom:1:1201:1393:2143 4 * 0 0 * * 0 0 GATAAATGCACGCATCCCCCCCGCG C at CFFFFFGGHHHHJJJJJJJJJJI ZA:Z:CT ZB:Z:AA ZC:Z:CT ZD:Z:CG RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:CTAACTCG
+HiMom:1:1201:1414:2174 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT @;@1BDADF????FFEB>B6=BBBB ZA:Z:AG ZB:Z:AA ZC:Z:AA ZD:Z:GA RG:Z:HiMom.1 QX:Z:####<>## RX:Z:AGAAAAGA
+HiMom:1:1201:1416:2128 4 * 0 0 * * 0 0 TTGGTGTGGAGGCGGTGGCGGGATC @@@DDDDDHHFHHII:?GGHIIB6? ZA:Z:TC ZB:Z:GC ZC:Z:TA ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1421:2154 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTTGTGCTCTT BC at DFFFFHHHHHJJJJFHIHHIJJ ZA:Z:TG ZB:Z:TA ZC:Z:AC ZD:Z:TC RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGTAACTC
+HiMom:1:1201:1439:2156 4 * 0 0 * * 0 0 GGAGATTATTTGCCTTGAAGTAAGC -;(22<>>@>8@>8;@######### ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GC RG:Z:HiMom.1 QX:Z:1;;=#### RX:Z:GACCAGGC
+HiMom:1:1201:1452:2143 4 * 0 0 * * 0 0 TTTTAGTCTTAGCATTTACTTTCCC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:CA ZB:Z:AC ZC:Z:TC ZD:Z:TC RG:Z:HiMom.1 QX:Z:BC at DDFFF RX:Z:CAACTCTC
+HiMom:1:1201:1458:2109 4 * 0 0 * * 0 0 GATACGAACACACAAGAACTTTTTT CCCFFFFFHHHHHJJJJJJJJJJJI ZA:Z:AC ZB:Z:TG ZC:Z:TA ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:1201:1472:2121 516 * 0 0 * * 0 0 GTGTGCTCTTCCGATCTGGAGGATG =+=??A4A==A at 7A<?######### ZA:Z:CT ZB:Z:AT ZC:Z:GC ZD:Z:GC RG:Z:HiMom.1 QX:Z:;?=D;:## RX:Z:CTATGCGC
+HiMom:1:1201:1483:2126 516 * 0 0 * * 0 0 GCATGCAGCTGGGTGCTGTGATGCA @@@DDDBB<DD8F<<CGG?AA?A<F ZA:Z:CT ZB:Z:GT ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z:@C<DD:B? RX:Z:CTGTAATC
+HiMom:1:1201:1486:2109 4 * 0 0 * * 0 0 ACGTGTGCTCTTCCCGATCTGTATA CCCFF?DDFBHHHJJIIDHJIJJJH ZA:Z:GT ZB:Z:CC ZC:Z:AC ZD:Z:AG RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:GTCCACAG
+HiMom:1:1201:1486:2146 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTGGGC <<<@??@??@???????######## ZA:Z:CA ZB:Z:AC ZC:Z:TC ZD:Z:TC RG:Z:HiMom.1 QX:Z:?@@1:DBD RX:Z:CAACTCTC
+HiMom:1:2101:1011:2102 4 * 0 0 * * 0 0 .....TCACACATAATTTTAAAATT #####22@?@@??@@@@@??@@@@@ ZA:Z:CT ZB:Z:GT ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1013:2146 4 * 0 0 * * 0 0 ....CGCTAGAACCAACTTATTCAT ####24=?@@?@?@@?@@@@@@?@@ ZA:Z:CT ZB:Z:AT ZC:Z:GC ZD:Z:GT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:2101:1021:2209 4 * 0 0 * * 0 0 ..GGAAGGCTGCTAGCTGGCCAGAG ##08@>??@@??@?????????>?@ ZA:Z:AC ZB:Z:TA ZC:Z:AG ZD:Z:AC RG:Z:HiMom.1 QX:Z:@CCDFFFF RX:Z:ACTAAGAC
+HiMom:1:2101:1023:2237 516 * 0 0 * * 0 0 ..TTTGTTTGAGTTCCTTGTAGATT ##0:=@?>?@???@:>?@??>?;?< ZA:Z:GC ZB:Z:CT ZC:Z:AG ZD:Z:CC RG:Z:HiMom.1 QX:Z:=:1<#### RX:Z:GCCTAGCC
+HiMom:1:2101:1031:2163 4 * 0 0 * * 0 0 ..ACATTTGTCACCACTAGCCACCA ##0<@?@@@@@@@@@@?@@@@@@@? ZA:Z:GA ZB:Z:TA ZC:Z:TC ZD:Z:CA RG:Z:HiMom.1 QX:Z:B at BFFFFF RX:Z:GATATCCA
+HiMom:1:2101:1036:2087 4 * 0 0 * * 0 0 .GTCCACTTACGAAGCAAATACTTT #4=DDFFFHHHHHJJJJJJJJJJJJ ZA:Z:GA ZB:Z:CC ZC:Z:GT ZD:Z:TG RG:Z:HiMom.1 QX:Z:B at CFFDFF RX:Z:GACCGTTG
+HiMom:1:2101:1040:2208 516 * 0 0 * * 0 0 .CTGATAGTCACTGAAATGAATTCA #-0=>(2 at .22@@############ ZA:Z:AC ZB:Z:GA ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z::1###### RX:Z:ACGAAATC
+HiMom:1:2101:1048:2238 4 * 0 0 * * 0 0 .GTCACATCGTTGAAGCACTGGATC #11ADDDB<CFFHCHGDBHGIIIII ZA:Z:AC ZB:Z:AG ZC:Z:TT ZD:Z:GA RG:Z:HiMom.1 QX:Z:?@7DDDDA RX:Z:ACAGTTGA
+HiMom:1:2101:1054:2162 4 * 0 0 * * 0 0 .GGACAGGGAAGGGAAGGAAGGGTG #4=DDFDFHHHHHJIJIIDHHGICG ZA:Z:AG ZB:Z:GT ZC:Z:AA ZD:Z:GG RG:Z:HiMom.1 QX:Z:B at BDDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1059:2083 4 * 0 0 * * 0 0 .GAATGTCTTAGAAGGATGCTTCTC #1=BDDDEHHGHHJJJJJIJJIIJJ ZA:Z:TA ZB:Z:CC ZC:Z:GT ZD:Z:CT RG:Z:HiMom.1 QX:Z:1:?D##02 RX:Z:TACCGTCT
+HiMom:1:2101:1063:2206 4 * 0 0 * * 0 0 .TGCTAGGATGAGGATGGATAGTAA #1=DDDFFHHHHHJHIIJHIIIHHJ ZA:Z:AC ZB:Z:AG ZC:Z:GT ZD:Z:AT RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:ACAGGTAT
+HiMom:1:2101:1064:2242 4 * 0 0 * * 0 0 .GGAAAAAGGTTGTCAAGCGTTAAA ######################### ZA:Z:TC ZB:Z:GC ZC:Z:TA ZD:Z:GA RG:Z:HiMom.1 QX:Z:;@<:AA at A RX:Z:TCGCTAGA
+HiMom:1:2101:1072:2170 4 * 0 0 * * 0 0 .GGGGAGACAGAGAGGATCAGAAGT #4=BDDFDHHDFHEGFEGGIJIIIG ZA:Z:CA ZB:Z:GC ZC:Z:GG ZD:Z:TA RG:Z:HiMom.1 QX:Z:B@@DFDDF RX:Z:CAGCGGTA
+HiMom:1:2101:1077:2139 4 * 0 0 * * 0 0 .ATTAGTTGGCGGATGAAGCAGATA #4=DFFFFHHHHHJJJJJJJJJIJJ ZA:Z:AA ZB:Z:CA ZC:Z:AT ZD:Z:GG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:2101:1084:2188 4 * 0 0 * * 0 0 TACAAGGTCAAAATCAGCAACAAGT CCCFFFFDHHHHHJJJJJJJJJJJJ ZA:Z:GA ZB:Z:AG ZC:Z:GA ZD:Z:AG RG:Z:HiMom.1 QX:Z:@B at FFFFF RX:Z:GAAGGAAG
+HiMom:1:2101:1100:2085 4 * 0 0 * * 0 0 ATCTTGATCTCCTCCTTCTTGGCCT @@@DDDDDHHFHFEIIIIHHBAHBG ZA:Z:CC ZB:Z:AG ZC:Z:CA ZD:Z:CC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:2101:1102:2221 4 * 0 0 * * 0 0 ATAACTGACTCTACTCAGTAGATTA CCCFFFFFHHHHHJJJJJIJJJJJJ ZA:Z:CT ZB:Z:GC ZC:Z:GG ZD:Z:AT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGCGGAT
+HiMom:1:2101:1105:2131 4 * 0 0 * * 0 0 CAGCAGCAGCAACAGCAGAAACATG CCCFFFFFHHHHHJJJJJIJJJJJJ ZA:Z:AC ZB:Z:TG ZC:Z:TA ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:2101:1112:2245 4 * 0 0 * * 0 0 TCGTAGTGTTGTAATTTCGTCTTCT ?8?DBDDDCCFCAACGGFFCBFFAE ZA:Z:AA ZB:Z:CA ZC:Z:AT ZD:Z:GG RG:Z:HiMom.1 QX:Z:@@?BBDDD RX:Z:AACAATGG
+HiMom:1:2101:1122:2136 4 * 0 0 * * 0 0 CTTGCCAGCCTGCAGGCCCCGCGGC ???BBAABDD?DDIID)A:3<EADD ZA:Z:GC ZB:Z:CG ZC:Z:TC ZD:Z:GA RG:Z:HiMom.1 QX:Z:?@<DDDD? RX:Z:GCCGTCGA
+HiMom:1:2101:1123:2095 4 * 0 0 * * 0 0 TCCGCCTCCAGCTTCAGCTTCTCCT @@@FDDFFHHHHHJHGGJIJJJEHH ZA:Z:CA ZB:Z:GC ZC:Z:GG ZD:Z:TA RG:Z:HiMom.1 QX:Z:@?@DDF@@ RX:Z:CAGCGGTA
+HiMom:1:2101:1126:2082 4 * 0 0 * * 0 0 TCTCTTTCCACCTTGGTCACCTTCC @C at DDDFFHHHHHJEGGIHHIJGIH ZA:Z:CT ZB:Z:GC ZC:Z:GG ZD:Z:AT RG:Z:HiMom.1 QX:Z:@@@FFFDA RX:Z:CTGCGGAT
+HiMom:1:2101:1133:2239 4 * 0 0 * * 0 0 AGCTTTTTGTTTCCTAGCTTGTCTT ?@?DDFFFHHHHF4ACFHIJHHHGH ZA:Z:TA ZB:Z:TC ZC:Z:CA ZD:Z:TG RG:Z:HiMom.1 QX:Z:@@@BDDDF RX:Z:TATCCATG
+HiMom:1:2101:1143:2137 4 * 0 0 * * 0 0 GCTCTTCAGATCTAGGGGGAACAGC @@@DD?=DCAFFFHIIDG:EFHIII ZA:Z:TC ZB:Z:CG ZC:Z:TC ZD:Z:TA RG:Z:HiMom.1 QX:Z:####4=## RX:Z:TCCGTCTA
+HiMom:1:2101:1151:2182 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTA 9<<?@?@;5=?############## ZA:Z:GA ZB:Z:AA ZC:Z:AA ZD:Z:AA RG:Z:HiMom.1 QX:Z:######## RX:Z:GAAAAAAA
+HiMom:1:2101:1151:2236 516 * 0 0 * * 0 0 TTTGAAGCCTCTTTATCCTTGGCAT ######################### ZA:Z:TA ZB:Z:GC ZC:Z:GG ZD:Z:TA RG:Z:HiMom.1 QX:Z:######@5 RX:Z:TAGCGGTA
+HiMom:1:2101:1162:2139 4 * 0 0 * * 0 0 ATCGTTTATGGTCGGAACTACGACG BCCFFFFFHHHHHIJJJJJJJIJJI ZA:Z:TG ZB:Z:CT ZC:Z:GC ZD:Z:TG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1163:2203 4 * 0 0 * * 0 0 TTGGTTCACTTATGTATTTATGAAT @CCFDFFFHHHHHJHIIJJJJJJJJ ZA:Z:AG ZB:Z:GT ZC:Z:AA ZD:Z:GG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1163:2222 4 * 0 0 * * 0 0 GAGCGATAATGGTTCTTTTCCTCAC @@@DFFFFHHHHHJJJJJJJIJJJJ ZA:Z:TG ZB:Z:CA ZC:Z:AG ZD:Z:TA RG:Z:HiMom.1 QX:Z:CCCFFFEF RX:Z:TGCAAGTA
+HiMom:1:2101:1172:2152 516 * 0 0 * * 0 0 ATCGTTTCTGGGGACTAGTGAGGCG ######################### ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:TC RG:Z:HiMom.1 QX:Z:######## RX:Z:CAATAGTC
+HiMom:1:2101:1186:2093 4 * 0 0 * * 0 0 AATGTTGGGAGGACAATGATGGAAA ######################### ZA:Z:CC ZB:Z:AA ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:2101:1188:2195 4 * 0 0 * * 0 0 GCACATACACCAAATGTCTGAACCT CCCFFFFFHHHHHJJJHIJJJJJJJ ZA:Z:AG ZB:Z:GT ZC:Z:CG ZD:Z:CA RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:2101:1195:2150 4 * 0 0 * * 0 0 AATTGAACTTCACCACCCAGAGGAA CCCFFFFFHHHHHJJJJJJIJJJJJ ZA:Z:TG ZB:Z:CT ZC:Z:GC ZD:Z:TG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1207:2084 516 * 0 0 * * 0 0 TCACCACTCTTCTGGGCATCCCCTG @@@DDEDFHHHHHIJIHHGHGGJJJ ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GA RG:Z:HiMom.1 QX:Z:@@CDFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1208:2231 516 * 0 0 * * 0 0 CTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJHFDDDDDDD ZA:Z:GT ZB:Z:AA ZC:Z:CA ZD:Z:TC RG:Z:HiMom.1 QX:Z:##:A1A22 RX:Z:GTAACATC
+HiMom:1:2101:1215:2110 4 * 0 0 * * 0 0 ATCTTTCCCCCATTAAGAACAGCAA ######################### ZA:Z:AA ZB:Z:AA ZC:Z:GA ZD:Z:AG RG:Z:HiMom.1 QX:Z:1:7<#### RX:Z:AAAAGAAG
+HiMom:1:2101:1216:2172 4 * 0 0 * * 0 0 GGACTTCTAGGGGATTTAGCGGGGT CCCFFFFFHHHHHJJJJJJJJJJJD ZA:Z:CA ZB:Z:GC ZC:Z:GG ZD:Z:AT RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CAGCGGAT
+HiMom:1:2101:1216:2193 4 * 0 0 * * 0 0 AGGCATGACACTGCATTTTAAATAC @@@DDDDDHFFHHGGDFHFHIIHGG ZA:Z:AC ZB:Z:AG ZC:Z:TT ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAGTTGA
+HiMom:1:2101:1226:2088 4 * 0 0 * * 0 0 GCTCTTCCGATCTAGGTAATAGCTA ==?BDFFFDCDDHFFFAFHDHIJGJ ZA:Z:GA ZB:Z:TA ZC:Z:TC ZD:Z:CA RG:Z:HiMom.1 QX:Z:@@@:DDDD RX:Z:GATATCCA
+HiMom:1:2101:1231:2208 4 * 0 0 * * 0 0 AGCCAGTGTTGGTGTGTTGACTGTT @@;1ADABCF;BF<AACGCHEBHC< ZA:Z:CT ZB:Z:AT ZC:Z:GC ZD:Z:GT RG:Z:HiMom.1 QX:Z:@<@?D8 at D RX:Z:CTATGCGT
+HiMom:1:2101:1233:2133 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFGHHHHJJJFDDDDDDDD ZA:Z:CT ZB:Z:AT ZC:Z:GC ZD:Z:GT RG:Z:HiMom.1 QX:Z:=??B14## RX:Z:CTATGCGT
+HiMom:1:2101:1240:2197 516 * 0 0 * * 0 0 ACTGGAGATCCTTGTTACATGCCCA ??+++A:DD?:ADEE@::C4:C<E: ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:88+AD@?8 RX:Z:AACGCATT
+HiMom:1:2101:1245:2154 4 * 0 0 * * 0 0 ACCAATCAGTAGCACCACTATACAC CCCFFFFFHHHHHJJJJJJIJJJJJ ZA:Z:CT ZB:Z:GT ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1249:2231 4 * 0 0 * * 0 0 TCTCTCGGCCTTCCACTCTAGCATA @@@FFFFFFHHGHIJJJGJIIJHIJ ZA:Z:AG ZB:Z:GT ZC:Z:AA ZD:Z:GG RG:Z:HiMom.1 QX:Z:@@CBDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1258:2092 4 * 0 0 * * 0 0 TTAGACAAAACACCAAAATAAAATA ######################### ZA:Z:TA ZB:Z:AG ZC:Z:CA ZD:Z:CA RG:Z:HiMom.1 QX:Z:@@CDDFFF RX:Z:TAAGCACA
+HiMom:1:2101:1262:2128 516 * 0 0 * * 0 0 TCTTGTGGTAACTTTTCTGACACCT -(---9@;@?:8>?4:>?@###### ZA:Z:AC ZB:Z:TA ZC:Z:AG ZD:Z:AC RG:Z:HiMom.1 QX:Z:##8?ADD8 RX:Z:ACTAAGAC
+HiMom:1:2101:1273:2119 516 * 0 0 * * 0 0 ATGATGGATCTTCTCTAACTTGTCA >=><AAAAA+2AA?CB4@@ABB3?A ZA:Z:CT ZB:Z:AA ZC:Z:CT ZD:Z:CG RG:Z:HiMom.1 QX:Z:####=ADB RX:Z:CTAACTCG
+HiMom:1:2101:1285:2105 516 * 0 0 * * 0 0 TGTCTATATCAACCAACACCTCTTC -(0(():94:9:???########## ZA:Z:TA ZB:Z:TC ZC:Z:TC ZD:Z:GG RG:Z:HiMom.1 QX:Z:09###### RX:Z:TATCTCGG
+HiMom:1:2101:1312:2105 4 * 0 0 * * 0 0 GTTGAGAATAGGTTGAGATCGTTTC @CCFFFDFHHFHDHIJJJJJJJIJJ ZA:Z:GA ZB:Z:CC ZC:Z:AG ZD:Z:GA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1325:2083 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTGGAGAAAAA ######################### ZA:Z:AC ZB:Z:AG ZC:Z:GT ZD:Z:AT RG:Z:HiMom.1 QX:Z:@@@BD=DD RX:Z:ACAGGTAT
+HiMom:1:2101:1336:2109 4 * 0 0 * * 0 0 AGACCAGAACAGCTCCAGGTGCTCC CCCFFFFFHHHHHJJJJJJCGHIJJ ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1349:2084 4 * 0 0 * * 0 0 AGTCTGAATCATTGGTGTCTGAAGA <5;??=>=>>?############## ZA:Z:AC ZB:Z:TG ZC:Z:TA ZD:Z:TC RG:Z:HiMom.1 QX:Z:=;##22<A RX:Z:ACTGTATC
+HiMom:1:2101:1365:2094 4 * 0 0 * * 0 0 GCTCTTCCGATCTTGTGCTCTTCCG CCCFFFFDHFHHGJJIIJIJJIHII ZA:Z:AC ZB:Z:TG ZC:Z:TA ZD:Z:CC RG:Z:HiMom.1 QX:Z:##=22<## RX:Z:ACTGTACC
+HiMom:1:2101:1370:2116 4 * 0 0 * * 0 0 CACCATCTGACATCATGTTTGAAAG @@@DFFFDFFHDHIGBHHII<HEDB ZA:Z:AG ZB:Z:CA ZC:Z:TG ZD:Z:GA RG:Z:HiMom.1 QX:Z:?:8A?3:B RX:Z:AGCATGGA
+HiMom:1:2101:1386:2105 4 * 0 0 * * 0 0 AGGAATTATTCTTCTGCCATAAGGT B@@DDFFFHGFHHIJJJJJGIGIJH ZA:Z:CT ZB:Z:GT ZC:Z:AA ZD:Z:TC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1414:2098 4 * 0 0 * * 0 0 TTGGGGCCGGTGCCGTCGGGCCCAA CCCFFFFFHHHHGJJIJJJJJJJIJ ZA:Z:CT ZB:Z:AA ZC:Z:CT ZD:Z:CG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:2101:1427:2081 4 * 0 0 * * 0 0 CCGACTTCCATGGCCACCGTCCTGC CCCFFFFFHHHHHJJJIIGFIIJJI ZA:Z:AA ZB:Z:CG ZC:Z:CA ZD:Z:TT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1450:2134 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CC at FDFDFFDFHFGIIE1CGGHBGE ZA:Z:AC ZB:Z:CA ZC:Z:GT ZD:Z:TG RG:Z:HiMom.1 QX:Z:@C at DDDB? RX:Z:ACCAGTTG
+HiMom:1:2101:1459:2083 4 * 0 0 * * 0 0 ATTTCACCAAAATAATCAGAAGGCC CCCFFFFDBHGHHIGGIJFJJGGFH ZA:Z:GC ZB:Z:CG ZC:Z:TC ZD:Z:GA RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:GCCGTCGA
+HiMom:1:2101:1491:2093 4 * 0 0 * * 0 0 AGAGACGGGGTCTCGCTATGTTGCC BCCDFFFFHHHHHJIIJJJJIJIJJ ZA:Z:CA ZB:Z:AT ZC:Z:AG ZD:Z:TC RG:Z:HiMom.1 QX:Z:@@@FDEBD RX:Z:CAATAGTC
diff --git a/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithTagPerMolecularIndex4M4M.sam b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithTagPerMolecularIndex4M4M.sam
new file mode 100644
index 0000000..84de969
--- /dev/null
+++ b/testdata/picard/illumina/25T8B25T/sams/nonBarcodedWithTagPerMolecularIndex4M4M.sam
@@ -0,0 +1,182 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:HiMom.1 SM:HiDad LB:Hello, World PL:illumina PU:HiMom.1 CN:BI
+HiMom:1:1101:1031:2224 516 * 0 0 * * 0 0 ......................... ######################### ZA:Z:NNNN ZB:Z:NNNN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1039:2147 516 * 0 0 * * 0 0 ......................... ######################### ZA:Z:NNNN ZB:Z:NNNN RG:Z:HiMom.1 XN:i:1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1046:2175 516 * 0 0 * * 0 0 ..GGA.................... ######################### ZA:Z:NNNN ZB:Z:NNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNNNNNN
+HiMom:1:1101:1047:2122 516 * 0 0 * * 0 0 ..TCA.................... ######################### ZA:Z:NNNA ZB:Z:NNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNANNNN
+HiMom:1:1101:1048:2197 516 * 0 0 * * 0 0 ..GTG.................... ######################### ZA:Z:NNNC ZB:Z:NNNN RG:Z:HiMom.1 QX:Z:######## RX:Z:NNNCNNNN
+HiMom:1:1101:1065:2193 4 * 0 0 * * 0 0 .CTTG.................... ######################### ZA:Z:GAAC ZB:Z:GATN RG:Z:HiMom.1 QX:Z:######## RX:Z:GAACGATN
+HiMom:1:1101:1069:2159 4 * 0 0 * * 0 0 GACGT.................... <<<@?#################### ZA:Z:GTCC ZB:Z:ACAG RG:Z:HiMom.1 QX:Z:@BBFFFFF RX:Z:GTCCACAG
+HiMom:1:1101:1071:2233 4 * 0 0 * * 0 0 GTTTG.................... <<<@@#################### ZA:Z:TATC ZB:Z:CAGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1101:1083:2193 4 * 0 0 * * 0 0 AGGCT.................... ######################### ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:?@;DD?BD RX:Z:CCAACATT
+HiMom:1:1101:1084:2136 4 * 0 0 * * 0 0 TTTCT.................... <<<@@#################### ZA:Z:TGCT ZB:Z:GCTG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:1101:1089:2172 4 * 0 0 * * 0 0 TCCGG.................... :<<??#################### ZA:Z:GACC ZB:Z:AGGA RG:Z:HiMom.1 QX:Z:?@@FF;=B RX:Z:GACCAGGA
+HiMom:1:1101:1100:2207 4 * 0 0 * * 0 0 AGGCT............G....... ######################### ZA:Z:ATTA ZB:Z:TCAA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1111:2148 4 * 0 0 * * 0 0 GCGAA.A..........GGACGAC. ######################### ZA:Z:GCCG ZB:Z:TCGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GCCGTCGA
+HiMom:1:1101:1138:2141 4 * 0 0 * * 0 0 TCCGATCTGCTTCAGGTCGATCAGA CCCFFFFFHGHHHJJIGHIJJJJJJ ZA:Z:AACA ZB:Z:ATGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1140:2120 4 * 0 0 * * 0 0 TTTTTTTTTTTTTAACTTTGCAAAT @@@DDDDDHHHHFB at 9FHI@BFH@@ ZA:Z:CAAC ZB:Z:TCTC RG:Z:HiMom.1 QX:Z:@@@DDFDF RX:Z:CAACTCTC
+HiMom:1:1101:1143:2192 4 * 0 0 * * 0 0 CGACAAGTCTGGCTTATCACTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:TCGC ZB:Z:TAGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1150:2228 4 * 0 0 * * 0 0 ATGGGAGGCGATTCCTAGGGGGTTG 8?=DD8;@BH6DHD<FGGGEIGHIG ZA:Z:AGGT ZB:Z:CGCA RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:AGGTCGCA
+HiMom:1:1101:1157:2135 4 * 0 0 * * 0 0 TTTAAAGTCTTAATCAAAGATGATA CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:ATTA ZB:Z:TCAA RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:ATTATCAA
+HiMom:1:1101:1162:2207 516 * 0 0 * * 0 0 TAAAACTGGGGAAGTTAGAGGAATG ######################### ZA:Z:ACAA ZB:Z:AATT RG:Z:HiMom.1 QX:Z:######## RX:Z:ACAAAATT
+HiMom:1:1101:1165:2239 4 * 0 0 * * 0 0 ATGGAAGTCGAGACAGAAGTGAGAA ######################### ZA:Z:GCCT ZB:Z:AGCC RG:Z:HiMom.1 QX:Z:B@@DFFFF RX:Z:GCCTAGCC
+HiMom:1:1101:1175:2197 4 * 0 0 * * 0 0 AAGAGCTGGGGAACATCCAGAAAGG BC at FFFFFHHHHHJJJJJJJJJJJJ ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1101:1188:2237 4 * 0 0 * * 0 0 GCTTCCTTCAAGACAGAAGTGAGAA CCCFFDDEFHHFFE at FDHHAIAFHG ZA:Z:GTAA ZB:Z:CATC RG:Z:HiMom.1 QX:Z:@@?DFFDF RX:Z:GTAACATC
+HiMom:1:1101:1197:2200 4 * 0 0 * * 0 0 ATATTCCACTGGAACCACAGAACCC @@@FFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z:@CCFDFFF RX:Z:AACGCATT
+HiMom:1:1101:1206:2126 4 * 0 0 * * 0 0 ATCTGTCCAGTGGTGCACTGAATGT CCCFFFFFHHHHHHIIJJJJIJJJJ ZA:Z:AACA ZB:Z:ATGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:1101:1212:2230 4 * 0 0 * * 0 0 TTTTAGCTTTATTGGGGAGGGGGTG CCCFFFFFHHGHHJJJJGJJJJJDF ZA:Z:CCAG ZB:Z:CACC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1101:1218:2200 4 * 0 0 * * 0 0 GCTCTTCCGATCTATCTGCTCGTCC (-(=34???3;@############# ZA:Z:GACC ZB:Z:GTTG RG:Z:HiMom.1 QX:Z:@CCFFDDF RX:Z:GACCGTTG
+HiMom:1:1101:1219:2164 4 * 0 0 * * 0 0 ATCTTATCCACTCCTTCCACTTTGG CCCFFFFFHHHHHJJIJJJJJJJIJ ZA:Z:TTGT ZB:Z:CTAT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1101:1221:2143 4 * 0 0 * * 0 0 CAATTGAATGTCTGCACAGCCGCTT @@@FFFFDHHHHHJJJIIIJGHIJJ ZA:Z:GCCG ZB:Z:TCGA RG:Z:HiMom.1 QX:Z:@@CDDDDF RX:Z:GCCGTCGA
+HiMom:1:1101:1236:2121 4 * 0 0 * * 0 0 TTGCGCTTACTTTGTAGCCTTCATC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:ACAG ZB:Z:GTAT RG:Z:HiMom.1 QX:Z:CCCFFDDF RX:Z:ACAGGTAT
+HiMom:1:1101:1242:2170 4 * 0 0 * * 0 0 GGAAGGAAAAGAAGCACAAGTACAT @@@DFDFFHHHGHHGIIGJJEHHIG ZA:Z:TGCA ZB:Z:AGTA RG:Z:HiMom.1 QX:Z:@@CFFF?D RX:Z:TGCAAGTA
+HiMom:1:1101:1257:2223 4 * 0 0 * * 0 0 TGCTCTTCCGATCTTTTAGCAAAGC :?@DDBDDHFFHDGIGIIJJJGGGI ZA:Z:GACC ZB:Z:GTTG RG:Z:HiMom.1 QX:Z:;@@DD=DD RX:Z:GACCGTTG
+HiMom:1:1101:1259:2152 4 * 0 0 * * 0 0 ATTTTTATATTTTTTTAGACATAGG CCCFFFFFGHHHHJJJJIGIIJJJJ ZA:Z:ACTA ZB:Z:AGAC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTAAGAC
+HiMom:1:1101:1261:2127 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHGHHHJJIFDDDDDDDD ZA:Z:ACTA ZB:Z:AGAC RG:Z:HiMom.1 QX:Z:>7+ at A7A7 RX:Z:ACTAAGAC
+HiMom:1:1101:1263:2236 516 * 0 0 * * 0 0 AGTTCTTCAGTAATTTTAGTACTGC ######################### ZA:Z:AGGT ZB:Z:AAGG RG:Z:HiMom.1 QX:Z:######## RX:Z:AGGTAAGG
+HiMom:1:1101:1267:2209 4 * 0 0 * * 0 0 GGCAGAGTCTCCAACAGCCCCGTAC =;?DDDD?CCFHAIIIGGIIGE at EG ZA:Z:TATC ZB:Z:AGCC RG:Z:HiMom.1 QX:Z:?@@D;ADD RX:Z:TATCAGCC
+HiMom:1:1101:1269:2170 4 * 0 0 * * 0 0 TTCCAAGCCTGTGCTTTAAGGAAAA @@<ADBDBDF8DDCFH at GIE@@GGH ZA:Z:ATTA ZB:Z:TCAA RG:Z:HiMom.1 QX:Z:@@@DDDF? RX:Z:ATTATCAA
+HiMom:1:1101:1290:2225 4 * 0 0 * * 0 0 TCAGTTCACTGGCAAAGACAGTCAC C@@FBEDDFHFHGIIICEHGDHBHE ZA:Z:GCCT ZB:Z:AGCC RG:Z:HiMom.1 QX:Z:?<@DFBBD RX:Z:GCCTAGCC
+HiMom:1:1101:1291:2150 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHFHHIJJJIIIGIJIJ ZA:Z:CGCT ZB:Z:ATGT RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:CGCTATGT
+HiMom:1:1101:1302:2244 4 * 0 0 * * 0 0 TGAATACATATAACAAATGCAAAAA CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:GACC ZB:Z:TAAC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCTAAC
+HiMom:1:1101:1308:2153 516 * 0 0 * * 0 0 TCTGTAAGGTAATCCCCGCATGTGT 1?1=4===AFFDFFGFDGFB at CFB: ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z::?@B?@DD RX:Z:AACGCATT
+HiMom:1:1101:1309:2210 4 * 0 0 * * 0 0 AGTGGGCTAGGGCATTTTTAATCTT @@?DFFDFHHHDFHJIJJIJGIIIJ ZA:Z:ATTC ZB:Z:CTCT RG:Z:HiMom.1 QX:Z:?@@ADEEF RX:Z:ATTCCTCT
+HiMom:1:1101:1314:2233 4 * 0 0 * * 0 0 AGGAAAGTTGGGCTGACCTGACAGA @@<DDD;=FBFADBCGDEH?F;FCG ZA:Z:CGCT ZB:Z:ATGT RG:Z:HiMom.1 QX:Z:@<@?B@;A RX:Z:CGCTATGT
+HiMom:1:1101:1316:2126 4 * 0 0 * * 0 0 TCTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJJHFDDDDDD ZA:Z:CAAT ZB:Z:AGAC RG:Z:HiMom.1 QX:Z:1>>7A### RX:Z:CAATAGAC
+HiMom:1:1101:1327:2200 516 * 0 0 * * 0 0 GTCATCTGGGCTGTCGACAGGTGTC @B at FFFFFHHHHGIJJJJJJIFHHI ZA:Z:GCCG ZB:Z:TCGA RG:Z:HiMom.1 QX:Z:BCCFDFFD RX:Z:GCCGTCGA
+HiMom:1:1101:1328:2225 4 * 0 0 * * 0 0 AGGAAATTAGGACTTACCTGACATA ######################### ZA:Z:CAAC ZB:Z:TCTC RG:Z:HiMom.1 QX:Z:??;=A:B= RX:Z:CAACTCTC
+HiMom:1:1101:1338:2175 4 * 0 0 * * 0 0 GCTTGTTGGCTTTAACATCCACAAT CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:GAAG ZB:Z:GAAG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GAAGGAAG
+HiMom:1:1101:1347:2149 4 * 0 0 * * 0 0 GCTCTTCCGATCTGTGCTCTTCCGA CCCFFFFFDFHHFIJDGIGGHGIGH ZA:Z:GACC ZB:Z:AGGA RG:Z:HiMom.1 QX:Z:CC at DFFFD RX:Z:GACCAGGA
+HiMom:1:1101:1353:2226 4 * 0 0 * * 0 0 GTGCTCTTCCGATCTTCAGGTTACC BBBFFFFFHHHHHJJJJJJJIJJJJ ZA:Z:TATC ZB:Z:TGCC RG:Z:HiMom.1 QX:Z:@B at FFEFF RX:Z:TATCTGCC
+HiMom:1:1101:1363:2138 4 * 0 0 * * 0 0 GTTCTTAAACCTGTTAGAACTTCTG C@@FFFFFHHHHHJJJJJJJJJJJJ ZA:Z:CTAA ZB:Z:CTCG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:1101:1399:2128 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHIJJJJJJJJJJJ ZA:Z:CAAT ZB:Z:AGTC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1101:1403:2194 4 * 0 0 * * 0 0 ACATGGTGAAACCCTGTCTCTACTA CCCFFFDDHHHHHJJJJJJJJJJJJ ZA:Z:CTGT ZB:Z:AATC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:1101:1406:2222 4 * 0 0 * * 0 0 GGCTGGACTCCCCTGGTTCTGGGCA ;?@DDDBD?FHDFGIIIGIGHHIII ZA:Z:AGCA ZB:Z:TGGA RG:Z:HiMom.1 QX:Z:C@@DBFEF RX:Z:AGCATGGA
+HiMom:1:1101:1419:2119 4 * 0 0 * * 0 0 ACTTTCCTTTTTTGTTTTACTTTAA ######################### ZA:Z:TGTA ZB:Z:ATCA RG:Z:HiMom.1 QX:Z:@@@DFDFD RX:Z:TGTAATCA
+HiMom:1:1101:1420:2213 4 * 0 0 * * 0 0 TTCACTGTACCGGCCGTGCGTACTT @CCFFFFDHHHFGIJJJJJJGHIGG ZA:Z:CAGC ZB:Z:GGTA RG:Z:HiMom.1 QX:Z:@C at FFFDF RX:Z:CAGCGGTA
+HiMom:1:1101:1435:2194 4 * 0 0 * * 0 0 TTTTGTTTTCTTTTACTGAAGTGTA CCCFFDFFHHHHHJJJJIHIJHHHJ ZA:Z:TATC ZB:Z:TGCC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1101:1441:2148 4 * 0 0 * * 0 0 TTTTGGCTCTAGAGGGGGTAGAGGG CCCFFFFFHHDFBHIIJJ1?FGHIJ ZA:Z:CGCT ZB:Z:ATGT RG:Z:HiMom.1 QX:Z:@@BFFDDD RX:Z:CGCTATGT
+HiMom:1:1101:1452:2132 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJIJJJJ ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AACGCATT
+HiMom:1:1101:1460:2176 4 * 0 0 * * 0 0 AGGAAAAAGACACAACAAGTCCAAC ######################### ZA:Z:GATA ZB:Z:TCCA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GATATCCA
+HiMom:1:1101:1479:2221 4 * 0 0 * * 0 0 GGGGAAATCTATTTTTATGTAAAAA @CCFFFFFHHHHHJIGIJJJJJJJJ ZA:Z:TCGC ZB:Z:TAGA RG:Z:HiMom.1 QX:Z:@BCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1101:1491:2120 4 * 0 0 * * 0 0 GGCCAGGCTGAACTTCTGAGCTGCT CCCFFFFFHHHGHJJJJJJJJJJJJ ZA:Z:AGGT ZB:Z:CGCA RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1018:2133 4 * 0 0 * * 0 0 ......................... ######################### ZA:Z:ATTC ZB:Z:CTCT RG:Z:HiMom.1 XN:i:1 QX:Z:8??=BBBA RX:Z:ATTCCTCT
+HiMom:1:1201:1018:2217 516 * 0 0 * * 0 0 ......................... ######################### ZA:Z:ATTA ZB:Z:TCAA RG:Z:HiMom.1 XN:i:1 QX:Z:;<;:BBDD RX:Z:ATTATCAA
+HiMom:1:1201:1028:2202 4 * 0 0 * * 0 0 ..AAAC.C.T.......GG..TG.. ##42@?################### ZA:Z:GAAG ZB:Z:GAAG RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GAAGGAAG
+HiMom:1:1201:1042:2174 4 * 0 0 * * 0 0 .TCAGGAAGGC..CAAAAAAAGAAA #0;@@@?@?<@##3<@@?@@????? ZA:Z:TCTG ZB:Z:CAAG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCTGCAAG
+HiMom:1:1201:1043:2246 4 * 0 0 * * 0 0 .GCATCATTTC..GCTTCTCTCTGT #0;@@??@=@>##22=;@??><@?? ZA:Z:CGCT ZB:Z:ATGT RG:Z:HiMom.1 QX:Z:@<?DD:B= RX:Z:CGCTATGT
+HiMom:1:1201:1045:2105 516 * 0 0 * * 0 0 .TTTTTTTTTT..TTTTTTTTTTTT #0;@@@@@@@?##0:????????=< ZA:Z:CTGT ZB:Z:AATC RG:Z:HiMom.1 QX:Z:1112 at A## RX:Z:CTGTAATC
+HiMom:1:1201:1054:2151 4 * 0 0 * * 0 0 GTCAGGCACTGAGAATATATGGGTG CBCFFFFFHHHHHJJJJJJJJJJEG ZA:Z:CAAT ZB:Z:AGTC RG:Z:HiMom.1 QX:Z:CCCFFFDF RX:Z:CAATAGTC
+HiMom:1:1201:1064:2239 4 * 0 0 * * 0 0 GGGATGGGAGGGCGATGAGGACTAG 8?@:DDDACC:FHHGIH<EGDDDFH ZA:Z:TAAG ZB:Z:CACA RG:Z:HiMom.1 QX:Z:@@@FFADB RX:Z:TAAGCACA
+HiMom:1:1201:1073:2225 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGGAGGGT @BBDFFFFHHHHHJJJJJJJJJJJ: ZA:Z:ATTC ZB:Z:CTCT RG:Z:HiMom.1 QX:Z:B@@BDEFF RX:Z:ATTCCTCT
+HiMom:1:1201:1083:2121 4 * 0 0 * * 0 0 ACACACAACACCACCGCCCTCCCCC ######################### ZA:Z:CTAT ZB:Z:GCGT RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CTATGCGT
+HiMom:1:1201:1084:2204 4 * 0 0 * * 0 0 TGGCTCCTCAGGCTCTCATCAGTTG CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:TATC ZB:Z:TGCC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1095:2146 4 * 0 0 * * 0 0 ACTGACAACACCAAATGCTGCTAAG CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:GACC ZB:Z:AGGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:1201:1103:2184 4 * 0 0 * * 0 0 AGAAGTTTCAGAATTGTGGCCCCAT B at BFFDEFHHHHHJJJGHIJJJJJI ZA:Z:TTGT ZB:Z:CTAT RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1107:2109 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHGHHJJJJIIJJJJJJ ZA:Z:TTGT ZB:Z:CTAT RG:Z:HiMom.1 QX:Z:B at CFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1118:2198 4 * 0 0 * * 0 0 AATAAACTTTATTAAAGCAGTTAAA C at CFFFFFHDHHHGIIIJJJIJJJJ ZA:Z:ATTA ZB:Z:TCAA RG:Z:HiMom.1 QX:Z:@@@DDBDD RX:Z:ATTATCAA
+HiMom:1:1201:1122:2227 4 * 0 0 * * 0 0 GTCATATAAGGCCCAGTCCAAGGAA @@@FFFFFHHHGGIJIGGIJFIJII ZA:Z:CGCC ZB:Z:TTCC RG:Z:HiMom.1 QX:Z:@@@DDFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1123:2161 516 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGCATACA ===AAAA8AAAA<AAA)@CBA9>A# ZA:Z:GACC ZB:Z:AGGA RG:Z:HiMom.1 QX:Z:?;@DFDFF RX:Z:GACCAGGA
+HiMom:1:1201:1127:2112 516 * 0 0 * * 0 0 TAATCACCTGAGCAGTGAAGCCAGC @<@?BDDDHD?FDBHI?AHGGGDFH ZA:Z:CAAC ZB:Z:TCTC RG:Z:HiMom.1 QX:Z:=??BA?BD RX:Z:CAACTCTC
+HiMom:1:1201:1134:2144 4 * 0 0 * * 0 0 AGTGTGAGTAATGGTTGAGAGGTGG B@?DDDFFFHHGHJHHGFIHHIFGI ZA:Z:CGCT ZB:Z:ATGT RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:CGCTATGT
+HiMom:1:1201:1138:2227 516 * 0 0 * * 0 0 GACAAATATAGGAAATAGAAGCTAT =1=A=AAA,2?4>7C<<4<A+3<AB ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:######## RX:Z:CCAACATT
+HiMom:1:1201:1140:2125 4 * 0 0 * * 0 0 TTCATAAATTGGTCTTAGATGTTGC CC at FFFFFHHHHFGIJIIIJIJIJJ ZA:Z:TATC ZB:Z:CAGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1142:2242 4 * 0 0 * * 0 0 GTAAAATGTAAAATAATAAAAAATG ?=?DDDD;AF<DF<FFFFIIIFF@< ZA:Z:TATC ZB:Z:TGCC RG:Z:HiMom.1 QX:Z:??<D?D83 RX:Z:TATCTGCC
+HiMom:1:1201:1150:2161 4 * 0 0 * * 0 0 TTCTCACTACTGTGATTGTGCCACT @C at FFFFFGHHHHGIIIICEHCFGH ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z:@@@FDDDD RX:Z:AACGCATT
+HiMom:1:1201:1159:2179 516 * 0 0 * * 0 0 TTTTTTTTTATTTTTCTAAATACTT ===AA#################### ZA:Z:AAAA ZB:Z:AAAA RG:Z:HiMom.1 QX:Z:####+<0? RX:Z:AAAAAAAA
+HiMom:1:1201:1160:2109 4 * 0 0 * * 0 0 ACATCCTTCCCATGCCACCAACTCG CCCFFFFFGHHHHJJJJJJJJJJJJ ZA:Z:CGCC ZB:Z:TTCC RG:Z:HiMom.1 QX:Z:C at BFFFFF RX:Z:CGCCTTCC
+HiMom:1:1201:1180:2119 4 * 0 0 * * 0 0 GCTCTAAATTTTGCTTTTCTACAGC CCCFFFFFHHHHHJJJJIJIJJIJJ ZA:Z:GACC ZB:Z:GTTG RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:GACCGTTG
+HiMom:1:1201:1185:2143 4 * 0 0 * * 0 0 GCTGAAGGCCCGTGGGCCAGAGGTG @CCFFFFFHHHHHJJJJJJJJJJHI ZA:Z:CTAT ZB:Z:GCGT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:1201:1187:2100 4 * 0 0 * * 0 0 AAAAAAGAGCCCGCATTGCCGAGAC =<=;AA################### ZA:Z:TATC ZB:Z:TGCC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TATCTGCC
+HiMom:1:1201:1190:2194 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:AGGT ZB:Z:CGCA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTCGCA
+HiMom:1:1201:1204:2228 4 * 0 0 * * 0 0 TCTTCTTGTCGATGAGGAACTTGGT @?@FFFFFDHHGHJIJJGHIIJJJH ZA:Z:CCAG ZB:Z:CACC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:1201:1208:2132 4 * 0 0 * * 0 0 CTGTAGAAAGGATGGTCGGGCTCCA @@CDFFFFGHFHHJIJJGJIBHJJG ZA:Z:TGTA ZB:Z:ATCA RG:Z:HiMom.1 QX:Z:CC at FFFFF RX:Z:TGTAATCA
+HiMom:1:1201:1219:2115 4 * 0 0 * * 0 0 TGGGAGTAGTTCCCTGCTAAGGGAG ???DBDBDADDDDIEID:AFFD:?8 ZA:Z:CCAT ZB:Z:GCGT RG:Z:HiMom.1 QX:Z:??<DDA?D RX:Z:CCATGCGT
+HiMom:1:1201:1236:2187 4 * 0 0 * * 0 0 CTCCTTAGCGGATTCCGACTTCCAT CCCFFFFDHHHHGIJJIGIGIJJGG ZA:Z:TATC ZB:Z:CAGG RG:Z:HiMom.1 QX:Z:@@BFFFFF RX:Z:TATCCAGG
+HiMom:1:1201:1242:2207 4 * 0 0 * * 0 0 ATCTTTTATTGGCCTCCTGCTCCCC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:ATTC ZB:Z:CTCT RG:Z:HiMom.1 QX:Z:?BBDDDFF RX:Z:ATTCCTCT
+HiMom:1:1201:1252:2141 4 * 0 0 * * 0 0 AGTTATTTTGCCTATGTCCAACAAG BCBFFFFFGHHHHJIJJJJJJJJJJ ZA:Z:TTGT ZB:Z:CTAT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TTGTCTAT
+HiMom:1:1201:1260:2165 4 * 0 0 * * 0 0 ATCTGATCTAAGTTGGGGGACGCCG @@@FFDFFHHHHHJJJIJIIIGIJJ ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1280:2179 4 * 0 0 * * 0 0 GAGGACTGCTTGAGTCCAGGAGTTC @@BFFDEFGHHHHIFGCHIJJJGGI ZA:Z:GCCT ZB:Z:AGCC RG:Z:HiMom.1 QX:Z:BCCFFFFF RX:Z:GCCTAGCC
+HiMom:1:1201:1281:2133 4 * 0 0 * * 0 0 GCAACAAAATTTCATATGACTTAGC CCCFFFFFHHHHHJJIIIHICHIIJ ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:C at CFFFDF RX:Z:CCAACATT
+HiMom:1:1201:1285:2100 4 * 0 0 * * 0 0 GATCTTTTTTGCTTTGTAGTTATAG @@@DFFFFHHHHHIIGIABCFFHBF ZA:Z:TGCT ZB:Z:GCTG RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGCTGCTG
+HiMom:1:1201:1291:2158 4 * 0 0 * * 0 0 CGTGTGCTCTTCCGATCTGATGGGC @CCFFFDD?FHHFGEHHIIDHIIII ZA:Z:AGCA ZB:Z:TGGA RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:AGCATGGA
+HiMom:1:1201:1300:2137 4 * 0 0 * * 0 0 GCTCTTCCGATCTTTTTTTTAATTT @@?DDDDDFDHADEHGIGGED3?FD ZA:Z:GCCT ZB:Z:AGCC RG:Z:HiMom.1 QX:Z:8?84B23? RX:Z:GCCTAGCC
+HiMom:1:1201:1312:2112 4 * 0 0 * * 0 0 ATTTGCAGGAGCCGGCGCAGGTGCA CCCFFFFFHHHHHJJJIJJJJGHIJ ZA:Z:TCGC ZB:Z:TAGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1331:2162 4 * 0 0 * * 0 0 TAATCCCAGTACTTTGGGAGGCCAA CCCFFFFFHHHHHJJJJIJJJJJJJ ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:1201:1341:2116 4 * 0 0 * * 0 0 ATAACAGCGAGACTGGCAACTTAAA ######################### ZA:Z:ACAG ZB:Z:GTAT RG:Z:HiMom.1 QX:Z:CCCFFBDD RX:Z:ACAGGTAT
+HiMom:1:1201:1344:2147 4 * 0 0 * * 0 0 ACGATTAGTTTTAGCATTGGAGTAG @<??DDDDFHHHFGGHHIIIGGAGH ZA:Z:TGTA ZB:Z:ATCA RG:Z:HiMom.1 QX:Z:=?1AA:=D RX:Z:TGTAATCA
+HiMom:1:1201:1345:2181 4 * 0 0 * * 0 0 ATACGGATGTGTTTAGGAGTGGGAC CCCFFFFFHHHHHIIJJHJFHIJIJ ZA:Z:CAAT ZB:Z:AGTC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CAATAGTC
+HiMom:1:1201:1364:2113 4 * 0 0 * * 0 0 TAAAGAGAGCCAGTGGAGTTACGAC ######################### ZA:Z:CAGC ZB:Z:GGTA RG:Z:HiMom.1 QX:Z:C at CFFF@D RX:Z:CAGCGGTA
+HiMom:1:1201:1392:2109 4 * 0 0 * * 0 0 GTCAGACAGGGGGATTTGGGCTGTG BBCFFFFFHHHHHHJJJHIJIJJJJ ZA:Z:TATC ZB:Z:TGCC RG:Z:HiMom.1 QX:Z:CCCDF?DD RX:Z:TATCTGCC
+HiMom:1:1201:1392:2184 4 * 0 0 * * 0 0 ATCTTTATTCATTTGTATGATCTTA @@BFFFFFHFFHFHIHIIJIJJJJI ZA:Z:CAAT ZB:Z:AGTC RG:Z:HiMom.1 QX:Z:@CCFFDDE RX:Z:CAATAGTC
+HiMom:1:1201:1393:2143 4 * 0 0 * * 0 0 GATAAATGCACGCATCCCCCCCGCG C at CFFFFFGGHHHHJJJJJJJJJJI ZA:Z:CTAA ZB:Z:CTCG RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:CTAACTCG
+HiMom:1:1201:1414:2174 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT @;@1BDADF????FFEB>B6=BBBB ZA:Z:AGAA ZB:Z:AAGA RG:Z:HiMom.1 QX:Z:######## RX:Z:AGAAAAGA
+HiMom:1:1201:1416:2128 4 * 0 0 * * 0 0 TTGGTGTGGAGGCGGTGGCGGGATC @@@DDDDDHHFHHII:?GGHIIB6? ZA:Z:TCGC ZB:Z:TAGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TCGCTAGA
+HiMom:1:1201:1421:2154 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTTGTGCTCTT BC at DFFFFHHHHHJJJJFHIHHIJJ ZA:Z:TGTA ZB:Z:ACTC RG:Z:HiMom.1 QX:Z:@@@FFFFF RX:Z:TGTAACTC
+HiMom:1:1201:1439:2156 4 * 0 0 * * 0 0 GGAGATTATTTGCCTTGAAGTAAGC -;(22<>>@>8@>8;@######### ZA:Z:GACC ZB:Z:AGGC RG:Z:HiMom.1 QX:Z:1;;=#### RX:Z:GACCAGGC
+HiMom:1:1201:1452:2143 4 * 0 0 * * 0 0 TTTTAGTCTTAGCATTTACTTTCCC CCCFFFFFHHHHHJJJJJJJJJJJJ ZA:Z:CAAC ZB:Z:TCTC RG:Z:HiMom.1 QX:Z:BC at DDFFF RX:Z:CAACTCTC
+HiMom:1:1201:1458:2109 4 * 0 0 * * 0 0 GATACGAACACACAAGAACTTTTTT CCCFFFFFHHHHHJJJJJJJJJJJI ZA:Z:ACTG ZB:Z:TATC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:1201:1472:2121 516 * 0 0 * * 0 0 GTGTGCTCTTCCGATCTGGAGGATG =+=??A4A==A at 7A<?######### ZA:Z:CTAT ZB:Z:GCGC RG:Z:HiMom.1 QX:Z:;?=D#### RX:Z:CTATGCGC
+HiMom:1:1201:1483:2126 516 * 0 0 * * 0 0 GCATGCAGCTGGGTGCTGTGATGCA @@@DDDBB<DD8F<<CGG?AA?A<F ZA:Z:CTGT ZB:Z:AATC RG:Z:HiMom.1 QX:Z:@C<DD:B? RX:Z:CTGTAATC
+HiMom:1:1201:1486:2109 4 * 0 0 * * 0 0 ACGTGTGCTCTTCCCGATCTGTATA CCCFF?DDFBHHHJJIIDHJIJJJH ZA:Z:GTCC ZB:Z:ACAG RG:Z:HiMom.1 QX:Z:CCCFFFFD RX:Z:GTCCACAG
+HiMom:1:1201:1486:2146 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTGGGC <<<@??@??@???????######## ZA:Z:CAAC ZB:Z:TCTC RG:Z:HiMom.1 QX:Z:?@@1:DBD RX:Z:CAACTCTC
+HiMom:1:2101:1011:2102 4 * 0 0 * * 0 0 .....TCACACATAATTTTAAAATT #####22@?@@??@@@@@??@@@@@ ZA:Z:CTGT ZB:Z:AATC RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1013:2146 4 * 0 0 * * 0 0 ....CGCTAGAACCAACTTATTCAT ####24=?@@?@?@@?@@@@@@?@@ ZA:Z:CTAT ZB:Z:GCGT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTATGCGT
+HiMom:1:2101:1021:2209 4 * 0 0 * * 0 0 ..GGAAGGCTGCTAGCTGGCCAGAG ##08@>??@@??@?????????>?@ ZA:Z:ACTA ZB:Z:AGAC RG:Z:HiMom.1 QX:Z:@CCDFFFF RX:Z:ACTAAGAC
+HiMom:1:2101:1023:2237 516 * 0 0 * * 0 0 ..TTTGTTTGAGTTCCTTGTAGATT ##0:=@?>?@???@:>?@??>?;?< ZA:Z:GCCT ZB:Z:AGCC RG:Z:HiMom.1 QX:Z:=:1<#### RX:Z:GCCTAGCC
+HiMom:1:2101:1031:2163 4 * 0 0 * * 0 0 ..ACATTTGTCACCACTAGCCACCA ##0<@?@@@@@@@@@@?@@@@@@@? ZA:Z:GATA ZB:Z:TCCA RG:Z:HiMom.1 QX:Z:B at BFFFFF RX:Z:GATATCCA
+HiMom:1:2101:1036:2087 4 * 0 0 * * 0 0 .GTCCACTTACGAAGCAAATACTTT #4=DDFFFHHHHHJJJJJJJJJJJJ ZA:Z:GACC ZB:Z:GTTG RG:Z:HiMom.1 QX:Z:B at CFFDFF RX:Z:GACCGTTG
+HiMom:1:2101:1040:2208 516 * 0 0 * * 0 0 .CTGATAGTCACTGAAATGAATTCA #-0=>(2 at .22@@############ ZA:Z:ACGA ZB:Z:AATC RG:Z:HiMom.1 QX:Z:######## RX:Z:ACGAAATC
+HiMom:1:2101:1048:2238 4 * 0 0 * * 0 0 .GTCACATCGTTGAAGCACTGGATC #11ADDDB<CFFHCHGDBHGIIIII ZA:Z:ACAG ZB:Z:TTGA RG:Z:HiMom.1 QX:Z:?@7DDDDA RX:Z:ACAGTTGA
+HiMom:1:2101:1054:2162 4 * 0 0 * * 0 0 .GGACAGGGAAGGGAAGGAAGGGTG #4=DDFDFHHHHHJIJIIDHHGICG ZA:Z:AGGT ZB:Z:AAGG RG:Z:HiMom.1 QX:Z:B at BDDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1059:2083 4 * 0 0 * * 0 0 .GAATGTCTTAGAAGGATGCTTCTC #1=BDDDEHHGHHJJJJJIJJIIJJ ZA:Z:TACC ZB:Z:GTCT RG:Z:HiMom.1 QX:Z:1:?D#### RX:Z:TACCGTCT
+HiMom:1:2101:1063:2206 4 * 0 0 * * 0 0 .TGCTAGGATGAGGATGGATAGTAA #1=DDDFFHHHHHJHIIJHIIIHHJ ZA:Z:ACAG ZB:Z:GTAT RG:Z:HiMom.1 QX:Z:CCCFFDFF RX:Z:ACAGGTAT
+HiMom:1:2101:1064:2242 4 * 0 0 * * 0 0 .GGAAAAAGGTTGTCAAGCGTTAAA ######################### ZA:Z:TCGC ZB:Z:TAGA RG:Z:HiMom.1 QX:Z:;@<:AA at A RX:Z:TCGCTAGA
+HiMom:1:2101:1072:2170 4 * 0 0 * * 0 0 .GGGGAGACAGAGAGGATCAGAAGT #4=BDDFDHHDFHEGFEGGIJIIIG ZA:Z:CAGC ZB:Z:GGTA RG:Z:HiMom.1 QX:Z:B@@DFDDF RX:Z:CAGCGGTA
+HiMom:1:2101:1077:2139 4 * 0 0 * * 0 0 .ATTAGTTGGCGGATGAAGCAGATA #4=DFFFFHHHHHJJJJJJJJJIJJ ZA:Z:AACA ZB:Z:ATGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACAATGG
+HiMom:1:2101:1084:2188 4 * 0 0 * * 0 0 TACAAGGTCAAAATCAGCAACAAGT CCCFFFFDHHHHHJJJJJJJJJJJJ ZA:Z:GAAG ZB:Z:GAAG RG:Z:HiMom.1 QX:Z:@B at FFFFF RX:Z:GAAGGAAG
+HiMom:1:2101:1100:2085 4 * 0 0 * * 0 0 ATCTTGATCTCCTCCTTCTTGGCCT @@@DDDDDHHFHFEIIIIHHBAHBG ZA:Z:CCAG ZB:Z:CACC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAGCACC
+HiMom:1:2101:1102:2221 4 * 0 0 * * 0 0 ATAACTGACTCTACTCAGTAGATTA CCCFFFFFHHHHHJJJJJIJJJJJJ ZA:Z:CTGC ZB:Z:GGAT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGCGGAT
+HiMom:1:2101:1105:2131 4 * 0 0 * * 0 0 CAGCAGCAGCAACAGCAGAAACATG CCCFFFFFHHHHHJJJJJIJJJJJJ ZA:Z:ACTG ZB:Z:TATC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACTGTATC
+HiMom:1:2101:1112:2245 4 * 0 0 * * 0 0 TCGTAGTGTTGTAATTTCGTCTTCT ?8?DBDDDCCFCAACGGFFCBFFAE ZA:Z:AACA ZB:Z:ATGG RG:Z:HiMom.1 QX:Z:@@?BBDDD RX:Z:AACAATGG
+HiMom:1:2101:1122:2136 4 * 0 0 * * 0 0 CTTGCCAGCCTGCAGGCCCCGCGGC ???BBAABDD?DDIID)A:3<EADD ZA:Z:GCCG ZB:Z:TCGA RG:Z:HiMom.1 QX:Z:?@<DDDD? RX:Z:GCCGTCGA
+HiMom:1:2101:1123:2095 4 * 0 0 * * 0 0 TCCGCCTCCAGCTTCAGCTTCTCCT @@@FDDFFHHHHHJHGGJIJJJEHH ZA:Z:CAGC ZB:Z:GGTA RG:Z:HiMom.1 QX:Z:@?@DDF@@ RX:Z:CAGCGGTA
+HiMom:1:2101:1126:2082 4 * 0 0 * * 0 0 TCTCTTTCCACCTTGGTCACCTTCC @C at DDDFFHHHHHJEGGIHHIJGIH ZA:Z:CTGC ZB:Z:GGAT RG:Z:HiMom.1 QX:Z:@@@FFFDA RX:Z:CTGCGGAT
+HiMom:1:2101:1133:2239 4 * 0 0 * * 0 0 AGCTTTTTGTTTCCTAGCTTGTCTT ?@?DDFFFHHHHF4ACFHIJHHHGH ZA:Z:TATC ZB:Z:CATG RG:Z:HiMom.1 QX:Z:@@@BDDDF RX:Z:TATCCATG
+HiMom:1:2101:1143:2137 4 * 0 0 * * 0 0 GCTCTTCAGATCTAGGGGGAACAGC @@@DD?=DCAFFFHIIDG:EFHIII ZA:Z:TCCG ZB:Z:TCTA RG:Z:HiMom.1 QX:Z:######## RX:Z:TCCGTCTA
+HiMom:1:2101:1151:2182 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTA 9<<?@?@;5=?############## ZA:Z:GAAA ZB:Z:AAAA RG:Z:HiMom.1 QX:Z:######## RX:Z:GAAAAAAA
+HiMom:1:2101:1151:2236 516 * 0 0 * * 0 0 TTTGAAGCCTCTTTATCCTTGGCAT ######################### ZA:Z:TAGC ZB:Z:GGTA RG:Z:HiMom.1 QX:Z:####(- at 5 RX:Z:TAGCGGTA
+HiMom:1:2101:1162:2139 4 * 0 0 * * 0 0 ATCGTTTATGGTCGGAACTACGACG BCCFFFFFHHHHHIJJJJJJJIJJI ZA:Z:TGCT ZB:Z:GCTG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1163:2203 4 * 0 0 * * 0 0 TTGGTTCACTTATGTATTTATGAAT @CCFDFFFHHHHHJHIIJJJJJJJJ ZA:Z:AGGT ZB:Z:AAGG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1163:2222 4 * 0 0 * * 0 0 GAGCGATAATGGTTCTTTTCCTCAC @@@DFFFFHHHHHJJJJJJJIJJJJ ZA:Z:TGCA ZB:Z:AGTA RG:Z:HiMom.1 QX:Z:CCCFFFEF RX:Z:TGCAAGTA
+HiMom:1:2101:1172:2152 516 * 0 0 * * 0 0 ATCGTTTCTGGGGACTAGTGAGGCG ######################### ZA:Z:CAAT ZB:Z:AGTC RG:Z:HiMom.1 QX:Z:######## RX:Z:CAATAGTC
+HiMom:1:2101:1186:2093 4 * 0 0 * * 0 0 AATGTTGGGAGGACAATGATGGAAA ######################### ZA:Z:CCAA ZB:Z:CATT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CCAACATT
+HiMom:1:2101:1188:2195 4 * 0 0 * * 0 0 GCACATACACCAAATGTCTGAACCT CCCFFFFFHHHHHJJJHIJJJJJJJ ZA:Z:AGGT ZB:Z:CGCA RG:Z:HiMom.1 QX:Z:BCCDFFFF RX:Z:AGGTCGCA
+HiMom:1:2101:1195:2150 4 * 0 0 * * 0 0 AATTGAACTTCACCACCCAGAGGAA CCCFFFFFHHHHHJJJJJJIJJJJJ ZA:Z:TGCT ZB:Z:GCTG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:TGCTGCTG
+HiMom:1:2101:1207:2084 516 * 0 0 * * 0 0 TCACCACTCTTCTGGGCATCCCCTG @@@DDEDFHHHHHIJIHHGHGGJJJ ZA:Z:GACC ZB:Z:AGGA RG:Z:HiMom.1 QX:Z:@@CDFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1208:2231 516 * 0 0 * * 0 0 CTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFHHHHHJJJHFDDDDDDD ZA:Z:GTAA ZB:Z:CATC RG:Z:HiMom.1 QX:Z:1+:A1A22 RX:Z:GTAACATC
+HiMom:1:2101:1215:2110 4 * 0 0 * * 0 0 ATCTTTCCCCCATTAAGAACAGCAA ######################### ZA:Z:AAAA ZB:Z:GAAG RG:Z:HiMom.1 QX:Z:1:7<#### RX:Z:AAAAGAAG
+HiMom:1:2101:1216:2172 4 * 0 0 * * 0 0 GGACTTCTAGGGGATTTAGCGGGGT CCCFFFFFHHHHHJJJJJJJJJJJD ZA:Z:CAGC ZB:Z:GGAT RG:Z:HiMom.1 QX:Z:C at CFFFFF RX:Z:CAGCGGAT
+HiMom:1:2101:1216:2193 4 * 0 0 * * 0 0 AGGCATGACACTGCATTTTAAATAC @@@DDDDDHFFHHGGDFHFHIIHGG ZA:Z:ACAG ZB:Z:TTGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:ACAGTTGA
+HiMom:1:2101:1226:2088 4 * 0 0 * * 0 0 GCTCTTCCGATCTAGGTAATAGCTA ==?BDFFFDCDDHFFFAFHDHIJGJ ZA:Z:GATA ZB:Z:TCCA RG:Z:HiMom.1 QX:Z:@@@:DDDD RX:Z:GATATCCA
+HiMom:1:2101:1231:2208 4 * 0 0 * * 0 0 AGCCAGTGTTGGTGTGTTGACTGTT @@;1ADABCF;BF<AACGCHEBHC< ZA:Z:CTAT ZB:Z:GCGT RG:Z:HiMom.1 QX:Z:@<@?D8 at D RX:Z:CTATGCGT
+HiMom:1:2101:1233:2133 516 * 0 0 * * 0 0 TTTTTTTTTTTTTTTTTTTTTTTTT CCCFFFFFGHHHHJJJFDDDDDDDD ZA:Z:CTAT ZB:Z:GCGT RG:Z:HiMom.1 QX:Z:=??B#### RX:Z:CTATGCGT
+HiMom:1:2101:1240:2197 516 * 0 0 * * 0 0 ACTGGAGATCCTTGTTACATGCCCA ??+++A:DD?:ADEE@::C4:C<E: ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z:88+AD@?8 RX:Z:AACGCATT
+HiMom:1:2101:1245:2154 4 * 0 0 * * 0 0 ACCAATCAGTAGCACCACTATACAC CCCFFFFFHHHHHJJJJJJIJJJJJ ZA:Z:CTGT ZB:Z:AATC RG:Z:HiMom.1 QX:Z:@CCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1249:2231 4 * 0 0 * * 0 0 TCTCTCGGCCTTCCACTCTAGCATA @@@FFFFFFHHGHIJJJGJIIJHIJ ZA:Z:AGGT ZB:Z:AAGG RG:Z:HiMom.1 QX:Z:@@CBDFFF RX:Z:AGGTAAGG
+HiMom:1:2101:1258:2092 4 * 0 0 * * 0 0 TTAGACAAAACACCAAAATAAAATA ######################### ZA:Z:TAAG ZB:Z:CACA RG:Z:HiMom.1 QX:Z:@@CDDFFF RX:Z:TAAGCACA
+HiMom:1:2101:1262:2128 516 * 0 0 * * 0 0 TCTTGTGGTAACTTTTCTGACACCT -(---9@;@?:8>?4:>?@###### ZA:Z:ACTA ZB:Z:AGAC RG:Z:HiMom.1 QX:Z:1+8?ADD8 RX:Z:ACTAAGAC
+HiMom:1:2101:1273:2119 516 * 0 0 * * 0 0 ATGATGGATCTTCTCTAACTTGTCA >=><AAAAA+2AA?CB4@@ABB3?A ZA:Z:CTAA ZB:Z:CTCG RG:Z:HiMom.1 QX:Z:####=ADB RX:Z:CTAACTCG
+HiMom:1:2101:1285:2105 516 * 0 0 * * 0 0 TGTCTATATCAACCAACACCTCTTC -(0(():94:9:???########## ZA:Z:TATC ZB:Z:TCGG RG:Z:HiMom.1 QX:Z:######## RX:Z:TATCTCGG
+HiMom:1:2101:1312:2105 4 * 0 0 * * 0 0 GTTGAGAATAGGTTGAGATCGTTTC @CCFFFDFHHFHDHIJJJJJJJIJJ ZA:Z:GACC ZB:Z:AGGA RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:GACCAGGA
+HiMom:1:2101:1325:2083 4 * 0 0 * * 0 0 TGTGCTCTTCCGATCTGGAGAAAAA ######################### ZA:Z:ACAG ZB:Z:GTAT RG:Z:HiMom.1 QX:Z:@@@BD=DD RX:Z:ACAGGTAT
+HiMom:1:2101:1336:2109 4 * 0 0 * * 0 0 AGACCAGAACAGCTCCAGGTGCTCC CCCFFFFFHHHHHJJJJJJCGHIJJ ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1349:2084 4 * 0 0 * * 0 0 AGTCTGAATCATTGGTGTCTGAAGA <5;??=>=>>?############## ZA:Z:ACTG ZB:Z:TATC RG:Z:HiMom.1 QX:Z:####22<A RX:Z:ACTGTATC
+HiMom:1:2101:1365:2094 4 * 0 0 * * 0 0 GCTCTTCCGATCTTGTGCTCTTCCG CCCFFFFDHFHHGJJIIJIJJIHII ZA:Z:ACTG ZB:Z:TACC RG:Z:HiMom.1 QX:Z:######## RX:Z:ACTGTACC
+HiMom:1:2101:1370:2116 4 * 0 0 * * 0 0 CACCATCTGACATCATGTTTGAAAG @@@DFFFDFFHDHIGBHHII<HEDB ZA:Z:AGCA ZB:Z:TGGA RG:Z:HiMom.1 QX:Z:?:8A?3:B RX:Z:AGCATGGA
+HiMom:1:2101:1386:2105 4 * 0 0 * * 0 0 AGGAATTATTCTTCTGCCATAAGGT B@@DDFFFHGFHHIJJJJJGIGIJH ZA:Z:CTGT ZB:Z:AATC RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTGTAATC
+HiMom:1:2101:1414:2098 4 * 0 0 * * 0 0 TTGGGGCCGGTGCCGTCGGGCCCAA CCCFFFFFHHHHGJJIJJJJJJJIJ ZA:Z:CTAA ZB:Z:CTCG RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:CTAACTCG
+HiMom:1:2101:1427:2081 4 * 0 0 * * 0 0 CCGACTTCCATGGCCACCGTCCTGC CCCFFFFFHHHHHJJJIIGFIIJJI ZA:Z:AACG ZB:Z:CATT RG:Z:HiMom.1 QX:Z:CCCFFFFF RX:Z:AACGCATT
+HiMom:1:2101:1450:2134 4 * 0 0 * * 0 0 ACAAACCCTTGTGTCGAGGGCTGAC CC at FDFDFFDFHFGIIE1CGGHBGE ZA:Z:ACCA ZB:Z:GTTG RG:Z:HiMom.1 QX:Z:@C at DDDB? RX:Z:ACCAGTTG
+HiMom:1:2101:1459:2083 4 * 0 0 * * 0 0 ATTTCACCAAAATAATCAGAAGGCC CCCFFFFDBHGHHIGGIJFJJGGFH ZA:Z:GCCG ZB:Z:TCGA RG:Z:HiMom.1 QX:Z:@@CFDDFD RX:Z:GCCGTCGA
+HiMom:1:2101:1491:2093 4 * 0 0 * * 0 0 AGAGACGGGGTCTCGCTATGTTGCC BCCDFFFFHHHHHJIIJJJJIJIJJ ZA:Z:CAAT ZB:Z:AGTC RG:Z:HiMom.1 QX:Z:@@@FDEBD RX:Z:CAATAGTC
diff --git a/testdata/picard/independent_replicates/aTriple.sam b/testdata/picard/independent_replicates/aTriple.sam
new file mode 100644
index 0000000..a1dd2e5
--- /dev/null
+++ b/testdata/picard/independent_replicates/aTriple.sam
@@ -0,0 +1,22 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:bwa PN:bwa VN:3 CL:bwa aln
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1 83 chr1 1 255 101M = 302 +201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAA!AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1107 chr1 1 255 101M = 302 +201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 1107 chr1 1 255 101M = 302 +201 CAACAGAAGCCGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1 83 chr1 2 255 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1.2 1107 chr1 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1 163 chr1 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1.2 1187 chr1 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
diff --git a/testdata/picard/independent_replicates/aTripleWithUMIs.sam b/testdata/picard/independent_replicates/aTripleWithUMIs.sam
new file mode 100644
index 0000000..a1dd2e5
--- /dev/null
+++ b/testdata/picard/independent_replicates/aTripleWithUMIs.sam
@@ -0,0 +1,22 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:bwa PN:bwa VN:3 CL:bwa aln
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1 83 chr1 1 255 101M = 302 +201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAA!AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1107 chr1 1 255 101M = 302 +201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 1107 chr1 1 255 101M = 302 +201 CAACAGAAGCCGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1 83 chr1 2 255 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1.2 1107 chr1 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1 163 chr1 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1.2 1187 chr1 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
diff --git a/testdata/picard/independent_replicates/hets.vcf b/testdata/picard/independent_replicates/hets.vcf
new file mode 100755
index 0000000..acd9a92
--- /dev/null
+++ b/testdata/picard/independent_replicates/hets.vcf
@@ -0,0 +1,14 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=501>
+##contig=<ID=chr2,length=101>
+##contig=<ID=chr3,length=101>
+##contig=<ID=chr4,length=101>
+##contig=<ID=chr5,length=101>
+##contig=<ID=chr6,length=101>
+##contig=<ID=chr7,length=404>
+##contig=<ID=chr8,length=202>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+chr1 11 . C T 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/1:90
+chr2 11 . C T 569.98 . AC=23;AF=0.767;AN=30; GT:GQ 1/1:90
+chr2 305 . C T 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/1:10
+chr2 306 . C T 569.98 FAIL AC=23;AF=0.767;AN=30; GT:GQ 0/1:90
\ No newline at end of file
diff --git a/testdata/picard/independent_replicates/hets_pos20.vcf b/testdata/picard/independent_replicates/hets_pos20.vcf
new file mode 100755
index 0000000..1ed02ed
--- /dev/null
+++ b/testdata/picard/independent_replicates/hets_pos20.vcf
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=501>
+##contig=<ID=chr2,length=101>
+##contig=<ID=chr3,length=101>
+##contig=<ID=chr4,length=101>
+##contig=<ID=chr5,length=101>
+##contig=<ID=chr6,length=101>
+##contig=<ID=chr7,length=404>
+##contig=<ID=chr8,length=202>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+chr1 20 . T A 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/1:90
diff --git a/testdata/picard/independent_replicates/hets_pos21_HOMREF_G.vcf b/testdata/picard/independent_replicates/hets_pos21_HOMREF_G.vcf
new file mode 100755
index 0000000..60d4a29
--- /dev/null
+++ b/testdata/picard/independent_replicates/hets_pos21_HOMREF_G.vcf
@@ -0,0 +1,11 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=501>
+##contig=<ID=chr2,length=101>
+##contig=<ID=chr3,length=101>
+##contig=<ID=chr4,length=101>
+##contig=<ID=chr5,length=101>
+##contig=<ID=chr6,length=101>
+##contig=<ID=chr7,length=404>
+##contig=<ID=chr8,length=202>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+chr1 21 . G A 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/1:90
diff --git a/testdata/picard/independent_replicates/hets_pos22_IncorrectAlleles.vcf b/testdata/picard/independent_replicates/hets_pos22_IncorrectAlleles.vcf
new file mode 100755
index 0000000..9443f1f
--- /dev/null
+++ b/testdata/picard/independent_replicates/hets_pos22_IncorrectAlleles.vcf
@@ -0,0 +1,12 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=501>
+##contig=<ID=chr2,length=101>
+##contig=<ID=chr3,length=101>
+##contig=<ID=chr4,length=101>
+##contig=<ID=chr5,length=101>
+##contig=<ID=chr6,length=101>
+##contig=<ID=chr7,length=404>
+##contig=<ID=chr8,length=202>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+chr1 22 . G C 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/1:90
+chr1 312 . G C 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/1:90
diff --git a/testdata/picard/independent_replicates/multipleContigs.sam b/testdata/picard/independent_replicates/multipleContigs.sam
new file mode 100644
index 0000000..6af865c
--- /dev/null
+++ b/testdata/picard/independent_replicates/multipleContigs.sam
@@ -0,0 +1,42 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:501
+ at SQ SN:chr3 LN:501
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:dummy PN:manual VN:0 CL:none
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1 83 chr1 1 255 101M = 302 +201 CAACAGAAGCcGGTATCTGaGtTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1107 chr1 1 255 101M = 302 +201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCtGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 1107 chr1 1 255 101M = 302 +201 CAACAGAAGCcGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1 83 chr1 2 255 101M = 303 +201 AACAGAAGCcGGTATCTGaGtTTGTGTTTCtGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1.2 1107 chr1 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1 163 chr1 302 255 101M = 1 -201 NcGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1187 chr1 302 255 101M = 1 -201 NaGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 1187 chr1 302 255 101M = 1 -201 NaGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1 163 chr1 303 255 101M = 1 -201 cGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.2.1.2 1187 chr1 303 255 101M = 1 -201 cGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.3.1 83 chr2 1 255 101M = 302 +201 CAACAGAAGCcGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.3.1.2 1107 chr2 1 255 101M = 302 +201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.3.1.3 1107 chr2 1 255 101M = 302 +201 CAACAGAAGCcGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.2.1 83 chr2 2 255 101M = 303 +201 AACAGAAGCtGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.2.1.2 1107 chr2 2 255 101M = 303 +201 AACAGAAGCcGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.3.1 163 chr2 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.3.1.2 1187 chr2 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.3.1.3 1187 chr2 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.2.1 163 chr2 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ABXX.2.1.2 1187 chr2 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.3.1 83 chr3 1 255 101M = 302 +201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.3.1.2 1107 chr3 1 255 101M = 302 +201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.3.1.3 1107 chr3 1 255 101M = 302 +201 CAACAGAAGCCGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.2.1 83 chr3 2 255 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.2.1.2 1107 chr3 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.3.1 163 chr3 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.3.1.2 1187 chr3 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.3.1.3 1187 chr3 302 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.2.1 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1ACXX.2.1.2 1187 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
diff --git a/testdata/picard/independent_replicates/multipleContigs.vcf b/testdata/picard/independent_replicates/multipleContigs.vcf
new file mode 100755
index 0000000..726747c
--- /dev/null
+++ b/testdata/picard/independent_replicates/multipleContigs.vcf
@@ -0,0 +1,15 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=501>
+##contig=<ID=chr2,length=101>
+##contig=<ID=chr3,length=101>
+##contig=<ID=chr4,length=101>
+##contig=<ID=chr5,length=101>
+##contig=<ID=chr6,length=101>
+##contig=<ID=chr7,length=404>
+##contig=<ID=chr8,length=202>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1
+chr1 11 . C T 1.0 . AC=1; GT:GQ 0/1:90
+chr1 303 . C A 1.0 . AC=1; GT:GQ 0/1:90
+chr2 11 . C T 1.0 . AC=1; GT:GQ 0/1:90
+chr2 32 . G T 1.0 . AC=1; GT:GQ 0/1:90
+chr3 32 . G T 1.0 . AC=1; GT:GQ 0/1:90
diff --git a/testdata/picard/independent_replicates/twoSamplesHet.vcf b/testdata/picard/independent_replicates/twoSamplesHet.vcf
new file mode 100755
index 0000000..d96af2e
--- /dev/null
+++ b/testdata/picard/independent_replicates/twoSamplesHet.vcf
@@ -0,0 +1,12 @@
+##fileformat=VCFv4.1
+##contig=<ID=chr1,length=501>
+##contig=<ID=chr2,length=101>
+##contig=<ID=chr3,length=101>
+##contig=<ID=chr4,length=101>
+##contig=<ID=chr5,length=101>
+##contig=<ID=chr6,length=101>
+##contig=<ID=chr7,length=404>
+##contig=<ID=chr8,length=202>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT SAMPLE1 SAMPLE2
+chr1 11 . C G,T 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 0/2:90 0/0:90
+chr2 11 . C G,T 569.98 PASS AC=23;AF=0.767;AN=30; GT:GQ 2/2:90 0/1:90
\ No newline at end of file
diff --git a/testdata/picard/independent_replicates/twopairs.sam b/testdata/picard/independent_replicates/twopairs.sam
new file mode 100644
index 0000000..7c132e1
--- /dev/null
+++ b/testdata/picard/independent_replicates/twopairs.sam
@@ -0,0 +1,21 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:bwa PN:bwa VN:3 CL:bwa aln
+ at CO manually created file. second pair is a copy of the first pair except that it's marked as a duplicate, and the 10th base of the
+ at CO first read is modified C->t (lower case for easy view). this corresponds to position 11
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1.1 83 chr1 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.4 339 chr1 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1107 chr1 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.3 16 chr1 1 255 101M * 0 0000 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3
+1AAXX.3.1.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.4 419 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
+1AAXX.3.1.2 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M
diff --git a/testdata/picard/independent_replicates/twopairsWithBadUMIs.sam b/testdata/picard/independent_replicates/twopairsWithBadUMIs.sam
new file mode 100644
index 0000000..8becea2
--- /dev/null
+++ b/testdata/picard/independent_replicates/twopairsWithBadUMIs.sam
@@ -0,0 +1,18 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:bwa PN:bwa VN:3 CL:bwa aln
+ at CO manually created file. second pair is a copy of the first pair except that it's marked as a duplicate, and the 10th base of the
+ at CO first read is modified C->t (lower case for easy view). this corresponds to position 11
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1.1 83 chr1 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:!!!!
+1AAXX.3.1.2 1107 chr1 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:!!!!
+1AAXX.3.1.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:!!!!
+1AAXX.3.1.2 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:!!!!
\ No newline at end of file
diff --git a/testdata/picard/independent_replicates/twopairsWithUMIs.sam b/testdata/picard/independent_replicates/twopairsWithUMIs.sam
new file mode 100644
index 0000000..5b6e32b
--- /dev/null
+++ b/testdata/picard/independent_replicates/twopairsWithUMIs.sam
@@ -0,0 +1,18 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:bwa PN:bwa VN:3 CL:bwa aln
+ at CO manually created file. second pair is a copy of the first pair except that it's marked as a duplicate, and the 10th base of the
+ at CO first read is modified C->t (lower case for easy view). this corresponds to position 11
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1.1 83 chr1 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1AAXX.3.1.2 1107 chr1 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1AAXX.3.1.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1AAXX.3.1.2 1187 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
\ No newline at end of file
diff --git a/testdata/picard/independent_replicates/twopairsWithUMIsMultipleOrientations.sam b/testdata/picard/independent_replicates/twopairsWithUMIsMultipleOrientations.sam
new file mode 100644
index 0000000..0523919
--- /dev/null
+++ b/testdata/picard/independent_replicates/twopairsWithUMIsMultipleOrientations.sam
@@ -0,0 +1,30 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:501
+ at SQ SN:chr2 LN:501
+ at SQ SN:chr3 LN:501
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.3 SM:test LB:mylib.yossi PL:ILLUMINA
+ at PG ID:bwa PN:bwa VN:3 CL:bwa aln
+ at CO manually created file. second pair is a copy of the first pair except that it's marked as a duplicate, and the 10th base of the
+ at CO first read is modified C->t (lower case for easy view). this corresponds to position 10
+ at CO 123456789 123456789 123456789 123456789 123456789 1234567890
+1AAXX.3.1.1 83 chr1 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1AAXX.3.1.2 83 chr1 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1AAXX.3.1.1 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA CO:Z:GETS_SKIPPED_DUE_TO_MONOMORPHIC_SITE
+1AAXX.3.1.2 163 chr1 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA CO:Z:GETS_SKIPPED_DUE_TO_MONOMORPHIC_SITE
+1AAXX.3.2.1 99 chr2 1 255 101M = 302 0201 CAACAGAAGCCGGTATCTGaGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1AAXX.3.2.2 163 chr2 1 255 101M = 302 0201 CAACAGAAGCtGGNATCTGaGcTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1AAXX.3.2.1 147 chr2 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1AAXX.3.2.2 83 chr2 302 255 101M = 1 -201 NCGCGGCATCcCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1ACXX.3.3.1 83 chr3 2 255 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1ACXX.3.3.2 83 chr3 2 255 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1ACXX.3.4.1 83 chr3 2 20 101M = 303 +201 AACAGAAGCCGGTATCTGaGTTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1ACXX.3.4.2 83 chr3 2 20 101M = 303 +201 AACAGAAGCtGGNATCTGaGcTTGTGTTTCgGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAANN AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1ACXX.3.3.1 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1ACXX.3.3.2 163 chr3 303 255 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
+1ACXX.3.4.1 163 chr3 303 20 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:AAAA QX:Z:AAAA
+1ACXX.3.4.2 163 chr3 303 20 101M = 1 -201 NCGCGGCATCNCGATTTCTTTCCGCAGCTAAtCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA RG:Z:1AAXX.3 MC:Z:101M RX:Z:CCCC QX:Z:AAAA
diff --git a/testdata/picard/sam/CompareSAMs/genomic_sorted_same_position.sam b/testdata/picard/sam/CompareSAMs/genomic_sorted_same_position.sam
new file mode 100755
index 0000000..c403bd7
--- /dev/null
+++ b/testdata/picard/sam/CompareSAMs/genomic_sorted_same_position.sam
@@ -0,0 +1,5 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr20 AS:HG18 LN:62435964
+ at RG ID:A PU:SC_1_10 LB:SC_1 SM:NA12878 PL:ILLUMINA
+H06JUADXX130110:1:1101:10054:44262 163 chr20 343900 0 239M11S = 343900 246 ATGTACAATCGTGCATACATCATGTACATACATCTACTTAAGAAAATAGCTATGTAATATACCATTACTCAACTAGATTATAATTTTTTCTCCATTTCTTTATTGTAATTTATCATTTTCTACTTTTTTGTTTTCTCATTTTTATTGCATAATATTTAATTATGCAAAAAATACATTAAATACATTGAAAATATATAGTGTAGCTATAAGAATAAAGAACGATGGTAAATCAAATGCTATTATCCAGCAC =>=?AACA?A;ABABAAABAABABABABBBABBABBAB at BACBAAABBCBBBBCBCABBBBBACB@CBCBCBBBCCCCACCCAB at BBBDEEEDGFDFEFABEEBFDEFFEFEEDGFFFFEFFDFBFBBAFDADADFDGFFEFAEEAFEFBD@CDEAEDACCDDDDD [...]
+H06JUADXX130110:1:1101:10054:44262 83 chr20 343900 0 4S246M = 343900 -246 ATTTAGGTACAATCGTGCATACATCATGTACATACATCTACTTAAGAAAATAGCTATGTAATATACCATTTCTCAACTAGATTATAATTTTTTCTCCATTTCTTTATTGTAATTTATCATTTTCTACTTTTTTGTTTTCTCATTTTTATTGCATAATATTTAATTATGCAAAAAATACATTAAATACATTGAAAATATATAGTGTAGCTATAAGAATAAAGAACGATGGTAAAACAAATGCTAATACCCA #########################################CDCEACAC?9/1492</2+B;+><?>:9;+;=8CDABC??@D?DBB==D at AC8?DEDDDEEDDDEDFDEEEDDEEEFDDDDEEECEDDDDDFDDDDEEEFEDDDDEECFEEEEEEEFEEECEEEEE [...]
diff --git a/testdata/picard/sam/CompareSAMs/unmapped_second.sam b/testdata/picard/sam/CompareSAMs/unmapped_second.sam
index 72e4da4..cbc2e35 100755
--- a/testdata/picard/sam/CompareSAMs/unmapped_second.sam
+++ b/testdata/picard/sam/CompareSAMs/unmapped_second.sam
@@ -3,4 +3,4 @@
@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 PL:ILLUMINA
@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 PL:ILLUMINA
read_28833_29006_6945 99 chr20 28833 20 10M1D25M = 28993 195 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< MF:i:130 Nm:i:1 H0:i:0 H1:i:0 RG:Z:L1
-read_28701_28881_323b 1101 chr20 28834 0 * * 0 0 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 Nm:i:0 H0:i:1 H1:i:0 RG:Z:L2
+read_28701_28881_323b 1165 chr20 28834 0 * * 0 0 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 Nm:i:0 H0:i:1 H1:i:0 RG:Z:L2
diff --git a/testdata/picard/sam/EstimateLibraryComplexity/dupes_with_sos.sam b/testdata/picard/sam/EstimateLibraryComplexity/dupes_with_sos.sam
new file mode 100644
index 0000000..dc4d2d4
--- /dev/null
+++ b/testdata/picard/sam/EstimateLibraryComplexity/dupes_with_sos.sam
@@ -0,0 +1,20 @@
+ at HD VN:1.0 SO:coordinate
+ at SQ SN:chr1 LN:101
+ at SQ SN:chr2 LN:101
+ at SQ SN:chr3 LN:101
+ at SQ SN:chr4 LN:101
+ at SQ SN:chr5 LN:101
+ at SQ SN:chr6 LN:101
+ at SQ SN:chr7 LN:404
+ at SQ SN:chr8 LN:202
+ at RG ID:1AAXX.1 SM:Hi,Mom! LB:mylib PL:ILLUMINA
+ at PG ID:MarkDuplicates PN:MarkDuplicates VN:1 CL:MarkDuplicates merge1.sam PP:bwa
+ at PG ID:bwa PN:bwa VN:1 CL:bwa aln
+C4A4WACXX140821:8:1112:2344:1984 83 chr7 1 255 101M = 302 201 CAACAGAAGCAGGAATCTGTGTTTGTGTTTCGGATTTCCTGCTGAAAAGATTATCGAATCAAAAAAAAATCCCGATTTCATTCCGCAGCTAACCTCCCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 83 chr7 1 255 101M = 302 201 CAACAGAAGCAGGAATCTGTGTTTGTGTTTCGGAAAAAAAGCTGAAAAGATTATCGAATCAAAAAAAAATCCCGATTTCATTCCGCAGCTAACCTCCCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 339 chr7 1 255 101M = 302 201 CAACAGAAGCAGGAATCTGTGTTTGTGTTTCGGAAAAAAAGCTGAAAAGATTATCGAATCAAAAAAAAATCCCGATTTCATTCCGCAGCTAACCTCCCAAA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1984 163 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 163 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 419 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 2131 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
+C4A4WACXX140821:8:1112:2344:1985 2211 chr7 302 255 101M = 1 -201 ACGCGGCATCACGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCAAGAGCATACA IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII RG:Z:1AAXX.1 PG:Z:MarkDuplicates
diff --git a/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv1.sam b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv1.sam
new file mode 100644
index 0000000..e9d3d97
--- /dev/null
+++ b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv1.sam
@@ -0,0 +1,28 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:1 LN:197195432 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f05d753079c455c0e57af88eeda24493 SP:Mus musculus
+ at SQ SN:2 LN:181748087 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9b9d64dc89ecc73d3288eb38af3f94bd SP:Mus musculus
+ at SQ SN:3 LN:159599783 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:0a692666a1b8526e1d1e799beb71b6d0 SP:Mus musculus
+ at SQ SN:4 LN:155630120 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f5993a04396a06ed6b28fa42b2429be0 SP:Mus musculus
+ at SQ SN:5 LN:152537259 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f90804fb8fe9cb06076d51a710fb4563 SP:Mus musculus
+ at SQ SN:6 LN:149517037 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:258a37e20815bb7e3f2e974b9d4dd295 SP:Mus musculus
+ at SQ SN:7 LN:152524553 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0d6cea6f72cb4d9f8d0efc1d29dd180 SP:Mus musculus
+ at SQ SN:8 LN:131738871 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5f217cb8a9685b9879add3ae110cabd7 SP:Mus musculus
+ at SQ SN:9 LN:124076172 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:dde08574942fc18050195618cc3f35af SP:Mus musculus
+ at SQ SN:10 LN:129993255 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:be7e6a13cc6b9da7c1da7b7fc32c5506 SP:Mus musculus
+ at SQ SN:11 LN:121843856 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0099550b3d3943fb9bb7af6fa6952c1 SP:Mus musculus
+ at SQ SN:12 LN:121257530 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:1f9c11dc6f288f93e9fab56772a36e85 SP:Mus musculus
+ at SQ SN:13 LN:120284312 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:a7b4bb418aa21e0ec59d9e2a1fe1810b SP:Mus musculus
+ at SQ SN:14 LN:125194864 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:09d1c8449706a17d40934302a0a3b671 SP:Mus musculus
+ at SQ SN:15 LN:103494974 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e41c8b42b0921378b1fdd5172f6be067 SP:Mus musculus
+ at SQ SN:16 LN:98319150 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e051b3930c2557ade21d67db41f3a518 SP:Mus musculus
+ at SQ SN:17 LN:95272651 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:47eede15e5761fb9c2267627f18211e7 SP:Mus musculus
+ at SQ SN:18 LN:90772031 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9f9d41cfdb9d91b62b928a3eb4eb6928 SP:Mus musculus
+ at SQ SN:19 LN:61342430 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:591f8486f82c22442bb8463595a18e0a SP:Mus musculus
+ at SQ SN:X LN:166650296 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:3d0d9df898d2c830b858f91255d8a1eb SP:Mus musculus
+ at SQ SN:Y LN:15902555 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5ff564f9fbc8cb87bcad6cfa6874902b SP:Mus musculus
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 145 8 43092875 0 150S1M = 43092875 49 AAAAAAAAAA BBBBBBBBBB MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 97 8 43092875 0 151M = 43092875 -49 AAAAAAAAAA BBBBBBBBBB MC:Z:150S1M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1169 8 43092875 0 150S1M = 43092875 44 AAAAAAAAAA AAAAAAAAAA MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1121 8 43092875 0 151M = 43092875 -44 AAAAAAAAAA AAAAAAAAAA MC:Z:150S1M RG:Z:00001.3
diff --git a/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv2.sam b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv2.sam
new file mode 100644
index 0000000..1d6cc7e
--- /dev/null
+++ b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv2.sam
@@ -0,0 +1,28 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:1 LN:197195432 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f05d753079c455c0e57af88eeda24493 SP:Mus musculus
+ at SQ SN:2 LN:181748087 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9b9d64dc89ecc73d3288eb38af3f94bd SP:Mus musculus
+ at SQ SN:3 LN:159599783 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:0a692666a1b8526e1d1e799beb71b6d0 SP:Mus musculus
+ at SQ SN:4 LN:155630120 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f5993a04396a06ed6b28fa42b2429be0 SP:Mus musculus
+ at SQ SN:5 LN:152537259 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f90804fb8fe9cb06076d51a710fb4563 SP:Mus musculus
+ at SQ SN:6 LN:149517037 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:258a37e20815bb7e3f2e974b9d4dd295 SP:Mus musculus
+ at SQ SN:7 LN:152524553 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0d6cea6f72cb4d9f8d0efc1d29dd180 SP:Mus musculus
+ at SQ SN:8 LN:131738871 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5f217cb8a9685b9879add3ae110cabd7 SP:Mus musculus
+ at SQ SN:9 LN:124076172 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:dde08574942fc18050195618cc3f35af SP:Mus musculus
+ at SQ SN:10 LN:129993255 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:be7e6a13cc6b9da7c1da7b7fc32c5506 SP:Mus musculus
+ at SQ SN:11 LN:121843856 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0099550b3d3943fb9bb7af6fa6952c1 SP:Mus musculus
+ at SQ SN:12 LN:121257530 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:1f9c11dc6f288f93e9fab56772a36e85 SP:Mus musculus
+ at SQ SN:13 LN:120284312 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:a7b4bb418aa21e0ec59d9e2a1fe1810b SP:Mus musculus
+ at SQ SN:14 LN:125194864 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:09d1c8449706a17d40934302a0a3b671 SP:Mus musculus
+ at SQ SN:15 LN:103494974 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e41c8b42b0921378b1fdd5172f6be067 SP:Mus musculus
+ at SQ SN:16 LN:98319150 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e051b3930c2557ade21d67db41f3a518 SP:Mus musculus
+ at SQ SN:17 LN:95272651 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:47eede15e5761fb9c2267627f18211e7 SP:Mus musculus
+ at SQ SN:18 LN:90772031 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9f9d41cfdb9d91b62b928a3eb4eb6928 SP:Mus musculus
+ at SQ SN:19 LN:61342430 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:591f8486f82c22442bb8463595a18e0a SP:Mus musculus
+ at SQ SN:X LN:166650296 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:3d0d9df898d2c830b858f91255d8a1eb SP:Mus musculus
+ at SQ SN:Y LN:15902555 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5ff564f9fbc8cb87bcad6cfa6874902b SP:Mus musculus
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 97 8 43092875 0 151M = 43092875 -49 AAAAAAAAAA BBBBBBBBBB MC:Z:150S1M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 145 8 43092875 0 150S1M = 43092875 49 AAAAAAAAAA BBBBBBBBBB MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1169 8 43092875 0 150S1M = 43092875 44 AAAAAAAAAA AAAAAAAAAA MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1121 8 43092875 0 151M = 43092875 -44 AAAAAAAAAA AAAAAAAAAA MC:Z:150S1M RG:Z:00001.3
diff --git a/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv3.sam b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv3.sam
new file mode 100644
index 0000000..22991c5
--- /dev/null
+++ b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv3.sam
@@ -0,0 +1,28 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:1 LN:197195432 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f05d753079c455c0e57af88eeda24493 SP:Mus musculus
+ at SQ SN:2 LN:181748087 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9b9d64dc89ecc73d3288eb38af3f94bd SP:Mus musculus
+ at SQ SN:3 LN:159599783 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:0a692666a1b8526e1d1e799beb71b6d0 SP:Mus musculus
+ at SQ SN:4 LN:155630120 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f5993a04396a06ed6b28fa42b2429be0 SP:Mus musculus
+ at SQ SN:5 LN:152537259 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f90804fb8fe9cb06076d51a710fb4563 SP:Mus musculus
+ at SQ SN:6 LN:149517037 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:258a37e20815bb7e3f2e974b9d4dd295 SP:Mus musculus
+ at SQ SN:7 LN:152524553 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0d6cea6f72cb4d9f8d0efc1d29dd180 SP:Mus musculus
+ at SQ SN:8 LN:131738871 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5f217cb8a9685b9879add3ae110cabd7 SP:Mus musculus
+ at SQ SN:9 LN:124076172 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:dde08574942fc18050195618cc3f35af SP:Mus musculus
+ at SQ SN:10 LN:129993255 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:be7e6a13cc6b9da7c1da7b7fc32c5506 SP:Mus musculus
+ at SQ SN:11 LN:121843856 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0099550b3d3943fb9bb7af6fa6952c1 SP:Mus musculus
+ at SQ SN:12 LN:121257530 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:1f9c11dc6f288f93e9fab56772a36e85 SP:Mus musculus
+ at SQ SN:13 LN:120284312 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:a7b4bb418aa21e0ec59d9e2a1fe1810b SP:Mus musculus
+ at SQ SN:14 LN:125194864 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:09d1c8449706a17d40934302a0a3b671 SP:Mus musculus
+ at SQ SN:15 LN:103494974 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e41c8b42b0921378b1fdd5172f6be067 SP:Mus musculus
+ at SQ SN:16 LN:98319150 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e051b3930c2557ade21d67db41f3a518 SP:Mus musculus
+ at SQ SN:17 LN:95272651 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:47eede15e5761fb9c2267627f18211e7 SP:Mus musculus
+ at SQ SN:18 LN:90772031 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9f9d41cfdb9d91b62b928a3eb4eb6928 SP:Mus musculus
+ at SQ SN:19 LN:61342430 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:591f8486f82c22442bb8463595a18e0a SP:Mus musculus
+ at SQ SN:X LN:166650296 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:3d0d9df898d2c830b858f91255d8a1eb SP:Mus musculus
+ at SQ SN:Y LN:15902555 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5ff564f9fbc8cb87bcad6cfa6874902b SP:Mus musculus
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 145 8 43092875 0 150S1M = 43092875 49 AAAAAAAAAA BBBBBBBBBB MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 97 8 43092875 0 151M = 43092875 -49 AAAAAAAAAA BBBBBBBBBB MC:Z:150S1M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1121 8 43092875 0 151M = 43092875 -44 AAAAAAAAAA AAAAAAAAAA MC:Z:150S1M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1169 8 43092875 0 150S1M = 43092875 44 AAAAAAAAAA AAAAAAAAAA MC:Z:151M RG:Z:00001.3
diff --git a/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv4.sam b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv4.sam
new file mode 100644
index 0000000..34ff0f3
--- /dev/null
+++ b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndCoordinateSortedv4.sam
@@ -0,0 +1,28 @@
+ at HD VN:1.5 SO:coordinate
+ at SQ SN:1 LN:197195432 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f05d753079c455c0e57af88eeda24493 SP:Mus musculus
+ at SQ SN:2 LN:181748087 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9b9d64dc89ecc73d3288eb38af3f94bd SP:Mus musculus
+ at SQ SN:3 LN:159599783 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:0a692666a1b8526e1d1e799beb71b6d0 SP:Mus musculus
+ at SQ SN:4 LN:155630120 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f5993a04396a06ed6b28fa42b2429be0 SP:Mus musculus
+ at SQ SN:5 LN:152537259 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f90804fb8fe9cb06076d51a710fb4563 SP:Mus musculus
+ at SQ SN:6 LN:149517037 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:258a37e20815bb7e3f2e974b9d4dd295 SP:Mus musculus
+ at SQ SN:7 LN:152524553 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0d6cea6f72cb4d9f8d0efc1d29dd180 SP:Mus musculus
+ at SQ SN:8 LN:131738871 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5f217cb8a9685b9879add3ae110cabd7 SP:Mus musculus
+ at SQ SN:9 LN:124076172 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:dde08574942fc18050195618cc3f35af SP:Mus musculus
+ at SQ SN:10 LN:129993255 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:be7e6a13cc6b9da7c1da7b7fc32c5506 SP:Mus musculus
+ at SQ SN:11 LN:121843856 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0099550b3d3943fb9bb7af6fa6952c1 SP:Mus musculus
+ at SQ SN:12 LN:121257530 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:1f9c11dc6f288f93e9fab56772a36e85 SP:Mus musculus
+ at SQ SN:13 LN:120284312 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:a7b4bb418aa21e0ec59d9e2a1fe1810b SP:Mus musculus
+ at SQ SN:14 LN:125194864 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:09d1c8449706a17d40934302a0a3b671 SP:Mus musculus
+ at SQ SN:15 LN:103494974 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e41c8b42b0921378b1fdd5172f6be067 SP:Mus musculus
+ at SQ SN:16 LN:98319150 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e051b3930c2557ade21d67db41f3a518 SP:Mus musculus
+ at SQ SN:17 LN:95272651 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:47eede15e5761fb9c2267627f18211e7 SP:Mus musculus
+ at SQ SN:18 LN:90772031 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9f9d41cfdb9d91b62b928a3eb4eb6928 SP:Mus musculus
+ at SQ SN:19 LN:61342430 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:591f8486f82c22442bb8463595a18e0a SP:Mus musculus
+ at SQ SN:X LN:166650296 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:3d0d9df898d2c830b858f91255d8a1eb SP:Mus musculus
+ at SQ SN:Y LN:15902555 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5ff564f9fbc8cb87bcad6cfa6874902b SP:Mus musculus
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 97 8 43092875 0 151M = 43092875 -49 AAAAAAAAAA BBBBBBBBBB MC:Z:150S1M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 145 8 43092875 0 150S1M = 43092875 49 AAAAAAAAAA BBBBBBBBBB MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1121 8 43092875 0 151M = 43092875 -44 AAAAAAAAAA AAAAAAAAAA MC:Z:150S1M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1169 8 43092875 0 150S1M = 43092875 44 AAAAAAAAAA AAAAAAAAAA MC:Z:151M RG:Z:00001.3
diff --git a/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndv1.sam b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndv1.sam
new file mode 100644
index 0000000..6722603
--- /dev/null
+++ b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndv1.sam
@@ -0,0 +1,28 @@
+ at HD VN:1.5 SO:queryname
+ at SQ SN:1 LN:197195432 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f05d753079c455c0e57af88eeda24493 SP:Mus musculus
+ at SQ SN:2 LN:181748087 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9b9d64dc89ecc73d3288eb38af3f94bd SP:Mus musculus
+ at SQ SN:3 LN:159599783 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:0a692666a1b8526e1d1e799beb71b6d0 SP:Mus musculus
+ at SQ SN:4 LN:155630120 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f5993a04396a06ed6b28fa42b2429be0 SP:Mus musculus
+ at SQ SN:5 LN:152537259 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f90804fb8fe9cb06076d51a710fb4563 SP:Mus musculus
+ at SQ SN:6 LN:149517037 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:258a37e20815bb7e3f2e974b9d4dd295 SP:Mus musculus
+ at SQ SN:7 LN:152524553 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0d6cea6f72cb4d9f8d0efc1d29dd180 SP:Mus musculus
+ at SQ SN:8 LN:131738871 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5f217cb8a9685b9879add3ae110cabd7 SP:Mus musculus
+ at SQ SN:9 LN:124076172 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:dde08574942fc18050195618cc3f35af SP:Mus musculus
+ at SQ SN:10 LN:129993255 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:be7e6a13cc6b9da7c1da7b7fc32c5506 SP:Mus musculus
+ at SQ SN:11 LN:121843856 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0099550b3d3943fb9bb7af6fa6952c1 SP:Mus musculus
+ at SQ SN:12 LN:121257530 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:1f9c11dc6f288f93e9fab56772a36e85 SP:Mus musculus
+ at SQ SN:13 LN:120284312 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:a7b4bb418aa21e0ec59d9e2a1fe1810b SP:Mus musculus
+ at SQ SN:14 LN:125194864 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:09d1c8449706a17d40934302a0a3b671 SP:Mus musculus
+ at SQ SN:15 LN:103494974 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e41c8b42b0921378b1fdd5172f6be067 SP:Mus musculus
+ at SQ SN:16 LN:98319150 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e051b3930c2557ade21d67db41f3a518 SP:Mus musculus
+ at SQ SN:17 LN:95272651 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:47eede15e5761fb9c2267627f18211e7 SP:Mus musculus
+ at SQ SN:18 LN:90772031 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9f9d41cfdb9d91b62b928a3eb4eb6928 SP:Mus musculus
+ at SQ SN:19 LN:61342430 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:591f8486f82c22442bb8463595a18e0a SP:Mus musculus
+ at SQ SN:X LN:166650296 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:3d0d9df898d2c830b858f91255d8a1eb SP:Mus musculus
+ at SQ SN:Y LN:15902555 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5ff564f9fbc8cb87bcad6cfa6874902b SP:Mus musculus
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 97 8 43092987 0 151M = 43092874 -49 AAAAAAAAAA BBBBBBBBBB MC:Z:37S66M48S RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 145 8 43092874 0 37S66M48S = 43092987 49 AAAAAAAAAA BBBBBBBBBB MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1121 8 43092987 0 66M4I6M4D75M = 43092875 -44 AAAAAAAAAA AAAAAAAAAA MC:Z:38S70M43S RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1169 8 43092875 0 38S70M43S = 43092987 44 AAAAAAAAAA AAAAAAAAAA MC:Z:66M4I6M4D75M RG:Z:00001.3
diff --git a/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndv2.sam b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndv2.sam
new file mode 100644
index 0000000..dce604c
--- /dev/null
+++ b/testdata/picard/sam/MarkDuplicates/sameUnclipped5primeEndv2.sam
@@ -0,0 +1,28 @@
+ at HD VN:1.5 SO:queryname
+ at SQ SN:1 LN:197195432 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f05d753079c455c0e57af88eeda24493 SP:Mus musculus
+ at SQ SN:2 LN:181748087 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9b9d64dc89ecc73d3288eb38af3f94bd SP:Mus musculus
+ at SQ SN:3 LN:159599783 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:0a692666a1b8526e1d1e799beb71b6d0 SP:Mus musculus
+ at SQ SN:4 LN:155630120 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f5993a04396a06ed6b28fa42b2429be0 SP:Mus musculus
+ at SQ SN:5 LN:152537259 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:f90804fb8fe9cb06076d51a710fb4563 SP:Mus musculus
+ at SQ SN:6 LN:149517037 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:258a37e20815bb7e3f2e974b9d4dd295 SP:Mus musculus
+ at SQ SN:7 LN:152524553 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0d6cea6f72cb4d9f8d0efc1d29dd180 SP:Mus musculus
+ at SQ SN:8 LN:131738871 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5f217cb8a9685b9879add3ae110cabd7 SP:Mus musculus
+ at SQ SN:9 LN:124076172 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:dde08574942fc18050195618cc3f35af SP:Mus musculus
+ at SQ SN:10 LN:129993255 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:be7e6a13cc6b9da7c1da7b7fc32c5506 SP:Mus musculus
+ at SQ SN:11 LN:121843856 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e0099550b3d3943fb9bb7af6fa6952c1 SP:Mus musculus
+ at SQ SN:12 LN:121257530 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:1f9c11dc6f288f93e9fab56772a36e85 SP:Mus musculus
+ at SQ SN:13 LN:120284312 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:a7b4bb418aa21e0ec59d9e2a1fe1810b SP:Mus musculus
+ at SQ SN:14 LN:125194864 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:09d1c8449706a17d40934302a0a3b671 SP:Mus musculus
+ at SQ SN:15 LN:103494974 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e41c8b42b0921378b1fdd5172f6be067 SP:Mus musculus
+ at SQ SN:16 LN:98319150 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:e051b3930c2557ade21d67db41f3a518 SP:Mus musculus
+ at SQ SN:17 LN:95272651 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:47eede15e5761fb9c2267627f18211e7 SP:Mus musculus
+ at SQ SN:18 LN:90772031 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:9f9d41cfdb9d91b62b928a3eb4eb6928 SP:Mus musculus
+ at SQ SN:19 LN:61342430 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:591f8486f82c22442bb8463595a18e0a SP:Mus musculus
+ at SQ SN:X LN:166650296 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:3d0d9df898d2c830b858f91255d8a1eb SP:Mus musculus
+ at SQ SN:Y LN:15902555 UR:ftp://hgdownload.cse.ucsc.edu/goldenPath/Mus_musculus_assembly9/bigZips/chromFa.tar.gz AS:mm9 M5:5ff564f9fbc8cb87bcad6cfa6874902b SP:Mus musculus
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ at RG ID:00001.3 PL:illumina PU:00001ABXX101026.3.TTGAGCCT LB:Solexa-45924 DT:2010-10-26T00:00:00-0400 SM:stat2_120 CN:BI
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 97 8 43092987 0 151M = 43092874 -49 AAAAAAAAAA BBBBBBBBBB MC:Z:37S66M48S RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:3:1218:20812:27591 145 8 43092874 0 37S66M48S = 43092987 49 AAAAAAAAAA BBBBBBBBBB MC:Z:151M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1169 8 43092875 0 38S70M43S = 43092987 44 AAAAAAAAAA AAAAAAAAAA MC:Z:66M4I6M4D75M RG:Z:00001.3
+ST-E00297:149016593:H3GVWCCXX:5:2214:10145:57038 1121 8 43092987 0 66M4I6M4D75M = 43092875 -44 AAAAAAAAAA AAAAAAAAAA MC:Z:38S70M43S RG:Z:00001.3
diff --git a/testdata/picard/sam/largeIntervals.interval_list b/testdata/picard/sam/largeIntervals.interval_list
new file mode 100644
index 0000000..7a59765
--- /dev/null
+++ b/testdata/picard/sam/largeIntervals.interval_list
@@ -0,0 +1,10 @@
+ at HD VN:1.0
+ at SQ SN:chr1 LN:101 UR:merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67
+ at SQ SN:chr2 LN:101 UR:merger.fasta M5:31c33e2155b3de5e2554b693c475b310
+ at SQ SN:chr3 LN:101 UR:merger.fasta M5:631593c6dd2048ae88dcce2bd505d295
+ at SQ SN:chr4 LN:101 UR:merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1
+ at SQ SN:chr5 LN:101 UR:merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda
+ at SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d
+ at SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630
+ at SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63
+chr7 1 404 + .
diff --git a/testdata/picard/sam/revert_sam_bad_header_output_map.txt b/testdata/picard/sam/revert_sam_bad_header_output_map.txt
new file mode 100644
index 0000000..cb81728
--- /dev/null
+++ b/testdata/picard/sam/revert_sam_bad_header_output_map.txt
@@ -0,0 +1,3 @@
+FOO OUTPUT
+rg1 my_rg_1.ubam
+rg2 my_rg_2.ubam
diff --git a/testdata/picard/sam/revert_sam_positive_test_output_map.txt b/testdata/picard/sam/revert_sam_positive_test_output_map.txt
new file mode 100644
index 0000000..584df1d
--- /dev/null
+++ b/testdata/picard/sam/revert_sam_positive_test_output_map.txt
@@ -0,0 +1,4 @@
+READ_GROUP_ID OUTPUT
+0 my_rg_1.sam
+1 my_rg_2.sam
+2 my_rg_3.bam
diff --git a/testdata/picard/sam/revert_sam_negative.sam b/testdata/picard/sam/revert_sam_sample_library_override.sam
similarity index 100%
rename from testdata/picard/sam/revert_sam_negative.sam
rename to testdata/picard/sam/revert_sam_sample_library_override.sam
diff --git a/testdata/picard/sam/revert_sam_single_end.sam b/testdata/picard/sam/revert_sam_single_end.sam
new file mode 100755
index 0000000..9fc8848
--- /dev/null
+++ b/testdata/picard/sam/revert_sam_single_end.sam
@@ -0,0 +1,5 @@
+ at HD VN:1.5 SO:queryname
+ at RG ID:A SM:foo LB:foo PL:illumina PU:Illumina CN:Broad
+read1 4 * 0 0 * * 0 0 AAACCTACTTTGTTGAGAGTTTTTATTAACAGATGTTCAATTATATAAAAAAACTGAGTATTAATATATCCTCGTTCTTACTTTTATAGGACATTAATATGCAGCGGGGGGGGGGACTGCTCAAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGF RG:Z:A
+read2 4 * 0 0 * * 0 0 AGCGGGGGCTGGCCCAGGCCCTATGGGCACTCACACCCCCTTCCTACCACGCCCCGGGCGCCCTCAGCCCCTGCCGTGGCCGTGGGGGGGGGGGAAGGCTCCAGGGTTCTGCAGACTTGGAACTT <<=<</EE<EBBB>BFGBGGDBE>@>D0</<11BF1/ECD/DEG>0F at 0FB<F/E<<GG/:E///C//@GGEGGG6EC.89C>.6 at D@=/.. at 8@@.8<EB.D/.C<@66//@//6:/CB/.69B RG:Z:A
+read3 4 * 0 0 * * 0 0 ATGCAGCTAGTTGACAGCTGGCTTCAAGACATGCTTCCCTGTGAGGGGGGGGGGCAGGGCAGAAACCTCCCGCATCCTGCCTGACTGTAGGAGTAGCTGCTAACATCCTAATGTTAGTGGCTGGG ?BBBBGEGGGGGGGGGGGGEBFGGGGGGCGGGGEG>BFFDCGEGDGGGGEG0FGGGGBGGGGGGGGEBGGGGGGGGGGGGGGGGGGGGGEGGGGGGEGEGGGE at DGCGFGGGGGEGCF=@FGGGG RG:Z:A
diff --git a/testdata/picard/sam/revert_sam_valid_output_map.txt b/testdata/picard/sam/revert_sam_valid_output_map.txt
new file mode 100644
index 0000000..ed5a13a
--- /dev/null
+++ b/testdata/picard/sam/revert_sam_valid_output_map.txt
@@ -0,0 +1,3 @@
+READ_GROUP_ID OUTPUT
+rg1 /path/to/my_rg_1.ubam
+rg2 /path/to/my_rg_2.ubam
diff --git a/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam b/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam
index 583072e..ee31b6c 100644
--- a/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam
+++ b/testdata/picard/sam/summary_alignment_stats_test_chimeras.sam
@@ -8,15 +8,15 @@
@SQ SN:chr7 LN:202
@SQ SN:chr8 LN:202
@RG ID:0 SM:Hi,Momma! LB:whatever PU:me PL:ILLUMINA
-SL-XAV:1:1:0:764#0/1 0113 chr1 1 255 6M chr1 10 10 TTCATG &/,&-. OT:Z:left pointing
-SL-XAV:1:1:0:764#0/1 0177 chr1 10 255 6M chr1 1 10 TTCATG &/,&-. OT:Z:normal
-SL-XAV:1:1:0:800#0/1 0081 chr2 1 255 6M chr2 10 10 TTCATG &/,&-. OT:Z:outtie
-SL-XAV:1:1:0:800#0/1 00161 chr2 10 255 6M chr2 1 10 TTCATG &/,&-. OT:Z:outtie
-SL-XAV:1:1:0:877#0/1 0097 chr3 1 255 6M chr4 10 10 TTCATG &/,&-. OT:Z:chimeric
-SL-XAV:1:1:0:877#0/1 00145 chr4 10 255 6M chr3 1 10 TTCATG &/,&-. OT:Z:chimeric
-SL-XAV:1:1:0:940#0/1 0097 chr5 1 255 6M chr5 10 10 TTCATG &/,&-. OT:Z:normal
-SL-XAV:1:1:0:940#0/1 00145 chr5 10 255 6M chr5 1 10 TTCATG &/,&-. OT:Z:normal
-SL-XAV:1:1:0:999#0/1 0097 chr6 1 255 6M chr5 10 60 TTCATG &/,&-. OT:Z:chimeric
-SL-XAV:1:1:0:999#0/1 00145 chr6 60 255 6M chr5 1 60 TTCATG &/,&-. OT:Z:chimeric
-SL-XAV:1:1:0:430#0/1 0097 chr7 1 255 6M chr5 10 10 TTCATG &/,&-. OT:Z:hasSa SA:Z:gotSA
-SL-XAV:1:1:0:430#0/1 00145 chr7 10 255 6M chr5 1 10 TTCATG &/,&-. OT:Z:normal
+SL-XAV:1:1:0:764#0/1 113 chr1 1 255 6M chr1 10 10 TTCATG &/,&-. OT:Z:left pointing
+SL-XAV:1:1:0:764#0/1 177 chr1 10 255 6M chr1 1 10 TTCATG &/,&-. OT:Z:normal
+SL-XAV:1:1:0:800#0/1 81 chr2 1 255 6M chr2 10 10 TTCATG &/,&-. OT:Z:outtie
+SL-XAV:1:1:0:800#0/1 161 chr2 10 255 6M chr2 1 10 TTCATG &/,&-. OT:Z:outtie
+SL-XAV:1:1:0:877#0/1 97 chr3 1 255 6M chr4 10 10 TTCATG &/,&-. OT:Z:chimeric
+SL-XAV:1:1:0:877#0/1 145 chr4 10 255 6M chr3 1 10 TTCATG &/,&-. OT:Z:chimeric
+SL-XAV:1:1:0:940#0/1 97 chr5 1 255 6M chr5 10 10 TTCATG &/,&-. OT:Z:normal
+SL-XAV:1:1:0:940#0/1 145 chr5 10 255 6M chr5 1 10 TTCATG &/,&-. OT:Z:normal
+SL-XAV:1:1:0:999#0/1 97 chr6 1 255 6M chr5 10 60 TTCATG &/,&-. OT:Z:chimeric
+SL-XAV:1:1:0:999#0/1 145 chr6 60 255 6M chr5 1 60 TTCATG &/,&-. OT:Z:chimeric
+SL-XAV:1:1:0:430#0/1 97 chr7 1 255 6M chr5 10 10 TTCATG &/,&-. OT:Z:hasSa SA:Z:gotSA
+SL-XAV:1:1:0:430#0/1 145 chr7 10 255 6M chr5 1 10 TTCATG &/,&-. OT:Z:normal
diff --git a/testdata/picard/vcf/vcfFormatTest.bad_dict.vcf b/testdata/picard/vcf/vcfFormatTest.bad_dict.vcf
index bd1e303..8a62ad5 100644
--- a/testdata/picard/vcf/vcfFormatTest.bad_dict.vcf
+++ b/testdata/picard/vcf/vcfFormatTest.bad_dict.vcf
@@ -8,7 +8,7 @@
##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -25.4561 <= x < 3.2489">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -38672.7015">
##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -38672.7015 <= x < -25.4561">
-##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=AD,Number=R,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/picard-tools.git
More information about the debian-med-commit
mailing list