[med-svn] [picard-tools] 07/09: Imported Upstream version 1.109
Charles Plessy
plessy at moszumanska.debian.org
Sat Mar 22 01:56:48 UTC 2014
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch master
in repository picard-tools.
commit a6e579a832e38698f5f9bc3d737eeaaf9d7adfb7
Author: Charles Plessy <plessy at debian.org>
Date: Sat Mar 22 10:48:38 2014 +0900
Imported Upstream version 1.109
---
build.xml | 3 +-
.../net/sf/picard/sam/AbstractAlignmentMerger.java | 59 ++++-
src/java/net/sf/picard/sam/CleanSam.java | 19 +-
src/java/net/sf/picard/sam/ReorderSam.java | 9 +-
...RevertOriginalBaseQualitiesAndAddMateCigar.java | 234 ++++++++++++++++++
src/java/net/sf/picard/sam/RevertSam.java | 16 +-
src/java/net/sf/picard/sam/SamFileValidator.java | 43 +++-
src/java/net/sf/picard/sam/SamPairUtil.java | 83 ++++---
src/java/net/sf/picard/sam/SortSam.java | 2 +
src/java/net/sf/picard/util/ProgressLogger.java | 10 +-
src/java/net/sf/samtools/AsyncSAMFileWriter.java | 13 +-
src/java/net/sf/samtools/SAMFileWriter.java | 11 +-
src/java/net/sf/samtools/SAMFileWriterFactory.java | 42 ++--
src/java/net/sf/samtools/SAMFileWriterImpl.java | 14 +-
.../net/sf/samtools/SAMHeaderRecordComparator.java | 66 +++++
src/java/net/sf/samtools/SAMRecord.java | 267 +++++++++++++++++----
src/java/net/sf/samtools/SAMRecordSetBuilder.java | 14 +-
src/java/net/sf/samtools/SAMRecordUtil.java | 1 -
src/java/net/sf/samtools/SAMTag.java | 2 +-
src/java/net/sf/samtools/SAMTagUtil.java | 66 ++---
src/java/net/sf/samtools/SAMUtils.java | 65 +++++
src/java/net/sf/samtools/SAMValidationError.java | 42 ++--
.../net/sf/samtools/util/AbstractAsyncWriter.java | 3 +
.../ProgressLoggerInterface.java} | 31 ++-
.../net/sf/picard/sam/ValidateSamFileTest.java | 68 ++++--
.../java/net/sf/samtools/BAMFileIndexTest.java | 3 +
.../sf/samtools/SamHeaderRecordComparatorTest.java | 72 ++++++
.../intervallist/IntervalListFromVCFTest.vcf | 53 ++++
.../IntervalListFromVCFTestComp.interval_list | 7 +
...ntervalListFromVCFTestCompInverse.interval_list | 10 +
...lListFromVCFTestCompInverseManual.interval_list | 10 +
.../intervallist/IntervalListFromVCFTestManual.vcf | 57 +++++
...IntervalListFromVCFTestManualComp.interval_list | 12 +
.../inappropriate_mate_cigar_string.sam | 13 +
.../invalid_mate_cigar_string.sam | 19 ++
.../net/sf/picard/sam/mate_cigar_and_no_oqs.sam | 4 +
testdata/net/sf/picard/sam/mate_cigar_and_oqs.sam | 4 +
.../net/sf/picard/sam/no_mate_cigar_and_no_oqs.sam | 4 +
.../net/sf/picard/sam/no_mate_cigar_and_oqs.sam | 4 +
testdata/net/sf/picard/sam/no_mates_and_no_oqs.sam | 6 +
.../no_mates_and_no_oqs_in_first_four_records.sam | 7 +
testdata/net/sf/picard/sam/no_mates_and_oqs.sam | 6 +
42 files changed, 1233 insertions(+), 241 deletions(-)
diff --git a/build.xml b/build.xml
index f88bf7b..5a8a437 100755
--- a/build.xml
+++ b/build.xml
@@ -43,7 +43,7 @@
<!-- Get SVN revision, if available, otherwise leave it blank. -->
<exec executable="svnversion" outputproperty="repository.revision" failifexecutionfails="false"/>
<property name="repository.revision" value=""/>
- <property name="sam-version" value="1.108"/>
+ <property name="sam-version" value="1.109"/>
<property name="picard-version" value="${sam-version}"/>
<property name="tribble-version" value="${sam-version}"/>
<property name="variant-version" value="${sam-version}"/>
@@ -327,6 +327,7 @@
<package-and-document-command title="ReorderSam" id="" main-class="net.sf.picard.sam.ReorderSam"/>
<package-and-document-command title="ReplaceSamHeader" main-class="net.sf.picard.sam.ReplaceSamHeader"/>
<package-and-document-command title="RevertSam" main-class="net.sf.picard.sam.RevertSam"/>
+ <package-and-document-command title="RevertOriginalBaseQualitiesAndAddMateCigar" main-class="net.sf.picard.sam.RevertOriginalBaseQualitiesAndAddMateCigar"/>
<package-and-document-command title="SamFormatConverter" main-class="net.sf.picard.sam.SamFormatConverter"/>
<package-and-document-command title="SamToFastq" main-class="net.sf.picard.sam.SamToFastq"/>
<package-and-document-command title="SortSam" main-class="net.sf.picard.sam.SortSam"/>
diff --git a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
index 817568d..2d0cf26 100644
--- a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
+++ b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
@@ -356,6 +356,8 @@ public abstract class AbstractAlignmentMerger {
header.setSortOrder(this.sortOrder);
final boolean presorted = this.sortOrder == SortOrder.coordinate;
final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, this.targetBamFile);
+ writer.setProgressLogger(
+ new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");
for (final SAMRecord rec : sorted) {
@@ -519,17 +521,57 @@ public abstract class AbstractAlignmentMerger {
}
- protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SAMRecord alignment) {
+ private static Cigar createNewCigarIfMapsOffEndOfReference(SAMFileHeader header,
+ boolean isUnmapped,
+ int referenceIndex,
+ int alignmentEnd,
+ int readLength,
+ Cigar oldCigar) {
+ Cigar newCigar = null;
+ if (!isUnmapped) {
+ final SAMSequenceRecord refseq = header.getSequence(referenceIndex);
+ final int overhang = alignmentEnd - refseq.getSequenceLength();
+ if (overhang > 0) {
+ // 1-based index of first base in read to clip.
+ final int clipFrom = readLength - overhang + 1;
+ final List<CigarElement> newCigarElements = CigarUtil.softClipEndOfRead(clipFrom, oldCigar.getCigarElements());
+ newCigar = new Cigar(newCigarElements);
+ }
+ }
+ return newCigar;
+ }
+
+ /**
+ * Soft-clip an alignment that hangs off the end of its reference sequence. Checks both the read and its mate,
+ * if available.
+ * @param rec
+ */
+ public static void createNewCigarsIfMapsOffEndOfReference(final SAMRecord rec) {
// If the read maps off the end of the alignment, clip it
- final SAMSequenceRecord refseq = rec.getHeader().getSequence(rec.getReferenceIndex());
- if (rec.getAlignmentEnd() > refseq.getSequenceLength()) {
- // 1-based index of first base in read to clip.
- final int clipFrom = refseq.getSequenceLength() - rec.getAlignmentStart() + 1;
- final List<CigarElement> newCigarElements = CigarUtil.softClipEndOfRead(clipFrom, rec.getCigar().getCigarElements());
- rec.setCigar(new Cigar(newCigarElements));
+ if (!rec.getReadUnmappedFlag()) {
+ final Cigar readCigar = createNewCigarIfMapsOffEndOfReference(rec.getHeader(),
+ rec.getReadUnmappedFlag(),
+ rec.getReferenceIndex(),
+ rec.getAlignmentEnd(),
+ rec.getReadLength(),
+ rec.getCigar());
+ if (null != readCigar) rec.setCigar(readCigar);
+ }
+
+ // If the read's mate maps off the end of the alignment, clip it
+ if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag() && null != rec.getMateCigar()) {
+ final Cigar mateCigar = createNewCigarIfMapsOffEndOfReference(rec.getHeader(),
+ rec.getMateUnmappedFlag(),
+ rec.getMateReferenceIndex(),
+ rec.getMateAlignmentEnd(),
+ rec.getMateCigar().getReadLength(),
+ rec.getMateCigar());
+ if (null != mateCigar) rec.setAttribute(SAMTag.MC.name(), mateCigar);
}
+ }
+ protected void updateCigarForTrimmedOrClippedBases(final SAMRecord rec, final SAMRecord alignment) {
// If the read was trimmed or not all the bases were sent for alignment, clip it
final int alignmentReadLength = alignment.getReadLength();
final int originalReadLength = rec.getReadLength();
@@ -538,6 +580,9 @@ public abstract class AbstractAlignmentMerger {
: this.read2BasesTrimmed != null ? this.read2BasesTrimmed : 0;
final int notWritten = originalReadLength - (alignmentReadLength + trimmed);
+ // Update cigar if the read or its mate maps off the end of the reference
+ createNewCigarsIfMapsOffEndOfReference(rec);
+
rec.setCigar(CigarUtil.addSoftClippedBasesToEndsOfCigar(
rec.getCigar(), rec.getReadNegativeStrandFlag(), notWritten, trimmed));
diff --git a/src/java/net/sf/picard/sam/CleanSam.java b/src/java/net/sf/picard/sam/CleanSam.java
index 338c71a..f9871e4 100644
--- a/src/java/net/sf/picard/sam/CleanSam.java
+++ b/src/java/net/sf/picard/sam/CleanSam.java
@@ -76,19 +76,15 @@ public class CleanSam extends CommandLineProgram {
final CloseableIterator<SAMRecord> it = reader.iterator();
final ProgressLogger progress = new ProgressLogger(Log.getInstance(CleanSam.class));
- // If the read maps off the end of the alignment, clip it
+ // If the read (or its mate) maps off the end of the alignment, clip it
while(it.hasNext()) {
final SAMRecord rec = it.next();
- if (!rec.getReadUnmappedFlag()) {
- final SAMSequenceRecord refseq = rec.getHeader().getSequence(rec.getReferenceIndex());
- final int overhang = rec.getAlignmentEnd() - refseq.getSequenceLength();
- if (overhang > 0) {
- // 1-based index of first base in read to clip.
- final int clipFrom = rec.getReadLength() - overhang + 1;
- final List<CigarElement> newCigarElements = CigarUtil.softClipEndOfRead(clipFrom, rec.getCigar().getCigarElements());
- rec.setCigar(new Cigar(newCigarElements));
- }
- } else if (rec.getMappingQuality() != 0) {
+
+ // If the read (or its mate) maps off the end of the alignment, clip it
+ AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(rec);
+
+ // check the read's mapping quality
+ if (rec.getReadUnmappedFlag() && 0 != rec.getMappingQuality()) {
rec.setMappingQuality(0);
}
@@ -104,5 +100,4 @@ public class CleanSam extends CommandLineProgram {
}
return 0;
}
-
}
diff --git a/src/java/net/sf/picard/sam/ReorderSam.java b/src/java/net/sf/picard/sam/ReorderSam.java
index 4d5f474..1847f09 100644
--- a/src/java/net/sf/picard/sam/ReorderSam.java
+++ b/src/java/net/sf/picard/sam/ReorderSam.java
@@ -155,17 +155,18 @@ public class ReorderSam extends CommandLineProgram {
while ( it.hasNext() ) {
counter++;
final SAMRecord read = it.next();
- int oldRefIndex = read.getReferenceIndex();
- int oldMateIndex = read.getMateReferenceIndex();
- int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);
+ final int oldRefIndex = read.getReferenceIndex();
+ final int oldMateIndex = read.getMateReferenceIndex();
+ final int newRefIndex = newOrderIndex(read, oldRefIndex, newOrder);
read.setHeader(out.getFileHeader());
read.setReferenceIndex(newRefIndex);
- int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
+ final int newMateIndex = newOrderIndex(read, oldMateIndex, newOrder);
if ( oldMateIndex != -1 && newMateIndex == -1 ) { // becoming unmapped
read.setMateAlignmentStart(0);
read.setMateUnmappedFlag(true);
+ read.setAttribute(SAMTag.MC.name(), null); // Set the Mate Cigar String to null
}
read.setMateReferenceIndex(newMateIndex);
diff --git a/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java b/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
new file mode 100644
index 0000000..3d6276e
--- /dev/null
+++ b/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
@@ -0,0 +1,234 @@
+package net.sf.picard.sam;
+
+import net.sf.picard.PicardException;
+import net.sf.picard.cmdline.CommandLineProgram;
+import net.sf.picard.cmdline.Option;
+import net.sf.picard.cmdline.StandardOptionDefinitions;
+import net.sf.picard.cmdline.Usage;
+import net.sf.picard.io.IoUtil;
+import net.sf.picard.util.Log;
+import net.sf.picard.util.PeekableIterator;
+import net.sf.picard.util.ProgressLogger;
+import net.sf.samtools.*;
+import net.sf.samtools.util.CloserUtil;
+import net.sf.samtools.util.SortingCollection;
+
+import java.io.File;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+/**
+ * This tool reverts the original base qualities (if specified) and adds the mate cigar tag to mapped BAMs.
+ * If the file does not have OQs and already has mate cigar tags, nothing is done.
+ * New BAM/BAI/MD5 files are created.
+ * @author Nils Homer
+ */
+public class RevertOriginalBaseQualitiesAndAddMateCigar extends CommandLineProgram {
+
+ @Usage
+ public String USAGE = getStandardUsagePreamble() +
+ "Reverts the original base qualities and adds the mate cigar tag to read-group BAMs.";
+
+ @Option(shortName= StandardOptionDefinitions.INPUT_SHORT_NAME, doc="The input SAM/BAM file to revert the state of.")
+ public File INPUT;
+
+ @Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="The output SAM/BAM file to create.")
+ public File OUTPUT;
+
+ @Option(shortName="SO", doc="The sort order to create the reverted output file with."
+ + "By default, the sort order will be the same as the input.", optional = true)
+ public SAMFileHeader.SortOrder SORT_ORDER = null;
+
+ @Option(shortName=StandardOptionDefinitions.USE_ORIGINAL_QUALITIES_SHORT_NAME, doc="True to restore original" +
+ " qualities from the OQ field to the QUAL field if available.")
+ public boolean RESTORE_ORIGINAL_QUALITIES = true;
+
+ @Option(doc="The maximum number of records to examine to determine if we can exit early and not output, given that"
+ + " there are a no original base qualities (if we are to restore) and mate cigars exist."
+ + " Set to 0 to never skip the file.")
+ public int MAX_RECORDS_TO_EXAMINE = 10000;
+
+ private final static Log log = Log.getInstance(RevertOriginalBaseQualitiesAndAddMateCigar.class);
+
+ public RevertOriginalBaseQualitiesAndAddMateCigar() {
+ this.CREATE_INDEX = true;
+ this.CREATE_MD5_FILE = true;
+ }
+
+ /** Default main method impl. */
+ public static void main(final String[] args) {
+ new RevertOriginalBaseQualitiesAndAddMateCigar().instanceMainWithExit(args);
+ }
+
+ protected int doWork() {
+ IoUtil.assertFileIsReadable(INPUT);
+ IoUtil.assertFileIsWritable(OUTPUT);
+
+ boolean foundPairedMappedReads = false;
+
+ // Check if we can skip this file since it does not have OQ tags and the mate cigar tag is already there.
+ final CanSkipSamFile skipSamFile = RevertOriginalBaseQualitiesAndAddMateCigar.canSkipSAMFile(INPUT, MAX_RECORDS_TO_EXAMINE);
+ log.info(skipSamFile.getMessage(MAX_RECORDS_TO_EXAMINE));
+ if (skipSamFile.canSkip()) return 0;
+
+ final SAMFileReader in = new SAMFileReader(INPUT, true);
+ final SAMFileHeader inHeader = in.getFileHeader();
+
+ // Build the output writer based on the correct sort order
+ final SAMFileHeader outHeader = inHeader.clone();
+ if (null == SORT_ORDER) this.SORT_ORDER = inHeader.getSortOrder(); // same as the input
+ outHeader.setSortOrder(SORT_ORDER);
+ SAMFileWriterFactory.setDefaultCreateIndexWhileWriting(CREATE_INDEX);
+ SAMFileWriterFactory.setDefaultCreateMd5File(CREATE_MD5_FILE);
+ final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outHeader, false, OUTPUT);
+
+ // Iterate over the records, revert original base qualities, and push them into a SortingCollection by queryname
+ final SortingCollection<SAMRecord> sorter = SortingCollection.newInstance(SAMRecord.class, new BAMRecordCodec(outHeader),
+ new SAMRecordQueryNameComparator(), MAX_RECORDS_IN_RAM);
+ final ProgressLogger revertingProgress = new ProgressLogger(log, 1000000, " reverted OQs");
+ int numOriginalQualitiesRestored = 0;
+ for (final SAMRecord record : in) {
+ // Clean up reads that map off the end of the reference
+ AbstractAlignmentMerger.createNewCigarsIfMapsOffEndOfReference(record);
+
+ if (RESTORE_ORIGINAL_QUALITIES && null != record.getOriginalBaseQualities()) {
+ // revert the original base qualities
+ record.setBaseQualities(record.getOriginalBaseQualities());
+ record.setOriginalBaseQualities(null);
+ numOriginalQualitiesRestored++;
+ }
+ if (!foundPairedMappedReads && record.getReadPairedFlag() && !record.getReadUnmappedFlag()) foundPairedMappedReads = true;
+ revertingProgress.record(record);
+ sorter.add(record);
+ }
+ CloserUtil.close(in);
+ log.info("Reverted the original base qualities for " + numOriginalQualitiesRestored + " records");
+
+ /**
+ * Iterate through the sorting collection output
+ * 1. Set mate cigar string on primary/non-supplemental records
+ * 2. push record into SAMFileWriter to the output
+ */
+ final PeekableIterator<SAMRecord> sorterIterator = new PeekableIterator<SAMRecord>(sorter.iterator());
+ final ProgressLogger sorterProgress = new ProgressLogger(log, 1000000, " mate cigars added");
+ int numMateCigarsAdded = 0;
+ while (sorterIterator.hasNext()) {
+ final List<SAMRecord> records = new LinkedList<SAMRecord>();
+
+
+ /**
+ * Get all records with the same name, and then identify the canonical first and second end to which we
+ * want to add mate cigars.
+ */
+ SAMRecord firstRecord = null, secondRecord = null;
+ final SAMRecord first = sorterIterator.peek(); // peek so we consider it in the following loop
+ while (sorterIterator.hasNext() && sorterIterator.peek().getReadName().equals(first.getReadName())) {
+ final SAMRecord record = sorterIterator.next();
+ // We must make sure that we find only one "primary" alignment for each end
+ if (record.getReadPairedFlag() && !record.isSecondaryOrSupplementary()) {
+ if (record.getFirstOfPairFlag()) {
+ if (null != firstRecord) {
+ throw new PicardException("Found two records that are paired, not supplementary, and first of the pair");
+ }
+ firstRecord = record;
+ }
+ else if (record.getSecondOfPairFlag()) {
+ if (null != secondRecord) {
+ throw new PicardException("Found two records that are paired, not supplementary, and second of the pair");
+ }
+ secondRecord = record;
+ }
+ }
+ records.add(record);
+ }
+
+ // we must find both records, and then always update the mate cigar
+ if (null != firstRecord && null != secondRecord) {
+ // Update mate info
+ SamPairUtil.setMateInfo(firstRecord, secondRecord, outHeader);
+ numMateCigarsAdded+=2;
+ }
+
+ // Add it to the output file
+ for (final SAMRecord record : records) {
+ sorterProgress.record(record);
+ out.addAlignment(record);
+ }
+
+ }
+ sorterIterator.close();
+ CloserUtil.close(out);
+ log.info("Updated " + numMateCigarsAdded + " records with mate cigar");
+ if (!foundPairedMappedReads) log.info("Did not find any paired mapped reads.");
+
+ return 0;
+ }
+
+ /**
+ * Used as a return for the canSkipSAMFile function.
+ */
+ public enum CanSkipSamFile {
+ CAN_SKIP("Can skip the BAM file", true),
+ CANNOT_SKIP_FOUND_OQ("Cannot skip the BAM as we found a record with an OQ", false),
+ CANNOT_SKIP_FOUND_NO_MC("Cannot skip the BAM as we found a mate with no mate cigar tag", false),
+ FOUND_NO_EVIDENCE("Found no evidence of OQ or mate with no mate cigar in the first %d records. Will continue...", false);
+ private String format;
+ private boolean skip;
+
+ private CanSkipSamFile(String format, boolean skip) {
+ this.format = format;
+ this.skip = skip;
+ }
+
+ public String getMessage(int maxRecordsToExamine) { return String.format(this.format, maxRecordsToExamine); }
+ public boolean canSkip() { return this.skip; }
+ }
+
+ /**
+ * Checks if we can skip the SAM/BAM file when reverting original base qualities and adding mate cigars.
+ * @param inputFile the SAM/BAM input file
+ * @param maxRecordsToExamine the maximum number of records to examine before quitting
+ * @return whether we can skip or not, and the explanation why.
+ */
+ public static CanSkipSamFile canSkipSAMFile(final File inputFile, final int maxRecordsToExamine) {
+ final SAMFileReader in = new SAMFileReader(inputFile, true);
+ final Iterator<SAMRecord> iterator = in.iterator();
+ int numRecordsExamined = 0;
+ CanSkipSamFile returnType = CanSkipSamFile.FOUND_NO_EVIDENCE;
+
+ while (iterator.hasNext() && numRecordsExamined < maxRecordsToExamine) {
+ final SAMRecord record = iterator.next();
+
+ if (null != record.getOriginalBaseQualities()) {
+ // has OQ, break and return case #2
+ returnType = CanSkipSamFile.CANNOT_SKIP_FOUND_OQ;
+ break;
+ }
+
+ // check if mate pair and its mate is mapped
+ if (record.getReadPairedFlag() && !record.getMateUnmappedFlag()) {
+ if (null == record.getMateCigar()) {
+ // has no MC, break and return case #3
+ returnType = CanSkipSamFile.CANNOT_SKIP_FOUND_NO_MC;
+ break;
+ }
+ else {
+ // has MC, previously checked that it does not have OQ, break and return case #1
+ returnType = CanSkipSamFile.CAN_SKIP;
+ break;
+ }
+ }
+
+ numRecordsExamined++;
+ }
+
+ // no more records anyhow, so we can skip
+ if (!iterator.hasNext() && CanSkipSamFile.FOUND_NO_EVIDENCE == returnType) {
+ returnType = CanSkipSamFile.CAN_SKIP;
+ }
+
+ in.close();
+
+ return returnType;
+ }
+}
diff --git a/src/java/net/sf/picard/sam/RevertSam.java b/src/java/net/sf/picard/sam/RevertSam.java
index 563fec1..73a9ace 100644
--- a/src/java/net/sf/picard/sam/RevertSam.java
+++ b/src/java/net/sf/picard/sam/RevertSam.java
@@ -30,7 +30,6 @@ import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.io.IoUtil;
-import net.sf.picard.util.FormatUtil;
import net.sf.picard.util.Log;
import net.sf.picard.util.PeekableIterator;
import net.sf.picard.util.ProgressLogger;
@@ -75,12 +74,13 @@ public class RevertSam extends CommandLineProgram {
@Option(doc="When removing alignment information, the set of optional tags to remove.")
public List<String> ATTRIBUTE_TO_CLEAR = new ArrayList<String>() {{
- add("NM");
- add("UQ");
- add("PG");
- add("MD");
- add("MQ");
- add("SA"); // Supplementary alignment metadata
+ add(SAMTag.NM.name());
+ add(SAMTag.UQ.name());
+ add(SAMTag.PG.name());
+ add(SAMTag.MD.name());
+ add(SAMTag.MQ.name());
+ add(SAMTag.SA.name()); // Supplementary alignment metadata
+ add(SAMTag.MC.name()); // Mate Cigar
}};
@Option(doc="WARNING: This option is potentially destructive. If enabled will discard reads in order to produce " +
@@ -107,7 +107,7 @@ public class RevertSam extends CommandLineProgram {
/** Default main method impl. */
public static void main(final String[] args) {
- System.exit(new RevertSam().instanceMain(args));
+ new RevertSam().instanceMainWithExit(args);
}
/**
diff --git a/src/java/net/sf/picard/sam/SamFileValidator.java b/src/java/net/sf/picard/sam/SamFileValidator.java
index 77f9a90..73e2519 100644
--- a/src/java/net/sf/picard/sam/SamFileValidator.java
+++ b/src/java/net/sf/picard/sam/SamFileValidator.java
@@ -351,9 +351,17 @@ public class SamFileValidator {
if (record.getReadUnmappedFlag()) {
return true;
}
+ return validateCigar(record, recordNumber, true);
+ }
+
+ private boolean validateMateCigar(final SAMRecord record, final long recordNumber) {
+ return validateCigar(record, recordNumber, false);
+ }
+
+ private boolean validateCigar(final SAMRecord record, final long recordNumber, final boolean isReadCigar) {
final ValidationStringency savedStringency = record.getValidationStringency();
record.setValidationStringency(ValidationStringency.LENIENT);
- final List<SAMValidationError> errors = record.validateCigar(recordNumber);
+ final List<SAMValidationError> errors = isReadCigar ? record.validateCigar(recordNumber) : record.validateMateCigar(recordNumber);
record.setValidationStringency(savedStringency);
if (errors == null) {
return true;
@@ -366,6 +374,7 @@ public class SamFileValidator {
return valid;
}
+
private void validateSortOrder(final SAMRecord record, final long recordNumber) {
final SAMRecord prev = orderChecker.getPreviousRecord();
if (!orderChecker.isSorted(record)) {
@@ -429,6 +438,7 @@ public class SamFileValidator {
if (!record.getReadPairedFlag() || record.isSecondaryOrSupplementary()) {
return;
}
+ validateMateCigar(record, recordNumber);
final PairEndInfo pairEndInfo = pairEndInfoByName.remove(record.getReferenceIndex(), record.getReadName());
if (pairEndInfo == null) {
@@ -536,11 +546,13 @@ public class SamFileValidator {
private final int readReferenceIndex;
private final boolean readNegStrandFlag;
private final boolean readUnmappedFlag;
+ private final String readCigarString;
private final int mateAlignmentStart;
private final int mateReferenceIndex;
private final boolean mateNegStrandFlag;
private final boolean mateUnmappedFlag;
+ private final String mateCigarString;
private final boolean firstOfPairFlag;
@@ -553,26 +565,33 @@ public class SamFileValidator {
this.readNegStrandFlag = record.getReadNegativeStrandFlag();
this.readReferenceIndex = record.getReferenceIndex();
this.readUnmappedFlag = record.getReadUnmappedFlag();
+ this.readCigarString = record.getCigarString();
this.mateAlignmentStart = record.getMateAlignmentStart();
this.mateNegStrandFlag = record.getMateNegativeStrandFlag();
this.mateReferenceIndex = record.getMateReferenceIndex();
this.mateUnmappedFlag = record.getMateUnmappedFlag();
+ final Object mcs = record.getAttribute(SAMTag.MC.name());
+ this.mateCigarString = (mcs != null) ? (String) mcs : null;
this.firstOfPairFlag = record.getFirstOfPairFlag();
}
private PairEndInfo(int readAlignmentStart, int readReferenceIndex, boolean readNegStrandFlag, boolean readUnmappedFlag,
+ String readCigarString,
int mateAlignmentStart, int mateReferenceIndex, boolean mateNegStrandFlag, boolean mateUnmappedFlag,
+ String mateCigarString,
boolean firstOfPairFlag, long recordNumber) {
this.readAlignmentStart = readAlignmentStart;
this.readReferenceIndex = readReferenceIndex;
this.readNegStrandFlag = readNegStrandFlag;
this.readUnmappedFlag = readUnmappedFlag;
+ this.readCigarString = readCigarString;
this.mateAlignmentStart = mateAlignmentStart;
this.mateReferenceIndex = mateReferenceIndex;
this.mateNegStrandFlag = mateNegStrandFlag;
this.mateUnmappedFlag = mateUnmappedFlag;
+ this.mateCigarString = mateCigarString;
this.firstOfPairFlag = firstOfPairFlag;
this.recordNumber = recordNumber;
}
@@ -623,6 +642,15 @@ public class SamFileValidator {
readName,
end1.recordNumber));
}
+ if ((end1.mateCigarString != null) && (!end1.mateCigarString.equals(end2.readCigarString))) {
+ errors.add(new SAMValidationError(
+ Type.MISMATCH_MATE_CIGAR_STRING,
+ "Mate CIGAR string does not match CIGAR string of mate",
+ readName,
+ end1.recordNumber));
+ }
+ // Note - don't need to validate that the mateCigarString is a valid cigar string, since this
+ // will be validated by validateCigar on the mate's record itself.
}
}
@@ -671,17 +699,20 @@ public class SamFileValidator {
this.in = new DataInputStream(is);
}
- public void encode(String key, PairEndInfo record) {
+ public void encode(final String key, final PairEndInfo record) {
try {
out.writeUTF(key);
out.writeInt(record.readAlignmentStart);
out.writeInt(record.readReferenceIndex);
out.writeBoolean(record.readNegStrandFlag);
out.writeBoolean(record.readUnmappedFlag);
+ out.writeUTF(record.readCigarString);
out.writeInt(record.mateAlignmentStart);
out.writeInt(record.mateReferenceIndex);
out.writeBoolean(record.mateNegStrandFlag);
out.writeBoolean(record.mateUnmappedFlag);
+ // writeUTF can't take null, so store a null mateCigarString as an empty string
+ out.writeUTF(record.mateCigarString != null ? record.mateCigarString : "");
out.writeBoolean(record.firstOfPairFlag);
out.writeLong(record.recordNumber);
} catch (IOException e) {
@@ -696,17 +727,23 @@ public class SamFileValidator {
final int readReferenceIndex = in.readInt();
final boolean readNegStrandFlag = in.readBoolean();
final boolean readUnmappedFlag = in.readBoolean();
+ final String readCigarString = in.readUTF();
final int mateAlignmentStart = in.readInt();
final int mateReferenceIndex = in.readInt();
final boolean mateNegStrandFlag = in.readBoolean();
final boolean mateUnmappedFlag = in.readBoolean();
+ // read mateCigarString - note that null value is stored as an empty string
+ final String mcs = in.readUTF();
+ final String mateCigarString = !mcs.isEmpty() ? mcs : null;
+
final boolean firstOfPairFlag = in.readBoolean();
final long recordNumber = in.readLong();
final PairEndInfo rec = new PairEndInfo(readAlignmentStart, readReferenceIndex, readNegStrandFlag,
- readUnmappedFlag, mateAlignmentStart, mateReferenceIndex, mateNegStrandFlag, mateUnmappedFlag,
+ readUnmappedFlag, readCigarString, mateAlignmentStart, mateReferenceIndex, mateNegStrandFlag,
+ mateUnmappedFlag, mateCigarString,
firstOfPairFlag, recordNumber);
return new AbstractMap.SimpleEntry(key, rec);
} catch (IOException e) {
diff --git a/src/java/net/sf/picard/sam/SamPairUtil.java b/src/java/net/sf/picard/sam/SamPairUtil.java
index 9b1fa3a..35534d9 100644
--- a/src/java/net/sf/picard/sam/SamPairUtil.java
+++ b/src/java/net/sf/picard/sam/SamPairUtil.java
@@ -28,7 +28,6 @@ import net.sf.picard.PicardException;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMTag;
-import net.sf.samtools.util.CoordMath;
import java.util.Iterator;
import java.util.List;
@@ -39,15 +38,15 @@ import java.util.List;
public class SamPairUtil {
/**
- * The possible orientations of paired reads.
- *
- * F = mapped to forward strand
- * R = mapped to reverse strand
- *
- * FR means the read that's mapped to the forward strand comes before the
- * read mapped to the reverse strand when their 5'-end coordinates are
- * compared.
- */
+ * The possible orientations of paired reads.
+ *
+ * F = mapped to forward strand
+ * R = mapped to reverse strand
+ *
+ * FR means the read that's mapped to the forward strand comes before the
+ * read mapped to the reverse strand when their 5'-end coordinates are
+ * compared.
+ */
public static enum PairOrientation
{
FR, // ( 5' --F--> <--R-- 5' ) - aka. innie
@@ -58,12 +57,12 @@ public class SamPairUtil {
/**
- * Computes the pair orientation of the given SAMRecord.
- * @param r
- * @return PairOrientation of the given SAMRecord.
- * @throws IllegalArgumentException If the record is not a paired read, or
- * one or both reads are unmapped.
- */
+ * Computes the pair orientation of the given SAMRecord.
+ * @param r
+ * @return PairOrientation of the given SAMRecord.
+ * @throws IllegalArgumentException If the record is not a paired read, or
+ * one or both reads are unmapped.
+ */
public static PairOrientation getPairOrientation(SAMRecord r)
{
final boolean readIsOnReverseStrand = r.getReadNegativeStrandFlag();
@@ -78,16 +77,16 @@ public class SamPairUtil {
}
final long positiveStrandFivePrimePos = ( readIsOnReverseStrand
- ? r.getMateAlignmentStart() //mate's 5' position ( x---> )
- : r.getAlignmentStart() ); //read's 5' position ( x---> )
+ ? r.getMateAlignmentStart() //mate's 5' position ( x---> )
+ : r.getAlignmentStart() ); //read's 5' position ( x---> )
final long negativeStrandFivePrimePos = ( readIsOnReverseStrand
- ? r.getAlignmentEnd() //read's 5' position ( <---x )
- : r.getAlignmentStart() + r.getInferredInsertSize() ); //mate's 5' position ( <---x )
+ ? r.getAlignmentEnd() //read's 5' position ( <---x )
+ : r.getAlignmentStart() + r.getInferredInsertSize() ); //mate's 5' position ( <---x )
return ( positiveStrandFivePrimePos < negativeStrandFivePrimePos
- ? PairOrientation.FR
- : PairOrientation.RF );
+ ? PairOrientation.FR
+ : PairOrientation.RF );
}
@@ -118,26 +117,26 @@ public class SamPairUtil {
if (firstOfPair == null) {
throw new PicardException(
- "First record does not exist - cannot perform mate assertion!");
+ "First record does not exist - cannot perform mate assertion!");
} else if (secondOfPair == null) {
throw new PicardException(
- firstOfPair.toString() + " is missing its mate");
+ firstOfPair.toString() + " is missing its mate");
} else if (!firstOfPair.getReadPairedFlag()) {
throw new PicardException(
- "First record is not marked as paired: " + firstOfPair.toString());
+ "First record is not marked as paired: " + firstOfPair.toString());
} else if (!secondOfPair.getReadPairedFlag()) {
throw new PicardException(
- "Second record is not marked as paired: " + secondOfPair.toString());
+ "Second record is not marked as paired: " + secondOfPair.toString());
} else if (!firstOfPair.getFirstOfPairFlag()) {
throw new PicardException(
- "First record is not marked as first of pair: " + firstOfPair.toString());
+ "First record is not marked as first of pair: " + firstOfPair.toString());
} else if (!secondOfPair.getSecondOfPairFlag()) {
throw new PicardException(
- "Second record is not marked as second of pair: " + secondOfPair.toString());
+ "Second record is not marked as second of pair: " + secondOfPair.toString());
} else if (!firstOfPair.getReadName().equals(secondOfPair.getReadName())) {
throw new PicardException(
- "First [" + firstOfPair.getReadName() + "] and Second [" +
- secondOfPair.getReadName() + "] readnames do not match!");
+ "First [" + firstOfPair.getReadName() + "] and Second [" +
+ secondOfPair.getReadName() + "] readnames do not match!");
}
}
@@ -151,14 +150,14 @@ public class SamPairUtil {
* @throws PicardException when the secondOfPair mate cannot be obtained due to assertion failures
*/
public static SAMRecord obtainAssertedMate(final Iterator<SAMRecord> samRecordIterator,
- final SAMRecord firstOfPair) {
+ final SAMRecord firstOfPair) {
if (samRecordIterator.hasNext()) {
final SAMRecord secondOfPair = samRecordIterator.next();
assertMate(firstOfPair, secondOfPair);
return secondOfPair;
} else {
throw new PicardException(
- "Second record does not exist: " + firstOfPair.getReadName());
+ "Second record does not exist: " + firstOfPair.getReadName());
}
}
@@ -175,10 +174,10 @@ public class SamPairUtil {
if (!firstEnd.getReferenceName().equals(secondEnd.getReferenceName())) {
return 0;
}
-
+
final int firstEnd5PrimePosition = firstEnd.getReadNegativeStrandFlag()? firstEnd.getAlignmentEnd(): firstEnd.getAlignmentStart();
final int secondEnd5PrimePosition = secondEnd.getReadNegativeStrandFlag()? secondEnd.getAlignmentEnd(): secondEnd.getAlignmentStart();
-
+
final int adjustment = (secondEnd5PrimePosition >= firstEnd5PrimePosition) ? +1 : -1;
return secondEnd5PrimePosition - firstEnd5PrimePosition + adjustment;
}
@@ -195,12 +194,14 @@ public class SamPairUtil {
rec1.setMateNegativeStrandFlag(rec2.getReadNegativeStrandFlag());
rec1.setMateUnmappedFlag(false);
rec1.setAttribute(SAMTag.MQ.name(), rec2.getMappingQuality());
+ rec1.setAttribute(SAMTag.MC.name(), rec2.getCigarString());
rec2.setMateReferenceIndex(rec1.getReferenceIndex());
rec2.setMateAlignmentStart(rec1.getAlignmentStart());
rec2.setMateNegativeStrandFlag(rec1.getReadNegativeStrandFlag());
rec2.setMateUnmappedFlag(false);
rec2.setAttribute(SAMTag.MQ.name(), rec1.getMappingQuality());
+ rec2.setAttribute(SAMTag.MC.name(), rec1.getCigarString());
}
// Else if they're both unmapped set that straight
else if (rec1.getReadUnmappedFlag() && rec2.getReadUnmappedFlag()) {
@@ -211,6 +212,7 @@ public class SamPairUtil {
rec1.setMateNegativeStrandFlag(rec2.getReadNegativeStrandFlag());
rec1.setMateUnmappedFlag(true);
rec1.setAttribute(SAMTag.MQ.name(), null);
+ rec1.setAttribute(SAMTag.MC.name(), null);
rec1.setInferredInsertSize(0);
rec2.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
@@ -220,6 +222,7 @@ public class SamPairUtil {
rec2.setMateNegativeStrandFlag(rec1.getReadNegativeStrandFlag());
rec2.setMateUnmappedFlag(true);
rec2.setAttribute(SAMTag.MQ.name(), null);
+ rec2.setAttribute(SAMTag.MC.name(), null);
rec2.setInferredInsertSize(0);
}
// And if only one is mapped copy it's coordinate information to the mate
@@ -233,12 +236,16 @@ public class SamPairUtil {
mapped.setMateAlignmentStart(unmapped.getAlignmentStart());
mapped.setMateNegativeStrandFlag(unmapped.getReadNegativeStrandFlag());
mapped.setMateUnmappedFlag(true);
+ // For the mapped read, set its mateCigar to null, since the other read must be unmapped
+ mapped.setAttribute(SAMTag.MC.name(), null);
mapped.setInferredInsertSize(0);
unmapped.setMateReferenceIndex(mapped.getReferenceIndex());
unmapped.setMateAlignmentStart(mapped.getAlignmentStart());
unmapped.setMateNegativeStrandFlag(mapped.getReadNegativeStrandFlag());
unmapped.setMateUnmappedFlag(false);
+ // For the unmapped read, set its mateCigar to the mate's Cigar, since the mate must be mapped
+ unmapped.setAttribute(SAMTag.MC.name(), mapped.getCigarString());
unmapped.setInferredInsertSize(0);
}
@@ -269,10 +276,10 @@ public class SamPairUtil {
setProperPairFlags(rec1, rec2, exepectedOrientations);
}
- public static void setProperPairFlags(SAMRecord rec1, SAMRecord rec2, List<PairOrientation> exepectedOrientations) {
- boolean properPair = (!rec1.getReadUnmappedFlag() && !rec2.getReadUnmappedFlag())
- ? isProperPair(rec1, rec2, exepectedOrientations)
- : false;
+ public static void setProperPairFlags(final SAMRecord rec1, final SAMRecord rec2, final List<PairOrientation> expectedOrientations) {
+ final boolean properPair = (!rec1.getReadUnmappedFlag() && !rec2.getReadUnmappedFlag())
+ ? isProperPair(rec1, rec2, expectedOrientations)
+ : false;
rec1.setProperPairFlag(properPair);
rec2.setProperPairFlag(properPair);
}
diff --git a/src/java/net/sf/picard/sam/SortSam.java b/src/java/net/sf/picard/sam/SortSam.java
index ee3c815..ab5d324 100644
--- a/src/java/net/sf/picard/sam/SortSam.java
+++ b/src/java/net/sf/picard/sam/SortSam.java
@@ -63,6 +63,8 @@ public class SortSam extends CommandLineProgram {
final SAMFileReader reader = new SAMFileReader(IoUtil.openFileForReading(INPUT));
reader.getFileHeader().setSortOrder(SORT_ORDER);
final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(reader.getFileHeader(), false, OUTPUT);
+ writer.setProgressLogger(
+ new ProgressLogger(log, (int) 1e7, "Wrote", "records from a sorting collection"));
final ProgressLogger progress = new ProgressLogger(log, (int) 1e7, "Read");
for (final SAMRecord rec: reader) {
diff --git a/src/java/net/sf/picard/util/ProgressLogger.java b/src/java/net/sf/picard/util/ProgressLogger.java
index 93ddaa3..f532076 100644
--- a/src/java/net/sf/picard/util/ProgressLogger.java
+++ b/src/java/net/sf/picard/util/ProgressLogger.java
@@ -1,6 +1,7 @@
package net.sf.picard.util;
import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.ProgressLoggerInterface;
import java.text.DecimalFormat;
import java.text.NumberFormat;
@@ -11,7 +12,7 @@ import java.text.NumberFormat;
*
* @author Tim Fennell
*/
-public class ProgressLogger {
+public class ProgressLogger implements ProgressLoggerInterface {
private final Log log;
private final int n;
private final String verb;
@@ -22,7 +23,9 @@ public class ProgressLogger {
private final NumberFormat timeFmt = new DecimalFormat("00");
private long processed = 0;
- private long lastStartTime = startTime;
+
+ // Set to -1 until the first record is added
+ private long lastStartTime = -1;
/**
* Construct a progress logger.
@@ -62,7 +65,8 @@ public class ProgressLogger {
public ProgressLogger(final Log log) { this(log, 1000000); }
public synchronized boolean record(final String chrom, final int pos) {
- if (++this.processed % this.n == 0) {
+ if (this.lastStartTime == -1) this.lastStartTime = System.currentTimeMillis();
+ if (++this.processed % this.n == 0) {
final long now = System.currentTimeMillis();
final long lastPeriodSeconds = (now - this.lastStartTime) / 1000;
this.lastStartTime = now;
diff --git a/src/java/net/sf/samtools/AsyncSAMFileWriter.java b/src/java/net/sf/samtools/AsyncSAMFileWriter.java
index ec7c824..5543c83 100644
--- a/src/java/net/sf/samtools/AsyncSAMFileWriter.java
+++ b/src/java/net/sf/samtools/AsyncSAMFileWriter.java
@@ -1,12 +1,7 @@
package net.sf.samtools;
import net.sf.samtools.util.AbstractAsyncWriter;
-
-import java.util.concurrent.ArrayBlockingQueue;
-import java.util.concurrent.BlockingQueue;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicReference;
+import net.sf.samtools.util.ProgressLoggerInterface;
/**
* SAMFileWriter that can be wrapped around an underlying SAMFileWriter to provide asynchronous output. Records
@@ -19,6 +14,7 @@ import java.util.concurrent.atomic.AtomicReference;
* @author Tim Fennell
*/
class AsyncSAMFileWriter extends AbstractAsyncWriter<SAMRecord> implements SAMFileWriter {
+
private final SAMFileWriter underlyingWriter;
/**
@@ -43,6 +39,11 @@ class AsyncSAMFileWriter extends AbstractAsyncWriter<SAMRecord> implements SAMFi
@Override protected final String getThreadNamePrefix() { return "SAMFileWriterThread-"; }
+ @Override
+ public void setProgressLogger(final ProgressLoggerInterface progress) {
+ this.underlyingWriter.setProgressLogger(progress);
+ }
+
/**
* Adds an alignment to the queue to be written. Will re-throw any exception that was received when
* writing prior record(s) to the underlying SAMFileWriter.
diff --git a/src/java/net/sf/samtools/SAMFileWriter.java b/src/java/net/sf/samtools/SAMFileWriter.java
index 18931ec..46790fc 100644
--- a/src/java/net/sf/samtools/SAMFileWriter.java
+++ b/src/java/net/sf/samtools/SAMFileWriter.java
@@ -23,15 +23,24 @@
*/
package net.sf.samtools;
+import net.sf.samtools.util.ProgressLoggerInterface;
+
/**
* Interface for SAMText and BAM file writers. Clients need not care which they write to,
* once the object is constructed.
*/
public interface SAMFileWriter {
- void addAlignment(SAMRecord alignment);
+
+ void addAlignment(SAMRecord alignment);
SAMFileHeader getFileHeader();
+ /**
+ * Sets a ProgressLogger on this writer. This is useful when pulling, for instance, from a
+ * SortingCollection.
+ */
+ void setProgressLogger(final ProgressLoggerInterface progress);
+
/**
* Must be called to flush or file will likely be defective.
*/
diff --git a/src/java/net/sf/samtools/SAMFileWriterFactory.java b/src/java/net/sf/samtools/SAMFileWriterFactory.java
index 5b3e675..cfc495b 100644
--- a/src/java/net/sf/samtools/SAMFileWriterFactory.java
+++ b/src/java/net/sf/samtools/SAMFileWriterFactory.java
@@ -28,10 +28,7 @@ import net.sf.samtools.util.IOUtil;
import net.sf.samtools.util.Md5CalculatingOutputStream;
import net.sf.samtools.util.RuntimeIOException;
-import java.io.File;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.OutputStream;
+import java.io.*;
/**
* Create a SAMFileWriter for writing SAM or BAM.
@@ -43,6 +40,7 @@ public class SAMFileWriterFactory {
private boolean createMd5File = defaultCreateMd5File;
private boolean useAsyncIo = Defaults.USE_ASYNC_IO;
private int asyncOutputBufferSize = AsyncSAMFileWriter.DEFAULT_QUEUE_SIZE;
+ private int bufferSize = Defaults.BUFFER_SIZE;
private File tmpDir;
@@ -96,7 +94,7 @@ public class SAMFileWriterFactory {
* @param maxRecordsInRam Number of records to store in RAM before spilling to temporary file when
* creating a sorted SAM or BAM file.
*/
- public SAMFileWriterFactory setMaxRecordsInRam(int maxRecordsInRam) {
+ public SAMFileWriterFactory setMaxRecordsInRam(final int maxRecordsInRam) {
this.maxRecordsInRam = maxRecordsInRam;
return this;
}
@@ -105,18 +103,30 @@ public class SAMFileWriterFactory {
* Turn on or off the use of asynchronous IO for writing output SAM and BAM files. If true then
* each SAMFileWriter creates a dedicated thread which is used for compression and IO activities.
*/
- public void setUseAsyncIo(final boolean useAsyncIo) {
+ public SAMFileWriterFactory setUseAsyncIo(final boolean useAsyncIo) {
this.useAsyncIo = useAsyncIo;
+ return this;
}
/**
* If and only if using asynchronous IO then sets the maximum number of records that can be buffered per
* SAMFileWriter before producers will block when trying to write another SAMRecord.
*/
- public void setAsyncOutputBufferSize(final int asyncOutputBufferSize) {
+ public SAMFileWriterFactory setAsyncOutputBufferSize(final int asyncOutputBufferSize) {
this.asyncOutputBufferSize = asyncOutputBufferSize;
+ return this;
}
-
+
+ /**
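+ * Controls the size, in bytes, of the write buffer. makeBAMWriter wraps the output stream in a
+ * BufferedOutputStream only when this value is greater than 0.
+ * Default value: {@link net.sf.samtools.Defaults#BUFFER_SIZE}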
+ */
+ public SAMFileWriterFactory setBufferSize(final int bufferSize) {
+ this.bufferSize = bufferSize;
+ return this;
+ }
+
/**
* Set the temporary directory to use when sort data.
* @param tmpDir Path to the temporary directory
@@ -147,15 +157,15 @@ public class SAMFileWriterFactory {
public SAMFileWriter makeBAMWriter(final SAMFileHeader header, final boolean presorted, final File outputFile,
final int compressionLevel) {
try {
- boolean createMd5File = this.createMd5File && IOUtil.isRegularPath(outputFile);
+ final boolean createMd5File = this.createMd5File && IOUtil.isRegularPath(outputFile);
if (this.createMd5File && !createMd5File) {
System.err.println("Cannot create MD5 file for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
}
- final BAMFileWriter ret = createMd5File
- ? new BAMFileWriter(new Md5CalculatingOutputStream(new FileOutputStream(outputFile, false),
- new File(outputFile.getAbsolutePath() + ".md5")), outputFile, compressionLevel)
- : new BAMFileWriter(outputFile, compressionLevel);
- boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
+ OutputStream os = new FileOutputStream(outputFile, false);
+ if (bufferSize > 0) os = new BufferedOutputStream(os, bufferSize);
+ if (createMd5File) os = new Md5CalculatingOutputStream(os, new File(outputFile.getAbsolutePath() + ".md5"));
+ final BAMFileWriter ret = new BAMFileWriter(os, outputFile, compressionLevel);
+ final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
if (this.createIndex && !createIndex) {
System.err.println("Cannot create index for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
}
@@ -165,7 +175,7 @@ public class SAMFileWriterFactory {
if (this.useAsyncIo) return new AsyncSAMFileWriter(ret, this.asyncOutputBufferSize);
else return ret;
}
- catch (IOException ioe) {
+ catch (final IOException ioe) {
throw new RuntimeIOException("Error opening file: " + outputFile.getAbsolutePath());
}
}
@@ -202,7 +212,7 @@ public class SAMFileWriterFactory {
if (this.useAsyncIo) return new AsyncSAMFileWriter(ret, this.asyncOutputBufferSize);
else return ret;
}
- catch (IOException ioe) {
+ catch (final IOException ioe) {
throw new RuntimeIOException("Error opening file: " + outputFile.getAbsolutePath());
}
}
diff --git a/src/java/net/sf/samtools/SAMFileWriterImpl.java b/src/java/net/sf/samtools/SAMFileWriterImpl.java
index fbd6545..2e752c8 100644
--- a/src/java/net/sf/samtools/SAMFileWriterImpl.java
+++ b/src/java/net/sf/samtools/SAMFileWriterImpl.java
@@ -23,6 +23,7 @@
*/
package net.sf.samtools;
+import net.sf.samtools.util.ProgressLoggerInterface;
import net.sf.samtools.util.SortingCollection;
import java.io.File;
@@ -42,6 +43,7 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
private SAMFileHeader header;
private SortingCollection<SAMRecord> alignmentSorter;
private File tmpDir = new File(System.getProperty("java.io.tmpdir"));
+ private ProgressLoggerInterface progressLogger = null;
// If true, records passed to addAlignment are already in the order specified by sortOrder
private boolean presorted;
@@ -67,8 +69,15 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
public static int getDefaultMaxRecordsInRam() {
return DEAFULT_MAX_RECORDS_IN_RAM;
}
-
-
+
+ /**
+ * Sets the progress logger used by this implementation. Setting this lets this writer emit log
+ * messages as SAM records in a SortingCollection are being written to disk.
+ */
+ public void setProgressLogger(final ProgressLoggerInterface progress) {
+ this.progressLogger = progress;
+ }
+
/**
* Must be called before calling setHeader(). SortOrder value in the header passed
* to setHeader() is ignored. If setSortOrder is not called, default is SortOrder.unsorted.
@@ -189,6 +198,7 @@ public abstract class SAMFileWriterImpl implements SAMFileWriter
if (alignmentSorter != null) {
for (final SAMRecord alignment : alignmentSorter) {
writeAlignment(alignment);
+ if (progressLogger != null) progressLogger.record(alignment);
}
alignmentSorter.cleanup();
}
diff --git a/src/java/net/sf/samtools/SAMHeaderRecordComparator.java b/src/java/net/sf/samtools/SAMHeaderRecordComparator.java
new file mode 100644
index 0000000..8f01cea
--- /dev/null
+++ b/src/java/net/sf/samtools/SAMHeaderRecordComparator.java
@@ -0,0 +1,66 @@
+package net.sf.samtools;
+
+/**
+ * The MIT License
+ * <p/>
+ * Copyright (c) 2014 The Broad Institute
+ * <p/>
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * <p/>
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * <p/>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+import java.util.Comparator;
+
+/**
+ * Provides ordering based on SAM header records' attribute values. Provide the list of attributes to use
+ * in the comparison to the constructor. Null attribute values (i.e., those attributes not present in the
+ * record) sort ahead of those that have values (compare() returns -1 when only the left value is null).
+ */
+public class SAMHeaderRecordComparator<T extends AbstractSAMHeaderRecord> implements Comparator<T> {
+
+ private final String[] attributes;
+
+ public SAMHeaderRecordComparator(final String... attributes) {
+ this.attributes = attributes;
+ }
+
+ @Override
+ public int compare(final T left, final T right) {
+ for (final String attribute : attributes) {
+ final String leftValue = left.getAttribute(attribute);
+ final String rightValue = right.getAttribute(attribute);
+
+ if (leftValue == null) {
+ // Fastest comparison possible; two empty values are
+ // equivalent, so move along to the next attribute
+ if (rightValue == null) continue;
+
+ // Otherwise left < right, since right has a value
+ else return -1;
+ }
+
+ // left is not null; if right is, left > right
+ if (rightValue == null) return 1;
+
+ final int compare = leftValue.compareTo(rightValue);
+ if (compare != 0) return compare;
+ }
+
+ return 0;
+ }
+}
diff --git a/src/java/net/sf/samtools/SAMRecord.java b/src/java/net/sf/samtools/SAMRecord.java
index 372b636..6d0b378 100644
--- a/src/java/net/sf/samtools/SAMRecord.java
+++ b/src/java/net/sf/samtools/SAMRecord.java
@@ -77,6 +77,10 @@ import java.util.List;
* By default, extensive validation of SAMRecords is done when they are read. Very limited validation is done when
* values are set onto SAMRecords.
*/
+/**
+ * @author alecw at broadinstitute.org
+ * @author mishali.naik at intel.com
+ */
public class SAMRecord implements Cloneable
{
/**
@@ -468,19 +472,32 @@ public class SAMRecord implements Cloneable
* Invalid to call on an unmapped read.
*/
public int getUnclippedStart() {
- int pos = getAlignmentStart();
+ return getUnclippedStart(getAlignmentStart(), getCigar());
+ }
- for (final CigarElement cig : getCigar().getCigarElements()) {
+ /**
+ * @param alignmentStart The start (1-based) of the alignment
+ * @param cigar The cigar containing the alignment information
+ * @return the alignment start (1-based, inclusive) adjusted for clipped bases. For example if the read
+ * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped)
+ * then this method will return 96.
+ *
+ * Invalid to call on an unmapped read.
+ * Invalid to call with cigar = null
+ */
+ private int getUnclippedStart(final int alignmentStart, final Cigar cigar) {
+ int unClippedStart = alignmentStart;
+ for (final CigarElement cig : cigar.getCigarElements()) {
final CigarOperator op = cig.getOperator();
if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) {
- pos -= cig.getLength();
+ unClippedStart -= cig.getLength();
}
else {
break;
}
}
- return pos;
+ return unClippedStart;
}
/**
@@ -491,21 +508,35 @@ public class SAMRecord implements Cloneable
* Invalid to call on an unmapped read.
*/
public int getUnclippedEnd() {
- int pos = getAlignmentEnd();
- final List<CigarElement> cigs = getCigar().getCigarElements();
+ return getUnclippedEnd(getAlignmentEnd(), getCigar());
+ }
+
+ /**
+ * @param alignmentEnd The end (1-based) of the alignment
+ * @param cigar The cigar containing the alignment information
+ * @return the alignment end (1-based, inclusive) adjusted for clipped bases. For example if the read
+ * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped)
+ * then this method will return 107.
+ *
+ * Invalid to call on an unmapped read.
+ * Invalid to call with cigar = null
+ */
+ private int getUnclippedEnd(final int alignmentEnd, final Cigar cigar) {
+ int unClippedEnd = alignmentEnd;
+ final List<CigarElement> cigs = cigar.getCigarElements();
for (int i=cigs.size() - 1; i>=0; --i) {
final CigarElement cig = cigs.get(i);
final CigarOperator op = cig.getOperator();
if (op == CigarOperator.SOFT_CLIP || op == CigarOperator.HARD_CLIP) {
- pos += cig.getLength();
+ unClippedEnd += cig.getLength();
}
else {
break;
}
}
- return pos;
+ return unClippedEnd;
}
/**
@@ -554,6 +585,58 @@ public class SAMRecord implements Cloneable
}
/**
+ * This method uses the MateCigar value as determined from the attribute MC. It must be non-null.
+ * @return 1-based inclusive rightmost position of the clipped mate sequence. Throws if the mate is unmapped or has no MC tag.
+ */
+ public int getMateAlignmentEnd() {
+ if (getMateUnmappedFlag()) {
+ throw new RuntimeException("getMateAlignmentEnd called on an unmapped mate.");
+ }
+ final Cigar mateCigar = getMateCigar();
+ if (mateCigar == null) {
+ throw new SAMException("Mate CIGAR (Tag MC) not found.");
+ }
+ return CoordMath.getEnd(getMateAlignmentStart(), mateCigar.getReferenceLength());
+ }
+
+ /**
+ * @return the mate alignment start (1-based, inclusive) adjusted for clipped bases. For example if the mate
+ * has an alignment start of 100 but the first 4 bases were clipped (hard or soft clipped)
+ * then this method will return 96.
+ *
+ * Invalid to call when the mate is unmapped or the Mate CIGAR (MC tag) is absent; an exception is thrown.
+ */
+ public int getMateUnclippedStart() {
+ if (getMateUnmappedFlag())
+ throw new RuntimeException("getMateUnclippedStart called on an unmapped mate.");
+ final Cigar mateCigar = getMateCigar();
+ if (mateCigar == null) {
+ throw new SAMException("Mate CIGAR (Tag MC) not found.");
+ }
+ return getUnclippedStart(getMateAlignmentStart(), mateCigar);
+ }
+
+
+ /**
+ * @return the mate alignment end (1-based, inclusive) adjusted for clipped bases. For example if the mate
+ * has an alignment end of 100 but the last 7 bases were clipped (hard or soft clipped)
+ * then this method will return 107.
+ *
+ * Invalid to call when the mate is unmapped or the Mate CIGAR (MC tag) is absent; an exception is thrown.
+ */
+ public int getMateUnclippedEnd() {
+ if (getMateUnmappedFlag()) {
+ throw new RuntimeException("getMateUnclippedEnd called on an unmapped mate.");
+ }
+ final Cigar mateCigar = getMateCigar();
+ if (mateCigar == null) {
+ throw new SAMException("Mate CIGAR (Tag MC) not found.");
+ }
+ return getUnclippedEnd(getMateAlignmentEnd(), mateCigar);
+ }
+
+
+ /**
* @return insert size (difference btw 5' end of read & 5' end of mate), if possible, else 0.
* Negative if mate maps to lower position than read.
*/
@@ -636,6 +719,39 @@ public class SAMRecord implements Cloneable
}
/**
+ * Returns the Mate Cigar String as stored in the attribute 'MC'.
+ * @return Mate Cigar String, or null if there is none.
+ */
+ public String getMateCigarString() {
+ return getStringAttribute(SAMTag.MC.name());
+ }
+
+ /**
+ * Returns the Mate Cigar or null if there is none.
+ * @return Cigar object for the read's mate, or null if there is none.
+ */
+ public Cigar getMateCigar() {
+ final String mateCigarString = getMateCigarString();
+ Cigar mateCigar = null;
+ if (mateCigarString != null) {
+ mateCigar = TextCigarCodec.getSingleton().decode(mateCigarString);
+ if (getValidationStringency() != SAMFileReader.ValidationStringency.SILENT) {
+ final List<AlignmentBlock> alignmentBlocks = getAlignmentBlocks(mateCigar, getMateAlignmentStart(), "mate cigar");
+ SAMUtils.processValidationErrors(validateCigar(mateCigar, getMateReferenceIndex(), alignmentBlocks, -1, "Mate CIGAR"), -1L, getValidationStringency());
+ }
+ }
+ return mateCigar;
+ }
+
+ /**
+ * @return number of cigar elements (number + operator) in the mate cigar string.
+ */
+ public int getMateCigarLength() {
+ final Cigar mateCigar = getMateCigar();
+ return (mateCigar != null) ? mateCigar.numCigarElements() : 0;
+ }
+
+ /**
* Get the SAMReadGroupRecord for this SAMRecord.
* @return The SAMReadGroupRecord from the SAMFileHeader for this SAMRecord, or null if
* 1) this record has no RG tag, or 2) the header doesn't contain the read group with
@@ -763,8 +879,8 @@ public class SAMRecord implements Cloneable
*/
public boolean getSupplementaryAlignmentFlag() {
return (mFlags & SUPPLEMENTARY_ALIGNMENT_FLAG) != 0;
- }
-
+ }
+
/**
* the read fails platform/vendor quality checks.
*/
@@ -881,7 +997,7 @@ public class SAMRecord implements Cloneable
public boolean isSecondaryOrSupplementary() {
return getNotPrimaryAlignmentFlag() || getSupplementaryAlignmentFlag();
}
-
+
private void setFlag(final boolean flag, final int bit) {
if (flag) {
mFlags |= bit;
@@ -1160,8 +1276,8 @@ public class SAMRecord implements Cloneable
protected void setAttribute(final short tag, final Object value, final boolean isUnsignedArray) {
if (value != null &&
!(value instanceof Byte || value instanceof Short || value instanceof Integer ||
- value instanceof String || value instanceof Character || value instanceof Float ||
- value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
+ value instanceof String || value instanceof Character || value instanceof Float ||
+ value instanceof byte[] || value instanceof short[] || value instanceof int[] ||
value instanceof float[])) {
throw new SAMException("Attribute type " + value.getClass() + " not supported. Tag: " +
SAMTagUtil.getSingleton().makeStringTag(tag));
@@ -1345,7 +1461,7 @@ public class SAMRecord implements Cloneable
if (value == null || value instanceof String) {
return tagString + ":Z:" + value;
} else if (value instanceof Integer || value instanceof Long ||
- value instanceof Short || value instanceof Byte) {
+ value instanceof Short || value instanceof Byte) {
return tagString + ":i:" + value;
} else if (value instanceof Character) {
return tagString + ":A:" + value;
@@ -1355,7 +1471,7 @@ public class SAMRecord implements Cloneable
return tagString + ":H:" + StringUtil.bytesToHexString((byte[]) value);
} else {
throw new RuntimeException("Unexpected value type for tag " + tagString +
- ": " + value + " of class " + value.getClass().getName());
+ ": " + value + " of class " + value.getClass().getName());
}
}
@@ -1384,15 +1500,37 @@ public class SAMRecord implements Cloneable
* deleted bases (vs. the reference) are not represented in the alignment blocks.
*/
public List<AlignmentBlock> getAlignmentBlocks() {
- if (this.mAlignmentBlocks != null) return this.mAlignmentBlocks;
+ if (this.mAlignmentBlocks == null) {
+ this.mAlignmentBlocks = getAlignmentBlocks(getCigar(), getAlignmentStart(), "read cigar");
+ }
+ return this.mAlignmentBlocks;
+ }
- final Cigar cigar = getCigar();
- if (cigar == null) return Collections.emptyList();
+ /**
+ * Returns blocks of the mate sequence that have been aligned directly to the
+ * reference sequence. Note that clipped portions of the mate and inserted and
+ * deleted bases (vs. the reference) are not represented in the alignment blocks.
+ */
+ public List<AlignmentBlock> getMateAlignmentBlocks() {
+ return getAlignmentBlocks(getMateCigar(), getMateAlignmentStart(), "mate cigar");
+ }
+ /**
+ * Given a Cigar, Returns blocks of the sequence that have been aligned directly to the
+ * reference sequence. Note that clipped portions, and inserted and deleted bases (vs. the reference)
+ * are not represented in the alignment blocks.
+ *
+ * @param cigar The cigar containing the alignment information
+ * @param alignmentStart The start (1-based) of the alignment
+ * @param cigarTypeName The type of cigar passed - for error logging.
+ * @return List of alignment blocks
+ */
+ private List<AlignmentBlock> getAlignmentBlocks(final Cigar cigar, final int alignmentStart, final String cigarTypeName) {
+ if (cigar == null) return Collections.emptyList();
final List<AlignmentBlock> alignmentBlocks = new ArrayList<AlignmentBlock>();
int readBase = 1;
- int refBase = getAlignmentStart();
+ int refBase = alignmentStart;
for (final CigarElement e : cigar.getCigarElements()) {
switch (e.getOperator()) {
@@ -1410,14 +1548,13 @@ public class SAMRecord implements Cloneable
readBase += length;
refBase += length;
break;
- default : throw new IllegalStateException("Case statement didn't deal with cigar op: " + e.getOperator());
+ default : throw new IllegalStateException("Case statement didn't deal with " + cigarTypeName + " op: " + e.getOperator());
}
}
- this.mAlignmentBlocks = Collections.unmodifiableList(alignmentBlocks);
-
- return this.mAlignmentBlocks;
+ return Collections.unmodifiableList(alignmentBlocks);
}
+
/**
* Run all validations of CIGAR. These include validation that the CIGAR makes sense independent of
* placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference.
@@ -1428,24 +1565,70 @@ public class SAMRecord implements Cloneable
List<SAMValidationError> ret = null;
if (getValidationStringency() != SAMFileReader.ValidationStringency.SILENT && !this.getReadUnmappedFlag()) {
- // Don't know line number, and don't want to force read name to be decoded.
- ret = getCigar().isValid(getReadName(), recordNumber);
- if (getReferenceIndex() != NO_ALIGNMENT_REFERENCE_INDEX) {
- final SAMSequenceRecord sequence = getHeader().getSequence(getReferenceIndex());
- final int referenceSequenceLength = sequence.getSequenceLength();
- for (final AlignmentBlock alignmentBlock : getAlignmentBlocks()) {
- if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
- if (ret == null) ret = new ArrayList<SAMValidationError>();
- ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
- "CIGAR M operator maps off end of reference", getReadName(), recordNumber));
- break;
- }
+ ret = validateCigar(getCigar(), getReferenceIndex(), getAlignmentBlocks(), recordNumber, "Read CIGAR");
+ }
+ return ret;
+ }
+
+ /**
+ * Run all validations of the mate's CIGAR. These include validation that the CIGAR makes sense independent of
+ * placement, plus validation that CIGAR + placement yields all bases with M operator within the range of the reference.
+ * @param recordNumber For error reporting. -1 if not known.
+ * @return List of errors, or null if no errors.
+ */
+ public List<SAMValidationError> validateMateCigar(final long recordNumber) {
+ List<SAMValidationError> ret = null;
+
+ if (getValidationStringency() != SAMFileReader.ValidationStringency.SILENT) {
+ if (!this.getMateUnmappedFlag()) { // The mateCigar will be defined if the mate is mapped
+ if (getMateCigarString() != null) {
+ ret = validateCigar(getMateCigar(), getMateReferenceIndex(), getMateAlignmentBlocks(), recordNumber, "Mate CIGAR");
+ }
+ } else {
+ if (getMateCigarString() != null) {
+ // If the Mate is unmapped, and the Mate Cigar String (MC Attribute) exists, that is a validation error.
+ ret = new ArrayList<SAMValidationError>();
+ ret.add(new SAMValidationError(SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE,
+ "Mate CIGAR String (MC Attribute) present for a read whose mate is unmapped", getReadName(), recordNumber));
}
}
}
+
return ret;
}
+    /**
+     * Run all validations of a CIGAR (the read's own or its mate's). These include validation that the CIGAR makes
+     * sense independent of placement, plus validation that CIGAR + placement yields all bases with M operator
+     * within the range of the reference.
+     * @param cigar The cigar containing the alignment information
+     * @param referenceIndex The index of the reference sequence the cigar is placed on
+     * @param alignmentBlocks The alignment blocks (parsed from the cigar)
+     * @param recordNumber For error reporting.  -1 if not known.
+     * @param cigarTypeName For error reporting.  "Read CIGAR" or "Mate CIGAR"
+     * @return List of errors, or null if no errors.
+     */
+    private List<SAMValidationError> validateCigar(final Cigar cigar, final Integer referenceIndex, final List<AlignmentBlock> alignmentBlocks,
+                                                   final long recordNumber, final String cigarTypeName) {
+        // Don't know line number, and don't want to force read name to be decoded.
+        List<SAMValidationError> ret = cigar.isValid(getReadName(), recordNumber);
+        if (referenceIndex != NO_ALIGNMENT_REFERENCE_INDEX) {
+            // Look up the sequence the CIGAR is placed on (the mate's when validating MC), not this read's own.
+            final SAMSequenceRecord sequence = getHeader().getSequence(referenceIndex);
+            final int referenceSequenceLength = sequence.getSequenceLength();
+            for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
+                if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
+                    if (ret == null) ret = new ArrayList<SAMValidationError>();
+                    ret.add(new SAMValidationError(SAMValidationError.Type.CIGAR_MAPS_OFF_REFERENCE,
+                            cigarTypeName + " M operator maps off end of reference", getReadName(), recordNumber));
+                    break;
+                }
+            }
+        }
+        return ret;
+    }
+
+
@Override
public boolean equals(final Object o) {
if (this == o) return true;
@@ -1637,7 +1820,7 @@ public class SAMRecord implements Cloneable
ret.addAll(errors);
}
// TODO(mccowan): Is this asking "is this the primary alignment"?
- if (this.getReadLength() == 0 && !this.getNotPrimaryAlignmentFlag()) {
+ if (this.getReadLength() == 0 && !this.getNotPrimaryAlignmentFlag()) {
final Object fz = getAttribute(SAMTagUtil.getSingleton().FZ);
if (fz == null) {
final String cq = (String)getAttribute(SAMTagUtil.getSingleton().CQ);
@@ -1701,7 +1884,7 @@ public class SAMRecord implements Cloneable
}
private List<SAMValidationError> isValidReferenceIndexAndPosition(final Integer referenceIndex, final String referenceName,
- final int alignmentStart, final boolean isMate) {
+ final int alignmentStart, final boolean isMate) {
final boolean hasReference = hasReferenceName(referenceIndex, referenceName);
// ret is only instantiate if there are errors to report, in order to reduce GC in the typical case
@@ -1735,7 +1918,7 @@ public class SAMRecord implements Cloneable
}
return ret;
}
-
+
private String buildMessage(final String baseMessage, final boolean isMate) {
return isMate ? "Mate " + baseMessage : baseMessage;
}
@@ -1784,11 +1967,11 @@ public class SAMRecord implements Cloneable
}
/**
- Returns the record in the SAM line-based text format. Fields are
- separated by '\t' characters, and the String is terminated by '\n'.
- */
+ Returns the record in the SAM line-based text format. Fields are
+ separated by '\t' characters, and the String is terminated by '\n'.
+ */
public String getSAMString() {
- return SAMTextWriter.getSAMString(this);
+ return SAMTextWriter.getSAMString(this);
}
}
diff --git a/src/java/net/sf/samtools/SAMRecordSetBuilder.java b/src/java/net/sf/samtools/SAMRecordSetBuilder.java
index 45d119a..c2d876d 100644
--- a/src/java/net/sf/samtools/SAMRecordSetBuilder.java
+++ b/src/java/net/sf/samtools/SAMRecordSetBuilder.java
@@ -51,10 +51,10 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
private static final String SAMPLE = "FREE_SAMPLE";
private final Random random = new Random();
- private SAMFileHeader header;
- private Collection<SAMRecord> records;
+ private final SAMFileHeader header;
+ private final Collection<SAMRecord> records;
- private int readLength = 36 ;
+ private final int readLength = 36 ;
private SAMProgramRecord programRecord = null;
private SAMReadGroupRecord readGroup = null;
@@ -77,7 +77,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder) {
this(sortForMe, sortOrder, true) ;
}
- public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder, boolean addReadGroup) {
+ public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder, final boolean addReadGroup) {
final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>();
for (final String chrom : chroms) {
final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(chrom, 1000000);
@@ -210,6 +210,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
end1.setReadPairedFlag(true);
end1.setProperPairFlag(true);
end1.setMateReferenceIndex(contig);
+ end1.setAttribute(SAMTag.MC.name(), readLength + "M");
end1.setMateAlignmentStart(start2);
end1.setMateNegativeStrandFlag(true);
end1.setFirstOfPairFlag(end1IsFirstOfPair);
@@ -233,6 +234,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
end2.setReadPairedFlag(true);
end2.setProperPairFlag(true);
end2.setMateReferenceIndex(contig);
+ end2.setAttribute(SAMTag.MC.name(), readLength + "M");
end2.setMateAlignmentStart(start1);
end2.setMateNegativeStrandFlag(false);
end2.setFirstOfPairFlag(!end1IsFirstOfPair);
@@ -260,6 +262,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
end1.setReadName(name);
end1.setReadPairedFlag(false);
end1.setReadUnmappedFlag(true);
+ end1.setAttribute(SAMTag.MC.name(), null);
end1.setProperPairFlag(false);
end1.setFirstOfPairFlag(end1IsFirstOfPair);
end1.setSecondOfPairFlag(!end1IsFirstOfPair);
@@ -272,6 +275,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
end2.setReadName(name);
end2.setReadPairedFlag(false);
end2.setReadUnmappedFlag(true);
+ end2.setAttribute(SAMTag.MC.name(), null);
end2.setProperPairFlag(false);
end2.setFirstOfPairFlag(!end1IsFirstOfPair);
end2.setSecondOfPairFlag(end1IsFirstOfPair);
@@ -313,7 +317,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
try {
tempFile = File.createTempFile("temp", ".sam");
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new RuntimeIOException("problems creating tempfile", e);
}
diff --git a/src/java/net/sf/samtools/SAMRecordUtil.java b/src/java/net/sf/samtools/SAMRecordUtil.java
index 045c880..69b5972 100644
--- a/src/java/net/sf/samtools/SAMRecordUtil.java
+++ b/src/java/net/sf/samtools/SAMRecordUtil.java
@@ -78,5 +78,4 @@ public class SAMRecordUtil {
array[j] = tmp;
}
}
-
}
diff --git a/src/java/net/sf/samtools/SAMTag.java b/src/java/net/sf/samtools/SAMTag.java
index 32e8791..8854f29 100644
--- a/src/java/net/sf/samtools/SAMTag.java
+++ b/src/java/net/sf/samtools/SAMTag.java
@@ -28,5 +28,5 @@ package net.sf.samtools;
*/
public enum SAMTag {
RG, LB, PU, PG, AS, SQ, MQ, NM, H0, H1, H2, UQ, PQ, NH, IH, HI, MD, CS, CQ, CM, R2, Q2, S2, CC, CP, SM, AM, MF,
- E2, U2, OQ
+ E2, U2, OQ, FZ, SA, MC
}
diff --git a/src/java/net/sf/samtools/SAMTagUtil.java b/src/java/net/sf/samtools/SAMTagUtil.java
index d5e4267..ba108fc 100644
--- a/src/java/net/sf/samtools/SAMTagUtil.java
+++ b/src/java/net/sf/samtools/SAMTagUtil.java
@@ -36,38 +36,40 @@ import net.sf.samtools.util.StringUtil;
public class SAMTagUtil {
// Standard tags pre-computed for convenience
- public final short RG = makeBinaryTag("RG");
- public final short LB = makeBinaryTag("LB");
- public final short PU = makeBinaryTag("PU");
- public final short PG = makeBinaryTag("PG");
- public final short AS = makeBinaryTag("AS");
- public final short SQ = makeBinaryTag("SQ");
- public final short MQ = makeBinaryTag("MQ");
- public final short NM = makeBinaryTag("NM");
- public final short H0 = makeBinaryTag("H0");
- public final short H1 = makeBinaryTag("H1");
- public final short H2 = makeBinaryTag("H2");
- public final short UQ = makeBinaryTag("UQ");
- public final short PQ = makeBinaryTag("PQ");
- public final short NH = makeBinaryTag("NH");
- public final short IH = makeBinaryTag("IH");
- public final short HI = makeBinaryTag("HI");
- public final short MD = makeBinaryTag("MD");
- public final short CS = makeBinaryTag("CS");
- public final short CQ = makeBinaryTag("CQ");
- public final short CM = makeBinaryTag("CM");
- public final short R2 = makeBinaryTag("R2");
- public final short Q2 = makeBinaryTag("Q2");
- public final short S2 = makeBinaryTag("S2");
- public final short CC = makeBinaryTag("CC");
- public final short CP = makeBinaryTag("CP");
- public final short SM = makeBinaryTag("SM");
- public final short AM = makeBinaryTag("AM");
- public final short MF = makeBinaryTag("MF");
- public final short E2 = makeBinaryTag("E2");
- public final short U2 = makeBinaryTag("U2");
- public final short OQ = makeBinaryTag("OQ");
- public final short FZ = makeBinaryTag("FZ");
+ public final short RG = makeBinaryTag(SAMTag.RG.name());
+ public final short LB = makeBinaryTag(SAMTag.LB.name());
+ public final short PU = makeBinaryTag(SAMTag.PU.name());
+ public final short PG = makeBinaryTag(SAMTag.PG.name());
+ public final short AS = makeBinaryTag(SAMTag.AS.name());
+ public final short SQ = makeBinaryTag(SAMTag.SQ.name());
+ public final short MQ = makeBinaryTag(SAMTag.MQ.name());
+ public final short NM = makeBinaryTag(SAMTag.NM.name());
+ public final short H0 = makeBinaryTag(SAMTag.H0.name());
+ public final short H1 = makeBinaryTag(SAMTag.H1.name());
+ public final short H2 = makeBinaryTag(SAMTag.H2.name());
+ public final short UQ = makeBinaryTag(SAMTag.UQ.name());
+ public final short PQ = makeBinaryTag(SAMTag.PQ.name());
+ public final short NH = makeBinaryTag(SAMTag.NH.name());
+ public final short IH = makeBinaryTag(SAMTag.IH.name());
+ public final short HI = makeBinaryTag(SAMTag.HI.name());
+ public final short MD = makeBinaryTag(SAMTag.MD.name());
+ public final short CS = makeBinaryTag(SAMTag.CS.name());
+ public final short CQ = makeBinaryTag(SAMTag.CQ.name());
+ public final short CM = makeBinaryTag(SAMTag.CM.name());
+ public final short R2 = makeBinaryTag(SAMTag.R2.name());
+ public final short Q2 = makeBinaryTag(SAMTag.Q2.name());
+ public final short S2 = makeBinaryTag(SAMTag.S2.name());
+ public final short CC = makeBinaryTag(SAMTag.CC.name());
+ public final short CP = makeBinaryTag(SAMTag.CP.name());
+ public final short SM = makeBinaryTag(SAMTag.SM.name());
+ public final short AM = makeBinaryTag(SAMTag.AM.name());
+ public final short MF = makeBinaryTag(SAMTag.MF.name());
+ public final short E2 = makeBinaryTag(SAMTag.E2.name());
+ public final short U2 = makeBinaryTag(SAMTag.U2.name());
+ public final short OQ = makeBinaryTag(SAMTag.OQ.name());
+ public final short FZ = makeBinaryTag(SAMTag.FZ.name());
+ public final short SA = makeBinaryTag(SAMTag.SA.name());
+ public final short MC = makeBinaryTag(SAMTag.MC.name());
private static SAMTagUtil singleton;
diff --git a/src/java/net/sf/samtools/SAMUtils.java b/src/java/net/sf/samtools/SAMUtils.java
index 71a8203..e3a83e0 100644
--- a/src/java/net/sf/samtools/SAMUtils.java
+++ b/src/java/net/sf/samtools/SAMUtils.java
@@ -25,8 +25,16 @@ package net.sf.samtools;
import net.sf.samtools.util.StringUtil;
+import java.io.File;
+import java.io.UnsupportedEncodingException;
+import java.math.BigInteger;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
/**
@@ -466,6 +474,63 @@ public final class SAMUtils
}
+ private static final SAMHeaderRecordComparator<SAMReadGroupRecord> HEADER_RECORD_COMPARATOR =
+ new SAMHeaderRecordComparator<SAMReadGroupRecord>(
+ SAMReadGroupRecord.PLATFORM_UNIT_TAG,
+ SAMReadGroupRecord.LIBRARY_TAG,
+ SAMReadGroupRecord.DATE_RUN_PRODUCED_TAG,
+ SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG,
+ SAMReadGroupRecord.SEQUENCING_CENTER_TAG,
+ SAMReadGroupRecord.PLATFORM_TAG,
+ SAMReadGroupRecord.DESCRIPTION_TAG,
+ SAMReadGroupRecord.READ_GROUP_ID_TAG // We don't actually want to compare with ID but it's suitable
+ // "just in case" since it's the only one that's actually required
+ );
+
+    /**
+     * Calculate a hash code from identifying information in the RG (read group) records in a SAM file's
+     * header. This hash code changes any time read groups are added or removed. Comparing one file's
+     * hash code to another's tells you if the read groups in the BAM files are different.
+     * <p/>
+     * Read groups are digested in HEADER_RECORD_COMPARATOR order and each read group's attributes in key
+     * order; the read group ID attribute is left out of the digest.
+     *
+     * @param input the SAM or BAM file whose header supplies the read groups
+     * @return the MD5 digest rendered as 32 hexadecimal characters, left-padded with zeros
+     */
+    public static String calculateReadGroupRecordChecksum(final File input) {
+        final String ENCODING = "UTF-8";
+
+        final MessageDigest digest;
+        try {
+            digest = MessageDigest.getInstance("MD5");
+        } catch (final NoSuchAlgorithmException nsae) {
+            throw new Error("No MD5 algorithm was available in a Java JDK? Unheard-of!", nsae);
+        }
+
+        // Only the header is needed, so copy the read groups out and release the reader right away.
+        final List<SAMReadGroupRecord> sortedRecords;
+        final SAMFileReader reader = new SAMFileReader(input);
+        try {
+            sortedRecords = new ArrayList<SAMReadGroupRecord>(reader.getFileHeader().getReadGroups());
+        } finally {
+            reader.close();
+        }
+
+        // Sort the read group records so that the digest does not depend on their order in the header.
+        Collections.sort(sortedRecords, HEADER_RECORD_COMPARATOR);
+
+        for (final SAMReadGroupRecord rgRecord : sortedRecords) {
+            // Order each record's attributes by key so that attribute order does not affect the digest either.
+            final TreeMap<String, String> sortedAttributes = new TreeMap<String, String>();
+            for (final Map.Entry<String, String> attributeEntry : rgRecord.getAttributes()) {
+                sortedAttributes.put(attributeEntry.getKey(), attributeEntry.getValue());
+            }
+
+            try {
+                for (final Map.Entry<String, String> sortedEntry : sortedAttributes.entrySet()) {
+                    if ( ! sortedEntry.getKey().equals(SAMReadGroupRecord.READ_GROUP_ID_TAG)) { // Redundant check, safety first
+                        digest.update(sortedEntry.getKey().getBytes(ENCODING));
+                        digest.update(sortedEntry.getValue().getBytes(ENCODING));
+                    }
+                }
+            } catch (final UnsupportedEncodingException uee) {
+                throw new Error("No " + ENCODING + "!? WTH?", uee);
+            }
+        }
+
+        // Convert to a String and pad to get the full 32 chars.
+        final StringBuilder hashText = new StringBuilder((new BigInteger(1, digest.digest())).toString(16));
+        while (hashText.length() < 32 ) hashText.insert(0, "0");
+
+        return hashText.toString();
+    }
/**
* Chains <code>program</code> in front of the first "head" item in the list of
diff --git a/src/java/net/sf/samtools/SAMValidationError.java b/src/java/net/sf/samtools/SAMValidationError.java
index 0034766..4968477 100644
--- a/src/java/net/sf/samtools/SAMValidationError.java
+++ b/src/java/net/sf/samtools/SAMValidationError.java
@@ -37,7 +37,7 @@ public class SAMValidationError {
public enum Type {
/** quality encodings out of range; appear to be Solexa or Illumina when Phread expected */
INVALID_QUALITY_FORMAT(Severity.WARNING),
-
+
/** proper pair flag set for unpaired read */
INVALID_FLAG_PROPER_PAIR,
@@ -46,7 +46,7 @@ public class SAMValidationError {
/** mate unmapped flag does not match read unmapped flag of mate */
MISMATCH_FLAG_MATE_UNMAPPED,
-
+
/** mate negative strand flag set for unpaired read */
INVALID_FLAG_MATE_NEG_STRAND,
@@ -67,11 +67,11 @@ public class SAMValidationError {
/** supplementary alignment flag set for unmapped read */
INVALID_FLAG_SUPPLEMENTARY_ALIGNMENT,
-
+
/** mapped read flat not set for mapped read */
INVALID_FLAG_READ_UNMAPPED,
- /**
+ /**
* inferred insert size is out of range
* @see SAMRecord#MAX_INSERT_SIZE
*/
@@ -86,10 +86,10 @@ public class SAMValidationError {
/** CIGAR string contains I followed by D, or vice versa */
ADJACENT_INDEL_IN_CIGAR(Severity.WARNING),
- /** mate reference index (MRNM) set for unpaired read */
+ /** mate reference index (MRNM) set for unpaired read */
INVALID_MATE_REF_INDEX,
- /** mate reference index (MRNM) does not match reference index of mate */
+ /** mate reference index (MRNM) does not match reference index of mate */
MISMATCH_MATE_REF_INDEX,
/** reference index not found in sequence dictionary */
@@ -97,31 +97,31 @@ public class SAMValidationError {
/** alignment start is can not be correct */
INVALID_ALIGNMENT_START,
-
+
/** mate alignment does not match alignment start of mate */
MISMATCH_MATE_ALIGNMENT_START,
-
+
/** the record's mate fields do not match the corresponding fields of the mate */
MATE_FIELD_MISMATCH,
-
+
/** the NM tag (nucleotide differences) is incorrect */
INVALID_TAG_NM,
-
+
/** the NM tag (nucleotide differences) is missing */
MISSING_TAG_NM(Severity.WARNING),
-
+
/** the sam/bam file is missing the header */
MISSING_HEADER,
-
+
/** there is no sequence dictionary in the header */
MISSING_SEQUENCE_DICTIONARY,
-
+
/** the header is missing read group information */
MISSING_READ_GROUP,
/** the record is out of order */
RECORD_OUT_OF_ORDER,
-
+
/** A read group ID on a SAMRecord is not found in the header */
READ_GROUP_NOT_FOUND,
@@ -189,7 +189,13 @@ public class SAMValidationError {
MATE_NOT_FOUND,
/** Both mates are marked as first of pair, or both mates are marked as second of pair. */
- MATES_ARE_SAME_END;
+ MATES_ARE_SAME_END,
+
+ /** The Cigar String in the MC Tag does not match the Cigar String for the mate of this read. */
+ MISMATCH_MATE_CIGAR_STRING,
+
+ /** There is a Cigar String (stored in the MC Tag) for a read whose mate is NOT mapped. */
+ MATE_CIGAR_STRING_INVALID_PRESENCE;
public final Severity severity;
@@ -226,7 +232,7 @@ public class SAMValidationError {
this.message = message;
this.readName = readName;
}
-
+
/**
* Construct a SAMValidationError with possibly-known record number.
* @param type
@@ -254,13 +260,13 @@ public class SAMValidationError {
}
return builder.append(message).toString();
}
-
+
public Type getType() { return type; }
public String getMessage() { return message; }
/** may be null */
public String getReadName() { return readName; }
-
+
/** 1-based. -1 if not known. */
public long getRecordNumber() { return recordNumber; }
diff --git a/src/java/net/sf/samtools/util/AbstractAsyncWriter.java b/src/java/net/sf/samtools/util/AbstractAsyncWriter.java
index 8adbc5b..7048815 100644
--- a/src/java/net/sf/samtools/util/AbstractAsyncWriter.java
+++ b/src/java/net/sf/samtools/util/AbstractAsyncWriter.java
@@ -112,6 +112,9 @@ public abstract class AbstractAsyncWriter<T> {
}
catch (Throwable t) {
ex.compareAndSet(null, t);
+ // A writer may have been blocked on a full queue before ex was set; clear the queue
+ // to unblock that writer so that it can see the exception.
+ queue.clear();
}
}
}
diff --git a/src/java/net/sf/samtools/SAMFileWriter.java b/src/java/net/sf/samtools/util/ProgressLoggerInterface.java
similarity index 71%
copy from src/java/net/sf/samtools/SAMFileWriter.java
copy to src/java/net/sf/samtools/util/ProgressLoggerInterface.java
index 18931ec..0401e55 100644
--- a/src/java/net/sf/samtools/SAMFileWriter.java
+++ b/src/java/net/sf/samtools/util/ProgressLoggerInterface.java
@@ -1,18 +1,20 @@
-/*
+package net.sf.samtools.util;
+
+/**
* The MIT License
- *
- * Copyright (c) 2009 The Broad Institute
- *
+ * <p/>
+ * Copyright (c) 2014 The Broad Institute
+ * <p/>
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
- *
+ * <p/>
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
- *
+ * <p/>
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
@@ -21,19 +23,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package net.sf.samtools;
+
+import net.sf.samtools.SAMRecord;
/**
- * Interface for SAMText and BAM file writers. Clients need not care which they write to,
- * once the object is constructed.
+ * An interface defining the record() methods of the Picard-public ProgressLogger implementation.
*/
-public interface SAMFileWriter {
- void addAlignment(SAMRecord alignment);
+public interface ProgressLoggerInterface {
- SAMFileHeader getFileHeader();
+ public boolean record(final String chrom, final int pos);
+ public boolean record(final SAMRecord rec);
+ public boolean record(final SAMRecord... recs);
- /**
- * Must be called to flush or file will likely be defective.
- */
- void close();
}
diff --git a/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java b/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
index 0150445..cb3ee99 100644
--- a/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
+++ b/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
@@ -56,33 +56,33 @@ public class ValidateSamFileTest {
results = executeValidation(new SAMFileReader(new File(TEST_DATA_DIR, "invalid_queryname_sort_order.sam")), null);
Assert.assertEquals(results.get(SAMValidationError.Type.RECORD_OUT_OF_ORDER.getHistogramString()).getValue(), 5.0);
}
-
+
@Test
public void testVerbose() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
-
+
for (int i=0; i<20; i++) {
samBuilder.addFrag(String.valueOf(i), 1, i, false);
}
for (final SAMRecord record : samBuilder) {
record.setProperPairFlag(true);
}
-
+
final StringWriter results = new StringWriter();
final SamFileValidator validator = new SamFileValidator(new PrintWriter(results), 8000);
validator.setVerbose(true, 10);
validator.validateSamFileVerbose(
- samBuilder.getSamReader(),
+ samBuilder.getSamReader(),
null);
-
+
final int lineCount = results.toString().split("\n").length;
Assert.assertEquals(lineCount, 11);
}
-
+
@Test
public void testUnpairedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
-
+
for (int i=0; i<6; i++) {
samBuilder.addFrag(String.valueOf(i), i, i, false);
}
@@ -93,9 +93,9 @@ public class ValidateSamFileTest {
records.next().setFirstOfPairFlag(true);
records.next().setSecondOfPairFlag(true);
records.next().setMateReferenceIndex(1);
-
+
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
-
+
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_PROPER_PAIR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_NEG_STRAND.getHistogramString()).getValue(), 1.0);
@@ -107,7 +107,7 @@ public class ValidateSamFileTest {
@Test
public void testPairedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
-
+
for (int i=0; i<5; i++) {
samBuilder.addPair(String.valueOf(i), i, i, i+100);
}
@@ -119,9 +119,9 @@ public class ValidateSamFileTest {
records.next().setMateReferenceIndex(records.next().getReferenceIndex() + 1);
records.next().setMateUnmappedFlag(!records.next().getReadUnmappedFlag());
-
+
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
-
+
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_ALIGNMENT_START.getHistogramString()).getValue(), 3.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_MATE_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISMATCH_FLAG_MATE_NEG_STRAND.getHistogramString()).getValue(), 1.0);
@@ -155,7 +155,7 @@ public class ValidateSamFileTest {
@Test
public void testUnmappedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
-
+
for (int i=0; i<4; i++) {
samBuilder.addUnmappedFragment(String.valueOf(i));
}
@@ -164,9 +164,9 @@ public class ValidateSamFileTest {
records.next().setNotPrimaryAlignmentFlag(true);
records.next().setMappingQuality(10);
records.next().setCigarString("36M");
-
+
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
-
+
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_NOT_PRIM_ALIGNMENT.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_MAPPING_QUALITY.getHistogramString()).getValue(), 1.0);
}
@@ -174,25 +174,25 @@ public class ValidateSamFileTest {
@Test
public void testMappedRecords() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
-
+
for (int i=0; i<2; i++) {
samBuilder.addFrag(String.valueOf(i), i, i, false);
}
final Iterator<SAMRecord> records = samBuilder.iterator();
records.next().setCigarString("25M3S25M");
records.next().setReferenceName("*");
-
+
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), null);
-
+
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_CIGAR.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_FLAG_READ_UNMAPPED.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_TAG_NM.getHistogramString()).getValue(), 1.0);
}
-
+
@Test
public void testNmFlagValidation() throws IOException {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
-
+
for (int i=0; i<3; i++) {
samBuilder.addFrag(String.valueOf(i), i, i+1, false);
}
@@ -206,7 +206,7 @@ public class ValidateSamFileTest {
recordWithInsert.setReadBases(sequence);
recordWithInsert.setCigarString("1D" + Integer.toString(sequence.length-1) + "M1I");
recordWithInsert.setAttribute(ReservedTagConstants.NM, 2);
-
+
final Histogram<String> results = executeValidation(samBuilder.getSamReader(), new ReferenceSequenceFile() {
private int index=0;
public SAMSequenceDictionary getSequenceDictionary() {
@@ -215,7 +215,7 @@ public class ValidateSamFileTest {
public ReferenceSequence nextSequence() {
final byte[] bases = new byte[10000];
- Arrays.fill(bases, (byte) 'A');
+ Arrays.fill(bases, (byte) 'A');
return new ReferenceSequence("foo", index++, bases);
}
@@ -233,11 +233,29 @@ public class ValidateSamFileTest {
throw new UnsupportedOperationException();
}
});
-
+
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_TAG_NM.getHistogramString()).getValue(), 1.0);
Assert.assertEquals(results.get(SAMValidationError.Type.MISSING_TAG_NM.getHistogramString()).getValue(), 1.0);
}
+    /**
+     * Validates the given SAM file and expects the histogram of validation errors to hold exactly one
+     * occurrence of the expected error type.
+     */
+    @Test(dataProvider = "testMateCigarScenarios")
+    public void testMateCigarScenarios(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
+            throws Exception {
+        final File samFile = new File(TEST_DATA_DIR, inputFile);
+        final Histogram<String> errorCounts = executeValidation(new SAMFileReader(samFile), null);
+        final String errorKey = expectedError.getHistogramString();
+        Assert.assertNotNull(errorCounts.get(errorKey));
+        Assert.assertEquals(errorCounts.get(errorKey).getValue(), 1.0);
+    }
+
+
+    /** Supplies rows of (scenario description, input SAM file name, expected validation error type). */
+    @DataProvider(name = "testMateCigarScenarios")
+    public Object[][] testMateCigarScenarios() {
+        final Object[] mismatchedMateCigar = {"invalid mate cigar", "invalid_mate_cigar_string.sam",
+                SAMValidationError.Type.MISMATCH_MATE_CIGAR_STRING};
+        final Object[] unexpectedMateCigar = {"inappropriate mate cigar", "inappropriate_mate_cigar_string.sam",
+                SAMValidationError.Type.MATE_CIGAR_STRING_INVALID_PRESENCE};
+        return new Object[][] {mismatchedMateCigar, unexpectedMateCigar};
+    }
+
@Test(dataProvider = "testTruncatedScenarios")
public void testTruncated(final String scenario, final String inputFile, final SAMValidationError.Type expectedError)
throws Exception {
@@ -289,7 +307,7 @@ public class ValidateSamFileTest {
final Histogram<String> results = executeValidation(samReader, null);
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_QUALITY_FORMAT.getHistogramString()).getValue(), 1.0);
}
-
+
@Test
public void testCigarOffEndOfReferenceValidation() throws Exception {
final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
@@ -345,7 +363,7 @@ public class ValidateSamFileTest {
SAMFileReader.setDefaultValidationStringency(saveStringency);
}
}
-
+
private Histogram<String> executeValidation(final SAMFileReader samReader, final ReferenceSequenceFile reference) throws IOException {
final File outFile = File.createTempFile("validation", ".txt");
final PrintWriter out = new PrintWriter(outFile);
diff --git a/src/tests/java/net/sf/samtools/BAMFileIndexTest.java b/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
index bce9cb8..5b3086b 100755
--- a/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
+++ b/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
@@ -212,6 +212,9 @@ public class BAMFileIndexTest
Assert.assertNotNull(mate);
Assert.assertEquals(mate.getReadName(), rec.getReadName());
Assert.assertEquals(mate.getReferenceIndex(), rec.getMateReferenceIndex());
+ if (rec.getMateCigarString() != null) {
+ Assert.assertEquals(mate.getCigarString(), rec.getMateCigarString());
+ }
Assert.assertEquals(mate.getAlignmentStart(), rec.getMateAlignmentStart());
Assert.assertFalse(mate.getFirstOfPairFlag() == rec.getFirstOfPairFlag());
}
diff --git a/src/tests/java/net/sf/samtools/SamHeaderRecordComparatorTest.java b/src/tests/java/net/sf/samtools/SamHeaderRecordComparatorTest.java
new file mode 100644
index 0000000..6cb89b8
--- /dev/null
+++ b/src/tests/java/net/sf/samtools/SamHeaderRecordComparatorTest.java
@@ -0,0 +1,72 @@
+package net.sf.samtools;
+
+/**
+ * The MIT License
+ * <p/>
+ * Copyright (c) 2014 The Broad Institute
+ * <p/>
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ * <p/>
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * <p/>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+/**
+ * Tests for SAMHeaderRecordComparator, using two read group records that differ in platform unit
+ * and in whether a description is set.
+ */
+public class SamHeaderRecordComparatorTest {
+
+    /**
+     * Supplies one (left, right) pair of read groups: both share the same library, "left" has platform unit
+     * "left.1" and no description, "right" has platform unit "right.1" and a description.
+     */
+    @DataProvider(name="UsualSuspects")
+    public Object[][] createData() {
+        final SAMReadGroupRecord left = new SAMReadGroupRecord("left");
+        left.setPlatformUnit("left.1");
+        left.setLibrary("library");
+
+        final SAMReadGroupRecord right = new SAMReadGroupRecord("right");
+        right.setPlatformUnit("right.1");
+        right.setLibrary("library");
+        right.setDescription("description");
+
+        return new Object[][] {{ left, right }};
+    }
+
+    /** A record compared with itself must compare as equal. */
+    @Test(dataProvider="UsualSuspects")
+    public void testEqualRecords(final SAMReadGroupRecord left, final SAMReadGroupRecord right) {
+        final SAMHeaderRecordComparator<SAMReadGroupRecord> comparator = new SAMHeaderRecordComparator<SAMReadGroupRecord>(SAMReadGroupRecord.PLATFORM_UNIT_TAG);
+        // Deliberately compares left with itself; TestNG's argument order is (actual, expected).
+        Assert.assertEquals(comparator.compare(left, left), 0);
+    }
+
+    /** Records whose compared attribute differs must order consistently in both directions. */
+    @Test(dataProvider="UsualSuspects")
+    public void testUnequalRecords(final SAMReadGroupRecord left, final SAMReadGroupRecord right) {
+        final SAMHeaderRecordComparator<SAMReadGroupRecord> comparator = new SAMHeaderRecordComparator<SAMReadGroupRecord>(SAMReadGroupRecord.PLATFORM_UNIT_TAG);
+        Assert.assertTrue(comparator.compare(left, right) < 0);
+        Assert.assertTrue(comparator.compare(right, left) > 0);
+    }
+
+    /** Records that both lack the compared attribute must compare as equal. */
+    @Test(dataProvider="UsualSuspects")
+    public void testNullAttributes(final SAMReadGroupRecord left, final SAMReadGroupRecord right) {
+        final SAMHeaderRecordComparator<SAMReadGroupRecord> comparator = new SAMHeaderRecordComparator<SAMReadGroupRecord>(SAMReadGroupRecord.FLOW_ORDER_TAG);
+        Assert.assertEquals(comparator.compare(left, right), 0); // neither record has this attribute
+    }
+
+    /** A record lacking the compared attribute is expected to order before a record that has it. */
+    @Test(dataProvider="UsualSuspects")
+    public void testOneNullAttribute(final SAMReadGroupRecord left, final SAMReadGroupRecord right) {
+        final SAMHeaderRecordComparator<SAMReadGroupRecord> comparator = new SAMHeaderRecordComparator<SAMReadGroupRecord>(SAMReadGroupRecord.DESCRIPTION_TAG);
+        Assert.assertTrue(comparator.compare(left, right) < 0);
+        Assert.assertTrue(comparator.compare(right, left) > 0);
+    }
+}
diff --git a/testdata/net/sf/picard/intervallist/IntervalListFromVCFTest.vcf b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTest.vcf
new file mode 100644
index 0000000..39bed22
--- /dev/null
+++ b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTest.vcf
@@ -0,0 +1,53 @@
+##fileformat=VCFv4.1
+##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=NO_ET gatk_key=/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/gsamembers_broadinstitute.org.key read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null do [...]
+##CombineVariants="analysis_type=CombineVariants input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null use [...]
+##FILTER=<ID=LowQual,Description="Low quality">
+##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -4.1718 <= x < -0.8611">
+##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -727.2136">
+##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -727.2136 <= x < -4.1718">
+##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -25.4561 <= x < 3.2489">
+##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -38672.7015">
+##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -38672.7015 <= x < -25.4561">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=TP,Number=1,Type=Integer,Description="Phred score of the genotype combination and phase given that the genotypes are correct">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
+##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
+##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
+##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
+##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##PhaseByTransmission="analysis_type=PhaseByTransmission input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog= [...]
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list] read_buffer_size=null phone_home=NO_ET gatk_key=/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/gsamembers_broadinstitute.org.key read_filter=[] intervals=[/broad/hptmp/ami/tmp/queueScatterGather/.qlog/CEUTrio.indelcall-sg/temp_020_of_300/scatter.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_s [...]
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
+1 8216712 rs11121115 A G 1540.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=0.917;DB;DP=131;Dels=0.00;FS=11.67;HaplotypeScore=3.35;MLEAC=3;MLEAF=0.500;MQ=57.74;MQ0=1;MQRankSum=0.427;QD=11.76;ReadPosRankSum=-2.190e-01;SB=-9.390e+02;VQSLOD=5.53;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:23,28:51:99:681,0,668:127 0/1:16,18:34:99:338,0,244:127 0/1:24,22:46:99:560,0,323:127
+1 17032814 rs2773183 T C 2828.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:63,59:122:99:1434,0,1831:127 0/1:53,56:109:99:910,0,871:127 0/1:61,30:91:99:523,0,1257:127
+1 17032818 rs2773183 T C 2828.26 FILTER AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:63,59:122:99:1434,0,1831:127 0/1:53,56:109:99:910,0,871:127 0/1:61,30:91:99:523,0,1257:127
+2 1143476 rs4998209 C T 1483.26 PASS AC=2;AF=0.333;AN=6;BaseQRankSum=-4.814e+00;DB;DP=189;Dels=0.00;FS=5.61;HaplotypeScore=0.324;MLEAC=2;MLEAF=0.333;MQ=58.36;MQ0=0;MQRankSum=1.58;QD=12.06;ReadPosRankSum=0.326;SB=-9.320e+02;VQSLOD=6.81;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0|0:66,0:66:99:0,178,2264:127 0|1:33,38:71:99:844,0,1024:127 0|1:26,26:52:99:678,0,719:127
+2 9240279 rs56249990 A G 3978.01 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=1.70;DB;DP=213;Dels=0.00;FS=7.83;HaplotypeScore=1.19;MLEAC=3;MLEAF=0.500;MQ=59.40;MQ0=0;MQRankSum=0.143;QD=27.25;ReadPosRankSum=-9.700e-02;SB=-1.991e+03;VQSLOD=9.14;culprit=FS GT:AD:DP:GQ:PL:TP 0|1:33,42:75:99:1400,0,1031:127 0|0:67,0:67:99:0,178,2277:127 1|1:0,71:71:99:2578,199,0:127
diff --git a/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestComp.interval_list b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestComp.interval_list
new file mode 100644
index 0000000..aad2820
--- /dev/null
+++ b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestComp.interval_list
@@ -0,0 +1,7 @@
+@SQ SN:1 LN:249250621
+@SQ SN:2 LN:243199373
+@SQ SN:3 LN:198022430
+1 8216712 8216712 + rs11121115
+1 17032814 17032814 + rs2773183
+2 1143476 1143476 + rs4998209
+2 9240279 9240279 + rs56249990
diff --git a/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestCompInverse.interval_list b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestCompInverse.interval_list
new file mode 100644
index 0000000..9bcca99
--- /dev/null
+++ b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestCompInverse.interval_list
@@ -0,0 +1,10 @@
+@SQ SN:1 LN:249250621
+@SQ SN:2 LN:243199373
+@SQ SN:3 LN:198022430
+1 1 8216711 + interval-1
+1 8216713 17032813 + interval-2
+1 17032815 249250621 + interval-3
+2 1 1143475 + interval-4
+2 1143477 9240278 + interval-5
+2 9240280 243199373 + interval-6
+3 1 198022430 + interval-7
diff --git a/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestCompInverseManual.interval_list b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestCompInverseManual.interval_list
new file mode 100644
index 0000000..de85ca5
--- /dev/null
+++ b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestCompInverseManual.interval_list
@@ -0,0 +1,10 @@
+@SQ SN:1 LN:249250621
+@SQ SN:2 LN:243199373
+@SQ SN:3 LN:198022430
+1 1 8216711 + interval-1
+1 8216715 17032813 + interval-2
+1 17032815 249250620 + interval-3
+2 3 1143475 + interval-4
+2 1143477 9240278 + interval-5
+2 9240280 243199373 + interval-6
+3 1 198022430 + interval-7
diff --git a/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestManual.vcf b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestManual.vcf
new file mode 100644
index 0000000..f9ec538
--- /dev/null
+++ b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestManual.vcf
@@ -0,0 +1,57 @@
+##fileformat=VCFv4.1
+##ApplyRecalibration="analysis_type=ApplyRecalibration input_file=[] read_buffer_size=null phone_home=NO_ET gatk_key=/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/gsamembers_broadinstitute.org.key read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null do [...]
+##CombineVariants="analysis_type=CombineVariants input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog=null use [...]
+##FILTER=<ID=LowQual,Description="Low quality">
+##FILTER=<ID=VQSRTrancheINDEL99.00to99.90,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -4.1718 <= x < -0.8611">
+##FILTER=<ID=VQSRTrancheINDEL99.90to100.00+,Description="Truth sensitivity tranche level for INDEL model at VQS Lod < -727.2136">
+##FILTER=<ID=VQSRTrancheINDEL99.90to100.00,Description="Truth sensitivity tranche level for INDEL model at VQS Lod: -727.2136 <= x < -4.1718">
+##FILTER=<ID=VQSRTrancheSNP99.00to99.90,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -25.4561 <= x < 3.2489">
+##FILTER=<ID=VQSRTrancheSNP99.90to100.00+,Description="Truth sensitivity tranche level for SNP model at VQS Lod < -38672.7015">
+##FILTER=<ID=VQSRTrancheSNP99.90to100.00,Description="Truth sensitivity tranche level for SNP model at VQS Lod: -38672.7015 <= x < -25.4561">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth (reads with MQ=255 or with bad mates are filtered)">
+##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification">
+##FORMAT=<ID=TP,Number=1,Type=Integer,Description="Phred score of the genotype combination and phase given that the genotypes are correct">
+##INFO=<ID=AC,Number=A,Type=Integer,Description="Allele count in genotypes, for each ALT allele, in the same order as listed">
+##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency, for each ALT allele, in the same order as listed">
+##INFO=<ID=AN,Number=1,Type=Integer,Description="Total number of alleles in called genotypes">
+##INFO=<ID=BaseQRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt Vs. Ref base qualities">
+##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP Membership">
+##INFO=<ID=DP,Number=1,Type=Integer,Description="Approximate read depth; some reads may have been filtered">
+##INFO=<ID=DS,Number=0,Type=Flag,Description="Were any of the samples downsampled?">
+##INFO=<ID=Dels,Number=1,Type=Float,Description="Fraction of Reads Containing Spanning Deletions">
+##INFO=<ID=END,Number=1,Type=Integer,Description="Stop position of the interval">
+##INFO=<ID=FS,Number=1,Type=Float,Description="Phred-scaled p-value using Fisher's exact test to detect strand bias">
+##INFO=<ID=HaplotypeScore,Number=1,Type=Float,Description="Consistency of the site with at most two segregating haplotypes">
+##INFO=<ID=InbreedingCoeff,Number=1,Type=Float,Description="Inbreeding coefficient as estimated from the genotype likelihoods per-sample when compared against the Hardy-Weinberg expectation">
+##INFO=<ID=MLEAC,Number=A,Type=Integer,Description="Maximum likelihood expectation (MLE) for the allele counts (not necessarily the same as the AC), for each ALT allele, in the same order as listed">
+##INFO=<ID=MLEAF,Number=A,Type=Float,Description="Maximum likelihood expectation (MLE) for the allele frequency (not necessarily the same as the AF), for each ALT allele, in the same order as listed">
+##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
+##INFO=<ID=MQ0,Number=1,Type=Integer,Description="Total Mapping Quality Zero Reads">
+##INFO=<ID=MQRankSum,Number=1,Type=Float,Description="Z-score From Wilcoxon rank sum test of Alt vs. Ref read mapping qualities">
+##INFO=<ID=QD,Number=1,Type=Float,Description="Variant Confidence/Quality by Depth">
+##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
+##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
+##INFO=<ID=ReadPosRankSum,Number=1,Type=Float,Description="Z-score from Wilcoxon rank sum test of Alt vs. Ref read position bias">
+##INFO=<ID=SB,Number=1,Type=Float,Description="Strand Bias">
+##INFO=<ID=STR,Number=0,Type=Flag,Description="Variant is a short tandem repeat">
+##INFO=<ID=VQSLOD,Number=1,Type=Float,Description="Log odds ratio of being a true variant versus being false under the trained gaussian mixture model">
+##INFO=<ID=culprit,Number=1,Type=String,Description="The annotation which was the worst performing in the Gaussian mixture model, likely the reason why the variant was filtered out">
+##INFO=<ID=set,Number=1,Type=String,Description="Source VCF for the merged record in CombineVariants">
+##PhaseByTransmission="analysis_type=PhaseByTransmission input_file=[] read_buffer_size=null phone_home=STANDARD gatk_key=null read_filter=[] intervals=null excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_sequence=/humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta nonDeterministicRandomSeed=false downsampling_type=BY_SAMPLE downsample_to_fraction=null downsample_to_coverage=1000 baq=OFF baqGapOpenPenalty=40.0 performanceLog= [...]
+##UnifiedGenotyper="analysis_type=UnifiedGenotyper input_file=[/humgen/gsa-hpprojects/NA12878Collection/bams/CEUTrio.HiSeq.WGS.b37.list] read_buffer_size=null phone_home=NO_ET gatk_key=/humgen/gsa-hpprojects/GATK/data/gatk_user_keys/gsamembers_broadinstitute.org.key read_filter=[] intervals=[/broad/hptmp/ami/tmp/queueScatterGather/.qlog/CEUTrio.indelcall-sg/temp_020_of_300/scatter.intervals] excludeIntervals=null interval_set_rule=UNION interval_merging=ALL interval_padding=0 reference_s [...]
+##contig=<ID=1,length=249250621,assembly=b37>
+##contig=<ID=2,length=243199373,assembly=b37>
+##contig=<ID=3,length=198022430,assembly=b37>
+#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA12878 NA12891 NA12892
+1 8216712 rs11121115 A G 1540.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=0.917;DB;DP=131;Dels=0.00;FS=11.67;HaplotypeScore=3.35;MLEAC=3;MLEAF=0.500;MQ=57.74;MQ0=1;MQRankSum=0.427;QD=11.76;ReadPosRankSum=-2.190e-01;SB=-9.390e+02;VQSLOD=5.53;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:23,28:51:99:681,0,668:127 0/1:16,18:34:99:338,0,244:127 0/1:24,22:46:99:560,0,323:127
+1 8216713 yossi-1 A G 1540.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=0.917;DB;DP=131;Dels=0.00;FS=11.67;HaplotypeScore=3.35;MLEAC=3;MLEAF=0.500;MQ=57.74;MQ0=1;MQRankSum=0.427;QD=11.76;ReadPosRankSum=-2.190e-01;SB=-9.390e+02;VQSLOD=5.53;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:23,28:51:99:681,0,668:127 0/1:16,18:34:99:338,0,244:127 0/1:24,22:46:99:560,0,323:127
+1 8216714 yossi-2 A G 1540.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=0.917;DB;DP=131;Dels=0.00;FS=11.67;HaplotypeScore=3.35;MLEAC=3;MLEAF=0.500;MQ=57.74;MQ0=1;MQRankSum=0.427;QD=11.76;ReadPosRankSum=-2.190e-01;SB=-9.390e+02;VQSLOD=5.53;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:23,28:51:99:681,0,668:127 0/1:16,18:34:99:338,0,244:127 0/1:24,22:46:99:560,0,323:127
+1 17032814 rs2773183 T C 2828.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:63,59:122:99:1434,0,1831:127 0/1:53,56:109:99:910,0,871:127 0/1:61,30:91:99:523,0,1257:127
+1 249250621 yossi-4 T C 2828.26 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=-3.879e+00;DB;DP=322;Dels=0.00;FS=2.43;HaplotypeScore=15.45;MLEAC=3;MLEAF=0.500;MQ=56.86;MQ0=0;MQRankSum=2.92;QD=8.78;ReadPosRankSum=-1.245e+00;SB=-1.943e+03;VQSLOD=-1.421e+00;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0/1:63,59:122:99:1434,0,1831:127 0/1:53,56:109:99:910,0,871:127 0/1:61,30:91:99:523,0,1257:127
+2 1 yossi-5 C T 1483.26 PASS AC=2;AF=0.333;AN=6;BaseQRankSum=-4.814e+00;DB;DP=189;Dels=0.00;FS=5.61;HaplotypeScore=0.324;MLEAC=2;MLEAF=0.333;MQ=58.36;MQ0=0;MQRankSum=1.58;QD=12.06;ReadPosRankSum=0.326;SB=-9.320e+02;VQSLOD=6.81;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0|0:66,0:66:99:0,178,2264:127 0|1:33,38:71:99:844,0,1024:127 0|1:26,26:52:99:678,0,719:127
+2 2 . C T 1483.26 PASS AC=2;AF=0.333;AN=6;BaseQRankSum=-4.814e+00;DB;DP=189;Dels=0.00;FS=5.61;HaplotypeScore=0.324;MLEAC=2;MLEAF=0.333;MQ=58.36;MQ0=0;MQRankSum=1.58;QD=12.06;ReadPosRankSum=0.326;SB=-9.320e+02;VQSLOD=6.81;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0|0:66,0:66:99:0,178,2264:127 0|1:33,38:71:99:844,0,1024:127 0|1:26,26:52:99:678,0,719:127
+2 1143476 rs4998209 C T 1483.26 PASS AC=2;AF=0.333;AN=6;BaseQRankSum=-4.814e+00;DB;DP=189;Dels=0.00;FS=5.61;HaplotypeScore=0.324;MLEAC=2;MLEAF=0.333;MQ=58.36;MQ0=0;MQRankSum=1.58;QD=12.06;ReadPosRankSum=0.326;SB=-9.320e+02;VQSLOD=6.81;culprit=HaplotypeScore GT:AD:DP:GQ:PL:TP 0|0:66,0:66:99:0,178,2264:127 0|1:33,38:71:99:844,0,1024:127 0|1:26,26:52:99:678,0,719:127
+2 9240279 rs56249990 A G 3978.01 PASS AC=3;AF=0.500;AN=6;BaseQRankSum=1.70;DB;DP=213;Dels=0.00;FS=7.83;HaplotypeScore=1.19;MLEAC=3;MLEAF=0.500;MQ=59.40;MQ0=0;MQRankSum=0.143;QD=27.25;ReadPosRankSum=-9.700e-02;SB=-1.991e+03;VQSLOD=9.14;culprit=FS GT:AD:DP:GQ:PL:TP 0|1:33,42:75:99:1400,0,1031:127 0|0:67,0:67:99:0,178,2277:127 1|1:0,71:71:99:2578,199,0:127
diff --git a/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestManualComp.interval_list b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestManualComp.interval_list
new file mode 100644
index 0000000..6c6b5e0
--- /dev/null
+++ b/testdata/net/sf/picard/intervallist/IntervalListFromVCFTestManualComp.interval_list
@@ -0,0 +1,12 @@
+@SQ SN:1 LN:249250621
+@SQ SN:2 LN:243199373
+@SQ SN:3 LN:198022430
+1 8216712 8216712 + rs11121115
+1 8216713 8216713 + yossi-1
+1 8216714 8216714 + yossi-2
+1 17032814 17032814 + rs2773183
+1 249250621 249250621 + yossi-4
+2 1 1 + yossi-5
+2 2 2 + interval-1
+2 1143476 1143476 + rs4998209
+2 9240279 9240279 + rs56249990
diff --git a/testdata/net/sf/picard/sam/ValidateSamFileTest/inappropriate_mate_cigar_string.sam b/testdata/net/sf/picard/sam/ValidateSamFileTest/inappropriate_mate_cigar_string.sam
new file mode 100644
index 0000000..9500e48
--- /dev/null
+++ b/testdata/net/sf/picard/sam/ValidateSamFileTest/inappropriate_mate_cigar_string.sam
@@ -0,0 +1,13 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:chr1 LN:101 UR:merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67
+@SQ SN:chr2 LN:101 UR:merger.fasta M5:31c33e2155b3de5e2554b693c475b310
+@SQ SN:chr3 LN:101 UR:merger.fasta M5:631593c6dd2048ae88dcce2bd505d295
+@SQ SN:chr4 LN:101 UR:merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1
+@SQ SN:chr5 LN:101 UR:merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda
+@SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d
+@SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630
+@SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63
+@RG ID:0 SM:Hi,Mom!
+@PG ID:0 PN:myAligner VN:1.0 CL:align!
+pair_read 73 chr7 3 9 6M = 3 0 CAACAG )'.*.+ MC:Z:* PG:Z:0 RG:Z:0 NM:i:4 UQ:i:33
+pair_read 133 chr7 3 0 * = 3 0 NCGCGG &/1544 MC:Z:6M PG:Z:0 RG:Z:0
diff --git a/testdata/net/sf/picard/sam/ValidateSamFileTest/invalid_mate_cigar_string.sam b/testdata/net/sf/picard/sam/ValidateSamFileTest/invalid_mate_cigar_string.sam
new file mode 100644
index 0000000..b542d77
--- /dev/null
+++ b/testdata/net/sf/picard/sam/ValidateSamFileTest/invalid_mate_cigar_string.sam
@@ -0,0 +1,19 @@
+@HD VN:1.4 SO:coordinate
+@SQ SN:chr1 LN:101 UR:merger.fasta M5:bd01f7e11515bb6beda8f7257902aa67
+@SQ SN:chr2 LN:101 UR:merger.fasta M5:31c33e2155b3de5e2554b693c475b310
+@SQ SN:chr3 LN:101 UR:merger.fasta M5:631593c6dd2048ae88dcce2bd505d295
+@SQ SN:chr4 LN:101 UR:merger.fasta M5:c60cb92f1ee5b78053c92bdbfa19abf1
+@SQ SN:chr5 LN:101 UR:merger.fasta M5:07ebc213c7611db0eacbb1590c3e9bda
+@SQ SN:chr6 LN:101 UR:merger.fasta M5:7be2f5e7ee39e60a6c3b5b6a41178c6d
+@SQ SN:chr7 LN:404 UR:merger.fasta M5:da488fc432cdaf2c20c96da473a7b630
+@SQ SN:chr8 LN:202 UR:merger.fasta M5:d339678efce576d5546e88b49a487b63
+@RG ID:0 SM:Hi,Mom!
+@PG ID:0 PN:myAligner VN:1.0 CL:align!
+pair_both_hit 65 chr7 1 100 101M = 120 120 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& MC:Z:99M PG:Z:0 RG:Z:0 NM:i:21 MQ:i:100 UQ:i:144
+pair_both_multihit 321 chr7 1 100 101M = 120 120 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& MC:Z:101M PG:Z:0 RG:Z:0 HI:i:0 NM:i:21 MQ:i:100 UQ:i:144
+pair_both_multihit 321 chr7 10 100 101M = 130 121 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& MC:Z:101M PG:Z:0 RG:Z:0 HI:i:1 NM:i:83 MQ:i:100 UQ:i:865
+pair_both_hit 129 chr7 120 100 101M = 1 -120 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 MC:Z:101M PG:Z:0 RG:Z:0 NM:i:73 MQ:i:100 UQ:i:944
+pair_both_multihit 385 chr7 120 100 101M = 1 -120 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 MC:Z:101M PG:Z:0 RG:Z:0 HI:i:0 NM:i:73 MQ:i:100 UQ:i:944
+pair_both_multihit 385 chr7 130 100 101M = 10 -121 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 MC:Z:101M PG:Z:0 RG:Z:0 HI:i:1 NM:i:84 MQ:i:100 UQ:i:1072
+pair_both_multihit 65 chr8 1 100 101M = 101 101 CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN )'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/& MC:Z:101M PG:Z:0 RG:Z:0 HI:i:2 NM:i:86 MQ:i:100 UQ:i:940
+pair_both_multihit 129 chr8 101 100 101M = 1 -101 NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA &/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1 MC:Z:101M PG:Z:0 RG:Z:0 HI:i:2 NM:i:81 MQ:i:100 UQ:i:1042
diff --git a/testdata/net/sf/picard/sam/mate_cigar_and_no_oqs.sam b/testdata/net/sf/picard/sam/mate_cigar_and_no_oqs.sam
new file mode 100755
index 0000000..efe2be2
--- /dev/null
+++ b/testdata/net/sf/picard/sam/mate_cigar_and_no_oqs.sam
@@ -0,0 +1,4 @@
+@HD VN:1.0 SO:queryname
+@SQ SN:REF LN:1000
+A 99 REF 100 50 10M = 200 100 CAACAGAAGC )'.*.+2,)) MC:Z:5M1I4M
+A 147 REF 200 50 5M1I4M = 100 -100 CAACAGAAGC )'.*.+2,)) MC:Z:10M
diff --git a/testdata/net/sf/picard/sam/mate_cigar_and_oqs.sam b/testdata/net/sf/picard/sam/mate_cigar_and_oqs.sam
new file mode 100755
index 0000000..3782ad7
--- /dev/null
+++ b/testdata/net/sf/picard/sam/mate_cigar_and_oqs.sam
@@ -0,0 +1,4 @@
+@HD VN:1.0 SO:queryname
+@SQ SN:REF LN:1000
+A 99 REF 100 50 10M = 200 100 CAACAGAAGC )'.*.+2,)) MC:Z:5M1I4M OQ:Z:IIIIIIIIII
+A 147 REF 200 50 5M1I4M = 100 -100 CAACAGAAGC )'.*.+2,)) MC:Z:10M OQ:Z:IIIIIIIIII
diff --git a/testdata/net/sf/picard/sam/no_mate_cigar_and_no_oqs.sam b/testdata/net/sf/picard/sam/no_mate_cigar_and_no_oqs.sam
new file mode 100755
index 0000000..39211cc
--- /dev/null
+++ b/testdata/net/sf/picard/sam/no_mate_cigar_and_no_oqs.sam
@@ -0,0 +1,4 @@
+@HD VN:1.0 SO:queryname
+@SQ SN:REF LN:1000
+A 99 REF 100 50 10M = 200 100 CAACAGAAGC )'.*.+2,))
+A 147 REF 200 50 5M1I4M = 100 -100 CAACAGAAGC )'.*.+2,))
diff --git a/testdata/net/sf/picard/sam/no_mate_cigar_and_oqs.sam b/testdata/net/sf/picard/sam/no_mate_cigar_and_oqs.sam
new file mode 100755
index 0000000..55d5533
--- /dev/null
+++ b/testdata/net/sf/picard/sam/no_mate_cigar_and_oqs.sam
@@ -0,0 +1,4 @@
+@HD VN:1.0 SO:queryname
+@SQ SN:REF LN:1000
+A 99 REF 100 50 10M = 200 100 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
+A 147 REF 200 50 5M1I4M = 100 -100 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
diff --git a/testdata/net/sf/picard/sam/no_mates_and_no_oqs.sam b/testdata/net/sf/picard/sam/no_mates_and_no_oqs.sam
new file mode 100755
index 0000000..9de670f
--- /dev/null
+++ b/testdata/net/sf/picard/sam/no_mates_and_no_oqs.sam
@@ -0,0 +1,6 @@
+@HD VN:1.0 SO:queryname
+A 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+B 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+C 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+D 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+E 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
diff --git a/testdata/net/sf/picard/sam/no_mates_and_no_oqs_in_first_four_records.sam b/testdata/net/sf/picard/sam/no_mates_and_no_oqs_in_first_four_records.sam
new file mode 100755
index 0000000..c1eb683
--- /dev/null
+++ b/testdata/net/sf/picard/sam/no_mates_and_no_oqs_in_first_four_records.sam
@@ -0,0 +1,7 @@
+@HD VN:1.0 SO:queryname
+A 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+B 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+C 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+D 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,))
+E 79 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
+E 143 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
diff --git a/testdata/net/sf/picard/sam/no_mates_and_oqs.sam b/testdata/net/sf/picard/sam/no_mates_and_oqs.sam
new file mode 100755
index 0000000..345c3c2
--- /dev/null
+++ b/testdata/net/sf/picard/sam/no_mates_and_oqs.sam
@@ -0,0 +1,6 @@
+@HD VN:1.0 SO:queryname
+A 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
+B 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
+C 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
+D 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
+E 4 * 0 0 * * 0 0 CAACAGAAGC )'.*.+2,)) OQ:Z:IIIIIIIIII
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/picard-tools.git
More information about the debian-med-commit
mailing list