[med-svn] [picard-tools] 01/04: Imported Upstream version 1.106
Charles Plessy
plessy at moszumanska.debian.org
Sat Mar 22 02:05:04 UTC 2014
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch upstream
in repository picard-tools.
commit f1b15727a0124693c80646a16d41b88e1a9a2f28
Author: Charles Plessy <plessy at debian.org>
Date: Sat Mar 22 10:37:13 2014 +0900
Imported Upstream version 1.106
---
build.xml | 2 +-
src/java/net/sf/picard/pedigree/PedFile.java | 76 +++-
src/java/net/sf/picard/pedigree/PedTrio.java | 45 ---
.../reference/ReferenceSequenceFileFactory.java | 2 +
.../net/sf/picard/sam/AbstractAlignmentMerger.java | 71 ++--
src/java/net/sf/picard/sam/CleanSam.java | 6 +-
src/java/net/sf/picard/sam/HitsForInsert.java | 5 +
src/java/net/sf/picard/sam/MarkDuplicates.java | 4 +-
.../sf/picard/sam/MultiHitAlignedReadIterator.java | 54 ++-
src/java/net/sf/picard/sam/SamAlignmentMerger.java | 5 +-
src/java/net/sf/picard/sam/SamPairUtil.java | 15 +
src/java/net/sf/picard/util/ProgressLogger.java | 17 +-
src/java/net/sf/picard/util/SamLocusIterator.java | 5 +-
.../sf/picard/util/SamRecordIntervalIterator.java | 188 ---------
.../util/SamRecordIntervalIteratorFactory.java | 13 +-
src/java/net/sf/samtools/AbstractBAMFileIndex.java | 114 +++---
src/java/net/sf/samtools/BAMFileReader.java | 419 ++++++++++++++-------
src/java/net/sf/samtools/CachingBAMFileIndex.java | 34 +-
src/java/net/sf/samtools/Chunk.java | 50 ++-
.../net/sf/samtools/DiskBasedBAMFileIndex.java | 14 +-
src/java/net/sf/samtools/SAMFileHeader.java | 5 +-
src/java/net/sf/samtools/SAMFileReader.java | 224 ++++++++++-
src/java/net/sf/samtools/SAMFileSpan.java | 48 +--
src/java/net/sf/samtools/SAMTextReader.java | 5 +
.../variant/variantcontext/Allele.java | 17 +-
.../variant/variantcontext/VariantContext.java | 25 +-
.../variantcontext/VariantContextBuilder.java | 17 +-
.../variantcontext/writer/BCF2FieldEncoder.java | 16 +-
.../variant/vcf/AbstractVCFCodec.java | 26 +-
src/tests/java/net/sf/picard/sam/CleanSamTest.java | 78 ++++
.../net/sf/picard/sam/MergeBamAlignmentTest.java | 21 +-
.../java/net/sf/picard/vcf/MergeVcfsTest.java | 12 +-
.../java/net/sf/samtools/BAMFileIndexTest.java | 96 ++++-
.../variantcontext/writer/VCFWriterUnitTest.java | 13 +-
.../sf/picard/sam/CleanSam/fits_with_deletion.sam | 3 +
.../sam/CleanSam/long_trailing_insertion.sam | 3 +
.../picard/sam/CleanSam/overhang_with_deletion.sam | 3 +
.../net/sf/picard/sam/CleanSam/simple_fits.sam | 3 +
.../net/sf/picard/sam/CleanSam/simple_overhang.sam | 3 +
.../sf/picard/sam/CleanSam/trailing_insertion.sam | 3 +
40 files changed, 1113 insertions(+), 647 deletions(-)
diff --git a/build.xml b/build.xml
index a58ab94..82e6061 100755
--- a/build.xml
+++ b/build.xml
@@ -43,7 +43,7 @@
<!-- Get SVN revision, if available, otherwise leave it blank. -->
<exec executable="svnversion" outputproperty="repository.revision" failifexecutionfails="false"/>
<property name="repository.revision" value=""/>
- <property name="sam-version" value="1.105"/>
+ <property name="sam-version" value="1.106"/>
<property name="picard-version" value="${sam-version}"/>
<property name="tribble-version" value="${sam-version}"/>
<property name="variant-version" value="${sam-version}"/>
diff --git a/src/java/net/sf/picard/pedigree/PedFile.java b/src/java/net/sf/picard/pedigree/PedFile.java
index 1a429d6..648ca20 100644
--- a/src/java/net/sf/picard/pedigree/PedFile.java
+++ b/src/java/net/sf/picard/pedigree/PedFile.java
@@ -18,9 +18,21 @@ import java.util.regex.Pattern;
*
* Stores the information in memory as a map of individualId -> Pedigree information for that individual
*/
-public class PedFile extends TreeMap<String,PedTrio> {
+public class PedFile extends TreeMap<String, PedFile.PedTrio> {
private static final Log log = Log.getInstance(PedFile.class);
static final Pattern WHITESPACE = Pattern.compile("\\s+");
+ static final Pattern TAB = Pattern.compile("\\t");
+ private final Pattern delimiterPattern;
+ private final String delimiterString; // A textual representation of the delimiter, for output purposes
+
+ // These two are really for PedTrio, but they can't be static in there and need to be accessed outside of PedFile
+ public static final Number NO_PHENO = new Integer(-9);
+ public static final Sex UNKNOWN_SEX = Sex.Unknown;
+
+ public PedFile(final boolean isTabMode) {
+ delimiterPattern = isTabMode ? TAB : WHITESPACE;
+ delimiterString = isTabMode ? "tabs" : "whitespace";
+ }
/** Adds a trio to the PedFile keyed by the individual id. */
public void add(final PedTrio trio) {
@@ -52,7 +64,7 @@ public class PedFile extends TreeMap<String,PedTrio> {
out.close();
}
- catch (IOException ioe) {
+ catch (final IOException ioe) {
throw new RuntimeIOException("IOException while writing to file " + file.getAbsolutePath(), ioe);
}
}
@@ -60,28 +72,28 @@ public class PedFile extends TreeMap<String,PedTrio> {
/**
* Attempts to read a pedigree file into memory.
*/
- public static PedFile fromFile(final File file) {
- final PedFile pedfile = new PedFile();
+ public static PedFile fromFile(final File file, final boolean isTabMode) {
+ final PedFile pedFile = new PedFile(isTabMode);
IoUtil.assertFileIsReadable(file);
for (final String line : IoUtil.readLines(file)) {
- final String[] fields = WHITESPACE.split(line);
+ final String[] fields = pedFile.delimiterPattern.split(line);
if (fields.length != 6) {
log.error("Ped file line contained invalid number of fields, skipping: " + line);
continue;
}
- final PedTrio trio = new PedTrio(fields[0],
- fields[1],
- fields[2],
- fields[3],
- Sex.fromCode(Integer.parseInt(fields[4])),
- fields[5].contains(".") ? Double.parseDouble(fields[5]) : Integer.parseInt(fields[5])
- );
- pedfile.add(trio);
+ final PedTrio trio = pedFile.new PedTrio(fields[0],
+ fields[1],
+ fields[2],
+ fields[3],
+ Sex.fromCode(Integer.parseInt(fields[4])),
+ fields[5].contains(".") ? Double.parseDouble(fields[5]) : Integer.parseInt(fields[5])
+ );
+ pedFile.add(trio);
}
- return pedfile;
+ return pedFile;
}
/**
@@ -96,4 +108,40 @@ public class PedFile extends TreeMap<String,PedTrio> {
return this;
}
+
+ public class PedTrio {
+ private final String familyId;
+ private final String individualId;
+ private final String paternalId;
+ private final String maternalId;
+ private final Sex sex;
+ private final Number phenotype;
+
+ /** Constructs a TRIO that cannot be modified after the fact. */
+ public PedTrio(final String familyId, final String individualId, final String paternalId, final String maternalId, final Sex sex, final Number phenotype) {
+ if (delimiterPattern.split(familyId).length != 1) throw new IllegalArgumentException("FamilyID cannot contain " + delimiterString + ": [" + familyId + "]");
+ if (delimiterPattern.split(individualId).length != 1) throw new IllegalArgumentException("IndividualID cannot contain " + delimiterString + ": [" + individualId + "]");
+ if (delimiterPattern.split(paternalId).length != 1) throw new IllegalArgumentException("PaternalID cannot contain " + delimiterString + ": [" + paternalId + "]");
+ if (delimiterPattern.split(maternalId).length != 1) throw new IllegalArgumentException("MaternalID cannot contain " + delimiterString + ": [" + maternalId + "]");
+
+ this.familyId = familyId;
+ this.individualId = individualId;
+ this.paternalId = paternalId;
+ this.maternalId = maternalId;
+ this.sex = sex;
+ this.phenotype = phenotype;
+ }
+
+ /** True if this record has paternal and maternal ids, otherwise false. */
+ public boolean hasBothParents() {
+ return this.paternalId != null && this.maternalId != null;
+ }
+
+ public String getFamilyId() { return familyId; }
+ public String getIndividualId() { return individualId; }
+ public String getPaternalId() { return paternalId; }
+ public String getMaternalId() { return maternalId; }
+ public Sex getSex() { return sex; }
+ public Number getPhenotype() { return phenotype; }
+ }
}
diff --git a/src/java/net/sf/picard/pedigree/PedTrio.java b/src/java/net/sf/picard/pedigree/PedTrio.java
deleted file mode 100644
index 8288625..0000000
--- a/src/java/net/sf/picard/pedigree/PedTrio.java
+++ /dev/null
@@ -1,45 +0,0 @@
-package net.sf.picard.pedigree;
-
-/**
- * Represents a single trio within a ped file.
- *
- * @author Tim Fennell
- */
-public class PedTrio {
- public static final Number NO_PHENO = new Integer(-9);
- public static final Sex UNKNOWN_SEX = Sex.Unknown;
-
- private final String familyId;
- private final String individualId;
- private final String paternalId;
- private final String maternalId;
- private final Sex sex;
- private final Number phenotype;
-
- /** Constructs a TRIO that cannot be modified after the fact. */
- public PedTrio(final String familyId, final String individualId, final String paternalId, final String maternalId, final Sex sex, final Number phenotype) {
- if (PedFile.WHITESPACE.split(familyId).length != 1) throw new IllegalArgumentException("FamilyID cannot contain whitespace: [" + familyId + "]");
- if (PedFile.WHITESPACE.split(individualId).length != 1) throw new IllegalArgumentException("IndividualID cannot contain whitespace: [" + individualId + "]");
- if (PedFile.WHITESPACE.split(paternalId).length != 1) throw new IllegalArgumentException("PaternalID cannot contain whitespace: [" + paternalId + "]");
- if (PedFile.WHITESPACE.split(maternalId).length != 1) throw new IllegalArgumentException("MaternalID cannot contain whitespace: [" + maternalId + "]");
-
- this.familyId = familyId;
- this.individualId = individualId;
- this.paternalId = paternalId;
- this.maternalId = maternalId;
- this.sex = sex;
- this.phenotype = phenotype;
- }
-
- /** True if this record has paternal and maternal ids, otherwise false. */
- public boolean hasBothParents() {
- return this.paternalId != null && this.maternalId != null;
- }
-
- public String getFamilyId() { return familyId; }
- public String getIndividualId() { return individualId; }
- public String getPaternalId() { return paternalId; }
- public String getMaternalId() { return maternalId; }
- public Sex getSex() { return sex; }
- public Number getPhenotype() { return phenotype; }
-}
diff --git a/src/java/net/sf/picard/reference/ReferenceSequenceFileFactory.java b/src/java/net/sf/picard/reference/ReferenceSequenceFileFactory.java
index ec99dcf..812ac69 100644
--- a/src/java/net/sf/picard/reference/ReferenceSequenceFileFactory.java
+++ b/src/java/net/sf/picard/reference/ReferenceSequenceFileFactory.java
@@ -43,6 +43,8 @@ public class ReferenceSequenceFileFactory {
add(".fasta.gz");
add(".fa");
add(".fa.gz");
+ add(".fna");
+ add(".fna.gz");
add(".txt");
add(".txt.gz");
}};
diff --git a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
index 37a1e8e..817568d 100644
--- a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
+++ b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
@@ -29,6 +29,7 @@ import net.sf.picard.io.IoUtil;
import net.sf.picard.reference.ReferenceSequenceFileWalker;
import net.sf.picard.util.CigarUtil;
import net.sf.picard.util.Log;
+import net.sf.picard.util.ProgressLogger;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileHeader.SortOrder;
import net.sf.samtools.util.CloseableIterator;
@@ -72,6 +73,8 @@ public abstract class AbstractAlignmentMerger {
private final NumberFormat FMT = new DecimalFormat("#,###");
private final Log log = Log.getInstance(AbstractAlignmentMerger.class);
+ private final ProgressLogger progress = new ProgressLogger(this.log, 1000000, "Written to sorting collection in queryname order", "records");
+
private final File unmappedBamFile;
private final File targetBamFile;
private final SAMSequenceDictionary sequenceDictionary;
@@ -249,7 +252,8 @@ public abstract class AbstractAlignmentMerger {
if (nextAligned != null && rec.getReadName().equals(nextAligned.getReadName())) {
// If there are multiple alignments for a read (pair), then the unaligned SAMRecord must be cloned
// before copying info from the aligned record to the unaligned.
- final boolean clone = nextAligned.numHits() > 1;
+ final boolean clone = nextAligned.numHits() > 1 || nextAligned.hasSupplementalHits();
+ SAMRecord r1Primary = null, r2Primary = null;
if (rec.getReadPairedFlag()) {
for (int i = 0; i < nextAligned.numHits(); ++i) {
@@ -271,6 +275,12 @@ public abstract class AbstractAlignmentMerger {
secondToWrite = secondOfPair;
}
+ // If these are the primary alignments then stash them for use on any supplemental alignments
+ if (isPrimaryAlignment) {
+ r1Primary = firstToWrite;
+ r2Primary = secondToWrite;
+ }
+
transferAlignmentInfoToPairedRead(firstToWrite, secondToWrite, firstAligned, secondAligned);
// Only write unmapped read when it has the mate info from the primary alignment.
@@ -286,27 +296,22 @@ public abstract class AbstractAlignmentMerger {
}
}
- // This is already being checked at construction, but just to be sure ....
- if (nextAligned.getSupplementalFirstOfPairOrFragment().size() != nextAligned.getSupplementalSecondOfPair().size()) {
- throw new IllegalStateException("Supplemental first of pairs not the same size as second of pairs!");
- }
// Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
- for (int i = 0; i < nextAligned.getSupplementalFirstOfPairOrFragment().size(); i++) {
- final SAMRecord firstToWrite = clone(rec);
- final SAMRecord secondToWrite = clone(secondOfPair);
- transferAlignmentInfoToPairedRead(firstToWrite, secondToWrite,
- nextAligned.getSupplementalFirstOfPairOrFragment().get(i),
- nextAligned.getSupplementalSecondOfPair().get(i));
- addIfNotFiltered(sorted, firstToWrite);
- addIfNotFiltered(sorted, secondToWrite);
-
- if (!firstToWrite.getReadUnmappedFlag()) ++unmapped;
- else ++aligned;
-
- if (!secondToWrite.getReadUnmappedFlag()) ++unmapped;
- else ++aligned;
+ for (final boolean isRead1 : new boolean[]{true,false}) {
+ final List<SAMRecord> supplementals = isRead1 ? nextAligned.getSupplementalFirstOfPairOrFragment() : nextAligned.getSupplementalSecondOfPair();
+ final SAMRecord sourceRec = isRead1 ? rec : secondOfPair;
+ final SAMRecord matePrimary = isRead1 ? r2Primary : r1Primary;
+
+ for (final SAMRecord supp : supplementals) {
+ final SAMRecord out = clone(sourceRec);
+ transferAlignmentInfoToFragment(out, supp);
+ if (matePrimary != null) SamPairUtil.setMateInformationOnSupplementalAlignment(out, matePrimary);
+ ++aligned;
+ addIfNotFiltered(sorted, out);
+ }
}
- } else {
+ }
+ else {
for (int i = 0; i < nextAligned.numHits(); ++i) {
final SAMRecord recToWrite = clone ? clone(rec) : rec;
transferAlignmentInfoToFragment(recToWrite, nextAligned.getFragment(i));
@@ -316,12 +321,10 @@ public abstract class AbstractAlignmentMerger {
}
// Take all of the supplemental reads which had been stashed and add them (as appropriate) to sorted
for (final SAMRecord supplementalRec : nextAligned.getSupplementalFirstOfPairOrFragment()) {
- // always clone supplementals
final SAMRecord recToWrite = clone(rec);
transferAlignmentInfoToFragment(recToWrite, supplementalRec);
addIfNotFiltered(sorted, recToWrite);
- if (recToWrite.getReadUnmappedFlag()) ++unmapped;
- else ++aligned;
+ ++aligned;
}
}
nextAligned = nextAligned();
@@ -342,15 +345,10 @@ public abstract class AbstractAlignmentMerger {
}
}
}
-
- if ((aligned + unmapped) % 1000000 == 0) {
- log.info("Processed " + FMT.format(aligned + unmapped) + " records in query name order.");
- }
}
unmappedIterator.close();
if (alignedIterator.hasNext()) {
- throw new IllegalStateException("Reads remaining on alignment iterator: " +
- alignedIterator.next().getReadName() + "!");
+ throw new IllegalStateException("Reads remaining on alignment iterator: " + alignedIterator.next().getReadName() + "!");
}
alignedIterator.close();
@@ -358,23 +356,21 @@ public abstract class AbstractAlignmentMerger {
header.setSortOrder(this.sortOrder);
final boolean presorted = this.sortOrder == SortOrder.coordinate;
final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(header, presorted, this.targetBamFile);
- int count = 0;
+ final ProgressLogger finalProgress = new ProgressLogger(log, 10000000, "Written in coordinate order to output", "records");
+
for (final SAMRecord rec : sorted) {
if (!rec.getReadUnmappedFlag()) {
if (refSeq != null) {
final byte[] referenceBases = refSeq.get(sequenceDictionary.getSequenceIndex(rec.getReferenceName())).getBases();
- rec.setAttribute(SAMTag.NM.name(),
- SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));
+ rec.setAttribute(SAMTag.NM.name(), SequenceUtil.calculateSamNmTag(rec, referenceBases, 0, bisulfiteSequence));
+
if (rec.getBaseQualities() != SAMRecord.NULL_QUALS) {
- rec.setAttribute(SAMTag.UQ.name(),
- SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
+ rec.setAttribute(SAMTag.UQ.name(), SequenceUtil.sumQualitiesOfMismatches(rec, referenceBases, 0, bisulfiteSequence));
}
}
}
writer.addAlignment(rec);
- if (++count % 1000000 == 0) {
- log.info(FMT.format(count) + " SAMRecords written to " + targetBamFile.getName());
- }
+ finalProgress.record(rec);
}
writer.close();
sorted.cleanup();
@@ -388,6 +384,7 @@ public abstract class AbstractAlignmentMerger {
private void addIfNotFiltered(final SortingCollection<SAMRecord> sorted, final SAMRecord rec) {
if (includeSecondaryAlignments || !rec.getNotPrimaryAlignmentFlag()) {
sorted.add(rec);
+ this.progress.record(rec);
}
}
diff --git a/src/java/net/sf/picard/sam/CleanSam.java b/src/java/net/sf/picard/sam/CleanSam.java
index adfa6ad..338c71a 100644
--- a/src/java/net/sf/picard/sam/CleanSam.java
+++ b/src/java/net/sf/picard/sam/CleanSam.java
@@ -33,7 +33,6 @@ import net.sf.picard.util.Log;
import net.sf.picard.util.ProgressLogger;
import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
-import net.sf.samtools.util.CloserUtil;
import java.io.File;
import java.util.List;
@@ -82,9 +81,10 @@ public class CleanSam extends CommandLineProgram {
final SAMRecord rec = it.next();
if (!rec.getReadUnmappedFlag()) {
final SAMSequenceRecord refseq = rec.getHeader().getSequence(rec.getReferenceIndex());
- if (rec.getAlignmentEnd() > refseq.getSequenceLength()) {
+ final int overhang = rec.getAlignmentEnd() - refseq.getSequenceLength();
+ if (overhang > 0) {
// 1-based index of first base in read to clip.
- final int clipFrom = refseq.getSequenceLength() - rec.getAlignmentStart() + 1;
+ final int clipFrom = rec.getReadLength() - overhang + 1;
final List<CigarElement> newCigarElements = CigarUtil.softClipEndOfRead(clipFrom, rec.getCigar().getCigarElements());
rec.setCigar(new Cigar(newCigarElements));
}
diff --git a/src/java/net/sf/picard/sam/HitsForInsert.java b/src/java/net/sf/picard/sam/HitsForInsert.java
index cac30f2..7491191 100644
--- a/src/java/net/sf/picard/sam/HitsForInsert.java
+++ b/src/java/net/sf/picard/sam/HitsForInsert.java
@@ -93,6 +93,11 @@ class HitsForInsert {
return Math.max(firstOfPairOrFragment.size(), secondOfPair.size());
}
+ /** True if either the first or second of pair has supplementary alignments, otherwise false. */
+ public boolean hasSupplementalHits() {
+ return !(this.supplementalFirstOfPairOrFragment.isEmpty() && this.supplementalSecondOfPair.isEmpty());
+ }
+
/**
* @return Returns the ith hit for the first end, or null if the first end is not aligned.
*/
diff --git a/src/java/net/sf/picard/sam/MarkDuplicates.java b/src/java/net/sf/picard/sam/MarkDuplicates.java
index 5d716a4..b10abcf 100644
--- a/src/java/net/sf/picard/sam/MarkDuplicates.java
+++ b/src/java/net/sf/picard/sam/MarkDuplicates.java
@@ -566,7 +566,9 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
* @return an array with an ordered list of indexes into the source file
*/
private void generateDuplicateIndexes() {
- final int maxInMemory = (int) ((Runtime.getRuntime().maxMemory() * 0.25) / SortingLongCollection.SIZEOF);
+ // Keep this number from getting too large even if there is a huge heap.
+ final int maxInMemory = (int) Math.min((Runtime.getRuntime().maxMemory() * 0.25) / SortingLongCollection.SIZEOF,
+ (double)(Integer.MAX_VALUE - 5));
log.info("Will retain up to " + maxInMemory + " duplicate indices before spilling to disk.");
this.duplicateIndexes = new SortingLongCollection(maxInMemory, TMP_DIR.toArray(new File[TMP_DIR.size()]));
diff --git a/src/java/net/sf/picard/sam/MultiHitAlignedReadIterator.java b/src/java/net/sf/picard/sam/MultiHitAlignedReadIterator.java
index 1c8e1e6..f7fa051 100644
--- a/src/java/net/sf/picard/sam/MultiHitAlignedReadIterator.java
+++ b/src/java/net/sf/picard/sam/MultiHitAlignedReadIterator.java
@@ -28,14 +28,10 @@ import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.picard.util.Log;
import net.sf.picard.util.PeekableIterator;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMRecordQueryNameComparator;
-import net.sf.samtools.SAMTag;
-import net.sf.samtools.SAMUtils;
+import net.sf.samtools.*;
import net.sf.samtools.util.CloseableIterator;
-import java.util.Comparator;
-import java.util.NoSuchElementException;
+import java.util.*;
import static net.sf.picard.sam.HitsForInsert.NumPrimaryAlignmentState;
@@ -120,6 +116,7 @@ class MultiHitAlignedReadIterator implements CloseableIterator<HitsForInsert> {
// Accumulate the alignments matching readName.
do {
final SAMRecord rec = peekIterator.next();
+ replaceHardWithSoftClips(rec);
// It is critical to do this here, because SamAlignmentMerger uses this exception to determine
// if the aligned input needs to be sorted.
if (peekIterator.hasNext() && queryNameComparator.fileOrderCompare(rec, peekIterator.peek()) > 0) {
@@ -150,12 +147,6 @@ class MultiHitAlignedReadIterator implements CloseableIterator<HitsForInsert> {
} else throw new PicardException("Read is marked as pair but neither first or second: " + readName);
} while (peekIterator.hasNext() && peekIterator.peek().getReadName().equals(readName));
- // If we've added to the second of pair supplementals, make sure it is the same size as the first of pairs
- if (hits.getSupplementalSecondOfPair().size() > 0 &&
- hits.getSupplementalSecondOfPair().size() != hits.getSupplementalFirstOfPairOrFragment().size()) {
- throw new PicardException("Number of supplemental second of pairs do not equal the number of supplemental first of pairs");
- }
-
// If there is no more than one alignment for each end, no need to do any coordination.
if (hits.numHits() <= 1) {
// No HI tags needed if only a single hit
@@ -175,9 +166,44 @@ class MultiHitAlignedReadIterator implements CloseableIterator<HitsForInsert> {
return hits;
}
+ /** Replaces hard clips with soft clips and fills in bases and qualities with dummy values as needed. */
+ private void replaceHardWithSoftClips(final SAMRecord rec) {
+ if (rec.getReadUnmappedFlag()) return;
+ if (rec.getCigar().isEmpty()) return;
+
+ List<CigarElement> elements = rec.getCigar().getCigarElements();
+ final CigarElement first = elements.get(0);
+ final CigarElement last = elements.size() == 1 ? null : elements.get(elements.size()-1);
+ final int startHardClip = first.getOperator() == CigarOperator.H ? first.getLength() : 0;
+ final int endHardClip = (last != null && last.getOperator() == CigarOperator.H) ? last.getLength() : 0;
+
+ if (startHardClip + endHardClip > 0) {
+ final int len = rec.getReadBases().length + startHardClip + endHardClip;
+
+ // Fix the basecalls
+ final byte[] bases = new byte[len];
+ Arrays.fill(bases, (byte) 'N');
+ System.arraycopy(rec.getReadBases(), 0, bases, startHardClip, rec.getReadBases().length);
+
+ // Fix the quality scores
+ final byte[] quals = new byte[len];
+ Arrays.fill(quals, (byte) 2 );
+ System.arraycopy(rec.getBaseQualities(), 0, quals, startHardClip, rec.getBaseQualities().length);
+
+ // Fix the cigar!
+ elements = new ArrayList<CigarElement>(elements); // make it modifiable
+ if (startHardClip > 0) elements.set(0, new CigarElement(first.getLength(), CigarOperator.S));
+ if (endHardClip > 0) elements.set(elements.size()-1, new CigarElement(last.getLength(), CigarOperator.S));
+
+ // Set the update structures on the new record
+ rec.setReadBases(bases);
+ rec.setBaseQualities(quals);
+ rec.setCigar(new Cigar(elements));
+ }
+ }
+
+ /** Unsupported operation. */
public void remove() {
throw new UnsupportedOperationException();
}
-
-
}
diff --git a/src/java/net/sf/picard/sam/SamAlignmentMerger.java b/src/java/net/sf/picard/sam/SamAlignmentMerger.java
index d80053a..ded5782 100644
--- a/src/java/net/sf/picard/sam/SamAlignmentMerger.java
+++ b/src/java/net/sf/picard/sam/SamAlignmentMerger.java
@@ -150,7 +150,8 @@ public class SamAlignmentMerger extends AbstractAlignmentMerger {
try {
super.mergeAlignment();
}
- catch(IllegalStateException e) {
+ catch(final IllegalStateException ise) {
+ log.warn("Exception merging bam alignment - attempting to sort aligned reads and try again: ", ise.getMessage());
forceSort = true;
resetRefSeqFileWalker();
super.mergeAlignment();
@@ -175,7 +176,7 @@ public class SamAlignmentMerger extends AbstractAlignmentMerger {
readers.add(r);
}
- final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, false);
+ final SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SortOrder.queryname, headers, false);
mergingIterator = new MergingSamRecordIterator(headerMerger, readers, true);
header = headerMerger.getMergedHeader();
diff --git a/src/java/net/sf/picard/sam/SamPairUtil.java b/src/java/net/sf/picard/sam/SamPairUtil.java
index e323ec2..9b1fa3a 100644
--- a/src/java/net/sf/picard/sam/SamPairUtil.java
+++ b/src/java/net/sf/picard/sam/SamPairUtil.java
@@ -247,6 +247,21 @@ public class SamPairUtil {
rec2.setInferredInsertSize(-insertSize);
}
+ /**
+ * Sets mate pair information appropriately on a supplemental SAMRecord (e.g. from a split alignment)
+ * using the primary alignment of the read's mate.
+ * @param supplemental a supplemental alignment for the mate pair of the primary supplied
+ * @param matePrimary the primary alignment of the mate pair of the supplemental
+ */
+ public static void setMateInformationOnSupplementalAlignment( final SAMRecord supplemental,
+ final SAMRecord matePrimary) {
+ supplemental.setMateReferenceIndex(matePrimary.getReferenceIndex());
+ supplemental.setMateAlignmentStart(matePrimary.getAlignmentStart());
+ supplemental.setMateNegativeStrandFlag(matePrimary.getReadNegativeStrandFlag());
+ supplemental.setMateUnmappedFlag(matePrimary.getReadUnmappedFlag());
+ supplemental.setInferredInsertSize(-matePrimary.getInferredInsertSize());
+ }
+
public static void setProperPairAndMateInfo(final SAMRecord rec1, final SAMRecord rec2,
final SAMFileHeader header,
final List<PairOrientation> exepectedOrientations) {
diff --git a/src/java/net/sf/picard/util/ProgressLogger.java b/src/java/net/sf/picard/util/ProgressLogger.java
index 034188a..93ddaa3 100644
--- a/src/java/net/sf/picard/util/ProgressLogger.java
+++ b/src/java/net/sf/picard/util/ProgressLogger.java
@@ -15,6 +15,7 @@ public class ProgressLogger {
private final Log log;
private final int n;
private final String verb;
+ private final String noun;
private final long startTime = System.currentTimeMillis();
private final NumberFormat fmt = new DecimalFormat("#,###");
@@ -28,11 +29,23 @@ public class ProgressLogger {
* @param log the Log object to write outputs to
* @param n the frequency with which to output (i.e. every N records)
* @param verb the verb to log, e.g. "Processed, Read, Written".
+ * @param noun the noun to use when logging, e.g. "Records, Variants, Loci"
*/
- public ProgressLogger(final Log log, final int n, final String verb) {
+ public ProgressLogger(final Log log, final int n, final String verb, final String noun) {
this.log = log;
this.n = n;
this.verb = verb;
+ this.noun = noun;
+ }
+
+ /**
+ * Construct a progress logger.
+ * @param log the Log object to write outputs to
+ * @param n the frequency with which to output (i.e. every N records)
+ * @param verb the verb to log, e.g. "Processed, Read, Written".
+ */
+ public ProgressLogger(final Log log, final int n, final String verb) {
+ this(log, n, verb, "records");
}
/**
@@ -63,7 +76,7 @@ public class ProgressLogger {
if (chrom == null) readInfo = "*/*";
else readInfo = chrom + ":" + fmt.format(pos);
- log.info(this.verb, " ", processed, " records. Elapsed time: ", elapsed, "s. Time for last ", fmt.format(this.n),
+ log.info(this.verb, " ", processed, " " + noun + ". Elapsed time: ", elapsed, "s. Time for last ", fmt.format(this.n),
": ", period, "s. Last read position: ", readInfo);
return true;
}
diff --git a/src/java/net/sf/picard/util/SamLocusIterator.java b/src/java/net/sf/picard/util/SamLocusIterator.java
index 9dd8a54..f5df237 100644
--- a/src/java/net/sf/picard/util/SamLocusIterator.java
+++ b/src/java/net/sf/picard/util/SamLocusIterator.java
@@ -168,7 +168,7 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
* passed in that is not coordinate sorted, it will eventually be coordinate sorted by this class.
*/
public SamLocusIterator(final SAMFileReader samReader, final IntervalList intervalList) {
- this(samReader, intervalList, false);
+ this(samReader, intervalList, samReader.hasIndex());
}
/**
@@ -178,7 +178,8 @@ public class SamLocusIterator implements Iterable<SamLocusIterator.LocusInfo>, C
* @param intervalList Either the list of desired intervals, or null. Note that if an intervalList is
* passed in that is not coordinate sorted, it will eventually be coordinate sorted by this class.
* @param useIndex If true, do indexed lookup to improve performance. Not relevant if intervalList == null.
- * This can actually slow performance if the intervals are densely packed.
+ It is no longer the case that useIndex==true can make performance worse. It should always perform at least
+ * as well as useIndex==false, and generally will be much faster.
*/
public SamLocusIterator(final SAMFileReader samReader, final IntervalList intervalList, final boolean useIndex) {
if (samReader.getFileHeader().getSortOrder() == null || samReader.getFileHeader().getSortOrder() == SAMFileHeader.SortOrder.unsorted) {
diff --git a/src/java/net/sf/picard/util/SamRecordIntervalIterator.java b/src/java/net/sf/picard/util/SamRecordIntervalIterator.java
deleted file mode 100644
index a0eef92..0000000
--- a/src/java/net/sf/picard/util/SamRecordIntervalIterator.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package net.sf.picard.util;
-
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.util.CloseableIterator;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.NoSuchElementException;
-
-/**
- * Iterate over SAMRecords in an indexed BAM file that overlap the given list of intervals.
- * Note that it is not guaranteed that only reads that overlap one of the given intervals will be
- * returned. Rather, the interval list is a hint that is used to optimize access to the BAM file.
- * An IntervalFilter can be used to ensure that only SAMRecords overlapping a set of intervals are iterated over.
- * c.f. SamRecordIntervalIteratorFactory.
- *
- * Note that if there are too many intervals or they are too close together, using this class may be slower
- * than iterating through the entire BAM file.
- *
- * @author alecw at broadinstitute.org
- */
-class SamRecordIntervalIterator implements CloseableIterator<SAMRecord> {
- private static final int DEFAULT_MAX_READ_LENGTH_GUESS = 16384;
-
- private final SAMFileReader samReader;
- private final Iterator<Interval> mergedIntervalsIterator;
- /**
- * null implies that there are no more records to return. Otherwise samIterator.next()
- * will return a record that is appropriate to return to the caller.
- */
- private PeekableIterator<SAMRecord> samIterator = null;
-
- /**
- * The start of the most recent read returned is remembered. When advancing to the next interval,
- * it is possible that queryOverlapping() could return a read that was previous returned. Therefore, when
- * getting a new iterator from queryOverlapping(), it must be advanced so that the next SAMRecord starts
- * after this locus.
- */
- private int lastSequenceIndex = -1;
- private int lastPosition = -1;
-
-
- /**
- * Create an iterator that returns reads overlapping one (or more) of the intervals in uniqueIntervals.
- * Note that some of the reads may not be overlap any of the intervals. The only guarantee is that all
- * reads that overlap will be returned.
- *
- * @param samReader supportsQuery() must return true
- * @param uniqueIntervals must be locus-ordered and non-overlapping.
- */
- public SamRecordIntervalIterator(final SAMFileReader samReader, final List<Interval> uniqueIntervals) {
- this(samReader, uniqueIntervals, DEFAULT_MAX_READ_LENGTH_GUESS);
- }
-
- /**
- * Create an iterator that returns reads overlapping one (or more) of the intervals in uniqueIntervals.
- * Note that some of the reads may not be overlap any of the intervals. The only guarantee is that all
- * reads that overlap will be returned.
- *
- * @param samReader supportsQuery() must return true
- * @param uniqueIntervals must be locus-ordered and non-overlapping.
- * @param maxReadLengthGuess Guess for the max read length in the SAM file. intervals closer together
- * than this are merged when querying in order to avoid reading the same SAMRecord more than once.
- */
- public SamRecordIntervalIterator(final SAMFileReader samReader, final List<Interval> uniqueIntervals, final int maxReadLengthGuess) {
- IntervalUtil.assertOrderedNonOverlapping(uniqueIntervals.iterator(), samReader.getFileHeader().getSequenceDictionary());
- if (!samReader.hasIndex()) {
- throw new IllegalArgumentException("SAMFileReader does not support query");
- }
- this.samReader = samReader;
- this.mergedIntervalsIterator = mergeCloseIntervals(uniqueIntervals, maxReadLengthGuess).iterator();
- advanceInterval();
- }
-
- private List<Interval> mergeCloseIntervals(final List<Interval> uniqueIntervals, final int maxReadLengthGuess) {
- final List<Interval> ret = new ArrayList<Interval>();
- if (uniqueIntervals.isEmpty()) {
- return ret;
- }
- Interval accumulatingInterval = uniqueIntervals.get(0);
- for (int i = 1; i < uniqueIntervals.size(); ++i) {
- final Interval thisInterval = uniqueIntervals.get(i);
- if (!accumulatingInterval.getSequence().equals(thisInterval.getSequence()) ||
- thisInterval.getStart() - accumulatingInterval.getEnd() > maxReadLengthGuess) {
- ret.add(accumulatingInterval);
- accumulatingInterval = thisInterval;
- } else {
- accumulatingInterval = new Interval(accumulatingInterval.getSequence(),
- accumulatingInterval.getStart(), thisInterval.getEnd());
- }
- }
- ret.add(accumulatingInterval);
- return ret;
- }
-
-
- /**
- * Called when iterator for the current interval has been exhausted. Get an iterator for the next interval
- * for which there are SAMRecords, and advance it past any already seen.
- */
- private void advanceInterval() {
- if (samIterator != null) {
- samIterator.close();
- }
- samIterator = null;
- while (mergedIntervalsIterator.hasNext()) {
- final Interval nextInterval = mergedIntervalsIterator.next();
- samIterator = new PeekableIterator<SAMRecord>(samReader.queryOverlapping(nextInterval.getSequence(),
- nextInterval.getStart(), nextInterval.getEnd()));
- // Skip over any SAMRecords already seen.
- advanceSamIterator();
- if (samIterator.hasNext()) {
- // This iterator has some SAMRecords to return.
- break;
- } else {
- // Nothing valid for this interval. Try the next interval.
- samIterator.close();
- samIterator = null;
- }
- }
- }
-
- /**
- * Advance the current samIterator past any SAMRecords previously returned.
- */
- private void advanceSamIterator() {
- for (; samIterator.hasNext(); samIterator.next()) {
- final SAMRecord rec = samIterator.peek();
- if (rec.getReferenceIndex() > lastSequenceIndex ||
- rec.getAlignmentStart() > lastPosition) {
- break;
- }
- }
- }
-
- public void close() {
- if (samIterator != null) {
- samIterator.close();
- samIterator = null;
- }
- }
-
- public boolean hasNext() {
- return samIterator != null && samIterator.hasNext();
- }
-
- public SAMRecord next() {
- if (!hasNext()) {
- throw new NoSuchElementException();
- }
- final SAMRecord rec = samIterator.next();
- lastSequenceIndex = rec.getReferenceIndex();
- lastPosition = rec.getAlignmentStart();
- if (!samIterator.hasNext()) {
- advanceInterval();
- }
- return rec;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("Not supported: remove");
- }
-}
diff --git a/src/java/net/sf/picard/util/SamRecordIntervalIteratorFactory.java b/src/java/net/sf/picard/util/SamRecordIntervalIteratorFactory.java
index 4dab4f3..0f8af5d 100644
--- a/src/java/net/sf/picard/util/SamRecordIntervalIteratorFactory.java
+++ b/src/java/net/sf/picard/util/SamRecordIntervalIteratorFactory.java
@@ -55,7 +55,6 @@ public class SamRecordIntervalIteratorFactory {
public CloseableIterator<SAMRecord> makeSamRecordIntervalIterator(final SAMFileReader samReader,
final List<Interval> uniqueIntervals,
final boolean useIndex) {
- final IntervalFilter intervalFilter = new IntervalFilter(uniqueIntervals, samReader.getFileHeader());
if (!samReader.hasIndex() || !useIndex) {
final int stopAfterSequence;
final int stopAfterPosition;
@@ -67,12 +66,16 @@ public class SamRecordIntervalIteratorFactory {
stopAfterSequence = samReader.getFileHeader().getSequenceIndex(lastInterval.getSequence());
stopAfterPosition = lastInterval.getEnd();
}
+ final IntervalFilter intervalFilter = new IntervalFilter(uniqueIntervals, samReader.getFileHeader());
return new StopAfterFilteringIterator(samReader.iterator(), intervalFilter, stopAfterSequence, stopAfterPosition);
} else {
- // Note that SamRecordIntervalIterator may return some records that do not overlap the intervals,
- // because it merges intervals that are close to one another in order to reduce I/O. Thus
- // the IntervalFilter is necessary.
- return new FilteringIterator(new SamRecordIntervalIterator(samReader, uniqueIntervals), intervalFilter);
+ final SAMFileReader.QueryInterval[] queryIntervals = new SAMFileReader.QueryInterval[uniqueIntervals.size()];
+ for (int i = 0; i < queryIntervals.length; ++i) {
+ final Interval inputInterval = uniqueIntervals.get(i);
+ queryIntervals[i] = samReader.makeQueryInterval(inputInterval.getSequence(),
+ inputInterval.getStart(), inputInterval.getEnd());
+ }
+ return samReader.queryOverlapping(queryIntervals);
}
}
diff --git a/src/java/net/sf/samtools/AbstractBAMFileIndex.java b/src/java/net/sf/samtools/AbstractBAMFileIndex.java
index fc929b8..1f68704 100644
--- a/src/java/net/sf/samtools/AbstractBAMFileIndex.java
+++ b/src/java/net/sf/samtools/AbstractBAMFileIndex.java
@@ -68,7 +68,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
private SAMSequenceDictionary mBamDictionary = null;
protected AbstractBAMFileIndex(
- SeekableStream stream, SAMSequenceDictionary dictionary)
+ final SeekableStream stream, final SAMSequenceDictionary dictionary)
{
mBamDictionary = dictionary;
mIndexBuffer = new IndexStreamBuffer(stream);
@@ -78,7 +78,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
this(file, dictionary, true);
}
- protected AbstractBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, boolean useMemoryMapping) {
+ protected AbstractBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) {
mBamDictionary = dictionary;
mIndexBuffer = (useMemoryMapping ? new MemoryMappedFileBuffer(file) : new RandomAccessFileBuffer(file));
@@ -213,10 +213,10 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
* @param reference the reference of interest
* @return meta data for the reference
*/
- public BAMIndexMetaData getMetaData(int reference) {
+ public BAMIndexMetaData getMetaData(final int reference) {
seek(4);
- List<Chunk> metaDataChunks = new ArrayList<Chunk>();
+ final List<Chunk> metaDataChunks = new ArrayList<Chunk>();
final int sequenceCount = readInteger();
@@ -226,20 +226,16 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
skipToSequence(reference);
- int binCount = readInteger();
+ final int binCount = readInteger();
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
- // System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks);
- Chunk lastChunk = null;
if (indexBin == MAX_BINS) {
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = readLong();
final long chunkEnd = readLong();
- lastChunk = new Chunk(chunkBegin, chunkEnd);
- metaDataChunks.add(lastChunk);
+ metaDataChunks.add(new Chunk(chunkBegin, chunkEnd));
}
- continue;
} else {
skipBytes(16 * nChunks);
}
@@ -261,7 +257,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
skipToSequence(sequenceCount);
try { // in case of old index file without meta data
return readLong();
- } catch (Exception e) {
+ } catch (final Exception e) {
return null;
}
}
@@ -269,7 +265,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
protected BAMIndexContent query(final int referenceSequence, final int startPos, final int endPos) {
seek(4);
- List<Chunk> metaDataChunks = new ArrayList<Chunk>();
+ final List<Chunk> metaDataChunks = new ArrayList<Chunk>();
final int sequenceCount = readInteger();
@@ -284,13 +280,13 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
skipToSequence(referenceSequence);
- int binCount = readInteger();
+ final int binCount = readInteger();
boolean metaDataSeen = false;
- Bin[] bins = new Bin[getMaxBinNumberForReference(referenceSequence) +1];
+ final Bin[] bins = new Bin[getMaxBinNumberForReference(referenceSequence) +1];
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
- List<Chunk> chunks = new ArrayList<Chunk>(nChunks);
+ final List<Chunk> chunks = new ArrayList<Chunk>(nChunks);
// System.out.println("# bin[" + i + "] = " + indexBin + ", nChunks = " + nChunks);
Chunk lastChunk = null;
if (regionBins.get(indexBin)) {
@@ -314,7 +310,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
} else {
skipBytes(16 * nChunks);
}
- Bin bin = new Bin(referenceSequence, indexBin);
+ final Bin bin = new Bin(referenceSequence, indexBin);
bin.setChunkList(chunks);
bin.setLastChunk(lastChunk);
bins[indexBin] = bin;
@@ -348,7 +344,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
try {
final int sequenceLength = mBamDictionary.getSequence(reference).getSequenceLength();
return getMaxBinNumberForSequenceLength(sequenceLength);
- } catch (Exception e) {
+ } catch (final Exception e) {
return MAX_BINS;
}
}
@@ -356,7 +352,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
/**
* The maxiumum bin number for a reference sequence of a given length
*/
- static int getMaxBinNumberForSequenceLength(int sequenceLength) {
+ static int getMaxBinNumberForSequenceLength(final int sequenceLength) {
return getFirstBinInLevel(getNumIndexLevels() - 1) + (sequenceLength >> 14);
// return 4680 + (sequenceLength >> 14); // note 4680 = getFirstBinInLevel(getNumIndexLevels() - 1)
}
@@ -395,31 +391,11 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
return bitSet;
}
+ /**
+ * @deprecated Invoke net.sf.samtools.Chunk#optimizeChunkList(java.util.List<net.sf.samtools.Chunk>, long) directly.
+ */
protected List<Chunk> optimizeChunkList(final List<Chunk> chunks, final long minimumOffset) {
- Chunk lastChunk = null;
- Collections.sort(chunks);
- final List<Chunk> result = new ArrayList<Chunk>();
- for (final Chunk chunk : chunks) {
- if (chunk.getChunkEnd() <= minimumOffset) {
- continue; // linear index optimization
- }
- if (result.isEmpty()) {
- result.add(chunk);
- lastChunk = chunk;
- continue;
- }
- // Coalesce chunks that are in adjacent file blocks.
- // This is a performance optimization.
- if (!lastChunk.overlaps(chunk) && !lastChunk.isAdjacentTo(chunk)) {
- result.add(chunk);
- lastChunk = chunk;
- } else {
- if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) {
- lastChunk.setChunkEnd(chunk.getChunkEnd());
- }
- }
- }
- return result;
+ return Chunk.optimizeChunkList(chunks, minimumOffset);
}
private void skipToSequence(final int sequenceIndex) {
@@ -428,7 +404,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
for (int j = 0; j < nBins; j++) {
- final int bin = readInteger();
+ readInteger(); // bin
final int nChunks = readInteger();
// System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
skipBytes(16 * nChunks);
@@ -474,16 +450,16 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
private static class MemoryMappedFileBuffer extends IndexFileBuffer {
private MappedByteBuffer mFileBuffer;
- MemoryMappedFileBuffer(File file) {
+ MemoryMappedFileBuffer(final File file) {
try {
// Open the file stream.
- FileInputStream fileStream = new FileInputStream(file);
- FileChannel fileChannel = fileStream.getChannel();
+ final FileInputStream fileStream = new FileInputStream(file);
+ final FileChannel fileChannel = fileStream.getChannel();
mFileBuffer = fileChannel.map(FileChannel.MapMode.READ_ONLY, 0L, fileChannel.size());
mFileBuffer.order(ByteOrder.LITTLE_ENDIAN);
fileChannel.close();
fileStream.close();
- } catch (IOException exc) {
+ } catch (final IOException exc) {
throw new RuntimeIOException(exc.getMessage(), exc);
}
}
@@ -530,23 +506,23 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
private static final int PAGE_OFFSET_MASK = PAGE_SIZE-1;
private static final int PAGE_MASK = ~PAGE_OFFSET_MASK;
private static final int INVALID_PAGE = 1;
- private File mFile;
+ private final File mFile;
private RandomAccessFile mRandomAccessFile;
- private int mFileLength;
+ private final int mFileLength;
private int mFilePointer = 0;
private int mCurrentPage = INVALID_PAGE;
private final byte[] mBuffer = new byte[PAGE_SIZE];
- RandomAccessFileBuffer(File file) {
+ RandomAccessFileBuffer(final File file) {
mFile = file;
try {
mRandomAccessFile = new RandomAccessFile(file, "r");
- long fileLength = mRandomAccessFile.length();
+ final long fileLength = mRandomAccessFile.length();
if (fileLength > Integer.MAX_VALUE) {
throw new RuntimeException("BAM index file " + mFile + " is too large: " + fileLength);
}
mFileLength = (int) fileLength;
- } catch (IOException exc) {
+ } catch (final IOException exc) {
throw new RuntimeIOException(exc.getMessage(), exc);
}
}
@@ -582,8 +558,8 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
long readLong() {
// BAM index files are always 4-byte aligned, but not necessrily 8-byte aligned.
// So, rather than fooling with complex page logic we simply read the long in two 4-byte chunks.
- long lower = readInteger();
- long upper = readInteger();
+ final long lower = readInteger();
+ final long upper = readInteger();
return ((upper << 32) | (lower & 0xFFFFFFFFL));
}
@@ -601,14 +577,14 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
if (mRandomAccessFile != null) {
try {
mRandomAccessFile.close();
- } catch (IOException exc) {
+ } catch (final IOException exc) {
throw new RuntimeIOException(exc.getMessage(), exc);
}
mRandomAccessFile = null;
}
}
- private void loadPage(int filePosition) {
+ private void loadPage(final int filePosition) {
final int page = filePosition & PAGE_MASK;
if (page == mCurrentPage) {
return;
@@ -618,7 +594,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
final int readLength = Math.min(mFileLength - page, PAGE_SIZE);
mRandomAccessFile.readFully(mBuffer, 0, readLength);
mCurrentPage = page;
- } catch (IOException exc) {
+ } catch (final IOException exc) {
throw new RuntimeIOException("Exception reading BAM index file " + mFile + ": " + exc.getMessage(), exc);
}
}
@@ -628,7 +604,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
private final SeekableStream in;
private final ByteBuffer tmpBuf;
- public IndexStreamBuffer(SeekableStream s) {
+ public IndexStreamBuffer(final SeekableStream s) {
in = s;
tmpBuf = ByteBuffer.allocate(8); // Enough to fit a long.
tmpBuf.order(ByteOrder.LITTLE_ENDIAN);
@@ -636,42 +612,42 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
@Override public void close() {
try { in.close(); }
- catch (IOException e) { throw new RuntimeIOException(e); }
+ catch (final IOException e) { throw new RuntimeIOException(e); }
}
- @Override public void readBytes(byte[] bytes) {
+ @Override public void readBytes(final byte[] bytes) {
try { in.read(bytes); }
- catch (IOException e) { throw new RuntimeIOException(e); }
+ catch (final IOException e) { throw new RuntimeIOException(e); }
}
- @Override public void seek(int position) {
+ @Override public void seek(final int position) {
try { in.seek(position); }
- catch (IOException e) { throw new RuntimeIOException(e); }
+ catch (final IOException e) { throw new RuntimeIOException(e); }
}
@Override public int readInteger() {
try {
- int r = in.read(tmpBuf.array(), 0, 4);
+ final int r = in.read(tmpBuf.array(), 0, 4);
if (r != 4)
throw new RuntimeIOException("Expected 4 bytes, got " + r);
- } catch (IOException e) { throw new RuntimeIOException(e); }
+ } catch (final IOException e) { throw new RuntimeIOException(e); }
return tmpBuf.getInt(0);
}
@Override public long readLong() {
try {
- int r = in.read(tmpBuf.array(), 0, 8);
+ final int r = in.read(tmpBuf.array(), 0, 8);
if (r != 8)
throw new RuntimeIOException("Expected 8 bytes, got " + r);
- } catch (IOException e) { throw new RuntimeIOException(e); }
+ } catch (final IOException e) { throw new RuntimeIOException(e); }
return tmpBuf.getLong(0);
}
@Override public void skipBytes(final int count) {
try {
for (int s = count; s > 0;) {
- int skipped = (int)in.skip(s);
+ final int skipped = (int)in.skip(s);
if (skipped <= 0)
throw new RuntimeIOException("Failed to skip " + s);
s -= skipped;
}
- } catch (IOException e) { throw new RuntimeIOException(e); }
+ } catch (final IOException e) { throw new RuntimeIOException(e); }
}
}
}
diff --git a/src/java/net/sf/samtools/BAMFileReader.java b/src/java/net/sf/samtools/BAMFileReader.java
index 7bdc637..a3f6c18 100644
--- a/src/java/net/sf/samtools/BAMFileReader.java
+++ b/src/java/net/sf/samtools/BAMFileReader.java
@@ -24,11 +24,14 @@
package net.sf.samtools;
-import net.sf.samtools.util.*;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.seekablestream.SeekableStream;
+import net.sf.samtools.util.*;
-import java.io.*;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@@ -54,6 +57,7 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
private BAMIndex mIndex = null;
private long mFirstRecordPointer = 0;
+ // If non-null, there is an unclosed iterator extant.
private CloseableIterator<SAMRecord> mCurrentIterator = null;
// If true, all SAMRecords are fully decoded as they are read.
@@ -283,7 +287,7 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
if (mIsSeekable) {
try {
mCompressedInputStream.seek(mFirstRecordPointer);
- } catch (IOException exc) {
+ } catch (final IOException exc) {
throw new RuntimeException(exc.getMessage(), exc);
}
}
@@ -349,7 +353,45 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
if (!mIsSeekable) {
throw new UnsupportedOperationException("Cannot query stream-based BAM file");
}
- mCurrentIterator = createIndexIterator(sequence, start, end, contained? QueryType.CONTAINED: QueryType.OVERLAPPING);
+ final int referenceIndex = mFileHeader.getSequenceIndex(sequence);
+ if (referenceIndex == -1) {
+ mCurrentIterator = new EmptyBamIterator();
+ } else {
+ final SAMFileReader.QueryInterval[] queryIntervals = {new SAMFileReader.QueryInterval(referenceIndex, start, end)};
+ mCurrentIterator = createIndexIterator(queryIntervals, contained);
+ }
+ return mCurrentIterator;
+ }
+
+ /**
+ * Prepare to iterate through the SAMRecords that match any of the given intervals.
+ * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
+ * before calling any of the methods that return an iterator.
+ *
+ * Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
+ * purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
+ * matches the specified interval.
+ *
+ * Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect
+ * resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
+ *
+ * @param intervals list of intervals to be queried. Must be optimized.
+ * @param contained If true, the alignments for the SAMRecords must be completely contained in the interval
+ * specified by start and end. If false, the SAMRecords need only overlap the interval.
+ * @return Iterator for the matching SAMRecords
+ * @see net.sf.samtools.SAMFileReader.QueryInterval#optimizeIntervals(net.sf.samtools.SAMFileReader.QueryInterval[])
+ */
+ CloseableIterator<SAMRecord> query(final SAMFileReader.QueryInterval[] intervals, final boolean contained) {
+ if (mStream == null) {
+ throw new IllegalStateException("File reader is closed");
+ }
+ if (mCurrentIterator != null) {
+ throw new IllegalStateException("Iteration in progress");
+ }
+ if (!mIsSeekable) {
+ throw new UnsupportedOperationException("Cannot query stream-based BAM file");
+ }
+ mCurrentIterator = createIndexIterator(intervals, contained);
return mCurrentIterator;
}
@@ -379,10 +421,22 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
if (!mIsSeekable) {
throw new UnsupportedOperationException("Cannot query stream-based BAM file");
}
- mCurrentIterator = createIndexIterator(sequence, start, -1, QueryType.STARTING_AT);
+ final int referenceIndex = mFileHeader.getSequenceIndex(sequence);
+ if (referenceIndex == -1) {
+ mCurrentIterator = new EmptyBamIterator();
+ } else {
+ mCurrentIterator = createStartingAtIndexIterator(referenceIndex, start);
+ }
return mCurrentIterator;
}
+ /**
+ * Prepare to iterate through the SAMRecords that are unmapped and do not have a reference name or alignment start.
+ * Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
+ * before calling any of the methods that return an iterator.
+ *
+ * @return Iterator for the matching SAMRecords.
+ */
public CloseableIterator<SAMRecord> queryUnmapped() {
if (mStream == null) {
throw new IllegalStateException("File reader is closed");
@@ -403,7 +457,7 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
}
mCurrentIterator = new BAMFileIndexUnmappedIterator();
return mCurrentIterator;
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new RuntimeException("IOException seeking to unmapped reads", e);
}
}
@@ -475,14 +529,54 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
}
/**
+ * Encapsulates the restriction that only one iterator may be open at a time.
+ */
+ private abstract class AbstractBamIterator implements CloseableIterator<SAMRecord> {
+
+ private boolean isClosed = false;
+
+ public void close() {
+ if (!isClosed) {
+ if (mCurrentIterator != null && this != mCurrentIterator) {
+ throw new IllegalStateException("Attempt to close non-current iterator");
+ }
+ mCurrentIterator = null;
+ isClosed = true;
+ }
+ }
+
+ protected void assertOpen() {
+ if (isClosed) throw new AssertionError("Iterator has been closed");
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException("Not supported: remove");
+ }
+
+ }
+
+ private class EmptyBamIterator extends AbstractBamIterator {
+ @Override
+ public boolean hasNext() {
+ return false;
+ }
+
+ @Override
+ public SAMRecord next() {
+ throw new NoSuchElementException("next called on empty iterator");
+ }
+ }
+
+
+
+ /**
* Iterator for non-indexed sequential iteration through all SAMRecords in file.
* Starting point of iteration is wherever current file position is when the iterator is constructed.
*/
- private class BAMFileIterator implements CloseableIterator<SAMRecord> {
+ private class BAMFileIterator extends AbstractBamIterator {
private SAMRecord mNextRecord = null;
private final BAMRecordCodec bamRecordCodec;
private long samRecordIndex = 0; // Records at what position (counted in records) we are at in the file
- private boolean isClosed = false;
BAMFileIterator() {
this(true);
@@ -501,32 +595,18 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
}
}
- public void close() {
- if (!isClosed) {
- if (mCurrentIterator != null && this != mCurrentIterator) {
- throw new IllegalStateException("Attempt to close non-current iterator");
- }
- mCurrentIterator = null;
- isClosed = true;
- }
- }
-
public boolean hasNext() {
- if (isClosed) throw new IllegalStateException("Iterator has been closed");
+ assertOpen();
return (mNextRecord != null);
}
public SAMRecord next() {
- if (isClosed) throw new IllegalStateException("Iterator has been closed");
+ assertOpen();
final SAMRecord result = mNextRecord;
advance();
return result;
}
- public void remove() {
- throw new UnsupportedOperationException("Not supported: remove");
- }
-
void advance() {
try {
mNextRecord = getNextRecord();
@@ -545,7 +625,7 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
if (eagerDecode && mNextRecord != null) {
mNextRecord.eagerDecode();
}
- } catch (IOException exc) {
+ } catch (final IOException exc) {
throw new RuntimeException(exc.getMessage(), exc);
}
}
@@ -573,25 +653,65 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
}
/**
- * Prepare to iterate through SAMRecords matching the target interval.
- * @param sequence Desired reference sequence.
- * @param start 1-based start of target interval, inclusive.
- * @param end 1-based end of target interval, inclusive.
- * @param queryType contained, overlapping, or starting-at query.
+ * Prepare to iterate through SAMRecords in the given reference that start exactly at the given start coordinate.
+ * @param referenceIndex Desired reference sequence.
+ * @param start 1-based alignment start.
+ */
+ private CloseableIterator<SAMRecord> createStartingAtIndexIterator(final int referenceIndex,
+ final int start) {
+
+ // Hit the index to determine the chunk boundaries for the required data.
+ final BAMIndex fileIndex = getIndex();
+ final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, 0);
+ final long[] filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null;
+
+ // Create an iterator over the above chunk boundaries.
+ final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);
+
+ // Add some preprocessing filters for edge-case reads that don't fit into this
+ // query type.
+ return new BAMQueryFilteringIterator(iterator,new BAMStartingAtIteratorFilter(referenceIndex,start));
+ }
+
+ /**
+ * @throws java.lang.IllegalArgumentException if the intervals are not optimized
+ * @see net.sf.samtools.SAMFileReader.QueryInterval#optimizeIntervals(net.sf.samtools.SAMFileReader.QueryInterval[])
*/
- private CloseableIterator<SAMRecord> createIndexIterator(final String sequence,
- final int start,
- final int end,
- final QueryType queryType) {
- long[] filePointers = null;
+ private void assertIntervalsOptimized(final SAMFileReader.QueryInterval[] intervals) {
+ if (intervals.length == 0) return;
+ for (int i = 1; i < intervals.length; ++i) {
+ final SAMFileReader.QueryInterval prev = intervals[i-1];
+ final SAMFileReader.QueryInterval thisInterval = intervals[i];
+ if (prev.compareTo(thisInterval) >= 0) {
+ throw new IllegalArgumentException(String.format("List of intervals is not sorted: %s >= %s", prev, thisInterval));
+ }
+ if (prev.overlaps(thisInterval)) {
+ throw new IllegalArgumentException(String.format("List of intervals is not optimized: %s intersects %s", prev, thisInterval));
+ }
+ if (prev.abuts(thisInterval)) {
+ throw new IllegalArgumentException(String.format("List of intervals is not optimized: %s abuts %s", prev, thisInterval));
+ }
+ }
+ }
+
+ private CloseableIterator<SAMRecord> createIndexIterator(final SAMFileReader.QueryInterval[] intervals,
+ final boolean contained) {
+
+ assertIntervalsOptimized(intervals);
// Hit the index to determine the chunk boundaries for the required data.
- final SAMFileHeader fileHeader = getFileHeader();
- final int referenceIndex = fileHeader.getSequenceIndex(sequence);
- if (referenceIndex != -1) {
- final BAMIndex fileIndex = getIndex();
- final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, end);
- filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null;
+ final BAMFileSpan[] inputSpans = new BAMFileSpan[intervals.length];
+ final BAMIndex fileIndex = getIndex();
+ for (int i = 0; i < intervals.length; ++i) {
+ final SAMFileReader.QueryInterval interval = intervals[i];
+ final BAMFileSpan span = fileIndex.getSpanOverlapping(interval.referenceIndex, interval.start, interval.end);
+ inputSpans[i] = span;
+ }
+ final long[] filePointers;
+ if (inputSpans.length > 0) {
+ filePointers = BAMFileSpan.merge(inputSpans).toCoordinateArray();
+ } else {
+ filePointers = null;
}
// Create an iterator over the above chunk boundaries.
@@ -599,10 +719,11 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
// Add some preprocessing filters for edge-case reads that don't fit into this
// query type.
- return new BAMQueryFilteringIterator(iterator,sequence,start,end,queryType);
+ return new BAMQueryFilteringIterator(iterator, new BAMQueryMultipleIntervalsIteratorFilter(intervals, contained));
}
- enum QueryType {CONTAINED, OVERLAPPING, STARTING_AT}
+
+
/**
* Look for BAM index file according to standard naming convention.
@@ -630,8 +751,11 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
} else {
return null;
}
- }
+ }
+ /**
+ * Iterate over the SAMRecords defined by the sections of the file described in the ctor argument.
+ */
private class BAMFileIndexIterator extends BAMFileIterator {
private long[] mFilePointers = null;
@@ -667,37 +791,23 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
}
/**
- * A decorating iterator that filters out records that are outside the bounds of the
- * given query parameters.
+ * Pull SAMRecords from a coordinate-sorted iterator, and filter out any that do not match the filter.
*/
- private class BAMQueryFilteringIterator implements CloseableIterator<SAMRecord> {
+ public class BAMQueryFilteringIterator extends AbstractBamIterator {
/**
* The wrapped iterator.
*/
- private final CloseableIterator<SAMRecord> wrappedIterator;
-
+ protected final CloseableIterator<SAMRecord> wrappedIterator;
/**
* The next record to be returned. Will be null if no such record exists.
*/
- private SAMRecord mNextRecord;
-
- private final int mReferenceIndex;
- private final int mRegionStart;
- private final int mRegionEnd;
- private final QueryType mQueryType;
- private boolean isClosed = false;
+ protected SAMRecord mNextRecord;
+ private final BAMIteratorFilter iteratorFilter;
- public BAMQueryFilteringIterator(final CloseableIterator<SAMRecord> iterator,final String sequence, final int start, final int end, final QueryType queryType) {
+ public BAMQueryFilteringIterator(final CloseableIterator<SAMRecord> iterator,
+ final BAMIteratorFilter iteratorFilter) {
this.wrappedIterator = iterator;
- final SAMFileHeader fileHeader = getFileHeader();
- mReferenceIndex = fileHeader.getSequenceIndex(sequence);
- mRegionStart = start;
- if (queryType == QueryType.STARTING_AT) {
- mRegionEnd = mRegionStart;
- } else {
- mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end;
- }
- mQueryType = queryType;
+ this.iteratorFilter = iteratorFilter;
mNextRecord = advance();
}
@@ -705,7 +815,7 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
* Returns true if a next element exists; false otherwise.
*/
public boolean hasNext() {
- if (isClosed) throw new IllegalStateException("Iterator has been closed");
+ assertOpen();
return mNextRecord != null;
}
@@ -721,26 +831,6 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
return currentRead;
}
- /**
- * Closes down the existing iterator.
- */
- public void close() {
- if (!isClosed) {
- if (this != mCurrentIterator) {
- throw new IllegalStateException("Attempt to close non-current iterator");
- }
- mCurrentIterator = null;
- isClosed = true;
- }
- }
-
- /**
- * @throws UnsupportedOperationException always.
- */
- public void remove() {
- throw new UnsupportedOperationException("Not supported: remove");
- }
-
SAMRecord advance() {
while (true) {
// Pull next record from stream
@@ -748,51 +838,64 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
return null;
final SAMRecord record = wrappedIterator.next();
- // If beyond the end of this reference sequence, end iteration
- final int referenceIndex = record.getReferenceIndex();
- if (referenceIndex != mReferenceIndex) {
- if (referenceIndex < 0 ||
- referenceIndex > mReferenceIndex) {
- return null;
- }
- // If before this reference sequence, continue
- continue;
- }
- if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) {
- // Quick exit to avoid expensive alignment end calculation
- return record;
- }
- final int alignmentStart = record.getAlignmentStart();
- // If read is unmapped but has a coordinate, return it if the coordinate is within
- // the query region, regardless of whether the mapped mate will be returned.
- final int alignmentEnd;
- if (mQueryType == QueryType.STARTING_AT) {
- alignmentEnd = -1;
- } else {
- alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START?
- record.getAlignmentEnd(): alignmentStart);
+ switch (iteratorFilter.compareToFilter(record)) {
+ case MATCHES_FILTER: return record;
+ case STOP_ITERATION: return null;
+ case CONTINUE_ITERATION: break; // keep looping
+ default: throw new SAMException("Unexpected return from compareToFilter");
}
+ }
+ }
+ }
- if (alignmentStart > mRegionEnd) {
- // If scanned beyond target region, end iteration
- return null;
- }
- // Filter for overlap with region
- if (mQueryType == QueryType.CONTAINED) {
- if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) {
- return record;
- }
- } else if (mQueryType == QueryType.OVERLAPPING) {
- if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) {
- return record;
- }
- } else {
- if (alignmentStart == mRegionStart) {
- return record;
- }
- }
+ interface BAMIteratorFilter {
+ /**
+ * Determine if given record passes the filter, and if it does not, whether iteration should continue
+ * or if this record is beyond the region(s) of interest.
+ */
+ FilteringIteratorState compareToFilter(final SAMRecord record);
+ }
+
+ /**
+ * A decorating iterator that filters out records that do not match the given reference and start position.
+ */
+ private class BAMStartingAtIteratorFilter implements BAMIteratorFilter {
+
+ private final int mReferenceIndex;
+ private final int mRegionStart;
+
+ public BAMStartingAtIteratorFilter(final int referenceIndex, final int start) {
+ mReferenceIndex = referenceIndex;
+ mRegionStart = start;
+ }
+
+ /**
+ *
+ * @return MATCHES_FILTER if this record matches the filter;
+ * CONTINUE_ITERATION if does not match filter but iteration should continue;
+ * STOP_ITERATION if does not match filter and iteration should end.
+ */
+ @Override
+ public FilteringIteratorState compareToFilter(final SAMRecord record) {
+ // If beyond the end of this reference sequence, end iteration
+ final int referenceIndex = record.getReferenceIndex();
+ if (referenceIndex < 0 || referenceIndex > mReferenceIndex) {
+ return FilteringIteratorState.STOP_ITERATION;
+ } else if (referenceIndex < mReferenceIndex) {
+ // If before this reference sequence, continue
+ return FilteringIteratorState.CONTINUE_ITERATION;
+ }
+ final int alignmentStart = record.getAlignmentStart();
+ if (alignmentStart > mRegionStart) {
+ // If scanned beyond target region, end iteration
+ return FilteringIteratorState.STOP_ITERATION;
+ } else if (alignmentStart == mRegionStart) {
+ return FilteringIteratorState.MATCHES_FILTER;
+ } else {
+ return FilteringIteratorState.CONTINUE_ITERATION;
}
}
+
}
private class BAMFileIndexUnmappedIterator extends BAMFileIterator {
@@ -803,4 +906,64 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
}
}
+ /**
+ * Filters out records that do not match any of the given intervals and query type.
+ */
+ private class BAMQueryMultipleIntervalsIteratorFilter implements BAMIteratorFilter {
+ final SAMFileReader.QueryInterval[] intervals;
+ final boolean contained;
+ int intervalIndex = 0;
+
+
+ public BAMQueryMultipleIntervalsIteratorFilter(final SAMFileReader.QueryInterval[] intervals,
+ final boolean contained) {
+ this.contained = contained;
+ this.intervals = intervals;
+ }
+
+ @Override
+ public FilteringIteratorState compareToFilter(final SAMRecord record) {
+ while (intervalIndex < intervals.length) {
+ final IntervalComparison comparison = compareIntervalToRecord(intervals[intervalIndex], record);
+ switch (comparison) {
+ // Interval is before SAMRecord. Try next interval;
+ case BEFORE: ++intervalIndex; break;
+ // Interval is after SAMRecord. Keep scanning forward in SAMRecords
+ case AFTER: return FilteringIteratorState.CONTINUE_ITERATION;
+ // Found a good record
+ case CONTAINED: return FilteringIteratorState.MATCHES_FILTER;
+ // Either found a good record, or else keep scanning SAMRecords
+ case OVERLAPPING: return
+ (contained ? FilteringIteratorState.CONTINUE_ITERATION : FilteringIteratorState.MATCHES_FILTER);
+ }
+ }
+ // Went past the last interval
+ return FilteringIteratorState.STOP_ITERATION;
+ }
+
+ private IntervalComparison compareIntervalToRecord(final SAMFileReader.QueryInterval interval, final SAMRecord record) {
+ // interval.end <= 0 implies the end of the reference sequence.
+ final int intervalEnd = (interval.end <= 0? Integer.MAX_VALUE: interval.end);
+
+ if (interval.referenceIndex < record.getReferenceIndex()) return IntervalComparison.BEFORE;
+ else if (interval.referenceIndex > record.getReferenceIndex()) return IntervalComparison.AFTER;
+ else if (intervalEnd < record.getAlignmentStart()) return IntervalComparison.BEFORE;
+ else if (record.getAlignmentEnd() < interval.start) return IntervalComparison.AFTER;
+ else if (CoordMath.encloses(interval.start, intervalEnd, record.getAlignmentStart(), record.getAlignmentEnd())) {
+ return IntervalComparison.CONTAINED;
+ } else return IntervalComparison.OVERLAPPING;
+ }
+ }
+
+ private enum IntervalComparison {
+ BEFORE, AFTER, OVERLAPPING, CONTAINED
+ }
+
+ /**
+ * Type returned by BAMIteratorFilter that tells BAMQueryFilteringIterator how to handle each SAMRecord.
+ */
+ private enum FilteringIteratorState {
+ MATCHES_FILTER, STOP_ITERATION, CONTINUE_ITERATION
+
+ }
}
diff --git a/src/java/net/sf/samtools/CachingBAMFileIndex.java b/src/java/net/sf/samtools/CachingBAMFileIndex.java
index 91c684d..516ca77 100644
--- a/src/java/net/sf/samtools/CachingBAMFileIndex.java
+++ b/src/java/net/sf/samtools/CachingBAMFileIndex.java
@@ -35,17 +35,17 @@ import java.util.*;
class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMIndex
{
private Integer mLastReferenceRetrieved = null;
- private WeakHashMap<Integer,BAMIndexContent> mQueriesByReference = new WeakHashMap<Integer,BAMIndexContent>();
+ private final WeakHashMap<Integer,BAMIndexContent> mQueriesByReference = new WeakHashMap<Integer,BAMIndexContent>();
- public CachingBAMFileIndex(final File file, SAMSequenceDictionary dictionary) {
+ public CachingBAMFileIndex(final File file, final SAMSequenceDictionary dictionary) {
super(file, dictionary);
}
- public CachingBAMFileIndex(final SeekableStream stream, SAMSequenceDictionary dictionary) {
+ public CachingBAMFileIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) {
super(stream, dictionary);
}
- public CachingBAMFileIndex(final File file, SAMSequenceDictionary dictionary, boolean useMemoryMapping) {
+ public CachingBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) {
super(file, dictionary, useMemoryMapping);
}
@@ -58,16 +58,16 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
* in a range that can be scanned to find SAMRecords that overlap the given positions.
*/
public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) {
- BAMIndexContent queryResults = getQueryResults(referenceIndex);
+ final BAMIndexContent queryResults = getQueryResults(referenceIndex);
if(queryResults == null)
return null;
- BinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos);
+ final BinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos);
// System.out.println("# Sequence target TID: " + referenceIndex);
- List<Bin> bins = new ArrayList<Bin>();
- for(Bin bin: queryResults.getBins()) {
+ final List<Bin> bins = new ArrayList<Bin>();
+ for(final Bin bin: queryResults.getBins()) {
if (overlappingBins.getBins().get(bin.getBinNumber()))
bins.add(bin);
}
@@ -77,8 +77,8 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
}
List<Chunk> chunkList = new ArrayList<Chunk>();
- for(Bin bin: bins) {
- for(Chunk chunk: bin.getChunkList())
+ for(final Bin bin: bins) {
+ for(final Chunk chunk: bin.getChunkList())
chunkList.add(chunk.clone());
}
@@ -86,7 +86,7 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
return null;
}
- chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
+ chunkList = Chunk.optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
return new BAMFileSpan(chunkList);
}
@@ -115,7 +115,7 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
return null;
final int referenceSequence = bin.getReferenceSequence();
- BAMIndexContent indexQuery = getQueryResults(referenceSequence);
+ final BAMIndexContent indexQuery = getQueryResults(referenceSequence);
if(indexQuery == null)
return null;
@@ -124,7 +124,7 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
final int firstLocusInBin = getFirstLocusInBin(bin);
// Add the specified bin to the tree if it exists.
- List<Bin> binTree = new ArrayList<Bin>();
+ final List<Bin> binTree = new ArrayList<Bin>();
if(indexQuery.containsBin(bin))
binTree.add(indexQuery.getBins().getBin(bin.getBinNumber()));
@@ -133,19 +133,19 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
final int binStart = getFirstBinInLevel(currentBinLevel);
final int binWidth = getMaxAddressibleGenomicLocation()/getLevelSize(currentBinLevel);
final int binNumber = firstLocusInBin/binWidth + binStart;
- Bin parentBin = indexQuery.getBins().getBin(binNumber);
+ final Bin parentBin = indexQuery.getBins().getBin(binNumber);
if(parentBin != null && indexQuery.containsBin(parentBin))
binTree.add(parentBin);
}
List<Chunk> chunkList = new ArrayList<Chunk>();
- for(Bin coveringBin: binTree) {
- for(Chunk chunk: coveringBin.getChunkList())
+ for(final Bin coveringBin: binTree) {
+ for(final Chunk chunk: coveringBin.getChunkList())
chunkList.add(chunk.clone());
}
final int start = getFirstLocusInBin(bin);
- chunkList = optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start));
+ chunkList = Chunk.optimizeChunkList(chunkList,indexQuery.getLinearIndex().getMinimumOffset(start));
return new BAMFileSpan(chunkList);
}
diff --git a/src/java/net/sf/samtools/Chunk.java b/src/java/net/sf/samtools/Chunk.java
index 9dc8da2..0555f56 100644
--- a/src/java/net/sf/samtools/Chunk.java
+++ b/src/java/net/sf/samtools/Chunk.java
@@ -3,10 +3,7 @@ package net.sf.samtools;
import net.sf.samtools.util.BlockCompressedFilePointerUtil;
import java.io.Serializable;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.Collections;
-import java.util.List;
+import java.util.*;
/**
* A [start,stop) file pointer pairing into the BAM file, stored
@@ -86,16 +83,16 @@ class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
* @return True if the chunks overlap. Returns false if the two chunks abut or are disjoint.
*/
public boolean overlaps(final Chunk other) {
- int comparison = this.compareTo(other);
+ final int comparison = this.compareTo(other);
if(comparison == 0)
return true;
// "sort" the two chunks using the comparator.
- Chunk leftMost = comparison==-1 ? this : other;
- Chunk rightMost = comparison==1 ? this : other;
+ final Chunk leftMost = comparison==-1 ? this : other;
+ final Chunk rightMost = comparison==1 ? this : other;
- long leftMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(leftMost.getChunkEnd());
- long rightMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(rightMost.getChunkStart());
+ final long leftMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(leftMost.getChunkEnd());
+ final long rightMostBlockAddress = BlockCompressedFilePointerUtil.getBlockAddress(rightMost.getChunkStart());
// If the left block's address is after the right block's address, compare the two blocks.
// If the two blocks are identical, compare the block offsets.
@@ -103,8 +100,8 @@ class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
if(leftMostBlockAddress > rightMostBlockAddress)
return true;
else if(leftMostBlockAddress == rightMostBlockAddress) {
- int leftMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(leftMost.getChunkEnd());
- int rightMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(rightMost.getChunkStart());
+ final int leftMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(leftMost.getChunkEnd());
+ final int rightMostOffset = BlockCompressedFilePointerUtil.getBlockOffset(rightMost.getChunkStart());
return leftMostOffset > rightMostOffset;
}
else
@@ -136,4 +133,35 @@ class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
public String toString() {
return String.format("%d:%d-%d:%d",mChunkStart >> 16,mChunkStart & 0xFFFF,mChunkEnd >> 16,mChunkEnd & 0xFFFF);
}
+
+ /**
+ * @param minimumOffset Discard chunks that end before this file offset.
+ * @return sorted list of chunks in which adjacent chunks are coalesced.
+ */
+ public static List<Chunk> optimizeChunkList(final List<Chunk> chunks, final long minimumOffset) {
+ Chunk lastChunk = null;
+ Collections.sort(chunks);
+ final List<Chunk> result = new ArrayList<Chunk>();
+ for (final Chunk chunk : chunks) {
+ if (chunk.getChunkEnd() <= minimumOffset) {
+ continue; // linear index optimization
+ }
+ if (result.isEmpty()) {
+ result.add(chunk);
+ lastChunk = chunk;
+ continue;
+ }
+ // Coalesce chunks that are in adjacent file blocks.
+ // This is a performance optimization.
+ if (!lastChunk.overlaps(chunk) && !lastChunk.isAdjacentTo(chunk)) {
+ result.add(chunk);
+ lastChunk = chunk;
+ } else {
+ if (chunk.getChunkEnd() > lastChunk.getChunkEnd()) {
+ lastChunk.setChunkEnd(chunk.getChunkEnd());
+ }
+ }
+ }
+ return result;
+ }
}
diff --git a/src/java/net/sf/samtools/DiskBasedBAMFileIndex.java b/src/java/net/sf/samtools/DiskBasedBAMFileIndex.java
index 84bff64..7a00666 100644
--- a/src/java/net/sf/samtools/DiskBasedBAMFileIndex.java
+++ b/src/java/net/sf/samtools/DiskBasedBAMFileIndex.java
@@ -34,15 +34,15 @@ import java.util.List;
*/
class DiskBasedBAMFileIndex extends AbstractBAMFileIndex
{
- DiskBasedBAMFileIndex(final File file, SAMSequenceDictionary dictionary) {
+ DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary) {
super(file, dictionary);
}
- DiskBasedBAMFileIndex(final SeekableStream stream, SAMSequenceDictionary dictionary) {
+ DiskBasedBAMFileIndex(final SeekableStream stream, final SAMSequenceDictionary dictionary) {
super(stream, dictionary);
}
- DiskBasedBAMFileIndex(final File file, SAMSequenceDictionary dictionary, boolean useMemoryMapping) {
+ DiskBasedBAMFileIndex(final File file, final SAMSequenceDictionary dictionary, final boolean useMemoryMapping) {
super(file, dictionary, useMemoryMapping);
}
@@ -57,19 +57,19 @@ class DiskBasedBAMFileIndex extends AbstractBAMFileIndex
* the range that may contain the indicated SAMRecords.
*/
public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) {
- BAMIndexContent queryResults = query(referenceIndex,startPos,endPos);
+ final BAMIndexContent queryResults = query(referenceIndex,startPos,endPos);
if(queryResults == null)
return null;
List<Chunk> chunkList = new ArrayList<Chunk>();
- for(Chunk chunk: queryResults.getAllChunks())
+ for(final Chunk chunk: queryResults.getAllChunks())
chunkList.add(chunk.clone());
- chunkList = optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
+ chunkList = Chunk.optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
return new BAMFileSpan(chunkList);
}
- protected BAMIndexContent getQueryResults(int reference){
+ protected BAMIndexContent getQueryResults(final int reference){
throw new UnsupportedOperationException();
// todo: there ought to be a way to support this using the first startPos for the reference and the last
// return query(reference, 1, -1);
diff --git a/src/java/net/sf/samtools/SAMFileHeader.java b/src/java/net/sf/samtools/SAMFileHeader.java
index a40eb25..a0abe96 100644
--- a/src/java/net/sf/samtools/SAMFileHeader.java
+++ b/src/java/net/sf/samtools/SAMFileHeader.java
@@ -233,10 +233,11 @@ public class SAMFileHeader extends AbstractSAMHeaderRecord
}
public SortOrder getSortOrder() {
- if (getAttribute("SO") == null) {
+ final String so = getAttribute("SO");
+ if (so == null || so.equals("unknown")) {
return SortOrder.unsorted;
}
- return SortOrder.valueOf((String)getAttribute("SO"));
+ return SortOrder.valueOf((String) so);
}
public void setSortOrder(final SortOrder so) {
diff --git a/src/java/net/sf/samtools/SAMFileReader.java b/src/java/net/sf/samtools/SAMFileReader.java
index 4a16b9a..d3fd9db 100644
--- a/src/java/net/sf/samtools/SAMFileReader.java
+++ b/src/java/net/sf/samtools/SAMFileReader.java
@@ -30,7 +30,10 @@ import net.sf.samtools.seekablestream.SeekableHTTPStream;
import net.sf.samtools.seekablestream.SeekableStream;
import java.io.*;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
import java.util.zip.GZIPInputStream;
import java.net.URL;
@@ -111,6 +114,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
abstract CloseableIterator<SAMRecord> getIterator(SAMFileSpan fileSpan);
abstract SAMFileSpan getFilePointerSpanningReads();
abstract CloseableIterator<SAMRecord> query(String sequence, int start, int end, boolean contained);
+ abstract CloseableIterator<SAMRecord> query(QueryInterval[] intervals, boolean contained);
abstract CloseableIterator<SAMRecord> queryAlignmentStart(String sequence, int start);
abstract public CloseableIterator<SAMRecord> queryUnmapped();
abstract void close();
@@ -301,7 +305,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
* @throws SAMException if no such index is available.
*/
public BrowseableBAMIndex getBrowseableIndex() {
- BAMIndex index = getIndex();
+ final BAMIndex index = getIndex();
if(!(index instanceof BrowseableBAMIndex))
throw new SAMException("Cannot return index: index created by BAM is not browseable.");
return BrowseableBAMIndex.class.cast(index);
@@ -415,6 +419,83 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
return query(sequence, start, end, true);
}
+ /**
+ * Iterate over records that match one of the given intervals. This may be more efficient than querying
+ * each interval separately, because reading the same SAMRecords multiple times is avoided.
+ * <p/>
+ * Only valid to call this if hasIndex() == true.
+ * <p/>
+ * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * a second iteration, the first one must be closed first. You can use a second SAMFileReader to iterate
+ * in parallel over the same underlying file.
+ * <p/>
+ * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
+ * and then discarded because they do not match an interval of interest.
+ * <p/>
+ * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
+ * is in the query region.
+ *
+ * @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
+ * and abutting intervals merged. This can be done with
+ * net.sf.samtools.SAMFileReader.QueryInterval#optimizeIntervals(net.sf.samtools.SAMFileReader.QueryInterval[])
+ * @param contained If true, each SAMRecord returned will have its alignment completely contained in one of the
+ * intervals of interest. If false, the alignment of the returned SAMRecords need only overlap one of
+ * the intervals of interest.
+ * @return Iterator over the SAMRecords matching the interval.
+ */
+ public SAMRecordIterator query(final QueryInterval[] intervals, final boolean contained) {
+ return new AssertableIterator(mReader.query(intervals, contained));
+ }
+
+ /**
+ * Iterate over records that overlap any of the given intervals. This may be more efficient than querying
+ * each interval separately, because reading the same SAMRecords multiple times is avoided.
+ * <p/>
+ * Only valid to call this if hasIndex() == true.
+ * <p/>
+ * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * a second iteration, the first one must be closed first.
+ * <p/>
+ * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
+ * and then discarded because they do not match the interval of interest.
+ * <p/>
+ * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
+ * is in the query region.
+ *
+ * @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
+ * and abutting intervals merged. This can be done with
+ * net.sf.samtools.SAMFileReader.QueryInterval#optimizeIntervals(net.sf.samtools.SAMFileReader.QueryInterval[])
+ * @return Iterator over the SAMRecords overlapping any of the intervals.
+ */
+ public SAMRecordIterator queryOverlapping(final QueryInterval[] intervals) {
+ return query(intervals, false);
+ }
+
+ /**
+ * Iterate over records that are contained in the given interval. This may be more efficient than querying
+ * each interval separately, because multiple reads of the same SAMRecords is avoided.
+ * <p/>
+ * Only valid to call this if hasIndex() == true.
+ * <p/>
+ * Only a single open iterator on a given SAMFileReader may be extant at any one time. If you want to start
+ * a second iteration, the first one must be closed first.
+ * <p/>
+ * Note that indexed lookup is not perfectly efficient in terms of disk I/O. I.e. some SAMRecords may be read
+ * and then discarded because they do not match the interval of interest.
+ * <p/>
+ * Note that an unmapped read will be returned by this call if it has a coordinate for the purpose of sorting that
+ * is in the query region.
+ *
+ * @param intervals Intervals to be queried. The intervals must be optimized, i.e. in order, with overlapping
+ * and abutting intervals merged. This can be done with
+ * net.sf.samtools.SAMFileReader.QueryInterval#optimizeIntervals(net.sf.samtools.SAMFileReader.QueryInterval[])
+ * @return Iterator over the SAMRecords contained in any of the intervals.
+ */
+ public SAMRecordIterator queryContained(final QueryInterval[] intervals) {
+ return query(intervals, true);
+ }
+
+
public SAMRecordIterator queryUnmapped() {
return new AssertableIterator(mReader.queryUnmapped());
}
@@ -509,7 +590,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
}
setValidationStringency(validationStringency);
}
- catch (IOException e) {
+ catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
@@ -526,14 +607,14 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
}
setValidationStringency(validationStringency);
}
- catch (IOException e) {
+ catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
// Its too expensive to examine the remote file to determine type.
// Rely on file extension.
- private boolean streamLooksLikeBam(SeekableStream strm) {
+ private boolean streamLooksLikeBam(final SeekableStream strm) {
String source = strm.getSource();
if(source == null) return true;
source = source.toLowerCase();
@@ -542,13 +623,13 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
return source.endsWith(".bam") || source.contains(".bam?")|| source.contains(".bam&") || source.contains(".bam%26");
}
- private void init(final InputStream stream, final File file, File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) {
+ private void init(final InputStream stream, final File file, final File indexFile, final boolean eagerDecode, final ValidationStringency validationStringency) {
if (stream != null && file != null) throw new IllegalArgumentException("stream and file are mutually exclusive");
this.samFile = file;
try {
final BufferedInputStream bufferedStream;
- if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file), IOUtil.STANDARD_BUFFER_SIZE);
+ if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE);
else bufferedStream = IOUtil.toBufferedStream(stream);
if (isBAMFile(bufferedStream)) {
mIsBinary = true;
@@ -580,7 +661,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
setValidationStringency(validationStringency);
mReader.setSAMRecordFactory(this.samRecordFactory);
}
- catch (IOException e) {
+ catch (final IOException e) {
throw new RuntimeIOException(e);
}
}
@@ -632,14 +713,14 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
final int ch = gunzip.read();
return true;
}
- catch (IOException ioe) {
+ catch (final IOException ioe) {
return false;
}
finally {
try {
stream.reset();
}
- catch (IOException ioe) {
+ catch (final IOException ioe) {
throw new IllegalStateException("Could not reset stream.");
}
}
@@ -668,11 +749,11 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
private SAMRecord previous = null;
private SAMRecordComparator comparator = null;
- public AssertableIterator(CloseableIterator<SAMRecord> iterator) {
+ public AssertableIterator(final CloseableIterator<SAMRecord> iterator) {
wrappedIterator = iterator;
}
- public SAMRecordIterator assertSorted(SAMFileHeader.SortOrder sortOrder) {
+ public SAMRecordIterator assertSorted(final SAMFileHeader.SortOrder sortOrder) {
if (sortOrder == null || sortOrder == SAMFileHeader.SortOrder.unsorted) {
comparator = null;
@@ -684,7 +765,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
}
public SAMRecord next() {
- SAMRecord result = wrappedIterator.next();
+ final SAMRecord result = wrappedIterator.next();
if (comparator != null) {
if (previous != null) {
if (comparator.fileOrderCompare(previous, result) > 0) {
@@ -706,4 +787,123 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
}
+ /**
+ * Interval relative to a reference, for querying a BAM file.
+ */
+ public static class QueryInterval implements Comparable<QueryInterval> {
+
+ /** Index of reference sequence, based on the sequence dictionary of the BAM file being queried. */
+ public final int referenceIndex;
+ /** 1-based, inclusive */
+ public final int start;
+ /** 1-based, inclusive. If <= 0, implies that the interval goes to the end of the reference sequence */
+ public final int end;
+
+
+ public QueryInterval(final int referenceIndex, final int start, final int end) {
+ if (referenceIndex < 0) {
+ throw new IllegalArgumentException("Invalid reference index " + referenceIndex);
+ }
+ this.referenceIndex = referenceIndex;
+ this.start = start;
+ this.end = end;
+ }
+
+
+ public int compareTo(final QueryInterval other) {
+ int comp = this.referenceIndex - other.referenceIndex;
+ if (comp != 0) return comp;
+ comp = this.start - other.start;
+ if (comp != 0) return comp;
+ else if (this.end == other.end) return 0;
+ else if (this.end == 0) return 1;
+ else if (other.end == 0) return -1;
+ else return this.end - other.end;
+ }
+
+ /**
+ * @return true if both are on same reference, and other starts exactly where this ends.
+ */
+ public boolean abuts(final QueryInterval other) {
+ return this.referenceIndex == other.referenceIndex && this.end == other.start;
+ }
+
+ /**
+ * @return true if both are on same reference, and they overlap.
+ */
+ public boolean overlaps(final QueryInterval other) {
+ if (this.referenceIndex != other.referenceIndex) {
+ return false;
+ }
+ final int thisEnd = (this.end == 0? Integer.MAX_VALUE: this.end);
+ final int otherEnd = (other.end == 0? Integer.MAX_VALUE: other.end);
+ return CoordMath.overlaps(this.start, thisEnd, other.start, otherEnd);
+ }
+
+ @Override
+ public String toString() {
+ return String.format("%d:%d-%d", referenceIndex, start, end);
+ }
+
+ private static final QueryInterval[] EMPTY_QUERY_INTERVAL_ARRAY = new QueryInterval[0];
+
+ /**
+ *
+ * @param inputIntervals WARNING: This list is modified (sorted) by this method.
+ * @return Ordered list of intervals in which abutting and overlapping intervals are merged.
+ */
+ public static QueryInterval[] optimizeIntervals(final QueryInterval[] inputIntervals) {
+ if (inputIntervals.length == 0) return EMPTY_QUERY_INTERVAL_ARRAY;
+ Arrays.sort(inputIntervals);
+
+ final List<QueryInterval> unique = new ArrayList<QueryInterval>();
+ QueryInterval previous = inputIntervals[0];
+
+
+ for(int i = 1; i < inputIntervals.length; ++i) {
+ final QueryInterval next = inputIntervals[i];
+ if (previous.abuts(next) || previous.overlaps(next)) {
+ final int newEnd = ((previous.end == 0 || next.end == 0)? 0: Math.max(previous.end, next.end));
+ previous = new QueryInterval(previous.referenceIndex, previous.start, newEnd);
+ }
+ else {
+ unique.add(previous);
+ previous = next;
+ }
+ }
+
+ if (previous != null) unique.add(previous);
+
+ return unique.toArray(EMPTY_QUERY_INTERVAL_ARRAY);
+ }
+ }
+
+ /**
+ * Convenience method to create a QueryInterval
+ * @param sequence sequence of interest, must exist in sequence dictionary
+ * @param start 1-based start position, must be >= 1
+ * @param end 1-based end position.
+ * @throws java.lang.IllegalArgumentException if sequence not found in sequence dictionary, or start position < 1
+ */
+ public QueryInterval makeQueryInterval(final String sequence, int start, int end) {
+ int referenceIndex = getFileHeader().getSequenceIndex(sequence);
+ if (referenceIndex < 0) {
+ throw new IllegalArgumentException(String.format("Sequence '%s' not found in sequence dictionary", sequence));
+ }
+ if (start < 1) {
+ throw new IllegalArgumentException("Start position must be >= 1");
+ }
+ return new QueryInterval(referenceIndex, start, end);
+ }
+
+ /**
+ * Convenience method to create a QueryInterval that goes from start to end of given sequence.
+ * @param sequence sequence of interest, must exist in sequence dictionary
+ * @param start 1-based start position, must be >= 1
+ * @throws java.lang.IllegalArgumentException if sequence not found in sequence dictionary, or start position < 1
+ */
+ public QueryInterval makeQueryInterval(final String sequence, int start) {
+ return makeQueryInterval(sequence, start, 0);
+ }
+
}
diff --git a/src/java/net/sf/samtools/SAMFileSpan.java b/src/java/net/sf/samtools/SAMFileSpan.java
index 7f9b696..4b0bf69 100644
--- a/src/java/net/sf/samtools/SAMFileSpan.java
+++ b/src/java/net/sf/samtools/SAMFileSpan.java
@@ -23,10 +23,12 @@
*/
package net.sf.samtools;
-import java.util.List;
+import net.sf.samtools.util.StringUtil;
+
+import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
-import java.io.Serializable;
+import java.util.List;
/**
* A interface representing a collection of (possibly) discontinuous segments in the
@@ -110,8 +112,8 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @return A copy of the chunk list.
*/
public BAMFileSpan clone() {
- BAMFileSpan clone = new BAMFileSpan();
- for(Chunk chunk: chunks)
+ final BAMFileSpan clone = new BAMFileSpan();
+ for(final Chunk chunk: chunks)
clone.chunks.add(chunk.clone());
return clone;
}
@@ -130,15 +132,15 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
if(!(fileSpan instanceof BAMFileSpan))
throw new SAMException("Unable to compare ");
- BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;
+ final BAMFileSpan bamFileSpan = (BAMFileSpan)fileSpan;
if(bamFileSpan.isEmpty())
return clone();
validateSorted();
- BAMFileSpan trimmedChunkList = new BAMFileSpan();
- for(Chunk chunkToTrim: chunks) {
+ final BAMFileSpan trimmedChunkList = new BAMFileSpan();
+ for(final Chunk chunkToTrim: chunks) {
if(chunkToTrim.getChunkEnd() > chunkToTrim.getChunkStart()) {
if(chunkToTrim.getChunkStart() >= bamFileSpan.chunks.get(0).getChunkStart()) {
// This chunk from the list is completely beyond the start of the filtering chunk.
@@ -170,7 +172,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @param span - span with chunks to add to this one
*/
public void add(final BAMFileSpan span) {
- for (Chunk c : span.chunks) {
+ for (final Chunk c : span.chunks) {
chunks.add(c);
}
}
@@ -206,7 +208,7 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @return The first offset in the span
*/
protected long getFirstOffset() {
- long result = 0;
+ final long result = 0;
if (chunks == null){
return result;
}
@@ -243,11 +245,11 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
* @param coordinateArray List of chunks to convert.
* @return A list of chunks.
*/
- protected static SAMFileSpan toChunkList(long[] coordinateArray) {
+ protected static SAMFileSpan toChunkList(final long[] coordinateArray) {
if(coordinateArray.length % 2 != 0)
throw new SAMException("Data supplied does not appear to be in coordinate array format.");
- BAMFileSpan chunkList = new BAMFileSpan();
+ final BAMFileSpan chunkList = new BAMFileSpan();
for(int i = 0; i < coordinateArray.length; i += 2)
chunkList.add(new Chunk(coordinateArray[i],coordinateArray[i+1]));
@@ -271,15 +273,19 @@ class BAMFileSpan implements SAMFileSpan, Serializable {
*/
@Override
public String toString() {
- StringBuilder builder = new StringBuilder();
- boolean first = true;
- for(Chunk chunk: chunks) {
- if(!first) {
- builder.append(';');
- first = false;
- }
- builder.append(chunk);
- }
- return builder.toString();
+ return StringUtil.join(";", chunks);
+ }
+
+ /**
+ *
+ * @return A single BAMFileSpan that is an intelligent merge of the input spans, i.e. contiguous, overlapping
+ * and contained chunks are intelligently merged, and the chunks are sorted.
+ */
+ public static BAMFileSpan merge(final BAMFileSpan[] spans) {
+ int numInputChunks = 0;
+ for (final BAMFileSpan span : spans) numInputChunks += span.chunks.size();
+ final ArrayList<Chunk> inputChunks = new ArrayList<Chunk>(numInputChunks);
+ for (final BAMFileSpan span : spans) inputChunks.addAll(span.chunks);
+ return new BAMFileSpan(Chunk.optimizeChunkList(inputChunks, 0));
}
}
diff --git a/src/java/net/sf/samtools/SAMTextReader.java b/src/java/net/sf/samtools/SAMTextReader.java
index adef9d8..bf651ff 100644
--- a/src/java/net/sf/samtools/SAMTextReader.java
+++ b/src/java/net/sf/samtools/SAMTextReader.java
@@ -168,6 +168,11 @@ class SAMTextReader extends SAMFileReader.ReaderImplementation {
throw new UnsupportedOperationException("Cannot query SAM text files");
}
+ @Override
+ CloseableIterator<SAMRecord> query(final SAMFileReader.QueryInterval[] intervals, final boolean contained) {
+ throw new UnsupportedOperationException("Cannot query SAM text files");
+ }
+
/**
* Unsupported for SAM text files.
*/
diff --git a/src/java/org/broadinstitute/variant/variantcontext/Allele.java b/src/java/org/broadinstitute/variant/variantcontext/Allele.java
index 79d5ec9..0113923 100644
--- a/src/java/org/broadinstitute/variant/variantcontext/Allele.java
+++ b/src/java/org/broadinstitute/variant/variantcontext/Allele.java
@@ -46,8 +46,8 @@ import java.util.Collection;
* In these cases, where are the alleles?
*
* SNP polymorphism of C/G -> { C , G } -> C is the reference allele
- * 1 base deletion of C -> { C , - } -> C is the reference allele
- * 1 base insertion of A -> { - ; A } -> Null is the reference allele
+ * 1 base deletion of C -> { tC , t } -> C is the reference allele and we include the preceding reference base (null alleles are not allowed)
+ * 1 base insertion of A -> { C ; CA } -> C is the reference allele (because null alleles are not allowed)
*
* Suppose I see a the following in the population:
*
@@ -59,6 +59,10 @@ import java.util.Collection;
*
* { C , G , - }
*
+ * and these are represented as:
+ *
+ * { tC, tG, t }
+ *
* Now suppose I have this more complex example:
*
* Ref: a t C g a // C is the reference base
@@ -68,12 +72,11 @@ import java.util.Collection;
*
* There are actually four segregating alleles:
*
- * { C g , - g, - -, and CAg } over bases 2-4
+ * { Cg , -g, --, and CAg } over bases 2-4
*
- * However, the molecular equivalence explicitly listed above is usually discarded, so the actual
- * segregating alleles are:
+ * represented as:
*
- * { C g, g, -, C a g }
+ * { tCg, tg, t, tCAg }
*
* Critically, it should be possible to apply an allele to a reference sequence to create the
* correct haplotype sequence:
@@ -85,7 +88,7 @@ import java.util.Collection;
*
* Given list of alleles it's possible to determine the "type" of the variation
*
- * A / C @ loc => SNP with
+ * A / C @ loc => SNP
* - / A => INDEL
*
* If you know where allele is the reference, you can determine whether the variant is an insertion or deletion.
diff --git a/src/java/org/broadinstitute/variant/variantcontext/VariantContext.java b/src/java/org/broadinstitute/variant/variantcontext/VariantContext.java
index 0709e2d..a18fd32 100644
--- a/src/java/org/broadinstitute/variant/variantcontext/VariantContext.java
+++ b/src/java/org/broadinstitute/variant/variantcontext/VariantContext.java
@@ -40,7 +40,7 @@ import java.util.*;
*
* The VariantContext object is a single general class system for representing genetic variation data composed of:
*
- * * Allele: representing single genetic haplotypes (A, T, ATC, -)
+ * * Allele: representing single genetic haplotypes (A, T, ATC, -) (note that null alleles are used here for illustration; see the Allele class for how to represent indels)
* * Genotype: an assignment of alleles for each chromosome of a single named sample at a particular locus
* * VariantContext: an abstract class holding all segregating alleles at a locus as well as genotypes
* for multiple individuals containing alleles at that locus
@@ -72,10 +72,10 @@ import java.util.*;
* A [ref] / T at 10
* GenomeLoc snpLoc = GenomeLocParser.createGenomeLoc("chr1", 10, 10);
*
- * - / ATC [ref] from 20-23
+ * A / ATC [ref] from 20-23
* GenomeLoc delLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 22);
*
- * // - [ref] / ATC immediately after 20
+ * // A [ref] / ATC immediately after 20
* GenomeLoc insLoc = GenomeLocParser.createGenomeLoc("chr1", 20, 20);
*
* === Alleles ===
@@ -86,9 +86,8 @@ import java.util.*;
*
* Alleles can be either reference or non-reference
*
- * Example alleles used here:
+ * Examples of alleles used here:
*
- * del = new Allele("-");
* A = new Allele("A");
* Aref = new Allele("A", true);
* T = new Allele("T");
@@ -116,10 +115,8 @@ import java.util.*;
* VariantContext vc = new VariantContext(name, delLoc, Arrays.asList(ATCref, del));
* </pre>
*
- * The only 2 things that distinguishes between a insertion and deletion are the reference allele
- * and the location of the variation. An insertion has a Null reference allele and at least
- * one non-reference Non-Null allele. Additionally, the location of the insertion is immediately after
- * a 1-bp GenomeLoc (at say 20).
+ * The only thing that distinguishes between an insertion and deletion is which is the reference allele.
+ * An insertion has a reference allele that is smaller than the non-reference allele, and vice versa for deletions.
*
* <pre>
* VariantContext vc = new VariantContext("name", insLoc, Arrays.asList(delRef, ATC));
@@ -1189,14 +1186,14 @@ public class VariantContext implements Feature { // to enable tribble integratio
}
if ( getAttribute(VCFConstants.ALLELE_COUNT_KEY) instanceof List ) {
- Collections.sort(observedACs);
- List reportedACs = (List)getAttribute(VCFConstants.ALLELE_COUNT_KEY);
- Collections.sort(reportedACs);
+ final List reportedACs = (List)getAttribute(VCFConstants.ALLELE_COUNT_KEY);
if ( observedACs.size() != reportedACs.size() )
throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag doesn't have the correct number of values for the record at position %s:%d, %d vs. %d", getChr(), getStart(), reportedACs.size(), observedACs.size()));
for (int i = 0; i < observedACs.size(); i++) {
- if ( Integer.valueOf(reportedACs.get(i).toString()) != observedACs.get(i) )
- throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getChr(), getStart(), reportedACs.get(i), observedACs.get(i)));
+ // need to cast to int to make sure we don't have an issue below with object equals (earlier bug) - EB
+ final int reportedAC = Integer.valueOf(reportedACs.get(i).toString());
+ if ( reportedAC != observedACs.get(i) )
+ throw new TribbleException.InternalCodecException(String.format("the Allele Count (AC) tag is incorrect for the record at position %s:%d, %s vs. %d", getChr(), getStart(), reportedAC, observedACs.get(i)));
}
} else {
if ( observedACs.size() != 1 )
diff --git a/src/java/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java b/src/java/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java
index 276a693..bbacbc1 100644
--- a/src/java/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java
+++ b/src/java/org/broadinstitute/variant/variantcontext/VariantContextBuilder.java
@@ -209,7 +209,7 @@ public class VariantContextBuilder {
/**
* Removes key if present in the attributes
*
- * @param key
+ * @param key key to remove
* @return
*/
@Requires({"key != null"})
@@ -221,6 +221,21 @@ public class VariantContextBuilder {
}
/**
+ * Removes list of keys if present in the attributes
+ *
+ * @param keys list of keys to remove
+ * @return this builder, to allow method call chaining
+ */
+ @Requires({"keys != null"})
+ @Ensures({"this.attributes.size() <= old(this.attributes.size())"})
+ public VariantContextBuilder rmAttributes(final List<String> keys) {
+ makeAttributesModifiable();
+ for ( final String key : keys )
+ attributes.remove(key);
+ return this;
+ }
+
+ /**
* Makes the attributes field modifiable. In many cases attributes is just a pointer to an immutable
* collection, so methods that want to add / remove records require the attributes to be copied to a
*/
diff --git a/src/java/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java b/src/java/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java
index a04a6bf..88b9c49 100644
--- a/src/java/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java
+++ b/src/java/org/broadinstitute/variant/variantcontext/writer/BCF2FieldEncoder.java
@@ -35,6 +35,7 @@ import org.broadinstitute.variant.vcf.VCFHeaderLineCount;
import org.broadinstitute.variant.variantcontext.VariantContext;
import java.io.IOException;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -334,8 +335,11 @@ public abstract class BCF2FieldEncoder {
return "";
else if (value instanceof List) {
final List<String> l = (List<String>)value;
- if ( l.isEmpty() ) return "";
- else return BCF2Utils.collapseStringList(l);
+ return BCF2Utils.collapseStringList(l);
+ } else if ( value.getClass().isArray() ) {
+ final List<String> l = new ArrayList<String>();
+ Collections.addAll(l, (String[])value);
+ return BCF2Utils.collapseStringList(l);
} else
return (String)value;
}
@@ -507,12 +511,18 @@ public abstract class BCF2FieldEncoder {
* o is a list => o
* else => [o]
*
- * @param o
+ * @param c the class of the object
+ * @param o the object to convert to a Java List
+ * @return {@code o} converted to a Java List as described above
*/
private final static <T> List<T> toList(final Class<T> c, final Object o) {
if ( o == null ) return Collections.emptyList();
else if ( o instanceof List ) return (List<T>)o;
+ else if ( o.getClass().isArray() ) {
+ final List<T> l = new ArrayList<T>();
+ Collections.addAll(l, (T[])o);
+ return l;
+ }
else return Collections.singletonList((T)o);
}
}
diff --git a/src/java/org/broadinstitute/variant/vcf/AbstractVCFCodec.java b/src/java/org/broadinstitute/variant/vcf/AbstractVCFCodec.java
index 7049d8b..9975488 100644
--- a/src/java/org/broadinstitute/variant/vcf/AbstractVCFCodec.java
+++ b/src/java/org/broadinstitute/variant/vcf/AbstractVCFCodec.java
@@ -523,7 +523,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
*/
private static void checkAllele(String allele, boolean isRef, int lineNo) {
if ( allele == null || allele.length() == 0 )
- generateException("Empty alleles are not permitted in VCF records", lineNo);
+ generateException(generateExceptionTextForBadAlleleBases(""), lineNo);
if ( GeneralUtils.DEBUG_MODE_ENABLED && MAX_ALLELE_SIZE_BEFORE_WARNING != -1 && allele.length() > MAX_ALLELE_SIZE_BEFORE_WARNING ) {
System.err.println(String.format("Allele detected with length %d exceeding max size %d at approximately line %d, likely resulting in degraded VCF processing performance", allele.length(), MAX_ALLELE_SIZE_BEFORE_WARNING, lineNo));
@@ -540,7 +540,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
" convert your file to VCF4 using VCFTools, available at http://vcftools.sourceforge.net/index.html", lineNo);
if (!Allele.acceptableAlleleBases(allele))
- generateException("Unparsable vcf record with allele " + allele, lineNo);
+ generateException(generateExceptionTextForBadAlleleBases(allele), lineNo);
if ( isRef && allele.equals(VCFConstants.EMPTY_ALLELE) )
generateException("The reference allele cannot be missing", lineNo);
@@ -548,6 +548,20 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
}
/**
+ * Generates the exception text for the case where the allele string contains unacceptable bases.
+ *
+ * @param allele non-null allele string
+ * @return non-null exception text string
+ */
+ private static String generateExceptionTextForBadAlleleBases(final String allele) {
+ if ( allele.length() == 0 )
+ return "empty alleles are not permitted in VCF records";
+ if ( allele.contains("[") || allele.contains("]") || allele.contains(":") || allele.contains(".") )
+ return "VCF support for complex rearrangements with breakends has not yet been implemented";
+ return "unparsable vcf record with allele " + allele;
+ }
+
+ /**
* return true if this is a symbolic allele (e.g. <SOMETAG>) or
* structural variation breakend (with [ or ]), otherwise false
* @param allele the allele to check
@@ -708,8 +722,12 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
private final static int[] decodeInts(final String string) {
final int nValues = ParsingUtils.split(string, INT_DECODE_ARRAY, ',');
final int[] values = new int[nValues];
- for ( int i = 0; i < nValues; i++ )
- values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
+ try {
+ for ( int i = 0; i < nValues; i++ )
+ values[i] = Integer.valueOf(INT_DECODE_ARRAY[i]);
+ } catch (final NumberFormatException e) {
+ return null;
+ }
return values;
}
diff --git a/src/tests/java/net/sf/picard/sam/CleanSamTest.java b/src/tests/java/net/sf/picard/sam/CleanSamTest.java
new file mode 100644
index 0000000..6a3d678
--- /dev/null
+++ b/src/tests/java/net/sf/picard/sam/CleanSamTest.java
@@ -0,0 +1,78 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.picard.sam;
+
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMValidationError;
+import net.sf.samtools.util.TestUtil;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.Arrays;
+import java.util.Collection;
+
+public class CleanSamTest {
+
+ private static final File TEST_DATA_DIR = new File("testdata/net/sf/picard/sam/CleanSam");
+
+ @Test(dataProvider = "testCleanSamDataProvider")
+ public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
+ final File cleanedFile = File.createTempFile(samFile + ".", ".sam");
+ cleanedFile.deleteOnExit();
+ final CleanSam cleanSam = new CleanSam();
+ cleanSam.INPUT = new File(TEST_DATA_DIR, samFile);
+ cleanSam.OUTPUT = cleanedFile;
+ Assert.assertEquals(cleanSam.doWork(), 0);
+ final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
+ validator.setIgnoreWarnings(true);
+ validator.setVerbose(true, 1000);
+ validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
+ SAMFileReader samReader = new SAMFileReader(cleanedFile);
+ samReader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
+ final SAMRecord rec = samReader.iterator().next();
+ samReader.close();
+ Assert.assertEquals(rec.getCigarString(), expectedCigar);
+ samReader = new SAMFileReader(cleanedFile);
+ final boolean validated = validator.validateSamFileVerbose(samReader, null);
+ samReader.close();
+ Assert.assertTrue(validated, "ValidateSamFile failed");
+ }
+
+ @DataProvider(name = "testCleanSamDataProvider")
+ public Object[][] testCleanSamDataProvider() {
+ return new Object[][]{
+ {"simple_fits.sam", "100M"},
+ {"simple_overhang.sam", "99M1S"},
+ {"fits_with_deletion.sam", "91M2D9M"},
+ {"overhang_with_deletion.sam", "91M2D8M1S"},
+ {"trailing_insertion.sam", "99M1I"},
+ {"long_trailing_insertion.sam", "90M10I"},
+ };
+ }
+}
diff --git a/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java b/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java
index 0f4d095..df61c10 100644
--- a/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java
+++ b/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java
@@ -69,7 +69,8 @@ public class MergeBamAlignmentTest {
@Test
public void testMergerWithSupplemental() throws Exception {
final File outputWithSupplemental = File.createTempFile("mergeWithSupplementalTest", ".sam");
- outputWithSupplemental.deleteOnExit();
+ System.out.println(outputWithSupplemental.getAbsolutePath());
+ // outputWithSupplemental.deleteOnExit();
final MergeBamAlignment merger = new MergeBamAlignment();
merger.UNMAPPED_BAM = unmappedBam;
merger.ALIGNED_BAM = Arrays.asList(supplementalReadAlignedBam);
@@ -99,15 +100,19 @@ public class MergeBamAlignmentTest {
// This tests that we clip both (a) when the adapter is marked in the unmapped BAM file and
// (b) when the insert size is less than the read length
- if (sam.getReadName().equals("both_reads_align_clip_adapter") ||
- sam.getReadName().equals("both_reads_align_clip_marked")) {
+ if (sam.getReadName().equals("both_reads_align_clip_marked")) {
Assert.assertEquals(sam.getReferenceName(), "chr7");
if (sam.getReadNegativeStrandFlag()) {
- Assert.assertEquals(sam.getCigarString(), "5S96M", "Incorrect CIGAR string for " +
- sam.getReadName());
+ Assert.assertEquals(sam.getCigarString(), "5S96M", "Incorrect CIGAR string for " + sam.getReadName());
} else {
- Assert.assertEquals(sam.getCigarString(), "96M5S", "Incorrect CIGAR string for " +
- sam.getReadName());
+ Assert.assertEquals(sam.getCigarString(), "96M5S", "Incorrect CIGAR string for " + sam.getReadName());
+ }
+ }
+ else if (sam.getReadName().equals("both_reads_align_clip_adapter")) {
+ Assert.assertEquals(sam.getReferenceName(), "chr7");
+ if (!sam.getSupplementaryAlignmentFlag()) {
+ if (sam.getReadNegativeStrandFlag()) Assert.assertEquals(sam.getCigarString(), "5S96M", "Incorrect CIGAR string for " + sam.getReadName());
+ else Assert.assertEquals(sam.getCigarString(), "96M5S", "Incorrect CIGAR string for " + sam.getReadName());
}
}
// This tests that we DON'T clip when we run off the end if there are equal to or more than
@@ -133,7 +138,7 @@ public class MergeBamAlignmentTest {
}
// Make sure that we have the appropriate primary and supplementary reads in the new file
- Assert.assertEquals(clipAdapterFlags.size(), foundClipAdapterFlags.size());
+ Assert.assertEquals(foundClipAdapterFlags.size(), clipAdapterFlags.size());
Collections.sort(clipAdapterFlags);
Collections.sort(foundClipAdapterFlags);
for (int i = 0; i < clipAdapterFlags.size(); i++) {
diff --git a/src/tests/java/net/sf/picard/vcf/MergeVcfsTest.java b/src/tests/java/net/sf/picard/vcf/MergeVcfsTest.java
index 8378255..d4590d0 100644
--- a/src/tests/java/net/sf/picard/vcf/MergeVcfsTest.java
+++ b/src/tests/java/net/sf/picard/vcf/MergeVcfsTest.java
@@ -9,6 +9,7 @@ import org.testng.Assert;
import org.testng.annotations.Test;
import java.io.File;
+import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
@@ -70,10 +71,11 @@ public class MergeVcfsTest {
}
@Test
- public void testMergeIndelsSnps() {
+ public void testMergeIndelsSnps() throws IOException {
final File indelInputFile = new File(TEST_DATA_PATH + "CEUTrio-indels.vcf");
final File snpInputFile = new File(TEST_DATA_PATH + "CEUTrio-snps.vcf");
- final File output = new File(TEST_DATA_PATH + "merge-indels-snps-test-output-delete-me.vcf");
+ final File output = File.createTempFile("merge-indels-snps-test-output.", ".vcf");
+ output.deleteOnExit();
final Queue<String> indelContigPositions = loadContigPositions(indelInputFile);
final Queue<String> snpContigPositions = loadContigPositions(snpInputFile);
@@ -101,6 +103,7 @@ public class MergeVcfsTest {
if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) < 0);
last = outputContext;
}
+ iterator.close();
// We should have polled everything off the indel (snp) queues
Assert.assertEquals(indelContigPositions.size(), 0);
@@ -110,7 +113,7 @@ public class MergeVcfsTest {
}
@Test
- public void testMergeRandomScatter() {
+ public void testMergeRandomScatter() throws IOException {
final File zero = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-0.vcf");
final File one = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-1.vcf");
final File two = new File(TEST_DATA_PATH, "CEUTrio-random-scatter-2.vcf");
@@ -126,7 +129,7 @@ public class MergeVcfsTest {
positionQueues.add(4, loadContigPositions(four));
positionQueues.add(5, loadContigPositions(five));
- final File output = new File(TEST_DATA_PATH + "random-scatter-test-output-delete-me.vcf");
+ final File output = File.createTempFile("random-scatter-test-output.", ".vcf");
output.deleteOnExit();
final MergeVcfs mergeVcfs = new MergeVcfs();
@@ -153,6 +156,7 @@ public class MergeVcfsTest {
if (last != null) Assert.assertTrue(outputComparator.compare(last, outputContext) < 0);
last = outputContext;
}
+ iterator.close();
for (final Queue<String> positionQueue : positionQueues) {
Assert.assertEquals(positionQueue.size(), 0);
diff --git a/src/tests/java/net/sf/samtools/BAMFileIndexTest.java b/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
index a02fa20..bce9cb8 100755
--- a/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
+++ b/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
@@ -27,13 +27,12 @@ import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.StopWatch;
import org.testng.Assert;
import static org.testng.Assert.*;
+
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Random;
+import java.util.*;
/**
* Test BAM file indexing.
@@ -70,7 +69,7 @@ public class BAMFileIndexTest
assertEquals(runQueryTest(BAM_FILE, "chrM", 10400, 10600, false), 2);
}
- @Test(enabled = false)
+ @Test(groups = {"slow"})
public void testRandomQueries()
throws Exception {
runRandomTest(BAM_FILE, 1000, new Random());
@@ -112,9 +111,7 @@ public class BAMFileIndexTest
final StopWatch linearScan = new StopWatch();
final StopWatch queryUnmapped = new StopWatch();
int unmappedCountFromLinearScan = 0;
- final File bamFile =
- BAM_FILE;
- //new File("/Users/alecw/tmp/30ED6AAXX.1.aligned.duplicates_marked.bam");
+ final File bamFile = BAM_FILE;
final SAMFileReader reader = new SAMFileReader(bamFile);
linearScan.start();
CloseableIterator<SAMRecord> it = reader.iterator();
@@ -194,10 +191,10 @@ public class BAMFileIndexTest
Assert.assertEquals(originalRec, rec);
// Both ends mapped
- CloseableIterator<SAMRecord> it = reader.queryUnmapped();
+ final CloseableIterator<SAMRecord> it = reader.queryUnmapped();
rec = null;
while (it.hasNext()) {
- SAMRecord next = it.next();
+ final SAMRecord next = it.next();
if (next.getReadName().equals("2615")) {
rec = next;
break;
@@ -219,10 +216,56 @@ public class BAMFileIndexTest
Assert.assertFalse(mate.getFirstOfPairFlag() == rec.getFirstOfPairFlag());
}
+ /**
+ * Compare the results of a multi-interval query versus the union of the results from each interval done
+ * separately.
+ */
+ @Test(dataProvider = "testMultiIntervalQueryDataProvider")
+ public void testMultiIntervalQuery(final boolean contained) {
+ final List<String> referenceNames = getReferenceNames(BAM_FILE);
+
+ final SAMFileReader.QueryInterval[] intervals = generateRandomIntervals(referenceNames.size(), 1000, new Random());
+ final Set<SAMRecord> multiIntervalRecords = new HashSet<SAMRecord>();
+ final Set<SAMRecord> singleIntervalRecords = new HashSet<SAMRecord>();
+ final SAMFileReader reader = new SAMFileReader(BAM_FILE);
+ for (final SAMFileReader.QueryInterval interval : intervals) {
+ consumeAll(singleIntervalRecords, reader.query(referenceNames.get(interval.referenceIndex), interval.start, interval.end, contained));
+ }
+
+ final SAMFileReader.QueryInterval[] optimizedIntervals = SAMFileReader.QueryInterval.optimizeIntervals(intervals);
+ consumeAll(multiIntervalRecords, reader.query(optimizedIntervals, contained));
+ final Iterator<SAMRecord> singleIntervalRecordIterator = singleIntervalRecords.iterator();
+ boolean failed = false;
+ while (singleIntervalRecordIterator.hasNext()) {
+ final SAMRecord record = singleIntervalRecordIterator.next();
+ if (!multiIntervalRecords.remove(record)) {
+ System.out.println("SingleIntervalQuery found " + record + " but MultiIntervalQuery did not");
+ failed = true;
+ }
+ }
+ for (final SAMRecord record : multiIntervalRecords) {
+ System.out.println("MultiIntervalQuery found " + record + " but SingleIntervalQuery did not");
+ failed = true;
+ }
+ Assert.assertFalse(failed);
+ }
+
+ @DataProvider(name = "testMultiIntervalQueryDataProvider")
+ private Object[][] testMultiIntervalQueryDataProvider() {
+ return new Object[][]{{true}, {false}};
+ }
+
+ private <E> void consumeAll(final Collection<E> collection, final CloseableIterator<E> iterator) {
+ while (iterator.hasNext()) {
+ collection.add(iterator.next());
+ }
+ iterator.close();
+ }
+
private SAMRecord getSingleRecordStartingAt(final SAMFileReader reader, final String sequence, final int alignmentStart) {
- CloseableIterator<SAMRecord> it = reader.queryAlignmentStart(sequence, alignmentStart);
+ final CloseableIterator<SAMRecord> it = reader.queryAlignmentStart(sequence, alignmentStart);
Assert.assertTrue(it.hasNext());
- SAMRecord rec = it.next();
+ final SAMRecord rec = it.next();
Assert.assertNotNull(rec);
Assert.assertFalse(it.hasNext());
it.close();
@@ -244,19 +287,17 @@ public class BAMFileIndexTest
}
private void runRandomTest(final File bamFile, final int count, final Random generator) {
- final int maxCoordinate = 10000000;
final List<String> referenceNames = getReferenceNames(bamFile);
- for (int i = 0; i < count; i++) {
- final String refName = referenceNames.get(generator.nextInt(referenceNames.size()));
- final int coord1 = generator.nextInt(maxCoordinate+1);
- final int coord2 = generator.nextInt(maxCoordinate+1);
- final int startPos = Math.min(coord1, coord2);
- final int endPos = Math.max(coord1, coord2);
+ final SAMFileReader.QueryInterval[] intervals = generateRandomIntervals(referenceNames.size(), count, generator);
+ for (final SAMFileReader.QueryInterval interval : intervals) {
+ final String refName = referenceNames.get(interval.referenceIndex);
+ final int startPos = interval.start;
+ final int endPos = interval.end;
System.out.println("Testing query " + refName + ":" + startPos + "-" + endPos + " ...");
try {
runQueryTest(bamFile, refName, startPos, endPos, true);
runQueryTest(bamFile, refName, startPos, endPos, false);
- } catch (Throwable exc) {
+ } catch (final Throwable exc) {
String message = "Query test failed: " + refName + ":" + startPos + "-" + endPos;
message += ": " + exc.getMessage();
throw new RuntimeException(message, exc);
@@ -264,6 +305,21 @@ public class BAMFileIndexTest
}
}
+ private SAMFileReader.QueryInterval[] generateRandomIntervals(final int numReferences, final int count, final Random generator) {
+ final SAMFileReader.QueryInterval[] intervals = new SAMFileReader.QueryInterval[count];
+ final int maxCoordinate = 10000000;
+ for (int i = 0; i < count; i++) {
+ final int referenceIndex = generator.nextInt(numReferences);
+ final int coord1 = generator.nextInt(maxCoordinate+1);
+ final int coord2 = generator.nextInt(maxCoordinate+1);
+ final int startPos = Math.min(coord1, coord2);
+ final int endPos = Math.max(coord1, coord2);
+ intervals[i] = new SAMFileReader.QueryInterval(referenceIndex, startPos, endPos);
+ }
+
+ return intervals;
+ }
+
private List<String> getReferenceNames(final File bamFile) {
final SAMFileReader reader = new SAMFileReader(bamFile);
final List<String> result = new ArrayList<String>();
diff --git a/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java b/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java
index 653d5a0..7a1117d 100644
--- a/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java
+++ b/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java
@@ -65,13 +65,16 @@ import java.util.Set;
* This class tests out the ability of the VCF writer to correctly write VCF files
*/
public class VCFWriterUnitTest extends VariantBaseTest {
- private Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
- private Set<String> additionalColumns = new HashSet<String>();
- private File fakeVCFFile = new File("FAKEVCFFILEFORTESTING.vcf");
+ private Set<VCFHeaderLine> metaData;
+ private Set<String> additionalColumns;
/** test, using the writer and reader, that we can output and input a VCF file without problems */
@Test
- public void testBasicWriteAndRead() {
+ public void testBasicWriteAndRead() throws IOException {
+ File fakeVCFFile = File.createTempFile("testBasicWriteAndRead.", ".vcf");
+ fakeVCFFile.deleteOnExit();
+ metaData = new HashSet<VCFHeaderLine>();
+ additionalColumns = new HashSet<String>();
VCFHeader header = createFakeHeader(metaData,additionalColumns);
final EnumSet<Options> options = EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
VariantContextWriter writer = VariantContextWriterFactory.create(fakeVCFFile, createArtificialSequenceDictionary(), options);
@@ -171,6 +174,7 @@ public class VCFWriterUnitTest extends VariantBaseTest {
@Test(enabled=true)
public void TestWritingLargeVCF() throws FileNotFoundException, InterruptedException {
+ Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
final Set<String> Columns = new HashSet<String>();
for (int i = 0; i < 123; i++) {
@@ -202,6 +206,5 @@ public class VCFWriterUnitTest extends VariantBaseTest {
Assert.assertTrue(vcf.lastModified() <= vcfIndex.lastModified());
}
}
-
}
diff --git a/testdata/net/sf/picard/sam/CleanSam/fits_with_deletion.sam b/testdata/net/sf/picard/sam/CleanSam/fits_with_deletion.sam
new file mode 100644
index 0000000..25b7c92
--- /dev/null
+++ b/testdata/net/sf/picard/sam/CleanSam/fits_with_deletion.sam
@@ -0,0 +1,3 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chr1 LN:102
+fits_with_deletion 0 chr1 1 47 91M2D9M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCA &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0
diff --git a/testdata/net/sf/picard/sam/CleanSam/long_trailing_insertion.sam b/testdata/net/sf/picard/sam/CleanSam/long_trailing_insertion.sam
new file mode 100644
index 0000000..6ad740b
--- /dev/null
+++ b/testdata/net/sf/picard/sam/CleanSam/long_trailing_insertion.sam
@@ -0,0 +1,3 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chr1 LN:101
+long_trailing_insertion 0 chr1 3 47 90M10I * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCA &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0
diff --git a/testdata/net/sf/picard/sam/CleanSam/overhang_with_deletion.sam b/testdata/net/sf/picard/sam/CleanSam/overhang_with_deletion.sam
new file mode 100644
index 0000000..39bc397
--- /dev/null
+++ b/testdata/net/sf/picard/sam/CleanSam/overhang_with_deletion.sam
@@ -0,0 +1,3 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chr1 LN:101
+overhang_with_deletion 0 chr1 1 47 91M2D9M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCA &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0
diff --git a/testdata/net/sf/picard/sam/CleanSam/simple_fits.sam b/testdata/net/sf/picard/sam/CleanSam/simple_fits.sam
new file mode 100644
index 0000000..d5ff6cb
--- /dev/null
+++ b/testdata/net/sf/picard/sam/CleanSam/simple_fits.sam
@@ -0,0 +1,3 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chr1 LN:101
+simple_fits 0 chr1 2 47 100M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCA &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0
diff --git a/testdata/net/sf/picard/sam/CleanSam/simple_overhang.sam b/testdata/net/sf/picard/sam/CleanSam/simple_overhang.sam
new file mode 100644
index 0000000..3707439
--- /dev/null
+++ b/testdata/net/sf/picard/sam/CleanSam/simple_overhang.sam
@@ -0,0 +1,3 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chr1 LN:101
+simple_overhang 0 chr1 3 47 100M * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCA &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0
diff --git a/testdata/net/sf/picard/sam/CleanSam/trailing_insertion.sam b/testdata/net/sf/picard/sam/CleanSam/trailing_insertion.sam
new file mode 100644
index 0000000..987cde4
--- /dev/null
+++ b/testdata/net/sf/picard/sam/CleanSam/trailing_insertion.sam
@@ -0,0 +1,3 @@
+@HD VN:1.0 SO:coordinate
+@SQ SN:chr1 LN:101
+trailing_insertion 0 chr1 3 47 99M1I * 0 0 TTCATGCTGANGCNCTCTTACGATCGTACAGATGCAAATATTAACANNCNTTNAAGNNCANNNNNNNNNCAATACAATANTAGAGTACGTNAACACTCCA &/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/picard-tools.git
More information about the debian-med-commit
mailing list