[med-svn] [picard-tools] 01/06: Imported Upstream version 1.110
Charles Plessy
plessy at moszumanska.debian.org
Sun Apr 6 02:31:53 UTC 2014
This is an automated email from the git hooks/post-receive script.
plessy pushed a commit to branch master
in repository picard-tools.
commit ad4ec03714d712bc43f37a2f333e9c1310595af4
Author: Charles Plessy <plessy at debian.org>
Date: Sun Apr 6 10:30:49 2014 +0900
Imported Upstream version 1.110
---
Picard-public.ipr | 20 +-
build.xml | 2 +-
.../net/sf/picard/analysis/InsertSizeMetrics.java | 2 +-
.../directed/InsertSizeMetricsCollector.java | 2 +-
src/java/net/sf/picard/fastq/BasicFastqWriter.java | 3 +-
src/java/net/sf/picard/fastq/FastqReader.java | 11 +-
.../net/sf/picard/filter/FilteringIterator.java | 2 +-
.../sf/picard/illumina/CheckIlluminaDirectory.java | 161 ++--
.../picard/illumina/ExtractIlluminaBarcodes.java | 11 +-
.../illumina/IlluminaBasecallsConverter.java | 14 +-
.../picard/illumina/IlluminaBasecallsToFastq.java | 5 +-
.../sf/picard/illumina/IlluminaBasecallsToSam.java | 8 +-
.../net/sf/picard/illumina/parser/BclParser.java | 14 +-
.../parser/IlluminaDataProviderFactory.java | 22 +-
.../picard/illumina/parser/IlluminaFileUtil.java | 683 ++++++++++----
.../illumina/parser/IlluminaMetricsCode.java | 42 +
.../picard/illumina/parser/MultiTileBclParser.java | 2 +-
.../illumina/parser/PerTilePerCycleParser.java | 4 +-
src/java/net/sf/picard/illumina/parser/Tile.java | 114 +++
.../net/sf/picard/illumina/parser/TileIndex.java | 3 +-
.../sf/picard/illumina/parser/TileMetricsUtil.java | 162 ++--
.../picard/illumina/parser/TilePhasingValue.java | 30 +
.../picard/illumina/parser/TileTemplateRead.java} | 38 +-
.../illumina/parser/fakers/BarcodeFileFaker.java | 25 +
.../illumina/parser/fakers/BciFileFaker.java | 52 ++
.../illumina/parser/fakers/BclFileFaker.java | 29 +
.../illumina/parser/fakers/CifFileFaker.java | 31 +
.../illumina/parser/fakers/ClocsFileFaker.java | 25 +
.../illumina/parser/fakers/CnfFileFaker.java | 31 +
.../picard/illumina/parser/fakers/FileFaker.java | 73 ++
.../illumina/parser/fakers/FilterFileFaker.java | 26 +
.../illumina/parser/fakers/LocsFileFaker.java | 28 +
.../parser/fakers/MultiTileBclFileFaker.java | 32 +
.../parser/fakers/MultiTileLocsFileFaker.java | 30 +
.../illumina/parser/fakers/PosFileFaker.java | 25 +
.../illumina/parser/fakers/QSeqFileFaker.java | 25 +
.../picard/illumina/parser/readers/BclReader.java | 67 +-
.../parser/readers/TileMetricsOutReader.java | 82 +-
src/java/net/sf/picard/io/IoUtil.java | 25 +-
.../net/sf/picard/sam/AbstractAlignmentMerger.java | 48 +-
src/java/net/sf/picard/sam/FastqToSam.java | 5 +-
src/java/net/sf/picard/sam/MergeBamAlignment.java | 9 +-
...RevertOriginalBaseQualitiesAndAddMateCigar.java | 2 +-
src/java/net/sf/picard/sam/SamAlignmentMerger.java | 16 +-
src/java/net/sf/picard/sam/SamFileValidator.java | 4 +-
src/java/net/sf/picard/sam/ValidateSamFile.java | 1 +
src/java/net/sf/picard/util/IntervalList.java | 4 +-
src/java/net/sf/picard/util/MathUtil.java | 13 +
src/java/net/sf/picard/util/OverlapDetector.java | 24 +-
.../sf/picard/util/QualityEncodingDetector.java | 110 ++-
src/java/net/sf/picard/vcf/MakeSitesOnlyVcf.java | 47 +-
src/java/net/sf/samtools/AbstractBAMFileIndex.java | 53 +-
src/java/net/sf/samtools/BAMFileReader.java | 11 +-
src/java/net/sf/samtools/BAMIndexContent.java | 156 +---
src/java/net/sf/samtools/BAMIndexer.java | 186 ++--
src/java/net/sf/samtools/Bin.java | 16 +-
src/java/net/sf/samtools/BinaryBAMIndexWriter.java | 20 +-
src/java/net/sf/samtools/BinningIndexBuilder.java | 187 ++++
...MIndexContent.java => BinningIndexContent.java} | 465 +++++-----
src/java/net/sf/samtools/CachingBAMFileIndex.java | 28 +-
src/java/net/sf/samtools/Chunk.java | 6 +-
src/java/net/sf/samtools/Defaults.java | 11 +
src/java/net/sf/samtools/GenomicIndexUtil.java | 100 +++
src/java/net/sf/samtools/LinearIndex.java | 30 +-
src/java/net/sf/samtools/SAMFileReader.java | 10 +-
src/java/net/sf/samtools/SAMFileWriterFactory.java | 6 +-
src/java/net/sf/samtools/SAMRecord.java | 13 +-
src/java/net/sf/samtools/SAMRecordSetBuilder.java | 185 +++-
src/java/net/sf/samtools/SAMUtils.java | 35 +-
.../sf/{picard/sam => samtools}/SamPairUtil.java | 43 +-
.../net/sf/samtools/TextualBAMIndexWriter.java | 12 +-
.../seekablestream/SeekableStreamFactory.java | 3 +-
src/java/net/sf/samtools/util/AsciiWriter.java | 4 +-
.../samtools/util/BlockCompressedInputStream.java | 9 +-
.../samtools/util/BlockCompressedOutputStream.java | 25 +-
.../net/sf/samtools/util/BufferedLineReader.java | 4 +-
src/java/net/sf/samtools/util/IOUtil.java | 81 +-
.../sf/samtools/util}/LocationAware.java | 6 +-
.../net/sf/samtools/util/SortingCollection.java | 6 +-
.../sf/samtools/util/SortingLongCollection.java | 4 +-
.../net/sf/samtools/util/TempStreamFactory.java | 16 +-
.../org/broad/tribble/AbstractFeatureReader.java | 3 +-
src/java/org/broad/tribble/AsciiFeatureCodec.java | 1 +
src/java/org/broad/tribble/BinaryFeatureCodec.java | 4 +-
src/java/org/broad/tribble/FeatureCodec.java | 2 +-
.../broad/tribble/TribbleIndexedFeatureReader.java | 75 +-
.../org/broad/tribble/index/AbstractIndex.java | 115 ++-
src/java/org/broad/tribble/index/Block.java | 14 +-
.../broad/tribble/index/DynamicIndexCreator.java | 83 +-
src/java/org/broad/tribble/index/Index.java | 36 +-
src/java/org/broad/tribble/index/IndexCreator.java | 25 -
src/java/org/broad/tribble/index/IndexFactory.java | 202 +++--
.../Positional.java => index/MutableIndex.java} | 38 +-
.../TribbleIndexCreator.java} | 39 +-
.../index/interval/IntervalIndexCreator.java | 46 +-
.../tribble/index/interval/IntervalTreeIndex.java | 66 +-
.../broad/tribble/index/linear/LinearIndex.java | 127 +--
.../tribble/index/linear/LinearIndexCreator.java | 25 +-
.../org/broad/tribble/index/tabix/TabixFormat.java | 109 +++
.../org/broad/tribble/index/tabix/TabixIndex.java | 364 ++++++++
.../tribble/index/tabix/TabixIndexCreator.java | 199 ++++
.../org/broad/tribble/readers/AsciiLineReader.java | 23 +-
.../tribble/readers/AsciiLineReaderIterator.java | 5 +-
.../tribble/readers/AsynchronousLineReader.java | 9 +-
src/java/org/broad/tribble/readers/Positional.java | 2 +
.../org/broad/tribble/readers/TabixReader.java | 3 +-
.../writer/IndexingVariantContextWriter.java | 60 +-
.../writer/VariantContextWriterFactory.java | 55 +-
.../picard/analysis/CollectRnaSeqMetricsTest.java | 1 +
.../illumina/CheckIlluminaDirectoryTest.java | 304 ++++---
.../illumina/parser/TileMetricsUtilTest.java | 999 ---------------------
.../illumina/parser/readers/BclReaderTest.java | 8 +-
src/tests/java/net/sf/picard/sam/CleanSamTest.java | 24 +-
.../java/net/sf/picard/sam/MarkDuplicatesTest.java | 171 ++++
.../net/sf/picard/sam/MergeBamAlignmentTest.java | 24 +-
...MergingSamRecordIteratorGroupCollisionTest.java | 10 +-
.../net/sf/picard/sam/ValidateSamFileTest.java | 10 +-
.../net/sf/picard/sam/testers/CleanSamTester.java | 52 ++
.../picard/sam/testers/MarkDuplicatesTester.java | 55 ++
.../net/sf/picard/sam/testers/SamFileTester.java | 213 +++++
.../picard/util/QualityEncodingDetectorTest.java | 40 +-
.../java/net/sf/samtools/BAMFileIndexTest.java | 35 +
.../java/net/sf/samtools/BAMFileWriterTest.java | 2 -
.../{picard/sam => samtools}/SamPairUtilTest.java | 3 +-
.../java/org/broad/tribble/FeatureReaderTest.java | 2 +-
.../tribble/index/linear/LinearIndexTest.java | 44 +-
.../broad/tribble/index/tabix/TabixIndexTest.java | 65 ++
.../writer/TabixOnTheFlyIndexCreationTest.java | 66 ++
.../variantcontext/writer/VCFWriterUnitTest.java | 118 +--
.../HiSeq2000TileMetricsOut.bin | Bin 115202 -> 0 bytes
.../HiSeq2500TileMetricsOut.bin | Bin 19202 -> 0 bytes
.../TileMetricsUtilTest/MiSeqTileMetricsOut.bin | Bin 2942 -> 0 bytes
testdata/tribble/tabix/bigger.vcf.gz.tbi | Bin 0 -> 595996 bytes
testdata/variant/ex2.bgzf.bcf | Bin 0 -> 1062 bytes
testdata/variant/ex2.bgzf.bcf.csi | Bin 0 -> 130 bytes
testdata/variant/ex2.uncompressed.bcf | Bin 0 -> 1892 bytes
136 files changed, 5044 insertions(+), 2907 deletions(-)
diff --git a/Picard-public.ipr b/Picard-public.ipr
index 19d8680..c904413 100644
--- a/Picard-public.ipr
+++ b/Picard-public.ipr
@@ -91,8 +91,6 @@
<inspection_tool class="LocalCanBeFinal" enabled="true" level="WARNING" enabled_by_default="true">
<option name="REPORT_VARIABLES" value="true" />
<option name="REPORT_PARAMETERS" value="true" />
- <option name="REPORT_CATCH_PARAMETERS" value="true" />
- <option name="REPORT_FOREACH_PARAMETERS" value="true" />
</inspection_tool>
<inspection_tool class="SqlNoDataSourceInspection" enabled="false" level="WARNING" enabled_by_default="false" />
<inspection_tool class="UnusedDeclaration" enabled="false" level="WARNING" enabled_by_default="false">
@@ -262,6 +260,24 @@
</item>
</group>
</component>
+ <component name="ProjectCodeStyleSettingsManager">
+ <option name="PER_PROJECT_SETTINGS">
+ <value>
+ <option name="GENERATE_FINAL_LOCALS" value="true" />
+ <option name="GENERATE_FINAL_PARAMETERS" value="true" />
+ <option name="CLASS_COUNT_TO_USE_IMPORT_ON_DEMAND" value="999" />
+ <option name="RIGHT_MARGIN" value="140" />
+ <option name="JD_DO_NOT_WRAP_ONE_LINE_COMMENTS" value="true" />
+ <XML>
+ <option name="XML_LEGACY_SETTINGS_IMPORTED" value="true" />
+ </XML>
+ <codeStyleSettings language="JAVA">
+ <option name="KEEP_SIMPLE_METHODS_IN_ONE_LINE" value="true" />
+ </codeStyleSettings>
+ </value>
+ </option>
+ <option name="USE_PER_PROJECT_SETTINGS" value="true" />
+ </component>
<component name="ProjectDetails">
<option name="projectName" value="Picard-public" />
</component>
diff --git a/build.xml b/build.xml
index 5a8a437..b942ca7 100755
--- a/build.xml
+++ b/build.xml
@@ -43,7 +43,7 @@
<!-- Get SVN revision, if available, otherwise leave it blank. -->
<exec executable="svnversion" outputproperty="repository.revision" failifexecutionfails="false"/>
<property name="repository.revision" value=""/>
- <property name="sam-version" value="1.109"/>
+ <property name="sam-version" value="1.110"/>
<property name="picard-version" value="${sam-version}"/>
<property name="tribble-version" value="${sam-version}"/>
<property name="variant-version" value="${sam-version}"/>
diff --git a/src/java/net/sf/picard/analysis/InsertSizeMetrics.java b/src/java/net/sf/picard/analysis/InsertSizeMetrics.java
index 499cfea..fec44bf 100644
--- a/src/java/net/sf/picard/analysis/InsertSizeMetrics.java
+++ b/src/java/net/sf/picard/analysis/InsertSizeMetrics.java
@@ -24,7 +24,7 @@
package net.sf.picard.analysis;
-import net.sf.picard.sam.SamPairUtil.PairOrientation;
+import net.sf.samtools.SamPairUtil.PairOrientation;
import net.sf.picard.metrics.MultilevelMetrics;
/**
diff --git a/src/java/net/sf/picard/analysis/directed/InsertSizeMetricsCollector.java b/src/java/net/sf/picard/analysis/directed/InsertSizeMetricsCollector.java
index 3e2167b..b24d56f 100644
--- a/src/java/net/sf/picard/analysis/directed/InsertSizeMetricsCollector.java
+++ b/src/java/net/sf/picard/analysis/directed/InsertSizeMetricsCollector.java
@@ -6,7 +6,7 @@ import net.sf.picard.metrics.MultiLevelCollector;
import net.sf.picard.metrics.PerUnitMetricCollector;
import net.sf.picard.metrics.MetricsFile;
import net.sf.picard.reference.ReferenceSequence;
-import net.sf.picard.sam.SamPairUtil;
+import net.sf.samtools.SamPairUtil;
import net.sf.picard.util.Histogram;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
diff --git a/src/java/net/sf/picard/fastq/BasicFastqWriter.java b/src/java/net/sf/picard/fastq/BasicFastqWriter.java
index 3e6cf52..4579725 100644
--- a/src/java/net/sf/picard/fastq/BasicFastqWriter.java
+++ b/src/java/net/sf/picard/fastq/BasicFastqWriter.java
@@ -26,6 +26,7 @@ package net.sf.picard.fastq;
import net.sf.picard.PicardException;
import net.sf.picard.io.IoUtil;
import net.sf.samtools.Defaults;
+import net.sf.samtools.util.IOUtil;
import java.io.BufferedOutputStream;
import java.io.File;
@@ -45,7 +46,7 @@ public class BasicFastqWriter implements FastqWriter {
}
public BasicFastqWriter(final File file, final boolean createMd5) {
- this(file, new PrintStream(new BufferedOutputStream(maybeMd5Wrap(file, createMd5), Defaults.BUFFER_SIZE)));
+ this(file, new PrintStream(IOUtil.maybeBufferOutputStream(maybeMd5Wrap(file, createMd5))));
}
private BasicFastqWriter(final File file, final PrintStream writer) {
diff --git a/src/java/net/sf/picard/fastq/FastqReader.java b/src/java/net/sf/picard/fastq/FastqReader.java
index 76a27ba..65f1282 100755
--- a/src/java/net/sf/picard/fastq/FastqReader.java
+++ b/src/java/net/sf/picard/fastq/FastqReader.java
@@ -51,14 +51,9 @@ public class FastqReader implements Iterator<FastqRecord>, Iterable<FastqRecord>
public FastqReader(final File file, final boolean skipBlankLines) {
this.skipBlankLines=skipBlankLines;
- try {
- fastqFile = file;
- reader = IoUtil.openFileForBufferedReading(fastqFile);
- nextRecord = readNextRecord();
- }
- catch (IOException ioe) {
- throw new RuntimeIOException(ioe);
- }
+ fastqFile = file;
+ reader = IoUtil.openFileForBufferedReading(fastqFile);
+ nextRecord = readNextRecord();
}
public FastqReader(final BufferedReader reader) {
diff --git a/src/java/net/sf/picard/filter/FilteringIterator.java b/src/java/net/sf/picard/filter/FilteringIterator.java
index 57d0d1a..769ae75 100644
--- a/src/java/net/sf/picard/filter/FilteringIterator.java
+++ b/src/java/net/sf/picard/filter/FilteringIterator.java
@@ -23,7 +23,7 @@
*/
package net.sf.picard.filter;
-import net.sf.picard.sam.SamPairUtil;
+import net.sf.samtools.SamPairUtil;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
diff --git a/src/java/net/sf/picard/illumina/CheckIlluminaDirectory.java b/src/java/net/sf/picard/illumina/CheckIlluminaDirectory.java
index 5b432e1..65aea3e 100644
--- a/src/java/net/sf/picard/illumina/CheckIlluminaDirectory.java
+++ b/src/java/net/sf/picard/illumina/CheckIlluminaDirectory.java
@@ -5,13 +5,22 @@ import net.sf.picard.cmdline.CommandLineProgram;
import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
-import net.sf.picard.illumina.parser.*;
+import net.sf.picard.illumina.parser.IlluminaDataProviderFactory;
+import net.sf.picard.illumina.parser.IlluminaDataType;
+import net.sf.picard.illumina.parser.IlluminaFileUtil;
+import net.sf.picard.illumina.parser.OutputMapping;
+import net.sf.picard.illumina.parser.ReadStructure;
import net.sf.picard.io.IoUtil;
import net.sf.picard.util.Log;
import net.sf.samtools.util.StringUtil;
import java.io.File;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
/**
* Program to check a lane of an Illumina output directory. This program checks that files exist, are non-zero in length, for every tile/cycle and
@@ -23,30 +32,50 @@ public class CheckIlluminaDirectory extends CommandLineProgram {
// The following attributes define the command-line arguments
@Usage
public String USAGE = getStandardUsagePreamble() +
- "Check that the files to provide the data specified by DATA_TYPES are available, exist, and are reasonably sized for every tile/cycle. " +
- "Reasonably sized means non-zero sized for files that exist per tile and equal size for binary files that exist per cycle/per tile. " +
- "CheckIlluminaDirectory DOES NOT check that the individual records in a file are well-formed.\n";
+ "Check that the files to provide the data specified by DATA_TYPES are available, exist, and are reasonably sized for every tile/cycle. "
+ +
+ "Reasonably sized means non-zero sized for files that exist per tile and equal size for binary files that exist per cycle/per tile. "
+ +
+ "CheckIlluminaDirectory DOES NOT check that the individual records in a file are well-formed.\n";
- @Option(doc="The basecalls output directory. ", shortName="B")
+ @Option(doc = "The basecalls output directory. ", shortName = "B")
public File BASECALLS_DIR;
- @Option(doc="The data types that should be checked for each tile/cycle. If no values are provided then the data types checked are those " +
- "required by IlluminaBaseCallsToSam (which is a superset of those used in ExtractIlluminaBarcodes). These data types vary slightly depending on" +
- "whether or not the run is barcoded so READ_STRUCTURE should be the same as that which will be passed to IlluminaBasecallsToSam. If this option " +
- "is left unspecified then both ExtractIlluminaBarcodes and IlluminaBaseCallsToSam should complete successfully UNLESS the " +
- "individual records of the files themselves are spurious. ",
- shortName="DT",
- optional=true)
+ @Option(doc =
+ "The data types that should be checked for each tile/cycle. If no values are provided then the data types checked are those "
+ +
+ "required by IlluminaBaseCallsToSam (which is a superset of those used in ExtractIlluminaBarcodes). These data types vary slightly depending on"
+ +
+ "whether or not the run is barcoded so READ_STRUCTURE should be the same as that which will be passed to IlluminaBasecallsToSam. If this option "
+ +
+ "is left unspecified then both ExtractIlluminaBarcodes and IlluminaBaseCallsToSam should complete successfully UNLESS the "
+ +
+ "individual records of the files themselves are spurious. ",
+ shortName = "DT",
+ optional = true)
public final Set<IlluminaDataType> DATA_TYPES = new TreeSet<IlluminaDataType>();
- @Option(doc= ReadStructure.PARAMETER_DOC + " Note: If you want to check whether or not a future IlluminaBasecallsToSam or ExtractIlluminaBarcodes " +
- "run will fail then be sure to use the exact same READ_STRUCTURE that you would pass to these programs for this run.", shortName="RS")
+ @Option(doc = ReadStructure.PARAMETER_DOC
+ + " Note: If you want to check whether or not a future IlluminaBasecallsToSam or ExtractIlluminaBarcodes "
+ +
+ "run will fail then be sure to use the exact same READ_STRUCTURE that you would pass to these programs for this run.",
+ shortName = "RS")
public String READ_STRUCTURE;
- @Option(doc="The number of the lane(s) to check. ", shortName= StandardOptionDefinitions.LANE_SHORT_NAME, minElements = 1)
+ @Option(doc = "The number of the lane(s) to check. ", shortName = StandardOptionDefinitions.LANE_SHORT_NAME,
+ minElements = 1)
public List<Integer> LANES;
- /** Required main method implementation. */
+ @Option(doc = "The number(s) of the tile(s) to check. ", shortName = "T", optional = true)
+ public List<Integer> TILE_NUMBERS;
+
+ @Option(doc = "A flag to determine whether or not to create fake versions of the missing files.", shortName = "F",
+ optional = true)
+ public Boolean FAKE_FILES = false;
+
+ /**
+ * Required main method implementation.
+ */
public static void main(final String[] argv) {
new CheckIlluminaDirectory().instanceMainWithExit(argv);
}
@@ -55,27 +84,31 @@ public class CheckIlluminaDirectory extends CommandLineProgram {
@Override
protected int doWork() {
final ReadStructure readStructure = new ReadStructure(READ_STRUCTURE);
- if(DATA_TYPES.size() == 0) {
+ if (DATA_TYPES.isEmpty()) {
DATA_TYPES.addAll(Arrays.asList(IlluminaBasecallsConverter.DATA_TYPES_NO_BARCODE));
}
final List<Integer> failingLanes = new ArrayList<Integer>();
int totalFailures = 0;
- final int [] expectedCycles = new OutputMapping(readStructure).getOutputCycles();
- log.info("Checking lanes(" + StringUtil.join(",", LANES) + " in basecalls directory (" + BASECALLS_DIR.getAbsolutePath() + ")\n");
+ final int[] expectedCycles = new OutputMapping(readStructure).getOutputCycles();
+ log.info("Checking lanes(" + StringUtil.join(",", LANES) + " in basecalls directory (" + BASECALLS_DIR
+ .getAbsolutePath() + ")\n");
log.info("Expected cycles: " + StringUtil.intValuesToString(expectedCycles));
- for(final Integer lane : LANES) {
- final IlluminaFileUtil fileUtil = new IlluminaFileUtil(BASECALLS_DIR, lane);
- final List<Integer> expectedTiles = fileUtil.getExpectedTiles();
+ for (final Integer lane : LANES) {
+ final IlluminaFileUtil fileUtil = new IlluminaFileUtil(BASECALLS_DIR, lane);
+ final List<Integer> expectedTiles = fileUtil.getExpectedTiles();
+ if (!TILE_NUMBERS.isEmpty()) {
+ expectedTiles.retainAll(TILE_NUMBERS);
+ }
log.info("Checking lane " + lane);
- log.info("Expected tiles: " + StringUtil.join(", ", expectedTiles));
+ log.info("Expected tiles: " + StringUtil.join(", ", expectedTiles));
- final int numFailures = verifyLane(fileUtil, expectedTiles, expectedCycles, DATA_TYPES);
+ final int numFailures = verifyLane(fileUtil, expectedTiles, expectedCycles, DATA_TYPES, FAKE_FILES);
- if(numFailures > 0) {
+ if (numFailures > 0) {
log.info("Lane " + lane + " FAILED " + " Total Errors: " + numFailures);
failingLanes.add(lane);
totalFailures += numFailures;
@@ -85,50 +118,75 @@ public class CheckIlluminaDirectory extends CommandLineProgram {
}
int status = 0;
- if(totalFailures == 0) {
+ if (totalFailures == 0) {
log.info("SUCCEEDED! All required files are present and non-empty.");
} else {
status = totalFailures;
- log.info("FAILED! There were " + totalFailures + " in the following lanes: " + StringUtil.join(", ", failingLanes));
+ log.info("FAILED! There were " + totalFailures + " in the following lanes: " + StringUtil
+ .join(", ", failingLanes));
}
return status;
}
/**
- * Use fileUtil to find the data types that would be used by IlluminaDataProvider. Verify that for the expected tiles/cycles/data types that all
- * the files needed to provide their data is present. This method logs every error that is found and returns the number of errors found
- * @param fileUtil A file util paramterized with the directory/lane to check
+ * Use fileUtil to find the data types that would be used by IlluminaDataProvider. Verify that for the expected
+ * tiles/cycles/data types all the files needed to provide their data are present. This method logs every
+ * error that is found (excluding file faking errors) and returns the number of errors found
+ *
+ * @param fileUtil A file util parameterized with the directory/lane to check
* @param expectedTiles The tiles we expect to be available/well-formed
- * @param cycles The cycles we expect to be available/well-formed
- * @param dataTypes The data types we expect to be available/well-formed
+ * @param cycles The cycles we expect to be available/well-formed
+ * @param dataTypes The data types we expect to be available/well-formed
+ *
* @return The number of errors found/logged for this directory/lane
*/
- private static final int verifyLane(final IlluminaFileUtil fileUtil, final List<Integer> expectedTiles, final int[] cycles, final Set<IlluminaDataType> dataTypes) {
- if(expectedTiles.size() == 0) {
- throw new PicardException("0 input tiles were specified! Check to make sure this lane is in the InterOp file!");
+ private static final int verifyLane(final IlluminaFileUtil fileUtil, final List<Integer> expectedTiles,
+ final int[] cycles,
+ final Set<IlluminaDataType> dataTypes, final boolean fakeFiles) {
+ if (expectedTiles.isEmpty()) {
+ throw new PicardException(
+ "0 input tiles were specified! Check to make sure this lane is in the InterOp file!");
}
- if(cycles.length == 0) {
+ if (cycles.length == 0) {
throw new PicardException("0 output cycles were specified!");
}
int numFailures = 0;
//find what request IlluminaDataTypes we have files for and select the most preferred file format available for that type
- final Map<IlluminaFileUtil.SupportedIlluminaFormat, Set<IlluminaDataType>> formatToDataTypes = IlluminaDataProviderFactory.determineFormats(dataTypes, fileUtil);
-
- //find if we have any IlluminaDataType with NO available file formats and, if any exist, throw an exception
- final Set<IlluminaDataType> unmatchedDataTypes = IlluminaDataProviderFactory.findUnmatchedTypes(dataTypes, formatToDataTypes);
- if(unmatchedDataTypes.size() > 0) {
- log.info("Could not find a format with available files for the following data types: " + StringUtil.join(", ", new ArrayList<IlluminaDataType>(unmatchedDataTypes)));
+ final Map<IlluminaFileUtil.SupportedIlluminaFormat, Set<IlluminaDataType>> formatToDataTypes =
+ IlluminaDataProviderFactory.determineFormats(dataTypes, fileUtil);
+
+ //find if we have any IlluminaDataType with NO available file formats and, if any exist, increase the error count
+ final Set<IlluminaDataType> unmatchedDataTypes =
+ IlluminaDataProviderFactory.findUnmatchedTypes(dataTypes, formatToDataTypes);
+ if (!unmatchedDataTypes.isEmpty()) {
+ if (fakeFiles) {
+ for (final IlluminaDataType dataType : unmatchedDataTypes) {
+ final IlluminaFileUtil.SupportedIlluminaFormat format =
+ IlluminaDataProviderFactory.findPreferredFormat(dataType, fileUtil);
+ fileUtil.getUtil(format).fakeFiles(expectedTiles, cycles, format);
+
+ }
+ }
+ log.info("Could not find a format with available files for the following data types: " + StringUtil
+ .join(", ", new ArrayList<IlluminaDataType>(unmatchedDataTypes)));
numFailures += unmatchedDataTypes.size();
}
- for(final IlluminaFileUtil.SupportedIlluminaFormat format : formatToDataTypes.keySet()) {
- final List<String> failures = fileUtil.getUtil(format).verify(expectedTiles, cycles);
+ for (final IlluminaFileUtil.SupportedIlluminaFormat format : formatToDataTypes.keySet()) {
+ final IlluminaFileUtil.ParameterizedFileUtil util = fileUtil.getUtil(format);
+ final List<String> failures = util.verify(expectedTiles, cycles);
+ //if we have failures and we want to fake files then fake them now.
+ if (!failures.isEmpty() && fakeFiles) {
+ //fake files
+ util.fakeFiles(expectedTiles, cycles, format);
+
+ }
numFailures += failures.size();
- for(final String failure : failures) {
+ for (final String failure : failures) {
log.info(failure);
}
}
@@ -136,19 +194,20 @@ public class CheckIlluminaDirectory extends CommandLineProgram {
return numFailures;
}
-
+ @Override
protected String[] customCommandLineValidation() {
IoUtil.assertDirectoryIsReadable(BASECALLS_DIR);
final List<String> errors = new ArrayList<String>();
- for(final Integer lane : LANES) {
- if(lane < 1) {
- errors.add("LANES must be greater than or equal to 1. LANES passed in " + StringUtil.join(", ", LANES));
+ for (final Integer lane : LANES) {
+ if (lane < 1) {
+ errors.add(
+ "LANES must be greater than or equal to 1. LANES passed in " + StringUtil.join(", ", LANES));
break;
}
}
- if(errors.size() == 0) {
+ if (errors.isEmpty()) {
return null;
} else {
return errors.toArray(new String[errors.size()]);
diff --git a/src/java/net/sf/picard/illumina/ExtractIlluminaBarcodes.java b/src/java/net/sf/picard/illumina/ExtractIlluminaBarcodes.java
index 23d7042..6228179 100644
--- a/src/java/net/sf/picard/illumina/ExtractIlluminaBarcodes.java
+++ b/src/java/net/sf/picard/illumina/ExtractIlluminaBarcodes.java
@@ -170,9 +170,9 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
LOG.info("Processing with " + numProcessors + " PerTileBarcodeExtractor(s).");
final ExecutorService pool = Executors.newFixedThreadPool(numProcessors);
+ // TODO: This is terribly inefficient; we're opening a huge number of files via the extractor constructor and we never close them.
final List<PerTileBarcodeExtractor> extractors = new ArrayList<PerTileBarcodeExtractor>(factory.getAvailableTiles().size());
for (final int tile : factory.getAvailableTiles()) {
-
final PerTileBarcodeExtractor extractor = new PerTileBarcodeExtractor(
tile,
getBarcodeFile(tile),
@@ -184,11 +184,13 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
MAX_MISMATCHES,
MIN_MISMATCH_DELTA
);
- pool.submit(extractor);
extractors.add(extractor);
}
- pool.shutdown();
try {
+ for (final PerTileBarcodeExtractor extractor : extractors) {
+ pool.submit(extractor);
+ }
+ pool.shutdown();
// Wait a while for existing tasks to terminate
if (!pool.awaitTermination(6, TimeUnit.HOURS)) {
pool.shutdownNow(); // Cancel any still-executing tasks
@@ -197,8 +199,9 @@ public class ExtractIlluminaBarcodes extends CommandLineProgram {
LOG.error("Pool did not terminate");
return 1;
}
- } catch (InterruptedException ie) {
+ } catch (final Throwable e) {
// (Re-)Cancel if current thread also interrupted
+ LOG.error(e, "Parent thread encountered problem submitting extractors to thread pool or awaiting shutdown of threadpool. Attempting to kill threadpool.");
pool.shutdownNow();
return 2;
}
diff --git a/src/java/net/sf/picard/illumina/IlluminaBasecallsConverter.java b/src/java/net/sf/picard/illumina/IlluminaBasecallsConverter.java
index 3f5fd86..d79b0e8 100644
--- a/src/java/net/sf/picard/illumina/IlluminaBasecallsConverter.java
+++ b/src/java/net/sf/picard/illumina/IlluminaBasecallsConverter.java
@@ -112,6 +112,7 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
// ends, but for unit testing it is desirable to stop the task when done with this instance.
private final TimerTask gcTimerTask;
private List<Integer> tiles;
+ private boolean includeNonPfReads;
private final SortingCollection.Codec<CLUSTER_OUTPUT_RECORD> codecPrototype;
// Annoying that we need this.
private final Class<CLUSTER_OUTPUT_RECORD> outputRecordClass;
@@ -133,6 +134,7 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
* @param outputRecordComparator For sorting output records within a single tile.
* @param codecPrototype For spilling output records to disk.
* @param outputRecordClass Inconveniently needed to create SortingCollections.
+ * @param includeNonPfReads If true, will include ALL reads (including those which do not have PF set)
*/
public IlluminaBasecallsConverter(final File basecallsDir, final int lane, final ReadStructure readStructure,
final Map<String, ? extends ConvertedClusterDataWriter<CLUSTER_OUTPUT_RECORD>> barcodeRecordWriterMap,
@@ -145,7 +147,9 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
final SortingCollection.Codec<CLUSTER_OUTPUT_RECORD> codecPrototype,
final Class<CLUSTER_OUTPUT_RECORD> outputRecordClass,
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy,
- final boolean applyEamssFiltering) {
+ final boolean applyEamssFiltering,
+ final boolean includeNonPfReads
+ ) {
this.barcodeRecordWriterMap = barcodeRecordWriterMap;
this.demultiplex = demultiplex;
this.maxReadsInRamPerTile = maxReadsInRamPerTile;
@@ -154,6 +158,7 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
this.codecPrototype = codecPrototype;
this.outputRecordClass = outputRecordClass;
this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;
+ this.includeNonPfReads = includeNonPfReads;
// If we're forcing garbage collection, collect every 5 minutes in a daemon thread.
if (forceGc) {
@@ -470,9 +475,12 @@ public class IlluminaBasecallsConverter<CLUSTER_OUTPUT_RECORD> {
while (dataProvider.hasNext()) {
final ClusterData cluster = dataProvider.next();
- final String barcode = (demultiplex? cluster.getMatchedBarcode(): null);
readProgressLogger.record(null, 0);
- this.processingRecord.addRecord(barcode, converter.convertClusterToOutputRecord(cluster));
+ // If this cluster is passing, or we do NOT want to ONLY emit passing reads, then add it to the next
+ if (cluster.isPf() || includeNonPfReads) {
+ final String barcode = (demultiplex? cluster.getMatchedBarcode(): null);
+ this.processingRecord.addRecord(barcode, converter.convertClusterToOutputRecord(cluster));
+ }
}
this.handler.completeTile(this.tile);
diff --git a/src/java/net/sf/picard/illumina/IlluminaBasecallsToFastq.java b/src/java/net/sf/picard/illumina/IlluminaBasecallsToFastq.java
index 9b316e6..511c584 100644
--- a/src/java/net/sf/picard/illumina/IlluminaBasecallsToFastq.java
+++ b/src/java/net/sf/picard/illumina/IlluminaBasecallsToFastq.java
@@ -123,6 +123,9 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
"The default of 2 is what the Illumina's spec describes as the minimum, but in practice the value has been observed lower.")
public int MINIMUM_QUALITY = BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY;
+ @Option(doc="Whether to include non-PF reads", shortName="NONPF", optional=true)
+ public boolean INCLUDE_NON_PF_READS = true;
+
@Option(doc="The read name header formatting to emit. Casava1.8 formatting has additional information beyond Illumina, including: " +
"the passing-filter flag value for the read, the flowcell name, and the sequencer name.", optional = false)
public ReadNameFormat READ_NAME_FORMAT = ReadNameFormat.CASAVA_1_8;
@@ -207,7 +210,7 @@ public class IlluminaBasecallsToFastq extends CommandLineProgram {
FORCE_GC, FIRST_TILE, TILE_LIMIT, queryNameComparator,
new FastqRecordsForClusterCodec(readStructure.templates.length(),
readStructure.barcodes.length()), FastqRecordsForCluster.class, bclQualityEvaluationStrategy,
- this.APPLY_EAMSS_FILTER);
+ this.APPLY_EAMSS_FILTER, INCLUDE_NON_PF_READS);
log.info("READ STRUCTURE IS " + readStructure.toString());
diff --git a/src/java/net/sf/picard/illumina/IlluminaBasecallsToSam.java b/src/java/net/sf/picard/illumina/IlluminaBasecallsToSam.java
index 54c47ab..2603474 100644
--- a/src/java/net/sf/picard/illumina/IlluminaBasecallsToSam.java
+++ b/src/java/net/sf/picard/illumina/IlluminaBasecallsToSam.java
@@ -176,6 +176,8 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
"The default of 2 is what the Illumina's spec describes as the minimum, but in practice the value has been observed lower.")
public int MINIMUM_QUALITY = BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY;
+ @Option(doc="Whether to include non-PF reads", shortName="NONPF", optional=true)
+ public boolean INCLUDE_NON_PF_READS = true;
private final Map<String, SAMFileWriterWrapper> barcodeSamWriterMap = new HashMap<String, SAMFileWriterWrapper>();
private ReadStructure readStructure;
@@ -195,7 +197,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
*/
private void initialize() {
this.bclQualityEvaluationStrategy = new BclQualityEvaluationStrategy(MINIMUM_QUALITY);
-
+
if (OUTPUT != null) {
IoUtil.assertFileIsWritable(OUTPUT);
}
@@ -217,7 +219,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
basecallsConverter = new IlluminaBasecallsConverter<SAMRecordsForCluster>(BASECALLS_DIR, LANE, readStructure,
barcodeSamWriterMap, true, MAX_READS_IN_RAM_PER_TILE/numOutputRecords, TMP_DIR, NUM_PROCESSORS, FORCE_GC,
FIRST_TILE, TILE_LIMIT, new QueryNameComparator(), new Codec(numOutputRecords), SAMRecordsForCluster.class,
- bclQualityEvaluationStrategy, this.APPLY_EAMSS_FILTER);
+ bclQualityEvaluationStrategy, this.APPLY_EAMSS_FILTER, INCLUDE_NON_PF_READS);
log.info("DONE_READING STRUCTURE IS " + readStructure.toString());
@@ -439,7 +441,7 @@ public class IlluminaBasecallsToSam extends CommandLineProgram {
@Override
public void write(final SAMRecordsForCluster records) {
for (final SAMRecord rec : records.records) {
- writer.addAlignment(rec);
+ writer.addAlignment(rec);
}
}
diff --git a/src/java/net/sf/picard/illumina/parser/BclParser.java b/src/java/net/sf/picard/illumina/parser/BclParser.java
index 22262e9..81ba506 100644
--- a/src/java/net/sf/picard/illumina/parser/BclParser.java
+++ b/src/java/net/sf/picard/illumina/parser/BclParser.java
@@ -71,7 +71,7 @@ class BclParser extends PerTilePerCycleParser<BclData>{
* Allow for overriding in derived classes.
*/
protected CloseableIterator<BclReader.BclValue> makeReader(final File file, final int cycle, final int tileNumber) {
- return new BclReader(file, bclQualityEvaluationStrategy);
+ return BclReader.make(file, bclQualityEvaluationStrategy);
}
/** Create a Bcl parser for an individual cycle and wrap it with the CycleFileParser interface which populates
@@ -84,7 +84,10 @@ class BclParser extends PerTilePerCycleParser<BclData>{
protected CycleFileParser<BclData> makeCycleFileParser(final File file, final int cycle, final int tileNumber) {
return new CycleFileParser<BclData>(){
final OutputMapping.TwoDIndex cycleOutputIndex = outputMapping.getOutputIndexForCycle(cycle);
- CloseableIterator<BclReader.BclValue> reader = makeReader(file, cycle, tileNumber);
+ final CloseableIterator<BclReader.BclValue> reader = makeReader(file, cycle, tileNumber);
+
+ final int majorIndex = cycleOutputIndex.majorIndex;
+ final int minorIndex = cycleOutputIndex.minorIndex;
@Override
public void close() {
@@ -98,13 +101,14 @@ class BclParser extends PerTilePerCycleParser<BclData>{
}
final BclReader.BclValue value = reader.next();
- ild.getBases() [cycleOutputIndex.majorIndex][cycleOutputIndex.minorIndex] = value.base;
- ild.getQualities()[cycleOutputIndex.majorIndex][cycleOutputIndex.minorIndex] = value.quality;
+ ild.bases[majorIndex][minorIndex] = value.base;
+ ild.qualities[majorIndex][minorIndex] = value.quality;
}
@Override
public boolean hasNext() {
- return reader != null && reader.hasNext();
+ try { return reader.hasNext(); }
+ catch (final NullPointerException npe) { return false; }
}
};
}
diff --git a/src/java/net/sf/picard/illumina/parser/IlluminaDataProviderFactory.java b/src/java/net/sf/picard/illumina/parser/IlluminaDataProviderFactory.java
index ae0b274..4d3d3cf 100644
--- a/src/java/net/sf/picard/illumina/parser/IlluminaDataProviderFactory.java
+++ b/src/java/net/sf/picard/illumina/parser/IlluminaDataProviderFactory.java
@@ -279,11 +279,29 @@ public class IlluminaDataProviderFactory {
* @param fileUtil Util for the lane/directory in which we will find data
* @return The file format that is "most preferred" (i.e. fastest to parse/smallest in memory)
*/
- public static SupportedIlluminaFormat findPreferredAvailableFormat(final IlluminaDataType dt, final IlluminaFileUtil fileUtil) {
+ private static SupportedIlluminaFormat findPreferredAvailableFormat(final IlluminaDataType dt, final IlluminaFileUtil fileUtil) {
+ return findPreferredFormat(dt, fileUtil, true);
+ }
+
+ /**
+ * Given a data type find the most preferred file format even if files are not available
+ * @param dt Type of desired data
+ * @param fileUtil Util for the lane/directory in which we will find data
+ * @return The file format that is "most preferred" (i.e. fastest to parse/smallest in memory)
+ */
+ public static SupportedIlluminaFormat findPreferredFormat(final IlluminaDataType dt, final IlluminaFileUtil fileUtil){
+ return findPreferredFormat(dt, fileUtil, false);
+ }
+
+ private static SupportedIlluminaFormat findPreferredFormat(final IlluminaDataType dt, final IlluminaFileUtil fileUtil,
+ final boolean checkAvailable){
final List<SupportedIlluminaFormat> preferredFormats = DATA_TYPE_TO_PREFERRED_FORMATS.get(dt);
SupportedIlluminaFormat format = null;
for(int i = 0; i < preferredFormats.size() && format == null; i++) {
- if(fileUtil.getUtil(preferredFormats.get(i)).filesAvailable()) {
+ if(checkAvailable && fileUtil.getUtil(preferredFormats.get(i)).filesAvailable()) {
+ format = preferredFormats.get(i);
+ }
+ else if(!checkAvailable){
format = preferredFormats.get(i);
}
}
diff --git a/src/java/net/sf/picard/illumina/parser/IlluminaFileUtil.java b/src/java/net/sf/picard/illumina/parser/IlluminaFileUtil.java
index 2bbe9a7..3be24a0 100644
--- a/src/java/net/sf/picard/illumina/parser/IlluminaFileUtil.java
+++ b/src/java/net/sf/picard/illumina/parser/IlluminaFileUtil.java
@@ -24,12 +24,39 @@
package net.sf.picard.illumina.parser;
import net.sf.picard.PicardException;
+import net.sf.picard.illumina.parser.fakers.BarcodeFileFaker;
+import net.sf.picard.illumina.parser.fakers.BciFileFaker;
+import net.sf.picard.illumina.parser.fakers.BclFileFaker;
+import net.sf.picard.illumina.parser.fakers.CifFileFaker;
+import net.sf.picard.illumina.parser.fakers.ClocsFileFaker;
+import net.sf.picard.illumina.parser.fakers.CnfFileFaker;
+import net.sf.picard.illumina.parser.fakers.FileFaker;
+import net.sf.picard.illumina.parser.fakers.FilterFileFaker;
+import net.sf.picard.illumina.parser.fakers.LocsFileFaker;
+import net.sf.picard.illumina.parser.fakers.MultiTileBclFileFaker;
+import net.sf.picard.illumina.parser.fakers.MultiTileLocsFileFaker;
+import net.sf.picard.illumina.parser.fakers.PosFileFaker;
+import net.sf.picard.illumina.parser.fakers.QSeqFileFaker;
import net.sf.picard.illumina.parser.readers.TileMetricsOutReader;
import net.sf.picard.io.IoUtil;
+import net.sf.samtools.util.CloserUtil;
import net.sf.samtools.util.StringUtil;
-import java.io.*;
-import java.util.*;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -65,7 +92,9 @@ public class IlluminaFileUtil {
private final int lane;
- /** A regex string matching only qseq files */
+ /**
+ * A regex string matching only qseq files
+ */
private final QSeqIlluminaFileUtil qseq;
private final PerTilePerCycleFileUtil bcl;
private final PerTilePerCycleFileUtil cif;
@@ -83,13 +112,13 @@ public class IlluminaFileUtil {
public IlluminaFileUtil(final File basecallDir, final int lane) {
- this.basecallDir = basecallDir;
+ this.basecallDir = basecallDir;
final File intensityDir = basecallDir.getParentFile();
- final File dataDir = intensityDir.getParentFile();
- final File interopDir = new File(dataDir.getParentFile(), "InterOp");
+ final File dataDir = intensityDir.getParentFile();
+ final File interopDir = new File(dataDir.getParentFile(), "InterOp");
this.lane = lane;
- final File basecallLaneDir = new File(basecallDir, longLaneStr(lane));
+ final File basecallLaneDir = new File(basecallDir, longLaneStr(lane));
this.intensityLaneDir = new File(intensityDir, longLaneStr(lane));
utils = new HashMap<SupportedIlluminaFormat, ParameterizedFileUtil>();
@@ -97,28 +126,28 @@ public class IlluminaFileUtil {
qseq = new QSeqIlluminaFileUtil();
utils.put(SupportedIlluminaFormat.Qseq, qseq);
- bcl = new PerTilePerCycleFileUtil(inferBclExtension(basecallLaneDir), basecallLaneDir);
+ bcl = new PerTilePerCycleFileUtil(inferBclExtension(basecallLaneDir), basecallLaneDir, new BclFileFaker());
utils.put(SupportedIlluminaFormat.Bcl, bcl);
- cif = new PerTilePerCycleFileUtil(".cif");
+ cif = new PerTilePerCycleFileUtil(".cif", new CifFileFaker());
utils.put(SupportedIlluminaFormat.Cif, cif);
- cnf = new PerTilePerCycleFileUtil(".cnf");
+ cnf = new PerTilePerCycleFileUtil(".cnf", new CnfFileFaker());
utils.put(SupportedIlluminaFormat.Cnf, cnf);
- locs = new PerTileFileUtil(".locs", false);
+ locs = new PerTileFileUtil(".locs", false, new LocsFileFaker());
utils.put(SupportedIlluminaFormat.Locs, locs);
- clocs = new PerTileFileUtil(".clocs", false);
+ clocs = new PerTileFileUtil(".clocs", false, new ClocsFileFaker());
utils.put(SupportedIlluminaFormat.Clocs, clocs);
- pos = new PerTileFileUtil("_pos.txt", false, intensityDir);
- utils.put(SupportedIlluminaFormat.Pos, pos);
+ pos = new PerTileFileUtil("_pos.txt", false, intensityDir, new PosFileFaker());
+ utils.put(SupportedIlluminaFormat.Pos, pos);
- filter = new PerTileFileUtil(".filter", true, basecallLaneDir);
+ filter = new PerTileFileUtil(".filter", true, basecallLaneDir, new FilterFileFaker());
utils.put(SupportedIlluminaFormat.Filter, filter);
- barcode = new PerTileFileUtil("_barcode.txt", true, basecallDir);
+ barcode = new PerTileFileUtil("_barcode.txt", true, basecallDir, new BarcodeFileFaker());
utils.put(SupportedIlluminaFormat.Barcode, barcode);
multiTileFilter = new MultiTileFilterFileUtil(basecallLaneDir);
@@ -133,53 +162,63 @@ public class IlluminaFileUtil {
tileMetricsOut = new File(interopDir, "TileMetricsOut.bin");
}
- /** Return the lane we're inspecting */
+ /**
+ * Return the lane we're inspecting
+ */
public int getLane() {
return lane;
}
- /** Given a file type, get the Parameterized File Util object associated with it*/
+ /**
+ * Given a file type, get the Parameterized File Util object associated with it
+ */
public ParameterizedFileUtil getUtil(final SupportedIlluminaFormat format) {
return utils.get(format);
}
- /** Return the list of tiles we would expect for this lane based on the metrics found in InterOp/TileMetricsOut.bin */
+ /**
+ * Return the list of tiles we would expect for this lane based on the metrics found in InterOp/TileMetricsOut.bin
+ */
public List<Integer> getExpectedTiles() {
IoUtil.assertFileIsReadable(tileMetricsOut);
//Used just to ensure predictable ordering
final TreeSet<Integer> expectedTiles = new TreeSet<Integer>();
final Iterator<TileMetricsOutReader.IlluminaTileMetrics> tileMetrics = new TileMetricsOutReader(tileMetricsOut);
- while(tileMetrics.hasNext()) {
+ while (tileMetrics.hasNext()) {
final TileMetricsOutReader.IlluminaTileMetrics tileMetric = tileMetrics.next();
- if(tileMetric.getLaneNumber() == lane) {
- if(!expectedTiles.contains(tileMetric.getTileNumber())) {
+ if (tileMetric.getLaneNumber() == lane) {
+ if (!expectedTiles.contains(tileMetric.getTileNumber())) {
expectedTiles.add(tileMetric.getTileNumber());
}
}
}
+ CloserUtil.close(tileMetrics);
return new ArrayList<Integer>(expectedTiles);
}
- /** Get the available tiles for the given formats, if the formats have tile lists that differ then
+ /**
+ * Get the available tiles for the given formats, if the formats have tile lists that differ then
* throw an exception, if any of the format
*/
public List<Integer> getActualTiles(final List<SupportedIlluminaFormat> formats) {
- if(formats == null) {
+ if (formats == null) {
throw new PicardException("Format list provided to getTiles was null!");
}
- if(formats.size() == 0) {
- throw new PicardException("0 Formats were specified. You need to specify at least SupportedIlluminaFormat to use getTiles");
+ if (formats.size() == 0) {
+ throw new PicardException(
+ "0 Formats were specified. You need to specify at least SupportedIlluminaFormat to use getTiles");
}
final List<Integer> tiles = utils.get(formats.get(0)).getTiles();
- for(int i = 0; i < formats.size(); i++) {
+ for (int i = 0; i < formats.size(); i++) {
final List<Integer> fmTiles = utils.get(formats.get(i)).getTiles();
- if(tiles.size() != fmTiles.size() || !tiles.containsAll(fmTiles)) {
- throw new PicardException("Formats do not have the same number of tiles! " + summarizeTileCounts(formats));
+ if (tiles.size() != fmTiles.size() || !tiles.containsAll(fmTiles)) {
+ throw new PicardException(
+ "Formats do not have the same number of tiles! " + summarizeTileCounts(formats));
}
}
@@ -191,7 +230,7 @@ public class IlluminaFileUtil {
}
public PerTilePerCycleFileUtil bcl() {
- return bcl;
+ return bcl;
}
public PerTilePerCycleFileUtil cif() {
@@ -205,6 +244,7 @@ public class IlluminaFileUtil {
public PerTileFileUtil locs() {
return locs;
}
+
public PerTileFileUtil clocs() {
return clocs;
}
@@ -238,68 +278,85 @@ public class IlluminaFileUtil {
}
public static final String UNPARAMETERIZED_PER_TILE_PATTERN = "s_(\\d+)_(\\d{1,5})";
- public static final String UNPARAMETERIZED_QSEQ_PATTERN = "s_(\\d+)_(\\d)_(\\d{4})_qseq\\.txt(\\.gz|\\.bz2)?";
+ public static final String UNPARAMETERIZED_QSEQ_PATTERN = "s_(\\d+)_(\\d)_(\\d{4})_qseq\\.txt(\\.gz|\\.bz2)?";
private static final Pattern CYCLE_SUBDIRECTORY_PATTERN = Pattern.compile("^C(\\d+)\\.1$");
public static String makeParameterizedLaneAndTileRegex(final int lane) {
- if(lane < 0) {
+ if (lane < 0) {
throw new PicardException("Lane (" + lane + ") cannot be negative");
}
return "s_" + lane + "_(\\d{1,5})";
}
public static String makeParameterizedQseqRegex(final int lane) {
- if(lane < 0) {
+ if (lane < 0) {
throw new PicardException("Lane (" + lane + ") cannot be negative");
}
return "s_" + lane + "_(\\d)_(\\d{4})_qseq\\.txt(\\.gz|\\.bz2)?";
}
- /** An object providing utilities for locating Illumina files of specific types */
+ /**
+ * An object providing utilities for locating Illumina files of specific types
+ */
public abstract class ParameterizedFileUtil {
- /** The file extension for this class, file extension does not have the standard meaning
+ /**
+ * The file extension for this class, file extension does not have the standard meaning
* in this instance. It means, all the characters that come after the identifying portion of
* the file (after lane, tile, and end that is). So _qseq.txt and .filter are both file extensions
*/
public final String extension;
- /** A pattern that will match files of this type for this lane*/
+ /**
+ * A pattern that will match files of this type for this lane
+ */
public final Pattern pattern;
- /** A pattern that will match files of this type for this lane*/
+ /**
+ * A pattern that will match files of this type for this lane
+ */
public final Pattern unparameterizedPattern;
- /** If you think of the file system as a tree, this is the deepest directory(node) on the tree that
+ /**
+ * If you think of the file system as a tree, this is the deepest directory(node) on the tree that
* still contains all of the files for this given type (e.g. If we're talking about BCLs the directory
* structure is:
- *
- * BaseCall Dir
- * |
- * L001
+ * <p/>
+ * BaseCall Dir
+ * |
+ * L001
* | | |
* C1.1 C2.1 ... Cn.1
* | | |
* bcl Files ... bclFiles
- *
+ * <p/>
* L001 is the base because it contains every BCL file in the run (though those files are nested in
* other folders).
*/
protected final File base;
- public ParameterizedFileUtil(final String unparameterizedPattern, final String patternStr, final String extension, final File base) {
- this.pattern = Pattern.compile(escapePeriods(patternStr));
- this.unparameterizedPattern = Pattern.compile(escapePeriods(unparameterizedPattern));
+ protected final FileFaker faker;
+
+ public ParameterizedFileUtil(final String unparameterizedPattern, final String patternStr,
+ final String extension, final File base,
+ final FileFaker faker) {
+ this.pattern = Pattern.compile(escapePeriods(patternStr));
+ this.unparameterizedPattern = Pattern.compile(escapePeriods(unparameterizedPattern));
this.extension = extension;
- this.base = base;
+ this.base = base;
+ this.faker = faker;
}
- /** The period separator is expected in the file extension, since some do not start with it */
+ /**
+ * The period separator is expected in the file extension, since some do not start with it
+ */
private String escapePeriods(final String preEscaped) {
- return preEscaped.replaceAll("\\.", "\\."); //In the first one the \\ is inside a regex in the second it's NOT
+ return preEscaped
+ .replaceAll("\\.", "\\."); //In the first one the \\ is inside a regex in the second it's NOT
}
/**
* Determine whether or not files are available
+ *
* @return return true if files are found matching this types pattern, false otherwise
*/
public abstract boolean filesAvailable();
@@ -307,13 +364,16 @@ public class IlluminaFileUtil {
/**
* Illumina file names contain at least lane and tile information and sometimes end info. Return all
* available lane tile and end information.
+ *
* @param fileName Filename to analyze for data
+ *
* @return A LaneTileEnd object with discovered values or null if that value is not available in the given file name
*/
public abstract LaneTileEnd fileToLaneTileEnd(final String fileName);
/**
* Return a list of all tiles available for this file format and run
+ *
* @return A List of tile integers
*/
public abstract List<Integer> getTiles();
@@ -322,35 +382,55 @@ public class IlluminaFileUtil {
/**
* Given the expected tiles/expected cycles for this file type, return a list of error messages describing any
* missing/or malformed files
- * @param expectedTiles An ordered list of tile numbers
+ *
+ * @param expectedTiles An ordered list of tile numbers
* @param expectedCycles An ordered list of cycle numbers that may contain gaps
+ *
* @return A list of error messages for this format
*/
- public abstract List<String> verify(List<Integer> expectedTiles, int [] expectedCycles);
+ public abstract List<String> verify(List<Integer> expectedTiles, int[] expectedCycles);
+
+ /**
+ * Given the expected tiles/expected cycles for this file type create a set of fake files such that the
+ * verification criteria are met.
+ *
+ * @param expectedTiles An ordered list of tile numbers
+ * @param cycles An ordered list of cycle numbers that may contain gaps
+ * @param format The format of the files that are to be faked
+ *
+ * @return A list of error messages for this format
+ */
+ public abstract List<String> fakeFiles(List<Integer> expectedTiles, int[] cycles,
+ SupportedIlluminaFormat format);
}
- /** Represents file types that have one file per tile */
+ /**
+ * Represents file types that have one file per tile
+ */
class PerTileFileUtil extends ParameterizedFileUtil {
protected final boolean txtBased;
protected final boolean padTile;
protected final IlluminaFileMap fileMap;
protected final List<Integer> tiles;
- public PerTileFileUtil(final String fileNameEndPattern, final boolean padTile, final File base) {
- super(makeLTRegex(processTxtExtension(fileNameEndPattern)), makeLTRegex(processTxtExtension(fileNameEndPattern), lane), fileNameEndPattern, base);
+ public PerTileFileUtil(final String fileNameEndPattern, final boolean padTile, final File base,
+ final FileFaker fileFaker) {
+ super(makeLTRegex(processTxtExtension(fileNameEndPattern)),
+ makeLTRegex(processTxtExtension(fileNameEndPattern), lane), fileNameEndPattern, base,
+ fileFaker);
this.txtBased = fileNameEndPattern.endsWith(".txt");
- this.padTile = padTile;
- this.fileMap = getTiledFiles(base, pattern, this);
+ this.padTile = padTile;
+ this.fileMap = getTiledFiles(base, pattern, this);
- if(fileMap.size() > 0) {
+ if (fileMap.size() > 0) {
this.tiles = Collections.unmodifiableList(new ArrayList<Integer>(this.fileMap.keySet()));
} else {
this.tiles = new ArrayList<Integer>();
}
}
- public PerTileFileUtil(final String fileNameEndPattern, final boolean padTile) {
- this(fileNameEndPattern, padTile, intensityLaneDir);
+ public PerTileFileUtil(final String fileNameEndPattern, final boolean padTile, final FileFaker fileFaker) {
+ this(fileNameEndPattern, padTile, intensityLaneDir, fileFaker);
}
@Override
@@ -360,7 +440,9 @@ public class IlluminaFileUtil {
/**
* Returns only lane and tile information as PerTileFt's do not have End information.
+ *
* @param fileName Filename to analyze for data
+ *
* @return A LaneTileEnd object with the discovered Lane and Tile information and a null end field.
*/
public LaneTileEnd fileToLaneTileEnd(final String fileName) {
@@ -383,13 +465,13 @@ public class IlluminaFileUtil {
public List<String> verify(final List<Integer> expectedTiles, final int[] expectedCycles) {
final List<String> failures = new LinkedList<String>();
- if(!base.exists()) {
+ if (!base.exists()) {
failures.add("Base directory(" + base.getAbsolutePath() + ") does not exist!");
} else {
- for(final Integer tile : expectedTiles) {
- if(!tiles.contains(tile)) {
+ for (final Integer tile : expectedTiles) {
+ if (!tiles.contains(tile)) {
failures.add("Missing tile " + tile + " for file type " + extension + ".");
- } else if( fileMap.get(tile).length() == 0 ) {
+ } else if (fileMap.get(tile).length() == 0) {
failures.add("Tile " + tile + " is empty for file type " + extension + ".");
}
}
@@ -397,6 +479,29 @@ public class IlluminaFileUtil {
return failures;
}
+
+ @Override
+ public List<String> fakeFiles(final List<Integer> expectedTiles, final int[] cycles,
+ final SupportedIlluminaFormat format) {
+ final List<String> failures = new LinkedList<String>();
+ if (!base.exists()) {
+ failures.add("Base directory(" + base.getAbsolutePath() + ") does not exist!");
+ } else {
+ for (final Integer tile : expectedTiles) {
+ if (!tiles.contains(tile) || fileMap.get(tile).length() == 0) {
+ //create a new file of this type
+ try {
+ faker.fakeFile(base, tile, lane, extension);
+ } catch (final IOException e) {
+ failures.add(String.format("Could not create fake file %s: %s", fileMap.get(tile),
+ e.getMessage()));
+ }
+
+ }
+ }
+ }
+ return failures;
+ }
}
/**
@@ -405,26 +510,29 @@ public class IlluminaFileUtil {
class PerTilePerCycleFileUtil extends ParameterizedFileUtil {
private final CycleIlluminaFileMap cycleFileMap;
private final List<Integer> tiles;
- private int [] detectedCycles;
+ private int[] detectedCycles;
- public PerTilePerCycleFileUtil(final String fileNameEndPattern, final File base) {
- super(makeLTRegex(fileNameEndPattern), makeLTRegex(fileNameEndPattern, lane), fileNameEndPattern, base);
+ public PerTilePerCycleFileUtil(final String fileNameEndPattern, final File base, final FileFaker fileFaker) {
+ super(makeLTRegex(fileNameEndPattern), makeLTRegex(fileNameEndPattern, lane), fileNameEndPattern, base,
+ fileFaker);
this.cycleFileMap = getPerTilePerCycleFiles(); //sideEffect, assigned to numCycles
- if(cycleFileMap.size() > 0) {
+ if (cycleFileMap.size() > 0) {
this.tiles = Collections.unmodifiableList(new ArrayList<Integer>(this.cycleFileMap.keySet()));
} else {
this.tiles = new ArrayList<Integer>();
}
}
- public PerTilePerCycleFileUtil(final String fileNameEndPattern) {
- this(fileNameEndPattern, intensityLaneDir);
+ public PerTilePerCycleFileUtil(final String fileNameEndPattern, final FileFaker fileFaker) {
+ this(fileNameEndPattern, intensityLaneDir, fileFaker);
}
/**
* Returns only lane and tile information as PerTilePerCycleFt's do not have End information.
+ *
* @param fileName Filename to analyze for data
+ *
* @return A LaneTileEnd object with the discovered Lane and Tile information and a null end field.
*/
public LaneTileEnd fileToLaneTileEnd(final String fileName) {
@@ -434,14 +542,16 @@ public class IlluminaFileUtil {
/**
* Given a cycle directory, return a list of tiles in that directory. If expectedTiles equals null
* return all files discovered otherwise filter by expectedTiles.
+ *
* @param cycleDir The file object of the cycle directory we are searching
+ *
* @return A list of tile integers describing the tiles available in a cycle directory
*/
private List<Integer> getTilesInCycleDir(final File cycleDir) {
- final File [] files = IoUtil.getFilesMatchingRegexp(cycleDir, pattern);
+ final File[] files = IoUtil.getFilesMatchingRegexp(cycleDir, pattern);
final List<Integer> tiles = new ArrayList<Integer>();
- for(final File file : files) {
- if(file.length() > 0) {
+ for (final File file : files) {
+ if (file.length() > 0) {
tiles.add(fileToLaneTileEnd(file.getName()).tile);
}
}
@@ -453,6 +563,7 @@ public class IlluminaFileUtil {
* For the given tiles, populate a CycleIlluminaFileMap that contains all these tiles and will iterate through
* all the files for these tiles in expectedBase
* Side Effect: Assigns numCycles
+ *
* @return A CycleIlluminaFileMap with the listed (or all) tiles for at least expectedCycles number of cycles(or total available
* cycles if expectedCycles is null)
*/
@@ -466,12 +577,12 @@ public class IlluminaFileUtil {
return cycledMap;
}
- int lowestCycle = Integer.MAX_VALUE;
+ int lowestCycle = Integer.MAX_VALUE;
int lowestCycleDirIndex = 0;
- final int [] cycles = new int[tempCycleDirs.length];
+ final int[] cycles = new int[tempCycleDirs.length];
for (int i = 0; i < tempCycleDirs.length; ++i) {
cycles[i] = getCycleFromDir(tempCycleDirs[i]);
- if(cycles[i] < lowestCycle) {
+ if (cycles[i] < lowestCycle) {
lowestCycle = cycles[i];
lowestCycleDirIndex = i;
}
@@ -483,8 +594,9 @@ public class IlluminaFileUtil {
detectedCycles = cycles;
final List<Integer> tiles = getTilesInCycleDir(firstCycleDir);
- for(final int tile : tiles) {
- cycledMap.put(tile, new CycleFilesIterator(laneDir, lane, tile, cycles, extension)); //Gonna have a problem here if we ever get a (.txt.gz for these types of files)
+ for (final int tile : tiles) {
+ cycledMap.put(tile, new CycleFilesIterator(laneDir, lane, tile, cycles,
+ extension)); //Gonna have a problem here if we ever get a (.txt.gz for these types of files)
}
return cycledMap;
@@ -501,10 +613,12 @@ public class IlluminaFileUtil {
/**
* Returns a cycleIlluminaFileMap with all available tiles but limited to the cycles passed in. Any cycles that are missing
* cycle files or directories will be removed from the cycle list that is kept.
+ *
* @param cycles Cycles that should be present in the output CycleIlluminaFileMap
+ *
* @return A CycleIlluminaFileMap with all available tiles but at most the cycles passed in by the cycles parameter
*/
- public CycleIlluminaFileMap getFiles(final int [] cycles) {
+ public CycleIlluminaFileMap getFiles(final int[] cycles) {
//Remove any cycles that were discovered to be NON-EXISTENT when this util was instantiated
final int[] filteredCycles = removeNonExistentCycles(cycles);
return cycleFileMap.keep(null, filteredCycles);
@@ -513,10 +627,12 @@ public class IlluminaFileUtil {
/**
* Returns a cycleIlluminaFileMap that contains only the tiles and cycles specified (and fewer if the orginal CycleIlluminaFileMap, created
* on util instantiation, doesn't contain any of these tiles/cycles).
+ *
* @param cycles Cycles that should be present in the output CycleIlluminaFileMap
+ *
* @return A CycleIlluminaFileMap with at most the tiles/cycles listed in the parameters
*/
- public CycleIlluminaFileMap getFiles(final List<Integer> tiles, final int [] cycles) {
+ public CycleIlluminaFileMap getFiles(final List<Integer> tiles, final int[] cycles) {
//Remove any cycles that were discovered to be NON-EXISTENT when this util was instantiated
final int[] filteredCycles = removeNonExistentCycles(cycles);
return cycleFileMap.keep(tiles, filteredCycles);
@@ -524,12 +640,12 @@ public class IlluminaFileUtil {
private int[] removeNonExistentCycles(final int[] cycles) {
final TreeSet<Integer> detectedCyclesSet = new TreeSet<Integer>();
- for(final Integer cycle : detectedCycles) {
+ for (final Integer cycle : detectedCycles) {
detectedCyclesSet.add(cycle);
}
final TreeSet<Integer> inputCyclesSet = new TreeSet<Integer>();
- for(final Integer inputCycle : cycles) {
+ for (final Integer inputCycle : cycles) {
inputCyclesSet.add(inputCycle);
}
@@ -538,20 +654,21 @@ public class IlluminaFileUtil {
inputCyclesSet.retainAll(detectedCyclesSet);
outputCycles = new int[inputCyclesSet.size()];
int i = 0;
- for(final Integer element : inputCyclesSet) {
+ for (final Integer element : inputCyclesSet) {
outputCycles[i++] = element;
}
return outputCycles;
}
- public int [] getDetectedCycles() {
+ public int[] getDetectedCycles() {
return detectedCycles;
}
/**
* Discover all files of this type in expectedBase that match pattern and construct a list of tiles
* available based on these files. The same number of tiles is expected in each cycle dir.
+ *
* @return A list of tile integers for all tiles available
*/
public List<Integer> getTiles() {
@@ -572,49 +689,63 @@ public class IlluminaFileUtil {
final CycleIlluminaFileMap cfm = getFiles(expectedTiles, expectedCycles);
final Set<Integer> detectedCycleSet = new HashSet<Integer>();
- for(final Integer cycle : detectedCycles) detectedCycleSet.add(cycle);
+ for (final Integer cycle : detectedCycles) {
+ detectedCycleSet.add(cycle);
+ }
final Set<Integer> missingCycleSet = new TreeSet<Integer>();
- for(final Integer cycle : expectedCycles) missingCycleSet.add(cycle);
+ for (final Integer cycle : expectedCycles) {
+ missingCycleSet.add(cycle);
+ }
missingCycleSet.removeAll(detectedCycleSet);
- for(final Integer tile : expectedTiles) {
+ for (final Integer tile : expectedTiles) {
final CycleFilesIterator cfIterator = cfm.get(tile);
- if( cfIterator == null ) {
+ if (cfIterator == null) {
failures.add("File type " + extension + " is missing tile " + tile);
- } else if( !cfIterator.hasNext()) {
+ } else if (!cfIterator.hasNext()) {
failures.add("File type " + extension + " has 0 cycle files for tile " + tile);
} else {
int expectedCycleIndex = 0;
Long cycleSize = null;
- while(cfIterator.hasNext() && expectedCycleIndex < expectedCycles.length) {
+ while (cfIterator.hasNext() && expectedCycleIndex < expectedCycles.length) {
final int currentCycle = expectedCycles[expectedCycleIndex];
- if(cfIterator.getNextCycle() == currentCycle) {
+ if (cfIterator.getNextCycle() == currentCycle) {
final File cycleFile = cfIterator.next();
- if(!missingCycleSet.contains(currentCycle)) {
- if(!cycleFile.exists()) {
+ if (!missingCycleSet.contains(currentCycle)) {
+ if (!cycleFile.exists()) {
failures.add("Missing file(" + cycleFile.getAbsolutePath() + ")");
- } else if(cycleFile.length() == 0) {
+ } else if (cycleFile.length() == 0) {
failures.add("0 Length tile file(" + cycleFile.getAbsolutePath() + ")");
- } else if(cycleSize == null) {
+ } else if (cycleSize == null) {
cycleSize = cycleFile.length();
} else if (!extension.equals(".bcl.gz") && cycleSize != cycleFile.length()) {
// TODO: The gzip bcl files might not be the same length despite having the same content,
// for now we're punting on this but this should be looked into at some point
- failures.add("File type " + extension + " has cycles files of different length. Current cycle (" + currentCycle + ") " +
- "Length of first non-empty file (" + cycleSize + ") length of current cycle (" + cycleFile.length() + ")" + " File(" + cycleFile.getAbsolutePath() + ")");
+ failures.add("File type " + extension
+ + " has cycles files of different length. Current cycle ("
+ + currentCycle + ") " +
+ "Length of first non-empty file (" + cycleSize
+ + ") length of current cycle (" + cycleFile.length() + ")"
+ + " File(" + cycleFile.getAbsolutePath() + ")");
}
} else {
cfIterator.reset();
- throw new PicardException("Malformed CycleIlluminaFileMap! CycleIlluminaFileMap has cycle " + currentCycle + " even though the directory does not exist! CycleFileIterator(" + CycleIlluminaFileMap.remainingCyclesToString(cfIterator) + ")");
+ throw new PicardException(
+ "Malformed CycleIlluminaFileMap! CycleIlluminaFileMap has cycle "
+ + currentCycle
+ + " even though the directory does not exist! CycleFileIterator("
+ + CycleIlluminaFileMap.remainingCyclesToString(cfIterator) + ")");
}
- } else if(!missingCycleSet.contains(currentCycle)) {
+ } else if (!missingCycleSet.contains(currentCycle)) {
cfIterator.reset();
- throw new PicardException("Malformed CycleIlluminaFileMap! Tile " + tile + "CycleFileIterator(" + CycleIlluminaFileMap.remainingCyclesToString(cfIterator)+ ")");
+ throw new PicardException(
+ "Malformed CycleIlluminaFileMap! Tile " + tile + "CycleFileIterator("
+ + CycleIlluminaFileMap.remainingCyclesToString(cfIterator) + ")");
}
expectedCycleIndex += 1;
@@ -622,38 +753,127 @@ public class IlluminaFileUtil {
}
}
- for(final Integer cycle : missingCycleSet) {
- failures.add("Missing cycle directory " + cycle + " in directory " + base.getAbsolutePath() + " for file type " + extension);
+ for (final Integer cycle : missingCycleSet) {
+ failures.add("Missing cycle directory " + cycle + " in directory " + base.getAbsolutePath()
+ + " for file type " + extension);
+ }
+ }
+
+ return failures;
+ }
+
+ @Override
+ public List<String> fakeFiles(final List<Integer> expectedTiles, final int[] expectedCycles,
+ final SupportedIlluminaFormat format) {
+ final List<String> failures = new LinkedList<String>();
+
+ if (!base.exists()) {
+ base.mkdirs();
+ }
+
+ final Set<Integer> detectedCycleSet = new HashSet<Integer>();
+
+ if (detectedCycles == null) {
+ detectedCycles = new int[0];
+ }
+
+ for (final Integer cycle : detectedCycles) {
+ detectedCycleSet.add(cycle);
+ }
+
+ final Set<Integer> missingCycleSet = new TreeSet<Integer>();
+ for (final Integer cycle : expectedCycles) {
+ missingCycleSet.add(cycle);
+ }
+
+ missingCycleSet.removeAll(detectedCycleSet);
+ for (final Integer cycle : missingCycleSet) {
+ final File cycleDirectory = new File(base, "C" + cycle + ".1");
+ if (cycleDirectory.mkdirs()) {
+ detectedCycleSet.add(cycle);
+ }
+ }
+
+ final CycleIlluminaFileMap cfm = getPerTilePerCycleFiles();
+
+ for (final Integer tile : expectedTiles) {
+ final CycleFilesIterator cfIterator = cfm.get(tile);
+ if (cfIterator == null) {
+ for (final Integer cycle : missingCycleSet) {
+ final File cycleDirectory = new File(base, "C" + cycle + ".1");
+ try {
+ faker.fakeFile(cycleDirectory, tile, lane, extension);
+ } catch (final IOException e) {
+ failures.add(String.format("Could not create fake file %s: %s", tile + extension,
+ e.getMessage()));
+ }
+ }
+ } else if (!cfIterator.hasNext()) {
+ failures.add("File type " + extension + " has 0 cycle files for tile " + tile);
+ } else {
+ int expectedCycleIndex = 0;
+ Long cycleSize = null;
+ while (cfIterator.hasNext() && expectedCycleIndex < expectedCycles.length) {
+ final int currentCycle = expectedCycles[expectedCycleIndex];
+
+ if (cfIterator.getNextCycle() == currentCycle) {
+ final File cycleFile = cfIterator.next();
+
+ if (cycleSize == null) {
+ cycleSize = cycleFile.length();
+ }
+
+ if (!cycleFile.exists() || cycleFile.length() == 0) {
+ try {
+ faker.fakeFile(cycleFile, cycleSize.intValue());
+ } catch (final IOException e) {
+ failures.add("Could not create fake file: " + cycleFile);
+ }
+ }
+ }
+ expectedCycleIndex += 1;
+ }
}
}
+ for (final Integer cycle : missingCycleSet) {
+ failures.add("Missing cycle directory " + cycle + " in directory " + base.getAbsolutePath()
+ + " for file type " + extension);
+ }
return failures;
}
}
- /** QSeq files are really tiled and ended so define it's own nested format since no other file types
- * are structured the same. */
+ /**
+ * QSeq files are really tiled and ended so define its own nested format since no other file types
+ * are structured the same.
+ */
class QSeqIlluminaFileUtil extends ParameterizedFileUtil {
private final List<Integer> tiles;
private final List<IlluminaFileMap> readFileMaps;
+
public QSeqIlluminaFileUtil() {
- super(UNPARAMETERIZED_QSEQ_PATTERN, makeParameterizedQseqRegex(lane), "_qseq.txt", basecallDir);
+ super(UNPARAMETERIZED_QSEQ_PATTERN, makeParameterizedQseqRegex(lane), "_qseq.txt", basecallDir,
+ new QSeqFileFaker());
readFileMaps = getFiles();
- if(readFileMaps.size() > 0) {
+ if (readFileMaps.size() > 0) {
tiles = Collections.unmodifiableList(new ArrayList<Integer>(readFileMaps.get(0).keySet()));
} else {
tiles = new ArrayList<Integer>();
}
}
- /** Make a qSeq regex string with the lane and end already filled in */
+ /**
+ * Make a qSeq regex string with the lane and end already filled in
+ */
private String makeLaneAndEndSpecificRegex(final int lane, final int end) {
return "^s_" + lane + "_" + end + "_\\d{4}_qseq\\.txt(\\.gz|\\.bz2)?$";
}
/**
* Return the number of ends found in the basecallDir
+ *
* @return The highest end number found among the files in the basecallDir
*/
public int numberOfEnds() {
@@ -662,22 +882,27 @@ public class IlluminaFileUtil {
/**
* Given a file name return it's Lane, Tile, and End information
+ *
* @param fileName The name of a file to analyze
+ *
* @return The lane, tile, and end of the file with the given name
*/
@Override
public LaneTileEnd fileToLaneTileEnd(final String fileName) {
final Matcher matcher = unparameterizedPattern.matcher(fileName);
- if(!matcher.matches()) {
+ if (!matcher.matches()) {
return null;
}
- return new LaneTileEnd(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(3)), Integer.parseInt(matcher.group(2)));
+ return new LaneTileEnd(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(3)),
+ Integer.parseInt(matcher.group(2)));
}
/**
* For each tile in tiles with the given end find the corresponding QSeq file. Place that qseq file in an IlluminaFileMap
* and after all tiles are processed, return that fileMap;
+ *
* @param end A single end integer
+ *
* @return A map of tiles->Files where each file is represents the given tile and end
*/
private IlluminaFileMap getFiles(final int end) {
@@ -687,15 +912,16 @@ public class IlluminaFileUtil {
/**
* Return a list of illumina file map, where index 0 contains files for end 1, index 1 contains files for end 2, etc...
+ *
* @return An list of illuminaFileMaps with containing all files for all ends for each given tile
*/
public List<IlluminaFileMap> getFiles() {
final List<IlluminaFileMap> readTileMap = new ArrayList<IlluminaFileMap>();
boolean emptyMap = false;
- for(int i = 1; !emptyMap; i++) {
+ for (int i = 1; !emptyMap; i++) {
final IlluminaFileMap fm = getFiles(i);
- if(fm.isEmpty()) {
+ if (fm.isEmpty()) {
emptyMap = true;
} else {
readTileMap.add(fm);
@@ -707,7 +933,7 @@ public class IlluminaFileUtil {
public List<IlluminaFileMap> getFiles(final List<Integer> tiles) {
final List<IlluminaFileMap> filteredMaps = new ArrayList<IlluminaFileMap>();
- for(final IlluminaFileMap fm : readFileMaps) {
+ for (final IlluminaFileMap fm : readFileMaps) {
filteredMaps.add(fm.keep(tiles));
}
@@ -722,33 +948,79 @@ public class IlluminaFileUtil {
public List<String> verify(final List<Integer> expectedTiles, final int[] expectedCycles) {
final List<String> failures = new LinkedList<String>();
- if(!this.base.exists()) {
+ if (!this.base.exists()) {
failures.add("Base directory( " + this.base.getAbsolutePath() + ") does not exist!");
} else {
final List<IlluminaFileMap> fileMapPerRead = getFiles(expectedTiles);
final int[] qseqReadLengths = new int[numberOfEnds()];
int lastCycle = 0;
- for(int i = 0; i < qseqReadLengths.length; i++) {
+ for (int i = 0; i < qseqReadLengths.length; i++) {
final File currentReadForTile = fileMapPerRead.get(i).get(expectedTiles.get(0));
qseqReadLengths[i] = QseqReadParser.getReadLength(currentReadForTile);
lastCycle += qseqReadLengths[i];
}
- final Range cycleRange = new Range(1,lastCycle);
- for(final int expectedCycle : expectedCycles) {
- if(expectedCycle < cycleRange.start || expectedCycle > cycleRange.end) {
- failures.add("Expected cycle(" + expectedCycle + ") is not within the range provided by available qseqs. " +
- "Min Available Cycle(" + cycleRange.start + ") Max Available Cycle(" + cycleRange.end + ") Length of Qseqs( " + StringUtil.join(", ", qseqReadLengths));
+ final Range cycleRange = new Range(1, lastCycle);
+ for (final int expectedCycle : expectedCycles) {
+ if (expectedCycle < cycleRange.start || expectedCycle > cycleRange.end) {
+ failures.add("Expected cycle(" + expectedCycle
+ + ") is not within the range provided by available qseqs. " +
+ "Min Available Cycle(" + cycleRange.start + ") Max Available Cycle("
+ + cycleRange.end + ") Length of Qseqs( " + StringUtil.join(", ", qseqReadLengths));
}
}
//ensure that those same ends exist for each expectedTile
- for(int i = 1; i < expectedTiles.size(); i++) {
+ for (int i = 1; i < expectedTiles.size(); i++) {
final Integer tile = expectedTiles.get(i);
- for(int j = 0; j < qseqReadLengths.length; j++) {
+ for (int j = 0; j < qseqReadLengths.length; j++) {
final File currentReadForTile = fileMapPerRead.get(j).get(tile);
- if(currentReadForTile == null || !currentReadForTile.exists()) {
- failures.add("Missing file " + "s_" + lane + "_" + (j+1) + "_" + longTileStr(tile) + "_qseq.txt");
+ if (currentReadForTile == null || !currentReadForTile.exists()) {
+ failures.add("Missing file " + "s_" + lane + "_" + (j + 1) + "_" + longTileStr(tile)
+ + "_qseq.txt");
+ }
+ }
+ }
+ }
+
+ return failures;
+ }
+
+ @Override
+ public List<String> fakeFiles(final List<Integer> expectedTiles, final int[] expectedCycles,
+ final SupportedIlluminaFormat format) {
+ final List<String> failures = new LinkedList<String>();
+
+ if (!this.base.exists()) {
+ failures.add("Base directory( " + this.base.getAbsolutePath() + ") does not exist!");
+ } else {
+ final List<IlluminaFileMap> fileMapPerRead = getFiles(expectedTiles);
+ final int[] qseqReadLengths = new int[numberOfEnds()];
+ int lastCycle = 0;
+ for (int i = 0; i < qseqReadLengths.length; i++) {
+ final File currentReadForTile = fileMapPerRead.get(i).get(expectedTiles.get(0));
+ qseqReadLengths[i] = QseqReadParser.getReadLength(currentReadForTile);
+ lastCycle += qseqReadLengths[i];
+ }
+
+ final Range cycleRange = new Range(1, lastCycle);
+ for (final int expectedCycle : expectedCycles) {
+ if (expectedCycle < cycleRange.start || expectedCycle > cycleRange.end) {
+ failures.add("Expected cycle(" + expectedCycle
+ + ") is not within the range provided by available qseqs. " +
+ "Min Available Cycle(" + cycleRange.start + ") Max Available Cycle("
+ + cycleRange.end + ") Length of Qseqs( " + StringUtil.join(", ", qseqReadLengths));
+ }
+ }
+
+ //ensure that those same ends exist for each expectedTile
+ for (int i = 1; i < expectedTiles.size(); i++) {
+ final Integer tile = expectedTiles.get(i);
+ for (int j = 0; j < qseqReadLengths.length; j++) {
+ final File currentReadForTile = fileMapPerRead.get(j).get(tile);
+ if (currentReadForTile == null || !currentReadForTile.exists()) {
+ failures.add("Missing file " + "s_" + lane + "_" + (j + 1) + "_" + longTileStr(tile)
+ + "_qseq.txt");
}
}
}
@@ -768,11 +1040,11 @@ public class IlluminaFileUtil {
*/
abstract class MultiTileFileUtil<OUTPUT_RECORD extends IlluminaData> extends ParameterizedFileUtil {
protected final File bci;
- protected final TileIndex tileIndex;
- protected final File dataFile;
+ protected TileIndex tileIndex;
+ protected File dataFile;
- MultiTileFileUtil(final String extension, final File base, final File bciDir) {
- super(makeLaneRegex(extension), makeLaneRegex(extension, lane), extension, base);
+ MultiTileFileUtil(final String extension, final File base, final File bciDir, final FileFaker fileFaker) {
+ super(makeLaneRegex(extension), makeLaneRegex(extension, lane), extension, base, fileFaker);
bci = new File(bciDir, "s_" + lane + ".bci");
if (bci.exists()) {
tileIndex = new TileIndex(bci);
@@ -780,9 +1052,13 @@ public class IlluminaFileUtil {
tileIndex = null;
}
final File[] filesMatchingRegexp = IoUtil.getFilesMatchingRegexp(base, pattern);
- if (filesMatchingRegexp == null || filesMatchingRegexp.length == 0) dataFile = null;
- else if (filesMatchingRegexp.length == 1) dataFile = filesMatchingRegexp[0];
- else throw new PicardException("More than one filter file found in " + base.getAbsolutePath());
+ if (filesMatchingRegexp == null || filesMatchingRegexp.length == 0) {
+ dataFile = null;
+ } else if (filesMatchingRegexp.length == 1) {
+ dataFile = filesMatchingRegexp[0];
+ } else {
+ throw new PicardException("More than one filter file found in " + base.getAbsolutePath());
+ }
}
@Override
@@ -797,7 +1073,9 @@ public class IlluminaFileUtil {
@Override
public List<Integer> getTiles() {
- if (tileIndex == null) return Collections.EMPTY_LIST;
+ if (tileIndex == null) {
+ return Collections.EMPTY_LIST;
+ }
return tileIndex.getTiles();
}
@@ -812,6 +1090,29 @@ public class IlluminaFileUtil {
return tileIndex.verify(expectedTiles);
}
+ @Override
+ public List<String> fakeFiles(final List<Integer> expectedTiles, final int[] expectedCycles,
+ final SupportedIlluminaFormat format) {
+ //we need to fake a bci file for the tile index
+ final BciFileFaker bciFileFaker = new BciFileFaker();
+ try {
+ bciFileFaker.fakeBciFile(bci, expectedTiles);
+ tileIndex = new TileIndex(bci);
+ faker.fakeFile(base, expectedTiles, lane, extension);
+ final File[] filesMatchingRegexp = IoUtil.getFilesMatchingRegexp(base, pattern);
+ if (filesMatchingRegexp == null || filesMatchingRegexp.length == 0) {
+ dataFile = null;
+ } else if (filesMatchingRegexp.length == 1) {
+ dataFile = filesMatchingRegexp[0];
+ } else {
+ throw new PicardException("More than one filter file found in " + base.getAbsolutePath());
+ }
+ } catch (final IOException e) {
+ return Collections.singletonList("Could not create tile index file: " + bci.getAbsolutePath());
+ }
+ return tileIndex.verify(expectedTiles);
+ }
+
abstract IlluminaParser<OUTPUT_RECORD> makeParser(List<Integer> requestedTiles);
}
@@ -821,7 +1122,7 @@ public class IlluminaFileUtil {
* @param basecallLaneDir location of .filter file and also .bci file
*/
MultiTileFilterFileUtil(final File basecallLaneDir) {
- super(".filter", basecallLaneDir, basecallLaneDir);
+ super(".filter", basecallLaneDir, basecallLaneDir, new FilterFileFaker());
}
@Override
@@ -833,7 +1134,7 @@ public class IlluminaFileUtil {
class MultiTileLocsFileUtil extends MultiTileFileUtil<PositionalData> {
MultiTileLocsFileUtil(final File basecallLaneDir, final File bciDir) {
- super(".locs", basecallLaneDir, bciDir);
+ super(".locs", basecallLaneDir, bciDir, new MultiTileLocsFileFaker());
}
@Override
@@ -853,7 +1154,8 @@ public class IlluminaFileUtil {
MultiTileBclFileUtil(final File basecallLaneDir) {
// Since these file names do not contain lane number, first two args to ctor are the same.
- super("^(\\d{4}).bcl.bgzf$", "^(\\d{4}).bcl.bgzf$", ".bcl.bgzf", basecallLaneDir);
+ super("^(\\d{4}).bcl.bgzf$", "^(\\d{4}).bcl.bgzf$", ".bcl.bgzf", basecallLaneDir,
+ new MultiTileBclFileFaker());
this.basecallLaneDir = basecallLaneDir;
bci = new File(basecallLaneDir, "s_" + lane + ".bci");
// Do this once rather than when deciding if these files exist and again later.
@@ -873,22 +1175,27 @@ public class IlluminaFileUtil {
}
- public CycleIlluminaFileMap getFiles(final List<Integer> tiles, final int [] cycles) {
+ public CycleIlluminaFileMap getFiles(final List<Integer> tiles, final int[] cycles) {
// Filter input list of cycles according to which actually exist
final ArrayList<Integer> goodCycleList = new ArrayList<Integer>(cycles.length);
for (final int cycle : cycles) {
- if (cycleFileMap.containsKey(cycle)) goodCycleList.add(cycle);
+ if (cycleFileMap.containsKey(cycle)) {
+ goodCycleList.add(cycle);
+ }
}
// Ensure cycles are sorted.
Collections.sort(goodCycleList);
final int[] goodCycles = new int[goodCycleList.size()];
- for (int i = 0; i < goodCycles.length; ++i) goodCycles[i] = goodCycleList.get(i);
+ for (int i = 0; i < goodCycles.length; ++i) {
+ goodCycles[i] = goodCycleList.get(i);
+ }
// Create the map.
final CycleIlluminaFileMap cycledMap = new CycleIlluminaFileMap();
if (goodCycles.length > 0) {
- for(final int tile : tiles) {
- cycledMap.put(tile, new MultiTileBclCycleFilesIterator(basecallLaneDir, lane, tile, goodCycles, extension));
+ for (final int tile : tiles) {
+ cycledMap.put(tile,
+ new MultiTileBclCycleFilesIterator(basecallLaneDir, lane, tile, goodCycles, extension));
}
}
return cycledMap;
@@ -896,7 +1203,7 @@ public class IlluminaFileUtil {
@Override
public boolean filesAvailable() {
- return bci.exists() && cycleFileMap.size()> 0;
+ return bci.exists() && cycleFileMap.size() > 0;
}
@Override
@@ -907,7 +1214,9 @@ public class IlluminaFileUtil {
@Override
public List<Integer> getTiles() {
- if (tileIndex == null) return Collections.EMPTY_LIST;
+ if (tileIndex == null) {
+ return Collections.EMPTY_LIST;
+ }
return tileIndex.getTiles();
}
@@ -924,9 +1233,26 @@ public class IlluminaFileUtil {
}
return ret;
}
+
+ @Override
+ public List<String> fakeFiles(final List<Integer> expectedTiles, final int[] expectedCycles,
+ final SupportedIlluminaFormat format) {
+ if (tileIndex == null) {
+ return Collections.singletonList("Tile index(" + bci.getAbsolutePath() + ") does not exist!");
+ }
+ final List<String> ret = tileIndex.verify(expectedTiles);
+ for (final int expectedCycle : expectedCycles) {
+ if (!cycleFileMap.containsKey(expectedCycle)) {
+ ret.add(expectedCycle + ".bcl.bgzf not found in " + base);
+ }
+ }
+ return ret;
+ }
}
- /** A support class for return lane tile and end information for a given file */
+ /**
+ * A support class for return lane tile and end information for a given file
+ */
static class LaneTileEnd {
public final Integer lane;
public final Integer tile;
@@ -935,7 +1261,7 @@ public class IlluminaFileUtil {
public LaneTileEnd(final Integer lane, final Integer tile, final Integer end) {
this.lane = lane;
this.tile = tile;
- this.end = end;
+ this.end = end;
}
public LaneTileEnd(final Integer lane, final Integer tile) {
@@ -943,12 +1269,16 @@ public class IlluminaFileUtil {
}
}
- /** Return a regex string for finding Lane and Tile given a file extension pattern */
+ /**
+ * Return a regex string for finding Lane and Tile given a file extension pattern
+ */
public static String makeLTRegex(final String fileNameEndPattern) {
return "^" + UNPARAMETERIZED_PER_TILE_PATTERN + fileNameEndPattern + "$";
}
- /** Return a regex string for finding Lane and Tile given a file extension pattern */
+ /**
+ * Return a regex string for finding Lane and Tile given a file extension pattern
+ */
private static String makeLTRegex(final String fileNameEndPattern, final int lane) {
return "^" + makeParameterizedLaneAndTileRegex(lane) + fileNameEndPattern + "$";
}
@@ -962,34 +1292,36 @@ public class IlluminaFileUtil {
}
private static int getCycleFromDir(final File tempCycleDir) {
- final char [] name = tempCycleDir.getName().toCharArray();
- if(name[0] != 'C') {
+ final char[] name = tempCycleDir.getName().toCharArray();
+ if (name[0] != 'C') {
throw new PicardException("Invalid cycle directory name " + tempCycleDir.getName());
}
String intStr = "";
boolean periodFound = false;
- for(int i = 1; i < name.length && !periodFound; i++) {
- if(name[i] == '.') {
+ for (int i = 1; i < name.length && !periodFound; i++) {
+ if (name[i] == '.') {
periodFound = true;
- } else if(name[i] == '1' || name[i] == '2' || name[i] == '3' ||
- name[i] == '4' || name[i] == '5' || name[i] == '6' ||
- name[i] == '7' || name[i] == '8' || name[i] == '9' ||
- name[i] == '0') {
+ } else if (name[i] == '1' || name[i] == '2' || name[i] == '3' ||
+ name[i] == '4' || name[i] == '5' || name[i] == '6' ||
+ name[i] == '7' || name[i] == '8' || name[i] == '9' ||
+ name[i] == '0') {
intStr += name[i];
} else {
- throw new PicardException("Invalid cycle directory name " + tempCycleDir.getAbsolutePath());
+ throw new PicardException("Invalid cycle directory name " + tempCycleDir.getAbsolutePath());
}
}
return Integer.parseInt(intStr);
}
- /** Given a pattern and file name return a LaneTileEnd with the first two matches to the pattern returned
- * as the lane and tile respectively */
+ /**
+ * Given a pattern and file name return a LaneTileEnd with the first two matches to the pattern returned
+ * as the lane and tile respectively
+ */
private static LaneTileEnd laneAndTileFromFirstTwoMatches(final String fileName, final Pattern pattern) {
final Matcher matcher = pattern.matcher(fileName);
- if(!matcher.matches()) {
+ if (!matcher.matches()) {
return null;
}
return new LaneTileEnd(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2)));
@@ -997,14 +1329,16 @@ public class IlluminaFileUtil {
/**
* Return a string representing the Lane in the format "L00<lane>"
+ *
* @param lane The lane to transform
+ *
* @return A long string representation of the name
*/
private static String longLaneStr(final int lane) {
String lstr = String.valueOf(lane);
final int zerosToAdd = 3 - lstr.length();
- for(int i = 0; i < zerosToAdd; i++) {
+ for (int i = 0; i < zerosToAdd; i++) {
lstr = "0" + lstr;
}
return "L" + lstr;
@@ -1012,27 +1346,32 @@ public class IlluminaFileUtil {
/**
* Return a string representing the Lane in the format "000<tile>"
+ *
* @param tile The tile to transform
+ *
* @return A long string representation of the name
*/
private static String longTileStr(final int tile) {
String tstr = String.valueOf(tile);
final int zerosToAdd = 4 - tstr.length();
- for(int i = 0; i < zerosToAdd; i++) {
+ for (int i = 0; i < zerosToAdd; i++) {
tstr = "0" + tstr;
}
return tstr;
}
- /** Return all files that match pattern of the given file type in the given base directory */
- private static IlluminaFileMap getTiledFiles(final File baseDirectory, final Pattern pattern, final ParameterizedFileUtil ift) {
+ /**
+ * Return all files that match pattern of the given file type in the given base directory
+ */
+ private static IlluminaFileMap getTiledFiles(final File baseDirectory, final Pattern pattern,
+ final ParameterizedFileUtil ift) {
final IlluminaFileMap fileMap = new IlluminaFileMap();
- if(baseDirectory.exists()) {
+ if (baseDirectory.exists()) {
IoUtil.assertDirectoryIsReadable(baseDirectory);
- final File [] files = IoUtil.getFilesMatchingRegexp(baseDirectory, pattern);
- for(final File file : files) {
- if(file.length() > 0) {
+ final File[] files = IoUtil.getFilesMatchingRegexp(baseDirectory, pattern);
+ for (final File file : files) {
+ if (file.length() > 0) {
final LaneTileEnd lt = ift.fileToLaneTileEnd(file.getName());
fileMap.put(lt.tile, file);
}
@@ -1042,9 +1381,11 @@ public class IlluminaFileUtil {
return fileMap;
}
- /** For filename patterns that end with .txt tack on the option .gz extension */
+ /**
+ * For filename patterns that end with .txt tack on the option .gz extension
+ */
private static String processTxtExtension(final String fileNameEndPattern) {
- if(fileNameEndPattern.endsWith(".txt")) {
+ if (fileNameEndPattern.endsWith(".txt")) {
return fileNameEndPattern + "(\\.gz|\\.bz2)?";
} else {
return fileNameEndPattern;
@@ -1053,11 +1394,12 @@ public class IlluminaFileUtil {
private String liToStr(final List<Integer> intList) {
- if(intList.size() == 0)
+ if (intList.size() == 0) {
return "";
+ }
String summary = String.valueOf(intList.get(0));
- for(int i = 1; i < intList.size(); i++) {
+ for (int i = 1; i < intList.size(); i++) {
summary += ", " + String.valueOf(intList.get(i));
}
@@ -1084,7 +1426,7 @@ public class IlluminaFileUtil {
* We want to be able to predetermine if the BCL files are gzipped or not and we also want to verify
* that all of the files are the same. Look through all of the cycle dirs in this lane and grab all
* BCL (gzipped or not) files in the tree. Determine the exension and then verify that they're all the same.
- *
+ * <p/>
* If there are no BCL files, return the standard extension (i.e. ".bcl") to conserve backwards compatibility
*/
private String inferBclExtension(final File laneDir) {
@@ -1109,7 +1451,8 @@ public class IlluminaFileUtil {
for (final File bclFile : allBclFiles) {
if (!bclFile.getPath().endsWith(bclExtension)) {
- throw new PicardException("Not all BCL files in " + laneDir.getAbsolutePath() + " have the same extension!");
+ throw new PicardException(
+ "Not all BCL files in " + laneDir.getAbsolutePath() + " have the same extension!");
}
}
}
diff --git a/src/java/net/sf/picard/illumina/parser/IlluminaMetricsCode.java b/src/java/net/sf/picard/illumina/parser/IlluminaMetricsCode.java
new file mode 100644
index 0000000..30f93a6
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/IlluminaMetricsCode.java
@@ -0,0 +1,42 @@
+package net.sf.picard.illumina.parser;
+
+/**
+ * Illumina's TileMetricsOut.bin file codes various metrics, either concrete (all density IDs are code 100) or as a base code
+ * (e.g. phasing values are computed from a base of 200).
+ *
+ * @author jgentry
+ */
+public enum IlluminaMetricsCode {
+ DENSITY_ID(100),
+ CLUSTER_ID(102),
+ PHASING_BASE(200),
+ PREPHASING_BASE(201);
+
+ private final int metricsCode;
+
+ IlluminaMetricsCode(final int metricsCode) {
+ this.metricsCode = metricsCode;
+ }
+
+ /**
+ * Phasing codes are between 200 and 299 (inclusive). Phasing codes are defined as being
+ * (200 + ((N - 1) * 2)) for (a 0-based) read descriptor N (i.e., 200, 202, 204, etc.) Prephasing codes are defined
+ * as being (201 + ((N - 1) * 2)) for read descriptor N (i.e., 201, 203, 205, etc.). So for a 101T8B101T read
+ * structure, there will be phasing codes of 200, 202 and 204 and prephasing codes of 201, 203, 205.
+ */
+ public static int getPhasingCode(final int readDescriptorIndex, final IlluminaMetricsCode phasingType) {
+ if (!isPhasing(phasingType)) {
+ throw new IllegalArgumentException("phasingType must be PHASING_BASE or PREPHASING_BASE");
+ }
+
+ return (phasingType.getMetricsCode() + (readDescriptorIndex * 2));
+ }
+
+ public static boolean isPhasing(final IlluminaMetricsCode metricsCode) {
+ return (metricsCode.equals(PHASING_BASE) || metricsCode.equals(PREPHASING_BASE));
+ }
+
+ public int getMetricsCode() {
+ return metricsCode;
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/MultiTileBclParser.java b/src/java/net/sf/picard/illumina/parser/MultiTileBclParser.java
index d8c71e2..fd2fd93 100644
--- a/src/java/net/sf/picard/illumina/parser/MultiTileBclParser.java
+++ b/src/java/net/sf/picard/illumina/parser/MultiTileBclParser.java
@@ -66,7 +66,7 @@ public class MultiTileBclParser extends BclParser {
tileIndex.getFile().getAbsolutePath(), tileIndex.getNumTiles(), bclIndexReader.getBciFile().getAbsolutePath(), bclIndexReader.getNumTiles()));
}
- final BclReader bclReader = new BclReader(file, bclQualityEvaluationStrategy);
+ final BclReader bclReader = BclReader.makeSeekable(file, bclQualityEvaluationStrategy);
bclReader.seek(bclIndexReader.get(tileIndexRecord.zeroBasedTileNumber));
return new CountLimitedIterator(bclReader, tileIndexRecord.numClustersInTile);
diff --git a/src/java/net/sf/picard/illumina/parser/PerTilePerCycleParser.java b/src/java/net/sf/picard/illumina/parser/PerTilePerCycleParser.java
index df75751..dd600d7 100644
--- a/src/java/net/sf/picard/illumina/parser/PerTilePerCycleParser.java
+++ b/src/java/net/sf/picard/illumina/parser/PerTilePerCycleParser.java
@@ -145,7 +145,9 @@ abstract class PerTilePerCycleParser<ILLUMINA_DATA extends IlluminaData> impleme
}
final ILLUMINA_DATA data = makeData(outputMapping.getOutputReadLengths());
- for(int i = 0; i < outputMapping.getTotalOutputCycles(); i++) {
+ final int totalOutputCycles = outputMapping.getTotalOutputCycles();
+
+ for(int i = 0; i < totalOutputCycles; ++i) {
cycleFileParsers.get(i).next(data);
}
diff --git a/src/java/net/sf/picard/illumina/parser/Tile.java b/src/java/net/sf/picard/illumina/parser/Tile.java
new file mode 100644
index 0000000..6aff9ad
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/Tile.java
@@ -0,0 +1,114 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package net.sf.picard.illumina.parser;
+
+import net.sf.samtools.util.CollectionUtil;
+
+import java.util.*;
+
+/** Represents a tile from TileMetricsOut.bin. Stores information on location (lane & tile #), density, number of clusters and the
+ * phasing/prephasing values associated with this tile.
+ *
+ * @author jgentry
+ */
+public class Tile {
+ private final int lane, tile;
+ private final float density, clusters;
+
+ private final Map<TileTemplateRead, Float> phasingMap;
+ private final Map<TileTemplateRead, Float> prePhasingMap;
+
+ /**
+ * @param tilePhasingValues Either one or two TilePhasingValues, corresponding to the FIRST and potentially SECOND template reads
+ */
+ public Tile(final int lane, final int tile, final float density, final float clusters, final TilePhasingValue... tilePhasingValues) {
+ this.lane = lane;
+ this.tile = tile;
+ this.density = density;
+ this.clusters = clusters;
+
+ final Collection<TilePhasingValue> phasingValues = ensureSoleTilePhasingValuesPerRead(Arrays.asList(tilePhasingValues));
+
+ final Map<TileTemplateRead, Float> phasingMap = new HashMap<TileTemplateRead, Float>();
+ final Map<TileTemplateRead, Float> prePhasingMap = new HashMap<TileTemplateRead, Float>();
+
+ /** For each of the TileReads, assign their phasing & prephasing values to the respective maps, which we will
+ * use later to calculate the medians
+ */
+ for (final TilePhasingValue phasingValue : phasingValues) {
+ phasingMap.put(phasingValue.getTileTemplateRead(), phasingValue.getPhasingValue());
+ prePhasingMap.put(phasingValue.getTileTemplateRead(), phasingValue.getPrePhasingValue());
+ }
+
+ this.phasingMap = Collections.unmodifiableMap(phasingMap);
+ this.prePhasingMap = Collections.unmodifiableMap(prePhasingMap);
+ }
+
+ /** Returns the number of this tile's parent lane. */
+ public int getLaneNumber() {
+ return lane;
+ }
+
+ /** Returns the number/name of this tile. */
+ public int getTileNumber() {
+ return tile;
+ }
+
+ /** Returns the cluster density of this tile, in units of [cluster/mm^2]. */
+ public float getClusterDensity() {
+ return density;
+ }
+
+ /** Returns the number of clusters on this tile. */
+ public float getClusterCount() {
+ return clusters;
+ }
+
+ public Map<TileTemplateRead, Float> getPhasingMap() {
+ return phasingMap;
+ }
+
+ public Map<TileTemplateRead, Float> getPrePhasingMap() {
+ return prePhasingMap;
+ }
+
+ /** For any given TileTemplateRead, we want to make sure that there is only a single TilePhasingValue */
+ private static Collection<TilePhasingValue> ensureSoleTilePhasingValuesPerRead(final Collection<TilePhasingValue> tilePhasingValues) {
+ final Map<TileTemplateRead, Collection<TilePhasingValue>> partitionedMap = CollectionUtil.partition(tilePhasingValues,
+ new CollectionUtil.Partitioner<TilePhasingValue, TileTemplateRead>() {
+ @Override
+ public TileTemplateRead getPartition(final TilePhasingValue phasingValue) {
+ return phasingValue.getTileTemplateRead();
+ }
+ });
+
+ final Collection<TilePhasingValue> newTilePhasingValues = new LinkedList<TilePhasingValue>();
+ for (final TileTemplateRead read : partitionedMap.keySet()) {
+ newTilePhasingValues.add(CollectionUtil.getSoleElement(partitionedMap.get(read)));
+ }
+
+ return newTilePhasingValues;
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/TileIndex.java b/src/java/net/sf/picard/illumina/parser/TileIndex.java
index 0750b7c..28b0ffc 100644
--- a/src/java/net/sf/picard/illumina/parser/TileIndex.java
+++ b/src/java/net/sf/picard/illumina/parser/TileIndex.java
@@ -26,6 +26,7 @@ package net.sf.picard.illumina.parser;
import net.sf.picard.PicardException;
import net.sf.samtools.Defaults;
import net.sf.samtools.util.CloserUtil;
+import net.sf.samtools.util.IOUtil;
import java.io.*;
import java.nio.ByteBuffer;
@@ -45,7 +46,7 @@ class TileIndex implements Iterable<TileIndex.TileIndexRecord> {
TileIndex(final File tileIndexFile) {
try {
this.tileIndexFile = tileIndexFile;
- final InputStream is = new BufferedInputStream(new FileInputStream(tileIndexFile), Defaults.BUFFER_SIZE);
+ final InputStream is = IOUtil.maybeBufferInputStream(new FileInputStream(tileIndexFile));
final ByteBuffer buf = ByteBuffer.allocate(8);
buf.order(ByteOrder.LITTLE_ENDIAN);
int absoluteRecordIndex = 0;
diff --git a/src/java/net/sf/picard/illumina/parser/TileMetricsUtil.java b/src/java/net/sf/picard/illumina/parser/TileMetricsUtil.java
index dd6b54f..6e0bad6 100644
--- a/src/java/net/sf/picard/illumina/parser/TileMetricsUtil.java
+++ b/src/java/net/sf/picard/illumina/parser/TileMetricsUtil.java
@@ -1,8 +1,33 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
package net.sf.picard.illumina.parser;
import net.sf.picard.PicardException;
import net.sf.picard.illumina.parser.readers.TileMetricsOutReader;
import net.sf.picard.illumina.parser.readers.TileMetricsOutReader.IlluminaTileMetrics;
+import net.sf.picard.util.IterableAdapter;
import net.sf.samtools.util.CollectionUtil;
import java.io.File;
@@ -15,9 +40,6 @@ import java.util.*;
* @author mccowan
*/
public class TileMetricsUtil {
- private final static Integer DENSITY_ID_CODE = 100;
- private final static Integer CLUSTER_ID_CODE = 102;
-
/** The path to the directory containing the tile metrics file relative to the basecalling directory. */
public static String INTEROP_SUBDIRECTORY_NAME = "InterOp";
@@ -30,79 +52,103 @@ public class TileMetricsUtil {
}
/**
- * Returns an unmodifiable collection of tile data read from the provided file.
+ * Returns an unmodifiable collection of tile data read from the provided file. For each tile we will extract:
+ * - lane number
+ * - tile number
+ * - density
+ * - number of clusters
+ * - Phasing & Prephasing for first template read (if available)
+ * - Phasing & Prephasing for second template read (if available)
*/
- public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile) throws FileNotFoundException {
-
- // Discard duplicate tile data entries (which has caused problems) via Set.
- final Set<IlluminaTileMetrics> metrics =
- new HashSet<IlluminaTileMetrics>(CollectionUtil.makeCollection(new TileMetricsOutReader(tileMetricsOutFile)));
- final Map<String, Collection<IlluminaTileMetrics>> locationToMetricsMap = CollectionUtil.partition(metrics, new CollectionUtil.Partitioner<IlluminaTileMetrics, String>() {
- @Override
- public String getPartition(final IlluminaTileMetrics metric) {
- return renderMetricLocationKey(metric);
- }
- });
+ public static Collection<Tile> parseTileMetrics(final File tileMetricsOutFile, final ReadStructure readStructure) throws FileNotFoundException {
+ // Get the tile metrics lines from TileMetricsOut, keeping only the last value for any Lane/Tile/Code combination
+ final Collection<IlluminaTileMetrics> tileMetrics = determineLastValueForLaneTileMetricsCode(new TileMetricsOutReader
+ (tileMetricsOutFile));
+ // Collect the tiles by lane & tile, and then collect the metrics by lane
+ final Map<String, Collection<IlluminaTileMetrics>> locationToMetricsMap = partitionTileMetricsByLocation(tileMetrics);
final Collection<Tile> tiles = new LinkedList<Tile>();
for (final Map.Entry<String, Collection<IlluminaTileMetrics>> entry : locationToMetricsMap.entrySet()) {
final Collection<IlluminaTileMetrics> tileRecords = entry.getValue();
- final Map<Integer, Collection<IlluminaTileMetrics>> codeMetricsMap = CollectionUtil.partition(tileRecords, new CollectionUtil.Partitioner<IlluminaTileMetrics, Integer>() {
- @Override
- public Integer getPartition(final IlluminaTileMetrics metric) {
- return metric.getMetricCode();
- }
- });
+
+ // Get a mapping from metric code number to the corresponding IlluminaTileMetrics
+ final Map<Integer, Collection<IlluminaTileMetrics>> codeMetricsMap = partitionTileMetricsByCode(tileRecords);
+
final Set<Integer> observedCodes = codeMetricsMap.keySet();
- if (!(observedCodes.contains(DENSITY_ID_CODE) && observedCodes.contains(CLUSTER_ID_CODE)))
- throw new PicardException(String.format("Expected to find cluster and density record codes (%s and %s) in records read for tile location %s (lane:tile), but found only %s.", CLUSTER_ID_CODE, DENSITY_ID_CODE, entry.getKey(), observedCodes));
+ if (!(observedCodes.contains(IlluminaMetricsCode.DENSITY_ID.getMetricsCode()) && observedCodes.contains(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode())))
+ throw new PicardException(String.format("Expected to find cluster and density record codes (%s and %s) in records read for tile location %s (lane:tile), but found only %s.",
+ IlluminaMetricsCode.CLUSTER_ID.getMetricsCode(), IlluminaMetricsCode.DENSITY_ID.getMetricsCode(), entry.getKey(), observedCodes));
+
+ final IlluminaTileMetrics densityRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.DENSITY_ID.getMetricsCode()));
+ final IlluminaTileMetrics clusterRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(IlluminaMetricsCode.CLUSTER_ID.getMetricsCode()));
+
+ // Snag the phasing data for each read in the read structure. For both types of phasing values, this is the median of all of the individual values seen
+ final Collection<TilePhasingValue> tilePhasingValues = getTilePhasingValues(codeMetricsMap, readStructure);
- final IlluminaTileMetrics densityRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(DENSITY_ID_CODE));
- final IlluminaTileMetrics clusterRecord = CollectionUtil.getSoleElement(codeMetricsMap.get(CLUSTER_ID_CODE));
- tiles.add(new Tile(densityRecord.getLaneNumber(), densityRecord.getTileNumber(), densityRecord.getMetricValue(), clusterRecord.getMetricValue()));
+ tiles.add(new Tile(densityRecord.getLaneNumber(), densityRecord.getTileNumber(), densityRecord.getMetricValue(), clusterRecord.getMetricValue(),
+ tilePhasingValues.toArray(new TilePhasingValue[tilePhasingValues.size()])));
}
return Collections.unmodifiableCollection(tiles);
}
-
-
- private static String renderMetricLocationKey(final IlluminaTileMetrics metric) {
- return String.format("%s:%s", metric.getLaneNumber(), metric.getTileNumber());
- }
+ /** Pulls out the phasing & prephasing value for the template reads and returns a collection of TilePhasingValues representing these */
+ private static Collection<TilePhasingValue> getTilePhasingValues(final Map<Integer, Collection<IlluminaTileMetrics>> codeMetricsMap, final ReadStructure readStructure) {
+ boolean isFirstRead = true;
+ final Collection<TilePhasingValue> tilePhasingValues = new ArrayList<TilePhasingValue>();
+ for (int descriptorIndex = 0; descriptorIndex < readStructure.descriptors.size(); descriptorIndex++) {
+ if (readStructure.descriptors.get(descriptorIndex).type == ReadType.Template) {
+ final TileTemplateRead tileTemplateRead = isFirstRead ? TileTemplateRead.FIRST : TileTemplateRead.SECOND;
+ // For both phasing & prephasing, pull out the value and create a TilePhasingValue for further processing
+ final int phasingCode = IlluminaMetricsCode.getPhasingCode(descriptorIndex, IlluminaMetricsCode.PHASING_BASE);
+ final int prePhasingCode = IlluminaMetricsCode.getPhasingCode(descriptorIndex, IlluminaMetricsCode.PREPHASING_BASE);
+
+ if (!(codeMetricsMap.containsKey(phasingCode) && codeMetricsMap.containsKey(prePhasingCode))) {
+ throw new PicardException("Don't have both phasing and prephasing values for tile");
+ }
- /**
- * Describes a tile.
- */
- public static class Tile {
- private final int lane, tile;
- private final float density, clusters;
-
- protected Tile(final int lane, final int tile, final float density, final float clusters) {
- this.lane = lane;
- this.tile = tile;
- this.density = density;
- this.clusters = clusters;
+ tilePhasingValues.add(new TilePhasingValue(tileTemplateRead,
+ CollectionUtil.getSoleElement(codeMetricsMap.get(phasingCode)).getMetricValue(),
+ CollectionUtil.getSoleElement(codeMetricsMap.get(prePhasingCode)).getMetricValue()));
+ isFirstRead = false;
+ }
}
- /** Returns the number of this tile's parent lane. */
- public int getLaneNumber() {
- return lane;
- }
+ return tilePhasingValues;
+ }
- /** Returns the number/name of this tile. */
- public int getTileNumber() {
- return tile;
+ /** According to Illumina, for every lane/tile/code combination they will only use the last value. Filter out the previous values */
+ private static Collection<IlluminaTileMetrics> determineLastValueForLaneTileMetricsCode(final Iterator<IlluminaTileMetrics>
+ tileMetricsIterator) {
+ final Map<TileMetricsOutReader.IlluminaLaneTileCode, IlluminaTileMetrics> filteredTileMetrics = new HashMap<TileMetricsOutReader.IlluminaLaneTileCode, IlluminaTileMetrics>();
+ for (final IlluminaTileMetrics illuminaTileMetrics : new IterableAdapter<IlluminaTileMetrics>(tileMetricsIterator)) {
+ filteredTileMetrics.put(illuminaTileMetrics.getLaneTileCode(), illuminaTileMetrics);
}
- /** Returns the cluster density of this tile, in units of [cluster/mm^2]. */
- public float getClusterDensity() {
- return density;
- }
+ return filteredTileMetrics.values();
+ }
- /** Returns the number of on this tile. */
- public float getClusterCount() {
- return clusters;
- }
+ private static String renderMetricLocationKey(final IlluminaTileMetrics metric) {
+ return String.format("%s:%s", metric.getLaneNumber(), metric.getTileNumber());
+ }
+
+ // Wrapper around CollectionUtil.Partitioner, purely to de-bulk the actual methods
+ private static Map<Integer, Collection<IlluminaTileMetrics>> partitionTileMetricsByCode(final Collection<IlluminaTileMetrics> tileMetrics) {
+ return CollectionUtil.partition(tileMetrics, new CollectionUtil.Partitioner<IlluminaTileMetrics, Integer>() {
+ @Override
+ public Integer getPartition(final IlluminaTileMetrics metric) {
+ return metric.getMetricCode();
+ }
+ });
+ }
+
+ // Wrapper around CollectionUtil.Partitioner, purely to de-bulk the actual methods
+ private static Map<String, Collection<IlluminaTileMetrics>> partitionTileMetricsByLocation(final Collection<IlluminaTileMetrics> tileMetrics) {
+ return CollectionUtil.partition(tileMetrics, new CollectionUtil.Partitioner<IlluminaTileMetrics, String>() {
+ @Override
+ public String getPartition(final IlluminaTileMetrics metric) {
+ return renderMetricLocationKey(metric);
+ }
+ });
}
}
diff --git a/src/java/net/sf/picard/illumina/parser/TilePhasingValue.java b/src/java/net/sf/picard/illumina/parser/TilePhasingValue.java
new file mode 100644
index 0000000..c94379d
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/TilePhasingValue.java
@@ -0,0 +1,30 @@
+package net.sf.picard.illumina.parser;
+
+/**
+ * Captures information about a phasing value - Which read it corresponds to, which phasing type and a median value
+ *
+ * @author jgentry
+ */
+public class TilePhasingValue {
+ private final TileTemplateRead tileTemplateRead;
+ private final float phasingValue;
+ private final float prePhasingValue;
+
+ public TilePhasingValue(final TileTemplateRead tileTemplateRead, final float phasingValue, final float prePhasingValue) {
+ this.tileTemplateRead = tileTemplateRead;
+ this.phasingValue = phasingValue;
+ this.prePhasingValue = prePhasingValue;
+ }
+
+ public TileTemplateRead getTileTemplateRead() {
+ return tileTemplateRead;
+ }
+
+ public float getPhasingValue() {
+ return phasingValue;
+ }
+
+ public float getPrePhasingValue() {
+ return prePhasingValue;
+ }
+}
\ No newline at end of file
diff --git a/src/java/org/broad/tribble/readers/Positional.java b/src/java/net/sf/picard/illumina/parser/TileTemplateRead.java
similarity index 51%
copy from src/java/org/broad/tribble/readers/Positional.java
copy to src/java/net/sf/picard/illumina/parser/TileTemplateRead.java
index 0b5fb7d..ef60a66 100644
--- a/src/java/org/broad/tribble/readers/Positional.java
+++ b/src/java/net/sf/picard/illumina/parser/TileTemplateRead.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2013 The Broad Institute
+ * Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,38 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package org.broad.tribble.readers;
-import java.io.IOException;
+package net.sf.picard.illumina.parser;
-/**
- * Minimal interface for an object at support getting the current position in the stream / writer / file, as well as a handful of other
- * reader-like features.
- *
- * @author depristo
+/** Defines the first or second template read for a tile
+ * @author jgentry
*/
-public interface Positional extends LocationAware {
- /**
- * Is the stream done? Equivalent to ! hasNext() for an iterator?
- * @return true if the stream has reached EOF, false otherwise
- */
- public boolean isDone() throws IOException;
-
- /**
- * Skip the next nBytes in the stream.
- * @param nBytes to skip, must be >= 0
- * @return the number of bytes actually skippped.
- * @throws IOException
- */
- public long skip(long nBytes) throws IOException;
-
- /**
- * Return the next byte in the first, without actually reading it from the stream.
- *
- * Has the same output as read()
- *
- * @return the next byte, or -1 if EOF encountered
- * @throws IOException
- */
- public int peek() throws IOException;
+public enum TileTemplateRead {
+ FIRST, SECOND
}
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/BarcodeFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/BarcodeFileFaker.java
new file mode 100644
index 0000000..b199963
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/BarcodeFileFaker.java
@@ -0,0 +1,25 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class BarcodeFileFaker extends FileFaker {
+ private final String barcodeString = "1\tn\t \n";
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.put(barcodeString.getBytes());
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return barcodeString.getBytes().length;
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/BciFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/BciFileFaker.java
new file mode 100644
index 0000000..b7af331
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/BciFileFaker.java
@@ -0,0 +1,52 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import net.sf.samtools.util.CloserUtil;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.List;
+
+/**
+ * Created by jcarey on 3/14/14.
+ */
+public class BciFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ for (final Integer tile : tiles) {
+ buffer.putInt(tile);
+ buffer.putInt(1);
+ }
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return 8 * tiles.size();
+ }
+
+ public void fakeBciFile(final File bci, final List<Integer> expectedTiles) throws IOException {
+ tiles = expectedTiles;
+ final FileOutputStream fileOutputStream = new FileOutputStream(bci);
+ final FileChannel channel = fileOutputStream.getChannel();
+ final ByteBuffer buffer = ByteBuffer.allocate(8 * expectedTiles.size());
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+
+ fakeFile(buffer);
+ buffer.flip();
+
+ channel.write(buffer);
+ channel.force(true);
+
+ CloserUtil.close(channel);
+ CloserUtil.close(fileOutputStream);
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/BclFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/BclFileFaker.java
new file mode 100644
index 0000000..2ea32d1
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/BclFileFaker.java
@@ -0,0 +1,29 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class BclFileFaker extends FileFaker {
+
+ @Override
+ public void fakeFile(final ByteBuffer buffer) {
+ buffer.putInt(1);
+ size -= 4;
+ while (size > 0) {
+ //fill the file with no calls
+ buffer.put((byte) 0);
+ size--;
+ }
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ protected int bufferSize() {
+ return size;
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/CifFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/CifFileFaker.java
new file mode 100644
index 0000000..54f7371
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/CifFileFaker.java
@@ -0,0 +1,31 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class CifFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.putChar('C');
+ buffer.putChar('I');
+ buffer.putChar('F');
+ buffer.put((byte) 1);
+ buffer.put((byte) 2);
+ buffer.putShort((short) 1);
+ buffer.putShort((short) 1);
+ buffer.putInt(1);
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return Integer.SIZE + (Character.SIZE * 3) + (Short.SIZE * 2) + 2;
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/ClocsFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/ClocsFileFaker.java
new file mode 100644
index 0000000..7d129cf
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/ClocsFileFaker.java
@@ -0,0 +1,25 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+public class ClocsFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.put((byte) 1);
+ buffer.putInt(1);
+ buffer.put((byte) (0xff & 1));
+ buffer.putFloat((byte) (0xff & 5));
+ buffer.putFloat((byte) (0xff & 5));
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return (Integer.SIZE * 2) + (Float.SIZE * 3);
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/CnfFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/CnfFileFaker.java
new file mode 100644
index 0000000..a1482be
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/CnfFileFaker.java
@@ -0,0 +1,31 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class CnfFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.putChar('C');
+ buffer.putChar('I');
+ buffer.putChar('F');
+ buffer.put((byte) 1);
+ buffer.put((byte) 1);
+ buffer.putShort((short) 1);
+ buffer.putShort((short) 1);
+ buffer.putInt(1);
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return Integer.SIZE + (Character.SIZE * 3) + (Short.SIZE * 2) + 2;
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/FileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/FileFaker.java
new file mode 100644
index 0000000..5697a54
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/FileFaker.java
@@ -0,0 +1,73 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import net.sf.samtools.util.CloserUtil;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.channels.FileChannel;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public abstract class FileFaker {
+
+ int size;
+ List<Integer> tiles;
+
+ protected abstract void fakeFile(ByteBuffer buffer);
+
+ protected abstract boolean addLeadingZeros();
+
+ protected abstract int bufferSize();
+
+ public void fakeFile(final File base, final int tile, final int lane, final String extension) throws IOException {
+ fakeFile(base, Collections.singletonList(tile), lane, extension);
+ }
+
+ public void fakeFile(final File base, final List<Integer> expectedTiles, final int lane, final String extension)
+ throws IOException {
+ if (base.exists() || base.mkdirs()) {
+ this.tiles = expectedTiles;
+ final File fakeFile;
+ if (expectedTiles.size() == 1) {
+ String longTileName = String.valueOf(tiles.get(0));
+ if (addLeadingZeros()) {
+ while (longTileName.length() < 4) {
+ longTileName = "0" + longTileName;
+ }
+ }
+ fakeFile = new File(base, String.format("s_%d_%s%s", lane, longTileName, extension));
+ } else {
+ fakeFile = new File(base, String.format("s_%s%s", lane, extension));
+ }
+
+ fakeFile(fakeFile, bufferSize());
+ }
+
+ }
+
+ public void fakeFile(final File cycleFile, Integer size) throws IOException {
+ if (size == null) {
+ size = 1;
+ }
+ this.size = size;
+ final FileOutputStream fileOutputStream = new FileOutputStream(cycleFile);
+ final FileChannel channel = fileOutputStream.getChannel();
+ final ByteBuffer buffer = ByteBuffer.allocate(size);
+ buffer.order(ByteOrder.LITTLE_ENDIAN);
+
+ fakeFile(buffer);
+
+ buffer.flip();
+
+ channel.write(buffer);
+
+ CloserUtil.close(channel);
+ CloserUtil.close(fileOutputStream);
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/FilterFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/FilterFileFaker.java
new file mode 100644
index 0000000..dd5be22
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/FilterFileFaker.java
@@ -0,0 +1,26 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class FilterFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.putInt(0);
+ buffer.putInt(3);
+ buffer.putInt(1);
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return true;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return Integer.SIZE * 3;
+ }
+}
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/LocsFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/LocsFileFaker.java
new file mode 100644
index 0000000..53591f7
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/LocsFileFaker.java
@@ -0,0 +1,28 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class LocsFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.putInt(1);
+ buffer.putFloat(1.0f);
+ buffer.putInt(1);
+ buffer.putFloat(5.0f);
+ buffer.putFloat(5.0f);
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return (Integer.SIZE * 2) + (Float.SIZE * 3);
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/MultiTileBclFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/MultiTileBclFileFaker.java
new file mode 100644
index 0000000..56e4c59
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/MultiTileBclFileFaker.java
@@ -0,0 +1,32 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class MultiTileBclFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.putInt(1);
+ for (final Integer tile : tiles) {
+ long perTileSize = size;
+ while (perTileSize > 0) {
+ //fill the file with no calls
+ buffer.put((byte) 0);
+ perTileSize--;
+ }
+ }
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return ((size - Integer.SIZE) * tiles.size()) + Integer.SIZE;
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java
new file mode 100644
index 0000000..13fd569
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/MultiTileLocsFileFaker.java
@@ -0,0 +1,30 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class MultiTileLocsFileFaker extends FileFaker {
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.putInt(1);
+ buffer.putFloat(1.0f);
+ buffer.putInt(1);
+ for (int count = 0; count < tiles.size(); count++) {
+ buffer.putFloat(5.0f + (count * 0.5f));
+ buffer.putFloat(5.0f + (count * 0.5f));
+ }
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return (Integer.SIZE * 2) + (Float.SIZE * tiles.size()) + Float.SIZE;
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/PosFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/PosFileFaker.java
new file mode 100644
index 0000000..a11ed22
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/PosFileFaker.java
@@ -0,0 +1,25 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class PosFileFaker extends FileFaker {
+ private final String posFileString = "102.0\t303.3\n";
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.put(posFileString.getBytes());
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return posFileString.getBytes().length;
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/fakers/QSeqFileFaker.java b/src/java/net/sf/picard/illumina/parser/fakers/QSeqFileFaker.java
new file mode 100644
index 0000000..dc3ba44
--- /dev/null
+++ b/src/java/net/sf/picard/illumina/parser/fakers/QSeqFileFaker.java
@@ -0,0 +1,25 @@
+package net.sf.picard.illumina.parser.fakers;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Created by jcarey on 3/13/14.
+ */
+public class QSeqFileFaker extends FileFaker {
+ private final String qseqString = "Can not make qseq file";
+
+ @Override
+ protected void fakeFile(final ByteBuffer buffer) {
+ buffer.put(qseqString.getBytes());
+ }
+
+ @Override
+ protected boolean addLeadingZeros() {
+ return false;
+ }
+
+ @Override
+ protected int bufferSize() {
+ return qseqString.getBytes().length;
+ }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/illumina/parser/readers/BclReader.java b/src/java/net/sf/picard/illumina/parser/readers/BclReader.java
index 058a387..1c09689 100644
--- a/src/java/net/sf/picard/illumina/parser/readers/BclReader.java
+++ b/src/java/net/sf/picard/illumina/parser/readers/BclReader.java
@@ -23,18 +23,17 @@
*/
package net.sf.picard.illumina.parser.readers;
-import net.sf.picard.PicardException;
-import net.sf.picard.util.UnsignedTypeUtil;
-import net.sf.samtools.Defaults;
-import net.sf.samtools.util.BlockCompressedInputStream;
-import net.sf.samtools.util.CloseableIterator;
-import net.sf.samtools.util.CloserUtil;
-
-import java.io.*;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
-import java.util.Iterator;
-import java.util.zip.GZIPInputStream;
+ import net.sf.picard.PicardException;
+ import net.sf.picard.util.UnsignedTypeUtil;
+ import net.sf.samtools.util.BlockCompressedInputStream;
+ import net.sf.samtools.util.CloseableIterator;
+ import net.sf.samtools.util.CloserUtil;
+ import net.sf.samtools.util.IOUtil;
+
+ import java.io.*;
+ import java.nio.ByteBuffer;
+ import java.nio.ByteOrder;
+ import java.util.zip.GZIPInputStream;
/**
* BCL Files are base call and quality score binary files containing a (base,quality) pair for successive clusters.
@@ -96,22 +95,46 @@ public class BclReader implements CloseableIterator<BclReader.BclValue> {
}
}
- public BclReader(final File file, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy) {
+ public static BclReader make(final File file, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy) {
+ return new BclReader(file, bclQualityEvaluationStrategy, false);
+ }
+
+ /**
+ * Produces a {@link net.sf.picard.illumina.parser.readers.BclReader} appropriate for when the consumer intends to call
+ * {@link net.sf.picard.illumina.parser.readers.BclReader#seek(long)}. If this functionality is not required, call
+ * {@link net.sf.picard.illumina.parser.readers.BclReader#make(java.io.File, BclQualityEvaluationStrategy)}.
+ */
+ public static BclReader makeSeekable(final File file, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy) {
+ return new BclReader(file, bclQualityEvaluationStrategy, true);
+ }
+
+ BclReader(final File file, final BclQualityEvaluationStrategy bclQualityEvaluationStrategy, final boolean requiresSeekability) {
this.bclQualityEvaluationStrategy = bclQualityEvaluationStrategy;
-
+
filePath = file.getAbsolutePath();
final boolean isGzip = filePath.endsWith(".gz");
final boolean isBgzf = filePath.endsWith(".bgzf");
- // Open up a buffered stream to read from the file and optionally wrap it in a gzip stream
- // if necessary
- final BufferedInputStream bufferedInputStream;
try {
+ // Open up a buffered stream to read from the file and optionally wrap it in a gzip stream
+ // if necessary
if (isBgzf) {
- inputStream = new BlockCompressedInputStream(file);
+ // Only BlockCompressedInputStreams can seek, and only if they are fed a SeekableStream.
+ inputStream = new BlockCompressedInputStream(IOUtil.maybeBufferedSeekableStream(file));
+ } else if (isGzip) {
+ if (requiresSeekability) {
+ throw new IllegalArgumentException(
+ String.format("Cannot create a seekable reader for gzip bcl: %s.", filePath)
+ );
+ }
+ inputStream = new GZIPInputStream(IOUtil.maybeBufferInputStream(new FileInputStream(file)));
} else {
- bufferedInputStream = new BufferedInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE);
- inputStream = isGzip ? new GZIPInputStream(bufferedInputStream) : bufferedInputStream;
+ if (requiresSeekability) {
+ throw new IllegalArgumentException(
+ String.format("Cannot create a seekable reader for provided bcl: %s.", filePath)
+ );
+ }
+ inputStream = IOUtil.maybeBufferInputStream(new FileInputStream(file));
}
} catch (FileNotFoundException fnfe) {
throw new PicardException("File not found: (" + filePath + ")", fnfe);
@@ -159,8 +182,8 @@ public class BclReader implements CloseableIterator<BclReader.BclValue> {
}
}
- public boolean hasNext() {
- return nextCluster < numClusters;
+ public final boolean hasNext() {
+ return this.nextCluster < this.numClusters;
}
public BclValue next() {
diff --git a/src/java/net/sf/picard/illumina/parser/readers/TileMetricsOutReader.java b/src/java/net/sf/picard/illumina/parser/readers/TileMetricsOutReader.java
index 4f3f329..d813181 100644
--- a/src/java/net/sf/picard/illumina/parser/readers/TileMetricsOutReader.java
+++ b/src/java/net/sf/picard/illumina/parser/readers/TileMetricsOutReader.java
@@ -17,7 +17,7 @@ import java.util.NoSuchElementException;
* byte 0 (unsigned byte) = The version number which MUST be 2 or an exception will be thrown
* byte 1 (unsigned byte) = The record size which must be 10 or an exception will be thrown
* bytes 3 + (current_record * 10) to (current_record * 10 + 10) (TileMetrics Record) = The actual records each of size 10 that
- * get converted into IlluminaTileMetrics objects
+ * get converted into IlluminaTileMetrics objects
*
* TileMetrics Record Format:
* Each 10 byte record is of the following format:
@@ -70,52 +70,47 @@ public class TileMetricsOutReader implements Iterator<TileMetricsOutReader.Illum
}
/**
- * IlluminaTileMetrics corresponds to a single record in a TileMetricsOut file
+ * IlluminaTileMetrics corresponds to a single record in a TileMetricsOut file
*/
public static class IlluminaTileMetrics {
- private final int laneNumber;
- private final int tileNumber;
- private final int metricCode;
+ private final IlluminaLaneTileCode laneTileCode;
private final float metricValue;
public IlluminaTileMetrics(final ByteBuffer bb) {
- laneNumber = UnsignedTypeUtil.uShortToInt(bb.getShort());
- tileNumber = UnsignedTypeUtil.uShortToInt(bb.getShort());
- metricCode = UnsignedTypeUtil.uShortToInt(bb.getShort());
- metricValue = bb.getFloat();
+ this(UnsignedTypeUtil.uShortToInt(bb.getShort()), UnsignedTypeUtil.uShortToInt(bb.getShort()),
+ UnsignedTypeUtil.uShortToInt(bb.getShort()), bb.getFloat());
}
- public IlluminaTileMetrics(int laneNumber, int tileNumber, int metricCode, float metricValue) {
- this.laneNumber = laneNumber;
- this.tileNumber = tileNumber;
- this.metricCode = metricCode;
+ public IlluminaTileMetrics(final int laneNumber, final int tileNumber, final int metricCode, final float metricValue) {
+ this.laneTileCode = new IlluminaLaneTileCode(laneNumber, tileNumber, metricCode);
this.metricValue = metricValue;
}
public int getLaneNumber() {
- return laneNumber;
+ return laneTileCode.getLaneNumber();
}
public int getTileNumber() {
- return tileNumber;
+ return laneTileCode.getTileNumber();
}
public int getMetricCode() {
- return metricCode;
+ return laneTileCode.getMetricCode();
}
public float getMetricValue() {
return metricValue;
}
-
+
+ public IlluminaLaneTileCode getLaneTileCode() {
+ return laneTileCode;
+ }
+
@Override
public boolean equals(final Object o) {
if (o instanceof IlluminaTileMetrics) {
final IlluminaTileMetrics that = (IlluminaTileMetrics) o;
- return laneNumber == that.laneNumber
- && tileNumber == that.tileNumber
- && metricCode == that.metricCode
- && metricValue == that.metricValue; // Identical tile data should render exactly the same float.
+ return laneTileCode == that.laneTileCode && metricValue == that.metricValue; // Identical tile data should render exactly the same float.
} else {
return false;
}
@@ -123,7 +118,50 @@ public class TileMetricsOutReader implements Iterator<TileMetricsOutReader.Illum
@Override
public int hashCode() {
- return String.format("%s:%s:%s:%s", laneNumber, tileNumber, metricCode, metricValue).hashCode(); // Slow but adequate.
+ return String.format("%s:%s:%s:%s", laneTileCode.getLaneNumber(), laneTileCode.getTileNumber(), laneTileCode.getMetricCode(), metricValue).hashCode(); // Slow but adequate.
+ }
+ }
+
+ /** Helper class which captures the combination of a lane, tile & metric code */
+ public static class IlluminaLaneTileCode {
+ private final int laneNumber;
+ private final int tileNumber;
+ private final int metricCode;
+
+ public IlluminaLaneTileCode(final int laneNumber, final int tileNumber, final int metricCode) {
+ this.laneNumber = laneNumber;
+ this.tileNumber = tileNumber;
+ this.metricCode = metricCode;
+ }
+
+ public int getLaneNumber() {
+ return laneNumber;
+ }
+
+ public int getTileNumber() {
+ return tileNumber;
+ }
+
+ public int getMetricCode() {
+ return metricCode;
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+ if (o instanceof IlluminaLaneTileCode) {
+ final IlluminaLaneTileCode that = (IlluminaLaneTileCode) o;
+ return laneNumber == that.laneNumber && tileNumber == that.tileNumber && metricCode == that.metricCode;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public int hashCode() {
+ int result = laneNumber;
+ result = 31 * result + tileNumber;
+ result = 31 * result + metricCode;
+ return result;
}
}
}
diff --git a/src/java/net/sf/picard/io/IoUtil.java b/src/java/net/sf/picard/io/IoUtil.java
index 8479327..0fade1f 100644
--- a/src/java/net/sf/picard/io/IoUtil.java
+++ b/src/java/net/sf/picard/io/IoUtil.java
@@ -288,7 +288,7 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
* Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error
*/
public static BufferedWriter openFileForBufferedWriting(final File file, final boolean append) {
- return new BufferedWriter(new OutputStreamWriter(openFileForWriting(file, append)), Defaults.BUFFER_SIZE);
+ return new BufferedWriter(new OutputStreamWriter(openFileForWriting(file, append)), Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
@@ -302,8 +302,8 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
* Preferred over PrintStream and PrintWriter because an exception is thrown on I/O error
*/
public static BufferedWriter openFileForBufferedUtf8Writing(final File file) {
- return new BufferedWriter(new OutputStreamWriter(
- openFileForWriting(file), Charset.forName("UTF-8")), Defaults.BUFFER_SIZE);
+ return new BufferedWriter(new OutputStreamWriter(openFileForWriting(file), Charset.forName("UTF-8")),
+ Defaults.NON_ZERO_BUFFER_SIZE);
}
/**
@@ -326,9 +326,13 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
public static OutputStream openGzipFileForWriting(final File file, final boolean append) {
try {
+ if (Defaults.BUFFER_SIZE > 0) {
return new CustomGzipOutputStream(new FileOutputStream(file, append),
Defaults.BUFFER_SIZE,
Defaults.COMPRESSION_LEVEL);
+ } else {
+ return new CustomGzipOutputStream(new FileOutputStream(file, append), Defaults.COMPRESSION_LEVEL);
+ }
}
catch (IOException ioe) {
throw new PicardException("Error opening file for writing: " + file.getName(), ioe);
@@ -349,7 +353,7 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
final FileOutputStream fos = new FileOutputStream(file, append);
fos.write(66); //write magic number 'BZ' because CBZip2OutputStream does not do it for you
fos.write(90);
- return new BufferedOutputStream(new CBZip2OutputStream(fos), Defaults.BUFFER_SIZE);
+ return IOUtil.maybeBufferOutputStream(new CBZip2OutputStream(fos));
}
catch (IOException ioe) {
throw new PicardException("Error opening file for writing: " + file.getName(), ioe);
@@ -369,7 +373,7 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
*/
public static void copyStream(final InputStream input, final OutputStream output) {
try {
- final byte[] buffer = new byte[Defaults.BUFFER_SIZE];
+ final byte[] buffer = new byte[Defaults.NON_ZERO_BUFFER_SIZE];
int bytesRead = 0;
while((bytesRead = input.read(buffer)) > 0) {
output.write(buffer, 0, bytesRead);
@@ -487,8 +491,8 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
}
/** Checks that a file exists and is readable, and then returns a buffered reader for it. */
- public static BufferedReader openFileForBufferedReading(final File file) throws IOException {
- return new BufferedReader(new InputStreamReader(openFileForReading(file)), Defaults.BUFFER_SIZE);
+ public static BufferedReader openFileForBufferedReading(final File file) {
+ return new BufferedReader(new InputStreamReader(openFileForReading(file)), Defaults.NON_ZERO_BUFFER_SIZE);
}
/** Takes a string and replaces any characters that are not safe for filenames with an underscore */
@@ -541,7 +545,7 @@ public class IoUtil extends net.sf.samtools.util.IOUtil {
*/
public static String readFully(final InputStream in) {
try {
- final BufferedReader r = new BufferedReader(new InputStreamReader(in), Defaults.BUFFER_SIZE);
+ final BufferedReader r = new BufferedReader(new InputStreamReader(in), Defaults.NON_ZERO_BUFFER_SIZE);
final StringBuilder builder = new StringBuilder(512);
String line = null;
@@ -685,5 +689,10 @@ class CustomGzipOutputStream extends GZIPOutputStream {
super(outputStream, bufferSize);
this.def.setLevel(compressionLevel);
}
+
+ CustomGzipOutputStream(OutputStream outputStream, final int compressionLevel) throws IOException {
+ super(outputStream);
+ this.def.setLevel(compressionLevel);
+ }
}
diff --git a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
index 2d0cf26..507c8cd 100644
--- a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
+++ b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
@@ -24,18 +24,34 @@
package net.sf.picard.sam;
import net.sf.picard.PicardException;
+import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.picard.io.IoUtil;
import net.sf.picard.reference.ReferenceSequenceFileWalker;
import net.sf.picard.util.CigarUtil;
import net.sf.picard.util.Log;
import net.sf.picard.util.ProgressLogger;
-import net.sf.samtools.*;
+import net.sf.samtools.BAMRecordCodec;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileHeader.SortOrder;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMFileWriterFactory;
+import net.sf.samtools.SAMProgramRecord;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMRecordCoordinateComparator;
+import net.sf.samtools.SAMRecordQueryNameComparator;
+import net.sf.samtools.SAMRecordUtil;
+import net.sf.samtools.SAMSequenceDictionary;
+import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.SAMTag;
+import net.sf.samtools.SAMUtils;
+import net.sf.samtools.SamPairUtil;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.SequenceUtil;
import net.sf.samtools.util.SortingCollection;
-import net.sf.picard.filter.FilteringIterator;
import java.io.File;
import java.text.DecimalFormat;
@@ -85,6 +101,7 @@ public abstract class AbstractAlignmentMerger {
private final boolean alignedReadsOnly;
private final SAMFileHeader header;
private final List<String> attributesToRetain = new ArrayList<String>();
+ private final List<String> attributesToRemove = new ArrayList<String>();
private final File referenceFasta;
private final Integer read1BasesTrimmed;
private final Integer read2BasesTrimmed;
@@ -127,6 +144,9 @@ public abstract class AbstractAlignmentMerger {
* included when merging. This overrides the exclusion of
* attributes whose tags start with the reserved characters
* of X, Y, and Z
+ * @param attributesToRemove attributes from the alignment record that should be
+ * removed when merging. This overrides attributesToRetain if they share
+ * common tags.
* @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional.
* @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment. Optional.
* @param expectedOrientations A List of SamPairUtil.PairOrientations that are expected for
@@ -140,6 +160,7 @@ public abstract class AbstractAlignmentMerger {
final File referenceFasta, final boolean clipAdapters,
final boolean bisulfiteSequence, final boolean alignedReadsOnly,
final SAMProgramRecord programRecord, final List<String> attributesToRetain,
+ final List<String> attributesToRemove,
final Integer read1BasesTrimmed, final Integer read2BasesTrimmed,
final List<SamPairUtil.PairOrientation> expectedOrientations,
final SAMFileHeader.SortOrder sortOrder,
@@ -173,6 +194,18 @@ public abstract class AbstractAlignmentMerger {
if (attributesToRetain != null) {
this.attributesToRetain.addAll(attributesToRetain);
}
+ if (attributesToRemove != null) {
+ this.attributesToRemove.addAll(attributesToRemove);
+ // attributesToRemove overrides attributesToRetain
+ if (!this.attributesToRetain.isEmpty()) {
+ for (String attribute : this.attributesToRemove) {
+ if (this.attributesToRetain.contains(attribute)) {
+ log.info("Overriding retaining the " + attribute + " tag since remove overrides retain.");
+ this.attributesToRetain.remove(attribute);
+ }
+ }
+ }
+ }
this.read1BasesTrimmed = read1BasesTrimmed;
this.read2BasesTrimmed = read2BasesTrimmed;
this.expectedOrientations = expectedOrientations;
@@ -417,6 +450,9 @@ public abstract class AbstractAlignmentMerger {
updateCigarForTrimmedOrClippedBases(unaligned, aligned);
if (SAMUtils.cigarMapsNoBasesToRef(unaligned.getCigar())) {
SAMUtils.makeReadUnmapped(unaligned);
+ } else if (SAMUtils.recordMapsEntirelyBeyondEndOfReference(aligned)) {
+ log.warn("Record mapped off end of reference; making unmapped: " + aligned);
+ SAMUtils.makeReadUnmapped(unaligned);
}
}
@@ -487,9 +523,9 @@ public abstract class AbstractAlignmentMerger {
*/
protected void setValuesFromAlignment(final SAMRecord rec, final SAMRecord alignment) {
for (final SAMRecord.SAMTagAndValue attr : alignment.getAttributes()) {
- // Copy over any non-reserved attributes.
- if (!isReservedTag(attr.tag) || this.attributesToRetain.contains(attr.tag)) {
- rec.setAttribute(attr.tag, attr.value);
+ // Copy over any non-reserved attributes. attributesToRemove overrides attributesToRetain.
+ if ((!isReservedTag(attr.tag) || this.attributesToRetain.contains(attr.tag)) && !this.attributesToRemove.contains(attr.tag)) {
+ rec.setAttribute(attr.tag, attr.value);
}
}
rec.setReadUnmappedFlag(alignment.getReadUnmappedFlag());
@@ -579,7 +615,7 @@ public abstract class AbstractAlignmentMerger {
? this.read1BasesTrimmed != null ? this.read1BasesTrimmed : 0
: this.read2BasesTrimmed != null ? this.read2BasesTrimmed : 0;
final int notWritten = originalReadLength - (alignmentReadLength + trimmed);
-
+
// Update cigar if the mate maps off the reference
createNewCigarsIfMapsOffEndOfReference(rec);
diff --git a/src/java/net/sf/picard/sam/FastqToSam.java b/src/java/net/sf/picard/sam/FastqToSam.java
index af9d0f3..b70d6e7 100644
--- a/src/java/net/sf/picard/sam/FastqToSam.java
+++ b/src/java/net/sf/picard/sam/FastqToSam.java
@@ -124,7 +124,6 @@ public class FastqToSam extends CommandLineProgram {
/* Simply invokes the right method for unpaired or paired data. */
protected int doWork() {
- if (QUALITY_FORMAT == null) {
final QualityEncodingDetector detector = new QualityEncodingDetector();
final FastqReader reader = new FastqReader(FASTQ,ALLOW_AND_IGNORE_EMPTY_LINES);
if (FASTQ2 == null) {
@@ -136,11 +135,11 @@ public class FastqToSam extends CommandLineProgram {
}
reader.close();
- QUALITY_FORMAT = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ);
+ QUALITY_FORMAT = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ, QUALITY_FORMAT);
if (detector.isDeterminationAmbiguous())
LOG.warn("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities.");
LOG.info(String.format("Auto-detected quality format as: %s.", QUALITY_FORMAT));
- }
+
final int readCount = (FASTQ2 == null) ? doUnpaired() : doPaired();
LOG.info("Processed " + readCount + " fastq reads");
return 0;
diff --git a/src/java/net/sf/picard/sam/MergeBamAlignment.java b/src/java/net/sf/picard/sam/MergeBamAlignment.java
index bd21347..53dfc99 100644
--- a/src/java/net/sf/picard/sam/MergeBamAlignment.java
+++ b/src/java/net/sf/picard/sam/MergeBamAlignment.java
@@ -29,8 +29,9 @@ import net.sf.picard.cmdline.Option;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.cmdline.Usage;
import net.sf.picard.util.Log;
-import net.sf.samtools.*;
import net.sf.samtools.SAMFileHeader.SortOrder;
+import net.sf.samtools.SAMProgramRecord;
+import net.sf.samtools.SamPairUtil;
import java.io.File;
import java.util.ArrayList;
@@ -132,6 +133,10 @@ public class MergeBamAlignment extends CommandLineProgram {
"brought over from the alignment data when merging.")
public List<String> ATTRIBUTES_TO_RETAIN = new ArrayList<String>();
+ @Option(doc="Attributes from the alignment record that should be removed when merging." +
+ " This overrides ATTRIBUTES_TO_RETAIN if they share common tags.")
+ public List<String> ATTRIBUTES_TO_REMOVE = new ArrayList<String>();
+
@Option(shortName="R1_TRIM",
doc="The number of bases trimmed from the beginning of read 1 prior to alignment")
public int READ1_TRIM = 0;
@@ -226,7 +231,7 @@ public class MergeBamAlignment extends CommandLineProgram {
final SamAlignmentMerger merger = new SamAlignmentMerger(UNMAPPED_BAM, OUTPUT,
REFERENCE_SEQUENCE, prod, CLIP_ADAPTERS, IS_BISULFITE_SEQUENCE,
ALIGNED_READS_ONLY, ALIGNED_BAM, MAX_INSERTIONS_OR_DELETIONS,
- ATTRIBUTES_TO_RETAIN, READ1_TRIM, READ2_TRIM,
+ ATTRIBUTES_TO_RETAIN, ATTRIBUTES_TO_REMOVE, READ1_TRIM, READ2_TRIM,
READ1_ALIGNED_BAM, READ2_ALIGNED_BAM, EXPECTED_ORIENTATIONS, SORT_ORDER,
PRIMARY_ALIGNMENT_STRATEGY.newInstance());
merger.setClipOverlappingReads(CLIP_OVERLAPPING_READS);
diff --git a/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java b/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
index 3d6276e..ed5fffd 100644
--- a/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
+++ b/src/java/net/sf/picard/sam/RevertOriginalBaseQualitiesAndAddMateCigar.java
@@ -60,7 +60,7 @@ public class RevertOriginalBaseQualitiesAndAddMateCigar extends CommandLineProgr
new RevertOriginalBaseQualitiesAndAddMateCigar().instanceMainWithExit(args);
}
- protected int doWork() {
+ public int doWork() {
IoUtil.assertFileIsReadable(INPUT);
IoUtil.assertFileIsWritable(OUTPUT);
diff --git a/src/java/net/sf/picard/sam/SamAlignmentMerger.java b/src/java/net/sf/picard/sam/SamAlignmentMerger.java
index 0d3b66d..85dba7a 100644
--- a/src/java/net/sf/picard/sam/SamAlignmentMerger.java
+++ b/src/java/net/sf/picard/sam/SamAlignmentMerger.java
@@ -1,11 +1,19 @@
package net.sf.picard.sam;
+import net.sf.samtools.BAMRecordCodec;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMRecordQueryNameComparator;
+import net.sf.samtools.SamPairUtil;
import net.sf.picard.PicardException;
import net.sf.picard.io.IoUtil;
import net.sf.picard.util.Log;
import net.sf.picard.util.PeekableIterator;
-import net.sf.samtools.*;
import net.sf.samtools.SAMFileHeader.SortOrder;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMProgramRecord;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.DelegatingIterator;
import net.sf.samtools.util.SortingCollection;
@@ -54,6 +62,9 @@ public class SamAlignmentMerger extends AbstractAlignmentMerger {
* included when merging. This overrides the exclusion of
* attributes whose tags start with the reserved characters
* of X, Y, and Z
+ * @param attributesToRemove attributes from the alignment record that should be
+ * removed when merging. This overrides attributesToRetain if they share
+ * common tags.
* @param read1BasesTrimmed The number of bases trimmed from start of read 1 prior to alignment. Optional.
* @param read2BasesTrimmed The number of bases trimmed from start of read 2 prior to alignment. Optional.
* @param read1AlignedSamFile The alignment records for read1. Used when the two ends of a read are
@@ -73,6 +84,7 @@ public class SamAlignmentMerger extends AbstractAlignmentMerger {
final SAMProgramRecord programRecord, final boolean clipAdapters, final boolean bisulfiteSequence,
final boolean alignedReadsOnly,
final List<File> alignedSamFile, final int maxGaps, final List<String> attributesToRetain,
+ final List<String> attributesToRemove,
final Integer read1BasesTrimmed, final Integer read2BasesTrimmed,
final List<File> read1AlignedSamFile, final List<File> read2AlignedSamFile,
final List<SamPairUtil.PairOrientation> expectedOrientations,
@@ -80,7 +92,7 @@ public class SamAlignmentMerger extends AbstractAlignmentMerger {
final PrimaryAlignmentSelectionStrategy primaryAlignmentSelectionStrategy) {
super(unmappedBamFile, targetBamFile, referenceFasta, clipAdapters, bisulfiteSequence,
- alignedReadsOnly, programRecord, attributesToRetain, read1BasesTrimmed,
+ alignedReadsOnly, programRecord, attributesToRetain, attributesToRemove, read1BasesTrimmed,
read2BasesTrimmed, expectedOrientations, sortOrder, primaryAlignmentSelectionStrategy);
if ((alignedSamFile == null || alignedSamFile.size() == 0) &&
diff --git a/src/java/net/sf/picard/sam/SamFileValidator.java b/src/java/net/sf/picard/sam/SamFileValidator.java
index 73e2519..46695af 100644
--- a/src/java/net/sf/picard/sam/SamFileValidator.java
+++ b/src/java/net/sf/picard/sam/SamFileValidator.java
@@ -233,7 +233,7 @@ public class SamFileValidator {
* records on a subsequent call).
*/
private void validateSamRecordsAndQualityFormat(final Iterable<SAMRecord> samRecords, final SAMFileHeader header) {
- SAMRecordIterator iter = (SAMRecordIterator) samRecords.iterator();
+ final SAMRecordIterator iter = (SAMRecordIterator) samRecords.iterator();
final ProgressLogger progress = new ProgressLogger(log, 10000000, "Validated Read");
final QualityEncodingDetector qualityDetector = new QualityEncodingDetector();
try {
@@ -270,7 +270,7 @@ public class SamFileValidator {
try {
if (progress.getCount() > 0) { // Avoid exception being thrown as a result of no qualities being read
- final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM);
+ final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM, FastqQualityFormat.Standard);
if (format != FastqQualityFormat.Standard) {
addError(new SAMValidationError(Type.INVALID_QUALITY_FORMAT, String.format("Detected %s quality score encoding, but expected %s.", format, FastqQualityFormat.Standard), null));
}
diff --git a/src/java/net/sf/picard/sam/ValidateSamFile.java b/src/java/net/sf/picard/sam/ValidateSamFile.java
index 1f11fb5..dc2d448 100644
--- a/src/java/net/sf/picard/sam/ValidateSamFile.java
+++ b/src/java/net/sf/picard/sam/ValidateSamFile.java
@@ -31,6 +31,7 @@ import java.util.List;
import java.util.ArrayList;
import net.sf.picard.cmdline.Usage;
+import net.sf.picard.util.FastqQualityFormat;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMValidationError;
import net.sf.picard.PicardException;
diff --git a/src/java/net/sf/picard/util/IntervalList.java b/src/java/net/sf/picard/util/IntervalList.java
index 19b315c..740a333 100644
--- a/src/java/net/sf/picard/util/IntervalList.java
+++ b/src/java/net/sf/picard/util/IntervalList.java
@@ -166,7 +166,7 @@ public class IntervalList implements Iterable<Interval> {
* @return an IntervalList object that contains the headers and intervals from the file
*/
public static IntervalList fromFile(final File file) {
- return fromReader(new BufferedReader(new InputStreamReader(IoUtil.openFileForReading(file)), IoUtil.STANDARD_BUFFER_SIZE));
+ return fromReader(IoUtil.openFileForBufferedReading(file));
}
/**
@@ -248,7 +248,7 @@ public class IntervalList implements Iterable<Interval> {
*/
public void write(final File file) {
try {
- final BufferedWriter out = new BufferedWriter(new OutputStreamWriter(IoUtil.openFileForWriting(file)), IoUtil.STANDARD_BUFFER_SIZE);
+ final BufferedWriter out = IoUtil.openFileForBufferedWriting(file);
final FormatUtil format = new FormatUtil();
// Write out the header
diff --git a/src/java/net/sf/picard/util/MathUtil.java b/src/java/net/sf/picard/util/MathUtil.java
index 164e94c..7ffb8c9 100644
--- a/src/java/net/sf/picard/util/MathUtil.java
+++ b/src/java/net/sf/picard/util/MathUtil.java
@@ -25,6 +25,7 @@
package net.sf.picard.util;
import java.math.BigDecimal;
+import java.util.Arrays;
import static java.lang.Math.pow;
@@ -66,6 +67,18 @@ public class MathUtil {
return (v1 < v2 ? -1 : (v1 == v2 ? 0 : 1));
}
+ /** Calculate the median of an array of doubles. Assumes that the input is sorted */
+ public static double median(final double... in) {
+ if (in.length == 0) {
+ throw new IllegalArgumentException("Attempting to find the median of an empty array");
+ }
+
+ final double[] data = Arrays.copyOf(in, in.length);
+ Arrays.sort(data);
+ final int middle = data.length / 2;
+ return data.length % 2 == 1 ? data[middle] : (data[middle - 1] + data[middle]) / 2.0;
+ }
+
/**
* Obtains percentage of two Longs
* @param numerator dividend
diff --git a/src/java/net/sf/picard/util/OverlapDetector.java b/src/java/net/sf/picard/util/OverlapDetector.java
index 89a8d17..ffd3416 100644
--- a/src/java/net/sf/picard/util/OverlapDetector.java
+++ b/src/java/net/sf/picard/util/OverlapDetector.java
@@ -83,6 +83,18 @@ public class OverlapDetector<T> {
}
}
+ /** Gets all the objects that could be returned by the overlap detector. */
+ public Collection<T> getAll() {
+ Collection<T> all = new HashSet<T>();
+ for (IntervalTree<Set<T>> tree : this.cache.values()) {
+ for (IntervalTree.Node<Set<T>> node : tree) {
+ all.addAll(node.getValue());
+ }
+ }
+
+ return all;
+ }
+
/** Gets the collection of objects that overlap the provided mapping. */
public Collection<T> getOverlaps(Interval rhs) {
Collection<T> matches = new ArrayList<T>();
@@ -104,16 +116,4 @@ public class OverlapDetector<T> {
return matches;
}
-
- /** Gets all the objects that could be returned by the overlap detector. */
- public Collection<T> getAll() {
- Collection<T> all = new HashSet<T>();
- for (IntervalTree<Set<T>> tree : this.cache.values()) {
- for (IntervalTree.Node<Set<T>> node : tree) {
- all.addAll(node.getValue());
- }
- }
-
- return all;
- }
}
diff --git a/src/java/net/sf/picard/util/QualityEncodingDetector.java b/src/java/net/sf/picard/util/QualityEncodingDetector.java
index def47fe..808929b 100644
--- a/src/java/net/sf/picard/util/QualityEncodingDetector.java
+++ b/src/java/net/sf/picard/util/QualityEncodingDetector.java
@@ -146,7 +146,7 @@ public class QualityEncodingDetector {
while (iterator.hasNext() && recordCount++ != maxRecords) {
this.add(iterator.next());
}
-
+
return recordCount;
} finally {
iterator.close();
@@ -174,7 +174,7 @@ public class QualityEncodingDetector {
* @return True if more than one format is possible after exclusions; false otherwise
*/
public boolean isDeterminationAmbiguous() {
- return this.generateCandidateQualities().size() > 1;
+ return this.generateCandidateQualities(true).size() > 1;
}
/**
@@ -183,9 +183,10 @@ public class QualityEncodingDetector {
* Specifically, for each format's known range of possible values (its "quality scheme"), exclude formats if any
* observed values fall outside of that range. Additionally, exclude formats for which we expect to see at
* least one quality in a range of values, but do not. (For example, for Phred, we expect to eventually see
- * a value below 58. If we never see such a value, we exclude Phred as a possible format.)
+ * a value below 58. If we never see such a value, we exclude Phred as a possible format unless the checkExpected
+ * flag is set to false in which case we leave Phred as a possible quality format.)
*/
- public EnumSet<FastqQualityFormat> generateCandidateQualities() {
+ public EnumSet<FastqQualityFormat> generateCandidateQualities(final boolean checkExpected) {
final EnumSet<FastqQualityFormat> candidateFormats = EnumSet.allOf(FastqQualityFormat.class);
final Set<Integer> observedAsciiQualities = this.qualityAggregator.getObservedAsciiQualities();
if (observedAsciiQualities.isEmpty())
@@ -209,11 +210,11 @@ public class QualityEncodingDetector {
}
/**
- * We remove elements from this list as we observe values in the corresponding range; if the list isn't
+ * We remove elements from this list as we observe values in the corresponding range; if the list isn't
* empty, we haven't seen a value in that range. In other words, we haven't seen a value we expected.
* Consequently, we remove the corresponding format from the running possibilities.
*/
- if (!remainingExpectedValueRanges.isEmpty()) {
+ if (!remainingExpectedValueRanges.isEmpty() && checkExpected) {
candidateFormats.remove(scheme.qualityFormat);
}
}
@@ -222,42 +223,6 @@ public class QualityEncodingDetector {
}
/**
- * Based on the quality scores accumulated in the detector as well as the context in which this guess applies (a BAM
- * or fastq), determines the best guess as to the quality format.
- * <p/>
- * This method does not exclude any quality formats based on observed quality values; even if there remain multiple
- * candidate qualities (see generateCandidateQualities()), picks a single one based on a high-level logic.
- *
- * @param context The type of file for which the guess is being made, Fastq or Bam
- */
- public FastqQualityFormat generateBestGuess(final FileContext context) {
- final EnumSet<FastqQualityFormat> possibleFormats = this.generateCandidateQualities();
- switch (possibleFormats.size()) {
- case 1:
- return possibleFormats.iterator().next();
- case 2:
- if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Solexa))) {
- return FastqQualityFormat.Illumina;
- } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Standard))) {
- switch (context) {
- case FASTQ:
- return FastqQualityFormat.Illumina;
- case SAM:
- return FastqQualityFormat.Standard;
- }
- } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Standard, FastqQualityFormat.Solexa))) {
- throw new PicardException("The quality format cannot be determined: both Phred and Solexa formats are possible; this application's logic does not handle this scenario.");
- } else throw new PicardException("Unreachable code.");
- case 3:
- throw new PicardException("The quality format cannot be determined: no formats were excluded.");
- case 0:
- throw new PicardException("The quality format cannot be determined: all formats were excluded.");
- default:
- throw new PicardException("Unreachable code.");
- }
- }
-
- /**
* Interleaves FastqReader iterators so that serial-iteration of the result cycles between the constituent iterators.
*/
private static Iterator<FastqRecord> generateInterleavedFastqIterator(final FastqReader... readers) {
@@ -308,7 +273,7 @@ public class QualityEncodingDetector {
final QualityEncodingDetector detector = new QualityEncodingDetector();
final long recordCount = detector.add(maxRecords, readers);
log.debug(String.format("Read %s records from %s.", recordCount, Arrays.toString(readers)));
- return detector.generateBestGuess(FileContext.FASTQ);
+ return detector.generateBestGuess(FileContext.FASTQ, null);
}
public static FastqQualityFormat detect(final FastqReader... readers) {
@@ -328,10 +293,67 @@ public class QualityEncodingDetector {
final QualityEncodingDetector detector = new QualityEncodingDetector();
final long recordCount = detector.add(maxRecords, reader);
log.debug(String.format("Read %s records from %s.", recordCount, reader));
- return detector.generateBestGuess(FileContext.SAM);
+ return detector.generateBestGuess(FileContext.SAM, null);
}
public static FastqQualityFormat detect(final SAMFileReader reader) {
return detect(DEFAULT_MAX_RECORDS_TO_ITERATE, reader);
}
+
+ /**
+ * Reads through the records in the provided SAM reader and uses their quality scores to sanity check the expected
+ * quality passed in. If the expected quality format is sane we just hand this back otherwise we throw a
+ * {@link PicardException}.
+ */
+ public static FastqQualityFormat detect(final SAMFileReader reader, final FastqQualityFormat expectedQualityFormat) {
+ //sanity check expectedQuality
+ final QualityEncodingDetector detector = new QualityEncodingDetector();
+ final long recordCount = detector.add(DEFAULT_MAX_RECORDS_TO_ITERATE, reader);
+ log.debug(String.format("Read %s records from %s.", recordCount, reader));
+ return detector.generateBestGuess(FileContext.SAM, expectedQualityFormat);
+ }
+
+ /**
+ * Make the best guess at the quality format. If an expected quality is passed in the values are sanity checked
+ * (ignoring expected range) and if they are deemed acceptable the expected quality is passed back. Otherwise we use
+ * a set of heuristics to make our best guess.
+ */
+ public FastqQualityFormat generateBestGuess(final FileContext context, final FastqQualityFormat expectedQuality) {
+ final EnumSet<FastqQualityFormat> possibleFormats;
+ if (null != expectedQuality) {
+ possibleFormats = this.generateCandidateQualities(false);
+ if (possibleFormats.contains(expectedQuality)) {
+ return expectedQuality;
+ } else {
+ throw new PicardException(
+ String.format("The quality values do not fall in the range appropriate for the expected quality of %s.",
+ expectedQuality.name()));
+ }
+ } else {
+ possibleFormats = this.generateCandidateQualities(true);
+ switch (possibleFormats.size()) {
+ case 1:
+ return possibleFormats.iterator().next();
+ case 2:
+ if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Solexa))) {
+ return FastqQualityFormat.Illumina;
+ } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Standard))) {
+ switch (context) {
+ case FASTQ:
+ return FastqQualityFormat.Illumina;
+ case SAM:
+ return FastqQualityFormat.Standard;
+ }
+ } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Standard, FastqQualityFormat.Solexa))) {
+ return FastqQualityFormat.Standard;
+ } else throw new PicardException("Unreachable code.");
+ case 3:
+ throw new PicardException("The quality format cannot be determined: no formats were excluded.");
+ case 0:
+ throw new PicardException("The quality format cannot be determined: all formats were excluded.");
+ default:
+ throw new PicardException("Unreachable code.");
+ }
+ }
+ }
}
\ No newline at end of file
diff --git a/src/java/net/sf/picard/vcf/MakeSitesOnlyVcf.java b/src/java/net/sf/picard/vcf/MakeSitesOnlyVcf.java
index c6d3398..01f3338 100644
--- a/src/java/net/sf/picard/vcf/MakeSitesOnlyVcf.java
+++ b/src/java/net/sf/picard/vcf/MakeSitesOnlyVcf.java
@@ -12,17 +12,17 @@ import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.CloserUtil;
+import org.broadinstitute.variant.variantcontext.Allele;
+import org.broadinstitute.variant.variantcontext.GenotypesContext;
import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
import org.broadinstitute.variant.variantcontext.writer.Options;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriter;
import org.broadinstitute.variant.variantcontext.writer.VariantContextWriterFactory;
-import org.broadinstitute.variant.vcf.VCFFileReader;
-import org.broadinstitute.variant.vcf.VCFHeader;
+import org.broadinstitute.variant.vcf.*;
import java.io.File;
-import java.util.Collections;
-import java.util.EnumSet;
-import java.util.Set;
+import java.util.*;
/**
* Writes out a VCF that contains all the site-level information for all records in the input VCF and no per-sample information.
@@ -41,12 +41,9 @@ public class MakeSitesOnlyVcf extends CommandLineProgram {
@Option(shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME, doc="Output VCF or BCF to emit without per-sample info.")
public File OUTPUT;
- @Option(shortName=StandardOptionDefinitions.SEQUENCE_DICTIONARY_SHORT_NAME,
- doc="Sequence dictionary to use when indexing the VCF if the VCF header does not contain contig information.", optional = true)
- public File SEQUENCE_DICTIONARY;
+ @Option(shortName="S", doc="Optionally one or more samples to retain when building the 'sites-only' VCF.", optional=true)
+ public Set<String> SAMPLE = new TreeSet<String>();
- private static final Set<String> NO_SAMPLES = Collections.emptySet();
-
// Stock main method
public static void main(final String[] args) {
new MakeSitesOnlyVcf().instanceMainWithExit(args);
@@ -59,15 +56,11 @@ public class MakeSitesOnlyVcf extends CommandLineProgram {
@Override
protected int doWork() {
IoUtil.assertFileIsReadable(INPUT);
- if (SEQUENCE_DICTIONARY != null) IoUtil.assertFileIsReadable(SEQUENCE_DICTIONARY);
IoUtil.assertFileIsWritable(OUTPUT);
final VCFFileReader reader = new VCFFileReader(INPUT, false);
- final VCFHeader header = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
- final SAMSequenceDictionary sequenceDictionary =
- SEQUENCE_DICTIONARY != null
- ? SAMFileReader.getSequenceDictionary(SEQUENCE_DICTIONARY)
- : header.getSequenceDictionary();
+ final VCFHeader inputVcfHeader = new VCFHeader(reader.getFileHeader().getMetaDataInInputOrder());
+ final SAMSequenceDictionary sequenceDictionary = inputVcfHeader.getSequenceDictionary();
if (CREATE_INDEX && sequenceDictionary == null) {
throw new PicardException("A sequence dictionary must be available (either through the input file or by setting it explicitly) when creating indexed output.");
@@ -77,14 +70,18 @@ public class MakeSitesOnlyVcf extends CommandLineProgram {
final EnumSet<Options> options = EnumSet.copyOf(VariantContextWriterFactory.DEFAULT_OPTIONS);
if (CREATE_INDEX) options.add(Options.INDEX_ON_THE_FLY); else options.remove(Options.INDEX_ON_THE_FLY);
+ // Setup the site-only file writer
final VariantContextWriter writer = VariantContextWriterFactory.create(OUTPUT, sequenceDictionary, options);
- writer.writeHeader(header);
- final CloseableIterator<VariantContext> iterator = reader.iterator();
+ final VCFHeader header = new VCFHeader(inputVcfHeader.getMetaDataInInputOrder(), SAMPLE);
+ writer.writeHeader(header);
+ // Go through the input, strip the records and write them to the output
+ final CloseableIterator<VariantContext> iterator = reader.iterator();
while (iterator.hasNext()) {
- final VariantContext context = iterator.next();
- writer.add(context.subContextFromSamples(NO_SAMPLES, false)); // Do not re-derive the alleles from the new, subsetted genotypes: our site-only VCF should retain these values.
- progress.record(context.getChr(), context.getStart());
+ final VariantContext full = iterator.next();
+ final VariantContext site = subsetToSamplesWithOriginalAnnotations(full, SAMPLE);
+ writer.add(site);
+ progress.record(site.getChr(), site.getStart());
}
CloserUtil.close(iterator);
@@ -93,4 +90,12 @@ public class MakeSitesOnlyVcf extends CommandLineProgram {
return 0;
}
+
+ /** Makes a new VariantContext with only the desired samples. */
+ private static VariantContext subsetToSamplesWithOriginalAnnotations(final VariantContext ctx, final Set<String> samples) {
+ final VariantContextBuilder builder = new VariantContextBuilder(ctx);
+ final GenotypesContext newGenotypes = ctx.getGenotypes().subsetToSamples(samples);
+ builder.alleles(ctx.getAlleles());
+ return builder.genotypes(newGenotypes).make();
+ }
}
diff --git a/src/java/net/sf/samtools/AbstractBAMFileIndex.java b/src/java/net/sf/samtools/AbstractBAMFileIndex.java
index 1f68704..a3a41e5 100644
--- a/src/java/net/sf/samtools/AbstractBAMFileIndex.java
+++ b/src/java/net/sf/samtools/AbstractBAMFileIndex.java
@@ -46,23 +46,6 @@ import java.util.*;
*/
public abstract class AbstractBAMFileIndex implements BAMIndex {
- /**
- * Reports the total amount of genomic data that any bin can index.
- */
- protected static final int BIN_GENOMIC_SPAN = 512*1024*1024;
-
- /**
- * What is the starting bin for each level?
- */
- private static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
-
- /**
- * Reports the maximum number of bins that can appear in a BAM file.
- */
- public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
-
- public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1];
-
private final IndexFileBuffer mIndexBuffer;
private SAMSequenceDictionary mBamDictionary = null;
@@ -104,7 +87,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
* @return Number of levels in this index.
*/
public static int getNumIndexLevels() {
- return LEVEL_STARTS.length;
+ return GenomicIndexUtil.LEVEL_STARTS.length;
}
/**
@@ -113,7 +96,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
* @return The first bin in this level.
*/
public static int getFirstBinInLevel(final int levelNumber) {
- return LEVEL_STARTS[levelNumber];
+ return GenomicIndexUtil.LEVEL_STARTS[levelNumber];
}
/**
@@ -123,9 +106,9 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
*/
public int getLevelSize(final int levelNumber) {
if(levelNumber == getNumIndexLevels())
- return MAX_BINS+1-LEVEL_STARTS[levelNumber];
+ return GenomicIndexUtil.MAX_BINS+1-GenomicIndexUtil.LEVEL_STARTS[levelNumber];
else
- return LEVEL_STARTS[levelNumber+1]-LEVEL_STARTS[levelNumber];
+ return GenomicIndexUtil.LEVEL_STARTS[levelNumber+1]-GenomicIndexUtil.LEVEL_STARTS[levelNumber];
}
/**
@@ -134,10 +117,10 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
* @return the level associated with the given bin number.
*/
public int getLevelForBin(final Bin bin) {
- if(bin.getBinNumber() >= MAX_BINS)
+ if(bin.getBinNumber() >= GenomicIndexUtil.MAX_BINS)
throw new SAMException("Tried to get level for invalid bin.");
for(int i = getNumIndexLevels()-1; i >= 0; i--) {
- if(bin.getBinNumber() >= LEVEL_STARTS[i])
+ if(bin.getBinNumber() >= GenomicIndexUtil.LEVEL_STARTS[i])
return i;
}
throw new SAMException("Unable to find correct bin for bin "+bin);
@@ -150,9 +133,9 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
*/
public int getFirstLocusInBin(final Bin bin) {
final int level = getLevelForBin(bin);
- final int levelStart = LEVEL_STARTS[level];
- final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
- return (bin.getBinNumber() - levelStart)*(BIN_GENOMIC_SPAN /levelSize)+1;
+ final int levelStart = GenomicIndexUtil.LEVEL_STARTS[level];
+ final int levelSize = ((level==getNumIndexLevels()-1) ? GenomicIndexUtil.MAX_BINS-1 : GenomicIndexUtil.LEVEL_STARTS[level+1]) - levelStart;
+ return (bin.getBinNumber() - levelStart)*(GenomicIndexUtil.BIN_GENOMIC_SPAN /levelSize)+1;
}
/**
@@ -162,9 +145,9 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
*/
public int getLastLocusInBin(final Bin bin) {
final int level = getLevelForBin(bin);
- final int levelStart = LEVEL_STARTS[level];
- final int levelSize = ((level==getNumIndexLevels()-1) ? MAX_BINS-1 : LEVEL_STARTS[level+1]) - levelStart;
- return (bin.getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
+ final int levelStart = GenomicIndexUtil.LEVEL_STARTS[level];
+ final int levelSize = ((level==getNumIndexLevels()-1) ? GenomicIndexUtil.MAX_BINS-1 : GenomicIndexUtil.LEVEL_STARTS[level+1]) - levelStart;
+ return (bin.getBinNumber()-levelStart+1)*(GenomicIndexUtil.BIN_GENOMIC_SPAN /levelSize);
}
public int getNumberOfReferences() {
@@ -230,7 +213,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger();
final int nChunks = readInteger();
- if (indexBin == MAX_BINS) {
+ if (indexBin == GenomicIndexUtil.MAX_BINS) {
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = readLong();
final long chunkEnd = readLong();
@@ -273,7 +256,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
return null;
}
- final BitSet regionBins = regionToBins(startPos, endPos);
+ final BitSet regionBins = GenomicIndexUtil.regionToBins(startPos, endPos);
if (regionBins == null) {
return null;
}
@@ -296,7 +279,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
lastChunk = new Chunk(chunkBegin, chunkEnd);
chunks.add(lastChunk);
}
- } else if (indexBin == MAX_BINS) {
+ } else if (indexBin == GenomicIndexUtil.MAX_BINS) {
// meta data - build the bin so that the count of bins is correct;
// but don't attach meta chunks to the bin, or normal queries will be off
for (int ci = 0; ci < nChunks; ci++) {
@@ -345,7 +328,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
final int sequenceLength = mBamDictionary.getSequence(reference).getSequenceLength();
return getMaxBinNumberForSequenceLength(sequenceLength);
} catch (final Exception e) {
- return MAX_BINS;
+ return GenomicIndexUtil.MAX_BINS;
}
}
@@ -364,7 +347,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
*/
protected int getMaxAddressibleGenomicLocation() {
- return BIN_GENOMIC_SPAN;
+ return GenomicIndexUtil.BIN_GENOMIC_SPAN;
}
/**
@@ -381,7 +364,7 @@ public abstract class AbstractBAMFileIndex implements BAMIndex {
return null;
}
int k;
- final BitSet bitSet = new BitSet(MAX_BINS);
+ final BitSet bitSet = new BitSet(GenomicIndexUtil.MAX_BINS);
bitSet.set(0);
for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bitSet.set(k);
for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bitSet.set(k);
diff --git a/src/java/net/sf/samtools/BAMFileReader.java b/src/java/net/sf/samtools/BAMFileReader.java
index a3f6c18..e0bf6d7 100644
--- a/src/java/net/sf/samtools/BAMFileReader.java
+++ b/src/java/net/sf/samtools/BAMFileReader.java
@@ -944,12 +944,19 @@ class BAMFileReader extends SAMFileReader.ReaderImplementation {
private IntervalComparison compareIntervalToRecord(final SAMFileReader.QueryInterval interval, final SAMRecord record) {
// interval.end <= 0 implies the end of the reference sequence.
final int intervalEnd = (interval.end <= 0? Integer.MAX_VALUE: interval.end);
+ final int alignmentEnd;
+ if (record.getReadUnmappedFlag() && record.getAlignmentStart() != SAMRecord.NO_ALIGNMENT_START) {
+ // Unmapped read with coordinate of mate.
+ alignmentEnd = record.getAlignmentStart();
+ } else {
+ alignmentEnd = record.getAlignmentEnd();
+ }
if (interval.referenceIndex < record.getReferenceIndex()) return IntervalComparison.BEFORE;
else if (interval.referenceIndex > record.getReferenceIndex()) return IntervalComparison.AFTER;
else if (intervalEnd < record.getAlignmentStart()) return IntervalComparison.BEFORE;
- else if (record.getAlignmentEnd() < interval.start) return IntervalComparison.AFTER;
- else if (CoordMath.encloses(interval.start, intervalEnd, record.getAlignmentStart(), record.getAlignmentEnd())) {
+ else if (alignmentEnd < interval.start) return IntervalComparison.AFTER;
+ else if (CoordMath.encloses(interval.start, intervalEnd, record.getAlignmentStart(), alignmentEnd)) {
return IntervalComparison.CONTAINED;
} else return IntervalComparison.OVERLAPPING;
}
diff --git a/src/java/net/sf/samtools/BAMIndexContent.java b/src/java/net/sf/samtools/BAMIndexContent.java
index 50e44ff..8ad8aeb 100644
--- a/src/java/net/sf/samtools/BAMIndexContent.java
+++ b/src/java/net/sf/samtools/BAMIndexContent.java
@@ -23,76 +23,40 @@
*/
package net.sf.samtools;
-import java.util.*;
-
/**
* Represents the contents of a bam index file for one reference.
* A BAM index (.bai) file contains information for all references in the bam file.
* This class describes the data present in the index file for one of these references;
* including the bins, chunks, and linear index.
*/
-class BAMIndexContent {
- /**
- * The reference sequence for the data currently loaded.
- */
- private final int mReferenceSequence;
-
- /**
- * A list of all bins in the above reference sequence.
- */
- private final BinList mBinList;
-
+class BAMIndexContent extends BinningIndexContent {
/**
* Chunks containing metaData for the reference, e.g. number of aligned and unaligned records
*/
private final BAMIndexMetaData mMetaData;
- /**
- * The linear index for the reference sequence above.
- */
- private final LinearIndex mLinearIndex;
/**
* @param referenceSequence Content corresponds to this reference.
- * @param bins Array of bins represented by this content, possibly sparse
- * @param numberOfBins Number of non-null bins
+ * @param binList Array of bins represented by this content, possibly sparse
* @param metaData Extra information about the reference in this index
* @param linearIndex Additional index used to optimize queries
*/
- BAMIndexContent(final int referenceSequence, final Bin[] bins, final int numberOfBins, final BAMIndexMetaData metaData, final LinearIndex linearIndex) {
- this.mReferenceSequence = referenceSequence;
- this.mBinList = new BinList(bins, numberOfBins);
+ BAMIndexContent(final int referenceSequence, final BinList binList, final BAMIndexMetaData metaData, final LinearIndex linearIndex) {
+ super(referenceSequence, binList, linearIndex);
this.mMetaData = metaData;
- this.mLinearIndex = linearIndex;
- }
-
- /**
- * Reference for this Content
- */
- public int getReferenceSequence() {
- return mReferenceSequence;
- }
-
- /**
- * Does this content have anything in this bin?
- */
- public boolean containsBin(final Bin bin) {
- return mBinList.getBin(bin.getBinNumber()) != null;
}
/**
- * @return iterable list of bins represented by this content
- */
- public BinList getBins() {
- return mBinList;
- }
-
- /**
- * @return the number of non-null bins represented by this content
+ * @param referenceSequence Content corresponds to this reference.
+ * @param bins Array of bins represented by this content, possibly sparse
+ * @param numberOfBins Number of non-null bins
+ * @param metaData Extra information about the reference in this index
+ * @param linearIndex Additional index used to optimize queries
*/
- int getNumberOfNonNullBins() {
- return mBinList.getNumberOfNonNullBins();
+ BAMIndexContent(final int referenceSequence, final Bin[] bins, final int numberOfBins, final BAMIndexMetaData metaData, final LinearIndex linearIndex) {
+ this(referenceSequence, new BinList(bins, numberOfBins), metaData, linearIndex);
}
/**
@@ -102,102 +66,4 @@ class BAMIndexContent {
return mMetaData;
}
- /**
- * @return all chunks associated with all bins in this content
- */
- public List<Chunk> getAllChunks() {
- List<Chunk> allChunks = new ArrayList<Chunk>();
- for (Bin b : mBinList)
- if (b.getChunkList() != null) {
- allChunks.addAll(b.getChunkList());
- }
- return Collections.unmodifiableList(allChunks);
- }
-
- /**
- * @return the linear index represented by this content
- */
- public LinearIndex getLinearIndex() {
- return mLinearIndex;
- }
-
- /**
- * This class is used to encapsulate the list of Bins store in the BAMIndexContent
- * While it is currently represented as an array, we may decide to change it to an ArrayList or other structure
- */
- class BinList implements Iterable<Bin> {
-
- private final Bin[] mBinArray;
- public final int numberOfNonNullBins;
- public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based
-
- /**
- * @param binArray a sparse array representation of the bins. The index into the array is the bin number.
- * @param numberOfNonNullBins
- */
- BinList(Bin[] binArray, int numberOfNonNullBins) {
- this.mBinArray = binArray;
- this.numberOfNonNullBins = numberOfNonNullBins;
- this.maxBinNumber = mBinArray.length - 1;
- }
-
- Bin getBin(int binNumber) {
- if (binNumber > maxBinNumber) return null;
- return mBinArray[binNumber];
- }
-
- int getNumberOfNonNullBins() {
- return numberOfNonNullBins;
- }
-
- /**
- * Gets an iterator over all non-null bins.
- *
- * @return An iterator over all bins.
- */
- public Iterator<Bin> iterator() {
- return new BinIterator();
- }
-
- private class BinIterator implements Iterator<Bin> {
- /**
- * Stores the bin # of the Bin currently in use.
- */
- private int nextBin;
-
- public BinIterator() {
- nextBin = 0;
- }
-
- /**
- * Are there more bins in this set, waiting to be returned?
- *
- * @return True if more bins are remaining.
- */
- public boolean hasNext() {
- while (nextBin <= maxBinNumber) {
- if (getBin(nextBin) != null) return true;
- nextBin++;
- }
- return false;
- }
-
- /**
- * Gets the next bin in the provided BinList.
- *
- * @return the next available bin in the BinList.
- */
- public Bin next() {
- if (!hasNext())
- throw new NoSuchElementException("This BinIterator is currently empty");
- Bin result = getBin(nextBin);
- nextBin++;
- return result;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("Unable to remove from a bin iterator");
- }
- }
- }
}
diff --git a/src/java/net/sf/samtools/BAMIndexer.java b/src/java/net/sf/samtools/BAMIndexer.java
index 5a041f6..b58295a 100644
--- a/src/java/net/sf/samtools/BAMIndexer.java
+++ b/src/java/net/sf/samtools/BAMIndexer.java
@@ -23,13 +23,8 @@
*/
package net.sf.samtools;
-import net.sf.samtools.util.BlockCompressedFilePointerUtil;
-
-import java.io.*;
-import java.util.Arrays;
-import java.util.List;
-
-import static net.sf.samtools.AbstractBAMFileIndex.MAX_BINS;
+import java.io.File;
+import java.io.OutputStream;
/**
* Class for both constructing BAM index content and writing it out.
@@ -56,10 +51,10 @@ public class BAMIndexer {
* @param output binary BAM Index (.bai) file
* @param fileHeader header for the corresponding bam file
*/
- public BAMIndexer(final File output, SAMFileHeader fileHeader) {
+ public BAMIndexer(final File output, final SAMFileHeader fileHeader) {
numReferences = fileHeader.getSequenceDictionary().size();
- indexBuilder = new BAMIndexBuilder(fileHeader);
+ indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary());
outputWriter = new BinaryBAMIndexWriter(numReferences, output);
}
@@ -68,10 +63,10 @@ public class BAMIndexer {
* @param output Index will be written here. output will be closed when finish() method is called.
* @param fileHeader header for the corresponding bam file.
*/
- public BAMIndexer(final OutputStream output, SAMFileHeader fileHeader) {
+ public BAMIndexer(final OutputStream output, final SAMFileHeader fileHeader) {
numReferences = fileHeader.getSequenceDictionary().size();
- indexBuilder = new BAMIndexBuilder(fileHeader);
+ indexBuilder = new BAMIndexBuilder(fileHeader.getSequenceDictionary());
outputWriter = new BinaryBAMIndexWriter(numReferences, output);
}
@@ -90,7 +85,7 @@ public class BAMIndexer {
advanceToReference(reference);
}
indexBuilder.processAlignment(rec);
- } catch (Exception e) {
+ } catch (final Exception e) {
throw new SAMException("Exception creating BAM index for record " + rec, e);
}
}
@@ -107,12 +102,14 @@ public class BAMIndexer {
}
/** write out any references between the currentReference and the nextReference */
- private void advanceToReference(int nextReference) {
+ private void advanceToReference(final int nextReference) {
while (currentReference < nextReference) {
- BAMIndexContent content = indexBuilder.processReference(currentReference);
+ final BAMIndexContent content = indexBuilder.processReference(currentReference);
outputWriter.writeReference(content);
currentReference++;
- indexBuilder.startNewReference();
+ if (currentReference < numReferences) {
+ indexBuilder.startNewReference();
+ }
}
}
@@ -144,7 +141,7 @@ public class BAMIndexer {
outputWriter.writeNoCoordinateRecordCount(existingIndex.getNoCoordinateCount());
outputWriter.close();
- } catch (Exception e) {
+ } catch (final Exception e) {
throw new SAMException("Exception creating BAM index", e);
}
}
@@ -157,24 +154,18 @@ public class BAMIndexer {
*/
private class BAMIndexBuilder {
- private final SAMFileHeader bamHeader;
+ private final SAMSequenceDictionary sequenceDictionary;
- // the bins for the current reference
- private Bin[] bins; // made only as big as needed for each reference
- private int binsSeen = 0;
+ private BinningIndexBuilder binningIndexBuilder;
- // linear index for the current reference
- private final long[] index = new long[LinearIndex.MAX_LINEAR_INDEX_SIZE];
- private int largestIndexSeen = -1;
+ private int currentReference = -1;
// information in meta data
- private BAMIndexMetaData indexStats = new BAMIndexMetaData();
+ private final BAMIndexMetaData indexStats = new BAMIndexMetaData();
- /**
- * @param header SAMFileheader used for reference name (in index stats) and for max bin number
- */
- BAMIndexBuilder(SAMFileHeader header) {
- this.bamHeader = header;
+ BAMIndexBuilder(final SAMSequenceDictionary sequenceDictionary) {
+ this.sequenceDictionary = sequenceDictionary;
+ if (!sequenceDictionary.isEmpty()) startNewReference();
}
/**
@@ -187,8 +178,7 @@ public class BAMIndexer {
// metadata
indexStats.recordMetaData(rec);
- final int alignmentStart = rec.getAlignmentStart();
- if (alignmentStart == SAMRecord.NO_ALIGNMENT_START) {
+ if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
return; // do nothing for records without coordinates, but count them
}
@@ -199,121 +189,52 @@ public class BAMIndexer {
" when constructing index for " + currentReference + " for record " + rec);
}
- // process bins
-
- final Integer binNumber = rec.getIndexingBin();
- final int binNum = binNumber == null ? rec.computeIndexingBin() : binNumber;
-
- // has the bins array been allocated? If not, do so
- if (bins == null) {
- final SAMSequenceRecord seq = bamHeader.getSequence(reference);
- if (seq == null) {
- bins = new Bin[MAX_BINS + 1];
- } else {
- bins = new Bin[AbstractBAMFileIndex.getMaxBinNumberForSequenceLength(seq.getSequenceLength()) + 1];
+ binningIndexBuilder.processFeature(new BinningIndexBuilder.FeatureToBeIndexed() {
+ @Override
+ public int getStart() {
+ return rec.getAlignmentStart();
}
- }
-
- // is there a bin already represented for this index? if not, add one
- final Bin bin;
- if (bins[binNum] != null) {
- bin = bins[binNum];
- } else {
- bin = new Bin(reference, binNum);
- bins[binNum] = bin;
- binsSeen++;
- }
-
- // process chunks
- final SAMFileSource source = rec.getFileSource();
- if (source == null) {
- throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
- }
- final Chunk newChunk = ((BAMFileSpan) source.getFilePointer()).getSingleChunk();
- final long chunkStart = newChunk.getChunkStart();
- final long chunkEnd = newChunk.getChunkEnd();
-
- final List<Chunk> oldChunks = bin.getChunkList();
- if (!bin.containsChunks()) {
- bin.addInitialChunk(newChunk);
-
- } else {
- final Chunk lastChunk = bin.getLastChunk();
-
- // Coalesce chunks that are in the same or adjacent file blocks.
- // Similar to AbstractBAMFileIndex.optimizeChunkList,
- // but no need to copy the list, no minimumOffset, and maintain bin.lastChunk
- if (BlockCompressedFilePointerUtil.areInSameOrAdjacentBlocks(lastChunk.getChunkEnd(),chunkStart)) {
- lastChunk.setChunkEnd(chunkEnd); // coalesced
- } else {
- oldChunks.add(newChunk);
- bin.setLastChunk(newChunk);
+ @Override
+ public int getEnd() {
+ return rec.getAlignmentEnd();
}
- }
-
- // process linear index
- // the smallest file offset that appears in the 16k window for this bin
- final int alignmentEnd = rec.getAlignmentEnd();
- int startWindow = LinearIndex.convertToLinearIndexOffset(alignmentStart); // the 16k window
- final int endWindow;
+ @Override
+ public Integer getIndexingBin() {
+ final Integer binNumber = rec.getIndexingBin();
+ return (binNumber == null ? rec.computeIndexingBin() : binNumber);
- if (alignmentEnd == SAMRecord.NO_ALIGNMENT_START) { // assume alignment uses one position
- // Next line for C (samtools index) compatibility. Differs only when on a window boundary
- startWindow = LinearIndex.convertToLinearIndexOffset(alignmentStart - 1);
- endWindow = startWindow;
- } else {
- endWindow = LinearIndex.convertToLinearIndexOffset(alignmentEnd);
- }
-
- if (endWindow > largestIndexSeen) {
- largestIndexSeen = endWindow;
- }
+ }
- // set linear index at every 16K window that this alignment overlaps
- for (int win = startWindow; win <= endWindow; win++) {
- if (index[win] == 0 || chunkStart < index[win]) {
- index[win] = chunkStart;
+ @Override
+ public Chunk getChunk() {
+ final SAMFileSource source = rec.getFileSource();
+ if (source == null) {
+ throw new SAMException("No source (virtual file offsets); needed for indexing on BAM Record " + rec);
+ }
+ return ((BAMFileSpan) source.getFilePointer()).getSingleChunk();
}
- }
+ });
+
}
/**
* Creates the BAMIndexContent for this reference.
* Requires all alignments of the reference have already been processed.
+ * @return Null if there are no features for this reference.
*/
- public BAMIndexContent processReference(int reference) {
+ public BAMIndexContent processReference(final int reference) {
if (reference != currentReference) {
throw new SAMException("Unexpected reference " + reference + " when constructing index for " + currentReference);
}
- // process bins
- if (binsSeen == 0) return null; // no bins for this reference
-
- // process chunks
- // nothing needed
+ final BinningIndexContent indexContent = binningIndexBuilder.generateIndexContent();
+ if (indexContent == null) return null;
+ return new BAMIndexContent(indexContent.getReferenceSequence(), indexContent.getBins(),
+ indexStats, indexContent.getLinearIndex());
- // process linear index
- // linear index will only be as long as the largest index seen
- final long[] newIndex = new long[largestIndexSeen + 1]; // in java1.6 Arrays.copyOf(index, largestIndexSeen + 1);
-
- // C (samtools index) also fills in intermediate 0's with values. This seems unnecessary, but safe
- long lastNonZeroOffset = 0;
- for (int i = 0; i <= largestIndexSeen; i++) {
- if (index[i] == 0) {
- index[i] = lastNonZeroOffset; // not necessary, but C (samtools index) does this
- // note, if you remove the above line BAMIndexWriterTest.compareTextual and compareBinary will have to change
- } else {
- lastNonZeroOffset = index[i];
- }
- newIndex[i] = index[i];
- }
-
- final LinearIndex linearIndex = new LinearIndex(reference, 0, newIndex);
-
- return new BAMIndexContent(reference, bins, binsSeen, indexStats, linearIndex);
}
/**
@@ -327,13 +248,12 @@ public class BAMIndexer {
* reinitialize all data structures when the reference changes
*/
void startNewReference() {
- bins = null;
- if (binsSeen > 0) {
- Arrays.fill(index, 0);
- }
- binsSeen = 0;
- largestIndexSeen = -1;
+ ++currentReference;
+ // I'm not crazy about recycling this object, but that is the way it was originally written and
+ // it helps keep track of no-coordinate read count (which shouldn't be stored in this class anyway).
indexStats.newReference();
+ binningIndexBuilder = new BinningIndexBuilder(currentReference,
+ sequenceDictionary.getSequence(currentReference).getSequenceLength());
}
}
}
\ No newline at end of file
diff --git a/src/java/net/sf/samtools/Bin.java b/src/java/net/sf/samtools/Bin.java
index 9c56d2d..7f4f5c6 100644
--- a/src/java/net/sf/samtools/Bin.java
+++ b/src/java/net/sf/samtools/Bin.java
@@ -64,7 +64,7 @@ public class Bin implements Comparable<Bin> {
return referenceSequence;
}
- protected int getBinNumber() {
+ public int getBinNumber() {
return binNumber;
}
@@ -75,11 +75,11 @@ public class Bin implements Comparable<Bin> {
* @return True if the two bins are equal. False otherwise.
*/
@Override
- public boolean equals(Object other) {
+ public boolean equals(final Object other) {
if(other == null) return false;
if(!(other instanceof Bin)) return false;
- Bin otherBin = (Bin)other;
+ final Bin otherBin = (Bin)other;
return this.referenceSequence == otherBin.referenceSequence && this.binNumber == otherBin.binNumber;
}
@@ -105,7 +105,7 @@ public class Bin implements Comparable<Bin> {
* @param other Other bin to which this bin should be compared.
* @return -1 if this < other, 0 if this == other, 1 if this > other.
*/
- public int compareTo(Bin other) {
+ public int compareTo(final Bin other) {
if(other == null)
throw new ClassCastException("Cannot compare to a null object");
@@ -120,8 +120,8 @@ public class Bin implements Comparable<Bin> {
/**
* Adds the first chunk to the bin
*/
- public void addInitialChunk(Chunk newChunk){
- List<Chunk> oldChunks = new ArrayList<Chunk>();
+ public void addInitialChunk(final Chunk newChunk){
+ final List<Chunk> oldChunks = new ArrayList<Chunk>();
setChunkList(oldChunks);
setLastChunk(newChunk);
oldChunks.add(newChunk);
@@ -130,7 +130,7 @@ public class Bin implements Comparable<Bin> {
/**
* Sets the chunks associated with this bin
*/
- public void setChunkList(List<Chunk> list){
+ public void setChunkList(final List<Chunk> list){
chunkList = list;
}
@@ -147,7 +147,7 @@ public class Bin implements Comparable<Bin> {
/**
* Optimization to keep lastChunk instead of iterating over all chunks repeatedly
*/
- public void setLastChunk(Chunk c){
+ public void setLastChunk(final Chunk c){
lastChunk = c;
}
diff --git a/src/java/net/sf/samtools/BinaryBAMIndexWriter.java b/src/java/net/sf/samtools/BinaryBAMIndexWriter.java
index ec42148..c0a009e 100644
--- a/src/java/net/sf/samtools/BinaryBAMIndexWriter.java
+++ b/src/java/net/sf/samtools/BinaryBAMIndexWriter.java
@@ -51,7 +51,7 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
try {
codec = new BinaryCodec(output, true);
writeHeader();
- } catch (Exception e) {
+ } catch (final Exception e) {
throw new SAMException("Exception opening output file " + output, e);
}
}
@@ -68,7 +68,7 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
try {
codec = new BinaryCodec(output);
writeHeader();
- } catch (Exception e) {
+ } catch (final Exception e) {
throw new SAMException("Exception opening output stream", e);
}
}
@@ -102,12 +102,12 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
//final List<Chunk> chunks = content.getMetaData() == null ? null
// : content.getMetaData().getMetaDataChunks();
- BAMIndexMetaData metaData = content.getMetaData();
+ final BAMIndexMetaData metaData = content.getMetaData();
codec.writeInt(size + ((metaData != null)? 1 : 0 ));
// codec.writeInt(size);
- for (Bin bin : bins) { // note, bins will always be sorted
- if (bin.getBinNumber() == AbstractBAMFileIndex.MAX_BINS)
+ for (final Bin bin : bins) { // note, bins will always be sorted
+ if (bin.getBinNumber() == GenomicIndexUtil.MAX_BINS)
continue;
writeBin(bin);
}
@@ -135,7 +135,7 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
}
try {
codec.getOutputStream().flush();
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new SAMException("IOException in BinaryBAMIndexWriter reference " + content.getReferenceSequence(), e);
}
}
@@ -156,9 +156,9 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
codec.close();
}
- private void writeBin(Bin bin) {
+ private void writeBin(final Bin bin) {
final int binNumber = bin.getBinNumber();
- if (binNumber >= AbstractBAMFileIndex.MAX_BINS){
+ if (binNumber >= GenomicIndexUtil.MAX_BINS){
throw new SAMException("Unexpected bin number when writing bam index " + binNumber);
}
@@ -181,8 +181,8 @@ class BinaryBAMIndexWriter implements BAMIndexWriter {
*
* @param metaData information describing numAligned records, numUnAligned, etc
*/
- private void writeChunkMetaData(BAMIndexMetaData metaData) {
- codec.writeInt(AbstractBAMFileIndex.MAX_BINS);
+ private void writeChunkMetaData(final BAMIndexMetaData metaData) {
+ codec.writeInt(GenomicIndexUtil.MAX_BINS);
final int nChunk = 2;
codec.writeInt(nChunk);
codec.writeLong(metaData.getFirstOffset());
diff --git a/src/java/net/sf/samtools/BinningIndexBuilder.java b/src/java/net/sf/samtools/BinningIndexBuilder.java
new file mode 100644
index 0000000..e3d5492
--- /dev/null
+++ b/src/java/net/sf/samtools/BinningIndexBuilder.java
@@ -0,0 +1,187 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+import net.sf.samtools.util.BlockCompressedFilePointerUtil;
+
+import java.util.List;
+
+import static net.sf.samtools.GenomicIndexUtil.MAX_BINS;
+
+/**
+ * Builder for a BinningIndexContent object.
+ */
+public class BinningIndexBuilder {
+ private final int referenceSequence;
+ // the bins for the current reference
+ private final Bin[] bins; // made only as big as needed for each reference
+ private int binsSeen = 0;
+
+ // linear index for the current reference
+ private final long[] index = new long[LinearIndex.MAX_LINEAR_INDEX_SIZE];
+ private int largestIndexSeen = -1;
+
+
+ /**
+ *
+ * @param referenceSequence
+ * @param sequenceLength 0 implies unknown length. Known length will reduce memory use.
+ */
+ public BinningIndexBuilder(final int referenceSequence, final int sequenceLength) {
+ this.referenceSequence = referenceSequence;
+ final int numBins;
+ if (sequenceLength <= 0) numBins = MAX_BINS + 1;
+ else numBins = AbstractBAMFileIndex.getMaxBinNumberForSequenceLength(sequenceLength) + 1;
+ bins = new Bin[numBins];
+ }
+
+ public BinningIndexBuilder(final int referenceSequence) {
+ this(referenceSequence, 0);
+ }
+
+ /**
+ * coordinates are 1-based, inclusive
+ */
+ public interface FeatureToBeIndexed {
+ public int getStart();
+ public int getEnd();
+ public Integer getIndexingBin();
+ public Chunk getChunk();
+ }
+
+ public void processFeature(final FeatureToBeIndexed feature) {
+
+ // process bins
+
+ final Integer binNumber = feature.getIndexingBin();
+ final int binNum = binNumber == null ? computeIndexingBin(feature) : binNumber;
+
+
+ // is there a bin already represented for this index? if not, add one
+ final Bin bin;
+ if (bins[binNum] != null) {
+ bin = bins[binNum];
+ } else {
+ bin = new Bin(referenceSequence, binNum);
+ bins[binNum] = bin;
+ binsSeen++;
+ }
+
+ // process chunks
+
+ final Chunk newChunk = feature.getChunk();
+ final long chunkStart = newChunk.getChunkStart();
+ final long chunkEnd = newChunk.getChunkEnd();
+
+ final List<Chunk> oldChunks = bin.getChunkList();
+ if (!bin.containsChunks()) {
+ bin.addInitialChunk(newChunk);
+
+ } else {
+ final Chunk lastChunk = bin.getLastChunk();
+
+ // Coalesce chunks that are in the same or adjacent file blocks.
+ // Similar to AbstractBAMFileIndex.optimizeChunkList,
+ // but no need to copy the list, no minimumOffset, and maintain bin.lastChunk
+ if (BlockCompressedFilePointerUtil.areInSameOrAdjacentBlocks(lastChunk.getChunkEnd(), chunkStart)) {
+ lastChunk.setChunkEnd(chunkEnd); // coalesced
+ } else {
+ oldChunks.add(newChunk);
+ bin.setLastChunk(newChunk);
+ }
+ }
+
+ // process linear index
+
+ // the smallest file offset that appears in the 16k window for this bin
+ final int featureEnd = feature.getEnd();
+ int startWindow = LinearIndex.convertToLinearIndexOffset(feature.getStart()); // the 16k window
+ final int endWindow;
+
+ if (featureEnd == GenomicIndexUtil.UNSET_GENOMIC_LOCATION) { // assume feature uses one position
+ // Next line for C (samtools index) compatibility. Differs only when on a window boundary
+ startWindow = LinearIndex.convertToLinearIndexOffset(feature.getStart() - 1);
+ endWindow = startWindow;
+ } else {
+ endWindow = LinearIndex.convertToLinearIndexOffset(featureEnd);
+ }
+
+ if (endWindow > largestIndexSeen) {
+ largestIndexSeen = endWindow;
+ }
+
+ // set linear index at every 16K window that this feature overlaps
+ for (int win = startWindow; win <= endWindow; win++) {
+ if (index[win] == 0 || chunkStart < index[win]) {
+ index[win] = chunkStart;
+ }
+ }
+ }
+
+ /**
+ * Creates the BAMIndexContent for this reference.
+ * Requires all features of the reference have already been processed.
+ */
+ public BinningIndexContent generateIndexContent() {
+
+
+ // process bins
+ if (binsSeen == 0) return null; // no bins for this reference
+
+ // process chunks
+ // nothing needed
+
+ // process linear index
+ // linear index will only be as long as the largest index seen
+ final long[] newIndex = new long[largestIndexSeen + 1]; // in java1.6 Arrays.copyOf(index, largestIndexSeen + 1);
+
+ // C (samtools index) also fills in intermediate 0's with values. This seems unnecessary, but safe
+ long lastNonZeroOffset = 0;
+ for (int i = 0; i <= largestIndexSeen; i++) {
+ if (index[i] == 0) {
+ index[i] = lastNonZeroOffset; // not necessary, but C (samtools index) does this
+ // note, if you remove the above line BAMIndexWriterTest.compareTextual and compareBinary will have to change
+ } else {
+ lastNonZeroOffset = index[i];
+ }
+ newIndex[i] = index[i];
+ }
+
+ final LinearIndex linearIndex = new LinearIndex(referenceSequence, 0, newIndex);
+
+ return new BinningIndexContent(referenceSequence, new BinningIndexContent.BinList(bins, binsSeen), linearIndex);
+ }
+
+ private int computeIndexingBin(final FeatureToBeIndexed feature) {
+ // reg2bin has zero-based, half-open API
+ final int start = feature.getStart()-1;
+ int end = feature.getEnd();
+ if (end <= 0) {
+ // If feature end cannot be determined (e.g. because a read is not really aligned),
+ // then treat this as a one base feature for indexing purposes.
+ end = start + 1;
+ }
+ return GenomicIndexUtil.reg2bin(start, end);
+ }
+}
diff --git a/src/java/net/sf/samtools/BAMIndexContent.java b/src/java/net/sf/samtools/BinningIndexContent.java
similarity index 60%
copy from src/java/net/sf/samtools/BAMIndexContent.java
copy to src/java/net/sf/samtools/BinningIndexContent.java
index 50e44ff..2054e7b 100644
--- a/src/java/net/sf/samtools/BAMIndexContent.java
+++ b/src/java/net/sf/samtools/BinningIndexContent.java
@@ -1,203 +1,262 @@
-/*
- * The MIT License
- *
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-package net.sf.samtools;
-
-import java.util.*;
-
-/**
- * Represents the contents of a bam index file for one reference.
- * A BAM index (.bai) file contains information for all references in the bam file.
- * This class describes the data present in the index file for one of these references;
- * including the bins, chunks, and linear index.
- */
-class BAMIndexContent {
- /**
- * The reference sequence for the data currently loaded.
- */
- private final int mReferenceSequence;
-
- /**
- * A list of all bins in the above reference sequence.
- */
- private final BinList mBinList;
-
- /**
- * Chunks containing metaData for the reference, e.g. number of aligned and unaligned records
- */
- private final BAMIndexMetaData mMetaData;
-
- /**
- * The linear index for the reference sequence above.
- */
- private final LinearIndex mLinearIndex;
-
-
- /**
- * @param referenceSequence Content corresponds to this reference.
- * @param bins Array of bins represented by this content, possibly sparse
- * @param numberOfBins Number of non-null bins
- * @param metaData Extra information about the reference in this index
- * @param linearIndex Additional index used to optimize queries
- */
- BAMIndexContent(final int referenceSequence, final Bin[] bins, final int numberOfBins, final BAMIndexMetaData metaData, final LinearIndex linearIndex) {
- this.mReferenceSequence = referenceSequence;
- this.mBinList = new BinList(bins, numberOfBins);
- this.mMetaData = metaData;
- this.mLinearIndex = linearIndex;
- }
-
- /**
- * Reference for this Content
- */
- public int getReferenceSequence() {
- return mReferenceSequence;
- }
-
- /**
- * Does this content have anything in this bin?
- */
- public boolean containsBin(final Bin bin) {
- return mBinList.getBin(bin.getBinNumber()) != null;
- }
-
- /**
- * @return iterable list of bins represented by this content
- */
- public BinList getBins() {
- return mBinList;
- }
-
- /**
- * @return the number of non-null bins represented by this content
- */
- int getNumberOfNonNullBins() {
- return mBinList.getNumberOfNonNullBins();
- }
-
- /**
- * @return the meta data chunks for this content
- */
- public BAMIndexMetaData getMetaData() {
- return mMetaData;
- }
-
- /**
- * @return all chunks associated with all bins in this content
- */
- public List<Chunk> getAllChunks() {
- List<Chunk> allChunks = new ArrayList<Chunk>();
- for (Bin b : mBinList)
- if (b.getChunkList() != null) {
- allChunks.addAll(b.getChunkList());
- }
- return Collections.unmodifiableList(allChunks);
- }
-
- /**
- * @return the linear index represented by this content
- */
- public LinearIndex getLinearIndex() {
- return mLinearIndex;
- }
-
- /**
- * This class is used to encapsulate the list of Bins store in the BAMIndexContent
- * While it is currently represented as an array, we may decide to change it to an ArrayList or other structure
- */
- class BinList implements Iterable<Bin> {
-
- private final Bin[] mBinArray;
- public final int numberOfNonNullBins;
- public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based
-
- /**
- * @param binArray a sparse array representation of the bins. The index into the array is the bin number.
- * @param numberOfNonNullBins
- */
- BinList(Bin[] binArray, int numberOfNonNullBins) {
- this.mBinArray = binArray;
- this.numberOfNonNullBins = numberOfNonNullBins;
- this.maxBinNumber = mBinArray.length - 1;
- }
-
- Bin getBin(int binNumber) {
- if (binNumber > maxBinNumber) return null;
- return mBinArray[binNumber];
- }
-
- int getNumberOfNonNullBins() {
- return numberOfNonNullBins;
- }
-
- /**
- * Gets an iterator over all non-null bins.
- *
- * @return An iterator over all bins.
- */
- public Iterator<Bin> iterator() {
- return new BinIterator();
- }
-
- private class BinIterator implements Iterator<Bin> {
- /**
- * Stores the bin # of the Bin currently in use.
- */
- private int nextBin;
-
- public BinIterator() {
- nextBin = 0;
- }
-
- /**
- * Are there more bins in this set, waiting to be returned?
- *
- * @return True if more bins are remaining.
- */
- public boolean hasNext() {
- while (nextBin <= maxBinNumber) {
- if (getBin(nextBin) != null) return true;
- nextBin++;
- }
- return false;
- }
-
- /**
- * Gets the next bin in the provided BinList.
- *
- * @return the next available bin in the BinList.
- */
- public Bin next() {
- if (!hasNext())
- throw new NoSuchElementException("This BinIterator is currently empty");
- Bin result = getBin(nextBin);
- nextBin++;
- return result;
- }
-
- public void remove() {
- throw new UnsupportedOperationException("Unable to remove from a bin iterator");
- }
- }
- }
-}
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+import java.util.*;
+
+/**
+ * In-memory representation of the binning index for a single reference. BAM and Tabix are both binning indices
+ * with slightly different disk formats but identical in-memory representations.
+ */
+public class BinningIndexContent {
+ /**
+ * The reference sequence for the data currently loaded.
+ */
+ private final int mReferenceSequence;
+
+ /**
+ * A list of all bins in the above reference sequence.
+ */
+ private final BinList mBinList;
+
+ /**
+ * The linear index for the reference sequence above.
+ */
+ private final LinearIndex mLinearIndex;
+
+
+ /**
+ * @param referenceSequence Content corresponds to this reference.
+ * @param binList Array of bins represented by this content, possibly sparse
+ * @param linearIndex Additional index used to optimize queries
+ */
+ public BinningIndexContent(final int referenceSequence, final BinList binList, final LinearIndex linearIndex) {
+ this.mReferenceSequence = referenceSequence;
+ this.mBinList = binList;
+ this.mLinearIndex = linearIndex;
+ }
+
+ /**
+ * Reference for this Content
+ */
+ public int getReferenceSequence() {
+ return mReferenceSequence;
+ }
+
+ /**
+ * Does this content have anything in this bin?
+ */
+ public boolean containsBin(final Bin bin) {
+ return mBinList.getBin(bin.getBinNumber()) != null;
+ }
+
+ /**
+ * @return iterable list of bins represented by this content
+ */
+ public BinList getBins() {
+ return mBinList;
+ }
+
+ /**
+ * @return the number of non-null bins represented by this content
+ */
+ int getNumberOfNonNullBins() {
+ return mBinList.getNumberOfNonNullBins();
+ }
+
+ /**
+ * @return all chunks associated with all bins in this content
+ */
+ public List<Chunk> getAllChunks() {
+ final List<Chunk> allChunks = new ArrayList<Chunk>();
+ for (final Bin b : mBinList)
+ if (b.getChunkList() != null) {
+ allChunks.addAll(b.getChunkList());
+ }
+ return Collections.unmodifiableList(allChunks);
+ }
+
+ /**
+ * @return the linear index represented by this content
+ */
+ public LinearIndex getLinearIndex() {
+ return mLinearIndex;
+ }
+
+
+ /**
+ *
+ * @param startPos 1-based, inclusive
+ * @param endPos 1-based, inclusive
+ * @return List of Chunks overlapping the given region. May return null if there are none.
+ */
+ public List<Chunk> getChunksOverlapping(final int startPos, final int endPos) {
+ final BitSet overlappingBins = GenomicIndexUtil.regionToBins(startPos,endPos);
+ if (overlappingBins == null) return null;
+
+ // System.out.println("# Sequence target TID: " + referenceIndex);
+ final List<Bin> bins = new ArrayList<Bin>();
+ for(final Bin bin: this.getBins()) {
+ if (overlappingBins.get(bin.getBinNumber()))
+ bins.add(bin);
+ }
+
+ if (bins.isEmpty()) {
+ return null;
+ }
+
+ final List<Chunk> chunkList = new ArrayList<Chunk>();
+ for(final Bin bin: bins) {
+ for(final Chunk chunk: bin.getChunkList())
+ chunkList.add(chunk.clone());
+ }
+
+ if (chunkList.isEmpty()) {
+ return null;
+ }
+
+ return Chunk.optimizeChunkList(chunkList,this.getLinearIndex().getMinimumOffset(startPos));
+ }
+ /**
+ * This class is used to encapsulate the list of Bins stored in the BAMIndexContent
+ * While it is currently represented as an array, we may decide to change it to an ArrayList or other structure
+ */
+ public static class BinList implements Iterable<Bin> {
+
+ private final Bin[] mBinArray;
+ public final int numberOfNonNullBins;
+ public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based
+
+ /**
+ * @param binArray a sparse array representation of the bins. The index into the array is the bin number.
+ * @param numberOfNonNullBins
+ */
+ public BinList(final Bin[] binArray, final int numberOfNonNullBins) {
+ this.mBinArray = binArray;
+ this.numberOfNonNullBins = numberOfNonNullBins;
+ this.maxBinNumber = mBinArray.length - 1;
+ }
+
+ Bin getBin(final int binNumber) {
+ if (binNumber > maxBinNumber) return null;
+ return mBinArray[binNumber];
+ }
+
+ int getNumberOfNonNullBins() {
+ return numberOfNonNullBins;
+ }
+
+ /**
+ * @return An iterator over all non-empty bins.
+ */
+ public Iterator<Bin> iterator() {
+ return new BinIterator();
+ }
+
+ private class BinIterator implements Iterator<Bin> {
+ /**
+ * Stores the bin # of the Bin currently in use.
+ */
+ private int nextBin;
+
+ public BinIterator() {
+ nextBin = 0;
+ }
+
+ /**
+ * Are there more bins in this set, waiting to be returned?
+ *
+ * @return True if more bins are remaining.
+ */
+ public boolean hasNext() {
+ while (nextBin <= maxBinNumber) {
+ if (getBin(nextBin) != null) return true;
+ nextBin++;
+ }
+ return false;
+ }
+
+ /**
+ * Gets the next bin in the provided BinList.
+ *
+ * @return the next available bin in the BinList.
+ */
+ public Bin next() {
+ if (!hasNext())
+ throw new NoSuchElementException("This BinIterator is currently empty");
+ final Bin result = getBin(nextBin);
+ nextBin++;
+ return result;
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException("Unable to remove from a bin iterator");
+ }
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final BinList bins = (BinList) o;
+
+ if (maxBinNumber != bins.maxBinNumber) return false;
+ if (numberOfNonNullBins != bins.numberOfNonNullBins) return false;
+ if (!Arrays.equals(mBinArray, bins.mBinArray)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = Arrays.hashCode(mBinArray);
+ result = 31 * result + numberOfNonNullBins;
+ result = 31 * result + maxBinNumber;
+ return result;
+ }
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final BinningIndexContent that = (BinningIndexContent) o;
+
+ if (mReferenceSequence != that.mReferenceSequence) return false;
+ if (!mBinList.equals(that.mBinList)) return false;
+ if (!mLinearIndex.equals(that.mLinearIndex)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = mReferenceSequence;
+ result = 31 * result + mBinList.hashCode();
+ result = 31 * result + mLinearIndex.hashCode();
+ return result;
+ }
+}
diff --git a/src/java/net/sf/samtools/CachingBAMFileIndex.java b/src/java/net/sf/samtools/CachingBAMFileIndex.java
index 516ca77..65d777e 100644
--- a/src/java/net/sf/samtools/CachingBAMFileIndex.java
+++ b/src/java/net/sf/samtools/CachingBAMFileIndex.java
@@ -56,6 +56,7 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
* @param endPos 1-based end of the desired interval, inclusive
* @return the virtual file position. Each pair is the first and last virtual file position
* in a range that can be scanned to find SAMRecords that overlap the given positions.
+ * May return null if there is no content overlapping the region.
*/
public BAMFileSpan getSpanOverlapping(final int referenceIndex, final int startPos, final int endPos) {
final BAMIndexContent queryResults = getQueryResults(referenceIndex);
@@ -63,30 +64,9 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
if(queryResults == null)
return null;
- final BinList overlappingBins = getBinsOverlapping(referenceIndex,startPos,endPos);
+ final List<Chunk> chunkList = queryResults.getChunksOverlapping(startPos, endPos);
+ if (chunkList == null) return null;
- // System.out.println("# Sequence target TID: " + referenceIndex);
- final List<Bin> bins = new ArrayList<Bin>();
- for(final Bin bin: queryResults.getBins()) {
- if (overlappingBins.getBins().get(bin.getBinNumber()))
- bins.add(bin);
- }
-
- if (bins.isEmpty()) {
- return null;
- }
-
- List<Chunk> chunkList = new ArrayList<Chunk>();
- for(final Bin bin: bins) {
- for(final Chunk chunk: bin.getChunkList())
- chunkList.add(chunk.clone());
- }
-
- if (chunkList.isEmpty()) {
- return null;
- }
-
- chunkList = Chunk.optimizeChunkList(chunkList,queryResults.getLinearIndex().getMinimumOffset(startPos));
return new BAMFileSpan(chunkList);
}
@@ -98,7 +78,7 @@ class CachingBAMFileIndex extends AbstractBAMFileIndex implements BrowseableBAMI
* @return a list of bins that contain relevant data.
*/
public BinList getBinsOverlapping(final int referenceIndex, final int startPos, final int endPos) {
- final BitSet regionBins = regionToBins(startPos,endPos);
+ final BitSet regionBins = GenomicIndexUtil.regionToBins(startPos, endPos);
if (regionBins == null) {
return null;
}
diff --git a/src/java/net/sf/samtools/Chunk.java b/src/java/net/sf/samtools/Chunk.java
index 0555f56..7e51d2b 100644
--- a/src/java/net/sf/samtools/Chunk.java
+++ b/src/java/net/sf/samtools/Chunk.java
@@ -15,7 +15,7 @@ import java.util.*;
*
* See the SAM/BAM spec for more details.
*/
-class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
+public class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
private static final long serialVersionUID = 1L;
/**
@@ -40,7 +40,7 @@ class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
return new Chunk(mChunkStart,mChunkEnd);
}
- protected long getChunkStart() {
+ public long getChunkStart() {
return mChunkStart;
}
@@ -48,7 +48,7 @@ class Chunk implements Cloneable, Serializable,Comparable<Chunk> {
mChunkStart = value;
}
- protected long getChunkEnd() {
+ public long getChunkEnd() {
return mChunkEnd;
}
diff --git a/src/java/net/sf/samtools/Defaults.java b/src/java/net/sf/samtools/Defaults.java
index 199e00a..ed0b7eb 100644
--- a/src/java/net/sf/samtools/Defaults.java
+++ b/src/java/net/sf/samtools/Defaults.java
@@ -22,6 +22,12 @@ public class Defaults {
/** Buffer size, in bytes, used whenever reading/writing files or streams. Default = 128k. */
public static final int BUFFER_SIZE;
+ /**
+ * Even if BUFFER_SIZE is 0, this is guaranteed to be non-zero. If BUFFER_SIZE is non-zero,
+ * this == BUFFER_SIZE
+ */
+ public static final int NON_ZERO_BUFFER_SIZE;
+
/** Should BlockCompressedOutputStream attempt to load libIntelDeflater? */
public static final boolean TRY_USE_INTEL_DEFLATER;
@@ -37,6 +43,11 @@ public class Defaults {
BUFFER_SIZE = getIntProperty("buffer_size", 1024 * 128);
TRY_USE_INTEL_DEFLATER = getBooleanProperty("try_use_intel_deflater", true);
INTEL_DEFLATER_SHARED_LIBRARY_PATH = getStringProperty("intel_deflater_so_path", null);
+ if (BUFFER_SIZE == 0) {
+ NON_ZERO_BUFFER_SIZE = 1024 * 128;
+ } else {
+ NON_ZERO_BUFFER_SIZE = BUFFER_SIZE;
+ }
}
/** Gets a string system property, prefixed with "samjdk." using the default if the property does not exist.*/
diff --git a/src/java/net/sf/samtools/GenomicIndexUtil.java b/src/java/net/sf/samtools/GenomicIndexUtil.java
new file mode 100644
index 0000000..f9785be
--- /dev/null
+++ b/src/java/net/sf/samtools/GenomicIndexUtil.java
@@ -0,0 +1,100 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+import java.util.BitSet;
+
+/**
+ * Constants and methods used by BAM and Tribble indices
+ */
+public class GenomicIndexUtil {
+ /**
+ * Reports the total amount of genomic data that any bin can index.
+ */
+ public static final int BIN_GENOMIC_SPAN = 512*1024*1024;
+
+ /**
+ * What is the starting bin for each level?
+ */
+ public static final int[] LEVEL_STARTS = {0,1,9,73,585,4681};
+
+ /**
+ * Reports the maximum number of bins that can appear in a binning index.
+ */
+ public static final int MAX_BINS = 37450; // =(8^6-1)/7+1
+
+ public static final int MAX_LINEAR_INDEX_SIZE = MAX_BINS+1-LEVEL_STARTS[LEVEL_STARTS.length-1];
+
+
+ /**
+ * E.g. for a SAMRecord with no genomic coordinate.
+ */
+ public static final int UNSET_GENOMIC_LOCATION = 0;
+
+ /**
+ * calculate the bin given an alignment in [beg,end)
+ * Copied from SAM spec.
+ * @param beg 0-based start of read (inclusive)
+ * @param end 0-based end of read (exclusive)
+ */
+ static int reg2bin(final int beg, int end)
+ {
+ --end;
+
+ if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);
+ if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);
+ if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);
+ if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);
+ if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);
+ return 0;
+ }
+
+ // TODO: It is disturbing that reg2bin is 0-based, but regionToBins is 1-based.
+ // TODO: It is also suspicious that regionToBins decrements endPos. Test it!
+ // TODO: However end is decremented in reg2bin so perhaps there is no conflict.
+ /**
+ * Get candidate bins for the specified region
+ * @param startPos 1-based start of target region, inclusive.
+ * @param endPos 1-based end of target region, inclusive.
+ * @return bit set for each bin that may contain SAMRecords in the target region.
+ */
+ public static BitSet regionToBins(final int startPos, final int endPos) {
+ final int maxPos = 0x1FFFFFFF;
+ final int start = (startPos <= 0) ? 0 : (startPos-1) & maxPos;
+ final int end = (endPos <= 0) ? maxPos : (endPos-1) & maxPos;
+ if (start > end) {
+ return null;
+ }
+ int k;
+ final BitSet bitSet = new BitSet(GenomicIndexUtil.MAX_BINS);
+ bitSet.set(0);
+ for (k = 1 + (start>>26); k <= 1 + (end>>26); ++k) bitSet.set(k);
+ for (k = 9 + (start>>23); k <= 9 + (end>>23); ++k) bitSet.set(k);
+ for (k = 73 + (start>>20); k <= 73 + (end>>20); ++k) bitSet.set(k);
+ for (k = 585 + (start>>17); k <= 585 + (end>>17); ++k) bitSet.set(k);
+ for (k = 4681 + (start>>14); k <= 4681 + (end>>14); ++k) bitSet.set(k);
+ return bitSet;
+ }
+
+}
diff --git a/src/java/net/sf/samtools/LinearIndex.java b/src/java/net/sf/samtools/LinearIndex.java
index 1344872..21be098 100644
--- a/src/java/net/sf/samtools/LinearIndex.java
+++ b/src/java/net/sf/samtools/LinearIndex.java
@@ -23,6 +23,8 @@
*/
package net.sf.samtools;
+import java.util.Arrays;
+
/**
* The linear index associated with a given reference in a BAM index.
*
@@ -31,7 +33,7 @@ package net.sf.samtools;
*/
public class LinearIndex {
- public static final int MAX_LINEAR_INDEX_SIZE = AbstractBAMFileIndex.MAX_LINEAR_INDEX_SIZE;
+ public static final int MAX_LINEAR_INDEX_SIZE = GenomicIndexUtil.MAX_LINEAR_INDEX_SIZE;
public static final int BAM_LIDX_SHIFT = 14;
@@ -92,11 +94,33 @@ public class LinearIndex {
* Direct access to the array. Be careful!
* @return The elements of the linear index.
*/
- protected long[] getIndexEntries() {
+ public long[] getIndexEntries() {
return mIndexEntries;
}
- protected int getIndexStart() {
+ public int getIndexStart() {
return mIndexStart;
}
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final LinearIndex that = (LinearIndex) o;
+
+ if (mIndexStart != that.mIndexStart) return false;
+ if (mReferenceSequence != that.mReferenceSequence) return false;
+ if (!Arrays.equals(mIndexEntries, that.mIndexEntries)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = mReferenceSequence;
+ result = 31 * result + mIndexStart;
+ result = 31 * result + Arrays.hashCode(mIndexEntries);
+ return result;
+ }
}
diff --git a/src/java/net/sf/samtools/SAMFileReader.java b/src/java/net/sf/samtools/SAMFileReader.java
index d3fd9db..5b6ed4e 100644
--- a/src/java/net/sf/samtools/SAMFileReader.java
+++ b/src/java/net/sf/samtools/SAMFileReader.java
@@ -154,7 +154,7 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
/**
* Read a SAM or BAM file. Indexed lookup not allowed because reading from InputStream.
*
- * @param stream input SAM or BAM.
+ * @param stream input SAM or BAM. This is buffered internally so caller need not buffer.
* @param eagerDecode if true, decode SAM record entirely when reading it.
*/
public SAMFileReader(final InputStream stream, final boolean eagerDecode) {
@@ -209,6 +209,10 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
init(strm, indexFile, eagerDecode, defaultValidationStringency);
}
+ /**
+ * @param strm BAM -- If the stream is not buffered, caller should wrap in SeekableBufferedStream for
+ * better performance.
+ */
public SAMFileReader(final SeekableStream strm, final SeekableStream indexStream, final boolean eagerDecode) {
init(strm, indexStream, eagerDecode, defaultValidationStringency);
}
@@ -629,7 +633,9 @@ public class SAMFileReader implements Iterable<SAMRecord>, Closeable {
try {
final BufferedInputStream bufferedStream;
- if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file), Defaults.BUFFER_SIZE);
+ // Buffering is required because mark() and reset() are called on the input stream.
+ final int bufferSize = Math.max(Defaults.BUFFER_SIZE, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
+ if (file != null) bufferedStream = new BufferedInputStream(new FileInputStream(file), bufferSize);
else bufferedStream = IOUtil.toBufferedStream(stream);
if (isBAMFile(bufferedStream)) {
mIsBinary = true;
diff --git a/src/java/net/sf/samtools/SAMFileWriterFactory.java b/src/java/net/sf/samtools/SAMFileWriterFactory.java
index cfc495b..94fb183 100644
--- a/src/java/net/sf/samtools/SAMFileWriterFactory.java
+++ b/src/java/net/sf/samtools/SAMFileWriterFactory.java
@@ -161,8 +161,7 @@ public class SAMFileWriterFactory {
if (this.createMd5File && !createMd5File) {
System.err.println("Cannot create MD5 file for BAM because output file is not a regular file: " + outputFile.getAbsolutePath());
}
- OutputStream os = new FileOutputStream(outputFile, false);
- if (bufferSize > 0) os = new BufferedOutputStream(os, bufferSize);
+ OutputStream os = IOUtil.maybeBufferOutputStream(new FileOutputStream(outputFile, false), bufferSize);
if (createMd5File) os = new Md5CalculatingOutputStream(os, new File(outputFile.getAbsolutePath() + ".md5"));
final BAMFileWriter ret = new BAMFileWriter(os, outputFile, compressionLevel);
final boolean createIndex = this.createIndex && IOUtil.isRegularPath(outputFile);
@@ -223,7 +222,8 @@ public class SAMFileWriterFactory {
*
* @param header entire header. Sort order is determined by the sortOrder property of this arg.
* @param presorted if true, SAMRecords must be added to the SAMFileWriter in order that agrees with header.sortOrder.
- * @param stream the stream to write records to.
+ * @param stream the stream to write records to. Note that this method does not buffer the stream, so the
+ * caller must buffer if desired. Note that PrintStream is buffered.
*/
public SAMFileWriter makeSAMWriter(final SAMFileHeader header, final boolean presorted, final OutputStream stream) {
final SAMTextWriter ret = new SAMTextWriter(stream);
diff --git a/src/java/net/sf/samtools/SAMRecord.java b/src/java/net/sf/samtools/SAMRecord.java
index 6d0b378..4f7da39 100644
--- a/src/java/net/sf/samtools/SAMRecord.java
+++ b/src/java/net/sf/samtools/SAMRecord.java
@@ -113,7 +113,7 @@ public class SAMRecord implements Cloneable
/**
* If a read has reference name "*", it will have this value for position.
*/
- public static final int NO_ALIGNMENT_START = 0;
+ public static final int NO_ALIGNMENT_START = GenomicIndexUtil.UNSET_GENOMIC_LOCATION;
/**
* This should rarely be used, since a read with no sequence doesn't make much sense.
@@ -1376,7 +1376,7 @@ public class SAMRecord implements Cloneable
// then treat this as a one base alignment for indexing purposes.
alignmentEnd = alignmentStart + 1;
}
- return SAMUtils.reg2bin(alignmentStart, alignmentEnd);
+ return GenomicIndexUtil.reg2bin(alignmentStart, alignmentEnd);
}
public SAMFileHeader getHeader() {
@@ -1609,12 +1609,15 @@ public class SAMRecord implements Cloneable
* @return
*/
- private List<SAMValidationError> validateCigar(final Cigar cigar, final Integer referenceIndex, final List<AlignmentBlock> alignmentBlocks,
- final long recordNumber, final String cigarTypeName) {
+ private List<SAMValidationError> validateCigar(final Cigar cigar,
+ final Integer referenceIndex,
+ final List<AlignmentBlock> alignmentBlocks,
+ final long recordNumber,
+ final String cigarTypeName) {
// Don't know line number, and don't want to force read name to be decoded.
List<SAMValidationError> ret = cigar.isValid(getReadName(), recordNumber);
if (referenceIndex != NO_ALIGNMENT_REFERENCE_INDEX) {
- final SAMSequenceRecord sequence = getHeader().getSequence(getReferenceIndex());
+ final SAMSequenceRecord sequence = getHeader().getSequence(referenceIndex);
final int referenceSequenceLength = sequence.getSequenceLength();
for (final AlignmentBlock alignmentBlock : alignmentBlocks) {
if (alignmentBlock.getReferenceStart() + alignmentBlock.getLength() - 1 > referenceSequenceLength) {
diff --git a/src/java/net/sf/samtools/SAMRecordSetBuilder.java b/src/java/net/sf/samtools/SAMRecordSetBuilder.java
index c2d876d..a93b999 100644
--- a/src/java/net/sf/samtools/SAMRecordSetBuilder.java
+++ b/src/java/net/sf/samtools/SAMRecordSetBuilder.java
@@ -30,7 +30,6 @@ import net.sf.samtools.util.RuntimeIOException;
import java.io.File;
import java.io.IOException;
import java.util.*;
-
/**
* Factory class for creating SAMRecords for testing purposes. Various methods can be called
* to add new SAM records (or pairs of records) to a list which can then be returned at
@@ -42,9 +41,9 @@ import java.util.*;
*/
public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
private static final String[] chroms = {
- "chrM", "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
+ "chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10",
"chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20",
- "chr21", "chr22", "chrX", "chrY"
+ "chr21", "chr22", "chrX", "chrY", "chrM"
};
private static final byte[] BASES = {'A','C','G','T'};
private static final String READ_GROUP_ID = "1";
@@ -54,11 +53,12 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
private final SAMFileHeader header;
private final Collection<SAMRecord> records;
- private final int readLength = 36 ;
+ private int readLength = 36 ;
private SAMProgramRecord programRecord = null;
- private SAMReadGroupRecord readGroup = null;
+ private SAMReadGroupRecord readGroup = null;
+ private static final int DEFAULT_CHROMOSOME_LENGTH = 100000000;
/**
* Constructs a new SAMRecordSetBuilder with all the data needed to keep the records
@@ -80,7 +80,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
public SAMRecordSetBuilder(final boolean sortForMe, final SAMFileHeader.SortOrder sortOrder, final boolean addReadGroup) {
final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>();
for (final String chrom : chroms) {
- final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(chrom, 1000000);
+ final SAMSequenceRecord sequenceRecord = new SAMSequenceRecord(chrom, DEFAULT_CHROMOSOME_LENGTH);
sequences.add(sequenceRecord);
}
@@ -129,7 +129,7 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
}
}
- public void setReadGroup(final SAMReadGroupRecord readGroup) {
+ public void setReadGroup(final SAMReadGroupRecord readGroup) {
this.readGroup = readGroup;
if (readGroup != null) {
this.header.addReadGroup(readGroup);
@@ -151,46 +151,107 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
}
/**
- * Adds a skeletal fragment (non-PE) record to the set using the provided
- * contig start and strand information.
+ * Adds a fragment record (mapped or unmapped) to the set using the provided contig start and optionally the strand,
+ * cigar string, quality string or default quality score. This does not modify the flag field, which should be updated
+ * if desired before adding the return to the list of records.
*/
- public void addFrag(final String name, final int contig, final int start, final boolean negativeStrand) {
+ private SAMRecord createReadNoFlag(final String name, final int contig, final int start, final boolean negativeStrand,
+ final boolean recordUnmapped, final String cigar, final String qualityString,
+ final int defaultQuality) throws SAMException {
final SAMRecord rec = new SAMRecord(this.header);
rec.setReadName(name);
- rec.setReferenceIndex(contig);
- rec.setReferenceName(chroms[contig]);
- rec.setAlignmentStart(start);
- rec.setReadNegativeStrandFlag(negativeStrand);
- rec.setCigarString(readLength + "M");
- rec.setMappingQuality(255);
+ if (chroms.length <= contig) {
+ throw new SAMException("Contig too big [" + chroms.length + " < " + contig);
+ }
+ if (0 <= contig) {
+ rec.setReferenceIndex(contig);
+ rec.setReferenceName(chroms[contig]);
+ rec.setAlignmentStart(start);
+ }
+ if (!recordUnmapped) {
+ rec.setReadNegativeStrandFlag(negativeStrand);
+ if (null != cigar) {
+ rec.setCigarString(cigar);
+ readLength = rec.getCigar().getReadLength();
+ } else if (!rec.getReadUnmappedFlag()) {
+ rec.setCigarString(readLength + "M");
+ }
+ rec.setMappingQuality(255);
+ } else {
+ rec.setReadUnmappedFlag(true);
+ }
rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID);
+
if (programRecord != null) {
rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId());
}
+
if (readGroup != null) {
- rec.setAttribute(SAMTag.RG.name(), readGroup.getReadGroupId());
+ rec.setAttribute(SAMTag.RG.name(), readGroup.getReadGroupId());
}
- fillInBasesAndQualities(rec);
+ fillInBasesAndQualities(rec, qualityString, defaultQuality);
+
+ return rec;
+ }
+
+ /**
+ * Adds a skeletal fragment (non-PE) record to the set using the provided
+ * contig start and strand information.
+ */
+ public void addFrag(final String name, final int contig, final int start, final boolean negativeStrand) {
+ addFrag(name, contig, start, negativeStrand, false, null, null, -1);
+ }
+
+ /**
+ * Adds a fragment record (mapped or unmapped) to the set using the provided contig start and optionally the strand,
+ * cigar string, quality string or default quality score.
+ */
+ public SAMRecord addFrag(final String name, final int contig, final int start, final boolean negativeStrand,
+ final boolean recordUnmapped, final String cigar, final String qualityString,
+ final int defaultQuality) throws SAMException {
+ final SAMRecord rec = createReadNoFlag(name, contig, start, negativeStrand, recordUnmapped, cigar, qualityString, defaultQuality);
this.records.add(rec);
+ return rec;
}
- /** Adds an unmapped fragment read to the builder. */
- public void addUnmappedFragment(final String name) {
- final SAMRecord rec = new SAMRecord(this.header);
- rec.setReadName(name);
- rec.setReadUnmappedFlag(true);
- rec.setAttribute(SAMTag.RG.name(), READ_GROUP_ID);
- if (programRecord != null) {
- rec.setAttribute(SAMTag.PG.name(), programRecord.getProgramGroupId());
+ /**
+ * Fills in the bases and qualities for the given record. Quality data is randomly generated if the defaultQuality
+ * is set to -1. Otherwise all qualities will be set to defaultQuality. If a quality string is provided that string
+ * will be used instead of the defaultQuality.
+ */
+ private void fillInBasesAndQualities(final SAMRecord rec, final String qualityString, final int defaultQuality) {
+
+ if (null == qualityString) {
+ fillInBasesAndQualities(rec, defaultQuality);
+ } else {
+ fillInBases(rec);
+ rec.setBaseQualityString(qualityString);
}
- if (readGroup != null) {
- rec.setAttribute(SAMTag.RG.name(), readGroup.getReadGroupId());
+ }
+
+ /**
+ * Randomly fills in the bases for the given record.
+ */
+ private void fillInBases(final SAMRecord rec){
+ final int length = this.readLength;
+ final byte[] bases = new byte[length];
+
+ for (int i = 0; i < length; ++i) {
+ bases[i] = BASES[this.random.nextInt(BASES.length)];
}
- fillInBasesAndQualities(rec);
- this.records.add(rec);
+
+ rec.setReadBases(bases);
+ }
+
+ /**
+ * Adds an unmapped fragment read to the builder.
+ */
+ public void addUnmappedFragment(final String name) {
+ addFrag(name, -1, -1, false, true, null, null, -1);
}
+
/**
* Adds a skeletal pair of records to the set using the provided
* contig starts. The pair is assumed to be a well
@@ -253,7 +314,43 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
this.records.add(end2);
}
- /** Adds a pair with both ends unmapped to the builder. */
+ /**
+ * Adds a pair of records (mapped or unmmapped) to the set using the provided contig starts.
+ * The pair is assumed to be a well formed pair sitting on a single contig.
+ */
+ public List<SAMRecord> addPair(final String name, final int contig, final int start1, final int start2,
+ final boolean record1Unmapped, final boolean record2Unmapped, final String cigar1,
+ final String cigar2, final boolean strand1, final boolean strand2, final int defaultQuality) {
+ final List<SAMRecord> recordsList = new LinkedList<SAMRecord>();
+
+ final SAMRecord end1 = createReadNoFlag(name, contig, start1, strand1, record1Unmapped, cigar1, null, defaultQuality);
+ final SAMRecord end2 = createReadNoFlag(name, contig, start2, strand2, record2Unmapped, cigar2, null, defaultQuality);
+
+ end1.setReadPairedFlag(true);
+ end1.setFirstOfPairFlag(true);
+
+ if (!record1Unmapped && !record2Unmapped) {
+ end1.setProperPairFlag(true);
+ end2.setProperPairFlag(true);
+ }
+ end2.setReadPairedFlag(true);
+ end2.setSecondOfPairFlag(true);
+
+ // set mate info
+ SamPairUtil.setMateInfo(end1, end2, header);
+
+ recordsList.add(end1);
+ recordsList.add(end2);
+
+ records.add(end1);
+ records.add(end2);
+
+ return recordsList;
+ }
+
+ /**
+ * Adds a pair with both ends unmapped to the builder.
+ */
public void addUnmappedPair(final String name) {
final SAMRecord end1 = new SAMRecord(this.header);
final SAMRecord end2 = new SAMRecord(this.header);
@@ -294,22 +391,32 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
* Relies on the alignment start and end having been set to get read length.
*/
private void fillInBasesAndQualities(final SAMRecord rec) {
+ fillInBasesAndQualities(rec, -1);
+ }
+
+ /**
+ * Fills in bases and qualities with a set default quality. If the defaultQuality is set to -1 quality scores will
+ * be randomly generated.
+ * Relies on the alignment start and end having been set to get read length.
+ */
+ private void fillInBasesAndQualities(final SAMRecord rec, final int defaultQuality) {
final int length = this.readLength;
- final byte[] bases = new byte[length];
final byte[] quals = new byte[length];
- for (int i=0; i<length; ++i) {
- bases[i] = BASES[this.random.nextInt(BASES.length)];
- quals[i] = (byte) this.random.nextInt(50);
+ if (-1 != defaultQuality) {
+ Arrays.fill(quals, (byte) defaultQuality);
+ } else {
+ for (int i = 0; i < length; ++i) {
+ quals[i] = (byte) this.random.nextInt(50);
+ }
}
-
- rec.setReadBases(bases);
rec.setBaseQualities(quals);
+ fillInBases(rec);
}
/**
* Creates samFileReader from the data in instance of this class
- * @return SAMFileReader
+ * @return SAMFileReader
*/
public SAMFileReader getSamReader() {
@@ -341,4 +448,6 @@ public class SAMRecordSetBuilder implements Iterable<SAMRecord> {
public SAMFileHeader getHeader() {
return header;
}
+ public void setReadLength(final int readLength) { this.readLength = readLength; }
+
}
diff --git a/src/java/net/sf/samtools/SAMUtils.java b/src/java/net/sf/samtools/SAMUtils.java
index e3a83e0..2dfdd05 100644
--- a/src/java/net/sf/samtools/SAMUtils.java
+++ b/src/java/net/sf/samtools/SAMUtils.java
@@ -423,18 +423,11 @@ public final class SAMUtils
* Copied from SAM spec.
* @param beg 0-based start of read (inclusive)
* @param end 0-based end of read (exclusive)
+ * @deprecated Use GenomicIndexUtil.reg2bin
*/
- static int reg2bin(final int beg, int end)
+ static int reg2bin(final int beg, final int end)
{
-
- --end;
-
- if (beg>>14 == end>>14) return ((1<<15)-1)/7 + (beg>>14);
- if (beg>>17 == end>>17) return ((1<<12)-1)/7 + (beg>>17);
- if (beg>>20 == end>>20) return ((1<<9)-1)/7 + (beg>>20);
- if (beg>>23 == end>>23) return ((1<<6)-1)/7 + (beg>>23);
- if (beg>>26 == end>>26) return ((1<<3)-1)/7 + (beg>>26);
- return 0;
+ return GenomicIndexUtil.reg2bin(beg, end);
}
/**
@@ -464,7 +457,7 @@ public final class SAMUtils
}
public static void processValidationError(final SAMValidationError validationError,
- SAMFileReader.ValidationStringency validationStringency) {
+ final SAMFileReader.ValidationStringency validationStringency) {
if (validationStringency == SAMFileReader.ValidationStringency.STRICT) {
throw new SAMFormatException("SAM validation error: " + validationError);
}
@@ -541,17 +534,17 @@ public final class SAMUtils
* it can be used whether creating a SAMProgramRecord with a constructor or when
* calling SAMFileHeader.createProgramRecord().
*/
- public static void chainSAMProgramRecord(SAMFileHeader header, SAMProgramRecord program) {
+ public static void chainSAMProgramRecord(final SAMFileHeader header, final SAMProgramRecord program) {
- List<SAMProgramRecord> pgs = header.getProgramRecords();
+ final List<SAMProgramRecord> pgs = header.getProgramRecords();
if (pgs.size() > 0) {
- List<String> referencedIds = new ArrayList<String>();
- for (SAMProgramRecord pg : pgs) {
+ final List<String> referencedIds = new ArrayList<String>();
+ for (final SAMProgramRecord pg : pgs) {
if (pg.getPreviousProgramGroupId() != null) {
referencedIds.add(pg.getPreviousProgramGroupId());
}
}
- for (SAMProgramRecord pg : pgs) {
+ for (final SAMProgramRecord pg : pgs) {
// if record being chained has already been added, ignore it
if (pg.getProgramGroupId().equals(program.getProgramGroupId())) {
continue;
@@ -564,7 +557,7 @@ public final class SAMUtils
}
}
- public static void makeReadUnmapped(SAMRecord rec) {
+ public static void makeReadUnmapped(final SAMRecord rec) {
if (rec.getReadNegativeStrandFlag()) {
SAMRecordUtil.reverseComplement(rec);
rec.setReadNegativeStrandFlag(false);
@@ -595,6 +588,14 @@ public final class SAMUtils
}
/**
+ * Tests if the provided record is mapped entirely beyond the end of the reference (i.e., the alignment start is greater than the
+ * length of the sequence to which the record is mapped).
+ */
+ public static boolean recordMapsEntirelyBeyondEndOfReference(final SAMRecord record) {
+ return record.getHeader().getSequence(record.getReferenceIndex()).getSequenceLength() < record.getAlignmentStart();
+ }
+
+ /**
*
* @return negative if mapq1 < mapq2, etc.
* Note that MAPQ(0) < MAPQ(255) < MAPQ(1)
diff --git a/src/java/net/sf/picard/sam/SamPairUtil.java b/src/java/net/sf/samtools/SamPairUtil.java
similarity index 89%
rename from src/java/net/sf/picard/sam/SamPairUtil.java
rename to src/java/net/sf/samtools/SamPairUtil.java
index 35534d9..9ff366f 100644
--- a/src/java/net/sf/picard/sam/SamPairUtil.java
+++ b/src/java/net/sf/samtools/SamPairUtil.java
@@ -22,12 +22,7 @@
* THE SOFTWARE.
*/
-package net.sf.picard.sam;
-
-import net.sf.picard.PicardException;
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMTag;
+package net.sf.samtools;
import java.util.Iterator;
import java.util.List;
@@ -116,25 +111,25 @@ public class SamPairUtil {
// Validate paired reads arrive as first of pair, then second of pair
if (firstOfPair == null) {
- throw new PicardException(
+ throw new SAMException(
"First record does not exist - cannot perform mate assertion!");
} else if (secondOfPair == null) {
- throw new PicardException(
+ throw new SAMException(
firstOfPair.toString() + " is missing its mate");
} else if (!firstOfPair.getReadPairedFlag()) {
- throw new PicardException(
+ throw new SAMException(
"First record is not marked as paired: " + firstOfPair.toString());
} else if (!secondOfPair.getReadPairedFlag()) {
- throw new PicardException(
+ throw new SAMException(
"Second record is not marked as paired: " + secondOfPair.toString());
} else if (!firstOfPair.getFirstOfPairFlag()) {
- throw new PicardException(
+ throw new SAMException(
"First record is not marked as first of pair: " + firstOfPair.toString());
} else if (!secondOfPair.getSecondOfPairFlag()) {
- throw new PicardException(
+ throw new SAMException(
"Second record is not marked as second of pair: " + secondOfPair.toString());
} else if (!firstOfPair.getReadName().equals(secondOfPair.getReadName())) {
- throw new PicardException(
+ throw new SAMException(
"First [" + firstOfPair.getReadName() + "] and Second [" +
secondOfPair.getReadName() + "] readnames do not match!");
}
@@ -147,7 +142,7 @@ public class SamPairUtil {
* next element in the iteration
* @param firstOfPair the firstOfPair SAMRecord
* @return the secondOfPair SAMRecord
- * @throws PicardException when the secondOfPair mate cannot be obtained due to assertion failures
+ * @throws SAMException when the secondOfPair mate cannot be obtained due to assertion failures
*/
public static SAMRecord obtainAssertedMate(final Iterator<SAMRecord> samRecordIterator,
final SAMRecord firstOfPair) {
@@ -156,7 +151,7 @@ public class SamPairUtil {
assertMate(firstOfPair, secondOfPair);
return secondOfPair;
} else {
- throw new PicardException(
+ throw new SAMException(
"Second record does not exist: " + firstOfPair.getReadName());
}
}
@@ -194,14 +189,16 @@ public class SamPairUtil {
rec1.setMateNegativeStrandFlag(rec2.getReadNegativeStrandFlag());
rec1.setMateUnmappedFlag(false);
rec1.setAttribute(SAMTag.MQ.name(), rec2.getMappingQuality());
- rec1.setAttribute(SAMTag.MC.name(), rec2.getCigarString());
+ // TODO: Consider re-enabling when mate cigar issues are resolved
+ // rec1.setAttribute(SAMTag.MC.name(), rec2.getCigarString());
rec2.setMateReferenceIndex(rec1.getReferenceIndex());
rec2.setMateAlignmentStart(rec1.getAlignmentStart());
rec2.setMateNegativeStrandFlag(rec1.getReadNegativeStrandFlag());
rec2.setMateUnmappedFlag(false);
rec2.setAttribute(SAMTag.MQ.name(), rec1.getMappingQuality());
- rec2.setAttribute(SAMTag.MC.name(), rec1.getCigarString());
+ // TODO: Consider re-enabling when mate cigar issues are resolved
+ // rec2.setAttribute(SAMTag.MC.name(), rec1.getCigarString());
}
// Else if they're both unmapped set that straight
else if (rec1.getReadUnmappedFlag() && rec2.getReadUnmappedFlag()) {
@@ -212,7 +209,8 @@ public class SamPairUtil {
rec1.setMateNegativeStrandFlag(rec2.getReadNegativeStrandFlag());
rec1.setMateUnmappedFlag(true);
rec1.setAttribute(SAMTag.MQ.name(), null);
- rec1.setAttribute(SAMTag.MC.name(), null);
+ // TODO: Consider re-enabling when mate cigar issues are resolved
+ // rec1.setAttribute(SAMTag.MC.name(), null);
rec1.setInferredInsertSize(0);
rec2.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
@@ -222,7 +220,8 @@ public class SamPairUtil {
rec2.setMateNegativeStrandFlag(rec1.getReadNegativeStrandFlag());
rec2.setMateUnmappedFlag(true);
rec2.setAttribute(SAMTag.MQ.name(), null);
- rec2.setAttribute(SAMTag.MC.name(), null);
+ // TODO: Consider re-enabling when mate cigar issues are resolved
+ // rec2.setAttribute(SAMTag.MC.name(), null);
rec2.setInferredInsertSize(0);
}
// And if only one is mapped copy it's coordinate information to the mate
@@ -237,7 +236,8 @@ public class SamPairUtil {
mapped.setMateNegativeStrandFlag(unmapped.getReadNegativeStrandFlag());
mapped.setMateUnmappedFlag(true);
// For the mapped read, set it's mateCigar to null, since the other read must be unmapped
- mapped.setAttribute(SAMTag.MC.name(), null);
+ // TODO: Consider re-enabling when mate cigar issues are resolved
+ // mapped.setAttribute(SAMTag.MC.name(), null);
mapped.setInferredInsertSize(0);
unmapped.setMateReferenceIndex(mapped.getReferenceIndex());
@@ -245,7 +245,8 @@ public class SamPairUtil {
unmapped.setMateNegativeStrandFlag(mapped.getReadNegativeStrandFlag());
unmapped.setMateUnmappedFlag(false);
// For the unmapped read, set it's mateCigar to the mate's Cigar, since the mate must be mapped
- unmapped.setAttribute(SAMTag.MC.name(), mapped.getCigarString());
+ // TODO: Consider re-enabling when mate cigar issues are resolved
+ // unmapped.setAttribute(SAMTag.MC.name(), mapped.getCigarString());
unmapped.setInferredInsertSize(0);
}
diff --git a/src/java/net/sf/samtools/TextualBAMIndexWriter.java b/src/java/net/sf/samtools/TextualBAMIndexWriter.java
index 0f4560b..c84cff2 100644
--- a/src/java/net/sf/samtools/TextualBAMIndexWriter.java
+++ b/src/java/net/sf/samtools/TextualBAMIndexWriter.java
@@ -52,7 +52,7 @@ class TextualBAMIndexWriter implements BAMIndexWriter {
this.nRef = nRef;
try {
pw = new PrintWriter(output);
- } catch (FileNotFoundException e) {
+ } catch (final FileNotFoundException e) {
throw new SAMException("Can't find output file " + output, e);
}
writeHeader();
@@ -93,13 +93,13 @@ class TextualBAMIndexWriter implements BAMIndexWriter {
//final List<Chunk> chunks = content.getMetaData() == null ? null
// : content.getMetaData().getMetaDataChunks();
- BAMIndexMetaData metaData = content.getMetaData();
+ final BAMIndexMetaData metaData = content.getMetaData();
pw.println("Reference " + reference + " has n_bin= " + Integer.toString(size + (metaData != null? 1 : 0)));
// chunks
- for (Bin bin : bins) { // note, bins will always be sorted
- if (bin.getBinNumber() == AbstractBAMFileIndex.MAX_BINS) break;
+ for (final Bin bin : bins) { // note, bins will always be sorted
+ if (bin.getBinNumber() == GenomicIndexUtil.MAX_BINS) break;
if (bin.getChunkList() == null) {
pw.println(" Ref " + reference + " bin " + bin.getBinNumber() + " has no binArray"); // remove?
continue;
@@ -146,7 +146,7 @@ class TextualBAMIndexWriter implements BAMIndexWriter {
*
* @param metaData information describing numAligned records, numUnAligned, etc
*/
- private void writeChunkMetaData(int reference, BAMIndexMetaData metaData) {
+ private void writeChunkMetaData(final int reference, final BAMIndexMetaData metaData) {
final int nChunks = metaData == null ? 0 : 2;
pw.print(" Ref " + reference + " bin 37450 has n_chunk= " + nChunks);
if (nChunks == 0) {
@@ -162,7 +162,7 @@ class TextualBAMIndexWriter implements BAMIndexWriter {
}
- private void writeNullContent(int reference) {
+ private void writeNullContent(final int reference) {
pw.println("Reference " + reference + " has n_bin=0");
pw.println("Reference " + reference + " has n_intv=0");
}
diff --git a/src/java/net/sf/samtools/seekablestream/SeekableStreamFactory.java b/src/java/net/sf/samtools/seekablestream/SeekableStreamFactory.java
index e69ca72..2dc1b2b 100644
--- a/src/java/net/sf/samtools/seekablestream/SeekableStreamFactory.java
+++ b/src/java/net/sf/samtools/seekablestream/SeekableStreamFactory.java
@@ -86,7 +86,8 @@ public class SeekableStreamFactory{
}
public SeekableStream getBufferedStream(SeekableStream stream, int bufferSize){
- return new SeekableBufferedStream(stream, bufferSize);
+ if (bufferSize == 0) return stream;
+ else return new SeekableBufferedStream(stream, bufferSize);
}
}
diff --git a/src/java/net/sf/samtools/util/AsciiWriter.java b/src/java/net/sf/samtools/util/AsciiWriter.java
index 31c7119..886d4a0 100644
--- a/src/java/net/sf/samtools/util/AsciiWriter.java
+++ b/src/java/net/sf/samtools/util/AsciiWriter.java
@@ -23,6 +23,8 @@
*/
package net.sf.samtools.util;
+import net.sf.samtools.Defaults;
+
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
@@ -34,7 +36,7 @@ public class AsciiWriter extends Writer {
private final OutputStream os;
// Buffer size has not been tuned.
- private final byte[] buffer = new byte[IOUtil.STANDARD_BUFFER_SIZE];
+ private final byte[] buffer = new byte[Defaults.NON_ZERO_BUFFER_SIZE];
private int numBytes;
/**
diff --git a/src/java/net/sf/samtools/util/BlockCompressedInputStream.java b/src/java/net/sf/samtools/util/BlockCompressedInputStream.java
index 6067641..f5a3926 100755
--- a/src/java/net/sf/samtools/util/BlockCompressedInputStream.java
+++ b/src/java/net/sf/samtools/util/BlockCompressedInputStream.java
@@ -48,7 +48,7 @@ import net.sf.samtools.seekablestream.SeekableStream;
*
* c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format
*/
-public class BlockCompressedInputStream extends InputStream {
+public class BlockCompressedInputStream extends InputStream implements LocationAware {
private InputStream mStream = null;
private SeekableStream mFile = null;
private byte[] mFileBuffer = null;
@@ -320,6 +320,11 @@ public class BlockCompressedInputStream extends InputStream {
return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, mCurrentOffset);
}
+ @Override
+ public long getPosition() {
+ return getFilePointer();
+ }
+
public static long getFileBlock(final long bgzfOffset) {
return BlockCompressedFilePointerUtil.getBlockAddress(bgzfOffset);
}
@@ -389,7 +394,7 @@ public class BlockCompressedInputStream extends InputStream {
if (buffer == null || buffer.length != uncompressedLength) {
try {
buffer = new byte[uncompressedLength];
- } catch (NegativeArraySizeException e) {
+ } catch (final NegativeArraySizeException e) {
throw new RuntimeException("BGZF file has invalid uncompressedLength: " + uncompressedLength, e);
}
}
diff --git a/src/java/net/sf/samtools/util/BlockCompressedOutputStream.java b/src/java/net/sf/samtools/util/BlockCompressedOutputStream.java
index ac1834c..5c98742 100644
--- a/src/java/net/sf/samtools/util/BlockCompressedOutputStream.java
+++ b/src/java/net/sf/samtools/util/BlockCompressedOutputStream.java
@@ -48,6 +48,7 @@ import java.util.zip.Deflater;
*/
public class BlockCompressedOutputStream
extends OutputStream
+ implements LocationAware
{
private static int defaultCompressionLevel = BlockCompressedStreamConstants.DEFAULT_COMPRESSION_LEVEL;
@@ -132,7 +133,7 @@ public class BlockCompressedOutputStream
* Constructors that take output streams
* file may be null
*/
- public BlockCompressedOutputStream(final OutputStream os, File file) {
+ public BlockCompressedOutputStream(final OutputStream os, final File file) {
this(os, file, defaultCompressionLevel);
}
@@ -146,6 +147,21 @@ public class BlockCompressedOutputStream
}
/**
+ *
+ * @param location May be null. Used for error messages, and for checking file termination.
+ * @param output May or not already be a BlockCompressedOutputStream.
+ * @return A BlockCompressedOutputStream, either by wrapping the given OutputStream, or by casting if it already
+ * is a BCOS.
+ */
+ public static BlockCompressedOutputStream maybeBgzfWrapOutputStream(final File location, OutputStream output) {
+ if (!(output instanceof BlockCompressedOutputStream)) {
+ return new BlockCompressedOutputStream(output, location);
+ } else {
+ return (BlockCompressedOutputStream)output;
+ }
+ }
+
+ /**
* Writes b.length bytes from the specified byte array to this output stream. The general contract for write(b)
* is that it should have exactly the same effect as the call write(b, 0, b.length).
* @param bytes the data
@@ -237,6 +253,11 @@ public class BlockCompressedOutputStream
return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes);
}
+ @Override
+ public long getPosition() {
+ return getFilePointer();
+ }
+
/**
* Attempt to write the data in uncompressedBuffer to the underlying file in a gzip block.
* If the entire uncompressedBuffer does not fit in the maximum allowed size, reduce the amount
@@ -248,7 +269,7 @@ public class BlockCompressedOutputStream
if (numUncompressedBytes == 0) {
return 0;
}
- int bytesToCompress = numUncompressedBytes;
+ final int bytesToCompress = numUncompressedBytes;
// Compress the input
deflater.reset();
deflater.setInput(uncompressedBuffer, 0, bytesToCompress);
diff --git a/src/java/net/sf/samtools/util/BufferedLineReader.java b/src/java/net/sf/samtools/util/BufferedLineReader.java
index 07ab61f..90ea98b 100644
--- a/src/java/net/sf/samtools/util/BufferedLineReader.java
+++ b/src/java/net/sf/samtools/util/BufferedLineReader.java
@@ -23,6 +23,8 @@
*/
package net.sf.samtools.util;
+import net.sf.samtools.Defaults;
+
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
@@ -45,7 +47,7 @@ public class BufferedLineReader implements LineReader {
private String peekedLine;
public BufferedLineReader(final InputStream is) {
- this(is, IOUtil.STANDARD_BUFFER_SIZE);
+ this(is, Defaults.NON_ZERO_BUFFER_SIZE);
}
public BufferedLineReader(final InputStream is, final int bufferSize) {
diff --git a/src/java/net/sf/samtools/util/IOUtil.java b/src/java/net/sf/samtools/util/IOUtil.java
index 875ee48..6873083 100644
--- a/src/java/net/sf/samtools/util/IOUtil.java
+++ b/src/java/net/sf/samtools/util/IOUtil.java
@@ -25,20 +25,20 @@ package net.sf.samtools.util;
import net.sf.samtools.Defaults;
+import net.sf.samtools.seekablestream.SeekableBufferedStream;
+import net.sf.samtools.seekablestream.SeekableFileStream;
+import net.sf.samtools.seekablestream.SeekableStream;
-import java.io.BufferedInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.File;
+import java.io.*;
/**
* Miscellaneous stateless static IO-oriented methods.
*/
public class IOUtil {
/**
- * @deprecated Use Defaults.BUFFER_SIZE instead.
+ * @deprecated Use Defaults.NON_ZERO_BUFFER_SIZE instead.
*/
- @Deprecated public static final int STANDARD_BUFFER_SIZE = Defaults.BUFFER_SIZE;
+ @Deprecated public static final int STANDARD_BUFFER_SIZE = Defaults.NON_ZERO_BUFFER_SIZE;
public static final long ONE_GB = 1024 * 1024 * 1024;
public static final long TWO_GBS = 2 * ONE_GB;
@@ -53,11 +53,76 @@ public class IOUtil {
if (stream instanceof BufferedInputStream) {
return (BufferedInputStream) stream;
} else {
- return new BufferedInputStream(stream, STANDARD_BUFFER_SIZE);
+ return new BufferedInputStream(stream, Defaults.NON_ZERO_BUFFER_SIZE);
}
}
/**
+ * @return If Defaults.BUFFER_SIZE > 0, wrap os in BufferedOutputStream, else return os itself.
+ */
+ public static OutputStream maybeBufferOutputStream(final OutputStream os) {
+ return maybeBufferOutputStream(os, Defaults.BUFFER_SIZE);
+ }
+
+ /**
+ * @return If bufferSize > 0, wrap os in BufferedOutputStream, else return os itself.
+ */
+ public static OutputStream maybeBufferOutputStream(final OutputStream os, final int bufferSize) {
+ if (bufferSize > 0) return new BufferedOutputStream(os, bufferSize);
+ else return os;
+ }
+
+ public static SeekableStream maybeBufferedSeekableStream(final SeekableStream stream, final int bufferSize) {
+ return bufferSize > 0 ? new SeekableBufferedStream(stream, bufferSize) : stream;
+ }
+
+ public static SeekableStream maybeBufferedSeekableStream(final SeekableStream stream) {
+ return maybeBufferedSeekableStream(stream, Defaults.BUFFER_SIZE);
+ }
+
+ public static SeekableStream maybeBufferedSeekableStream(final File file) {
+ try {
+ return maybeBufferedSeekableStream(new SeekableFileStream(file));
+ } catch (final FileNotFoundException e) {
+ throw new RuntimeIOException(e);
+ }
+ }
+
+ /**
+ * @return If Defaults.BUFFER_SIZE > 0, wrap is in BufferedInputStream, else return is itself.
+ */
+ public static InputStream maybeBufferInputStream(final InputStream is) {
+ return maybeBufferInputStream(is, Defaults.BUFFER_SIZE);
+ }
+
+ /**
+ * @return If bufferSize > 0, wrap is in BufferedInputStream, else return is itself.
+ */
+ public static InputStream maybeBufferInputStream(final InputStream is, final int bufferSize) {
+ if (bufferSize > 0) return new BufferedInputStream(is, bufferSize);
+ else return is;
+ }
+
+ public static Reader maybeBufferReader(Reader reader, final int bufferSize) {
+ if (bufferSize > 0) reader = new BufferedReader(reader, bufferSize);
+ return reader;
+ }
+
+ public static Reader maybeBufferReader(final Reader reader) {
+ return maybeBufferReader(reader, Defaults.BUFFER_SIZE);
+ }
+
+ public static Writer maybeBufferWriter(Writer writer, final int bufferSize) {
+ if (bufferSize > 0) writer = new BufferedWriter(writer, bufferSize);
+ return writer;
+ }
+
+ public static Writer maybeBufferWriter(final Writer writer) {
+ return maybeBufferWriter(writer, Defaults.BUFFER_SIZE);
+ }
+
+
+ /**
* Delete a list of files, and write a warning message if one could not be deleted.
* @param files Files to be deleted.
*/
@@ -118,7 +183,7 @@ public class IOUtil {
final String user = System.getProperty("user.name");
final String tmp = System.getProperty("java.io.tmpdir");
- if (tmp.endsWith("/" + user)) return new File(tmp);
+ if (tmp.endsWith(File.separatorChar + user)) return new File(tmp);
else return new File(tmp, user);
}
}
diff --git a/src/java/org/broad/tribble/readers/LocationAware.java b/src/java/net/sf/samtools/util/LocationAware.java
similarity index 79%
rename from src/java/org/broad/tribble/readers/LocationAware.java
rename to src/java/net/sf/samtools/util/LocationAware.java
index 832b14f..7ad3f5c 100644
--- a/src/java/org/broad/tribble/readers/LocationAware.java
+++ b/src/java/net/sf/samtools/util/LocationAware.java
@@ -1,4 +1,4 @@
-package org.broad.tribble.readers;
+package net.sf.samtools.util;
/**
* Describes API for getting current position in a stream, writer, or underlying file.
@@ -9,7 +9,7 @@ package org.broad.tribble.readers;
*
* In the context of an iterator or any producer-like object that doesn't map directly to a byte stream, {@link #getPosition()} should
* return the position (in the underlying stream being read/written to) of the most-recently read/written element. For example, if you
- * are reading lines from a file with a {@link AsciiLineReaderIterator}, calling {@link #getPosition()} should return the byte position
+ * are reading lines from a file with a {@link org.broad.tribble.readers.AsciiLineReaderIterator}, calling {@link #getPosition()} should return the byte position
* of the start of the most recent line returned by {@link org.broad.tribble.readers.AsciiLineReaderIterator#next()}.
*
* @author mccowan
@@ -19,6 +19,8 @@ public interface LocationAware {
* The current offset, in bytes, of this stream/writer/file. Or, if this is an iterator/producer, the offset (in bytes) of the
* END of the most recently returned record (since a produced record corresponds to something that has been read already). See class
* javadoc for more.
+ *
+ * Note that for BGZF files, this does not represent an actually file position, but a virtual file pointer.
*/
public long getPosition();
}
diff --git a/src/java/net/sf/samtools/util/SortingCollection.java b/src/java/net/sf/samtools/util/SortingCollection.java
index 3fe2ec4..4162b04 100644
--- a/src/java/net/sf/samtools/util/SortingCollection.java
+++ b/src/java/net/sf/samtools/util/SortingCollection.java
@@ -23,6 +23,8 @@
*/
package net.sf.samtools.util;
+import net.sf.samtools.Defaults;
+
import java.io.*;
import java.lang.reflect.Array;
import java.util.*;
@@ -204,7 +206,7 @@ public class SortingCollection<T> implements Iterable<T> {
final File f = newTempFile();
OutputStream os = null;
try {
- os = tempStreamFactory.wrapTempOutputStream(new FileOutputStream(f), IOUtil.STANDARD_BUFFER_SIZE);
+ os = tempStreamFactory.wrapTempOutputStream(new FileOutputStream(f), Defaults.BUFFER_SIZE);
this.codec.setOutputStream(os);
for (int i = 0; i < this.numRecordsInRam; ++i) {
this.codec.encode(ramRecords[i]);
@@ -440,7 +442,7 @@ public class SortingCollection<T> implements Iterable<T> {
try {
this.is = new FileInputStream(file);
this.codec = SortingCollection.this.codec.clone();
- this.codec.setInputStream(tempStreamFactory.wrapTempInputStream(this.is, IOUtil.STANDARD_BUFFER_SIZE));
+ this.codec.setInputStream(tempStreamFactory.wrapTempInputStream(this.is, Defaults.BUFFER_SIZE));
advance();
}
catch (FileNotFoundException e) {
diff --git a/src/java/net/sf/samtools/util/SortingLongCollection.java b/src/java/net/sf/samtools/util/SortingLongCollection.java
index db5ff77..932f969 100644
--- a/src/java/net/sf/samtools/util/SortingLongCollection.java
+++ b/src/java/net/sf/samtools/util/SortingLongCollection.java
@@ -156,7 +156,7 @@ public class SortingLongCollection {
DataOutputStream os = null;
try {
final long numBytes = this.numValuesInRam * SIZEOF;
- os = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(f), Defaults.BUFFER_SIZE));
+ os = new DataOutputStream(IOUtil.maybeBufferOutputStream(new FileOutputStream(f)));
f.deleteOnExit();
for (int i = 0; i < this.numValuesInRam; ++i) {
os.writeLong(ramValues[i]);
@@ -243,7 +243,7 @@ public class SortingLongCollection {
FileValueIterator(final File file) {
this.file = file;
try {
- is = new DataInputStream(new BufferedInputStream(new FileInputStream(file),Defaults.BUFFER_SIZE));
+ is = new DataInputStream(IOUtil.maybeBufferInputStream(new FileInputStream(file)));
next();
}
catch (FileNotFoundException e) {
diff --git a/src/java/net/sf/samtools/util/TempStreamFactory.java b/src/java/net/sf/samtools/util/TempStreamFactory.java
index 7f7158e..985ba11 100644
--- a/src/java/net/sf/samtools/util/TempStreamFactory.java
+++ b/src/java/net/sf/samtools/util/TempStreamFactory.java
@@ -51,16 +51,15 @@ public class TempStreamFactory {
* Otherwise inputStream is returned.
*/
public InputStream wrapTempInputStream(final InputStream inputStream, final int bufferSize) {
+ InputStream is = IOUtil.maybeBufferInputStream(inputStream, bufferSize);
if (getSnappyLoader().SnappyAvailable) {
try {
- return getSnappyLoader().wrapInputStream(inputStream);
+ return getSnappyLoader().wrapInputStream(is);
} catch (Exception e) {
throw new SAMException("Error creating SnappyInputStream", e);
}
- } else if (bufferSize > 0) {
- return new BufferedInputStream(inputStream, bufferSize);
} else {
- return inputStream;
+ return is;
}
}
@@ -71,16 +70,15 @@ public class TempStreamFactory {
* Otherwise outputStream is returned.
*/
public OutputStream wrapTempOutputStream(final OutputStream outputStream, final int bufferSize) {
+ OutputStream os = outputStream;
+ if (bufferSize > 0) os = new BufferedOutputStream(os, bufferSize);
if (getSnappyLoader().SnappyAvailable) {
try {
- return getSnappyLoader().wrapOutputStream(outputStream);
+ os = getSnappyLoader().wrapOutputStream(os);
} catch (Exception e) {
throw new SAMException("Error creating SnappyOutputStream", e);
}
- } else if (bufferSize > 0) {
- return new BufferedOutputStream(outputStream, bufferSize);
- } else {
- return outputStream;
}
+ return os;
}
}
diff --git a/src/java/org/broad/tribble/AbstractFeatureReader.java b/src/java/org/broad/tribble/AbstractFeatureReader.java
index 4853375..1192f90 100644
--- a/src/java/org/broad/tribble/AbstractFeatureReader.java
+++ b/src/java/org/broad/tribble/AbstractFeatureReader.java
@@ -20,6 +20,7 @@ package org.broad.tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.util.ParsingUtils;
+import org.broad.tribble.util.TabixUtils;
import java.io.IOException;
import java.util.Iterator;
@@ -145,7 +146,7 @@ public abstract class AbstractFeatureReader<T extends Feature, SOURCE> implement
public boolean isTabix(String resourcePath, String indexPath) throws IOException{
if(indexPath == null){
- indexPath = ParsingUtils.appendToPath(resourcePath, ".tbi");
+ indexPath = ParsingUtils.appendToPath(resourcePath, TabixUtils.STANDARD_INDEX_EXTENSION);
}
return resourcePath.endsWith(".gz") && ParsingUtils.resourceExists(indexPath);
}
diff --git a/src/java/org/broad/tribble/AsciiFeatureCodec.java b/src/java/org/broad/tribble/AsciiFeatureCodec.java
index 8554997..225938f 100644
--- a/src/java/org/broad/tribble/AsciiFeatureCodec.java
+++ b/src/java/org/broad/tribble/AsciiFeatureCodec.java
@@ -19,6 +19,7 @@
package org.broad.tribble;
import net.sf.samtools.util.CloserUtil;
+import net.sf.samtools.util.LocationAware;
import org.broad.tribble.readers.*;
import java.io.IOException;
diff --git a/src/java/org/broad/tribble/BinaryFeatureCodec.java b/src/java/org/broad/tribble/BinaryFeatureCodec.java
index e28ccd9..9a153d5 100644
--- a/src/java/org/broad/tribble/BinaryFeatureCodec.java
+++ b/src/java/org/broad/tribble/BinaryFeatureCodec.java
@@ -1,7 +1,7 @@
package org.broad.tribble;
import net.sf.samtools.util.CloserUtil;
-import org.broad.tribble.readers.LocationAware;
+import net.sf.samtools.util.LocationAware;
import org.broad.tribble.readers.PositionalBufferedStream;
import java.io.IOException;
@@ -35,7 +35,7 @@ abstract public class BinaryFeatureCodec<T extends Feature> implements FeatureCo
public boolean isDone(final PositionalBufferedStream source) {
try {
return source.isDone();
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new RuntimeException("Failure reading from stream.", e);
}
}
diff --git a/src/java/org/broad/tribble/FeatureCodec.java b/src/java/org/broad/tribble/FeatureCodec.java
index 8b277b0..ca49fd3 100644
--- a/src/java/org/broad/tribble/FeatureCodec.java
+++ b/src/java/org/broad/tribble/FeatureCodec.java
@@ -18,7 +18,7 @@
package org.broad.tribble;
-import org.broad.tribble.readers.LocationAware;
+import net.sf.samtools.util.LocationAware;
import java.io.IOException;
import java.io.InputStream;
diff --git a/src/java/org/broad/tribble/TribbleIndexedFeatureReader.java b/src/java/org/broad/tribble/TribbleIndexedFeatureReader.java
index fd77a16..8a7b418 100644
--- a/src/java/org/broad/tribble/TribbleIndexedFeatureReader.java
+++ b/src/java/org/broad/tribble/TribbleIndexedFeatureReader.java
@@ -63,6 +63,12 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
private SeekableStream seekableStream = null;
/**
+ * We lazy-load the index but it might not even exist
+ * Don't want to keep checking if that's the case
+ */
+ private boolean needCheckForIndex = true;
+
+ /**
* @param featurePath - path to the feature file, can be a local file path, http url, or ftp url
* @param codec - codec to decode the features
* @param requireIndex - true if the reader will be queries for specific ranges. An index (idx) file must exist
@@ -73,17 +79,9 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
super(featurePath, codec);
if (requireIndex) {
- String indexFile = Tribble.indexFile(featurePath);
- if (ParsingUtils.resourceExists(indexFile)) {
- index = IndexFactory.loadIndex(indexFile);
- } else {
- // See if the index itself is gzipped
- indexFile = ParsingUtils.appendToPath(indexFile, ".gz");
- if (ParsingUtils.resourceExists(indexFile)) {
- index = IndexFactory.loadIndex(indexFile);
- } else {
- throw new TribbleException("An index is required, but none found.");
- }
+ this.loadIndex();
+ if(!this.hasIndex()){
+ throw new TribbleException("An index is required, but none found.");
}
}
@@ -94,6 +92,38 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
}
/**
+ * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url
+ * @param codec - codec to decode the features
+ * @param index - a tribble Index object
+ * @throws IOException
+ */
+ public TribbleIndexedFeatureReader(final String featureFile, final FeatureCodec<T, SOURCE> codec, final Index index) throws IOException {
+ this(featureFile, codec, false); // required to read the header
+ this.index = index;
+ this.needCheckForIndex = false;
+ }
+
+ /**
+ * Attempt to load the index for the specified {@link #path}.
+ * If the {@link #path} has no available index file,
+ * does nothing
+ * @throws IOException
+ */
+ private void loadIndex() throws IOException{
+ String indexFile = Tribble.indexFile(this.path);
+ if (ParsingUtils.resourceExists(indexFile)) {
+ index = IndexFactory.loadIndex(indexFile);
+ } else {
+ // See if the index itself is gzipped
+ indexFile = ParsingUtils.appendToPath(indexFile, ".gz");
+ if (ParsingUtils.resourceExists(indexFile)) {
+ index = IndexFactory.loadIndex(indexFile);
+ }
+ }
+ this.needCheckForIndex = false;
+ }
+
+ /**
* Get a seekable stream appropriate to read information from the current feature path
* <p/>
* This function ensures that if reuseStreamInQuery returns true then this function will only
@@ -126,18 +156,6 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
return pathIsRegularFile;
}
- /**
- * @param featureFile - path to the feature file, can be a local file path, http url, or ftp url
- * @param codec - codec to decode the features
- * @param index - a tribble Index object
- * @throws IOException
- */
- public TribbleIndexedFeatureReader(final String featureFile, final FeatureCodec<T, SOURCE> codec, final Index index) throws IOException {
- this(featureFile, codec, false); // required to read the header
- this.index = index;
- }
-
-
public void close() throws IOException {
// close the seekable stream if that's necessary
if (seekableStream != null) seekableStream.close();
@@ -149,11 +167,18 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
* @return list of strings of the contig names
*/
public List<String> getSequenceNames() {
- return index == null ? new ArrayList<String>() : new ArrayList<String>(index.getSequenceNames());
+ return !this.hasIndex() ? new ArrayList<String>() : new ArrayList<String>(index.getSequenceNames());
}
@Override
public boolean hasIndex() {
+ if(index == null && this.needCheckForIndex){
+ try {
+ this.loadIndex();
+ } catch (IOException e) {
+ throw new TribbleException("Error loading index file: " + e.getMessage(), e);
+ }
+ }
return index != null;
}
@@ -202,7 +227,7 @@ public class TribbleIndexedFeatureReader<T extends Feature, SOURCE> extends Abst
*/
public CloseableTribbleIterator<T> query(final String chr, final int start, final int end) throws IOException {
- if (index == null) {
+ if (!this.hasIndex()) {
throw new TribbleException("Index not found for: " + path);
}
diff --git a/src/java/org/broad/tribble/index/AbstractIndex.java b/src/java/org/broad/tribble/index/AbstractIndex.java
index 24ed394..a4aa781 100644
--- a/src/java/org/broad/tribble/index/AbstractIndex.java
+++ b/src/java/org/broad/tribble/index/AbstractIndex.java
@@ -18,11 +18,13 @@
package org.broad.tribble.index;
+import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
import java.io.File;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
@@ -34,14 +36,26 @@ import java.util.*;
*
* @author jrobinso
*/
-public abstract class AbstractIndex implements Index {
+public abstract class AbstractIndex implements MutableIndex {
+
+ public enum IndexType {
+ LINEAR(1),
+ INTERVAL_TREE(2);
+ public final int fileHeaderTypeIdentifier;
+
+ IndexType(int fileHeaderTypeIdentifier) {
+ this.fileHeaderTypeIdentifier = fileHeaderTypeIdentifier;
+ }
+ }
// todo -- up to version 4 and use ETag to detect out of date
// todo -- inode number + size in bytes + modification time
// todo -- remove MD5
// the current version of the index
- public static int VERSION = 3;
+ public static final int VERSION = 3;
+ public static final int MAGIC_NUMBER = 1480870228; // byte[]{'T', 'I', 'D', 'X'};
+
private final static String NO_MD5 = "";
private final static long NO_FILE_SIZE = -1L;
@@ -82,14 +96,14 @@ public abstract class AbstractIndex implements Index {
* @param obj
* @return true if this and obj are 'effectively' equivalent data structures.
*/
- public boolean equalsIgnoreProperties(Object obj) {
+ public boolean equalsIgnoreProperties(final Object obj) {
if (this == obj) return true;
if (!(obj instanceof AbstractIndex)) {
System.err.printf("equals: %s not instance of AbstractIndex", obj);
return false;
}
- AbstractIndex other = (AbstractIndex) obj;
+ final AbstractIndex other = (AbstractIndex) obj;
if (version != other.version) {
System.err.printf("equals version: this %d != other %d%n", version, other.version);
@@ -138,16 +152,16 @@ public abstract class AbstractIndex implements Index {
*
* @param featureFile the feature file to create an index from
*/
- public AbstractIndex(String featureFile) {
+ public AbstractIndex(final String featureFile) {
this(new File(featureFile));
}
- public AbstractIndex(File featureFile) {
+ public AbstractIndex(final File featureFile) {
this();
this.indexedFile = featureFile;
}
- public AbstractIndex(AbstractIndex parent) {
+ public AbstractIndex(final AbstractIndex parent) {
this();
this.version = parent.version;
this.indexedFile = parent.indexedFile;
@@ -158,6 +172,17 @@ public abstract class AbstractIndex implements Index {
this.properties = (LinkedHashMap<String, String>) parent.properties.clone();
}
+ protected void validateIndexHeader(final int indexType, final LittleEndianInputStream dis) throws IOException {
+ final int magicNumber = dis.readInt();
+ if (magicNumber != MAGIC_NUMBER) {
+ throw new TribbleException(String.format("Unexpected magic number %d", magicNumber));
+ }
+ final int type = dis.readInt();
+ if (type != indexType) {
+ throw new TribbleException(String.format("Unexpected index type %d", type));
+ }
+
+ }
/**
* check the current version against the version we read in
@@ -192,11 +217,11 @@ public abstract class AbstractIndex implements Index {
return version;
}
- public void setMD5(String md5) {
+ public void setMD5(final String md5) {
this.indexedFileMD5 = md5;
}
- public boolean containsChromosome(String chr) {
+ public boolean containsChromosome(final String chr) {
return chrIndices.containsKey(chr);
}
@@ -214,10 +239,8 @@ public abstract class AbstractIndex implements Index {
* @param dos the little endian output stream
* @throws IOException an exception when we can't write to the file
*/
- private void writeHeader(LittleEndianOutputStream dos) throws IOException {
- int magicNumber = 1480870228; // byte[]{'T', 'I', 'D', 'X'};
-
- dos.writeInt(magicNumber);
+ private void writeHeader(final LittleEndianOutputStream dos) throws IOException {
+ dos.writeInt(MAGIC_NUMBER);
dos.writeInt(getType());
dos.writeInt(version);
dos.writeString(indexedFile.getAbsolutePath());
@@ -228,7 +251,7 @@ public abstract class AbstractIndex implements Index {
// Properties (Version 3 and later)
dos.writeInt(properties.size());
- for (Map.Entry<String, String> prop : properties.entrySet()) {
+ for (final Map.Entry<String, String> prop : properties.entrySet()) {
dos.writeString(prop.getKey());
dos.writeString(prop.getValue());
}
@@ -240,7 +263,7 @@ public abstract class AbstractIndex implements Index {
* @param dis the little endian input stream
* @throws IOException if we fail to read from the file at any point
*/
- private void readHeader(LittleEndianInputStream dis) throws IOException {
+ private void readHeader(final LittleEndianInputStream dis) throws IOException {
version = dis.readInt();
indexedFile = new File(dis.readString());
@@ -255,8 +278,8 @@ public abstract class AbstractIndex implements Index {
if (version >= 3) {
int nProperties = dis.readInt();
while (nProperties-- > 0) {
- String key = dis.readString();
- String value = dis.readString();
+ final String key = dis.readString();
+ final String value = dis.readString();
properties.put(key, value);
}
}
@@ -269,8 +292,8 @@ public abstract class AbstractIndex implements Index {
* @param dis
* @throws IOException
*/
- private void readSequenceDictionary(LittleEndianInputStream dis) throws IOException {
- int size = dis.readInt();
+ private void readSequenceDictionary(final LittleEndianInputStream dis) throws IOException {
+ final int size = dis.readInt();
if (size < 0) throw new IllegalStateException("Size of the sequence dictionary entries is negative");
for (int x = 0; x < size; x++) {
dis.readString();
@@ -278,15 +301,15 @@ public abstract class AbstractIndex implements Index {
}
}
- public LinkedHashSet<String> getSequenceNames() {
- return new LinkedHashSet(chrIndices.keySet());
+ public List<String> getSequenceNames() {
+ return new ArrayList<String>(chrIndices.keySet());
}
- public List<Block> getBlocks(String chr, int start, int end) {
+ public List<Block> getBlocks(final String chr, final int start, final int end) {
return getChrIndex(chr).getBlocks(start, end);
}
- public List<Block> getBlocks(String chr) {
+ public List<Block> getBlocks(final String chr) {
return getChrIndex(chr).getBlocks();
}
@@ -296,7 +319,7 @@ public abstract class AbstractIndex implements Index {
* @throws IllegalArgumentException if {@code chr} not found
*/
private final ChrIndex getChrIndex(final String chr) {
- ChrIndex chrIdx = chrIndices.get(chr);
+ final ChrIndex chrIdx = chrIndices.get(chr);
if (chrIdx == null) {
throw new IllegalArgumentException("getBlocks() called with of unknown contig " + chr);
} else {
@@ -304,17 +327,27 @@ public abstract class AbstractIndex implements Index {
}
}
- public void write(LittleEndianOutputStream stream) throws IOException {
+ public void write(final LittleEndianOutputStream stream) throws IOException {
writeHeader(stream);
//# of chromosomes
stream.writeInt(chrIndices.size());
- for (ChrIndex chrIdx : chrIndices.values()) {
+ for (final ChrIndex chrIdx : chrIndices.values()) {
chrIdx.write(stream);
}
}
- public void read(LittleEndianInputStream dis) throws IOException {
+ @Override
+ public void writeBasedOnFeatureFile(final File featureFile) throws IOException {
+ if (!featureFile.isFile()) return;
+ final LittleEndianOutputStream idxStream =
+ new LittleEndianOutputStream(new FileOutputStream(Tribble.indexFile(featureFile)));
+ write(idxStream);
+ idxStream.close();
+
+ }
+
+ public void read(final LittleEndianInputStream dis) throws IOException {
try {
readHeader(dis);
@@ -322,14 +355,14 @@ public abstract class AbstractIndex implements Index {
chrIndices = new LinkedHashMap<String, ChrIndex>(nChromosomes);
while (nChromosomes-- > 0) {
- ChrIndex chrIdx = (ChrIndex) getChrIndexClass().newInstance();
+ final ChrIndex chrIdx = (ChrIndex) getChrIndexClass().newInstance();
chrIdx.read(dis);
chrIndices.put(chrIdx.getName(), chrIdx);
}
- } catch (InstantiationException e) {
+ } catch (final InstantiationException e) {
throw new TribbleException.UnableToCreateCorrectIndexType("Unable to create class " + getChrIndexClass(), e);
- } catch (IllegalAccessException e) {
+ } catch (final IllegalAccessException e) {
throw new TribbleException.UnableToCreateCorrectIndexType("Unable to create class " + getChrIndexClass(), e);
} finally {
dis.close();
@@ -340,7 +373,7 @@ public abstract class AbstractIndex implements Index {
protected void printIndexInfo() {
System.out.println(String.format("Index for %s with %d indices", indexedFile, chrIndices.size()));
- BlockStats stats = getBlockStats(true);
+ final BlockStats stats = getBlockStats(true);
System.out.println(String.format(" total blocks %d", stats.total));
System.out.println(String.format(" total empty blocks %d", stats.empty));
}
@@ -349,16 +382,16 @@ public abstract class AbstractIndex implements Index {
long total = 0, empty = 0, objects = 0, size = 0;
}
- protected BlockStats getBlockStats(boolean logDetails) {
- BlockStats stats = new BlockStats();
- for (Map.Entry<String, ChrIndex> elt : chrIndices.entrySet()) {
- List<Block> blocks = elt.getValue().getBlocks();
+ protected BlockStats getBlockStats(final boolean logDetails) {
+ final BlockStats stats = new BlockStats();
+ for (final Map.Entry<String, ChrIndex> elt : chrIndices.entrySet()) {
+ final List<Block> blocks = elt.getValue().getBlocks();
if (blocks != null) {
- int nBlocks = blocks.size();
+ final int nBlocks = blocks.size();
int nEmptyBlocks = 0;
- for (Block b : elt.getValue().getBlocks()) {
+ for (final Block b : elt.getValue().getBlocks()) {
if (b.getSize() == 0) nEmptyBlocks++;
}
stats.empty += nEmptyBlocks;
@@ -373,14 +406,18 @@ public abstract class AbstractIndex implements Index {
}
protected String statsSummary() {
- BlockStats stats = getBlockStats(false);
+ final BlockStats stats = getBlockStats(false);
return String.format("%12d blocks (%12d empty (%.2f%%))", stats.total, stats.empty, (100.0 * stats.empty) / stats.total);
}
- public void addProperty(String key, String value) {
+ public void addProperty(final String key, final String value) {
properties.put(key, value);
}
+ public void addProperties(final Map<String, String> properties) {
+ this.properties.putAll(properties);
+ }
+
/**
* return a mapping of name to property value
*
diff --git a/src/java/org/broad/tribble/index/Block.java b/src/java/org/broad/tribble/index/Block.java
index 4cd153e..0308f0b 100644
--- a/src/java/org/broad/tribble/index/Block.java
+++ b/src/java/org/broad/tribble/index/Block.java
@@ -28,14 +28,14 @@ package org.broad.tribble.index;
*/
public class Block {
- private long startPosition;
+ private final long startPosition;
private long size;
/**
* @param startPosition in bytes
* @param size in bytes
*/
- public Block(long startPosition, long size) {
+ public Block(final long startPosition, final long size) {
this.startPosition = startPosition;
this.size = size;
}
@@ -56,7 +56,7 @@ public class Block {
* Sets the size based on the provided {@code endPosition}
* @param endPosition Where the block ends, in bytes
*/
- public void setEndPosition(long endPosition) {
+ public void setEndPosition(final long endPosition) {
if(endPosition < startPosition)
throw new IllegalArgumentException("Attempting to set block end position to " +
endPosition + " which is before the start of " + startPosition);
@@ -65,16 +65,18 @@ public class Block {
}
/**
- * @return the # of bytes in this block
+ * @return the # of bytes in this block. Note that for block-compressed files, this is not truly the
+ * size of the block in the file. getEndPosition should be used to determine the virtual file offset
+ * of the end of the region of interest.
*/
public long getSize() {
return size;
}
- public boolean equals(Object obj) {
+ public boolean equals(final Object obj) {
if ( this == obj ) return true;
if ( ! (obj instanceof Block) ) return false;
- Block otherBlock = (Block)obj;
+ final Block otherBlock = (Block)obj;
return this.startPosition == otherBlock.startPosition && this.size == otherBlock.size;
}
}
diff --git a/src/java/org/broad/tribble/index/DynamicIndexCreator.java b/src/java/org/broad/tribble/index/DynamicIndexCreator.java
index f122efc..629df96 100644
--- a/src/java/org/broad/tribble/index/DynamicIndexCreator.java
+++ b/src/java/org/broad/tribble/index/DynamicIndexCreator.java
@@ -39,11 +39,12 @@ import java.util.TreeMap;
/**
* A DynamicIndexCreator creates the proper index based on an {@link IndexFactory.IndexBalanceApproach} and
- * the characteristics of the file.
+ * the characteristics of the file. Ultimately this is either a LinearIndex or an IntervalTreeIndex, with index
+ * parameters based on whether seek time or file size is to be minimized.
*/
-public class DynamicIndexCreator implements IndexCreator {
+public class DynamicIndexCreator extends TribbleIndexCreator {
IndexFactory.IndexBalanceApproach iba;
- Map<IndexFactory.IndexType,IndexCreator> creators;
+ Map<IndexFactory.IndexType,TribbleIndexCreator> creators;
/**
* we're interested in two stats:
@@ -57,38 +58,33 @@ public class DynamicIndexCreator implements IndexCreator {
Feature lastFeature = null;
File inputFile;
- public DynamicIndexCreator(IndexFactory.IndexBalanceApproach iba) {
+ public DynamicIndexCreator(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
this.iba = iba;
- }
-
- public void initialize(File inputFile, int binSize) {
// get a list of index creators
this.inputFile = inputFile;
creators = getIndexCreators(inputFile,iba);
}
- public int defaultBinSize() { return -1; }
- public int getBinSize() { return -1; }
-
- public Index finalizeIndex(long finalFilePosition) {
+ public Index finalizeIndex(final long finalFilePosition) {
// finalize all of the indexes
// return the score of the indexes we've generated
- Map<Double,IndexCreator> mapping = scoreIndexes((double)featureCount/(double)basesSeen, creators, longestFeatureLength, iba);
- IndexCreator creator = getMinIndex(mapping, this.iba);
+ final Map<Double,TribbleIndexCreator> mapping = scoreIndexes((double)featureCount/(double)basesSeen, creators, longestFeatureLength, iba);
+ final TribbleIndexCreator creator = getMinIndex(mapping, this.iba);
- // Now let's finalize and create the index itself
- Index idx = creator.finalizeIndex(finalFilePosition);
- idx.finalizeIndex();
+ for (final Map.Entry<String, String> entry : properties.entrySet()) {
+ creator.addProperty(entry.getKey(), entry.getValue());
+ }
// add our statistics to the file
- idx.addProperty("FEATURE_LENGTH_MEAN",String.valueOf(stats.mean()));
- idx.addProperty("FEATURE_LENGTH_STD_DEV",String.valueOf(stats.standardDeviation()));
- idx.addProperty("MEAN_FEATURE_VARIANCE",String.valueOf(stats.variance()));
+ creator.addProperty("FEATURE_LENGTH_MEAN",String.valueOf(stats.mean()));
+ creator.addProperty("FEATURE_LENGTH_STD_DEV",String.valueOf(stats.standardDeviation()));
+ creator.addProperty("MEAN_FEATURE_VARIANCE",String.valueOf(stats.variance()));
// add the feature count
- idx.addProperty("FEATURE_COUNT",String.valueOf(featureCount));
+ creator.addProperty("FEATURE_COUNT",String.valueOf(featureCount));
- return idx;
+ // Now let's finalize and create the index itself
+ return creator.finalizeIndex(finalFilePosition);
}
/**
@@ -96,18 +92,16 @@ public class DynamicIndexCreator implements IndexCreator {
* @param inputFile the input file to use to create the indexes
* @return a map of index type to the best index for that balancing approach
*/
- private Map<IndexFactory.IndexType,IndexCreator> getIndexCreators(File inputFile, IndexFactory.IndexBalanceApproach iba) {
- Map<IndexFactory.IndexType,IndexCreator> creators = new HashMap<IndexFactory.IndexType,IndexCreator>();
+ private Map<IndexFactory.IndexType,TribbleIndexCreator> getIndexCreators(final File inputFile, final IndexFactory.IndexBalanceApproach iba) {
+ final Map<IndexFactory.IndexType,TribbleIndexCreator> creators = new HashMap<IndexFactory.IndexType,TribbleIndexCreator>();
if (iba == IndexFactory.IndexBalanceApproach.FOR_SIZE) {
// add a linear index with the default bin size
- LinearIndexCreator linearNormal = new LinearIndexCreator();
- linearNormal.initialize(inputFile, linearNormal.defaultBinSize());
+ final LinearIndexCreator linearNormal = new LinearIndexCreator(inputFile, LinearIndexCreator.DEFAULT_BIN_WIDTH);
creators.put(IndexFactory.IndexType.LINEAR,linearNormal);
// create a tree index with the default size
- IntervalIndexCreator treeNormal = new IntervalIndexCreator();
- treeNormal.initialize(inputFile, treeNormal.defaultBinSize());
+ final IntervalIndexCreator treeNormal = new IntervalIndexCreator(inputFile, IntervalIndexCreator.DEFAULT_FEATURE_COUNT);
creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeNormal);
}
@@ -115,13 +109,13 @@ public class DynamicIndexCreator implements IndexCreator {
// values were determined experimentally
if (iba == IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) {
// create a linear index with a small bin size
- LinearIndexCreator linearSmallBin = new LinearIndexCreator();
- linearSmallBin.initialize(inputFile, Math.max(200, linearSmallBin.defaultBinSize() / 4));
+ final LinearIndexCreator linearSmallBin =
+ new LinearIndexCreator(inputFile, Math.max(200, LinearIndexCreator.DEFAULT_BIN_WIDTH / 4));
creators.put(IndexFactory.IndexType.LINEAR,linearSmallBin);
// create a tree index with a small index size
- IntervalIndexCreator treeSmallBin = new IntervalIndexCreator();
- treeSmallBin.initialize(inputFile, Math.max(20, treeSmallBin.defaultBinSize() / 8));
+ final IntervalIndexCreator treeSmallBin =
+ new IntervalIndexCreator(inputFile, Math.max(20, IntervalIndexCreator.DEFAULT_FEATURE_COUNT / 8));
creators.put(IndexFactory.IndexType.INTERVAL_TREE,treeSmallBin);
}
@@ -129,7 +123,7 @@ public class DynamicIndexCreator implements IndexCreator {
}
- public void addFeature(Feature f, long filePosition) {
+ public void addFeature(final Feature f, final long filePosition) {
// protected static Map<Double,Index> createIndex(FileBasedFeatureIterator<Feature> iterator, Map<IndexType,IndexCreator> creators, IndexBalanceApproach iba) {
// feed each feature to the indexes we've created
// first take care of the stats
@@ -145,7 +139,7 @@ public class DynamicIndexCreator implements IndexCreator {
stats.push(longestFeatureLength);
// now feed the feature to each of our creators
- for (IndexCreator creator : creators.values()) {
+ for (final IndexCreator creator : creators.values()) {
creator.addFeature(f,filePosition);
}
@@ -173,18 +167,18 @@ public class DynamicIndexCreator implements IndexCreator {
* @param iba the index balancing approach
* @return the best index available for the target indexes
*/
- protected static LinkedHashMap<Double,IndexCreator> scoreIndexes(double densityOfFeatures, Map<IndexFactory.IndexType,IndexCreator> indexes, int longestFeature, IndexFactory.IndexBalanceApproach iba) {
+ protected static LinkedHashMap<Double,TribbleIndexCreator> scoreIndexes(final double densityOfFeatures, final Map<IndexFactory.IndexType,TribbleIndexCreator> indexes, final int longestFeature, final IndexFactory.IndexBalanceApproach iba) {
if (indexes.size() < 1) throw new IllegalArgumentException("Please specify at least one index to evaluate");
- LinkedHashMap<Double,IndexCreator> scores = new LinkedHashMap<Double,IndexCreator>();
+ final LinkedHashMap<Double,TribbleIndexCreator> scores = new LinkedHashMap<Double,TribbleIndexCreator>();
- for (Map.Entry<IndexFactory.IndexType,IndexCreator> entry : indexes.entrySet()) {
+ for (final Map.Entry<IndexFactory.IndexType,TribbleIndexCreator> entry : indexes.entrySet()) {
// we have different scoring
if (entry.getValue() instanceof LinearIndexCreator) {
- double binSize = entry.getValue().getBinSize();
+ final double binSize = ((LinearIndexCreator)(entry.getValue())).getBinSize();
scores.put(binSize * densityOfFeatures * Math.ceil((double) longestFeature / binSize), entry.getValue());
} else if (entry.getValue() instanceof IntervalIndexCreator) {
- scores.put((double) entry.getValue().getBinSize(), entry.getValue());
+ scores.put((double) ((IntervalIndexCreator)entry.getValue()).getFeaturesPerInterval(), entry.getValue());
} else {
throw new TribbleException.UnableToCreateCorrectIndexType("Unknown index type, we don't have a scoring method for " + entry.getValue().getClass());
}
@@ -197,12 +191,19 @@ public class DynamicIndexCreator implements IndexCreator {
* @param scores the list of scaled features/bin scores for each index type
* @return the best score <b>index value</b>
*/
- private IndexCreator getMinIndex(Map<Double,IndexCreator> scores, IndexFactory.IndexBalanceApproach iba) {
- TreeMap<Double,IndexCreator> map = new TreeMap<Double,IndexCreator>();
+ private TribbleIndexCreator getMinIndex(final Map<Double,TribbleIndexCreator> scores, final IndexFactory.IndexBalanceApproach iba) {
+ final TreeMap<Double,TribbleIndexCreator> map = new TreeMap<Double,TribbleIndexCreator>();
map.putAll(scores);
// if we are optimizing for seek time, choose the lowest score (adjusted features/bin value), if for storage size, choose the opposite
- IndexCreator idx = (iba != IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) ? map.get(map.lastKey()) : map.get(map.firstKey());
+ final TribbleIndexCreator idx = (iba != IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME) ? map.get(map.lastKey()) : map.get(map.firstKey());
return idx;
}
+
+ @Override
+ public void addProperty(final String key, final String value) {
+ for (final TribbleIndexCreator creator : creators.values()) {
+ creator.addProperty(key, value);
+ }
+ }
}
diff --git a/src/java/org/broad/tribble/index/Index.java b/src/java/org/broad/tribble/index/Index.java
index 651106f..f60e23a 100644
--- a/src/java/org/broad/tribble/index/Index.java
+++ b/src/java/org/broad/tribble/index/Index.java
@@ -23,11 +23,10 @@
*/
package org.broad.tribble.index;
-import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
+import java.io.File;
import java.io.IOException;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
@@ -38,7 +37,7 @@ import java.util.Map;
*/
public interface Index {
/**
- *
+ * Query the index.
* @param chr the chromosome
* @param start the start position
* @param end the end position
@@ -53,10 +52,9 @@ public interface Index {
public boolean isCurrentVersion();
/**
- * get a list of the sequence names we've seen during indexing, in order
- * @return a LinkedHashSet, which guarantees the ordering
+ * @return a list of the sequence names we've seen during indexing, in order
*/
- LinkedHashSet<String> getSequenceNames();
+ List<String> getSequenceNames();
/**
* @param chr the chromosome (or contig) name
@@ -65,35 +63,21 @@ public interface Index {
public boolean containsChromosome(final String chr);
/**
- * read in the index
- * @param stream an input stream to read from
- * @throws IOException if we have problems reading the index from the stream
- */
- public void read(LittleEndianInputStream stream) throws IOException;
-
- /**
* all indexes are writable to disk
- * @param stream the stream to write the index to
+ * @param stream the stream to write the index to. Caller must close after invocation.
* @throws IOException if the index is unable to write to the specified location
*/
public void write(LittleEndianOutputStream stream) throws IOException;
/**
- * this method allows properties to added to the index; warning: if you don't write out the index
- * to disk you'll lose these changes.
- * @param key the key
- * @param value the value, stored as a string, though it may represent an different underlying type
- */
- public void addProperty(String key, String value);
-
- /**
- * To be called after the index has been created and is ready to be used. Filling in final metadata or
- * otherwise optimizes the index given that no more records will be added
+ * Write an appropriately named and located Index file based on the name and location of the featureFile.
+ * If featureFile is not a normal file, the index will silently not be written.
+     * @param featureFile the file of features for which an appropriately named index file will be written
*/
- public void finalizeIndex();
+ public void writeBasedOnFeatureFile(File featureFile) throws IOException;
/**
- * @return get the list of properties for this index
+ * @return get the list of properties for this index. Returns null if no properties.
*/
public Map<String,String> getProperties();
diff --git a/src/java/org/broad/tribble/index/IndexCreator.java b/src/java/org/broad/tribble/index/IndexCreator.java
index 9105360..87e0006 100644
--- a/src/java/org/broad/tribble/index/IndexCreator.java
+++ b/src/java/org/broad/tribble/index/IndexCreator.java
@@ -25,8 +25,6 @@ package org.broad.tribble.index;
import org.broad.tribble.Feature;
-import java.io.File;
-
/**
*
* An interface for creating indexes
@@ -35,17 +33,6 @@ import java.io.File;
*/
public interface IndexCreator {
/**
- * Initialize the index creator with the input file and the bin size. Be warned, the bin size
- * is HIGHLY dependent on the index implementation; in one implementation 100 may result in excessively
- * large files, and other this may be too small for effective discernment between bins. It's recommended to
- * use the defaultBinSize() function to get an appropriately sized bin.
- *
- * @param inputFile the input file
- * @param binSize the bin size
- */
- public void initialize(File inputFile, int binSize);
-
- /**
* Add a feature to the index
* @param feature the feature, of which start, end, and contig must be filled in
* @param filePosition the current file position, at the beginning of the specified feature
@@ -58,18 +45,6 @@ public interface IndexCreator {
* @return an index object
*/
public Index finalizeIndex(long finalFilePosition);
-
- /**
- * The default bin size for this index type; use this unless you're aware of the nuances of the particular index type.
- * @return the default bin size appropriate for this index type
- */
- public int defaultBinSize();
-
- /**
- * @eturn the bin size of associated with the index with are creating
- * @return the index bin size
- */
- public int getBinSize();
}
diff --git a/src/java/org/broad/tribble/index/IndexFactory.java b/src/java/org/broad/tribble/index/IndexFactory.java
index bccde40..f92281f 100644
--- a/src/java/org/broad/tribble/index/IndexFactory.java
+++ b/src/java/org/broad/tribble/index/IndexFactory.java
@@ -23,26 +23,31 @@
*/
package org.broad.tribble.index;
+import net.sf.samtools.Defaults;
+import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.*;
import org.broad.tribble.index.interval.IntervalIndexCreator;
import org.broad.tribble.index.interval.IntervalTreeIndex;
import org.broad.tribble.index.linear.LinearIndex;
import org.broad.tribble.index.linear.LinearIndexCreator;
-import org.broad.tribble.readers.LocationAware;
+import org.broad.tribble.index.tabix.TabixFormat;
+import org.broad.tribble.index.tabix.TabixIndex;
+import net.sf.samtools.util.LocationAware;
+import org.broad.tribble.index.tabix.TabixIndexCreator;
import org.broad.tribble.readers.PositionalBufferedStream;
import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broad.tribble.util.ParsingUtils;
import java.io.*;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
+import java.lang.reflect.Constructor;
+import java.util.*;
import java.util.zip.GZIPInputStream;
/**
* Factory class for creating indexes. It is the responsibility of this class to determine and create the
- * correct index type from the input file or stream
+ * correct index type from the input file or stream. Only LinearIndex and IntervalTreeIndex are supported
+ * by this factory.
*/
public class IndexFactory {
/** We can optimize index-file-creation for different factors. As of this writing, those are index-file size or seeking time. */
@@ -55,11 +60,13 @@ public class IndexFactory {
* an enum that contains all of the information about the index types, and how to create them
*/
public enum IndexType {
- LINEAR(1, LinearIndexCreator.class, LinearIndex.class, LinearIndexCreator.DEFAULT_BIN_WIDTH),
- INTERVAL_TREE(2, IntervalIndexCreator.class, IntervalTreeIndex.class, IntervalIndexCreator.DEFAULT_FEATURE_COUNT),
- TABIX(3, null, null, -1);
+ LINEAR(LinearIndex.MAGIC_NUMBER, LinearIndex.INDEX_TYPE, LinearIndexCreator.class, LinearIndex.class, LinearIndexCreator.DEFAULT_BIN_WIDTH),
+ INTERVAL_TREE(IntervalTreeIndex.MAGIC_NUMBER, IntervalTreeIndex.INDEX_TYPE, IntervalIndexCreator.class, IntervalTreeIndex.class, IntervalIndexCreator.DEFAULT_FEATURE_COUNT),
+ // Tabix index initialization requires additional information, so generic construction won't work, thus indexCreatorClass is null.
+ TABIX(TabixIndex.MAGIC_NUMBER, null, null, TabixIndex.class, -1);
- private final int indexValue;
+ private final int magicNumber;
+ private final Integer tribbleIndexType;
private final Class<IndexCreator> indexCreatorClass;
private final int defaultBinSize;
private final Class<Index> indexType;
@@ -71,9 +78,9 @@ public class IndexFactory {
public IndexCreator getIndexCreator() {
try {
return indexCreatorClass.newInstance();
- } catch ( InstantiationException e ) {
+ } catch ( final InstantiationException e ) {
throw new TribbleException("Couldn't make index creator in " + this, e);
- } catch ( IllegalAccessException e ) {
+ } catch ( final IllegalAccessException e ) {
throw new TribbleException("Couldn't make index creator in " + this, e);
}
}
@@ -82,31 +89,56 @@ public class IndexFactory {
return indexCreatorClass != null;
}
- IndexType(final int headerValue, final Class creator, final Class indexClass, final int defaultBinSize) {
- indexValue = headerValue;
+ IndexType(final int magicNumber, final Integer tribbleIndexType, final Class creator, final Class indexClass, final int defaultBinSize) {
+ this.magicNumber = magicNumber;
+ this.tribbleIndexType = tribbleIndexType;
indexCreatorClass = creator;
indexType = indexClass;
this.defaultBinSize = defaultBinSize;
}
- public int getHeaderValue() {
- return indexValue;
+ public Integer getTribbleIndexType() {
+ return tribbleIndexType;
}
public Class getIndexType() {
return indexType;
}
+ public int getMagicNumber() { return magicNumber; }
+
/**
*
- * @param headerValue
+ * @param is InputStream of index. This will be reset to location it was at when method was invoked.
* @return The {@code IndexType} based on the {@code headerValue}
* @throws TribbleException.UnableToCreateCorrectIndexType
*/
- public static IndexType getIndexType(final int headerValue) {
- for (final IndexType type : IndexType.values())
- if (type.indexValue == headerValue) return type;
- throw new TribbleException.UnableToCreateCorrectIndexType("Unknown index type value" + headerValue);
+ public static IndexType getIndexType(final BufferedInputStream is) {
+ // Currently only need 8 bytes, so this should be plenty
+ is.mark(128);
+ final LittleEndianInputStream dis = new LittleEndianInputStream(is);
+ final int magicNumber;
+ final int type;
+
+ try {
+ // Read the type and version, then create the appropriate type
+ magicNumber = dis.readInt();
+ // This is not appropriate for all types, but it doesn't hurt to read it.
+ type = dis.readInt();
+ is.reset();
+
+ for (final IndexType indexType : IndexType.values()) {
+ if (indexType.magicNumber == magicNumber &&
+ (indexType.tribbleIndexType == null || indexType.tribbleIndexType == type)) {
+ return indexType;
+ }
+ }
+ } catch (final IOException e) {
+ throw new TribbleException("Problem detecting index type", e);
+ }
+
+ throw new TribbleException.UnableToCreateCorrectIndexType(
+ String.format("Unknown index type. magic number: 0x%x; type %d", magicNumber, type));
}
}
@@ -116,42 +148,36 @@ public class IndexFactory {
* at run time by reading the type flag in the file.
*
* @param indexFile from which to load the index
- * @return the parsed index object
*/
public static Index loadIndex(final String indexFile) {
- Index idx = null;
- InputStream is = null;
- LittleEndianInputStream dis = null;
+ final Index idx = null;
+ BufferedInputStream bufferedInputStream = null;
+ final LittleEndianInputStream dis = null;
try {
+ InputStream inputStream = ParsingUtils.openInputStream(indexFile);
if (indexFile.endsWith(".gz")) {
- is = new BufferedInputStream(new GZIPInputStream(ParsingUtils.openInputStream(indexFile)), 512000);
- } else {
- is = new BufferedInputStream(ParsingUtils.openInputStream(indexFile), 512000);
+ inputStream = new GZIPInputStream(inputStream);
}
+ // Must be buffered, because getIndexType uses mark and reset
+ bufferedInputStream = new BufferedInputStream(inputStream, Defaults.NON_ZERO_BUFFER_SIZE);
+ final Class indexClass = IndexType.getIndexType(bufferedInputStream).getIndexType();
- dis = new LittleEndianInputStream(is);
+ final Constructor ctor = indexClass.getConstructor(InputStream.class);
- // Read the type and version, then create the appropriate type
- final int magicNumber = dis.readInt();
- final int type = dis.readInt();
- final Class indexClass = IndexType.getIndexType(type).getIndexType();
-
- idx = (Index) indexClass.newInstance();
- idx.read(dis);
- } catch (IOException ex) {
+ return (Index) ctor.newInstance(bufferedInputStream);
+ } catch (final IOException ex) {
throw new TribbleException.UnableToReadIndexFile("Unable to read index file", indexFile, ex);
- } catch (Exception ex) {
+ } catch (final Exception ex) {
throw new RuntimeException(ex);
} finally {
try {
- if (is != null) is.close();
+ if (bufferedInputStream != null) bufferedInputStream.close();
if (dis != null) dis.close();
//log.info(String.format("Closed %s and %s", is, dis));
- } catch (IOException e) {
+ } catch (final IOException e) {
//log.error("Error closing indexFile: " + indexFile, e);
}
}
- return idx;
}
@@ -160,23 +186,23 @@ public class IndexFactory {
*
* @param inputFile the input file to load features from
* @param codec the codec to use for decoding records
- * @return a index
*/
- public static Index createLinearIndex(final File inputFile, final FeatureCodec codec) {
- return createIndex(inputFile, codec, IndexType.LINEAR);
+ public static LinearIndex createLinearIndex(final File inputFile, final FeatureCodec codec) {
+ return createLinearIndex(inputFile, codec, LinearIndexCreator.DEFAULT_BIN_WIDTH);
}
/**
* a helper method for creating a linear binned index
*
- * @see #createIndex(File, FeatureCodec, IndexType, int)
* @param inputFile the input file to load features from
* @param codec the codec to use for decoding records
* @param binSize the bin size
- * @return a index
*/
- public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createLinearIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec, final int binSize) {
- return createIndex(inputFile, codec, IndexType.LINEAR, binSize);
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> LinearIndex createLinearIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final int binSize) {
+ final LinearIndexCreator indexCreator = new LinearIndexCreator(inputFile, binSize);
+ return (LinearIndex)createIndex(inputFile, new FeatureIterator<FEATURE_TYPE, SOURCE_TYPE>(inputFile, codec), indexCreator);
}
/**
@@ -184,10 +210,10 @@ public class IndexFactory {
*
* @param inputFile the file containing the features
* @param codec to decode the features
- * @return
*/
- public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIntervalIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec) {
- return createIndex(inputFile, codec, IndexType.INTERVAL_TREE);
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> IntervalTreeIndex createIntervalIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec) {
+ return createIntervalIndex(inputFile, codec, IntervalIndexCreator.DEFAULT_FEATURE_COUNT);
}
@@ -196,11 +222,13 @@ public class IndexFactory {
*
* @param inputFile the input file to load features from
* @param codec the codec to use for decoding records
- * @param binSize the bin size
- * @return a index
+     * @param featuresPerInterval the number of features to store in each interval (bin) of the index
*/
- public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIntervalIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec, final int binSize) {
- return createIndex(inputFile, codec, IndexType.INTERVAL_TREE, binSize);
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> IntervalTreeIndex createIntervalIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final int featuresPerInterval) {
+ final IntervalIndexCreator indexCreator = new IntervalIndexCreator(inputFile, featuresPerInterval);
+ return (IntervalTreeIndex)createIndex(inputFile, new FeatureIterator<FEATURE_TYPE, SOURCE_TYPE>(inputFile, codec), indexCreator);
}
/**
@@ -208,7 +236,6 @@ public class IndexFactory {
*
* @param inputFile the input file to load features from
* @param codec the codec to use for decoding records
- * @return a index
*/
public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createDynamicIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec) {
return createDynamicIndex(inputFile, codec, IndexBalanceApproach.FOR_SEEK_TIME);
@@ -220,28 +247,17 @@ public class IndexFactory {
* @param inputFile the input file to load features from
* @param codec the codec to use for decoding records
* @param type the type of index to create
- * @return a index
- */
- public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec, final IndexType type) {
- return createIndex(inputFile, codec, type, type.getDefaultBinSize());
- }
-
- /**
- * Create a index of the specified type
- *
- * @param inputFile the input file to load features from
- * @param codec the codec to use for decoding records
- * @param type the type of index to create
- * @param binSize the bin size
- * @return a index
*/
- public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec, final IndexType type, final int binSize) {
- if ( ! type.canCreate() )
- throw new TribbleException("Tribble can only read, not create indices of type " + type.name());
-
- final IndexCreator idx = type.getIndexCreator();
- idx.initialize(inputFile, binSize);
- return createIndex(inputFile, new FeatureIterator<FEATURE_TYPE, SOURCE_TYPE>(inputFile, codec), idx);
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final IndexType type) {
+ switch (type) {
+ case INTERVAL_TREE: return createIntervalIndex(inputFile, codec);
+ case LINEAR: return createLinearIndex(inputFile, codec);
+ // Tabix index initialization requires additional information, so this construction method won't work.
+ case TABIX: throw new UnsupportedOperationException("Tabix indices cannot be created through a generic interface");
+ }
+ throw new IllegalArgumentException("Unrecognized IndexType " + type);
}
/**
@@ -269,15 +285,31 @@ public class IndexFactory {
* @param inputFile the input file to load features from
* @param codec the codec to use for decoding records
* @param iba the index balancing approach
- * @return a index
*/
- public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createDynamicIndex(final File inputFile, final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec, final IndexBalanceApproach iba) {
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> Index createDynamicIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final IndexBalanceApproach iba) {
// get a list of index creators
- final DynamicIndexCreator indexCreator = new DynamicIndexCreator(iba);
- indexCreator.initialize(inputFile, indexCreator.defaultBinSize());
+ final DynamicIndexCreator indexCreator = new DynamicIndexCreator(inputFile, iba);
return createIndex(inputFile, new FeatureIterator<FEATURE_TYPE, SOURCE_TYPE>(inputFile, codec), indexCreator);
}
+ /**
+ * @param inputFile The file to be indexed.
+ * @param codec Mechanism for reading inputFile.
+ * @param tabixFormat Header fields for TabixIndex to be produced.
+ * @param sequenceDictionary May be null, but if present may reduce memory footprint for index creation. Features
+ * in inputFile must be in the order defined by sequenceDictionary, if it is present.
+ */
+ public static <FEATURE_TYPE extends Feature, SOURCE_TYPE> TabixIndex createTabixIndex(final File inputFile,
+ final FeatureCodec<FEATURE_TYPE, SOURCE_TYPE> codec,
+ final TabixFormat tabixFormat,
+ final SAMSequenceDictionary sequenceDictionary) {
+ final TabixIndexCreator indexCreator = new TabixIndexCreator(sequenceDictionary, tabixFormat);
+ return (TabixIndex)createIndex(inputFile, new FeatureIterator<FEATURE_TYPE, SOURCE_TYPE>(inputFile, codec), indexCreator);
+ }
+
+
private static Index createIndex(final File inputFile, final FeatureIterator iterator, final IndexCreator creator) {
Feature lastFeature = null;
@@ -330,7 +362,7 @@ public class IndexFactory {
*/
static class FeatureIterator<FEATURE_TYPE extends Feature, SOURCE> implements CloseableTribbleIterator<Feature> {
// the stream we use to get features
- private SOURCE source;
+ private final SOURCE source;
// the next feature
private Feature nextFeature;
// our codec
@@ -363,7 +395,7 @@ public class IndexFactory {
final FeatureCodecHeader header = this.codec.readHeader(source);
codec.close(source);
return header;
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new TribbleException.InvalidHeader("Error reading header " + e.getMessage());
}
}
@@ -374,9 +406,9 @@ public class IndexFactory {
final PositionalBufferedStream pbs = new PositionalBufferedStream(is);
if ( skip > 0 ) pbs.skip(skip);
return pbs;
- } catch (FileNotFoundException e) {
+ } catch (final FileNotFoundException e) {
throw new TribbleException.FeatureFileDoesntExist("Unable to open the input file, most likely the file doesn't exist.", inputFile.getAbsolutePath());
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new TribbleException.MalformedFeatureFile("Error initializing stream", inputFile.getAbsolutePath(), e);
}
}
@@ -427,7 +459,7 @@ public class IndexFactory {
while (nextFeature == null && !codec.isDone(source)) {
nextFeature = codec.decodeLoc(source);
}
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new TribbleException.MalformedFeatureFile("Unable to read a line from the file", inputFile.getAbsolutePath(), e);
}
}
diff --git a/src/java/org/broad/tribble/readers/Positional.java b/src/java/org/broad/tribble/index/MutableIndex.java
similarity index 51%
copy from src/java/org/broad/tribble/readers/Positional.java
copy to src/java/org/broad/tribble/index/MutableIndex.java
index 0b5fb7d..e0bb991 100644
--- a/src/java/org/broad/tribble/readers/Positional.java
+++ b/src/java/org/broad/tribble/index/MutableIndex.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2013 The Broad Institute
+ * Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,38 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package org.broad.tribble.readers;
+package org.broad.tribble.index;
-import java.io.IOException;
+import java.util.Map;
/**
- * Minimal interface for an object at support getting the current position in the stream / writer / file, as well as a handful of other
- * reader-like features.
- *
- * @author depristo
+ * Some Index implementations can be modified in memory. Also, properties do not make sense for all index types.
+ * Only the relevant index implementations implement this interface.
*/
-public interface Positional extends LocationAware {
- /**
- * Is the stream done? Equivalent to ! hasNext() for an iterator?
- * @return true if the stream has reached EOF, false otherwise
- */
- public boolean isDone() throws IOException;
+public interface MutableIndex extends Index {
+ void addProperty(String key, String value);
- /**
- * Skip the next nBytes in the stream.
- * @param nBytes to skip, must be >= 0
- * @return the number of bytes actually skippped.
- * @throws IOException
- */
- public long skip(long nBytes) throws IOException;
-
- /**
- * Return the next byte in the first, without actually reading it from the stream.
- *
- * Has the same output as read()
- *
- * @return the next byte, or -1 if EOF encountered
- * @throws IOException
- */
- public int peek() throws IOException;
+ void addProperties(Map<String, String> properties);
}
diff --git a/src/java/org/broad/tribble/readers/Positional.java b/src/java/org/broad/tribble/index/TribbleIndexCreator.java
similarity index 51%
copy from src/java/org/broad/tribble/readers/Positional.java
copy to src/java/org/broad/tribble/index/TribbleIndexCreator.java
index 0b5fb7d..2c5b80f 100644
--- a/src/java/org/broad/tribble/readers/Positional.java
+++ b/src/java/org/broad/tribble/index/TribbleIndexCreator.java
@@ -1,7 +1,7 @@
/*
* The MIT License
*
- * Copyright (c) 2013 The Broad Institute
+ * Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
@@ -21,38 +21,17 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
-package org.broad.tribble.readers;
+package org.broad.tribble.index;
-import java.io.IOException;
+import java.util.LinkedHashMap;
/**
- * Minimal interface for an object at support getting the current position in the stream / writer / file, as well as a handful of other
- * reader-like features.
- *
- * @author depristo
+ * Base class for Tribble-specific index creators.
*/
-public interface Positional extends LocationAware {
- /**
- * Is the stream done? Equivalent to ! hasNext() for an iterator?
- * @return true if the stream has reached EOF, false otherwise
- */
- public boolean isDone() throws IOException;
+public abstract class TribbleIndexCreator implements IndexCreator {
+ protected LinkedHashMap<String, String> properties = new LinkedHashMap<String, String>();
- /**
- * Skip the next nBytes in the stream.
- * @param nBytes to skip, must be >= 0
- * @return the number of bytes actually skippped.
- * @throws IOException
- */
- public long skip(long nBytes) throws IOException;
-
- /**
- * Return the next byte in the first, without actually reading it from the stream.
- *
- * Has the same output as read()
- *
- * @return the next byte, or -1 if EOF encountered
- * @throws IOException
- */
- public int peek() throws IOException;
+ public void addProperty(final String key, final String value) {
+ properties.put(key, value);
+ }
}
diff --git a/src/java/org/broad/tribble/index/interval/IntervalIndexCreator.java b/src/java/org/broad/tribble/index/interval/IntervalIndexCreator.java
index 50ed432..e8ef0ec 100644
--- a/src/java/org/broad/tribble/index/interval/IntervalIndexCreator.java
+++ b/src/java/org/broad/tribble/index/interval/IntervalIndexCreator.java
@@ -19,11 +19,12 @@
package org.broad.tribble.index.interval;
import org.broad.tribble.Feature;
-import org.broad.tribble.index.interval.IntervalTreeIndex.ChrIndex;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
-import org.broad.tribble.index.IndexCreator;
-import java.io.*;
+import org.broad.tribble.index.TribbleIndexCreator;
+import org.broad.tribble.index.interval.IntervalTreeIndex.ChrIndex;
+
+import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
@@ -31,7 +32,7 @@ import java.util.LinkedList;
* Creates interval indexes from a stream of features
* @author jrobinso
*/
-public class IntervalIndexCreator implements IndexCreator {
+public class IntervalIndexCreator extends TribbleIndexCreator {
public static int DEFAULT_FEATURE_COUNT = 600;
@@ -41,23 +42,27 @@ public class IntervalIndexCreator implements IndexCreator {
*/
private int featuresPerInterval = DEFAULT_FEATURE_COUNT;
- private LinkedList<ChrIndex> chrList = new LinkedList<ChrIndex>();
+ private final LinkedList<ChrIndex> chrList = new LinkedList<ChrIndex>();
/**
* Instance variable for the number of features we currently are storing in the interval
*/
private int featureCount = 0;
- private ArrayList<MutableInterval> intervals = new ArrayList<MutableInterval>();
+ private final ArrayList<MutableInterval> intervals = new ArrayList<MutableInterval>();
File inputFile;
- public void initialize(File inputFile, int binSize) {
+ public IntervalIndexCreator(final File inputFile, final int featuresPerInterval) {
this.inputFile = inputFile;
- this.featuresPerInterval = binSize;
+ this.featuresPerInterval = featuresPerInterval;
}
- public void addFeature(Feature feature, long filePosition) {
+ public IntervalIndexCreator(final File inputFile) {
+ this(inputFile, DEFAULT_FEATURE_COUNT);
+ }
+
+ public void addFeature(final Feature feature, final long filePosition) {
// if we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one
if (chrList.size() == 0 || !chrList.getLast().getName().equals(feature.getChr())) {
// if we're creating a new chrIndex (not the first), make sure to dump the intervals to the old chrIndex
@@ -71,7 +76,7 @@ public class IntervalIndexCreator implements IndexCreator {
// if we're about to overflow the current bin, make a new one
if (featureCount >= featuresPerInterval || intervals.size() == 0) {
- MutableInterval i = new MutableInterval();
+ final MutableInterval i = new MutableInterval();
i.setStart(feature.getStart());
i.setStartFilePosition(filePosition);
if( intervals.size() > 0) intervals.get(intervals.size()-1).setEndFilePosition(filePosition);
@@ -88,7 +93,7 @@ public class IntervalIndexCreator implements IndexCreator {
* dump the intervals we have stored to the last chrList entry
* @param currentPos the current position, for the last entry in the interval list
*/
- private void addIntervalsToLastChr(long currentPos) {
+ private void addIntervalsToLastChr(final long currentPos) {
for (int x = 0; x < intervals.size(); x++) {
if (x == intervals.size()-1) intervals.get(x).setEndFilePosition(currentPos);
chrList.getLast().insert(intervals.get(x).toInterval());
@@ -100,20 +105,17 @@ public class IntervalIndexCreator implements IndexCreator {
* @param finalFilePosition the final file position, for indexes that have to close out with the final position
* @return a Tree Index
*/
- public Index finalizeIndex(long finalFilePosition) {
- IntervalTreeIndex featureIndex = new IntervalTreeIndex(inputFile.getAbsolutePath());
+ public Index finalizeIndex(final long finalFilePosition) {
+ final IntervalTreeIndex featureIndex = new IntervalTreeIndex(inputFile.getAbsolutePath());
// dump the remaining bins to the index
addIntervalsToLastChr(finalFilePosition);
featureIndex.setChrIndex(chrList);
+ featureIndex.addProperties(properties);
featureIndex.finalizeIndex();
return featureIndex;
}
- public int defaultBinSize() {
- return DEFAULT_FEATURE_COUNT;
- }
-
- public int getBinSize() {
+ public int getFeaturesPerInterval() {
return featuresPerInterval;
}
}
@@ -129,21 +131,21 @@ class MutableInterval {
private long startFilePosition;
private long endFilePosition;
- public void setStart(int start) {
+ public void setStart(final int start) {
if (start < 0) throw new IllegalArgumentException("Start must be greater than 0!");
this.start = start;
}
- public void setStop(int stop) {
+ public void setStop(final int stop) {
if (stop < 0) throw new IllegalArgumentException("Start must be greater than 0!");
this.stop = stop;
}
- public void setStartFilePosition(long startFilePosition) {
+ public void setStartFilePosition(final long startFilePosition) {
this.startFilePosition = startFilePosition;
}
- public void setEndFilePosition(long endFilePosition) {
+ public void setEndFilePosition(final long endFilePosition) {
this.endFilePosition = endFilePosition;
}
diff --git a/src/java/org/broad/tribble/index/interval/IntervalTreeIndex.java b/src/java/org/broad/tribble/index/interval/IntervalTreeIndex.java
index 7a4bb55..da98ecc 100644
--- a/src/java/org/broad/tribble/index/interval/IntervalTreeIndex.java
+++ b/src/java/org/broad/tribble/index/interval/IntervalTreeIndex.java
@@ -18,13 +18,14 @@
package org.broad.tribble.index.interval;
+import org.broad.tribble.TribbleException;
import org.broad.tribble.index.AbstractIndex;
import org.broad.tribble.index.Block;
-import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
import java.io.IOException;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
@@ -37,19 +38,24 @@ import java.util.List;
* @date Jul 9, 2010
*/
public class IntervalTreeIndex extends AbstractIndex {
+ public static final int INDEX_TYPE = IndexType.INTERVAL_TREE.fileHeaderTypeIdentifier;
/**
- * Default constructor -- used by factory methods. Do not remove.
+ * Load from file.
+ * @param inputStream This method assumes that the input stream is already buffered as appropriate. Caller
+ * should close after this object is constructed.
*/
- public IntervalTreeIndex() {
-
+ public IntervalTreeIndex(final InputStream inputStream) throws IOException {
+ final LittleEndianInputStream dis = new LittleEndianInputStream(inputStream);
+ validateIndexHeader(INDEX_TYPE, dis);
+ read(dis);
}
/**
- *
+ * Prepare to build an index.
* @param featureFile File which we are indexing
*/
- public IntervalTreeIndex(String featureFile) {
+ public IntervalTreeIndex(final String featureFile) {
super(featureFile);
}
@@ -60,7 +66,7 @@ public class IntervalTreeIndex extends AbstractIndex {
@Override
protected int getType() {
- return IndexFactory.IndexType.INTERVAL_TREE.getHeaderValue();
+ return INDEX_TYPE;
}
/**
@@ -68,7 +74,7 @@ public class IntervalTreeIndex extends AbstractIndex {
* @param chr Chromosome
* @param interval
*/
- public void insert(String chr, Interval interval) {
+ public void insert(final String chr, final Interval interval) {
ChrIndex chrIdx = (ChrIndex) chrIndices.get(chr);
if (chrIdx == null) {
chrIdx = new ChrIndex(chr);
@@ -77,17 +83,17 @@ public class IntervalTreeIndex extends AbstractIndex {
chrIdx.insert(interval);
}
- protected void setChrIndex(List<ChrIndex> indicies) {
- for (ChrIndex index : indicies) {
+ protected void setChrIndex(final List<ChrIndex> indicies) {
+ for (final ChrIndex index : indicies) {
chrIndices.put(index.getName(), index);
}
}
public void printTree() {
- for (String chr : chrIndices.keySet()) {
+ for (final String chr : chrIndices.keySet()) {
System.out.println(chr + ":");
- ChrIndex chrIdx = (ChrIndex) chrIndices.get(chr);
+ final ChrIndex chrIdx = (ChrIndex) chrIndices.get(chr);
chrIdx.printTree();
System.out.println();
}
@@ -105,7 +111,7 @@ public class IntervalTreeIndex extends AbstractIndex {
}
- public ChrIndex(String name) {
+ public ChrIndex(final String name) {
this.name = name;
tree = new IntervalTree();
}
@@ -114,7 +120,7 @@ public class IntervalTreeIndex extends AbstractIndex {
return name;
}
- public void insert(Interval iv) {
+ public void insert(final Interval iv) {
tree.insert(iv);
}
@@ -123,34 +129,34 @@ public class IntervalTreeIndex extends AbstractIndex {
}
- public List<Block> getBlocks(int start, int end) {
+ public List<Block> getBlocks(final int start, final int end) {
// Get intervals and build blocks list
- List<Interval> intervals = tree.findOverlapping(new Interval(start, end));
+ final List<Interval> intervals = tree.findOverlapping(new Interval(start, end));
// save time (and save throwing an exception) if the blocks are empty, return now
if (intervals == null || intervals.size() == 0) return new ArrayList<Block>();
- Block[] blocks = new Block[intervals.size()];
+ final Block[] blocks = new Block[intervals.size()];
int idx = 0;
- for (Interval iv : intervals) {
+ for (final Interval iv : intervals) {
blocks[idx++] = iv.getBlock();
}
// Sort blocks by start position
Arrays.sort(blocks, new Comparator<Block>() {
- public int compare(Block b1, Block b2) {
+ public int compare(final Block b1, final Block b2) {
// this is a little cryptic because the normal method (b1.getStartPosition() - b2.getStartPosition()) wraps in int space and we incorrectly sort the blocks in extreme cases
return b1.getStartPosition() - b2.getStartPosition() < 1 ? -1 : (b1.getStartPosition() - b2.getStartPosition() > 1 ? 1 : 0);
}
});
// Consolidate blocks that are close together
- List<Block> consolidatedBlocks = new ArrayList(blocks.length);
+ final List<Block> consolidatedBlocks = new ArrayList(blocks.length);
Block lastBlock = blocks[0];
consolidatedBlocks.add(lastBlock);
for (int i = 1; i < blocks.length; i++) {
- Block block = blocks[i];
+ final Block block = blocks[i];
if (block.getStartPosition() < (lastBlock.getEndPosition() + 1000)) {
lastBlock.setEndPosition(block.getEndPosition());
} else {
@@ -166,13 +172,13 @@ public class IntervalTreeIndex extends AbstractIndex {
System.out.println(tree.toString());
}
- public void write(LittleEndianOutputStream dos) throws IOException {
+ public void write(final LittleEndianOutputStream dos) throws IOException {
dos.writeString(name);
- List<Interval> intervals = tree.getIntervals();
+ final List<Interval> intervals = tree.getIntervals();
dos.writeInt(intervals.size());
- for (Interval interval : intervals) {
+ for (final Interval interval : intervals) {
dos.writeInt(interval.start);
dos.writeInt(interval.end);
dos.writeLong(interval.getBlock().getStartPosition());
@@ -181,7 +187,7 @@ public class IntervalTreeIndex extends AbstractIndex {
}
- public void read(LittleEndianInputStream dis) throws IOException {
+ public void read(final LittleEndianInputStream dis) throws IOException {
tree = new IntervalTree();
@@ -189,12 +195,12 @@ public class IntervalTreeIndex extends AbstractIndex {
int nIntervals = dis.readInt();
while (nIntervals-- > 0) {
- int start = dis.readInt();
- int end = dis.readInt();
- long pos = dis.readLong();
- int size = dis.readInt();
+ final int start = dis.readInt();
+ final int end = dis.readInt();
+ final long pos = dis.readLong();
+ final int size = dis.readInt();
- Interval iv = new Interval(start, end, new Block(pos, size));
+ final Interval iv = new Interval(start, end, new Block(pos, size));
tree.insert(iv);
}
diff --git a/src/java/org/broad/tribble/index/linear/LinearIndex.java b/src/java/org/broad/tribble/index/linear/LinearIndex.java
index 5221aa3..910c1fc 100644
--- a/src/java/org/broad/tribble/index/linear/LinearIndex.java
+++ b/src/java/org/broad/tribble/index/linear/LinearIndex.java
@@ -18,15 +18,16 @@
package org.broad.tribble.index.linear;
+import org.broad.tribble.TribbleException;
import org.broad.tribble.index.AbstractIndex;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
-import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianInputStream;
import org.broad.tribble.util.LittleEndianOutputStream;
import java.io.File;
import java.io.IOException;
+import java.io.InputStream;
import java.io.PrintStream;
import java.util.*;
@@ -49,10 +50,11 @@ import java.util.*;
* ------ LINEAR INDEX
* nChromosomes integer
*/
-public class LinearIndex extends AbstractIndex implements Index {
+public class LinearIndex extends AbstractIndex {
// NOTE: To debug uncomment the System.getProperty and recompile.
public static final double MAX_FEATURES_PER_BIN = Double.valueOf(System.getProperty("MAX_FEATURES_PER_BIN", "100"));
+ public static final int INDEX_TYPE = IndexType.LINEAR.fileHeaderTypeIdentifier;
private final static int MAX_BIN_WIDTH = 1 * 1000 * 1000 * 1000; // widths must be less than 1 billion
@@ -62,24 +64,19 @@ public class LinearIndex extends AbstractIndex implements Index {
public static boolean enableAdaptiveIndexing = true;
/**
- * Default constructor -- used by factory methods. Do not remove.
- */
- public LinearIndex() {}
-
- /**
* Initialize using the specified {@code indices}
* @param indices
* @param featureFile
*/
- public LinearIndex(List<ChrIndex> indices, File featureFile) {
+ public LinearIndex(final List<ChrIndex> indices, final File featureFile) {
super(featureFile.getAbsolutePath());
- for (ChrIndex index : indices)
+ for (final ChrIndex index : indices)
chrIndices.put(index.getName(), index);
}
- private LinearIndex(LinearIndex parent, List<ChrIndex> indices) {
+ private LinearIndex(final LinearIndex parent, final List<ChrIndex> indices) {
super(parent);
- for (ChrIndex index : indices)
+ for (final ChrIndex index : indices)
chrIndices.put(index.getName(), index);
}
@@ -87,16 +84,25 @@ public class LinearIndex extends AbstractIndex implements Index {
* Initialize with default parameters
* @param featureFile File for which this is an index
*/
- public LinearIndex(String featureFile) {
+ public LinearIndex(final String featureFile) {
super(featureFile);
}
+ /**
+ * Load from file.
+ * @param inputStream This method assumes that the input stream is already buffered as appropriate.
+ */
+ public LinearIndex(final InputStream inputStream) throws IOException {
+ final LittleEndianInputStream dis = new LittleEndianInputStream(inputStream);
+ validateIndexHeader(INDEX_TYPE, dis);
+ read(dis);
+ }
public boolean isCurrentVersion() {
if (!super.isCurrentVersion()) return false;
// todo fixme nasty hack to determine if this is an old style V3 linear index (without nFeaturesPerBin)
- for (org.broad.tribble.index.ChrIndex chrIndex : chrIndices.values())
+ for (final org.broad.tribble.index.ChrIndex chrIndex : chrIndices.values())
if (((ChrIndex) chrIndex).OLD_V3_INDEX)
return false;
@@ -105,11 +111,12 @@ public class LinearIndex extends AbstractIndex implements Index {
@Override
protected int getType() {
- return IndexFactory.IndexType.LINEAR.getHeaderValue();
+ return INDEX_TYPE;
}
- public LinkedHashSet<String> getSequenceNames() {
- return (chrIndices == null ? new LinkedHashSet<String>() : new LinkedHashSet<String>(chrIndices.keySet()));
+ public List<String> getSequenceNames() {
+ return (chrIndices == null ? Collections.EMPTY_LIST :
+ Collections.unmodifiableList(new ArrayList<String>(chrIndices.keySet())));
}
@Override
@@ -154,7 +161,7 @@ public class LinearIndex extends AbstractIndex implements Index {
}
- ChrIndex(String name, int binWidth) {
+ ChrIndex(final String name, final int binWidth) {
this.name = name;
this.binWidth = binWidth;
this.blocks = new ArrayList<Block>(100);
@@ -167,7 +174,7 @@ public class LinearIndex extends AbstractIndex implements Index {
return name;
}
- void addBlock(Block block) {
+ void addBlock(final Block block) {
blocks.add(block);
//largestBlockSize = Math.max(largestBlockSize, block.getSize());
}
@@ -180,28 +187,28 @@ public class LinearIndex extends AbstractIndex implements Index {
return blocks;
}
- public List<Block> getBlocks(int start, int end) {
+ public List<Block> getBlocks(final int start, final int end) {
if (blocks.isEmpty()) {
return Collections.emptyList();
} else {
// Adjust position for the longest feature in this chromosome. This insures we get
// features that start before the bin but extend into it
- int adjustedPosition = Math.max(start - longestFeature, 0);
- int startBinNumber = adjustedPosition / binWidth;
+ final int adjustedPosition = Math.max(start - longestFeature, 0);
+ final int startBinNumber = adjustedPosition / binWidth;
if (startBinNumber >= blocks.size()) // are we off the end of the bin list, so return nothing
return Collections.emptyList();
else {
- int endBinNumber = Math.min((end - 1) / binWidth, blocks.size() - 1);
+ final int endBinNumber = Math.min((end - 1) / binWidth, blocks.size() - 1);
// By definition blocks are adjacent for the liner index. Combine them into one merged block
- long startPos = blocks.get(startBinNumber).getStartPosition();
- long endPos = blocks.get(endBinNumber).getStartPosition() + blocks.get(endBinNumber).getSize();
- long size = endPos - startPos;
+ final long startPos = blocks.get(startBinNumber).getStartPosition();
+ final long endPos = blocks.get(endBinNumber).getStartPosition() + blocks.get(endBinNumber).getSize();
+ final long size = endPos - startPos;
if (size == 0) {
return Collections.EMPTY_LIST;
} else {
- Block mergedBlock = new Block(startPos, size);
+ final Block mergedBlock = new Block(startPos, size);
return Arrays.asList(mergedBlock);
}
}
@@ -209,7 +216,7 @@ public class LinearIndex extends AbstractIndex implements Index {
}
- public void updateLongestFeature(int featureLength) {
+ public void updateLongestFeature(final int featureLength) {
longestFeature = Math.max(longestFeature, featureLength);
}
@@ -221,7 +228,7 @@ public class LinearIndex extends AbstractIndex implements Index {
this.nFeatures++;
}
- public void write(LittleEndianOutputStream dos) throws IOException {
+ public void write(final LittleEndianOutputStream dos) throws IOException {
// Chr name, binSize, # bins, longest feature
dos.writeString(name);
@@ -234,7 +241,7 @@ public class LinearIndex extends AbstractIndex implements Index {
long pos = 0;
long size = 0;
- for (Block block : blocks) {
+ for (final Block block : blocks) {
pos = block.getStartPosition();
size = block.getSize();
dos.writeLong(pos);
@@ -243,10 +250,10 @@ public class LinearIndex extends AbstractIndex implements Index {
dos.writeLong(pos + size);
}
- public void read(LittleEndianInputStream dis) throws IOException {
+ public void read(final LittleEndianInputStream dis) throws IOException {
name = dis.readString();
binWidth = dis.readInt();
- int nBins = dis.readInt();
+ final int nBins = dis.readInt();
longestFeature = dis.readInt();
//largestBlockSize = dis.readInt();
// largestBlockSize and totalBlockSize are old V3 index values. largest block size should be 0 for
@@ -258,17 +265,17 @@ public class LinearIndex extends AbstractIndex implements Index {
blocks = new ArrayList<Block>(nBins);
long pos = dis.readLong();
for (int binNumber = 0; binNumber < nBins; binNumber++) {
- long nextPos = dis.readLong();
- long size = nextPos - pos;
+ final long nextPos = dis.readLong();
+ final long size = nextPos - pos;
blocks.add(new Block(pos, size));
pos = nextPos;
}
}
- public boolean equals(Object obj) {
+ public boolean equals(final Object obj) {
if (this == obj) return true;
if (!(obj instanceof ChrIndex)) return false;
- ChrIndex other = (ChrIndex) obj;
+ final ChrIndex other = (ChrIndex) obj;
return binWidth == other.binWidth
&& longestFeature == other.longestFeature
//&& largestBlockSize == other.largestBlockSize
@@ -282,7 +289,7 @@ public class LinearIndex extends AbstractIndex implements Index {
*/
public long getTotalSize() {
long n = 0;
- for (Block b : getBlocks())
+ for (final Block b : getBlocks())
n += b.getSize();
return n;
}
@@ -295,10 +302,10 @@ public class LinearIndex extends AbstractIndex implements Index {
return (1.0 * getNFeatures()) / getNBlocks();
}
- private double getNFeaturesOfMostDenseBlock(double featureSize) {
+ private double getNFeaturesOfMostDenseBlock(final double featureSize) {
double m = -1;
- for (Block b : getBlocks()) {
- double n = b.getSize() / featureSize;
+ for (final Block b : getBlocks()) {
+ final double n = b.getSize() / featureSize;
if (m == -1 || n > m) m = n;
}
return m;
@@ -308,11 +315,11 @@ public class LinearIndex extends AbstractIndex implements Index {
return getNFeaturesOfMostDenseBlock(getAverageFeatureSize());
}
- public ChrIndex optimize(double threshold) {
+ public ChrIndex optimize(final double threshold) {
return optimize(this, threshold, 0);
}
- private static boolean badBinWidth(ChrIndex idx) {
+ private static boolean badBinWidth(final ChrIndex idx) {
if (idx.binWidth > MAX_BIN_WIDTH || idx.binWidth < 0) // an overflow occurred
return true;
else if (MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX != 0 && idx.getNFeatures() > 1 && idx.binWidth > MAX_BIN_WIDTH_FOR_OCCUPIED_CHR_INDEX) {
@@ -322,11 +329,11 @@ public class LinearIndex extends AbstractIndex implements Index {
}
}
- private static ChrIndex optimize(ChrIndex idx, double threshold, int level) {
+ private static ChrIndex optimize(ChrIndex idx, final double threshold, int level) {
ChrIndex best = idx;
while (true) {
- double score = idx.optimizeScore();
+ final double score = idx.optimizeScore();
if (score > threshold || idx.getNBlocks() == 1 || badBinWidth(idx))
break;
@@ -344,19 +351,19 @@ public class LinearIndex extends AbstractIndex implements Index {
return best;
}
- private static ChrIndex mergeBlocks(ChrIndex idx) {
- ChrIndex merged = new ChrIndex(idx.name, idx.binWidth * 2); // increasing width by 2 each time
+ private static ChrIndex mergeBlocks(final ChrIndex idx) {
+ final ChrIndex merged = new ChrIndex(idx.name, idx.binWidth * 2); // increasing width by 2 each time
merged.longestFeature = idx.longestFeature;
merged.nFeatures = idx.nFeatures;
- Iterator<Block> blocks = idx.getBlocks().iterator();
+ final Iterator<Block> blocks = idx.getBlocks().iterator();
if (!blocks.hasNext())
throw new IllegalStateException("Block iterator cannot be empty at the start for " + idx.getName());
// extremely simple merging algorithm. Walk left to right, joining up adjacent blocks.
while (blocks.hasNext()) {
- Block b1 = blocks.next();
- Block b2 = blocks.hasNext() ? blocks.next() : null;
+ final Block b1 = blocks.next();
+ final Block b2 = blocks.hasNext() ? blocks.next() : null;
if (b2 == null)
merged.addBlock(b1);
@@ -368,8 +375,8 @@ public class LinearIndex extends AbstractIndex implements Index {
return merged;
}
- private static String dupString(char c, int nCopies) {
- char[] chars = new char[nCopies];
+ private static String dupString(final char c, final int nCopies) {
+ final char[] chars = new char[nCopies];
Arrays.fill(chars, c);
return new String(chars);
}
@@ -380,13 +387,13 @@ public class LinearIndex extends AbstractIndex implements Index {
* @param threshold threshold to use for optimizing each constituent {@code chrIndex}
* @return The new optimized index
*/
- public Index optimize(double threshold) {
+ public Index optimize(final double threshold) {
if (enableAdaptiveIndexing) {
- List<ChrIndex> newIndices = new ArrayList<ChrIndex>(this.chrIndices.size());
- for (String name : chrIndices.keySet()) {
- LinearIndex.ChrIndex oldIdx = (LinearIndex.ChrIndex) chrIndices.get(name);
- LinearIndex.ChrIndex newIdx = oldIdx.optimize(threshold);
+ final List<ChrIndex> newIndices = new ArrayList<ChrIndex>(this.chrIndices.size());
+ for (final String name : chrIndices.keySet()) {
+ final LinearIndex.ChrIndex oldIdx = (LinearIndex.ChrIndex) chrIndices.get(name);
+ final LinearIndex.ChrIndex newIdx = oldIdx.optimize(threshold);
newIndices.add(newIdx);
}
return new LinearIndex(this, newIndices);
@@ -403,12 +410,12 @@ public class LinearIndex extends AbstractIndex implements Index {
* Code to convert linear index to a text table for analysis
* @param out Stream to which to write out table to
*/
- public void writeTable(PrintStream out) {
+ public void writeTable(final PrintStream out) {
out.printf("chr binWidth avg.feature.size nFeatures.total block.id start.pos size nFeatures%n");
- for (String name : chrIndices.keySet()) {
- LinearIndex.ChrIndex chrIdx = (LinearIndex.ChrIndex) chrIndices.get(name);
+ for (final String name : chrIndices.keySet()) {
+ final LinearIndex.ChrIndex chrIdx = (LinearIndex.ChrIndex) chrIndices.get(name);
int blockCount = 0;
- for (Block b : chrIdx.getBlocks()) {
+ for (final Block b : chrIdx.getBlocks()) {
out.printf("%s %d %.2f %d %d %d %d %d%n", name, chrIdx.binWidth, chrIdx.getAverageFeatureSize(), chrIdx.getNFeatures(), blockCount,
blockCount * chrIdx.binWidth, b.getSize(), (int) (b.getSize() / chrIdx.getAverageFeatureSize()));
blockCount++;
@@ -417,7 +424,7 @@ public class LinearIndex extends AbstractIndex implements Index {
}
// purely for testing purposes
- protected final void setTS(long ts) {
+ protected final void setTS(final long ts) {
this.indexedFileTS = ts;
}
}
diff --git a/src/java/org/broad/tribble/index/linear/LinearIndexCreator.java b/src/java/org/broad/tribble/index/linear/LinearIndexCreator.java
index 395ae80..fb48ae4 100644
--- a/src/java/org/broad/tribble/index/linear/LinearIndexCreator.java
+++ b/src/java/org/broad/tribble/index/linear/LinearIndexCreator.java
@@ -26,39 +26,45 @@ package org.broad.tribble.index.linear;
import org.broad.tribble.Feature;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
-import org.broad.tribble.index.IndexCreator;
+import org.broad.tribble.index.TribbleIndexCreator;
import java.io.File;
import java.util.ArrayList;
import java.util.LinkedList;
/**
+ * For creating a LinearIndex from a stream of features.
+ *
* @author jrobinso
*/
-public class LinearIndexCreator implements IndexCreator {
+public class LinearIndexCreator extends TribbleIndexCreator {
public static int DEFAULT_BIN_WIDTH = 8000;
// the set bin width
private int binWidth = DEFAULT_BIN_WIDTH;
// the input file
- private File inputFile;
+ private final File inputFile;
- private LinkedList<LinearIndex.ChrIndex> chrList = new LinkedList<LinearIndex.ChrIndex>();
+ private final LinkedList<LinearIndex.ChrIndex> chrList = new LinkedList<LinearIndex.ChrIndex>();
private int longestFeature= 0;
- private ArrayList<Block> blocks = new ArrayList<Block>();
+ private final ArrayList<Block> blocks = new ArrayList<Block>();
- public void initialize(File inputFile, int binSize) {
+ public LinearIndexCreator(final File inputFile, final int binSize) {
this.inputFile = inputFile;
binWidth = binSize;
}
+ public LinearIndexCreator(final File inputFile) {
+ this(inputFile, DEFAULT_BIN_WIDTH);
+ }
+
/**
* add a feature to the index
* @param feature the feature, from which we use the contig, start, and stop
* @param filePosition the position of the file at the BEGINNING of the current feature
*/
- public void addFeature(Feature feature, long filePosition) {
+ public void addFeature(final Feature feature, final long filePosition) {
// if we don't have a chrIndex yet, or if the last one was for the previous contig, create a new one
if (chrList.size() == 0 || !chrList.getLast().getName().equals(feature.getChr())) {
// if we're creating a new chrIndex (not the first), make sure to dump the blocks to the old chrIndex
@@ -91,7 +97,7 @@ public class LinearIndexCreator implements IndexCreator {
* @param finalFilePosition the final file position, for indexes that have to close out with the final position
* @return an Index object
*/
- public Index finalizeIndex(long finalFilePosition) {
+ public Index finalizeIndex(final long finalFilePosition) {
if (finalFilePosition == 0)
throw new IllegalArgumentException("finalFilePosition != 0, -> " + finalFilePosition);
@@ -101,7 +107,8 @@ public class LinearIndexCreator implements IndexCreator {
}
blocks.clear();
- LinearIndex index = new LinearIndex(chrList,inputFile);
+ final LinearIndex index = new LinearIndex(chrList,inputFile);
+ index.addProperties(properties);
index.finalizeIndex();
return index.optimize();
}
diff --git a/src/java/org/broad/tribble/index/tabix/TabixFormat.java b/src/java/org/broad/tribble/index/tabix/TabixFormat.java
new file mode 100644
index 0000000..8f82229
--- /dev/null
+++ b/src/java/org/broad/tribble/index/tabix/TabixFormat.java
@@ -0,0 +1,109 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broad.tribble.index.tabix;
+
+import org.broad.tribble.TribbleException;
+
+/**
+ * The values in a Tabix header that define the format of the file being indexed, e.g. gff, bed, vcf
+ */
+public class TabixFormat implements Cloneable {
+ public static final int ZERO_BASED = 0x10000;
+ public static final int GENERIC_FLAGS = 0;
+ public static final int SAM_FLAGS = 1;
+ public static final int VCF_FLAGS = 2;
+ public static final int UCSC_FLAGS = GENERIC_FLAGS | ZERO_BASED;
+
+ /** Predefined headers for known formats */
+ public static TabixFormat GFF = new TabixFormat(GENERIC_FLAGS, 1, 4, 5, '#', 0);
+ public static TabixFormat BED = new TabixFormat(UCSC_FLAGS, 1, 2, 3, '#', 0);
+ public static TabixFormat PSLTBL = new TabixFormat(UCSC_FLAGS, 15, 17, 18, '#', 0);
+ public static TabixFormat SAM = new TabixFormat(SAM_FLAGS, 3, 4, 0, '@', 0);
+ public static TabixFormat VCF = new TabixFormat(VCF_FLAGS, 1, 2, 0, '#', 0);
+
+ /** Describes interpretation of file being indexed. See FLAGS constants above. */
+ public int flags;
+ /** One-based index of the column in the file being indexed containing the sequence name */
+ public int sequenceColumn;
+ /** One-based index of the column in the file being indexed containing the start position. */
+ public int startPositionColumn;
+ /**
+ * One-based index of the column in the file being indexed containing the end position. Zero implies
+ * there is no end position column.
+ */
+ public int endPositionColumn;
+ /** Lines in the file being indexed that start with this character are ignored. */
+ public char metaCharacter;
+ /** This is part of the index header, but does not appear to be used. */
+ public int numHeaderLinesToSkip;
+
+ public TabixFormat() {
+ }
+
+ public TabixFormat(final int flags, final int sequenceColumn, final int startPositionColumn, final int endPositionColumn, final char metaCharacter, final int numHeaderLinesToSkip) {
+ this.flags = flags;
+ this.sequenceColumn = sequenceColumn;
+ this.startPositionColumn = startPositionColumn;
+ this.endPositionColumn = endPositionColumn;
+ this.metaCharacter = metaCharacter;
+ this.numHeaderLinesToSkip = numHeaderLinesToSkip;
+ }
+
+ @Override
+ public TabixFormat clone() {
+ try {
+ return (TabixFormat)super.clone();
+ } catch (final CloneNotSupportedException e) {
+ throw new TribbleException("unpossible!");
+ }
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final TabixFormat that = (TabixFormat) o;
+
+ if (endPositionColumn != that.endPositionColumn) return false;
+ if (flags != that.flags) return false;
+ if (metaCharacter != that.metaCharacter) return false;
+ if (numHeaderLinesToSkip != that.numHeaderLinesToSkip) return false;
+ if (sequenceColumn != that.sequenceColumn) return false;
+ if (startPositionColumn != that.startPositionColumn) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = flags;
+ result = 31 * result + sequenceColumn;
+ result = 31 * result + startPositionColumn;
+ result = 31 * result + endPositionColumn;
+ result = 31 * result + (int) metaCharacter;
+ result = 31 * result + numHeaderLinesToSkip;
+ return result;
+ }
+}
diff --git a/src/java/org/broad/tribble/index/tabix/TabixIndex.java b/src/java/org/broad/tribble/index/tabix/TabixIndex.java
new file mode 100644
index 0000000..44fd98a
--- /dev/null
+++ b/src/java/org/broad/tribble/index/tabix/TabixIndex.java
@@ -0,0 +1,364 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broad.tribble.index.tabix;
+
+import net.sf.samtools.Bin;
+import net.sf.samtools.BinningIndexContent;
+import net.sf.samtools.Chunk;
+import net.sf.samtools.LinearIndex;
+import net.sf.samtools.util.BlockCompressedInputStream;
+import net.sf.samtools.util.BlockCompressedOutputStream;
+import net.sf.samtools.util.CloserUtil;
+import net.sf.samtools.util.StringUtil;
+import org.broad.tribble.TribbleException;
+import org.broad.tribble.index.Block;
+import org.broad.tribble.index.Index;
+import org.broad.tribble.util.LittleEndianInputStream;
+import org.broad.tribble.util.LittleEndianOutputStream;
+import org.broad.tribble.util.TabixUtils;
+
+import java.io.EOFException;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.*;
+
+/**
+ * This class represent a Tabix index that has been built in memory or read from a file. It can be queried or
+ * written to a file.
+ */
+public class TabixIndex implements Index {
+ private static final byte[] MAGIC = {'T', 'B', 'I', 1};
+ public static final int MAGIC_NUMBER;
+ static {
+ final ByteBuffer bb = ByteBuffer.allocate(MAGIC.length);
+ bb.put(MAGIC);
+ bb.flip();
+ MAGIC_NUMBER = bb.order(ByteOrder.LITTLE_ENDIAN).getInt();
+ }
+
+ private final TabixFormat formatSpec;
+ private final List<String> sequenceNames;
+ private final BinningIndexContent[] indices;
+
+ /**
+ * @param formatSpec Information about how to interpret the file being indexed. Unused by this class other than
+ * written to an output file.
+ * @param sequenceNames Sequences in the file being indexed, in the order they appear in the file.
+ * @param indices One for each element of sequenceNames
+ */
+ public TabixIndex(final TabixFormat formatSpec, final List<String> sequenceNames, final BinningIndexContent[] indices) {
+ if (sequenceNames.size() != indices.length) {
+ throw new IllegalArgumentException("sequenceNames.size() != indices.length");
+ }
+ this.formatSpec = formatSpec.clone();
+ this.sequenceNames = Collections.unmodifiableList(new ArrayList<String>(sequenceNames));
+ this.indices = indices;
+ }
+
+ /**
+ * @param inputStream This is expected to be buffered and be gzip-decompressing as appropriate. Caller
+ * should close input stream after ctor returns.
+ */
+ public TabixIndex(final InputStream inputStream) throws IOException {
+ this(inputStream, false);
+ }
+
+ /**
+ * Convenient ctor that opens the file, wraps it with a BGZF reader, and closes after reading the index.
+ */
+ public TabixIndex(final File tabixFile) throws IOException {
+ this(new BlockCompressedInputStream(tabixFile), true);
+ }
+
+ private TabixIndex(final InputStream inputStream, final boolean closeInputStream) throws IOException {
+ final LittleEndianInputStream dis = new LittleEndianInputStream(inputStream);
+ if (dis.readInt() != MAGIC_NUMBER) {
+ throw new TribbleException(String.format("Unexpected magic number 0x%x", MAGIC_NUMBER));
+ }
+ final int numSequences = dis.readInt();
+ indices = new BinningIndexContent[numSequences];
+ formatSpec = new TabixFormat();
+ formatSpec.flags = dis.readInt();
+ formatSpec.sequenceColumn = dis.readInt();
+ formatSpec.startPositionColumn = dis.readInt();
+ formatSpec.endPositionColumn = dis.readInt();
+ formatSpec.metaCharacter = (char)dis.readInt();
+ formatSpec.numHeaderLinesToSkip = dis.readInt();
+ final int nameBlockSize = dis.readInt();
+ final byte[] nameBlock = new byte[nameBlockSize];
+ if (dis.read(nameBlock) != nameBlockSize) throw new EOFException("Premature end of file reading Tabix header");
+ final List<String> sequenceNames = new ArrayList<String>(numSequences);
+ int startPos = 0;
+ for (int i = 0; i < numSequences; ++i) {
+ int endPos = startPos;
+ while (nameBlock[endPos] != '\0') ++endPos;
+ sequenceNames.add(StringUtil.bytesToString(nameBlock, startPos, endPos - startPos));
+ startPos = endPos + 1;
+ }
+ if (startPos != nameBlockSize) {
+ throw new TribbleException("Tabix header format exception. Sequence name block is longer than expected");
+ }
+ for (int i = 0; i < numSequences; ++i) {
+ indices[i] = loadSequence(i, dis);
+ }
+ if (closeInputStream) CloserUtil.close(dis);
+ this.sequenceNames = Collections.unmodifiableList(sequenceNames);
+ }
+
+ /**
+ *
+ * @param chr the chromosome
+ * @param start the start position, one-based, inclusive.
+ * @param end the end position, one-based, inclusive.
+ * @return List of regions of file that are candidates for the given query.
+ *
+ * TODO: This method has not yet been tested, since the primary task is index writing.
+ */
+ @Override
+ public List<Block> getBlocks(final String chr, final int start, final int end) {
+ final int sequenceIndex = sequenceNames.indexOf(chr);
+ if (sequenceIndex == -1 || indices[sequenceIndex] == null) {
+ return Collections.EMPTY_LIST;
+ }
+ final List<Chunk> chunks = indices[sequenceIndex].getChunksOverlapping(start, end);
+ final List<Block> ret = new ArrayList<Block>(chunks.size());
+ for (final Chunk chunk : chunks) {
+ ret.add(new Block(chunk.getChunkStart(), chunk.getChunkEnd() - chunk.getChunkStart()));
+ }
+ return ret;
+ }
+
+ @Override
+ public boolean isCurrentVersion() {
+ return true;
+ }
+
+ @Override
+ public List<String> getSequenceNames() {
+ return sequenceNames;
+ }
+
+ @Override
+ public boolean containsChromosome(final String chr) {
+ return sequenceNames.contains(chr);
+ }
+
+ /**
+ *
+ * No arbitrary properties in Tabix
+ */
+ @Override
+ public Map<String, String> getProperties() {
+ return null;
+ }
+
+ @Override
+ public boolean equalsIgnoreProperties(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final TabixIndex that = (TabixIndex) o;
+
+ if (!formatSpec.equals(that.formatSpec)) return false;
+ if (!Arrays.equals(indices, that.indices)) return false;
+ return sequenceNames.equals(that.sequenceNames);
+
+ }
+
+ public TabixFormat getFormatSpec() {
+ return formatSpec;
+ }
+
+ /**
+ * Writes the index with BGZF.
+ * @param tabixFile Where to write the index.
+ */
+ public void write(final File tabixFile) {
+ final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(tabixFile));
+ try {
+ write(los);
+ los.close();
+ } catch (final IOException e) {
+ throw new TribbleException("Exception writing " + tabixFile.getAbsolutePath(), e);
+ }
+ }
+
+ /**
+ * Writes to a file with appropriate name and directory based on feature file.
+ * @param featureFile File being indexed.
+ */
+ @Override
+ public void writeBasedOnFeatureFile(final File featureFile) throws IOException {
+ if (!featureFile.isFile()) return;
+ write(new File(featureFile.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION));
+ }
+
+ /**
+ *
+ * @param los It is assumed that the caller has done appropriate buffering and BlockCompressedOutputStream wrapping.
+ * Caller should close output stream after invoking this method.
+ * @throws IOException
+ */
+ @Override
+ public void write(final LittleEndianOutputStream los) throws IOException {
+ los.writeInt(MAGIC_NUMBER);
+ los.writeInt(sequenceNames.size());
+ los.writeInt(formatSpec.flags);
+ los.writeInt(formatSpec.sequenceColumn);
+ los.writeInt(formatSpec.startPositionColumn);
+ los.writeInt(formatSpec.endPositionColumn);
+ los.writeInt(formatSpec.metaCharacter);
+ los.writeInt(formatSpec.numHeaderLinesToSkip);
+ int nameBlockSize = sequenceNames.size(); // null terminators
+ for (final String sequenceName : sequenceNames) nameBlockSize += sequenceName.length();
+ los.writeInt(nameBlockSize);
+ for (final String sequenceName : sequenceNames) {
+ los.write(StringUtil.stringToBytes(sequenceName));
+ los.write(0);
+ }
+ for (final BinningIndexContent index : indices) {
+ writeSequence(index, los);
+ }
+ }
+
+ private void writeSequence(final BinningIndexContent indexContent, final LittleEndianOutputStream los) throws IOException {
+ if (indexContent == null) {
+ los.writeInt(0);
+ } else {
+ final BinningIndexContent.BinList binList = indexContent.getBins();
+ los.writeInt(binList.numberOfNonNullBins);
+ for (final Bin bin : binList) {
+ writeBin(bin, los);
+ }
+ writeLinearIndex(indexContent.getLinearIndex(), los);
+ }
+ }
+
+ private void writeLinearIndex(final LinearIndex linearIndex, final LittleEndianOutputStream los) throws IOException {
+ if (linearIndex.getIndexStart() != 0) {
+ // This could be handled by writing zeroes, but it is not expected so just fail.
+ throw new IllegalArgumentException("Non-zero linear index start");
+ }
+ final long[] entries = linearIndex.getIndexEntries();
+ los.writeInt(entries.length);
+ for (final long entry : entries) los.writeLong(entry);
+ }
+
+ private void writeBin(final Bin bin, final LittleEndianOutputStream los) throws IOException {
+ los.writeInt(bin.getBinNumber());
+ final List<Chunk> chunkList = bin.getChunkList();
+ los.writeInt(chunkList.size());
+ for (final Chunk chunk: chunkList) {
+ los.writeLong(chunk.getChunkStart());
+ los.writeLong(chunk.getChunkEnd());
+ }
+ }
+
+ /**
+ * Although this is probably identical to BAM index reading code, code does not exist there to load directly
+ * into a BinningIndexContent object, so that is implemented here.
+ * @param referenceSequenceIndex Merely for setting in the returned object, not for seeking into the file.
+ * @param dis This method assumes that the current position is at the start of the reference.
+ */
+ private BinningIndexContent loadSequence(final int referenceSequenceIndex, final LittleEndianInputStream dis) throws IOException {
+ final int numBins = dis.readInt();
+ if (numBins == 0) return null;
+ int nonNullBins = 0;
+ final ArrayList<Bin> bins = new ArrayList<Bin>();
+ for (int i = 0; i < numBins; ++i) {
+ final Bin bin = loadBin(referenceSequenceIndex, dis);
+ if (bin != null) {
+ // File is not sparse, but array being produced is sparse, so grow array with nulls as appropriate
+ // so that bin number == index into array.
+ ++nonNullBins;
+ if (bins.size() > bin.getBinNumber()) {
+ if (bins.get(bin.getBinNumber()) != null) {
+ throw new TribbleException("Bin " + bin.getBinNumber() + " appears more than once in file");
+ }
+ bins.set(bin.getBinNumber(),bin);
+ } else {
+ // Grow bins array as needed.
+ bins.ensureCapacity(bin.getBinNumber() + 1);
+ while (bins.size() < bin.getBinNumber()) bins.add(null);
+ bins.add(bin);
+ }
+ }
+ }
+ final LinearIndex linearIndex = loadLinearIndex(referenceSequenceIndex, dis);
+ return new BinningIndexContent(referenceSequenceIndex,
+ new BinningIndexContent.BinList(bins.toArray(new Bin[bins.size()]), nonNullBins), linearIndex);
+ }
+
+ private LinearIndex loadLinearIndex(final int referenceSequenceIndex, final LittleEndianInputStream dis) throws IOException {
+ final int numElements = dis.readInt();
+ final long[] elements = new long[numElements];
+ for (int i = 0; i < numElements; ++i) {
+ elements[i] = dis.readLong();
+ }
+ return new LinearIndex(referenceSequenceIndex, 0, elements);
+ }
+
+ private Bin loadBin(final int referenceSequenceIndex, final LittleEndianInputStream dis) throws IOException {
+ final int binNumber = dis.readInt();
+ final Bin ret = new Bin(referenceSequenceIndex, binNumber);
+ final int numChunks = dis.readInt();
+ final List<Chunk> chunkList = new ArrayList<Chunk>(numChunks);
+ for (int i = 0; i < numChunks; ++i) {
+ chunkList.add(loadChunk(dis));
+ }
+ ret.setChunkList(chunkList);
+ return ret;
+ }
+
+ private Chunk loadChunk(final LittleEndianInputStream dis) throws IOException {
+ final long start = dis.readLong();
+ final long end = dis.readLong();
+ return new Chunk(start, end);
+ }
+
+ @Override
+ public boolean equals(final Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ final TabixIndex index = (TabixIndex) o;
+
+ if (!formatSpec.equals(index.formatSpec)) return false;
+ if (!Arrays.equals(indices, index.indices)) return false;
+ if (!sequenceNames.equals(index.sequenceNames)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = formatSpec.hashCode();
+ result = 31 * result + sequenceNames.hashCode();
+ result = 31 * result + Arrays.hashCode(indices);
+ return result;
+ }
+}
diff --git a/src/java/org/broad/tribble/index/tabix/TabixIndexCreator.java b/src/java/org/broad/tribble/index/tabix/TabixIndexCreator.java
new file mode 100644
index 0000000..7fdb438
--- /dev/null
+++ b/src/java/org/broad/tribble/index/tabix/TabixIndexCreator.java
@@ -0,0 +1,199 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broad.tribble.index.tabix;
+
+import net.sf.samtools.*;
+import org.broad.tribble.Feature;
+import org.broad.tribble.index.Index;
+import org.broad.tribble.index.IndexCreator;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * IndexCreator for Tabix.
+ * Features are expected to be 1-based, inclusive.
+ */
+public class TabixIndexCreator implements IndexCreator {
+ private final TabixFormat formatSpec;
+ private final List<BinningIndexContent> indexContents = new ArrayList<BinningIndexContent>();
+ private final List<String> sequenceNames = new ArrayList<String>();
+ // Merely a faster way to ensure that features are added in a specific sequence name order
+ private final Set<String> sequenceNamesSeen = new HashSet<String>();
+ // A sequence dictionary is not required, but if it is provided all sequences names must be present in it.
+ // It is used to determine the length of a sequence in order to optimize index memory allocation.
+ private final SAMSequenceDictionary sequenceDictionary;
+
+ private String currentSequenceName = null;
+ private BinningIndexBuilder indexBuilder = null;
+ // A feature can't be added to the index until the next feature is added because the next feature
+ // defines the location of the end of the previous feature in the output file.
+ private TabixFeature previousFeature = null;
+
+
+ /**
+ * @param sequenceDictionary is not required, but if present all features added must refer to sequences in the
+ * dictionary. It is used to optimize the memory needed to build the index.
+ */
+ public TabixIndexCreator(final SAMSequenceDictionary sequenceDictionary,
+ final TabixFormat formatSpec) {
+ this.sequenceDictionary = sequenceDictionary;
+ this.formatSpec = formatSpec.clone();
+ }
+
+ public TabixIndexCreator(final TabixFormat formatSpec) {
+ this(null, formatSpec);
+ }
+
+ @Override
+ public void addFeature(final Feature feature, final long filePosition) {
+ final String sequenceName = feature.getChr();
+ final int referenceIndex;
+ if (sequenceName.equals(currentSequenceName)) {
+ referenceIndex = sequenceNames.size() - 1;
+ } else {
+ referenceIndex = sequenceNames.size();
+ if (currentSequenceName != null && sequenceNamesSeen.contains(sequenceName)) {
+ throw new IllegalArgumentException("Sequence " + feature + " added out sequence of order");
+ }
+ }
+ final TabixFeature thisFeature = new TabixFeature(referenceIndex, feature.getStart(), feature.getEnd(), filePosition);
+ if (previousFeature != null) {
+ if (previousFeature.compareTo(thisFeature) > 0) {
+ throw new IllegalArgumentException(String.format("Features added out of order: previous (%s) > next (%s)",
+ previousFeature, thisFeature));
+ }
+ finalizeFeature(filePosition);
+ }
+ previousFeature = thisFeature;
+ if (referenceIndex == sequenceNames.size()) {
+ advanceToReference(sequenceName);
+ }
+ }
+
+ private void finalizeFeature(final long featureEndPosition) {
+ previousFeature.featureEndFilePosition = featureEndPosition;
+ if (previousFeature.featureStartFilePosition >= previousFeature.featureEndFilePosition) {
+ throw new IllegalArgumentException(String.format("Feature start position %d >= feature end position %d",
+ previousFeature.featureStartFilePosition, previousFeature.featureEndFilePosition));
+ }
+ indexBuilder.processFeature(previousFeature);
+ }
+
+ private void advanceToReference(final String sequenceName) {
+ if (indexBuilder != null) {
+ indexContents.add(indexBuilder.generateIndexContent());
+ }
+ // If sequence dictionary is provided, BinningIndexBuilder can reduce size of array it allocates.
+ final int sequenceLength;
+ if (sequenceDictionary != null) {
+ sequenceLength = sequenceDictionary.getSequence(sequenceName).getSequenceLength();
+ } else {
+ sequenceLength = 0;
+ }
+ indexBuilder = new BinningIndexBuilder(sequenceNames.size(), sequenceLength);
+ sequenceNames.add(sequenceName);
+ currentSequenceName = sequenceName;
+ sequenceNamesSeen.add(sequenceName);
+ }
+
+ @Override
+ public Index finalizeIndex(final long finalFilePosition) {
+ if (previousFeature != null) {
+ finalizeFeature(finalFilePosition);
+ }
+ if (indexBuilder != null) {
+ indexContents.add(indexBuilder.generateIndexContent());
+ }
+ // Make this as big as the sequence dictionary, even if there is not content for every sequence,
+ // but truncate the sequence dictionary before its end if there are sequences in the sequence dictionary without
+ // any features.
+ final BinningIndexContent[] indices = indexContents.toArray(new BinningIndexContent[sequenceNames.size()]);
+ return new TabixIndex(formatSpec, sequenceNames, indices);
+ }
+
+
+ private static class TabixFeature implements BinningIndexBuilder.FeatureToBeIndexed, Comparable<TabixFeature> {
+ private final int referenceIndex;
+ private final int start;
+ private final int end;
+ private final long featureStartFilePosition;
+ // Position after this feature in the file.
+ private long featureEndFilePosition = -1;
+
+ private TabixFeature(final int referenceIndex, final int start, final int end, final long featureStartFilePosition) {
+ this.referenceIndex = referenceIndex;
+ this.start = start;
+ this.end = end;
+ this.featureStartFilePosition = featureStartFilePosition;
+ }
+
+ @Override
+ public int getStart() {
+ return start;
+ }
+
+ @Override
+ public int getEnd() {
+ return end;
+ }
+
+ /**
+ *
+ * @return null -- Let index builder compute this.
+ */
+ @Override
+ public Integer getIndexingBin() {
+ return null;
+ }
+
+ @Override
+ public Chunk getChunk() {
+ if (featureEndFilePosition == -1) {
+ throw new IllegalStateException("End position is not set");
+ }
+ return new Chunk(featureStartFilePosition, featureEndFilePosition);
+ }
+
+ @Override
+ public int compareTo(final TabixFeature other) {
+ final int ret = this.referenceIndex - other.referenceIndex;
+ if (ret != 0) return ret;
+ return this.start - other.start;
+ }
+
+ @Override
+ public String toString() {
+ return "TabixFeature{" +
+ "referenceIndex=" + referenceIndex +
+ ", start=" + start +
+ ", end=" + end +
+ ", featureStartFilePosition=" + featureStartFilePosition +
+ ", featureEndFilePosition=" + featureEndFilePosition +
+ '}';
+ }
+ }
+}
diff --git a/src/java/org/broad/tribble/readers/AsciiLineReader.java b/src/java/org/broad/tribble/readers/AsciiLineReader.java
index 37c48bb..0ef2c3d 100644
--- a/src/java/org/broad/tribble/readers/AsciiLineReader.java
+++ b/src/java/org/broad/tribble/readers/AsciiLineReader.java
@@ -17,6 +17,7 @@
*/
package org.broad.tribble.readers;
+import net.sf.samtools.util.LocationAware;
import org.broad.tribble.TribbleException;
import java.io.*;
@@ -25,7 +26,7 @@ import java.io.*;
* A simple class that provides {@link #readLine()} functionality around a PositionalBufferedStream
*
* {@link BufferedReader} and its {@link java.io.BufferedReader#readLine()} method should be used in preference to this class (when the
- * {@link LocationAware} functionality is not required) because it offers greater performance.
+ * {@link net.sf.samtools.util.LocationAware} functionality is not required) because it offers greater performance.
*
* @author jrobinso
*/
@@ -37,11 +38,11 @@ public class AsciiLineReader implements LineReader, LocationAware {
PositionalBufferedStream is;
char[] lineBuffer;
- public AsciiLineReader(InputStream is){
+ public AsciiLineReader(final InputStream is){
this(new PositionalBufferedStream(is));
}
- public AsciiLineReader(PositionalBufferedStream is) {
+ public AsciiLineReader(final PositionalBufferedStream is) {
this.is = is;
// Allocate this only once, even though it is essentially a local variable of
// readLine. This makes a huge difference in performance
@@ -94,7 +95,7 @@ public class AsciiLineReader implements LineReader, LocationAware {
// for potential line-terminators in return string
if (linePosition > (lineBuffer.length - 3)) {
- char[] temp = new char[BUFFER_OVERFLOW_INCREASE_FACTOR * lineBuffer.length];
+ final char[] temp = new char[BUFFER_OVERFLOW_INCREASE_FACTOR * lineBuffer.length];
System.arraycopy(lineBuffer, 0, temp, 0, lineBuffer.length);
lineBuffer = temp;
}
@@ -122,8 +123,8 @@ public class AsciiLineReader implements LineReader, LocationAware {
lineBuffer = null;
}
- public static void main(String[] args) throws Exception {
- File testFile = new File(args[0]);
+ public static void main(final String[] args) throws Exception {
+ final File testFile = new File(args[0]);
final int iterations = Integer.valueOf(args[1]);
final boolean includeBufferedReader = Boolean.valueOf(args[2]);
long t0, lineCount, dt;
@@ -132,7 +133,7 @@ public class AsciiLineReader implements LineReader, LocationAware {
System.out.printf("Testing %s%n", args[0]);
for (int i = 0; i < iterations; i++) {
if ( includeBufferedReader ) {
- BufferedReader reader2 = new BufferedReader(new FileReader(testFile));
+ final BufferedReader reader2 = new BufferedReader(new FileReader(testFile));
t0 = System.currentTimeMillis();
lineCount = 0;
while (reader2.readLine() != null) {
@@ -145,7 +146,7 @@ public class AsciiLineReader implements LineReader, LocationAware {
}
if ( includeBufferedReader ) {
- LongLineBufferedReader longLineBufferedReader = new LongLineBufferedReader(new BufferedReader(new FileReader(testFile)));
+ final LongLineBufferedReader longLineBufferedReader = new LongLineBufferedReader(new BufferedReader(new FileReader(testFile)));
t0 = System.currentTimeMillis();
lineCount = 0;
while (longLineBufferedReader.readLine() != null) {
@@ -157,8 +158,8 @@ public class AsciiLineReader implements LineReader, LocationAware {
longLineBufferedReader.close();
}
- PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(testFile));
- LineReader reader = new AsciiLineReader(pbs);
+ final PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(testFile));
+ final LineReader reader = new AsciiLineReader(pbs);
t0 = System.currentTimeMillis();
lineCount = 0;
while (reader.readLine() != null) {
@@ -171,7 +172,7 @@ public class AsciiLineReader implements LineReader, LocationAware {
}
}
- private static final void printStatus(final String name, long lineCount, double rate, long dt) {
+ private static final void printStatus(final String name, final long lineCount, final double rate, final long dt) {
System.out.printf("%30s: %d lines read. Rate = %.2e lines per second. DT = %d%n", name, lineCount, rate, dt);
System.out.flush();
}
diff --git a/src/java/org/broad/tribble/readers/AsciiLineReaderIterator.java b/src/java/org/broad/tribble/readers/AsciiLineReaderIterator.java
index 297a685..6ffcef7 100644
--- a/src/java/org/broad/tribble/readers/AsciiLineReaderIterator.java
+++ b/src/java/org/broad/tribble/readers/AsciiLineReaderIterator.java
@@ -2,6 +2,7 @@ package org.broad.tribble.readers;
import net.sf.samtools.util.AbstractIterator;
import net.sf.samtools.util.CloserUtil;
+import net.sf.samtools.util.LocationAware;
import net.sf.samtools.util.Tuple;
import java.io.Closeable;
@@ -11,13 +12,13 @@ import java.io.IOException;
* A class that iterates over the lines and line positions in an {@link AsciiLineReader}.
*
* This class is slower than other {@link LineIterator}s because it is driven by {@link AsciiLineReader}, but offers the benefit of
- * implementing {@link LocationAware}, which is required for indexing. If you do not require {@link LocationAware}, consider using
+ * implementing {@link net.sf.samtools.util.LocationAware}, which is required for indexing. If you do not require {@link net.sf.samtools.util.LocationAware}, consider using
* {@link LineIteratorImpl} as an alternative to this class.
*
* Note an important distinction in the way this class and its inner iterator differ: in the inner iterator, the position stored with
* a line is the position at the start of that line. However, {@link #getPosition()} of the outer class must return the position at the
* end of the most-recently-returned line (or the start of the underlying {@link AsciiLineReader}, if no line has been read). The latter
- * bit of logic here is required to conform with the interface described by {@link org.broad.tribble.readers.LocationAware#getPosition()}.
+ * bit of logic here is required to conform with the interface described by {@link net.sf.samtools.util.LocationAware#getPosition()}.
*
* @author mccowan
*/
diff --git a/src/java/org/broad/tribble/readers/AsynchronousLineReader.java b/src/java/org/broad/tribble/readers/AsynchronousLineReader.java
index b445be4..166e079 100644
--- a/src/java/org/broad/tribble/readers/AsynchronousLineReader.java
+++ b/src/java/org/broad/tribble/readers/AsynchronousLineReader.java
@@ -20,13 +20,14 @@ public class AsynchronousLineReader implements LineReader {
private final LongLineBufferedReader bufferedReader;
private final BlockingQueue<String> lineQueue;
private final Thread worker;
- private volatile Exception workerException = null;
+ private volatile Throwable workerException = null;
private volatile boolean eofReached = false;
public AsynchronousLineReader(final Reader reader, final int lineReadAheadSize) {
bufferedReader = new LongLineBufferedReader(reader);
lineQueue = new LinkedBlockingQueue<String>(lineReadAheadSize);
worker = new Thread(new Worker());
+ worker.setDaemon(true);
worker.start();
}
@@ -50,7 +51,7 @@ public class AsynchronousLineReader implements LineReader {
return pollResult;
}
}
- } catch (InterruptedException e) {
+ } catch (final InterruptedException e) {
throw new TribbleException("Line polling interrupted.", e);
}
}
@@ -78,7 +79,7 @@ public class AsynchronousLineReader implements LineReader {
} else {
try {
lineQueue.put(line);
- } catch (InterruptedException e) {
+ } catch (final InterruptedException e) {
/**
* A thread interruption is not an exceptional state: it means a {@link AsynchronousLineReader#close();} has
* been called, so shut down gracefully.
@@ -87,7 +88,7 @@ public class AsynchronousLineReader implements LineReader {
}
}
}
- } catch (Exception e) {
+ } catch (final Throwable e) {
AsynchronousLineReader.this.workerException = e;
} finally {
CloserUtil.close(AsynchronousLineReader.this.bufferedReader);
diff --git a/src/java/org/broad/tribble/readers/Positional.java b/src/java/org/broad/tribble/readers/Positional.java
index 0b5fb7d..f79ee2a 100644
--- a/src/java/org/broad/tribble/readers/Positional.java
+++ b/src/java/org/broad/tribble/readers/Positional.java
@@ -23,6 +23,8 @@
*/
package org.broad.tribble.readers;
+import net.sf.samtools.util.LocationAware;
+
import java.io.IOException;
/**
diff --git a/src/java/org/broad/tribble/readers/TabixReader.java b/src/java/org/broad/tribble/readers/TabixReader.java
index e7a3965..b1c2d3a 100644
--- a/src/java/org/broad/tribble/readers/TabixReader.java
+++ b/src/java/org/broad/tribble/readers/TabixReader.java
@@ -35,6 +35,7 @@ import net.sf.samtools.util.BlockCompressedInputStream;
import net.sf.samtools.seekablestream.SeekableStreamFactory;
import org.broad.tribble.Tribble;
import org.broad.tribble.util.ParsingUtils;
+import org.broad.tribble.util.TabixUtils;
/**
* @author Heng Li <hengli at broadinstitute.org>
@@ -115,7 +116,7 @@ public class TabixReader {
mFn = fn;
mFp = new BlockCompressedInputStream(stream);
if(idxFn == null){
- mIdxFn = ParsingUtils.appendToPath(fn, ".tbi");
+ mIdxFn = ParsingUtils.appendToPath(fn, TabixUtils.STANDARD_INDEX_EXTENSION);
}
readIndex();
}
diff --git a/src/java/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java b/src/java/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java
index c036d5d..1a9fe24 100644
--- a/src/java/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java
+++ b/src/java/org/broadinstitute/variant/variantcontext/writer/IndexingVariantContextWriter.java
@@ -29,16 +29,14 @@ import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
-import org.broad.tribble.Tribble;
-import org.broad.tribble.index.DynamicIndexCreator;
-import org.broad.tribble.index.Index;
-import org.broad.tribble.index.IndexCreator;
-import org.broad.tribble.index.IndexFactory;
-import org.broad.tribble.util.LittleEndianOutputStream;
-import org.broadinstitute.variant.vcf.VCFHeader;
+import net.sf.samtools.util.LocationAware;
+import org.broad.tribble.index.*;
import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.vcf.VCFHeader;
-import java.io.*;
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
/**
* this class writes VCF files
@@ -49,9 +47,8 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
private final SAMSequenceDictionary refDict;
private OutputStream outputStream;
- private PositionalOutputStream positionalOutputStream = null;
+ private LocationAware locationSource = null;
private IndexCreator indexer = null;
- private LittleEndianOutputStream idxStream = null;
private IndexingVariantContextWriter(final String name, final File location, final OutputStream output, final SAMSequenceDictionary refDict) {
this.name = name;
@@ -77,9 +74,7 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
this(name, location, output, refDict);
if ( enableOnTheFlyIndexing ) {
- final IndexCreator idxCreator = new DynamicIndexCreator(IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
- idxCreator.initialize(location, idxCreator.defaultBinSize());
- initIndexingWriter(idxCreator);
+ initIndexingWriter(new DynamicIndexCreator(location, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME));
}
}
@@ -102,23 +97,20 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
this(name, location, output, refDict);
if ( enableOnTheFlyIndexing ) {
+ // TODO: Handle non-Tribble IndexCreators
initIndexingWriter(idxCreator);
}
}
@Requires({"idxCreator != null"})
private void initIndexingWriter(final IndexCreator idxCreator) {
- try {
- indexer = idxCreator;
- idxStream = new LittleEndianOutputStream(new FileOutputStream(Tribble.indexFile(location)));
- //System.out.println("Creating index on the fly for " + location);
- positionalOutputStream = new PositionalOutputStream(outputStream);
+ indexer = idxCreator;
+ if (outputStream instanceof LocationAware) {
+ locationSource = (LocationAware)outputStream;
+ } else {
+ final PositionalOutputStream positionalOutputStream = new PositionalOutputStream(outputStream);
+ locationSource = positionalOutputStream;
outputStream = positionalOutputStream;
- } catch ( IOException ex ) {
- // No matter what we keep going, since we don't care if we can't create the index file
- idxStream = null;
- indexer = null;
- positionalOutputStream = null;
}
}
@@ -144,14 +136,15 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
// close the index stream (keep it separate to help debugging efforts)
if (indexer != null) {
- final Index index = indexer.finalizeIndex(positionalOutputStream.getPosition());
- setIndexSequenceDictionary(index, refDict);
- index.write(idxStream);
- idxStream.close();
+ if (indexer instanceof TribbleIndexCreator) {
+ setIndexSequenceDictionary((TribbleIndexCreator)indexer, refDict);
+ }
+ final Index index = indexer.finalizeIndex(locationSource.getPosition());
+ index.writeBasedOnFeatureFile(location);
}
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new RuntimeException("Unable to close index for " + getStreamName(), e);
}
}
@@ -171,7 +164,7 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
public void add(final VariantContext vc) {
// if we are doing on the fly indexing, add the record ***before*** we write any bytes
if ( indexer != null )
- indexer.addFeature(vc, positionalOutputStream.getPosition());
+ indexer.addFeature(vc, locationSource.getPosition());
}
/**
@@ -188,11 +181,11 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
// a constant we use for marking sequence dictionary entries in the Tribble index property list
private static final String SequenceDictionaryPropertyPredicate = "DICT:";
- private static void setIndexSequenceDictionary(final Index index, final SAMSequenceDictionary dict) {
+ private static void setIndexSequenceDictionary(final TribbleIndexCreator indexCreator, final SAMSequenceDictionary dict) {
for (final SAMSequenceRecord seq : dict.getSequences()) {
final String contig = SequenceDictionaryPropertyPredicate + seq.getSequenceName();
final String length = String.valueOf(seq.getSequenceLength());
- index.addProperty(contig,length);
+ indexCreator.addProperty(contig,length);
}
}
}
@@ -201,7 +194,8 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
* Wraps output stream in a manner which keeps track of the position within the file and allowing writes
* at arbitrary points
*/
-final class PositionalOutputStream extends OutputStream {
+final class PositionalOutputStream extends OutputStream implements LocationAware
+{
private final OutputStream out;
private long position = 0;
@@ -218,7 +212,7 @@ final class PositionalOutputStream extends OutputStream {
out.write(bytes, startIndex, numBytes);
}
- public final void write(int c) throws IOException {
+ public final void write(final int c) throws IOException {
position++;
out.write(c);
}
diff --git a/src/java/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java b/src/java/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java
index ec7f41e..1ddfa8d 100644
--- a/src/java/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java
+++ b/src/java/org/broadinstitute/variant/variantcontext/writer/VariantContextWriterFactory.java
@@ -28,7 +28,10 @@ package org.broadinstitute.variant.variantcontext.writer;
import net.sf.samtools.Defaults;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.BlockCompressedOutputStream;
+import net.sf.samtools.util.IOUtil;
import org.broad.tribble.index.IndexCreator;
+import org.broad.tribble.index.tabix.TabixFormat;
+import org.broad.tribble.index.tabix.TabixIndexCreator;
import java.io.*;
import java.util.EnumSet;
@@ -61,12 +64,18 @@ public class VariantContextWriterFactory {
return create(location, openOutputStream(location), refDict, options);
}
+ /**
+ * @param output If buffered writing is desired, caller must provide some kind of buffered OutputStream.
+ */
public static VariantContextWriter create(final File location,
final OutputStream output,
final SAMSequenceDictionary refDict) {
return create(location, output, refDict, DEFAULT_OPTIONS);
}
+ /**
+ * @param output If buffered writing is desired, caller must provide some kind of buffered OutputStream.
+ */
public static VariantContextWriter create(final OutputStream output,
final SAMSequenceDictionary refDict,
final EnumSet<Options> options) {
@@ -76,7 +85,8 @@ public class VariantContextWriterFactory {
/**
* @param location Note that this parameter is used to producing intelligent log messages, and for naming the index,
* but does not control where the file is written
- * @param output This is where the BCF is actually written.
+ * @param output This is where the BCF is actually written. If buffered writing is desired, caller must provide
+ * some kind of buffered OutputStream.
*/
public static VariantContextWriter createBcf2(final File location,
final OutputStream output,
@@ -90,7 +100,8 @@ public class VariantContextWriterFactory {
/**
* @param location Note that this parameter is used to producing intelligent log messages, and for naming the index,
* but does not control where the file is written
- * @param output This is where the BCF is actually written.
+ * @param output This is where the BCF is actually written. If buffered writing is desired, caller must provide
+ * some kind of buffered OutputStream.
*/
public static VariantContextWriter createBcf2(final File location,
final OutputStream output,
@@ -105,7 +116,8 @@ public class VariantContextWriterFactory {
/**
* @param location Note that this parameter is used to producing intelligent log messages, and for naming the index,
* but does not control where the file is written
- * @param output This is where the VCF is actually written.
+ * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide
+ * some kind of buffered OutputStream.
*/
public static VariantContextWriter createVcf(final File location,
final OutputStream output,
@@ -120,7 +132,8 @@ public class VariantContextWriterFactory {
/**
* @param location Note that this parameter is used to producing intelligent log messages, and for naming the index,
* but does not control where the file is written
- * @param output This is where the VCF is actually written.
+ * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide
+ * some kind of buffered OutputStream.
*/
public static VariantContextWriter createVcf(final File location,
final OutputStream output,
@@ -136,14 +149,21 @@ public class VariantContextWriterFactory {
/**
* @param location Note that this parameter is used to producing intelligent log messages,
* but does not control where the file is written
- * @param output This is where the VCF is actually written.
+ * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide
+ * some kind of buffered OutputStream.
*/
public static VariantContextWriter createBlockCompressedVcf(final File location,
final OutputStream output,
final SAMSequenceDictionary refDict,
final EnumSet<Options> options) {
- return maybeWrapWithAsyncWriter(new VCFWriter(location, maybeBgzfWrapOutputStream(location, output, options),
- refDict,
+ final TabixIndexCreator indexCreator;
+ if (options.contains(Options.INDEX_ON_THE_FLY)) {
+ indexCreator = new TabixIndexCreator(refDict, TabixFormat.VCF);
+ } else {
+ indexCreator = null;
+ }
+ return maybeWrapWithAsyncWriter(new VCFWriter(location, BlockCompressedOutputStream.maybeBgzfWrapOutputStream(location, output),
+ refDict, indexCreator,
options.contains(Options.INDEX_ON_THE_FLY),
options.contains(Options.DO_NOT_WRITE_GENOTYPES),
options.contains(Options.ALLOW_MISSING_FIELDS_IN_HEADER)), options);
@@ -152,14 +172,15 @@ public class VariantContextWriterFactory {
/**
* @param location Note that this parameter is used to producing intelligent log messages,
* but does not control where the file is written
- * @param output This is where the VCF is actually written.
+ * @param output This is where the VCF is actually written. If buffered writing is desired, caller must provide
+ * some kind of buffered OutputStream.
*/
public static VariantContextWriter createBlockCompressedVcf(final File location,
final OutputStream output,
final SAMSequenceDictionary refDict,
final IndexCreator indexCreator,
final EnumSet<Options> options) {
- return maybeWrapWithAsyncWriter(new VCFWriter(location, maybeBgzfWrapOutputStream(location, output, options),
+ return maybeWrapWithAsyncWriter(new VCFWriter(location, BlockCompressedOutputStream.maybeBgzfWrapOutputStream(location, output),
refDict, indexCreator,
options.contains(Options.INDEX_ON_THE_FLY),
options.contains(Options.DO_NOT_WRITE_GENOTYPES),
@@ -180,6 +201,9 @@ public class VariantContextWriterFactory {
}
}
+ /**
+ * @param output If buffered writing is desired, caller must provide some kind of buffered OutputStream.
+ */
public static VariantContextWriter create(final File location,
final OutputStream output,
final SAMSequenceDictionary refDict,
@@ -195,17 +219,6 @@ public class VariantContextWriterFactory {
}
}
- private static OutputStream maybeBgzfWrapOutputStream(final File location, OutputStream output,
- final EnumSet<Options> options) {
- if (options.contains(Options.INDEX_ON_THE_FLY)) {
- throw new IllegalArgumentException("VCF index creation not supported for block-compressed output format.");
- }
- if (!(output instanceof BlockCompressedOutputStream)) {
- output = new BlockCompressedOutputStream(output, location);
- }
- return output;
- }
-
private static VariantContextWriter maybeWrapWithAsyncWriter(final VariantContextWriter writer,
final EnumSet<Options> options) {
if (options.contains(Options.USE_ASYNC_IO)) {
@@ -251,7 +264,7 @@ public class VariantContextWriterFactory {
*/
protected static OutputStream openOutputStream(final File location) {
try {
- return new FileOutputStream(location);
+ return IOUtil.maybeBufferOutputStream(new FileOutputStream(location));
} catch (final FileNotFoundException e) {
throw new RuntimeException(location + ": Unable to create VCF writer", e);
}
diff --git a/src/tests/java/net/sf/picard/analysis/CollectRnaSeqMetricsTest.java b/src/tests/java/net/sf/picard/analysis/CollectRnaSeqMetricsTest.java
index e6ecda1..da716c6 100644
--- a/src/tests/java/net/sf/picard/analysis/CollectRnaSeqMetricsTest.java
+++ b/src/tests/java/net/sf/picard/analysis/CollectRnaSeqMetricsTest.java
@@ -25,6 +25,7 @@ package net.sf.picard.analysis;
import net.sf.picard.annotation.RefFlatReader.RefFlatColumns;
import net.sf.picard.metrics.MetricsFile;
+import net.sf.samtools.SAMRecordSetBuilder;
import net.sf.picard.util.Interval;
import net.sf.picard.util.IntervalList;
import net.sf.samtools.*;
diff --git a/src/tests/java/net/sf/picard/illumina/CheckIlluminaDirectoryTest.java b/src/tests/java/net/sf/picard/illumina/CheckIlluminaDirectoryTest.java
index 0975f7d..20b9404 100644
--- a/src/tests/java/net/sf/picard/illumina/CheckIlluminaDirectoryTest.java
+++ b/src/tests/java/net/sf/picard/illumina/CheckIlluminaDirectoryTest.java
@@ -1,16 +1,5 @@
package net.sf.picard.illumina;
-import java.io.*;
-import java.nio.ByteOrder;
-import java.nio.MappedByteBuffer;
-import java.nio.channels.FileChannel;
-import java.util.*;
-
-import static net.sf.samtools.util.CollectionUtil.makeList;
-import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat;
-import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat.*;
-import static net.sf.picard.illumina.parser.IlluminaDataType.*;
-
import net.sf.picard.PicardException;
import net.sf.picard.cmdline.StandardOptionDefinitions;
import net.sf.picard.illumina.parser.IlluminaDataType;
@@ -19,7 +8,33 @@ import net.sf.picard.illumina.parser.IlluminaFileUtilTest;
import net.sf.picard.io.IoUtil;
import net.sf.samtools.util.CloserUtil;
import org.testng.Assert;
-import org.testng.annotations.*;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.RandomAccessFile;
+import java.nio.ByteOrder;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static net.sf.picard.illumina.parser.IlluminaDataType.BaseCalls;
+import static net.sf.picard.illumina.parser.IlluminaDataType.Noise;
+import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat;
+import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat.Bcl;
+import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat.Filter;
+import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat.Locs;
+import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat.Pos;
+import static net.sf.picard.illumina.parser.IlluminaFileUtil.SupportedIlluminaFormat.Qseq;
+import static net.sf.samtools.util.CollectionUtil.makeList;
public class CheckIlluminaDirectoryTest {
@@ -34,23 +49,23 @@ public class CheckIlluminaDirectoryTest {
private void setUp() throws Exception {
illuminaDir = IoUtil.createTempDir("ift_test", "IlluminaDir");
- interopDir = new File(illuminaDir, "InterOp");
- if(!interopDir.mkdir()) {
+ interopDir = new File(illuminaDir, "InterOp");
+ if (!interopDir.mkdir()) {
throw new RuntimeException("Couldn't make interop dir " + interopDir.getAbsolutePath());
}
dataDir = new File(illuminaDir, "Data");
- if(!dataDir.mkdir()) {
+ if (!dataDir.mkdir()) {
throw new RuntimeException("Couldn't make data dir " + dataDir.getAbsolutePath());
}
intensityDir = new File(dataDir, "Intensities");
- if(!intensityDir.mkdir()) {
+ if (!intensityDir.mkdir()) {
throw new RuntimeException("Couldn't make intensity dir " + intensityDir.getAbsolutePath());
}
- basecallDir = new File(intensityDir, "BaseCalls");
- if(!basecallDir.mkdir()) {
+ basecallDir = new File(intensityDir, "BaseCalls");
+ if (!basecallDir.mkdir()) {
throw new RuntimeException("Couldn't make basecalls dir " + basecallDir.getAbsolutePath());
}
}
@@ -60,44 +75,58 @@ public class CheckIlluminaDirectoryTest {
IoUtil.deleteDirectoryTree(intensityDir);
}
- public void makeFiles(SupportedIlluminaFormat [] formats, int lane, List<Integer> tiles, int [] cycles) {
- for(final IlluminaFileUtil.SupportedIlluminaFormat format : formats) {
+ public void makeFiles(final SupportedIlluminaFormat[] formats, final int lane, final List<Integer> tiles,
+ final int[] cycles) {
+ for (final IlluminaFileUtil.SupportedIlluminaFormat format : formats) {
IlluminaFileUtilTest.makeFiles(format, intensityDir, lane, tiles, cycles, 0);
}
}
- public String [] makeCheckerArgs(final File basecallDir, final int lane, final String readStructure, final IlluminaDataType [] dataTypes) {
- final String [] dataTypeArgs = new String[dataTypes.length + 3];
+ public String[] makeCheckerArgs(final File basecallDir, final int lane, final String readStructure,
+ final IlluminaDataType[] dataTypes, final List<Integer> filterTiles,
+ final boolean makeFakeFiles) {
+ final String[] dataTypeArgs = new String[dataTypes.length + filterTiles.size() + 4];
dataTypeArgs[0] = "B=" + basecallDir;
dataTypeArgs[1] = StandardOptionDefinitions.LANE_SHORT_NAME + "=" + lane;
dataTypeArgs[2] = "RS=" + readStructure;
+ dataTypeArgs[3] = "F=" + makeFakeFiles;
- for(int i = 0; i < dataTypes.length; i++) {
- dataTypeArgs[i+3] = "DT=" + dataTypes[i];
+ for (int i = 0; i < dataTypes.length; i++) {
+ dataTypeArgs[i + 4] = "DT=" + dataTypes[i];
+ }
+
+ if (filterTiles.size() > 0) {
+ final int start = dataTypes.length + 4;
+ for (int i = start; i < dataTypeArgs.length; i++) {
+ dataTypeArgs[i] = "T=" + filterTiles.get(i - start);
+ }
}
return dataTypeArgs;
}
- public File writeTileMetricsOutFile(Map<Integer, List<Integer>> lanesToTiles) {
- return writeTileMetricsOutFile(interopDir, (byte)2, (byte)10, lanesToTiles);
+ public File writeTileMetricsOutFile(final Map<Integer, List<Integer>> lanesToTiles) {
+ return writeTileMetricsOutFile(interopDir, (byte) 2, (byte) 10, lanesToTiles);
}
- public File writeTileMetricsOutFile(final File interopDir, final byte versionNumber, final byte recordSize, Map<Integer, List<Integer>> lanesToTiles) {
+ public File writeTileMetricsOutFile(final File interopDir, final byte versionNumber, final byte recordSize,
+ final Map<Integer, List<Integer>> lanesToTiles) {
final File tileMetricsOut = new File(interopDir, "TileMetricsOut.bin");
- if(!tileMetricsOut.exists()) {
+ if (!tileMetricsOut.exists()) {
try {
- if(!tileMetricsOut.createNewFile()) {
- throw new PicardException("Could not create tileMetricsOut file(" + tileMetricsOut.getAbsolutePath() + ")");
+ if (!tileMetricsOut.createNewFile()) {
+ throw new PicardException(
+ "Could not create tileMetricsOut file(" + tileMetricsOut.getAbsolutePath() + ")");
}
- } catch (IOException e) {
- throw new PicardException("IOException creating tileMetricsOut file (" + tileMetricsOut + ") for writing!", e);
+ } catch (final IOException e) {
+ throw new PicardException(
+ "IOException creating tileMetricsOut file (" + tileMetricsOut + ") for writing!", e);
}
}
int totalEntries = 0;
- for(final Map.Entry<Integer, List<Integer>> l2t : lanesToTiles.entrySet()) {
+ for (final Map.Entry<Integer, List<Integer>> l2t : lanesToTiles.entrySet()) {
totalEntries += l2t.getValue().size();
}
@@ -111,11 +140,11 @@ public class CheckIlluminaDirectoryTest {
buf.put(versionNumber);
buf.put(recordSize);
- for(final int lane : lanesToTiles.keySet()) {
- for(final int tile : lanesToTiles.get(lane)) {
- buf.putShort((short)lane);
- buf.putShort((short)tile);
- buf.putShort((short)0);
+ for (final int lane : lanesToTiles.keySet()) {
+ for (final int tile : lanesToTiles.get(lane)) {
+ buf.putShort((short) lane);
+ buf.putShort((short) tile);
+ buf.putShort((short) 0);
buf.putFloat(0F);
}
}
@@ -123,7 +152,7 @@ public class CheckIlluminaDirectoryTest {
buf.force();
CloserUtil.close(channel);
CloserUtil.close(raf);
- } catch (IOException e) {
+ } catch (final IOException e) {
throw new PicardException("IOException writing tileMetricsOut file (" + tileMetricsOut + ")", e);
}
@@ -133,86 +162,121 @@ public class CheckIlluminaDirectoryTest {
public static Map<Integer, List<Integer>> makeMap(final List<Integer> lanes, final List<List<Integer>> tiles) {
final Map<Integer, List<Integer>> map = new HashMap<Integer, List<Integer>>();
- if(lanes.size() != tiles.size()) {
+ if (lanes.size() != tiles.size()) {
throw new IllegalArgumentException("Number of lanes (" + lanes + ") does not equal number of tiles!");
}
- for(int i = 0; i < lanes.size(); i++) {
+ for (int i = 0; i < lanes.size(); i++) {
map.put(lanes.get(i), tiles.get(i));
}
return map;
}
- @DataProvider(name="positiveTestData")
- public Object [][] positiveTestData() {
- return new Object[][] {
- {
- new SupportedIlluminaFormat[]{Bcl, Locs, Pos, Filter, Qseq}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position, IlluminaDataType.PF},
- 3, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,50), "25T25T"
- },
- {
- new SupportedIlluminaFormat[]{Bcl, Locs, Filter}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position, IlluminaDataType.PF},
- 2, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,50), "8S15T8S"
- },
- {
- new SupportedIlluminaFormat[]{Bcl, Filter}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
- 2, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,152), "68T8B68T"
- },
- {
- new SupportedIlluminaFormat[]{Bcl, Pos, Filter}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position, IlluminaDataType.PF},
- 5, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,50), "25T25T"
- }
+ @DataProvider(name = "positiveTestData")
+ public Object[][] positiveTestData() {
+ return new Object[][]{
+ {
+ new SupportedIlluminaFormat[]{Bcl, Locs, Pos, Filter, Qseq},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position,
+ IlluminaDataType.PF},
+ 3, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 50),
+ "25T25T", new ArrayList<Integer>()
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Locs, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position,
+ IlluminaDataType.PF},
+ 2, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 50),
+ "8S15T8S", new ArrayList<Integer>()
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
+ 2, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 152),
+ "68T8B68T", new ArrayList<Integer>()
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Pos, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position,
+ IlluminaDataType.PF},
+ 5, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 50),
+ "25T25T", new ArrayList<Integer>()
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Pos, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.Position,
+ IlluminaDataType.PF},
+ 5, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 50),
+ "25T25T", makeList(1301, 2101)
+ }
};
}
//Note: The positiveTest and negativeTests don't actually test Qseqs (the Qseq in the first test case above is there to make sure
//BCLs are preferred over Qseqs)
- @Test(dataProvider="positiveTestData")
+ @Test(dataProvider = "positiveTestData")
public void positiveTests(final IlluminaFileUtil.SupportedIlluminaFormat[] formats,
final IlluminaDataType[] dataTypes,
final int lane,
final List<Integer> tiles,
final int[] cycles,
- final String readStructure) {
+ final String readStructure,
+ final List<Integer> filterTiles) {
makeFiles(formats, lane, tiles, cycles);
- writeTileMetricsOutFile(makeMap(makeList(lane-1, lane + 1, lane),
- makeList(makeList(1,2,3), tiles, tiles)));
+ writeTileMetricsOutFile(makeMap(makeList(lane - 1, lane + 1, lane),
+ makeList(makeList(1, 2, 3), tiles, tiles)));
- String [] args = makeCheckerArgs(basecallDir, lane, readStructure, dataTypes);
- int result = new CheckIlluminaDirectory().instanceMain(args);
+ final String[] args = makeCheckerArgs(basecallDir, lane, readStructure, dataTypes, filterTiles, false);
+ final int result = new CheckIlluminaDirectory().instanceMain(args);
Assert.assertEquals(result, 0);
}
- @DataProvider(name="negativeTestData")
+ @DataProvider(name = "negativeTestData")
public Object[][] negativeTestData() {
- return new Object[][] {
- { //Completely missing data types
- new SupportedIlluminaFormat[]{Bcl, Filter}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF, IlluminaDataType.Position, IlluminaDataType.RawIntensities},
- new ArrayList<String>(),
- new ArrayList<String>(),
- 2, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,152), "68T8B68T",
- 2
- },
- {
- new SupportedIlluminaFormat[]{Bcl, Filter}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
- makeList("BaseCalls/L002/C13.1/s_2_1201.bcl", "BaseCalls/L002/C13.1/s_2_2101.bcl"),
- makeList("BaseCalls/L002/s_2_2101.filter"),
- 2, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,20), "13T",
- 3
- },
- {
- new SupportedIlluminaFormat[]{Bcl, Filter}, new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
- new ArrayList<String>(),
- new ArrayList<String>(),
- 5, makeList(1101,1201,1301, 2101,2201,2301), IlluminaFileUtilTest.cycleRange(1,152), "250T",
- 98
- },
+ return new Object[][]{
+ { //Completely missing data types
+ new SupportedIlluminaFormat[]{Bcl, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF,
+ IlluminaDataType.Position, IlluminaDataType.RawIntensities, IlluminaDataType.Barcodes,
+ Noise},
+ new ArrayList<String>(),
+ new ArrayList<String>(),
+ 2, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 152),
+ "68T8B68T",
+ 4, new ArrayList<Integer>(), true
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
+ makeList("BaseCalls/L002/C13.1/s_2_1201.bcl", "BaseCalls/L002/C13.1/s_2_2101.bcl"),
+ makeList("BaseCalls/L002/s_2_2101.filter"),
+ 2, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 20), "13T",
+ 3, new ArrayList<Integer>(), true
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
+ new ArrayList<String>(),
+ new ArrayList<String>(),
+ 5, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 152),
+ "250T",
+ 98, new ArrayList<Integer>(), true
+ },
+ {
+ new SupportedIlluminaFormat[]{Bcl, Filter},
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF},
+ new ArrayList<String>(),
+ new ArrayList<String>(),
+ 5, makeList(1101, 1201, 1301, 2101, 2201, 2301), IlluminaFileUtilTest.cycleRange(1, 152),
+ "250T",
+ 98, makeList(1301, 2201), true
+ }
};
}
- @Test(dataProvider="negativeTestData")
+ @Test(dataProvider = "negativeTestData")
public void negativeTests(final IlluminaFileUtil.SupportedIlluminaFormat[] formats,
final IlluminaDataType[] dataTypes,
final List<String> filesToDelete,
@@ -221,83 +285,95 @@ public class CheckIlluminaDirectoryTest {
final List<Integer> tiles,
final int[] cycles,
final String readStructure,
- final int expectedNumErrors) {
+ final int expectedNumErrors,
+ final List<Integer> filterTiles,
+ final boolean makeFakeFiles) {
makeFiles(formats, lane, tiles, cycles);
IlluminaFileUtilTest.deleteRelativeFiles(intensityDir, filesToDelete);
- IlluminaFileUtilTest.emptyRelativeFiles(intensityDir, filesToEmpty);
+ IlluminaFileUtilTest.emptyRelativeFiles(intensityDir, filesToEmpty);
writeTileMetricsOutFile(makeMap(makeList(lane - 1, lane + 1, lane), makeList(makeList(1, 2, 3), tiles, tiles)));
- String [] args = makeCheckerArgs(basecallDir, lane, readStructure, dataTypes);
+ final String[] args = makeCheckerArgs(basecallDir, lane, readStructure, dataTypes, filterTiles, makeFakeFiles);
int result = new CheckIlluminaDirectory().instanceMain(args);
Assert.assertEquals(expectedNumErrors, result);
+ //if we previously faked files make sure CheckIlluminaDirectory returns with no failures
+ if (makeFakeFiles) {
+ result = new CheckIlluminaDirectory().instanceMain(args);
+ Assert.assertEquals(0, result);
+ }
}
public void writeFileOfSize(final File file, final int size) {
try {
final BufferedWriter writer = new BufferedWriter(new FileWriter(file));
- for(int i = 0; i < size; i++) {
- int toWrite = Math.min(1000, size);
- char [] writeBuffer = new char[toWrite];
- for(int j = 0; j < writeBuffer.length; j++) {
- writeBuffer[j] = (char)(Math.random() * 150);
+ for (int i = 0; i < size; i++) {
+ final int toWrite = Math.min(1000, size);
+ final char[] writeBuffer = new char[toWrite];
+ for (int j = 0; j < writeBuffer.length; j++) {
+ writeBuffer[j] = (char) (Math.random() * 150);
}
writer.write(writeBuffer);
}
writer.flush();
writer.close();
- } catch(Exception exc) {
+ } catch (final Exception exc) {
throw new RuntimeException(exc);
}
}
@Test
public void differentSizedBclTest() {
- final SupportedIlluminaFormat [] formats = new SupportedIlluminaFormat[]{Bcl, Filter};
+ final SupportedIlluminaFormat[] formats = new SupportedIlluminaFormat[]{Bcl, Filter};
final int lane = 5;
- final List<Integer> tiles = makeList(1,2,3,4);
- final int [] cycles = IlluminaFileUtilTest.cycleRange(1, 50);
- final IlluminaDataType [] dataTypes = new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores};
+ final List<Integer> tiles = makeList(1, 2, 3, 4);
+ final int[] cycles = IlluminaFileUtilTest.cycleRange(1, 50);
+ final IlluminaDataType[] dataTypes = new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores};
makeFiles(new SupportedIlluminaFormat[]{Bcl, Filter}, lane, tiles, cycles);
- writeTileMetricsOutFile(makeMap(makeList(lane-1, lane + 1, lane),
- makeList(makeList(1,2,3), tiles, tiles)));
+ writeTileMetricsOutFile(makeMap(makeList(lane - 1, lane + 1, lane),
+ makeList(makeList(1, 2, 3), tiles, tiles)));
final File cycleDir = new File(basecallDir, "L005/C9.1");
writeFileOfSize(new File(cycleDir, "s_5_3.bcl"), 222);
- String [] args = makeCheckerArgs(basecallDir, lane, "50T", dataTypes);
- int result = new CheckIlluminaDirectory().instanceMain(args);
+ final String[] args = makeCheckerArgs(basecallDir, lane, "50T", dataTypes, new ArrayList<Integer>(), false);
+ final int result = new CheckIlluminaDirectory().instanceMain(args);
Assert.assertEquals(1, result);
}
@Test(expectedExceptions = PicardException.class)
public void basedirDoesntExistTest() {
- String [] args = makeCheckerArgs(new File("a_made_up_file/in_some_weird_location"), 1, "76T76T", new IlluminaDataType[]{IlluminaDataType.Position});
-
+ final String[] args = makeCheckerArgs(new File("a_made_up_file/in_some_weird_location"), 1, "76T76T",
+ new IlluminaDataType[]{IlluminaDataType.Position},
+ new ArrayList<Integer>(), false);
+
final int result = new CheckIlluminaDirectory().instanceMain(args);
}
@Test
public void qseqTest() {
- final IlluminaDataType [] dataTypes = new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF, IlluminaDataType.Position};
+ final IlluminaDataType[] dataTypes =
+ new IlluminaDataType[]{BaseCalls, IlluminaDataType.QualityScores, IlluminaDataType.PF,
+ IlluminaDataType.Position};
final int lane = 4;
- final List<Integer> tiles = makeList(1,2,3);
+ final List<Integer> tiles = makeList(1, 2, 3);
IoUtil.copyDirectoryTree(new File("testdata/net/sf/picard/illumina/IlluminaTests/BasecallsDir"), basecallDir);
writeTileMetricsOutFile(makeMap(makeList(lane), makeList(tiles)));
- String [] args = makeCheckerArgs(basecallDir, lane, "76T76T", dataTypes);
+ String[] args = makeCheckerArgs(basecallDir, lane, "76T76T", dataTypes, new ArrayList<Integer>(),
+ false);
int result = new CheckIlluminaDirectory().instanceMain(args);
Assert.assertEquals(result, 0);
- args = makeCheckerArgs(basecallDir, lane, "76T77T", dataTypes);
+ args = makeCheckerArgs(basecallDir, lane, "76T77T", dataTypes, new ArrayList<Integer>(), false);
result = new CheckIlluminaDirectory().instanceMain(args);
Assert.assertEquals(result, 1);
IlluminaFileUtilTest.deleteRelativeFiles(basecallDir, makeList("s_4_1_0002_qseq.txt"));
- args = makeCheckerArgs(basecallDir, lane, "76T76T", dataTypes);
+ args = makeCheckerArgs(basecallDir, lane, "76T76T", dataTypes, new ArrayList<Integer>(), false);
result = new CheckIlluminaDirectory().instanceMain(args);
Assert.assertEquals(result, 1);
}
diff --git a/src/tests/java/net/sf/picard/illumina/parser/TileMetricsUtilTest.java b/src/tests/java/net/sf/picard/illumina/parser/TileMetricsUtilTest.java
deleted file mode 100644
index 4add808..0000000
--- a/src/tests/java/net/sf/picard/illumina/parser/TileMetricsUtilTest.java
+++ /dev/null
@@ -1,999 +0,0 @@
-package net.sf.picard.illumina.parser;
-
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import java.io.File;
-import java.util.*;
-
-/**
- * @author mccowan
- */
-public class TileMetricsUtilTest {
- static final File TEST_DATA_DIRECTORY = new File("testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest");
-
- static class Testcase {
- final File tileMetricsFile;
- final Set<HashableTile> tiles;
-
- Testcase(final File tileMetricsFile, final Set<HashableTile> tiles) {
- this.tileMetricsFile = tileMetricsFile;
- this.tiles = tiles;
- }
- }
-
- /** Wrapper for Tile so we can use it in sets. */
- static class HashableTile extends TileMetricsUtil.Tile {
- private HashableTile(final TileMetricsUtil.Tile tile) {
- super(tile.getLaneNumber(), tile.getTileNumber(), tile.getClusterDensity(), tile.getClusterCount());
- }
-
- @Override
- public int hashCode() {
- return getTileNumber();
- }
-
- @Override
- public boolean equals(final Object o) {
- if (!(o instanceof TileMetricsUtil.Tile))
- return false;
- final TileMetricsUtil.Tile that = (TileMetricsUtil.Tile) o;
- return (that.getClusterCount() == this.getClusterCount() && that.getClusterDensity() == this.getClusterDensity()
- && that.getLaneNumber() == this.getLaneNumber() && that.getTileNumber() == this.getTileNumber());
- }
-
- public static Set<HashableTile> from(final TileMetricsUtil.Tile... tiles) {
- final HashSet<HashableTile> hashableTiles = new HashSet<HashableTile>();
- for (final TileMetricsUtil.Tile tile : tiles) {
- hashableTiles.add(new HashableTile(tile));
- }
- return hashableTiles;
- }
- }
-
- final Comparator<TileMetricsUtil.Tile> TILE_COMPARATOR = new Comparator<TileMetricsUtil.Tile>() {
- @Override
- public int compare(final TileMetricsUtil.Tile o1, final TileMetricsUtil.Tile o2) {
- return o1.getTileNumber() - o2.getTileNumber();
- }
- };
-
- @Test
- public void testParseTileData() throws Exception {
- for (final Testcase testcase : TESTCASES) {
- final List<TileMetricsUtil.Tile> tiles = new ArrayList<TileMetricsUtil.Tile>(TileMetricsUtil.parseTileMetrics(testcase.tileMetricsFile));
- System.out.println(testcase.tileMetricsFile);
- for (final TileMetricsUtil.Tile tile : tiles) {
- System.out.println(String.format("%s\t%s\t%s\t%s", tile.getLaneNumber(), tile.getTileNumber(), tile.getClusterDensity(), tile.getClusterCount()));
- }
- // TestNG does not handle set equality properly.
- Assert.assertTrue(testcase.tiles.equals(HashableTile.from(tiles.toArray(new TileMetricsUtil.Tile[tiles.size()]))));
- }
- }
-
- final Collection<Testcase> TESTCASES = Arrays.asList(
- new Testcase(
- new File(TEST_DATA_DIRECTORY, "MiSeqTileMetricsOut.bin"),
- HashableTile.from(
- new TileMetricsUtil.Tile(1, 1108, 699852.94f, 482438.0f),
- new TileMetricsUtil.Tile(1, 1107, 699791.7f, 482610.0f),
- new TileMetricsUtil.Tile(1, 1109, 702601.44f, 484489.0f),
- new TileMetricsUtil.Tile(1, 1112, 718619.8f, 495960.0f),
- new TileMetricsUtil.Tile(1, 1113, 725377.25f, 500602.0f),
- new TileMetricsUtil.Tile(1, 1101, 698977.6f, 482926.0f),
- new TileMetricsUtil.Tile(1, 1110, 704098.25f, 485448.0f),
- new TileMetricsUtil.Tile(1, 1102, 693865.75f, 478827.0f),
- new TileMetricsUtil.Tile(1, 1111, 710164.2f, 488936.0f),
- new TileMetricsUtil.Tile(1, 1103, 698093.56f, 479817.0f),
- new TileMetricsUtil.Tile(1, 1104, 693708.2f, 479144.0f),
- new TileMetricsUtil.Tile(1, 1114, 734585.6f, 503048.0f),
- new TileMetricsUtil.Tile(1, 1105, 700553.2f, 479396.0f),
- new TileMetricsUtil.Tile(1, 1106, 694881.1f, 480108.0f)
- )
- ),
- new Testcase(new File(TEST_DATA_DIRECTORY, "HiSeq2000TileMetricsOut.bin"),
- HashableTile.from(
- new TileMetricsUtil.Tile(4, 1101, 544523.94f, 1568229.0f),
- new TileMetricsUtil.Tile(4, 1209, 668636.44f, 1925673.0f),
- new TileMetricsUtil.Tile(4, 1105, 602357.25f, 1734789.0f),
- new TileMetricsUtil.Tile(4, 1104, 582857.25f, 1678629.0f),
- new TileMetricsUtil.Tile(4, 1103, 571702.06f, 1646502.0f),
- new TileMetricsUtil.Tile(4, 1309, 664930.2f, 1914999.0f),
- new TileMetricsUtil.Tile(4, 1102, 559316.3f, 1610831.0f),
- new TileMetricsUtil.Tile(4, 1308, 659291.6f, 1898760.0f),
- new TileMetricsUtil.Tile(4, 1109, 650628.8f, 1873811.0f),
- new TileMetricsUtil.Tile(4, 1108, 640943.0f, 1845916.0f),
- new TileMetricsUtil.Tile(4, 1107, 629443.75f, 1812798.0f),
- new TileMetricsUtil.Tile(4, 1106, 623904.1f, 1796844.0f),
- new TileMetricsUtil.Tile(4, 1302, 563119.44f, 1621784.0f),
- new TileMetricsUtil.Tile(4, 1303, 583720.1f, 1681114.0f),
- new TileMetricsUtil.Tile(4, 1301, 551133.3f, 1587264.0f),
- new TileMetricsUtil.Tile(4, 1306, 628892.0f, 1811209.0f),
- new TileMetricsUtil.Tile(4, 1307, 637835.06f, 1836965.0f),
- new TileMetricsUtil.Tile(4, 1304, 592503.1f, 1706409.0f),
- new TileMetricsUtil.Tile(4, 1305, 608899.3f, 1753630.0f),
- new TileMetricsUtil.Tile(4, 1207, 638901.4f, 1840036.0f),
- new TileMetricsUtil.Tile(4, 1208, 662913.9f, 1909192.0f),
- new TileMetricsUtil.Tile(4, 1205, 607855.56f, 1750624.0f),
- new TileMetricsUtil.Tile(4, 1206, 628009.7f, 1808668.0f),
- new TileMetricsUtil.Tile(4, 1203, 578895.1f, 1667218.0f),
- new TileMetricsUtil.Tile(4, 1204, 592665.25f, 1706876.0f),
- new TileMetricsUtil.Tile(4, 1201, 545689.94f, 1571587.0f),
- new TileMetricsUtil.Tile(4, 1202, 562946.5f, 1621286.0f),
- new TileMetricsUtil.Tile(8, 2201, 580058.7f, 1670569.0f),
- new TileMetricsUtil.Tile(1, 1216, 882619.06f, 2541943.0f),
- new TileMetricsUtil.Tile(4, 1110, 541470.1f, 1559434.0f),
- new TileMetricsUtil.Tile(1, 1215, 860050.3f, 2476945.0f),
- new TileMetricsUtil.Tile(1, 1214, 840209.3f, 2419803.0f),
- new TileMetricsUtil.Tile(4, 1112, 723241.6f, 2082936.0f),
- new TileMetricsUtil.Tile(8, 2309, 706817.3f, 2035634.0f),
- new TileMetricsUtil.Tile(1, 1213, 820381.2f, 2362698.0f),
- new TileMetricsUtil.Tile(4, 1111, 680317.0f, 1959313.0f),
- new TileMetricsUtil.Tile(8, 2308, 636503.1f, 1833129.0f),
- new TileMetricsUtil.Tile(8, 2205, 625169.06f, 1800487.0f),
- new TileMetricsUtil.Tile(1, 1212, 798143.4f, 2298653.0f),
- new TileMetricsUtil.Tile(8, 2307, 679264.56f, 1956282.0f),
- new TileMetricsUtil.Tile(8, 2204, 613535.06f, 1766981.0f),
- new TileMetricsUtil.Tile(1, 1211, 664055.5f, 1912480.0f),
- new TileMetricsUtil.Tile(8, 2306, 664856.94f, 1914788.0f),
- new TileMetricsUtil.Tile(8, 2203, 598923.56f, 1724900.0f),
- new TileMetricsUtil.Tile(1, 1210, 760540.25f, 2190356.0f),
- new TileMetricsUtil.Tile(8, 2305, 668689.9f, 1925827.0f),
- new TileMetricsUtil.Tile(8, 2202, 587381.56f, 1691659.0f),
- new TileMetricsUtil.Tile(8, 2304, 657572.9f, 1893810.0f),
- new TileMetricsUtil.Tile(8, 2209, 640729.8f, 1845302.0f),
- new TileMetricsUtil.Tile(8, 2303, 644017.3f, 1854770.0f),
- new TileMetricsUtil.Tile(8, 2208, 508849.62f, 1465487.0f),
- new TileMetricsUtil.Tile(8, 2302, 633281.25f, 1823850.0f),
- new TileMetricsUtil.Tile(8, 2207, 622214.2f, 1791977.0f),
- new TileMetricsUtil.Tile(8, 2301, 628018.4f, 1808693.0f),
- new TileMetricsUtil.Tile(8, 2206, 624249.6f, 1797839.0f),
- new TileMetricsUtil.Tile(4, 1114, 757904.1f, 2182764.0f),
- new TileMetricsUtil.Tile(4, 1113, 735293.75f, 2117646.0f),
- new TileMetricsUtil.Tile(4, 1116, 807057.25f, 2324325.0f),
- new TileMetricsUtil.Tile(4, 1115, 774449.6f, 2230415.0f),
- new TileMetricsUtil.Tile(4, 1315, 771113.9f, 2220808.0f),
- new TileMetricsUtil.Tile(4, 1316, 807589.9f, 2325859.0f),
- new TileMetricsUtil.Tile(4, 1311, 682684.0f, 1966130.0f),
- new TileMetricsUtil.Tile(4, 1312, 714275.7f, 2057114.0f),
- new TileMetricsUtil.Tile(4, 1313, 725538.9f, 2089552.0f),
- new TileMetricsUtil.Tile(4, 1314, 748121.9f, 2154591.0f),
- new TileMetricsUtil.Tile(4, 1310, 570019.06f, 1641655.0f),
- new TileMetricsUtil.Tile(8, 2210, 681208.6f, 1961881.0f),
- new TileMetricsUtil.Tile(1, 1203, 489748.25f, 1410475.0f),
- new TileMetricsUtil.Tile(1, 1202, 610748.94f, 1758957.0f),
- new TileMetricsUtil.Tile(8, 2212, 730553.44f, 2103994.0f),
- new TileMetricsUtil.Tile(1, 1205, 679378.8f, 1956611.0f),
- new TileMetricsUtil.Tile(8, 2211, 703858.6f, 2027113.0f),
- new TileMetricsUtil.Tile(1, 1204, 669089.56f, 1926978.0f),
- new TileMetricsUtil.Tile(8, 2214, 775678.1f, 2233953.0f),
- new TileMetricsUtil.Tile(8, 2316, 883011.06f, 2543072.0f),
- new TileMetricsUtil.Tile(8, 2213, 755248.94f, 2175117.0f),
- new TileMetricsUtil.Tile(8, 2216, 835654.5f, 2406685.0f),
- new TileMetricsUtil.Tile(1, 1201, 637199.6f, 1835135.0f),
- new TileMetricsUtil.Tile(8, 2215, 803531.2f, 2314170.0f),
- new TileMetricsUtil.Tile(8, 2313, 817312.1f, 2353859.0f),
- new TileMetricsUtil.Tile(8, 2312, 785540.6f, 2262357.0f),
- new TileMetricsUtil.Tile(8, 2315, 859228.8f, 2474579.0f),
- new TileMetricsUtil.Tile(8, 2314, 834483.6f, 2403313.0f),
- new TileMetricsUtil.Tile(8, 2311, 757663.2f, 2182070.0f),
- new TileMetricsUtil.Tile(8, 2310, 730759.4f, 2104587.0f),
- new TileMetricsUtil.Tile(1, 1206, 694945.44f, 2001443.0f),
- new TileMetricsUtil.Tile(1, 1207, 714633.3f, 2058144.0f),
- new TileMetricsUtil.Tile(1, 1208, 728221.5f, 2097278.0f),
- new TileMetricsUtil.Tile(1, 1209, 738294.75f, 2126289.0f),
- new TileMetricsUtil.Tile(7, 2110, 673538.9f, 1939792.0f),
- new TileMetricsUtil.Tile(7, 2112, 704191.6f, 2028072.0f),
- new TileMetricsUtil.Tile(7, 2111, 685426.4f, 1974028.0f),
- new TileMetricsUtil.Tile(7, 2114, 721513.9f, 2077960.0f),
- new TileMetricsUtil.Tile(7, 2113, 703654.1f, 2026524.0f),
- new TileMetricsUtil.Tile(7, 2116, 832204.1f, 2396748.0f),
- new TileMetricsUtil.Tile(7, 2115, 800356.56f, 2305027.0f),
- new TileMetricsUtil.Tile(1, 1310, 748198.56f, 2154812.0f),
- new TileMetricsUtil.Tile(1, 1311, 760848.56f, 2191244.0f),
- new TileMetricsUtil.Tile(1, 1314, 822341.6f, 2368344.0f),
- new TileMetricsUtil.Tile(1, 1315, 842715.94f, 2427022.0f),
- new TileMetricsUtil.Tile(1, 1312, 782347.9f, 2253162.0f),
- new TileMetricsUtil.Tile(1, 1313, 797066.6f, 2295552.0f),
- new TileMetricsUtil.Tile(4, 1213, 754260.06f, 2172269.0f),
- new TileMetricsUtil.Tile(4, 1212, 738124.6f, 2125799.0f),
- new TileMetricsUtil.Tile(4, 1215, 791817.3f, 2280434.0f),
- new TileMetricsUtil.Tile(4, 1214, 773102.4f, 2226535.0f),
- new TileMetricsUtil.Tile(4, 2215, 813619.75f, 2343225.0f),
- new TileMetricsUtil.Tile(4, 1216, 819997.56f, 2361593.0f),
- new TileMetricsUtil.Tile(4, 2216, 849781.94f, 2447372.0f),
- new TileMetricsUtil.Tile(4, 2213, 766563.9f, 2207704.0f),
- new TileMetricsUtil.Tile(4, 2214, 788909.7f, 2272060.0f),
- new TileMetricsUtil.Tile(4, 2211, 687051.4f, 1978708.0f),
- new TileMetricsUtil.Tile(4, 2212, 744198.56f, 2143292.0f),
- new TileMetricsUtil.Tile(1, 1316, 862726.4f, 2484652.0f),
- new TileMetricsUtil.Tile(4, 2210, 521713.2f, 1502534.0f),
- new TileMetricsUtil.Tile(4, 1211, 688227.75f, 1982096.0f),
- new TileMetricsUtil.Tile(4, 1210, 524505.56f, 1510576.0f),
- new TileMetricsUtil.Tile(1, 1301, 639037.8f, 1840429.0f),
- new TileMetricsUtil.Tile(1, 1302, 627089.2f, 1806017.0f),
- new TileMetricsUtil.Tile(1, 1303, 571894.75f, 1647057.0f),
- new TileMetricsUtil.Tile(1, 1304, 678610.4f, 1954398.0f),
- new TileMetricsUtil.Tile(4, 2206, 626362.8f, 1803925.0f),
- new TileMetricsUtil.Tile(4, 2207, 638985.75f, 1840279.0f),
- new TileMetricsUtil.Tile(4, 2208, 659187.5f, 1898460.0f),
- new TileMetricsUtil.Tile(4, 2209, 662527.4f, 1908079.0f),
- new TileMetricsUtil.Tile(8, 1201, 580114.56f, 1670730.0f),
- new TileMetricsUtil.Tile(4, 2202, 565767.0f, 1629409.0f),
- new TileMetricsUtil.Tile(8, 1202, 588968.4f, 1696229.0f),
- new TileMetricsUtil.Tile(4, 2203, 579432.94f, 1668767.0f),
- new TileMetricsUtil.Tile(8, 1203, 601895.1f, 1733458.0f),
- new TileMetricsUtil.Tile(4, 2204, 593594.75f, 1709553.0f),
- new TileMetricsUtil.Tile(8, 1204, 615764.56f, 1773402.0f),
- new TileMetricsUtil.Tile(4, 2205, 607054.1f, 1748316.0f),
- new TileMetricsUtil.Tile(8, 1205, 626597.2f, 1804600.0f),
- new TileMetricsUtil.Tile(1, 1306, 695878.8f, 2004131.0f),
- new TileMetricsUtil.Tile(8, 1206, 625105.9f, 1800305.0f),
- new TileMetricsUtil.Tile(1, 1305, 688254.8f, 1982174.0f),
- new TileMetricsUtil.Tile(8, 1207, 629282.6f, 1812334.0f),
- new TileMetricsUtil.Tile(1, 1308, 724458.3f, 2086440.0f),
- new TileMetricsUtil.Tile(8, 1208, 518821.84f, 1494207.0f),
- new TileMetricsUtil.Tile(1, 1307, 711070.1f, 2047882.0f),
- new TileMetricsUtil.Tile(4, 2201, 549562.1f, 1582739.0f),
- new TileMetricsUtil.Tile(8, 1209, 643954.1f, 1854588.0f),
- new TileMetricsUtil.Tile(1, 1309, 737026.4f, 2122636.0f),
- new TileMetricsUtil.Tile(5, 1215, 696313.5f, 2005383.0f),
- new TileMetricsUtil.Tile(5, 1112, 629804.8f, 1813838.0f),
- new TileMetricsUtil.Tile(5, 1216, 722283.3f, 2080176.0f),
- new TileMetricsUtil.Tile(5, 1113, 646438.9f, 1861744.0f),
- new TileMetricsUtil.Tile(5, 1213, 649733.6f, 1871233.0f),
- new TileMetricsUtil.Tile(5, 1110, 600022.9f, 1728066.0f),
- new TileMetricsUtil.Tile(5, 1214, 664704.8f, 1914350.0f),
- new TileMetricsUtil.Tile(5, 1111, 613195.1f, 1766002.0f),
- new TileMetricsUtil.Tile(5, 1211, 617793.75f, 1779246.0f),
- new TileMetricsUtil.Tile(5, 1116, 696982.6f, 2007310.0f),
- new TileMetricsUtil.Tile(5, 1212, 633925.0f, 1825704.0f),
- new TileMetricsUtil.Tile(5, 1114, 662869.06f, 1909063.0f),
- new TileMetricsUtil.Tile(5, 1210, 606142.0f, 1745689.0f),
- new TileMetricsUtil.Tile(5, 1115, 682282.94f, 1964975.0f),
- new TileMetricsUtil.Tile(5, 1202, 502318.72f, 1446678.0f),
- new TileMetricsUtil.Tile(5, 1203, 509032.28f, 1466013.0f),
- new TileMetricsUtil.Tile(5, 1204, 524129.5f, 1509493.0f),
- new TileMetricsUtil.Tile(5, 1101, 464122.56f, 1336673.0f),
- new TileMetricsUtil.Tile(5, 1205, 537867.0f, 1549057.0f),
- new TileMetricsUtil.Tile(5, 1102, 475782.97f, 1370255.0f),
- new TileMetricsUtil.Tile(5, 1206, 554514.56f, 1597002.0f),
- new TileMetricsUtil.Tile(5, 1207, 554870.44f, 1598027.0f),
- new TileMetricsUtil.Tile(5, 1208, 534909.0f, 1540538.0f),
- new TileMetricsUtil.Tile(5, 1209, 423839.9f, 1220659.0f),
- new TileMetricsUtil.Tile(5, 1107, 533332.25f, 1535997.0f),
- new TileMetricsUtil.Tile(5, 1108, 526180.2f, 1515399.0f),
- new TileMetricsUtil.Tile(5, 1109, 434699.62f, 1251935.0f),
- new TileMetricsUtil.Tile(5, 1103, 490662.8f, 1413109.0f),
- new TileMetricsUtil.Tile(5, 1104, 505400.7f, 1455554.0f),
- new TileMetricsUtil.Tile(5, 1105, 517630.2f, 1490775.0f),
- new TileMetricsUtil.Tile(5, 1201, 487380.2f, 1403655.0f),
- new TileMetricsUtil.Tile(5, 1106, 535730.2f, 1542903.0f),
- new TileMetricsUtil.Tile(5, 2210, 608615.94f, 1752814.0f),
- new TileMetricsUtil.Tile(5, 2211, 619886.06f, 1785272.0f),
- new TileMetricsUtil.Tile(5, 2212, 639725.3f, 1842409.0f),
- new TileMetricsUtil.Tile(5, 2213, 657432.25f, 1893405.0f),
- new TileMetricsUtil.Tile(5, 2214, 676718.0f, 1948948.0f),
- new TileMetricsUtil.Tile(4, 2302, 567359.7f, 1633996.0f),
- new TileMetricsUtil.Tile(5, 2215, 710085.06f, 2045045.0f),
- new TileMetricsUtil.Tile(4, 2301, 559488.2f, 1611326.0f),
- new TileMetricsUtil.Tile(5, 2216, 743956.94f, 2142596.0f),
- new TileMetricsUtil.Tile(4, 2304, 597910.4f, 1721982.0f),
- new TileMetricsUtil.Tile(4, 2303, 589819.75f, 1698681.0f),
- new TileMetricsUtil.Tile(4, 2306, 631276.7f, 1818077.0f),
- new TileMetricsUtil.Tile(4, 2305, 610759.4f, 1758987.0f),
- new TileMetricsUtil.Tile(4, 2308, 661488.5f, 1905087.0f),
- new TileMetricsUtil.Tile(4, 2307, 642497.56f, 1850393.0f),
- new TileMetricsUtil.Tile(8, 2115, 832733.6f, 2398273.0f),
- new TileMetricsUtil.Tile(4, 2309, 668314.9f, 1924747.0f),
- new TileMetricsUtil.Tile(8, 2114, 803602.4f, 2314375.0f),
- new TileMetricsUtil.Tile(8, 2116, 867472.2f, 2498320.0f),
- new TileMetricsUtil.Tile(8, 2111, 726911.44f, 2093505.0f),
- new TileMetricsUtil.Tile(8, 2110, 702797.56f, 2024057.0f),
- new TileMetricsUtil.Tile(8, 2113, 774282.25f, 2229933.0f),
- new TileMetricsUtil.Tile(8, 2112, 747976.0f, 2154171.0f),
- new TileMetricsUtil.Tile(4, 2311, 685694.06f, 1974799.0f),
- new TileMetricsUtil.Tile(4, 2310, 573007.6f, 1650262.0f),
- new TileMetricsUtil.Tile(4, 2315, 802572.56f, 2311409.0f),
- new TileMetricsUtil.Tile(4, 2314, 776761.44f, 2237073.0f),
- new TileMetricsUtil.Tile(4, 2313, 741337.1f, 2135051.0f),
- new TileMetricsUtil.Tile(4, 2312, 717835.06f, 2067365.0f),
- new TileMetricsUtil.Tile(4, 2316, 846926.4f, 2439148.0f),
- new TileMetricsUtil.Tile(8, 2106, 635906.56f, 1831411.0f),
- new TileMetricsUtil.Tile(8, 2105, 614185.75f, 1768855.0f),
- new TileMetricsUtil.Tile(8, 2104, 602665.94f, 1735678.0f),
- new TileMetricsUtil.Tile(8, 2103, 586181.25f, 1688202.0f),
- new TileMetricsUtil.Tile(8, 2102, 574919.75f, 1655769.0f),
- new TileMetricsUtil.Tile(8, 2101, 567252.44f, 1633687.0f),
- new TileMetricsUtil.Tile(8, 2109, 673661.06f, 1940144.0f),
- new TileMetricsUtil.Tile(8, 2108, 566904.8f, 1632686.0f),
- new TileMetricsUtil.Tile(8, 2107, 649836.06f, 1871528.0f),
- new TileMetricsUtil.Tile(1, 2115, 923581.94f, 2659916.0f),
- new TileMetricsUtil.Tile(1, 2116, 934428.8f, 2691155.0f),
- new TileMetricsUtil.Tile(6, 2116, 772013.2f, 2223398.0f),
- new TileMetricsUtil.Tile(1, 2111, 708667.3f, 2040962.0f),
- new TileMetricsUtil.Tile(1, 2112, 844992.3f, 2433578.0f),
- new TileMetricsUtil.Tile(1, 2113, 874324.6f, 2518055.0f),
- new TileMetricsUtil.Tile(1, 2114, 901182.94f, 2595407.0f),
- new TileMetricsUtil.Tile(6, 2110, 600281.25f, 1728810.0f),
- new TileMetricsUtil.Tile(1, 2210, 754377.75f, 2172608.0f),
- new TileMetricsUtil.Tile(6, 2111, 548103.8f, 1578539.0f),
- new TileMetricsUtil.Tile(1, 2211, 659499.6f, 1899359.0f),
- new TileMetricsUtil.Tile(1, 2212, 808203.8f, 2327627.0f),
- new TileMetricsUtil.Tile(1, 2110, 795863.9f, 2292088.0f),
- new TileMetricsUtil.Tile(1, 2213, 831343.0f, 2394268.0f),
- new TileMetricsUtil.Tile(6, 2114, 716921.5f, 2064734.0f),
- new TileMetricsUtil.Tile(2, 1307, 715829.1f, 2061588.0f),
- new TileMetricsUtil.Tile(1, 2214, 858290.56f, 2471877.0f),
- new TileMetricsUtil.Tile(6, 2115, 742764.2f, 2139161.0f),
- new TileMetricsUtil.Tile(2, 1306, 701070.1f, 2019082.0f),
- new TileMetricsUtil.Tile(1, 2215, 885655.9f, 2550689.0f),
- new TileMetricsUtil.Tile(6, 2112, 605255.56f, 1743136.0f),
- new TileMetricsUtil.Tile(2, 1309, 743669.75f, 2141769.0f),
- new TileMetricsUtil.Tile(1, 2216, 914737.8f, 2634445.0f),
- new TileMetricsUtil.Tile(6, 2113, 686910.06f, 1978301.0f),
- new TileMetricsUtil.Tile(2, 1308, 729707.94f, 2101559.0f),
- new TileMetricsUtil.Tile(2, 1302, 641778.8f, 1848323.0f),
- new TileMetricsUtil.Tile(2, 1303, 643553.44f, 1853434.0f),
- new TileMetricsUtil.Tile(2, 1304, 566862.1f, 1632563.0f),
- new TileMetricsUtil.Tile(2, 1305, 678740.25f, 1954772.0f),
- new TileMetricsUtil.Tile(2, 1301, 645812.5f, 1859940.0f),
- new TileMetricsUtil.Tile(6, 2105, 531359.0f, 1530314.0f),
- new TileMetricsUtil.Tile(6, 2106, 545602.06f, 1571334.0f),
- new TileMetricsUtil.Tile(5, 2209, 422861.78f, 1217842.0f),
- new TileMetricsUtil.Tile(6, 2107, 563268.4f, 1622213.0f),
- new TileMetricsUtil.Tile(5, 2208, 527286.06f, 1518584.0f),
- new TileMetricsUtil.Tile(6, 2108, 576791.3f, 1661159.0f),
- new TileMetricsUtil.Tile(5, 2207, 554846.2f, 1597957.0f),
- new TileMetricsUtil.Tile(6, 2109, 584765.25f, 1684124.0f),
- new TileMetricsUtil.Tile(5, 2301, 497327.06f, 1432302.0f),
- new TileMetricsUtil.Tile(5, 2206, 552916.6f, 1592400.0f),
- new TileMetricsUtil.Tile(5, 2205, 538032.6f, 1549534.0f),
- new TileMetricsUtil.Tile(5, 2204, 523163.2f, 1506710.0f),
- new TileMetricsUtil.Tile(5, 2203, 509249.62f, 1466639.0f),
- new TileMetricsUtil.Tile(5, 2305, 544915.94f, 1569358.0f),
- new TileMetricsUtil.Tile(5, 2202, 503321.84f, 1449567.0f),
- new TileMetricsUtil.Tile(5, 2304, 530174.6f, 1526903.0f),
- new TileMetricsUtil.Tile(5, 2201, 490223.94f, 1411845.0f),
- new TileMetricsUtil.Tile(5, 2303, 518052.06f, 1491990.0f),
- new TileMetricsUtil.Tile(5, 2302, 507330.53f, 1461112.0f),
- new TileMetricsUtil.Tile(6, 2101, 479912.47f, 1382148.0f),
- new TileMetricsUtil.Tile(5, 2309, 467297.22f, 1345816.0f),
- new TileMetricsUtil.Tile(6, 2102, 489415.25f, 1409516.0f),
- new TileMetricsUtil.Tile(5, 2308, 549378.1f, 1582209.0f),
- new TileMetricsUtil.Tile(6, 2103, 503457.28f, 1449957.0f),
- new TileMetricsUtil.Tile(5, 2307, 558159.4f, 1607499.0f),
- new TileMetricsUtil.Tile(6, 2104, 519150.3f, 1495153.0f),
- new TileMetricsUtil.Tile(5, 2306, 557682.25f, 1606125.0f),
- new TileMetricsUtil.Tile(5, 2106, 542138.5f, 1561359.0f),
- new TileMetricsUtil.Tile(5, 2107, 539329.1f, 1553268.0f),
- new TileMetricsUtil.Tile(5, 2104, 511818.72f, 1474038.0f),
- new TileMetricsUtil.Tile(5, 2105, 525148.56f, 1512428.0f),
- new TileMetricsUtil.Tile(5, 2108, 529528.8f, 1525043.0f),
- new TileMetricsUtil.Tile(5, 2109, 436176.38f, 1256188.0f),
- new TileMetricsUtil.Tile(5, 2102, 489248.6f, 1409036.0f),
- new TileMetricsUtil.Tile(5, 2103, 498902.4f, 1436839.0f),
- new TileMetricsUtil.Tile(5, 2101, 478850.7f, 1379090.0f),
- new TileMetricsUtil.Tile(5, 2310, 614297.56f, 1769177.0f),
- new TileMetricsUtil.Tile(1, 1108, 732500.3f, 2109601.0f),
- new TileMetricsUtil.Tile(3, 1314, 761796.5f, 2193974.0f),
- new TileMetricsUtil.Tile(1, 1107, 723057.6f, 2082406.0f),
- new TileMetricsUtil.Tile(3, 1315, 779274.6f, 2244311.0f),
- new TileMetricsUtil.Tile(5, 2312, 654979.1f, 1886340.0f),
- new TileMetricsUtil.Tile(3, 1316, 805652.06f, 2320278.0f),
- new TileMetricsUtil.Tile(5, 2311, 630389.9f, 1815523.0f),
- new TileMetricsUtil.Tile(1, 1109, 737068.0f, 2122756.0f),
- new TileMetricsUtil.Tile(3, 1310, 698116.3f, 2010575.0f),
- new TileMetricsUtil.Tile(3, 1311, 705486.75f, 2031802.0f),
- new TileMetricsUtil.Tile(3, 1312, 722473.94f, 2080725.0f),
- new TileMetricsUtil.Tile(3, 1210, 691075.3f, 1990297.0f),
- new TileMetricsUtil.Tile(3, 1313, 743791.3f, 2142119.0f),
- new TileMetricsUtil.Tile(3, 1211, 669740.25f, 1928852.0f),
- new TileMetricsUtil.Tile(3, 1212, 683764.56f, 1969242.0f),
- new TileMetricsUtil.Tile(2, 2301, 644702.75f, 1856744.0f),
- new TileMetricsUtil.Tile(3, 1213, 745240.6f, 2146293.0f),
- new TileMetricsUtil.Tile(2, 2302, 640053.8f, 1843355.0f),
- new TileMetricsUtil.Tile(3, 1214, 763265.25f, 2198204.0f),
- new TileMetricsUtil.Tile(5, 2314, 707079.1f, 2036388.0f),
- new TileMetricsUtil.Tile(2, 2303, 637679.1f, 1836516.0f),
- new TileMetricsUtil.Tile(3, 1215, 784061.75f, 2258098.0f),
- new TileMetricsUtil.Tile(5, 2313, 685909.0f, 1975418.0f),
- new TileMetricsUtil.Tile(2, 2304, 558232.6f, 1607710.0f),
- new TileMetricsUtil.Tile(3, 1216, 817701.7f, 2354981.0f),
- new TileMetricsUtil.Tile(5, 2316, 756095.44f, 2177555.0f),
- new TileMetricsUtil.Tile(2, 2305, 672488.2f, 1936766.0f),
- new TileMetricsUtil.Tile(5, 2315, 726102.75f, 2091176.0f),
- new TileMetricsUtil.Tile(2, 2306, 696259.7f, 2005228.0f),
- new TileMetricsUtil.Tile(2, 2308, 724660.06f, 2087021.0f),
- new TileMetricsUtil.Tile(2, 2307, 709184.0f, 2042450.0f),
- new TileMetricsUtil.Tile(2, 2309, 738271.9f, 2126223.0f),
- new TileMetricsUtil.Tile(5, 2115, 719669.75f, 2072649.0f),
- new TileMetricsUtil.Tile(5, 2116, 747281.94f, 2152172.0f),
- new TileMetricsUtil.Tile(5, 2111, 622326.7f, 1792301.0f),
- new TileMetricsUtil.Tile(5, 2112, 642663.9f, 1850872.0f),
- new TileMetricsUtil.Tile(5, 2113, 666584.4f, 1919763.0f),
- new TileMetricsUtil.Tile(1, 1101, 638016.3f, 1837487.0f),
- new TileMetricsUtil.Tile(5, 2114, 690995.8f, 1990068.0f),
- new TileMetricsUtil.Tile(1, 1102, 628837.8f, 1811053.0f),
- new TileMetricsUtil.Tile(1, 1103, 541017.3f, 1558130.0f),
- new TileMetricsUtil.Tile(1, 1104, 684565.25f, 1971548.0f),
- new TileMetricsUtil.Tile(1, 1105, 694928.44f, 2001394.0f),
- new TileMetricsUtil.Tile(5, 2110, 603435.75f, 1737895.0f),
- new TileMetricsUtil.Tile(1, 1106, 709664.56f, 2043834.0f),
- new TileMetricsUtil.Tile(1, 2102, 644288.5f, 1855551.0f),
- new TileMetricsUtil.Tile(3, 1305, 637067.3f, 1834754.0f),
- new TileMetricsUtil.Tile(1, 2103, 558346.5f, 1608038.0f),
- new TileMetricsUtil.Tile(3, 1306, 650456.25f, 1873314.0f),
- new TileMetricsUtil.Tile(3, 1303, 608183.7f, 1751569.0f),
- new TileMetricsUtil.Tile(1, 2101, 651085.75f, 1875127.0f),
- new TileMetricsUtil.Tile(3, 1304, 621200.3f, 1789057.0f),
- new TileMetricsUtil.Tile(1, 2106, 737575.7f, 2124218.0f),
- new TileMetricsUtil.Tile(3, 1301, 589746.9f, 1698471.0f),
- new TileMetricsUtil.Tile(1, 2107, 749932.6f, 2159806.0f),
- new TileMetricsUtil.Tile(3, 1302, 595741.6f, 1715736.0f),
- new TileMetricsUtil.Tile(1, 2104, 709154.1f, 2042364.0f),
- new TileMetricsUtil.Tile(1, 2105, 722732.94f, 2081471.0f),
- new TileMetricsUtil.Tile(2, 2312, 791178.8f, 2278595.0f),
- new TileMetricsUtil.Tile(3, 1202, 576400.0f, 1660032.0f),
- new TileMetricsUtil.Tile(2, 2313, 823073.56f, 2370452.0f),
- new TileMetricsUtil.Tile(3, 1203, 590679.1f, 1701156.0f),
- new TileMetricsUtil.Tile(2, 2310, 754680.2f, 2173479.0f),
- new TileMetricsUtil.Tile(2, 2311, 775202.75f, 2232584.0f),
- new TileMetricsUtil.Tile(3, 1201, 565834.0f, 1629602.0f),
- new TileMetricsUtil.Tile(2, 2316, 901895.44f, 2597459.0f),
- new TileMetricsUtil.Tile(3, 1206, 629970.44f, 1814315.0f),
- new TileMetricsUtil.Tile(3, 1207, 644813.2f, 1857062.0f),
- new TileMetricsUtil.Tile(2, 2314, 850203.06f, 2448585.0f),
- new TileMetricsUtil.Tile(3, 1204, 604347.56f, 1740521.0f),
- new TileMetricsUtil.Tile(2, 2315, 878050.3f, 2528785.0f),
- new TileMetricsUtil.Tile(3, 1205, 617782.94f, 1779215.0f),
- new TileMetricsUtil.Tile(3, 1209, 672435.75f, 1936615.0f),
- new TileMetricsUtil.Tile(3, 1208, 658543.75f, 1896606.0f),
- new TileMetricsUtil.Tile(1, 1112, 804387.1f, 2316635.0f),
- new TileMetricsUtil.Tile(1, 1113, 812433.3f, 2339808.0f),
- new TileMetricsUtil.Tile(1, 1110, 759368.75f, 2186982.0f),
- new TileMetricsUtil.Tile(1, 2109, 772350.0f, 2224368.0f),
- new TileMetricsUtil.Tile(1, 1111, 679406.56f, 1956691.0f),
- new TileMetricsUtil.Tile(1, 2108, 763281.25f, 2198250.0f),
- new TileMetricsUtil.Tile(1, 1116, 877692.7f, 2527755.0f),
- new TileMetricsUtil.Tile(3, 1309, 680868.4f, 1960901.0f),
- new TileMetricsUtil.Tile(1, 1114, 825847.2f, 2378440.0f),
- new TileMetricsUtil.Tile(3, 1308, 670088.5f, 1929855.0f),
- new TileMetricsUtil.Tile(1, 1115, 840287.1f, 2420027.0f),
- new TileMetricsUtil.Tile(3, 1307, 659739.2f, 1900049.0f),
- new TileMetricsUtil.Tile(3, 1103, 592339.56f, 1705938.0f),
- new TileMetricsUtil.Tile(3, 1104, 607652.75f, 1750040.0f),
- new TileMetricsUtil.Tile(3, 1101, 564907.25f, 1626933.0f),
- new TileMetricsUtil.Tile(3, 1102, 578869.75f, 1667145.0f),
- new TileMetricsUtil.Tile(3, 1107, 651271.1f, 1875661.0f),
- new TileMetricsUtil.Tile(3, 1108, 669996.1f, 1929589.0f),
- new TileMetricsUtil.Tile(3, 1105, 622082.25f, 1791597.0f),
- new TileMetricsUtil.Tile(3, 1106, 634596.9f, 1827639.0f),
- new TileMetricsUtil.Tile(3, 1109, 683126.7f, 1967405.0f),
- new TileMetricsUtil.Tile(2, 1206, 724352.75f, 2086136.0f),
- new TileMetricsUtil.Tile(3, 1112, 714129.8f, 2056694.0f),
- new TileMetricsUtil.Tile(2, 1205, 698064.2f, 2010425.0f),
- new TileMetricsUtil.Tile(3, 1113, 774392.0f, 2230249.0f),
- new TileMetricsUtil.Tile(2, 1204, 526577.44f, 1516543.0f),
- new TileMetricsUtil.Tile(3, 1114, 792397.9f, 2282106.0f),
- new TileMetricsUtil.Tile(2, 1203, 657213.5f, 1892775.0f),
- new TileMetricsUtil.Tile(3, 1115, 802558.3f, 2311368.0f),
- new TileMetricsUtil.Tile(2, 1202, 667077.06f, 1921182.0f),
- new TileMetricsUtil.Tile(3, 1116, 839689.56f, 2418306.0f),
- new TileMetricsUtil.Tile(2, 1201, 667060.75f, 1921135.0f),
- new TileMetricsUtil.Tile(3, 1110, 704187.8f, 2028061.0f),
- new TileMetricsUtil.Tile(3, 1111, 694706.94f, 2000756.0f),
- new TileMetricsUtil.Tile(2, 1209, 768146.1f, 2212261.0f),
- new TileMetricsUtil.Tile(2, 1207, 738194.75f, 2126001.0f),
- new TileMetricsUtil.Tile(2, 1208, 753541.6f, 2170200.0f),
- new TileMetricsUtil.Tile(6, 2307, 611084.4f, 1759923.0f),
- new TileMetricsUtil.Tile(6, 1110, 595422.2f, 1714816.0f),
- new TileMetricsUtil.Tile(2, 1211, 801727.06f, 2308974.0f),
- new TileMetricsUtil.Tile(6, 2308, 623703.8f, 1796267.0f),
- new TileMetricsUtil.Tile(2, 1210, 782715.25f, 2254220.0f),
- new TileMetricsUtil.Tile(6, 2309, 626620.1f, 1804666.0f),
- new TileMetricsUtil.Tile(2, 1213, 840329.8f, 2420150.0f),
- new TileMetricsUtil.Tile(2, 1212, 817961.44f, 2355729.0f),
- new TileMetricsUtil.Tile(6, 2303, 564128.1f, 1624689.0f),
- new TileMetricsUtil.Tile(6, 1114, 677938.9f, 1952464.0f),
- new TileMetricsUtil.Tile(2, 1215, 870524.94f, 2507112.0f),
- new TileMetricsUtil.Tile(6, 2304, 574673.25f, 1655059.0f),
- new TileMetricsUtil.Tile(6, 1113, 662806.25f, 1908882.0f),
- new TileMetricsUtil.Tile(2, 1214, 853374.94f, 2457720.0f),
- new TileMetricsUtil.Tile(6, 2305, 588415.6f, 1694637.0f),
- new TileMetricsUtil.Tile(6, 1112, 594454.1f, 1712028.0f),
- new TileMetricsUtil.Tile(6, 2306, 598005.56f, 1722256.0f),
- new TileMetricsUtil.Tile(6, 1111, 539406.94f, 1553492.0f),
- new TileMetricsUtil.Tile(2, 1216, 876697.56f, 2524889.0f),
- new TileMetricsUtil.Tile(2, 1314, 824424.94f, 2374344.0f),
- new TileMetricsUtil.Tile(1, 2207, 706444.06f, 2034559.0f),
- new TileMetricsUtil.Tile(2, 1313, 804652.4f, 2317399.0f),
- new TileMetricsUtil.Tile(1, 2208, 720043.0f, 2073724.0f),
- new TileMetricsUtil.Tile(6, 2301, 546032.25f, 1572573.0f),
- new TileMetricsUtil.Tile(6, 1116, 706471.9f, 2034639.0f),
- new TileMetricsUtil.Tile(2, 1316, 860230.5f, 2477464.0f),
- new TileMetricsUtil.Tile(1, 2209, 729175.3f, 2100025.0f),
- new TileMetricsUtil.Tile(6, 2302, 553479.1f, 1594020.0f),
- new TileMetricsUtil.Tile(6, 1115, 691797.9f, 1992378.0f),
- new TileMetricsUtil.Tile(2, 1315, 846660.75f, 2438383.0f),
- new TileMetricsUtil.Tile(2, 1310, 756808.3f, 2179608.0f),
- new TileMetricsUtil.Tile(2, 1312, 788834.7f, 2271844.0f),
- new TileMetricsUtil.Tile(2, 1311, 774573.94f, 2230773.0f),
- new TileMetricsUtil.Tile(2, 1115, 853926.0f, 2459307.0f),
- new TileMetricsUtil.Tile(2, 1116, 858920.8f, 2473692.0f),
- new TileMetricsUtil.Tile(1, 2202, 611577.44f, 1761343.0f),
- new TileMetricsUtil.Tile(1, 2201, 639297.2f, 1841176.0f),
- new TileMetricsUtil.Tile(2, 1111, 803672.9f, 2314578.0f),
- new TileMetricsUtil.Tile(1, 2204, 665189.9f, 1915747.0f),
- new TileMetricsUtil.Tile(2, 1112, 816662.44f, 2351988.0f),
- new TileMetricsUtil.Tile(1, 2203, 487072.2f, 1402768.0f),
- new TileMetricsUtil.Tile(2, 1113, 832510.4f, 2397630.0f),
- new TileMetricsUtil.Tile(1, 2206, 690076.4f, 1987420.0f),
- new TileMetricsUtil.Tile(2, 1114, 842916.3f, 2427599.0f),
- new TileMetricsUtil.Tile(1, 2205, 676509.4f, 1948347.0f),
- new TileMetricsUtil.Tile(2, 1110, 786951.4f, 2266420.0f),
- new TileMetricsUtil.Tile(6, 1314, 726556.94f, 2092484.0f),
- new TileMetricsUtil.Tile(6, 1313, 705262.5f, 2031156.0f),
- new TileMetricsUtil.Tile(6, 1316, 762970.8f, 2197356.0f),
- new TileMetricsUtil.Tile(6, 1315, 745886.06f, 2148152.0f),
- new TileMetricsUtil.Tile(6, 2316, 784323.25f, 2258851.0f),
- new TileMetricsUtil.Tile(6, 2314, 745560.06f, 2147213.0f),
- new TileMetricsUtil.Tile(6, 2315, 764134.7f, 2200708.0f),
- new TileMetricsUtil.Tile(6, 2312, 646565.6f, 1862109.0f),
- new TileMetricsUtil.Tile(6, 2313, 717774.25f, 2067190.0f),
- new TileMetricsUtil.Tile(6, 2310, 650334.4f, 1872963.0f),
- new TileMetricsUtil.Tile(6, 2311, 607506.25f, 1749618.0f),
- new TileMetricsUtil.Tile(6, 1310, 652161.44f, 1878225.0f),
- new TileMetricsUtil.Tile(6, 1312, 643798.94f, 1854141.0f),
- new TileMetricsUtil.Tile(6, 1311, 608755.2f, 1753215.0f),
- new TileMetricsUtil.Tile(6, 1301, 543110.4f, 1564158.0f),
- new TileMetricsUtil.Tile(6, 1308, 628543.75f, 1810206.0f),
- new TileMetricsUtil.Tile(6, 1309, 632060.75f, 1820335.0f),
- new TileMetricsUtil.Tile(6, 1306, 601763.9f, 1733080.0f),
- new TileMetricsUtil.Tile(6, 1307, 612346.9f, 1763559.0f),
- new TileMetricsUtil.Tile(6, 1304, 577054.5f, 1661917.0f),
- new TileMetricsUtil.Tile(6, 1305, 589227.44f, 1696975.0f),
- new TileMetricsUtil.Tile(6, 1302, 552162.1f, 1590227.0f),
- new TileMetricsUtil.Tile(6, 1303, 564858.7f, 1626793.0f),
- new TileMetricsUtil.Tile(2, 2112, 831845.1f, 2395714.0f),
- new TileMetricsUtil.Tile(2, 2113, 869011.06f, 2502752.0f),
- new TileMetricsUtil.Tile(2, 2114, 893550.3f, 2573425.0f),
- new TileMetricsUtil.Tile(2, 2210, 781965.6f, 2252061.0f),
- new TileMetricsUtil.Tile(2, 2115, 915250.7f, 2635922.0f),
- new TileMetricsUtil.Tile(2, 2116, 935106.2f, 2693106.0f),
- new TileMetricsUtil.Tile(2, 2215, 916326.4f, 2639020.0f),
- new TileMetricsUtil.Tile(2, 2216, 934624.25f, 2691718.0f),
- new TileMetricsUtil.Tile(2, 2211, 801881.2f, 2309418.0f),
- new TileMetricsUtil.Tile(2, 2212, 831820.1f, 2395642.0f),
- new TileMetricsUtil.Tile(2, 2213, 865767.3f, 2493410.0f),
- new TileMetricsUtil.Tile(2, 2110, 771794.75f, 2222769.0f),
- new TileMetricsUtil.Tile(2, 2214, 889628.06f, 2562129.0f),
- new TileMetricsUtil.Tile(2, 2111, 804690.25f, 2317508.0f),
- new TileMetricsUtil.Tile(2, 1108, 755969.44f, 2177192.0f),
- new TileMetricsUtil.Tile(2, 1109, 773586.75f, 2227930.0f),
- new TileMetricsUtil.Tile(2, 2209, 764854.1f, 2202780.0f),
- new TileMetricsUtil.Tile(3, 2308, 665623.56f, 1916996.0f),
- new TileMetricsUtil.Tile(2, 2208, 748762.5f, 2156436.0f),
- new TileMetricsUtil.Tile(3, 2309, 677830.2f, 1952151.0f),
- new TileMetricsUtil.Tile(1, 2312, 795999.25f, 2292478.0f),
- new TileMetricsUtil.Tile(1, 2311, 768010.06f, 2211869.0f),
- new TileMetricsUtil.Tile(1, 2310, 756285.75f, 2178103.0f),
- new TileMetricsUtil.Tile(2, 2109, 716002.75f, 2062088.0f),
- new TileMetricsUtil.Tile(1, 2316, 925979.8f, 2666822.0f),
- new TileMetricsUtil.Tile(1, 2315, 896454.1f, 2581788.0f),
- new TileMetricsUtil.Tile(1, 2314, 864106.56f, 2488627.0f),
- new TileMetricsUtil.Tile(1, 2313, 819590.94f, 2360422.0f),
- new TileMetricsUtil.Tile(2, 2103, 401740.97f, 1157014.0f),
- new TileMetricsUtil.Tile(2, 2104, 395323.25f, 1138531.0f),
- new TileMetricsUtil.Tile(2, 2101, 468776.03f, 1350075.0f),
- new TileMetricsUtil.Tile(2, 2102, 351150.7f, 1011314.0f),
- new TileMetricsUtil.Tile(2, 2107, 622077.4f, 1791583.0f),
- new TileMetricsUtil.Tile(2, 2108, 616934.7f, 1776772.0f),
- new TileMetricsUtil.Tile(2, 2105, 608495.8f, 1752468.0f),
- new TileMetricsUtil.Tile(2, 2106, 654560.4f, 1885134.0f),
- new TileMetricsUtil.Tile(2, 2206, 721626.7f, 2078285.0f),
- new TileMetricsUtil.Tile(2, 1103, 658908.6f, 1897657.0f),
- new TileMetricsUtil.Tile(3, 2307, 654492.7f, 1884939.0f),
- new TileMetricsUtil.Tile(2, 2207, 732366.6f, 2109216.0f),
- new TileMetricsUtil.Tile(2, 1102, 656570.8f, 1890924.0f),
- new TileMetricsUtil.Tile(3, 2306, 645191.3f, 1858151.0f),
- new TileMetricsUtil.Tile(2, 2204, 522755.2f, 1505535.0f),
- new TileMetricsUtil.Tile(2, 1101, 655590.94f, 1888102.0f),
- new TileMetricsUtil.Tile(3, 2305, 629939.56f, 1814226.0f),
- new TileMetricsUtil.Tile(2, 2205, 695380.2f, 2002695.0f),
- new TileMetricsUtil.Tile(3, 2304, 615780.2f, 1773447.0f),
- new TileMetricsUtil.Tile(2, 2202, 651867.3f, 1877378.0f),
- new TileMetricsUtil.Tile(2, 1107, 741657.25f, 2135973.0f),
- new TileMetricsUtil.Tile(3, 2303, 603444.06f, 1737919.0f),
- new TileMetricsUtil.Tile(2, 2203, 640879.8f, 1845734.0f),
- new TileMetricsUtil.Tile(2, 1106, 727610.4f, 2095518.0f),
- new TileMetricsUtil.Tile(3, 2302, 594963.9f, 1713496.0f),
- new TileMetricsUtil.Tile(2, 1105, 705420.8f, 2031612.0f),
- new TileMetricsUtil.Tile(3, 2301, 590563.9f, 1700824.0f),
- new TileMetricsUtil.Tile(2, 2201, 658020.1f, 1895098.0f),
- new TileMetricsUtil.Tile(2, 1104, 553236.8f, 1593322.0f),
- new TileMetricsUtil.Tile(5, 1304, 527790.25f, 1520036.0f),
- new TileMetricsUtil.Tile(1, 2303, 571845.1f, 1646914.0f),
- new TileMetricsUtil.Tile(5, 1303, 514778.1f, 1482561.0f),
- new TileMetricsUtil.Tile(1, 2302, 634523.25f, 1827427.0f),
- new TileMetricsUtil.Tile(5, 1302, 503301.72f, 1449509.0f),
- new TileMetricsUtil.Tile(1, 2305, 687812.1f, 1980899.0f),
- new TileMetricsUtil.Tile(5, 1301, 489341.66f, 1409304.0f),
- new TileMetricsUtil.Tile(1, 2304, 678138.5f, 1953039.0f),
- new TileMetricsUtil.Tile(5, 1308, 551351.7f, 1587893.0f),
- new TileMetricsUtil.Tile(5, 1307, 555872.56f, 1600913.0f),
- new TileMetricsUtil.Tile(5, 1306, 554025.0f, 1595592.0f),
- new TileMetricsUtil.Tile(1, 2301, 645645.44f, 1859459.0f),
- new TileMetricsUtil.Tile(5, 1305, 542228.8f, 1561619.0f),
- new TileMetricsUtil.Tile(5, 1309, 468386.78f, 1348954.0f),
- new TileMetricsUtil.Tile(1, 2306, 694729.1f, 2000820.0f),
- new TileMetricsUtil.Tile(1, 2307, 712121.9f, 2050911.0f),
- new TileMetricsUtil.Tile(1, 2308, 727114.9f, 2094091.0f),
- new TileMetricsUtil.Tile(1, 2309, 740285.75f, 2132023.0f),
- new TileMetricsUtil.Tile(5, 1311, 632258.3f, 1820904.0f),
- new TileMetricsUtil.Tile(5, 1310, 618028.44f, 1779922.0f),
- new TileMetricsUtil.Tile(5, 1313, 673544.44f, 1939808.0f),
- new TileMetricsUtil.Tile(5, 1312, 649723.25f, 1871203.0f),
- new TileMetricsUtil.Tile(5, 1315, 710485.06f, 2046197.0f),
- new TileMetricsUtil.Tile(5, 1314, 692534.4f, 1994499.0f),
- new TileMetricsUtil.Tile(5, 1316, 736742.3f, 2121818.0f),
- new TileMetricsUtil.Tile(7, 1109, 659062.8f, 1898101.0f),
- new TileMetricsUtil.Tile(4, 2102, 575167.0f, 1656481.0f),
- new TileMetricsUtil.Tile(4, 2101, 556701.7f, 1603301.0f),
- new TileMetricsUtil.Tile(7, 1105, 607005.56f, 1748176.0f),
- new TileMetricsUtil.Tile(7, 1106, 622574.6f, 1793015.0f),
- new TileMetricsUtil.Tile(7, 1107, 634426.7f, 1827149.0f),
- new TileMetricsUtil.Tile(7, 1108, 648441.3f, 1867511.0f),
- new TileMetricsUtil.Tile(7, 1101, 514711.78f, 1482370.0f),
- new TileMetricsUtil.Tile(7, 1102, 562830.9f, 1620953.0f),
- new TileMetricsUtil.Tile(4, 2109, 652550.7f, 1879346.0f),
- new TileMetricsUtil.Tile(7, 1103, 580733.7f, 1672513.0f),
- new TileMetricsUtil.Tile(4, 2108, 641942.3f, 1848794.0f),
- new TileMetricsUtil.Tile(7, 1104, 593361.8f, 1708882.0f),
- new TileMetricsUtil.Tile(4, 2107, 633418.0f, 1824244.0f),
- new TileMetricsUtil.Tile(4, 2106, 628767.3f, 1810850.0f),
- new TileMetricsUtil.Tile(4, 2105, 608104.8f, 1751342.0f),
- new TileMetricsUtil.Tile(4, 2104, 590379.8f, 1700294.0f),
- new TileMetricsUtil.Tile(4, 2103, 582709.7f, 1678204.0f),
- new TileMetricsUtil.Tile(7, 1302, 610862.5f, 1759284.0f),
- new TileMetricsUtil.Tile(7, 1301, 562763.9f, 1620760.0f),
- new TileMetricsUtil.Tile(7, 1308, 692898.56f, 1995548.0f),
- new TileMetricsUtil.Tile(7, 1307, 679246.1f, 1956229.0f),
- new TileMetricsUtil.Tile(7, 1309, 704346.9f, 2028519.0f),
- new TileMetricsUtil.Tile(7, 1304, 640838.9f, 1845616.0f),
- new TileMetricsUtil.Tile(7, 1303, 626787.1f, 1805147.0f),
- new TileMetricsUtil.Tile(7, 1306, 667224.25f, 1921606.0f),
- new TileMetricsUtil.Tile(7, 1305, 654089.56f, 1883778.0f),
- new TileMetricsUtil.Tile(8, 1114, 757451.0f, 2181459.0f),
- new TileMetricsUtil.Tile(4, 2111, 680774.25f, 1960630.0f),
- new TileMetricsUtil.Tile(8, 1216, 805801.0f, 2320707.0f),
- new TileMetricsUtil.Tile(8, 1113, 740480.2f, 2132583.0f),
- new TileMetricsUtil.Tile(4, 2110, 540845.8f, 1557636.0f),
- new TileMetricsUtil.Tile(8, 1116, 795597.2f, 2291320.0f),
- new TileMetricsUtil.Tile(4, 2113, 750105.9f, 2160305.0f),
- new TileMetricsUtil.Tile(8, 1115, 771454.1f, 2221788.0f),
- new TileMetricsUtil.Tile(4, 2112, 722734.0f, 2081474.0f),
- new TileMetricsUtil.Tile(8, 1110, 696113.9f, 2004808.0f),
- new TileMetricsUtil.Tile(8, 1112, 724169.44f, 2085608.0f),
- new TileMetricsUtil.Tile(7, 1116, 759220.1f, 2186554.0f),
- new TileMetricsUtil.Tile(8, 1111, 709771.5f, 2044142.0f),
- new TileMetricsUtil.Tile(7, 1114, 676538.2f, 1948430.0f),
- new TileMetricsUtil.Tile(7, 1115, 742581.94f, 2138636.0f),
- new TileMetricsUtil.Tile(8, 1211, 695630.9f, 2003417.0f),
- new TileMetricsUtil.Tile(7, 1112, 682896.9f, 1966743.0f),
- new TileMetricsUtil.Tile(8, 1210, 678184.7f, 1953172.0f),
- new TileMetricsUtil.Tile(7, 1113, 671356.56f, 1933507.0f),
- new TileMetricsUtil.Tile(8, 1213, 738898.25f, 2128027.0f),
- new TileMetricsUtil.Tile(7, 1110, 666704.8f, 1920110.0f),
- new TileMetricsUtil.Tile(4, 2115, 807847.56f, 2326601.0f),
- new TileMetricsUtil.Tile(8, 1212, 719052.06f, 2070870.0f),
- new TileMetricsUtil.Tile(7, 1111, 674252.06f, 1941846.0f),
- new TileMetricsUtil.Tile(4, 2114, 778309.7f, 2241532.0f),
- new TileMetricsUtil.Tile(8, 1215, 777625.3f, 2239561.0f),
- new TileMetricsUtil.Tile(8, 1214, 753288.2f, 2169470.0f),
- new TileMetricsUtil.Tile(4, 2116, 844673.56f, 2432660.0f),
- new TileMetricsUtil.Tile(7, 1313, 753995.8f, 2171508.0f),
- new TileMetricsUtil.Tile(7, 1312, 746043.75f, 2148606.0f),
- new TileMetricsUtil.Tile(7, 1311, 730011.44f, 2102433.0f),
- new TileMetricsUtil.Tile(7, 1310, 711337.8f, 2048653.0f),
- new TileMetricsUtil.Tile(7, 1316, 816043.0f, 2350204.0f),
- new TileMetricsUtil.Tile(7, 1315, 802815.94f, 2312110.0f),
- new TileMetricsUtil.Tile(7, 1314, 771105.5f, 2220784.0f),
- new TileMetricsUtil.Tile(8, 1101, 560968.06f, 1615588.0f),
- new TileMetricsUtil.Tile(3, 2313, 763862.5f, 2199924.0f),
- new TileMetricsUtil.Tile(3, 2314, 791914.56f, 2280714.0f),
- new TileMetricsUtil.Tile(3, 2311, 706188.9f, 2033824.0f),
- new TileMetricsUtil.Tile(3, 2312, 726022.9f, 2090946.0f),
- new TileMetricsUtil.Tile(8, 1105, 614783.6f, 1770577.0f),
- new TileMetricsUtil.Tile(7, 2301, 567577.75f, 1634624.0f),
- new TileMetricsUtil.Tile(8, 1104, 603324.6f, 1737575.0f),
- new TileMetricsUtil.Tile(8, 1103, 587471.2f, 1691917.0f),
- new TileMetricsUtil.Tile(7, 2303, 628137.5f, 1809036.0f),
- new TileMetricsUtil.Tile(3, 2315, 817940.94f, 2355670.0f),
- new TileMetricsUtil.Tile(8, 1102, 573460.4f, 1651566.0f),
- new TileMetricsUtil.Tile(7, 2302, 615363.5f, 1772247.0f),
- new TileMetricsUtil.Tile(3, 2316, 859486.44f, 2475321.0f),
- new TileMetricsUtil.Tile(8, 1109, 669675.7f, 1928666.0f),
- new TileMetricsUtil.Tile(7, 2305, 655222.2f, 1887040.0f),
- new TileMetricsUtil.Tile(8, 1108, 567472.2f, 1634320.0f),
- new TileMetricsUtil.Tile(7, 2304, 638620.1f, 1839226.0f),
- new TileMetricsUtil.Tile(8, 1107, 647659.7f, 1865260.0f),
- new TileMetricsUtil.Tile(7, 2307, 683385.4f, 1968150.0f),
- new TileMetricsUtil.Tile(8, 1106, 633415.25f, 1824236.0f),
- new TileMetricsUtil.Tile(7, 2306, 672001.0f, 1935363.0f),
- new TileMetricsUtil.Tile(7, 2309, 711445.44f, 2048963.0f),
- new TileMetricsUtil.Tile(7, 2308, 696809.0f, 2006810.0f),
- new TileMetricsUtil.Tile(3, 2310, 694548.94f, 2000301.0f),
- new TileMetricsUtil.Tile(7, 2106, 624401.7f, 1798277.0f),
- new TileMetricsUtil.Tile(7, 2107, 635721.1f, 1830877.0f),
- new TileMetricsUtil.Tile(7, 2108, 651519.75f, 1876377.0f),
- new TileMetricsUtil.Tile(7, 2109, 665493.4f, 1916621.0f),
- new TileMetricsUtil.Tile(3, 2112, 705862.8f, 2032885.0f),
- new TileMetricsUtil.Tile(3, 2111, 686178.1f, 1976193.0f),
- new TileMetricsUtil.Tile(3, 2110, 695652.75f, 2003480.0f),
- new TileMetricsUtil.Tile(3, 2116, 857244.06f, 2468863.0f),
- new TileMetricsUtil.Tile(3, 2115, 824751.0f, 2375283.0f),
- new TileMetricsUtil.Tile(3, 2114, 805906.2f, 2321010.0f),
- new TileMetricsUtil.Tile(7, 2101, 523016.3f, 1506287.0f),
- new TileMetricsUtil.Tile(3, 2113, 779751.7f, 2245685.0f),
- new TileMetricsUtil.Tile(7, 2102, 568420.44f, 1637051.0f),
- new TileMetricsUtil.Tile(7, 2103, 580344.06f, 1671391.0f),
- new TileMetricsUtil.Tile(7, 2104, 593800.7f, 1710146.0f),
- new TileMetricsUtil.Tile(7, 2105, 608100.3f, 1751329.0f),
- new TileMetricsUtil.Tile(8, 1307, 678750.7f, 1954802.0f),
- new TileMetricsUtil.Tile(7, 1205, 602532.6f, 1735294.0f),
- new TileMetricsUtil.Tile(8, 1306, 664292.0f, 1913161.0f),
- new TileMetricsUtil.Tile(7, 1204, 589775.3f, 1698553.0f),
- new TileMetricsUtil.Tile(8, 1305, 667551.4f, 1922548.0f),
- new TileMetricsUtil.Tile(7, 1207, 630500.3f, 1815841.0f),
- new TileMetricsUtil.Tile(8, 1304, 656542.7f, 1890843.0f),
- new TileMetricsUtil.Tile(7, 1206, 621207.6f, 1789078.0f),
- new TileMetricsUtil.Tile(7, 1209, 656298.25f, 1890139.0f),
- new TileMetricsUtil.Tile(7, 1208, 644718.75f, 1856790.0f),
- new TileMetricsUtil.Tile(8, 1309, 706734.4f, 2035395.0f),
- new TileMetricsUtil.Tile(8, 1308, 640287.8f, 1844029.0f),
- new TileMetricsUtil.Tile(8, 1303, 645088.2f, 1857854.0f),
- new TileMetricsUtil.Tile(7, 1201, 523716.66f, 1508304.0f),
- new TileMetricsUtil.Tile(8, 1302, 634223.94f, 1826565.0f),
- new TileMetricsUtil.Tile(8, 1301, 628762.8f, 1810837.0f),
- new TileMetricsUtil.Tile(7, 1203, 579497.9f, 1668954.0f),
- new TileMetricsUtil.Tile(7, 1202, 566370.1f, 1631146.0f),
- new TileMetricsUtil.Tile(7, 2210, 659881.25f, 1900458.0f),
- new TileMetricsUtil.Tile(7, 2211, 681405.9f, 1962449.0f),
- new TileMetricsUtil.Tile(3, 2209, 665386.75f, 1916314.0f),
- new TileMetricsUtil.Tile(7, 1216, 798989.56f, 2301090.0f),
- new TileMetricsUtil.Tile(7, 1215, 755635.06f, 2176229.0f),
- new TileMetricsUtil.Tile(7, 2214, 700608.3f, 2017752.0f),
- new TileMetricsUtil.Tile(7, 2215, 781395.44f, 2250419.0f),
- new TileMetricsUtil.Tile(7, 2315, 838348.56f, 2414444.0f),
- new TileMetricsUtil.Tile(7, 2212, 707106.56f, 2036467.0f),
- new TileMetricsUtil.Tile(7, 2316, 855710.4f, 2464446.0f),
- new TileMetricsUtil.Tile(7, 2213, 694383.6f, 1999825.0f),
- new TileMetricsUtil.Tile(7, 2313, 781775.3f, 2251513.0f),
- new TileMetricsUtil.Tile(7, 1210, 660261.75f, 1901554.0f),
- new TileMetricsUtil.Tile(7, 2314, 802218.4f, 2310389.0f),
- new TileMetricsUtil.Tile(7, 2311, 739138.2f, 2128718.0f),
- new TileMetricsUtil.Tile(7, 2216, 830322.9f, 2391330.0f),
- new TileMetricsUtil.Tile(7, 2312, 761812.8f, 2194021.0f),
- new TileMetricsUtil.Tile(7, 1214, 682258.6f, 1964905.0f),
- new TileMetricsUtil.Tile(7, 2310, 719639.56f, 2072562.0f),
- new TileMetricsUtil.Tile(7, 1213, 679747.9f, 1957674.0f),
- new TileMetricsUtil.Tile(7, 1212, 695909.0f, 2004218.0f),
- new TileMetricsUtil.Tile(7, 1211, 677979.8f, 1952582.0f),
- new TileMetricsUtil.Tile(3, 2106, 631833.6f, 1819681.0f),
- new TileMetricsUtil.Tile(3, 2107, 645582.6f, 1859278.0f),
- new TileMetricsUtil.Tile(6, 1108, 576558.7f, 1660489.0f),
- new TileMetricsUtil.Tile(3, 2108, 660356.56f, 1901827.0f),
- new TileMetricsUtil.Tile(6, 1109, 582984.7f, 1678996.0f),
- new TileMetricsUtil.Tile(3, 2109, 674721.9f, 1943199.0f),
- new TileMetricsUtil.Tile(6, 1201, 515860.06f, 1485677.0f),
- new TileMetricsUtil.Tile(6, 1106, 545413.5f, 1570791.0f),
- new TileMetricsUtil.Tile(3, 2102, 582434.4f, 1677411.0f),
- new TileMetricsUtil.Tile(6, 1202, 523040.94f, 1506358.0f),
- new TileMetricsUtil.Tile(6, 1107, 553355.9f, 1593665.0f),
- new TileMetricsUtil.Tile(3, 2103, 593973.56f, 1710644.0f),
- new TileMetricsUtil.Tile(6, 1104, 516796.84f, 1488375.0f),
- new TileMetricsUtil.Tile(3, 2104, 607844.75f, 1750593.0f),
- new TileMetricsUtil.Tile(6, 1105, 530281.56f, 1527211.0f),
- new TileMetricsUtil.Tile(3, 2105, 621182.6f, 1789006.0f),
- new TileMetricsUtil.Tile(6, 1205, 560279.5f, 1613605.0f),
- new TileMetricsUtil.Tile(6, 1102, 483174.28f, 1391542.0f),
- new TileMetricsUtil.Tile(3, 2201, 567232.25f, 1633629.0f),
- new TileMetricsUtil.Tile(6, 1206, 569772.56f, 1640945.0f),
- new TileMetricsUtil.Tile(6, 1103, 500713.88f, 1442056.0f),
- new TileMetricsUtil.Tile(3, 2202, 578353.1f, 1665657.0f),
- new TileMetricsUtil.Tile(6, 1203, 538762.1f, 1551635.0f),
- new TileMetricsUtil.Tile(3, 2203, 589069.44f, 1696520.0f),
- new TileMetricsUtil.Tile(6, 1204, 548861.8f, 1580722.0f),
- new TileMetricsUtil.Tile(6, 1101, 470429.16f, 1354836.0f),
- new TileMetricsUtil.Tile(3, 2101, 571115.25f, 1644812.0f),
- new TileMetricsUtil.Tile(3, 2204, 600790.6f, 1730277.0f),
- new TileMetricsUtil.Tile(6, 1209, 600827.44f, 1730383.0f),
- new TileMetricsUtil.Tile(3, 2205, 611413.9f, 1760872.0f),
- new TileMetricsUtil.Tile(3, 2206, 625997.9f, 1802874.0f),
- new TileMetricsUtil.Tile(6, 1207, 574950.0f, 1655856.0f),
- new TileMetricsUtil.Tile(3, 2207, 638296.5f, 1838294.0f),
- new TileMetricsUtil.Tile(6, 1208, 597518.4f, 1720853.0f),
- new TileMetricsUtil.Tile(3, 2208, 653278.8f, 1881443.0f),
- new TileMetricsUtil.Tile(7, 2201, 524665.25f, 1511036.0f),
- new TileMetricsUtil.Tile(7, 2202, 567193.75f, 1633518.0f),
- new TileMetricsUtil.Tile(7, 2203, 581718.4f, 1675349.0f),
- new TileMetricsUtil.Tile(7, 2204, 591475.3f, 1703449.0f),
- new TileMetricsUtil.Tile(7, 2209, 657746.9f, 1894311.0f),
- new TileMetricsUtil.Tile(7, 2205, 603689.56f, 1738626.0f),
- new TileMetricsUtil.Tile(7, 2206, 621389.2f, 1789601.0f),
- new TileMetricsUtil.Tile(7, 2207, 630440.25f, 1815668.0f),
- new TileMetricsUtil.Tile(7, 2208, 644510.4f, 1856190.0f),
- new TileMetricsUtil.Tile(6, 2207, 580963.2f, 1673174.0f),
- new TileMetricsUtil.Tile(6, 1210, 612937.5f, 1765260.0f),
- new TileMetricsUtil.Tile(3, 2210, 685847.56f, 1975241.0f),
- new TileMetricsUtil.Tile(6, 2206, 567367.7f, 1634019.0f),
- new TileMetricsUtil.Tile(6, 1211, 539128.8f, 1552691.0f),
- new TileMetricsUtil.Tile(3, 2211, 662213.5f, 1907175.0f),
- new TileMetricsUtil.Tile(6, 2205, 555303.8f, 1599275.0f),
- new TileMetricsUtil.Tile(6, 1212, 583076.7f, 1679261.0f),
- new TileMetricsUtil.Tile(6, 2204, 545144.44f, 1570016.0f),
- new TileMetricsUtil.Tile(6, 1213, 671834.4f, 1934883.0f),
- new TileMetricsUtil.Tile(6, 2209, 592840.6f, 1707381.0f),
- new TileMetricsUtil.Tile(6, 2208, 589674.3f, 1698262.0f),
- new TileMetricsUtil.Tile(3, 2216, 850352.06f, 2449014.0f),
- new TileMetricsUtil.Tile(6, 2203, 534482.6f, 1539310.0f),
- new TileMetricsUtil.Tile(6, 1214, 689705.9f, 1986353.0f),
- new TileMetricsUtil.Tile(3, 2214, 782345.1f, 2253154.0f),
- new TileMetricsUtil.Tile(6, 2202, 522701.72f, 1505381.0f),
- new TileMetricsUtil.Tile(6, 1215, 708256.94f, 2039780.0f),
- new TileMetricsUtil.Tile(3, 2215, 805199.6f, 2318975.0f),
- new TileMetricsUtil.Tile(6, 2201, 516852.4f, 1488535.0f),
- new TileMetricsUtil.Tile(6, 1216, 743507.6f, 2141302.0f),
- new TileMetricsUtil.Tile(3, 2212, 688010.75f, 1981471.0f),
- new TileMetricsUtil.Tile(3, 2213, 757998.56f, 2183036.0f),
- new TileMetricsUtil.Tile(8, 1316, 831816.6f, 2395632.0f),
- new TileMetricsUtil.Tile(8, 1315, 814369.75f, 2345385.0f),
- new TileMetricsUtil.Tile(8, 1312, 765417.0f, 2204401.0f),
- new TileMetricsUtil.Tile(8, 1311, 747768.75f, 2153574.0f),
- new TileMetricsUtil.Tile(8, 1314, 796930.2f, 2295159.0f),
- new TileMetricsUtil.Tile(8, 1313, 783144.06f, 2255455.0f),
- new TileMetricsUtil.Tile(8, 1310, 727851.0f, 2096211.0f),
- new TileMetricsUtil.Tile(6, 2216, 759651.0f, 2187795.0f),
- new TileMetricsUtil.Tile(6, 2215, 718353.8f, 2068859.0f),
- new TileMetricsUtil.Tile(6, 2210, 602608.3f, 1735512.0f),
- new TileMetricsUtil.Tile(6, 2212, 580998.94f, 1673277.0f),
- new TileMetricsUtil.Tile(6, 2211, 530460.4f, 1527726.0f),
- new TileMetricsUtil.Tile(6, 2214, 697545.1f, 2008930.0f),
- new TileMetricsUtil.Tile(6, 2213, 676289.56f, 1947714.0f)
- )
- ),
- new Testcase(new File(TEST_DATA_DIRECTORY, "HiSeq2500TileMetricsOut.bin"),
- HashableTile.from(
- new TileMetricsUtil.Tile(2, 1206, 653662.75f, 1891585.0f),
- new TileMetricsUtil.Tile(2, 1205, 639157.0f, 1849608.0f),
- new TileMetricsUtil.Tile(2, 1204, 624875.56f, 1808280.0f),
- new TileMetricsUtil.Tile(2, 1203, 609763.4f, 1764548.0f),
- new TileMetricsUtil.Tile(2, 1202, 597868.4f, 1730126.0f),
- new TileMetricsUtil.Tile(2, 1201, 587080.25f, 1698907.0f),
- new TileMetricsUtil.Tile(2, 1209, 695481.8f, 2012602.0f),
- new TileMetricsUtil.Tile(2, 1207, 667445.2f, 1931469.0f),
- new TileMetricsUtil.Tile(2, 1208, 678062.6f, 1962194.0f),
- new TileMetricsUtil.Tile(1, 1216, 842967.25f, 2439399.0f),
- new TileMetricsUtil.Tile(2, 1211, 731061.7f, 2115564.0f),
- new TileMetricsUtil.Tile(1, 1215, 844388.56f, 2443512.0f),
- new TileMetricsUtil.Tile(2, 1210, 698612.25f, 2021661.0f),
- new TileMetricsUtil.Tile(1, 1214, 827993.3f, 2396067.0f),
- new TileMetricsUtil.Tile(2, 1213, 775162.9f, 2243185.0f),
- new TileMetricsUtil.Tile(1, 1213, 810556.9f, 2345609.0f),
- new TileMetricsUtil.Tile(2, 1212, 755212.44f, 2185452.0f),
- new TileMetricsUtil.Tile(1, 1212, 790185.2f, 2286657.0f),
- new TileMetricsUtil.Tile(2, 1215, 819531.8f, 2371581.0f),
- new TileMetricsUtil.Tile(1, 1211, 765546.5f, 2215357.0f),
- new TileMetricsUtil.Tile(2, 1214, 796655.9f, 2305382.0f),
- new TileMetricsUtil.Tile(1, 1210, 736556.9f, 2131466.0f),
- new TileMetricsUtil.Tile(2, 1216, 832327.0f, 2408608.0f),
- new TileMetricsUtil.Tile(1, 2207, 678786.94f, 1964290.0f),
- new TileMetricsUtil.Tile(1, 2208, 693180.0f, 2005941.0f),
- new TileMetricsUtil.Tile(1, 2209, 705999.4f, 2043038.0f),
- new TileMetricsUtil.Tile(2, 1115, 825756.8f, 2389595.0f),
- new TileMetricsUtil.Tile(2, 1116, 832026.75f, 2407739.0f),
- new TileMetricsUtil.Tile(1, 2202, 606812.25f, 1756008.0f),
- new TileMetricsUtil.Tile(1, 2201, 597893.6f, 1730199.0f),
- new TileMetricsUtil.Tile(1, 2204, 635193.44f, 1838138.0f),
- new TileMetricsUtil.Tile(2, 1111, 741915.9f, 2146974.0f),
- new TileMetricsUtil.Tile(1, 2203, 618945.4f, 1791119.0f),
- new TileMetricsUtil.Tile(2, 1112, 765042.7f, 2213899.0f),
- new TileMetricsUtil.Tile(1, 2206, 665331.75f, 1925353.0f),
- new TileMetricsUtil.Tile(2, 1113, 785456.2f, 2272972.0f),
- new TileMetricsUtil.Tile(1, 2205, 652078.0f, 1886999.0f),
- new TileMetricsUtil.Tile(2, 1114, 806461.25f, 2333757.0f),
- new TileMetricsUtil.Tile(2, 1110, 706431.0f, 2044287.0f),
- new TileMetricsUtil.Tile(1, 1203, 632849.44f, 1831355.0f),
- new TileMetricsUtil.Tile(1, 1202, 621137.6f, 1797463.0f),
- new TileMetricsUtil.Tile(1, 1205, 666326.25f, 1928231.0f),
- new TileMetricsUtil.Tile(1, 1204, 650857.44f, 1883467.0f),
- new TileMetricsUtil.Tile(1, 1201, 609630.4f, 1764163.0f),
- new TileMetricsUtil.Tile(1, 1206, 682072.56f, 1973798.0f),
- new TileMetricsUtil.Tile(1, 1207, 696573.44f, 2015761.0f),
- new TileMetricsUtil.Tile(1, 1208, 710467.2f, 2055967.0f),
- new TileMetricsUtil.Tile(1, 1209, 723957.25f, 2095005.0f),
- new TileMetricsUtil.Tile(1, 2115, 861918.0f, 2494239.0f),
- new TileMetricsUtil.Tile(1, 2116, 888303.5f, 2570594.0f),
- new TileMetricsUtil.Tile(1, 2111, 747951.8f, 2164441.0f),
- new TileMetricsUtil.Tile(1, 2112, 780144.2f, 2257600.0f),
- new TileMetricsUtil.Tile(1, 2113, 807111.94f, 2335640.0f),
- new TileMetricsUtil.Tile(1, 2114, 831982.1f, 2407610.0f),
- new TileMetricsUtil.Tile(1, 2210, 719660.9f, 2082572.0f),
- new TileMetricsUtil.Tile(1, 2211, 750860.75f, 2172859.0f),
- new TileMetricsUtil.Tile(1, 2212, 778388.4f, 2252519.0f),
- new TileMetricsUtil.Tile(1, 2110, 718140.75f, 2078173.0f),
- new TileMetricsUtil.Tile(1, 2213, 800401.44f, 2316221.0f),
- new TileMetricsUtil.Tile(1, 2214, 825330.4f, 2388361.0f),
- new TileMetricsUtil.Tile(1, 2215, 854579.25f, 2473002.0f),
- new TileMetricsUtil.Tile(1, 2216, 873627.06f, 2528123.0f),
- new TileMetricsUtil.Tile(2, 2112, 755250.8f, 2185563.0f),
- new TileMetricsUtil.Tile(2, 2113, 779303.75f, 2255168.0f),
- new TileMetricsUtil.Tile(2, 2114, 806558.0f, 2334037.0f),
- new TileMetricsUtil.Tile(2, 2210, 688795.8f, 1993254.0f),
- new TileMetricsUtil.Tile(2, 2115, 837416.1f, 2423335.0f),
- new TileMetricsUtil.Tile(2, 2116, 857135.0f, 2480398.0f),
- new TileMetricsUtil.Tile(2, 2215, 807698.7f, 2337338.0f),
- new TileMetricsUtil.Tile(2, 2216, 828607.06f, 2397843.0f),
- new TileMetricsUtil.Tile(2, 2211, 713208.2f, 2063899.0f),
- new TileMetricsUtil.Tile(2, 2212, 737697.94f, 2134768.0f),
- new TileMetricsUtil.Tile(2, 2213, 757521.9f, 2192135.0f),
- new TileMetricsUtil.Tile(2, 2110, 704750.5f, 2039424.0f),
- new TileMetricsUtil.Tile(2, 2214, 781637.4f, 2261921.0f),
- new TileMetricsUtil.Tile(2, 2111, 732328.9f, 2119231.0f),
- new TileMetricsUtil.Tile(2, 1108, 691490.9f, 2001053.0f),
- new TileMetricsUtil.Tile(2, 1109, 709288.1f, 2052555.0f),
- new TileMetricsUtil.Tile(2, 2209, 679636.7f, 1966749.0f),
- new TileMetricsUtil.Tile(2, 2208, 666553.6f, 1928889.0f),
- new TileMetricsUtil.Tile(2, 2109, 697222.4f, 2017639.0f),
- new TileMetricsUtil.Tile(2, 2103, 610412.0f, 1766425.0f),
- new TileMetricsUtil.Tile(2, 2104, 624860.7f, 1808237.0f),
- new TileMetricsUtil.Tile(2, 2101, 586815.2f, 1698140.0f),
- new TileMetricsUtil.Tile(2, 2102, 598569.2f, 1732154.0f),
- new TileMetricsUtil.Tile(2, 2107, 667244.44f, 1930888.0f),
- new TileMetricsUtil.Tile(2, 2108, 680053.44f, 1967955.0f),
- new TileMetricsUtil.Tile(2, 2105, 638144.5f, 1846678.0f),
- new TileMetricsUtil.Tile(2, 2106, 652707.25f, 1888820.0f),
- new TileMetricsUtil.Tile(2, 2206, 640678.2f, 1854010.0f),
- new TileMetricsUtil.Tile(2, 1103, 619946.8f, 1794017.0f),
- new TileMetricsUtil.Tile(2, 2207, 653583.6f, 1891356.0f),
- new TileMetricsUtil.Tile(2, 1102, 607545.56f, 1758130.0f),
- new TileMetricsUtil.Tile(2, 2204, 612980.25f, 1773857.0f),
- new TileMetricsUtil.Tile(2, 1101, 594830.2f, 1721334.0f),
- new TileMetricsUtil.Tile(2, 2205, 626773.44f, 1813772.0f),
- new TileMetricsUtil.Tile(2, 2202, 586832.5f, 1698190.0f),
- new TileMetricsUtil.Tile(2, 1107, 678805.6f, 1964344.0f),
- new TileMetricsUtil.Tile(2, 2203, 599638.75f, 1735249.0f),
- new TileMetricsUtil.Tile(2, 1106, 665179.7f, 1924913.0f),
- new TileMetricsUtil.Tile(2, 1105, 650463.2f, 1882326.0f),
- new TileMetricsUtil.Tile(2, 2201, 576017.75f, 1666894.0f),
- new TileMetricsUtil.Tile(2, 1104, 635376.56f, 1838668.0f),
- new TileMetricsUtil.Tile(1, 1108, 707248.25f, 2046652.0f),
- new TileMetricsUtil.Tile(1, 1107, 694722.6f, 2010405.0f),
- new TileMetricsUtil.Tile(1, 1109, 720901.8f, 2086163.0f),
- new TileMetricsUtil.Tile(1, 1101, 604748.94f, 1750037.0f),
- new TileMetricsUtil.Tile(1, 1102, 618156.06f, 1788835.0f),
- new TileMetricsUtil.Tile(1, 1103, 630256.7f, 1823852.0f),
- new TileMetricsUtil.Tile(1, 1104, 644520.2f, 1865128.0f),
- new TileMetricsUtil.Tile(1, 1105, 663272.5f, 1919394.0f),
- new TileMetricsUtil.Tile(1, 1106, 680005.06f, 1967815.0f),
- new TileMetricsUtil.Tile(1, 2102, 600601.44f, 1738035.0f),
- new TileMetricsUtil.Tile(1, 2103, 613472.7f, 1775282.0f),
- new TileMetricsUtil.Tile(1, 2101, 589004.4f, 1704475.0f),
- new TileMetricsUtil.Tile(1, 2106, 662705.8f, 1917754.0f),
- new TileMetricsUtil.Tile(1, 2107, 676511.4f, 1957705.0f),
- new TileMetricsUtil.Tile(1, 2104, 627960.75f, 1817208.0f),
- new TileMetricsUtil.Tile(1, 2105, 646421.8f, 1870631.0f),
- new TileMetricsUtil.Tile(1, 1112, 787963.56f, 2280228.0f),
- new TileMetricsUtil.Tile(1, 1113, 807714.6f, 2337384.0f),
- new TileMetricsUtil.Tile(1, 2109, 704151.3f, 2037690.0f),
- new TileMetricsUtil.Tile(1, 1110, 732698.3f, 2120300.0f),
- new TileMetricsUtil.Tile(1, 2108, 689368.8f, 1994912.0f),
- new TileMetricsUtil.Tile(1, 1111, 761022.75f, 2202266.0f),
- new TileMetricsUtil.Tile(1, 1116, 836718.1f, 2421315.0f),
- new TileMetricsUtil.Tile(1, 1114, 818766.75f, 2369367.0f),
- new TileMetricsUtil.Tile(1, 1115, 829341.0f, 2399967.0f)
- )
- )
- );
-}
diff --git a/src/tests/java/net/sf/picard/illumina/parser/readers/BclReaderTest.java b/src/tests/java/net/sf/picard/illumina/parser/readers/BclReaderTest.java
index 446fc5c..865e3ae 100644
--- a/src/tests/java/net/sf/picard/illumina/parser/readers/BclReaderTest.java
+++ b/src/tests/java/net/sf/picard/illumina/parser/readers/BclReaderTest.java
@@ -52,7 +52,7 @@ public class BclReaderTest {
@Test
public void readValidFile() {
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy = new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY);
- final BclReader reader = new BclReader(PASSING_BCL_FILE, bclQualityEvaluationStrategy);
+ final BclReader reader = BclReader.make(PASSING_BCL_FILE, bclQualityEvaluationStrategy);
final byte[] quals = qualsAsBytes();
Assert.assertEquals(reader.numClusters, expectedBases.length);
@@ -81,7 +81,7 @@ public class BclReaderTest {
@Test(expectedExceptions = PicardException.class, dataProvider = "failingFiles")
public void failingFileTest(final File failingFile) {
final BclQualityEvaluationStrategy bclQualityEvaluationStrategy = new BclQualityEvaluationStrategy(BclQualityEvaluationStrategy.ILLUMINA_ALLEGED_MINIMUM_QUALITY);
- final BclReader reader = new BclReader(failingFile, bclQualityEvaluationStrategy);
+ final BclReader reader = BclReader.make(failingFile, bclQualityEvaluationStrategy);
Assert.assertEquals(reader.numClusters, expectedBases.length);
while (reader.hasNext()) {
reader.next();
@@ -104,7 +104,7 @@ public class BclReaderTest {
callables.add(new Callable<Void>() {
@Override
public Void call() throws Exception {
- final BclReader reader = new BclReader(even_i ? QUAL_1FAILING_BCL_FILE : QUAL_0FAILING_BCL_FILE, bclQualityEvaluationStrategy);
+ final BclReader reader = BclReader.make(even_i ? QUAL_1FAILING_BCL_FILE : QUAL_0FAILING_BCL_FILE, bclQualityEvaluationStrategy);
Assert.assertEquals(reader.numClusters, expectedBases.length);
while (reader.hasNext()) {
reader.next();
@@ -137,7 +137,7 @@ public class BclReaderTest {
callables.add(new Callable<Void>() {
@Override
public Void call() throws Exception {
- final BclReader reader = new BclReader(even_i ? QUAL_1FAILING_BCL_FILE : QUAL_0FAILING_BCL_FILE, bclQualityEvaluationStrategy);
+ final BclReader reader = BclReader.make(even_i ? QUAL_1FAILING_BCL_FILE : QUAL_0FAILING_BCL_FILE, bclQualityEvaluationStrategy);
Assert.assertEquals(reader.numClusters, expectedBases.length);
while (reader.hasNext()) {
reader.next();
diff --git a/src/tests/java/net/sf/picard/sam/CleanSamTest.java b/src/tests/java/net/sf/picard/sam/CleanSamTest.java
index 6a3d678..a7ef22f 100644
--- a/src/tests/java/net/sf/picard/sam/CleanSamTest.java
+++ b/src/tests/java/net/sf/picard/sam/CleanSamTest.java
@@ -23,10 +23,10 @@
*/
package net.sf.picard.sam;
+import net.sf.picard.sam.testers.CleanSamTester;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMValidationError;
-import net.sf.samtools.util.TestUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -35,11 +35,11 @@ import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
-import java.util.Collection;
public class CleanSamTest {
private static final File TEST_DATA_DIR = new File("testdata/net/sf/picard/sam/CleanSam");
+ private static final String qualityScore = "&/,&-.1/6/&&)&).)/,&0768)&/.,/874,&.4137572)&/&&,&1-&.0/&&*,&&&&&&&&&&18775799,&16:8775-56256/69::;0";
@Test(dataProvider = "testCleanSamDataProvider")
public void testCleanSam(final String samFile, final String expectedCigar) throws IOException {
@@ -75,4 +75,24 @@ public class CleanSamTest {
{"long_trailing_insertion.sam", "90M10I"},
};
}
+
+ //identical test case using the SamFileTester to generate that SAM file on the fly
+ @Test(dataProvider = "testCleanSamTesterDataProvider")
+ public void testCleanSamTester(final String expectedCigar, final int length, final int alignStart) throws IOException {
+ final CleanSamTester cleanSamTester = new CleanSamTester(expectedCigar, length);
+ cleanSamTester.addMappedFragment(0, alignStart, false, expectedCigar, qualityScore, -1);
+ cleanSamTester.runTest();
+ }
+
+ @DataProvider(name = "testCleanSamTesterDataProvider")
+ public Object[][] testCleanSamTesterDataProvider() {
+ return new Object[][]{
+ {"100M", 101, 2},
+ {"99M1S", 101, 3},
+ {"91M2D9M", 102, 1},
+ {"91M2D8M1S", 101, 1},
+ {"99M1I", 101, 3},
+ {"90M10I", 101, 3},
+ };
+ }
}
diff --git a/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java b/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java
index 8cc0ea2..a5342cd 100644
--- a/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java
+++ b/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java
@@ -26,6 +26,7 @@ package net.sf.picard.sam;
import net.sf.picard.io.IoUtil;
import net.sf.samtools.*;
import net.sf.samtools.util.TestUtil;
+import net.sf.picard.sam.testers.MarkDuplicatesTester;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -136,4 +137,174 @@ public class MarkDuplicatesTest {
{ true, suppressPgMap}
};
}
+
+
+ @Test
+ public void testSingleUnmappedFragment() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addUnmappedFragment(-1, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testSingleUnmappedPair() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addUnmappedPair(-1, 50);
+ tester.runTest();
+ }
+
+
+ @Test
+ public void testSingleMappedFragment() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedFragment(1, 1, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testSingleMappedPair() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 1, 100, false, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testSingleMappedFragmentAndSingleMappedPair() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedFragment(1, 1, true, 30); // duplicate!!!
+ tester.addMappedPair(1, 1, 100, false, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairs() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 1, 100, false, false, 50);
+ tester.addMappedPair(1, 1, 100, true, true, 30); // duplicate!!!
+ tester.runTest();
+ }
+
+ @Test
+ public void testThreeMappedPairs() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 1, 100, false, false, 50);
+ tester.addMappedPair(1, 1, 100, true, true, 30); // duplicate!!!
+ tester.addMappedPair(1, 1, 100, true, true, 30); // duplicate!!!
+ tester.runTest();
+ }
+
+ @Test
+ public void testSingleMappedFragmentAndTwoMappedPairs() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedFragment(1, 1, true, 30); // duplicate!!!
+ tester.addMappedPair(1, 1, 100, false, false, 50);
+ tester.addMappedPair(1, 1, 100, true, true, 30); // duplicate!!!
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairsMatesSoftClipped() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 10022, 10051, false, false, "76M", "8S68M", false, true, false, 50);
+ tester.addMappedPair(1, 10022, 10063, false, false, "76M", "5S71M", false, true, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairsWithSoftClipping() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ // NB: no duplicates
+ // 5'1: 2, 5'2:46+73M=118
+ // 5'1: 2, 5'2:51+68M=118
+ tester.addMappedPair(1, 2, 46, false, false, "6S42M28S", "3S73M", false, 50);
+ tester.addMappedPair(1, 2, 51, true, true, "6S42M28S", "8S68M", false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairsWithSoftClippingFirstOfPairOnlyNoMateCigar() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.setNoMateCigars(true);
+ // NB: no duplicates
+ // 5'1: 2, 5'2:46+73M=118
+ // 5'1: 2, 5'2:51+68M=118
+ tester.addMappedPair(1, 12, 46, false, false, "6S42M28S", null, true, 50); // only add the first one
+ tester.addMappedPair(1, 12, 51, false, false, "6S42M28S", null, true, 50); // only add the first one
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairsWithSoftClippingBoth() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 10046, 10002, false, false, "3S73M", "6S42M28S", true, false, false, 50);
+ tester.addMappedPair(1, 10051, 10002, true, true, "8S68M", "6S48M22S", true, false, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testMatePairFirstUnmapped() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMatePair(1, 10049, 10049, false, true, false, false, "11M2I63M", null, false, false, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testMatePairSecondUnmapped() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMatePair(1, 10056, 10056, true, false, false, false, null, "54M22S", false, false, false, 50);
+ tester.runTest();
+ }
+
+ @Test
+ public void testMappedFragmentAndMatePairOneUnmapped() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMatePair(1, 10049, 10049, false, true, false, false, "11M2I63M", null, false, false, false, 50);
+ tester.addMappedFragment(1, 10049, true, 30); // duplicate
+ tester.runTest();
+ }
+
+ @Test
+ public void testMappedPairAndMatePairOneUnmapped() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMatePair(1, 10040, 10040, false, true, true, false, "76M", null, false, false, false, 30); // first a duplicate,
+ // second end unmapped
+ tester.addMappedPair(1, 10189, 10040, false, false, "41S35M", "65M11S", true, false, false, 50); // mapped OK
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairsWithOppositeOrientations() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 10182, 10038, false, false, "32S44M", "66M10S", true, false, false, 50); // -/+
+ tester.addMappedPair(1, 10038, 10182, true, true, "70M6S", "32S44M", false, true, false, 50); // +/-, both are duplicates
+ tester.runTest();
+ }
+
+ @Test
+ public void testTwoMappedPairsWithOppositeOrientationsNumberTwo() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 10038, 10182, false, false, "70M6S", "32S44M", false, true, false, 50); // +/-, both are duplicates
+ tester.addMappedPair(1, 10182, 10038, true, true, "32S44M", "66M10S", true, false, false, 50); // -/+
+ tester.runTest();
+ }
+
+ @Test
+ public void testThreeMappedPairsWithMatchingSecondMate() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ // Read0 and Read2 are duplicates
+ // 10181+35=10216, 10058
+ tester.addMappedPair(1, 10181, 10058, false, false, "41S35M", "47M29S", true, false, false, 50); // -/+
+ // 10181+37=10218, 10058
+ tester.addMappedPair(1, 10181, 10058, false, false, "37S39M", "44M32S", true, false, false, 50); // -/+
+ // 10180+36=10216, 10058
+ tester.addMappedPair(1, 10180, 10058, true, true, "36S40M", "50M26S", true, false, false, 50); // -/+, both are duplicates
+ tester.runTest();
+ }
+
+ @Test
+ public void testMappedPairWithSamePosition() {
+ final MarkDuplicatesTester tester = new MarkDuplicatesTester();
+ tester.addMappedPair(1, 4914, 4914, false, false, "37M39S", "73M3S", false, false, false, 50); // +/+
+ tester.runTest();
+ }
}
diff --git a/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java b/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java
index b3b558e..f9022ca 100644
--- a/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java
+++ b/src/tests/java/net/sf/picard/sam/MergeBamAlignmentTest.java
@@ -24,7 +24,20 @@
package net.sf.picard.sam;
import net.sf.picard.PicardException;
-import net.sf.samtools.*;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMFileWriterFactory;
+import net.sf.samtools.SAMProgramRecord;
+import net.sf.samtools.SAMReadGroupRecord;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMRecordIterator;
+import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.SAMTag;
+import net.sf.samtools.SamPairUtil;
import net.sf.samtools.util.CloserUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
@@ -32,7 +45,12 @@ import org.testng.annotations.Test;
import java.io.File;
import java.io.IOException;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
* Test for the MergeBamAlignment class
@@ -321,7 +339,7 @@ public class MergeBamAlignmentTest {
final File target = File.createTempFile("target", "bam");
target.deleteOnExit();
final SamAlignmentMerger merger = new SamAlignmentMerger(unmapped, target, fasta, null, true, false,
- false, Arrays.asList(aligned), 1, null, null, null, null, null,
+ false, Arrays.asList(aligned), 1, null, null, null, null, null, null,
Arrays.asList(SamPairUtil.PairOrientation.FR), SAMFileHeader.SortOrder.coordinate,
new BestMapqPrimaryAlignmentSelectionStrategy());
diff --git a/src/tests/java/net/sf/picard/sam/MergingSamRecordIteratorGroupCollisionTest.java b/src/tests/java/net/sf/picard/sam/MergingSamRecordIteratorGroupCollisionTest.java
index f999cd4..daf5f6e 100644
--- a/src/tests/java/net/sf/picard/sam/MergingSamRecordIteratorGroupCollisionTest.java
+++ b/src/tests/java/net/sf/picard/sam/MergingSamRecordIteratorGroupCollisionTest.java
@@ -25,17 +25,9 @@ package net.sf.picard.sam;
import java.util.ArrayList;
import java.util.Collection;
-import java.util.Iterator;
import java.util.List;
-import net.sf.samtools.AbstractSAMHeaderRecord;
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMProgramRecord;
-import net.sf.samtools.SAMReadGroupRecord;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMRecordSetBuilder;
-import net.sf.samtools.SAMTag;
+import net.sf.samtools.*;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
diff --git a/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java b/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
index cb3ee99..70a7837 100644
--- a/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
+++ b/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
@@ -71,9 +71,7 @@ public class ValidateSamFileTest {
final StringWriter results = new StringWriter();
final SamFileValidator validator = new SamFileValidator(new PrintWriter(results), 8000);
validator.setVerbose(true, 10);
- validator.validateSamFileVerbose(
- samBuilder.getSamReader(),
- null);
+ validator.validateSamFileVerbose(samBuilder.getSamReader(), null);
final int lineCount = results.toString().split("\n").length;
Assert.assertEquals(lineCount, 11);
@@ -301,11 +299,13 @@ public class ValidateSamFileTest {
Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_VERSION_NUMBER.getHistogramString()).getValue(), 1.0);
}
- @Test
+ @Test(enabled=false, description="File is actually valid for Standard quality scores so this test fails with an NPE.")
public void testQualityFormatValidation() throws Exception {
final SAMFileReader samReader = new SAMFileReader(new File("./testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam"));
final Histogram<String> results = executeValidation(samReader, null);
- Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_QUALITY_FORMAT.getHistogramString()).getValue(), 1.0);
+ final Histogram<String>.Bin bin = results.get(SAMValidationError.Type.INVALID_QUALITY_FORMAT.getHistogramString());
+ final double value = bin.getValue();
+ Assert.assertEquals(value, 1.0);
}
@Test
diff --git a/src/tests/java/net/sf/picard/sam/testers/CleanSamTester.java b/src/tests/java/net/sf/picard/sam/testers/CleanSamTester.java
new file mode 100644
index 0000000..41aca58
--- /dev/null
+++ b/src/tests/java/net/sf/picard/sam/testers/CleanSamTester.java
@@ -0,0 +1,52 @@
+package net.sf.picard.sam.testers;
+
+import net.sf.picard.cmdline.CommandLineProgram;
+import net.sf.picard.sam.CleanSam;
+import net.sf.picard.sam.SamFileValidator;
+import net.sf.picard.sam.testers.SamFileTester;
+import net.sf.samtools.*;
+import net.sf.samtools.util.TestUtil;
+import org.testng.Assert;
+
+import java.io.PrintWriter;
+import java.util.Arrays;
+
+/**
+ * This class is the extension of the SamFileTester to test CleanSam with SAM files generated on the fly.
+ */
+public class CleanSamTester extends SamFileTester {
+ private final String expectedCigar;
+ private final CleanSam program = new CleanSam();
+
+ public CleanSamTester(final String expectedCigar, final int length) {
+ super(length, true);
+ this.expectedCigar = expectedCigar;
+ }
+
+
+ protected void test() {
+ try {
+ final SamFileValidator validator = new SamFileValidator(new PrintWriter(System.out), 8000);
+ validator.setIgnoreWarnings(true);
+ validator.setVerbose(true, 1000);
+ validator.setErrorsToIgnore(Arrays.asList(SAMValidationError.Type.MISSING_READ_GROUP));
+ SAMFileReader samReader = new SAMFileReader(getOutput());
+ samReader.setValidationStringency(SAMFileReader.ValidationStringency.LENIENT);
+ final SAMRecord rec = samReader.iterator().next();
+ samReader.close();
+ Assert.assertEquals(rec.getCigarString(), expectedCigar);
+ samReader = new SAMFileReader(getOutput());
+ final boolean validated = validator.validateSamFileVerbose(samReader, null);
+ samReader.close();
+ Assert.assertTrue(validated, "ValidateSamFile failed");
+ } finally {
+ TestUtil.recursiveDelete(getOutputDir());
+ }
+ }
+
+ @Override
+ protected CommandLineProgram getProgram() {
+ return program;
+ }
+
+}
diff --git a/src/tests/java/net/sf/picard/sam/testers/MarkDuplicatesTester.java b/src/tests/java/net/sf/picard/sam/testers/MarkDuplicatesTester.java
new file mode 100644
index 0000000..f8ad0a9
--- /dev/null
+++ b/src/tests/java/net/sf/picard/sam/testers/MarkDuplicatesTester.java
@@ -0,0 +1,55 @@
+package net.sf.picard.sam.testers;
+
+
+import net.sf.picard.cmdline.CommandLineProgram;
+import net.sf.picard.sam.MarkDuplicates;
+import net.sf.picard.sam.testers.SamFileTester;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.TestUtil;
+import org.testng.Assert;
+
+import java.io.File;
+
+/**
+ * This class is an extension of SamFileTester used to test MarkDuplicates with SAM files generated on the fly.
+ */
+public class MarkDuplicatesTester extends SamFileTester {
+
+ private final MarkDuplicates program = new MarkDuplicates();
+
+ public MarkDuplicatesTester() {
+ super(50, true);
+
+ final File metrics = new File(getOutputDir(), "metrics.txt");
+ addArg("METRICS_FILE=" + metrics);
+ }
+
+ @Override
+ public void test() {
+ try {
+ // Read the output and check the duplicate flag
+ final SAMFileReader reader = new SAMFileReader(getOutput());
+ for (final SAMRecord record : reader) {
+ final String key = samRecordToDuplicatesFlagsKey(record);
+ Assert.assertTrue(this.duplicateFlags.containsKey(key));
+ final boolean value = this.duplicateFlags.get(key);
+ this.duplicateFlags.remove(key);
+ if (value != record.getDuplicateReadFlag()) {
+ System.err.println("Mismatching read:");
+ System.err.print(record.getSAMString());
+ }
+ Assert.assertEquals(record.getDuplicateReadFlag(), value);
+ }
+ reader.close();
+ } finally {
+ TestUtil.recursiveDelete(getOutputDir());
+ }
+ }
+
+ @Override
+ protected CommandLineProgram getProgram() {
+ return program;
+ }
+}
+
diff --git a/src/tests/java/net/sf/picard/sam/testers/SamFileTester.java b/src/tests/java/net/sf/picard/sam/testers/SamFileTester.java
new file mode 100644
index 0000000..422efe7
--- /dev/null
+++ b/src/tests/java/net/sf/picard/sam/testers/SamFileTester.java
@@ -0,0 +1,213 @@
+package net.sf.picard.sam.testers;
+
+import net.sf.picard.cmdline.CommandLineProgram;
+import net.sf.picard.io.IoUtil;
+import net.sf.samtools.SAMRecordSetBuilder;
+import net.sf.samtools.*;
+import org.testng.Assert;
+
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Abstract class for doing basic on the fly SAM file testing.
+ */
+public abstract class SamFileTester {
+
+ public static final String TEST_DATA_BASE_DIR = "testdata/net/sf/picard/sam/";
+ private final SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder();
+ protected final Map<String, Boolean> duplicateFlags = new HashMap<String, Boolean>();
+ private File outputDir;
+ private File output;
+ private int readNameCounter = 0;
+ private boolean noMateCigars = false;
+ private boolean deleteOnExit = true;
+ private final ArrayList<String> args = new ArrayList<String>();
+
+ public SamFileTester(final int length, final boolean deleteOnExit) {
+ this.deleteOnExit = deleteOnExit;
+ samRecordSetBuilder.setReadLength(length);
+ }
+
+
+ public File getOutput() {
+ return output;
+ }
+
+ public void addArg(final String arg) {
+ args.add(arg);
+ }
+
+ public File getOutputDir() {
+ return outputDir;
+ }
+
+ public void setNoMateCigars(final boolean value) {
+ this.noMateCigars = value;
+ }
+
+ protected String samRecordToDuplicatesFlagsKey(final SAMRecord record) {
+ String readName = record.getReadName()
+ + "-"
+ + record.getReadPairedFlag()
+ + "-";
+ if (record.getReadPairedFlag()) {
+ readName += record.getFirstOfPairFlag()
+ + "-"
+ + record.getSecondOfPairFlag();
+ } else {
+ readName += "false-false";
+ }
+ return readName;
+ }
+ // Below are a bunch of utility methods for adding records to the SAMRecordSetBuilder
+ public void addUnmappedFragment(final int referenceSequenceIndex,
+ final int defaultQualityScore) {
+ addFragment(referenceSequenceIndex, -1, true, false, null, null, defaultQualityScore);
+ }
+
+ public void addUnmappedFragment(final int referenceSequenceIndex,
+ final String qualityString){
+ addFragment(referenceSequenceIndex, -1, true, false, null, qualityString, -1);
+ }
+
+ public void addUnmappedPair(final int referenceSequenceIndex,
+ final int defaultQualityScore) {
+ addMatePair(referenceSequenceIndex, -1, -1, true, true, false, false, null, null, false, false, false, defaultQualityScore);
+ }
+
+ public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate,
+ final int defaultQualityScore) {
+ addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, null, null, defaultQualityScore);
+ }
+
+ public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar,
+ final int defaultQualityScore) {
+ addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, null, defaultQualityScore);
+ }
+
+ public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar,
+ final String qualityString,
+ final int defaultQualityScore) {
+ addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore);
+ }
+
+ public void addMappedPair(final int referenceSequenceIndex,
+ final int alignmentStart1,
+ final int alignmentStart2,
+ final boolean isDuplicate1,
+ final boolean isDuplicate2,
+ final int defaultQualityScore) {
+ addMappedPair(referenceSequenceIndex, alignmentStart1, alignmentStart2, isDuplicate1, isDuplicate2, null, null,
+ false, defaultQualityScore);
+ }
+
+ public void addMappedPair(final int referenceSequenceIndex,
+ final int alignmentStart1,
+ final int alignmentStart2,
+ final boolean isDuplicate1,
+ final boolean isDuplicate2,
+ final String cigar1,
+ final String cigar2,
+ final boolean firstOnly,
+ final int defaultQualityScore) {
+ addMappedPair(referenceSequenceIndex, alignmentStart1, alignmentStart2, isDuplicate1, isDuplicate2, cigar1,
+ cigar2, false, true, firstOnly, defaultQualityScore);
+ }
+
+ public void addMappedPair(final int referenceSequenceIndex,
+ final int alignmentStart1,
+ final int alignmentStart2,
+ final boolean isDuplicate1,
+ final boolean isDuplicate2,
+ final String cigar1,
+ final String cigar2,
+ final boolean strand1,
+ final boolean strand2,
+ final boolean firstOnly,
+ final int defaultQualityScore) {
+ addMatePair(referenceSequenceIndex, alignmentStart1, alignmentStart2, false, false, isDuplicate1, isDuplicate2, cigar1, cigar2,
+ strand1, strand2, firstOnly, defaultQualityScore);
+ }
+
+ private void addFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar,
+ final String qualityString, final int defaultQualityScore) {
+ final SAMRecord record = samRecordSetBuilder.addFrag("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart, false,
+ recordUnmapped, cigar, qualityString, defaultQualityScore);
+
+ this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record), isDuplicate);
+ }
+
+ public void addMatePair(final int referenceSequenceIndex,
+ final int alignmentStart1,
+ final int alignmentStart2,
+ final boolean record1Unmapped,
+ final boolean record2Unmapped,
+ final boolean isDuplicate1,
+ final boolean isDuplicate2,
+ final String cigar1,
+ final String cigar2,
+ final boolean strand1,
+ final boolean strand2,
+ final boolean firstOnly,
+ final int defaultQuality) {
+ final List<SAMRecord> samRecordList = samRecordSetBuilder.addPair("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart1, alignmentStart2,
+ record1Unmapped, record2Unmapped, cigar1, cigar2, strand1, strand2, defaultQuality);
+
+ final SAMRecord record1 = samRecordList.get(0);
+ final SAMRecord record2 = samRecordList.get(1);
+
+ if (this.noMateCigars) {
+ record1.setAttribute("MC", null);
+ record2.setAttribute("MC", null);
+ }
+
+ if (firstOnly) {
+ samRecordSetBuilder.getRecords().remove(record2);
+ }
+
+ this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record1), isDuplicate1);
+ this.duplicateFlags.put(samRecordToDuplicatesFlagsKey(record2), isDuplicate2);
+ }
+
+ protected abstract void test();
+
+ protected abstract CommandLineProgram getProgram();
+
+ /**
+ * Sets up the basic command line arguments for input and output and runs instanceMain.
+ */
+ public void runTest() {
+ if (getProgram() != null) {
+ outputDir = IoUtil.createTempDir(this.getClass().getSimpleName() + ".", ".tmp");
+ if(deleteOnExit){
+ outputDir.deleteOnExit();
+ }
+ final File input = createInputFile();
+
+ output = new File(outputDir, "output.sam");
+ args.add("INPUT=" + input.getAbsoluteFile());
+ args.add("OUTPUT=" + output.getAbsoluteFile());
+ Assert.assertEquals(getProgram().instanceMain(args.toArray(new String[args.size()])), 0);
+ }
+ test();
+ }
+
+ private File createInputFile() {
+ // Create the input file
+ final File input = new File(outputDir, "input.sam");
+ final SAMFileWriter writer = new SAMFileWriterFactory().makeSAMOrBAMWriter(samRecordSetBuilder.getHeader(), true, input);
+ for (final SAMRecord record : samRecordSetBuilder.getRecords()) {
+ writer.addAlignment(record);
+ }
+ writer.close();
+ return input;
+ }
+
+ public SAMFileReader getInput(){
+ return samRecordSetBuilder.getSamReader();
+ }
+}
\ No newline at end of file
diff --git a/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java b/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java
index 996d4af..2092693 100644
--- a/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java
+++ b/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java
@@ -1,7 +1,9 @@
package net.sf.picard.util;
+import net.sf.picard.PicardException;
import net.sf.picard.fastq.FastqReader;
import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecordSetBuilder;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -33,7 +35,7 @@ public class QualityEncodingDetectorTest {
new Testcase(new File("./testdata/net/sf/samtools/BAMFileIndexTest/index_test.bam"), FastqQualityFormat.Standard),
new Testcase(new File("./testdata/net/sf/picard/util/QualityEncodingDetectorTest/solexa-as-standard.bam"), FastqQualityFormat.Solexa),
new Testcase(new File("./testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam"), FastqQualityFormat.Illumina)
-
+
);
Object[][] renderObjectArrayArray(final List<Testcase> testcaseList) {
@@ -68,4 +70,40 @@ public class QualityEncodingDetectorTest {
Assert.assertEquals(QualityEncodingDetector.detect(reader), expectedQualityFormat);
reader.close();
}
+
+ @Test
+ public void testSmallBamForDetectorFailure() {
+ final SAMRecordSetBuilder samRecordSetBuilder = createSmallUnmappedSam();
+ Assert.assertNotSame(QualityEncodingDetector.detect(samRecordSetBuilder.getSamReader(),
+ null), FastqQualityFormat.Standard);
+ }
+
+ @Test
+ public void testSmallBamWithExpectedQuality() {
+ final SAMRecordSetBuilder samRecordSetBuilder = createSmallUnmappedSam();
+ Assert.assertEquals(QualityEncodingDetector.detect(samRecordSetBuilder.getSamReader(),
+ FastqQualityFormat.Standard), FastqQualityFormat.Standard);
+ }
+
+ @Test (expectedExceptions = PicardException.class)
+ public void testQualitySanity() {
+ final SAMRecordSetBuilder samRecordSetBuilder = createSmallUnmappedSam();
+ QualityEncodingDetector.detect(samRecordSetBuilder.getSamReader(),
+ FastqQualityFormat.Illumina);
+ }
+
+ private SAMRecordSetBuilder createSmallUnmappedSam() {
+ final SAMRecordSetBuilder samRecordSetBuilder = new SAMRecordSetBuilder();
+ samRecordSetBuilder.setReadLength(25);
+ samRecordSetBuilder.addFrag("READ0", -1, -1, false, true, null, "@@@FFFFFHHHHHJIJIIJIIJJJJ", -1);
+ samRecordSetBuilder.addFrag("READ1", -1, -1, false, true, null, "@@@FFFFFHHHHHJIJIIJIIJJJJ", -1);
+ samRecordSetBuilder.addFrag("READ2", -1, -1, false, true, null, "@CCFDFEDHHHFFHIIII at GH<FFH", -1);
+ samRecordSetBuilder.addFrag("READ3", -1, -1, false, true, null, "@@?DFFDFHFFHDHIIHIIEIIJGG", -1);
+ samRecordSetBuilder.addFrag("READ4", -1, -1, false, true, null, "@CCFFDDFHHHHHIIJJHFJJJJJH", -1);
+ samRecordSetBuilder.addFrag("READ5", -1, -1, false, true, null, "BCCFFFFFHHHHHJJJJJIJJJJJJ", -1);
+ samRecordSetBuilder.addFrag("READ6", -1, -1, false, true, null, "@@CDFFFFHHHFHHIJJJJJJJIJJ", -1);
+ samRecordSetBuilder.addFrag("READ7", -1, -1, false, true, null, "CCCFFFFFHHHHHJJJJIJJJJHII", -1);
+ samRecordSetBuilder.addFrag("READ8", -1, -1, false, true, null, "CCCFFFFFHHHHHJJJJJJJJJJJJ", -1);
+ return samRecordSetBuilder;
+ }
}
diff --git a/src/tests/java/net/sf/samtools/BAMFileIndexTest.java b/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
index 5b3086b..e642a3a 100755
--- a/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
+++ b/src/tests/java/net/sf/samtools/BAMFileIndexTest.java
@@ -25,12 +25,14 @@ package net.sf.samtools;
import net.sf.samtools.util.CloseableIterator;
import net.sf.samtools.util.StopWatch;
+import net.sf.samtools.util.StringUtil;
import org.testng.Assert;
import static org.testng.Assert.*;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
+import java.io.ByteArrayInputStream;
import java.io.File;
import java.util.*;
@@ -258,6 +260,38 @@ public class BAMFileIndexTest
return new Object[][]{{true}, {false}};
}
+ @Test
+ public void testUnmappedMateWithCoordinate() throws Exception {
+ // TODO: Use SAMRecordSetBuilder when it is able to create a pair with one end unmapped
+ final String samText = "@HD\tVN:1.0\tSO:coordinate\n" +
+ "@SQ\tSN:chr1\tLN:101\n" +
+ "@SQ\tSN:chr2\tLN:101\n" +
+ "@SQ\tSN:chr3\tLN:101\n" +
+ "@SQ\tSN:chr4\tLN:101\n" +
+ "@SQ\tSN:chr5\tLN:101\n" +
+ "@SQ\tSN:chr6\tLN:101\n" +
+ "@SQ\tSN:chr7\tLN:404\n" +
+ "@SQ\tSN:chr8\tLN:202\n" +
+ "@RG\tID:0\tSM:Hi,Mom!\n" +
+ "@PG\tID:1\tPN:Hey!\tVN:2.0\n" +
+ "one_end_mapped\t73\tchr7\t100\t255\t101M\t*\t0\t0\tCAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN\t)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&\tRG:Z:0\n" +
+ "one_end_mapped\t133\tchr7\t100\t0\t*\t=\t100\t0\tNCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA\t&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1\tRG:Z:0\n";
+ final ByteArrayInputStream bis = new ByteArrayInputStream(StringUtil.stringToBytes(samText));
+ final File bamFile = File.createTempFile("BAMFileIndexTest.", ".bam");
+ bamFile.deleteOnExit();
+ final SAMFileReader textReader = new SAMFileReader(bis);
+ SAMFileWriterFactory samFileWriterFactory = new SAMFileWriterFactory();
+ samFileWriterFactory.setCreateIndex(true);
+ final SAMFileWriter writer = samFileWriterFactory.makeBAMWriter(textReader.getFileHeader(), true, bamFile);
+ for (final SAMRecord rec : textReader) {
+ writer.addAlignment(rec);
+ }
+ writer.close();
+ final SAMFileReader bamReader = new SAMFileReader(bamFile);
+ Assert.assertEquals(countElements(bamReader.queryContained("chr7", 100, 100)), 1);
+ Assert.assertEquals(countElements(bamReader.queryOverlapping("chr7", 100, 100)), 2);
+ }
+
private <E> void consumeAll(final Collection<E> collection, final CloseableIterator<E> iterator) {
while (iterator.hasNext()) {
collection.add(iterator.next());
@@ -279,6 +313,7 @@ public class BAMFileIndexTest
int num;
for (num = 0; it.hasNext(); ++num, it.next()) {
}
+ it.close();
return num;
}
diff --git a/src/tests/java/net/sf/samtools/BAMFileWriterTest.java b/src/tests/java/net/sf/samtools/BAMFileWriterTest.java
index fab3552..c74b48c 100644
--- a/src/tests/java/net/sf/samtools/BAMFileWriterTest.java
+++ b/src/tests/java/net/sf/samtools/BAMFileWriterTest.java
@@ -30,8 +30,6 @@ import org.testng.annotations.Test;
import java.io.File;
-import net.sf.samtools.*;
-
/**
* Test that BAM writing doesn't blow up. For presorted writing, the resulting BAM file is read and contents are
* compared with the original SAM file.
diff --git a/src/tests/java/net/sf/picard/sam/SamPairUtilTest.java b/src/tests/java/net/sf/samtools/SamPairUtilTest.java
similarity index 99%
rename from src/tests/java/net/sf/picard/sam/SamPairUtilTest.java
rename to src/tests/java/net/sf/samtools/SamPairUtilTest.java
index 2ba0da0..f7792e3 100644
--- a/src/tests/java/net/sf/picard/sam/SamPairUtilTest.java
+++ b/src/tests/java/net/sf/samtools/SamPairUtilTest.java
@@ -26,12 +26,11 @@ package net.sf.picard.sam;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.SamPairUtil;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
-import java.util.ArrayList;
-
public class SamPairUtilTest {
diff --git a/src/tests/java/org/broad/tribble/FeatureReaderTest.java b/src/tests/java/org/broad/tribble/FeatureReaderTest.java
index e4530d3..44b5964 100644
--- a/src/tests/java/org/broad/tribble/FeatureReaderTest.java
+++ b/src/tests/java/org/broad/tribble/FeatureReaderTest.java
@@ -7,7 +7,7 @@ import org.broad.tribble.example.ExampleBinaryCodec;
import org.broad.tribble.index.Block;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
-import org.broad.tribble.readers.LocationAware;
+import net.sf.samtools.util.LocationAware;
import org.broad.tribble.util.ParsingUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
diff --git a/src/tests/java/org/broad/tribble/index/linear/LinearIndexTest.java b/src/tests/java/org/broad/tribble/index/linear/LinearIndexTest.java
index a130b05..38bdfc0 100644
--- a/src/tests/java/org/broad/tribble/index/linear/LinearIndexTest.java
+++ b/src/tests/java/org/broad/tribble/index/linear/LinearIndexTest.java
@@ -60,25 +60,25 @@ public class LinearIndexTest {
// chr2 (0, 100]
// chr2 (100, 200]
private static LinearIndex createTestIndex() {
- LinearIndex.ChrIndex chr1 = new LinearIndex.ChrIndex("chr1", 10);
+ final LinearIndex.ChrIndex chr1 = new LinearIndex.ChrIndex("chr1", 10);
chr1.addBlock(CHR1_B1);
chr1.addBlock(CHR1_B2);
chr1.addBlock(CHR1_B3);
chr1.updateLongestFeature(1);
- LinearIndex.ChrIndex chr2 = new LinearIndex.ChrIndex("chr2", 100);
+ final LinearIndex.ChrIndex chr2 = new LinearIndex.ChrIndex("chr2", 100);
chr2.addBlock(CHR2_B1);
chr2.addBlock(CHR2_B2);
chr2.updateLongestFeature(50);
- List<LinearIndex.ChrIndex> indices = Arrays.asList(chr1, chr2);
+ final List<LinearIndex.ChrIndex> indices = Arrays.asList(chr1, chr2);
return new LinearIndex(indices, RANDOM_FILE);
}
@Test()
public void testBasicFeatures() {
Assert.assertEquals(idx.getChrIndexClass(), LinearIndex.ChrIndex.class);
- Assert.assertEquals(idx.getType(), IndexFactory.IndexType.LINEAR.getHeaderValue());
+ Assert.assertEquals(idx.getType(), LinearIndex.INDEX_TYPE);
Assert.assertFalse(idx.hasFileSize());
Assert.assertFalse(idx.hasTimestamp());
Assert.assertFalse(idx.hasMD5());
@@ -103,7 +103,7 @@ public class LinearIndexTest {
@Test()
public void testEquals() {
- LinearIndex idx2 = createTestIndex();
+ final LinearIndex idx2 = createTestIndex();
Assert.assertEquals(idx, idx, "Identical indices are equal");
Assert.assertTrue(idx.equalsIgnoreProperties(idx), "Identical indices are equalIgnoreTimeStamp");
@@ -147,9 +147,9 @@ public class LinearIndexTest {
testQuery("chr2", 251, 251); // just escaping the 50 bp longest event
}
- private final void testQuery(String chr, int start, int stop, Block... expectedBlocksArray) {
- List<Block> qBlocks = idx.getBlocks(chr, start, stop);
- List<Block> eBlocks = Arrays.asList(expectedBlocksArray);
+ private final void testQuery(final String chr, final int start, final int stop, final Block... expectedBlocksArray) {
+ final List<Block> qBlocks = idx.getBlocks(chr, start, stop);
+ final List<Block> eBlocks = Arrays.asList(expectedBlocksArray);
Assert.assertEquals(qBlocks.size(), eBlocks.size(),
String.format("Query %s:%d-%d returned %d blocks but we only expected %d.", chr, start, stop, qBlocks.size(), eBlocks.size()));
@@ -161,19 +161,19 @@ public class LinearIndexTest {
@Test
public void oneEntryFirstChr() {
- BEDCodec code = new BEDCodec();
- Index index = IndexFactory.createLinearIndex(fakeBed, code);
- AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(fakeBed.getAbsolutePath(), code, index);
+ final BEDCodec code = new BEDCodec();
+ final Index index = IndexFactory.createLinearIndex(fakeBed, code);
+ final AbstractFeatureReader reader = AbstractFeatureReader.getFeatureReader(fakeBed.getAbsolutePath(), code, index);
try {
- CloseableTribbleIterator it = reader.iterator();
+ final CloseableTribbleIterator it = reader.iterator();
int count = 0;
while (it.hasNext()) {
it.next();
count++;
}
Assert.assertEquals(51, count);
- } catch (IOException e) {
+ } catch (final IOException e) {
Assert.fail("Unable to get iterator due to " + e.getMessage());
}
}
@@ -194,26 +194,26 @@ public class LinearIndexTest {
public void testOverlappingFeatures() throws Exception {
//chr2:179,222,066-179,262,059<- CONTAINS TTN
- Set<String> names = new HashSet<String>(Arrays.asList("Hs.134602", "Hs.620337", "Hs.609465", "Hs.623987",
+ final Set<String> names = new HashSet<String>(Arrays.asList("Hs.134602", "Hs.620337", "Hs.609465", "Hs.623987",
"Hs.594545", "LONG_FEATURE"));
- String bedFile = TestUtils.DATA_DIR + "bed/Unigene.sample.bed";
- String chr = "chr2";
- int start = 179266309;
- int end = 179303488;
- int expectedCount = 6;
+ final String bedFile = TestUtils.DATA_DIR + "bed/Unigene.sample.bed";
+ final String chr = "chr2";
+ final int start = 179266309;
+ final int end = 179303488;
+ final int expectedCount = 6;
// Linear binned index
LinearIndex.enableAdaptiveIndexing = false;
- int binSize = 1000;
+ final int binSize = 1000;
Index idx = IndexFactory.createLinearIndex(new File(bedFile), new BEDCodec(), binSize);
FeatureReader<BEDFeature> bfr = AbstractFeatureReader.getFeatureReader(bedFile, new BEDCodec(), idx);
CloseableTribbleIterator<BEDFeature> iter = bfr.query(chr, start, end);
int countInterval = 0;
while (iter.hasNext()) {
- BEDFeature feature = iter.next();
+ final BEDFeature feature = iter.next();
Assert.assertTrue(feature.getEnd() >= start && feature.getStart() <= end);
Assert.assertTrue(names.contains(feature.getName()));
countInterval++;
@@ -229,7 +229,7 @@ public class LinearIndexTest {
iter = bfr.query(chr, start, end);
countInterval = 0;
while (iter.hasNext()) {
- BEDFeature feature = iter.next();
+ final BEDFeature feature = iter.next();
Assert.assertTrue(feature.getEnd() >= start && feature.getStart() <= end);
Assert.assertTrue(names.contains(feature.getName()));
countInterval++;
diff --git a/src/tests/java/org/broad/tribble/index/tabix/TabixIndexTest.java b/src/tests/java/org/broad/tribble/index/tabix/TabixIndexTest.java
new file mode 100644
index 0000000..b316aed
--- /dev/null
+++ b/src/tests/java/org/broad/tribble/index/tabix/TabixIndexTest.java
@@ -0,0 +1,65 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broad.tribble.index.tabix;
+
+import net.sf.samtools.util.BlockCompressedOutputStream;
+import org.broad.tribble.util.LittleEndianOutputStream;
+import org.broad.tribble.util.TabixUtils;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+
+public class TabixIndexTest {
+ private static final File SMALL_TABIX_FILE = new File("testdata/tribble/tabix/trioDup.vcf.gz.tbi");
+ private static final File BIGGER_TABIX_FILE = new File("testdata/tribble/tabix/bigger.vcf.gz.tbi");
+
+ /**
+ * Read an existing index from disk, write it to a temp file, read that in, and assert that both in-memory
+ * representations are identical. Disk representations may not be identical due to arbitrary bin order and
+ * compression differences.
+ */
+ @Test(dataProvider = "readWriteTestDataProvider")
+ public void readWriteTest(final File tabixFile) throws Exception {
+ final TabixIndex index = new TabixIndex(tabixFile);
+ final File indexFile = File.createTempFile("TabixIndexTest.", TabixUtils.STANDARD_INDEX_EXTENSION);
+ final LittleEndianOutputStream los = new LittleEndianOutputStream(new BlockCompressedOutputStream(indexFile));
+ index.write(los);
+ los.close();
+ final TabixIndex index2 = new TabixIndex(indexFile);
+ Assert.assertEquals(index, index2);
+ // Unfortunately, can't do byte comparison of original file and temp file, because 1) different compression
+ // levels; and more importantly, arbitrary order of bins in bin list.
+ }
+
+ @DataProvider(name = "readWriteTestDataProvider")
+ public Object[][] readWriteTestDataProvider() {
+ return new Object[][] {
+ {SMALL_TABIX_FILE},
+ {BIGGER_TABIX_FILE}
+ };
+ }
+
+}
diff --git a/src/tests/java/org/broadinstitute/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java b/src/tests/java/org/broadinstitute/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java
new file mode 100644
index 0000000..f86e4b7
--- /dev/null
+++ b/src/tests/java/org/broadinstitute/variant/variantcontext/writer/TabixOnTheFlyIndexCreationTest.java
@@ -0,0 +1,66 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2014 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package org.broadinstitute.variant.variantcontext.writer;
+
+import org.broad.tribble.AbstractFeatureReader;
+import org.broad.tribble.CloseableTribbleIterator;
+import org.broad.tribble.FeatureReader;
+import org.broad.tribble.index.tabix.TabixIndex;
+import org.broad.tribble.util.TabixUtils;
+import org.broadinstitute.variant.variantcontext.VariantContext;
+import org.broadinstitute.variant.vcf.VCF3Codec;
+import org.broadinstitute.variant.vcf.VCFHeader;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.util.EnumSet;
+
+public class TabixOnTheFlyIndexCreationTest {
+ private static final File SMALL_VCF = new File("testdata/tribble/tabix/trioDup.vcf.gz");
+ @Test
+ public void simpleTest() throws Exception {
+ final VCF3Codec codec = new VCF3Codec();
+ final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(SMALL_VCF.getAbsolutePath(), codec, false);
+ final VCFHeader headerFromFile = (VCFHeader)reader.getHeader();
+ final File vcf = File.createTempFile("TabixOnTheFlyIndexCreationTest.", ".vcf.gz");
+ final File tabix = new File(vcf.getAbsolutePath() + TabixUtils.STANDARD_INDEX_EXTENSION);
+ vcf.deleteOnExit();
+ tabix.deleteOnExit();
+ final FileOutputStream os = new FileOutputStream(vcf);
+ final VariantContextWriter vcfWriter =
+ VariantContextWriterFactory.createBlockCompressedVcf(vcf, os, headerFromFile.getSequenceDictionary(),
+ EnumSet.of(Options.INDEX_ON_THE_FLY, Options.ALLOW_MISSING_FIELDS_IN_HEADER));
+ vcfWriter.writeHeader(headerFromFile);
+ final CloseableTribbleIterator<VariantContext> it = reader.iterator();
+ while (it.hasNext()) {
+ vcfWriter.add(it.next());
+ }
+ it.close();
+ vcfWriter.close();
+
+ // Hard to validate, so just confirm that index can be read.
+ new TabixIndex(tabix);
+ }
+}
diff --git a/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java b/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java
index e91fe79..beaa5a3 100644
--- a/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java
+++ b/src/tests/java/org/broadinstitute/variant/variantcontext/writer/VCFWriterUnitTest.java
@@ -30,31 +30,18 @@ import net.sf.samtools.util.TestUtil;
import org.broad.tribble.AbstractFeatureReader;
import org.broad.tribble.FeatureReader;
import org.broad.tribble.Tribble;
+import org.broad.tribble.util.TabixUtils;
import org.broadinstitute.variant.VariantBaseTest;
-import org.broadinstitute.variant.variantcontext.Allele;
-import org.broadinstitute.variant.variantcontext.Genotype;
-import org.broadinstitute.variant.variantcontext.GenotypeBuilder;
-import org.broadinstitute.variant.variantcontext.GenotypesContext;
-import org.broadinstitute.variant.variantcontext.VariantContext;
-import org.broadinstitute.variant.variantcontext.VariantContextBuilder;
-import org.broadinstitute.variant.vcf.VCFCodec;
-import org.broadinstitute.variant.vcf.VCFHeader;
-import org.broadinstitute.variant.vcf.VCFHeaderLine;
-import org.broadinstitute.variant.vcf.VCFHeaderVersion;
+import org.broadinstitute.variant.variantcontext.*;
+import org.broadinstitute.variant.vcf.*;
import org.testng.Assert;
+import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.EnumSet;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
+import java.util.*;
/**
@@ -69,40 +56,39 @@ public class VCFWriterUnitTest extends VariantBaseTest {
private Set<String> additionalColumns;
/** test, using the writer and reader, that we can output and input a VCF file without problems */
- @Test
- public void testBasicWriteAndRead() throws IOException {
- File fakeVCFFile = File.createTempFile("testBasicWriteAndRead.", ".vcf");
+ @Test(dataProvider = "vcfExtensionsDataProvider")
+ public void testBasicWriteAndRead(final String extension) throws IOException {
+ final File fakeVCFFile = File.createTempFile("testBasicWriteAndRead.", extension);
fakeVCFFile.deleteOnExit();
+ Tribble.indexFile(fakeVCFFile).deleteOnExit();
metaData = new HashSet<VCFHeaderLine>();
additionalColumns = new HashSet<String>();
- VCFHeader header = createFakeHeader(metaData,additionalColumns);
- final EnumSet<Options> options = EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER);
- VariantContextWriter writer = VariantContextWriterFactory.create(fakeVCFFile, createArtificialSequenceDictionary(), options);
+ final SAMSequenceDictionary sequenceDict = createArtificialSequenceDictionary();
+ final VCFHeader header = createFakeHeader(metaData, additionalColumns, sequenceDict);
+ final EnumSet<Options> options = EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER, Options.INDEX_ON_THE_FLY);
+ final VariantContextWriter writer = VariantContextWriterFactory.create(fakeVCFFile, sequenceDict, options);
writer.writeHeader(header);
writer.add(createVC(header));
writer.add(createVC(header));
writer.close();
- VCFCodec codec = new VCFCodec();
- VCFHeader headerFromFile = null;
- FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false);
- headerFromFile = (VCFHeader)reader.getHeader();
+ final VCFCodec codec = new VCFCodec();
+ final FeatureReader<VariantContext> reader = AbstractFeatureReader.getFeatureReader(fakeVCFFile.getAbsolutePath(), codec, false);
+ final VCFHeader headerFromFile = (VCFHeader)reader.getHeader();
int counter = 0;
// validate what we're reading in
- validateHeader(headerFromFile);
+ validateHeader(headerFromFile, sequenceDict);
try {
- Iterator<VariantContext> it = reader.iterator();
+ final Iterator<VariantContext> it = reader.iterator();
while(it.hasNext()) {
- VariantContext vc = it.next();
+ it.next();
counter++;
}
Assert.assertEquals(counter, 2);
- Tribble.indexFile(fakeVCFFile).delete();
- fakeVCFFile.delete();
}
- catch (IOException e ) {
+ catch (final IOException e ) {
throw new RuntimeException(e.getMessage());
}
@@ -114,12 +100,15 @@ public class VCFWriterUnitTest extends VariantBaseTest {
* @param additionalColumns the additional column names
* @return a fake VCF header
*/
- public static VCFHeader createFakeHeader(Set<VCFHeaderLine> metaData, Set<String> additionalColumns) {
+ public static VCFHeader createFakeHeader(final Set<VCFHeaderLine> metaData, final Set<String> additionalColumns,
+ final SAMSequenceDictionary sequenceDict) {
metaData.add(new VCFHeaderLine(VCFHeaderVersion.VCF4_0.getFormatString(), VCFHeaderVersion.VCF4_0.getVersionString()));
metaData.add(new VCFHeaderLine("two", "2"));
additionalColumns.add("extra1");
additionalColumns.add("extra2");
- return new VCFHeader(metaData, additionalColumns);
+ final VCFHeader ret = new VCFHeader(metaData, additionalColumns);
+ ret.setSequenceDictionary(sequenceDict);
+ return ret;
}
/**
@@ -127,23 +116,22 @@ public class VCFWriterUnitTest extends VariantBaseTest {
* @param header the VCF header
* @return a VCFRecord
*/
- private VariantContext createVC(VCFHeader header) {
+ private VariantContext createVC(final VCFHeader header) {
- return createVCGeneral(header,"chr1",1);
+ return createVCGeneral(header,"1",1);
}
- private VariantContext createVCGeneral(VCFHeader header,String chrom, int position) {
- List<Allele> alleles = new ArrayList<Allele>();
- Set<String> filters = null;
- Map<String, Object> attributes = new HashMap<String,Object>();
- GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
+ private VariantContext createVCGeneral(final VCFHeader header, final String chrom, final int position) {
+ final List<Allele> alleles = new ArrayList<Allele>();
+ final Map<String, Object> attributes = new HashMap<String,Object>();
+ final GenotypesContext genotypes = GenotypesContext.create(header.getGenotypeSamples().size());
alleles.add(Allele.create("A",true));
alleles.add(Allele.create("ACC",false));
attributes.put("DP","50");
- for (String name : header.getGenotypeSamples()) {
- Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make();
+ for (final String name : header.getGenotypeSamples()) {
+ final Genotype gt = new GenotypeBuilder(name,alleles.subList(1,2)).GQ(0).attribute("BB", "1").phased(true).make();
genotypes.add(gt);
}
return new VariantContextBuilder("RANDOM", chrom, position, position, alleles)
@@ -155,42 +143,48 @@ public class VCFWriterUnitTest extends VariantBaseTest {
* validate a VCF header
* @param header the header to validate
*/
- public void validateHeader(VCFHeader header) {
+ public void validateHeader(final VCFHeader header, final SAMSequenceDictionary sequenceDictionary) {
// check the fields
int index = 0;
- for (VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
+ for (final VCFHeader.HEADER_FIELDS field : header.getHeaderFields()) {
Assert.assertEquals(VCFHeader.HEADER_FIELDS.values()[index], field);
index++;
}
- Assert.assertEquals(header.getMetaDataInSortedOrder().size(), metaData.size());
+ Assert.assertEquals(header.getMetaDataInSortedOrder().size(), metaData.size() + sequenceDictionary.size());
index = 0;
- for (String key : header.getGenotypeSamples()) {
+ for (final String key : header.getGenotypeSamples()) {
Assert.assertTrue(additionalColumns.contains(key));
index++;
}
Assert.assertEquals(index, additionalColumns.size());
}
- @Test(enabled=true)
- public void TestWritingLargeVCF() throws FileNotFoundException, InterruptedException {
+ @Test(dataProvider = "vcfExtensionsDataProvider")
+ public void TestWritingLargeVCF(final String extension) throws FileNotFoundException, InterruptedException {
- Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
+ final Set<VCFHeaderLine> metaData = new HashSet<VCFHeaderLine>();
final Set<String> Columns = new HashSet<String>();
for (int i = 0; i < 123; i++) {
Columns.add(String.format("SAMPLE_%d", i));
}
- final VCFHeader header = createFakeHeader(metaData,Columns);
+ final SAMSequenceDictionary dict = createArtificialSequenceDictionary();
+ final VCFHeader header = createFakeHeader(metaData,Columns, dict);
final EnumSet<Options> options = EnumSet.of(Options.ALLOW_MISSING_FIELDS_IN_HEADER,Options.INDEX_ON_THE_FLY);
final File tempDir = TestUtil.getTempDirectory("VCFWriter", "StaleIndex");
tempDir.deleteOnExit();
- final File vcf = new File(tempDir, "test.vcf");
- final File vcfIndex = new File(tempDir, "test.vcf.idx");
- final SAMSequenceDictionary dict=createArtificialSequenceDictionary();
+ final File vcf = new File(tempDir, "test" + extension);
+ final String indexExtension;
+ if (extension.equals(".vcf.gz")) {
+ indexExtension = TabixUtils.STANDARD_INDEX_EXTENSION;
+ } else {
+ indexExtension = ".idx";
+ }
+ final File vcfIndex = new File(vcf.getAbsolutePath() + indexExtension);
for(int count=1;count<2; count++){
final VariantContextWriter writer = VariantContextWriterFactory.create(vcf, dict, options);
@@ -198,7 +192,7 @@ public class VCFWriterUnitTest extends VariantBaseTest {
for (int i = 1; i < 17 ; i++) { // write 17 chromosomes
for (int j = 1; j < 10; j++) { //10 records each
- writer.add(createVCGeneral(header, String.format("chr%d", i), j * 100));
+ writer.add(createVCGeneral(header, String.format("%d", i), j * 100));
}
}
writer.close();
@@ -206,5 +200,15 @@ public class VCFWriterUnitTest extends VariantBaseTest {
Assert.assertTrue(vcf.lastModified() <= vcfIndex.lastModified());
}
}
+
+ @DataProvider(name = "vcfExtensionsDataProvider")
+ public Object[][]vcfExtensionsDataProvider() {
+ return new Object[][] {
+ // TODO: BCF doesn't work because header is not properly constructed.
+ // {".bcf"},
+ {".vcf"},
+ {".vcf.gz"}
+ };
+ }
}
diff --git a/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/HiSeq2000TileMetricsOut.bin b/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/HiSeq2000TileMetricsOut.bin
deleted file mode 100755
index fe6a428..0000000
Binary files a/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/HiSeq2000TileMetricsOut.bin and /dev/null differ
diff --git a/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/HiSeq2500TileMetricsOut.bin b/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/HiSeq2500TileMetricsOut.bin
deleted file mode 100755
index d57208c..0000000
Binary files a/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/HiSeq2500TileMetricsOut.bin and /dev/null differ
diff --git a/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/MiSeqTileMetricsOut.bin b/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/MiSeqTileMetricsOut.bin
deleted file mode 100755
index 082ccc9..0000000
Binary files a/testdata/net/sf/picard/illumina/parser/TileMetricsUtilTest/MiSeqTileMetricsOut.bin and /dev/null differ
diff --git a/testdata/tribble/tabix/bigger.vcf.gz.tbi b/testdata/tribble/tabix/bigger.vcf.gz.tbi
new file mode 100644
index 0000000..232a1a2
Binary files /dev/null and b/testdata/tribble/tabix/bigger.vcf.gz.tbi differ
diff --git a/testdata/variant/ex2.bgzf.bcf b/testdata/variant/ex2.bgzf.bcf
new file mode 100755
index 0000000..eaa40af
Binary files /dev/null and b/testdata/variant/ex2.bgzf.bcf differ
diff --git a/testdata/variant/ex2.bgzf.bcf.csi b/testdata/variant/ex2.bgzf.bcf.csi
new file mode 100755
index 0000000..722375b
Binary files /dev/null and b/testdata/variant/ex2.bgzf.bcf.csi differ
diff --git a/testdata/variant/ex2.uncompressed.bcf b/testdata/variant/ex2.uncompressed.bcf
new file mode 100755
index 0000000..d0e41aa
Binary files /dev/null and b/testdata/variant/ex2.uncompressed.bcf differ
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/picard-tools.git
More information about the debian-med-commit
mailing list