[med-svn] [beagle] 02/04: Imported Upstream version 4.1~160222-8ef
Dylan Aïssi
bob.dybian-guest at moszumanska.debian.org
Sat Mar 26 08:07:13 UTC 2016
This is an automated email from the git hooks/post-receive script.
bob.dybian-guest pushed a commit to branch master
in repository beagle.
commit 486aca4430b06bfa4d06b5297062cd3c8fdcbf49
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date: Sat Mar 26 08:38:54 2016 +0100
Imported Upstream version 4.1~160222-8ef
---
beagleutil/BasicIntInterval.java | 148 +++++
beagleutil/CenteredIntIntervalTree.java | 616 +++++++++++++++++
beagleutil/ChromIds.java | 126 ++++
beagleutil/ChromInterval.java | 343 ++++++++++
beagleutil/IntInterval.java | 42 ++
beagleutil/IntIntervalTree.java | 145 ++++
beagleutil/Phase.java | 78 +++
beagleutil/SampleIds.java | 127 ++++
beagleutil/Samples.java | 225 +++++++
beagleutil/ThreadSafeIndexer.java | 153 +++++
blbutil/ByteIndexArray.java | 85 +++
blbutil/CharIndexArray.java | 85 +++
blbutil/Const.java | 93 +++
blbutil/FileIt.java | 70 ++
blbutil/FileUtil.java | 235 +++++++
blbutil/Filter.java | 100 +++
blbutil/IndexMap.java | 195 ++++++
blbutil/IndexSet.java | 152 +++++
blbutil/InputIt.java | 320 +++++++++
blbutil/IntArray.java | 102 +++
blbutil/IntList.java | 212 ++++++
blbutil/IntPair.java | 143 ++++
blbutil/IntSet.java | 265 ++++++++
blbutil/SampleFileIt.java | 41 ++
blbutil/ShiftedByteIndexArray.java | 86 +++
blbutil/StringUtil.java | 281 ++++++++
blbutil/Utilities.java | 208 ++++++
blbutil/Validate.java | 576 ++++++++++++++++
blbutil/WrappedIntArray.java | 58 ++
dag/Dag.java | 384 +++++++++++
dag/DagLevel.java | 241 +++++++
dag/DagUtil.java | 182 +++++
dag/HighCapacityDagLevel.java | 340 ++++++++++
dag/ImmutableDag.java | 265 ++++++++
dag/LinkageEquilibriumDag.java | 301 +++++++++
dag/LowCapacityDagLevel.java | 369 +++++++++++
dag/MergeableDag.java | 406 ++++++++++++
dag/MergeableDagLevel.java | 731 +++++++++++++++++++++
dag/Score.java | 200 ++++++
gpl_license | 674 +++++++++++++++++++
haplotype/BasicHapPairs.java | 129 ++++
haplotype/BasicSampleHapPairs.java | 260 ++++++++
haplotype/BitHapPair.java | 172 +++++
haplotype/ConsensusPhaser.java | 271 ++++++++
haplotype/GLSampleHapPairs.java | 152 +++++
haplotype/GenotypeCorrection.java | 273 ++++++++
haplotype/HapPair.java | 90 +++
haplotype/HapPairs.java | 132 ++++
haplotype/HapsMarkerIterator.java | 127 ++++
haplotype/RefHapPairs.java | 189 ++++++
haplotype/RevHapPair.java | 88 +++
haplotype/RevHapPairs.java | 121 ++++
haplotype/RevSampleHapPairs.java | 138 ++++
haplotype/SampleHapPairs.java | 114 ++++
haplotype/Weights.java | 150 +++++
haplotype/WrappedHapPair.java | 88 +++
ibd/HapSegment.java | 169 +++++
ibd/HaploidIbd.java | 264 ++++++++
ibd/Haplotype.java | 242 +++++++
ibd/IbdBaum.java | 292 ++++++++
ibd/IbdSegment.java | 247 +++++++
ibd/IbsHapSegments.java | 353 ++++++++++
main/AlleleProbs.java | 162 +++++
main/BasicAlleleProbs.java | 139 ++++
main/BasicGenotypeValues.java | 116 ++++
main/ConstrainedAlleleProbs.java | 157 +++++
main/CurrentData.java | 484 ++++++++++++++
main/GeneticMap.java | 142 ++++
main/GenotypeValues.java | 134 ++++
main/HapAlleleProbs.java | 93 +++
main/HapPairSampler.java | 233 +++++++
main/LiAndStephensHapSampler.java | 158 +++++
main/LowMemHapAlleleProbs.java | 203 ++++++
main/Main.java | 451 +++++++++++++
main/MainHelper.java | 261 ++++++++
main/NuclearFamilies.java | 374 +++++++++++
main/Par.java | 550 ++++++++++++++++
main/PlinkGeneticMap.java | 369 +++++++++++
main/PositionMap.java | 84 +++
main/RecombHapPairSampler.java | 178 +++++
main/RevGenotypeValues.java | 111 ++++
main/RunStats.java | 370 +++++++++++
main/SampleGenotypeValues.java | 183 ++++++
main/SampleHapPairAlleleProbs.java | 105 +++
main/WindowWriter.java | 274 ++++++++
net/sf/samtools/Defaults.java | 49 ++
net/sf/samtools/FileTruncatedException.java | 46 ++
net/sf/samtools/SAMException.java | 44 ++
net/sf/samtools/SAMFormatException.java | 44 ++
net/sf/samtools/util/BinaryCodec.java | 662 +++++++++++++++++++
.../util/BlockCompressedFilePointerUtil.java | 101 +++
.../samtools/util/BlockCompressedInputStream.java | 484 ++++++++++++++
.../samtools/util/BlockCompressedOutputStream.java | 312 +++++++++
.../util/BlockCompressedStreamConstants.java | 118 ++++
net/sf/samtools/util/BlockGunzipper.java | 115 ++++
net/sf/samtools/util/HttpUtils.java | 102 +++
net/sf/samtools/util/IOUtil.java | 124 ++++
net/sf/samtools/util/RuntimeEOFException.java | 46 ++
net/sf/samtools/util/RuntimeIOException.java | 46 ++
net/sf/samtools/util/SeekableBufferedStream.java | 90 +++
net/sf/samtools/util/SeekableFileStream.java | 69 ++
net/sf/samtools/util/SeekableHTTPStream.java | 153 +++++
net/sf/samtools/util/SeekableStream.java | 37 ++
net/sf/samtools/util/StringUtil.java | 460 +++++++++++++
sample/ConsumeSingleSamples.java | 163 +++++
sample/DiploidStates.java | 88 +++
sample/DuoBaumLevel.java | 641 ++++++++++++++++++
sample/DuoNodes.java | 338 ++++++++++
sample/HapBaumLevel.java | 445 +++++++++++++
sample/HapNodes.java | 233 +++++++
sample/HaplotypeCoder.java | 204 ++++++
sample/ImputationData.java | 341 ++++++++++
sample/LSHapBaum.java | 324 +++++++++
sample/RecombSingleBaum.java | 326 +++++++++
sample/RecombSingleBaumLevel.java | 613 +++++++++++++++++
sample/RecombSingleNodes.java | 361 ++++++++++
sample/RefHapSeg.java | 283 ++++++++
sample/RefHapSegs.java | 221 +++++++
sample/RestrictedDag.java | 402 +++++++++++
sample/SamplerData.java | 240 +++++++
sample/SingleBaum.java | 301 +++++++++
sample/SingleBaumInterface.java | 108 +++
sample/SingleBaumLevel.java | 530 +++++++++++++++
sample/SingleNodes.java | 294 +++++++++
vcf/AL.java | 118 ++++
vcf/AllData.java | 355 ++++++++++
vcf/BasicGL.java | 193 ++++++
vcf/BasicMarker.java | 547 +++++++++++++++
vcf/BitSetGT.java | 482 ++++++++++++++
vcf/BitSetRefGT.java | 252 +++++++
vcf/Bref.java | 550 ++++++++++++++++
vcf/BrefIt.java | 350 ++++++++++
vcf/ByteArrayRefGT.java | 222 +++++++
vcf/Data.java | 249 +++++++
vcf/FilterUtil.java | 139 ++++
vcf/FuzzyGL.java | 143 ++++
vcf/GL.java | 182 +++++
vcf/GprobsStatistics.java | 313 +++++++++
vcf/HapsMarker.java | 82 +++
vcf/HbdAL.java | 117 ++++
vcf/IntervalVcfIt.java | 137 ++++
vcf/LowMafRefDiallelicGT.java | 220 +++++++
vcf/LowMafRefGT.java | 264 ++++++++
vcf/Marker.java | 177 +++++
vcf/MarkerContainer.java | 34 +
vcf/Markers.java | 389 +++++++++++
vcf/MaskedEndsGL.java | 182 +++++
vcf/NoPhaseGL.java | 120 ++++
vcf/RefGL.java | 172 +++++
vcf/RefIt.java | 449 +++++++++++++
vcf/RestrictedVcfWindow.java | 285 ++++++++
vcf/RevGL.java | 121 ++++
vcf/SeqCodedRefGT.java | 178 +++++
vcf/SplicedGL.java | 167 +++++
vcf/TargetData.java | 230 +++++++
vcf/VcfEmission.java | 208 ++++++
vcf/VcfEmissionCompressor.java | 298 +++++++++
vcf/VcfHeader.java | 289 ++++++++
vcf/VcfIt.java | 396 +++++++++++
vcf/VcfMetaInfo.java | 101 +++
vcf/VcfRecGTParser.java | 300 +++++++++
vcf/VcfRecord.java | 676 +++++++++++++++++++
vcf/VcfWindow.java | 321 +++++++++
vcf/VcfWriter.java | 380 +++++++++++
164 files changed, 38741 insertions(+)
diff --git a/beagleutil/BasicIntInterval.java b/beagleutil/BasicIntInterval.java
new file mode 100644
index 0000000..ed1d786
--- /dev/null
+++ b/beagleutil/BasicIntInterval.java
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2014 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package beagleutil;
+
+/**
+ * <p>Class {@code BasicIntInterval} represents an interval of
+ * consecutive integers.
+ * </p>
+ * Instances of class {@code BasicIntInterval} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+
+} */
+public final class BasicIntInterval implements IntInterval,
+ Comparable<BasicIntInterval> {
+
+ private final int start;
+ private final int end;
+
+ /**
+ * Constructs an {@code SimpleIntInterval} instance.
+ * @param start the starting integer (inclusive).
+ * @param end the ending integer (inclusive).
+ * @throws IllegalArgumentException if {@code start>end}.
+ */
+ public BasicIntInterval(int start, int end) {
+ if (start > end) {
+ String s = "start=" + start + " end=" + end;
+ throw new IllegalArgumentException(s);
+ }
+ this.start = start;
+ this.end = end;
+ }
+
+ @Override
+ public int start() {
+ return start;
+ }
+
+ @Override
+ public int end() {
+ return end;
+ }
+
+ /**
+ * <p>Returns a hash code value for the object.
+ * </p>
+ * <p>The hash code is defined by the following calculation:
+ * </p>
+ * <pre>
+ int hash = 3;
+ hash += 59 * hash + this.start();
+ hash += 59 * hash + this.end();
+ * </pre>
+ * @return a hash code value for the object.
+ */
+ @Override
+ public int hashCode() {
+ int hash = 3;
+ hash = 59 * hash + this.start;
+ hash = 59 * hash + this.end;
+ return hash;
+ }
+
+ /**
+ * Returns {@code true} if the specified object is an
+ * {@code BasicIntInterval} instance and
+ * {@code this.start() == ((BasicIntInterval) obj).start()}, and
+ * {@code this.end() == ((BasicIntInterval) obj).end()},
+ * and returns {@code false} otherwise.
+ *
+ * @param obj the object to be compared with {@code this} for equality.
+ * @return {@code true} if the specified object is equal to
+ * {@code this}, and returns false otherwise.
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this==obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ final BasicIntInterval other = (BasicIntInterval) obj;
+ if (this.start != other.start) {
+ return false;
+ }
+ return this.end==other.end;
+ }
+
+ /**
+ * Compares the specified {@code BasicIntInterval} with this for order,
+ * and returns a negative integer, zero, or a positive integer as
+ * {@code this} is less than, equal to, or greater than the specified
+ * {@code BasicIntInterval} object.
+ * {@code BasicIntInterval} objects are
+ * ordered by their start and their end values in that order.
+ *
+ * @param o the {@code BasicIntInterval} to be compared with this.
+ *
+ * @return a negative integer, zero, or a positive integer as this
+ * object is less than, equal to, or greater than the specified object.
+ */
+ @Override
+ public int compareTo(BasicIntInterval o) {
+ if (this.start != o.start) {
+ return this.start < o.start ? -1 : 1;
+ }
+ else if (this.end != o.end) {
+ return this.end < o.end ? -1 : 1;
+ }
+ return 0;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append('[');
+ sb.append(start);
+ sb.append(", ");
+ sb.append(end);
+ sb.append(']');
+ return sb.toString();
+ }
+}
diff --git a/beagleutil/CenteredIntIntervalTree.java b/beagleutil/CenteredIntIntervalTree.java
new file mode 100644
index 0000000..e8b779e
--- /dev/null
+++ b/beagleutil/CenteredIntIntervalTree.java
@@ -0,0 +1,616 @@
+package beagleutil;
+
+import blbutil.Const;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+
+/**
+ * <p>Class {@code CenteredIntIntervalTree} implements a centered
+ * interval tree that stores {@code IntInterval} objects.
+ * </p>
+ * <p>Each node has a center point and holds the elements that contain
+ * that point.  Elements that end before a node's center are stored in
+ * the node's left subtree, and elements that start after the center are
+ * stored in the node's right subtree.  Child nodes are created on demand
+ * when elements are added.
+ * </p>
+ * Instances of class {@code CenteredIntIntervalTree} are not thread-safe.
+ * @param <E> the objects stored by {@code this}.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class CenteredIntIntervalTree<E extends IntInterval & Comparable<E>>
+        implements IntIntervalTree<E> {
+
+    private final int start;
+    private final int end;
+    private int size;
+    private Node<E> root;
+
+    /**
+     * Creates a new {@code CenteredIntIntervalTree} instance for the
+     * specified range.
+     * @param start the minimum start value (inclusive) for intervals stored in
+     * this interval tree
+     * @param end the maximum end value (inclusive) for intervals stored
+     * in this interval tree
+     *
+     * @throws IllegalArgumentException if {@code end < start}
+     */
+    public CenteredIntIntervalTree(int start, int end) {
+        if (end < start) {
+            String s = "end= " + end + " start=" + start;
+            throw new IllegalArgumentException(s);
+        }
+        // long arithmetic: (end - start + 1) overflows an int when the
+        // range contains more than Integer.MAX_VALUE integers
+        long length = (long) end - start + 1;
+        int center = (int) (start + (length/2));
+        this.start = start;
+        this.end = end;
+        this.size = 0;
+        this.root = new Node<>(center);
+    }
+
+    /**
+     * Returns the minimum start value (inclusive) for intervals stored in
+     * this interval tree.
+     * @return the minimum start value (inclusive)
+     */
+    @Override
+    public int start() {
+        return start;
+    }
+
+    /**
+     * Returns the maximum end value (inclusive) for intervals stored in
+     * this interval tree.
+     * @return the maximum end value (inclusive)
+     */
+    @Override
+    public int end() {
+        return end;
+    }
+
+    /**
+     * Removes all elements from this interval tree.  The existing nodes
+     * are emptied but are kept for reuse.
+     */
+    @Override
+    public void clear() {
+        clear(root);
+        size = 0;
+    }
+
+    /* Empties the specified node and, recursively, all its descendants. */
+    private void clear(Node<E> tree) {
+        if (tree==null) {
+            return;
+        }
+        tree.clear();
+        clear(tree.leftChild);
+        clear(tree.rightChild);
+    }
+
+    /**
+     * Adds the specified element to this interval tree if it is not
+     * already present.
+     * @param element the element to be added
+     * @return {@code true} if the element was added, and {@code false}
+     * if the tree already contained the element
+     * @throws IllegalArgumentException if
+     * {@code element.start() < this.start() || element.end() > this.end()}
+     * @throws NullPointerException if {@code element == null}
+     */
+    @Override
+    public boolean add(E element) {
+        if (element.start() < start || element.end() > end) {
+            // both bounds are inclusive, so the range is printed as closed
+            String s = "element out of range [" + start + ", " + end + "] : "
+                    + element;
+            throw new IllegalArgumentException(s);
+        }
+        boolean added = add(root, element);
+        if (added) {
+            ++size;
+        }
+        return added;
+    }
+
+    /*
+     * Descends from the specified node to the first node whose center is
+     * contained in the element, creating child nodes as needed, and stores
+     * the element in that node.
+     */
+    private boolean add(Node<E> tree, E element) {
+        if (element.end() < tree.center) {
+            if (tree.leftChild==null) {
+                int nextOffset = nextOffset(tree);
+                tree.leftChild = new Node<>(tree.center - nextOffset);
+                tree.leftChild.parent = tree;
+            }
+            return add(tree.leftChild, element);
+        }
+        else if (element.start() > tree.center) {
+            if (tree.rightChild==null) {
+                int nextOffset = nextOffset(tree);
+                tree.rightChild = new Node<>(tree.center + nextOffset);
+                tree.rightChild.parent = tree;
+            }
+            return add(tree.rightChild, element);
+        }
+        else {
+            return tree.add(element);
+        }
+    }
+
+    /*
+     * Returns the distance between the center of the specified node and
+     * the center of a new child of that node: half (rounded up) of the
+     * distance between the node and its parent, or of half the tree's
+     * range if the node is the root.
+     */
+    private int nextOffset(Node<E> node) {
+        // long arithmetic avoids int overflow for very wide ranges
+        long lastOffset;
+        if (node.parent==null) {
+            lastOffset = ((long) end - start + 1)/2;
+        }
+        else {
+            lastOffset = Math.abs((long) node.center - node.parent.center);
+        }
+        assert lastOffset > 0;
+        return (int) ((lastOffset+1)/2);
+    }
+
+    /**
+     * Returns {@code true} if this interval tree contains the specified
+     * element, and returns {@code false} otherwise.
+     * @param element the element whose presence is to be tested
+     * @return {@code true} if this interval tree contains the element
+     * @throws NullPointerException if {@code element == null}
+     */
+    @Override
+    public boolean contains(E element) {
+        return contains(root, element);
+    }
+
+    /* Searches the subtree rooted at the specified node for the element. */
+    private boolean contains(Node<E> tree, E element) {
+        if (tree==null) {
+            return false;
+        }
+        else if (element.end() < tree.center) {
+            return contains(tree.leftChild, element);
+        }
+        else if (element.start() > tree.center) {
+            return contains(tree.rightChild, element);
+        }
+        else {
+            return tree.contains(element);
+        }
+    }
+
+    /**
+     * Removes the specified element from this interval tree if it is
+     * present.
+     * @param element the element to be removed
+     * @return {@code true} if the element was removed, and {@code false}
+     * if the tree did not contain the element
+     * @throws NullPointerException if {@code element == null}
+     */
+    @Override
+    public boolean remove(E element) {
+        boolean removed = remove(root, element);
+        if (removed) {
+            --size;
+        }
+        return removed;
+    }
+
+    /* Removes the element from the subtree rooted at the specified node. */
+    private boolean remove(Node<E> tree, E element) {
+        if (tree==null) {
+            return false;
+        }
+        else if (element.end() < tree.center) {
+            return remove(tree.leftChild, element);
+        }
+        else if (element.start() > tree.center) {
+            return remove(tree.rightChild, element);
+        }
+        else {
+            return tree.remove(element);
+        }
+    }
+
+    /**
+     * Adds the elements of this interval tree that contain the specified
+     * point to the specified collection.
+     * @param point a point
+     * @param collection the collection to which matching elements are added
+     * @throws NullPointerException if {@code collection == null} and a
+     * matching element is found
+     */
+    @Override
+    public void intersect(final int point, Collection<E> collection) {
+        intersect(root, point, collection);
+    }
+
+    /*
+     * Collects matching elements from the specified node and then from
+     * the only child subtree that can hold elements containing the point.
+     */
+    private void intersect(Node<E> tree, int point, Collection<E> collection) {
+        if (tree==null) {
+            return;
+        }
+        tree.intersect(point, collection);
+        if (point < tree.center) {
+            intersect(tree.leftChild, point, collection);
+        }
+        else if (point > tree.center) {
+            intersect(tree.rightChild, point, collection);
+        }
+    }
+
+    /**
+     * Adds the elements of this interval tree that overlap any part of
+     * the interval {@code [start, end]} to the specified collection.
+     * @param start the start (inclusive) of the query interval
+     * @param end the end (inclusive) of the query interval
+     * @param collection the collection to which matching elements are added
+     */
+    @Override
+    public void intersectPart(int start, int end, Collection<E> collection) {
+        intersectPart(root, start, end, collection);
+    }
+
+    /*
+     * Collects overlapping elements from the specified node and from each
+     * child subtree that the query interval extends into.
+     */
+    private void intersectPart(Node<E> tree, int start, int end,
+            Collection<E> collection) {
+        if (tree==null) {
+            return;
+        }
+        tree.intersectPart(start, end, collection);
+        if (start < tree.center) {
+            intersectPart(tree.leftChild, start, end, collection);
+        }
+        if (end > tree.center) {
+            intersectPart(tree.rightChild, start, end, collection);
+        }
+    }
+
+    /**
+     * Adds the elements of this interval tree that contain the entire
+     * interval {@code [start, end]} to the specified collection.
+     * @param start the start (inclusive) of the query interval
+     * @param end the end (inclusive) of the query interval
+     * @param collection the collection to which matching elements are added
+     */
+    @Override
+    public void intersectAll(int start, int end, Collection<E> collection) {
+        intersectAll(root, start, end, collection);
+    }
+
+    /*
+     * Collects containing elements from the specified node and from the
+     * child subtree (if any) that lies on the same side of the node's
+     * center as the whole query interval.
+     */
+    private void intersectAll(Node<E> tree, int start, int end,
+            Collection<E> collection) {
+        if (tree==null) {
+            return;
+        }
+        tree.intersectAll(start, end, collection);
+        if (end < tree.center) {
+            intersectAll(tree.leftChild, start, end, collection);
+        }
+        if (start > tree.center) {
+            intersectAll(tree.rightChild, start, end, collection);
+        }
+    }
+
+    /**
+     * Returns {@code true} if this interval tree contains no elements.
+     * @return {@code true} if this interval tree contains no elements
+     */
+    @Override
+    public boolean isEmpty() {
+        return size==0;
+    }
+
+    /**
+     * Returns the number of elements in this interval tree.
+     * @return the number of elements in this interval tree
+     */
+    @Override
+    public int size() {
+        return size;
+    }
+
+    /**
+     * Returns an array containing all of the elements of this interval
+     * tree.  Elements are listed node by node using an in-order traversal
+     * of the nodes, and within a node in order of increasing start value.
+     * The runtime component type of the returned array is
+     * {@code IntInterval}.
+     * @return an array containing all of the elements of this interval tree
+     */
+    @Override
+    @SuppressWarnings("unchecked")
+    public E[] toArray() {
+        List<E> list = new ArrayList<>(size);
+        toArray(root, list);
+        // The erasure of E is IntInterval, so the returned array must have
+        // runtime type IntInterval[].  Casting the Object[] returned by
+        // List.toArray() to E[] would throw a ClassCastException.
+        return list.toArray((E[]) new IntInterval[0]);
+    }
+
+    /* Appends the elements of the specified subtree to the list. */
+    private void toArray(Node<E> tree, List<E> list) {
+        if (tree==null) {
+            return;
+        }
+        toArray(tree.leftChild, list);
+        list.addAll(tree.sortedStart);
+        toArray(tree.rightChild, list);
+    }
+
+    /**
+     * Returns a string representation of {@code this}.  The
+     * exact details of the representation are unspecified and
+     * subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("[ CenteredIntIntervalTree: ");
+        sb.append(Const.nl);
+        sb.append("start=");
+        sb.append(start);
+        sb.append(" end=");
+        sb.append(end);
+        sb.append(" size=");
+        sb.append(size);
+        sb.append(Const.nl);
+        toString(root, sb);
+        sb.append(']');
+        return sb.toString();
+    }
+
+    /* Appends the nodes of the specified subtree using in-order traversal. */
+    private void toString(Node<E> tree, StringBuilder sb) {
+        if (tree==null) {
+            return;
+        }
+        toString(tree.leftChild, sb);
+        sb.append(tree);
+        toString(tree.rightChild, sb);
+    }
+
+    /*
+     * A tree node.  Every element stored in a node contains the node's
+     * center.  The elements are kept in two sorted sets: one ordered by
+     * increasing start value and one ordered by decreasing end value.
+     */
+    private static final class Node<E extends IntInterval & Comparable<E>> {
+
+        private final int center;
+        private final SortedSet<E> sortedStart;
+        private final SortedSet<E> sortedEnd;
+        private Node<E> parent;
+        private Node<E> leftChild;
+        private Node<E> rightChild;
+
+        /**
+         * Returns a {@code Comparator} that is consistent with equals and
+         * orders elements in order of increasing start values.  Elements
+         * with equal start values are ordered by their natural ordering.
+         * @return a {@code Comparator} that is consistent with equals and
+         * orders elements in order of increasing start values
+         */
+        private static <E extends IntInterval & Comparable<E>> Comparator<E> startComparator() {
+            return (E e1, E e2) -> {
+                int byStart = Integer.compare(e1.start(), e2.start());
+                return (byStart != 0) ? byStart : e1.compareTo(e2);
+            } ;
+        }
+
+        /**
+         * Returns a {@code Comparator} that is consistent with equals and
+         * orders elements in order of decreasing end values.  Elements
+         * with equal end values are ordered by their natural ordering.
+         * @return a {@code Comparator} that is consistent with equals and
+         * orders elements in order of decreasing end values
+         */
+        private static <E extends IntInterval & Comparable<E>> Comparator<E> endComparator() {
+            return (E e1, E e2) -> {
+                // arguments are swapped to obtain decreasing order
+                int byEnd = Integer.compare(e2.end(), e1.end());
+                return (byEnd != 0) ? byEnd : e1.compareTo(e2);
+            } ;
+        }
+
+        /* Creates an empty node with the specified center. */
+        Node(int center) {
+            this.center = center;
+            Comparator<E> startComparator = startComparator();
+            Comparator<E> endComparator = endComparator();
+            this.sortedStart = new TreeSet<>(startComparator);
+            this.sortedEnd = new TreeSet<>(endComparator);
+            this.leftChild = null;
+            this.rightChild = null;
+        }
+
+        /*
+         * Stores the element in this node and returns true if the element
+         * was not already present.  Throws an IllegalArgumentException if
+         * the element does not contain this node's center.
+         */
+        boolean add(E element) {
+            if (element.start() > center || element.end() < center) {
+                String s = "element does not overlap center=" + center + ": "
+                        + element;
+                throw new IllegalArgumentException(s);
+            }
+            boolean addedStart = sortedStart.add(element);
+            boolean addedEnd = sortedEnd.add(element);
+            assert addedStart == addedEnd;
+            return addedStart;
+        }
+
+        /* Returns true if this node stores the element. */
+        boolean contains(E element) {
+            boolean startContains = sortedStart.contains(element);
+            assert startContains == sortedEnd.contains(element);
+            return startContains;
+        }
+
+        /* Removes the element from this node; returns true if removed. */
+        boolean remove(E element) {
+            boolean removedStart = sortedStart.remove(element);
+            boolean removedEnd = sortedEnd.remove(element);
+            assert removedStart == removedEnd;
+            return removedStart;
+        }
+
+        /*
+         * Adds the elements of this node that contain the point to the
+         * collection.  Every element contains the center, so only the
+         * end point on the same side of the center as the query point
+         * needs to be tested, and the scan stops at the first miss.
+         */
+        void intersect(int point, Collection<E> collection) {
+            if (point <= center) {
+                for (E e : sortedStart) {
+                    if (e.start() > point) {
+                        break;
+                    }
+                    collection.add(e);
+                }
+            }
+            else {
+                for (E e : sortedEnd) {
+                    if (e.end() < point) {
+                        break;
+                    }
+                    collection.add(e);
+                }
+            }
+        }
+
+        /*
+         * Adds the elements of this node that overlap [start, end] to the
+         * collection.  If the query interval contains the center, every
+         * element of this node overlaps it.
+         */
+        void intersectPart(int start, int end, Collection<E> collection) {
+            if (end < center) {
+                for (E e : sortedStart) {
+                    if (e.start() > end) {
+                        break;
+                    }
+                    collection.add(e);
+                }
+            }
+            else if (start > center) {
+                for (E e : sortedEnd) {
+                    if (e.end() < start) {
+                        break;
+                    }
+                    collection.add(e);
+                }
+            }
+            else {
+                collection.addAll(sortedStart);
+            }
+        }
+
+        /*
+         * Adds the elements of this node that contain all of [start, end]
+         * to the collection.
+         */
+        void intersectAll(int start, int end, Collection<E> collection) {
+            for (E e : sortedStart) {
+                if (e.start() > start) {
+                    // remaining elements start even later
+                    break;
+                }
+                if (e.end() >= end) {
+                    collection.add(e);
+                }
+            }
+        }
+
+        /* Removes all elements from this node. */
+        void clear() {
+            sortedStart.clear();
+            sortedEnd.clear();
+        }
+
+        /**
+         * Returns a string representation of {@code this}.  The
+         * exact details of the representation are unspecified and
+         * subject to change.
+         * @return a string representation of {@code this}
+         */
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder();
+            sb.append(Const.nl);
+            sb.append("[ CenteredIntIntervalTree.Node:");
+            sb.append(Const.nl);
+            sb.append("center=");
+            sb.append(center);
+            sb.append(" parent.center=");
+            sb.append(parent!=null ? parent.center : null);
+            sb.append(" leftchild.center=");
+            sb.append(leftChild!=null ? leftChild.center : null);
+            sb.append(" rightchild.center=");
+            sb.append(rightChild!=null ? rightChild.center : null);
+            sb.append(Const.nl);
+            sb.append("sortedStart: ");
+            sb.append(sortedStart);
+            sb.append(Const.nl);
+            sb.append("sortedEnd: ");
+            sb.append(sortedEnd);
+            sb.append(Const.nl);
+            sb.append(']');
+            return sb.toString();
+        }
+    }
+
+    //<editor-fold defaultstate="collapsed" desc="code for testing class">
+    /*
+     * The main() method is for testing the CenteredIntIntervalTree class
+     */
+//    public static void main(String[] args) {
+//        main1(args);
+//        main2(args);
+//    }
+
+    private static void main1(String[] args) {
+        int length=16;
+        IntIntervalTree<BasicIntInterval> tree = new CenteredIntIntervalTree<>(0,length);
+        assert tree.start()==0;
+        assert tree.end()==length;
+        assert tree.isEmpty();
+        assert tree.isEmpty();
+        for (int j=0; j<length; ++j) {
+            BasicIntInterval i = new BasicIntInterval(j, j+1);
+            assert tree.contains(i)==false;
+            boolean added = tree.add(i);
+            assert added == true;
+            assert tree.contains(i)==true;
+            added = tree.add(i);
+            assert added == false;
+        }
+        assert tree.size()==length;
+        System.out.println("Initial Tree: " + java.util.Arrays.toString(tree.toArray()));
+        for (int j=0; j<length; j+=2) {
+            BasicIntInterval i = new BasicIntInterval(j, j+1);
+            assert tree.contains(i)==true;
+            boolean removed = tree.remove(i);
+            assert removed == true;
+            assert tree.contains(i)==false;
+            removed = tree.remove(i);
+            assert removed==false;
+        }
+        assert tree.size()==(length/2);
+        System.out.println("Pruned Tree: " + java.util.Arrays.toString(tree.toArray()));
+
+        List<BasicIntInterval> list = new ArrayList<>(length);
+        for (int j=0; j<length; ++j) {
+            tree.intersect(j, list);
+            System.out.println("point=" + j + ": " + list);
+            list.clear();
+        }
+
+        int intSize = 3;
+        for (int j=0; j<length; ++j) {
+            tree.intersectPart(j, j+intSize, list);
+            System.out.println("start=" + j + " end=" + (j+intSize) + ": " + list);
+            list.clear();
+        }
+
+        for (int j=0; j<length; ++j) {
+            tree.intersectAll(j, j, list);
+            System.out.println("start=" + j + " end=" + j + ": " + list);
+            list.clear();
+        }
+
+        tree.clear();
+        assert tree.isEmpty()==true;
+        System.out.println("Empty Tree: " + java.util.Arrays.toString(tree.toArray()));
+    }
+
+    private static void main2(String[] args) {
+        int length=16;
+        int nOverlaps = 4;
+        IntIntervalTree<BasicIntInterval> tree = new CenteredIntIntervalTree<>(-length,length);
+        assert tree.start()==-length;
+        assert tree.end()==length;
+        assert tree.isEmpty();
+        assert tree.isEmpty();
+        for (int j=1; j<=nOverlaps; ++j) {
+            BasicIntInterval i = new BasicIntInterval(-j - length/2, -j);
+            assert tree.contains(i)==false;
+            boolean added = tree.add(i);
+            assert added == true;
+            assert tree.contains(i)==true;
+            added = tree.add(i);
+            assert added == false;
+        }
+        assert tree.size()==nOverlaps;
+        for (int j=1; j<=nOverlaps; ++j) {
+            BasicIntInterval i = new BasicIntInterval(j, j + length/2);
+            assert tree.contains(i)==false;
+            boolean added = tree.add(i);
+            assert added == true;
+            assert tree.contains(i)==true;
+            added = tree.add(i);
+            assert added == false;
+        }
+        assert tree.size()==2*nOverlaps;
+        System.out.println(java.util.Arrays.toString(tree.toArray()));
+        System.out.println(tree);
+
+        for (int j=1; j<=nOverlaps; j+=2) {
+            BasicIntInterval i = new BasicIntInterval(-j - length/2, -j);
+            assert tree.contains(i)==true;
+            boolean removed = tree.remove(i);
+            assert removed == true;
+            assert tree.contains(i)==false;
+            removed = tree.remove(i);
+            assert removed==false;
+        }
+        for (int j=1; j<=nOverlaps; j+=2) {
+            BasicIntInterval i = new BasicIntInterval(j, j + length/2);
+            assert tree.contains(i)==true;
+            boolean removed = tree.remove(i);
+            assert removed == true;
+            assert tree.contains(i)==false;
+            removed = tree.remove(i);
+            assert removed==false;
+        }
+        assert tree.size()==nOverlaps;
+        System.out.println(java.util.Arrays.toString(tree.toArray()));
+
+        List<BasicIntInterval> list = new ArrayList<>(length);
+        for (int j=-length; j<length; ++j) {
+            tree.intersect(j, list);
+            System.out.println("point=" + j + ": " + list);
+            list.clear();
+        }
+
+        int intSize = 3;
+        for (int j=-length; j<length; ++j) {
+            tree.intersectPart(j, j+intSize, list);
+            System.out.println("start=" + j + " end=" + (j+intSize) + ": " + list);
+            list.clear();
+        }
+
+        tree.clear();
+        assert tree.isEmpty()==true;
+        System.out.println(java.util.Arrays.toString(tree.toArray()));
+    }
+    //</editor-fold>
+}
diff --git a/beagleutil/ChromIds.java b/beagleutil/ChromIds.java
new file mode 100644
index 0000000..78c3e69
--- /dev/null
+++ b/beagleutil/ChromIds.java
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code ChromIds} is a singleton class that maintains an
+ * indexed list of chromosome identifiers.
+ * </p>
+ * The singleton instance of {@code ChromIds} is thread-safe.
+ *
+ * @author Brian L. Browning
+ */
+public final class ChromIds {
+
+    private static final ChromIds SINGLETON = new ChromIds();
+
+    private final ThreadSafeIndexer<String> indexer;
+
+    private ChromIds() {
+        // private: the only instance is the singleton created above
+        this.indexer = new ThreadSafeIndexer<>(4);
+    }
+
+    /**
+     * Returns the singleton {@code ChromIds} instance.
+     * @return the singleton {@code ChromIds} instance
+     */
+    public static ChromIds instance() {
+        return SINGLETON;
+    }
+
+    /*
+     * Rejects an empty identifier.  A null identifier causes a
+     * NullPointerException when isEmpty() is invoked on it.
+     */
+    private static void requireNonEmpty(String id) {
+        if (id.isEmpty()) {
+            throw new IllegalArgumentException("id.isEmpty()");
+        }
+    }
+
+    /**
+     * Returns the index of the specified chromosome identifier.  If
+     * the chromosome identifier is not yet indexed, the chromosome
+     * identifier will be indexed. Chromosome identifier indices are
+     * assigned in consecutive order beginning with 0.
+     * @param id a chromosome identifier
+     * @return the index of the specified chromosome identifier
+     * @throws IllegalArgumentException if {@code id.isEmpty()}
+     * @throws NullPointerException if {@code id == null}
+     */
+    public int getIndex(String id) {
+        requireNonEmpty(id);
+        return indexer.getIndex(id);
+    }
+
+    /**
+     * Returns the index of the specified chromosome identifier, or returns
+     * {@code -1} if the specified chromosome identifier is not indexed.
+     *
+     * @param id a chromosome identifier.
+     * @return the index of the specified chromosome identifier, or
+     * {@code -1} if the specified chromosome identifier is not indexed.
+     *
+     * @throws IllegalArgumentException if {@code id.isEmpty()}
+     * @throws NullPointerException if {@code id == null}
+     */
+    public int getIndexIfIndexed(String id) {
+        requireNonEmpty(id);
+        return indexer.getIndexIfIndexed(id);
+    }
+
+    /**
+     * Returns the number of indexed chromosome identifiers.
+     * @return the number of indexed chromosome identifiers
+     */
+    public int size() {
+        return indexer.size();
+    }
+
+    /**
+     * Returns the chromosome identifier with the specified index.
+     * @param index a chromosome identifier index.
+     * @return the specified chromosome identifier.
+     * @throws IndexOutOfBoundsException if
+     * {@code index < 0 || index >= this.size()}
+     */
+    public String id(int index) {
+        return indexer.item(index);
+    }
+
+    /**
+     * Returns the list of chromosome identifiers as an array.
+     * The returned array will have length {@code this.size()}, and
+     * it will satisfy {@code this.ids()[k].equals(this.id(k)) == true}
+     * for {@code 0 <= k < this.size()}.
+     *
+     * @return an array of chromosome identifiers
+     */
+    public String[] ids() {
+        return indexer.items().toArray(new String[0]);
+    }
+
+    /**
+     * Returns {@code java.util.Arrays.toString(this.ids())}.
+     *
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        return Arrays.toString(ids());
+    }
+}
diff --git a/beagleutil/ChromInterval.java b/beagleutil/ChromInterval.java
new file mode 100644
index 0000000..0b1038c
--- /dev/null
+++ b/beagleutil/ChromInterval.java
@@ -0,0 +1,343 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+import blbutil.Const;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code ChromInterval} represents a chromosome interval whose
+ * end points are genome coordinates.
+ * </p>
+ *
+ * Instances of class {@code ChromInterval} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+*/
+public final class ChromInterval implements IntInterval,
+ Comparable<ChromInterval> {
+
+ private final int chromIndex;
+ private final int start;
+ private final int end;
+
+ /**
+ * Creates the chromosome interval that is bounded by two markers.
+ * The interval's chromosome index and coordinates are taken from
+ * {@code start.chromIndex()}, {@code start.pos()} and {@code end.pos()}.
+ *
+ * @param start the first marker in the interval
+ * @param end the last marker in the interval
+ * @throws IllegalArgumentException if {@code start.chromIndex()!=end.chromIndex()
+ * || start.pos()<0 || start.pos()>end.pos()}
+ * @throws NullPointerException if {@code start==null || end==null}
+ */
+ public ChromInterval(Marker start, Marker end) {
+ final int chrom = start.chromIndex();
+ if (chrom != end.chromIndex()) {
+ throw new IllegalArgumentException(
+ "start.chromIndex() != end.chromIndex()");
+ }
+ final int firstPos = start.pos();
+ if (firstPos < 0 || firstPos > end.pos()) {
+ throw new IllegalArgumentException("start=" + start + " end=" + end);
+ }
+ this.chromIndex = chrom;
+ this.start = firstPos;
+ this.end = end.pos();
+ }
+
+ /**
+ * Creates the chromosome interval with the specified chromosome
+ * and inclusive end points. The chromosome index is obtained from
+ * {@code ChromIds.instance().getIndex(chrom)}.
+ *
+ * @param chrom the chromosome identifier
+ * @param start the first genome coordinate in the interval
+ * @param end the last genome coordinate in the interval
+ * @throws IllegalArgumentException if {@code start<0 || start>end || chrom.isEmpty()}
+ * @throws NullPointerException if {@code chrom==null}
+ */
+ public ChromInterval(String chrom, int start, int end) {
+ final boolean validRange = (0 <= start) && (start <= end);
+ if (validRange == false) {
+ throw new IllegalArgumentException("start=" + start + " end=" + end);
+ }
+ this.start = start;
+ this.end = end;
+ this.chromIndex = ChromIds.instance().getIndex(chrom);
+ }
+
+ /**
+ * <p>Returns a {@code ChromInterval} instance corresponding to the
+ * specified string, or returns {@code null} if the specified
+ * string does not represent a valid chromosome interval or if the
+ * specified string is {@code null}.
+ * </p>
+ * The string representation of the chromosome interval must have one
+ * of the following forms:<br>
+ * <pre>
+ * [chrom]:[start]-[end]
+ * [chrom]
+ * [chrom]:
+ * [chrom]:[start]-
+ * [chrom]:-[end]
+ * </pre>
+ * where {@code [chrom]} is a non-empty chromosome identifier, and
+ * {@code [start]} and {@code [end]} are non-negative integers that
+ * do not exceed {@code Integer.MAX_VALUE} and that satisfy
+ * {@code [start]<=[end]}. If the string does not contain a start
+ * position, the {@code start()} method of the returned instance
+ * returns 0. If no end position is specified, the {@code end()}
+ * method of the returned instance returns {@code Integer.MAX_VALUE}.
+ * Leading and trailing whitespace in the string is ignored.
+ * @param str a chromosome interval.
+ * @return a {@code ChromInterval} instance corresponding to the
+ * specified string, or {@code null} if the specified string is
+ * {@code null} or does not represent a valid chromosome interval.
+ */
+ public static ChromInterval parse(String str) {
+ if (str==null) {
+ return null;
+ }
+ str = str.trim();
+ int length = str.length();
+ int start = 0;
+ int end = Integer.MAX_VALUE;
+ int chrDelim = str.lastIndexOf(Const.colon);
+ int posDelim = str.lastIndexOf(Const.hyphen);
+ if (length==0 || chrDelim==0) {
+ return null; // no chromosome identifier before the colon
+ }
+ else if (chrDelim == -1) {
+ return new ChromInterval(str, start, end);
+ }
+ else if (chrDelim == length -1) {
+ return new ChromInterval(str.substring(0, length-1), start, end);
+ }
+ else {
+ if ( (posDelim == -1) || (posDelim <= chrDelim)
+ || (chrDelim == length-2)
+ || (isValidPos(str, chrDelim+1, posDelim)==false)
+ || (isValidPos(str, posDelim+1, length)==false) ) {
+ return null;
+ }
+ try {
+ if (posDelim > chrDelim + 1) {
+ start = Integer.parseInt(str.substring(chrDelim+1, posDelim));
+ }
+ if (length > posDelim + 1) {
+ end = Integer.parseInt(str.substring(posDelim+1, length));
+ }
+ } catch (NumberFormatException e) {
+ return null; // all digits, but the value does not fit in an int
+ }
+ if (start < 0 || start > end) {
+ return null;
+ }
+ }
+ return new ChromInterval(str.substring(0, chrDelim), start, end);
+ }
+
+ /* Returns true if s[startIndex, endIndex) is empty or is a digit string with no leading zero; endIndex is exclusive */
+ private static boolean isValidPos(String s, int startIndex,
+ int endIndex) {
+ if (startIndex==endIndex) {
+ return true; // an omitted position is valid
+ }
+ int length = endIndex - startIndex;
+ if ((length > 1) && s.charAt(startIndex)=='0') { // '0', not the NUL character (char) 0
+ return false;
+ }
+ for (int j=startIndex; j<endIndex; ++j) {
+ char c = s.charAt(j);
+ if (Character.isDigit(c)==false) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * Returns the index of this interval's chromosome. The index refers to
+ * the chromosome identifiers stored in {@code ChromIds.instance()}.
+ * @return the chromosome index
+ */
+ public int chromIndex() {
+ return this.chromIndex;
+ }
+
+ /** Returns the identifier of this interval's chromosome.
+ * @return the chromosome identifier
+ */
+ public String chrom() {
+ final ChromIds chromIds = ChromIds.instance();
+ return chromIds.id(this.chromIndex);
+ }
+
+ /**
+ * Returns the first genome coordinate in this chromosome interval.
+ * @return the first (inclusive) genome coordinate of this interval
+ */
+ @Override
+ public int start() {
+ return this.start;
+ }
+
+ /**
+ * Returns the last genome coordinate in this chromosome interval.
+ * @return the last (inclusive) genome coordinate of this interval
+ */
+ @Override
+ public int end() {
+ return this.end;
+ }
+
+ /**
+ * <p>Compares this {@code ChromInterval} with the specified
+ * {@code ChromInterval} for order. The method returns -1, 0, or 1
+ * according to whether {@code this} is less than, equal to, or
+ * greater than the specified instance.
+ * </p>
+ * Intervals are ordered by {@code this.chromIndex()}, with ties
+ * broken by {@code this.start()}, and remaining ties broken by
+ * {@code this.end()}. Each field is compared in ascending order.
+ * @param o the {@code ChromInterval} to be compared with {@code this}
+ * @return -1, 0, or 1 according to whether {@code this} is less
+ * than, equal to, or greater than the specified instance
+ * @throws NullPointerException if {@code o == null}
+ */
+ @Override
+ public int compareTo(ChromInterval o) {
+ if (this.chromIndex == o.chromIndex) {
+ if (this.start == o.start) {
+ if (this.end == o.end) {
+ return 0;
+ }
+ return (this.end < o.end) ? -1 : 1;
+ }
+ return (this.start < o.start) ? -1 : 1;
+ }
+ return (this.chromIndex < o.chromIndex) ? -1 : 1;
+ }
+
+ /**
+ * <p>Returns a hash code value for this chromosome interval.
+ * </p>
+ * <p>The hash code equals the final value of {@code hash} in:
+ * </p>
+ * <pre>
+ * int hash = 7;
+ * hash = 67 * hash + this.chromIndex();
+ * hash = 67 * hash + this.start();
+ * hash = 67 * hash + this.end();
+ * </pre>
+ * @return a hash code value for this chromosome interval
+ */
+ @Override
+ public int hashCode() {
+ final int seed = 7;
+ final int multiplier = 67;
+ int withChrom = multiplier * seed + this.chromIndex;
+ int withStart = multiplier * withChrom + this.start;
+ return multiplier * withStart + this.end;
+ }
+
+ /**
+ * Compares the specified object with this chromosome interval for
+ * equality.
+ *
+ * Returns {@code true} if the specified object is a
+ * {@code ChromInterval} instance representing the same
+ * interval of genome coordinates as {@code this}, and
+ * returns {@code false} otherwise. Two intervals represent the
+ * same interval of genome coordinates if they have the same
+ * chromosome index, the same start, and the same end.
+ *
+ * @param obj the object to be compared with {@code this} for
+ * equality; may be {@code null}
+ * @return {@code true} if the specified object is a
+ * {@code ChromInterval} instance with the same chromosome index,
+ * start, and end as {@code this}, and {@code false} if it is
+ * {@code null}, an instance of another class, or a different
+ * interval
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null || getClass() != obj.getClass()) {
+ return false;
+ }
+ final ChromInterval that = (ChromInterval) obj;
+ boolean sameChrom = (this.chromIndex == that.chromIndex);
+ boolean sameStart = (this.start == that.start);
+ boolean sameEnd = (this.end == that.end);
+ return sameChrom && sameStart && sameEnd;
+ }
+
+ /**
+ * Returns the chromosome identifier, start, and end of this interval
+ * formatted as {@code this.chrom() + ":" + this.start() + "-" + this.end()}.
+ * @return {@code this.chrom() + ":" + this.start() + "-" + this.end()}
+ */
+ @Override
+ public String toString() {
+ return new StringBuilder()
+ .append(this.chrom())
+ .append(Const.colon)
+ .append(this.start)
+ .append(Const.hyphen)
+ .append(this.end)
+ .toString();
+ }
+
+ /**
+ * Tests whether two chromosome intervals share at least one genome
+ * coordinate. Intervals on different chromosomes never overlap.
+ * Interval end points are inclusive, so intervals that share only
+ * an end point do overlap.
+ *
+ * @param a a chromosome interval
+ * @param b a chromosome interval
+ * @return {@code true} if the specified chromosome intervals have
+ * non-empty intersection, and {@code false} otherwise
+ * @throws NullPointerException if {@code a == null || b == null}
+ */
+ public static boolean overlap(ChromInterval a, ChromInterval b) {
+ boolean sameChrom = (a.chromIndex() == b.chromIndex());
+ return sameChrom && (a.start() <= b.end()) && (b.start() <= a.end());
+ }
+
+ /**
+ * Returns the smallest chromosome interval that contains both of the
+ * specified overlapping chromosome intervals.
+ * @param a a chromosome interval
+ * @param b a chromosome interval
+ * @return the union of the specified overlapping chromosome intervals
+ * @throws IllegalArgumentException if {@code ChromInterval.overlap(a, b)==false}
+ */
+ public static ChromInterval merge(ChromInterval a, ChromInterval b) {
+ if (overlap(a, b)) {
+ int unionStart = Math.min(a.start(), b.start());
+ int unionEnd = Math.max(a.end(), b.end());
+ return new ChromInterval(a.chrom(), unionStart, unionEnd);
+ }
+ String msg = "non-overlappng intervals: " + a + " " + b;
+ throw new IllegalArgumentException(msg);
+ }
+}
diff --git a/beagleutil/IntInterval.java b/beagleutil/IntInterval.java
new file mode 100644
index 0000000..4e12819
--- /dev/null
+++ b/beagleutil/IntInterval.java
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+/**
+ * <p>Interface {@code IntInterval} represents an interval of
+ * consecutive integers with inclusive end points.
+ * </p>
+ * Instances of classes implementing {@code IntInterval} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface IntInterval {
+
+ /**
+ * Returns the start of the interval (inclusive).
+ * @return the first integer in the interval
+ */
+ public int start();
+
+ /**
+ * Returns the end of the interval (inclusive).
+ * @return the last integer in the interval
+ */
+ public int end();
+}
diff --git a/beagleutil/IntIntervalTree.java b/beagleutil/IntIntervalTree.java
new file mode 100644
index 0000000..18cd24d
--- /dev/null
+++ b/beagleutil/IntIntervalTree.java
@@ -0,0 +1,145 @@
+/*
+ * Copyright 2014 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package beagleutil;
+
+import java.util.Collection;
+
+/**
+ * Interface {@code IntIntervalTree} represents an interval
+ * tree whose elements are {@code IntInterval} objects.
+ *
+ * @param <E> the type of object stored in {@code this}
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+
+ */
+public interface IntIntervalTree<E extends IntInterval> {
+
+ /**
+ * Returns the minimum start (inclusive) of an interval
+ * that can be stored in this interval tree.
+ * @return the minimum start (inclusive) of an interval
+ * that can be stored in this interval tree
+ */
+ public int start();
+
+ /**
+ * Returns the maximum end (inclusive) of an interval
+ * that can be stored in this interval tree.
+ * @return the maximum end (inclusive) of an interval
+ * that can be stored in this interval tree
+ */
+ public int end();
+
+ /**
+ * Removes all of the elements from this interval tree.
+ */
+ public void clear();
+
+ /**
+ * Adds the specified element to this interval tree, and returns
+ * {@code true} if the interval tree is changed as a result of
+ * the call. The method returns {@code false} if
+ * {@code this.contains(element) == true} when the method is invoked.
+ *
+ * @param element the element to be added
+ * @return {@code true} if the interval tree changed as
+ * a result of the call
+ *
+ * @throws IllegalArgumentException if
+ * {@code element.start() < this.start() || element.end() > this.end()}
+ * @throws NullPointerException if {@code element == null}
+ */
+ public boolean add(E element);
+
+ /**
+ * Returns {@code true} if the interval tree contains the specified
+ * element, and returns {@code false} otherwise.
+ * @param element the element whose presence in the interval tree
+ * is to be tested
+ * @return {@code true} if the interval tree contains the specified
+ * element
+ * @throws NullPointerException if {@code element == null}
+ */
+ public boolean contains(E element);
+
+ /**
+ * Removes the specified element from this interval tree if the
+ * specified element is found in the interval tree.
+ * @param element the element to be removed from this interval tree
+ * @return {@code true} if the interval tree is changed as
+ * a result of the call
+ * @throws NullPointerException if {@code element == null}
+ */
+ public boolean remove(E element);
+
+ /**
+ * Adds the elements in this interval tree that intersect the specified
+ * point to the specified collection.
+ *
+ * @param point a point
+ * @param collection a collection to which will be added the elements of
+ * this interval tree that intersect the specified point
+ *
+ * @throws NullPointerException if {@code collection == null}
+ */
+ public void intersect(int point, Collection<E> collection);
+
+ /**
+ * Adds the elements in this interval tree that intersect any part of
+ * the specified interval to the specified collection.
+ *
+ * @param start the start (inclusive) of the specified interval
+ * @param end the end (inclusive) of the specified interval
+ * @param collection a collection to which will be added the elements of
+ * this interval tree that intersect any part of the specified interval
+ *
+ * @throws NullPointerException if {@code collection == null}
+ */
+ public void intersectPart(int start, int end, Collection<E> collection);
+
+ /**
+ * Adds the elements in this interval tree that contain
+ * the specified interval to the specified collection.
+ *
+ * @param start the start (inclusive) of the specified interval
+ * @param end the end (inclusive) of the specified interval
+ * @param collection a collection to which will be added the elements
+ * of this interval tree that contain the specified interval
+ *
+ * @throws NullPointerException if {@code collection == null}
+ */
+ public void intersectAll(int start, int end, Collection<E> collection);
+
+ /**
+ * Returns {@code true} if this interval tree contains no elements.
+ * @return {@code true} if this interval tree contains no elements
+ */
+ public boolean isEmpty();
+
+ /**
+ * Returns the number of elements in this interval tree.
+ *
+ * @return the number of elements in this interval tree
+ */
+ public int size();
+
+ /**
+ * Returns an array containing all of the elements of this interval tree.
+ * @return an array containing all of the elements of this interval tree
+ */
+ public E[] toArray();
+}
diff --git a/beagleutil/Phase.java b/beagleutil/Phase.java
new file mode 100644
index 0000000..a08e064
--- /dev/null
+++ b/beagleutil/Phase.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+/**
+ * <p>Class {@code Phase} represents the equivalence of two phased genotypes
+ * for a marker or for a set of markers. Genotype equivalence is defined
+ * in terms of allele equivalence. Two alleles are equivalent if
+ * either allele is missing or if both alleles are non-missing and equal.
+ * </p>
+ * <p>
+ * For the case of a single marker with phased (i.e. ordered) genotypes
+ * ({@code a1}, {@code a2})
+ * and ({@code b1}, {@code b2}), then
+ * <br>
+ * 1) the genotypes have IDENTICAL phase if a) alleles {@code a1}
+ * and {@code b1} are equivalent, b) alleles {@code a2} and
+ * {@code b2} are equivalent, and c) either alleles {@code a1}
+ * and {@code b2} are not equivalent or alleles
+ * {@code a2} and {@code b1} are not equivalent.
+ * <br>
+ * 2) the genotypes have OPPOSITE phase if a) alleles {@code a1}
+ * and {@code b2} are equivalent, b) alleles {@code a2} and
+ * {@code b1} are equivalent, and c) either alleles {@code a1}
+ * and {@code b1} are not equivalent or alleles {@code a2} and
+ * {@code b2} are not equivalent.
+ * <br>
+ * 3) the genotypes have UNKNOWN phase if a) alleles {@code a1}
+ * and {@code b1} are equivalent, b) alleles {@code a2} and
+ * {@code b2} are equivalent, c) alleles {@code a1} and
+ * {@code b2} are equivalent, and d) alleles {@code a2} and
+ * {@code b1} are equivalent.
+ * <br>
+ * 4) the genotypes have INCONSISTENT phase if a) either alleles
+ * {@code a1} and {@code b1} are not equivalent or alleles
+ * {@code a2} and {@code b2} are not equivalent, and
+ * b) either alleles {@code a1} and {@code b2} are not equivalent
+ * or alleles {@code a2} and {@code b1} are not equivalent.
+ * </p>
+ * For the case of two sets of phased genotypes for the same markers,
+ * the two sets have
+ * <br>
+ * 1) IDENTICAL phase if the phase is IDENTICAL for at least
+ * one marker and is either IDENTICAL or UNKNOWN for all markers.
+ * <br>
+ * 2) OPPOSITE phase if the phase is OPPOSITE for at
+ * least one marker and is either OPPOSITE or UNKNOWN for all markers.
+ * <br>
+ * 3) UNKNOWN phase if the phase is UNKNOWN for all markers.
+ * <br>
+ * 4) INCONSISTENT phase if a) the phase is INCONSISTENT for at least one
+ * marker or if b) the relative phase is IDENTICAL for at least one marker and
+ * OPPOSITE for at least one marker.
+ *
+ * @author Brian L. Browning
+ */
+public enum Phase {
+ IDENTICAL,
+ OPPOSITE,
+ UNKNOWN,
+ INCONSISTENT
+}
diff --git a/beagleutil/SampleIds.java b/beagleutil/SampleIds.java
new file mode 100644
index 0000000..b1783d5
--- /dev/null
+++ b/beagleutil/SampleIds.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code SampleIds} is a singleton class that stores the
+ * list of sample identifiers that have been indexed.
+ * </p>
+ * The singleton instance of {@code SampleIds} is thread-safe.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class SampleIds {
+
+ private static final SampleIds sampleIds = new SampleIds();
+
+ private final ThreadSafeIndexer<String> instance;
+
+ private SampleIds() {
+ // Private so that the singleton above is the only instance.
+ final int initialCapacity = 5000;
+ this.instance = new ThreadSafeIndexer<>(initialCapacity);
+ }
+
+ /**
+ * Returns the single shared {@code SampleIds} instance.
+ * @return the singleton {@code SampleIds} instance
+ */
+ public static SampleIds instance() {
+ return SampleIds.sampleIds;
+ }
+
+ /**
+ * Returns the index of the specified sample identifier, indexing
+ * the identifier first if it has not yet been indexed. Sample
+ * identifier indices are assigned in consecutive order beginning
+ * with 0.
+ * @param id a sample identifier
+ * @return the index of the specified sample identifier
+ * @throws IllegalArgumentException if {@code id.isEmpty()}
+ * @throws NullPointerException if {@code id == null}
+ */
+ public int getIndex(String id) {
+ if (id.isEmpty() == false) {
+ return instance.getIndex(id);
+ }
+ throw new IllegalArgumentException("id.isEmpty()");
+ }
+
+ /**
+ * Returns the index of the specified sample identifier, or returns
+ * {@code -1} if the specified sample identifier is not indexed.
+ *
+ * @param id a sample identifier
+ * @return the index of the specified sample identifier, or
+ * {@code -1} if the specified sample identifier is not indexed
+ *
+ * @throws IllegalArgumentException if {@code id.isEmpty()}
+ * @throws NullPointerException if {@code id == null}
+ */
+ public int getIndexIfIndexed(String id) {
+ if (id.isEmpty() == false) {
+ return instance.getIndexIfIndexed(id);
+ }
+ throw new IllegalArgumentException("id.isEmpty()");
+ }
+
+ /**
+ * Returns the number of sample identifiers that have been indexed.
+ * @return the number of indexed sample identifiers
+ */
+ public int size() {
+ return this.instance.size();
+ }
+
+ /**
+ * Returns the sample identifier that has the specified index.
+ * @param index a sample identifier index
+ * @return the sample identifier with the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public String id(int index) {
+ return this.instance.item(index);
+ }
+
+ /**
+ * Returns the list of indexed sample identifiers as an array.
+ * The returned array has length {@code this.size()}, and it satisfies
+ * {@code this.ids()[k].equals(this.id(k)) == true}
+ * for {@code 0 <= k && k < this.size()}.
+ *
+ * @return an array of sample identifiers
+ */
+ public String[] ids() {
+ final String[] emptyTemplate = new String[0];
+ return this.instance.items().toArray(emptyTemplate);
+ }
+
+ /**
+ * Returns {@code java.util.Arrays.toString(this.ids())}.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ final String[] allIds = this.ids();
+ return Arrays.toString(allIds);
+ }
+}
diff --git a/beagleutil/Samples.java b/beagleutil/Samples.java
new file mode 100644
index 0000000..b463561
--- /dev/null
+++ b/beagleutil/Samples.java
@@ -0,0 +1,225 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code Samples} stores an ordered list of distinct samples.
+ * </p>
+ * Instances of class {@code Samples} are immutable.
+ *
+ * @author Brian L. Browning
+ */
+public final class Samples {
+
+ private static final SampleIds sampleIds = SampleIds.instance();
+ private final int[] idIndexToIndex;
+ private final int[] indexToIdIndex;
+
+ /**
+ * Constructs a new instance of {@code Samples} from the specified
+ * list of sample identifier indices. The specified array is copied,
+ * so later changes to the array do not affect the new instance.
+ * @param idIndices an array of sample identifier indices
+ *
+ * @throws IllegalArgumentException if the specified array
+ * has two or more elements that are equal
+ * @throws IndexOutOfBoundsException if any element of the specified
+ * array is negative or greater than or equal to
+ * {@code beagleutil.SampleIds.size()}
+ * @throws NullPointerException if {@code idIndices == null}
+ */
+ public Samples(int[] idIndices) {
+ this.indexToIdIndex = idIndices.clone();
+ this.idIndexToIndex = idIndexToIndex(this.indexToIdIndex);
+ }
+
+ /* Builds the inverse map: element idIndex of the returned array is
+ * the position of idIndex in idIndices, or -1 if idIndex is absent.
+ */
+ private static int[] idIndexToIndex(int[] idIndices) {
+ final int[] inverse = new int[sampleIds.size()];
+ Arrays.fill(inverse, -1);
+ int position = 0;
+ for (int idIndex : idIndices) {
+ if (inverse[idIndex] != -1) {
+ throw new IllegalArgumentException("duplicate sample: "
+ + sampleIds.id(idIndex) + " (ID index: " + idIndex + ")");
+ }
+ inverse[idIndex] = position++;
+ }
+ return inverse;
+ }
+
+ /**
+ * Constructs and returns a {@code Samples} instance for the specified
+ * list of sample identifiers. Each identifier is indexed with
+ * {@code SampleIds.instance().getIndex()}.
+ * @param ids an array of sample identifiers
+ * @return a {@code Samples} instance for the specified identifiers
+ * @throws IllegalArgumentException if an element of the array is empty
+ * or if two or more elements of the array are equal as strings
+ * @throws NullPointerException if {@code ids == null} or an element is {@code null}
+ */
+ public static Samples fromIds(String[] ids) {
+ final int[] idIndices = new int[ids.length];
+ int next = 0;
+ for (String id : ids) {
+ idIndices[next++] = sampleIds.getIndex(id);
+ }
+ return new Samples(idIndices);
+ }
+
+ /**
+ * <p>Returns a hash code value for this list of samples.
+ * </p>
+ * <p>The hash code is defined by the following calculation:
+ * </p>
+ * <pre>
+ * int hash = 1;
+ * for (int j=0, n=this.nSamples(); j<n; ++j) {
+ * hash = 31 * hash + this.idIndex(j);
+ * }
+ * </pre>
+ * @return a hash code value for this list of samples
+ */
+ @Override
+ public int hashCode() {
+ return Arrays.hashCode(indexToIdIndex);
+ }
+
+ /**
+ * Compares the specified object with this list of samples for
+ * equality. Returns {@code true} if the specified object is a
+ * {@code Samples} object which represents the same ordered
+ * list of samples as {@code this}, and returns {@code false}
+ * otherwise.
+ * @param obj the object to be tested for equality with {@code this}
+ * @return {@code true} if the specified object is a {@code Samples}
+ * object which represents the same ordered list of samples as {@code this}
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == this) {
+ return true;
+ }
+ boolean sameClass = (obj != null) && (obj.getClass() == this.getClass());
+ if (sameClass == false) {
+ return false;
+ }
+ return Arrays.equals(indexToIdIndex, ((Samples) obj).indexToIdIndex);
+ }
+
+ /**
+ * Returns the sample identifier index of the sample that has the
+ * specified index in this list of samples.
+ * @param index a sample index
+ * @return the sample identifier index corresponding to the sample
+ * with the specified index in this list of samples
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nSamples()}
+ */
+ public int idIndex(int index) {
+ return this.indexToIdIndex[index];
+ }
+
+ /**
+ * Returns the index of the sample that corresponds to the
+ * specified sample identifier index, or returns {@code -1}
+ * if there is no corresponding sample in this list of samples.
+ * @param idIndex a sample identifier index
+ * @return the index of the sample that corresponds to the
+ * specified sample identifier index, or {@code -1}
+ * if there is no corresponding sample in this list of samples
+ * @throws IndexOutOfBoundsException if {@code idIndex < 0}
+ */
+ public int index(int idIndex) {
+ if (idIndex < idIndexToIndex.length) {
+ return idIndexToIndex[idIndex];
+ }
+ return -1;
+ }
+
+ /**
+ * Returns the index of the sample that corresponds to the
+ * specified sample identifier, or returns {@code -1}
+ * if there is no corresponding sample in this list of samples.
+ * An identifier that is not indexed in {@code SampleIds} has no sample.
+ * @param id a sample identifier
+ * @return the index of the sample that corresponds to the
+ * specified sample identifier, or {@code -1}
+ * if there is no corresponding sample in this list of samples
+ * @throws IllegalArgumentException if {@code id.isEmpty()}
+ * @throws NullPointerException if {@code id == null}
+ */
+ public int index(String id) {
+ final int idIndex = sampleIds.getIndexIfIndexed(id);
+ if (idIndex == -1) {
+ return -1;
+ }
+ return index(idIndex);
+ }
+
+ /**
+ * Returns the number of samples in this list.
+ * @return the number of samples in this list
+ */
+ public int nSamples() {
+ return this.indexToIdIndex.length;
+ }
+
+ /**
+ * Returns the identifier of the sample that has the specified
+ * index in this list of samples.
+ * @param index a sample index
+ * @return the identifier of the sample with the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nSamples()}
+ */
+ public String id(int index) {
+ final int idIndex = this.indexToIdIndex[index];
+ return sampleIds.id(idIndex);
+ }
+
+ /**
+ * Returns this list of samples as an array of sample identifiers.
+ * The returned array has length {@code this.nSamples()} and satisfies
+ * {@code this.ids()[j].equals(this.id(j))} for {@code 0 <= j && j < this.nSamples()}.
+ * @return this list of samples as an array of sample identifiers
+ */
+ public String[] ids() {
+ final int n = indexToIdIndex.length;
+ final String[] result = new String[n];
+ for (int j=0; j<n; ++j) {
+ result[j] = this.id(j);
+ }
+ return result;
+ }
+
+ /**
+ * Returns {@code java.util.Arrays.toString(this.ids())}.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ final String[] sampleNames = this.ids();
+ return Arrays.toString(sampleNames);
+ }
+}
diff --git a/beagleutil/ThreadSafeIndexer.java b/beagleutil/ThreadSafeIndexer.java
new file mode 100644
index 0000000..4361073
--- /dev/null
+++ b/beagleutil/ThreadSafeIndexer.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package beagleutil;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * <p>Class {@code ThreadSafeIndexer} indexes objects.
+ * </p>
+ * Instances of class {@code ThreadSafeIndexer} are thread-safe.
+ *
+ * @param <T> the type of the objects that are indexed
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class ThreadSafeIndexer<T> {
+
+ /**
+ * The default initial capacity, which is 500.
+ */
+ public static final int DEFAULT_INIT_CAPACITY = 500;
+
+ private final List<T> list ;
+ private final Map<T, Integer> map;
+
+ /**
+ * Creates a new {@code ThreadSafeIndexer} instance with the default
+ * initial capacity.
+ *
+ * @see #DEFAULT_INIT_CAPACITY
+ */
+ public ThreadSafeIndexer() {
+ this(DEFAULT_INIT_CAPACITY);
+ }
+
+ /**
+ * Creates a new {@code ThreadSafeIndexer} instance with the specified
+ * initial capacity.
+ * @param initCapacity the initial capacity
+ * @throws IllegalArgumentException if {@code initCapacity < 1}
+ */
+ public ThreadSafeIndexer(int initCapacity) {
+ if (initCapacity < 1) {
+ throw new IllegalArgumentException(String.valueOf(initCapacity));
+ }
+ this.list = new ArrayList<>(initCapacity);
+ this.map = new HashMap<>(initCapacity);
+ }
+
+    /**
+     * Returns the index of the specified object. If the object
+     * is not yet indexed, the object will be indexed. Indices
+     * are assigned in consecutive order beginning with 0.
+     * @param object the object whose index will be retrieved
+     * @return the index of the specified object
+     * @throws NullPointerException if {@code object==null}
+     */
+    public synchronized int getIndex(T object) {
+        if (object==null) {
+            throw new NullPointerException();
+        }
+        // Null values are never stored, so one get() tells us whether the
+        // object is indexed; no separate keySet().contains() is needed.
+        Integer index = map.get(object);
+        if (index == null) {
+            index = list.size();
+            list.add(object);
+            map.put(object, index);
+        }
+        return index;
+    }
+
+    /**
+     * Returns the index of the specified object, or returns
+     * {@code -1} if the specified object is not indexed.
+     *
+     * @param object an object
+     * @return the index of the specified object, or
+     * {@code -1} if the specified object is not indexed
+     *
+     * @throws NullPointerException if {@code object == null}.
+     */
+    public synchronized int getIndexIfIndexed(T object) {
+        if (object==null) {
+            throw new NullPointerException();
+        }
+        // Null values are never stored, so a null result means "not indexed".
+        Integer index = map.get(object);
+        if (index == null) {
+            return -1;
+        }
+        return index;
+    }
+
+ /**
+ * Returns the number of indexed objects.
+ * @return the number of indexed objects
+ */
+ public synchronized int size() {
+ return list.size();
+ }
+
+ /**
+ * Returns the object with the specified index.
+ * @param index an object index
+ * @return the object with the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code index<0 || index>=this.size()}
+ */
+ public synchronized T item(int index) {
+ return list.get(index);
+ }
+
+ /**
+ * Returns a list of the indexed objects. The returned list will
+ * have size {@code this.size()}, and it will satisfy
+ * {@code this.items().get(k).equals(this.item(k))==true}
+ * for {@code 0 <= k && k < this.size()}
+ *
+ * @return a list of the indexed objects
+ */
+ public synchronized List<T> items() {
+ return new ArrayList<>(list);
+ }
+
+ /**
+ * Returns {@code this.items().toString()}.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public synchronized String toString() {
+ return this.items().toString();
+ }
+}
diff --git a/blbutil/ByteIndexArray.java b/blbutil/ByteIndexArray.java
new file mode 100644
index 0000000..dd17933
--- /dev/null
+++ b/blbutil/ByteIndexArray.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+/**
+ * <p>Class {@code ByteIndexArray} represents an immutable
+ * {@code int[]} array that is stored as a {@code byte[]} array.
+ * </p>
+ * Instances of {@code ByteIndexArray} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class ByteIndexArray implements IntArray {
+
+ private final byte[] ba;
+
+ /**
+ * Constructs a new {@code ByteIndexArray} instance.
+ * @param ia an array of integers
+ * @throws IllegalArgumentException if
+ * {@code ia[j] < 0 || ia[j] > 127} for any index {@code j}
+ * satisfying {@code j >= 0 && j < ia.length}
+ * @throws NullPointerException if {@code ia == null}
+ */
+ public ByteIndexArray(int[] ia) {
+ this(ia, 0, ia.length);
+ }
+
+ /**
+ * Constructs a new {@code ByteIndexArray} instance from the
+ * specified subarray.
+ * @param ia an array of integers
+ * @param start the first element to be included (inclusive)
+ * @param end the last element to be included (exclusive)
+ * @throws IllegalArgumentException if
+ * {@code ia[j] < 0 || ia[j] > 127} for any index {@code j}
+ * satisfying {@code j >= start && j < end}
+ * @throws IndexOutOfBoundsException if {@code start < 0 or end > ia.length}
+ * @throws IllegalArgumentException if {@code start > end}
+ * @throws NullPointerException if {@code ia == null}
+ */
+ public ByteIndexArray(int[] ia, int start, int end) {
+ if (start > end) {
+ throw new IllegalArgumentException("start > end");
+ }
+ this.ba = new byte[end - start];
+ for (int j=start; j<end; ++j) {
+ if (ia[j] < 0 || ia[j] > 127) {
+ throw new IllegalArgumentException(String.valueOf(ia[j]));
+ }
+ ba[j - start] = (byte) ia[j];
+ }
+ }
+
+ @Override
+ public int size() {
+ return ba.length;
+ }
+
+ @Override
+ public int get(int index) {
+ return ba[index];
+ }
+
+ @Override
+ public String toString() {
+ return this.asString();
+ }
+}
diff --git a/blbutil/CharIndexArray.java b/blbutil/CharIndexArray.java
new file mode 100644
index 0000000..916c676
--- /dev/null
+++ b/blbutil/CharIndexArray.java
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+/**
+ * <p>Class {@code CharIndexArray} represents an immutable
+ * {@code int[]} array that is stored as a {@code char[]} array.
+ * </p>
+ * Instances of {@code CharIndexArray} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class CharIndexArray implements IntArray {
+
+ private final char[] ca;
+
+ /**
+ * Constructs a new {@code CharIndexArray} instance.
+ * @param ia an array of integers
+ * @throws IllegalArgumentException if
+ * {@code ia[j] < 0 || ia[j] > 65535} for any index {@code j}
+ * satisfying {@code j >= 0 && j < ia.length}
+ * @throws NullPointerException if {@code ia == null}
+ */
+ public CharIndexArray(int[] ia) {
+ this(ia, 0, ia.length);
+ }
+
+ /**
+ * Constructs a new {@code CharIndexArray} instance from the
+ * specified subarray.
+ * @param ia an array of integers
+ * @param start the first element to be included (inclusive)
+ * @param end the last element to be included (exclusive)
+ * @throws IllegalArgumentException if
+ * {@code ia[j] < 0 || ia[j] > 65535} for any index {@code j}
+ * satisfying {@code j >= start && j < end}
+ * @throws IndexOutOfBoundsException if {@code start < 0 or end > ia.length}
+ * @throws IllegalArgumentException if {@code start > end}
+ * @throws NullPointerException if {@code ia == null}
+ */
+ public CharIndexArray(int[] ia, int start, int end) {
+ if (start > end) {
+ throw new IllegalArgumentException("start > end");
+ }
+ this.ca = new char[end - start];
+ for (int j=start; j<end; ++j) {
+ if (ia[j] < 0 || ia[j] > 65535) {
+ throw new IllegalArgumentException(String.valueOf(ia[j]));
+ }
+ ca[j - start] = (char) ia[j];
+ }
+ }
+
+ @Override
+ public int size() {
+ return ca.length;
+ }
+
+ @Override
+ public int get(int index) {
+ return ca[index];
+ }
+
+ @Override
+ public String toString() {
+ return this.asString();
+ }
+}
diff --git a/blbutil/Const.java b/blbutil/Const.java
new file mode 100644
index 0000000..a7f4ccf
--- /dev/null
+++ b/blbutil/Const.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+/**
+ * Class {@code Const} provides public static final fields with
+ * string and character constants.
+ *
+ * @author Brian L Browning
+ */
+public class Const {
+
+ private Const() {
+ // private constructor to prevent instantiation.
+ }
+
+ /**
+ * The system-dependent line separator string:
+ * {@code System.getProperty("line.separator")}
+ */
+ public static final String nl = System.getProperty("line.separator");
+
+ /**
+ * The VCF missing-data symbol as a string: {@code "."}
+ */
+ public static final String MISSING_DATA_STRING = ".";
+
+ /**
+ * The VCF missing-data symbol as a character: {@code '.'}
+ */
+ public static final char MISSING_DATA_CHAR = '.';
+
+ /**
+ * The colon character: {@code ':'}
+ */
+ public static final char colon = ':';
+
+ /**
+ * The hyphen character: {@code '-'}
+ */
+ public static final char hyphen = '-';
+
+ /**
+ * The tab character: {@code '\t'}
+ */
+ public static final char tab = '\t';
+
+ /**
+ * The semicolon character: {@code ';'}
+ */
+ public static final char semicolon = ';';
+
+ /**
+ * The comma character: {@code ','}
+ */
+ public static final char comma = ',';
+
+ /**
+ * The phased allele separator: {@code '|'}
+ */
+ public static final char phasedSep = '|';
+
+ /**
+ * The unphased allele separator: {@code '/'}
+ */
+ public static final char unphasedSep = '/';
+
+ /**
+ * The value 1,000,000,000
+ */
+ public static final int giga = 1000000000;
+
+ /**
+ * The value 1,000,000
+ */
+ public static final int mega = 1000000;
+}
diff --git a/blbutil/FileIt.java b/blbutil/FileIt.java
new file mode 100644
index 0000000..d9c147c
--- /dev/null
+++ b/blbutil/FileIt.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.io.Closeable;
+import java.io.File;
+
+/**
+ * <p>An iterator for data elements in a file. If an IOException is thrown while
+ * reading a file, the IOException is trapped, an appropriate error message
+ * is written to standard out, and the Java Virtual Machine is
+ * terminated. The {@code Iterator.remove()} method is unsupported
+ * and throws an {@code UnsupportedOperationException}.
+ * </p>
+ * When the {@code FileIt} object is no longer needed,
+ * the {@code close()} method should be invoked to release any
+ * system resources controlled by the object. After calling {@code close()},
+ * invoking {@code hasNext()} returns {@code false}, and invoking
+ * {@code next()} will throw a {@code NoSuchElementException}.
+ *
+ * @param <E> the type of the elements returned by this iterator's
+ * {@code next()} method.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface FileIt<E> extends java.util.Iterator<E>, Closeable {
+
+ /**
+ * Returns the file from which the data are read, or
+ * {@code null} if the data are read from standard input or are
+ * computed data.
+ * @return the file from which the data are read, or
+ * {@code null} if the data are read from standard input or are
+ * computed data
+ */
+ File file();
+
+ /**
+ * Terminates the iteration and releases any system resources that
+ * are held by this object. After invoking {@code close}, further
+ * invocations of {@code close()} have no effect.
+ */
+ @Override
+ public void close();
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString();
+}
diff --git a/blbutil/FileUtil.java b/blbutil/FileUtil.java
new file mode 100644
index 0000000..8457dfc
--- /dev/null
+++ b/blbutil/FileUtil.java
@@ -0,0 +1,235 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.BufferedWriter;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.zip.GZIPOutputStream;
+import net.sf.samtools.util.BlockCompressedOutputStream;
+
+/**
+ * Class {@code FileUtil} contains static methods for working with files.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class FileUtil {
+
+ private FileUtil() {
+ // private constructor prevents instantiation
+ }
+
+ /**
+ * Returns a buffered {@code java.io.DataInputStream} reading from the
+ * specified file. If the input stream cannot be opened, an error message
+ * will be printed and the java interpreter will exit.
+ * @param file a file
+ * @return a buffered {@code java.io.DataInputStream} reading from the
+ * specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static DataInputStream dataInputStream(File file) {
+ DataInputStream dis = null;
+ try {
+ dis = new DataInputStream(new BufferedInputStream(
+ new FileInputStream(file)));
+ } catch (FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ return dis;
+ }
+
+ /**
+ * Returns a buffered {@code java.io.DataOutputStream} writing to
+ * the specified file. Any existing file corresponding to the
+ * {@code File} object will be deleted. If the file cannot be opened,
+ * an error message will be printed and the java interpreter will exit.
+ * @param file a file
+ * @return a buffered {@code java.io.DataOutputStream} writing to
+ * the specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static DataOutputStream dataOutputStream(File file) {
+ OutputStream dos = null;
+ try {
+ dos = new FileOutputStream(file);
+ } catch (FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ DataOutputStream out = new DataOutputStream(
+ new BufferedOutputStream(dos));
+ return out;
+ }
+
+ /**
+ * Returns a {@code java.io.PrintWriter} that writes
+ * to standard out.
+ *
+ * @return a {@code java.io.PrintWriter} that writes
+ * to standard out
+ */
+ public static PrintWriter stdOutPrintWriter() {
+ return new PrintWriter(
+ new BufferedOutputStream(System.out));
+ }
+
+ /**
+ * Returns a buffered {@code java.io.PrintWriter} writing to
+ * the specified file. The resulting file will be compressed using
+ * the GZIP compression algorithm. Any existing file corresponding
+ * to the specified file will be deleted. If the file
+ * cannot be opened, an error message will be printed and the
+ * java interpreter will exit.
+ * @param file a file
+ * @return a {@code java.io.PrintWriter} writing to the specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static PrintWriter gzipPrintWriter(File file) {
+ PrintWriter out = null;
+ try {
+ out = new PrintWriter(
+ new GZIPOutputStream(new FileOutputStream(file)));
+ } catch (IOException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ return out;
+ }
+
+ /**
+ * Returns a buffered {@code java.io.PrintWriter} writing to
+ * the specified file. The resulting file will be compressed using
+ * the BGZIP compression algorithm. Any existing file corresponding
+ * to the specified file will be deleted. If the file
+ * cannot be opened, an error message will be printed and the
+ * java interpreter will exit.
+ *
+ * @param file a file
+ * @return a buffered {@code java.io.PrintWriter} writing to
+ * the specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static PrintWriter bgzipPrintWriter(File file) {
+ PrintWriter out = null;
+ try {
+ OutputStream fout = new FileOutputStream(file);
+ out = new PrintWriter(new BlockCompressedOutputStream(
+ new BufferedOutputStream(fout), file));
+ } catch (FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ return out;
+ }
+
+ /**
+ * Returns a buffered {@code java.io.PrintWriter} writing to
+ * the specified file. Any existing file corresponding
+ * to the specified filename will be deleted. If the file
+ * cannot be opened, an error message will be printed and the
+ * java interpreter will exit.
+ * @param file a file
+ * @return a buffered {@code java.io.PrintWriter} writing to
+ * the specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static PrintWriter printWriter(File file) {
+ return printWriter(file, false);
+ }
+
+ /**
+ * Returns a buffered {@code java.io.PrintWriter} writing to
+ * the specified file. If {@code append == false}
+ * any existing file corresponding to the specified file will be deleted.
+ * If the file cannot be opened, an error message will be printed and the
+ * java interpreter will exit.
+ *
+ * @param file a file
+ * @param append {@code true} if the data will be appended
+ * to the end of any existing file
+ * @return a buffered {@code java.io.PrintWriter} writing to
+ * the specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static PrintWriter printWriter(File file, boolean append) {
+ PrintWriter out = null;
+ try {
+ out = new PrintWriter(
+ new BufferedWriter(new FileWriter(file, append)));
+ } catch (IOException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ return out;
+ }
+
+ /**
+ * Returns a non-buffered {@code java.io.PrintWriter} writing to
+ * the specified file.
+ * If {@code append == false} any existing file corresponding
+ * to the specified file will be deleted. If the file cannot be opened,
+ * an error message will be printed and the java interpreter will exit.
+ *
+ * @param file a file
+ * @param append {@code true} if the data will be appended
+ * to the end of any existing file
+ * @return a non-buffered {@code java.io.PrintWriter} writing to
+ * the specified file
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static PrintWriter nonBufferedPrintWriter(File file, boolean append) {
+ boolean autoflush = true;
+ PrintWriter pw = null;
+ try {
+ pw = new PrintWriter(new FileWriter(file, append), autoflush);
+ } catch (IOException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ return pw;
+ }
+
+ /**
+ * Returns a temporary {@code File} that will be deleted when
+ * the Java virtual machine exits.
+ *
+ * @param prefix the filename prefix.
+ *
+ * @return a new, empty temporary {@code File}
+ *
+ * @throws IllegalArgumentException if {@code prefix} contains fewer than
+ * three characters
+ */
+ public static File tempFile(String prefix) {
+ File tempFile = null;
+ try {
+ tempFile = File.createTempFile(prefix, null);
+ tempFile.deleteOnExit();
+ } catch (IOException e) {
+ Utilities.exit("Exception thrown by createTempFile: ", e);
+ }
+ return tempFile;
+ }
+}
diff --git a/blbutil/Filter.java b/blbutil/Filter.java
new file mode 100644
index 0000000..4949bf0
--- /dev/null
+++ b/blbutil/Filter.java
@@ -0,0 +1,100 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * <p>A filter for accepting or rejecting objects.
+ * </p>
+ * Instances of class {@code Filter} are required to be immutable.
+ *
+ * @param <E> the type of object that is filtered.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface Filter<E> {
+
+ /**
+ * Returns a filter that accepts all non-null objects.
+ * @param <E> the type of object that is filtered
+ * @return a filter that accepts all non-null objects
+ */
+ static <E> Filter<E> acceptAllFilter() {
+ return (E e) -> {
+ if (e == null) {
+ throw new NullPointerException("e==null");
+ }
+ return true;
+ };
+ }
+
+ /**
+ * Returns a filter that accepts all non-null objects that are
+ * contained in the specified collection.
+ * @param <E> the type of object that is filtered
+ * @param include the collection of objects that will be accepted by
+ * the filter
+ * @return a filter that accepts all non-null objects that are
+ * contained in the specified collection
+ * @throws NullPointerException if {@code include == null}
+ */
+ static <E> Filter<E> includeFilter(Collection<E> include) {
+ final Set<E> includeSet = new HashSet<>(include);
+ return (E e) -> {
+ if (e == null) {
+ throw new NullPointerException("e==null");
+ }
+ return includeSet.contains(e);
+ };
+ }
+
+    /**
+     * Returns a filter that accepts all non-null objects that are not
+     * contained in the specified collection.
+     * @param <E> the type of object that is filtered
+     * @param exclude the collection of objects that will be rejected
+     * by the filter
+     * @return a filter that accepts all non-null objects that are not
+     * contained in the specified collection
+     * @throws NullPointerException if {@code exclude == null}
+     */
+    static <E> Filter<E> excludeFilter(Collection<E> exclude) {
+        final Set<E> excludeSet = new HashSet<>(exclude);
+        return (E e) -> {
+            if (e == null) {
+                throw new NullPointerException("e==null");
+            }
+            return !excludeSet.contains(e);
+        };
+    }
+
+ /**
+ * Returns {@code true} if the specified object is
+ * accepted and returns {@code false} if the specified object
+ * is rejected.
+ * @param e the object to be filtered
+ * @return {@code true} if the specified object is
+ * accepted
+ * @throws NullPointerException if {@code e==null}
+ */
+ boolean accept(E e);
+}
diff --git a/blbutil/IndexMap.java b/blbutil/IndexMap.java
new file mode 100644
index 0000000..b15ea4a
--- /dev/null
+++ b/blbutil/IndexMap.java
@@ -0,0 +1,195 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code IndexMap} is a map whose keys are a bounded set of
+ * non-negative integers and whose values are integers.
+ * </p>
+ * <p>Class {@code IndexMap} supports a {@code clear()} method, but it does not
+ * support a {@code remove()} method.
+ * </p>
+ * <p>Class {@code IndexMap} is not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class IndexMap {
+
+ private final int nil;
+ private final int[] values;
+ private final int[] keys;
+ private int size = 0;
+
+ /**
+ * Creates a new instance of {@code IndexMap} whose {@code nil()} method
+ * will return the specified {@code nil} value.
+ * @param maxKey the maximum key
+ * @param nil the value that will be returned by the instance's
+ * {@code get()} method if a key has no assigned value
+ * @throws IllegalArgumentException if {@code maxKey < 0}
+ */
+ public IndexMap(int maxKey, int nil) {
+ if (maxKey < 0) {
+ throw new IllegalArgumentException(String.valueOf(maxKey));
+ }
+ this.nil = nil;
+ this.values = new int[maxKey+1];
+ this.keys = new int[maxKey+1];
+ Arrays.fill(values, nil);
+ }
+
+ /**
+ * Returns the value that is returned by {@code this.get()} if
+ * a key has no assigned value.
+ * @return the value that is returned by {@code this.get()} if
+ * a key has no assigned value
+ */
+ public int nil() {
+ return nil;
+ }
+
+ /**
+ * Adds the specified key and value to the map. If the map
+ * contains a value for the specified key when the method is invoked,
+ * the old value is replaced by the specified value.
+ *
+ * @param key the key
+ * @param value the value
+ * @return the previous value associated with {@code key}, or
+ * {@code this.nil()} if no such previous value exists
+ *
+ * @throws IllegalArgumentException if {@code value == this.nil()}
+ * @throws IndexOutOfBoundsException if
+ * {@code key < 0 || key > this.maxKey()}
+ */
+ public int put(int key, int value) {
+ if (value==nil) {
+ throw new IllegalArgumentException("value==nil()");
+ }
+ int prevValue = values[key];
+ if (prevValue == nil) {
+ keys[size++] = key;
+ }
+ this.values[key] = value;
+ return prevValue;
+ }
+
+ /**
+ * Returns the value associated with the specified key, or
+ * {@code this.nil()} if the specified key is not contained in the map.
+ * @param key the key
+ * @return the value associated with the specified key, or
+ * {@code this.nil()} if the specified key is not contained in the map.
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code key < 0 || key > this.maxKey()}
+ */
+ public int get(int key) {
+ return values[key];
+ }
+
+ /**
+ * Returns the number of key-value pairs in the map.
+ *
+ * @return the number of key-value pairs in the map
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Returns the maximum key.
+ *
+ * @return the maximum key
+ */
+ public int maxKey() {
+ return keys.length-1;
+ }
+
+ /**
+ * Removes all key-value pairs from the map.
+ */
+ public void clear() {
+ for (int j=0, n=size; j<n; ++j) {
+ values[keys[j]] = nil;
+ }
+ size = 0;
+ }
+
+ /**
+ * Returns the specified key in an enumeration of the keys in the map.
+ * @param index an index of an element in the enumeration
+ * @return the specified key in an enumeration of the key-value
+ * pairs in the map
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumeratedKey(int index) {
+ if (index>=size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return keys[index];
+ }
+
+ /**
+ * Returns the value associated with the specified key
+ * in an enumeration of the keys in the map.
+ * If {@code (index >= 0 && index < this.size())}, then the returned value
+ * will satisfy:
+ * {@code this.get(this.enumeratedKey(index)) == this.enumeratedValue(index)}.
+ * @param index an index of an element in the enumeration
+ * @return the value associated with the specified key
+ * in an enumeration of the keys in the map
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumeratedValue(int index) {
+ if (index>=size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return values[keys[index]];
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}.
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(80);
+ sb.append("size=");
+ sb.append(size);
+ sb.append(" {");
+ for (int j=0; j<size; ++j) {
+ sb.append(enumeratedKey(j));
+ sb.append(" : ");
+ sb.append(enumeratedValue(j));
+ if (j+1 < size) {
+ sb.append(Const.comma);
+ }
+ }
+ sb.append("}");
+ return sb.toString();
+ }
+}
diff --git a/blbutil/IndexSet.java b/blbutil/IndexSet.java
new file mode 100644
index 0000000..f2076e8
--- /dev/null
+++ b/blbutil/IndexSet.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code IndexSet} is a set that stores non-negative indices that are
+ * less than or equal to a specified maximum value.
+ * </p>
+ * <p>Class {@code IndexSet} supports a {@code clear()} method, but it does not
+ * support a {@code remove()} method.
+ * </p>
+ * <p>Class {@code IndexSet} is not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class IndexSet {
+
+ private final boolean[] isMember;
+ private final int[] members;
+ private int size = 0;
+
+ /**
+ * Constructs an empty {@code IndexSet} whose elements must be
+ * non-negative integers that do not exceed the specified
+ * maximum value.
+ *
+ * @param max the largest element that may be stored in the set
+ * @throws IllegalArgumentException if {@code max < 0}
+ */
+ public IndexSet(int max) {
+ if (max < 0) {
+ throw new IllegalArgumentException(String.valueOf(max));
+ }
+ this.isMember = new boolean[max + 1];
+ this.members = new int[max + 1];
+ }
+
+ /**
+ * Inserts the specified element into the set if it is not already present.
+ *
+ * @param element the element to be inserted
+ * @return {@code true} if the element was not already in the set, and
+ * {@code false} otherwise
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code element < 0 || element > this.maxPermittedElement()}
+ */
+ public boolean add(int element) {
+ if (isMember[element]) {
+ return false;
+ }
+ // record the element in insertion order, then mark it as present
+ members[size] = element;
+ ++size;
+ isMember[element] = true;
+ return true;
+ }
+
+ /**
+ * Reports whether the specified element is currently in the set.
+ *
+ * @param element an element
+ * @return {@code true} if the set contains the specified element, and
+ * {@code false} otherwise
+ * @throws IndexOutOfBoundsException if
+ * {@code element < 0 || element > this.maxPermittedElement()}
+ */
+ public boolean contains(int element) {
+ return isMember[element];
+ }
+
+ /**
+ * Returns the current number of elements in this set.
+ *
+ * @return the current number of elements in this set
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Returns the largest element that may be stored in the set.
+ *
+ * @return the largest element that may be stored in the set
+ */
+ public int maxPermittedElement() {
+ return members.length - 1;
+ }
+
+ /**
+ * Empties the set.
+ */
+ public void clear() {
+ // un-mark the stored elements one by one, last element first
+ while (size > 0) {
+ isMember[members[--size]] = false;
+ }
+ }
+
+ /**
+ * Returns the element at the specified position in an enumeration of
+ * the elements in the set.
+ * @param enumIndex a position in the enumeration of elements
+ * @return the element at the specified position in an enumeration of
+ * the elements in the set
+ * @throws IndexOutOfBoundsException if
+ * {@code enumIndex < 0 || enumIndex >= this.size()}
+ */
+ public int enumeratedValue(int enumIndex) {
+ if (size <= enumIndex) {
+ throw new IndexOutOfBoundsException(String.valueOf(enumIndex));
+ }
+ return members[enumIndex];
+ }
+
+ /**
+ * Returns a new array that holds the elements in this set.
+ * @return a new array that holds the elements in this set
+ */
+ public int[] toArray() {
+ return Arrays.copyOf(members, size);
+ }
+
+ /**
+ * Returns {@code java.util.Arrays.toString(this.toArray())}.
+ *
+ * @return {@code java.util.Arrays.toString(this.toArray())}
+ */
+ @Override
+ public String toString() {
+ return Arrays.toString(this.toArray());
+ }
+}
diff --git a/blbutil/InputIt.java b/blbutil/InputIt.java
new file mode 100644
index 0000000..ae2b633
--- /dev/null
+++ b/blbutil/InputIt.java
@@ -0,0 +1,320 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.NoSuchElementException;
+import java.util.zip.GZIPInputStream;
+import net.sf.samtools.util.BlockCompressedInputStream;
+
+/**
+ * <p>Class {@code InputIt} is a buffered iterator whose {@code next()}
+ * method returns lines of a text input stream.
+ * </p>
+ * <p>If an {@code IOException} is thrown when an {@code InputIt}
+ * instance reads from the text input stream, the {@code IOException}
+ * is trapped, an error message is written to standard out, and the
+ * Java Virtual Machine is terminated.
+ * </p>
+ * Instances of class {@code InputIt} are not thread-safe.
+ *
+ * @see #DEFAULT_BUFFER_SIZE
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class InputIt implements FileIt<String> {
+
+ /**
+ * The default size (4,194,304) of the {@code BufferedReader} buffer.
+ */
+ public static final int DEFAULT_BUFFER_SIZE = 1<<22;
+
+ private final File file;
+ private final BufferedReader in;
+ private String next = null;
+
+ /**
+ * Constructs a new {@code InputIt} with the default buffer
+ * size that will iterate through lines of the specified input stream.
+ *
+ * @param is input stream of text data
+ * @param file the value to be returned by {@code file()} (may be null)
+ * @see #DEFAULT_BUFFER_SIZE
+ */
+ private InputIt(InputStream is, File file) {
+ this(is, file, DEFAULT_BUFFER_SIZE);
+ }
+
+ /**
+ * Constructs a new {@code InputIt} with the specified buffer size
+ * that will iterate through the lines of the specified input stream.
+ *
+ * @param is input stream of text data
+ * @param file the value to be returned by {@code file()} (may be null)
+ * @param bufferSize the size of the {@code BufferedReader} buffer
+ * @throws IllegalArgumentException if {@code bufferSize <= 0}
+ */
+ private InputIt(InputStream is, File file, int bufferSize) {
+ BufferedReader br = null;
+ try {
+ InputStreamReader isr = new InputStreamReader(is);
+ br = new BufferedReader(isr, bufferSize);
+ next = br.readLine();
+ }
+ catch(IOException e) {
+ Utilities.exit("Error reading " + is, e);
+ }
+ this.in = br;
+ this.file = file;
+ }
+
+ @Override
+ public File file() {
+ return file;
+ }
+
+ /**
+ * Returns {@code true} if the iteration has more elements.
+ * @return {@code true} if the iteration has more elements
+ */
+ @Override
+ public boolean hasNext() {
+ return (next != null);
+ }
+
+ /**
+ * Returns the next element in the iteration.
+ * @return the next element in the iteration
+ * @throws NoSuchElementException if the iteration has no more elements
+ */
+ @Override
+ public String next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+ String current = next;
+ try {
+ next = in.readLine();
+ }
+ catch (IOException e) {
+ Utilities.exit("Error reading " + in, e);
+ }
+ return current;
+ }
+
+ /**
+ * The {@code remove} method is not supported by this iterator.
+ * @throws UnsupportedOperationException if this method is invoked
+ */
+ @Override
+ public void remove() {
+ String s = this.getClass().toString() + ".remove()";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public void close() {
+ try {
+ in.close();
+ }
+ catch (IOException e) {
+ Utilities.exit("Error closing " + in, e);
+ }
+ next=null;
+ }
+
+ /**
+ * Returns a string representation of this iterator. The exact details
+ * of the representation are unspecified and subject to change.
+ * @return a string representation of this iterator
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(200);
+ sb.append("[file= ");
+ sb.append(file);
+ sb.append("; next=\"");
+ sb.append(next);
+ sb.append("\"]");
+ return sb.toString();
+ }
+
+ /**
+ * Constructs and returns an {@code InputIt} instance with the default
+ * buffer size that iterates through lines of text read from standard input.
+ *
+ * @return a new {@code InputIt} instance that iterates
+ * through lines of text read from standard input
+ */
+ public static InputIt fromStdIn() {
+ File file = null;
+ return new InputIt(System.in, file);
+ }
+
+ /**
+ * Constructs and returns an {@code InputIt} instance with the specified
+ * buffer size that iterates through lines of text read from standard input.
+ *
+ * @param bufferSize the size of the {@code BufferedReader} buffer
+ *
+ * @return a new {@code InputIt} instance that iterates
+ * through lines of text read from standard input
+ *
+ * @throws IllegalArgumentException if {@code bufferSize <= 0}
+ */
+ public static InputIt fromStdIn(int bufferSize) {
+ File file = null;
+ return new InputIt(System.in, file, bufferSize);
+ }
+
+ /**
+ * Constructs and returns an {@code InputIt} instance with the default
+ * buffer size that iterates through lines of the specified compressed
+ * or uncompressed text file. If the filename ends in ".gz", the file
+ * must be either BGZIP-compressed or GZIP-compressed.
+ *
+ * @param file a compressed or uncompressed text file
+ * @return a new {@code InputIt} instance that iterates
+ * through lines of the specified text file
+ *
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static InputIt fromGzipFile(File file) {
+ try {
+ InputStream is = new FileInputStream(file);
+ if (file.getName().endsWith(".gz")) {
+ if (isBGZipFile(file)) {
+ return new InputIt(
+ new BlockCompressedInputStream(is), file);
+ }
+ else {
+ return new InputIt(new GZIPInputStream(is), file);
+ }
+ }
+ else {
+ return new InputIt(is, file);
+ }
+ }
+ catch(FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ catch(IOException e) {
+ Utilities.exit("Error reading " + file, e);
+ }
+ assert false;
+ return null;
+ }
+
+ /**
+ * Constructs and returns an {@code InputIt} instance with the specified
+ * buffer size that iterates through lines of the specified compressed
+ * or uncompressed text file. If the filename ends in ".gz", the file must
+ * be either BGZIP-compressed or GZIP-compressed.
+ *
+ * @param file a compressed or uncompressed text file
+ * @param bufferSize the size of the {@code BufferedReader} buffer
+ * @return a new {@code InputIt} instance that iterates
+ * through lines of the specified text file
+ *
+ * @throws IllegalArgumentException if {@code bufferSize <= 0}
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static InputIt fromGzipFile(File file, int bufferSize) {
+ try {
+ InputStream is = new FileInputStream(file);
+ if (file.getName().endsWith(".gz")) {
+ if (isBGZipFile(file)) {
+ return new InputIt(
+ new BlockCompressedInputStream(is), file, bufferSize);
+ }
+ else {
+ return new InputIt(new GZIPInputStream(is), file, bufferSize);
+ }
+ }
+ else {
+ return new InputIt(is, file, bufferSize); // honor bufferSize here too
+ }
+ }
+ catch(FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ catch(IOException e) {
+ Utilities.exit("Error reading " + file, e);
+ }
+ assert false;
+ return null;
+ }
+
+ private static boolean isBGZipFile(File file) throws IOException {
+ try (InputStream is=new BufferedInputStream(new FileInputStream(file))) {
+ return BlockCompressedInputStream.isValidFile(is);
+ }
+ }
+
+ /**
+ * Constructs and returns an {@code InputIt} instance with the default
+ * buffer size that iterates through lines of the specified text file.
+ *
+ * @param file a text file
+ * @return a new {@code InputIt} instance that iterates through
+ * lines of the specified text file
+ *
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static InputIt fromTextFile(File file) {
+ try {
+ return new InputIt(new FileInputStream(file), file);
+ }
+ catch(FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ assert false;
+ return null;
+ }
+
+ /**
+ * Constructs and returns an {@code InputIt} instance with the specified
+ * buffer size that iterates through lines of the specified text file.
+ *
+ * @param file a text file
+ * @param bufferSize the size of the {@code BufferedReader} buffer
+ * @return a new {@code InputIt} instance that iterates through
+ * lines of the specified text file
+ *
+ * @throws IllegalArgumentException if {@code bufferSize <= 0}
+ * @throws NullPointerException if {@code file == null}
+ */
+ public static InputIt fromTextFile(File file, int bufferSize) {
+ try {
+ return new InputIt(new FileInputStream(file), file, bufferSize);
+ }
+ catch(FileNotFoundException e) {
+ Utilities.exit("Error opening " + file, e);
+ }
+ assert false;
+ return null;
+ }
+}
diff --git a/blbutil/IntArray.java b/blbutil/IntArray.java
new file mode 100644
index 0000000..21de1a5
--- /dev/null
+++ b/blbutil/IntArray.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Interface {@code IntArray} represents an immutable {@code int[]} array.
+ * </p>
+ * Instances of class {@code IntArray} are required to be immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface IntArray {
+
+ /**
+ * Returns the length of this array.
+ * @return the number of elements
+ */
+ int size();
+
+ /**
+ * Returns the element stored at the specified index.
+ * @param index an array index
+ * @return the element stored at the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ int get(int index);
+
+ /**
+ * Returns a string representation of this {@code IntArray} by applying
+ * {@code java.util.Arrays.toString()} to an equivalent {@code int[]}
+ * object.
+ *
+ * @return a string representation of this {@code IntArray}
+ */
+ @Override
+ String toString();
+
+ /**
+ * Copies the elements of this {@code IntArray} into an {@code int[]}
+ * and returns the result of applying
+ * {@code java.util.Arrays.toString()} to that copy.
+ *
+ * @return a string representation of this {@code IntArray}
+ */
+ default String asString() {
+ int[] copy = new int[size()];
+ for (int i=0, n=copy.length; i<n; ++i) {
+ copy[i] = get(i);
+ }
+ return Arrays.toString(copy);
+ }
+
+ /**
+ * Returns a new {@code IntArray} instance that has the same
+ * sequence of integers as the specified array. The implementation
+ * class is selected from the specified minimum and maximum elements.
+ * @param ia the array of integers to be copied
+ * @param min the minimum element in the specified array
+ * @param max the maximum element in the specified array
+ * @return a new {@code IntArray} instance that has
+ * the same sequence of integers as the specified array
+ * @throws IllegalArgumentException if {@code min > max}
+ * @throws IllegalArgumentException if an out-of-range element is detected
+ * @throws NullPointerException if {@code ia == null}
+ */
+ static IntArray create(int[] ia, int min, int max) {
+ if (max < min) {
+ throw new IllegalArgumentException("min > max");
+ }
+ if (min < 0 || max >= 65536) {
+ // outside the ranges handled by the branches below
+ return new WrappedIntArray(ia);
+ }
+ if (max < 128) {
+ return new ByteIndexArray(ia);
+ }
+ if (max < 256) {
+ return new ShiftedByteIndexArray(ia);
+ }
+ // here 256 <= max < 65536
+ return new CharIndexArray(ia);
+ }
+}
diff --git a/blbutil/IntList.java b/blbutil/IntList.java
new file mode 100644
index 0000000..bdbeaff
--- /dev/null
+++ b/blbutil/IntList.java
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code IntList} represents a list of integers.
+ * Class {@code IntList} supports a {@code clear()} method, but it does not
+ * support a {@code remove()} method.
+ * </p>
+ * Class {@code IntList} is not thread-safe.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class IntList {
+
+ /**
+ * The default initial capacity of an {@code IntList}, which is 10.
+ */
+ public static final int DEFAULT_INIT_CAPACITY = 10;
+
+ private int size;
+ private int[] values;
+
+ /**
+ * Constructs an {@code IntList} object with the default
+ * initial capacity.
+ *
+ * @see #DEFAULT_INIT_CAPACITY
+ */
+ public IntList() {
+ this(DEFAULT_INIT_CAPACITY);
+ }
+
+ /**
+ * Constructs an {@code IntList} object with the specified
+ * initial capacity.
+ *
+ * @param initCapacity the initial capacity of this list
+ * @throws IllegalArgumentException if {@code initCapacity < 0}
+ */
+ public IntList(int initCapacity) {
+ if (initCapacity < 0) {
+ throw new IllegalArgumentException(String.valueOf(initCapacity));
+ }
+ this.size = 0;
+ this.values = new int[initCapacity];
+ }
+
+ /**
+ * Returns the element at the specified position in this list.
+ * @param index the index of the element to be returned
+ * @return the element at the specified position in this list
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int get(int index) {
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return values[index];
+ }
+
+ /**
+ * Replaces the element at the specified position in this list with the
+ * specified element.
+ * @param index the index of the element to be replaced
+ * @param element the element to be stored at the specified position
+ * in this list
+ * @return the previous element at the specified position in this list
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int set(int index, int element) {
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ int value = values[index];
+ values[index] = element;
+ return value;
+ }
+
+ /**
+ * Increments by one the element at the specified position in this list.
+ * @param index the index of the element to be incremented
+ * @return the previous element at the specified position in this list
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int getAndIncrement(int index) {
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return values[index]++;
+ }
+
+ /**
+ * Decrements by one the element at the specified position in this list.
+ * @param index the index of the element to be decremented
+ * @return the previous element at the specified position in this list
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int getAndDecrement(int index) {
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return values[index]--;
+ }
+
+ /**
+ * Increments by one the element at the specified position in this list.
+ * @param index the index of the element to be incremented
+ * @return the updated element at the specified position in this list
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int incrementAndGet(int index) {
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return ++values[index];
+ }
+
+ /**
+ * Decrements by one the element at the specified position in this list.
+ * @param index the index of the element to be decremented
+ * @return the updated element at the specified position in this list
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int decrementAndGet(int index) {
+ if (index < 0 || index >= size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return --values[index];
+ }
+
+ /**
+ * Returns the number of elements in this list.
+ * @return the number of elements in this list
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Returns {@code true} if this list has no elements, and returns
+ * {@code false} otherwise.
+ * @return {@code true} if this list has no elements
+ */
+ public boolean isEmpty() {
+ return size==0;
+ }
+
+ /**
+ * Returns an integer array containing the sequence of elements in this
+ * list.
+ * @return an integer array containing the sequence of elements in this
+ * list
+ */
+ public int[] toArray() {
+ return Arrays.copyOf(values, size);
+ }
+
+ /**
+ * Adds the specified integer to the end of this list.
+ *
+ * @param element the integer to be added to the end of this list
+ */
+ public void add(int element) {
+ if (size==values.length) {
+ int newCapacity = (values.length * 3)/2 + 1;
+ this.values = Arrays.copyOf(this.values, newCapacity);
+ }
+ this.values[size++] = element;
+ }
+
+ /**
+ * Removes all elements from this list.
+ */
+ public void clear() {
+ this.size = 0;
+ }
+
+ /**
+ * Returns {@code java.util.Arrays.toString(this.toArray())}
+ *
+ * @return {@code java.util.Arrays.toString(this.toArray())}
+ */
+ @Override
+ public String toString() {
+ return Arrays.toString(toArray());
+ }
+}
diff --git a/blbutil/IntPair.java b/blbutil/IntPair.java
new file mode 100644
index 0000000..c64ecf5
--- /dev/null
+++ b/blbutil/IntPair.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+/**
+ * <p>Class {@code IntPair} represents an ordered pair of integers.
+ * </p>
+ * Instances of class {@code IntPair} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class IntPair implements Comparable<IntPair> {
+
+ private final int first;
+ private final int second;
+
+ /**
+ * Constructs an {@code IntPair} instance that stores the
+ * specified integers as an ordered pair.
+ * @param first the first element of the ordered pair
+ * @param second the second element of the ordered pair
+ */
+ public IntPair(int first, int second) {
+ this.first = first;
+ this.second = second;
+ }
+
+ /**
+ * Returns the first element of the ordered pair.
+ * @return the first element of the ordered pair
+ */
+ public int first() {
+ return this.first;
+ }
+
+ /**
+ * Returns the second element of the ordered pair.
+ * @return the second element of the ordered pair
+ */
+ public int second() {
+ return this.second;
+ }
+
+ /**
+ * Tests the specified object for equality with this {@code IntPair}.
+ * The result is {@code true} if the specified object
+ * is an {@code IntPair} whose first and second elements equal
+ * the first and second elements of {@code this}, and {@code false}
+ * otherwise.
+ * @param obj the object to be compared for equality with this
+ * {@code IntPair}
+ * @return {@code true} if the specified object is an {@code IntPair}
+ * that represents the same ordered pair of integers as {@code this},
+ * and {@code false} otherwise
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj) {
+ return true;
+ }
+ if (obj instanceof IntPair) {
+ IntPair that = (IntPair) obj;
+ return (first==that.first) && (second==that.second);
+ }
+ return false;
+ }
+
+ /**
+ * Returns a hash code value for this ordered pair.
+ *
+ * <p>The hash code is the result of the following calculation:
+ * </p>
+ * <pre>
+ int hash = 5;
+ hash = 29 * hash + this.first;
+ hash = 29 * hash + this.second;
+ * </pre>
+ * @return a hash code value for this ordered pair
+ */
+ @Override
+ public int hashCode() {
+ final int seed = 5;
+ int hash = 29 * seed + first;
+ hash = 29 * hash + second;
+ return hash;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The string
+ * has the form {@code "[i1, i2]"} where
+ * {@code i1} and {@code i2} are the first and second elements
+ * of the ordered pair represented by {@code this}.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ return "[" + this.first + ", " + this.second + "]";
+ }
+
+ /**
+ * Compares {@code this} with the specified {@code IntPair} using
+ * lexicographical order: the first elements are compared, and
+ * the second elements are compared only if the first elements
+ * are equal.
+ * @param other an {@code IntPair} instance to be compared to
+ * {@code this}
+ * @return -1, 0, or 1 depending on whether {@code this} is
+ * less than, equal to, or greater than the specified {@code IntPair}
+ * object
+ */
+ @Override
+ public int compareTo(IntPair other) {
+ int result;
+ if (first != other.first) {
+ // the first elements decide the order
+ result = (first < other.first) ? -1 : 1;
+ }
+ else if (second != other.second) {
+ // equal first elements: the second elements decide the order
+ result = (second < other.second) ? -1 : 1;
+ }
+ else {
+ result = 0;
+ }
+ return result;
+ }
+}
diff --git a/blbutil/IntSet.java b/blbutil/IntSet.java
new file mode 100644
index 0000000..e46f7d0
--- /dev/null
+++ b/blbutil/IntSet.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2016 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code IntSet} represents an indexed set of integers.
+ * </p>
+ * <p>Class {@code IntSet} is not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class IntSet {
+
+ private static final int NIL = -1;
+ private static final float loadFactor = 0.75f;
+
+ private int size;
+ private int nBuckets;
+
+ private int[] next; // chain links: bucket heads, then node slots
+ private int[] data; // stores list index of element
+ private int[] list; // elements in index order
+ private int firstFreeIndex;
+
+ /**
+ * Creates a new, empty {@code IntSet} instance.
+ * @param capacity the initial capacity of the set (0 is treated as 1)
+ * @throws IllegalArgumentException if
+ * {@code capacity < 0 || (capacity > (1 << 30))}
+ */
+ public IntSet(int capacity) {
+ if (capacity < 0 || capacity > (1<<30)) {
+ throw new IllegalArgumentException(String.valueOf(capacity));
+ }
+ int cap = Math.max(capacity, 1); // the hash table needs >= 1 bucket
+ int numBuckets = (int) Math.ceil(cap/loadFactor);
+ allocateArrays(cap, numBuckets);
+ initializeFields(numBuckets); // bucket count, not element capacity
+ }
+
+ // next and data hold one slot per bucket head plus one slot per element
+ private void allocateArrays(int capacity, int buckets) {
+ this.next = new int[buckets + capacity];
+ this.data = new int[buckets + capacity];
+ this.list = new int[capacity];
+ }
+
+ // Empties every bucket and threads all node slots onto the free list.
+ private void initializeFields(int numBuckets) {
+ size = 0;
+ nBuckets = numBuckets;
+ firstFreeIndex = nBuckets;
+ Arrays.fill(next, 0, nBuckets, NIL);
+ for (int j=nBuckets; j<next.length; ++j) {
+ next[j] = j+1;
+ }
+ }
+
+ /*
+ * Increases the capacity of the internal hash table.
+ */
+ private void rehash(int newCapacity) {
+ if (newCapacity > size) {
+ int oldSize = size;
+ int[] oldList = list;
+ int newNumBuckets = (int) Math.ceil(newCapacity/loadFactor);
+ allocateArrays(newCapacity, newNumBuckets);
+ initializeFields(newNumBuckets);
+ for (int j=0; j<oldSize; ++j) {
+ add(oldList[j]);
+ }
+ }
+ }
+
+ /**
+ * Returns {@code true} if the set contains the specified element,
+ * and returns {@code false} otherwise.
+ * @param element an integer
+ * @return {@code true} if the set contains the specified element
+ */
+ public boolean contains(int element) {
+ int index = next[bucket(element)];
+ while (index!=NIL && list[data[index]]<element) {
+ index = next[index];
+ }
+ return (index!=NIL && list[data[index]]==element);
+ }
+
+ /**
+ * Adds the specified element to this set. The indexing of set elements
+ * immediately before and after this command is invoked may differ if
+ * the set is changed by the operation.
+ * @param element an integer to add to this set
+ * @return {@code true} if the set was changed by the operation, and
+ * {@code false} otherwise
+ */
+ public boolean add(int element) {
+ int prevIndex = prevIndex(element);
+ int nextIndex = next[prevIndex];
+ if (nextIndex==NIL || list[data[nextIndex]]!=element) {
+ int index = firstFreeIndex;
+ firstFreeIndex = next[firstFreeIndex];
+ next[prevIndex] = index;
+ data[index] = size;
+ next[index] = nextIndex;
+ list[size++] = element;
+ if (size == list.length) {
+ int newCapacity = 3*list.length/2 + 1;
+ rehash(newCapacity);
+ }
+ return true;
+ }
+ else {
+ return false;
+ }
+ }
+
+ /**
+ * Removes the specified element from this set. The indexing of set elements
+ * immediately before and after this command is invoked may differ if
+ * the set is changed by the operation.
+ * @param element an integer to remove from this set
+ * @return {@code true} if the set was changed by the operation, and
+ * {@code false} otherwise
+ */
+ public boolean remove(int element) {
+ int prevIndex = prevIndex(element);
+ int index = next[prevIndex];
+ if (index==NIL || list[data[index]]!=element) {
+ return false;
+ }
+ else {
+ int oldListIndex = data[index];
+ next[prevIndex] = next[index];
+ next[index] = firstFreeIndex;
+ firstFreeIndex = index;
+
+ --size;
+ if (oldListIndex!=size) {
+ index = index(list[size]);
+ data[index] = oldListIndex;
+ list[oldListIndex] = list[size]; // overwrite removed element
+ }
+ return true;
+ }
+ }
+
+ private int bucket(int element) {
+ return Math.abs((71*element) % nBuckets);
+ }
+
+ // Returns the slot that precedes element's position in its sorted chain.
+ private int prevIndex(int element) {
+ int prevIndex = bucket(element);
+ int index = next[prevIndex];
+ while (index!=NIL && list[data[index]]<element) {
+ prevIndex = index;
+ index = next[index];
+ }
+ return prevIndex;
+ }
+
+ // Returns the node slot that stores element, or NIL if element is absent.
+ private int index(int element) {
+ int index = next[bucket(element)];
+ while (index!=NIL && list[data[index]]<element) {
+ index = next[index];
+ }
+ return (index!=NIL && list[data[index]]==element) ? index : NIL;
+ }
+
+ /**
+ * Returns the specified element.
+ * @param index an index of an element in this set
+ * @return the specified element
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int elementWithIndex(int index) {
+ if (index>=size) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return list[index];
+ }
+
+ /**
+ * Removes all elements from this set.
+ */
+ public void clear() {
+ initializeFields(nBuckets);
+ }
+
+ /**
+ * Returns the number of elements in this set.
+ * @return the number of elements in this set
+ */
+ public int size() {
+ return size;
+ }
+
+ /**
+ * Returns the capacity of this set. The capacity of this set
+ * is the maximum number of elements that may be stored without
+ * allocating more memory.
+ * @return the capacity of this set
+ */
+ public int capacity() {
+ return list.length;
+ }
+
+ /**
+ * Sets the capacity of this set to the specified value. The capacity
+ * of this set is the maximum number of elements that may be stored
+ * without allocating more memory.
+ * @param capacity the desired capacity
+ * @throws IllegalArgumentException if {@code capacity < this.size()}
+ */
+ public void setCapacity(int capacity) {
+ if (capacity < size) {
+ throw new IllegalArgumentException(String.valueOf(capacity));
+ }
+ if (capacity != list.length) {
+ rehash(capacity);
+ }
+ }
+
+ /**
+ * Returns an array containing the elements in this set. The returned
+ * array will satisfy:
+ * {@code this.toArray()[j]==this.elementWithIndex(j)} for each
+ * {@code j} satisfying {@code 0 <= j && j < this.size()}
+ * @return an array containing the elements in this set
+ */
+ public int[] toArray() {
+ return Arrays.copyOf(list, size);
+ }
+
+ /**
+ * Returns {@code java.util.Arrays.toString(this.toArray())}.
+ * @return {@code java.util.Arrays.toString(this.toArray())}
+ */
+ @Override
+ public String toString() {
+ return Arrays.toString(toArray());
+ }
+}
diff --git a/blbutil/SampleFileIt.java b/blbutil/SampleFileIt.java
new file mode 100644
index 0000000..ea900d6
--- /dev/null
+++ b/blbutil/SampleFileIt.java
@@ -0,0 +1,41 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import beagleutil.Samples;
+
+/**
+ * <p>An iterator for records in a file. Each record contains
+ * data for the same set of samples.
+ *</p>
+ * Instances of class {@code SampleFileIt} are not thread-safe.
+ *
+ * @param <E> the type of the elements returned by this iterator's
+ * {@code next()} method.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface SampleFileIt<E> extends FileIt<E> {
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ Samples samples();
+}
diff --git a/blbutil/ShiftedByteIndexArray.java b/blbutil/ShiftedByteIndexArray.java
new file mode 100644
index 0000000..e8626c6
--- /dev/null
+++ b/blbutil/ShiftedByteIndexArray.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+/**
+ * <p>Class {@code ShiftedByteIndexArray} represents an immutable
+ * {@code int[]} array that is stored as a {@code byte[]} array.
+ * </p>
+ * Instances of {@code ShiftedByteIndexArray} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class ShiftedByteIndexArray implements IntArray {
+
+ private static final int shift = 128;
+ private final byte[] ba;
+
+ /**
+ * Constructs a new {@code ShiftedByteIndexArray} instance.
+ * @param ia an array of integers
+ * @throws IllegalArgumentException if
+ * {@code ia[j] < 0 || ia[j] > 255} for any index {@code j}
+ * satisfying {@code j >= 0 && j < ia.length}
+ * @throws NullPointerException if {@code ia == null}
+ */
+ public ShiftedByteIndexArray(int[] ia) {
+ this(ia, 0, ia.length);
+ }
+
+ /**
+ * Constructs a new {@code ShiftedByteIndexArray} instance from the
+ * specified subarray.
+ * @param ia an array of integers
+ * @param start the first element to be included (inclusive)
+ * @param end the last element to be included (exclusive)
+ * @throws IllegalArgumentException if
+ * {@code ia[j] < 0 || ia[j] > 255} for any index {@code j}
+ * satisfying {@code j >= start && j < end}
+ * @throws IndexOutOfBoundsException if {@code start < 0 or end > ia.length}
+ * @throws IllegalArgumentException if {@code start > end}
+ * @throws NullPointerException if {@code ia == null}
+ */
+ public ShiftedByteIndexArray(int[] ia, int start, int end) {
+ if (start > end) {
+ throw new IllegalArgumentException("start > end");
+ }
+ this.ba = new byte[end - start];
+ for (int j=start; j<end; ++j) {
+ if (ia[j] < 0 || ia[j] > 255) {
+ throw new IllegalArgumentException(String.valueOf(ia[j]));
+ }
+ ba[j - start] = (byte) (ia[j] - shift);
+ }
+ }
+
+ @Override
+ public int size() {
+ return ba.length;
+ }
+
+ @Override
+ public int get(int index) {
+ return ba[index] + shift;
+ }
+
+ @Override
+ public String toString() {
+ return this.asString();
+ }
+}
diff --git a/blbutil/StringUtil.java b/blbutil/StringUtil.java
new file mode 100644
index 0000000..09632bd
--- /dev/null
+++ b/blbutil/StringUtil.java
@@ -0,0 +1,281 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+/**
+ * Class {@code StringUtil} is a utility class with static methods
+ * for counting and returning delimited fields in a string.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class StringUtil {
+
+ /* Private constructor to prevent instantiation */
+ private StringUtil() {
+ }
+
+ /**
+ * Returns the number of delimited fields in the specified
+ * string. Returns 1 if the specified string has length 0.
+ *
+ * @param s a string
+ * @param delimiter a delimiter character
+ * @return the number of delimited fields in the specified string
+ * @throws NullPointerException if {@code s == null}
+ */
+ public static int countFields(String s, char delimiter) {
+ int cnt = 0;
+ for (int j=0, n=s.length(); j<n; ++j) {
+ if (s.charAt(j)==delimiter) {
+ ++cnt;
+ }
+ }
+ return cnt + 1;
+ }
+
+ /**
+ * Returns {@code Math.min(countFields(s, delimiter), max)}.
+ *
+ * @param s a string with 0 or more {@code delimiter} characters
+ * @param delimiter the delimiter character
+ * @param max the maximum value that can be returned
+ *
+ * @return {@code Math.min(countFields(s, delimiter), max)}
+ *
+ * @throws NullPointerException if {@code s == null}
+ */
+ public static int countFields(String s, char delimiter, int max) {
+ int cnt = 0;
+ int maxCnt = max - 1;
+ for (int j=0, n=s.length(); j<n && cnt<maxCnt; ++j) {
+ if (s.charAt(j)==delimiter) {
+ ++cnt;
+ }
+ }
+ return Math.min(cnt + 1, max);
+ }
+
+ /**
+ * Returns an array obtained by splitting the specified string
+ * around the specified delimiter.
+ * The array returned by this method contains each substring of
+ * the string that does not contain the delimiter and that
+ * is preceded by the delimiter or the beginning of
+ * the string and that is terminated by the delimiter or the end
+ * of the string. The substrings in the array are in
+ * the order in which they occur in the specified string.
+ * If there are no delimiters in the specified string then the method
+ * returns an array of length one, whose single element is the specified
+ * string.
+ *
+ * @param s a string
+ * @param delimiter a delimiter character
+ *
+ * @return the array of strings obtained by splitting the specified string
+ * around the specified delimiter
+ *
+ * @throws NullPointerException if {@code s == null}
+ */
+ public static String[] getFields(String s, char delimiter) {
+ String[] fields = new String[countFields(s, delimiter)];
+ int start = 0;
+ for (int j=0; j<fields.length; ++j) {
+ int end = s.indexOf(delimiter, start);
+ fields[j] = end>=0 ? s.substring(start,end) : s.substring(start);
+ start = end + 1;
+ }
+ return fields;
+ }
+
+ /**
+ * Returns an array obtained by splitting the specified string
+ * around the first {@code (limit - 1)} occurrences of the specified
+ * delimiter. If the string contains fewer than {@code (limit - 1)}
+ * delimiter characters, the returned value will equal
+ * {@code StringUtil.getFields(s, delimiter)}
+ *
+ * @param s a string
+ * @param delimiter a delimiter character
+ * @param limit the maximum length of the returned array
+ *
+ * @return an array obtained by splitting the specified string
+ * around the specified delimiter
+ *
+ * @throws NullPointerException if {@code s == null}
+ * @throws IllegalArgumentException if {@code limit < 2 }
+ */
+ public static String[] getFields(String s, char delimiter, int limit) {
+ if (limit < 2) {
+ throw new IllegalArgumentException("limit: " + limit);
+ }
+ String[] fields = new String[countFields(s, delimiter, limit)];
+ if (fields.length > 0) {
+ int start = 0;
+ for (int j=0, n=fields.length-1; j<n; ++j) {
+ int end = s.indexOf(delimiter, start);
+ fields[j] = s.substring(start, end);
+ start = end + 1;
+ }
+ fields[fields.length - 1] = s.substring(start);
+ }
+ return fields;
+ }
+
+ /**
+ * Returns the number of white-space delimited fields in the specified
+ * string. A field is a maximal set of consecutive characters that are not
+ * white space characters. White space is defined as any unicode
+ * characters less than or equal to '\u0020'.
+ *
+ * @param s a string
+ * @return the number of white-space delimited fields in the specified
+ * string
+ * @throws NullPointerException if {@code s == null}
+ */
+ public static int countFields(String s) {
+ int start = 0;
+ int end = s.length();
+ while (start<end && s.charAt(start)<=' ') {
+ ++start;
+ }
+ while (end>start && s.charAt(end-1)<=' ') {
+ --end;
+ }
+ int fieldCount = (start<end) ? 1 : 0;
+ while (++start<end && s.charAt(start)>' ') {
+ }
+ while (start<end) {
+ while (s.charAt(++start)<=' ') {
+ }
+ ++fieldCount;
+ while (++start<end && s.charAt(start)>' ') {
+ }
+ }
+ return fieldCount;
+ }
+
+ /**
+ * Returns an array obtained by trimming white-space from the
+ * beginning and end of the specified string, and splitting the resulting
+ * string around white space.
+ * White space is any maximal substring of unicode characters
+ * less than or equal to '\u0020'. White-space at the beginning and
+ * end of the string is ignored. The substrings in the returned array
+ * are in the order in which they occur in this string. If there is no
+ * white-space in the specified string, the method returns an array
+ * of length one whose single element is the trimmed string. If the
+ * specified string contains only white-space a string array
+ * of length 0 is returned.
+ *
+ * @param s a string
+ * @return the array of strings obtained by splitting the specified string
+ * around white space
+ *
+ * @throws NullPointerException if {@code s == null}
+ */
+ public static String[] getFields(String s) {
+ s = s.trim();
+ int n = s.length();
+ String[] fields = new String[countFields(s)];
+ if (fields.length > 0) {
+ int index = 0;
+ int start = 0;
+ int j = -1;
+ while (++j<n && s.charAt(j)>' ') {
+ }
+ fields[index++] = s.substring(start, j);
+ while (j<n) {
+ while (s.charAt(++j)<=' ') {
+ }
+ start = j;
+ while (++j<n && s.charAt(j)>' ') {
+ }
+ fields[index++] = s.substring(start, j);
+ }
+ assert index==fields.length;
+ }
+ return fields;
+ }
+
+ /**
+ * <p>Returns an array obtained by trimming white-space from the
+ * beginning and end of the specified string, and splitting the resulting
+ * string around the first {@code (limit-1)} white-space delimiters.
+ * A white-space delimiter is any maximal substring of unicode characters
+ * less than or equal to '\u0020'. If the trimmed string contains
+ * fewer than {@code (limit - 1)} white space delimiters, the returned value
+ * will equal {@code StringUtil.getFields(s)}. The substrings in the
+ * returned array are in the order in which they occur in this string.
+ * If there are no white-space delimiters in the specified string, the
+ * method returns an array of length one whose single element is the
+ * trimmed string. If the specified string contains only white-space,
+ * a string array of length 0 is returned.
+ *</p>
+ *
+ * @param s a string
+ * @param limit the maximum length of the returned array
+ *
+ * @return the array of strings obtained by splitting the specified string
+ * around white space
+ *
+ * @throws NullPointerException if {@code s == null}
+ * @throws IllegalArgumentException if {@code limit < 2}
+ */
+ public static String[] getFields(String s, int limit) {
+ if (limit<2) {
+ throw new IllegalArgumentException("limit: " + limit);
+ }
+ s = s.trim();
+ int n = s.length();
+ int j=-1;
+ while (++j<n && s.charAt(j)>' ') {
+ }
+ int fieldCount = (j>0) ? 1 : 0;
+ while (j<n && fieldCount<limit) {
+ while (s.charAt(++j)<=' ') {
+ }
+ ++fieldCount;
+ while (++j<n && s.charAt(j)>' ') {
+ }
+ }
+ String[] fields = new String[fieldCount];
+ if (fields.length>0) {
+ int index = 0;
+ int start = 0;
+ j = -1;
+ while (++j<n && s.charAt(j)>' ') {
+ }
+ fields[index++] = s.substring(start, j);
+ while (j<n && index<limit) {
+ while (s.charAt(++j)<=' ') {
+ }
+ start = j;
+ while (++j<n && s.charAt(j)>' ') {
+ }
+ if (index < limit-1) {
+ fields[index++] = s.substring(start, j);
+ }
+ else {
+ fields[index++] = s.substring(start);
+ }
+ }
+ }
+ return fields;
+ }
+}
diff --git a/blbutil/Utilities.java b/blbutil/Utilities.java
new file mode 100644
index 0000000..bee3ff3
--- /dev/null
+++ b/blbutil/Utilities.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.io.File;
+import java.io.PrintWriter;
+import java.text.SimpleDateFormat;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Class {@code Utilities} contains miscellaneous static utility methods.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class Utilities {
+
+ private Utilities() {
+ // private constructor to prevent instantiation
+ }
+
+ /**
+ * Prints a summary of memory use at the time of method invocation
+ * to standard output.
+ * @param msg a message to be printed with the summary
+ * of memory use
+ */
+ public static void printMemoryUse(String msg) {
+ long Mb = 1024*1024;
+ Runtime rt = Runtime.getRuntime();
+ System.out.println(Const.nl + msg
+ + Const.tab + "maxMb=" + (rt.maxMemory()/Mb)
+ + Const.tab + "totalMb=" + (rt.totalMemory()/Mb)
+ + Const.tab + "freeMb=" + (rt.freeMemory()/Mb)
+ + Const.tab + "usedMb=" + ((rt.totalMemory() - rt.freeMemory())/Mb));
+ }
+
+ /**
+ * Returns the current local time as a string. The
+ * exact details of the string representation
+ * are unspecified and subject to change.
+ *
+ * @return the current local time as a string.
+ */
+ public static String timeStamp() {
+ Date now = new Date();
+ SimpleDateFormat sdf =
+ new SimpleDateFormat("hh:mm a z 'on' dd MMM yyyy");
+ return sdf.format(now);
+ }
+
+ /**
+ * <p>Returns a set of identifiers found in a text file that has
+ * one identifier per line. The empty set is returned if
+ * {@code file == null}. Blank lines are ignored, and white-space that
+ * begins or ends a line is ignored.
+ * </p>
+ * If an {@code IOException} is thrown, an error message is printed
+ * to standard error and the Java virtual machine is forced to terminate.
+ *
+ * @param file a text file with one identifier per line
+ * @return a set of identifiers
+ *
+ * @throws IllegalArgumentException if the specified file does not exist
+ * @throws IllegalArgumentException if the specified file is a directory
+ * @throws IllegalArgumentException if any line of the specified
+ * file contains two non-white-space characters separated by one or
+ * more white-space characters
+ */
+ public static Set<String> idSet(File file) {
+ if (file==null) {
+ return Collections.emptySet();
+ }
+ else {
+ if (file.exists()==false) {
+ String s = "file does not exist: " + file;
+ throw new IllegalArgumentException(s);
+ }
+ if (file.isDirectory()) {
+ String s = "file is a directory: " + file;
+ throw new IllegalArgumentException(s);
+ }
+ Set<String> idSet = new HashSet<>();
+ try (FileIt<String> it = InputIt.fromGzipFile(file)) {
+ while (it.hasNext()) {
+ String line = it.next().trim();
+ if (line.length() > 0) {
+ if (StringUtil.countFields(line) > 1) {
+ String s = "line has >1 white-space delimited fields: "
+ + line;
+ throw new IllegalArgumentException(s);
+ }
+ idSet.add(line);
+ }
+ }
+ }
+ return idSet;
+ }
+ }
+
+ /**
+ * Prints the specified string to the specified {@code PrintWriter} and
+ * to standard out. The line separator string is not appended to the
+ * specified string before printing.
+ *
+ * @param out a print writer
+ * @param s a string to be printed
+ *
+ * @throws NullPointerException if {@code out == null}
+ */
+ public static void duoPrint(PrintWriter out, String s) {
+ System.out.print(s);
+ out.print(s);
+ }
+
+ /**
+ * Prints the specified string to the specified {@code PrintWriter} and
+ * to standard out. The line separator string is appended to the
+ * specified string before printing.
+ *
+ * @param out a print writer
+ * @param s a string to be printed
+ *
+ * @throws NullPointerException if {@code out == null}
+ */
+ public static void duoPrintln(PrintWriter out, String s) {
+ System.out.println(s);
+ out.println(s);
+ }
+
+ /**
+ * Returns a string representation of the specified elapsed time
+ * in the format "H hours M minutes S seconds", omitting zero hours or minutes.
+ *
+ * @param nanoseconds the elapsed time in nanoseconds
+ *
+ * @return a string representation of the specified elapsed time
+ */
+ public static String elapsedNanos(long nanoseconds) {
+ long seconds = Math.round(nanoseconds /1000000000.0);
+ StringBuilder sb = new StringBuilder(80);
+ if (seconds >= 3600) {
+ long hours = seconds / 3600;
+ sb.append(hours);
+ sb.append(hours==1 ? " hour " : " hours ");
+ seconds %= 3600;
+
+ }
+ if (seconds >= 60) {
+ long minutes = seconds / 60;
+ sb.append(minutes);
+ sb.append(minutes==1 ? " minute " : " minutes ");
+ seconds %= 60;
+ }
+ sb.append(seconds);
+ sb.append(seconds==1 ? " second" : " seconds");
+ return sb.toString();
+ }
+
+ /**
+ * Prints the specified exception, its stack trace, and
+ * the specified string to standard out and then terminates the
+ * Java virtual machine.
+ *
+ * @param s a string to be printed to standard out
+ * @param e an exception or error to be printed to standard out
+ *
+ * @throws NullPointerException if {@code e == null}
+ */
+ public static void exit(String s, Throwable e) {
+ e.printStackTrace(System.out);
+ System.out.println(e);
+ System.out.println(s);
+ System.out.println("terminating program.");
+ System.exit(1);
+ }
+
+ /**
+ * Prints the specified string to standard out and then terminates the
+ * Java virtual machine.
+ *
+ * @param s a string to be written to standard output
+ */
+ public static void exit(String s) {
+ System.out.println(s);
+ System.out.flush();
+ System.exit(0);
+ }
+}
+
diff --git a/blbutil/Validate.java b/blbutil/Validate.java
new file mode 100644
index 0000000..42bdf98
--- /dev/null
+++ b/blbutil/Validate.java
@@ -0,0 +1,576 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Class Validate contains static methods for validating command line
+ * arguments.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class Validate {
+
+
+ private Validate() {
+ // private constructor to prevent instantiation
+ }
+
+ /**
+ * Returns a map with one (key, value) pair for each element
+ * of the specified array. Each element of the specified {@code String[]}
+ * array must contain the specified delimiter character.
+ * For each array element {@code s}, the key is
+ * {@code s.substring(0, s.indexOf(delim))}
+ * and the value is {@code s.substring(s.indexOf(delim) + 1)}.
+ *
+ * @param args a string array
+ * @param delim the delimiter character separating a key and value
+ * @return a map with one (key, value) pair for each element
+ * of the specified array
+ *
+ * @throws IllegalArgumentException if the specified delimiter character is
+ * missing from any string element in the specified {@code String[]} array
+ * @throws IllegalArgumentException if the specified delimiter
+ * is the first or last character of any string element in the specified
+ * {@code String[]} array
+ * @throws IllegalArgumentException if any two elements of the
+ * specified string array have the same key
+ * @throws NullPointerException if {@code args == null} or if
+ * {@code args[j] == null} for any {@code j} satisfying
+ * {@code (0 <= j && j < args.length)}
+ */
+ public static Map<String, String> argsToMap(String[] args, char delim) {
+ Map<String, String> argMap=new HashMap<>();
+ for (String arg : args) {
+ int index=arg.indexOf(delim);
+ if (index!=-1) {
+ if (index == 0) {
+ String s = "missing key in key-value pair: " + arg;
+ throw new IllegalArgumentException(s);
+ }
+ if (index==(arg.length()-1)) {
+ String s = "missing value in key-value pair: " + arg;
+ throw new IllegalArgumentException(s);
+ }
+ String key = arg.substring(0, index);
+ String value = arg.substring(index+1);
+ if (argMap.containsKey(key)) {
+ String s = "duplicate arguments: " + key;
+ throw new IllegalArgumentException(s);
+ }
+ argMap.put(key, value);
+ } else {
+ String s = "missing delimiter character (" + delim + "): "
+ + arg;
+ throw new IllegalArgumentException(s);
+ }
+ }
+ return argMap;
+ }
+
+ /**
+ * Checks whether the specified map of key-value pairs is empty.
+ * If the map is non-empty, the method will print an error message
+ * and terminate the Java virtual machine.
+ *
+ * @param argsMap a map of key-value pairs
+ * @throws NullPointerException if {@code argsMap == null}
+ */
+ public static void confirmEmptyMap(Map<String, String> argsMap) {
+ Set<String> keySet = argsMap.keySet();
+ if (keySet.isEmpty()==false) {
+ StringBuilder sb = new StringBuilder(50);
+ sb.append("Error: unrecognized parameter");
+ sb.append(keySet.size()==1 ? ":" : "s:");
+ for (String key : keySet) {
+ String value = argsMap.get(key);
+ sb.append(' ');
+ sb.append(key);
+ sb.append('=');
+ sb.append(value);
+ }
+ Utilities.exit(sb.toString());
+ }
+ }
+
+ /**
+ * Returns a {@code File} object corresponding to the specified filename or
+ * {@code null} if {@code filename == null}
+ *
+ * @param filename a filename
+ * @return a file corresponding to the specified filename, or {@code null}
+ * if {@code filename == null}
+ *
+ * @throws IllegalArgumentException if {@code filename.isEmpty() == true}
+ * @throws IllegalArgumentException if {@code filename != null} and the
+ * specified file does not exist or is a directory
+ */
+ public static File getFile(String filename) {
+ if (filename==null) {
+ return null;
+ }
+ if (filename.isEmpty()) {
+ throw new IllegalArgumentException("filename is empty string");
+ }
+ else {
+ File file = new File(filename);
+ if (file.exists()==false) {
+ String s = "File does not exist: " + file;
+ throw new IllegalArgumentException(s);
+ }
+ if (file.isDirectory()) {
+ String s = "File is a directory: " + file;
+ throw new IllegalArgumentException(s);
+ }
+ return file;
+ }
+ }
+
+ /**
+ * Removes the specified key from the specified map, and returns the
+ * integer value corresponding to the specified key.
+ *
+ * @param key the key
+ * @param map a map of key-value pairs
+ * @param isRequired {@code true} if the specified key
+ * is required to be in the specified map, and {@code false} otherwise
+ * @param defaultValue the value that will be returned if
+ * {@code (isRequired == false && map.get(key) == null)}
+ * @param min the minimum valid integer value
+ * @param max the maximum valid integer value
+ *
+ * @return the integer value corresponding to the specified key
+ *
+ * @throws IllegalArgumentException if {@code min > max}
+ * @throws IllegalArgumentException if
+ * {@code defaultValue < min || defaultValue > max}
+ * @throws IllegalArgumentException if
+ * {@code isRequired == true && map.get(key) == null}
+ * @throws IllegalArgumentException if
+ * {@code map.get(key) != null
+ * && (Integer.parseInt(map.get(key)) < min
+ * || Integer.parseInt(map.get(key)) > max)}
+ * @throws NumberFormatException if {@code map.get(key) != null}
+ * and {@code map.get(key)} is not a parsable {@code int}
+ * @throws NullPointerException if {@code key == null || map == null}
+ */
+ public static int intArg(String key, Map<String, String> map,
+ boolean isRequired, int defaultValue, int min, int max) {
+ checkIntValue(key, defaultValue, min, max);
+ String value = map.remove(key);
+ if (value==null) {
+ if (isRequired) {
+ String s = "missing " + key + " argument";
+ throw new IllegalArgumentException(s);
+ }
+ else {
+ return defaultValue;
+ }
+ }
+ else {
+ return parseInt(key, value, min, max);
+ }
+ }
+
+ /**
+ * Removes the specified key from the specified map, and returns the
+ * long value corresponding to the specified key.
+ *
+ * @param key the key
+ * @param map a map of key-value pairs
+ * @param isRequired {@code true} if the specified key
+ * is required to be in the specified map, and {@code false} otherwise
+ * @param defaultValue the value that will be returned if
+ * {@code (isRequired == false && map.get(key) == null)}
+ * @param min the minimum valid long value
+ * @param max the maximum valid long value
+ *
+ * @return the long value corresponding to the specified key
+ *
+ * @throws IllegalArgumentException if {@code min > max}
+ * @throws IllegalArgumentException if
+ * {@code defaultValue < min || defaultValue > max}
+ * @throws IllegalArgumentException if
+ * {@code isRequired == true && map.get(key) == null}
+ * @throws IllegalArgumentException if
+ * {@code map.get(key) != null
+ * && (Long.parseLong(map.get(key)) < min
+ * || Long.parseLong(map.get(key)) > max)}
+ * @throws NumberFormatException if {@code map.get(key) != null}
+ * and {@code map.get(key)} is not a parsable {@code long}
+ * @throws NullPointerException if {@code key == null || map == null}
+ */
+ public static long longArg(String key, Map<String, String> map,
+ boolean isRequired, long defaultValue, long min, long max) {
+ checkLongValue(key, defaultValue, min, max);
+ String value = map.remove(key);
+ if (value==null) {
+ if (isRequired) {
+ String s = "missing " + key + " argument";
+ throw new IllegalArgumentException(s);
+ }
+ else {
+ return defaultValue;
+ }
+ }
+ else {
+ return parseLong(key, value, min, max);
+ }
+ }
+
+ /**
+ * Removes the specified key from the specified map, and returns the
+ * float value corresponding to the specified key.
+ *
+ * @param key the key
+ * @param map a map of key-value pairs
+ * @param isRequired {@code true} if the specified key
+ * is required to be in the specified map, and {@code false} otherwise
+ * @param defaultValue the value that will be returned if
+ * {@code (isRequired == false && map.get(key) == null)}
+ * @param min the minimum valid float value
+ * @param max the maximum valid float value
+ *
+ * @return the float value corresponding to the specified key
+ *
+ * @throws IllegalArgumentException if {@code min > max}
+ * @throws IllegalArgumentException if
+ * {@code defaultValue < min || defaultValue > max
+ * || Float.isNaN(defaultValue)==true}
+ * @throws IllegalArgumentException if
+ * {@code isRequired == true && map.get(key) == null}
+ * @throws IllegalArgumentException if
+ * {@code map.get(key) != null
+ * && (Float.parseFloat(map.get(key)) < min
+ * || Float.parseFloat(map.get(key)) > max
+ * || Float.isNaN(Float.parseFloat(map.get(key))))}
+ * @throws NumberFormatException if {@code map.get(key) != null}
+ * and {@code map.get(key)} is not a parsable {@code float}
+ * @throws NullPointerException if {@code key == null || map == null}
+ */
+ public static float floatArg(String key, Map<String, String> map,
+ boolean isRequired, float defaultValue, float min, float max) {
+ checkFloatValue(key, defaultValue, min, max);
+ String value = map.remove(key);
+ if (value==null) {
+ if (isRequired) {
+ String s = "missing " + key + " argument";
+ throw new IllegalArgumentException(s);
+ }
+ else {
+ return defaultValue;
+ }
+ }
+ else {
+ return parseFloat(key, value, min, max);
+ }
+ }
+
+ /**
+ * Removes the specified key from the specified map, and returns the
+ * double value corresponding to the specified key.
+ *
+ * @param key the key
+ * @param map a map of key-value pairs
+ * @param isRequired {@code true} if the specified key
+ * is required to be in the specified map, and {@code false} otherwise
+ * @param defaultValue the value that will be returned if
+ * {@code (isRequired == false && map.get(key) == null)}
+ * @param min the minimum valid double value
+ * @param max the maximum valid double value
+ *
+ * @return the double value corresponding to the specified key
+ *
+ * @throws IllegalArgumentException if {@code min > max}
+ * @throws IllegalArgumentException if
+ * {@code defaultValue < min || defaultValue > max
+ * || Double.isNaN(defaultValue)==true}
+ * @throws IllegalArgumentException if
+ * {@code isRequired == true && map.get(key) == null}
+ * @throws IllegalArgumentException if
+ * {@code map.get(key) != null
+ * && (Double.parseDouble(map.get(key)) < min
+ * || Double.parseDouble(map.get(key)) > max
+ * || Double.isNaN(Double.parseDouble(map.get(key))))}
+ * @throws NumberFormatException if {@code map.get(key) != null}
+ * and {@code map.get(key)} is not a parsable {@code double}
+ * @throws NullPointerException if {@code key == null || map == null}
+ */
+ public static double doubleArg(String key, Map<String, String> map,
+ boolean isRequired, double defaultValue, double min, double max) {
+ checkDoubleValue(key, defaultValue, min, max);
+ String value = map.remove(key);
+ if (value==null) {
+ if (isRequired) {
+ String s = "missing " + key + " argument";
+ throw new IllegalArgumentException(s);
+ }
+ else {
+ return defaultValue;
+ }
+ }
+ else {
+ return parseDouble(key, value, min, max);
+ }
+ }
+
+ /**
+  * Removes the specified key from the specified map and converts the
+  * removed string value to a {@code boolean}. A removed value {@code v}
+  * yields {@code true} if
+  * {@code (v.equalsIgnoreCase("true") || v.equalsIgnoreCase("t"))}
+  * and yields {@code false} if
+  * {@code (v.equalsIgnoreCase("false") || v.equalsIgnoreCase("f"))}.
+  *
+  * @param key the key
+  * @param map a map of key-value pairs
+  * @param isRequired {@code true} if the specified key must be present
+  * in the specified map, and {@code false} otherwise
+  * @param defaultValue the value returned when the key is absent and
+  * {@code isRequired == false}
+  *
+  * @return the boolean value corresponding to the specified key
+  *
+  * @throws IllegalArgumentException if
+  * {@code isRequired == true && map.get(key) == null}
+  * @throws IllegalArgumentException if the value
+  * {@code (v = map.get(key)) != null} and {@code v} is not one of
+  * "true", "t", "false", or "f" (ignoring case)
+  * @throws NullPointerException if {@code map == null}
+  */
+ public static boolean booleanArg(String key, Map<String, String> map,
+         boolean isRequired, boolean defaultValue) {
+     String text = map.remove(key);
+     if (text != null) {
+         return parseBoolean(text);
+     }
+     if (isRequired == false) {
+         return defaultValue;
+     }
+     throw new IllegalArgumentException("missing " + key + " argument");
+ }
+
+ /**
+  * Removes the specified key from the specified map and returns the
+  * removed string value. The returned value may be {@code null} when the
+  * key is absent, not required, and the default value is {@code null}.
+  * Values are compared with the elements of {@code possibleValues}
+  * ignoring case.
+  *
+  * @param key the key
+  * @param map a map of key-value pairs
+  * @param isRequired {@code true} if the specified key must be present
+  * in the specified map, and {@code false} otherwise
+  * @param defaultValue the value returned when the key is absent and
+  * {@code isRequired == false}
+  * @param possibleValues an array of valid string values, or {@code null}
+  * if no restriction is placed on the value
+  *
+  * @return the string value corresponding to the specified key
+  *
+  * @throws IllegalArgumentException if
+  * {@code isRequired == true && map.get(key) == null}
+  * @throws IllegalArgumentException if
+  * {@code possibleValues != null} and {@code defaultValue} does not
+  * match any element of the {@code possibleValues} array
+  * @throws IllegalArgumentException if
+  * {@code possibleValues != null}, {@code map.get(key) != null}, and
+  * {@code map.get(key)} does not match any element of the
+  * {@code possibleValues} array
+  * @throws NullPointerException if {@code map == null}
+  */
+ public static String stringArg(String key, Map<String, String> map,
+         boolean isRequired, String defaultValue, String[] possibleValues) {
+     // The default is validated first, even when it ends up unused.
+     checkStringValue(key, defaultValue, possibleValues);
+     String text = map.remove(key);
+     if (text != null) {
+         checkStringValue(key, text, possibleValues);
+         return text;
+     }
+     if (isRequired) {
+         throw new IllegalArgumentException("missing " + key + " argument");
+     }
+     return defaultValue;
+ }
+
+ private static int parseInt(String key, String toParse, int min, int max) {
+ try {
+ int i = Integer.parseInt(toParse);
+ checkIntValue(key, i, min, max);
+ return i;
+ }
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException(toParse + " is not a number");
+ }
+ }
+
+ private static long parseLong(String key, String toParse, long min, long max) {
+ try {
+ long l = Long.parseLong(toParse);
+ checkLongValue(key, l, min, max);
+ return l;
+ }
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException(toParse + " is not a number");
+ }
+ }
+
+ private static float parseFloat(String key, String toParse, float min,
+ float max) {
+ try {
+ float f = Float.parseFloat(toParse);
+ checkFloatValue(key, f, min, max);
+ return f;
+ }
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException(toParse + " is not a number");
+ }
+ }
+
+ private static double parseDouble(String key, String toParse, double min,
+ double max) {
+ try {
+ double d = Double.parseDouble(toParse);
+ checkDoubleValue(key, d, min, max);
+ return d;
+ }
+ catch (NumberFormatException e) {
+ throw new IllegalArgumentException(toParse + " is not a number");
+ }
+ }
+
+ /**
+  * Converts "true"/"t" to {@code true} and "false"/"f" to {@code false},
+  * ignoring case.
+  *
+  * @param s the string to be converted
+  * @return the boolean value represented by the specified string
+  * @throws IllegalArgumentException if the string is not one of the four
+  * accepted spellings
+  * @throws NullPointerException if {@code s == null}
+  */
+ private static boolean parseBoolean(String s) {
+     if (s.equalsIgnoreCase("true") || s.equalsIgnoreCase("t")) {
+         return true;
+     }
+     if (s.equalsIgnoreCase("false") || s.equalsIgnoreCase("f")) {
+         return false;
+     }
+     throw new IllegalArgumentException(s + " is not \"true\" or \"false\"");
+ }
+
+ private static void checkIntValue(String key, int value, int min, int max) {
+ String s = null;
+ if (min > max) {
+ s = "min=" + min + " > max=" + max;
+ }
+ else if (value < min) {
+ s = "value=" + value + " < " + min;
+ }
+ else if (value > max) {
+ s = "value=" + value + " > " + max;
+ }
+ if (s != null) {
+ String prefix = "Error in \"" + key + "\" argument: ";
+ throw new IllegalArgumentException(prefix + s);
+ }
+ }
+
+ private static void checkLongValue(String key, long value, long min,
+ long max) {
+ String s = null;
+ if (min > max) {
+ s = "min=" + min + " > max=" + max;
+ }
+ else if (value < min) {
+ s = "value=" + value + " < " + min;
+ }
+ else if (value > max) {
+ s = "value=" + value + " > " + max;
+ }
+ if (s != null) {
+ String prefix = "Error in \"" + key + "\" argument: ";
+ throw new IllegalArgumentException(prefix + s);
+ }
+ }
+
+ private static void checkFloatValue(String key, float value, float min,
+ float max) {
+ String s = null;
+ if (Float.isNaN(value)) {
+ s = "value=" + value;
+ }
+ else if (min > max) {
+ s = "min=" + min + " > max=" + max;
+ }
+ else if (value < min) {
+ s = "value=" + value + " < " + min;
+ }
+ else if (value > max) {
+ s = "value=" + value + " > " + max;
+ }
+ if (s != null) {
+ String prefix = "Error in \"" + key + "\" argument: ";
+ throw new IllegalArgumentException(prefix + s);
+ }
+ }
+
+ private static void checkDoubleValue(String key, double value, double min,
+ double max) {
+ String s = null;
+ if (Double.isNaN(value)) {
+ s = "value=" + value;
+ }
+ else if (min > max) {
+ s = "min=" + min + " > max=" + max;
+ }
+ else if (value < min) {
+ s = "value=" + value + " < " + min;
+ }
+ else if (value > max) {
+ s = "value=" + value + " > " + max;
+ }
+ if (s != null) {
+ String prefix = "Error in \"" + key + "\" argument: ";
+ throw new IllegalArgumentException(prefix + s);
+ }
+ }
+
+ /**
+  * Verifies that {@code value} matches an element of
+  * {@code possibleValues}. Non-null elements are compared ignoring case;
+  * a {@code null} element matches only a {@code null} value. No check is
+  * performed when {@code possibleValues == null}.
+  *
+  * @param key the argument name reported in the error message
+  * @param value the value to be checked
+  * @param possibleValues the permitted values, or {@code null} if any
+  * value is permitted
+  * @throws IllegalArgumentException if {@code possibleValues != null}
+  * and no element of {@code possibleValues} matches {@code value}
+  */
+ private static void checkStringValue(String key, String value,
+         String[] possibleValues) {
+     if (possibleValues == null) {
+         return;
+     }
+     for (String candidate : possibleValues) {
+         boolean matches = (candidate != null)
+                 ? candidate.equalsIgnoreCase(value)
+                 : (value == null);
+         if (matches) {
+             return;
+         }
+     }
+     throw new IllegalArgumentException("Error in \"" + key
+             + "\" argument: \"" + value + "\" is not in "
+             + Arrays.toString(possibleValues));
+ }
+}
diff --git a/blbutil/WrappedIntArray.java b/blbutil/WrappedIntArray.java
new file mode 100644
index 0000000..c4eeb82
--- /dev/null
+++ b/blbutil/WrappedIntArray.java
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code WrappedIntArray} is an {@code IntArray} that is backed
+ * by a private copy of an {@code int[]} array.
+ * </p>
+ * Instances of {@code WrappedIntArray} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class WrappedIntArray implements IntArray {
+
+    private final int[] values;
+
+    /**
+     * Constructs a new {@code WrappedIntArray} instance that stores a
+     * copy of the specified array, so later changes to the caller's
+     * array do not affect this instance.
+     * @param ia an array of integers
+     * @throws NullPointerException if {@code ia == null}
+     */
+    public WrappedIntArray(int[] ia) {
+        this.values = Arrays.copyOf(ia, ia.length);
+    }
+
+    // Number of elements in the wrapped array.
+    @Override
+    public int size() {
+        return values.length;
+    }
+
+    // Element at the specified index; array bounds checking applies.
+    @Override
+    public int get(int index) {
+        return values[index];
+    }
+
+    // Same format as Arrays.toString(int[]).
+    @Override
+    public String toString() {
+        return Arrays.toString(values);
+    }
+}
diff --git a/dag/Dag.java b/dag/Dag.java
new file mode 100644
index 0000000..77b15ea
--- /dev/null
+++ b/dag/Dag.java
@@ -0,0 +1,384 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import vcf.Markers;
+
+/**
+ * <p>Interface {@code Dag} represents a leveled directed acyclic graph (DAG).
+ * </p>
+ * All instances of {@code Dag} are required to be immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface Dag {
+
+ /**
+ * Returns the number of edges at the specified level of the DAG.
+ *
+ * @param level a level of the DAG
+ * @return the number of edges at the specified level of the DAG
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ */
+ public int nEdges(int level);
+
+ /**
+ * Returns the number of parent nodes at the specified level of the DAG.
+ *
+ * @param level a level of the DAG
+ * @return the number of parent nodes at the specified level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ */
+ public int nParentNodes(int level);
+
+ /**
+ * Returns the number of child nodes at the specified level of the DAG.
+ *
+ * @param level a level of the DAG
+ * @return the number of child nodes at the specified level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ */
+ public int nChildNodes(int level);
+
+ /**
+ * Returns the index of the parent node of the specified edge in the DAG.
+ *
+ * @param level a level of the DAG
+ * @param edge the index of an edge at the specified level of the DAG
+ * @return the index of the parent node of the specified edge in the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges(level)}
+ */
+ public int parentNode(int level, int edge);
+
+ /**
+ * Returns the index of the child node of the specified edge in the DAG.
+ *
+ * @param level a level of the DAG
+ * @param edge the index of an edge at the specified level of the DAG
+ * @return the index of the child node of the specified edge in the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges(level)}
+ */
+ public int childNode(int level, int edge);
+
+ /**
+ * Returns the symbol labeling the specified edge of the DAG.
+ *
+ * @param level a level of the DAG
+ * @param edge the index of an edge at the specified level of the DAG
+ * @return the symbol labeling the specified edge of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges(level)}
+ */
+ public int symbol(int level, int edge);
+
+ /**
+ * Returns the sum of the weights of the sequences that pass
+ * through the specified edge of the DAG.
+ *
+ * @param level a level of the DAG
+ * @param edge the index of an edge at the specified level of the DAG
+ * @return the sum of the weights of the sequences that pass
+ * through the specified edge of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges(level)}
+ */
+ public float edgeWeight(int level, int edge);
+
+ /**
+ * Returns the sum of the weights of the sequences that pass
+ * through the specified node of the DAG.
+ *
+ * @param level a level of the DAG
+ * @param parentNode the index of a parent node at the specified level
+ * of the DAG
+ * @return the sum of the weights of the sequences that pass
+ * through the specified node of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes(level)}
+ */
+ public float parentWeight(int level, int parentNode);
+
+ /**
+ * Returns the ratio of the sum of the weights of the sequences that pass
+ * through the specified edge of the DAG and
+ * the sum of the weights of the sequences that pass through the parent
+ * node of the specified edge of the DAG.
+ *
+ * @param level a level of the DAG
+ * @param edge the index of an edge at the specified level of the DAG
+ * @return the ratio of the sum of the weights of the sequences that pass
+ * through the specified edge of the DAG and
+ * the sum of the weights of the sequences that pass through the parent
+ * node of the specified edge of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges(level)}
+ */
+ public float condEdgeProb(int level, int edge);
+
+ /**
+ * Returns the ratio of the sum of the weights of the sequences that pass
+ * through the specified edge of the DAG and the sum of the weights of all
+ * sequences.
+ *
+ * @param level a level of the DAG
+ * @param edge the index of an edge at the specified level of the DAG
+ * @return the ratio of the sum of the weights of the sequences that pass
+ * through the specified edge of the DAG and the sum of the weights of all
+ * sequences
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges(level)}
+ */
+ public float edgeProb(int level, int edge);
+
+ /**
+ * Returns the ratio of the sum of the weights of the sequences that pass
+ * through the specified parent node of the DAG and the sum of the weights
+ * of all sequences.
+ *
+ * @param level a level of the DAG
+ * @param parentNode the index of a parent node at the specified level
+ * of the DAG
+ * @return the ratio of the sum of the weights of the sequences that pass
+ * through the specified parent node of the DAG and the sum of the weights
+ * of all sequences
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes(level)}
+ */
+ public float parentProb(int level, int parentNode);
+
+ /**
+ * Returns the number of levels in the DAG, which is the number of markers.
+ *
+ * @return the number of levels in the DAG, which is the number of markers
+ */
+ public int nLevels();
+
+ /**
+ * Returns the markers represented by this DAG.
+ * @return the markers represented by this DAG
+ */
+ public Markers markers();
+
+ /**
+ * Returns the number of nodes in the DAG.
+ *
+ * @return the number of nodes in the DAG
+ */
+ public long nNodes();
+
+ /**
+ * Returns the number of edges in the DAG.
+ *
+ * @return the number of edges in the DAG
+ */
+ public long nEdges();
+
+ /**
+ * Returns the maximum number of parent nodes at any level of the DAG.
+ *
+ * @return the maximum number of parent nodes at any level of the DAG
+ */
+ public int maxNodes();
+
+ /**
+ * Returns the maximum number of edges at any level of the DAG.
+ *
+ * @return the maximum number of edges at any level of the DAG
+ */
+ public int maxEdges();
+
+ /**
+ * Returns the number of outgoing edges for the specified node of the DAG.
+ *
+ * @param level a level of the DAG
+ * @param parentNode the index of a parent node at the specified
+ * level of the DAG
+ * @return the number of outgoing edges for the specified node of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes(level)}
+ */
+ public int nOutEdges(int level, int parentNode);
+
+ /**
+ * Returns the index of the specified edge in the DAG.
+ *
+ * @param level a level of the DAG
+ * @param parentNode the index of a parent node at the specified
+ * level of the DAG
+ * @param outEdge the index of an outgoing edge of the specified
+ * parent node
+ * @return the index of the specified edge in the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes(level)}
+ * @throws IndexOutOfBoundsException if
+ * {@code outEdge < 0 || outEdge >= this.nOutEdges(level, parentNode)}
+ */
+ public int outEdge(int level, int parentNode, int outEdge);
+
+ /**
+ * Returns the index of the specified edge at the specified level of the
+ * DAG or {@code -1} if no such edge exists.
+ *
+ * @param level a level of the DAG
+ * @param parentNode the index of a parent node at the specified
+ * level of the DAG
+ * @param symbol a symbol labeling an outgoing edge of the specified
+ * parent node of the DAG
+ * @return the index of the specified edge at the specified level of the
+ * DAG or {@code -1} if no such edge exists
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes(level)}
+ * @throws IndexOutOfBoundsException if
+ * {@code symbol < 0 || symbol >= this.marker(level).nAlleles()}
+ */
+ public int outEdgeBySymbol(int level, int parentNode, int symbol);
+
+ /**
+ * Returns the number of ingoing edges for the specified node of the DAG.
+ *
+ * @param level a level of the DAG
+ * @param childNode the index of a child node at the specified
+ * level of the DAG
+ * @return the number of ingoing edges for the specified node of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code childNode < 0 || childNode >= this.nChildNodes(level)}
+ */
+ public int nInEdges(int level, int childNode);
+
+ /**
+ * Returns the index of the specified edge in the DAG.
+ *
+ * @param level a level of the DAG
+ * @param childNode the index of a child node at the specified
+ * level of the DAG
+ * @param inEdge the index of an ingoing edge of the specified
+ * child node in the DAG
+ * @return the index of the specified edge in the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code level < 0 || level >= this.nLevels()}
+ * @throws IndexOutOfBoundsException if
+ * {@code childNode < 0 || childNode >= this.nChildNodes(level)}
+ * @throws IndexOutOfBoundsException if
+ * {@code inEdge < 0 || inEdge >= this.nInEdges(level, childNode)}
+ */
+ public int inEdge(int level, int childNode, int inEdge);
+
+ /**
+ * Returns {@code true} if the child node of the specified parent
+ * edge equals the parent node of the specified child edge and
+ * returns {@code false} otherwise.
+ *
+ * @param parentLevel a level of the DAG
+ * @param parentEdge the index of an edge at the specified level
+ * of the DAG
+ * @param childEdge the index of an edge at level {@code (parentLevel + 1)}
+ * of the DAG
+ * @return {@code true} if the child node of the specified parent
+ * edge equals the parent node of the specified child edge
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code parentLevel < 0 || parentLevel >= (this.nLevels() - 1)}
+ * @throws IndexOutOfBoundsException if
+ * {@code parentEdge < 0 || parentEdge >= this.nEdges(parentLevel)}
+ * @throws IndexOutOfBoundsException if
+ * {@code childEdge < 0 || childEdge >= this.nEdges(parentLevel + 1)}
+ */
+ public boolean isChildOf(int parentLevel, int parentEdge, int childEdge);
+
+ /**
+ * Returns an array of length {@code this.nLevels()} whose {@code j}-th
+ * element is the distance from the root node to
+ * the child node at level {@code j} of the DAG.
+ * The distance from parent node to child node at level {@code lev}
+ * equals {@code -Math.log10(P)} where {@code P} is the weighted conditional
+ * edge probability at level {@code lev}, when each edge {@code e} is
+ * weighted by {@code this.counts(lev, e)}.
+ *
+ * @return an array of length {@code this.nLevels()} whose {@code j}-th
+ * element is the distance from the root node to
+ * the child node at level {@code j} of the DAG
+ */
+ public double[] posArray();
+
+ /**
+ * Returns a description of the specified levels of the DAG. The
+ * exact details of the description are unspecified and subject to change.
+ *
+ * @param start the first level (inclusive)
+ * @param end the last level (exclusive)
+ * @return a description of the specified levels of the DAG
+ *
+ * @throws IllegalArgumentException if
+ * {@code start < 0 || start > end || end >= this.nLevels()}
+ */
+ public String toString(int start, int end);
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ String toString();
+}
diff --git a/dag/DagLevel.java b/dag/DagLevel.java
new file mode 100644
index 0000000..3650608
--- /dev/null
+++ b/dag/DagLevel.java
@@ -0,0 +1,241 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+/**
+ * <p>Interface {@code DagLevel} represents a level of a leveled directed
+ * acyclic graph (DAG).
+ * </p>
+ * <p>All instances of {@code DagLevel} are required to be immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface DagLevel {
+
+ /**
+ * Returns the number of edges at this level of the DAG.
+ *
+ * @return the number of edges at this level of the DAG
+ */
+ public int nEdges();
+
+ /**
+ * Returns the number of parent nodes at this level of the DAG.
+ *
+ * @return the number of parent nodes at this level of the DAG
+ */
+ public int nParentNodes();
+
+ /**
+ * Returns the number of child nodes at this level of the DAG.
+ *
+ * @return the number of child nodes at this level of the DAG
+ */
+ public int nChildNodes();
+
+ /**
+ * Returns the index of the parent node of the specified edge
+ * at this level of the DAG.
+ *
+ * @param edge an edge index
+ * @return the index of the parent node of the specified edge
+ * at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges()}
+ */
+ public int parentNode(int edge);
+
+ /**
+ * Returns the index of the child node of the specified edge
+ * at this level of the DAG.
+ *
+ * @param edge an edge index
+ * @return the index of the child node of the specified edge
+ * at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges()}
+ */
+ public int childNode(int edge);
+
+ /**
+ * Returns the symbol labeling the specified edge at this level
+ * of the DAG.
+ *
+ * @param edge an edge index
+ * @return the symbol labeling the specified edge at this level
+ * of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges()}
+ */
+ public int symbol(int edge);
+
+ /**
+ * Returns the sum of weights for the sequences that pass
+ * through the specified edge at this level of the DAG.
+ *
+ * @param edge an edge index
+ * @return the sum of weights for the sequences that pass
+ * through the specified edge at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges()}
+ */
+ public float edgeWeight(int edge);
+
+ /**
+ * Returns the sum of weights for the sequences that pass
+ * through the specified node at this level of the DAG.
+ *
+ * @param parentNode a parent node index
+ * @return the sum of weights for the sequences that pass
+ * through the specified node at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes()}
+ */
+ public float parentWeight(int parentNode);
+
+ /**
+ * Returns the conditional edge probability, which is defined to be
+ * the ratio of the sum of the weights of the sequences that pass
+ * through the specified edge at this level of the DAG and
+ * the sum of the weights of the sequences that pass through the parent
+ * node of the specified edge.
+ *
+ * @param edge an edge index
+ * @return the conditional edge probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges()}
+ */
+ public float condEdgeProb(int edge);
+
+ /**
+ * Returns the edge probability, which is defined to be the ratio of the
+ * sum of the weights of the sequences that pass through the specified
+ * edge at this level of the DAG and the sum of the weights of the
+ * sequences that pass through any edge at this level of the DAG.
+ *
+ * @param edge an edge index
+ * @return the edge probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code edge < 0 || edge >= this.nEdges()}
+ */
+ public float edgeProb(int edge);
+
+ /**
+ * Returns the parent node probability, which is defined to be the
+ * ratio of the sum of the weights of the sequences that pass through
+ * the specified parent node at this level of the DAG and the sum of
+ * the weights of the sequences that pass through any parent node at this
+ * level of the DAG.
+ *
+ * @param parentNode a parent node index
+ * @return the parent node probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes()}
+ */
+ public float parentProb(int parentNode);
+
+ /**
+ * Returns the number of outgoing edges of the specified parent node
+ * at this level of the DAG.
+ *
+ * @param parentNode a parent node index
+ * @return the number of outgoing edges of the specified parent node
+ * at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes()}
+ */
+ public int nOutEdges(int parentNode);
+
+ /**
+ * Returns the index of the specified edge at this level of the DAG.
+ *
+ * @param parentNode a parent node index
+ * @param outEdge the index of the outgoing edge of the specified
+ * parent node
+ * @return the index of the specified edge at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes()}
+ * @throws IndexOutOfBoundsException if
+ * {@code outEdge < 0 || outEdge >= this.nOutEdges(parentNode)}
+ */
+ public int outEdge(int parentNode, int outEdge);
+
+ /**
+ * Returns the index of the specified edge at this level of the
+ * DAG or {@code -1} if no such edge exists.
+ *
+ * @param parentNode a parent node index
+ * @param symbol a symbol labeling an outgoing edge of the specified
+ * parent node
+ * @return the index of the specified edge at this level of the
+ * DAG or {@code -1} if no such edge exists
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code parentNode < 0 || parentNode >= this.nParentNodes()}
+ */
+ public int outEdgeBySymbol(int parentNode, int symbol);
+
+ /**
+ * Returns the number of ingoing edges for the specified child node
+ * at this level of the DAG.
+ *
+ * @param childNode a child node index
+ * @return the number of ingoing edges for the specified child node
+ * at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code childNode < 0 || childNode >= this.nChildNodes()}
+ */
+ public int nInEdges(int childNode);
+
+ /**
+ * Returns the index of the specified edge at this level of the DAG.
+ *
+ * @param childNode index of the child node
+ * @param inEdge index of an ingoing edge of the specified child node
+ * @return the index of the specified edge at this level of the DAG
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code childNode < 0 || childNode >= this.nChildNodes()}
+ * @throws IndexOutOfBoundsException if
+ * {@code inEdge < 0 || inEdge >= this.nInEdges(childNode)}
+ */
+ public int inEdge(int childNode, int inEdge);
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString();
+
+}
diff --git a/dag/DagUtil.java b/dag/DagUtil.java
new file mode 100644
index 0000000..d72946e
--- /dev/null
+++ b/dag/DagUtil.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import blbutil.Const;
+import java.text.DecimalFormat;
+
+/**
+ * <p>Class {@code DagUtil} contains static, thread-safe methods for
+ * removing elements of an array that have a specified value and
+ * for creating a string representation of a DAG.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class DagUtil {
+
+ private DagUtil() {
+ // private constructor to prevent instantiation.
+ }
+
+ /**
+ * Return a string description of the specified {@code DAG} object. The
+ * exact details of the description are unspecified and subject
+ * to change.
+ *
+ * @param dag a directed acyclic graph
+ * @return a string description of the specified {@code DAG}
+ * @throws NullPointerException if {@code dag == null}
+ */
+ public static String dagStats(Dag dag) {
+     final int fieldWidth = 7;
+     DecimalFormat threeDecimals = new DecimalFormat("0.000");
+     double nEdges = dag.nEdges();
+     double nNodes = dag.nNodes();
+     double nLevels = dag.nLevels();
+
+     // Math.round(double) already yields a long; the former (int) cast could
+     // only truncate the mean before it was widened back to a long.
+     long meanEdgesPerLevel = Math.round(nEdges / nLevels);
+     double meanEdgesPerNode = nEdges / nNodes;
+     // Weight of parent node 0 at level 0 divided by the mean edges per level
+     // (float division, rounded to the nearest int).
+     int meanCountPerEdge = Math.round(dag.parentWeight(0, 0) / meanEdgesPerLevel);
+
+     String edgesPerLevel = String.valueOf(meanEdgesPerLevel);
+     String maxEdgesPerLevel = String.valueOf(dag.maxEdges());
+     String edgesPerNode = threeDecimals.format(meanEdgesPerNode);
+     String countPerEdge = String.valueOf(meanCountPerEdge);
+
+     StringBuilder sb = new StringBuilder(100);
+
+     // first row: the left column is padded to fieldWidth characters
+     sb.append("mean edges/level: ");
+     sb.append(edgesPerLevel);
+     padField(sb, fieldWidth - edgesPerLevel.length());
+     sb.append("max edges/level: ");
+     sb.append(maxEdgesPerLevel);
+     sb.append(Const.nl);
+
+     // second row, same layout
+     sb.append("mean edges/node: ");
+     sb.append(edgesPerNode);
+     padField(sb, fieldWidth - edgesPerNode.length());
+     sb.append("mean count/edge: ");
+     sb.append(countPerEdge);
+     sb.append(Const.nl);
+
+     return sb.toString();
+ }
+
+ /*
+  * Appends nSpaces space characters to sb. Nothing is appended when
+  * nSpaces is zero or negative.
+  */
+ private static void padField(StringBuilder sb, int nSpaces) {
+     int remaining = nSpaces;
+     while (remaining > 0) {
+         sb.append(" ");
+         --remaining;
+     }
+ }
+
+ /**
+ * Returns the number of the elements in the specified array that
+ * equal the specified value.
+ * @param array an array of integers
+ * @param value an integer value
+ * @return the number of the elements in the specified array that
+ * equal the specified value
+ *
+ * @throws NullPointerException if {@code array == null}
+ */
+ private static int count(int[] array, int value) {
+     int matches = 0;
+     for (int j = 0, n = array.length; j < n; ++j) {
+         matches += (array[j] == value) ? 1 : 0;
+     }
+     return matches;
+ }
+
+ /**
+ * Returns the number of the elements in the specified array that
+ * equal the specified value.
+ * @param array an array of float values
+ * @param value a float value
+ * @return the number of the elements in the specified array that
+ * equal the specified value
+ *
+ * @throws IllegalArgumentException if {@code Float.isNaN(value) == true}
+ * @throws NullPointerException if {@code array == null}
+ */
+ private static int count(float[] array, float value) {
+     // NaN never compares equal to anything, so it is rejected up front
+     if (Float.isNaN(value)) {
+         throw new IllegalArgumentException(String.valueOf(value));
+     }
+     int matches = 0;
+     for (int j = 0, n = array.length; j < n; ++j) {
+         matches += (array[j] == value) ? 1 : 0;
+     }
+     return matches;
+ }
+
+ /**
+ * Returns an array obtained by removing all elements in the
+ * specified array that equal the specified value.
+ * @param array an array of integers
+ * @param value an integer value
+ * @return an array obtained by removing all elements in the
+ * specified array that equal the specified value
+ *
+ * @throws NullPointerException if {@code array == null}
+ */
+ public static int[] removeValues(int[] array, int value) {
+     // size the result exactly: one slot per element that survives
+     int nKept = array.length - DagUtil.count(array, value);
+     int[] kept = new int[nKept];
+     int next = 0;
+     for (int element : array) {
+         if (element == value) {
+             continue;
+         }
+         kept[next] = element;
+         ++next;
+     }
+     assert next == kept.length;
+     return kept;
+ }
+
+ /**
+ * Returns an array obtained by removing all elements in the
+ * specified array that equal the specified value.
+ * @param array an array of float values
+ * @param value a float value
+ * @return an array obtained by removing all elements in the
+ * specified array that equal the specified value
+ *
+ * @throws IllegalArgumentException if {@code Float.isNaN(value) == true}
+ * @throws NullPointerException if {@code array == null}
+ */
+ public static float[] removeValues(float[] array, float value) {
+     // NaN never compares equal to anything, so it is rejected up front
+     if (Float.isNaN(value)) {
+         throw new IllegalArgumentException(String.valueOf(value));
+     }
+     // size the result exactly: one slot per element that survives
+     int nKept = array.length - DagUtil.count(array, value);
+     float[] kept = new float[nKept];
+     int next = 0;
+     for (float element : array) {
+         if (element == value) {
+             continue;
+         }
+         kept[next] = element;
+         ++next;
+     }
+     assert next == kept.length;
+     return kept;
+ }
+}
diff --git a/dag/HighCapacityDagLevel.java b/dag/HighCapacityDagLevel.java
new file mode 100644
index 0000000..be5099f
--- /dev/null
+++ b/dag/HighCapacityDagLevel.java
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import blbutil.Const;
+import blbutil.IntSet;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code HighCapacityDagLevel} represents a level of a leveled
+ * directed acyclic graph (DAG) that can contain up to
+ * {@code Integer.MAX_VALUE} edges.
+ * </p>
+ * <p>Instances of {@code HighCapacityDagLevel} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class HighCapacityDagLevel implements DagLevel {
+
+/*
+ * The k-th edge parent node index is stored in {@code this.parentNodes[k]}.
+ * The k-th edge child node index is stored in {@code this.childNodes[k]}.
+ * The k-th edge symbol is stored in {@code this.symbols[k]}.
+ * The k-th edge count is stored in {@code this.edgeCounts[k]}.
+ * The k-th edge conditional edge probability is stored in
+ * {@code this.condEdgeProbs[k]}, and is defined to be the
+ * k-th edge count divided by the k-th edge's parent node count.
+ * The k-th parent node count is stored in {@code this.parentCounts[k]}.
+ *
+ * The outgoing edges indices of the k-th parent node are stored in consecutive
+ * entries of {@code this.parents} beginning with
+ * {@code this.parentIndices[k]} (inclusive) and ending with
+ * {@code this.parentIndices[k+1]} (exclusive).
+ *
+ * The ingoing edges indices of the k-th child node are stored in consecutive
+ * entries of {@code this.children} beginning with
+ * {@code this.childIndices[k]} (inclusive) and ending with
+ * {@code this.childIndices[k+1]} (exclusive).
+ */
+ private final float count;
+ private final int[] parentNodes;
+ private final int[] childNodes;
+ private final int[] parentIndices;
+ private final int[] parents;
+ private final int[] childIndices;
+ private final int[] children;
+ private final int[] symbols;
+ private final float[] edgeCounts;
+ private final float[] condEdgeProbs;
+ private final float[] parentCounts;
+
+ /**
+ * Constructs a new {@code HighCapacityDagLevel} instance from the
+ * specified data.
+ *
+ * @param parentNodes an array mapping edge index to parent node index
+ * @param childNodes an array mapping edge index to child node index
+ * @param symbols an array mapping edge index to the symbol labeling the
+ * edge
+ * @param counts an array mapping edge index to edge count
+ *
+ * @throws IllegalArgumentException if the specified arrays do not all
+ * have the same length
+ * @throws IllegalArgumentException if any element of the symbols array
+ * is negative
+ * @throws IllegalArgumentException if any two edges have the same
+ * parent node and are both labeled with the same symbol
+ * @throws IllegalArgumentException if the set of values of the
+ * {@code parentNodes} array is not equal to {@code {0, 1, 2, ..., k}} for
+ * some {@code k}
+ * @throws IllegalArgumentException if the set of values of the
+ * {@code childNodes} array is not equal to {@code {0, 1, 2, ..., k}}
+ * for some {@code k}
+ *
+ * @throws NullPointerException if any parameter is {@code null}
+ */
+ public HighCapacityDagLevel(int[] parentNodes, int[] childNodes,
+ int[] symbols, float[] counts) {
+ // all four arrays must have one entry per edge
+ int nEdges = checkLengths(parentNodes, childNodes, symbols, counts);
+ // entries k and k+1 of an indices array delimit node k's slice of
+ // this.parents / this.children
+ this.parentIndices = getIndicesArray(parentNodes);
+ this.childIndices = getIndicesArray(childNodes);
+ // defensive copies of the caller's arrays
+ this.parentNodes = parentNodes.clone();
+ this.childNodes = childNodes.clone();
+ this.symbols = symbols.clone();
+ this.edgeCounts = counts.clone();
+ this.condEdgeProbs = new float[nEdges];
+ this.parents = new int[nEdges];
+ this.children = new int[nEdges];
+
+ // copies of the slice start offsets (last entry dropped); they are
+ // advanced in the loop below and serve as per-node write cursors
+ int[] pIndices = Arrays.copyOfRange(parentIndices, 0,
+ parentIndices.length-1);
+ int[] cIndices = Arrays.copyOfRange(childIndices, 0,
+ childIndices.length-1);
+ // per-parent-node totals of the edge counts, and their grand total
+ this.parentCounts = parentCnts(parentNodes, counts, pIndices.length);
+ this.count = sum(this.parentCounts);
+
+ for (int j=0; j<nEdges; ++j) {
+ int p = parentNodes[j];
+ int c = childNodes[j];
+ // file edge j in the next free slot of its parent's and child's slice
+ this.parents[pIndices[p]++] = j;
+ this.children[cIndices[c]++] = j;
+ // edge count divided by the count of the edge's parent node
+ this.condEdgeProbs[j] = counts[j] / parentCounts[p];
+ }
+ // runs last because it walks the completed parents array
+ checkForDuplicateOutEdges(parentIndices, parents, symbols);
+ }
+
+ /*
+  * Returns the common length of the four arrays, or throws an
+  * IllegalArgumentException if their lengths differ.
+  */
+ private static int checkLengths(int[] parentNodes, int[] childNodes,
+         int[] symbols, float[] counts) {
+     int n = parentNodes.length;
+     boolean sameLength = (childNodes.length == n)
+             && (symbols.length == n)
+             && (counts.length == n);
+     if (!sameLength) {
+         throw new IllegalArgumentException("inconsistent arrays");
+     }
+     return n;
+ }
+
+ /*
+  * Throws an IllegalArgumentException if two outgoing edges of the same
+  * parent node carry the same symbol. Presumably IntSet.add returns
+  * false when the value is already present - confirm against IntSet.
+  */
+ private static void checkForDuplicateOutEdges(int[] parentIndices,
+         int[] parents, int[] symbols) {
+     IntSet seenSymbols = new IntSet(symbols.length);
+     int nParents = parentIndices.length - 1;
+     for (int node = 0; node < nParents; ++node) {
+         // start afresh for every parent node
+         seenSymbols.clear();
+         int end = parentIndices[node + 1];
+         for (int k = parentIndices[node]; k < end; ++k) {
+             boolean isNew = seenSymbols.add(symbols[parents[k]]);
+             if (!isNew) {
+                 throw new IllegalArgumentException("duplicate edge");
+             }
+         }
+     }
+ }
+
+ /*
+  * Returns the prefix sums of elementCounts(nodes): element 0 is 0 and
+  * element k+1 is the number of entries of nodes with value at most k.
+  */
+ private static int[] getIndicesArray(int[] nodes) {
+     int[] perNode = elementCounts(nodes);
+     int[] starts = new int[perNode.length + 1];
+     int running = 0;
+     for (int node = 0; node < perNode.length; ++node) {
+         assert perNode[node] > 0;
+         running += perNode[node];
+         starts[node + 1] = running;
+     }
+     return starts;
+ }
+
+ /*
+  * Returns an array of length {@code max(array) + 1}
+  * whose {@code j}-th element is the number of
+  * elements of the specified array that have value {@code j}.
+  *
+  * @param array an array of non-negative values
+  * @return an array whose {@code j}-th element is the number of
+  * elements of the specified array that have value {@code j}
+  * @throws IllegalArgumentException if the set of elements of the
+  * specified array is not equal to {@code {0, 1, 2, ..., k}} for some
+  * {@code k}; this includes an empty array and any negative element
+  */
+ private static int[] elementCounts(int[] array) {
+     int maxNode = max(array);
+     int[] nodeCounts = new int[maxNode + 1];
+     for (int i : array) {
+         if (i < 0) {
+             // without this check a negative value would escape as an
+             // ArrayIndexOutOfBoundsException from the increment below
+             throw new IllegalArgumentException("negative element: " + i);
+         }
+         ++nodeCounts[i];
+     }
+     // every value in 0..maxNode must occur at least once
+     for (int j=0; j<nodeCounts.length; ++j) {
+         if (nodeCounts[j]==0) {
+             throw new IllegalArgumentException("no element with value " + j);
+         }
+     }
+     return nodeCounts;
+ }
+
+ /*
+  * Returns the largest element of ia, or 0 if ia is empty or contains
+  * no positive element.
+  */
+ private static int max(int[] ia) {
+     int largest = 0;
+     for (int j = 0; j < ia.length; ++j) {
+         largest = Math.max(largest, ia[j]);
+     }
+     return largest;
+ }
+
+ /* Returns the sum of the elements of fa, added in index order. */
+ private float sum(float[] fa) {
+     float total = 0.0f;
+     for (int j = 0; j < fa.length; ++j) {
+         total += fa[j];
+     }
+     return total;
+ }
+
+ /*
+  * Returns an array of length nNodes whose p-th element is the sum of
+  * counts[j] over all edges j with parentNodes[j] == p.
+  *
+  * The method is static and iterates over counts.length so that it does
+  * not read an instance field while the constructor is still running.
+  */
+ private static float[] parentCnts(int[] parentNodes, float[] counts,
+         int nNodes) {
+     float[] parentCnts = new float[nNodes];
+     for (int j=0; j<counts.length; ++j) {
+         int p = parentNodes[j];
+         parentCnts[p] += counts[j];
+     }
+     return parentCnts;
+ }
+
+ @Override
+ public int nEdges() {
+ return condEdgeProbs.length;
+ }
+
+ @Override
+ public int nParentNodes() {
+ return parentIndices.length - 1;
+ }
+
+ @Override
+ public int nChildNodes() {
+ return childIndices.length - 1;
+ }
+
+ @Override
+ public int parentNode(int edge) {
+ return parentNodes[edge];
+ }
+
+ @Override
+ public int childNode(int edge) {
+ return childNodes[edge];
+ }
+
+ @Override
+ public int symbol(int edge) {
+ return symbols[edge];
+ }
+
+ @Override
+ public float edgeWeight(int edge) {
+ return edgeCounts[edge];
+ }
+
+ @Override
+ public float parentWeight(int parentNode) {
+ return parentCounts[parentNode];
+ }
+
+ @Override
+ public float condEdgeProb(int edge) {
+ return condEdgeProbs[edge];
+ }
+
+ @Override
+ public float edgeProb(int edge) {
+ return (edgeCounts[edge] / count);
+ }
+
+ @Override
+ public float parentProb(int node) {
+ return (parentCounts[node] / count);
+ }
+
+ @Override
+ public int nOutEdges(int parentNode) {
+ return (parentIndices[parentNode+1] - parentIndices[parentNode]);
+ }
+
+ @Override
+ public int outEdge(int parentNode, int outEdgeIndex) {
+ if (outEdgeIndex<0 || outEdgeIndex>=nOutEdges(parentNode)) {
+ throw new IndexOutOfBoundsException(String.valueOf(outEdgeIndex));
+ }
+ return (parents[parentIndices[parentNode] + outEdgeIndex]);
+ }
+
+ @Override
+ public int outEdgeBySymbol(int parentNode, int symbol) {
+     int slot = parentIndices[parentNode];
+     final int end = parentIndices[parentNode + 1];
+     // linear scan of the parent's out-edges for a matching label
+     while (slot < end) {
+         int edge = parents[slot];
+         if (symbols[edge] == symbol) {
+             return edge;
+         }
+         ++slot;
+     }
+     return -1;
+ }
+
+ @Override
+ public int nInEdges(int childNode) {
+ return (childIndices[childNode+1] - childIndices[childNode]);
+ }
+
+ @Override
+ public int inEdge(int childNode, int inEdgeIndex) {
+ if (inEdgeIndex<0 || inEdgeIndex>=nInEdges(childNode)) {
+ throw new IndexOutOfBoundsException(String.valueOf(inEdgeIndex));
+ }
+ return this.children[childIndices[childNode] + inEdgeIndex];
+ }
+
+ @Override
+ public String toString() {
+     // one "name=[...]" line per backing array, each preceded by a newline
+     return new StringBuilder(500)
+             .append(Const.nl)
+             .append("parentNodes=").append(Arrays.toString(parentNodes))
+             .append(Const.nl)
+             .append("childNodes=").append(Arrays.toString(childNodes))
+             .append(Const.nl)
+             .append("symbols=").append(Arrays.toString(symbols))
+             .append(Const.nl)
+             .append("condEdgeProbs=").append(Arrays.toString(condEdgeProbs))
+             .append(Const.nl)
+             .append("parentCounts=").append(Arrays.toString(parentCounts))
+             .append(Const.nl)
+             .append("edgeCounts=").append(Arrays.toString(edgeCounts))
+             .append(Const.nl)
+             .append("parentIndices=").append(Arrays.toString(parentIndices))
+             .append(Const.nl)
+             .append("parents=").append(Arrays.toString(parents))
+             .append(Const.nl)
+             .append("childIndices=").append(Arrays.toString(childIndices))
+             .append(Const.nl)
+             .append("children=").append(Arrays.toString(children))
+             .toString();
+ }
+}
diff --git a/dag/ImmutableDag.java b/dag/ImmutableDag.java
new file mode 100644
index 0000000..4c3cb08
--- /dev/null
+++ b/dag/ImmutableDag.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import vcf.Markers;
+
+/**
+ * <p>Class {@code ImmutableDag} represents a leveled Directed Acyclic Graph
+ * (DAG).
+ * </p>
+ * <p>Instances of class {@code ImmutableDag} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class ImmutableDag implements Dag {
+
+ private final Markers markers;
+ private final long nNodes; // total number of nodes
+ private final long nEdges; // total number of edges
+ private final int maxNodes; // maximum number of nodes on any level
+ private final int maxEdges; // maximum number of edges on any level
+
+ private final DagLevel[] dagLevels;
+ private final double[] posArray;
+
+ /**
+ * Constructs a new {@code ImmutableDag} instance from the specified data.
+ * @param markers the markers
+ * @param levels the levels of the leveled DAG
+ * @throws IllegalArgumentException if {@code levels.length == 0}
+ * @throws IllegalArgumentException if {@code levels[0].nParentNodes() != 1}
+ * @throws IllegalArgumentException if
+ * {@code levels[j-1].nChildNodes() != levels[j].nParentNodes()} for any
+ * {@code j} satisfying {@code 0 < j && j < levels.length}
+ * @throws NullPointerException if
+ * {@code (markers == null || levels==null)}, or if
+ * {@code levels[j] == null} for any
+ * {@code j} satisfying {@code 0 <= j && j < levels.length}
+ */
+ public ImmutableDag(Markers markers, DagLevel[] levels) {
+     if (markers == null) {
+         // documented contract; a null would otherwise be stored silently
+         throw new NullPointerException("markers");
+     }
+     if (levels.length==0) {
+         throw new IllegalArgumentException("levels.length==0");
+     }
+     if (levels[0].nParentNodes()!=1) {
+         throw new IllegalArgumentException("levels[0].nParentNodes()!=1");
+     }
+     // Totals are kept as long to match the nNodes/nEdges fields: summing
+     // per-level int counts into an int can overflow on a large DAG.
+     long cumulativeNodeCnt = 1;   // the single parent node of level 0
+     long cumulativeEdgeCnt = 0;
+     int maxNodesPerLevel = 0;
+     int maxEdgesPerLevel = 0;
+     double[] pos = new double[levels.length];
+     for (int j=0; j<levels.length; ++j) {
+         // child nodes of one level must be the parent nodes of the next
+         if (j>0 && levels[j-1].nChildNodes()!=levels[j].nParentNodes()) {
+             throw new IllegalArgumentException("inconsistent levels");
+         }
+         cumulativeNodeCnt += levels[j].nChildNodes();
+         cumulativeEdgeCnt += levels[j].nEdges();
+         if (levels[j].nChildNodes() > maxNodesPerLevel) {
+             maxNodesPerLevel = levels[j].nChildNodes();
+         }
+         if (levels[j].nEdges() > maxEdgesPerLevel) {
+             maxEdgesPerLevel = levels[j].nEdges();
+         }
+         // pos[j] is the running sum of the per-level scores
+         double d = minusLog10CondEdgeProb(levels[j]);
+         pos[j] = (j==0) ? d : (pos[j-1] + d);
+     }
+     this.dagLevels = levels.clone();
+     this.posArray = pos;
+     this.markers = markers;
+     this.nNodes = cumulativeNodeCnt;
+     this.nEdges = cumulativeEdgeCnt;
+     this.maxEdges = maxEdgesPerLevel;
+     this.maxNodes = maxNodesPerLevel;
+ }
+
+ /*
+  * Returns minus the base-10 logarithm of the sum over all edges of
+  * edgeProb(e) * condEdgeProb(e), with negative results replaced by 0.0.
+  */
+ private static double minusLog10CondEdgeProb(DagLevel level) {
+     float meanScore = 0.0f;
+     int nEdges = level.nEdges();
+     for (int edge = 0; edge < nEdges; ++edge) {
+         float edgeProb = level.edgeProb(edge);
+         float condProb = level.condEdgeProb(edge);
+         meanScore += edgeProb * condProb;
+     }
+     double minusLog = -Math.log10(meanScore);
+     if (minusLog < 0) {
+         return 0.0;
+     }
+     return minusLog;
+ }
+
+ @Override
+ public int nEdges(int level) {
+ return dagLevels[level].nEdges();
+ }
+
+ @Override
+ public int nParentNodes(int level) {
+ return dagLevels[level].nParentNodes();
+ }
+
+ @Override
+ public int nChildNodes(int level) {
+ return dagLevels[level].nChildNodes();
+ }
+
+ @Override
+ public int parentNode(int level, int edge) {
+ return dagLevels[level].parentNode(edge);
+ }
+
+ @Override
+ public int childNode(int level, int edge) {
+ return dagLevels[level].childNode(edge);
+ }
+
+ @Override
+ public int symbol(int level, int edge) {
+ return dagLevels[level].symbol(edge);
+ }
+
+ @Override
+ public float edgeWeight(int level, int edge) {
+ return dagLevels[level].edgeWeight(edge);
+ }
+
+
+ @Override
+ public float parentWeight(int level, int parentNode) {
+ return dagLevels[level].parentWeight(parentNode);
+ }
+
+ @Override
+ public float condEdgeProb(int level, int edge) {
+ return dagLevels[level].condEdgeProb(edge);
+ }
+
+
+ @Override
+ public float edgeProb(int level, int edge) {
+ return dagLevels[level].edgeProb(edge);
+ }
+
+ @Override
+ public float parentProb(int level, int node) {
+ return dagLevels[level].parentProb(node);
+ }
+
+ @Override
+ public int nLevels() {
+ return dagLevels.length;
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public long nNodes() {
+ return nNodes;
+ }
+
+ @Override
+ public long nEdges() {
+ return nEdges;
+ }
+
+ @Override
+ public int maxNodes() {
+ return maxNodes;
+ }
+
+ @Override
+ public int maxEdges() {
+ return maxEdges;
+ }
+
+ @Override
+ public int nOutEdges(int level, int parentNode) {
+ return dagLevels[level].nOutEdges(parentNode);
+ }
+
+ @Override
+ public int outEdge(int level, int parentNode, int outEdge) {
+ return dagLevels[level].outEdge(parentNode, outEdge);
+ }
+
+ @Override
+ public int outEdgeBySymbol(int level, int parentNode, int symbol) {
+ return dagLevels[level].outEdgeBySymbol(parentNode, symbol);
+ }
+
+ @Override
+ public int nInEdges(int level, int childNode) {
+ return dagLevels[level].nInEdges(childNode);
+ }
+
+ @Override
+ public int inEdge(int level, int childNode, int inEdge) {
+ return dagLevels[level].inEdge(childNode, inEdge);
+ }
+
+ @Override
+ public boolean isChildOf(int parentLevel, int parentEdge, int childEdge) {
+     // the child edge must start at the node where the parent edge ends
+     return dagLevels[parentLevel + 1].parentNode(childEdge)
+             == dagLevels[parentLevel].childNode(parentEdge);
+ }
+
+ @Override
+ public double[] posArray() {
+ return posArray.clone();
+ }
+
+ @Override
+ public String toString(int startLevel, int endLevel) {
+ String nl = System.getProperty("line.separator");
+ StringBuilder sb = new StringBuilder(1000);
+ for (int j=startLevel; j<endLevel; ++j) {
+ sb.append("level=");
+ sb.append(j);
+ sb.append(": ");
+ sb.append(dagLevels[j]);
+ sb.append(nl);
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public String toString() {
+ String nl = System.getProperty("line.separator");
+ StringBuilder sb = new StringBuilder(2000);
+ sb.append("[Dag: nMarkers=");
+ sb.append(dagLevels.length);
+ sb.append(" nodes=");
+ sb.append(nNodes);
+ sb.append(" edges=");
+ sb.append(nEdges);
+ sb.append(" maxNodes=");
+ sb.append((int) maxNodes);
+ sb.append(" maxEdges=");
+ sb.append((int) maxEdges);
+ sb.append(nl);
+ for (int j=0; j<dagLevels.length; ++j) {
+ sb.append(nl);
+ sb.append("level=");
+ sb.append(j);
+ sb.append(": ");
+ sb.append(dagLevels[j]);
+ sb.append(nl);
+ }
+ return sb.toString();
+ }
+}
diff --git a/dag/LinkageEquilibriumDag.java b/dag/LinkageEquilibriumDag.java
new file mode 100644
index 0000000..46ce9d7
--- /dev/null
+++ b/dag/LinkageEquilibriumDag.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import blbutil.Const;
+import java.util.Arrays;
+import vcf.GL;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code LinkageEquilibriumDag} represents a leveled DAG with one parent
+ * node at each level.
+ * </p>
+ * <p>Instances of class {@code LinkageEquilibriumDag} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class LinkageEquilibriumDag implements Dag {
+
+ private final Markers markers;
+ private final float[][] alleleFreq;
+ private final int maxAlleles;
+ private final int sumAlleles;
+
+ /**
+ * Constructs a new {@code LinkageEquilibriumDag} instance that represents
+ * markers in linkage equilibrium, with one level per marker,
+ * one parent node per level, one edge per allele at each level,
+ * and edge count equal to the estimated allele frequency.
+ * @param gl the genotype emission probabilities which determine
+ * the estimated allele frequencies
+ * @param minFreq the minimum allele frequency that will be used
+ * @throws IllegalArgumentException if
+ * {@code minFreq <= 0.0f || minFreq >= 0.5f || Float.isNaN(minFreq) == true}
+ * @throws NullPointerException if {@code gl == null}
+ */
+ public LinkageEquilibriumDag(GL gl, float minFreq) {
+     // NaN fails both comparisons and is therefore rejected as well
+     boolean validMinFreq = (minFreq > 0.0f) && (minFreq < 0.5f);
+     if (!validMinFreq) {
+         throw new IllegalArgumentException(String.valueOf(minFreq));
+     }
+     int nMarkers = gl.nMarkers();
+     this.markers = gl.markers();
+     this.alleleFreq = new float[nMarkers][];
+     int widest = 0;
+     for (int m = 0; m < nMarkers; ++m) {
+         float[] freq = alleleFrequencies(gl, m, minFreq);
+         alleleFreq[m] = freq;
+         widest = Math.max(widest, freq.length);
+     }
+     this.maxAlleles = widest;
+     this.sumAlleles = gl.markers().sumAlleles();
+ }
+
+ /*
+  * Estimates the allele frequencies at one marker: each sample's genotype
+  * values are turned into per-allele weights that sum to 1, the weights
+  * are added over samples and normalized, and finally every frequency
+  * below minFreq is raised to minFreq.
+  */
+ private static float[] alleleFrequencies(GL gl, int marker,
+ float minFreq) {
+ int nSamples = gl.nSamples();
+ int nAlleles = gl.marker(marker).nAlleles();
+ float[] alleleFreq = new float[nAlleles];
+ // per-sample scratch array; reset to zero after each sample
+ float[] scaledFreq = new float[nAlleles];
+ for (int sample=0; sample<nSamples; ++sample) {
+ for (int a1=0; a1<nAlleles; ++a1) {
+ for (int a2=0; a2<nAlleles; ++a2) {
+ // every ordered genotype (a1, a2) credits both of its alleles,
+ // so a homozygous genotype credits the same allele twice
+ float likelihood = gl.gl(marker, sample, a1, a2);
+ scaledFreq[a1] += likelihood;
+ scaledFreq[a2] += likelihood;
+ }
+ }
+ // NOTE(review): if every gl value for a sample is 0 this divides
+ // 0 by 0 - confirm that GL cannot return such a sample
+ divideEntriesBySum(scaledFreq);
+ for (int j=0; j<scaledFreq.length; ++j) {
+ alleleFreq[j] += scaledFreq[j];
+ scaledFreq[j] = 0.0f;
+ }
+ }
+ divideEntriesBySum(alleleFreq);
+ enforceMinFrequency(alleleFreq, minFreq);
+ return alleleFreq;
+ }
+
+ /* Divides every element of fa, in place, by the sum of all elements. */
+ private static void divideEntriesBySum(float[] fa) {
+     float total = 0.0f;
+     for (int j = 0; j < fa.length; ++j) {
+         total += fa[j];
+     }
+     for (int j = 0; j < fa.length; ++j) {
+         fa[j] = fa[j] / total;
+     }
+ }
+
+ /*
+  * Raises every frequency below minFreq to minFreq and, if any value
+  * was raised, rescales the array to sum to 1 again.
+  */
+ private static void enforceMinFrequency(float[] alleleFreq, float minFreq) {
+     int nRaised = 0;
+     for (int allele = 0; allele < alleleFreq.length; ++allele) {
+         if (alleleFreq[allele] < minFreq) {
+             alleleFreq[allele] = minFreq;
+             ++nRaised;
+         }
+     }
+     if (nRaised > 0) {
+         divideEntriesBySum(alleleFreq);
+     }
+ }
+
+ /* Throws an IllegalArgumentException unless 0 <= level < number of levels. */
+ private void checkLevel(int level) {
+     boolean valid = (level >= 0) && (level < alleleFreq.length);
+     if (!valid) {
+         throw new IllegalArgumentException("level: " + level);
+     }
+ }
+
+ /*
+  * Throws an IndexOutOfBoundsException unless edge is a valid edge index
+  * at the given level. The level itself is not range-checked here.
+  */
+ private void checkEdge(int level, int edge) {
+     boolean valid = (edge >= 0) && (edge < alleleFreq[level].length);
+     if (!valid) {
+         throw new IndexOutOfBoundsException("edge: " + edge);
+     }
+ }
+
+ /*
+  * Validates the level, then throws an IndexOutOfBoundsException unless
+  * node is 0, the only node index accepted at any level.
+  */
+ private void checkParentNode(int level, int node) {
+     checkLevel(level);
+     if (node == 0) {
+         return;
+     }
+     throw new IndexOutOfBoundsException("node: " + node);
+ }
+
+ @Override
+ public int nEdges(int level) {
+ return alleleFreq[level].length;
+ }
+
+ @Override
+ public int nParentNodes(int level) {
+ checkLevel(level);
+ return 1;
+ }
+
+ @Override
+ public int nChildNodes(int level) {
+ checkLevel(level);
+ return 1;
+ }
+
+ @Override
+ public int parentNode(int level, int edge) {
+ checkEdge(level, edge);
+ return 0;
+ }
+
+ @Override
+ public int childNode(int level, int edge) {
+ checkEdge(level, edge);
+ return 0;
+ }
+
+ @Override
+ public int symbol(int level, int edge) {
+ checkEdge(level, edge);
+ return edge;
+ }
+
+ @Override
+ public float edgeWeight(int level, int edge) {
+ return alleleFreq[level][edge];
+ }
+
+ @Override
+ public float parentWeight(int level, int parentNode) {
+ checkParentNode(level, parentNode);
+ return 1.0f;
+ }
+
+ @Override
+ public float condEdgeProb(int level, int edge) {
+ return alleleFreq[level][edge];
+ }
+
+ @Override
+ public float edgeProb(int level, int edge) {
+ return alleleFreq[level][edge];
+ }
+
+ @Override
+ public float parentProb(int level, int node) {
+ checkParentNode(level, node);
+ return 1.0f;
+ }
+
+ @Override
+ public int nLevels() {
+ return alleleFreq.length;
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public long nNodes() {
+ return (alleleFreq.length + 1);
+ }
+
+ @Override
+ public long nEdges() {
+ return sumAlleles;
+ }
+
+ @Override
+ public int maxNodes() {
+ return 1;
+ }
+
+ @Override
+ public int maxEdges() {
+ return maxAlleles;
+ }
+
+ @Override
+ public int nOutEdges(int level, int parentNode) {
+ return alleleFreq[level].length;
+ }
+
+ @Override
+ public int outEdge(int level, int parentNode, int outEdge) {
+ checkParentNode(level, parentNode);
+ checkEdge(level, outEdge);
+ return outEdge;
+ }
+
+ @Override
+ public int outEdgeBySymbol(int level, int parentNode, int symbol) {
+ return symbol;
+ }
+
+ @Override
+ public int nInEdges(int level, int childNode) {
+ checkLevel(level);
+ return alleleFreq[level].length;
+ }
+
+ @Override
+ public int inEdge(int level, int childNode, int inEdge) {
+ checkEdge(level, inEdge);
+ if (childNode!=0) {
+ throw new IllegalArgumentException("childNode: " + (int) childNode);
+ }
+ return inEdge;
+ }
+
+ @Override
+ public boolean isChildOf(int parentLevel, int parentEdge, int childEdge) {
+ checkEdge(parentLevel, parentEdge);
+ checkEdge(parentLevel+1, childEdge);
+ return true;
+ }
+
+ @Override
+ public double[] posArray() {
+ double[] pos = new double[alleleFreq.length];
+ for (int j=0; j<pos.length; ++j) {
+ double condEdgeProb = 0.0;
+ for (int a=0; a<alleleFreq[j].length; ++a) {
+ condEdgeProb += alleleFreq[j][a]*alleleFreq[j][a];
+ }
+ if (j==0) {
+ pos[j] = -Math.log10(condEdgeProb);
+ }
+ else {
+ pos[j] = pos[j-1] - Math.log10(condEdgeProb);
+ }
+ }
+ return pos;
+ }
+
+ @Override
+ public String toString(int start, int end) {
+     // end is exclusive (the loop below stops before it), so the largest
+     // legal value is the number of levels; rejecting end == length made
+     // it impossible to print the last level.
+     if (start<0 || start>end || end>alleleFreq.length) {
+         String s = "start=" + start + " end=" + end;
+         throw new IllegalArgumentException(s);
+     }
+     StringBuilder sb = new StringBuilder((end-start) * 20);
+     // one line of allele frequencies per level
+     for (int level=start; level<end; ++level) {
+         sb.append(Arrays.toString(alleleFreq[level]));
+         sb.append(Const.nl);
+     }
+     return sb.toString();
+ }
+}
diff --git a/dag/LowCapacityDagLevel.java b/dag/LowCapacityDagLevel.java
new file mode 100644
index 0000000..df9d989
--- /dev/null
+++ b/dag/LowCapacityDagLevel.java
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import blbutil.Const;
+import blbutil.IntSet;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code LowCapacityDagLevel} represents a level of a leveled
+ * directed acyclic graph (DAG) that can contain up to
+ * {@code Character.MAX_VALUE} edges.
+ * </p>
+ * <p>Instances of class {@code LowCapacityDagLevel} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class LowCapacityDagLevel implements DagLevel {
+
+/*
+ * The k-th edge parent node index is stored in {@code this.parentNodes[k]}.
+ * The k-th edge child node index is stored in {@code this.childNodes[k]}.
+ * The k-th edge symbol is stored in {@code this.symbols[k]}.
+ * The k-th edge count is stored in {@code this.edgeCounts[k]}.
+ * The k-th edge conditional edge probability is stored in
+ * {@code this.condEdgeProbs[k]}, and is defined to be the
+ * k-th edge count divided by the k-th edge's parent node count.
+ * The k-th parent node count is stored in {@code this.parentCounts[k]}.
+ *
+ * The outgoing edge indices of the k-th parent node are stored in consecutive
+ * entries of {@code this.parents} beginning with
+ * {@code this.parentIndices[k]} (inclusive) and ending with
+ * {@code this.parentIndices[k+1]} (exclusive).
+ *
+ * The ingoing edge indices of the k-th child node are stored in consecutive
+ * entries of {@code this.children} beginning with
+ * {@code this.childIndices[k]} (inclusive) and ending with
+ * {@code this.childIndices[k+1]} (exclusive).
+ */
+ private final float count;
+ private final char[] parentNodes;
+ private final char[] childNodes;
+ private final char[] parentIndices;
+ private final char[] parents;
+ private final char[] childIndices;
+ private final char[] children;
+ private final char[] symbols;
+ private final float[] edgeCounts;
+ private final float[] condEdgeProbs;
+ private final float[] parentCounts;
+
+ /**
+  * Creates a {@code LowCapacityDagLevel} from per-edge data. The four
+  * arrays are parallel: index {@code j} of each array describes edge
+  * {@code j}. The arrays are copied, so later changes to the arguments
+  * do not affect the constructed instance.
+  *
+  * @param parentNodes an array mapping edge index to parent node index
+  * @param childNodes an array mapping edge index to child node index
+  * @param symbols an array mapping edge index to the symbol labeling the
+  * edge
+  * @param counts an array mapping edge index to edge count
+  *
+  * @throws IllegalArgumentException if the specified arrays do not all
+  * have the same length
+  * @throws IllegalArgumentException if any array has length greater than
+  * {@code Character.MAX_VALUE}
+  * @throws IllegalArgumentException if any two edges have the same
+  * parent node and are both labeled with the same symbol
+  * @throws IllegalArgumentException if the set of values of the
+  * {@code parentNodes} array is not equal to {@code {0, 1, 2, ..., k}} for
+  * some {@code k}
+  * @throws IllegalArgumentException if the set of values of the
+  * {@code childNodes} array is not equal to {@code {0, 1, 2, ..., k}}
+  * for some {@code k}
+  *
+  * @throws NullPointerException if any parameter is {@code null}
+  */
+ public LowCapacityDagLevel(char[] parentNodes, char[] childNodes,
+         char[] symbols, float[] counts) {
+     int nEdges = checkLengths(parentNodes, childNodes, symbols, counts);
+     this.parentIndices = getIndicesArray(parentNodes);
+     this.childIndices = getIndicesArray(childNodes);
+     this.parentNodes = parentNodes.clone();
+     this.childNodes = childNodes.clone();
+     this.symbols = symbols.clone();
+     this.edgeCounts = counts.clone();
+     this.condEdgeProbs = new float[nEdges];
+     this.parents = new char[nEdges];
+     this.children = new char[nEdges];
+
+     int nParents = parentIndices.length - 1;
+     int nChildren = childIndices.length - 1;
+     this.parentCounts = parentCnts(parentNodes, counts, nParents);
+     this.count = sum(this.parentCounts);
+
+     // Write cursors: each starts at its node's first slot and advances as
+     // that node's edges are recorded.
+     char[] nextParentSlot = Arrays.copyOf(parentIndices, nParents);
+     char[] nextChildSlot = Arrays.copyOf(childIndices, nChildren);
+     for (int edge = 0; edge < nEdges; ++edge) {
+         int p = parentNodes[edge];
+         int c = childNodes[edge];
+         this.parents[nextParentSlot[p]++] = (char) edge;
+         this.children[nextChildSlot[c]++] = (char) edge;
+         this.condEdgeProbs[edge] = counts[edge] / parentCounts[p];
+     }
+     checkForDuplicateOutEdges(parentIndices, parents, symbols);
+ }
+
+ /*
+  * Verifies that the four per-edge arrays have one common length that
+  * does not exceed Character.MAX_VALUE, and returns that length.
+  */
+ private static int checkLengths(char[] parentNodes, char[] childNodes,
+         char[] symbols, float[] counts) {
+     int n = parentNodes.length;
+     if (n > Character.MAX_VALUE) {
+         throw new IllegalArgumentException(
+                 "parentNodes.length>Character.MAX_VALUE");
+     }
+     boolean sameLength = (n == childNodes.length)
+             && (n == symbols.length)
+             && (n == counts.length);
+     if (!sameLength) {
+         throw new IllegalArgumentException("inconsistent arrays");
+     }
+     return n;
+ }
+
+ /*
+  * Throws an IllegalArgumentException if two out-edges of the same parent
+  * node carry the same symbol. The out-edges of parent node k are
+  * parents[parentIndices[k]] .. parents[parentIndices[k+1] - 1].
+  */
+ private static void checkForDuplicateOutEdges(char[] parentIndices,
+         char[] parents, char[] symbols) {
+     IntSet seenSymbols = new IntSet(symbols.length);
+     for (int node = 0; node + 1 < parentIndices.length; ++node) {
+         seenSymbols.clear();
+         int from = parentIndices[node];
+         int to = parentIndices[node + 1];
+         for (int k = from; k < to; ++k) {
+             char symbol = symbols[parents[k]];
+             if (!seenSymbols.add(symbol)) {
+                 throw new IllegalArgumentException("duplicate edge");
+             }
+         }
+     }
+ }
+
+ /*
+  * Returns the prefix sums of the per-node element counts: entry k is the
+  * number of elements of the specified array with value less than k, and
+  * the final entry is the array length.
+  */
+ private static char[] getIndicesArray(char[] nodes) {
+     int[] perNode = elementCounts(nodes);
+     char[] offsets = new char[perNode.length + 1];
+     for (int node = 0; node < perNode.length; ++node) {
+         assert perNode[node] > 0;
+         int next = offsets[node] + perNode[node];
+         assert next <= Character.MAX_VALUE;
+         offsets[node + 1] = (char) next;
+     }
+     return offsets;
+ }
+
+ /*
+  * Returns an array of length {@code max(array) + 1}
+  * whose {@code j}-th element is the number of
+  * elements of the specified array that have value {@code j}.
+  *
+  * @param array an array of non-negative values.
+  * @return an array whose {@code j}-th element is the number of
+  * elements of the specified array that have value {@code j}.
+  * @throws IllegalArgumentException if the set of elements of the
+  * specified array is not equal to {@code {0, 1, 2, ..., k}} for some
+  * {@code k} (this includes an empty array).
+  */
+ private static int[] elementCounts(char[] array) {
+     int[] nodeCounts = new int[max(array) + 1];
+     for (int k = 0; k < array.length; ++k) {
+         nodeCounts[array[k]]++;
+     }
+     // Every value from 0 up to the maximum must occur at least once.
+     for (int value = 0; value < nodeCounts.length; ++value) {
+         if (nodeCounts[value] == 0) {
+             throw new IllegalArgumentException("no element with value " + value);
+         }
+     }
+     return nodeCounts;
+ }
+
+ /*
+  * Returns the largest element of the specified array, or 0 if the
+  * array is empty.
+  */
+ private static int max(char[] ca) {
+     int largest = 0;
+     for (int k = 0; k < ca.length; ++k) {
+         largest = Math.max(largest, ca[k]);
+     }
+     return largest;
+ }
+
+ /*
+  * Returns the sum of the elements of the specified array, accumulated
+  * in index order in single precision.
+  */
+ private float sum(float[] fa) {
+     float total = 0.0f;
+     for (int k = 0; k < fa.length; ++k) {
+         total += fa[k];
+     }
+     return total;
+ }
+
+ /*
+  * Returns an array of length {@code nNodes} whose {@code p}-th element is
+  * the sum of {@code counts[j]} over all edges {@code j} with
+  * {@code parentNodes[j] == p}.
+  *
+  * The method is static and takes its loop bound from
+  * {@code parentNodes.length}, so it does not read any field of an
+  * instance whose constructor is still running.
+  *
+  * @param parentNodes an array mapping edge index to parent node index
+  * @param counts an array mapping edge index to edge count
+  * @param nNodes the number of parent nodes
+  * @return the per-parent-node sums of the edge counts
+  */
+ private static float[] parentCnts(char[] parentNodes, float[] counts, int nNodes) {
+     float[] parentCnts = new float[nNodes];
+     for (int j = 0; j < parentNodes.length; ++j) {
+         parentCnts[parentNodes[j]] += counts[j];
+     }
+     return parentCnts;
+ }
+
+ @Override
+ public int nEdges() {
+ // condEdgeProbs holds one entry per edge, so its length is the edge count.
+ return condEdgeProbs.length;
+ }
+
+ @Override
+ public int nParentNodes() {
+ // parentIndices has one start offset per parent node plus one trailing
+ // end offset.
+ return parentIndices.length - 1;
+ }
+
+ @Override
+ public int nChildNodes() {
+ // childIndices has one start offset per child node plus one trailing
+ // end offset.
+ return childIndices.length - 1;
+ }
+
+ @Override
+ public int parentNode(int edge) {
+ // Direct lookup in the edge -> parent node table copied at construction.
+ return parentNodes[edge];
+ }
+
+ @Override
+ public int childNode(int edge) {
+ // Direct lookup in the edge -> child node table copied at construction.
+ return childNodes[edge];
+ }
+
+ @Override
+ public int symbol(int edge) {
+ // Direct lookup in the edge -> symbol table copied at construction.
+ return symbols[edge];
+ }
+
+ @Override
+ public float edgeWeight(int edge) {
+ // The edge weight is the count supplied for this edge at construction.
+ return edgeCounts[edge];
+ }
+
+ @Override
+ public float parentWeight(int parentNode) {
+ // Sum of the counts of all edges whose parent is parentNode, computed
+ // once in the constructor.
+ return parentCounts[parentNode];
+ }
+
+ @Override
+ public float condEdgeProb(int edge) {
+ // Precomputed in the constructor as the edge count divided by the count
+ // of the edge's parent node.
+ return condEdgeProbs[edge];
+ }
+
+ @Override
+ public float edgeProb(int edge) {
+ // Edge count as a fraction of count, the sum of all parent node counts.
+ return (edgeCounts[edge] / count);
+ }
+
+ @Override
+ public float parentProb(int node) {
+ // Parent node count as a fraction of count, the sum of all parent node
+ // counts.
+ return (parentCounts[node] / count);
+ }
+
+ @Override
+ public int nOutEdges(int parentNode) {
+ // Difference of consecutive offsets = size of this node's slice of
+ // the parents array.
+ return (parentIndices[parentNode+1] - parentIndices[parentNode]);
+ }
+
+ @Override
+ public int outEdge(int parentNode, int outEdgeIndex) {
+ if (outEdgeIndex<0 || outEdgeIndex>=nOutEdges(parentNode)) {
+ throw new IndexOutOfBoundsException(String.valueOf(outEdgeIndex));
+ }
+ return (parents[parentIndices[parentNode] + outEdgeIndex]);
+ }
+
+ @Override
+ public int outEdgeBySymbol(int parentNode, int symbol) {
+     // Linear scan of the node's out-edges; -1 signals that no out-edge
+     // carries the requested symbol.
+     int k = parentIndices[parentNode];
+     int stop = parentIndices[parentNode + 1];
+     while (k < stop) {
+         int edge = parents[k++];
+         if (symbols[edge] == symbol) {
+             return edge;
+         }
+     }
+     return -1;
+ }
+
+ @Override
+ public int nInEdges(int childNode) {
+ // Difference of consecutive offsets = size of this node's slice of
+ // the children array.
+ return (childIndices[childNode+1] - childIndices[childNode]);
+ }
+
+ @Override
+ public int inEdge(int childNode, int inEdgeIndex) {
+ if (inEdgeIndex<0 || inEdgeIndex>=nInEdges(childNode)) {
+ throw new IndexOutOfBoundsException(String.valueOf(inEdgeIndex));
+ }
+ return this.children[childIndices[childNode] + inEdgeIndex];
+ }
+
+ @Override
+ public String toString() {
+     // One "name=values" line per internal array, each preceded by a
+     // line separator; char arrays are printed as integers.
+     StringBuilder sb = new StringBuilder(500);
+     sb.append(Const.nl).append("parentNodes=")
+             .append(charArrayToString(parentNodes));
+     sb.append(Const.nl).append("childNodes=")
+             .append(charArrayToString(childNodes));
+     sb.append(Const.nl).append("symbols=")
+             .append(charArrayToString(symbols));
+     sb.append(Const.nl).append("condEdgeProbs=")
+             .append(Arrays.toString(condEdgeProbs));
+     sb.append(Const.nl).append("parentCounts=")
+             .append(Arrays.toString(parentCounts));
+     sb.append(Const.nl).append("edgeCounts=")
+             .append(Arrays.toString(edgeCounts));
+     sb.append(Const.nl).append("parentIndices=")
+             .append(charArrayToString(parentIndices));
+     sb.append(Const.nl).append("parents=")
+             .append(charArrayToString(parents));
+     sb.append(Const.nl).append("childIndices=")
+             .append(charArrayToString(childIndices));
+     sb.append(Const.nl).append("children=")
+             .append(charArrayToString(children));
+     return sb.toString();
+ }
+
+ /*
+  * Returns a string representation of the specified character array
+  * with the character elements converted to integers for printing.
+  * The representation has the format
+  * "[a[0], a[1], a[2], ..., a[length-1]]". An empty array is
+  * represented as "[]".
+  *
+  * @param a a character array.
+  *
+  * @return a string representation of the specified character array
+  * with the character elements converted to integers for printing.
+  * @throws NullPointerException if {@code a == null}
+  */
+ private static String charArrayToString(char[] a) {
+     StringBuilder sb = new StringBuilder(a.length * 4 + 10);
+     sb.append("[");
+     for (int j = 0; j < a.length; ++j) {
+         if (j > 0) {
+             sb.append(", ");
+         }
+         // Cast so that the numeric value, not the character, is printed.
+         sb.append((int) a[j]);
+     }
+     sb.append("]");
+     return sb.toString();
+ }
+}
diff --git a/dag/MergeableDag.java b/dag/MergeableDag.java
new file mode 100644
index 0000000..d24821e
--- /dev/null
+++ b/dag/MergeableDag.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import blbutil.FileIt;
+import haplotype.HapPairs;
+import vcf.HapsMarker;
+import haplotype.HapsMarkerIterator;
+import java.util.ArrayList;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ListIterator;
+
+/**
+ * <p>Class {@code MergeableDag} contains a static, thread-safe factory
+ * method that constructs a Directed Acyclic Graph (DAG) from sequence data.
+ * </p>
+ *
+ * References:
+ * <br>
+ * Ron D, Singer Y, and Tishby N (1998) On the Learnability and
+ * usage of acyclic probabilistic finite automata. Journal of Computer
+ * and System Sciences 56:133-152.
+ * <br>
+ * Browning S (2006) Multi-locus association mapping using variable length
+ * Markov chains. Am J Hum Genet. 78:903-913.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class MergeableDag {
+
+ private static final Score MAX_SCORE =
+ new Score(-1, -1, Float.POSITIVE_INFINITY, false);
+ private static final float MAX_THRESHOLD_RATIO = 1.4f;
+ private static final int MIN_DEPTH = 10;
+
+ private final float scale;
+ private final Dag dag;
+
+ private float nUnmergedAtLeaf = 0f;
+
+ /**
+  * Builds a {@code Dag} from the specified sequence data and returns it.
+  * @param hapPairs the sequence data
+  * @param weights an array whose {@code j}-th element is the
+  * weight for the {@code j}-th haplotype, or {@code null} if every
+  * haplotype is to be assigned a weight of 1
+  * @param scale a parameter that multiplicatively scales the node
+  * similarity threshold
+  * @param nInitLevels the number of initial levels to read
+  * @return a new {@code Dag} instance
+  *
+  * @throws IllegalArgumentException if {@code hapPairs.nMarkers() == 0}
+  * @throws IllegalArgumentException if {@code weights != null} and
+  * {@code (weights[j] <= 0 || Float.isFinite(weights[j]) == false)}
+  * for any {@code j} satisfying {@code (0 <= j && j < weights.length)}
+  * @throws IllegalArgumentException if
+  * {@code Float.isFinite(scale) == false || scale <= 0}
+  * @throws IllegalArgumentException if {@code nInitLevels < 1}
+  * @throws NullPointerException if {@code hapPairs == null}
+  */
+ public static Dag dag(HapPairs hapPairs, float[] weights, float scale,
+         int nInitLevels) {
+     return new MergeableDag(hapPairs, weights, scale, nInitLevels).dag();
+ }
+
+ /**
+ * Constructs a new {@code MergeableDag} instance from the specified data.
+ * Levels are read ahead of the level currently being merged; each level
+ * is converted to an immutable {@code DagLevel} once the parent nodes of
+ * the following level have been merged.
+ * @param hapPairs the sequence data
+ * @param weights an array whose {@code j}-th element is the
+ * weight for the {@code j}-th haplotype, or {@code null} if every
+ * haplotype is to be assigned a weight of 1
+ * @param scale a parameter that multiplicatively scales the node
+ * similarity threshold
+ * @param nInitLevels the number of initial levels to read
+ *
+ * @throws IllegalArgumentException if {@code hapPairs.nMarkers() == 0}
+ * @throws IllegalArgumentException if {@code weights != null} and
+ * {@code (weights[j] <= 0 || Float.isFinite(weights[j]) == false)}
+ * for any {@code j} satisfying {@code (0 <= j && j < weights.length)}
+ * @throws IllegalArgumentException if
+ * {@code Float.isFinite(scale) == false || scale <= 0}
+ * @throws IllegalArgumentException if {@code nInitLevels < 1}
+ * @throws NullPointerException if {@code hapPairs == null}
+ */
+ private MergeableDag(HapPairs hapPairs, float[] weights, float scale,
+ int nInitLevels) {
+ checkParameters(hapPairs, weights, scale, nInitLevels);
+ this.scale = scale;
+ List<DagLevel> mergedLevels = new ArrayList<>(hapPairs.nMarkers());
+ // 1% of the total haplotype weight (see maxUnmergedAtLeaf()).
+ float maxUnmerged = maxUnmergedAtLeaf(hapPairs, weights);
+ int lastReadDepth = nInitLevels;
+ try (FileIt<HapsMarker> it = new HapsMarkerIterator(hapPairs)) {
+ MergeableDagLevel current = readFirstLevel(it, weights);
+ // leaf is the last level read so far; current trails behind it.
+ MergeableDagLevel leaf = readLevels(it, nInitLevels, current);
+ while (current.next() != null) {
+ // Reset the tally that similar() accumulates whenever a comparison
+ // runs past the last level that has been read.
+ nUnmergedAtLeaf = 0f;
+ current = current.next();
+ mergeParentNodes(current);
+ // The level above current is detached and stored in its final form.
+ MergeableDagLevel previousLevel = current.setPreviousToNull();
+ mergedLevels.add(previousLevel.toDagLevel());
+
+ if (it.hasNext()) {
+ // Choose how far ahead of current to read, based on the weight that
+ // reached the leaf unmerged relative to the permitted maximum.
+ float ratio = (nUnmergedAtLeaf / maxUnmerged);
+ int depth = (leaf.index() - current.index());
+ int readDepth = nextReadDepth(ratio, depth, lastReadDepth);
+ if (readDepth>depth) {
+ leaf = readLevels(it, (readDepth - depth), leaf);
+ lastReadDepth = readDepth;
+ }
+ }
+ }
+ // The final level has no following level, so it is stored here.
+ mergedLevels.add(current.toDagLevel());
+ DagLevel[] levels = mergedLevels.toArray(new DagLevel[0]);
+ this.dag = new ImmutableDag(hapPairs.markers(), levels);
+ }
+ }
+
+ /*
+  * Validates the factory arguments in this order: nInitLevels, the number
+  * of markers, each weight (only when weights is non-null), and scale.
+  * A weight or scale is accepted only if it is positive and finite.
+  */
+ private static void checkParameters(HapPairs hapPairs, float[] weights,
+         float scale, int nInitLevels) {
+     if (nInitLevels < 1) {
+         throw new IllegalArgumentException(String.valueOf(nInitLevels));
+     }
+     if (hapPairs.nMarkers() == 0) {
+         throw new IllegalArgumentException("hapPairs.nMarkers()==0");
+     }
+     if (weights != null) {
+         for (float w : weights) {
+             boolean positiveAndFinite = (w > 0.0) && Float.isFinite(w);
+             if (!positiveAndFinite) {
+                 throw new IllegalArgumentException(String.valueOf(w));
+             }
+         }
+     }
+     boolean scaleOk = (scale > 0) && Float.isFinite(scale);
+     if (!scaleOk) {
+         throw new IllegalArgumentException(String.valueOf(scale));
+     }
+ }
+
+ /*
+  * Consumes the next marker from the iterator and builds the root level
+  * from it, using the one-argument constructor when no weights are given.
+  */
+ private static MergeableDagLevel readFirstLevel(FileIt<HapsMarker> it,
+         float[] weights) {
+     HapsMarker first = it.next();
+     return (weights == null)
+             ? new MergeableDagLevel(first)
+             : new MergeableDagLevel(first, weights);
+ }
+
+ private static float maxUnmergedAtLeaf(HapPairs hapPairs, float[] weights) {
+ float maxPropUnmerged = 0.01f;
+ float sum = (weights==null ? hapPairs.nHaps() : sum(weights));
+ return maxPropUnmerged * sum;
+ }
+
+ /*
+  * Returns the read-ahead depth to use next.
+  *   - unmergedRatio <= 1: the minimum depth;
+  *   - depth below 85% of lastDepth: one more than 95% of lastDepth
+  *     (rounded);
+  *   - unmergedRatio > 2 and depth above 95% of lastDepth: lastDepth grown
+  *     by the factor (1 + unmergedRatio/20), rounded up;
+  *   - otherwise: lastDepth unchanged.
+  */
+ private static int nextReadDepth(float unmergedRatio, int depth,
+         int lastDepth) {
+     if (unmergedRatio <= 1) {
+         return MIN_DEPTH;
+     }
+     if (depth < (0.85*lastDepth)) {
+         return 1 + (int) Math.round(0.95*lastDepth);
+     }
+     boolean nearLastDepth = depth > (0.95*lastDepth);
+     if ((unmergedRatio > 2) && nearLastDepth) {
+         return (int) Math.ceil((1 + unmergedRatio/20)*lastDepth);
+     }
+     return lastDepth;
+ }
+
+ /*
+  * Appends up to nLevelsToRead new levels after leafLevel, stopping early
+  * if the iterator is exhausted, and returns the last level of the chain.
+  */
+ private static MergeableDagLevel readLevels(FileIt<HapsMarker> it,
+         int nLevelsToRead, MergeableDagLevel leafLevel) {
+     MergeableDagLevel leaf = leafLevel;
+     int nRead = 0;
+     while (it.hasNext() && nRead < nLevelsToRead) {
+         MergeableDagLevel appended = new MergeableDagLevel(leaf, it.next());
+         leaf.setNextLevel(appended);
+         leaf = appended;
+         ++nRead;
+     }
+     return leaf;
+ }
+
+ /*
+  * Returns the sum of the elements of the specified array, accumulated
+  * in index order in single precision.
+  */
+ private static float sum(float[] fa) {
+     float total = 0f;
+     for (int k = 0; k < fa.length; ++k) {
+         total += fa[k];
+     }
+     return total;
+ }
+
+ /*
+ * Repeatedly merges the mergeable pair of parent nodes with the lowest
+ * score at the specified level until no mergeable pair remains. After
+ * each merge the stored scores that involve the removed node are
+ * discarded and those that involve the retained node are recomputed.
+ */
+ private void mergeParentNodes(MergeableDagLevel level) {
+ List<Score> scores = new LinkedList<>();
+ Score min = getPairwiseScores(level, scores);
+ while (min.isMergeable()) {
+ int retainedNode = min.nodeA();
+ int removedNode = min.nodeB();
+ if (level.hasSibling(retainedNode)==false) {
+ // Ensure that no-sibling nodes are always removed
+ retainedNode = min.nodeB();
+ removedNode = min.nodeA();
+ assert level.hasSibling(retainedNode);
+ }
+ // NOTE(review): this branch assigns the values that both variables
+ // already hold (retained = nodeA, removed = nodeB), so it has no effect.
+ // If the intent was to retain the node with the larger count, the two
+ // assignments would need to be swapped -- confirm before changing,
+ // since that would alter the DAGs that are produced.
+ else if (level.hasSibling(removedNode)
+ && level.nodeCount(min.nodeA())<level.nodeCount(min.nodeB())) {
+ removedNode = min.nodeB();
+ retainedNode = min.nodeA();
+ }
+ level.mergeParentNodes(retainedNode, removedNode);
+
+ // Refresh the score list and find the new minimum mergeable score.
+ // MAX_SCORE is not mergeable, so the outer loop ends if nothing is found.
+ min = MAX_SCORE;
+ ListIterator<Score> scoreIt = scores.listIterator();
+ while (scoreIt.hasNext()) {
+ Score s = scoreIt.next();
+ if (s.nodeA()==removedNode || s.nodeB()==removedNode) {
+ // The removed node no longer exists at this level.
+ scoreIt.remove();
+ }
+ else {
+ if (s.nodeA()==retainedNode || s.nodeB()==retainedNode) {
+ // The retained node changed, so its scores are recomputed;
+ // score() returns null for pairs that are too dissimilar to track.
+ s = score(level, s.nodeA(), s.nodeB());
+ if (s!=null) {
+ if (s.score()<min.score() && s.isMergeable()) {
+ min = s;
+ }
+ scoreIt.set(s);
+ }
+ else {
+ scoreIt.remove();
+ }
+ }
+ else if (s.score()<min.score() && s.isMergeable()) {
+ min = s;
+ }
+ }
+ }
+ }
+ }
+
+ /*
+  * Scores pairs of parent nodes at the specified level, appends every
+  * non-null score to the specified list, and returns the mergeable score
+  * with the smallest value (or MAX_SCORE if there is none). The first
+  * node of each scored pair always has a sibling.
+  */
+ private Score getPairwiseScores(MergeableDagLevel level,
+         List<Score> scores) {
+     SortedNodes ordered = sortedParents(level);
+     int[] nodes = ordered.sorted;
+     int nFirstNodes = ordered.nWithSibs;
+     Score best = MAX_SCORE;
+     for (int a = 0; a < nFirstNodes; ++a) {
+         for (int b = a + 1; b < nodes.length; ++b) {
+             Score candidate = score(level, nodes[a], nodes[b]);
+             if (candidate == null) {
+                 continue;
+             }
+             if (candidate.score() < best.score() && candidate.isMergeable()) {
+                 best = candidate;
+             }
+             scores.add(candidate);
+         }
+     }
+     return best;
+ }
+
+ /*
+  * Immutable holder for an ordering of parent node indices.
+  * sorted    - the parent node indices, with every node that has a
+  *             sibling placed before every node that has none
+  * nWithSibs - the number of leading entries of sorted that have a sibling
+  * The array reference is stored as given, without copying.
+  */
+ private static final class SortedNodes {
+
+     public final int[] sorted;
+     public final int nWithSibs;
+
+     public SortedNodes(int[] sorted, int nWithSibs) {
+         this.sorted = sorted;
+         this.nWithSibs = nWithSibs;
+     }
+ }
+
+ /*
+  * Partitions the parent nodes of the specified level so that the nodes
+  * with a sibling come first, and returns the partitioned array together
+  * with the number of nodes that have a sibling.
+  */
+ private SortedNodes sortedParents(MergeableDagLevel level) {
+     int[] nodes = level.parentNodeArray();
+     int lo = 0;
+     int hi = nodes.length - 1;
+     while (lo < hi) {
+         int node = nodes[lo];
+         if (level.hasSibling(node)) {
+             ++lo;
+             continue;
+         }
+         // Move the no-sibling node to the tail and examine the swapped-in
+         // node on the next pass.
+         nodes[lo] = nodes[hi];
+         nodes[hi] = node;
+         --hi;
+     }
+     // The element at which the two cursors met has not been examined yet.
+     int nWithSibs = level.hasSibling(nodes[lo]) ? (lo + 1) : lo;
+     return new SortedNodes(nodes, nWithSibs);
+ }
+
+ /*
+  * Scores the similarity of two parent nodes at the specified level.
+  * The threshold is scale * sqrt(1/countA + 1/countB). Returns null if
+  * the score exceeds MAX_THRESHOLD_RATIO times the threshold; otherwise
+  * returns a Score that is flagged mergeable when the score is below the
+  * threshold.
+  */
+ private Score score(MergeableDagLevel level, int nodeA, int nodeB) {
+     float cntA = level.nodeCount(nodeA);
+     float cntB = level.nodeCount(nodeB);
+     float threshold = (float) (scale*Math.sqrt((1.0/cntA)+(1.0/cntB)));
+     float maxDiff = similar(level, nodeA, nodeB,
+             cntA, cntB, level.index(),
+             cntA, cntB, 0.0f, threshold);
+     if (maxDiff > MAX_THRESHOLD_RATIO*threshold) {
+         return null;
+     }
+     return new Score(nodeA, nodeB, maxDiff, (maxDiff < threshold));
+ }
+
+ /*
+ * Returns a similarity-score (lower scores correspond to higher
+ * similarity). The score is the largest difference found between the
+ * proportions of corresponding branches of the two trees, where the
+ * search stops as soon as a difference reaches the threshold.
+ * As a side effect, the counts of branch pairs that reach the end of the
+ * levels read so far ({@code level == null}) are added to
+ * {@code nUnmergedAtLeaf}.
+ *
+ * @param level the DAG level containing the specified nodes as parent
+ * nodes, or {@code null} if the end of the levels read so far is reached
+ * @param nodeA a parent node at the specified DAG level in tree A
+ * (-1 if the branch is absent in tree A)
+ * @param nodeB a parent node at the specified DAG level in tree B
+ * (-1 if the branch is absent in tree B)
+ * @param nodeCntA the count of the parent node in tree A
+ * @param nodeCntB the count of the parent node in tree B
+ * @param baseMarker the marker index at the root of trees A and B
+ * (only passed through to the recursive calls)
+ * @param nA the node count of the root of tree A
+ * @param nB the node count of the root of tree B
+ * @param maxDiff the current maximum difference in proportions in
+ * the counts of corresponding tree branches
+ * @param threshold the maximum permitted node similarity
+ * @return a similarity-score
+ */
+ private float similar(MergeableDagLevel level,
+ int nodeA, int nodeB, float nodeCntA, float nodeCntB,
+ int baseMarker, float nA, float nB, float maxDiff, float threshold) {
+ float propA = nodeCntA / nA;
+ float propB = nodeCntB / nB;
+ float diff = Math.abs(propA - propB);
+ if (diff >= threshold) {
+ return diff;
+ }
+ // Neither branch carries more than maxDiff of its tree, so no difference
+ // below this point can exceed maxDiff. This also covers the case in
+ // which both branches are absent (both proportions are 0).
+ else if (propA <= maxDiff && propB <= maxDiff) {
+ return maxDiff;
+ }
+ else if (diff > maxDiff) {
+ maxDiff = diff;
+ }
+ // Exactly one branch is absent: there is nothing further to compare.
+ if (nodeA == -1 ^ nodeB == -1) {
+ return maxDiff;
+ }
+ else if (level==null) {
+ nUnmergedAtLeaf += (nodeCntA + nodeCntB);
+ return maxDiff;
+ }
+ // Recurse into the child branches for each allele; an out-edge of -1
+ // denotes an absent branch with count 0.
+ for (int j=0, n=level.nAlleles(); j<n; ++j) {
+ int edgeA = level.outEdge(nodeA, j);
+ int edgeB = level.outEdge(nodeB, j);
+ int childA = (edgeA != -1) ? level.childNode(edgeA) : -1;
+ int childB = (edgeB != -1) ? level.childNode(edgeB) : -1;
+ nodeCntA = (edgeA != -1) ? level.edgeCount(edgeA) : 0.0f;
+ nodeCntB = (edgeB != -1) ? level.edgeCount(edgeB) : 0.0f;
+ float childMaxDiff = similar(level.next(), childA, childB,
+ nodeCntA, nodeCntB, baseMarker, nA, nB, maxDiff, threshold);
+ if (childMaxDiff > maxDiff) {
+ if (childMaxDiff >= threshold) {
+ return childMaxDiff;
+ }
+ else {
+ maxDiff = childMaxDiff;
+ }
+ }
+ }
+ return maxDiff;
+ }
+
+ /**
+ * Returns the constructed DAG. The DAG is built once, in the constructor,
+ * and the same instance is returned by every invocation.
+ * @return the constructed DAG
+ */
+ public Dag dag() {
+ return dag;
+ }
+
+ /**
+ * Returns a string description of {@code this}. The
+ * exact details of the representation are unspecified and subject
+ * to change. The current implementation returns the string
+ * representation of the constructed DAG.
+ * @return a string description of {@code this}
+ */
+ @Override
+ public String toString() {
+ return dag.toString();
+ }
+}
diff --git a/dag/MergeableDagLevel.java b/dag/MergeableDagLevel.java
new file mode 100644
index 0000000..27adef9
--- /dev/null
+++ b/dag/MergeableDagLevel.java
@@ -0,0 +1,731 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+import blbutil.Const;
+import vcf.HapsMarker;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code MergeableDagLevel} represents a level of a leveled
+ * directed acyclic graph (DAG). The class includes a public method for
+ * merging parent nodes.
+ * </p>
+ * <p>
+ * Instances of class {@code MergeableDagLevel} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class MergeableDagLevel {
+
+ private MergeableDagLevel nextLevel = null;
+ private MergeableDagLevel prevLevel = null;
+
+ private final int levelIndex;
+ private final int nAlleles;
+ private final int nHaps;
+ private final float[] weights;
+
+ private int[][] outEdges; // [allele][parent node]
+ private int[] child2FirstInEdge;
+ private int[] inEdge2NextInEdge;
+
+ private int[] parentNodes; // edge -> parent node
+ private int[] childNodes; // edge -> child node
+ private int[] symbols; // edge -> symbol
+ private float[] counts; // edge -> weight
+
+ private int[] child2FirstHap; // child node -> first hap index
+ private int[] hap2NextHap; // current hap index -> next hap index
+
+ /*
+  * Returns an array with one entry per haplotype of the specified data,
+  * each entry equal to 1.
+  */
+ private static float[] defaultWeights(HapsMarker data) {
+     int nHaps = data.nHaps();
+     float[] unitWeights = new float[nHaps];
+     for (int h = 0; h < nHaps; ++h) {
+         unitWeights[h] = 1f;
+     }
+     return unitWeights;
+ }
+
+ /**
+ * Constructs a new {@code MergeableDagLevel} instance from the specified
+ * phased genotype data. The {@code previous()} method of the
+ * constructed instance will return {@code null}. Each haplotype
+ * will be assigned a weight of 1.
+ * @param data the phased genotype data
+ * @throws NullPointerException if {@code data == null}
+ */
+ public MergeableDagLevel(HapsMarker data) {
+ // Delegates to the two-argument constructor with an all-ones weight array.
+ this(data, defaultWeights(data));
+ }
+
+ /**
+ * Constructs a new {@code MergeableDagLevel} instance from the specified
+ * phased genotype data and haplotype weights. The {@code previous()}
+ * method of the constructed instance will return {@code null}, and the
+ * level index of the constructed instance is 0. The specified
+ * {@code weights} array is copied.
+ * @param data the phased genotype data
+ * @param weights an array mapping haplotype indices to non-negative
+ * weights
+ * @throws IllegalArgumentException if
+ * {@code weights.length != data.nHaps()}
+ * @throws NullPointerException if {@code data==null || weights==null}
+ */
+ public MergeableDagLevel(HapsMarker data, float[] weights) {
+ checkParameters(data, weights);
+ this.prevLevel = null;
+ this.nextLevel = null;
+ this.levelIndex = 0;
+ this.nAlleles = data.marker().nAlleles();
+ this.nHaps = data.nHaps();
+ // Defensive copy: later levels share this array by reference.
+ this.weights = weights.clone();
+ allocateAndInitializeArrays(nAlleles, nHaps);
+ fillArrays(data, weights);
+ }
+
+ /**
+ * Constructs a new {@code MergeableDagLevel} instance with the
+ * specified previous {@code MergeableDagLevel} and the
+ * specified phased genotype data. This constructor does not set the
+ * next level of the specified {@code prevLevel} object (see
+ * {@code setNextLevel()}), but it does discard the haplotype index
+ * data stored in {@code prevLevel} once the haplotypes have been
+ * assigned to the edges of the new level. The haplotype weights are
+ * shared with {@code prevLevel}.
+ * @param prevLevel the previous {@code MergeableDagLevel}
+ * @param data the phased genotype data
+ *
+ * @throws IllegalArgumentException if
+ * {@code prevLevel.next() != null}
+ * @throws IllegalArgumentException if
+ * {@code prevLevel.nHaps() != data.nHaps()}
+ * @throws NullPointerException if
+ * {@code prevLevel == null || data == null}
+ */
+ public MergeableDagLevel(MergeableDagLevel prevLevel, HapsMarker data) {
+ checkParameters(prevLevel, data);
+ this.prevLevel = prevLevel;
+ this.nextLevel = null;
+ this.levelIndex = prevLevel.index() + 1;
+ this.nAlleles = data.marker().nAlleles();
+ this.nHaps = data.nHaps();
+ // The weight array is shared by reference, not copied.
+ this.weights = prevLevel.weights;
+ allocateAndInitializeArrays(nAlleles, nHaps);
+ // NB: fillArrays() also removes the haplotype index data from prevLevel.
+ fillArrays(prevLevel, data, weights);
+ }
+
+ /*
+  * Throws an IllegalArgumentException unless there is exactly one weight
+  * per haplotype of the specified data.
+  */
+ private void checkParameters(HapsMarker data, float[] weights) {
+     int nWeights = weights.length;
+     int nHaps = data.nHaps();
+     if (nWeights == nHaps) {
+         return;
+     }
+     throw new IllegalArgumentException("data.nHaps()=" + nHaps
+             + " != weights.length=" + nWeights);
+ }
+
+ /*
+  * Throws an IllegalArgumentException if the specified parent level
+  * already has a next level, or if its haplotype count differs from that
+  * of the specified data.
+  */
+ private void checkParameters(MergeableDagLevel parent, HapsMarker data) {
+     boolean parentIsLeaf = (parent.nextLevel == null);
+     if (!parentIsLeaf) {
+         throw new IllegalArgumentException("parent.nextLevel!=null");
+     }
+     boolean sameHapCount = (parent.nHaps() == data.nHaps());
+     if (!sameHapCount) {
+         throw new IllegalArgumentException("inconsistent samples");
+     }
+     // NB: the sequences of sample ID indices are not checked
+ }
+
+ /*
+  * Allocates every per-edge, per-node and per-haplotype array with
+  * length nHaps (outEdges has one such row per allele). All index
+  * arrays are filled with -1; the counts array keeps its default
+  * value of 0.
+  */
+ private void allocateAndInitializeArrays(int nAlleles, int nHaps) {
+     this.outEdges = new int[nAlleles][nHaps];
+     for (int allele = 0; allele < nAlleles; ++allele) {
+         Arrays.fill(this.outEdges[allele], -1);
+     }
+
+     this.child2FirstInEdge = new int[nHaps];
+     Arrays.fill(this.child2FirstInEdge, -1);
+     this.inEdge2NextInEdge = new int[nHaps];
+     Arrays.fill(this.inEdge2NextInEdge, -1);
+
+     this.parentNodes = new int[nHaps];
+     Arrays.fill(this.parentNodes, -1);
+     this.childNodes = new int[nHaps];
+     Arrays.fill(this.childNodes, -1);
+     this.symbols = new int[nHaps];
+     Arrays.fill(this.symbols, -1);
+     this.counts = new float[nHaps];
+
+     this.child2FirstHap = new int[nHaps];
+     Arrays.fill(this.child2FirstHap, -1);
+     this.hap2NextHap = new int[nHaps];
+     Arrays.fill(this.hap2NextHap, -1);
+ }
+
+ /*
+ * Fills the arrays of the first level. All haplotypes leave the single
+ * parent node 0, and the edge for an allele has edge index and child
+ * node index equal to the allele index. Haplotypes that share an allele
+ * are chained through hap2NextHap, starting at child2FirstHap.
+ *
+ * NOTE(review): the edge arrays are allocated with length nHaps (see
+ * allocateAndInitializeArrays) while the edge index used here is the
+ * allele index, so a marker whose observed allele index is >= nHaps
+ * would index out of bounds -- confirm that callers preclude this.
+ */
+ private void fillArrays(HapsMarker data, float[] weights) {
+ int parentNode = 0;
+ for (int hap=0, n=data.nHaps(); hap<n; ++hap) {
+ int symbol = data.allele(hap);
+ float count = weights[hap];
+ int edge = this.outEdges[symbol][parentNode];
+ if (edge == -1) {
+ // First haplotype seen with this allele: create the edge.
+ edge = symbol;
+ addEdge(parentNode, symbol, count, edge, hap);
+ }
+ else {
+ // Existing edge: add the weight and push hap onto the front of the
+ // child node's haplotype list.
+ assert edge == symbol;
+ assert edge==childNodes[edge];
+ int child = childNodes[edge];
+ this.counts[edge] += count;
+ this.hap2NextHap[hap] = this.child2FirstHap[child];
+ this.child2FirstHap[child] = hap;
+ }
+ }
+ }
+
+ /*
+ * Fills the arrays of a level that follows the specified previous level.
+ * Each child node of the previous level becomes a parent node of this
+ * level; its haplotypes are split by allele, and each (parent node,
+ * allele) combination that occurs gets a new edge and a new child node
+ * with the same index as the edge. When finished, the edge arrays are
+ * shortened if fewer than 75% of their entries are used, and the
+ * haplotype index data of the previous level is discarded.
+ */
+ private void fillArrays(MergeableDagLevel prevLevel, HapsMarker data,
+ float[] weights) {
+ int nEdges = 0;
+ for (int node=0, n=prevLevel.child2FirstHap.length; node<n; ++node) {
+ // Entries of -1 mark indices that are not child nodes of prevLevel.
+ if (prevLevel.child2FirstHap[node] >= 0) {
+ int hap = prevLevel.child2FirstHap[node];
+ // Walk the linked list of haplotypes that pass through this node.
+ while (hap != -1) {
+ int symbol = data.allele(hap);
+ float count = weights[hap];
+ int edge = this.outEdges[symbol][node];
+ if (edge == -1) {
+ addEdge(node, symbol, count, nEdges++, hap);
+ }
+ else {
+ // Existing edge: add the weight and push hap onto the front of the
+ // child node's haplotype list.
+ assert edge==childNodes[edge];
+ int child = childNodes[edge];
+ this.counts[edge] += count;
+ this.hap2NextHap[hap] = this.child2FirstHap[child];
+ this.child2FirstHap[child] = hap;
+ }
+ hap = prevLevel.hap2NextHap[hap];
+ }
+ }
+ }
+ if (nEdges < 0.75*nHaps) {
+ reduceEdgeArrayLengths(nEdges);
+ }
+ prevLevel.removeHaplotypeIndices();
+ }
+
+ /*
+  * Records a new edge from the specified parent node. The new child node
+  * has the same index as the edge, the edge is stored as the child's
+  * first in-edge, and the specified haplotype becomes the first haplotype
+  * of the child node.
+  */
+ private void addEdge(int parentNode, int symbol, float weight,
+         int edge, int haplotype) {
+     // Per-edge data.
+     parentNodes[edge] = parentNode;
+     childNodes[edge] = edge;
+     symbols[edge] = symbol;
+     counts[edge] = weight;
+     // Lookup tables keyed by (symbol, parent node) and by child node.
+     outEdges[symbol][parentNode] = edge;
+     child2FirstInEdge[edge] = edge;
+     child2FirstHap[edge] = haplotype;
+ }
+
+ /*
+ * Replaces the in-edge arrays and the per-edge arrays with copies of the
+ * specified length. The outEdges, child2FirstHap and hap2NextHap arrays
+ * are not resized by this method.
+ */
+ private void reduceEdgeArrayLengths(int newLength) {
+ child2FirstInEdge = Arrays.copyOf(child2FirstInEdge, newLength);
+ inEdge2NextInEdge = Arrays.copyOf(inEdge2NextInEdge, newLength);
+ parentNodes = Arrays.copyOf(parentNodes, newLength);
+ childNodes = Arrays.copyOf(childNodes, newLength);
+ symbols = Arrays.copyOf(symbols, newLength);
+ counts = Arrays.copyOf(counts, newLength);
+ }
+
+ /**
+  * Discards the haplotype index data of {@code this} by setting both
+  * haplotype list arrays to {@code null}.
+  */
+ private void removeHaplotypeIndices() {
+     hap2NextHap = null;
+     child2FirstHap = null;
+ }
+
+ /**
+  * Detaches the previous DAG level from {@code this} and returns it.
+  * After this method returns, {@code this.previous()} is {@code null}.
+  * @return the previous DAG level that was stored immediately before the
+  * invocation of this method (possibly {@code null})
+  */
+ public MergeableDagLevel setPreviousToNull() {
+     final MergeableDagLevel detached = prevLevel;
+     prevLevel = null;
+     return detached;
+ }
+
+ /**
+  * Stores the specified {@code MergeableDagLevel} as the next level.
+  * @param nextLevel the next level
+  * @throws IllegalArgumentException if
+  * {@code nextLevel.previous() != this}
+  * @throws NullPointerException if {@code nextLevel == null}
+  */
+ public void setNextLevel(MergeableDagLevel nextLevel) {
+     if (this != nextLevel.prevLevel) {
+         throw new IllegalArgumentException("nextLevel.previousLevel!=this");
+     }
+     this.nextLevel = nextLevel;
+ }
+
+ /**
+  * Returns the stored previous DAG level, which is {@code null} if no
+  * previous level is stored.
+  * @return the previous DAG level or {@code null}
+  */
+ public MergeableDagLevel previous() {
+     return this.prevLevel;
+ }
+
+ /**
+  * Returns the stored next DAG level, which is {@code null} if no next
+  * level is stored.
+  * @return the next DAG level or {@code null}
+  */
+ public MergeableDagLevel next() {
+     return this.nextLevel;
+ }
+
+ /**
+  * Returns {@code true} if the specified parent node has a
+  * sibling and returns {@code false} otherwise.
+  * Two parent nodes are siblings if they are connected by an
+  * edge to the same parent node at the previous level of the DAG.
+  *
+  * @param parentNode a parent node index
+  * @return {@code true} if the specified parent node has a
+  * sibling
+  * @throws NullPointerException if {@code this.previous() == null}
+  */
+ public boolean hasSibling(int parentNode) {
+     final MergeableDagLevel prev = this.prevLevel;
+     // visit every edge of the previous level that ends at parentNode
+     for (int inEdge = prev.child2FirstInEdge[parentNode]; inEdge >= 0;
+             inEdge = prev.inEdge2NextInEdge[inEdge]) {
+         final int grandParent = prev.parentNodes[inEdge];
+         // count the outgoing edges of the node that the in-edge starts at
+         int nOutEdges = 0;
+         for (int allele = 0, n = prev.nAlleles; allele < n; ++allele) {
+             if (prev.outEdges[allele][grandParent] >= 0) {
+                 ++nOutEdges;
+             }
+         }
+         if (nOutEdges > 1) {
+             return true;
+         }
+     }
+     return false;
+ }
+
+ /**
+ * Returns an immutable {@code DagLevel} corresponding to
+ * {@code this}. The parent node, edge, and child node indices
+ * in the returned {@code DagLevel} are the ranks of the
+ * parent node, edge, and child node indices for {@code this},
+ * with rank 0 corresponding to the smallest index.
+ * @return an immutable {@code DagLevel} corresponding to {@code this}
+ */
+ public DagLevel toDagLevel() {
+ float[] modCounts = DagUtil.removeValues(counts, 0f);
+ int[] modSymbols = DagUtil.removeValues(symbols, -1);
+ int[] modParentNodes = DagUtil.removeValues(parentNodes, -1);
+ int[] modChildNodes = DagUtil.removeValues(childNodes, -1);
+ if (modCounts.length<=Character.MAX_VALUE) {
+ char[] mod2Symbols = toCharArray(modSymbols);
+ char[] mod2ParentNodes = rankedCharValues(modParentNodes);
+ char[] mod2ChildNodes = rankedCharValues(modChildNodes);
+ return new LowCapacityDagLevel(mod2ParentNodes, mod2ChildNodes,
+ mod2Symbols, modCounts);
+ }
+ else {
+ int[] mod2ParentNodes = rankedIntValues(modParentNodes);
+ int[] mod2ChildNodes = rankedIntValues(modChildNodes);
+ return new HighCapacityDagLevel(mod2ParentNodes, mod2ChildNodes,
+ modSymbols, modCounts);
+ }
+ }
+
+ private static char[] toCharArray(int[] ia) {
+ char[] ca = new char[ia.length];
+ for (int j=0; j < ca.length; ++j) {
+ if (ia[j] < 0 || ia[j] > Character.MAX_VALUE) {
+ throw new IllegalArgumentException(String.valueOf(ia[j]));
+ }
+ ca[j] = (char) ia[j];
+ }
+ return ca;
+ }
+
+ /*
+  * Returns an array obtained by replacing each array value with its
+  * rank when the set of array values is ordered: the smallest value
+  * is replaced by 0, the next smallest value is replaced by 1, etc.
+  *
+  * @throws IllegalArgumentException if {@code array.length == 0}
+  * @throws IllegalArgumentException if any element of the array
+  * is negative
+  * @throws IllegalArgumentException if the array has more than
+  * {@code Character.MAX_VALUE + 1} distinct values
+  * @throws NullPointerException if {@code array == null}
+  */
+ private static char[] rankedCharValues(int[] array) {
+     if (array.length==0) {
+         throw new IllegalArgumentException("array.length==0");
+     }
+     int[] sortedCopy = array.clone();
+     Arrays.sort(sortedCopy);
+     if (sortedCopy[0] < 0) {
+         throw new IllegalArgumentException(String.valueOf(sortedCopy[0]));
+     }
+     // indexMap[value] is the rank of value; it is indexed directly by
+     // value, so its length is the largest value plus one
+     int n = sortedCopy[sortedCopy.length - 1] + 1;
+     int[] indexMap = new int[n];
+     int index = 0;
+     indexMap[sortedCopy[0]] = index++;
+     for (int j=1; j<sortedCopy.length; ++j) {
+         if (sortedCopy[j] != sortedCopy[j-1]) {
+             indexMap[sortedCopy[j]] = index++;
+         }
+     }
+     // index is now the number of distinct values and (index - 1) is the
+     // largest rank. A char holds 0..Character.MAX_VALUE, so only a
+     // largest rank strictly greater than Character.MAX_VALUE overflows.
+     if ( (index - 1) > Character.MAX_VALUE) {
+         String s = "Array has more than (Character.MAX_VALUE + 1) values";
+         throw new IllegalArgumentException(s);
+     }
+     char[] transformedArray = new char[array.length];
+     for (int j=0; j<transformedArray.length; ++j) {
+         transformedArray[j] = (char) indexMap[array[j]];
+     }
+     return transformedArray;
+ }
+
+ /*
+ * Returns an array obtained by replacing each array value with it's
+ * rank when the set of array values is ordered: the smallest value
+ * is replaced by 0, the next smallest value is replaced by 1, etc.
+ *
+ * @throws IllegalArgumentException if {@code array.length == 0}
+ * @throws IllegalArgumentException if any element of the array
+ * is negative
+ * @throws NullPointerException if {@code array == null}
+ */
+ private static int[] rankedIntValues(int[] array) {
+ if (array.length==0) {
+ throw new IllegalArgumentException("array.length==0");
+ }
+ int[] sortedCopy = array.clone();
+ Arrays.sort(sortedCopy);
+ if (sortedCopy[0] < 0) {
+ throw new IllegalArgumentException(String.valueOf(sortedCopy[0]));
+ }
+ int n = sortedCopy[sortedCopy.length - 1] + 1;
+ int[] indexMap = new int[n];
+ int index = 0;
+ indexMap[sortedCopy[0]] = index++;
+ for (int j=1; j<sortedCopy.length; ++j) {
+ if (sortedCopy[j] != sortedCopy[j-1]) {
+ indexMap[sortedCopy[j]] = index++;
+ }
+ }
+ int[] transformedArray = new int[array.length];
+ for (int j=0; j<transformedArray.length; ++j) {
+ transformedArray[j] = indexMap[array[j]];
+ }
+ return transformedArray;
+ }
+
+ /**
+  * Merges the two specified parent nodes and assigns the specified
+  * {@code retainedNode} index to the merged node.
+  *
+  * @param retainedNode a parent node which will receive ingoing and
+  * outgoing edges of {@code removedNode}
+  * @param removedNode a parent node that will be deleted after merging.
+  *
+  * @throws IllegalArgumentException if {@code retainedNode}
+  * or {@code removedNode} is not a valid parent node index.
+  * @throws IllegalArgumentException if
+  * {@code retainedNode == removedNode}
+  * @throws NullPointerException if {@code this.previous() == null}
+  */
+ public void mergeParentNodes(int retainedNode, int removedNode) {
+     if (isParentNode(retainedNode)==false) {
+         String s = "invalid parent node: " + retainedNode;
+         throw new IllegalArgumentException(s);
+     }
+     if (isParentNode(removedNode)==false) {
+         String s = "invalid parent node: " + removedNode;
+         throw new IllegalArgumentException(s);
+     }
+     if (retainedNode == removedNode) {
+         // Merging a node with itself would link its in-edge list into a
+         // cycle in the previous level and then delete all of its own
+         // outgoing edges, so reject it before any state is modified.
+         String s = "retainedNode==removedNode: " + retainedNode;
+         throw new IllegalArgumentException(s);
+     }
+     // redirect in-edges at the previous level first, then merge the
+     // outgoing edges at this level (and recursively at later levels)
+     prevLevel.mergeChildNodes(retainedNode, removedNode);
+     mergeParentNodes2(retainedNode, removedNode);
+ }
+
+ /*
+  * Merges the outgoing edges of {@code removedNode} into
+  * {@code retainedNode}, allele by allele. An edge of
+  * {@code removedNode} whose allele has no edge at {@code retainedNode}
+  * is re-parented. If both nodes have an edge for the allele, the two
+  * edges are merged and, when a next level is stored, the two child
+  * nodes are merged recursively as parent nodes of the next level.
+  */
+ private void mergeParentNodes2(int retainedNode, int removedNode) {
+ for (int j=0; j<nAlleles; ++j) {
+ int retainedEdge = outEdges[j][retainedNode];
+ int removedEdge = outEdges[j][removedNode];
+ if (removedEdge >= 0) {
+ if (retainedEdge == -1) {
+ // only the removed node has this allele: move its edge
+ changeParent(removedEdge, retainedNode);
+ }
+ else {
+ // read both child nodes before mergeEdges(), which resets
+ // childNodes[removedEdge] to -1
+ int retainedChild = childNode(retainedEdge);
+ int removedChild = childNode(removedEdge);
+ mergeEdges(retainedEdge, removedEdge);
+ if (nextLevel != null) {
+ nextLevel.mergeParentNodes2(retainedChild, removedChild);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+  * Merges the two specified child nodes and assigns the merged
+  * node to the specified {@code retainedNode} index. Ingoing edges
+  * to {@code removedNode} are redirected to be ingoing edges
+  * to {@code retainedNode}, and the in-edge list of {@code removedNode}
+  * is spliced onto the front of the in-edge list of
+  * {@code retainedNode}. Nothing is changed if {@code removedNode} has
+  * no ingoing edges.
+  *
+  * @param retainedNode a child node which will receive ingoing edges of
+  * {@code removedNode}
+  * @param removedNode a child node that will be deleted after merging
+  */
+ private void mergeChildNodes(int retainedNode, int removedNode) {
+ int lastEdge = -1;
+ int edge = child2FirstInEdge[removedNode];
+ // redirect each in-edge of removedNode and remember the list's last edge
+ while (edge != -1) {
+ assert childNodes[edge] == removedNode;
+ childNodes[edge] = retainedNode;
+ lastEdge = edge;
+ edge = inEdge2NextInEdge[edge];
+ }
+ if (lastEdge != -1) {
+ // removed list + retained list becomes the retained node's list
+ inEdge2NextInEdge[lastEdge] = child2FirstInEdge[retainedNode];
+ child2FirstInEdge[retainedNode] = child2FirstInEdge[removedNode];
+ child2FirstInEdge[removedNode] = -1;
+ }
+ }
+
+ private void changeParent(int edge, int newParent) {
+ int oldParent = parentNodes[edge];
+ int symbol = symbols[edge];
+ assert (outEdges[symbol][oldParent] == edge);
+ assert (outEdges[symbol][newParent] == -1);
+ outEdges[symbol][oldParent] = -1;
+ outEdges[symbol][newParent] = edge;
+ parentNodes[edge] = newParent;
+ }
+
+ /*
+  * Merges {@code removedEdge} into {@code retainedEdge}: the count of
+  * the removed edge is added to the retained edge, the haplotype lists
+  * of the two child nodes are merged if no next level is stored, and
+  * the removed edge is then deleted. A deleted edge has count 0 and
+  * parent node, child node, and symbol equal to -1, which are the
+  * values that toDagLevel() filters out.
+  */
+ private void mergeEdges(int retainedEdge, int removedEdge) {
+ assert symbols[retainedEdge] == symbols[removedEdge];
+ assert counts[removedEdge] > 0.0f;
+ counts[retainedEdge] += counts[removedEdge];
+ if (nextLevel==null) {
+ mergeHaplotypes(childNodes[retainedEdge], childNodes[removedEdge]);
+ }
+ int parentNode = parentNodes[removedEdge];
+ int childNode = childNodes[removedEdge];
+ int symbol = symbols[removedEdge];
+ // the removed child node is expected to have exactly one in-edge
+ assert inEdge2NextInEdge[child2FirstInEdge[childNode]] == -1;
+ // unlink the removed edge from its parent and child nodes
+ outEdges[symbol][parentNode] = -1;
+ child2FirstInEdge[childNode] = -1;
+ // mark the edge slot as deleted
+ counts[removedEdge] = 0.0f;
+ parentNodes[removedEdge] = -1;
+ childNodes[removedEdge] = -1;
+ symbols[removedEdge] = -1;
+ }
+
+ private void mergeHaplotypes(int retainedChild, int removedChild) {
+ int hap = child2FirstHap[removedChild];
+ while (hap2NextHap[hap] != -1) {
+ hap = hap2NextHap[hap];
+ }
+ hap2NextHap[hap] = child2FirstHap[retainedChild];
+ child2FirstHap[retainedChild] = child2FirstHap[removedChild];
+ child2FirstHap[removedChild] = -1;
+ }
+
+ /**
+  * Returns the marker index of this level.
+  * @return the marker index
+  */
+ public int index() {
+     return levelIndex;
+ }
+
+ /**
+  * Returns the number of sequences used to construct the DAG.
+  * @return the number of sequences used to construct the DAG
+  */
+ public int nHaps() {
+     return nHaps;
+ }
+
+ /**
+  * Returns the number of alleles.
+  *
+  * @return the number of alleles
+  */
+ public int nAlleles() {
+     return nAlleles;
+ }
+
+ /**
+  * Returns the sum of weights for the sequences that pass
+  * through the specified edge or 0 if the edge does not exist.
+  *
+  * @param edge index of the edge
+  * @return sum of weights for the sequences that pass
+  * through the specified edge or 0 if the edge does not exist
+  *
+  * @throws IndexOutOfBoundsException if {@code edge} is not a valid
+  * index into the edge arrays of this level
+  */
+ public float edgeCount(int edge) {
+     return this.counts[edge];
+ }
+
+ /**
+  * Returns the sum of weights for the sequences that pass
+  * through the specified parent node or 0 if the parent node
+  * does not exist. The sum is taken over the outgoing edges of the
+  * parent node.
+  *
+  * @param parentNode index of the parent node
+  * @return sum of weights for the sequences that pass
+  * through the specified parent node or 0 if the parent node
+  * does not exist
+  *
+  * @throws IndexOutOfBoundsException if {@code parentNode} is not a
+  * valid parent node index for this level
+  */
+ public float nodeCount(int parentNode) {
+     float total = 0.0f;
+     for (int allele = 0; allele < nAlleles; ++allele) {
+         final int edge = outEdges[allele][parentNode];
+         if (edge >= 0) {
+             total += edgeCount(edge);
+         }
+     }
+     return total;
+ }
+
+ /**
+  * Returns an array of the distinct parent node indices of this level
+  * in increasing order.
+  * @return an array of parent node indices
+  */
+ public int[] parentNodeArray() {
+     final int[] sorted = DagUtil.removeValues(parentNodes, -1);
+     Arrays.sort(sorted);
+     assert sorted.length > 0;
+     // copy each value that differs from its predecessor, then trim
+     final int[] distinct = new int[sorted.length];
+     int nDistinct = 0;
+     distinct[nDistinct++] = sorted[0];
+     for (int j=1; j<sorted.length; ++j) {
+         if (sorted[j] != sorted[j-1]) {
+             distinct[nDistinct++] = sorted[j];
+         }
+     }
+     return Arrays.copyOf(distinct, nDistinct);
+ }
+
+ /**
+  * Returns the parent node of the specified edge or -1 if the edge does
+  * not exist.
+  *
+  * @param edge index of the edge
+  * @return the parent node of the specified edge or -1 if the edge does
+  * not exist
+  *
+  * @throws IndexOutOfBoundsException if {@code edge} is not a valid
+  * index into the edge arrays of this level
+  */
+ public int parentNode(int edge) {
+     return this.parentNodes[edge];
+ }
+
+ /**
+  * Returns the child node of the specified edge or -1 if the edge does
+  * not exist.
+  *
+  * @param edge index of the edge
+  * @return the child node of the specified edge or -1 if the edge does
+  * not exist
+  *
+  * @throws IndexOutOfBoundsException if {@code edge} is not a valid
+  * index into the edge arrays of this level
+  */
+ public int childNode(int edge) {
+     return this.childNodes[edge];
+ }
+
+ /**
+  * Returns the outgoing edge of the specified parent node that is
+  * labeled with the specified symbol, or returns -1 if no such edge
+  * exists.
+  *
+  * @param parentNode the parent node
+  * @param symbol symbol labeling the outgoing edge
+  * @return the outgoing edge of the specified parent node that is
+  * labeled with the specified symbol, or -1 if no such edge exists.
+  *
+  * @throws IndexOutOfBoundsException if {@code parentNode} is not a
+  * valid parent node index for this level
+  * @throws IndexOutOfBoundsException if
+  * {@code symbol < 0 || symbol >= this.nAlleles()}
+  */
+ public int outEdge(int parentNode, int symbol) {
+     final int edge = this.outEdges[symbol][parentNode];
+     return edge;
+ }
+
+ /**
+  * Returns a string representation of {@code this}. The exact
+  * details of the representation are unspecified and subject to change.
+  *
+  * @return a string representation of {@code this}
+  */
+ @Override
+ public String toString() {
+     StringBuilder sb = new StringBuilder(1000);
+     sb.append(Const.nl);
+     sb.append("[ MergeableDagLevel: marker=");
+     sb.append(levelIndex);
+     sb.append(Const.nl);
+     // Iterate over the edge arrays' actual length rather than nHaps():
+     // reduceEdgeArrayLengths() can shorten the edge arrays below nHaps,
+     // in which case indexing up to nHaps() would throw an
+     // ArrayIndexOutOfBoundsException.
+     for (int j=0, n=parentNodes.length; j<n; ++j) {
+         // deleted or unused edge slots have parent node -1
+         if (parentNodes[j] != -1) {
+             sb.append("edge=");
+             sb.append(j);
+             sb.append(" parent=");
+             sb.append(parentNodes[j]);
+             sb.append(" child=");
+             sb.append(childNodes[j]);
+             sb.append(" symbol=");
+             sb.append(symbols[j]);
+             sb.append(" count=");
+             sb.append(counts[j]);
+             sb.append(Const.nl);
+         }
+     }
+     sb.append("previous=");
+     sb.append(prevLevel!=null);
+     sb.append(" next=");
+     sb.append(nextLevel!=null);
+     sb.append(Const.nl);
+     sb.append(" ]");
+     return sb.toString();
+ }
+
+ /*
+  * Returns true if the specified index is a parent node of this level.
+  * If a previous level is stored, a parent node is a child node of the
+  * previous level that has at least one ingoing edge; otherwise a
+  * parent node is a node with at least one outgoing edge at this level.
+  * An index outside the bounds of the node arrays is not a parent node,
+  * so false is returned for it instead of throwing an
+  * ArrayIndexOutOfBoundsException. This lets mergeParentNodes() report
+  * the documented IllegalArgumentException for such an index.
+  */
+ private boolean isParentNode(int node) {
+     if (node < 0) {
+         return false;
+     }
+     if (prevLevel!=null) {
+         return node < prevLevel.child2FirstInEdge.length
+                 && prevLevel.child2FirstInEdge[node]>=0;
+     }
+     else {
+         for (int j=0; j<nAlleles; ++j) {
+             if (node < outEdges[j].length && outEdges[j][node] != -1) {
+                 return true;
+             }
+         }
+         return false;
+     }
+ }
+}
\ No newline at end of file
diff --git a/dag/Score.java b/dag/Score.java
new file mode 100644
index 0000000..b78f126
--- /dev/null
+++ b/dag/Score.java
@@ -0,0 +1,200 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package dag;
+
+/**
+ * <p>Class {@code Score} represents a similarity score for a pair
+ * of trees.
+ * </p>
+ * Instances of class {@code Score} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class Score implements Comparable<Score> {
+
+    private final int nodeA;
+    private final int nodeB;
+
+    // The sign of this field encodes mergeability: the similarity score
+    // is stored unchanged for a mergeable pair and negated for a
+    // non-mergeable pair. A non-mergeable pair never has score 0, so the
+    // stored value is strictly negative exactly when the pair is not
+    // mergeable.
+    private final float score;
+
+    /**
+     * Constructs a new {@code Score} instance. Smaller similarity scores
+     * correspond to greater similarity.
+     * @param nodeA root node index for the first tree
+     * @param nodeB root node index for the second tree
+     * @param score a non-negative similarity score for the two specified
+     * trees
+     * @param isMergeable {@code true} if the two trees may be
+     * merged, and {@code false} otherwise
+     * @throws IllegalArgumentException if
+     * {@code score < 0 || (score==0 && isMergeable==false)}
+     * @throws IllegalArgumentException if {@code Float.isNaN(score)}
+     */
+    public Score(int nodeA, int nodeB, float score, boolean isMergeable) {
+        if (score < 0 || (score==0 && isMergeable==false) || Float.isNaN(score)) {
+            throw new IllegalArgumentException(String.valueOf(score));
+        }
+        this.nodeA = nodeA;
+        this.nodeB = nodeB;
+        this.score = isMergeable ? score : -score;
+    }
+
+    /**
+     * Returns the root node index for the first tree.
+     *
+     * @return the root node index for the first tree
+     */
+    public int nodeA() {
+        return nodeA;
+    }
+
+    /**
+     * Returns the root node index for the second tree.
+     *
+     * @return the root node index for the second tree
+     */
+    public int nodeB() {
+        return nodeB;
+    }
+
+    /**
+     * Returns the similarity score for the two trees.
+     *
+     * @return the similarity score for the two trees
+     */
+    public float score() {
+        return (score < 0) ? -score : score;
+    }
+
+    /**
+     * Returns {@code true} if the two trees may be merged, and
+     * returns {@code false} otherwise.
+     *
+     * @return {@code true} if the two trees may be merged, and
+     * returns {@code false} otherwise
+     */
+    public boolean isMergeable() {
+        return score>=0;
+    }
+
+    /**
+     * Compares the specified object with this {@code Score} for
+     * equality.  Returns {@code true} if the specified object
+     * is a {@code Score} instance whose {@code nodeA()}, {@code nodeB()},
+     * {@code score()}, and {@code isMergeable()} methods return the
+     * same values as the corresponding methods for {@code this}, and
+     * returns {@code false} otherwise. Scores are compared with
+     * {@code Float.floatToIntBits()}.
+     * @param obj the object to be compared for equality with this
+     * {@code Score}
+     * @return {@code true} if the specified object is equal to {@code this}
+     */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj) {
+            return true;
+        }
+        if (obj == null) {
+            return false;
+        }
+        if (getClass() != obj.getClass()) {
+            return false;
+        }
+        final Score other = (Score) obj;
+        if (this.nodeA != other.nodeA) {
+            return false;
+        }
+        if (this.nodeB != other.nodeB) {
+            return false;
+        }
+        // comparing the signed field compares score() and isMergeable()
+        // at the same time
+        return Float.floatToIntBits(this.score)
+                == Float.floatToIntBits(other.score);
+    }
+
+     /**
+     * <p>Returns the hash code value for this object. The hash code is defined
+     * by the following calculation:
+     * </p>
+     * <pre>
+     *  float signedScore = this.isMergeable() ? this.score() : -this.score();
+     *  int hash = 5;
+     *  hash = 53 * hash + this.nodeA();
+     *  hash = 53 * hash + this.nodeB();
+     *  hash = 53 * hash + Float.floatToIntBits(signedScore);
+     * </pre>
+     * @return a hash code value for the object
+     */
+    @Override
+    public int hashCode() {
+        int hash = 5;
+        hash = 53 * hash + this.nodeA;
+        hash = 53 * hash + this.nodeB;
+        hash = 53 * hash + Float.floatToIntBits(this.score);
+        return hash;
+    }
+
+    /**
+     * Returns -1, 0, or 1 depending on whether this {@code Score} is less
+     * than, equal to, or greater than the specified {@code Score}.  The
+     * two scores are ordered first using {@code -Boolean.compare()} on
+     * the value returned by {@code isMergeable()}, then by
+     * {@code Float.compare()} on the value returned by {@code score()}, then
+     * by the value returned by {@code nodeA()}, and then by the value returned
+     * by {@code nodeB()}.
+     * @param other a {@code Score} element to be compared
+     * @return a negative integer, zero, or a positive integer depending
+     * on whether this {@code Score} is less than, equal to, or greater
+     * than the specified {@code Score}
+     *
+     * @throws NullPointerException if {@code other == null}
+     */
+    @Override
+    public int compareTo(Score other) {
+        // mergeable scores sort before non-mergeable scores
+        int x = -Boolean.compare(this.isMergeable(), other.isMergeable());
+        if (x!=0) {
+            return x;
+        }
+        x = Float.compare(this.score(), other.score());
+        if (x!=0) {
+            return x;
+        }
+        if (this.nodeA() != other.nodeA()) {
+            return (this.nodeA() < other.nodeA()) ? -1 : 1;
+        }
+        if (this.nodeB() != other.nodeB()) {
+            return (this.nodeB() < other.nodeB()) ? -1 : 1;
+        }
+        return 0;
+    }
+
+    /**
+     * Returns a string representation of {@code this}.  The exact details of
+     * the representation are unspecified and subject to change.
+     *
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("[nodeA=");
+        sb.append(nodeA());
+        sb.append(", nodeB=");
+        sb.append(nodeB());
+        sb.append(", score=");
+        sb.append(score());
+        sb.append(", isMergeable=");
+        sb.append(isMergeable());
+        sb.append("]");
+        return sb.toString();
+    }
+}
diff --git a/gpl_license b/gpl_license
new file mode 100644
index 0000000..94a9ed0
--- /dev/null
+++ b/gpl_license
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+ The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works. By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users. We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors. You can apply it to
+your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+ To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights. Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+ For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received. You must make sure that they, too, receive
+or can get the source code. And you must show them these terms so they
+know their rights.
+
+ Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+ For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software. For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+ Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so. This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software. The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable. Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products. If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+ Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary. To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 0. Definitions.
+
+ "This License" refers to version 3 of the GNU General Public License.
+
+ "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+ "The Program" refers to any copyrightable work licensed under this
+License. Each licensee is addressed as "you". "Licensees" and
+"recipients" may be individuals or organizations.
+
+ To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy. The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+ A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+ To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy. Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+ To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies. Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+ An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License. If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+ 1. Source Code.
+
+ The "source code" for a work means the preferred form of the work
+for making modifications to it. "Object code" means any non-source
+form of a work.
+
+ A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+ The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form. A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+ The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities. However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work. For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+ The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+ The Corresponding Source for a work in source code form is that
+same work.
+
+ 2. Basic Permissions.
+
+ All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met. This License explicitly affirms your unlimited
+permission to run the unmodified Program. The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work. This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+ You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force. You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright. Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+ Conveying under any other circumstances is permitted solely under
+the conditions stated below. Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+ No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+ When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+ 4. Conveying Verbatim Copies.
+
+ You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+ You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+ 5. Conveying Modified Source Versions.
+
+ You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+ a) The work must carry prominent notices stating that you modified
+ it, and giving a relevant date.
+
+ b) The work must carry prominent notices stating that it is
+ released under this License and any conditions added under section
+ 7. This requirement modifies the requirement in section 4 to
+ "keep intact all notices".
+
+ c) You must license the entire work, as a whole, under this
+ License to anyone who comes into possession of a copy. This
+ License will therefore apply, along with any applicable section 7
+ additional terms, to the whole of the work, and all its parts,
+ regardless of how they are packaged. This License gives no
+ permission to license the work in any other way, but it does not
+ invalidate such permission if you have separately received it.
+
+ d) If the work has interactive user interfaces, each must display
+ Appropriate Legal Notices; however, if the Program has interactive
+ interfaces that do not display Appropriate Legal Notices, your
+ work need not make them do so.
+
+ A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit. Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+ 6. Conveying Non-Source Forms.
+
+ You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+ a) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by the
+ Corresponding Source fixed on a durable physical medium
+ customarily used for software interchange.
+
+ b) Convey the object code in, or embodied in, a physical product
+ (including a physical distribution medium), accompanied by a
+ written offer, valid for at least three years and valid for as
+ long as you offer spare parts or customer support for that product
+ model, to give anyone who possesses the object code either (1) a
+ copy of the Corresponding Source for all the software in the
+ product that is covered by this License, on a durable physical
+ medium customarily used for software interchange, for a price no
+ more than your reasonable cost of physically performing this
+ conveying of source, or (2) access to copy the
+ Corresponding Source from a network server at no charge.
+
+ c) Convey individual copies of the object code with a copy of the
+ written offer to provide the Corresponding Source. This
+ alternative is allowed only occasionally and noncommercially, and
+ only if you received the object code with such an offer, in accord
+ with subsection 6b.
+
+ d) Convey the object code by offering access from a designated
+ place (gratis or for a charge), and offer equivalent access to the
+ Corresponding Source in the same way through the same place at no
+ further charge. You need not require recipients to copy the
+ Corresponding Source along with the object code. If the place to
+ copy the object code is a network server, the Corresponding Source
+ may be on a different server (operated by you or a third party)
+ that supports equivalent copying facilities, provided you maintain
+ clear directions next to the object code saying where to find the
+ Corresponding Source. Regardless of what server hosts the
+ Corresponding Source, you remain obligated to ensure that it is
+ available for as long as needed to satisfy these requirements.
+
+ e) Convey the object code using peer-to-peer transmission, provided
+ you inform other peers where the object code and Corresponding
+ Source of the work are being offered to the general public at no
+ charge under subsection 6d.
+
+ A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+ A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling. In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage. For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product. A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+ "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source. The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+ If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information. But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+ The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed. Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+ Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+ 7. Additional Terms.
+
+ "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law. If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+ When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it. (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.) You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+ Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+ a) Disclaiming warranty or limiting liability differently from the
+ terms of sections 15 and 16 of this License; or
+
+ b) Requiring preservation of specified reasonable legal notices or
+ author attributions in that material or in the Appropriate Legal
+ Notices displayed by works containing it; or
+
+ c) Prohibiting misrepresentation of the origin of that material, or
+ requiring that modified versions of such material be marked in
+ reasonable ways as different from the original version; or
+
+ d) Limiting the use for publicity purposes of names of licensors or
+ authors of the material; or
+
+ e) Declining to grant rights under trademark law for use of some
+ trade names, trademarks, or service marks; or
+
+ f) Requiring indemnification of licensors and authors of that
+ material by anyone who conveys the material (or modified versions of
+ it) with contractual assumptions of liability to the recipient, for
+ any liability that these contractual assumptions directly impose on
+ those licensors and authors.
+
+ All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10. If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term. If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+ If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+ Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+ 8. Termination.
+
+ You may not propagate or modify a covered work except as expressly
+provided under this License. Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+ However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+ Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License. If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+ 9. Acceptance Not Required for Having Copies.
+
+ You are not required to accept this License in order to receive or
+run a copy of the Program. Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance. However,
+nothing other than this License grants you permission to propagate or
+modify any covered work. These actions infringe copyright if you do
+not accept this License. Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+ 10. Automatic Licensing of Downstream Recipients.
+
+ Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License. You are not responsible
+for enforcing compliance by third parties with this License.
+
+ An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations. If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+ You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License. For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+ 11. Patents.
+
+ A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based. The
+work thus licensed is called the contributor's "contributor version".
+
+ A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version. For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+ In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement). To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+ If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients. "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+ If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+ A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License. You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+ Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+ 12. No Surrender of Others' Freedom.
+
+ If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License. If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all. For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+ 13. Use with the GNU Affero General Public License.
+
+ Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work. The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+ 14. Revised Versions of this License.
+
+ The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation. If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+ If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+ Later license versions may give you additional or different
+permissions. However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+ 15. Disclaimer of Warranty.
+
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+ 16. Limitation of Liability.
+
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+ 17. Interpretation of Sections 15 and 16.
+
+ If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+ END OF TERMS AND CONDITIONS
+
+ How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) <year> <name of author>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+ If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+ <program> Copyright (C) <year> <name of author>
+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License. Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+ You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+ The GNU General Public License does not permit incorporating your program
+into proprietary programs. If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library. If this is what you want to do, use the GNU Lesser General
+Public License instead of this License. But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/haplotype/BasicHapPairs.java b/haplotype/BasicHapPairs.java
new file mode 100644
index 0000000..d527d6f
--- /dev/null
+++ b/haplotype/BasicHapPairs.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import java.util.List;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code BasicHapPairs} represents a list of haplotype pairs.
+ * Each haplotype pair is guaranteed to have two non-missing
+ * alleles at each marker.
+ * </p>
+ * Instances of class {@code BasicHapPairs} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BasicHapPairs implements HapPairs {
+
+    private final Markers markers;
+    private final HapPair[] hapPairs;
+
+    /**
+     * Constructs a new {@code BasicHapPairs} instance corresponding to
+     * the specified list of haplotype pairs. The elements of the list are
+     * copied into an internal array, so later changes to the list do not
+     * affect the new instance.
+     * @param hapPairList a list of haplotype pairs
+     *
+     * @throws IllegalArgumentException if
+     * {@code hapPairList.isEmpty() == true}
+     * @throws IllegalArgumentException if
+     * {@code hapPairList.get(j).markers().equals(hapPairList.get(k).markers())
+     * == false}
+     * for any indices {@code j, k} satisfying
+     * {@code 0 <= j && j < k && k < hapPairList.size()}
+     * @throws NullPointerException if
+     * {@code (hapPairList == null || hapPairList.get(j) == null)}
+     * for any {@code j} satisfying
+     * {@code (0 <= j && j < hapPairList.size())}
+     */
+    public BasicHapPairs(List<HapPair> hapPairList) {
+        if (hapPairList.isEmpty()) {
+            throw new IllegalArgumentException("hapPairList.isEmpty()==true");
+        }
+        this.markers = BasicSampleHapPairs.checkAndExtractMarkers(hapPairList);
+        this.hapPairs = hapPairList.toArray(new HapPair[0]);
+    }
+
+    @Override
+    public int allele1(int marker, int hapPair) {
+        return hapPairs[hapPair].allele1(marker);
+    }
+
+    @Override
+    public int allele2(int marker, int hapPair) {
+        return hapPairs[hapPair].allele2(marker);
+    }
+
+    @Override
+    public int allele(int marker, int haplotype) {
+        // An arithmetic shift is used instead of division by 2 because
+        // integer division rounds toward zero: (-1 / 2) == 0, which would
+        // silently map haplotype -1 to the second allele of haplotype
+        // pair 0.  With the shift, (-1 >> 1) == -1 and the array access
+        // below fails with an ArrayIndexOutOfBoundsException.
+        HapPair pair = hapPairs[haplotype >> 1];
+        return ((haplotype & 1) == 0)
+                ? pair.allele1(marker)
+                : pair.allele2(marker);
+    }
+
+    @Override
+    public int nMarkers() {
+        return markers.nMarkers();
+    }
+
+    @Override
+    public Markers markers() {
+        return markers;
+    }
+
+    @Override
+    public Marker marker(int marker) {
+        return markers.marker(marker);
+    }
+
+    @Override
+    public int nHaps() {
+        return 2*hapPairs.length;
+    }
+
+    @Override
+    public int nHapPairs() {
+        return hapPairs.length;
+    }
+
+    @Override
+    public Samples samples(int hapPair) {
+        return hapPairs[hapPair].samples();
+    }
+
+    @Override
+    public int sampleIndex(int hapPair) {
+        return hapPairs[hapPair].sampleIndex();
+    }
+
+    /**
+     * Returns a string representation of {@code this}.  The exact details
+     * of the representation are unspecified and subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        // The result is a few dozen characters long, so plain concatenation
+        // is used rather than a pre-sized 10000-character buffer.
+        return "[" + this.getClass().toString()
+                + ": nHapPairs=" + this.nHapPairs() + "]";
+    }
+}
diff --git a/haplotype/BasicSampleHapPairs.java b/haplotype/BasicSampleHapPairs.java
new file mode 100644
index 0000000..1f1b786
--- /dev/null
+++ b/haplotype/BasicSampleHapPairs.java
@@ -0,0 +1,260 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code BasicSampleHapPairs} stores a list of samples and a
+ * haplotype pair for each sample.
+ * </p>
+ * <p>Instances of class {@code BasicSampleHapPairs} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BasicSampleHapPairs implements SampleHapPairs {
+
+ private final Markers markers;
+ private final Samples samples;
+ private final HapPair[] hapPairs;
+
+ /**
+ * Constructs a new {@code BasicSampleHapPairs} instance.
+ * @param samples a list of samples
+ * @param hapPairList a list of haplotype pairs corresponding to the
+ * specified list of samples
+ *
+ * @throws IllegalArgumentException if
+ * {@code hapPairList.isEmpty() == true}
+ * @throws IllegalArgumentException if
+ * {@code hapPairList.get(j).markers().equals(hapPairList.get(k).markers())
+ * == false}
+ * for any indices {@code j, k} satisfying
+ * {@code 0 <= j && j < k && k < hapPairList.size()}
+ * @throws IllegalArgumentException if the list of samples does not
+ * match the list of samples determined by {@code hapPairList}
+ * @throws NullPointerException if {@code samples == null}
+ * @throws NullPointerException if
+ * {@code (hapPairList == null || hapPairList(j) == null)}
+ * for any {@code j} satisfying {@code (0 <= j && j < hapPairList.size())}
+ */
+ public BasicSampleHapPairs(Samples samples, List<HapPair> hapPairList) {
+ if (hapPairList.isEmpty()) {
+ throw new IllegalArgumentException("haps.isEmpy()==true");
+ }
+ Collections.sort(hapPairList, hapsComparator(samples));
+ checkSamples(samples, hapPairList);
+ this.markers = checkAndExtractMarkers(hapPairList);
+ this.samples = samples;
+ this.hapPairs = hapPairList.toArray(new HapPair[0]);
+ }
+
+ private void checkSamples(Samples samples, List<HapPair> hapPairs) {
+ if (samples.nSamples()!= hapPairs.size()) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ for (int j=0, n=hapPairs.size(); j<n; ++j) {
+ if (samples.equals(hapPairs.get(j).samples())==false) {
+ HapPair hp = hapPairs.get(j);
+ int i1 = samples.idIndex(j);
+ int i2 = hp.samples().idIndex(hp.sampleIndex());
+ if (i1 != i2) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ }
+ }
+ }
+
+ /**
+ * Checks that all haplotype pairs have alleles for the same list of
+ * markers, and returns the list of markers.
+ * @param hapPairList a list of haplotype pairs
+ * @return the list of markers shared by the specified haplotype pairs
+ * @throws IllegalArgumentException if
+ * {@code hapPiarList.get(j).markers().equals(hapPairList.get(k).markers())
+ * == false}
+ * for any indices {@code j, k} satisfying
+ * {@code 0 <= j && j < k && k < hapPairList.size()}
+ * @throws NullPointerException if
+ * {@code hapPairList == null || hapPairList(j) == null}
+ * for any {@code j} satisfying {@code 0 <= j && j < hapPairList.size()}
+ */
+ static Markers checkAndExtractMarkers(List<HapPair> hapPairList) {
+ if (hapPairList.isEmpty()) {
+ return Markers.create(new Marker[0]);
+ }
+ else {
+ Markers m = hapPairList.get(0).markers();
+ for (int j=1, n=hapPairList.size(); j<n; ++j) {
+ if (hapPairList.get(j).markers().equals(m)==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ }
+ return m;
+ }
+ }
+
+ @Override
+ public int allele1(int marker, int hapPair) {
+ return hapPairs[hapPair].allele1(marker);
+ }
+
+ @Override
+ public int allele2(int marker, int hapPair) {
+ return hapPairs[hapPair].allele2(marker);
+ }
+
+ @Override
+ public int allele(int marker, int haplotype) {
+ int pairIndex = haplotype/2;
+ if ((haplotype & 1)==0) {
+ return hapPairs[pairIndex].allele1(marker);
+ }
+ else {
+ return hapPairs[pairIndex].allele2(marker);
+ }
+ }
+
+ @Override
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return markers.marker(marker);
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*hapPairs.length;
+ }
+
+ @Override
+ public int nHapPairs() {
+ return hapPairs.length;
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public Samples samples(int hapPair) {
+ if (hapPair < 0 || hapPair >= hapPairs.length) {
+ throw new IndexOutOfBoundsException(String.valueOf(hapPair));
+ }
+ return samples;
+ }
+
+ @Override
+ public int sampleIndex(int hapPair) {
+ if (hapPair < 0 || hapPair >= hapPairs.length) {
+ throw new IndexOutOfBoundsException(String.valueOf(hapPair));
+ }
+ return hapPair;
+ }
+
+ @Override
+ public int nAlleles(int marker) {
+ return markers.marker(marker).nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices(int marker) {
+ return false;
+ }
+
+ @Override
+ public int majorAllele(int marker) {
+ String s = "this.storesNonMajorIndices(marker)==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int alleleCount(int marker, int allele) {
+ String s = "this.storesNonMajorIndices(marker)==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int hapIndex(int marker, int allele, int copy) {
+ String s = "this.storesNonMajorIndices(marker)==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ /**
+ * Returns a {@code Comparator<HapPairInterface>}
+ * whose {@code compare(hp1, hp2)} method returns -1, 0, or 1
+ * depending on whether {@code samples.index(hp1.idIndex())} is
+ * less than, equal, or greater than
+ * {@code samples.index(hp2.idIndex())}.
+ * @param samples the list of samples used to compare {@code HapsPair}
+ * objects
+ * @return a {@code Comparator<HapPairInterface>}
+ * whose {@code compare(hp1, hp2)} method compares two
+ * haplotype pairs for order
+ * @throws NullPointerException if {@code samples == null}
+ */
+ private static Comparator<HapPair> hapsComparator(final Samples samples) {
+ if (samples==null) {
+ throw new NullPointerException("samples==null");
+ }
+ return (HapPair hp1, HapPair hp2) -> {
+ int id1 = hp1.samples().idIndex(hp1.sampleIndex());
+ int id2 = hp2.samples().idIndex(hp2.sampleIndex());
+ int i1 = samples.index(id1);
+ int i2 = samples.index(id2);
+ if (i1 == -1 || i2 == -1) {
+ String id;
+ if (i1 == -1) {
+ id = hp1.samples().id(hp1.sampleIndex());
+ }
+ else {
+ id = hp2.samples().id(hp2.sampleIndex());
+ }
+ String s = "samples do not contain: " + id;
+ throw new IllegalArgumentException(s);
+ }
+ if (i1==i2) {
+ return 0;
+ }
+ else {
+ return (i1 < i2) ? -1 : 1;
+ }
+ } ;
+ }
+}
diff --git a/haplotype/BitHapPair.java b/haplotype/BitHapPair.java
new file mode 100644
index 0000000..997a918
--- /dev/null
+++ b/haplotype/BitHapPair.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import blbutil.Const;
+import java.util.BitSet;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code BitHapPair} represents a pair of haplotypes for a sample.
+ * The class stores alleles using {@code java.util.BitSet} objects.
+ * </p>
+ * Instances of class {@code BitHapPair} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BitHapPair implements HapPair {
+
+    private final Markers markers;
+    private final Samples samples;
+    private final int sampleIndex;
+    private final BitSet alleles1;
+    private final BitSet alleles2;
+
+    /**
+     * Constructs a new {@code BitHapPair} instance.  The specified allele
+     * arrays are encoded into bit sets and are not retained.
+     * @param markers the sequence of markers
+     * @param samples the list of samples
+     * @param sampleIndex the sample index
+     * @param alleles1 the sequence of allele indices for the first haplotype
+     * @param alleles2 the sequence of allele indices for the second haplotype
+     *
+     * @throws IllegalArgumentException if
+     * {@code alleles1.length != markers.nMarkers()
+     * || alleles2.length != markers.nMarkers()}
+     * @throws IllegalArgumentException if {@code alleles1[k] < 0 ||
+     * alleles1[k] >= markers.marker(k).nAlleles()} for some {@code k}
+     * satisfying {@code 0 <= k && k < markers.nMarkers()}
+     * @throws IllegalArgumentException if {@code alleles2[k] < 0 ||
+     * alleles2[k] >= markers.marker(k).nAlleles()} for some {@code k}
+     * satisfying {@code 0 <= k && k < markers.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code sampleIndex < 0 || sampleIndex >= samples.nSamples()}
+     * @throws NullPointerException if
+     * {@code markers == null || samples == null || alleles1 == null
+     * || alleles2 == null}
+     */
+    public BitHapPair(Markers markers, Samples samples, int sampleIndex,
+            int[] alleles1, int[] alleles2) {
+        int nMarkers = markers.nMarkers();
+        boolean lengthsOk = (alleles1.length == nMarkers)
+                && (alleles2.length == nMarkers);
+        if (lengthsOk == false) {
+            throw new IllegalArgumentException("inconsistent markers");
+        }
+        boolean sampleOk = (0 <= sampleIndex)
+                && (sampleIndex < samples.nSamples());
+        if (sampleOk == false) {
+            throw new IndexOutOfBoundsException(String.valueOf(sampleIndex));
+        }
+        this.markers = markers;
+        this.samples = samples;
+        this.sampleIndex = sampleIndex;
+        this.alleles1 = toBitSet(markers, alleles1);
+        this.alleles2 = toBitSet(markers, alleles2);
+    }
+
+    /*
+     * Encodes the allele indices into one bit set.  Marker k occupies
+     * (markers.sumHaplotypeBits(k+1) - markers.sumHaplotypeBits(k))
+     * consecutive bits, and each allele index is written
+     * least-significant bit first.
+     */
+    private static BitSet toBitSet(Markers markers, int[] alleles) {
+        BitSet bits = new BitSet(markers.sumHaplotypeBits());
+        int next = 0;
+        for (int k=0; k<alleles.length; ++k) {
+            int allele = alleles[k];
+            boolean inRange = (0 <= allele)
+                    && (allele < markers.marker(k).nAlleles());
+            if (inRange == false) {
+                String s = "allele \"" + allele + "\" out of bounds for marker: "
+                        + markers.marker(k);
+                throw new IllegalArgumentException(s);
+            }
+            int nBits = markers.sumHaplotypeBits(k+1)
+                    - markers.sumHaplotypeBits(k);
+            for (int l=0, mask=1; l<nBits; ++l, mask <<= 1) {
+                bits.set(next++, (allele & mask)==mask);
+            }
+        }
+        return bits;
+    }
+
+    @Override
+    public int allele1(int marker) {
+        return allele(alleles1, marker);
+    }
+
+    @Override
+    public int allele2(int marker) {
+        return allele(alleles2, marker);
+    }
+
+    /*
+     * Decodes the allele index stored for the specified marker by reading
+     * the bits with indices from markers.sumHaplotypeBits(marker)
+     * (inclusive) to markers.sumHaplotypeBits(marker+1) (exclusive),
+     * least-significant bit first.
+     */
+    private int allele(BitSet bitset, int marker) {
+        int from = markers.sumHaplotypeBits(marker);
+        int to = markers.sumHaplotypeBits(marker+1);
+        if (to - from == 1) {
+            // shortcut for a marker stored in a single bit
+            return bitset.get(from) ? 1 : 0;
+        }
+        int value = 0;
+        for (int j=from, mask=1; j<to; ++j, mask <<= 1) {
+            if (bitset.get(j)) {
+                value |= mask;
+            }
+        }
+        return value;
+    }
+
+    @Override
+    public Markers markers() {
+        return markers;
+    }
+
+    @Override
+    public Marker marker(int marker) {
+        return markers.marker(marker);
+    }
+
+    @Override
+    public int nMarkers() {
+        return markers.nMarkers();
+    }
+
+    @Override
+    public Samples samples() {
+        return samples;
+    }
+
+    @Override
+    public int sampleIndex() {
+        return sampleIndex;
+    }
+
+    /**
+     * Returns a string representation of {@code this}.  The
+     * exact details of the representation are unspecified and subject
+     * to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        return "sampleIndex=" + sampleIndex
+                + Const.nl + alleles1
+                + Const.nl + alleles2;
+    }
+}
diff --git a/haplotype/ConsensusPhaser.java b/haplotype/ConsensusPhaser.java
new file mode 100644
index 0000000..502193c
--- /dev/null
+++ b/haplotype/ConsensusPhaser.java
@@ -0,0 +1,271 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Phase;
+import beagleutil.Samples;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Random;
+import vcf.Markers;
+import vcf.VcfRecord;
+
+/**
+ * Class {@code ConsensusPhaser} contains a static method for
+ * calculating a consensus phasing from multiple estimated haplotype pairs
+ * for an individual.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class ConsensusPhaser {
+
+ private ConsensusPhaser() {
+ // private constructor prevents instantiation
+ }
+
+ /**
+ * Returns a list of consensus haplotype pairs (one pair per individual)
+ * sorted in order of increasing sample index. The specified list of
+ * haplotype pairs may contain multiple haplotype pairs for each individual.
+ * The specified list is not modified: a copy of it is sorted and grouped
+ * by sample index. A sample index that occurs exactly once contributes
+ * its haplotype pair unchanged; otherwise a consensus haplotype pair is
+ * computed from all haplotype pairs with that sample index. Ties are
+ * broken with a random number generator seeded with
+ * {@code hapPairs.size()}. An empty list is returned if the specified
+ * list is empty.
+ *
+ * @param hapPairs a list of haplotype pairs
+ * @return a list of consensus haplotype pairs
+ *
+ * @throws IllegalArgumentException if
+ * {@code (hapPairs.get(j).markers().equals(hapPairs.get(k).markers()) == false)}
+ * for any {@code j, k} satisfying {@code 0 <= j < k < hapPairs.size()}
+ * @throws IllegalArgumentException if
+ * {@code (hapPairs.get(j).samples().equals(hapPairs.get(k).samples()) == false)}
+ * for any {@code j, k} satisfying {@code 0 <= j < k < hapPairs.size()}
+ * @throws NullPointerException if {@code hapPairs == null}
+ */
+ public static List<HapPair> run(List<HapPair> hapPairs) {
+ List<HapPair> copy = new ArrayList<>(hapPairs);
+ if (copy.isEmpty()) {
+ return copy;
+ }
+ checkMarkers(copy);
+ // the seed depends only on the number of input haplotype pairs
+ Random random = new Random(copy.size());
+
+ Collections.sort(copy, hapsComparator());
+ List<HapPair> consensus = new ArrayList<>(copy.size()/20);
+ // after sorting, haplotype pairs with the same sample index are
+ // adjacent; [start, end) delimits one such run
+ int start = 0;
+ while (start < copy.size()) {
+ int end = start+1;
+ while (end < copy.size()
+ && copy.get(end).sampleIndex()==copy.get(start).sampleIndex()) {
+ ++end;
+ }
+ if (end-start==1) {
+ consensus.add(copy.get(start));
+ }
+ else {
+ consensus.add(consensus(copy.subList(start, end), random));
+ }
+ start = end;
+ }
+ return consensus;
+ }
+
+ /*
+ * Checks that every haplotype pair in the specified non-empty list has
+ * markers and samples equal to those of the first haplotype pair, and
+ * throws an IllegalArgumentException otherwise.
+ */
+ private static void checkMarkers(List<HapPair> hapList) {
+ Markers markers = hapList.get(0).markers();
+ Samples samples = hapList.get(0).samples();
+ for (int j=1; j<hapList.size(); ++j) {
+ if (markers.equals(hapList.get(j).markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ if (samples.equals(hapList.get(j).samples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ }
+ }
+
+ /*
+ * Returns a new BitHapPair holding a consensus of the specified
+ * haplotype pairs. Markers, samples, and sample index are taken from
+ * the first haplotype pair in the list. At each marker the genotype
+ * is taken from a haplotype pair carrying the most frequent genotype.
+ * At each heterozygous consensus genotype after the first one, the
+ * allele order is chosen to agree with the majority relative phase
+ * between this marker and the preceding heterozygous marker.
+ */
+ private static HapPair consensus(List<HapPair> hapList, Random rand) {
+ HapPair firstHP = hapList.get(0);
+ int sampleIndex = firstHP.sampleIndex();
+ Samples samples = firstHP.samples();
+ Markers markers = firstHP.markers();
+ int nMarkers = markers.nMarkers();
+ // lastConsensus is null until the first heterozygous marker is seen
+ Phase lastConsensus = null;
+ Phase[] lastPhase = new Phase[hapList.size()];
+ Phase[] currentPhase = new Phase[hapList.size()];
+ int[] alleles1 = new int[nMarkers];
+ int[] alleles2 = new int[nMarkers];
+
+ for (int m=0; m<nMarkers; ++m) {
+ int hp = hapPairWithConsensusGT(hapList, markers, m, rand);
+ // retrieve actual allele order to match input phased data
+ int a1 = hapList.get(hp).allele1(m);
+ int a2 = hapList.get(hp).allele2(m);
+ if (a1!=a2) {
+ storePhase(hapList, m, a1, a2, currentPhase);
+ Phase consensus;
+ if (lastConsensus != null) {
+ Phase relPhase = relPhase(lastPhase, currentPhase, rand);
+ if (relPhase == Phase.IDENTICAL) {
+ consensus = lastConsensus;
+ }
+ else {
+ assert relPhase == Phase.OPPOSITE;
+ consensus = flip(lastConsensus);
+ }
+ // swap the alleles so that (a1 < a2) holds exactly when
+ // the consensus phase is IDENTICAL
+ if ( (consensus == Phase.IDENTICAL && a1 > a2)
+ || (consensus == Phase.OPPOSITE && a1 < a2)) {
+ int tmp = a1;
+ a1 = a2;
+ a2 = tmp;
+ }
+ }
+ lastConsensus = a1 < a2 ? Phase.IDENTICAL : Phase.OPPOSITE;
+ // swap buffers: the phases just stored become lastPhase for the
+ // next heterozygous marker
+ Phase[] tmp = currentPhase;
+ currentPhase = lastPhase;
+ lastPhase = tmp;
+ }
+ alleles1[m] = a1;
+ alleles2[m] = a2;
+ }
+ return new BitHapPair(markers, samples, sampleIndex, alleles1, alleles2);
+ }
+
+ /*
+ * Returns OPPOSITE for IDENTICAL and IDENTICAL for OPPOSITE, and throws
+ * an IllegalArgumentException for any other phase.
+ */
+ private static Phase flip(Phase phase) {
+ if (phase == Phase.IDENTICAL) {
+ return Phase.OPPOSITE;
+ }
+ else if (phase == Phase.OPPOSITE) {
+ return Phase.IDENTICAL;
+ }
+ else {
+ throw new IllegalArgumentException(phase.toString());
+ }
+ }
+
+ /*
+ * Returns the index of the first haplotype pair in the specified list
+ * whose genotype index at the specified marker equals the consensus
+ * genotype index returned by consensusGT().
+ */
+ private static int hapPairWithConsensusGT(List<HapPair> hapList,
+ Markers markers, int marker, Random random) {
+ int consensusGT = consensusGT(hapList, markers, marker, random);
+ for (int j=0, n=hapList.size(); j<n; ++j) {
+ HapPair hp = hapList.get(j);
+ int a1 = hp.allele1(marker);
+ int a2 = hp.allele2(marker);
+ if (VcfRecord.gtIndex(a1, a2) == consensusGT) {
+ return j;
+ }
+ }
+ assert false;
+ throw new IllegalArgumentException("no sample with consensus GT");
+ }
+
+ /*
+ * Returns a genotype index with the largest count at the specified
+ * marker. The scan starts at a random genotype index and wraps around;
+ * because only a strictly larger count replaces the current best, ties
+ * are resolved in favor of the tied genotype visited first.
+ */
+ private static int consensusGT(List<HapPair> hapList, Markers markers,
+ int marker, Random random) {
+ int[] gtCounts = gtCounts(hapList, markers, marker);
+ int start = random.nextInt(gtCounts.length);
+ int bestGt = start;
+ for (int j=1; j<gtCounts.length; ++j) {
+ int gt = start + j;
+ if (gt >= gtCounts.length) {
+ gt -= gtCounts.length;
+ }
+ if (gtCounts[gt] > gtCounts[bestGt]) {
+ bestGt = gt;
+ }
+ }
+ return bestGt;
+ }
+
+ /*
+ * Returns an array of length markers.marker(marker).nGenotypes() whose
+ * element with index VcfRecord.gtIndex(a1, a2) is the number of
+ * haplotype pairs in the list having alleles a1 and a2 at the marker.
+ */
+ private static int[] gtCounts(List<HapPair> hapList, Markers markers,
+ int marker) {
+ int nGt = markers.marker(marker).nGenotypes();
+ int[] gtCounts = new int[nGt];
+ for (int j=0, n=hapList.size(); j<n; ++j) {
+ HapPair hp = hapList.get(j);
+ int a1 = hp.allele1(marker);
+ int a2 = hp.allele2(marker);
+ int gt = VcfRecord.gtIndex(a1, a2);
+ ++gtCounts[gt];
+ }
+ return gtCounts;
+ }
+
+ /*
+ * Fills phaseArray[j] for each haplotype pair j at the specified marker.
+ * If the pair's alleles (b1, b2) equal (a1, a2) in either order, the
+ * entry is IDENTICAL when b1 < b2 and OPPOSITE otherwise; if they do
+ * not, the entry is INCONSISTENT.
+ */
+ private static void storePhase(List<HapPair> hapList, int marker,
+ int a1, int a2, Phase[] phaseArray) {
+ assert phaseArray.length == hapList.size();
+ for (int j=0; j<phaseArray.length; ++j) {
+ int b1 = hapList.get(j).allele1(marker);
+ int b2 = hapList.get(j).allele2(marker);
+ if ( (a1==b1 && a2==b2) || (a1==b2 && a2==b1) ) {
+ phaseArray[j] = (b1 < b2) ? Phase.IDENTICAL : Phase.OPPOSITE;
+ }
+ else {
+ phaseArray[j] = Phase.INCONSISTENT;
+ }
+ }
+ }
+
+ /*
+ * Returns the majority relative phase between two phase arrays. An
+ * index counts as "identical" when both entries are IDENTICAL or both
+ * are OPPOSITE, and as "opposite" when one entry is IDENTICAL and the
+ * other is OPPOSITE; all other indices are ignored. A tie is broken
+ * with rand.nextBoolean().
+ */
+ private static Phase relPhase(Phase[] ph1, Phase[] ph2, Random rand) {
+ assert ph1.length == ph2.length;
+ int identCnt = 0;
+ int oppCnt = 0;
+ for (int j=0; j<ph1.length; ++j) {
+ if (ph1[j]==Phase.IDENTICAL) {
+ switch (ph2[j]) {
+ case IDENTICAL: ++identCnt; break;
+ case OPPOSITE: ++oppCnt; break;
+ }
+ }
+ else if(ph1[j] == Phase.OPPOSITE) {
+ switch (ph2[j]) {
+ case IDENTICAL: ++oppCnt; break;
+ case OPPOSITE: ++identCnt; break;
+ }
+ }
+ }
+ if (identCnt > oppCnt) {
+ return Phase.IDENTICAL;
+ }
+ else if (oppCnt > identCnt) {
+ return Phase.OPPOSITE;
+ }
+ else {
+ return rand.nextBoolean() ? Phase.IDENTICAL : Phase.OPPOSITE;
+ }
+ }
+
+ /**
+ * Returns a {@code Comparator<HapPair>}
+ * whose {@code compare(hp1, hp2)} method returns -1, 0, or 1
+ * depending on whether {@code hp1.sampleIndex()} is less than, equal,
+ * or greater than {@code hp2.sampleIndex()}.
+ * @return a {@code Comparator<HapPair>}
+ * whose {@code compare(hp1, hp2)} method compares two
+ * haplotype pairs for order.
+ */
+ private static Comparator<HapPair> hapsComparator() {
+ return (HapPair hp1, HapPair hp2) -> {
+ int i1 = hp1.sampleIndex();
+ int i2 = hp2.sampleIndex();
+ if (i1==i2) {
+ return 0;
+ }
+ else {
+ return (i1 < i2) ? -1 : 1;
+ }
+ } ;
+ }
+}
diff --git a/haplotype/GLSampleHapPairs.java b/haplotype/GLSampleHapPairs.java
new file mode 100644
index 0000000..0f4c660
--- /dev/null
+++ b/haplotype/GLSampleHapPairs.java
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.GL;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code GLSampleHapPairs} wraps a {@code GL} instance that stores
+ * phased, non-missing genotypes.
+ * </p>
+ * Instances of class {@code GLSampleHapPairs} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class GLSampleHapPairs implements SampleHapPairs {
+
+    private static final String NO_NON_MAJOR_INDICES =
+            "this.storesNonMajorIndices(marker)==false";
+
+    private final GL gl;
+
+    /**
+     * Constructs a new {@code GLSampleHapPairs} instance from the
+     * specified data.
+     * @param gl phased, non-missing genotypes
+     *
+     * @throws IllegalArgumentException if {@code gl.isRefData() == false}
+     * @throws NullPointerException if {@code gl == null}
+     */
+    public GLSampleHapPairs(GL gl) {
+        if (gl.isRefData() == false) {
+            throw new IllegalArgumentException("gl.isRefData()==false");
+        }
+        this.gl = gl;
+    }
+
+    private void checkHapPairIndex(int hapPair) {
+        if (hapPair < 0 || hapPair >= gl.nSamples()) {
+            throw new IndexOutOfBoundsException(String.valueOf(hapPair));
+        }
+    }
+
+    @Override
+    public Samples samples() {
+        return gl.samples();
+    }
+
+    @Override
+    public int nSamples() {
+        return gl.nSamples();
+    }
+
+    @Override
+    public int allele(int marker, int haplotype) {
+        // An arithmetic shift is used instead of division by 2 because
+        // integer division rounds toward zero: (-1 / 2) == 0, which would
+        // silently map haplotype -1 to the second allele of sample 0.
+        // With the shift, haplotype -1 maps to sample -1.
+        // NOTE(review): this relies on gl.allele1()/gl.allele2() rejecting
+        // an out-of-range sample index -- confirm against vcf.GL.
+        int sample = haplotype >> 1;
+        return ((haplotype & 1) == 0)
+                ? gl.allele1(marker, sample)
+                : gl.allele2(marker, sample);
+    }
+
+    @Override
+    public int allele1(int marker, int hapPair) {
+        return gl.allele1(marker, hapPair);
+    }
+
+    @Override
+    public int allele2(int marker, int hapPair) {
+        return gl.allele2(marker, hapPair);
+    }
+
+    @Override
+    public int nMarkers() {
+        return gl.nMarkers();
+    }
+
+    @Override
+    public Markers markers() {
+        return gl.markers();
+    }
+
+    @Override
+    public Marker marker(int marker) {
+        return gl.marker(marker);
+    }
+
+    @Override
+    public int nHaps() {
+        return 2*gl.nSamples();
+    }
+
+    @Override
+    public int nHapPairs() {
+        return gl.nSamples();
+    }
+
+    @Override
+    public Samples samples(int hapPair) {
+        checkHapPairIndex(hapPair);
+        return gl.samples();
+    }
+
+    @Override
+    public int sampleIndex(int hapPair) {
+        // there is one haplotype pair per sample, in sample order, so the
+        // haplotype pair index is returned as the sample index
+        checkHapPairIndex(hapPair);
+        return hapPair;
+    }
+
+    @Override
+    public int nAlleles(int marker) {
+        return gl.marker(marker).nAlleles();
+    }
+
+    @Override
+    public boolean storesNonMajorIndices(int marker) {
+        return false;
+    }
+
+    @Override
+    public int majorAllele(int marker) {
+        throw new UnsupportedOperationException(NO_NON_MAJOR_INDICES);
+    }
+
+    @Override
+    public int alleleCount(int marker, int allele) {
+        throw new UnsupportedOperationException(NO_NON_MAJOR_INDICES);
+    }
+
+    @Override
+    public int hapIndex(int marker, int allele, int copy) {
+        throw new UnsupportedOperationException(NO_NON_MAJOR_INDICES);
+    }
+}
diff --git a/haplotype/GenotypeCorrection.java b/haplotype/GenotypeCorrection.java
new file mode 100644
index 0000000..eee3c31
--- /dev/null
+++ b/haplotype/GenotypeCorrection.java
@@ -0,0 +1,273 @@
+/*
+ * Copyright 2014 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package haplotype;
+
+import blbutil.Const;
+import blbutil.FileUtil;
+import java.io.File;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import vcf.GL;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code GenotypeCorrection} removes any inconsistencies between
+ * haplotype pairs and genotypes that determine genotype likelihoods.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class GenotypeCorrection {
+
+    /*
+     * Column names of the tab-delimited change log that is written by
+     * run(hapPairs, gl, seed, outFile, append). The columns match the
+     * fields produced by Edit.toString().
+     */
+    private static final String headerLine = "MARKER" + Const.tab
+            + "SAMPLE" + Const.tab
+            + "REF" + Const.tab + "ALT" + Const.tab
+            + "INPUT_GT" + Const.tab + "ESTIMATED_GT";
+
+    private GenotypeCorrection() {
+        // private constructor to prevent instantiation
+    }
+
+    /**
+     * Removes any inconsistencies between the specified list of
+     * haplotype pairs and the genotypes determined by the {@code allele1()}
+     * and {@code allele2()} methods of the specified genotype likelihoods.
+     * Inconsistencies are resolved by changing the minimum number
+     * of alleles in the haplotype pairs. Each haplotype pair that requires
+     * a change is replaced in the specified list by a corrected haplotype
+     * pair; haplotype pairs that require no change are left in place.
+     *
+     * @param hapPairs a list of haplotype pairs
+     * @param gl genotype likelihoods
+     * @param seed a seed for generating random numbers
+     *
+     * @throws IllegalArgumentException if
+     * {@code hapPairs.get(j).markers().equals(gl.markers()) == false}
+     * for any {@code j} satisfying {@code (0 <= j && j < hapPairs.size())}
+     * @throws IllegalArgumentException if
+     * {@code hapPairs.get(j).samples().equals(gl.samples()) == false}
+     * for any {@code j} satisfying {@code (0 <= j && j < hapPairs.size())}
+     *
+     * @throws NullPointerException if {@code hapPairs == null || gl == null},
+     * or if {@code (hapPairs.get(j) == null)} for any {@code j} satisfying
+     * {@code (0 <= j && j < hapPairs.size())}
+     */
+    public static void run(List<HapPair> hapPairs, GL gl, long seed) {
+        Random random = new Random(seed);
+        // Scratch arrays that are reused for every haplotype pair.
+        int[] alleles1 = new int[gl.nMarkers()];
+        int[] alleles2 = new int[gl.nMarkers()];
+        for (int j=0, n=hapPairs.size(); j<n; ++j) {
+            HapPair hapPair = hapPairs.get(j);
+            checkMarkersAndSamples(hapPair, gl);
+            List<Edit> edits = getEdits(hapPair, gl, random);
+            HapPair revHapPair = updatedHapPair(hapPair, edits, alleles1,
+                    alleles2);
+            hapPairs.set(j, revHapPair);
+        }
+    }
+
+    /**
+     * Removes any inconsistencies between the specified list of
+     * haplotype pairs and the genotypes determined by the {@code allele1()}
+     * and {@code allele2()} methods of the specified genotype likelihoods.
+     * Inconsistencies are resolved by changing the minimum number
+     * of alleles in the haplotype pairs. Each haplotype pair that requires
+     * a change is replaced in the specified list by a corrected haplotype
+     * pair, and one line per changed genotype is written to the specified
+     * output file. A header line is written first unless
+     * {@code append == true}.
+     *
+     * @param hapPairs a list of haplotype pairs
+     * @param gl genotype likelihoods
+     * @param seed a seed for generating random numbers
+     * @param outFile an output file to which a record of the
+     * genotype changes will be written
+     * @param append {@code true} if the genotype changes should be
+     * written to the end of the specified output file
+     *
+     * @throws IllegalArgumentException if
+     * {@code hapPairs.get(j).markers().equals(gl.markers()) == false}
+     * for any {@code j} satisfying {@code (0 <= j && j < hapPairs.size())}
+     * @throws IllegalArgumentException if
+     * {@code hapPairs.get(j).samples().equals(gl.samples()) == false}
+     * for any {@code j} satisfying {@code (0 <= j && j < hapPairs.size())}
+     *
+     * @throws NullPointerException if
+     * {@code (hapPairs == null || gl == null || outFile == null)},
+     * or if {@code hapPairs.get(j) == null} for any {@code j} satisfying
+     * {@code (0 <= j && j < hapPairs.size())}
+     */
+    public static void run(List<HapPair> hapPairs, GL gl, long seed,
+            File outFile, boolean append) {
+        Random random = new Random(seed);
+        // Scratch arrays that are reused for every haplotype pair.
+        int[] alleles1 = new int[gl.nMarkers()];
+        int[] alleles2 = new int[gl.nMarkers()];
+        try (PrintWriter out = FileUtil.printWriter(outFile, append)) {
+            if (append==false) {
+                out.println(headerLine);
+            }
+            for (int j=0, n=hapPairs.size(); j<n; ++j) {
+                HapPair hapPair = hapPairs.get(j);
+                checkMarkersAndSamples(hapPair, gl);
+                List<Edit> edits = getEdits(hapPair, gl, random);
+                // Each edit still refers to the pre-edit haplotype pair, so
+                // the printed line shows both the new and the old genotype.
+                for (Edit edit : edits) {
+                    out.println(edit);
+                }
+                HapPair revHapPair = updatedHapPair(hapPair, edits, alleles1,
+                        alleles2);
+                hapPairs.set(j, revHapPair);
+            }
+        }
+    }
+
+    /*
+     * Throws an IllegalArgumentException if the markers or the samples of
+     * the haplotype pair are not equal to those of the genotype likelihoods.
+     */
+    private static void checkMarkersAndSamples(HapPair hapPair, GL gl) {
+        if (hapPair.markers().equals(gl.markers())==false) {
+            throw new IllegalArgumentException("inconsistent markers");
+        }
+        if (hapPair.samples().equals(gl.samples())==false) {
+            throw new IllegalArgumentException("inconsistent samples");
+        }
+    }
+
+    /*
+     * Returns the edits that make the haplotype pair consistent with gl.
+     * A marker is edited when gl.gl() returns a non-positive value for the
+     * genotype of the haplotype pair and gl reports two non-negative
+     * alleles for the sample. If the genotype in gl is unphased, the two
+     * alleles are swapped whenever random.nextBoolean() returns true.
+     */
+    private static List<Edit> getEdits(HapPair hapPair, GL gl, Random random) {
+        List<Edit> corrections = new ArrayList<>();
+        int sample = hapPair.sampleIndex();
+        for (int marker=0, n=gl.nMarkers(); marker<n; ++marker) {
+            int hapPairA1 = hapPair.allele1(marker);
+            int hapPairA2 = hapPair.allele2(marker);
+            if (gl.gl(marker, sample, hapPairA1, hapPairA2) <= 0f) {
+                int glA1 = gl.allele1(marker, sample);
+                int glA2 = gl.allele2(marker, sample);
+                // A negative allele gives no genotype to correct towards.
+                if (glA1>=0 && glA2>=0) {
+                    if (gl.isPhased(marker, sample)==false && random.nextBoolean()) {
+                        int tmp = glA1;
+                        glA1 = glA2;
+                        glA2 = tmp;
+                    }
+                    corrections.add(new Edit(hapPair, marker, glA1, glA2));
+                }
+            }
+        }
+        return corrections;
+    }
+
+    /*
+     * Returns the specified haplotype pair if there are no edits. Otherwise
+     * copies its alleles into the two scratch arrays, applies the edits to
+     * the arrays, and returns a new BitHapPair constructed from them.
+     * NOTE(review): the scratch arrays are reused for the next haplotype
+     * pair, so this presumably relies on BitHapPair not retaining them;
+     * confirm against the BitHapPair constructor.
+     */
+    private static HapPair updatedHapPair(HapPair hapPair,
+            List<Edit> edits, int[] alleles1, int[] alleles2) {
+        if (edits.isEmpty()) {
+            return hapPair;
+        }
+        else {
+            copyAlleles(hapPair, alleles1, alleles2);
+            for (int j=0, n=edits.size(); j<n; ++j) {
+                Edit edit = edits.get(j);
+                alleles1[edit.marker()] = edit.newAllele1();
+                alleles2[edit.marker()] = edit.newAllele2();
+            }
+            return new BitHapPair(hapPair.markers(), hapPair.samples(),
+                    hapPair.sampleIndex(), alleles1, alleles2);
+        }
+    }
+
+    /*
+     * Overwrites the two arrays with the first and second alleles of the
+     * haplotype pair. The arrays must have length hapPair.nMarkers().
+     */
+    private static void copyAlleles(HapPair hapPair, int[] alleles1,
+            int[] alleles2) {
+        assert hapPair.nMarkers()==alleles1.length;
+        assert alleles1.length==alleles2.length;
+        for (int m=0, n=hapPair.nMarkers(); m<n; ++m) {
+            alleles1[m] = hapPair.allele1(m);
+            alleles2[m] = hapPair.allele2(m);
+        }
+    }
+
+    /*
+     * A replacement genotype for one marker of one haplotype pair. The
+     * stored haplotype pair is the pair as it was before the edit.
+     */
+    private static class Edit {
+        private final HapPair hapPair;
+        private final int marker;
+        private final int newAllele1;
+        private final int newAllele2;
+
+        /**
+         * Constructs a new {@code Edit} instance.
+         *
+         * @param hapPair a haplotype pair
+         * @param marker the marker index
+         * @param newAllele1 the post-edit first allele
+         * @param newAllele2 the post-edit second allele
+         * @throws NullPointerException if {@code hapPair == null}
+         * @throws IndexOutOfBoundsException if
+         * {@code marker < 0 || marker >= hapPair.nMarkers()}
+         * @throws IndexOutOfBoundsException if
+         * {@code newAllele1 < 0 || newAllele1 >= hapPair.marker(marker).nAlleles()}
+         * @throws IndexOutOfBoundsException if
+         * {@code newAllele2 < 0 || newAllele2 >= hapPair.marker(marker).nAlleles()}
+         */
+        public Edit(HapPair hapPair, int marker, int newAllele1, int newAllele2) {
+            // Valid marker indices end at nMarkers() - 1, so the upper
+            // bound itself must be rejected.
+            if (marker<0 || marker>=hapPair.nMarkers()) {
+                throw new IndexOutOfBoundsException("marker=" + marker);
+            }
+            if (newAllele1<0 || newAllele1>=hapPair.marker(marker).nAlleles()) {
+                throw new IndexOutOfBoundsException("newAllele1=" + newAllele1);
+            }
+            if (newAllele2<0 || newAllele2>=hapPair.marker(marker).nAlleles()) {
+                throw new IndexOutOfBoundsException("newAllele2=" + newAllele2);
+            }
+            this.hapPair = hapPair;
+            this.marker = marker;
+            this.newAllele1 = newAllele1;
+            this.newAllele2 = newAllele2;
+        }
+
+        public int marker() {
+            return marker;
+        }
+
+        public HapPair hapPair() {
+            return hapPair;
+        }
+
+        public int newAllele1() {
+            return newAllele1;
+        }
+
+        public int newAllele2() {
+            return newAllele2;
+        }
+
+        /**
+         * Returns a string description of {@code this}. For a marker with
+         * at least two alleles the returned string has six tab-delimited
+         * fields: 1) marker identifier, 2) sample identifier, 3) REF allele,
+         * 4) comma-separated ALT alleles, 5) post-edit genotype (the
+         * {@code INPUT_GT} column), and 6) pre-edit genotype of the
+         * haplotype pair (the {@code ESTIMATED_GT} column).
+         * @return a string description of {@code this}
+         */
+        @Override
+        public String toString() {
+            Marker m = hapPair.markers().marker(marker);
+            String sampleId = hapPair.samples().id(hapPair.sampleIndex());
+
+            StringBuilder sb = new StringBuilder(100);
+            sb.append(m.id());
+            sb.append(Const.tab);
+            sb.append(sampleId);
+            // Alleles 0 and 1 start a new field; further alleles extend ALT.
+            // NOTE(review): a marker with a single allele produces no ALT
+            // field, so the line would have one field fewer than the header.
+            for (int j=0, n=m.nAlleles(); j<n; ++j) {
+                sb.append(j<2 ? Const.tab : Const.comma);
+                sb.append(m.allele(j));
+            }
+            sb.append(Const.tab);
+            sb.append(newAllele1);
+            sb.append(Const.unphasedSep);
+            sb.append(newAllele2);
+            sb.append(Const.tab);
+            sb.append(hapPair.allele1(marker));
+            sb.append(Const.unphasedSep);
+            sb.append(hapPair.allele2(marker));
+            return sb.toString();
+        }
+    }
+}
diff --git a/haplotype/HapPair.java b/haplotype/HapPair.java
new file mode 100644
index 0000000..002bb3b
--- /dev/null
+++ b/haplotype/HapPair.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Interface {@code HapPair} represents a pair of haplotypes for a sample.
+ * Both haplotypes of the pair are guaranteed to have a non-missing allele
+ * at each marker.
+ * </p>
+ * All instances of {@code HapPair} are required to be immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface HapPair {
+
+ /**
+ * Returns the first allele for the specified marker.
+ * @param marker a marker index
+ * @return the first allele for the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ int allele1(int marker);
+
+ /**
+ * Returns the second allele for the specified marker.
+ * @param marker a marker index
+ * @return the second allele for the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ int allele2(int marker);
+
+ /**
+ * Returns the list of markers for this haplotype pair.
+ * @return the list of markers for this haplotype pair
+ */
+ Markers markers();
+
+ /**
+ * Returns the specified marker.
+ * @param marker a marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ Marker marker(int marker);
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ int nMarkers();
+
+ /**
+ * Returns the list of samples containing the sample associated with
+ * this haplotype pair.
+ * @return the list of samples containing the sample associated with
+ * this haplotype pair
+ */
+ Samples samples();
+
+ /**
+ * Returns the index of the sample associated with this haplotype pair
+ * in the list of samples returned by {@code this.samples()}.
+ * @return the index of the sample associated with this haplotype pair
+ * in the list of samples returned by {@code this.samples()}
+ */
+ int sampleIndex();
+}
diff --git a/haplotype/HapPairs.java b/haplotype/HapPairs.java
new file mode 100644
index 0000000..cb682cf
--- /dev/null
+++ b/haplotype/HapPairs.java
@@ -0,0 +1,132 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Interface {@code HapPairs} represents a list of haplotype pairs.
+ * Each haplotype pair is guaranteed to have two non-missing
+ * alleles at each marker.
+ * </p>
+ * All instances of {@code HapPairs} are required to
+ * be immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface HapPairs {
+
+    /**
+     * Returns the allele for the specified marker and haplotype.
+     * @param marker a marker index
+     * @param haplotype a haplotype index
+     * @return the allele for the specified marker and haplotype
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code haplotype < 0 || haplotype >= this.nHaps()}
+     */
+    int allele(int marker, int haplotype);
+
+    /**
+     * Returns the first allele for the specified marker and haplotype pair.
+     * @param marker a marker index
+     * @param hapPair a haplotype pair index
+     * @return the first allele for the specified marker and haplotype pair
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code hapPair < 0 || hapPair >= this.nHapPairs()}
+     */
+    int allele1(int marker, int hapPair);
+
+    /**
+     * Returns the second allele for the specified marker and haplotype pair.
+     * @param marker a marker index
+     * @param hapPair a haplotype pair index
+     * @return the second allele for the specified marker and haplotype pair
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code hapPair < 0 || hapPair >= this.nHapPairs()}
+     */
+    int allele2(int marker, int hapPair);
+
+    /**
+     * Returns the number of markers.
+     * @return the number of markers
+     */
+    int nMarkers();
+
+    /**
+     * Returns the markers.
+     * @return the markers
+     */
+    Markers markers();
+
+    /**
+     * Returns the specified marker.
+     * @param marker a marker index
+     * @return the specified marker
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     */
+    Marker marker(int marker);
+
+    /**
+     * Returns the number of haplotypes.  The returned value is equal to
+     * {@code 2*this.nHapPairs()}.
+     * @return the number of haplotypes
+     */
+    int nHaps();
+
+    /**
+     * Returns the number of haplotype pairs.  The returned value is
+     * equal to {@code this.nHaps()/2}.
+     * @return the number of haplotype pairs
+     */
+    int nHapPairs();
+
+    /**
+     * Returns a list of samples containing the sample associated with
+     * the specified haplotype pair.
+     * @param hapPair a haplotype pair index
+     * @return a list of samples containing the sample associated with
+     * the specified haplotype pair
+     * @throws IndexOutOfBoundsException if
+     * {@code hapPair < 0 || hapPair >= this.nHapPairs()}
+     */
+    Samples samples(int hapPair);
+
+    /**
+     * Returns the index of the sample associated with the specified
+     * haplotype pair in the list of samples returned by
+     * {@code this.samples(hapPair)}.
+     * @param hapPair a haplotype pair index
+     * @return the index of the sample associated with the specified
+     * haplotype pair in the list of samples returned by
+     * {@code this.samples(hapPair)}
+     * @throws IndexOutOfBoundsException if
+     * {@code hapPair < 0 || hapPair >= this.nHapPairs()}
+     */
+    int sampleIndex(int hapPair);
+}
diff --git a/haplotype/HapsMarkerIterator.java b/haplotype/HapsMarkerIterator.java
new file mode 100644
index 0000000..ad1c193
--- /dev/null
+++ b/haplotype/HapsMarkerIterator.java
@@ -0,0 +1,127 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import vcf.HapsMarker;
+import blbutil.FileIt;
+import java.io.File;
+import java.util.NoSuchElementException;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code HapsMarkerIterator} represents a file iterator whose
+ * {@code next()} method returns {@code HapsMarker} objects.
+ * </p>
+ * <p>Instances of class {@code HapsMarkerIterator} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class HapsMarkerIterator implements FileIt<HapsMarker> {
+
+    private final HapPairs haps;
+    // Index of the marker that the next call to next() will return.
+    private int nextIndex = 0;
+
+    /**
+     * Constructs a new {@code HapsMarkerIterator} instance that iterates
+     * through the markers of the specified {@code HapPairs} object.
+     *
+     * @param haps the haplotype pairs
+     * @throws NullPointerException if {@code haps == null}
+     */
+    public HapsMarkerIterator(HapPairs haps) {
+        if (haps==null) {
+            throw new NullPointerException("haps==null");
+        }
+        this.haps = haps;
+    }
+
+    /**
+     * Returns {@code null}.  The iterator is backed by the {@code HapPairs}
+     * object passed to the constructor rather than by a file.
+     * @return {@code null}
+     */
+    @Override
+    public File file() {
+        return null;
+    }
+
+    /**
+     * Ends the iteration.  After this method returns, {@code hasNext()}
+     * returns {@code false}.
+     */
+    @Override
+    public void close() {
+        nextIndex = haps.nMarkers();
+    }
+
+    /**
+     * Returns {@code true} if the iteration has more elements, and returns
+     * {@code false} otherwise.
+     * @return {@code true} if the iteration has more elements
+     */
+    @Override
+    public boolean hasNext() {
+        return nextIndex < haps.nMarkers();
+    }
+
+    /**
+     * Returns the next element in the iteration.  The returned object is a
+     * view of one marker of the underlying {@code HapPairs} object.
+     * @return the next element in the iteration
+     * @throws NoSuchElementException if the iteration has no more elements
+     */
+    @Override
+    public HapsMarker next() {
+        if (hasNext() == false) {
+            throw new NoSuchElementException("hasNext()==false");
+        }
+        // Capture the marker index so the view stays fixed on its marker
+        // while the iterator advances.
+        final int index = nextIndex++;
+        return new HapsMarker() {
+            @Override
+            public int allele(int haplotype) {
+                return haps.allele(index, haplotype);
+            }
+
+            @Override
+            public int allele1(int hapPair) {
+                // First haplotype of the pair.
+                return haps.allele(index, 2 * hapPair);
+            }
+
+            @Override
+            public int allele2(int hapPair) {
+                // Second haplotype of the pair.
+                return haps.allele(index, 2 * hapPair + 1);
+            }
+
+            @Override
+            public Marker marker() {
+                return haps.marker(index);
+            }
+
+            @Override
+            public int nHaps() {
+                return haps.nHaps();
+            }
+
+            @Override
+            public int nHapPairs() {
+                return haps.nHapPairs();
+            }
+        };
+    }
+
+    /**
+     * The {@code remove} method is not supported by this iterator.
+     * @throws UnsupportedOperationException if this method is invoked
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException("Not supported.");
+    }
+}
diff --git a/haplotype/RefHapPairs.java b/haplotype/RefHapPairs.java
new file mode 100644
index 0000000..14f0dc7
--- /dev/null
+++ b/haplotype/RefHapPairs.java
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+import vcf.VcfEmission;
+
+/**
+ * <p>Class {@code RefHapPairs} stores a list of samples and a
+ * haplotype pair for each sample.
+ * </p>
+ * <p>Instances of class {@code RefHapPairs} are immutable.<p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RefHapPairs implements SampleHapPairs {
+
+ private final Markers markers;
+ private final Samples samples;
+ private final VcfEmission[] refVcfRecs;
+
+ /**
+ * Constructs a new {@code RefHapPairs} instance.
+ * @param markers the sequence of markers
+ * @param samples the sequence of samples
+ * @param refVcfRecs the sequence of per-marker genotype data
+ *
+ * @throws IllegalArgumentException if
+ * {@code markers.nMarkers() != refVcfRecs.length}
+ * @throws IllegalArgumentException if
+ * {@code refVcfRecs[k].samples().equals(samples) == false} for any
+ * {@code k} satisfying {@code 0 <= k && k < refVcfRecs.length}
+ * @throws IllegalArgumentException if
+ * {@code refVcfRecs[k].marker().equals(markers.marker(k)) == false}
+ * for any {@code k} satisfying {@code 0 <= k && k < refVcfRecs.length}
+ * @throws IllegalArgumentException if
+ * {@code refVcfRecs[k].isRefData() == false} for any {@code k}
+ * satisfying {@code 0 <= k && k < refVcfRecs.length}
+ * @throws NullPointerException if
+ * {@code markers == null || samples == null || refVcfRecs == null
+ * || refVcfRecs[k] == null} for any {@code k} satisfying
+ * {@code 0 <= k && k <= refVcfRecs.length}
+ */
+ public RefHapPairs(Markers markers, Samples samples,
+ VcfEmission[] refVcfRecs) {
+ checkPhasedMarkers(markers, samples, refVcfRecs);
+ this.markers = markers;
+ this.samples = samples;
+ this.refVcfRecs = refVcfRecs.clone();
+ }
+
+ private static void checkPhasedMarkers(Markers markers, Samples samples,
+ VcfEmission[] refVcfRecs) {
+ if (markers.nMarkers()!=refVcfRecs.length) {
+ String s = "markers.nMarkers()=" + markers.nMarkers()
+ + " refVcfRecs.length=" + refVcfRecs.length;
+ throw new IllegalArgumentException(s);
+ }
+ for (int j=0; j<refVcfRecs.length; ++j) {
+ if (refVcfRecs[j].samples().equals(samples)==false) {
+ String s = "sample inconsistency at index " + j;
+ throw new IllegalArgumentException(s);
+ }
+ if (refVcfRecs[j].marker().equals(markers.marker(j))==false) {
+ String s = "marker inconsistency at index " + j;
+ throw new IllegalArgumentException(s);
+ }
+ if (refVcfRecs[j].isRefData()==false) {
+ String s = "non-reference data at marker index " + j;
+ throw new IllegalArgumentException(s);
+ }
+ }
+ }
+
+ @Override
+ public int allele1(int marker, int hapPair) {
+ return refVcfRecs[marker].allele1(hapPair);
+ }
+
+ @Override
+ public int allele2(int marker, int hapPair) {
+ return refVcfRecs[marker].allele2(hapPair);
+ }
+
+ @Override
+ public int allele(int marker, int haplotype) {
+ int hapPair = haplotype/2;
+ if ((haplotype & 1)==0) {
+ return refVcfRecs[marker].allele1(hapPair);
+ }
+ else {
+ return refVcfRecs[marker].allele2(hapPair);
+ }
+ }
+
+ @Override
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return markers.marker(marker);
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*samples.nSamples();
+ }
+
+ @Override
+ public int nHapPairs() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public Samples samples(int hapPair) {
+ if (hapPair < 0 || hapPair >= samples.nSamples()) {
+ throw new IndexOutOfBoundsException(String.valueOf(hapPair));
+ }
+ return samples;
+ }
+
+ @Override
+ public int sampleIndex(int hapPair) {
+ if (hapPair < 0 || hapPair >= samples.nSamples()) {
+ throw new IndexOutOfBoundsException(String.valueOf(hapPair));
+ }
+ return hapPair;
+ }
+
+ @Override
+ public int nAlleles(int marker) {
+ return refVcfRecs[marker].nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices(int marker) {
+ return refVcfRecs[marker].storesNonMajorIndices();
+ }
+
+ @Override
+ public int majorAllele(int marker) {
+ return refVcfRecs[marker].majorAllele();
+ }
+
+ @Override
+ public int alleleCount(int marker, int allele) {
+ return refVcfRecs[marker].alleleCount(allele);
+ }
+
+ @Override
+ public int hapIndex(int marker, int allele, int copy) {
+ return refVcfRecs[marker].hapIndex(allele, copy);
+ }
+}
diff --git a/haplotype/RevHapPair.java b/haplotype/RevHapPair.java
new file mode 100644
index 0000000..3a883c7
--- /dev/null
+++ b/haplotype/RevHapPair.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code RevHapPair} is a wrapper for a {@code HapPair}
+ * instance. The wrapper reverses the order of markers in the wrapped object.
+ * </p>
+ * <p>Instances of class {@code RevHapPair} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class RevHapPair implements HapPair {
+
+ /*
+ * All instances of the {@code HapPair} interface are required to be
+ * immutable.
+ */
+ private final HapPair hapPair;
+ private final int lastMarker;
+
+ /**
+ * Creates a new {@code RevHapPair} instance from the specified data.
+ * @param hapPair the haplotype pair that will be wrapped by the
+ * new instance
+ * @throws NullPointerException if {@code hapPair == null}
+ */
+ public RevHapPair(HapPair hapPair) {
+ this.hapPair = hapPair;
+ this.lastMarker = hapPair.nMarkers() - 1;
+ }
+
+ @Override
+ public int allele1(int marker) {
+ return hapPair.allele1(lastMarker - marker);
+ }
+
+ @Override
+ public int allele2(int marker) {
+ return hapPair.allele2(lastMarker - marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return hapPair.markers().reverse();
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return hapPair.marker(lastMarker - marker);
+ }
+
+ @Override
+ public int nMarkers() {
+ return hapPair.nMarkers();
+ }
+
+ @Override
+ public Samples samples() {
+ return hapPair.samples();
+ }
+
+ @Override
+ public int sampleIndex() {
+ return hapPair.sampleIndex();
+ }
+}
diff --git a/haplotype/RevHapPairs.java b/haplotype/RevHapPairs.java
new file mode 100644
index 0000000..a47a8e4
--- /dev/null
+++ b/haplotype/RevHapPairs.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code RevHapPairs} is a wrapper for a {@code HapPairs}
+ * instance. The wrapper reverses the order of markers in the wrapped object.
+ * </p>
+ * <p>Instances of class {@code RevHapPairs} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class RevHapPairs implements HapPairs {
+
+    /*
+     * All instances of the {@code HapPairs} interface are required to be
+     * immutable.
+     */
+    private final HapPairs hapPairs;
+    // Largest marker index; index m of this view is index lastMarker - m
+    // of the wrapped object.
+    private final int lastMarker;
+
+
+    /**
+     * Creates a new {@code RevHapPairs} instance from the specified data.
+     * @param hapPairs the haplotype pairs that will be wrapped by the
+     * new instance
+     * @throws NullPointerException if {@code hapPairs == null}
+     */
+    public RevHapPairs(HapPairs hapPairs) {
+        this.hapPairs = hapPairs;
+        this.lastMarker = hapPairs.nMarkers() - 1;
+    }
+
+
+    @Override
+    public int allele1(int marker, int hapPair) {
+        return hapPairs.allele1(lastMarker - marker, hapPair);
+    }
+
+    @Override
+    public int allele2(int marker, int hapPair) {
+        return hapPairs.allele2(lastMarker - marker, hapPair);
+    }
+
+    @Override
+    public int allele(int marker, int haplotype) {
+        return hapPairs.allele(lastMarker - marker, haplotype);
+    }
+
+    @Override
+    public int nMarkers() {
+        return hapPairs.nMarkers();
+    }
+
+    @Override
+    public Markers markers() {
+        return hapPairs.markers().reverse();
+    }
+
+    @Override
+    public Marker marker(int marker) {
+        return hapPairs.marker(lastMarker - marker);
+    }
+
+    @Override
+    public int nHaps() {
+        return hapPairs.nHaps();
+    }
+
+    @Override
+    public int nHapPairs() {
+        return hapPairs.nHapPairs();
+    }
+
+    @Override
+    public Samples samples(int hapPair) {
+        return hapPairs.samples(hapPair);
+    }
+
+    @Override
+    public int sampleIndex(int hapPair) {
+        return hapPairs.sampleIndex(hapPair);
+    }
+
+    /**
+     * Returns a string representation of {@code this}.  The exact details
+     * of the representation are unspecified and subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        // The result is a class name plus a count, so a small initial
+        // capacity is enough; the builder grows if it is not.
+        StringBuilder sb = new StringBuilder(64);
+        sb.append('[');
+        sb.append(this.getClass().toString());
+        sb.append(": nHapPairs=");
+        sb.append(this.nHapPairs());
+        sb.append(']');
+        return sb.toString();
+    }
+}
diff --git a/haplotype/RevSampleHapPairs.java b/haplotype/RevSampleHapPairs.java
new file mode 100644
index 0000000..f3dd9dd
--- /dev/null
+++ b/haplotype/RevSampleHapPairs.java
@@ -0,0 +1,138 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code RevSampleHapPairs} is a wrapper for a {@code SampleHapPairs}
+ * instance. The wrapper reverses the order of markers in the wrapped object:
+ * marker {@code m} maps to wrapped marker {@code nMarkers() - 1 - m}.</p>
+ * <p>Instances of class {@code RevSampleHapPairs} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class RevSampleHapPairs implements SampleHapPairs {
+
+ /*
+ * All instances of the {@code SampleHapPairs} interface are required to be
+ * immutable, so the wrapped object can be stored without a defensive copy.
+ */
+ private final SampleHapPairs hapPairs;
+ private final int lastMarker;
+
+ /**
+ * Creates a new {@code RevSampleHapPairs} instance from the specified data.
+ * @param hapPairs the sample haplotype pairs that will be wrapped by the
+ * new instance
+ * @throws NullPointerException if {@code hapPairs == null}
+ */
+ public RevSampleHapPairs(SampleHapPairs hapPairs) {
+ this.hapPairs = hapPairs;
+ this.lastMarker = hapPairs.nMarkers() - 1; // a null argument fails here
+ }
+
+ @Override
+ public int allele1(int marker, int hapPair) {
+ return hapPairs.allele1(lastMarker - marker, hapPair); // reversed marker index
+ }
+
+ @Override
+ public int allele2(int marker, int hapPair) {
+ return hapPairs.allele2(lastMarker - marker, hapPair); // reversed marker index
+ }
+
+ @Override
+ public int allele(int marker, int haplotype) {
+ return hapPairs.allele(lastMarker - marker, haplotype); // reversed marker index
+ }
+
+ @Override
+ public int nMarkers() {
+ return hapPairs.nMarkers(); // the marker count equals lastMarker + 1
+ }
+
+ @Override
+ public Markers markers() {
+ return hapPairs.markers().reverse(); // result of Markers.reverse() on the wrapped markers
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return hapPairs.marker(lastMarker - marker); // reversed marker index
+ }
+
+ @Override
+ public int nHaps() {
+ return hapPairs.nHaps(); // no marker index involved: delegated unchanged
+ }
+
+ @Override
+ public int nHapPairs() {
+ return hapPairs.nHapPairs(); // no marker index involved: delegated unchanged
+ }
+
+ @Override
+ public int nSamples() {
+ return hapPairs.nSamples(); // no marker index involved: delegated unchanged
+ }
+
+ @Override
+ public Samples samples() {
+ return hapPairs.samples(); // no marker index involved: delegated unchanged
+ }
+
+ @Override
+ public Samples samples(int hapPair) {
+ return hapPairs.samples(hapPair); // no marker index involved: delegated unchanged
+ }
+
+ @Override
+ public int sampleIndex(int hapPair) {
+ return hapPairs.sampleIndex(hapPair); // no marker index involved: delegated unchanged
+ }
+
+ @Override
+ public int nAlleles(int marker) {
+ return hapPairs.nAlleles(lastMarker - marker); // reversed marker index
+ }
+
+ @Override
+ public boolean storesNonMajorIndices(int marker) {
+ return hapPairs.storesNonMajorIndices(lastMarker - marker); // reversed marker index
+ }
+
+ @Override
+ public int majorAllele(int marker) {
+ return hapPairs.majorAllele(lastMarker - marker); // reversed marker index
+ }
+
+ @Override
+ public int alleleCount(int marker, int allele) {
+ return hapPairs.alleleCount(lastMarker - marker, allele); // reversed marker index
+ }
+
+ @Override
+ public int hapIndex(int marker, int allele, int copy) {
+ return hapPairs.hapIndex(lastMarker - marker, allele, copy); // reversed marker index
+ }
+}
diff --git a/haplotype/SampleHapPairs.java b/haplotype/SampleHapPairs.java
new file mode 100644
index 0000000..4e35af9
--- /dev/null
+++ b/haplotype/SampleHapPairs.java
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Interface {@code SampleHapPairs} represents a list of samples and a
+ * haplotype pair for each sample. Each haplotype pair is guaranteed
+ * to have two non-missing alleles at each marker.
+ * </p>
+ * <p>All instances of {@code SampleHapPairs} are required to be immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface SampleHapPairs extends HapPairs {
+
+ /**
+ * Returns the samples. The {@code k}-th sample corresponds to
+ * the {@code k}-th haplotype pair.
+ * @return the samples
+ */
+ Samples samples();
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ int nSamples();
+
+ /**
+ * Returns the number of alleles for the specified marker.
+ * @param marker a marker index
+ * @return the number of alleles for the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ int nAlleles(int marker);
+
+ /**
+ * Returns {@code true} if this object stores the indices of haplotypes
+ * that carry non-major alleles, and returns {@code false} otherwise.
+ * @param marker a marker index
+ * @return {@code true} if this object stores the indices of haplotypes
+ * that carry non-major alleles
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ boolean storesNonMajorIndices(int marker);
+
+ /**
+ * Returns the index of the major allele.
+ * @param marker a marker index
+ * @return the index of the major allele
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws UnsupportedOperationException if
+ * {@code this.storesNonMajorIndices(marker) == false}
+ */
+ int majorAllele(int marker);
+
+ /**
+ * Returns the number of haplotypes that carry the specified allele.
+ * @param marker a marker index
+ * @param allele an allele index
+ * @return the number of haplotypes that carry the specified allele
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IllegalArgumentException if
+ * {@code allele == this.majorAllele(marker)}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.nAlleles(marker)}
+ * @throws UnsupportedOperationException if
+ * {@code this.storesNonMajorIndices(marker) == false}
+ */
+ int alleleCount(int marker, int allele);
+
+ /**
+ * Returns the index of the haplotype that carries the specified copy of
+ * the specified allele.
+ * @param marker a marker index
+ * @param allele an allele index
+ * @param copy a copy index
+ * @return the index of the haplotype that carries the specified copy
+ * @throws IllegalArgumentException if
+ * {@code allele == this.majorAllele(marker)}
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.nAlleles(marker)}
+ * @throws IndexOutOfBoundsException if
+ * {@code copy < 0 || copy >= this.alleleCount(marker, allele)}
+ * @throws UnsupportedOperationException if
+ * {@code this.storesNonMajorIndices(marker) == false}
+ */
+ int hapIndex(int marker, int allele, int copy);
+}
diff --git a/haplotype/Weights.java b/haplotype/Weights.java
new file mode 100644
index 0000000..041687a
--- /dev/null
+++ b/haplotype/Weights.java
@@ -0,0 +1,150 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import java.util.HashMap;
+import java.util.Map;
+import main.NuclearFamilies;
+
+/**
+ * <p>Class {@code Weights} represents per-haplotype weights.
+ * </p>
+ * Instances of class {@code Weights} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class Weights {
+
+ /* Weight, before division by the ID count, of a haplotype inherited from a parent. */
+ private static final float MIN_SAMPLE_WEIGHT = 0.01f;
+
+ private final NuclearFamilies fam;
+ private final float nonRefWt;
+
+ /**
+ * Constructs a new {@code Weights} instance with a weight of 1.0f
+ * for all samples.
+ * @param fam the parent-offspring relationships
+ * @throws NullPointerException if {@code fam == null}
+ */
+ public Weights(NuclearFamilies fam) {
+ this(fam, 1.0f);
+ }
+
+ /**
+ * Constructs a new {@code Weights} instance with a weight of 1.0f
+ * for reference samples, and a weight of {@code nonRefWt} for
+ * non-reference samples. Non-reference samples are the samples
+ * which are present in {@code fam.samples()}.
+ * @param fam the parent-offspring data
+ * @param nonRefWt the non-reference sample weight
+ * @throws IllegalArgumentException if
+ * {@code nonRefWt < 0.0f || nonRefWt > 1.0f || Float.isNaN(nonRefWt)}
+ * @throws NullPointerException if {@code fam == null}
+ */
+ public Weights(NuclearFamilies fam, float nonRefWt) {
+ if (fam==null) {
+ throw new NullPointerException("fam==null");
+ }
+ if (nonRefWt < 0.0f || nonRefWt > 1.0f || Float.isNaN(nonRefWt)) {
+ throw new IllegalArgumentException("nonRefWeight: " + nonRefWt);
+ }
+ this.fam = fam;
+ this.nonRefWt = nonRefWt;
+ }
+
+ /**
+ * Returns an array of length {@code haps.nHaps()} with
+ * per-haplotype weights. Array elements {@code 2*j} and {@code 2*j + 1}
+ * are the weights for the first and second haplotype in the
+ * {@code j}-th haplotype pair. Reference haplotypes are assigned
+ * a weight of {@code 1.0f}. Non-reference haplotypes are assigned
+ * a weight of {@code this.nonRefWt()} if the haplotype is not
+ * inherited from a parent in the sample, and a weight of {@code 0.01f}
+ * if the haplotype is inherited from a parent in the sample.
+ * Each of these weights is divided by the number of haplotype pairs
+ * in {@code haps} that share the haplotype pair's sample identifier.
+ * The first haplotype in the offspring is required to be the
+ * transmitted haplotype for a parent-offspring duo.
+ *
+ * @param haps the haplotype pairs
+ * @return an array of per-haplotype weights
+ *
+ * @throws NullPointerException if {@code haps == null}
+ */
+ public float[] get(HapPairs haps) {
+ Samples samples = families().samples();
+ float[] fa = new float[haps.nHaps()];
+ Map<Integer, Integer> cntMap = cntMap(haps);
+ int hapIndex = 0;
+ for (int j=0, n=haps.nHapPairs(); j<n; ++j) {
+ int idIndex = haps.samples(j).idIndex(haps.sampleIndex(j));
+ int sampleIndex = samples.index(idIndex);
+ int parentCnt = 0;
+ if (sampleIndex != -1) {
+ // sample is in fam.samples(), i.e. a non-reference sample
+ if (families().father(sampleIndex)>=0) {
+ ++parentCnt;
+ }
+ if (families().mother(sampleIndex)>=0) {
+ ++parentCnt;
+ }
+ }
+ float sampleWeight = (sampleIndex == -1) ? 1.0f : nonRefWt();
+ int cnt = cntMap.get(idIndex); // never null: cntMap was built from the same haps
+ float wt = sampleWeight/cnt;
+ float minWt = MIN_SAMPLE_WEIGHT/cnt;
+ fa[hapIndex++] = parentCnt>0 ? minWt : wt; // first haplotype: down-weighted with any parent
+ fa[hapIndex++] = parentCnt==2 ? minWt : wt; // second haplotype: only with both parents
+ }
+ return fa;
+ }
+
+ /*
+ * Returns a map from the haplotype ID index to the number of
+ * haplotype pairs with the ID index.
+ */
+ private static Map<Integer, Integer> cntMap(HapPairs haps) {
+ int nHapPairs = haps.nHapPairs();
+ int initCapacity = 1 + (3*nHapPairs + 1)/2;
+ Map<Integer, Integer> cntMap = new HashMap<>(initCapacity);
+ for (int j=0; j<nHapPairs; ++j) {
+ int idIndex = haps.samples(j).idIndex(haps.sampleIndex(j));
+ cntMap.merge(idIndex, 1, Integer::sum); // insert 1 or increment the existing count
+ }
+ return cntMap;
+ }
+
+ /**
+ * Returns the parent-offspring relationships.
+ * @return the parent-offspring relationships
+ */
+ public NuclearFamilies families() {
+ return fam;
+ }
+
+ /**
+ * Returns the non-reference sample weight.
+ * @return the non-reference sample weight
+ */
+ public float nonRefWt() {
+ return nonRefWt;
+ }
+}
diff --git a/haplotype/WrappedHapPair.java b/haplotype/WrappedHapPair.java
new file mode 100644
index 0000000..9c23610
--- /dev/null
+++ b/haplotype/WrappedHapPair.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package haplotype;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * Class {@code WrappedHapPair} is a {@code HapPair} instance
+ * that wraps a {@code SampleHapPairs} object.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class WrappedHapPair implements HapPair {
+
+ private final SampleHapPairs haps;
+ private final int hapPair;
+
+ /**
+ * Creates a {@code WrappedHapPair} instance representing
+ * the specified haplotype pair.
+ * @param sampleHapPairs the {@code SampleHapPairs} object that
+ * will be "wrapped" by {@code this}
+ * @param hapPair a haplotype pair index
+ * @throws IllegalArgumentException if
+ * {@code hapPair < 0 || hapPair >= sampleHapPairs.nHapPairs()}
+ * @throws NullPointerException if {@code sampleHapPairs == null}
+ */
+ public WrappedHapPair(SampleHapPairs sampleHapPairs, int hapPair) {
+ if (hapPair < 0 || hapPair >= sampleHapPairs.nHapPairs()) { // a null argument fails here
+ throw new IllegalArgumentException("hapPair: " + hapPair);
+ }
+ this.haps = sampleHapPairs;
+ this.hapPair = hapPair;
+ }
+
+ @Override
+ public int allele1(int marker) {
+ return haps.allele1(marker, hapPair); // first allele of the wrapped pair
+ }
+
+ @Override
+ public int allele2(int marker) {
+ return haps.allele2(marker, hapPair); // second allele of the wrapped pair
+ }
+
+ @Override
+ public Markers markers() {
+ return haps.markers();
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return haps.marker(marker);
+ }
+
+ @Override
+ public int nMarkers() {
+ return haps.nMarkers();
+ }
+
+ @Override
+ public Samples samples() {
+ return haps.samples();
+ }
+
+ @Override
+ public int sampleIndex() {
+ return hapPair; // SampleHapPairs: the k-th sample corresponds to the k-th haplotype pair
+ }
+}
diff --git a/ibd/HapSegment.java b/ibd/HapSegment.java
new file mode 100644
index 0000000..7b8f33f
--- /dev/null
+++ b/ibd/HapSegment.java
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package ibd;
+
+import beagleutil.IntInterval;
+import blbutil.Const;
+
+/**
+ * <p>Class {@code HapSegment} represents a marker interval
+ * for a haplotype.
+ * </p>
+ * Instances of class {@code HapSegment} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class HapSegment implements Comparable<HapSegment>, IntInterval {
+
+ private final int hap;
+ private final int start;
+ private final int end;
+
+ /**
+ * Constructs a new {@code HapSegment} instance.
+ * @param hap the haplotype index
+ * @param start the start marker index (inclusive)
+ * @param end the end marker index (inclusive)
+ * @throws IllegalArgumentException if {@code start > end}
+ */
+ public HapSegment(int hap, int start, int end) {
+ if (start > end) {
+ throw new IllegalArgumentException("start > end: " + start + " > " + end);
+ }
+ this.hap = hap;
+ this.start = start;
+ this.end = end;
+ }
+
+ /**
+ * Returns the haplotype index.
+ * @return the haplotype index
+ */
+ public int hap() {
+ return hap;
+ }
+
+ /**
+ * Returns the start marker index (inclusive).
+ * @return the start marker index (inclusive)
+ */
+ @Override
+ public int start() {
+ return start;
+ }
+
+ /**
+ * Returns the end marker index (inclusive).
+ * @return the end marker index (inclusive)
+ */
+ @Override
+ public int end() {
+ return end;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(70);
+ sb.append(hap);
+ sb.append(Const.tab);
+ sb.append(start);
+ sb.append(Const.tab);
+ sb.append(end);
+ return sb.toString();
+ }
+
+ /**
+ * <p>Returns the hash code value for this object. The hash code is defined
+ * by the following calculation:
+ * </p>
+ * <pre>
+ * int hash = 5;
+ * hash = 89 * hash + this.hap();
+ * hash = 89 * hash + this.start();
+ * hash = 89 * hash + this.end();
+ * </pre>
+ * @return the hash code value for this object
+ */
+ @Override
+ public int hashCode() {
+ int hash = 5;
+ hash = 89*hash + this.hap;
+ hash = 89*hash + this.start;
+ hash = 89*hash + this.end;
+ return hash;
+ }
+
+ /**
+ * Compares the specified object with this {@code HapSegment} for
+ * equality. Returns {@code true} if the specified object is a
+ * {@code HapSegment} instance and if this {@code HapSegment} is
+ * equal to the specified {@code HapSegment}, and returns
+ * {@code false} otherwise. Two {@code HapSegment} instances
+ * are equal if they have equal haplotype indices,
+ * equal starting marker indices, and equal ending marker indices.
+ * @param o the reference object with which to compare.
+ * @return {@code true} if the specified object is an
+ * {@code HapSegment} instance and if this {@code HapSegment} is
+ * equal to the specified {@code HapSegment}
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this==o) {
+ return true;
+ }
+ if (o==null) {
+ return false;
+ }
+ if (getClass()!=o.getClass()) {
+ return false;
+ }
+ final HapSegment other=(HapSegment) o;
+ return (this.hap==other.hap && this.start==other.start
+ && this.end==other.end);
+ }
+
+ /**
+ * Compares this object with the specified object for order. Returns a
+ * negative integer, zero, or a positive integer as this object is less
+ * than, equal to, or greater than the specified object.
+ * {@code HapSegment} instances are ordered first by
+ * {@code this.start()}, then by {@code this.end()},
+ * and finally by {@code this.hap()}.
+ * @param hs the {@code HapSegment} to be compared
+ * @return a negative integer, zero, or a positive integer as this
+ * {@code HapSegment} is less than, equal to, or greater than the
+ * specified {@code HapSegment}
+ * @throws NullPointerException if {@code hs == null}
+ */
+ @Override
+ public int compareTo(HapSegment hs) {
+ if (this.start != hs.start) {
+ return Integer.compare(this.start, hs.start);
+ }
+ if (this.end != hs.end) {
+ return Integer.compare(this.end, hs.end);
+ }
+ return Integer.compare(this.hap, hs.hap); // zero only when all three fields match
+ }
+}
diff --git a/ibd/HaploidIbd.java b/ibd/HaploidIbd.java
new file mode 100644
index 0000000..925593d
--- /dev/null
+++ b/ibd/HaploidIbd.java
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package ibd;
+
+import blbutil.IntPair;
+import blbutil.Utilities;
+import dag.Dag;
+import haplotype.HapPairs;
+import haplotype.SampleHapPairs;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import vcf.GL;
+
+/**
+ * <p>Class {@code HaploidIbd} implements the Refined IBD algorithm.
+ * The Refined IBD algorithm detects candidate haplotype IBD segments with the
+ * Germline Algorithm and then evaluates candidate IBD segments using a
+ * likelihood ratio test.
+ * </p>
+ * <p>Instances of class {@code HaploidIbd} are immutable.
+ * </p>
+ * Reference: Gusev A, Lowe JK, Stoffel M, Daly MJ, Altshuler D, Breslow JL,
+ * Friedman JM, Pe'er I. Whole population, genomewide mapping
+ * of hidden relatedness. Genome Research 2009;19(2):318-26.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class HaploidIbd {
+
+ private final int ibdTrim;
+ private final float minIbdLod;
+ private final float minIbsLength; // positions from Dag.posArray()
+ private final float minFreqLod; // for shared haplotype
+
+ /**
+ * Constructs a new {@code HaploidIbd} instance from the specified data.
+ * @param ibdTrim the number of markers to trim from an IBS segment
+ * when computing the IBD versus non-IBD likelihood ratio
+ * @param minIbdLod the minimum IBD LOD score of reported IBD segments
+ *
+ * @throws IllegalArgumentException if {@code ibdTrim < 0 }
+ * @throws IllegalArgumentException if
+ * {@code minIbdLod <= 0.0f || Float.isFinite(minIbdLod) == false}
+ */
+ public HaploidIbd(int ibdTrim, float minIbdLod) {
+ if (ibdTrim < 0) {
+ throw new IllegalArgumentException("trim: " + ibdTrim);
+ }
+ if (minIbdLod <= 0.0 || Float.isFinite(minIbdLod) == false) {
+ throw new IllegalArgumentException("minIbdlod: " + minIbdLod);
+ }
+ this.ibdTrim = ibdTrim;
+ this.minIbdLod = minIbdLod;
+ this.minIbsLength = 0.8f*minIbdLod; // both derived thresholds follow minIbdLod
+ this.minFreqLod = minIbdLod;
+ }
+
+ /**
+ * Runs the Refined IBD algorithm, and returns a map whose keys are
+ * ordered pairs of haplotype indices and whose values are thread-safe
+ * lists of IBD segments for each haplotype pair. The minimum haplotype
+ * index is listed first in each ordered pair of haplotype indices.
+ *
+ * @param gl the HMM emission probabilities
+ * @param dag the HMM transition probabilities
+ * @param haps the sample haplotype pairs
+ * @param nThreads the number of threads of execution that may be used
+ * @return the detected IBD segments
+ *
+ * @throws IllegalArgumentException if {@code nThreads < 1}
+ * @throws IllegalArgumentException if
+ * {@code gl.samples().equals(haps.samples()) == false}
+ * @throws IllegalArgumentException if
+ * {@code gl.markers().equals(dag.markers()) == false
+ * || gl.markers().equals(haps.markers()) == false}
+ * @throws NullPointerException if
+ * {@code gl == null || dag == null || haps == null}
+ */
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ public Map<IntPair, List<IbdSegment>> run(GL gl, Dag dag,
+ SampleHapPairs haps, final int nThreads) {
+ checkParameters(gl, dag, haps);
+ double[] pos = dag.posArray();
+ IbsHapSegments ibsSegments = new IbsHapSegments(haps, pos, minIbsLength);
+ ConcurrentMap<IntPair, List<IbdSegment>> ibdMap
+ = new ConcurrentHashMap<>();
+
+ final BlockingQueue<Integer> qIn = new ArrayBlockingQueue<>(5*nThreads);
+ ExecutorService es = Executors.newFixedThreadPool(nThreads);
+ for (int j=0; j<nThreads; ++j) {
+ IbdBaum baum = new IbdBaum(dag, gl); // one IbdBaum instance per worker
+ es.submit(new ProduceIbd(haps, baum, ibsSegments, qIn, ibdMap,
+ ibdTrim, minIbdLod));
+ }
+ try {
+ for (int hap=0, n=haps.nHaps(); hap<n; ++hap) {
+ qIn.put(hap);
+ }
+ for (int j=0; j<nThreads; ++j) {
+ qIn.put(ProduceIbd.POISON); // one stop signal per worker
+ }
+ es.shutdown();
+ es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
+ }
+ catch (Throwable e) {
+ Utilities.exit("ERROR", e);
+ }
+ return ibdMap;
+ }
+
+ private void checkParameters(GL gl, Dag dag, SampleHapPairs haps) {
+ if (gl.samples().equals(haps.samples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ if (gl.markers().equals(dag.markers())==false
+ || gl.markers().equals(haps.markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ }
+
+ private static double freqLod(int hap, int start, int end, int ibdTrim,
+ Dag dag, HapPairs haps) {
+ int trimmedStart = start + ibdTrim;
+ int trimmedEnd = end - ibdTrim;
+ if (trimmedStart >= trimmedEnd) { // nothing is left after trimming both ends
+ return 0.0f;
+ }
+ else {
+ return IbdBaum.freqLod(hap, trimmedStart, trimmedEnd, haps, dag);
+ }
+ }
+
+ private static double ibdLod(IbdBaum ibdBaum, int hap1, int hap2, int start,
+ int end, int ibdTrim) {
+ int trimmedStart = start + ibdTrim;
+ int trimmedEnd = end - ibdTrim;
+ if (trimmedStart >= trimmedEnd) { // nothing is left after trimming both ends
+ return 0.0f;
+ }
+ else {
+ int sample1 = hap1/2; // haplotypes 2*k and 2*k + 1 belong to sample k
+ int sample2 = hap2/2;
+ return ibdBaum.ibdLod(sample1, sample2, trimmedStart, trimmedEnd);
+ }
+ }
+
+ private class ProduceIbd implements Runnable {
+
+ public static final int POISON = -37;
+
+ private final SampleHapPairs haps;
+ private final IbdBaum baum;
+ private final IbsHapSegments ibsHapSegments;
+ private final BlockingQueue<Integer> qIn;
+ private final ConcurrentMap<IntPair, List<IbdSegment>> ibdMap;
+ private final int ibdTrim;
+ private final float minIbdLod;
+
+ public ProduceIbd(SampleHapPairs haps, IbdBaum baum,
+ IbsHapSegments ibsHapSegments, BlockingQueue<Integer> qIn,
+ ConcurrentMap<IntPair, List<IbdSegment>> ibdMap, int ibdTrim,
+ float minIbdLod) {
+ if (ibdTrim < 0) {
+ throw new IllegalArgumentException("trim < 0: " + ibdTrim);
+ }
+ if (minIbdLod <= 0.0 || Float.isNaN(minIbdLod)) {
+ throw new IllegalArgumentException("ibdlod: " + minIbdLod);
+ }
+ this.haps = haps;
+ this.baum = baum;
+ this.ibsHapSegments = ibsHapSegments;
+ this.qIn = qIn;
+ this.ibdMap = ibdMap;
+ this.ibdTrim = ibdTrim;
+ this.minIbdLod = minIbdLod;
+ }
+
+ /*
+ * Takes haplotype indices from a thread-safe work-queue and stores
+ * detected IBD segments between the haplotype and
+ * haplotypes with larger index in {@code this.ibdMap}. The method
+ * exits when {@code ProduceIbd.POISON} is taken from the
+ * work queue.
+ *
+ * @throws IndexOutOfBoundsException if a negative integer
+ * other than {@code ProduceIbd.POISON} is taken from the
+ * work queue
+ */
+ @Override
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ public void run() {
+ try {
+ int hap = qIn.take();
+ while (hap!=POISON) {
+ List<HapSegment> ibsSegs = ibsHapSegments.find(hap);
+ for (int j=0, n=ibsSegs.size(); j<n; ++j) {
+ HapSegment hs = ibsSegs.get(j);
+ if (hap < hs.hap()) { // only pairs with hap as the smaller index
+ int start = hs.start();
+ int end = hs.end(); // inclusive, hence the (end+1) arguments below
+ double freqLod = HaploidIbd.freqLod(hap, start,
+ (end+1), ibdTrim, baum.dag(), haps);
+ if (freqLod >= minFreqLod) {
+ float ibdLod;
+ if ( (hap/2) == (hs.hap()/2) ) { // both haplotypes are in one sample
+ int sample = hap/2;
+ ibdLod = (float) baum.hbdLod(sample, start, (end+1));
+ }
+ else {
+ ibdLod = (float) HaploidIbd.ibdLod(baum, hap,
+ hs.hap(), start, (end+1), ibdTrim);
+ }
+ if (ibdLod >= minIbdLod) {
+ IntPair hapPair = new IntPair(hap, hs.hap());
+ List<IbdSegment> list = ibdMap.get(hapPair);
+ if (list==null) {
+ list = Collections.synchronizedList(
+ new ArrayList<IbdSegment>(2));
+ ibdMap.putIfAbsent(hapPair, list);
+ list = ibdMap.get(hapPair); // re-read: another thread may have stored its list first
+ }
+ IbdSegment segment = new IbdSegment(hapPair,
+ baum.gl().marker(start),
+ baum.gl().marker(end),
+ ibdLod, start, end );
+ list.add(segment);
+ }
+ }
+ }
+ }
+ hap = qIn.take();
+ }
+ }
+ catch (Throwable e) {
+ Utilities.exit("ProduceIbd: ERROR", e);
+ }
+ }
+ }
+}
diff --git a/ibd/Haplotype.java b/ibd/Haplotype.java
new file mode 100644
index 0000000..e5dc713
--- /dev/null
+++ b/ibd/Haplotype.java
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package ibd;
+
+import haplotype.SampleHapPairs;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code Haplotype} represents a haplotype segment.
+ * </p>
+ * Instances of class {@code Haplotype} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class Haplotype {
+
+ private final int start; // inclusive
+ private final int end; // exclusive
+ private final SampleHapPairs haps;
+ private final int hapIndex;
+
+ /**
+ * Constructs a new {@code Haplotype} instance. The haplotype will
+ * include all markers in the specified {@code SampleHapPairs} parameter.
+ *
+ * @param haps sample haplotype pairs
+ * @param hap a haplotype index
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= haps.nHaps()}
+ * @throws NullPointerException if {@code haps == null}
+ */
+ public Haplotype(SampleHapPairs haps, int hap) {
+ this(haps, hap, 0, haps.nMarkers());
+ }
+
+
+ /**
+ * Constructs a new {@code Haplotype} instance.
+ *
+ * @param haps sample haplotype pairs
+ * @param hap a haplotype index
+ * @param start the starting marker index for the haplotype segment
+ * (inclusive)
+ * @param end the ending marker index for the haplotype segment (exclusive)
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= haps.nHaps()}
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || start > end || end > haps.nMarkers()}
+ * @throws NullPointerException if {@code haps == null}
+ */
+ public Haplotype(SampleHapPairs haps, int hap, int start, int end) {
+     // Validate the marker interval first; haps.nMarkers() also triggers the
+     // documented NullPointerException when haps is null.
+     if (start < 0 || start > end || end > haps.nMarkers()) {
+         String s = "start=" + start + " end=" + end + " haps.nMarkers()="
+                 + haps.nMarkers();
+         throw new IndexOutOfBoundsException(s);
+     }
+     // The constructor contract documents IndexOutOfBoundsException (not
+     // IllegalArgumentException) for an out-of-range haplotype index.
+     if (hap < 0 || hap >= haps.nHaps()) {
+         throw new IndexOutOfBoundsException("hapIndex=" + hap);
+     }
+     this.start = start;
+     this.end = end;
+     this.haps = haps;
+     this.hapIndex = hap;
+ }
+
+ /**
+ * Returns a new {@code Haplotype} instance that is
+ * obtained by restricting this haplotype to the specified marker interval.
+ * @param start the starting marker index for the haplotype segment
+ * (inclusive)
+ * @param end the ending marker index for the haplotype segment (exclusive)
+ * @return the restricted haplotype segment
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || start > end || end > this.length()}
+ */
+ public Haplotype restrict(int start, int end) {
+ if (start < 0 || start > end || end > this.length()) {
+ String s = "start=" + start + " end=" + end + " this.length()="
+ + this.length();
+ throw new IndexOutOfBoundsException(s);
+ }
+ int newStart = this.start + start;
+ int newEnd = this.start + end;
+ return new Haplotype(haps, hapIndex, newStart, newEnd);
+ }
+
+ /**
+ * Returns the number of alleles in this haplotype segment.
+ * @return the number of alleles in this haplotype segment
+ */
+ public int length() {
+ return end - start;
+ }
+
+ /**
+ * Returns the index of the haplotype.
+ * @return the index of the haplotype
+ */
+ public int hapIndex() {
+ return hapIndex;
+ }
+
+ /**
+ * Returns the sample haplotype pairs.
+ * @return the sample haplotype pairs
+ */
+ public SampleHapPairs sampleHapPairs() {
+ return haps;
+ }
+
+ /**
+ * Returns the specified marker. The first marker on the haplotype segment
+ * has index 0.
+ * @param index a marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.length()}
+ */
+ public Marker marker(int index) {
+ int i = start + index;
+ if (i < start || i >= end) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return haps.marker(i);
+ }
+
+ /**
+ * Returns the specified allele on the haplotype. The first allele on
+ * the haplotype segment has index 0.
+ *
+ * @param index a marker index
+ * @return the specified allele on the haplotype
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.length()}
+ */
+ public int allele(int index) {
+ int i = start + index;
+ if (i < start || i >= end) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ return haps.allele(i, hapIndex);
+ }
+
+ /**
+ * Compares the specified object with this {@code Haplotype} for
+ * equality. Returns {@code true} if the specified object
+ * is a {@code Haplotype} that represents the same haplotype segment
+ * as {@code this}, and returns {@code false} otherwise.
+ * @param obj the object to be compared for equality with this
+ * {@code Haplotype}
+ * @return {@code true} if the specified object is an {@code Haplotype}
+ * that represents the same haplotype segment as {@code this}
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ final Haplotype other = (Haplotype) obj;
+ int length = this.length();
+ if (length != other.length()) {
+ return false;
+ }
+ for (int j=0; j<length; ++j) {
+ if (this.allele(j) != other.allele(j)) {
+ return false;
+ }
+ }
+ for (int j=0; j<length; ++j) {
+ if (false==this.marker(j).equals(other.marker(j))) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ /**
+ * <p>Returns a hash code value for the object.
+ * </p>
+ * <p>The hash code is defined by the following calculation, in which
+ * {@code j} is an index relative to the start of this haplotype segment:
+ * </p>
+ * <pre>
+ * int hash = 17;
+ * for (int j=0; j&lt;this.length(); ++j) {
+ *     hash += 29 * hash + this.allele(j);
+ *     hash += 29 * hash + this.marker(j).hashCode();
+ * }
+ * </pre>
+ * @return a hash code value for the object
+ */
+ @Override
+ public int hashCode() {
+ int hash = 17;
+ // j runs over absolute marker indices start..end-1 of the underlying
+ // haplotype pairs, i.e. the same alleles and markers that
+ // this.allele(k) and this.marker(k) return for k = j - start.
+ for (int j = start; j<end; ++j) {
+ hash += 29 * hash + haps.allele(j, hapIndex);
+ hash += 29 * hash + haps.marker(j).hashCode();
+ }
+ return hash;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The
+ * exact details of the representation are unspecified and
+ * subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(3 * (end - start));
+ sb.append('[');
+ if (end > start) {
+ sb.append(allele(0));
+ }
+ for (int j = 1, n = end - start; j < n; ++j) {
+ sb.append(", ");
+ sb.append(allele(j));
+ }
+ sb.append(']');
+ return sb.toString();
+ }
+
+}
diff --git a/ibd/IbdBaum.java b/ibd/IbdBaum.java
new file mode 100644
index 0000000..99cebf2
--- /dev/null
+++ b/ibd/IbdBaum.java
@@ -0,0 +1,292 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package ibd;
+
+import dag.Dag;
+import haplotype.HapPairs;
+import sample.DuoBaumLevel;
+import sample.DuoNodes;
+import sample.HapBaumLevel;
+import sample.HapNodes;
+import sample.SingleBaumLevel;
+import sample.SingleNodes;
+import vcf.GL;
+import vcf.HbdAL;
+
+/**
+ * <p>Class {@code IbdBaum} estimates LOD scores for an IBD versus a non-IBD
+ * model, and it estimates LOD scores for an HBD versus a non-HBD model.
+ * </p>
+ * <p>Instances of class {@code IbdBaum} are not safe for concurrent use: {@code hbdLod} and {@code ibdLod} overwrite per-instance scratch storage.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class IbdBaum {
+
+ private static final double log_e_10 = Math.log(10.0);
+ private final Dag dag;
+ private final GL gl;
+ private final int nMarkers;
+
+ private final SingleNodes fwdNodesA;
+ private final SingleNodes fwdNodesB;
+ private final HapNodes fwdNodesHbd;
+ private final DuoNodes fwdNodesIbd;
+
+ private final SingleBaumLevel scratchSingleLevel;
+ private final HapBaumLevel scratchHapLevel;
+ private final DuoBaumLevel scratchDuoLevel;
+
+ /**
+ * Creates a new {@code IbdBaum} instance from the specified data.
+ *
+ * @param dag the directed acyclic graph that determines the
+ * transition probabilities
+ * @param gl the HMM emission probabilities
+ *
+ * @throws IllegalArgumentException
+ * if {@code dag.markers().equals(gl.markers()) == false}
+ * @throws NullPointerException if {@code dag == null || gl == null}
+ */
+ public IbdBaum(Dag dag, GL gl) {
+ if (dag.markers().equals(gl.markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ this.dag = dag;
+ this.gl = gl;
+ this.nMarkers = dag.nLevels();
+
+ this.fwdNodesHbd = new HapNodes();
+ this.fwdNodesA = new SingleNodes();
+ this.fwdNodesB = new SingleNodes();
+ this.fwdNodesIbd = new DuoNodes();
+
+ this.scratchSingleLevel = new SingleBaumLevel(dag, gl);
+ this.scratchHapLevel = new HapBaumLevel(dag, new HbdAL(gl));
+ this.scratchDuoLevel = new DuoBaumLevel(dag, gl);
+ }
+
+ /**
+ * Returns the directed acyclic graph that determines the transition
+ * probabilities.
+ * @return the directed acyclic graph that determines the transition
+ * probabilities
+ */
+ public Dag dag() {
+ return dag;
+ }
+
+ /**
+ * Returns the HMM emission probabilities.
+ * @return the HMM emission probabilities
+ */
+ public GL gl() {
+ return gl;
+ }
+
+ /**
+ * Returns the homozygosity-by-descent (HBD) LOD score.
+ * @param sample the sample index
+ * @param start the start marker index (inclusive)
+ * @param end the end marker index (exclusive)
+ * @return the HBD LOD score
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || start > end || end > this.dag.nMarkers()}
+ */
+ public double hbdLod(int sample, int start, int end) {
+ checkStartAndEnd(start, end, nMarkers);
+ if (start==end) {
+ return 0.0f;
+ }
+ setInitialNodes(dag, start, fwdNodesHbd);
+ setInitialNodes(dag, start, fwdNodesA);
+ double altHbdLogLike = logLikelihood(fwdNodesHbd, scratchHapLevel,
+ sample, start, end);
+ double nullHbdLogLike = logLikelihood(fwdNodesA, scratchSingleLevel,
+ sample, start, end);
+ return lod(altHbdLogLike - nullHbdLogLike);
+ }
+
+ /**
+ * Returns the identity-by-descent (IBD) LOD score.
+ * @param sampleA the first sample index
+ * @param sampleB the second sample index
+ * @param start the start marker index (inclusive)
+ * @param end the end marker index (exclusive)
+ * @return the IBD LOD score
+ * @throws IndexOutOfBoundsException if
+ * {@code sampleA < 0 || sampleA >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sampleB < 0 || sampleB >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || start > end || end > this.dag.nMarkers()}
+ */
+ public double ibdLod(int sampleA, int sampleB, int start, int end) {
+ checkStartAndEnd(start, end, nMarkers);
+ if (start==end) {
+ return 0.0f;
+ }
+ setInitialNodes(dag, start, fwdNodesA);
+ setInitialNodes(dag, start, fwdNodesB);
+ setInitialNodes(dag, start, fwdNodesIbd);
+
+ double nullLogLike = 0.0;
+ nullLogLike += logLikelihood(fwdNodesA, scratchSingleLevel, sampleA,
+ start, end);
+ nullLogLike += logLikelihood(fwdNodesB, scratchSingleLevel, sampleB,
+ start, end);
+
+ double altLogLike = logLikelihood(fwdNodesIbd, scratchDuoLevel,
+ sampleA, sampleB, start, end);
+ return lod(altLogLike - nullLogLike);
+ }
+
+ // Verifies that 0 <= start <= end <= nMarkers. The public methods that call
+ // this helper document IndexOutOfBoundsException for an invalid interval,
+ // so that is the exception type thrown here.
+ private static void checkStartAndEnd(int start, int end, int nMarkers) {
+     if (start<0 || start>end || end>nMarkers) {
+         String s = "start=" + start + " end=" + end + " nMarkers="
+                 + nMarkers;
+         throw new IndexOutOfBoundsException(s);
+     }
+ }
+
+ private static void setInitialNodes(Dag dag, int marker, HapNodes nodes) {
+ nodes.clear();
+ int nNodes = dag.nParentNodes(marker);
+ for (int node=0; node<nNodes; ++node) {
+ float p = dag.parentProb(marker, node);
+ nodes.sumUpdate(node, p);
+ }
+ }
+
+ private static void setInitialNodes(Dag dag, int marker, SingleNodes nodes) {
+ nodes.clear();
+ int n = dag.nParentNodes(marker);
+ for (int n1=0; n1<n; ++n1) {
+ float p1 = dag.parentProb(marker, n1);
+ for (int n2=0; n2<n; ++n2) {
+ float p2 = dag.parentProb(marker, n2);
+ nodes.sumUpdate(n1, n2, (p1*p2));
+ }
+ }
+ }
+
+ private static void setInitialNodes(Dag dag, int marker, DuoNodes nodes) {
+ nodes.clear();
+ int n = dag.nParentNodes(marker);
+ for (int n1=0; n1<n; ++n1) {
+ float p1 = dag.parentProb(marker, n1);
+ for (int n2=0; n2<n; ++n2) {
+ float p2 = dag.parentProb(marker, n2);
+ for (int n3=0; n3<n; ++n3) {
+ float p3 = dag.parentProb(marker, n3);
+ nodes.sumUpdate(n1, n2, n3, (p1*p2*p3));
+ }
+ }
+ }
+ }
+
+ private static double logLikelihood(HapNodes nodes,
+ HapBaumLevel level, int sample, int start, int end) {
+ int hap = 2*sample;
+ double sum = 0.0;
+ for (int j=start; j<end; ++j) {
+ level.setForwardValues(nodes, j, hap);
+ sum += Math.log(level.forwardValuesSum());
+ }
+ return sum;
+ }
+
+ private static double logLikelihood(SingleNodes nodes,
+ SingleBaumLevel level, int sample, int start, int end) {
+ double sum = 0.0;
+ for (int j=start; j<end; ++j) {
+ level.setForwardValues(nodes, j, sample);
+ sum += Math.log(level.forwardValuesSum());
+ }
+ return sum;
+ }
+
+ private static double logLikelihood(DuoNodes nodes,
+ DuoBaumLevel ibdLevel, int sampleA, int sampleB,
+ int start, int end) {
+ double sum = 0.0;
+ for (int level=start; level<end; ++level) {
+ ibdLevel.setForwardValues(nodes, level, sampleA, sampleB);
+ sum += Math.log(ibdLevel.forwardValuesSum());
+ }
+ return sum;
+ }
+
+ // Converts a natural-log likelihood ratio to the log10 scale by dividing
+ // by ln(10); a NaN ratio is reported as a LOD score of 0.
+ private static double lod(double logLR) {
+     return Double.isNaN(logLR) ? 0.0 : (logLR / log_e_10);
+ }
+
+ /**
+ * Returns the estimated frequency of the haplotype segment on the LOD
+ * {@code (-Math.log10)} scale.
+ * @param hap a haplotype index
+ * @param start the start marker index (inclusive)
+ * @param end the end marker index (exclusive)
+ * @param hapPairs the list of haplotype pairs
+ * @param dag the directed acyclic graph that determines the HMM
+ * transition probabilities
+ * @return the estimated frequency of the haplotype segment on the LOD
+ * {@code (-Math.log10)} scale
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= hapPairs.nHaps()}
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || start > end || end > dag.nLevels()}
+ * @throws NullPointerException if {@code dag == null || hapPairs == null}
+ */
+ public static double freqLod(int hap, int start, int end, HapPairs hapPairs,
+         Dag dag) {
+     checkStartAndEnd(start, end, dag.nLevels());
+     // Lower bound on the summed probability so that Math.log stays finite.
+     final double floor = 1e-100;
+     int nStartNodes = dag.nParentNodes(start);
+     double total = 0.0;
+     // Sum, over every parent node at the start level, the probability of
+     // the path that follows the haplotype's alleles from that node.
+     for (int startNode = 0; startNode < nStartNodes; ++startNode) {
+         double pathProb = dag.parentProb(start, startNode);
+         int current = startNode;
+         int m = start;
+         while (m < end && pathProb > 0.0) {
+             int edge = dag.outEdgeBySymbol(m, current, hapPairs.allele(m, hap));
+             if (edge == -1) {
+                 // No out-edge carries this allele: the path has probability
+                 // zero, which also ends the loop.
+                 pathProb = 0.0;
+             }
+             else {
+                 pathProb *= dag.condEdgeProb(m, edge);
+                 current = dag.childNode(m, edge);
+             }
+             ++m;
+         }
+         total += pathProb;
+     }
+     return lod(-Math.log(Math.max(total, floor)));
+ }
+}
diff --git a/ibd/IbdSegment.java b/ibd/IbdSegment.java
new file mode 100644
index 0000000..87a772c
--- /dev/null
+++ b/ibd/IbdSegment.java
@@ -0,0 +1,247 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package ibd;
+
+import blbutil.Const;
+import blbutil.IntPair;
+import java.text.DecimalFormat;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code IbdSegment} represents a pair of IBD haplotype segments.
+ * </p>
+ * <p>Instances of class {@code IbdSegment} are immutable.
+ * </p>
+ *
+ * @author Brian L Browning {@code <browning at uw.edu>}
+ */
+public final class IbdSegment {
+
+ private static final DecimalFormat df2 = new DecimalFormat("0.00");
+
+ private final IntPair hapPair;
+ private final Marker start; // inclusive
+ private final Marker end; // inclusive
+ private final float score;
+ private final int startIndex; // inclusive; -1 if missing
+ private final int endIndex; // inclusive; -1 if missing
+
+ /**
+ * Constructs a new {@code IbdSegment} instance from the specified data.
+ * @param hapPair an ordered pair of haplotype indices
+ * @param start the starting marker for the IBD segment (inclusive)
+ * @param end the ending marker for the IBD segment (inclusive)
+ * @param score the score for the IBD segment
+ * @param startIndex the starting marker index (inclusive) or -1 if
+ * the starting marker index is unknown
+ * @param endIndex the ending marker index (inclusive) or -1 if
+ * the ending marker index is unknown
+ *
+ * @throws IllegalArgumentException if
+ * {@code hapPair.first() < 0 || hapPair.second() <= hapPair.first()}
+ * @throws IllegalArgumentException if
+ * {@code start.chromIndex() != end.chromIndex() || end.pos() < start.pos()}
+ * @throws IllegalArgumentException if
+ * {@code startIndex < -1 || endIndex < -1}
+ * @throws IllegalArgumentException if {@code Float.isNaN(score) == true}
+ * @throws NullPointerException if
+ * {@code hapPair == null || start == null || end == null}
+ */
+ public IbdSegment(IntPair hapPair, Marker start, Marker end, float score,
+ int startIndex, int endIndex) {
+ checkArguments(hapPair, start, end, score, startIndex, endIndex);
+ this.hapPair = hapPair;
+ this.start = start;
+ this.end = end;
+ this.score = score;
+ this.startIndex = startIndex;
+ this.endIndex = endIndex;
+ }
+
+ private void checkArguments(IntPair hapPair, Marker start, Marker end,
+ float score, int startIndex, int endIndex) {
+ if (hapPair.first()<0 || hapPair.second()<=hapPair.first()) {
+ throw new IllegalArgumentException(hapPair.toString());
+ }
+ if ( (start.chromIndex()!=end.chromIndex())
+ || (end.pos() < start.pos()) ){
+ String s = Const.nl + start + Const.nl + end;
+ throw new IllegalArgumentException(s);
+ }
+ if (Float.isNaN(score)) {
+ throw new IllegalArgumentException(String.valueOf(score));
+ }
+ if (startIndex < -1) {
+ throw new IllegalArgumentException(String.valueOf(startIndex));
+ }
+ if (endIndex < -1) {
+ throw new IllegalArgumentException(String.valueOf(endIndex));
+ }
+ }
+
+ /**
+ * Compares the specified object with this {@code IbdSegment} for
+ * equality. Returns {@code true} if the specified object is an
+ * {@code IbdSegment} instance and if this {@code IbdSegment} is
+ * equal to the specified {@code IbdSegment}, and returns
+ * {@code false} otherwise. Two {@code IbdSegment} instances
+ * are equal if they have equal ordered pairs of haplotype indices,
+ * equal starting and ending markers, and equal scores.
+ *
+ * @param o the reference object with which to compare
+ *
+ * @return {@code true} if this {@code IbdSegment} is
+ * equal to the specified object.
+ */
+ @Override
+ public boolean equals(Object o) {
+ if (this==o) {
+ return true;
+ }
+ if ((o instanceof IbdSegment)==false) {
+ return false;
+ }
+ IbdSegment other = (IbdSegment) o;
+ if (this.hapPair.equals(other.hapPair)==false) {
+ return false;
+ }
+ if (this.start.equals(other.start)==false) {
+ return false;
+ }
+ if (this.end.equals(other.end)==false) {
+ return false;
+ }
+ return Float.floatToIntBits(this.score)
+ == Float.floatToIntBits(other.score);
+ }
+
+ /**
+ * <p>Returns the hash code value for this object. The hash code does not
+ * depend on the values of {@code this.startIndex()} or
+ * {@code this.endIndex()}. The hash code is defined by the following
+ * calculation:
+ * </p>
+ * <pre>
+ * int hash = 5;
+ * hash = 67 * hash + this.hapPair().hashCode();
+ * hash = 67 * hash + this.start().hashCode();
+ * hash = 67 * hash + this.end().hashCode();
+ * hash = 67 * hash + Float.floatToIntBits(this.score());
+ </pre>
+ * @return the hash code value for this object
+ */
+ @Override
+ public int hashCode() {
+ int hash = 5;
+ hash = 67 * hash + this.hapPair.hashCode();
+ hash = 67 * hash + this.start.hashCode();
+ hash = 67 * hash + this.end.hashCode();
+ hash = 67 * hash + Float.floatToIntBits(this.score);
+ return hash;
+ }
+
+ /**
+ * Returns the first haplotype index.
+ * @return the first haplotype index
+ */
+ public int hap1() {
+ return hapPair.first();
+ }
+
+ /**
+ * Returns the second haplotype index.
+ * @return the second haplotype index
+ */
+ public int hap2() {
+ return hapPair.second();
+ }
+
+ /**
+ * Returns the ordered pair of haplotype indices.
+ * @return the ordered pair of haplotype indices
+ */
+ public IntPair hapPair() {
+ return hapPair;
+ }
+
+
+ /**
+ * Returns the starting marker (inclusive).
+ * @return the starting marker (inclusive)
+ */
+ public Marker start() {
+ return start;
+ }
+
+ /**
+ * Returns the ending marker (inclusive).
+ * @return the ending marker (inclusive)
+ */
+ public Marker end() {
+ return end;
+ }
+
+ /**
+ * Returns the IBD segment score.
+ * @return the IBD segment score
+ */
+ public float score() {
+ return score;
+ }
+
+ /**
+ * Returns the starting marker index (inclusive) or -1 if the starting
+ * marker index is unknown.
+ * @return the starting marker index (inclusive)
+ */
+ public int startIndex() {
+ return startIndex;
+ }
+
+ /**
+ * Returns the ending marker index (inclusive) or -1 if the ending
+ * marker index is unknown.
+ * @return the ending marker index (inclusive)
+ */
+ public int endIndex() {
+ return endIndex;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+     String formattedScore;
+     // java.text.DecimalFormat is not thread-safe and df2 is shared by all
+     // instances, so access is serialized to keep concurrent toString()
+     // calls from corrupting each other's output.
+     synchronized (df2) {
+         formattedScore = df2.format(score);
+     }
+     // Tab-delimited fields: hap1, hap2, chromosome, start position,
+     // end position, score.
+     StringBuilder sb = new StringBuilder();
+     sb.append(hapPair.first());
+     sb.append(Const.tab);
+     sb.append(hapPair.second());
+     sb.append(Const.tab);
+     sb.append(start.chrom());
+     sb.append(Const.tab);
+     sb.append(start.pos());
+     sb.append(Const.tab);
+     sb.append(end.pos());
+     sb.append(Const.tab);
+     sb.append(formattedScore);
+     return sb.toString();
+ }
+}
diff --git a/ibd/IbsHapSegments.java b/ibd/IbsHapSegments.java
new file mode 100644
index 0000000..8579264
--- /dev/null
+++ b/ibd/IbsHapSegments.java
@@ -0,0 +1,353 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package ibd;
+
+import blbutil.IndexMap;
+import blbutil.IntList;
+import blbutil.IntPair;
+import haplotype.HapPairs;
+import haplotype.SampleHapPairs;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.IntStream;
+
+/**
+ * <p>Class {@code IbsHapSegments} identifies IBS haplotype segments in
+ * a list of sample haplotype pairs.
+ * </p>
+ * <p>Instances of {@code IbsHapSegments} are immutable.
+ * </p>
+ *
+ * Reference: Gusev A, Lowe JK, Stoffel M, Daly MJ, Altshuler D, Breslow JL,
+ * Friedman JM, Pe'er I (2008) Whole population, genomewide mapping
+ * of hidden relatedness. Genome Research 2009;19(2):318-26.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class IbsHapSegments {
+
+ private static final int INIT_LIST_SIZE=500;
+
+ private final SampleHapPairs haps;
+ private final double[] pos;
+ private final double minLength;
+ private final int[] windowStarts;
+ private final int[][][] idSets;
+
+ private final int DEL = -67;
+
+ /**
+ * Constructs a new {@code IbsHapSegments} object from the specified data.
+ * @param haps the sample haplotype pairs
+ * @param pos an array of non-decreasing marker positions whose {@code j}-th
+ * element is the position of marker {@code haps.marker(j)}
+ * @param minLength the minimum length of a reported IBS segment
+ *
+ * @throws IllegalArgumentException if
+ * {@code haps.nMarkers() != pos.length}
+ * @throws IllegalArgumentException if {@code pos[0] < 0}, or if
+ * {@code pos[j] < pos[j-1]} for any {@code j} satisfying
+ * {@code (0 < j && j < pos.length)}
+ * @throws IllegalArgumentException if
+ * {@code (Double.isNaN(pos[j])==true || Double.isInfinite(pos[j]) == true)}
+ * for any {@code j} satisfying {@code (0 <= j && j < pos.length)}
+ * @throws IllegalArgumentException if {@code minLength <= 0.0f}
+ * @throws NullPointerException if {@code haps == null || pos == null}
+ */
+ public IbsHapSegments(SampleHapPairs haps, double[] pos, double minLength) {
+ checkArguments(haps, pos, minLength);
+ this.haps = haps;
+ this.pos = pos.clone();
+ this.minLength = minLength;
+ this.windowStarts = windowStarts(pos, minLength);
+ this.idSets = idSets(haps, windowStarts);
+ }
+
+ /**
+ * Constructs a new {@code IbsHapSegments} object with marker positions
+ * defined to be marker indices.
+ * @param haps the sample haplotype pairs
+ * @param minMarkers the minimum number of shared markers in a reported
+ * IBS segment
+ * @throws NullPointerException if {@code haps == null}
+ */
+ public IbsHapSegments(SampleHapPairs haps, int minMarkers) {
+ this(haps, pos(haps.nMarkers()), minMarkers);
+ }
+
+ private static double[] pos(int nMarkers) {
+ double[] pos = new double[nMarkers];
+ for (int j=0; j<pos.length; ++j) {
+ pos[j] = j;
+ }
+ return pos;
+ }
+
+ private static void checkArguments(SampleHapPairs haps, double[] pos,
+ double minLength) {
+ if (minLength <= 0.0f) {
+ throw new IllegalArgumentException("minLength: " + minLength);
+ }
+ if (haps.nMarkers()!= pos.length) {
+ throw new IllegalArgumentException("haps.nMarkers()!= pos.length");
+ }
+ if (pos[0]<0 || Double.isNaN(pos[0]) || Double.isInfinite(pos[0]) ) {
+ throw new IllegalArgumentException("pos=" + pos[0]);
+ }
+ for (int j=1; j<pos.length; ++j) {
+ if (Double.isNaN(pos[j]) || Double.isInfinite(pos[j]) ) {
+ throw new IllegalArgumentException("pos=" + pos[j]);
+ }
+ if (pos[j] < pos[j-1]) {
+ String s = "positions are not non-decreasing";
+ throw new IllegalArgumentException(s);
+ }
+ }
+ }
+
+ // Returns the marker indices at which consecutive windows start. Windows
+ // begin at index 0 and each later window begins at the index located by
+ // searching for the previous window's start position plus half of
+ // minIbsLength.
+ private static int[] windowStarts(double[] pos, double minIbsLength) {
+     double step = minIbsLength/2.0f;
+     IntList indices = new IntList(pos.length/10);
+     int index = 0;
+     do {
+         indices.add(index);
+         double nextPos = pos[index] + step;
+         // If step is too small to change pos[index] in floating point,
+         // the search can return index itself and the loop would never
+         // terminate; always advance by at least one marker. Whenever
+         // nextPos > pos[index] the search result already exceeds index,
+         // so this is a no-op in that case.
+         index = Math.max(index + 1, nextIndex(pos, index, nextPos));
+     } while (index < pos.length);
+     return indices.toArray();
+ }
+
+ private static int nextIndex(double[] pos, int start, double targetPos) {
+ int nextIndex = Arrays.binarySearch(pos, start, pos.length, targetPos);
+ return (nextIndex<0) ? -nextIndex-1 : nextIndex;
+ }
+
+ private static int[][][] idSets(SampleHapPairs haps, int[] starts) {
+ return IntStream.range(0, starts.length)
+ .parallel()
+ .mapToObj(j -> intPair(j, starts, haps.nMarkers()))
+ .map(ip -> hapDictionary(haps, ip))
+ .map(m -> fillIdSet(m, haps.nHaps()))
+ .toArray(int[][][]::new);
+ }
+
+ private static IntPair intPair(int index, int[] starts, int nMarkers) {
+ if (index+1 < starts.length) {
+ return new IntPair(starts[index], starts[index+1]);
+ }
+ else {
+ return new IntPair(starts[index], nMarkers);
+ }
+ }
+
+ private static Map<Haplotype, IntList> hapDictionary(SampleHapPairs haps,
+ IntPair ip) {
+ Map<Haplotype, IntList> map = new HashMap<>();
+ IntStream.range(0, haps.nHaps()) // forEach does not allow parallelization
+ .mapToObj(h -> new Haplotype(haps, h, ip.first(), ip.second()))
+ .forEach((h) -> {
+ IntList list = map.get(h);
+ if (list==null) {
+ list = new IntList(10);
+ map.put(h, list);
+ }
+ list.add(h.hapIndex());
+ });
+ return map;
+ }
+
+ // Builds an array indexed by haplotype: each haplotype index stored in one
+ // of the map's lists is mapped to the array form of that list, and every
+ // member of a list shares the same array instance.
+ private static int[][] fillIdSet(Map<Haplotype, IntList> hapMap, int nHaps) {
+     int[][] idSet = new int[nHaps][];
+     for (IntList members : hapMap.values()) {
+         int[] shared = members.toArray();
+         for (int hap : shared) {
+             idSet[hap] = shared;
+         }
+     }
+     return idSet;
+ }
+
+ /**
+ * Returns the sample haplotype pairs.
+ * @return the sample haplotype pairs
+ */
+ public SampleHapPairs haps() {
+ return haps;
+ }
+
+ /**
+ * Returns an array of non-decreasing marker positions whose {@code j}-th
+ * element is the position of marker {@code this.haps().marker(j)}.
+ * @return an array of marker positions
+ */
+ public double[] pos() {
+ return pos.clone();
+ }
+
+ /**
+ * Returns the minimum length of an IBS segment.
+ * @return the minimum length of an IBS segment
+ */
+ public double minIbsLength() {
+ return minLength;
+ }
+
+ /**
+ * Returns the list of haplotype segments for other haplotypes that
+ * are IBS with the specified haplotype and have length greater
+ * than or equal to {@code this.minIbsLength()}.
+ *
+ * @param hap the haplotype index
+ * @return a list of IBS haplotype segments
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= this.haps().nHaps()}
+ */
+ public List<HapSegment> find(int hap) {
+ List<HapSegment> segments = new ArrayList<>(INIT_LIST_SIZE);
+ int nil = Integer.MIN_VALUE;
+ IndexMap prev = new IndexMap(haps.nHaps()-1, nil);
+ IndexMap next = new IndexMap(haps.nHaps()-1, nil);
+ int window = 0;
+ matches(idSets, hap, window, prev);
+ while (++window < idSets.length) {
+ matches(idSets, hap, window, next);
+ extend(prev, next);
+ save(haps, hap, prev, windowStarts[window], segments);
+ prev.clear();
+ IndexMap tmp = prev;
+ prev = next;
+ next = tmp;
+ }
+ save(haps, hap, prev, haps.nMarkers(), segments);
+ return segments;
+ }
+
+ /**
+ * Returns a list of haplotype segments for other haplotypes
+ * that are IBS with the specified haplotype and that have length greater
+ * than or equal to {@code this.minIbsLength()}. An IBS segment is
+ * permitted (but not required) to be excluded from the returned
+ * list if both end-points of the IBD segment are interior points of
+ * another IBD segment.
+ *
+ * @param hap the haplotype index
+ * @return a list of IBS haplotype segments
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= this.haps().nHaps()}
+ */
+ public List<HapSegment> filteredFind(int hap) {
+ List<HapSegment> segments = new ArrayList<>(INIT_LIST_SIZE);
+ int nil = Integer.MIN_VALUE;
+ IndexMap prev = new IndexMap(haps.nHaps()-1, nil);
+ IndexMap next = new IndexMap(haps.nHaps()-1, nil);
+ int window = 0;
+ matches(idSets, hap, window, prev);
+ while (++window < idSets.length) {
+ matches(idSets, hap, window, next);
+ int minExtendedStartWindow = extend(prev, next);
+ filteredSave(haps, hap, prev, minExtendedStartWindow,
+ windowStarts[window], segments);
+ prev.clear();
+ IndexMap tmp = prev;
+ prev = next;
+ next = tmp;
+ }
+ filteredSave(haps, hap, prev, window, haps.nMarkers(), segments);
+ return segments;
+ }
+
+ private void matches(int[][][] idSets, int hap, int window, IndexMap map) {
+ assert map.size()==0;
+ int[] hapIbsSet = idSets[window][hap];
+ for (int h : hapIbsSet) {
+ if (h!=hap) {
+ map.put(h, window);
+ }
+ }
+ }
+
+ /*
+ * Carries start windows forward from prev into next. For every haplotype
+ * enumerated in next that also has a non-nil value in prev, the value in
+ * next is overwritten with prev's start window and prev's value is set to
+ * DEL; save and filteredSave skip entries whose value is DEL.
+ *
+ * Returns the minimum start window index from the extended segments, or
+ * Integer.MAX_VALUE if no segment was extended.
+ */
+ private int extend(IndexMap prev, IndexMap next) {
+ int nil = next.nil();
+ int minStart = Integer.MAX_VALUE;
+ for (int i=0, n=next.size(); i<n; ++i) {
+ int hap = next.enumeratedKey(i);
+ int prevStart = prev.get(hap);
+ // NOTE(review): assumes prev and next were built with the same nil
+ // value, as find and filteredFind do -- confirm for other callers.
+ if (prevStart != nil) {
+ next.put(hap, prevStart);
+ prev.put(hap, DEL);
+ if (prevStart < minStart) {
+ minStart = prevStart;
+ }
+ }
+ }
+ return minStart;
+ }
+
+ // Saves every non-DEL entry of prev as a HapSegment when it meets the
+ // minimum length. This is the unfiltered form of filteredSave: passing
+ // Integer.MAX_VALUE as the start-window bound admits every start window.
+ private void save(HapPairs haps, int hap1,
+         IndexMap prev, int prevExclEnd, List<HapSegment> segments) {
+     filteredSave(haps, hap1, prev, Integer.MAX_VALUE, prevExclEnd, segments);
+ }
+
+ private void filteredSave(HapPairs haps, int hap1, IndexMap prev,
+ int minExtendedStartWindow, int prevExclEnd, List<HapSegment> segments) {
+ for (int i=0, n=prev.size(); i<n; ++i) {
+ int hap2 = prev.enumeratedKey(i);
+ int startWindow = prev.enumeratedValue(i);
+ if (startWindow != DEL && startWindow <= minExtendedStartWindow) {
+ int start = start(haps, hap1, hap2, windowStarts[startWindow]);
+ int inclEnd = inclusiveEnd(haps, hap1, hap2, prevExclEnd);
+ if ( (pos[inclEnd] - pos[start]) >= minLength) {
+ segments.add( new HapSegment(hap2, start, inclEnd) );
+ }
+ }
+ }
+ }
+
+ private int start(HapPairs haps, int hap1, int hap2, int start) {
+ while (start>0
+ && haps.allele(start-1, hap1)==haps.allele(start-1, hap2)) {
+ --start;
+ }
+ return start;
+ }
+
+ private int inclusiveEnd(HapPairs haps, int hap1, int hap2, int end) {
+ while (end<haps.nMarkers()
+ && haps.allele(end, hap1)==haps.allele(end, hap2)) {
+ ++end;
+ }
+ return end-1;
+ }
+}
diff --git a/main/AlleleProbs.java b/main/AlleleProbs.java
new file mode 100644
index 0000000..1baf707
--- /dev/null
+++ b/main/AlleleProbs.java
@@ -0,0 +1,162 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Interface {@code AlleleProbs} represents per-haplotype allele
+ * probabilities for a list of samples.
+ * </p>
+ * <p>All instances of {@code AlleleProbs} are required to be immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+ public interface AlleleProbs {
+
+ /**
+ * Returns the probability that the specified marker allele is
+ * present on the first haplotype of the specified sample.
+ *
+ * @param marker a marker index
+ * @param sample a sample index
+ * @param allele an allele index
+ * @return the probability that the specified marker allele is
+ * present on the first haplotype of the specified sample
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.marker(marker).nAlleles()}
+ */
+ float alProb1(int marker, int sample, int allele);
+
+ /**
+ * Returns the probability that the specified marker allele is
+ * present on the second haplotype of the specified sample.
+ *
+ * @param marker a marker index
+ * @param sample a sample index
+ * @param allele an allele index
+ * @return the probability that the specified marker allele is
+ * present on the second haplotype of the specified sample
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.marker(marker).nAlleles()}
+ */
+ float alProb2(int marker, int sample, int allele);
+
+ /**
+ * Returns the phased genotype probability, equal to
+ * {@code (this.alProb1(marker, sample, allele1)
+ * * this.alProb2(marker, sample, allele2))}.
+ *
+ * @param marker a marker index
+ * @param sample a sample index
+ * @param allele1 allele index of the allele on the first haplotype
+ * @param allele2 allele index of the allele on the second haplotype
+ * @return the phased genotype probability equal to
+ * {@code (this.alProb1(marker, sample, allele1)
+ * * this.alProb2(marker, sample, allele2))}
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele1 < 0 || allele1 >= this.marker(marker).nAlleles()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele2 < 0 || allele2 >= this.marker(marker).nAlleles()}
+ */
+ float gtProb(int marker, int sample, int allele1, int allele2);
+
+ /**
+ * Returns the marker allele with maximum probability for the
+ * first haplotype of the specified sample. If more than one allele
+ * has maximum probability, one of the alleles with maximum
+ * probability will be returned.
+ * @param marker a marker index
+ * @param sample a sample index
+ * @return the marker allele with maximum probability for the
+ * first haplotype of the specified sample
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ int allele1(int marker, int sample);
+
+ /**
+ * Returns the marker allele with maximum probability for the
+ * second haplotype of the specified sample. If more than one allele
+ * has maximum probability, one of the alleles with maximum
+ * probability will be returned.
+ * @param marker a marker index
+ * @param sample a sample index
+ * @return the marker allele with maximum probability for the
+ * second haplotype of the specified sample
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ int allele2(int marker, int sample);
+
+ /**
+ * Returns the specified marker.
+ * @param marker a marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ Marker marker(int marker);
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ Markers markers();
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ int nMarkers();
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ int nSamples();
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ Samples samples();
+ }
diff --git a/main/BasicAlleleProbs.java b/main/BasicAlleleProbs.java
new file mode 100644
index 0000000..1f9eb02
--- /dev/null
+++ b/main/BasicAlleleProbs.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Markers;
+import vcf.Marker;
+import beagleutil.Samples;
+import java.util.Arrays;
+import java.util.Comparator;
+
+/**
+ * <p>Class {@code BasicAlleleProbs} stores per-haplotype allele probabilities
+ * for a list of samples.
+ * </p>
+ * <p>Instances of class {@code BasicAlleleProbs} are immutable.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class BasicAlleleProbs implements AlleleProbs {
+
+ private final Markers markers;
+ private final Samples samples;
+ private final HapAlleleProbs[] alleleProbs;
+
+ /**
+ * Construct a new {@code BasicAlleleProbs} instance from the specified
+ * data.
+ * @param alProbs allele probabilities for each haplotype
+ * @throws IllegalArgumentException if
+ * {@code alProbs[j].markers().equals(alProbs[k].markers) == false}
+ * for any {@code j, k} satisfying
+ * {@code (0 <= j && j < k && k < alProbs.length)}
+ * @throws IllegalArgumentException if
+ * {@code alProbs[j].samples().equals(alProbs[k].samples) == false}
+ * for any {@code j, k} satisfying
+ * {@code (0 <= j && j < k && k < alProbs.length)}
+ * @throws IllegalArgumentException if
+ * {@code alProbs.length == 0 || alProbs.length != alProbs[0].nMarkers()}
+ * @throws NullPointerException if
+ * {@code alProbs == null || alProbs[j] == null} for any {@code j} satisfying
+ * {@code (0 <= j && j < alProbs.length)}
+ */
+ public BasicAlleleProbs(HapAlleleProbs[] alProbs) {
+ if (alProbs.length==0) {
+ throw new IllegalArgumentException("alProbs.length==0");
+ }
+ this.alleleProbs = alProbs.clone();
+ Arrays.sort(alleleProbs, comparator());
+ this.markers = alleleProbs[0].markers();
+ this.samples = alleleProbs[0].samples();
+ for (int j=1; j<alleleProbs.length; ++j) {
+ if (markers.equals(alleleProbs[j].markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ if (samples.equals(alleleProbs[j].samples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ }
+ }
+
+ private static Comparator<HapAlleleProbs> comparator() {
+ return (HapAlleleProbs t, HapAlleleProbs t1) -> {
+ if (t.hapIndex() != t1.hapIndex()) {
+ return t.hapIndex() < t1.hapIndex() ? -1 : 1;
+ }
+ else {
+ return 0;
+ }
+ } ;
+ }
+
+ @Override
+ public float alProb1(int marker, int sample, int allele) {
+ assert alleleProbs[2*sample].hapIndex()/2 == sample;
+ return alleleProbs[2*sample].allele(marker, allele);
+ }
+
+ @Override
+ public float alProb2(int marker, int sample, int allele) {
+ assert alleleProbs[2*sample + 1].hapIndex()/2 == sample;
+ return alleleProbs[2*sample + 1].allele(marker, allele);
+ }
+
+ @Override
+ public float gtProb(int marker, int sample, int allele1, int allele2) {
+ return alProb1(marker, sample, allele1)*alProb2(marker, sample, allele2);
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ return alleleProbs[2*sample].alleleWithMaxProb(marker);
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ return alleleProbs[2*sample + 1].alleleWithMaxProb(marker);
+ }
+
+ @Override
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return markers.marker(marker);
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+}
diff --git a/main/BasicGenotypeValues.java b/main/BasicGenotypeValues.java
new file mode 100644
index 0000000..3dee1ed
--- /dev/null
+++ b/main/BasicGenotypeValues.java
@@ -0,0 +1,116 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Markers;
+import vcf.Marker;
+import beagleutil.Samples;
+import java.util.concurrent.atomic.AtomicReferenceArray;
+
+/**
+ * <p>Class {@code BasicGenotypeValues} stores values for each possible
+ * genotype for each sample at each marker.
+ * </p>
+ * <p>Instances of class {@code BasicGenotypeValues} are thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BasicGenotypeValues implements GenotypeValues {
+
+ private final Markers markers;
+ private final Samples samples;
+
+ /*
+ * Class {@code SampleGenotypeValues} is thread-safe.
+ */
+ private final AtomicReferenceArray<SampleGenotypeValues> values;
+
+ /**
+ * Constructs a new {@code BasicGenotypeValues} instance with initial
+ * value 0 for each possible genotype for each sample at each marker.
+ * @param markers a list of markers
+ * @param samples a list of samples
+ * @throws NullPointerException if
+ * {@code markers == null || samples == null}
+ */
+ public BasicGenotypeValues(Markers markers, Samples samples) {
+ if (markers==null) {
+ throw new NullPointerException("markers");
+ }
+ if (samples==null) {
+ throw new NullPointerException("samples");
+ }
+ this.markers = markers;
+ this.samples = samples;
+ this.values = new AtomicReferenceArray<>(samples.nSamples());
+ for (int j=0, n=samples.nSamples(); j<n; ++j) {
+ this.values.set(j, new SampleGenotypeValues(markers, samples, j));
+ }
+ }
+
+ @Override
+ public float value(int marker, int sample, int genotype) {
+ return this.values.get(sample).value(marker, genotype);
+ }
+
+ @Override
+ public void add(int sample, double[] values) {
+ this.values.get(sample).add(values);
+ }
+
+ @Override
+ public void add(int marker, int sample, int genotype, double value) {
+ this.values.get(sample).add(marker, genotype, value);
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return markers.marker(marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(100);
+ sb.append('[');
+ sb.append(this.getClass().toString());
+ sb.append(']');
+ return sb.toString();
+ }
+}
diff --git a/main/ConstrainedAlleleProbs.java b/main/ConstrainedAlleleProbs.java
new file mode 100644
index 0000000..cfa050e
--- /dev/null
+++ b/main/ConstrainedAlleleProbs.java
@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Markers;
+import vcf.Marker;
+import beagleutil.Samples;
+import haplotype.SampleHapPairs;
+
+/**
+ * <p>Class {@code ConstrainedAlleleProbs} is a wrapper for an
+ * {@code AlleleProbs} instance that changes the wrapped haplotype allele
+ * probabilities for a subset of markers.
+ * </p>
+ * <p>Instances of class {@code ConstrainedAlleleProbs} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+ public class ConstrainedAlleleProbs implements AlleleProbs {
+
+     private final AlleleProbs alProbs;
+     private final SampleHapPairs shp;
+     private final int[] indexMap;
+
+     /**
+      * Constructs a new {@code ConstrainedAlleleProbs} instance. At each
+      * marker that is mapped to a marker of the specified
+      * {@code SampleHapPairs} object, the allele carried by a haplotype in
+      * that object has probability 1 and every other allele has
+      * probability 0. At all other markers the probabilities are taken from
+      * the wrapped {@code AlleleProbs} object.
+      * @param shp phased haplotype pairs for a subset of markers
+      * @param alProbs the allele probabilities
+      * @param indexMap an array of length {@code alProbs.nMarkers()}
+      * whose {@code j}-th element is the index of marker
+      * {@code alProbs.marker(j)} in {@code shp.markers()}, or is -1 if the
+      * marker is not present in {@code shp.markers()}
+      *
+      * @throws IllegalArgumentException if
+      * {@code alProbs.nMarkers() != indexMap.length}
+      * @throws IllegalArgumentException if
+      * {@code (indexMap[j] != -1
+      * && alProbs.marker(j).equals(shp.marker(indexMap[j])) == false)}
+      * for any {@code j} satisfying {@code (0 <= j && j < indexMap.length)}
+      * @throws IllegalArgumentException if
+      * {@code shp.samples().equals(alProbs.samples()) == false}
+      * @throws NullPointerException if
+      * {@code shp == null || alProbs == null || indexMap == null}
+      */
+     public ConstrainedAlleleProbs(SampleHapPairs shp, AlleleProbs alProbs,
+             int[] indexMap) {
+         if (alProbs.nMarkers() != indexMap.length) {
+             throw new IllegalArgumentException("inconsistent markers");
+         }
+         for (int j=0; j<indexMap.length; ++j) {
+             if (indexMap[j] != -1
+                     && !alProbs.marker(j).equals(shp.marker(indexMap[j]))) {
+                 throw new IllegalArgumentException("inconsistent markers");
+             }
+         }
+         if (!shp.samples().equals(alProbs.samples())) {
+             throw new IllegalArgumentException("inconsistent sample");
+         }
+         this.shp = shp;
+         this.alProbs = alProbs;
+         this.indexMap = indexMap.clone();
+     }
+
+     @Override
+     public float alProb1(int marker, int sample, int allele) {
+         int shpMarker = indexMap[marker];
+         if (shpMarker != -1) {
+             // constrained marker: probability is 1 for the phased allele
+             return shp.allele1(shpMarker, sample) == allele ? 1f : 0f;
+         }
+         return alProbs.alProb1(marker, sample, allele);
+     }
+
+     @Override
+     public float alProb2(int marker, int sample, int allele) {
+         int shpMarker = indexMap[marker];
+         if (shpMarker != -1) {
+             // constrained marker: probability is 1 for the phased allele
+             return shp.allele2(shpMarker, sample) == allele ? 1f : 0f;
+         }
+         return alProbs.alProb2(marker, sample, allele);
+     }
+
+     @Override
+     public float gtProb(int marker, int sample, int allele1, int allele2) {
+         float p1 = alProb1(marker, sample, allele1);
+         float p2 = alProb2(marker, sample, allele2);
+         return p1*p2;
+     }
+
+     @Override
+     public int allele1(int marker, int sample) {
+         int shpMarker = indexMap[marker];
+         return (shpMarker == -1)
+                 ? alProbs.allele1(marker, sample)
+                 : shp.allele1(shpMarker, sample);
+     }
+
+     @Override
+     public int allele2(int marker, int sample) {
+         int shpMarker = indexMap[marker];
+         return (shpMarker == -1)
+                 ? alProbs.allele2(marker, sample)
+                 : shp.allele2(shpMarker, sample);
+     }
+
+     @Override
+     public int nMarkers() {
+         return alProbs.nMarkers();
+     }
+
+     @Override
+     public Markers markers() {
+         return alProbs.markers();
+     }
+
+     @Override
+     public Marker marker(int marker) {
+         return alProbs.marker(marker);
+     }
+
+     @Override
+     public int nSamples() {
+         return alProbs.nSamples();
+     }
+
+     @Override
+     public Samples samples() {
+         return alProbs.samples();
+     }
+ }
diff --git a/main/CurrentData.java b/main/CurrentData.java
new file mode 100644
index 0000000..d6fbf3f
--- /dev/null
+++ b/main/CurrentData.java
@@ -0,0 +1,484 @@
+/*
+ * Copyright (C) 2015 browning
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package main;
+
+import beagleutil.Samples;
+import haplotype.BasicSampleHapPairs;
+import haplotype.HapPair;
+import haplotype.SampleHapPairs;
+import haplotype.Weights;
+import java.util.List;
+import vcf.Data;
+import vcf.GL;
+import vcf.Markers;
+import vcf.SplicedGL;
+
+/**
+ * <p>Class {@code CurrentData} represents input data for the current marker
+ * window. All marker indices returned by methods of class {@code CurrentData}
+ * are indexed with respect to the current marker window.
+ * </p>
+ * <p>Instances of class {@code CurrentData} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+ public class CurrentData {
+
+ /* lower bound applied to the inter-marker map distance in recombRate() */
+ private static final float MIN_GEN_DIST = 1e-7f;
+
+ private final int window;
+ private final SampleHapPairs initHaps;
+ private final int prevSplice;
+ private final int nextOverlap;
+ private final int nextSplice;
+ private final int nextTargetSplice;
+ private final int nextTargetOverlap;
+
+ private final GL targetGL;
+ private final NuclearFamilies families;
+ private final Weights weights;
+
+ private final Samples refSamples;
+ private final Samples targetSamples;
+ private final Samples allSamples;
+
+ private final Markers markers;
+ private final Markers targetMarkers;
+ private final int[] targetMarkerIndex;
+ private final int[] markerIndex;
+
+ private final List<HapPair> restRefHapPairs;
+ private final SampleHapPairs refSampleHapPairs;
+ private final SampleHapPairs restrictedRefSampleHapPairs;
+
+ private final float[] recombRate;
+
+ /**
+ * Constructs a new {@code CurrentData} instance from the specified
+ * data.
+ *
+ * @param par the analysis parameters
+ * @param genMap the genetic map or {@code null} if no
+ * genetic map is specified
+ * @param data input data for the current marker window
+ * @param overlapHaps haplotype constraints in the overlap with previous
+ * window or {@code null} if no such constraints exist
+ * @param families the parent-offspring relationships
+ *
+ * @throws IllegalArgumentException if
+ * {@code data.targetSamples().equals(families.samples()) == false}
+ * @throws IllegalArgumentException if
+ * {@code (overlapHaps != null
+ * && data.targetSamples().equals(overlapHaps.samples()) == false)}
+ * @throws IllegalArgumentException if
+ * {@code (overlapHaps != null &&
+ * overlapHaps.marker(j).equals(data.targetGL().marker(j)) == false)}
+ * for some {@code j} satisfying
+ * {@code (0 <= j && j < overlapHaps.nMarkers())}.
+ * NOTE(review): this marker check is not performed directly in this
+ * constructor; presumably it is done by the {@code SplicedGL}
+ * constructor -- confirm.
+ * @throws NullPointerException if
+ * {@code par == null || data == null || families == null}
+ */
+ public CurrentData(Par par, GeneticMap genMap, Data data,
+ SampleHapPairs overlapHaps, NuclearFamilies families) {
+ if (families.samples().equals(data.targetSamples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ if (overlapHaps != null
+ && data.targetSamples().equals(overlapHaps.samples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ this.window = data.window();
+ this.initHaps = overlapHaps;
+ this.prevSplice = data.overlap()/2;
+ this.nextOverlap = nextOverlap(data, par.overlap());
+ this.nextSplice = nextSplice(data, par.overlap());
+ this.nextTargetOverlap = targetIndex(data, nextOverlap);
+ this.nextTargetSplice = targetIndex(data, nextSplice);
+
+ this.families = families;
+ this.weights = new Weights(families);
+ // splice the overlap constraints into the target data when present
+ this.targetGL = (overlapHaps==null) ? data.targetGL() :
+ new SplicedGL(overlapHaps, data.targetGL());
+
+ this.refSamples = data.refSamples();
+ this.targetSamples = data.targetSamples();
+ this.allSamples = data.allSamples();
+ this.markers = data.markers();
+ this.targetMarkers = data.targetMarkers();
+ this.targetMarkerIndex = refToTargetMarker(data);
+ this.markerIndex = targetToRefMarker(data);
+
+ this.restRefHapPairs = data.restrictedRefHapPairs();
+ this.refSampleHapPairs = data.refSampleHapPairs();
+ this.restrictedRefSampleHapPairs = refSamples != null ?
+ new BasicSampleHapPairs(refSamples, restRefHapPairs) : null;
+ this.recombRate = recombRate(targetMarkers, genMap, par.mapscale());
+ }
+
+ /* returns the first index in the next overlap */
+ private static int nextOverlap(Data data, int overlap) {
+ if (data.canAdvanceWindow() && data.lastWindowOnChrom()==false) {
+ return data.nMarkers() - overlap;
+ }
+ else {
+ return data.nMarkers();
+ }
+ }
+
+ /* returns the first index after the next splice point */
+ private static int nextSplice(Data data, int overlap) {
+ if (data.canAdvanceWindow() && data.lastWindowOnChrom()==false) {
+ return data.nMarkers() - overlap + (overlap/2);
+ }
+ else {
+ return data.nMarkers();
+ }
+ }
+
+ /* first target index on or after specified ref index */
+ private static int targetIndex(Data data, int refIndex) {
+ int i=0;
+ while (i<data.nTargetMarkers() && data.markerIndex(i)<refIndex) {
+ ++i;
+ }
+ return i;
+ }
+
+ /* returns null if map is null; otherwise rr[0] = 0 and, for j > 0,
+ * rr[j] = 1 - exp(-2 * mapScale * d), where d is the absolute difference
+ * between the map positions of markers j-1 and j, floored at MIN_GEN_DIST */
+ private static float[] recombRate(Markers markers, GeneticMap map,
+ float mapScale) {
+ if (map==null) {
+ return null;
+ }
+ else {
+ double c = -2.0*mapScale;
+ float[] rr = new float[markers.nMarkers()];
+ rr[0] = 0.0f;
+ double lastGenPos = map.genPos(markers.marker(0));
+ for (int j=1; j<rr.length; ++j) {
+ double genPos = map.genPos(markers.marker(j));
+ double genDist = Math.max(Math.abs(genPos - lastGenPos), MIN_GEN_DIST);
+ rr[j] = (float) -Math.expm1(c*genDist);
+ lastGenPos = genPos;
+ }
+ return rr;
+ }
+ }
+
+ /**
+ * Returns the marker window index.
+ * @return the marker window index
+ */
+ public int window() {
+ return window;
+ }
+
+ /**
+ * Returns the first marker index after the splice point with
+ * the previous marker window. Returns 0 if the current marker window
+ * is the first marker window.
+ * @return the first marker index after the splice point with
+ * the previous marker window
+ */
+ public int prevSplice() {
+ return prevSplice;
+ }
+
+ /**
+ * Returns the first marker index in the overlap between this
+ * marker window and the next marker window, or
+ * returns {@code this.nMarkers()} if there is no overlap.
+ * @return the first marker index in the overlap between this
+ * marker window and the next marker window
+ */
+ public int nextOverlap() {
+ return nextOverlap;
+ }
+
+ /**
+ * Returns the first marker index after the splice point between this
+ * marker window and the next marker window, or returns
+ * {@code this.nMarkers()} if there is no overlap or if there are
+ * no markers after the splice point.
+ * @return the first marker index after the next splice point
+ */
+ public int nextSplice() {
+ return nextSplice;
+ }
+
+ /**
+ * Returns the first target marker index after the splice point with
+ * the previous marker window. Returns 0 if the current marker window
+ * is the first marker window.
+ * @return the first target marker index after the splice point with
+ * the previous marker window
+ */
+ public int prevTargetSplice() {
+ return initHaps==null ? 0 : initHaps.nMarkers();
+ }
+
+ /**
+ * Returns the first target marker index in the overlap between this
+ * marker window and the next marker window, or
+ * returns {@code this.nTargetMarkers()} if there is no overlap or if
+ * there are no target markers in the overlap.
+ * @return the first target marker index in the overlap between this
+ * marker window and the next marker window
+ */
+ public int nextTargetOverlap() {
+ return nextTargetOverlap;
+ }
+
+ /**
+ * Returns the first target marker index after the splice point between this
+ * marker window and the next marker window, or returns
+ * {@code this.nTargetMarkers()} if there is no overlap or if there are
+ * no target markers after the splice point
+ * @return the first target marker index after the next splice point
+ */
+ public int nextTargetSplice() {
+ return nextTargetSplice;
+ }
+
+ /**
+ * Returns the target data haplotype pairs in the segment of the current
+ * marker window preceding the splice point with the previous marker window:
+ * {@code this.targetMarkers().restrict(0, this.prevTargetSplice())}.
+ * Returns {@code null} if no haplotype constraints were specified
+ * at construction.
+ * @return the target data haplotype pairs in the segment of the current
+ * marker window preceding the splice point with the previous marker window,
+ * or {@code null} if there are no such haplotype pairs
+ */
+ public SampleHapPairs initHaps() {
+ return initHaps;
+ }
+
+ /* returns an array mapping each marker index to data.targetMarkerIndex() */
+ private int[] refToTargetMarker(Data data) {
+ int[] ia = new int[data.nMarkers()];
+ for (int j=0; j<ia.length; ++j) {
+ ia[j] = data.targetMarkerIndex(j);
+ }
+ return ia;
+ }
+
+ /* returns an array mapping each target marker index to data.markerIndex() */
+ private static int[] targetToRefMarker(Data data) {
+ int[] ia = new int[data.nTargetMarkers()];
+ for (int j=0; j<ia.length; ++j) {
+ ia[j] = data.markerIndex(j);
+ }
+ return ia;
+ }
+
+ /**
+ * Returns the parent-offspring relationships.
+ * @return the parent-offspring relationships
+ */
+ public NuclearFamilies families() {
+ return families;
+ }
+
+ /**
+ * Returns the per-haplotype weights.
+ * @return the per-haplotype weights
+ */
+ public Weights weights() {
+ return weights;
+ }
+
+ /**
+ * Returns the number of reference samples.
+ * @return the number of reference samples
+ */
+ public int nRefSamples() {
+ return refSamples == null ? 0 : refSamples.nSamples();
+ }
+
+ /**
+ * Returns the list of reference samples, or {@code null} if
+ * there are no reference samples.
+ * @return the list of reference samples, or {@code null} if
+ * there are no reference samples
+ */
+ public Samples refSamples() {
+ return refSamples;
+ }
+
+ /**
+ * Returns the number of target samples.
+ * @return the number of target samples
+ */
+ public int nTargetSamples() {
+ return targetSamples.nSamples();
+ }
+
+ /**
+ * Returns the list of target samples.
+ * @return the list of target samples
+ */
+ public Samples targetSamples() {
+ return targetSamples;
+ }
+
+
+ /**
+ * Returns the number of reference and target samples.
+ * @return the number of reference and target samples
+ */
+ public int nAllSamples() {
+ return allSamples.nSamples();
+ }
+
+ /**
+ * Returns a list of all target and reference samples.
+ * Target samples are listed first in the same order as the list returned
+ * by {@code this.targetSamples()}. Reference samples are listed last
+ * in the same order as the list returned by {@code this.refSamples()}.
+ * @return a list of all target and reference samples
+ */
+ public Samples allSamples() {
+ return allSamples;
+ }
+
+ /**
+ * Returns the number of target data markers.
+ * @return the number of target data markers
+ */
+ public int nTargetMarkers() {
+ return targetMarkers.nMarkers();
+ }
+
+ /**
+ * Returns the list of target data markers.
+ * @return the list of target data markers
+ */
+ public Markers targetMarkers() {
+ return targetMarkers;
+ }
+
+ /**
+ * Returns the number of reference data markers.
+ * @return the number of reference data markers
+ */
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ /**
+ * Returns the list of reference data markers.
+ * @return the list of reference data markers
+ */
+ public Markers markers() {
+ return markers;
+ }
+
+ /**
+ * Returns the index of the specified marker in the reference data markers.
+ * @param targetMarker index of a marker in the list of target data markers
+ * @return the index of the specified marker in the reference data markers
+ * @throws IndexOutOfBoundsException if
+ * {@code targetMarker < 0 || targetMarker >= this.nTargetMarkers()}
+ */
+ public int markerIndex(int targetMarker) {
+ return markerIndex[targetMarker];
+ }
+
+ /**
+ * Returns an array of length {@code this.nTargetMarkers()} which maps
+ * the {@code k}-th marker in the list of target data markers to the
+ * index of the marker in the list of reference data markers.
+ * @return an array of length {@code this.nTargetMarkers()} which maps
+ * the {@code k}-th marker in the list of target data markers to the
+ * index of the marker in the list of reference data markers
+ */
+ public int[] markerIndices() {
+ return markerIndex.clone();
+ }
+
+ /**
+ * Returns the index of the specified marker in the target data, or
+ * returns -1 if the marker is not present in the target data.
+ * @param marker index of a marker in the reference data
+ * @return the index of the specified marker in the target data, or
+ * returns -1 if the marker is not present in the target data
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}.
+ */
+ public int targetMarkerIndex(int marker) {
+ return targetMarkerIndex[marker];
+ }
+
+ /**
+ * Returns an array of length {@code this.nMarkers()} whose {@code k}-th
+ * element is the index of the {@code k}-th marker in the list of target
+ * markers or is -1 if the marker is not present in the target data.
+ * @return an array of length {@code this.nMarkers()} whose {@code k}-th
+ * element is the index of the {@code k}-th marker in the list of target
+ * markers or is -1 if the marker is not present in the target data
+ */
+ public int[] targetMarkerIndices() {
+ return targetMarkerIndex.clone();
+ }
+
+ /**
+ * Add the reference haplotype pairs that are restricted
+ * to the target data markers to the specified list.
+ * @param list a list of haplotype pairs for target data markers
+ * @throws NullPointerException if {@code list == null}
+ */
+ public void addRestrictedRefHapPairs(List<HapPair> list) {
+ list.addAll(restRefHapPairs);
+ }
+
+ /**
+ * Returns a list of reference haplotype pairs that are restricted
+ * to the target data markers, or returns {@code null}
+ * if there are no reference samples.
+ * @return a list of reference haplotype pairs that are restricted
+ * to the target data markers
+ */
+ public SampleHapPairs restrictedRefSampleHapPairs() {
+ return restrictedRefSampleHapPairs;
+ }
+
+ /**
+ * Returns a list of reference haplotype pairs, or returns {@code null}
+ * if there are no reference samples.
+ * @return a list of reference haplotype pairs
+ */
+ public SampleHapPairs refSampleHapPairs() {
+ return refSampleHapPairs;
+ }
+
+ /**
+ * Returns the genotype likelihoods for the
+ * target samples at the target data markers.
+ * @return the genotype likelihoods for the
+ * target samples at the target data markers.
+ */
+ public GL targetGL() {
+ return targetGL;
+ }
+
+ /**
+ * Returns an array whose initial element is {@code 0} and whose
+ * {@code j}-th element for {@code j > 0} is the recombination rate
+ * between the target markers with indices {@code (j - 1)} and {@code j}.
+ * Returns {@code null} if no genetic map was specified at construction.
+ * The returned array is a copy of the internally stored array.
+ *
+ * @return inter-marker recombination rates for the target markers, or
+ * {@code null} if no genetic map was specified
+ */
+ public float[] recombRate() {
+ return recombRate==null ? null : recombRate.clone();
+ }
+ }
diff --git a/main/GeneticMap.java b/main/GeneticMap.java
new file mode 100644
index 0000000..f86453e
--- /dev/null
+++ b/main/GeneticMap.java
@@ -0,0 +1,142 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Interface {@code GeneticMap} represents a genetic map for one or more
+ * chromosomes.
+ * </p>
+ * <p>Instances of class {@code GeneticMap} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface GeneticMap {
+
+ /**
+ * Returns the base position corresponding to the specified genetic map
+ * position. If the genetic position is not a map position then the base
+ * position is estimated from the nearest genetic map positions using
+ * linear interpolation.
+ *
+ * @param chrom the chromosome index
+ * @param geneticPosition the genetic position on the chromosome
+ * @return the base position corresponding to the specified genetic map
+ * position
+ * @throws IllegalArgumentException if the calculated base position
+ * exceeds {@code Integer.MAX_VALUE}
+ * @throws IllegalArgumentException if this genetic map has no
+ * map positions for the specified chromosome
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ */
+ int basePos(int chrom, double geneticPosition);
+
+ /**
+ * Returns the genetic map position of the specified marker. The
+ * genetic map position is estimated using linear interpolation.
+ *
+ * @param marker a genetic marker
+ * @return the genetic map position of the specified marker
+ * @throws IllegalArgumentException if this genetic map has no
+ * map positions for the specified chromosome
+ * @throws NullPointerException if {@code marker == null}
+ */
+ double genPos(Marker marker);
+
+ /**
+ * Returns the genetic map position of the specified genome coordinate.
+ * The genetic map position is estimated using linear interpolation.
+ *
+ * @param chrom the chromosome index
+ * @param basePosition the base coordinate on the chromosome
+ * @return the genetic map position of the specified genome coordinate
+ * @throws IllegalArgumentException if this genetic map has no
+ * map positions for the specified chromosome
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ */
+ double genPos(int chrom, int basePosition);
+
+ /**
+ * Returns a string representation of this genetic map. The exact details
+ * of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of this genetic map
+ */
+ @Override
+ String toString();
+
+    /**
+     * Returns an array of length {@code markers.nMarkers()} whose {@code j}-th
+     * element is the genetic map position of the {@code j}-th marker.
+     * @param markers the list of markers
+     * @return an array of genetic map positions
+     * @throws NullPointerException if {@code markers == null}
+     */
+    default double[] genPos(Markers markers) {
+        int nMarkers = markers.nMarkers();
+        double[] positions = new double[nMarkers];
+        for (int m = 0; m < nMarkers; ++m) {
+            positions[m] = this.genPos(markers.marker(m));
+        }
+        return positions;
+    }
+
+ /**
+ * Returns the an array of length {@code hapPairs.nMarkers()} whose
+ * whose {@code j}-th element for {@code j > 0} is the
+ * probability of recombination between marker {@code j - 1}
+ * and marker {@code j}, and whose initial element is {@code 0}.
+ * Any inter-marker genetic distances less than {@code 1e-7} cM are
+ * increased to {@code 1e-7} cM.
+ * @param markers the list of markers
+ * @param nHaps the number of haplotypes in the sample
+ * @param ne the effective population size
+ * @return an array of inter-marker recombination probabilities
+ * @throws IllegalArgumentException if {@code nHaps < 1}
+ * @throws IllegalArgumentException if {@code ne < 1f}
+ * @throws NullPointerException if {@code markers == null}
+ */
+ default float[] pRecomb(Markers markers, int nHaps, float ne) {
+ if (nHaps < 1) {
+ throw new IllegalArgumentException(String.valueOf(nHaps));
+ }
+ if (ne < 1f) {
+ throw new IllegalArgumentException(String.valueOf(ne));
+ }
+ double MIN_CM_DIST = 1e-7;
+ int chrom = markers.marker(0).chromIndex();
+ float[] pRecomb = new float[markers.nMarkers()];
+ double c = -(0.04*ne/nHaps); // 0.04 = 4/(100 cM/M)
+ double lastGenPos = this.genPos(chrom, markers.marker(0).pos());
+ pRecomb[0] = 0f;
+ for (int j=1; j<pRecomb.length; ++j) {
+ double genPos = this.genPos(markers.marker(j));
+ double genDist = Math.max(Math.abs(genPos - lastGenPos), MIN_CM_DIST);
+ pRecomb[j] = (float) -Math.expm1(c*genDist);
+ lastGenPos = genPos;
+ }
+ return pRecomb;
+ }
+
+}
diff --git a/main/GenotypeValues.java b/main/GenotypeValues.java
new file mode 100644
index 0000000..646f152
--- /dev/null
+++ b/main/GenotypeValues.java
@@ -0,0 +1,134 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Markers;
+import vcf.Marker;
+import beagleutil.Samples;
+
+/**
+ * <p>Interface {@code GenotypeValues} represents a value for each
+ * possible genotype for each sample at each marker.
+ * </p>
+ * <p>All instances of {@code GenotypeValues} are required to be thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface GenotypeValues {
+
+ /**
+ * Returns the specified genotype value.
+ *
+ * @param marker a marker index
+ * @param sample a sample index
+ * @param genotype a genotype index
+ * @return the specified genotype value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code genotype < 0 || genotype >= this.marker(marker).nGenotypes()}
+ */
+ float value(int marker, int sample, int genotype);
+
+ /**
+ * Adds the specified genotype values to the stored genotype values
+ * for the specified sample. This method is equivalent to
+ * <pre>
+ * for (m=0; m&lt;this.nMarkers(); ++m) {
+ *     offset = this.markers().sumGenotypes(m);
+ *     for (gt=0; gt&lt;this.marker(m).nGenotypes(); ++gt) {
+ *         this.add(m, sample, gt, values[offset + gt]);
+ *     }
+ * }
+ * </pre>
+ *
+ * @param sample a sample index
+ * @param values an array of length {@code this.markers().sumGenotypes()}
+ * containing the genotype values to be added
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IllegalArgumentException if
+ * {@code values.length != this.markers().sumGenotypes()}
+ * @throws NullPointerException if {@code values == null}
+ */
+ void add(int sample, double[] values);
+
+ /**
+ * Adds the specified genotype value to the stored genotype value.
+ *
+ * @param marker a marker index
+ * @param sample a sample index
+ * @param genotype a genotype index
+ * @param value the value to be added
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code genotype < 0 || genotype >= this.marker(marker).nGenotypes()}
+ */
+ void add(int marker, int sample, int genotype, double value);
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ int nMarkers();
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ Markers markers();
+
+ /**
+ * Returns the specified marker.
+ * @param marker a marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ Marker marker(int marker);
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ int nSamples();
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ Samples samples();
+
+ /**
+ * Returns a string representation of {@code this}. The exact details
+ * of the representation are unspecified and subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString();
+}
diff --git a/main/HapAlleleProbs.java b/main/HapAlleleProbs.java
new file mode 100644
index 0000000..8128631
--- /dev/null
+++ b/main/HapAlleleProbs.java
@@ -0,0 +1,93 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import vcf.Markers;
+import vcf.Marker;
+
+/**
+ * <p>Interface {@code HapAlleleProbs} stores allele probabilities for
+ * a haplotype.
+ * </p>
+ * All instances of {@code HapAlleleProbs} are required to be immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface HapAlleleProbs {
+
+ /**
+ * Returns the specified allele probability.
+ *
+ * @param marker a marker index
+ * @param allele an allele index
+ * @return the specified allele probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.marker(marker).nAlleles()}
+ */
+ float allele(int marker, int allele);
+
+ /**
+ * Returns the allele with maximum posterior probability. If more than
+ * one allele has maximum posterior probability, one of the
+ * alleles with maximum posterior probability will be returned.
+ * @param marker a marker index
+ * @return the allele with maximum posterior probability
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ int alleleWithMaxProb(int marker);
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ int nMarkers();
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ Markers markers();
+
+ /**
+ * Returns the specified marker.
+ * @param marker a marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ Marker marker(int marker);
+
+ /**
+ * Returns the haplotype index. The two haplotypes for sample {@code k}
+ * have indices {@code 2*k} and {@code 2*k + 1}.
+ * @return the haplotype index
+ */
+ int hapIndex();
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ Samples samples();
+}
diff --git a/main/HapPairSampler.java b/main/HapPairSampler.java
new file mode 100644
index 0000000..7745a32
--- /dev/null
+++ b/main/HapPairSampler.java
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import blbutil.Utilities;
+import dag.Dag;
+import dag.LinkageEquilibriumDag;
+import dag.MergeableDag;
+import haplotype.BasicHapPairs;
+import haplotype.HapPair;
+import haplotype.HapPairs;
+import haplotype.RevHapPairs;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import sample.ConsumeSingleSamples;
+import sample.SingleBaum;
+import vcf.GL;
+import vcf.RevGL;
+
+/**
+ * <p>Class {@code HapPairSampler} samples haplotype pairs and
+ * estimates posterior genotype probabilities.
+ * </p>
+ * <p>Instances of class {@code HapPairSampler} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class HapPairSampler {
+
+ private final Par par;
+ private final RunStats runStats;
+
+    /**
+     * Constructs a new {@code HapPairSampler} instance from the specified data.
+     * @param par the analysis parameters
+     * @param runStats the object to which run-time statistics will be written
+     * @throws NullPointerException if {@code par == null || runStats == null}
+     */
+    public HapPairSampler(Par par, RunStats runStats) {
+        if (par==null) {
+            throw new NullPointerException("par==null");    // type matches the documented contract
+        }
+        if (runStats==null) {
+            throw new NullPointerException("runStats==null");
+        }
+        this.par = par;
+        this.runStats = runStats;
+    }
+
+    /**
+     * Samples haplotype pairs for the current marker window and returns
+     * them as a list. Sampling is conditional on the observed genotype
+     * data and uses a haplotype frequency model in which all markers
+     * are in linkage equilibrium.
+     *
+     * @param cd the input data for the current marker window
+     * @return a list of sampled haplotype pairs
+     * @throws NullPointerException if {@code cd == null}
+     */
+    public List<HapPair> initialHaps(CurrentData cd) {
+        final float minAlleleFreq = 0.0001f;
+        final boolean markersAreReversed = false;
+        GL freqGL = cd.targetGL();
+        GL emitGL = cd.targetGL();
+        Dag leDag = new LinkageEquilibriumDag(freqGL, minAlleleFreq);
+        List<HapPair> threadSafeHaps
+                = Collections.synchronizedList(new ArrayList<HapPair>());
+        sample(leDag, emitGL, markersAreReversed, par.nsamples(),
+                threadSafeHaps, par.nthreads());
+        return new ArrayList<>(threadSafeHaps);
+    }
+
+    /**
+     * Samples haplotype pairs conditional on the observed genotype data
+     * and a haplotype frequency model built from the specified
+     * {@code hapPairs}. As a side effect, the specified {@code hapPairs}
+     * list is passed to {@code cd.addRestrictedRefHapPairs()}.
+     * The contract for this method is undefined if the specified
+     * {@code hapPairs} and {@code gv} are inconsistent with the input data
+     * contained in the {@code cd} parameter.
+     *
+     * @param cd the input data for the current marker window
+     * @param hapPairs the haplotype pairs used to build the haplotype
+     * frequency model
+     * @param useRevDag {@code true} if the order of markers should be
+     * reversed when building the haplotype frequency model
+     * @param gv the current scaled genotype probabilities for the target
+     * samples or {@code null} if genotype probabilities are not to be estimated
+     * @return the sampled haplotype pairs
+     *
+     * @throws IllegalArgumentException if {@code hapPairs.isEmpty() == true}
+     * @throws NullPointerException if {@code cd == null || hapPairs == null}
+     */
+    public List<HapPair> sample(CurrentData cd, List<HapPair> hapPairs,
+            boolean useRevDag, GenotypeValues gv) {
+        if (hapPairs.isEmpty()) {
+            throw new IllegalArgumentException("hapPairs.isEmpty()");
+        }
+        int nThreads = par.nthreads();
+        int capacity = par.nsamples()*cd.nTargetSamples();
+        GL emitGL = gl(cd, useRevDag);
+        Dag modelDag = getDagsAndUpdatePos(cd, hapPairs, useRevDag);
+        List<HapPair> threadSafeHaps = synchronizedEmptyList(capacity);
+        if (gv==null) {
+            sample(modelDag, emitGL, useRevDag, par.nsamples(),
+                    threadSafeHaps, nThreads);
+        }
+        else {
+            GenotypeValues orientedGV = useRevDag ? new RevGenotypeValues(gv) : gv;
+            sample(modelDag, emitGL, useRevDag, par.nsamples(),
+                    threadSafeHaps, orientedGV, nThreads);
+        }
+        return new ArrayList<>(threadSafeHaps);
+    }
+
+    /**
+     * Returns {@code cd.targetGL()}, wrapped in a {@code RevGL} if {@code useRevDag} is {@code true}.
+     */
+    private GL gl(CurrentData cd, boolean useRevDag) {
+        GL targetGL = cd.targetGL();
+        return useRevDag ? new RevGL(targetGL) : targetGL;
+    }
+
+    // Returns an empty synchronized list backed by an ArrayList with the specified initial capacity.
+    private static List<HapPair> synchronizedEmptyList(int capacity) {
+        List<HapPair> backingList = new ArrayList<>(capacity);
+        return Collections.synchronizedList(backingList);
+    }
+
+    private Dag getDagsAndUpdatePos(CurrentData cd, List<HapPair> hapPairs,
+            boolean useRevDag) {
+        // NB: the caller's list is passed to cd.addRestrictedRefHapPairs()
+        // before the haplotype frequency model is built from it.
+        cd.addRestrictedRefHapPairs(hapPairs);
+        HapPairs forwardHaps = new BasicHapPairs(hapPairs);
+        HapPairs modelHaps = useRevDag ? new RevHapPairs(forwardHaps) : forwardHaps;
+        float[] weights = cd.weights().get(modelHaps);
+        Dag result = makeDag(modelHaps, weights, par.modelscale());
+        runStats.setDagStats(result);
+        return result;
+    }
+
+    // Builds the DAG with MergeableDag.dag() and reports the elapsed build time to runStats.
+    private Dag makeDag(HapPairs hapPairs, float[] weights, float scale) {
+        long startNanos = System.nanoTime();
+        Dag built = MergeableDag.dag(hapPairs, weights, scale, 500);    // 500 = nInitLevels
+        runStats.buildNanos(System.nanoTime() - startNanos);
+        return built;
+    }
+
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ private void sample(Dag dag, GL gl, boolean markersAreReversed,
+ int nSamples, List<HapPair> sampledHaps, int nThreads) { // runs nThreads ConsumeSingleSamples tasks over all sample indices of gl
+ long t0 = System.nanoTime(); // start of the interval reported to runStats
+ Random rand = new Random(par.seed()); // source of one seed per worker
+ final BlockingQueue<Integer> qIn = new ArrayBlockingQueue<>(3*nThreads); // bounded work queue of sample indices
+ ExecutorService es = Executors.newFixedThreadPool(nThreads);
+ for (int j=0; j<nThreads; ++j) { // submit one consumer task per thread
+ SingleBaum sb = new SingleBaum(dag, gl, rand.nextLong(),
+ nSamples, par.lowmem());
+ es.submit(new ConsumeSingleSamples(markersAreReversed, sb, qIn,
+ sampledHaps));
+ }
+ try {
+ for (int j=0, n=gl.nSamples(); j<n; ++j) {
+ qIn.put(j); // waits for space when the queue is full
+ }
+ for (int j=0; j<nThreads; ++j) {
+ qIn.put(ConsumeSingleSamples.POISON); // one POISON value per consumer task
+ }
+ es.shutdown();
+ es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); // wait for the consumer tasks to finish
+ }
+ catch (Throwable e) {
+ Utilities.exit("ERROR", e);
+ }
+ runStats.sampleNanos(System.nanoTime() - t0);
+ }
+
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ private void sample(Dag dag, GL gl, boolean markersAreReversed, int nCopies,
+ List<HapPair> sampledHaps, GenotypeValues gv,
+ int nThreads) { // same as the overload without gv, except that gv is also passed to each consumer task
+ long t0 = System.nanoTime(); // start of the interval reported to runStats
+ Random rand = new Random(par.seed()); // source of one seed per worker
+ final BlockingQueue<Integer> qIn = new ArrayBlockingQueue<>(3*nThreads); // bounded work queue of sample indices
+ ExecutorService es = Executors.newFixedThreadPool(nThreads);
+ for (int j=0; j<nThreads; ++j) { // submit one consumer task per thread
+ SingleBaum sb = new SingleBaum(dag, gl, rand.nextLong(),
+ nCopies, par.lowmem());
+ es.submit(new ConsumeSingleSamples(markersAreReversed, sb, qIn,
+ sampledHaps, gv));
+ }
+ try {
+ for (int j=0, n=gl.nSamples(); j<n; ++j) {
+ qIn.put(j); // waits for space when the queue is full
+ }
+ for (int j=0; j<nThreads; ++j) {
+ qIn.put(ConsumeSingleSamples.POISON); // one POISON value per consumer task
+ }
+ es.shutdown();
+ es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); // wait for the consumer tasks to finish
+ }
+ catch (Throwable e) {
+ Utilities.exit("ERROR", e);
+ }
+ runStats.sampleNanos(System.nanoTime() - t0);
+ }
+}
diff --git a/main/LiAndStephensHapSampler.java b/main/LiAndStephensHapSampler.java
new file mode 100644
index 0000000..baca0d5
--- /dev/null
+++ b/main/LiAndStephensHapSampler.java
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import blbutil.Utilities;
+import haplotype.SampleHapPairs;
+import java.util.Queue;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import sample.LSHapBaum;
+import sample.ImputationData;
+
+/**
+ * <p>Class {@code LiAndStephensHapSampler} estimates posterior allele probabilities.
+ * </p>
+ * <p>Instances of class {@code LiAndStephensHapSampler} are thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class LiAndStephensHapSampler {
+
+ private final Par par;
+ private final GeneticMap genMap;
+
+    /**
+     * Constructs a {@code LiAndStephensHapSampler} instance from the specified
+     * data.
+     * @param par the analysis parameters
+     * @param genMap the genetic map, or {@code null} if none is specified
+     * @throws NullPointerException if {@code par == null}
+     */
+    public LiAndStephensHapSampler(Par par, GeneticMap genMap) {
+        if (par==null) {
+            // a null argument is reported with the documented exception type
+            throw new NullPointerException("par==null");
+        }
+        this.par = par;
+        this.genMap = genMap;
+    }
+
+    /**
+     * Estimates allele probabilities for each target sample. The contract is
+     * undefined if the {@code shp} and {@code cd} parameters are inconsistent.
+     * @param cd the input data for the current marker window
+     * @param shp estimated target haplotypes at the genotyped markers
+     * @return estimated allele probabilities for each target sample
+     * @throws NullPointerException if {@code cd == null || shp == null}
+     */
+    public BasicAlleleProbs sample(CurrentData cd, SampleHapPairs shp) {
+        Queue<HapAlleleProbs> results = new ConcurrentLinkedQueue<>();
+        boolean lowMem = par.lowmem();
+        int nThreads = par.nthreads();
+        multiThreadedHapSample(cd, shp, results, lowMem, nThreads);
+        return new BasicAlleleProbs(results.toArray(new HapAlleleProbs[0]));
+    }
+
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ private void multiThreadedHapSample(CurrentData cd,
+ SampleHapPairs targetHapPairs, Queue<HapAlleleProbs> qOut,
+ boolean lowMem, int nThreads) {
+ int qInSize = targetHapPairs.nSamples() + nThreads; // room for every sample index plus one POISON per thread
+ final BlockingQueue<Integer> qIn = new ArrayBlockingQueue<>(qInSize);
+ ExecutorService es = Executors.newFixedThreadPool(nThreads);
+ ImputationData impData = new ImputationData(par, cd, targetHapPairs,
+ genMap);
+ try {
+ for (int j=0, n=targetHapPairs.nSamples(); j<n; ++j) {
+ qIn.put(j); // the queue is large enough that this never has to wait
+ }
+ for (int j=0; j<nThreads; ++j) {
+ qIn.put(LSHapSampler.POISON); // end-of-work marker, one per worker
+ }
+ for (int j=0; j<nThreads; ++j) { // workers are started only after the queue is fully loaded
+ LSHapBaum hb = new LSHapBaum(impData, lowMem); // each worker gets its own LSHapBaum
+ es.submit(new LSHapSampler(hb, qIn, qOut));
+ }
+ es.shutdown();
+ es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); // wait for the workers to finish
+ }
+ catch (Throwable e) {
+ Utilities.exit("RecombHapSampler: ERROR", e);
+ }
+ }
+
+    /** Worker that takes sample indices from qIn and adds both haplotypes' allele probabilities to qOut. */
+    private static final class LSHapSampler implements Runnable {
+        public static final int POISON = -1;
+
+        private final LSHapBaum baum;
+        private final BlockingQueue<Integer> qIn;
+        private final Queue<HapAlleleProbs> qOut;
+
+        /**
+         * Constructs an {@code LSHapSampler} instance.
+         *
+         * @param baum the object used to estimate allele probabilities
+         * for each haplotype
+         * @param qIn a thread-safe input work queue of sample indices
+         * @param qOut a thread-safe queue to which the estimated allele
+         * probabilities are added
+         * @throws NullPointerException if any parameter is {@code null}
+         */
+        public LSHapSampler(LSHapBaum baum, BlockingQueue<Integer> qIn,
+                Queue<HapAlleleProbs> qOut) {
+            if (baum==null) {
+                throw new NullPointerException("baum=null");
+            }
+            if (qIn==null) {
+                throw new NullPointerException("qIn==null");
+            }
+            if (qOut==null) {
+                throw new NullPointerException("qOut==null");
+            }
+            this.baum = baum;
+            this.qIn = qIn;
+            this.qOut = qOut;
+        }
+
+        @Override
+        @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+        public void run() {
+            try {
+                int sample = qIn.take();
+                while (sample != POISON) {
+                    int hap1 = 2*sample;
+                    int hap2 = 2*sample+1;
+                    qOut.add(baum.randomHapSample(hap1));
+                    qOut.add(baum.randomHapSample(hap2));
+                    sample = qIn.take();
+                }
+            }
+            catch (Throwable e) {
+                Utilities.exit("RecombHapSampler.LSHapSampler: ERROR", e);
+            }
+        }
+    }
+}
diff --git a/main/LowMemHapAlleleProbs.java b/main/LowMemHapAlleleProbs.java
new file mode 100644
index 0000000..e1403f7
--- /dev/null
+++ b/main/LowMemHapAlleleProbs.java
@@ -0,0 +1,203 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code LowMemHapAlleleProbs} stores allele probabilities for
+ * a haplotype.
+ * </p>
+ * Instances of class {@code LowMemHapAlleleProbs} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class LowMemHapAlleleProbs implements HapAlleleProbs {
+
+ private static final int N_BINS = 256;
+ private static final int SHIFT = 128;
+ private static final float INCREMENT = 1f/N_BINS;
+
+ private final Markers markers;
+ private final Samples samples;
+ private final int hap;
+ private final byte[] alleleBin;
+
+    /**
+     * Constructs a new {@code LowMemHapAlleleProbs} instance from allele
+     * probabilities sorted first by marker index, and then by allele index.
+     * @param markers the markers
+     * @param samples the samples
+     * @param hap the haplotype index
+     * @param alleleProbs the allele probabilities
+     * @throws IllegalArgumentException if
+     * {@code alleleProbs.length != markers.sumAlleles()}, if any probability
+     * is {@code NaN}, negative, or exceeds {@code 1.01f}, or if the
+     * probabilities at a marker sum to more than {@code 1.01f}
+     * @throws IndexOutOfBoundsException if {@code hap < 0 || hap >= 2*samples.nSamples()}
+     * @throws NullPointerException if any array or object argument is {@code null}
+     */
+    public LowMemHapAlleleProbs(Markers markers, Samples samples, int hap,
+            float[] alleleProbs) {
+        if (alleleProbs.length != markers.sumAlleles()) {
+            throw new IllegalArgumentException("inconsistent data");
+        }
+        if (hap < 0 || hap >= 2*samples.nSamples()) {
+            throw new IndexOutOfBoundsException(String.valueOf(hap));
+        }
+        int nMarkers = markers.nMarkers();
+        byte[] bins = new byte[alleleProbs.length - nMarkers];
+        int next = 0;
+        for (int m=0; m<nMarkers; ++m) {
+            int start = markers.sumAlleles(m);
+            int end = markers.sumAlleles(m+1);
+            float total = 0f;
+            for (int j=start; j<end; ++j) {
+                float p = alleleProbs[j];
+                if (!(p >= 0 && p <= 1.01f)) {    // also true when p is NaN
+                    throw new IllegalArgumentException(String.valueOf(p));
+                }
+                total += p;
+            }
+            if (total > 1.01f) {
+                throw new IllegalArgumentException(String.valueOf(total));
+            }
+            for (int j=start, last=end-1; j<last; ++j) {
+                bins[next++] = convertToByte(alleleProbs[j] / total);
+            }
+        }
+        assert next == bins.length;
+
+        this.markers = markers;
+        this.samples = samples;
+        this.hap = hap;
+        this.alleleBin = bins;
+    }
+
+    // Maps a probability to the index of its bin of width 1/N_BINS,
+    // shifted down by SHIFT so that it fits in a signed byte; values
+    // of 1f or more are placed in the highest bin.
+    private static byte convertToByte(float f) {
+        float clamped = (f >= 1f) ? 0.99999f : f;
+        return (byte) (((int) Math.floor(clamped*N_BINS)) - SHIFT);
+    }
+
+    private static float convertToFloat(byte b) {
+        return (b + SHIFT + 0.5f)*INCREMENT;    // midpoint of the bin
+    }
+
+    @Override
+    public float allele(int marker, int allele) {
+        int nAlleles = markers.marker(marker).nAlleles();
+        if (allele < 0 || allele >= nAlleles) {    // exception type follows HapAlleleProbs.allele()
+            throw new IndexOutOfBoundsException(String.valueOf(allele));
+        }
+        int start = markers.sumAlleles(marker) - marker;    // one bin fewer than alleles per marker
+        if (nAlleles == 2) {
+            float f = convertToFloat(alleleBin[start]);
+            return (allele == 0) ? f : 1f - f;
+        }
+        else if (allele == nAlleles - 1) {
+            return lastAlleleProb(marker);    // the last allele has no stored bin
+        }
+        else {
+            return convertToFloat(alleleBin[start + allele]);
+        }
+    }
+
+    // Returns 1 minus the stored probabilities of the other alleles, but never less than 0.
+    private float lastAlleleProb(int marker) {
+        int nStored = markers.marker(marker).nAlleles() - 1;
+        int first = markers.sumAlleles(marker) - marker;
+        float remainder = 1f;
+        for (int k=0; k<nStored; ++k) {
+            remainder -= convertToFloat(alleleBin[first + k]);
+        }
+        return (remainder < 0f) ? 0f : remainder;
+    }
+
+ @Override
+ public int alleleWithMaxProb(int marker) {
+ int nAlleles = markers.marker(marker).nAlleles();
+ int start = markers.sumAlleles(marker) - marker;
+ if (nAlleles == 2) {
+ return alleleBin[start] >= 0 ? 0 : 1;
+ }
+ else {
+ int bestIndex = start;
+ int end = start + nAlleles - 1;
+ float sumProb = 0f;
+ for (int j = start; j<end; ++j) {
+ sumProb += convertToFloat(alleleBin[j]);
+ if (alleleBin[j] > alleleBin[bestIndex]) {
+ bestIndex = j;
+ }
+ }
+ if ( sumProb < 0.5f) {
+ return nAlleles - 1;
+ }
+ else {
+ return bestIndex - start;
+ }
+ }
+ }
+
+ @Override
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return markers.marker(marker);
+ }
+
+ @Override
+ public int hapIndex() {
+ return hap;
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+    /**
+     * Returns a string that contains the class and the haplotype index of
+     * {@code this}. The exact format is unspecified and subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        return new StringBuilder(100)
+                .append('[')
+                .append(this.getClass().toString())
+                .append(": hap=")
+                .append(hap)
+                .append(']')
+                .toString();
+    }
+}
diff --git a/main/Main.java b/main/Main.java
new file mode 100644
index 0000000..9ba5d4c
--- /dev/null
+++ b/main/Main.java
@@ -0,0 +1,451 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.ChromInterval;
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.FileIt;
+import blbutil.Filter;
+import blbutil.InputIt;
+import blbutil.IntPair;
+import blbutil.SampleFileIt;
+import blbutil.Utilities;
+import haplotype.BasicSampleHapPairs;
+import haplotype.BitHapPair;
+import haplotype.HapPair;
+import haplotype.SampleHapPairs;
+import ibd.IbdSegment;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import vcf.AllData;
+import vcf.VcfIt;
+import vcf.BrefIt;
+import vcf.Data;
+import vcf.IntervalVcfIt;
+import vcf.Marker;
+import vcf.FilterUtil;
+import vcf.GL;
+import vcf.Markers;
+import vcf.TargetData;
+import vcf.RefIt;
+import vcf.VcfEmission;
+import vcf.VcfRecord;
+
+/**
+ * Class {@code Main} is the entry class for the Beagle program.
+ * See {@code Par.usage()} and online program documentation for usage
+ * instructions.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class Main {
+
+ /**
+  * The program name and version.
+  */
+ public static final String program = "beagle.22Feb16.8ef.jar (version 4.1)";
+
+ /**
+  * The command line that runs the program without arguments.
+  */
+ public static final String command = "java -jar beagle.22Feb16.8ef.jar";
+
+ /**
+  * The copyright string.
+  */
+ public static final String copyright = "Copyright (C) 2014-2015 Brian L. Browning";
+
+ /**
+  * The program name, the copyright string, and a brief help message.
+  * The help message quotes {@code Main.command} so that the jar name
+  * is not repeated here.
+  */
+ public static final String shortHelp = Main.program
+         + Const.nl + Main.copyright
+         + Const.nl + "Enter \"" + Main.command + "\" for a "
+         + "summary of command line arguments.";
+
+ private final Par par;
+ private final GeneticMap genMap;
+ private final Data data;
+ private final RunStats runStats;
+ private final WindowWriter windowOut;
+
+ /**
+ * Entry point to Beagle program. See {@code Parameters.usage()} and
+ * online program documentation for usage instructions.
+ *
+ * @param args command line arguments
+ */
+ public static void main(String[] args) {
+ Locale.setDefault(Locale.US);
+ if (args.length==0) {
+ System.out.println(program);
+ System.out.println(copyright);
+ System.out.println(Par.usage());
+ System.exit(0);
+ }
+ Par par = parameters(args);
+ System.setProperty("java.util.concurrent.ForkJoinPool.common.parallelism",
+ String.valueOf(par.nthreads()));
+ RunStats runStats = new RunStats(par);
+ runStats.printStartInfo();
+ GeneticMap genMap = geneticMap(par);
+
+ try (Data data = (par.ref()==null) ? nonRefData(par) : allData(par)) {
+ try (WindowWriter winOut = new WindowWriter(data.targetSamples(),
+ par.out())) {
+ Main main = new Main(par, data, genMap, winOut, runStats);
+ main.phaseData();
+ runStats.printSummaryAndClose(data.nTargetMarkersSoFar(),
+ data.nMarkersSoFar());
+ }
+ }
+ }
+
+ /*
+  * Stores the components of an analysis. The genetic map is not asserted
+  * to be non-null because geneticMap(par) returns null when no map file
+  * is specified.
+  */
+ private Main(Par par, Data data, GeneticMap genMap,
+         WindowWriter windowWriter, RunStats runStats) {
+     assert par != null;
+     assert data != null;
+     assert windowWriter != null;
+     assert runStats != null;
+     this.par = par;
+     this.data = data;
+     this.genMap = genMap;
+     this.windowOut = windowWriter;
+     this.runStats = runStats;
+ }
+
+ /*
+  * Phases the data, imputes ungenotyped markers, and performs IBD segment
+  * detection, processing one marker window per loop iteration.
+  */
+ private void phaseData() {
+     NuclearFamilies fam = new NuclearFamilies(data.targetSamples(), par.ped());
+     runStats.printSampleSummary(fam, data);
+     MainHelper helper = new MainHelper(par, genMap, runStats);
+     SampleHapPairs prevOverlap = null;
+     while (data.canAdvanceWindow()) {
+         advanceWindow();
+         CurrentData cd = new CurrentData(par, genMap, data, prevOverlap, fam);
+         GenotypeValues gv = gv(par, cd);
+         // phased haplotypes are required to be aligned and GT-consistent
+         // with the input data
+         SampleHapPairs phased = helper.phase(cd, gv);
+
+         if (gv == null) {
+             Map<IntPair, List<IbdSegment>> ibd = helper.refinedIbd(cd, phased);
+             AlleleProbs alProbs = helper.LSImpute(cd, phased);
+             printOutput(cd, phased, alProbs, ibd);
+         }
+         else {
+             windowOut.printGV(cd, gv);
+         }
+         // haplotypes in the overlap are handed to the next window
+         prevOverlap = overlapHaps(cd, phased);
+     }
+ }
+
+ /*
+  * Returns null if a "gt" file is specified. Otherwise returns new
+  * genotype values for the target markers and samples, initialized from
+  * the target genotypes when a "gtgl" file is specified.
+  */
+ private static GenotypeValues gv(Par par, CurrentData cd) {
+     if (par.gt() != null) {
+         return null;
+     }
+     GenotypeValues values = new BasicGenotypeValues(cd.targetMarkers(),
+             cd.targetSamples());
+     if (par.gtgl() != null) {
+         initializeGV(values, cd.targetGL());
+     }
+     return values;
+ }
+
+ /*
+ * Initialize GenotypeValues to have values 1 at known genotypes.
+ */
+ private static void initializeGV(GenotypeValues gv, GL gl) {
+ assert gv.markers().equals(gl.markers());
+ assert gv.samples().equals(gl.samples());
+ int nMarkers = gl.nMarkers();
+ int nSamples = gl.nSamples();
+ for (int m=0; m<nMarkers; ++m) {
+ for (int s=0; s<nSamples; ++s) {
+ int a1 = gl.allele1(m, s);
+ int a2 = gl.allele2(m, s);
+ if (a1>=0 && a2>=0) {
+ int gt = VcfRecord.gtIndex(a1, a2);
+ gv.add(m, s, gt, 1.0);
+ }
+ }
+ }
+ }
+
+ /*
+  * Prints the allele probabilities for the current window and, if the
+  * "ibd" parameter is set, the IBD segments. When the window has markers
+  * that are not target markers, the probabilities are first wrapped in a
+  * ConstrainedAlleleProbs built from the target haplotype pairs.
+  */
+ private void printOutput(CurrentData cd, SampleHapPairs targetHapPairs,
+         AlleleProbs alProbs, Map<IntPair, List<IbdSegment>> ibd) {
+     assert par.gt() != null;
+     boolean markersAreImputed = cd.nTargetMarkers() < cd.nMarkers();
+     AlleleProbs outProbs = alProbs;
+     if (markersAreImputed) {
+         outProbs = new ConstrainedAlleleProbs(targetHapPairs, alProbs,
+                 cd.targetMarkerIndices());
+     }
+     // "gprobs" is consulted only when markers are imputed
+     boolean printGprobs = markersAreImputed && par.gprobs();
+     windowOut.print(cd, outProbs, markersAreImputed, printGprobs);
+     if (par.ibd()) {
+         windowOut.printIbd(cd, ibd);
+     }
+ }
+
+ /*
+  * Returns the target haplotype pairs restricted to the target markers
+  * with index from cd.nextTargetOverlap() (inclusive) to
+  * cd.nextTargetSplice() (exclusive), or returns null if
+  * cd.nextOverlap() == cd.nextSplice().
+  */
+ private SampleHapPairs overlapHaps(CurrentData cd,
+         SampleHapPairs targetHapPairs) {
+     int from = cd.nextTargetOverlap();
+     int to = cd.nextTargetSplice();
+     if (cd.nextOverlap() == cd.nextSplice()) {
+         return null;
+     }
+     int sampleCnt = targetHapPairs.nSamples();
+     int length = to - from;
+     Markers markers = targetHapPairs.markers().restrict(from, to);
+     Samples samples = targetHapPairs.samples();
+     List<HapPair> overlap = new ArrayList<>(sampleCnt);
+     // NOTE(review): the two allele buffers are reused for every sample,
+     // so BitHapPair is assumed to copy them -- confirm
+     int[] a1 = new int[length];
+     int[] a2 = new int[length];
+     for (int s = 0; s < sampleCnt; ++s) {
+         for (int m = from; m < to; ++m) {
+             a1[m - from] = targetHapPairs.allele1(m, s);
+             a2[m - from] = targetHapPairs.allele2(m, s);
+         }
+         overlap.add(new BitHapPair(markers, samples, s, a1, a2));
+     }
+     return new BasicSampleHapPairs(targetHapPairs.samples(), overlap);
+ }
+
+ /*
+  * Returns the data for an analysis without a reference panel. The target
+  * records are read from the "gt", "gl" or "gtgl" file (whichever is
+  * specified), filtered by the excluded samples and markers, and
+  * restricted to the "chrom" interval if one is specified.
+  */
+ private static Data nonRefData(Par par) {
+     Filter<String> sampleFilter
+             = FilterUtil.sampleFilter(par.excludesamples());
+     Filter<Marker> markerFilter
+             = FilterUtil.markerFilter(par.excludemarkers());
+     ChromInterval chromInterval = ChromInterval.parse(par.chrom());
+     // NB: originally used NuclearFamilies object in VcfEmission constructors.
+     // If this is still required, will need to read nonRefFile
+     // to get samples required to construct NuclearFamilies object.
+     SampleFileIt<? extends VcfEmission> targIt;
+     if (par.gt() != null) {
+         assert par.gl()==null && par.gtgl()==null;
+         targIt = VcfIt.create(InputIt.fromGzipFile(par.gt()),
+                 sampleFilter, markerFilter, VcfIt.toBitSetGT);
+     }
+     else if (par.gl() != null) {
+         assert par.gt()==null && par.gtgl()==null;
+         targIt = VcfIt.create(InputIt.fromGzipFile(par.gl()),
+                 sampleFilter, markerFilter, VcfIt.toGLRec);
+     }
+     else {
+         assert par.gt()==null && par.gl()==null;
+         targIt = VcfIt.create(InputIt.fromGzipFile(par.gtgl()),
+                 sampleFilter, markerFilter, VcfIt.toGTGLRec);
+     }
+     if (chromInterval != null) {
+         targIt = new IntervalVcfIt<>(targIt, chromInterval);
+     }
+     return TargetData.targetData(targIt);
+ }
+
+ /*
+  * Returns the data for an analysis with a reference panel. The target
+  * records are read from the "gt", "gl" or "gtgl" file (whichever is
+  * specified). Unless imputation is requested for a "gt" file, the
+  * reference markers are restricted to the markers in the target file.
+  * A reference file whose name ends in ".bref" is read with BrefIt;
+  * any other reference file is read with RefIt.
+  */
+ private static Data allData(Par par) {
+     Filter<String> sampleFilter
+             = FilterUtil.sampleFilter(par.excludesamples());
+     Filter<Marker> markerFilter
+             = FilterUtil.markerFilter(par.excludemarkers());
+     ChromInterval chromInterval = ChromInterval.parse(par.chrom());
+
+     File targFile;
+     SampleFileIt<? extends VcfEmission> targIt;
+     if (par.gt() != null) {
+         assert par.gl()==null && par.gtgl()==null;
+         targFile = par.gt();
+         targIt = VcfIt.create(InputIt.fromGzipFile(targFile),
+                 sampleFilter, markerFilter, VcfIt.toBitSetGT);
+     }
+     else if (par.gl() != null) {
+         assert par.gt()==null && par.gtgl()==null;
+         targFile = par.gl();
+         targIt = VcfIt.create(InputIt.fromGzipFile(targFile),
+                 sampleFilter, markerFilter, VcfIt.toGLRec);
+     }
+     else {
+         assert par.gt()==null && par.gl()==null && par.gtgl()!=null;
+         targFile = par.gtgl();
+         targIt = VcfIt.create(InputIt.fromGzipFile(targFile),
+                 sampleFilter, markerFilter, VcfIt.toGTGLRec);
+     }
+
+     // without imputation from a "gt" file, only markers present in the
+     // target file are kept when reading the reference panel
+     if (!par.impute() || par.gt() == null) {
+         markerFilter = restrictToVcfMarkers(targFile, markerFilter,
+                 chromInterval);
+     }
+
+     SampleFileIt<VcfEmission> refIt;
+     if (par.ref().toString().endsWith(".bref")) {
+         refIt = new BrefIt(par.ref(), markerFilter);
+     }
+     else {
+         refIt = RefIt.create(InputIt.fromGzipFile(par.ref()), sampleFilter,
+                 markerFilter, RefIt.DEFAULT_EM_BUFFER_SIZE);
+     }
+     if (chromInterval != null) {
+         targIt = new IntervalVcfIt<>(targIt, chromInterval);
+         refIt = new IntervalVcfIt<>(refIt, chromInterval);
+     }
+     return AllData.allData(refIt, targIt);
+ }
+
+ /*
+  * Returns a filter that includes exactly the markers of the specified
+  * VCF file that pass the specified marker filter and, if an interval is
+  * specified, lie in that interval. The whole file is read once.
+  */
+ private static Filter<Marker> restrictToVcfMarkers(File vcfFile,
+         Filter<Marker> markerFilter, ChromInterval chromInterval) {
+     Set<Marker> kept = new HashSet<>(50000);
+     try (FileIt<String> lines = InputIt.fromGzipFile(vcfFile)) {
+         // a typed null: no sample filtering is applied for this scan
+         Filter<String> noSampleFilter = null;
+         SampleFileIt<VcfRecord> recIt = VcfIt.create(lines, noSampleFilter,
+                 markerFilter, VcfIt.toGTGLRec);
+         if (chromInterval != null) {
+             recIt = new IntervalVcfIt<>(recIt, chromInterval);
+         }
+         while (recIt.hasNext()) {
+             VcfRecord rec = recIt.next();
+             kept.add(rec.marker());
+         }
+         recIt.close();
+     }
+     return Filter.includeFilter(kept);
+ }
+
+ /*
+  * Returns null if no "map" file is specified. Otherwise returns the
+  * genetic map read from the PLINK map file, restricted to the chromosome
+  * named in the "chrom" parameter when that parameter names one.
+  */
+ private static GeneticMap geneticMap(Par par) {
+     if (par.map() == null) {
+         return null;
+     }
+     String chrom = extractChrom(par.chrom());
+     if (chrom != null) {
+         return PlinkGeneticMap.fromPlinkMapFile(par.map(), chrom);
+     }
+     return PlinkGeneticMap.fromPlinkMapFile(par.map());
+ }
+
+ /*
+  * Returns the chromosome identifier of the "chrom" parameter: the text
+  * before the first Const.colon, or the whole parameter if it contains no
+  * colon. Returns null if the parameter is null or empty.
+  */
+ private static String extractChrom(String chromParameter) {
+     if (chromParameter == null || chromParameter.isEmpty()) {
+         return null;
+     }
+     int index = chromParameter.indexOf(Const.colon);
+     // substring() already returns an independent String, so wrapping it
+     // in "new String(...)" only made a redundant copy
+     return (index == -1) ? chromParameter
+             : chromParameter.substring(0, index);
+ }
+
+ /*
+ * Checks that certain parameters are consistent, and prints error
+ * message and exits if parameters are inconsistent.
+ *
+ * @param args the command line arguments.
+ */
+ private static Par parameters(String[] args) {
+ // warnings are printed in RunStats.startInfo() method
+ Par par = new Par(args);
+ checkForOneInputFile(par);
+ checkOutputPrefix(par);
+ if (par.overlap() >= par.window()/2) {
+ String s = shortHelp + Const.nl
+ + Const.nl + "ERROR: The \"window\" parameter must be at least "
+ + "two times the \"overlap\" parameter"
+ + Const.nl + "Exiting program.";
+ Utilities.exit(s);
+ }
+ if (par.chrom()!=null && ChromInterval.parse(par.chrom())==null) {
+ String s = shortHelp + Const.nl
+ + Const.nl + "ERROR: invalid \"chrom\" parameter: \""
+ + par.chrom() + "\""
+ + Const.nl + "Exiting program.";
+ Utilities.exit(s);
+ }
+ if (par.ibd()==true && par.ref()!=null && par.impute()) {
+ String s = shortHelp + Const.nl
+ + Const.nl + "ERROR: The \"impute=false\" argument is "
+ + "required when a reference panel"
+ + Const.nl + " is specified and \"ibd=true\"."
+ + Const.nl + "Exiting program.";
+ Utilities.exit(s);
+ }
+ return par;
+ }
+
+ /*
+  * Passes the usage and an error message to Utilities.exit() unless
+  * exactly one of the "gt", "gl" and "gtgl" parameters is specified.
+  */
+ private static void checkForOneInputFile(Par par) {
+     int nInputs = (par.gt() != null ? 1 : 0)
+             + (par.gl() != null ? 1 : 0)
+             + (par.gtgl() != null ? 1 : 0);
+     if (nInputs != 1) {
+         String s = "ERROR: exactly one \"gt\", \"gl\" or \"gtgl\" "
+                 + "parameter is required.";
+         Utilities.exit(Par.usage() + s);
+     }
+ }
+
+ /*
+  * Passes the usage and an error message to Utilities.exit() if the
+  * "out" parameter is a directory, or if the VCF output file
+  * (the "out" prefix followed by ".vcf.gz") equals the "ref", "gt",
+  * "gl" or "gtgl" input file.
+  */
+ private static void checkOutputPrefix(Par par) {
+     if (new File(par.out()).isDirectory()) {
+         String s = "ERROR: \"out\" parameter cannot be a directory: \""
+                 + par.out() + "\"";
+         Utilities.exit(Par.usage() + s);
+     }
+
+     File vcfOut = new File(par.out() + ".vcf.gz");
+     // unspecified (null) input files never equal the output file
+     File[] inputs = {par.ref(), par.gt(), par.gl(), par.gtgl()};
+     for (File input : inputs) {
+         if (vcfOut.equals(input)) {
+             String s = "ERROR: VCF output file equals input file: " + input;
+             Utilities.exit(Par.usage() + s);
+         }
+     }
+ }
+
+ /*
+  * Advances the data to the next marker window if that is possible,
+  * and then prints a window update.
+  */
+ private void advanceWindow() {
+     if (data.canAdvanceWindow()) {
+         int overlap = par.overlap();
+         int window = par.window();
+         data.advanceWindow(overlap, window);
+     }
+     runStats.printWindowUpdate(data);
+ }
+}
diff --git a/main/MainHelper.java b/main/MainHelper.java
new file mode 100644
index 0000000..57e0095
--- /dev/null
+++ b/main/MainHelper.java
@@ -0,0 +1,261 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import blbutil.Const;
+import blbutil.IntPair;
+import dag.Dag;
+import dag.MergeableDag;
+import haplotype.BasicHapPairs;
+import haplotype.BasicSampleHapPairs;
+import haplotype.ConsensusPhaser;
+import haplotype.GLSampleHapPairs;
+import haplotype.GenotypeCorrection;
+import haplotype.HapPair;
+import haplotype.HapPairs;
+import haplotype.SampleHapPairs;
+import haplotype.WrappedHapPair;
+import ibd.HaploidIbd;
+import ibd.IbdSegment;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import vcf.GL;
+import vcf.MaskedEndsGL;
+import vcf.NoPhaseGL;
+
+/**
+ * Class {@code MainHelper} is an auxiliary class with methods called by
+ * the {@code main.Main} class.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class MainHelper {
+
+ private final Par par;
+ private final HapPairSampler hapSampler;
+ private final RecombHapPairSampler recombSampler;
+ private final GeneticMap genMap;
+ private final RunStats runStats;
+
+ /**
+  * Constructs a new {@code MainHelper} instance.
+  * @param par the command line parameters
+  * @param genMap the genetic map, or {@code null} if no genetic map
+  * is specified
+  * @param runStats the class for collecting and printing run-time statistics
+  * @throws NullPointerException
+  * if {@code (par == null || runStats == null)}
+  */
+ MainHelper(Par par, GeneticMap genMap, RunStats runStats) {
+     if (runStats==null) {
+         throw new NullPointerException("runStats==null");
+     }
+     // make the documented precondition explicit rather than relying on
+     // the sampler constructors to reject a null argument
+     if (par==null) {
+         throw new NullPointerException("par==null");
+     }
+     this.par = par;
+     this.hapSampler = new HapPairSampler(par, runStats);
+     this.recombSampler = new RecombHapPairSampler(par, runStats);
+     // genMap is stored as given: Main passes null when no map file is
+     // specified, and LSImpute() substitutes a PositionMap in that case
+     this.genMap = genMap;
+     this.runStats = runStats;
+ }
+
+ /**
+  * Phases the current window of genotype data. If the target data
+  * are reference data, the target genotypes are returned unchanged as
+  * haplotype pairs. Otherwise the burn-in, phasing and recombination
+  * iterations requested by the parameters are run.
+  * @param cd the current window of data
+  * @param gv the current scaled genotype probabilities for the target
+  * samples or {@code null} if genotype probabilities are not to be estimated
+  * @return the phased genotype data
+  * @throws IllegalArgumentException if
+  * {@code gv != null && gv.markers().equals(cd.targetMarkers()) == false}
+  * @throws IllegalArgumentException if
+  * {@code gv != null && gv.samples().equals(cd.targetSamples()) == false}
+  * @throws NullPointerException if {@code cd == null}
+  */
+ SampleHapPairs phase(CurrentData cd, GenotypeValues gv) {
+     checkParameters(cd, gv);
+     if (cd.targetGL().isRefData()) {
+         return new GLSampleHapPairs(cd.targetGL());
+     }
+
+     List<HapPair> haps = hapSampler.initialHaps(cd);
+     if (par.burnin_its() > 0) {
+         runStats.println(Const.nl + "Starting burn-in iterations");
+         haps = runBurnin1(cd, haps);
+     }
+     if (par.phase_its() > 0) {
+         // genotype values are accumulated here only if there is no "gt"
+         // file and no later iterations will be run
+         boolean estGprobs = (par.gt() == null && par.niterations() == 0);
+         GenotypeValues burninGV = estGprobs ? gv : null;
+         haps = runBurnin2(cd, haps, burninGV);
+     }
+     if (par.niterations() <= 0) {
+         haps = ConsensusPhaser.run(haps);
+     }
+     else {
+         runStats.println(Const.nl + "Starting phasing iterations");
+         haps = runRecomb(cd, haps, gv);
+     }
+     return new BasicSampleHapPairs(cd.targetSamples(), haps);
+ }
+
+ /*
+  * Throws an IllegalArgumentException if gv is non-null and its markers
+  * or samples differ from the target markers or target samples of cd.
+  */
+ private void checkParameters(CurrentData cd, GenotypeValues gv) {
+     if (gv == null) {
+         return;
+     }
+     boolean sameMarkers = gv.markers().equals(cd.targetMarkers());
+     if (!sameMarkers || !gv.samples().equals(cd.targetSamples())) {
+         throw new IllegalArgumentException(String.valueOf(gv));
+     }
+ }
+
+ private List<HapPair> runBurnin1(CurrentData cd, List<HapPair> hapPairs) {
+ GenotypeValues gv = null;
+ for (int j=0; j<par.burnin_its(); ++j) {
+ boolean useRevDag = (j & 1)==1;
+ hapPairs = hapSampler.sample(cd, hapPairs, useRevDag, gv);
+ runStats.printIterationUpdate(cd.window(), j+1);
+ }
+ return hapPairs;
+ }
+
+ /*
+  * Runs the "phase_its" iterations that follow the burn-in iterations
+  * and returns the haplotype pairs sampled in all of them. Iteration
+  * numbering continues from the burn-in, and odd-numbered iterations
+  * use the reverse DAG.
+  */
+ private List<HapPair> runBurnin2(CurrentData cd, List<HapPair> hapPairs,
+         GenotypeValues gv) {
+     List<HapPair> sampled = new ArrayList<>();
+     List<HapPair> current = hapPairs;
+     int first = par.burnin_its();
+     int last = first + par.phase_its();
+     for (int it = first; it < last; ++it) {
+         boolean useRevDag = (it % 2) != 0;
+         current = hapSampler.sample(cd, current, useRevDag, gv);
+         runStats.printIterationUpdate(cd.window(), it + 1);
+         sampled.addAll(current);
+     }
+     return sampled;
+ }
+
+ /*
+  * Reduces the specified haplotype pairs with the consensus phaser, runs
+  * the "niterations" iterations with the recombination sampler, and
+  * returns the consensus of all haplotype pairs sampled in those
+  * iterations after genotype correction.
+  */
+ private List<HapPair> runRecomb(CurrentData cd, List<HapPair> hapPairs,
+         GenotypeValues gv) {
+     List<HapPair> current = ConsensusPhaser.run(hapPairs);
+     List<HapPair> sampled = new ArrayList<>();
+     int first = par.burnin_its() + par.phase_its();
+     int last = first + par.niterations();
+     for (int it = first; it < last; ++it) {
+         boolean useRevDag = (it % 2) != 0;
+         current = recombSampler.sample(cd, current, useRevDag, gv);
+         runStats.printIterationUpdate(cd.window(), it + 1);
+         sampled.addAll(current);
+     }
+     List<HapPair> consensus = ConsensusPhaser.run(sampled);
+     return correctGenotypes(cd, consensus);
+ }
+
+ /*
+  * Runs GenotypeCorrection on the specified haplotype pairs using the
+  * target genotypes wrapped in a MaskedEndsGL built from the previous
+  * and next target splice points. Returns the same list that was passed in.
+  */
+ private List<HapPair> correctGenotypes(CurrentData cd, List<HapPair> hapPairs) {
+     int prevSplice = cd.prevTargetSplice();
+     int nextSplice = cd.nextTargetSplice();
+     GL maskedGL = new MaskedEndsGL(cd.targetGL(), prevSplice, nextSplice);
+ //    File outFile = new File(par.out() + ".gterr");
+ //    boolean append = cd.window() > 1;
+ //    GenotypeCorrection.run(hapPairs, modGL, par.seed(), outFile, append);
+     GenotypeCorrection.run(hapPairs, maskedGL, par.seed());
+     return hapPairs;
+ }
+
+ /**
+  * Applies the refined IBD algorithm to the specified data if the
+  * "ibd" parameter is set, and records the elapsed time in the
+  * run-time statistics.
+  * @param cd the current window of data
+  * @param targetHapPairs the estimated haplotype pairs
+  * @return the detected IBD segments, or {@code null} if the "ibd"
+  * parameter is not set
+  * @throws NullPointerException if
+  * {@code cd == null || targetHapPairs == null}
+  */
+ Map<IntPair, List<IbdSegment>> refinedIbd(CurrentData cd,
+         SampleHapPairs targetHapPairs) {
+     if (!par.ibd()) {
+         return null;
+     }
+     long startNanos = System.nanoTime();
+     int nSamples = cd.nRefSamples() + cd.nTargetSamples();
+     float scale = par.adjustedIbdScale(nSamples);
+
+     Dag dag = ibdDag(cd, targetHapPairs, scale);
+     HaploidIbd hapIbd = new HaploidIbd(par.ibdtrim(), par.ibdlod());
+     GL ibdGL = new NoPhaseGL(cd.targetGL());
+
+     Map<IntPair, List<IbdSegment>> segments
+             = hapIbd.run(ibdGL, dag, targetHapPairs, par.nthreads());
+     long elapsed = System.nanoTime() - startNanos;
+     runStats.ibdNanos(elapsed);
+     runStats.printRefinedIbdUpdate(scale, dag, elapsed);
+     return segments;
+ }
+
+ /*
+  * Builds the DAG used for IBD detection. With no reference samples the
+  * DAG is built from the target haplotypes and their weights. Otherwise
+  * it is built from the restricted reference haplotype pairs followed by
+  * the target haplotype pairs, with weight 1 for every reference
+  * haplotype. The build time and DAG statistics are recorded.
+  */
+ private Dag ibdDag(CurrentData cd, SampleHapPairs targetHaps,
+         float scale) {
+     float[] targetWeights = cd.weights().get(targetHaps);
+     HapPairs dagHaps = targetHaps;
+     float[] dagWeights = targetWeights;
+     if (cd.nRefSamples() != 0) {
+         List<HapPair> combined = new ArrayList<>(cd.nAllSamples());
+         cd.addRestrictedRefHapPairs(combined);
+         int nTarget = targetHaps.nSamples();
+         for (int s = 0; s < nTarget; ++s) {
+             combined.add(new WrappedHapPair(targetHaps, s));
+         }
+         dagHaps = new BasicHapPairs(combined);
+         // reference haplotypes come first, two per reference sample
+         int nRefHaps = 2*cd.nRefSamples();
+         dagWeights = new float[dagHaps.nHaps()];
+         Arrays.fill(dagWeights, 0, nRefHaps, 1.0f);
+         System.arraycopy(targetWeights, 0, dagWeights, nRefHaps,
+                 targetWeights.length);
+     }
+     long startNanos = System.nanoTime();
+     int nInitLevels = 500;
+     Dag dag = MergeableDag.dag(dagHaps, dagWeights, scale, nInitLevels);
+     runStats.buildNanos(System.nanoTime() - startNanos);
+     runStats.setDagStats(dag);
+     return dag;
+ }
+
+ /**
+  * Performs genotype imputation. If every marker in the window is a
+  * target marker, or if the "impute" parameter is not set, the specified
+  * haplotype pairs are returned wrapped as allele probabilities. When no
+  * genetic map is specified, a position-based map with scale factor
+  * {@code 1e-6} is used for imputation.
+  * @param cd the current window of data
+  * @param shp the estimated target haplotype pairs.
+  * @return imputed haplotypes
+  * @throws NullPointerException if {@code cd == null || shp == null}
+  */
+ AlleleProbs LSImpute(CurrentData cd, SampleHapPairs shp) {
+     if (cd.nMarkers() == cd.nTargetMarkers() || !par.impute()) {
+         return new SampleHapPairAlleleProbs(shp);
+     }
+     long startNanos = System.nanoTime();
+     GeneticMap imputationMap;
+     if (par.map() != null) {
+         imputationMap = genMap;
+     }
+     else {
+         double scaleFactor = 1e-6;
+         imputationMap = new PositionMap(scaleFactor);
+     }
+
+     LiAndStephensHapSampler sampler
+             = new LiAndStephensHapSampler(par, imputationMap);
+     BasicAlleleProbs alProbs = sampler.sample(cd, shp);
+     runStats.imputationNanos(System.nanoTime() - startNanos);
+     runStats.printImputationUpdate();
+     return alProbs;
+ }
+}
diff --git a/main/NuclearFamilies.java b/main/NuclearFamilies.java
new file mode 100644
index 0000000..0f8e066
--- /dev/null
+++ b/main/NuclearFamilies.java
@@ -0,0 +1,374 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import blbutil.FileIt;
+import blbutil.InputIt;
+import blbutil.StringUtil;
+import java.io.File;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code NuclearFamilies} stores parent-offspring relationships
+ * in a list of samples. In particular, class {@code NuclearFamilies}
+ * stores a list of the single individuals in the list of samples,
+ * a list of the parent-offspring duos in the list of samples, and a list of
+ * the parent-offspring trios in the list of samples. A single individual is
+ * an individual without a parent or offspring in the list of samples.
+ * </p>
+ * <p>Instances of class {@code NuclearFamilies} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class NuclearFamilies {
+
+ private final Samples samples;
+
+ private final int[] single;
+ private final int[] duoOffspring;
+ private final int[] trioOffspring;
+
+ private final int[] mother;
+ private final int[] father;
+
+ /**
+ * Constructs a new {@code NuclearFamilies} instance.
+ *
+ * @param samples the list of samples.
+ * @param pedFile a linkage-format pedigree file, or {@code null}
+ * if no pedigree relationships are known. A pedigree file must have
+ * at least 4 white-space delimited columns. The first column of the
+ * pedigree file (family ID) is ignored. The second, third, and fourth
+ * columns are the individual's ID, the individual's father's ID, and
+ * the individual's mother's ID respectively.
+ *
+ * @throws IllegalArgumentException if a pedigree file is specified,
+ * and if the file has a non-blank line with less than 4 white-space
+ * delimited fields
+ * @throws IllegalArgumentException if a pedigree file is specified,
+ * and if the file has duplicate individual identifiers in the
+ * second white-space delimited column
+ * @throws NullPointerException if {@code samples == null}
+ */
+ public NuclearFamilies(Samples samples, File pedFile) {
+ this.samples = samples;
+ this.father = new int[samples.nSamples()];
+ this.mother = new int[samples.nSamples()];
+ boolean[] isParent = new boolean[samples.nSamples()];
+ Arrays.fill(father, -1);
+ Arrays.fill(mother, -1);
+ if (pedFile != null) {
+ identifyParents(samples, pedFile, isParent, father, mother);
+ }
+ int[] cnts = counts(isParent, father, mother);
+ this.single = new int[cnts[0]];
+ this.duoOffspring = new int[cnts[1]];
+ this.trioOffspring = new int[cnts[2]];
+ fillArrays(samples, isParent, father, mother, single, duoOffspring,
+ trioOffspring);
+ }
+
+ private static void identifyParents(Samples samples, File pedFile,
+ boolean[] isParent, int[] father, int[] mother) {
+ String MISSING_PARENT = "0";
+ boolean[] idHasBeenProcessed = new boolean[samples.nSamples()];
+ try (FileIt<String> pedIt=InputIt.fromGzipFile(pedFile)) {
+ while (pedIt.hasNext()) {
+ String line = pedIt.next().trim();
+ if (line.length() > 0) {
+ String[] fields = getPedFields(line);
+ String offspringId = fields[1];
+ String fatherId = fields[2];
+ String motherId = fields[3];
+ int offspring = samples.index(offspringId);
+ if (offspring != -1) {
+ if (idHasBeenProcessed[offspring]) {
+ String s = "duplicate sample in pedigree file: "
+ + offspringId;
+ throw new IllegalArgumentException(s);
+ }
+ else {
+ idHasBeenProcessed[offspring] = true;
+ }
+ if (fatherId.equals(MISSING_PARENT)==false) {
+ int sampleIndex = samples.index(fatherId);
+ if (sampleIndex != -1) {
+ isParent[sampleIndex] = true;
+ father[offspring] = sampleIndex;
+ }
+ }
+ if (motherId.equals(MISSING_PARENT)==false) {
+ int sampleIndex = samples.index(motherId);
+ if (sampleIndex != -1) {
+ isParent[sampleIndex] = true;
+ mother[offspring] = sampleIndex;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*
+  * Returns the fields of a pedigree file line as returned by
+  * StringUtil.getFields(line, 5). Throws an IllegalArgumentException
+  * if fewer than 4 fields are returned.
+  */
+ private static String[] getPedFields(String line) {
+     String[] fields = StringUtil.getFields(line, 5);
+     if (fields.length >= 4) {
+         return fields;
+     }
+     throw new IllegalArgumentException("invalid line in ped file: " + line);
+ }
+
+ /*
+  * Returns an array of length 3 whose elements are the number of single
+  * individuals (no parent in the sample and not a parent), the number of
+  * individuals with one parent in the sample, and the number of
+  * individuals with two parents in the sample.
+  */
+ private int[] counts(boolean[] isParent, int[] fathers, int[] mothers) {
+     assert isParent.length==fathers.length;
+     assert isParent.length==mothers.length;
+     int[] cnts = new int[3];
+     for (int j = 0; j < isParent.length; ++j) {
+         int nParents = (fathers[j] >= 0 ? 1 : 0) + (mothers[j] >= 0 ? 1 : 0);
+         if (nParents > 0) {
+             // increment duo count, cnts[1], or trio count, cnts[2]
+             ++cnts[nParents];
+         }
+         else if (!isParent[j]) {
+             // increment single count, cnts[0]
+             ++cnts[0];
+         }
+     }
+     return cnts;
+ }
+
+ private static void fillArrays(Samples samples, boolean[] isParent,
+ int[] father, int[] mother, int[] single,
+ int[] duoOffspring, int[] trioOffspring) {
+ int singleIndex = 0;
+ int duoIndex = 0;
+ int trioIndex = 0;
+ for (int j=0, n=samples.nSamples(); j<n; ++j) {
+ int nParents = nParents(j, father, mother);
+ switch (nParents) {
+ case 0:
+ if (isParent[j]==false) {
+ single[singleIndex++] = j;
+ }
+ break;
+ case 1:
+ duoOffspring[duoIndex++] = j;
+ break;
+ case 2:
+ trioOffspring[trioIndex++] = j;
+ break;
+ default:
+ assert false;
+ }
+ }
+ assert singleIndex==single.length;
+ assert duoIndex==duoOffspring.length;
+ assert trioIndex==trioOffspring.length;
+ }
+
+ /*
+  * Returns the number (0, 1 or 2) of non-negative entries at the
+  * specified index of the father and mother arrays.
+  */
+ private static int nParents(int index, int[] father, int[] mother) {
+     int hasFather = (father[index] >= 0) ? 1 : 0;
+     int hasMother = (mother[index] >= 0) ? 1 : 0;
+     return hasFather + hasMother;
+ }
+
+ /**
+  * Returns the list of samples that was specified at construction.
+  * @return the list of samples
+  */
+ public Samples samples() {
+     return this.samples;
+ }
+
+ /**
+  * Returns the number of samples in the list of samples.
+  * @return the number of samples
+  */
+ public int nSamples() {
+     return this.samples.nSamples();
+ }
+
+ /**
+  * Returns the number of single individuals in the list of samples.
+  * A single individual has no parent or offspring in the list of samples.
+  * @return the number of single individuals in the list of samples
+  */
+ public int nSingles() {
+     return this.single.length;
+ }
+
+ /**
+  * Returns the number of parent-offspring duos in the list of samples.
+  * The offspring of a parent-offspring duo has exactly one parent
+  * in the list of samples.
+  * @return the number of parent-offspring duos in the list of samples
+  */
+ public int nDuos() {
+     return this.duoOffspring.length;
+ }
+
+ /**
+  * Returns the number of parent-offspring trios in the list of samples.
+  * The offspring of a parent-offspring trio has both parents
+  * in the list of samples.
+  * @return the number of parent-offspring trios in the list of samples
+  */
+ public int nTrios() {
+     return this.trioOffspring.length;
+ }
+
+ /**
+  * Returns the sample index of the specified single individual.
+  * A single individual has no parent or offspring in the list of
+  * samples.
+  * @param index the index of a single individual
+  * @return the sample index of the specified single individual
+  * @throws IndexOutOfBoundsException if
+  * {@code index < 0 || index >= this.nSingles()}
+  */
+ public int single(int index) {
+     return this.single[index];
+ }
+
+ /**
+ * Returns the sample index of the parent of the specified
+ * parent-offspring duo.
+ * @param index the index of a parent-offspring duo
+ * @return the sample index of the parent of the specified
+ * parent-offspring duo
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nDuos()}
+ */
+ public int duoParent(int index) {
+ int offspring = duoOffspring[index];
+ if (father[offspring]>=0) {
+ return father[offspring];
+ }
+ else {
+ assert mother[offspring]>=0;
+ return mother[offspring];
+ }
+ }
+
+ /**
+  * Returns the sample index of the offspring of the specified
+  * parent-offspring duo.
+  * @param index the index of a parent-offspring duo
+  * @return the sample index of the offspring of the specified
+  * parent-offspring duo
+  * @throws IndexOutOfBoundsException if
+  * {@code index < 0 || index >= this.nDuos()}
+  */
+ public int duoOffspring(int index) {
+     return this.duoOffspring[index];
+ }
+
+ /**
+ * Returns the sample index of the father of the specified
+ * parent-offspring trio.
+ * @param index the index of a parent-offspring trio
+ * @return the sample index of the father of the specified
+ * parent-offspring trio
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nTrios()}
+ */
+ public int trioFather(int index) {
+ return father[trioOffspring[index]];
+ }
+
+ /**
+ * Returns the sample index of the mother of the specified
+ * parent-offspring trio.
+ * @param index the index of a parent-offspring trio
+ * @return the sample index of the mother of the specified
+ * parent-offspring trio
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nTrios()}
+ */
+ public int trioMother(int index) {
+ return mother[trioOffspring[index]];
+ }
+
+ /**
+  * Returns the sample index of the offspring of the specified
+  * parent-offspring trio.
+  * @param index the index of a parent-offspring trio
+  * @return the sample index of the offspring of the specified
+  * parent-offspring trio
+  * @throws IndexOutOfBoundsException if
+  * {@code index < 0 || index >= this.nTrios()}
+  */
+ public int trioOffspring(int index) {
+     return this.trioOffspring[index];
+ }
+
+ /**
+  * Returns the sample index of the father of the specified sample,
+  * or returns {@code -1} if the father is unknown or is not present
+  * in the list of samples.
+  * @param sample a sample index
+  * @return the sample index of the father of the specified sample,
+  * or {@code -1} if the father is unknown or is not present in
+  * the list of samples
+  * @throws IndexOutOfBoundsException if
+  * {@code sample < 0 || sample >= this.nSamples()}
+  */
+ public int father(int sample) {
+     return this.father[sample];
+ }
+
+ /**
+  * Returns the sample index of the mother of the specified sample,
+  * or returns {@code -1} if the mother is unknown or is not present
+  * in the list of samples.
+  * @param sample a sample index
+  * @return the sample index of the mother of the specified sample,
+  * or {@code -1} if the mother is unknown or is not present
+  * in the list of samples
+  * @throws IndexOutOfBoundsException if
+  * {@code sample < 0 || sample >= this.nSamples()}
+  */
+ public int mother(int sample) {
+     return this.mother[sample];
+ }
+
+ /**
+  * Returns a string representation of {@code this}. The current
+  * representation is the class description, but the exact details
+  * are unspecified and subject to change.
+  * @return a string representation of {@code this}
+  */
+ @Override
+ public String toString() {
+     final Class<?> type = this.getClass();
+     return type.toString();
+ }
+}
diff --git a/main/Par.java b/main/Par.java
new file mode 100644
index 0000000..bd72cef
--- /dev/null
+++ b/main/Par.java
@@ -0,0 +1,550 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import blbutil.Const;
+import blbutil.Validate;
+import java.io.File;
+import java.util.Map;
+
+/**
+ * <p>Class {@code Par} represents the parameters for a Beagle analysis.
+ * </p>
+ * <p>Instances of class {@code Par} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class Par {
+
+ private final String[] args;
+
+ // data input/output parameters
+ private final File gt;
+ private final File gl;
+ private final File gtgl;
+ private final File ref;
+ private final File dag;
+ private final String out;
+ private final File excludesamples;
+ private final File excludemarkers;
+ private final File ped;
+ private final File map;
+ private final String chrom;
+ private final float maxlr;
+
+ // algorithm parameters
+ private final int nthreads;
+ private final boolean lowmem;
+ private final int window;
+ private final int overlap;
+ private final boolean impute;
+ private final boolean gprobs;
+ private final int niterations;
+ private final float mapscale;
+ private final float ne;
+ private final float err;
+ private final float cluster;
+ private final long seed;
+
+ // ibd parameters
+ private final boolean ibd;
+ private final float ibdlod;
+ private final float ibdscale;
+ private final int ibdtrim;
+
+ // expert parameters
+ private final float modelscale;
+
+ // undocumented parameters
+ private final int burnin_its;
+ private final int phase_its;
+ private final int nsamples;
+ private final float ibdlength;
+ private final float ibdextend;
+
+ /**
+ * Constructs a new {@code Par} instance from the specified
+ * command line arguments.
+ * @param args the Beagle command line arguments
+ * @throws IllegalArgumentException if a command line argument
+ * is incorrectly specified
+ * @throws NumberFormatException if a numeric value for a parameter
+ * is incorrectly specified
+ */
+ public Par(String[] args) {
+
+ // Shorthand bounds passed to the Validate.*Arg range checks below.
+ // Float.MIN_VALUE is the smallest positive float (not the most negative
+ // one), so FMIN is used as the lower bound for parameters that are
+ // expected to be positive.
+ int IMAX = Integer.MAX_VALUE;
+ long LMIN = Long.MIN_VALUE;
+ long LMAX = Long.MAX_VALUE;
+ float FMIN = Float.MIN_VALUE;
+ float FMAX = Float.MAX_VALUE;
+
+ // Keep a private copy so later changes to the caller's array are not seen.
+ this.args = args.clone();
+ Map<String, String> argsMap = Validate.argsToMap(args, '=');
+
+ // data input/output parameters
+ gt = Validate.getFile(
+ Validate.stringArg("gt", argsMap, false, null, null));
+ gl = Validate.getFile(
+ Validate.stringArg("gl", argsMap, false, null, null));
+ gtgl = Validate.getFile(
+ Validate.stringArg("gtgl", argsMap, false, null, null));
+ ref = Validate.getFile(
+ Validate.stringArg("ref", argsMap, false, null, null));
+ dag = Validate.getFile(
+ Validate.stringArg("dag", argsMap, false, null, null));
+ out = Validate.stringArg("out", argsMap, true, null, null);
+ excludesamples = Validate.getFile(
+ Validate.stringArg("excludesamples", argsMap, false, null, null));
+ excludemarkers = Validate.getFile(
+ Validate.stringArg("excludemarkers", argsMap, false, null, null));
+ ped = Validate.getFile(
+ Validate.stringArg("ped", argsMap, false, null, null));
+ map = Validate.getFile(Validate.stringArg("map", argsMap, false, null, null));
+ chrom = Validate.stringArg("chrom", argsMap, false, null, null);
+ maxlr = Validate.floatArg("maxlr", argsMap, false, 5000.0f, 1.1f, FMAX);
+
+ // algorithm parameters
+ // The IMAX default for nthreads is a sentinel that modNthreads() replaces
+ // with the number of available processors.
+ nthreads = modNthreads(Validate.intArg("nthreads", argsMap, false, IMAX, 0, IMAX));
+ // NOTE(review): the default passed here is true, but usage() advertises
+ // "(default=false)" for lowmem -- confirm which one is intended.
+ lowmem = Validate.booleanArg("lowmem", argsMap, false, true);
+ window = Validate.intArg("window", argsMap, false, 50000, 1, IMAX);
+ overlap = Validate.intArg("overlap", argsMap, false, 3000, 0, IMAX);
+ niterations = Validate.intArg("niterations", argsMap, false, 5, 0, IMAX);
+ impute = Validate.booleanArg("impute", argsMap, false, true);
+ gprobs = Validate.booleanArg("gprobs", argsMap, false, false);
+ ne = Validate.floatArg("ne", argsMap, false, 1_000_000f, FMIN, FMAX);
+ err = Validate.floatArg("err", argsMap, false, 0.0001f, 0.0f, FMAX);
+ cluster = Validate.floatArg("cluster", argsMap, false, 0.005f, 0.0f, FMAX);
+ seed = Validate.longArg("seed", argsMap, false, -99999, LMIN, LMAX);
+
+ // ibd parameters
+ ibd = Validate.booleanArg("ibd", argsMap, false, false);
+ ibdlod = Validate.floatArg("ibdlod", argsMap, false, 3.0f, FMIN, FMAX);
+ // An ibdscale of 0 makes adjustedIbdScale() compute the scale from the
+ // number of samples.
+ ibdscale = Validate.floatArg("ibdscale", argsMap, false, 0.0f, 0.0f, FMAX);
+ ibdtrim = Validate.intArg("ibdtrim", argsMap, false, 40, 0, IMAX);
+
+ // expert parameters
+ modelscale = Validate.floatArg("modelscale", argsMap, false, 0.8f, FMIN, FMAX);
+
+ // undocumented parameters
+ // burnin_its, phase_its and nsamples are fixed values; they are not read
+ // from the command line.
+ burnin_its = 5;
+ phase_its = 5;
+ nsamples = 4;
+ mapscale = Validate.floatArg("mapscale", argsMap, false, 1.0f, FMIN, FMAX);
+ ibdlength = Validate.floatArg("ibdlength", argsMap, false, 0.07f, FMIN, FMAX);
+ ibdextend = Validate.floatArg("ibdextend", argsMap, false, 0.13f, 0.0f, FMAX);
+ // Presumably rejects any argument left in the map that no parameter
+ // above claimed -- TODO confirm against Validate.confirmEmptyMap.
+ Validate.confirmEmptyMap(argsMap);
+ }
+
+ /**
+ * Returns the Beagle command line arguments.
+ * @return the Beagle command line arguments
+ */
+ public String[] args() {
+ return args.clone();
+ }
+
+ /**
+ * Returns a description of the possible Beagle command line arguments.
+ * @return a description of the possible Beagle command line arguments
+ */
+ public static String usage() {
+ String nl = Const.nl;
+ return "Command line syntax: " + Main.command + " [arguments]" + nl
+ + nl
+ + "data input/output parameters ..." + nl
+ + " gt=<VCF file: use GT field> (optional)" + nl
+ + " gl=<VCF file: use GL/PL field> (optional)" + nl
+ + " gtgl=<VCF file: use GT (preferred) or GL/PL field> (optional)" + nl
+ + " ref=<VCF file with phased genotypes> (optional)" + nl
+ + " out=<output file prefix> (required)" + nl
+ + " excludesamples=<file with 1 sample ID per line> (optional)" + nl
+ + " excludemarkers=<file with 1 marker ID per line> (optional)" + nl
+// + " ped=<linkage format pedigree file> (optional)" + nl
+ + " map=<PLINK map file with cM units> (optional)" + nl
+ + " chrom=<[chrom] or [chrom]:[start]-[end]> (optional)" + nl
+ + " maxlr=<max GL/PL likelihood ratio> (default=5000)" + nl + nl
+
+ + "general parameters ..." + nl
+ + " nthreads=<number of threads> (default: machine-dependent)" + nl
+ + " lowmem=<use low-memory algorithm (true/false)> (default=false)" + nl
+ + " window=<markers per window> (default=50000)" + nl
+ + " overlap=<overlap between windows> (default=3000)" + nl
+ + " seed=<random seed> (default=-99999)" + nl + nl
+
+ + "phasing and imputation parameters ..." + nl
+ + " niterations=<number of phasing iterations> (default=5)" + nl
+ + " impute=<impute ungenotyped markers (true/false)> (default=true)" + nl
+ + " gprobs=<print GP field for imputed markers> (default=false)" + nl
+ + " ne=<effective population size> (default=1000000)" + nl
+ + " err=<allele miscall rate> (default=0.0001)" + nl
+ + " cluster=<max cM in a marker cluster> (default=0.005)" + nl + nl
+
+ + "IBD parameters ..." + nl
+ + " ibd=<perform IBD detection (true/false)> (default=false)" + nl
+ + " ibdlod=<min LOD score for reporting IBD> (default=3.0)" + nl
+ + " ibdscale=<model scale factor for Refined IBD> (default: data-dependent)" + nl
+ + " ibdtrim=<markers at each segment end> (default=40)" + nl;
+ }
+
+ /**
+ * Returns a sample-size-adjusted IBD scale parameter equal to
+ * {@code Math.max(2.0f, (float) Math.sqrt(nSamples/100.0))} if
+ * {@code this.ibdscale() == 0f}, and returns
+ * {@code this.ibdscale()} otherwise.
+ *
+ * @param nSamples the number of samples
+ * @return a sample-size-adjusted IBD scale parameter if
+ * {@code this.ibdscale() == 0f}, and {@code this.ibdscale()} otherwise
+ * @throws IllegalArgumentException if {@code nSamples <= 0}
+ */
+ public float adjustedIbdScale(int nSamples) {
+ if (nSamples <= 0) {
+ throw new IllegalArgumentException(String.valueOf(nSamples));
+ }
+ if (ibdscale==0) {
+ return Math.max(2.0f, (float) Math.sqrt(nSamples/100.0));
+ }
+ else {
+ return ibdscale;
+ }
+ }
+
+ /**
+ * Returns the nthreads parameter, which is equal to
+ * {@code Runtime.getRuntime().availableProcessors()} if
+ * {@code nthreads == Integer.MAX_VALUE}.
+ * @return the nthreads parameter
+ */
+ private static int modNthreads(int nthreads) {
+     // Integer.MAX_VALUE is the sentinel that requests one thread per
+     // available processor; every other value is passed through as-is.
+     return (nthreads != Integer.MAX_VALUE)
+             ? nthreads
+             : Runtime.getRuntime().availableProcessors();
+ }
+
+ // data input/output parameters
+
+ /**
+ * Returns the gt parameter or {@code null} if no gt parameter was
+ * specified.
+ * @return the gt parameter or {@code null} if no gt parameter was
+ * specified
+ */
+ public File gt() {
+ return gt;
+ }
+
+ /**
+ * Returns the gl parameter or {@code null} if no gl parameter was
+ * specified.
+ * @return the gl parameter or {@code null} if no gl parameter was
+ * specified
+ */
+ public File gl() {
+ return gl;
+ }
+
+ /**
+ * Returns the gtgl parameter or {@code null} if no gtgl parameter was
+ * specified.
+ * @return the gtgl parameter or {@code null} if no gtgl parameter was
+ * specified.
+ */
+ public File gtgl() {
+ return gtgl;
+ }
+
+ /**
+ * Returns the ref parameter or {@code null} if no ref parameter was
+ * specified.
+ * @return the ref parameter or {@code null} if no ref parameter was
+ * specified
+ */
+ public File ref() {
+ return ref;
+ }
+
+ /**
+ * Returns the dag parameter or {@code null} if no dag parameter was
+ * specified.
+ * @return the dag parameter or {@code null} if no dag parameter was
+ * specified
+ */
+ public File dag() {
+ return dag;
+ }
+
+ /**
+ * Returns the out parameter.
+ * @return the out parameter
+ */
+ public String out() {
+ return out;
+ }
+
+ /**
+ * Returns the excludesamples parameter or {@code null}
+ * if no excludesamples parameter was specified.
+ *
+ * @return the excludesamples parameter or {@code null}
+ * if no excludesamples parameter was specified
+ */
+ public File excludesamples() {
+ return excludesamples;
+ }
+
+ /**
+ * Returns the excludemarkers parameter or {@code null}
+ * if no excludemarkers parameter was specified.
+ *
+ * @return the excludemarkers parameter or {@code null}
+ * if no excludemarkers parameter was specified
+ */
+ public File excludemarkers() {
+ return excludemarkers;
+ }
+
+ /**
+ * Returns the ped parameter or {@code null}
+ * if no ped parameter was specified.
+ *
+ * @return the ped parameter or {@code null}
+ * if no ped parameter was specified
+ */
+ public File ped() {
+ return ped;
+ }
+
+ /**
+ * Returns the map parameter.
+ * @return the map parameter
+ */
+ public File map() {
+ return map;
+ }
+
+ /**
+ * Returns the chrom parameter or {@code null}
+ * if no chrom parameter was specified.
+ *
+ * @return the chrom parameter or {@code null}
+ * if no chrom parameter was specified
+ */
+ public String chrom() {
+ return chrom;
+ }
+
+ /**
+ * Returns the maxlr parameter.
+ * @return the maxlr parameter
+ */
+ public float maxlr() {
+ return maxlr;
+ }
+
+ // general parameters
+
+ /**
+ * Returns the nthreads parameter.
+ * @return the nthreads parameter
+ */
+ public int nthreads() {
+ return nthreads;
+ }
+
+ /**
+ * Returns the lowmem parameter.
+ * @return the lowmem parameter
+ */
+ public boolean lowmem() {
+ return lowmem;
+ }
+
+ /**
+ * Returns the window parameter.
+ * @return the window parameter
+ */
+ public int window() {
+ return window;
+ }
+
+ /**
+ * Returns the overlap parameter.
+ * @return the overlap parameter
+ */
+ public int overlap() {
+ return overlap;
+ }
+
+ /**
+ * Returns the seed parameter.
+ * @return the seed parameter
+ */
+ public long seed() {
+ return seed;
+ }
+
+ // phasing and imputation parameters
+
+ /**
+ * Returns the niterations parameter.
+ * @return the niterations parameter
+ */
+ public int niterations() {
+ return niterations;
+ }
+
+ /**
+ * Returns the impute parameter.
+ * @return the impute parameter
+ */
+ public boolean impute() {
+ return impute;
+ }
+
+ /**
+ * Returns the gprobs parameter.
+ * @return the gprobs parameter
+ */
+ public boolean gprobs() {
+ return gprobs;
+ }
+
+ /**
+ * Returns the ne parameter.
+ * @return the ne parameter
+ */
+ public float ne() {
+ return ne;
+ }
+
+ /**
+ * Returns the err parameter.
+ * @return the err parameter
+ */
+ public float err() {
+ return err;
+ }
+
+ /**
+ * Returns the cluster parameter.
+ * @return the cluster parameter
+ */
+ public float cluster() {
+ return cluster;
+ }
+
+ // ibd parameters
+
+ /**
+ * Returns the ibd parameter.
+ * @return the ibd parameter
+ */
+ public boolean ibd() {
+ return ibd;
+ }
+
+ /**
+ * Returns the ibdlod parameter.
+ * @return the ibdlod parameter
+ */
+ public float ibdlod() {
+ return ibdlod;
+ }
+
+ /**
+ * Returns the ibdscale parameter.
+ * @return the ibdscale parameter
+ */
+ public float ibdscale() {
+ return ibdscale;
+ }
+
+ /**
+ * Returns the ibdtrim parameter.
+ * @return the ibdtrim parameter
+ */
+ public int ibdtrim() {
+ return ibdtrim;
+ }
+
+ // expert parameters
+
+ /**
+ * Returns the modelscale parameter.
+ * @return the modelscale parameter
+ */
+ public float modelscale() {
+ return modelscale;
+ }
+
+ // undocumented parameters
+
+ /**
+ * Returns the burnin-its parameter.
+ * @return the burnin-its parameter
+ */
+ public int burnin_its() {
+ return burnin_its;
+ }
+
+ /**
+ * Returns the phase-its parameter.
+ * @return the phase-its parameter
+ */
+ public int phase_its() {
+ return phase_its;
+ }
+
+ /**
+ * Returns the nsamples parameter.
+ * @return the nsamples parameter
+ */
+ public int nsamples() {
+ return nsamples;
+ }
+
+ /**
+ * Returns the mapscale parameter.
+ * @return the mapscale parameter
+ */
+ public float mapscale() {
+ return mapscale;
+ }
+
+ /**
+ * Returns the ibdlength parameter.
+ * @return the ibdlength parameter
+ */
+ public float ibdlength() {
+ return ibdlength;
+ }
+
+ /**
+ * Returns the ibdextend parameter.
+ * @return the ibdextend parameter
+ */
+ public float ibdextend() {
+ return ibdextend;
+ }
+}
diff --git a/main/PlinkGeneticMap.java b/main/PlinkGeneticMap.java
new file mode 100644
index 0000000..e3a819a
--- /dev/null
+++ b/main/PlinkGeneticMap.java
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Marker;
+import beagleutil.ChromIds;
+import blbutil.FileIt;
+import blbutil.Filter;
+import blbutil.InputIt;
+import blbutil.StringUtil;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * <p>Class {@code PlinkGeneticMap} represents a genetic map derived
+ * from a PLINK map file with map positions in cM units for one or more
+ * chromosomes.
+ * </p>
+ * <p>Instances of class {@code PlinkGeneticMap} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class PlinkGeneticMap implements GeneticMap {
+
+ private final int[][] basePos;
+ private final double[][] genPos;
+
+ private PlinkGeneticMap(List<List<String>> chromList) {
+     // One row per chromosome index; each row is sized to that
+     // chromosome's number of map-file lines.
+     int nChrom = chromList.size();
+     this.basePos = new int[nChrom][];
+     this.genPos = new double[nChrom][];
+     int chromIndex = 0;
+     for (List<String> mapLines : chromList) {
+         int nLines = mapLines.size();
+         int[] chromBasePos = new int[nLines];
+         double[] chromGenPos = new double[nLines];
+         // Parses and validates the lines, filling both arrays in place.
+         fillMapPositions(mapLines, chromBasePos, chromGenPos);
+         basePos[chromIndex] = chromBasePos;
+         genPos[chromIndex] = chromGenPos;
+         ++chromIndex;
+     }
+ }
+
+ private static void fillMapPositions(List<String> list, int[] basePos,
+ double[] genPos) {
+ int n = list.size();
+ assert basePos.length==n && genPos.length==n;
+ for (int j=0; j<n; ++j) {
+ String[] fields = StringUtil.getFields(list.get(j));
+ if (fields.length!=4) {
+ String s = "Map file format error: " + list.get(j);
+ throw new IllegalArgumentException(s);
+ }
+ basePos[j] = Integer.parseInt(fields[3]);
+ genPos[j] = Double.parseDouble(fields[2]);
+ if (Double.isFinite(genPos[j])==false) {
+ String s = "invalid map position: " + genPos[j];
+ throw new IllegalArgumentException(s);
+ }
+ if (j>0) {
+ if (basePos[j]==basePos[j-1]) {
+ String s = "duplication position: " + list.get(j);
+ throw new IllegalArgumentException(s);
+ }
+ if (basePos[j]<basePos[j-1] || genPos[j]<genPos[j-1]) {
+ String s = "map positions not in ascending order: "
+ + list.get(j);
+ throw new IllegalArgumentException(s);
+ }
+ }
+ }
+ if (n>0 && genPos[0]==genPos[n-1]) {
+ String s = "Genetic map has only one map position: " + list.get(0);
+ throw new IllegalArgumentException(s);
+ }
+ }
+
+ /**
+ * Constructs and returns a new {@code PlinkGeneticMap} instance from
+ * the data in the specified file.
+ *
+ * @param mapFile a genetic map file in PLINK format with genetic map
+ * positions in cM units
+ * @return a new {@code PlinkGeneticMap} instance
+ *
+ * @throws IllegalArgumentException if any map position is infinite
+ * or {@code NaN}
+ * @throws NullPointerException if {@code mapFile == null}
+ * @throws NumberFormatException if the base position on any line of the map
+ * file is not a parsable integer
+ * @throws NumberFormatException if the genetic map position on any
+ * line of the map file is not a parsable double
+ * @throws IllegalArgumentException if a non-empty line of the specified
+ * genetic map file does not contain 4 fields
+ * @throws IllegalArgumentException if the map positions on each
+ * chromosome are not sorted in ascending order
+ * @throws IllegalArgumentException if there are duplicate
+ * base positions on a chromosome
+ * @throws IllegalArgumentException if all base positions on a chromosome
+ * have the same genetic map position
+ */
+ public static PlinkGeneticMap fromPlinkMapFile(File mapFile) {
+ Filter<String> chromFilter = Filter.acceptAllFilter();
+ return new PlinkGeneticMap(divideByChrom(mapFile, chromFilter));
+ }
+
+ /**
+ * Constructs and returns a new {@code PlinkGeneticMap} instance from
+ * the data in the specified file. The returned genetic map will contain
+ * only positions on the specified chromosome
+ *
+ * @param mapFile a genetic map file in PLINK format with genetic map
+ * positions in cM units
+ * @param chrom a chromosome
+ * @return a new {@code PlinkGeneticMap} instance
+ *
+ * @throws IllegalArgumentException if any map position is infinite or
+ * {@code NaN}.
+ * @throws NullPointerException if {@code mapFile == null || chrom == null}
+ * @throws NumberFormatException if the base position on a line of the map
+ * file that corresponds to the specified chromosome is not a parsable
+ * integer
+ * @throws NumberFormatException if the genetic map position on a line
+ * of the map file that corresponds to the specified chromosome is not
+ * a parsable double
+ * @throws IllegalArgumentException if a non-empty line of the specified
+ * genetic map file does not contain 4 fields
+ * @throws IllegalArgumentException if the map positions on the specified
+ * chromosome are not sorted in ascending order
+ * @throws IllegalArgumentException if there are duplicate base positions
+ * on the specified chromosome
+ * @throws IllegalArgumentException if all base positions on the
+ * specified chromosome have the same genetic map position
+ * @throws IllegalArgumentException if the specified chromosome does not
+ * have at least two distinct positions in the genetic map
+ */
+ public static PlinkGeneticMap fromPlinkMapFile(File mapFile, String chrom) {
+ chrom = chrom.trim();
+ Filter<String> chromFilter = singletonFilter(chrom);
+ return new PlinkGeneticMap(divideByChrom(mapFile, chromFilter));
+ }
+
+ /**
+ * Returns a filter that accepts only objects that are equal
+ * to the specified object.
+ * @param <E> the type of object that is filtered
+ * @param singleton the object that will be accepted
+ * @return a filter that accepts only objects that are equal
+ * to the specified object
+ * @throws NullPointerException if {@code singleton == null}
+ */
+ private static <E> Filter<E> singletonFilter(final E singleton) {
+ if (singleton==null) {
+ throw new NullPointerException("singleton==null");
+ }
+ return (E e) -> {
+ if (e==null) {
+ throw new NullPointerException("e==null");
+ }
+ return singleton.equals(e);
+ };
+ }
+
+ private static List<List<String>> divideByChrom(File mapFile,
+ Filter<String> chromFilter) {
+ int initialMapSize = 200;
+ List<List<String>> chromList = new ArrayList<>(25);
+ try (FileIt<String> it = InputIt.fromTextFile(mapFile)) {
+ while (it.hasNext()) {
+ String line = it.next();
+ String[] fields = StringUtil.getFields(line, 4);
+ if (fields.length > 0) {
+ if (fields.length < 4) {
+ String s = "Map file format error: " + line;
+ throw new IllegalArgumentException(s);
+ } else {
+ String chrom = fields[0];
+ if (chromFilter.accept(chrom)) {
+ int chromIndex = ChromIds.instance().getIndex(fields[0]);
+ while (chromIndex >= chromList.size()) {
+ chromList.add(new ArrayList<>(initialMapSize));
+ }
+ chromList.get(chromIndex).add(line);
+ }
+ }
+ }
+ }
+ }
+ return chromList;
+ }
+
+ private void checkChromIndex(int chrom) {
+ if (chrom < 0 || chrom >= ChromIds.instance().size()) {
+ throw new IndexOutOfBoundsException(String.valueOf(chrom));
+ }
+ if (chrom>=basePos.length || basePos[chrom].length == 0) {
+ String s = "missing genetic map for chromosome "
+ + ChromIds.instance().id(chrom);
+ throw new IllegalArgumentException(s);
+ }
+ }
+
+ /**
+ * Returns the number of mapped loci in this genetic map.
+ *
+ * @param chrom a chromosome index
+ * @return the number of mapped loci in this genetic map
+ * @throws IllegalArgumentException if this genetic map has no
+ * map positions for the specified chromosome
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ */
+ public int nMapPositions(int chrom) {
+ checkChromIndex(chrom);
+ return basePos[chrom].length;
+ }
+
+ /**
+ * Returns the specified base position
+ *
+ * @param chrom a chromosome index
+ * @param index a map position index
+ * @return the specified base position
+ *
+ * @throws IllegalArgumentException if this genetic map has no
+ * map positions for the specified chromosome
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nMapPositions(chrom)}
+ */
+ public int index2BasePos(int chrom, int index) {
+ checkChromIndex(chrom);
+ return basePos[chrom][index];
+ }
+
+ /**
+ * Returns the specified genetic map position
+ *
+ * @param chrom a chromosome index
+ * @param index a map position index
+ * @return the specified genetic map position
+ *
+ * @throws IllegalArgumentException if this genetic map has no
+ * map positions for the specified chromosome
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nMapPositions(chrom)}
+ */
+ public double index2GenPos(int chrom, int index) {
+ checkChromIndex(chrom);
+ return genPos[chrom][index];
+ }
+
+ /**
+  * Returns the index of the genetic map position that is closest to the
+  * specified base position. If the two neighboring map positions are
+  * equally close, the index of the larger base position is returned.
+  *
+  * @param chrom a chromosome index
+  * @param basePosition a base position
+  * @return the genetic map position index that is closest to the
+  * specified base position
+  *
+  * @throws IllegalArgumentException if this genetic map has no
+  * map positions for the specified chromosome
+  * @throws IndexOutOfBoundsException if
+  * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+  */
+ public int closestIndex(int chrom, int basePosition) {
+     checkChromIndex(chrom);
+     // All bounds below refer to the positions on this chromosome, not to
+     // the number of chromosomes (basePos.length).
+     int[] chromBasePos = basePos[chrom];
+     assert chromBasePos.length>=2;
+     int mapIndex = Arrays.binarySearch(chromBasePos, basePosition);
+     if (mapIndex >= 0) {
+         // Exact match on a mapped base position.
+         return mapIndex;
+     }
+     // Not found: binarySearch returned -(insertion point) - 1.
+     int insPt = -mapIndex-1;
+     if (insPt==0) {
+         // Before the first mapped position.
+         return 0;
+     }
+     if (insPt==chromBasePos.length) {
+         // After the last mapped position.
+         return chromBasePos.length-1;
+     }
+     // Compare distances to the two neighbors; long arithmetic avoids
+     // int overflow for widely separated positions.
+     long distInsPt = (long) chromBasePos[insPt] - basePosition;
+     long distInsPtM1 = (long) basePosition - chromBasePos[insPt-1];
+     return (distInsPt<=distInsPtM1) ? insPt : (insPt-1);
+ }
+
+ @Override
+ public double genPos(Marker marker) {
+ return genPos(marker.chromIndex(), marker.pos());
+ }
+
+ // Returns the genetic map position for the specified base position on
+ // the specified chromosome. A mapped base position returns its stored
+ // genetic position; any other position is computed by linear
+ // interpolation between the two flanking map entries, or by linear
+ // extrapolation from the first or last pair of entries when the position
+ // lies outside the mapped range.
+ @Override
+ public double genPos(int chrom, int basePosition) {
+ checkChromIndex(chrom);
+ assert basePos[chrom].length>=2;
+ assert basePos[chrom].length==genPos[chrom].length;
+ int index = Arrays.binarySearch(basePos[chrom], basePosition);
+ if (index>=0) {
+ // exact match on a mapped base position
+ return genPos[chrom][index];
+ } else {
+ // not found: binarySearch returned -(insertion point) - 1
+ int insPt = -index-1;
+ // Shift the insertion point inward at either end so that
+ // [insPt-1, insPt] is always a valid pair of map entries.
+ if (insPt==basePos[chrom].length) {
+ --insPt;
+ }
+ else if (insPt==0) {
+ ++insPt;
+ }
+ // Line through (a, fa) and (b, fb), evaluated at x.
+ int x = basePosition;
+ int a = basePos[chrom][insPt-1];
+ int b = basePos[chrom][insPt];
+ double fa = genPos[chrom][insPt-1];
+ double fb = genPos[chrom][insPt];
+ return fa + ( ( (double) (x-a)/ (double) (b-a)) * (fb-fa) );
+ }
+ }
+
+ // Returns the base position for the specified genetic map position on
+ // the specified chromosome. A genetic position found by the binary search
+ // returns the base position stored at the matching index; any other
+ // position is computed by linear interpolation between the two flanking
+ // map entries, or by linear extrapolation when the position lies outside
+ // the mapped range. The result is rounded to the nearest integer.
+ @Override
+ public int basePos(int chrom, double geneticPosition) {
+ checkChromIndex(chrom);
+ assert basePos[chrom].length>=2;
+ assert basePos[chrom].length==genPos[chrom].length;
+ int index = Arrays.binarySearch(genPos[chrom], geneticPosition);
+ if (index>=0) {
+ return basePos[chrom][index];
+ } else {
+ // not found: binarySearch returned -(insertion point) - 1
+ int insPt = -index-1;
+ if (insPt==genPos[chrom].length) {
+ // Beyond the last entry: step back to the last pair of entries
+ // with distinct genetic positions so that (b-a) below is non-zero.
+ --insPt;
+ while (genPos[chrom][insPt]==genPos[chrom][insPt-1]) {
+ --insPt;
+ }
+ }
+ else if (insPt==0) {
+ // Before the first entry: step forward to the first pair of
+ // entries with distinct genetic positions.
+ ++insPt;
+ while (genPos[chrom][insPt]==genPos[chrom][insPt-1]) {
+ ++insPt;
+ }
+ }
+ // Line through (a, fa) and (b, fb), evaluated at x.
+ double x = geneticPosition;
+ double a = genPos[chrom][insPt-1];
+ double b = genPos[chrom][insPt];
+ int fa = basePos[chrom][insPt-1];
+ int fb = basePos[chrom][insPt];
+ return fa + (int) Math.round( ((x-a)/(b-a)) * (fb-fa) );
+ }
+ }
+
+ @Override
+ public String toString() {
+ return this.getClass().toString();
+ }
+}
diff --git a/main/PositionMap.java b/main/PositionMap.java
new file mode 100644
index 0000000..a95a94c
--- /dev/null
+++ b/main/PositionMap.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.ChromIds;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code PositionMap} represents a genetic map obtained by
+ * multiplying chromosome position by a scale factor.
+ * </p>
+ * <p>Instances of class {@code PositionMap} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class PositionMap implements GeneticMap {
+
+ private final double scaleFactor;
+
+ /**
+ * Returns the scale factor that is multiplied by the chromosome position
+ * to obtain the corresponding genetic map position
+ * @return the scale factor.
+ */
+ public double scaleFactor() {
+ return scaleFactor;
+ }
+
+ /**
+ * Constructs a new {@code PositionMap} instance.
+ * @param scaleFactor the factor that is multiplied by
+ * a base position to obtain the corresponding genetic map position
+ * @throws IllegalArgumentException if
+ * {@code scaleFactor <= 0d || Double.isFinite(scaleFactor) == false}
+ */
+ public PositionMap(double scaleFactor) {
+ if (Double.isFinite(scaleFactor) == false || scaleFactor <= 0d) {
+ throw new IllegalArgumentException(String.valueOf(scaleFactor));
+ }
+ this.scaleFactor = scaleFactor;
+ }
+
+ /**
+  * Returns the base position obtained by dividing the specified genetic
+  * map position by the scale factor and rounding to the nearest integer.
+  *
+  * @param chrom a chromosome index
+  * @param geneticPosition a genetic map position
+  * @return the corresponding base position
+  * @throws IndexOutOfBoundsException if
+  * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+  * @throws IllegalArgumentException if the rounded base position does
+  * not fit in an {@code int}
+  */
+ @Override
+ public int basePos(int chrom, double geneticPosition) {
+     if (chrom < 0 || chrom >= ChromIds.instance().size()) {
+         throw new IndexOutOfBoundsException(String.valueOf(chrom));
+     }
+     long pos = Math.round(geneticPosition / scaleFactor);
+     // Check both ends of the int range: without the lower bound, a value
+     // below Integer.MIN_VALUE would be silently truncated by the cast.
+     if (pos < Integer.MIN_VALUE || pos > Integer.MAX_VALUE) {
+         throw new IllegalArgumentException(String.valueOf(pos));
+     }
+     return (int) pos;
+ }
+
+ @Override
+ public double genPos(Marker marker) {
+ return scaleFactor*marker.pos();
+ }
+
+ @Override
+ public double genPos(int chrom, int basePosition) {
+ if (chrom < 0 || chrom >= ChromIds.instance().size()) {
+ throw new IndexOutOfBoundsException(String.valueOf(chrom));
+ }
+ return scaleFactor*basePosition;
+ }
+}
diff --git a/main/RecombHapPairSampler.java b/main/RecombHapPairSampler.java
new file mode 100644
index 0000000..dcdc06f
--- /dev/null
+++ b/main/RecombHapPairSampler.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import blbutil.Utilities;
+import haplotype.HapPair;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import sample.ConsumeSingleSamples;
+import sample.RecombSingleBaum;
+import sample.SamplerData;
+import sample.SingleBaumInterface;
+
+/**
+ * <p>Class {@code RecombHapPairSampler} samples haplotype pairs and
+ * estimates posterior genotype probabilities using a haplotype frequency
+ * model that permits transitions between any two states at adjacent markers.
+ * </p>
+ * <p>Instances of class {@code RecombHapPairSampler} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RecombHapPairSampler {
+
+ private static final int nCopies = 4;
+
+ private final Par par;
+ private final RunStats runStats;
+ private double edgePairsPerMarker;
+
+ /**
+ * Constructs a new {@code RecombHapPairSampler} instance from the
+ * specified data.
+ * @param par the analysis parameters
+ * @param runStats the object to which run-time statistics will be written
+ * @throws NullPointerException if
+ * {@code par == null || runStats == null}
+ */
+ public RecombHapPairSampler(Par par, RunStats runStats) {
+ if (par==null) {
+ throw new NullPointerException("par");
+ }
+ if (runStats==null) {
+ throw new NullPointerException("runStats");
+ }
+ this.par = par;
+ this.runStats = runStats;
+ this.edgePairsPerMarker = 0;
+ }
+
+ /**
+ * Returns a list of sampled haplotype pairs. Haplotype pairs are
+ * sampled conditional on the observed genotype and a haplotype
+ * frequency model constructed from the specified {@code hapPairs}.
+ * The contract for this method is undefined if the specified
+ * {@code hapPairs} and {@code gv} are inconsistent with the input data
+ * contained in the {@code cd} parameter.
+ *
+ * @param cd the input data for the current marker window
+ * @param hapPairs the target haplotype pairs used to build the haplotype
+ * frequency model
+ * @param useRevDag {@code true} if the order of markers should
+ * be reversed when building the haplotype frequency model, and
+ * {@code false} otherwise
+ * @param gv the current scaled genotype probabilities for the target
+ * samples or {@code null} if genotype probabilities are not to be estimated
+ * @return the sampled haplotype pairs
+ *
+ * @throws IllegalArgumentException if {@code hapPairs.isEmpty() == true}
+ * @throws NullPointerException if {@code cd == null || hapPairs == null}
+ */
+ public List<HapPair> sample(CurrentData cd, List<HapPair> hapPairs,
+ boolean useRevDag, GenotypeValues gv) {
+ SamplerData samplerData = new SamplerData(par, cd, hapPairs, useRevDag,
+ runStats);
+ int nSampledHaps = nCopies*cd.nTargetSamples();
+ List<HapPair> sampledHaps = synchronizedEmptyList(nSampledHaps);
+ if (gv!=null) {
+ if (useRevDag) {
+ gv = new RevGenotypeValues(gv);
+ }
+ sample(samplerData, sampledHaps, gv);
+ }
+ else {
+ sample(samplerData, sampledHaps);
+ }
+ return new ArrayList<>(sampledHaps);
+ }
+
+ private static List<HapPair> synchronizedEmptyList(int capacity) {
+ List<HapPair> sampledHaps = new ArrayList<>(capacity);
+ sampledHaps = Collections.synchronizedList(sampledHaps);
+ return sampledHaps;
+ }
+
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ private void sample(SamplerData samplerData, List<HapPair> sampledHaps,
+ GenotypeValues gv) { // queues every sample index for nThreads consumer tasks, then waits for them to finish
+ long t0 = System.nanoTime(); // start of the interval reported to runStats at the end
+ int nThreads = samplerData.par().nthreads();
+ boolean markersAreReversed = samplerData.markersAreReversed();
+ Random rand = new Random(par.seed());
+ final BlockingQueue<Integer> qIn = new ArrayBlockingQueue<>(3*nThreads); // bounded queue of sample indices shared by all submitted tasks
+ ExecutorService es = Executors.newFixedThreadPool(nThreads);
+ for (int j=0; j<nThreads; ++j) { // one consumer task per pool thread
+ SingleBaumInterface sb = new RecombSingleBaum(samplerData,
+ rand.nextLong(), nCopies, par.lowmem()); // each instance receives its own value drawn from rand
+ es.submit(new ConsumeSingleSamples(markersAreReversed, sb, qIn,
+ sampledHaps, gv));
+ }
+ try {
+ for (int j=0, n=samplerData.nSamples(); j<n; ++j) {
+ qIn.put(j); // blocks while the queue is full
+ }
+ for (int j=0; j<nThreads; ++j) {
+ qIn.put(ConsumeSingleSamples.POISON); // one POISON per submitted task; presumably tells a consumer to stop -- confirm in ConsumeSingleSamples
+ }
+ es.shutdown(); // no new tasks are accepted; already-submitted tasks keep running
+ es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); // wait for the submitted tasks to complete
+ }
+ catch (Throwable e) { // also covers InterruptedException thrown by put() and awaitTermination()
+ Utilities.exit("ERROR", e);
+ }
+ runStats.sampleNanos(System.nanoTime() - t0); // elapsed nanoseconds for this call
+ }
+
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ private void sample(SamplerData samplerData, List<HapPair> sampledHaps) { // same flow as the three-argument overload, without genotype values
+ long t0 = System.nanoTime(); // start of the interval reported to runStats at the end
+ int nThreads = samplerData.par().nthreads();
+ boolean markersAreReversed = samplerData.markersAreReversed();
+ Random rand = new Random(par.seed());
+ final BlockingQueue<Integer> qIn = new ArrayBlockingQueue<>(3*nThreads); // bounded queue of sample indices shared by all submitted tasks
+ ExecutorService es = Executors.newFixedThreadPool(nThreads);
+ for (int j=0; j<nThreads; ++j) { // one consumer task per pool thread
+ SingleBaumInterface sb = new RecombSingleBaum(samplerData,
+ rand.nextLong(), nCopies, par.lowmem()); // each instance receives its own value drawn from rand
+ es.submit(new ConsumeSingleSamples(markersAreReversed, sb, qIn,
+ sampledHaps));
+ }
+ try {
+ for (int j=0, n=samplerData.nSamples(); j<n; ++j) {
+ qIn.put(j); // blocks while the queue is full
+ }
+ for (int j=0; j<nThreads; ++j) {
+ qIn.put(ConsumeSingleSamples.POISON); // one POISON per submitted task; presumably tells a consumer to stop -- confirm in ConsumeSingleSamples
+ }
+ es.shutdown(); // no new tasks are accepted; already-submitted tasks keep running
+ es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS); // wait for the submitted tasks to complete
+ }
+ catch (Throwable e) { // also covers InterruptedException thrown by put() and awaitTermination()
+ Utilities.exit("ERROR", e);
+ }
+ runStats.sampleNanos(System.nanoTime() - t0); // elapsed nanoseconds for this call
+ }
+}
diff --git a/main/RevGenotypeValues.java b/main/RevGenotypeValues.java
new file mode 100644
index 0000000..a6ef41c
--- /dev/null
+++ b/main/RevGenotypeValues.java
@@ -0,0 +1,111 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Markers;
+import vcf.Marker;
+import beagleutil.Samples;
+
+/**
+ * <p>Class {@code RevGenotypeValues} is a wrapper for a {@code GenotypeValues}
+ * instance. The wrapper reverses the order of markers in the wrapped object.
+ * </p>
+ * Instances of class {@code RevGenotypeValues} are thread-safe.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RevGenotypeValues implements GenotypeValues {
+
+ /*
+ * All instances of the {@code GenotypeValues} interface are required to be
+ * thread-safe.
+ */
+ private final GenotypeValues gv;
+
+ /**
+ * Constructs a new {@code RevGenotypeValues} instance that wraps the
+ * specified genotype values and exposes their markers in reverse order.
+ * @param gv genotype values that will be wrapped by the new instance
+ * @throws NullPointerException if {@code gv == null}
+ */
+ public RevGenotypeValues(GenotypeValues gv) {
+ this.gv = java.util.Objects.requireNonNull(gv, "gv"); // fail at construction, as documented, rather than on first use
+ }
+
+ @Override
+ public float value(int marker, int sample, int genotype) {
+ int revMarker = gv.nMarkers() - 1 - marker;
+ return gv.value(revMarker, sample, genotype);
+ }
+
+ @Override
+ public void add(int sample, double[] values) {
+ if (values.length != gv.markers().sumGenotypes()) {
+ throw new IllegalArgumentException("values.length=" + values.length);
+ }
+ // values is consumed front to back while the wrapped markers are visited last to first
+ int next = 0;
+ for (int wrapped=gv.nMarkers()-1; wrapped>=0; --wrapped) {
+ int nGenotypes = gv.marker(wrapped).nGenotypes();
+ for (int gt=0; gt<nGenotypes; ++gt) {
+ gv.add(wrapped, sample, gt, values[next++]);
+ }
+ }
+ }
+
+ @Override
+ public void add(int marker, int sample, int genotype, double value) {
+ int revMarker = gv.nMarkers() - 1 - marker;
+ gv.add(revMarker, sample, genotype, value);
+ }
+
+ @Override
+ public Samples samples() {
+ return gv.samples();
+ }
+
+ @Override
+ public int nSamples() {
+ return gv.nSamples();
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return gv.markers().reverse().marker(marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return gv.markers().reverse();
+ }
+
+ @Override
+ public int nMarkers() {
+ return gv.nMarkers();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(100);
+ sb.append('[');
+ sb.append(this.getClass().toString());
+ sb.append(']');
+ return sb.toString();
+ }
+}
diff --git a/main/RunStats.java b/main/RunStats.java
new file mode 100644
index 0000000..d6a0f6c
--- /dev/null
+++ b/main/RunStats.java
@@ -0,0 +1,370 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import vcf.Markers;
+import vcf.Marker;
+import vcf.Data;
+import blbutil.Const;
+import blbutil.FileUtil;
+import blbutil.Utilities;
+import dag.Dag;
+import dag.DagUtil;
+import java.io.File;
+import java.io.PrintWriter;
+import java.text.DecimalFormat;
+
+/**
+ * Class {@code RunStats} contains methods for storing and printing
+ * statistics describing a Beagle analysis.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RunStats {
+
+ private static final DecimalFormat df2 = new DecimalFormat("0.00");
+
+ private final Par par;
+ private final PrintWriter log;
+ private final long startNanos;
+
+ private long buildNanos = 0;
+ private long lastBuildNanos = 0;
+
+ private long sampleNanos = 0;
+ private long totalSampleNanos = 0;
+
+ private long imputeNanos = 0;
+ private long totalImputeNanos = 0;
+
+ private long totalIbdNanos = 0;
+
+ private String dagStats = null;
+
+ /**
+ * Constructs a new {@code RunStats} instance.
+ * @param par the analysis parameters
+ * @throws NullPointerException if {@code par == null}
+ */
+ RunStats(Par par) {
+ this.startNanos = System.nanoTime();
+ this.par = par;
+ this.log = log(par.out());
+ }
+
+ private static PrintWriter log(String outPrefix) {
+ File logFile = new File(outPrefix + ".log");
+ boolean append = false;
+ return FileUtil.nonBufferedPrintWriter(logFile, append);
+ }
+
+ /**
+ * Prints initial information about the analysis to a log
+ * file and to standard output.
+ */
+ public void printStartInfo() {
+ Utilities.duoPrint(log, Main.shortHelp + Const.nl);
+ Utilities.duoPrintln(log, "Start time: " + Utilities.timeStamp());
+ Utilities.duoPrint(log, commandLine("beagle.jar", par.args()));
+ if (par.ped() != null) {
+ String s = Const.nl + "WARNING: This version will not model"
+ + " duos or trios in the pedigree file";
+ Utilities.duoPrintln(log, s);
+ }
+ if (par.map() == null) {
+ String s = Const.nl + "No genetic map is specified: using 1 cM = 1 Mb";
+ Utilities.duoPrintln(log, s);
+ }
+ if (par.gt()==null && par.ref()!=null && par.impute()==true) {
+ assert par.gl()!=null || par.gtgl()!=null;
+ String s = Const.nl + "WARNING: Imputation of ungenotyped markers will not be performed."
+ + Const.nl + " Imputation requires the \"gt=\" argument and called genotypes.";
+ Utilities.duoPrintln(log, s);
+ }
+ if (par.gt()==null && par.ibd()) {
+ assert par.gl()!=null || par.gtgl()!=null;
+ String s = Const.nl + "WARNING: IBD segment detection will not be performed."
+ + Const.nl + " IBD analysis requires the \"gt=\" argument and called genotypes.";
+ Utilities.duoPrintln(log, s);
+ }
+ }
+
+ /**
+ * Returns a string representation of the command line arguments.
+ * The exact details of the representation are unspecified and
+ * subject to change.
+ *
+ * @param jarFile the name of the program's jar file.
+ * @param args command line arguments.
+ * @return a string representation of the command line arguments.
+ */
+ private static String commandLine(String jarFile, String[] args) {
+ StringBuilder sb = new StringBuilder(args.length*20); // initial capacity: 20 characters per argument
+ long maxMemory = Runtime.getRuntime().maxMemory(); // limit reported by the running JVM, not parsed from the real launch flags
+ sb.append(Const.nl);
+ sb.append("Command line: java");
+ if (maxMemory!=Long.MAX_VALUE) { // Runtime.maxMemory() returns Long.MAX_VALUE when there is no inherent limit
+ long maxMb = maxMemory / (1024*1024); // bytes to mebibytes, integer division
+ sb.append(" -Xmx");
+ sb.append(maxMb);
+ sb.append("m");
+ }
+ sb.append(" -jar ");
+ sb.append(jarFile);
+ sb.append(Const.nl);
+ for (int j = 0; j < args.length; ++j) { // one argument per output line
+ sb.append(" ");
+ sb.append(args[j]);
+ sb.append(Const.nl);
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Prints information about the complete analysis to a log
+ * file and to standard output, and closes the log file.
+ * @param nTargetMarkers the total number of target markers analyzed
+ * @param nMarkers the total number of markers analyzed
+ */
+ public void printSummaryAndClose(int nTargetMarkers, int nMarkers) {
+ long totalTime = System.nanoTime() - startNanos;
+ if (nTargetMarkers == nMarkers) {
+ Utilities.duoPrint(log, Const.nl);
+ Utilities.duoPrint(log, "Number of markers: ");
+ Utilities.duoPrintln(log, String.format("%7d", nMarkers));
+ }
+ else {
+ Utilities.duoPrint(log, Const.nl);
+ Utilities.duoPrint(log, "Number of reference markers: ");
+ duoPrintln7d(nMarkers);
+ Utilities.duoPrint(log, "Number of target markers: ");
+ duoPrintln7d(nTargetMarkers);
+ }
+ if (buildNanos > 0) {
+ duoPrintNanos("Total time for building model: ", buildNanos);
+ }
+ if (totalSampleNanos > 1000) {
+ duoPrintNanos("Total time for sampling: ", totalSampleNanos);
+ }
+ if (par.ibd()==true) {
+ duoPrintNanos("Total time for IBD detection: ", totalIbdNanos);
+ }
+ if (totalImputeNanos > 0) {
+ duoPrintNanos("Total time for imputation: ", totalImputeNanos);
+ }
+ duoPrintNanos("Total run time: ", totalTime);
+ Utilities.duoPrintln(log, Const.nl + "End time: "
+ + Utilities.timeStamp());
+ Utilities.duoPrintln(log, Main.program + " finished");
+ log.close();
+ }
+
+ /**
+ * Increases the cumulative time to build the DAG models by the
+ * specified number of nanoseconds.
+ * @param nanos the nanoseconds required to build an instance
+ * of the DAG model
+ */
+ public void buildNanos(long nanos) {
+ buildNanos += nanos;
+ }
+
+ /**
+ * Stores the time for sampling new haplotypes and increases the
+ * cumulative sampling time by the specified number of nanoseconds.
+ * @param nanos the nanoseconds required to sample new haplotypes
+ */
+ public void sampleNanos(long nanos) {
+ sampleNanos = nanos;
+ totalSampleNanos += nanos;
+ }
+
+ /**
+ * Stores the time for imputing ungenotyped markers and increases
+ * the cumulative imputation time by the specified number
+ * of nanoseconds.
+ * @param nanos the nanoseconds required to impute ungenotyped
+ * markers
+ */
+ public void imputationNanos(long nanos) {
+ imputeNanos = nanos;
+ totalImputeNanos += nanos;
+ }
+
+ /**
+ * Increases the cumulative time for detecting identity-by-descent
+ * by the specified number of nanoseconds.
+ * @param nanos the nanoseconds required to perform IBD detection
+ */
+ public void ibdNanos(long nanos) {
+ totalIbdNanos += nanos;
+ }
+
+ /**
+ * Stores statistics for the DAG model used to sample single individuals.
+ * @param dag the DAG model used to sample individuals
+ */
+ public void setDagStats(Dag dag) {
+ dagStats = (dag==null) ? null : DagUtil.dagStats(dag);
+ }
+
+ /**
+ * Prints information about the Refined IBD analysis to a log
+ * file and to standard output.
+ * @param ibdScale the value used to multiplicatively scale the node
+ * similarity threshold when building the DAG model
+ * @param ibdDag the DAG model used for IBD detection
+ * @param nanos the nanoseconds required for IBD detection
+ * @throws NullPointerException if {@code ibdDag == null}
+ */
+ public void printRefinedIbdUpdate(float ibdScale, Dag ibdDag, long nanos) {
+ Utilities.duoPrintln(log, Const.nl + "Refined IBD");
+ Utilities.duoPrintln(log, "model scale: " + df2.format(ibdScale));
+ duoPrintNanos("run time: ", nanos);
+ Utilities.duoPrint(log, Const.nl + DagUtil.dagStats(ibdDag));
+ }
+
+ /**
+ * Prints run time for most recent imputation to a log file
+ * and to standard output.
+ */
+ public void printImputationUpdate() {
+ Utilities.duoPrint(log, Const.nl);
+ duoPrintNanos("Imputation time (this window): ", imputeNanos);
+ }
+
+ /**
+ * Prints information about the samples to a log
+ * file and to standard output.
+ * @param fam the parent-offspring relationships
+ * @param data the input genotype data
+ */
+ public void printSampleSummary(NuclearFamilies fam, Data data) {
+ Utilities.duoPrint(log, Const.nl);
+ Utilities.duoPrint(log, "reference samples: ");
+ duoPrintln7d(data.nRefSamples());
+ Utilities.duoPrint(log, "target samples: ");
+ duoPrintln7d(data.nTargetSamples());
+ if (par.ped() != null) {
+ Utilities.duoPrint(log, " ");
+ Utilities.duoPrint(log, String.valueOf(fam.nSingles()));
+ Utilities.duoPrintln(log, " singles");
+ Utilities.duoPrint(log, " ");
+ Utilities.duoPrint(log, String.valueOf(fam.nDuos()));
+ Utilities.duoPrintln(log, " duos");
+ Utilities.duoPrint(log, " ");
+ Utilities.duoPrint(log, String.valueOf(fam.nTrios()));
+ Utilities.duoPrintln(log, " trios");
+ }
+ }
+
+ /**
+ * Prints information about the marker window to a log
+ * file and to standard output.
+ * @param data the input genotype data
+ */
+ public void printWindowUpdate(Data data) {
+ Markers markers = data.markers();
+ Marker first = markers.marker(0);
+ Marker last = markers.marker(markers.nMarkers() - 1);
+ StringBuilder sb = new StringBuilder(30);
+ sb.append(Const.nl);
+ sb.append("Window ");
+ sb.append(data.window());
+ sb.append(" [ ");
+ String chr = first.chrom();
+ if (chr.equals(Const.MISSING_DATA_STRING)==false) {
+ sb.append(chr);
+ sb.append(Const.colon);
+ }
+ sb.append(first.pos());
+ sb.append(Const.hyphen);
+ if (chr.equals(last.chrom())==false) {
+ sb.append(last.chrom());
+ sb.append(Const.colon);
+ }
+ sb.append(last.pos());
+ sb.append(" ]");
+ sb.append(Const.nl);
+ if (data.nRefSamples()>0) {
+ sb.append("reference markers: ");
+ sb.append(String.format("%7d", data.nMarkers()));
+ sb.append(Const.nl);
+ }
+ sb.append("target markers: ");
+ sb.append(String.format("%7d", data.nTargetMarkers()));
+ Utilities.duoPrintln(log, sb.toString());
+ }
+
+ /**
+ * Prints the specified string to the log file and to standard out.
+ * @param msg the message to be printed
+ */
+ public void println(String msg) {
+ Utilities.duoPrintln(log, msg);
+ }
+
+ /**
+ * Prints information about the specified iteration.
+ * @param window the window
+ * @param iter the iteration
+ */
+ public void printIterationUpdate(int window, int iter) {
+ long buildTime = buildNanos - lastBuildNanos; // build time accumulated since the previous call to this method
+ lastBuildNanos = buildNanos; // remember the running total for the next call
+ Utilities.duoPrint(log, Const.nl + "Window=" + window
+ + " Iteration=" + iter + Const.nl);
+ duoPrintNanos("Time for building model: ", buildTime);
+ if (dagStats != null) { // sampling time is printed only when DAG statistics have been stored
+ duoPrintNanos("Time for sampling (singles): ", sampleNanos);
+ sampleNanos = 0; // reset so the same value is not reported by a later call
+ }
+ if (dagStats != null) {
+ Utilities.duoPrint(log, "DAG statistics" + Const.nl);
+ Utilities.duoPrint(log, dagStats);
+ }
+ log.flush();
+ }
+
+ /**
+ * Returns a string with the specified message followed by the elapsed time
+ * (in hours, minutes, and seconds).
+ *
+ * @param message a message preceding the elapsed time
+ * @param nanos the number of elapsed nanoseconds
+ *
+ * @return a string with the specified message followed by the elapsed time
+ * (in hours, minutes, and seconds)
+ */
+ private static String elapsedNanos(String message, long nanos) {
+ StringBuilder sb = new StringBuilder(message.length() + 30);
+ sb.append(message);
+ sb.append(Utilities.elapsedNanos(nanos));
+ sb.append(Const.nl);
+ return sb.toString();
+ }
+
+ private void duoPrintNanos(String message, long nanos) {
+ Utilities.duoPrint(log, elapsedNanos(message, nanos));
+ }
+
+ private void duoPrintln7d(int i) {
+ Utilities.duoPrintln(log, String.format("%7d", i));
+ }
+}
diff --git a/main/SampleGenotypeValues.java b/main/SampleGenotypeValues.java
new file mode 100644
index 0000000..a8365d9
--- /dev/null
+++ b/main/SampleGenotypeValues.java
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import vcf.Markers;
+import vcf.Marker;
+
+/**
+ * <p>Class {@code SampleGenotypeValues} stores a value for each possible
+ * genotype at each marker for one sample.
+ * </p>
+ * <p>Class {@code SampleGenotypeValues} is thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SampleGenotypeValues {
+
+ private final Markers markers;
+ private final Samples samples;
+ private final int sampleIndex;
+ private final float[] gtValues;
+
+ /**
+ * Constructs a {@code SampleGenotypeValues} instance for the
+ * specified markers and sample with initial value 0 for each possible
+ * genotype at each marker.
+ * @param markers the list of markers
+ * @param samples the list of samples
+ * @param sampleIndex a sample index
+ * @throws IllegalArgumentException if
+ * {@code sampleIndex < 0 || sampleIndex >= samples.nSamples()}
+ * @throws NullPointerException if
+ * {@code markers == null || samples == null}
+ */
+ public SampleGenotypeValues(Markers markers, Samples samples, int sampleIndex) {
+ if (sampleIndex < 0 || sampleIndex >= samples.nSamples()) {
+ throw new IllegalArgumentException(String.valueOf(sampleIndex));
+ }
+ this.markers = markers;
+ this.samples = samples;
+ this.sampleIndex = sampleIndex;
+ this.gtValues = new float[markers.sumGenotypes()];
+ }
+
+ /**
+ * Returns the specified genotype value.
+ *
+ * @param marker a marker index
+ * @param genotype a genotype index
+ * @return the specified genotype value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code genotype < 0 || genotype >= this.marker(marker).nGenotypes()}
+ */
+ public synchronized float value(int marker, int genotype) {
+ checkGenotype(marker, genotype);
+ return gtValues[markers.sumGenotypes(marker) + genotype];
+ }
+
+ /**
+ * Adds the specified genotype values to {@code this}. This method is
+ * equivalent to
+ * <pre>
+ * for (int m=0; m<this.nMarkers(); ++m) {
+ * offset = this.markers().sumGenotypes(m);
+ * for (int gt=0; gt<this.marker(m).nGenotypes(); ++gt) {
+ * this.add(m, gt, values[offset + gt])
+ * }
+ * }
+ * </pre>
+ *
+ * @param values an array with {@code this.markers().sumGenotypes()}
+ * elements containing the genotype values to be added
+ * @throws IllegalArgumentException if
+ * {@code values.length != this.markers().sumGenotypes()}
+ * @throws NullPointerException if {@code values == null}
+ */
+ public synchronized void add(double[] values) {
+ if (values.length != gtValues.length) {
+ String s = "values.length=" + values.length;
+ throw new IllegalArgumentException(s);
+ }
+ for (int j=0; j<values.length; ++j) {
+ gtValues[j] += values[j];
+ }
+ }
+
+ /**
+ * Adds the specified genotype value to {@code this}.
+ * @param marker a marker index
+ * @param genotype a genotype index
+ * @param value the value to be added
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code genotype < 0 || genotype >= this.marker(marker).nGenotypes()}
+ */
+ public synchronized void add(int marker, int genotype, double value) {
+ checkGenotype(marker, genotype);
+ gtValues[markers.sumGenotypes(marker) + genotype] += value;
+ }
+
+ private void checkGenotype(int marker, int genotype) {
+ int nGenotypes = markers.marker(marker).nGenotypes();
+ if (genotype < 0 || genotype >= nGenotypes) {
+ throw new IndexOutOfBoundsException("genotype: " + genotype);
+ }
+ }
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ public Markers markers() {
+ return markers;
+ }
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ public Samples samples() {
+ return samples;
+ }
+
+ /**
+ * Returns the sample index.
+ * @return the sample index
+ */
+ public int sampleIndex() {
+ return sampleIndex;
+ }
+
+ /**
+ * Returns the specified marker.
+ * @param marker a marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ public Marker marker(int marker) {
+ return markers.marker(marker);
+ }
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ return this.getClass().toString();
+ }
+}
diff --git a/main/SampleHapPairAlleleProbs.java b/main/SampleHapPairAlleleProbs.java
new file mode 100644
index 0000000..683cf0e
--- /dev/null
+++ b/main/SampleHapPairAlleleProbs.java
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import haplotype.SampleHapPairs;
+import vcf.Marker;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code SampleHapPairAlleleProbs} is a wrapper for a
+ * {@code SampleHapPairs} instance.
+ * </p>
+ * <p>Instances of class {@code SampleHapPairAlleleProbs} are immutable.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SampleHapPairAlleleProbs implements AlleleProbs {
+
+ private final SampleHapPairs sampleHapPairs;
+
+ /**
+ * Constructs a new {@code SampleHapPairAlleleProbs} instance that wraps
+ * the specified {@code SampleHapPairs} object. The alleles in
+ * the specified {@code SampleHapPairs} instance will have
+ * probability 1.
+ *
+ * @param sampleHapPairs the sample haplotype pairs that will
+ * be wrapped by {@code this}
+ *
+ * @throws NullPointerException if {@code sampleHapPairs == null}
+ */
+ public SampleHapPairAlleleProbs(SampleHapPairs sampleHapPairs) {
+ if (sampleHapPairs==null) {
+ throw new NullPointerException("sampleHapPairs==null");
+ }
+ this.sampleHapPairs = sampleHapPairs;
+ }
+
+ @Override
+ public float alProb1(int marker, int sample, int allele) {
+ return allele==sampleHapPairs.allele1(marker, sample) ? 1f : 0f;
+ }
+
+ @Override
+ public float alProb2(int marker, int sample, int allele) {
+ return allele==sampleHapPairs.allele2(marker, sample) ? 1f : 0f;
+ }
+
+ @Override
+ public float gtProb(int marker, int sample, int allele1, int allele2) {
+ return alProb1(marker, sample, allele1)*alProb2(marker, sample, allele2);
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ return sampleHapPairs.allele1(marker, sample);
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ return sampleHapPairs.allele2(marker, sample);
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return sampleHapPairs.marker(marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return sampleHapPairs.markers();
+ }
+
+ @Override
+ public int nMarkers() {
+ return sampleHapPairs.nMarkers();
+ }
+
+ @Override
+ public int nSamples() {
+ return sampleHapPairs.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return sampleHapPairs.samples();
+ }
+}
diff --git a/main/WindowWriter.java b/main/WindowWriter.java
new file mode 100644
index 0000000..eb3a21a
--- /dev/null
+++ b/main/WindowWriter.java
@@ -0,0 +1,274 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package main;
+
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.FileUtil;
+import blbutil.IntPair;
+import ibd.IbdSegment;
+import java.io.Closeable;
+import java.io.File;
+import java.io.PrintWriter;
+import java.text.DecimalFormat;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import vcf.VcfWriter;
+
+/**
+ * <p>Class {@code WindowWriter} writes VCF and IBD output data.
+ * </p>
+ * <p>Instances of class {@code WindowWriter} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class WindowWriter implements Closeable {
+
+ private static final DecimalFormat df2 = new DecimalFormat("#.##");
+
+ private boolean isClosed = false;
+ private boolean appendIbd = false;
+
+ private final Samples samples;
+ private final File vcfOutFile;
+ private final File ibdOutFile;
+ private final File hbdOutFile;
+ private final PrintWriter vcfOut;
+ private final Map<IntPair, IbdSegment> ibdBuffer = new HashMap<>();
+
+    /**
+     * Constructs a new {@code WindowWriter} object.  The constructor opens
+     * a bgzip-compressed writer on {@code outPrefix + ".vcf.gz"} and passes
+     * it to {@code VcfWriter.writeMetaLines()} with the GT and GP format
+     * fields enabled and the GL format field disabled.  The IBD and HBD
+     * output files are {@code outPrefix + ".ibd"} and
+     * {@code outPrefix + ".hbd"}.
+     *
+     * @param samples the samples whose data will be printed
+     * @param outPrefix the output file prefix
+     *
+     * @throws IllegalArgumentException if {@code outPrefix.length() == 0}
+     * @throws NullPointerException if
+     * {@code samples == null || outPrefix == null}
+     */
+    public WindowWriter(Samples samples, String outPrefix) {
+        if (samples==null) {
+            throw new NullPointerException("samples==null");
+        }
+        // Explicit check so that a null prefix is reported with the same
+        // style of message as a null samples argument.
+        if (outPrefix==null) {
+            throw new NullPointerException("outPrefix==null");
+        }
+        if (outPrefix.isEmpty()) {
+            throw new IllegalArgumentException("outPrefix.length()==0");
+        }
+        this.samples = samples;
+        this.vcfOutFile = new File(outPrefix + ".vcf.gz");
+        this.ibdOutFile = new File(outPrefix + ".ibd");
+        this.hbdOutFile = new File(outPrefix + ".hbd");
+        this.vcfOut = FileUtil.bgzipPrintWriter(vcfOutFile);
+
+        boolean printGT = true;
+        boolean printGP = true;
+        boolean printGL = false;
+        VcfWriter.writeMetaLines(samples.ids(), Main.program,
+                printGT, printGP, printGL, vcfOut);
+    }
+
+ /**
+ * Returns the samples whose data is written by {@code this}.
+ * @return the samples whose data is written by {@code this}
+ */
+ public Samples samples() {
+ return samples;
+ }
+
+
+ /**
+ * Returns {@code true} if {@code this.close()} method has
+ * been previously invoked and returns {@code false} otherwise.
+ *
+ * @return {@code true} if {@code this.close()} method has
+ * been previously invoked
+ */
+ public boolean isClosed() {
+ return isClosed;
+ }
+
+ /**
+ * Closes this {@code WindowWriter} for writing. Calling the
+ * {@code print()} method after invoking {@code close()} will
+ * throw an {@code IllegalStateException}.
+ */
+ @Override
+ public void close() {
+ vcfOut.close();
+ isClosed = true;
+ }
+
+    /**
+     * Prints VCF records with GT and GP format fields for markers with
+     * index between {@code cd.prevTargetSplice()} (inclusive) and
+     * {@code cd.nextTargetSplice()} (exclusive), and then flushes the
+     * VCF output.
+     *
+     * @param cd the input data for the current marker window
+     * @param gv scaled genotype probabilities for the target samples
+     *
+     * @throws IllegalStateException if {@code this.isClosed() == true}
+     * @throws NullPointerException if {@code cd == null || gv == null}
+     */
+    public void printGV(CurrentData cd, GenotypeValues gv) {
+        if (isClosed) {
+            throw new IllegalStateException("isClosed()==true");
+        }
+        int start = cd.prevTargetSplice();
+        int end = cd.nextTargetSplice();
+        VcfWriter.appendRecords(gv, start, end, vcfOut);
+        vcfOut.flush();
+    }
+
+    /**
+     * Prints the data in {@code alProbs} for markers
+     * with index between {@code cd.prevSplice()} (inclusive) and
+     * {@code cd.nextSplice()} (exclusive), and then flushes the VCF output.
+     *
+     * @param cd the input data for the current marker window
+     * @param alProbs the estimated haplotype allele probabilities
+     * @param imputed {@code true} if there are imputed markers,
+     * and {@code false} otherwise
+     * @param gprobs {@code true} if the GP field should be printed, and
+     * {@code false} otherwise
+     *
+     * @throws IllegalStateException if {@code this.isClosed() == true}
+     * @throws IllegalArgumentException if
+     * {@code this.samples().equals(cd.targetSamples()) == false}
+     * @throws IllegalArgumentException if
+     * {@code this.samples().equals(alProbs.samples()) == false}
+     * @throws IllegalArgumentException if
+     * {@code cd.markers().equals(alProbs.markers()) == false}
+     * @throws NullPointerException if {@code cd == null || alProbs == null}
+     */
+    public void print(CurrentData cd, AlleleProbs alProbs, boolean imputed,
+            boolean gprobs) {
+        if (isClosed) {
+            throw new IllegalStateException("isClosed()==true");
+        }
+        if (!cd.markers().equals(alProbs.markers())) {
+            throw new IllegalArgumentException("inconsistent markers");
+        }
+        // Both the window data and the probabilities must describe exactly
+        // the samples this writer was constructed with.
+        boolean sameSamples = samples.equals(cd.targetSamples())
+                && samples.equals(alProbs.samples());
+        if (!sameSamples) {
+            throw new IllegalArgumentException("inconsistent samples");
+        }
+        VcfWriter.appendRecords(alProbs, cd.prevSplice(), cd.nextSplice(),
+                imputed, gprobs, vcfOut);
+        vcfOut.flush();
+    }
+
+    /**
+     * Prints IBD segments that end between the markers
+     * with index between {@code cd.prevTargetSplice()} (inclusive) and
+     * {@code cd.nextTargetSplice()} (exclusive).
+     * IBD segments that end on or after the marker with index
+     * {@code cd.nextTargetSplice()} are saved so that they can be merged
+     * with IBD segments from the next marker window.
+     * The first invocation overwrites the IBD and HBD output files, and
+     * later invocations append to them.  Each entry of {@code ibdMap} is
+     * removed from the map once it has been processed.
+     *
+     * <p>It is the caller's responsibility to ensure that the ordered
+     * haplotype pairs in adjacent marker windows
+     * are identical for each sample.
+     * </p>
+     *
+     * @param cd the input data for the current window
+     * @param ibdMap a map whose keys are pairs of haplotype indices and whose
+     * values are lists of IBD segments involving the haplotype pair key
+     *
+     * @throws IllegalStateException if {@code this.isClosed()==true}
+     * @throws IllegalArgumentException if
+     * {@code this.samples().equals(cd.targetSamples()) == false}
+     * @throws NullPointerException if {@code cd == null || ibdMap == null}
+     */
+    public void printIbd(CurrentData cd, Map<IntPair, List<IbdSegment>> ibdMap) {
+        if (isClosed) {
+            throw new IllegalStateException("isClosed()==true");
+        }
+        if (!samples.equals(cd.targetSamples())) {
+            throw new IllegalArgumentException("inconsistent samples");
+        }
+        printIbd(ibdMap, cd.prevTargetSplice(), cd.nextTargetOverlap(),
+                cd.nextTargetSplice(), cd.nTargetMarkers());
+        // Every later window appends to the files written above.
+        appendIbd = true;
+    }
+
+    /*
+     * Writes the segments in the specified map that end inside the current
+     * splice interval, merging a segment that starts at index 0 with the
+     * segment buffered for the same haplotype pair by the previous call.
+     * Segments that are not printed and that start before nextOverlap are
+     * buffered for the next call.  Map entries are removed as they are
+     * processed.
+     */
+    private void printIbd(Map<IntPair, List<IbdSegment>> ibd, int lastSplice,
+            int nextOverlap, int nextSplice, int nMarkers) {
+        // Snapshot the segments carried over from the previous call, then
+        // reset the buffer so it only holds segments saved by this call.
+        Map<IntPair, IbdSegment> carriedOver = new HashMap<>(ibdBuffer);
+        ibdBuffer.clear();
+        try (PrintWriter ibdOut = FileUtil.printWriter(ibdOutFile, appendIbd);
+                PrintWriter hbdOut = FileUtil.printWriter(hbdOutFile, appendIbd)) {
+            Iterator<Map.Entry<IntPair, List<IbdSegment>>> entryIt
+                    = ibd.entrySet().iterator();
+            while (entryIt.hasNext()) {
+                Map.Entry<IntPair, List<IbdSegment>> entry = entryIt.next();
+                IntPair hapPair = entry.getKey();
+                for (IbdSegment seg : entry.getValue()) {
+                    IbdSegment current = seg;
+                    if (seg.startIndex()==0) {
+                        IbdSegment saved = carriedOver.get(hapPair);
+                        if (saved!=null) {
+                            current = merge(saved, seg);
+                        }
+                    }
+                    int endPlus1 = current.endIndex()+1;
+                    // nextSplice==nMarkers presumably marks the final
+                    // window, in which nothing is deferred -- TODO confirm
+                    boolean endsInWindow = endPlus1>=lastSplice
+                            && (nextSplice==nMarkers || endPlus1<nextSplice);
+                    if (endsInWindow) {
+                        printSegment(samples, current, ibdOut, hbdOut);
+                    }
+                    else if (current.startIndex()<nextOverlap) {
+                        ibdBuffer.put(hapPair, current);
+                    }
+                }
+                entryIt.remove();
+            }
+        }
+    }
+
+    /*
+     * Returns a segment for the same haplotype pair that runs from the
+     * start of segment a to the end of segment b.  The merged score is the
+     * larger of the two scores.
+     */
+    private static IbdSegment merge(IbdSegment a, IbdSegment b) {
+        assert a.hapPair().equals(b.hapPair());
+        assert a.start().chromIndex()==b.start().chromIndex();
+        // -1: presumably flags a start that is not an index in the
+        // current window -- TODO confirm against IbdSegment
+        final int mergedStartIndex = -1;
+        final float maxScore = Math.max(a.score(), b.score());
+        return new IbdSegment(a.hapPair(), a.start(), b.end(),
+                maxScore, mergedStartIndex, b.endIndex());
+    }
+
+ /*
+ * Writes one tab-delimited line describing the specified segment.
+ * The columns are: first sample identifier, first haplotype number,
+ * second sample identifier, second haplotype number, chromosome,
+ * start position, end position, and the score formatted with the
+ * "#.##" pattern of df2.
+ *
+ * A haplotype index h is mapped to sample index h/2 and to haplotype
+ * number (h % 2) + 1, so the printed haplotype number is 1 or 2.
+ * The line is written to hbdOut when both haplotypes map to the same
+ * sample index, and to ibdOut otherwise.
+ */
+ private static void printSegment(Samples samples, IbdSegment tract,
+ PrintWriter ibdOut, PrintWriter hbdOut) {
+ int h1 = tract.hap1();
+ int h2 = tract.hap2();
+ // two haplotypes per sample: integer division yields the sample index
+ int s1 = h1/2;
+ int s2 = h2/2;
+ PrintWriter out = (s1==s2) ? hbdOut : ibdOut;
+ out.print(samples.id(s1));
+ out.print(Const.tab);
+ out.print((h1 % 2) + 1);
+ out.print(Const.tab);
+ out.print(samples.id(s2));
+ out.print(Const.tab);
+ out.print((h2 % 2) + 1);
+ out.print(Const.tab);
+ out.print(tract.start().chrom());
+ out.print(Const.tab);
+ out.print(tract.start().pos());
+ out.print(Const.tab);
+ out.print(tract.end().pos());
+ out.print(Const.tab);
+ out.println(df2.format(tract.score()));
+ }
+}
diff --git a/net/sf/samtools/Defaults.java b/net/sf/samtools/Defaults.java
new file mode 100644
index 0000000..4c3a652
--- /dev/null
+++ b/net/sf/samtools/Defaults.java
@@ -0,0 +1,49 @@
+package net.sf.samtools;
+
+/**
+ * Embodies defaults for global values that affect how the SAM JDK operates. Defaults are encoded in the class
+ * and are also overridable using system properties.
+ *
+ * @author Tim Fennell
+ */
+public class Defaults {
+    /** Should BAM index files be created when writing out coordinate sorted BAM files?  Default = false. */
+    public static final boolean CREATE_INDEX;
+
+    /** Should MD5 files be created when writing out SAM and BAM files?  Default = false. */
+    public static final boolean CREATE_MD5;
+
+    /** Should asynchronous I/O be used when writing out SAM and BAM files (one thread per file).  Default = false. */
+    public static final boolean USE_ASYNC_IO;
+
+    /** Compression level to be used for writing BAM and other block-compressed outputs.  Default = 5. */
+    public static final int COMPRESSION_LEVEL;
+
+    /** Buffer size, in bytes, used whenever reading/writing files or streams.  Default = 128k. */
+    public static final int BUFFER_SIZE;
+
+    // Each constant is read once, at class-initialization time, from the
+    // system property "samjdk.<name>".
+    static {
+        CREATE_INDEX = getBooleanProperty("create_index", false);
+        CREATE_MD5 = getBooleanProperty("create_md5", false);
+        USE_ASYNC_IO = getBooleanProperty("use_async_io", false);
+        COMPRESSION_LEVEL = getIntProperty("compression_level", 5);
+        BUFFER_SIZE = getIntProperty("buffer_size", 1024 * 128);
+    }
+
+    /** Gets a string system property, prefixed with "samjdk.", using the default if the property does not exist. */
+    private static String getStringProperty(final String name, final String def) {
+        return System.getProperty("samjdk." + name, def);
+    }
+
+    /**
+     * Gets a boolean system property, prefixed with "samjdk.", using the default if the property does not exist.
+     * The value is interpreted by {@code Boolean.parseBoolean}.
+     */
+    private static boolean getBooleanProperty(final String name, final boolean def) {
+        // Boolean.toString avoids the deprecated Boolean(boolean) constructor.
+        final String value = getStringProperty(name, Boolean.toString(def));
+        return Boolean.parseBoolean(value);
+    }
+
+    /**
+     * Gets an int system property, prefixed with "samjdk.", using the default if the property does not exist.
+     *
+     * @throws NumberFormatException if the property is set to a value that is not a valid int
+     */
+    private static int getIntProperty(final String name, final int def) {
+        // Integer.toString avoids the deprecated Integer(int) constructor.
+        final String value = getStringProperty(name, Integer.toString(def));
+        try {
+            return Integer.parseInt(value);
+        } catch (NumberFormatException e) {
+            // Same exception type as before, but name the offending property:
+            // this runs in the static initializer, where a bare parse error
+            // is hard to trace back to its source.
+            final NumberFormatException named = new NumberFormatException(
+                    "Invalid int value for system property samjdk." + name + ": " + value);
+            named.initCause(e);
+            throw named;
+        }
+    }
+}
diff --git a/net/sf/samtools/FileTruncatedException.java b/net/sf/samtools/FileTruncatedException.java
new file mode 100644
index 0000000..0c04cee
--- /dev/null
+++ b/net/sf/samtools/FileTruncatedException.java
@@ -0,0 +1,46 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+/**
+ * Thrown when it is possible to detect that a SAM or BAM file is truncated.
+ *
+ * @author alecw at broadinstitute.org
+ */
+public class FileTruncatedException extends SAMException {
+ /** Creates an exception with no detail message and no cause. */
+ public FileTruncatedException() {
+ }
+
+ /**
+ * Creates an exception with the given detail message.
+ *
+ * @param s the detail message
+ */
+ public FileTruncatedException(final String s) {
+ super(s);
+ }
+
+ /**
+ * Creates an exception with the given detail message and cause.
+ *
+ * @param s the detail message
+ * @param throwable the underlying cause
+ */
+ public FileTruncatedException(final String s, final Throwable throwable) {
+ super(s, throwable);
+ }
+
+ /**
+ * Creates an exception that wraps the given cause.
+ *
+ * @param throwable the underlying cause
+ */
+ public FileTruncatedException(final Throwable throwable) {
+ super(throwable);
+ }
+}
diff --git a/net/sf/samtools/SAMException.java b/net/sf/samtools/SAMException.java
new file mode 100644
index 0000000..8ec29c5
--- /dev/null
+++ b/net/sf/samtools/SAMException.java
@@ -0,0 +1,44 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+/**
+ * Unchecked exception that serves as the common superclass of the
+ * SAM-specific exceptions in this package, such as
+ * {@code FileTruncatedException} and {@code SAMFormatException}.
+ *
+ * @author alecw at broadinstitute.org
+ */
+public class SAMException extends RuntimeException {
+ /** Creates an exception with no detail message and no cause. */
+ public SAMException() {
+ }
+
+ /**
+ * Creates an exception with the given detail message.
+ *
+ * @param s the detail message
+ */
+ public SAMException(final String s) {
+ super(s);
+ }
+
+ /**
+ * Creates an exception with the given detail message and cause.
+ *
+ * @param s the detail message
+ * @param throwable the underlying cause
+ */
+ public SAMException(final String s, final Throwable throwable) {
+ super(s, throwable);
+ }
+
+ /**
+ * Creates an exception that wraps the given cause.
+ *
+ * @param throwable the underlying cause
+ */
+ public SAMException(final Throwable throwable) {
+ super(throwable);
+ }
+}
diff --git a/net/sf/samtools/SAMFormatException.java b/net/sf/samtools/SAMFormatException.java
new file mode 100644
index 0000000..1ae70fd
--- /dev/null
+++ b/net/sf/samtools/SAMFormatException.java
@@ -0,0 +1,44 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+/**
+ * Thrown when a SAM file being read or decoded (text or binary) looks bad.
+ */
+public class SAMFormatException extends SAMException {
+ /** Creates an exception with no detail message and no cause. */
+ public SAMFormatException() {
+ }
+
+ /**
+ * Creates an exception with the given detail message.
+ *
+ * @param s the detail message
+ */
+ public SAMFormatException(final String s) {
+ super(s);
+ }
+
+ /**
+ * Creates an exception with the given detail message and cause.
+ *
+ * @param s the detail message
+ * @param throwable the underlying cause
+ */
+ public SAMFormatException(final String s, final Throwable throwable) {
+ super(s, throwable);
+ }
+
+ /**
+ * Creates an exception that wraps the given cause.
+ *
+ * @param throwable the underlying cause
+ */
+ public SAMFormatException(final Throwable throwable) {
+ super(throwable);
+ }
+}
diff --git a/net/sf/samtools/util/BinaryCodec.java b/net/sf/samtools/util/BinaryCodec.java
new file mode 100644
index 0000000..abffcbe
--- /dev/null
+++ b/net/sf/samtools/util/BinaryCodec.java
@@ -0,0 +1,662 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+import java.io.*;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * Encapsulates file representation of various primitive data types. Forces little-endian disk representation.
+ * Note that this class is currently not very efficient. There are plans to increase the size of the ByteBuffer,
+ * and move data between the ByteBuffer and the underlying input or output stream in larger chunks.
+ *
+ * All the read methods throw RuntimeEOFException if the input stream is exhausted before the required number
+ * of bytes are read.
+ *
+ * @author Dave Tefft
+ */
+public class BinaryCodec {
+
+ //Outstream to write to
+ private OutputStream outputStream;
+ //If a file or filename was given it will be stored here. Used for error reporting.
+ private String outputFileName;
+
+ //Input stream to read from
+ private InputStream inputStream;
+ //If a file or filename was given to read from it will be stored here. Used for error reporting.
+ private String inputFileName;
+
+ /*
+ Mode that the BinaryCodec is in. It is either writing to a binary file or reading from.
+ This is set to true if it is writing to a binary file
+ Right now we don't support reading and writing to the same file with the same BinaryCodec instance
+ */
+ private boolean isWriting;
+
+ /**
+ * For byte swapping.
+ */
+ private ByteBuffer byteBuffer;
+
+ /**
+ * For reading Strings of known length, this can reduce object creation
+ */
+ private final byte[] scratchBuffer = new byte[16];
+
+ // Byte order used in BAM files.
+ private static final ByteOrder LITTLE_ENDIAN = ByteOrder.LITTLE_ENDIAN;
+ private static final byte NULL_BYTE[] = {0};
+
+ private static final long MAX_UBYTE = (Byte.MAX_VALUE * 2) + 1;
+ private static final long MAX_USHORT = (Short.MAX_VALUE * 2) + 1;
+ private static final long MAX_UINT = ((long)Integer.MAX_VALUE * 2) + 1;
+
+ // We never serialize more than this much at a time (except for Strings)
+ private static final int MAX_BYTE_BUFFER = 8;
+
+ //////////////////////////////////////////////////
+ // Constructors //
+ //////////////////////////////////////////////////
+
+    /**
+     * Constructs a BinaryCodec on a file and sets its mode to writing or
+     * reading.  Only the simple name of the file (without its directory)
+     * is remembered for error reporting.
+     *
+     * @param file    file to be written to or read from
+     * @param writing whether the file is being written to
+     * @throws RuntimeIOException if the file cannot be opened
+     */
+    public BinaryCodec(final File file, final boolean writing) {
+        this();
+        this.isWriting = writing;
+        try {
+            if (writing) {
+                this.outputStream = new FileOutputStream(file);
+                this.outputFileName = file.getName();
+                return;
+            }
+            this.inputStream = new FileInputStream(file);
+            this.inputFileName = file.getName();
+        } catch (FileNotFoundException e) {
+            throw new RuntimeIOException("File not found: " + file, e);
+        }
+    }
+
+ /**
+ * Constructs BinaryCodec from a file name and sets its mode to writing or not
+ *
+ * @param fileName name of the file to be written to or read from
+ * @param writing whether the file is being written to
+ */
+ public BinaryCodec(final String fileName, final boolean writing) {
+ this(new File(fileName), writing);
+ }
+
+ /**
+ * Constructs BinaryCodec from an output stream
+ *
+ * @param outputStream Stream to write to, since it's an output stream we know that isWriting
+ * should be set to true
+ */
+ public BinaryCodec(final OutputStream outputStream) {
+ this();
+ setOutputStream(outputStream);
+ }
+
+ /**
+ * Constructs BinaryCodec from an input stream
+ *
+ * @param inputStream Stream to read from, since we are reading isWriting is set to false
+ */
+ public BinaryCodec(final InputStream inputStream) {
+ this();
+ setInputStream(inputStream);
+ }
+
+ /**
+ * Ambiguous whether reading or writing until set{In,Out}putStream is called
+ */
+ public BinaryCodec() {
+ initByteBuffer();
+ }
+
+ /**
+ * Shared among ctors.
+ * Note that if endianness is changed, all the unsigned methods must also be changed.
+ */
+ private void initByteBuffer() {
+ byteBuffer = ByteBuffer.allocate(MAX_BYTE_BUFFER);
+ byteBuffer.order(LITTLE_ENDIAN);
+ }
+
+ //////////////////////////////////////////////////
+ // Writing methods //
+ //////////////////////////////////////////////////
+
+
+ /**
+ * Write whatever has been put into the byte buffer
+ * @param numBytes -- how much to write. Note that in case of writing an unsigned value,
+ * more bytes were put into the ByteBuffer than will get written out.
+ */
+ private void writeByteBuffer(final int numBytes) {
+ assert(numBytes <= byteBuffer.limit());
+ writeBytes(byteBuffer.array(), 0, numBytes);
+ }
+
+ /**
+ * Writes a byte to the output buffer
+ *
+ * @param bite the byte to write
+ */
+ public void writeByte(final byte bite) {
+ byteBuffer.clear();
+ byteBuffer.put(bite);
+ writeByteBuffer(1);
+ }
+
+ public void writeByte(final int b) {
+ writeByte((byte)b);
+ }
+
+ /**
+ * Writes a byte array to the output buffer
+ *
+ * @param bytes value to write
+ */
+ public void writeBytes(final byte[] bytes) {
+ writeBytes(bytes, 0, bytes.length);
+ }
+
+ public void writeBytes(final byte[] bytes, final int startOffset, final int numBytes) {
+ if (!isWriting) {
+ throw new IllegalStateException("Calling write method on BinaryCodec open for read.");
+ }
+ try {
+ outputStream.write(bytes, startOffset, numBytes);
+ } catch (IOException e) {
+ throw new RuntimeIOException(constructErrorMessage("Write error"), e);
+ }
+ }
+
+ /**
+ * Write a 32-bit int to the output stream
+ *
+ * @param value int to write
+ */
+ public void writeInt(final int value) {
+ byteBuffer.clear();
+ byteBuffer.putInt(value);
+ writeByteBuffer(4);
+ }
+
+ /**
+ * Write a double (8 bytes) to the output stream
+ *
+ * @param value double to write
+ */
+ public void writeDouble(final double value) {
+ byteBuffer.clear();
+ byteBuffer.putDouble(value);
+ writeByteBuffer(8);
+ }
+
+ /**
+ * Write a 64-bit long to the output stream
+ *
+ * @param value long to write
+ */
+ public void writeLong(final long value) {
+ byteBuffer.clear();
+ byteBuffer.putLong(value);
+ writeByteBuffer(8);
+ }
+
+
+ /**
+ * Write a 16-bit short to output stream
+ */
+ public void writeShort(final short value) {
+ byteBuffer.clear();
+ byteBuffer.putShort(value);
+ writeByteBuffer(2);
+ }
+
+ /**
+ * Write a float (4 bytes) to the output stream
+ *
+ * @param value float to write
+ */
+ public void writeFloat(final float value) {
+ byteBuffer.clear();
+ byteBuffer.putFloat(value);
+ writeByteBuffer(4);
+ }
+
+ /**
+ * Writes a boolean (1 byte) to the output buffer
+ *
+ * @param value boolean to write
+ */
+ public void writeBoolean(final boolean value) {
+ byteBuffer.clear();
+ byteBuffer.put(value ? (byte)1 : (byte)0);
+ writeByteBuffer(1);
+ }
+
+ /**
+ * Writes a string to the buffer as ASCII bytes
+ *
+ * @param value string to write to buffer
+ * @param writeLength prefix the string with the length as a 32-bit int
+ * @param appendNull add a null byte to the end of the string
+ */
+ public void writeString(final String value, final boolean writeLength, final boolean appendNull) {
+ if (writeLength) {
+ int lengthToWrite = value.length();
+ if (appendNull) lengthToWrite++;
+ writeInt(lengthToWrite);
+ }
+
+ //Actually writes the string to a buffer
+ writeString(value);
+
+ if (appendNull) writeBytes(NULL_BYTE);
+
+ }
+
+
+ /**
+ * Write a string to the buffer as ASCII bytes
+ *
+ * @param value string to write
+ */
+ private void writeString(final String value) {
+ writeBytes(StringUtil.stringToBytes(value));
+ }
+
+ /**
+ * Write an 8-bit unsigned byte.
+ * NOTE: This method will break if we change to big-endian.
+ */
+ public void writeUByte(final short val) {
+ if (val < 0) {
+ throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method.");
+ }
+ if (val > MAX_UBYTE) {
+ throw new IllegalArgumentException("Value (" + val + ") to large to be written as ubyte.");
+ }
+ byteBuffer.clear();
+ byteBuffer.putShort(val);
+ writeByteBuffer(1);
+ }
+
+ /**
+ * Write a 16-bit unsigned short.
+ * NOTE: This method will break if we change to big-endian.
+ */
+ public void writeUShort(final int val) {
+ if (val < 0) {
+ throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method.");
+ }
+ if (val > MAX_USHORT) {
+ throw new IllegalArgumentException("Value (" + val + ") to large to be written as ushort.");
+ }
+ byteBuffer.clear();
+ byteBuffer.putInt(val);
+ writeByteBuffer(2);
+ }
+
+ /**
+ * Write a 32-bit unsigned int.
+ * NOTE: This method will break if we change to big-endian.
+ */
+ public void writeUInt(final long val) {
+ if (val < 0) {
+ throw new IllegalArgumentException("Negative value (" + val + ") passed to unsigned writing method.");
+ }
+ if (val > MAX_UINT) {
+ throw new IllegalArgumentException("Value (" + val + ") to large to be written as uint.");
+ }
+ byteBuffer.clear();
+ byteBuffer.putLong(val);
+ writeByteBuffer(4);
+ }
+
+ //////////////////////////////////////////////////
+ // Reading methods //
+ //////////////////////////////////////////////////
+
+ /**
+ * Read a byte array from the input stream.
+ *
+ * @throws net.sf.samtools.util.RuntimeEOFException if fewer than buffer.length bytes to read
+ */
+ public void readBytes(final byte[] buffer) {
+ readBytes(buffer, 0, buffer.length);
+ }
+
+    /**
+     * Reads exactly {@code length} bytes from the input stream, looping
+     * until the requested number of bytes has been read.
+     * A request for zero bytes returns immediately without touching the
+     * stream: some {@code InputStream} implementations report
+     * end-of-stream (-1) for a zero-length read at EOF, which would
+     * otherwise be misreported as a premature EOF.
+     *
+     * @param buffer where to put bytes read
+     * @param offset offset to start putting bytes into buffer
+     * @param length number of bytes to read
+     * @throws IndexOutOfBoundsException if {@code length} is negative
+     * @throws RuntimeEOFException if fewer than length bytes to read
+     */
+    public void readBytes(final byte[] buffer, final int offset, final int length) {
+        if (length < 0) {
+            // Reject explicitly; the loop below would silently ignore
+            // a negative length.
+            throw new IndexOutOfBoundsException("length < 0: " + length);
+        }
+        int totalNumRead = 0;
+        while (totalNumRead < length) {
+            final int numRead = readBytesOrFewer(buffer, offset + totalNumRead, length - totalNumRead);
+            if (numRead < 0) {
+                throw new RuntimeEOFException(constructErrorMessage("Premature EOF"));
+            }
+            totalNumRead += numRead;
+        }
+    }
+
+ /**
+ * Reads a byte array from the input stream.
+ *
+ * @param buffer where to put bytes read
+ * @param offset offset to start putting bytes into buffer
+ * @param length number of bytes to read. Fewer bytes may be read if EOF is reached before length bytes
+ * have been read.
+ * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of the stream has been reached.
+ */
+ public int readBytesOrFewer(final byte[] buffer, final int offset, final int length) {
+ if (isWriting) {
+ throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
+ }
+ try {
+ return inputStream.read(buffer, offset, length);
+ } catch (IOException e) {
+ throw new RuntimeIOException(constructErrorMessage("Read error"), e);
+ }
+ }
+
+ /**
+ * @return a single byte read from the input stream.
+ */
+ public byte readByte() {
+ if (isWriting) {
+ throw new IllegalStateException("Calling read method on BinaryCodec open for write.");
+ }
+ try {
+ final int ret = inputStream.read();
+ if (ret == -1) {
+ throw new RuntimeEOFException(constructErrorMessage("Premature EOF"));
+ }
+ return (byte)ret;
+ } catch (IOException e) {
+ throw new RuntimeIOException(constructErrorMessage("Read error"), e);
+ }
+ }
+
+ /**
+ * @return true if it is possible to know for sure if at EOF, and it is known for sure.
+ * If the input stream is a ByteArrayInputStream, this is faster than causing a RuntimeEOFException
+ * to be thrown.
+ */
+ public boolean knownAtEof() {
+ if (isWriting) {
+ throw new IllegalStateException("Calling knownAtEof method on BinaryCodec open for write.");
+ }
+ try {
+ return inputStream instanceof ByteArrayInputStream && inputStream.available() == 0;
+ } catch (IOException e) {
+ throw new RuntimeIOException(constructErrorMessage("available() error"), e);
+ }
+ }
+
+ /**
+ * Read a string off the input stream, as ASCII bytes
+ *
+ * @param length length of string to read
+ * @return String read from stream
+ */
+ public String readString(final int length) {
+ final byte[] buffer;
+ // Recycle single buffer if possible
+ if (length <= scratchBuffer.length) {
+ buffer = scratchBuffer;
+ } else {
+ buffer = new byte[length];
+
+ }
+ readBytes(buffer, 0, length);
+
+ return StringUtil.bytesToString(buffer, 0, length);
+ }
+
+ /**
+ * Read ASCII bytes from the input stream until a null byte is read
+ * @return String constructed from the ASCII bytes read
+ */
+ public String readNullTerminatedString() {
+ return StringUtil.readNullTerminatedString(this);
+ }
+
+ /**
+ * Read an int length, and then a String of that length
+ * @param devourNull if true, the length include a null terminator, which is read and discarded
+ */
+ public String readLengthAndString(final boolean devourNull) {
+ int length = readInt();
+ if (devourNull) {
+ --length;
+ }
+ final String ret = readString(length);
+ if (devourNull) {
+ readByte();
+ }
+ return ret;
+ }
+
+ private void readByteBuffer(final int numBytes) {
+ assert(numBytes <= byteBuffer.capacity());
+ readBytes(byteBuffer.array(), 0, numBytes);
+ byteBuffer.limit(byteBuffer.capacity());
+ byteBuffer.position(numBytes);
+ }
+
+ /**
+ * Read an int off the input stream
+ *
+ * @return int from input stream
+ */
+ public int readInt() {
+ readByteBuffer(4);
+ byteBuffer.flip();
+ return byteBuffer.getInt();
+ }
+
+ /**
+ * Reads a double off the input stream
+ *
+ * @return double
+ */
+ public double readDouble() {
+ readByteBuffer(8);
+ byteBuffer.flip();
+ return byteBuffer.getDouble();
+ }
+
+ /**
+ * Reads a long off the input stream
+ *
+ * @return long
+ */
+ public long readLong() {
+ readByteBuffer(8);
+ byteBuffer.flip();
+ return byteBuffer.getLong();
+ }
+
+ public short readShort() {
+ readByteBuffer(2);
+ byteBuffer.flip();
+ return byteBuffer.getShort();
+ }
+
+ /**
+ * Reads a float off the input stream
+ *
+ * @return float
+ */
+ public float readFloat() {
+ readByteBuffer(4);
+ byteBuffer.flip();
+ return byteBuffer.getFloat();
+ }
+
+ /**
+ * Reads a boolean off the input stream, represented as a byte with value 1 or 0
+ *
+ * @return boolean
+ */
+ public boolean readBoolean() {
+ return (((int)readByte()) == 1);
+ }
+
+ /**
+ * Reads an 8-bit unsigned byte from the input stream.
+ * This method assumes little-endianness.
+ */
+ public short readUByte() {
+ readByteBuffer(1);
+ byteBuffer.put((byte)0);
+ byteBuffer.flip();
+ return byteBuffer.getShort();
+ }
+
+ /**
+ * Reads a 16-bit unsigned short from the input stream.
+ * This method assumes little-endianness.
+ */
+ public int readUShort() {
+ readByteBuffer(2);
+ byteBuffer.putShort((short)0);
+ byteBuffer.flip();
+ return byteBuffer.getInt();
+ }
+
+ /**
+ * Reads a 32-bit unsigned int from the input stream.
+ * This method assumes little-endianness.
+ */
+ public long readUInt() {
+ readByteBuffer(4);
+ byteBuffer.putInt(0);
+ byteBuffer.flip();
+ return byteBuffer.getLong();
+ }
+
+    /**
+     * Closes the stream that matches the current mode: the output stream
+     * when writing, the input stream when reading.  A file output stream
+     * is flushed and synced to the file system (best effort) before it is
+     * closed.  If no stream has been attached for the current mode, this
+     * method does nothing.
+     *
+     * @throws RuntimeIOException if flushing or closing the stream fails
+     */
+    public void close() {
+        try {
+            if (this.isWriting) {
+                if (this.outputStream == null) {
+                    // No stream was ever attached (e.g. no-arg constructor).
+                    return;
+                }
+                // To the degree possible, make sure the bytes get forced to the file system,
+                // or else cause an exception to be thrown.
+                if (this.outputStream instanceof FileOutputStream) {
+                    this.outputStream.flush();
+                    FileOutputStream fos = (FileOutputStream)this.outputStream;
+                    try {
+                        fos.getFD().sync();
+                    } catch (SyncFailedException ignored) {
+                        // Since the sync is belt-and-suspenders anyway, don't throw an exception if it fails,
+                        // because on some OSs it will fail for some types of output.  E.g. writing to /dev/null
+                        // on some Unixes.
+                    }
+                }
+                this.outputStream.close();
+            }
+            else if (this.inputStream != null) {
+                this.inputStream.close();
+            }
+        } catch (IOException e) {
+            throw new RuntimeIOException(e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Builds an error message of the form
+     * {@code "<msg>; BinaryCodec in <read|write> mode; file: <name>"}.
+     * When no file name is known for the current mode, the file part is
+     * replaced by a note that the data is streamed.
+     *
+     * @param msg description of the failure
+     * @return the message with the codec mode and file name appended
+     */
+    private String constructErrorMessage(final String msg) {
+        final StringBuilder sb = new StringBuilder(msg);
+        sb.append("; BinaryCodec in ");
+        sb.append(isWriting ? "write" : "read");
+        // Leading space keeps the mode readable ("read mode", not "readmode").
+        sb.append(" mode; ");
+        final String filename = isWriting ? outputFileName : inputFileName;
+        if (filename != null) {
+            sb.append("file: ");
+            sb.append(filename);
+        } else {
+            sb.append("streamed file (filename not available)");
+        }
+        return sb.toString();
+    }
+
+ //////////////////////////////////////////////////
+ // Some getters //
+ //////////////////////////////////////////////////
+
+
+ public String getInputFileName() {
+ return inputFileName;
+ }
+
+ public String getOutputFileName() {
+ return outputFileName;
+ }
+
+ public void setOutputFileName(final String outputFileName) {
+ this.outputFileName = outputFileName;
+ }
+
+ public void setInputFileName(final String inputFileName) {
+ this.inputFileName = inputFileName;
+ }
+
+ public boolean isWriting() {
+ return isWriting;
+ }
+
+ public OutputStream getOutputStream() {
+ return outputStream;
+ }
+
+ public InputStream getInputStream() {
+ return inputStream;
+ }
+
+ public void setInputStream(final InputStream is) {
+ isWriting = false;
+ this.inputStream = is;
+ }
+
+ public void setOutputStream(final OutputStream os) {
+ isWriting = true;
+ this.outputStream = os;
+
+ }
+}
diff --git a/net/sf/samtools/util/BlockCompressedFilePointerUtil.java b/net/sf/samtools/util/BlockCompressedFilePointerUtil.java
new file mode 100644
index 0000000..f593e33
--- /dev/null
+++ b/net/sf/samtools/util/BlockCompressedFilePointerUtil.java
@@ -0,0 +1,101 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+/**
+ * Static methods for manipulating virtual file pointers in BGZF files.
+ * A virtual file pointer keeps a block address in its upper 48 bits and an offset into
+ * that block's uncompressed data in its lower 16 bits.
+ */
+public class BlockCompressedFilePointerUtil {
+ private static final int SHIFT_AMOUNT = 16;
+ private static final int OFFSET_MASK = 0xffff;
+ private static final long ADDRESS_MASK = 0xFFFFFFFFFFFFL;
+
+ public static final long MAX_BLOCK_ADDRESS = ADDRESS_MASK;
+ public static final int MAX_OFFSET = OFFSET_MASK;
+
+ /**
+ * Orders two virtual file pointers as unsigned 64-bit values.
+ *
+ * @param vfp1 first virtual file pointer
+ * @param vfp2 second virtual file pointer
+ * @return -1 if vfp1 is earlier in file than vfp2, 1 if it is later, 0 if equal.
+ */
+ public static int compare(final long vfp1, final long vfp2) {
+ if (vfp1 == vfp2) {
+ return 0;
+ }
+ final boolean firstHasSignBit = vfp1 < 0;
+ final boolean secondHasSignBit = vfp2 < 0;
+ if (firstHasSignBit != secondHasSignBit) {
+ // Read as unsigned, the value with the sign bit set is the larger one.
+ return firstHasSignBit ? 1 : -1;
+ }
+ // Same sign bit: signed and unsigned ordering agree.
+ return (vfp1 < vfp2) ? -1 : 1;
+ }
+
+ /**
+ * @return true if the block address of vfp2 equals the block address of vfp1 or is exactly one greater.
+ * NOTE(review): block addresses are documented below as file offsets of block starts, so a
+ * difference of one may not correspond to "the next block" - confirm the intent with callers.
+ */
+ public static boolean areInSameOrAdjacentBlocks(final long vfp1, final long vfp2) {
+ final long addressDelta = getBlockAddress(vfp2) - getBlockAddress(vfp1);
+ return addressDelta == 0 || addressDelta == 1;
+ }
+
+ /**
+ * @param blockAddress File offset of start of BGZF block; must be in [0, MAX_BLOCK_ADDRESS].
+ * @param blockOffset Offset into uncompressed block; must be in [0, MAX_OFFSET].
+ * @return Virtual file pointer that embodies the input parameters.
+ * @throws IllegalArgumentException if either argument is outside its range.
+ */
+ static long makeFilePointer(final long blockAddress, final int blockOffset) {
+ // The checks run in this fixed order so the reported problem is the same as before.
+ if (blockOffset < 0) {
+ throw new IllegalArgumentException("Negative blockOffset " + blockOffset + " not allowed.");
+ }
+ if (blockAddress < 0) {
+ throw new IllegalArgumentException("Negative blockAddress " + blockAddress + " not allowed.");
+ }
+ if (blockOffset > MAX_OFFSET) {
+ throw new IllegalArgumentException("blockOffset " + blockOffset + " too large.");
+ }
+ if (blockAddress > MAX_BLOCK_ADDRESS) {
+ throw new IllegalArgumentException("blockAddress " + blockAddress + " too large.");
+ }
+ final long shiftedAddress = blockAddress << SHIFT_AMOUNT;
+ return shiftedAddress | blockOffset;
+ }
+
+ /**
+ * @param virtualFilePointer virtual file pointer to decode
+ * @return File offset of start of BGZF block for this virtual file pointer.
+ */
+ public static long getBlockAddress(final long virtualFilePointer) {
+ // The unsigned shift zero-fills the top 16 bits, leaving exactly the 48 address bits.
+ return virtualFilePointer >>> SHIFT_AMOUNT;
+ }
+
+ /**
+ * @param virtualFilePointer virtual file pointer to decode
+ * @return Offset into uncompressed block for this virtual file pointer.
+ */
+ public static int getBlockOffset(final long virtualFilePointer) {
+ final int lowWord = (int) virtualFilePointer;
+ return lowWord & OFFSET_MASK;
+ }
+
+ /**
+ * @param vfp virtual file pointer to describe
+ * @return the pointer in decimal and hex followed by its decoded block address and offset.
+ */
+ public static String asString(final long vfp) {
+ final long address = getBlockAddress(vfp);
+ final int offset = getBlockOffset(vfp);
+ return String.format("%d(0x%x): (block address: %d, offset: %d)", vfp, vfp, address, offset);
+ }
+}
diff --git a/net/sf/samtools/util/BlockCompressedInputStream.java b/net/sf/samtools/util/BlockCompressedInputStream.java
new file mode 100644
index 0000000..ed04c8e
--- /dev/null
+++ b/net/sf/samtools/util/BlockCompressedInputStream.java
@@ -0,0 +1,484 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.util.Arrays;
+import net.sf.samtools.FileTruncatedException;
+
+/**
+ * Utility class for reading BGZF block compressed files. The caller can treat this file like any other InputStream.
+ * It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering.
+ * The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the
+ * entire file up to the location being sought. Note that seeking is not possible if the ctor(InputStream) is used.
+ *
+ * c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format
+ */
+public class BlockCompressedInputStream extends InputStream {
+ private InputStream mStream = null;
+ private SeekableStream mFile = null;
+ private byte[] mFileBuffer = null;
+ private byte[] mCurrentBlock = null;
+ private int mCurrentOffset = 0;
+ private long mBlockAddress = 0;
+ private int mLastBlockLength = 0;
+ private final BlockGunzipper blockGunzipper = new BlockGunzipper();
+
+
+ /**
+ * Reads BGZF data from a plain stream, which is passed through IOUtil.toBufferedStream first.
+ * Note that seek() is not supported if this ctor is used.
+ * @param stream source of the compressed data.
+ */
+ public BlockCompressedInputStream(final InputStream stream) {
+ mStream = IOUtil.toBufferedStream(stream);
+ mFile = null;
+ }
+
+ /**
+ * Reads BGZF data from a file through a SeekableFileStream.
+ * Use this ctor if you wish to call seek()
+ * @param file the compressed file to read.
+ * @throws IOException declared by this ctor; NOTE(review): presumably raised when the file cannot be opened - confirm.
+ */
+ public BlockCompressedInputStream(final File file)
+ throws IOException {
+ mFile = new SeekableFileStream(file);
+ mStream = null;
+
+ }
+
+ /**
+ * Reads BGZF data from a URL through a SeekableHTTPStream wrapped in a SeekableBufferedStream.
+ * seek() is supported because a seekable source is installed.
+ * @param url location of the compressed data.
+ */
+ public BlockCompressedInputStream(final URL url) {
+ mFile = new SeekableBufferedStream(new SeekableHTTPStream(url));
+ mStream = null;
+ }
+
+ /**
+ * For providing some arbitrary data source. No additional buffering is
+ * provided, so if the underlying source is not buffered, wrap it in a
+ * SeekableBufferedStream before passing to this ctor.
+ * @param strm seekable source of the compressed data; used as-is.
+ */
+ public BlockCompressedInputStream(final SeekableStream strm) {
+ mFile = strm;
+ mStream = null;
+ }
+
+ /**
+ * Determines whether or not the inflater will re-calculate the CRC on the decompressed data
+ * and check it against the value stored in the GZIP block. CRC checking is an expensive
+ * operation and should be used accordingly. The flag is forwarded to the block gunzipper.
+ * @param check true to enable CRC checking, false to disable it.
+ */
+ public void setCheckCrcs(final boolean check) {
+ this.blockGunzipper.setCheckCrcs(check);
+ }
+
+ /**
+ * Reports how many bytes remain in the current uncompressed block. When no block is loaded, or
+ * the loaded block has been fully consumed, the next block is read first, so this call itself
+ * may block on I/O even though the bytes it reports can then be read without blocking.
+ *
+ * @return number of unread bytes in the current block, or 0 if no block is available.
+ * @throws IOException if reading the next block fails.
+ */
+ @Override
+ public int available()
+ throws IOException {
+ final boolean blockExhausted = (mCurrentBlock == null) || (mCurrentOffset == mCurrentBlock.length);
+ if (blockExhausted) {
+ readBlock();
+ }
+ return (mCurrentBlock == null) ? 0 : (mCurrentBlock.length - mCurrentOffset);
+ }
+
+ /**
+ * Closes the underlying SeekableStream or InputStream, whichever this stream was built on,
+ * and drops the internal buffers. Calling it again is harmless because the source
+ * reference is cleared after the first close.
+ * @throws IOException if closing the underlying source fails.
+ */
+ @Override
+ public void close()
+ throws IOException {
+ if (mFile != null) {
+ mFile.close();
+ mFile = null;
+ } else if (mStream != null) {
+ mStream.close();
+ mStream = null;
+ }
+ // Encourage garbage collection
+ mFileBuffer = null;
+ mCurrentBlock = null;
+ }
+
+ /**
+ * Reads a single byte, loading the next block if the current one is used up.
+ * This method blocks until input data is available, the end of the stream is detected, or an
+ * exception is thrown.
+ *
+ * @return the next byte as a value in the range 0 to 255, or -1 if the end of the stream is reached.
+ * @throws IOException if reading the next block fails.
+ */
+ @Override
+ public int read()
+ throws IOException {
+ if (available() <= 0) {
+ return -1;
+ }
+ // Mask so that bytes above 0x7F are returned as 128..255 rather than negative values.
+ final int unsignedValue = mCurrentBlock[mCurrentOffset] & 0xFF;
+ mCurrentOffset++;
+ return unsignedValue;
+ }
+
+ /**
+ * Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
+ * actually read is returned as an integer. This method blocks until input data is available, end of file is detected,
+ * or an exception is thrown.
+ *
+ * read(buf) has the same effect as read(buf, 0, buf.length).
+ *
+ * @param buffer the buffer into which the data is read.
+ * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
+ * the stream has been reached.
+ * @throws IOException if reading a block fails.
+ */
+ @Override
+ public int read(final byte[] buffer)
+ throws IOException {
+ return read(buffer, 0, buffer.length);
+ }
+
+ private volatile ByteArrayOutputStream buf = null;
+ private static final byte eol = '\n';
+ private static final byte eolCr = '\r';
+
+ /**
+ * Reads a whole line. A line is considered to be terminated by either a line feed ('\n'),
+ * carriage return ('\r') or carriage return followed by a line feed ("\r\n").
+ * A line may span several BGZF blocks; the bytes are accumulated in a reusable buffer and
+ * converted with ByteArrayOutputStream.toString(), i.e. with the default charset.
+ *
+ * @return A String containing the contents of the line, excluding the line terminating
+ * character, or null if the end of the stream has been reached
+ *
+ * @exception IOException If an I/O error occurs
+ *
+ */
+ public String readLine() throws IOException {
+ int available = available();
+ if (available == 0) {
+ return null;
+ }
+ if(null == buf){ // lazy initialisation
+ buf = new ByteArrayOutputStream(8192);
+ }
+ buf.reset();
+ boolean done = false;
+ boolean foundCr = false; // \r found flag
+ // Each pass of the outer loop scans what is left of one block; foundCr survives across
+ // passes so that a "\r" ending one block and a "\n" starting the next count as one terminator.
+ while (!done) {
+ int linetmpPos = mCurrentOffset;
+ // Number of content bytes (terminator bytes excluded) found in this block.
+ int bCnt = 0;
+ while((available-- > 0)){
+ final byte c = mCurrentBlock[linetmpPos++];
+ if(c == eol){ // found \n
+ done = true;
+ break;
+ } else if(foundCr){ // previous char was \r
+ --linetmpPos; // current char is not \n so put it back
+ done = true;
+ break;
+ } else if(c == eolCr){ // found \r
+ foundCr = true;
+ continue; // no ++bCnt
+ }
+ ++bCnt;
+ }
+ // Copy the content bytes and consume everything scanned, terminator bytes included.
+ if(mCurrentOffset < linetmpPos){
+ buf.write(mCurrentBlock, mCurrentOffset, bCnt);
+ mCurrentOffset = linetmpPos;
+ }
+ // Loads the next block when this one is used up, so the next call starts on fresh data.
+ available = available();
+ if(available == 0){
+ // EOF
+ done = true;
+ }
+ }
+ return buf.toString();
+ }
+
+ /**
+ * Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read
+ * as many as len bytes, but a smaller number may be read. The number of bytes actually read is returned as an integer.
+ *
+ * This method blocks until input data is available, end of file is detected, or an exception is thrown.
+ *
+ * @param buffer buffer into which data is read.
+ * @param offset the start offset in array b at which the data is written.
+ * @param length the maximum number of bytes to read.
+ * @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
+ * the stream has been reached.
+ */
+ @Override
+ public int read(final byte[] buffer, int offset, int length)
+ throws IOException {
+ final int originalLength = length;
+ while (length > 0) {
+ final int available = available();
+ if (available == 0) {
+ // Signal EOF to caller
+ if (originalLength == length) {
+ return -1;
+ }
+ break;
+ }
+ final int copyLength = Math.min(length, available);
+ System.arraycopy(mCurrentBlock, mCurrentOffset, buffer, offset, copyLength);
+ mCurrentOffset += copyLength;
+ offset += copyLength;
+ length -= copyLength;
+ }
+ return originalLength - length;
+ }
+
+ /**
+ * Seek to the given position in the file. Note that pos is a special virtual file pointer,
+ * not an actual byte offset.
+ *
+ * @param pos virtual file pointer
+ * @throws IOException if this stream was built on a plain InputStream, if reading the target
+ * block fails, or if the offset part of pos does not fall inside the target block.
+ */
+ public void seek(final long pos)
+ throws IOException {
+ if (mFile == null) {
+ throw new IOException("Cannot seek on stream based file");
+ }
+ // Decode virtual file pointer
+ // Upper 48 bits is the byte offset into the compressed stream of a block.
+ // Lower 16 bits is the byte offset into the uncompressed stream inside the block.
+ final long compressedOffset = BlockCompressedFilePointerUtil.getBlockAddress(pos);
+ final int uncompressedOffset = BlockCompressedFilePointerUtil.getBlockOffset(pos);
+ final int available;
+ if (mBlockAddress == compressedOffset && mCurrentBlock != null) {
+ // Target is inside the block that is already loaded: no I/O needed.
+ available = mCurrentBlock.length;
+ } else {
+ mFile.seek(compressedOffset);
+ mBlockAddress = compressedOffset;
+ // Zero so that readBlock(), which adds the previous block length, leaves mBlockAddress at the target.
+ mLastBlockLength = 0;
+ readBlock();
+ // Note: available() reads a further block if the block just loaded is empty.
+ available = available();
+ }
+ // An offset equal to the block length is accepted only at the end of the file.
+ if (uncompressedOffset > available ||
+ (uncompressedOffset == available && !eof())) {
+ throw new IOException("Invalid file pointer: " + pos);
+ }
+ mCurrentOffset = uncompressedOffset;
+ }
+
+ private boolean eof() throws IOException {
+ if (mFile.eof()) {
+ return true;
+ }
+ // If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF.
+ return (mFile.length() - (mBlockAddress + mLastBlockLength) == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
+ }
+
+ /**
+ * @return virtual file pointer that can be passed to seek() to return to the current position. This is
+ * not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
+ * the two. If no block is loaded (nothing has been read yet, or the stream has been closed), the pointer
+ * refers to offset 0 of the block at the current block address instead of failing.
+ */
+ public long getFilePointer() {
+ if (mCurrentBlock == null) {
+ // Previously this dereferenced a null block and threw NullPointerException.
+ return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, 0);
+ }
+ if (mCurrentOffset == mCurrentBlock.length) {
+ // If current offset is at the end of the current block, file pointer should point
+ // to the beginning of the next block.
+ return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress + mLastBlockLength, 0);
+ }
+ return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, mCurrentOffset);
+ }
+
+ /**
+ * Convenience wrapper around BlockCompressedFilePointerUtil.getBlockAddress.
+ * @param bgzfOffset virtual file pointer.
+ * @return the block address part of the virtual file pointer.
+ */
+ public static long getFileBlock(final long bgzfOffset) {
+ return BlockCompressedFilePointerUtil.getBlockAddress(bgzfOffset);
+ }
+
+ /**
+ * Peeks at the first block header of a stream without consuming it: the stream is marked,
+ * one header's worth of bytes is read, and the stream is reset.
+ *
+ * @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported().
+ * @return true if the given file looks like a valid BGZF file.
+ * @throws IOException if reading from or resetting the stream fails.
+ */
+ public static boolean isValidFile(final InputStream stream)
+ throws IOException {
+ if (!stream.markSupported()) {
+ throw new RuntimeException("Cannot test non-buffered stream");
+ }
+ final int headerLength = BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH;
+ stream.mark(headerLength);
+ final byte[] header = new byte[headerLength];
+ final int bytesRead = readBytes(stream, header, 0, headerLength);
+ stream.reset();
+ if (bytesRead != headerLength) {
+ // Too short to contain even one block header.
+ return false;
+ }
+ return isValidBlockHeader(header);
+ }
+
+ /**
+ * Checks selected bytes of a block header against the expected gzip and BGZF constants:
+ * bytes 0 and 1 (gzip ids), byte 3 (must share at least one bit with GZIP_FLG), byte 10
+ * (compared with GZIP_XLEN; only this one byte is examined) and bytes 12 and 13 (BGZF ids).
+ * @param buffer header bytes; must hold at least 14 bytes.
+ * @return true if every checked byte matches.
+ */
+ private static boolean isValidBlockHeader(final byte[] buffer) {
+ return (buffer[0] == BlockCompressedStreamConstants.GZIP_ID1 &&
+ (buffer[1] & 0xFF) == BlockCompressedStreamConstants.GZIP_ID2 &&
+ (buffer[3] & BlockCompressedStreamConstants.GZIP_FLG) != 0 &&
+ buffer[10] == BlockCompressedStreamConstants.GZIP_XLEN &&
+ buffer[12] == BlockCompressedStreamConstants.BGZF_ID1 &&
+ buffer[13] == BlockCompressedStreamConstants.BGZF_ID2);
+ }
+
+ /**
+ * Reads the next compressed block from the underlying source and inflates it into mCurrentBlock.
+ * On return mCurrentOffset is 0, mBlockAddress is the address of the block just read and
+ * mLastBlockLength is its compressed length. When the source has no more bytes, the current
+ * block becomes an empty array positioned at the end of the data.
+ *
+ * @throws IOException if the header is incomplete or carries an implausible block length.
+ * @throws FileTruncatedException if the block body is shorter than its header announces.
+ */
+ private void readBlock()
+ throws IOException {
+
+ if (mFileBuffer == null) {
+ mFileBuffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
+ }
+ int count = readBytes(mFileBuffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
+ if (count == 0) {
+ // Handle case where there is no empty gzip block at end.
+ mCurrentOffset = 0;
+ mBlockAddress += mLastBlockLength;
+ // The empty pseudo-block occupies no bytes. Without this reset, every further call at
+ // end of file (available() retries whenever the current block is exhausted) would
+ // advance mBlockAddress again and getFilePointer() would point past the end of the file.
+ mLastBlockLength = 0;
+ mCurrentBlock = new byte[0];
+ return;
+ }
+ if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) {
+ throw new IOException("Premature end of file");
+ }
+ // The header stores the total block size minus one.
+ final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
+ if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
+ throw new IOException("Unexpected compressed block length: " + blockLength);
+ }
+ final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH;
+ count = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining);
+ if (count != remaining) {
+ throw new FileTruncatedException("Premature end of file");
+ }
+ inflateBlock(mFileBuffer, blockLength);
+ mCurrentOffset = 0;
+ // Advance past the previous block, then remember this block's length for the next advance.
+ mBlockAddress += mLastBlockLength;
+ mLastBlockLength = blockLength;
+ }
+
+ /**
+ * Inflates one compressed block into mCurrentBlock, reusing the previous block's array when
+ * it already has the required length.
+ *
+ * @param compressedBlock buffer holding the complete compressed block, header and footer included.
+ * @param compressedLength number of valid bytes in compressedBlock; the last four of them hold
+ * the uncompressed length as a little-endian int.
+ * @throws IOException declared for the decompression step.
+ * @throws RuntimeException if the stored uncompressed length is negative.
+ */
+ private void inflateBlock(final byte[] compressedBlock, final int compressedLength)
+ throws IOException {
+ final int uncompressedLength = unpackInt32(compressedBlock, compressedLength-4);
+ byte[] buffer = mCurrentBlock;
+ // Cleared before decompressing; if anything below throws, mCurrentBlock stays null.
+ mCurrentBlock = null;
+ // Validate explicitly instead of provoking and catching NegativeArraySizeException.
+ if (uncompressedLength < 0) {
+ throw new RuntimeException("BGZF file has invalid uncompressedLength: " + uncompressedLength);
+ }
+ if (buffer == null || buffer.length != uncompressedLength) {
+ buffer = new byte[uncompressedLength];
+ }
+ blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength);
+ mCurrentBlock = buffer;
+ }
+
+ /**
+ * Reads from whichever underlying source this stream was built on.
+ *
+ * @param buffer destination array.
+ * @param offset index in buffer of the first byte to store.
+ * @param length number of bytes wanted.
+ * @return number of bytes actually read; 0 when no source is open.
+ * @throws IOException if the underlying read fails.
+ */
+ private int readBytes(final byte[] buffer, final int offset, final int length)
+ throws IOException {
+ if (mFile != null) {
+ return readBytes(mFile, buffer, offset, length);
+ }
+ if (mStream != null) {
+ return readBytes(mStream, buffer, offset, length);
+ }
+ // Neither source is set: report that nothing was read.
+ return 0;
+ }
+
+ /**
+ * Keeps reading from a seekable source until {@code length} bytes have arrived or the source
+ * stops delivering data.
+ *
+ * @return number of bytes stored in buffer starting at offset; less than length only if a
+ * read returned zero or a negative count.
+ * @throws IOException if the underlying read fails.
+ */
+ private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length)
+ throws IOException {
+ int stillWanted = length;
+ while (stillWanted > 0) {
+ final int got = file.read(buffer, offset + (length - stillWanted), stillWanted);
+ if (got <= 0) {
+ break;
+ }
+ stillWanted -= got;
+ }
+ // Covers the non-positive length case too: nothing wanted, nothing read.
+ return (length > 0) ? (length - stillWanted) : 0;
+ }
+
+ /**
+ * Keeps reading from a plain stream until {@code length} bytes have arrived or the stream
+ * stops delivering data.
+ *
+ * @return number of bytes stored in buffer starting at offset; less than length only if a
+ * read returned zero or a negative count.
+ * @throws IOException if the underlying read fails.
+ */
+ private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
+ throws IOException {
+ int stillWanted = length;
+ while (stillWanted > 0) {
+ final int got = stream.read(buffer, offset + (length - stillWanted), stillWanted);
+ if (got <= 0) {
+ break;
+ }
+ stillWanted -= got;
+ }
+ // Covers the non-positive length case too: nothing wanted, nothing read.
+ return (length > 0) ? (length - stillWanted) : 0;
+ }
+
+ private int unpackInt16(final byte[] buffer, final int offset) {
+ return ((buffer[offset] & 0xFF) |
+ ((buffer[offset+1] & 0xFF) << 8));
+ }
+
+ /**
+ * Decodes a 32-bit little-endian value; the result is negative when the top bit of
+ * buffer[offset+3] is set.
+ * @return the int assembled from the four bytes starting at offset, lowest byte first.
+ */
+ private int unpackInt32(final byte[] buffer, final int offset) {
+ int value = 0;
+ // Ascending order: byte i contributes bits 8*i .. 8*i+7.
+ for (int i = 0; i < 4; ++i) {
+ value |= (buffer[offset + i] & 0xFF) << (8 * i);
+ }
+ return value;
+ }
+
+ /**
+ * Result of checkTermination(File): HAS_TERMINATOR_BLOCK when the file ends with the empty
+ * gzip block, HAS_HEALTHY_LAST_BLOCK when it does not but its last block's recorded size
+ * reaches exactly to the end of the file, DEFECTIVE otherwise.
+ */
+ public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE}
+
+ /**
+ * Inspects the end of a BGZF file to decide how it is terminated.
+ *
+ * @param file the file to examine.
+ * @return HAS_TERMINATOR_BLOCK if the file ends with the empty gzip block; HAS_HEALTHY_LAST_BLOCK
+ * if the last block preamble found in the tail of the file announces a block that ends exactly at
+ * the end of the file; DEFECTIVE if the file is shorter than the empty gzip block, no preamble is
+ * found, or the announced size does not match.
+ * @throws IOException if the file cannot be opened or read.
+ */
+ public static FileTermination checkTermination(final File file)
+ throws IOException {
+ final long fileSize = file.length();
+ if (fileSize < BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length) {
+ return FileTermination.DEFECTIVE;
+ }
+ try (RandomAccessFile raFile=new RandomAccessFile(file, "r")) {
+ raFile.seek(fileSize - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
+ byte[] buf = new byte[BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length];
+ raFile.readFully(buf);
+ if (Arrays.equals(buf, BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK)) {
+ return FileTermination.HAS_TERMINATOR_BLOCK;
+ }
+ // No terminator: load the tail of the file (at most one maximum-size block) and search it.
+ final int bufsize = (int)Math.min(fileSize, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
+ buf = new byte[bufsize];
+ raFile.seek(fileSize - bufsize);
+ // readFully, not read: a short read would leave zeros at the end of buf and could
+ // misclassify the file.
+ raFile.readFully(buf);
+ // Scan backwards so the preamble closest to the end of the file is found first.
+ for (int i = buf.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length;
+ i >= 0; --i) {
+ if (!preambleEqual(BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE,
+ buf, i, BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) {
+ continue;
+ }
+ final ByteBuffer byteBuffer = ByteBuffer.wrap(buf, i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, 4);
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
+ final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF;
+ // Healthy only if the block that starts at i ends exactly at the end of the file.
+ if (buf.length - i == totalBlockSizeMinusOne + 1) {
+ return FileTermination.HAS_HEALTHY_LAST_BLOCK;
+ } else {
+ return FileTermination.DEFECTIVE;
+ }
+ }
+ return FileTermination.DEFECTIVE;
+ }
+ }
+
+ /**
+ * Compares the first {@code length} bytes of {@code preamble} with the bytes of {@code buf}
+ * that start at {@code startOffset}.
+ * @return true if all compared bytes are equal (also when length is not positive).
+ */
+ private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) {
+ int matched = 0;
+ while (matched < length && preamble[matched] == buf[matched + startOffset]) {
+ ++matched;
+ }
+ // The scan only runs to the end when no mismatch was met.
+ return matched >= length;
+ }
+}
+
+
diff --git a/net/sf/samtools/util/BlockCompressedOutputStream.java b/net/sf/samtools/util/BlockCompressedOutputStream.java
new file mode 100644
index 0000000..61f3d47
--- /dev/null
+++ b/net/sf/samtools/util/BlockCompressedOutputStream.java
@@ -0,0 +1,312 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.zip.CRC32;
+import java.util.zip.Deflater;
+
+/**
+ * Writer for a file that is a series of gzip blocks (BGZF format). The caller just treats it as an
+ * OutputStream, and under the covers a gzip block is written when the amount of uncompressed as-yet-unwritten
+ * bytes reaches a threshold.
+ *
+ * The advantage of BGZF over conventional gzip is that BGZF allows for seeking without having to scan through
+ * the entire file up to the position being sought.
+ *
+ * Note that the flush() method should not be called by client
+ * unless you know what you're doing, because it forces a gzip block to be written even if the
+ * number of buffered bytes has not reached threshold. close(), on the other hand, must be called
+ * when done writing in order to force the last gzip block to be written.
+ *
+ * c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF file format.
+ */
+public class BlockCompressedOutputStream
+ extends OutputStream
+{
+ private static int defaultCompressionLevel = BlockCompressedStreamConstants.DEFAULT_COMPRESSION_LEVEL;
+
+ /**
+ * Sets the GZip compression level for subsequent BlockCompressedOutputStream object creation
+ * that do not specify the compression level.
+ * @param compressionLevel {@code Deflater.NO_COMPRESSION <= compressionLevel <= Deflater.BEST_COMPRESSION},
+ * i.e. 0 to 9 inclusive.
+ * @throws IllegalArgumentException if compressionLevel is outside that range.
+ */
+ public static void setDefaultCompressionLevel(final int compressionLevel) {
+ if (compressionLevel < Deflater.NO_COMPRESSION || compressionLevel > Deflater.BEST_COMPRESSION) {
+ throw new IllegalArgumentException("Invalid compression level: " + compressionLevel);
+ }
+ defaultCompressionLevel = compressionLevel;
+ }
+
+ /** @return the compression level used by constructors that do not take one explicitly. */
+ public static int getDefaultCompressionLevel() {
+ return defaultCompressionLevel;
+ }
+
+ private final BinaryCodec codec;
+ private final byte[] uncompressedBuffer = new byte[BlockCompressedStreamConstants.DEFAULT_UNCOMPRESSED_BLOCK_SIZE];
+ private int numUncompressedBytes = 0;
+ private final byte[] compressedBuffer =
+ new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE -
+ BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH];
+ private final Deflater deflater;
+
+ // A second deflater is created for the very unlikely case where the regular deflation actually makes
+ // things bigger, and the compressed block is too big. It should be possible to downshift the
+ // primary deflater to NO_COMPRESSION level, recompress, and then restore it to its original setting,
+ // but in practice that doesn't work.
+ // The motivation for deflating at NO_COMPRESSION level is that it will predictably produce compressed
+ // output that is 10 bytes larger than the input, and the threshold at which a block is generated is such that
+ // the size of the final gzip block will always be <= 64K. This is preferred over the previous method,
+ // which would attempt to compress up to 64K bytes, and if the resulting compressed block was too large,
+ // try compressing fewer input bytes (aka "downshifting"). The problem with downshifting is that
+ // getFilePointer might return an inaccurate value.
+ private final Deflater noCompressionDeflater = new Deflater(Deflater.NO_COMPRESSION, true);
+ private final CRC32 crc32 = new CRC32();
+ private File file = null;
+ private long mBlockAddress = 0;
+
+
+ // Really a local variable, but allocate once to reduce GC burden.
+ private final byte[] singleByteArray = new byte[1];
+
+ /**
+ * Writes to the named file using the current default compression level
+ * (see setDefaultCompressionLevel).
+ * @param filename path of the file to write.
+ */
+ public BlockCompressedOutputStream(final String filename) {
+ this(filename, defaultCompressionLevel);
+ }
+
+ /**
+ * Writes to the given file using the current default compression level
+ * (see setDefaultCompressionLevel).
+ * @param file the file to write.
+ */
+ public BlockCompressedOutputStream(final File file) {
+ this(file, defaultCompressionLevel);
+ }
+
+ /**
+ * Prepare to compress at the given compression level
+ * @param filename path of the file to write.
+ * @param compressionLevel {@code 1 <= compressionLevel <= 9}; passed to Deflater without a range check here.
+ */
+ public BlockCompressedOutputStream(final String filename, final int compressionLevel) {
+ this(new File(filename), compressionLevel);
+ }
+
+ /**
+ * Prepare to compress at the given compression level
+ * @param file the file to write; also re-read by close() to verify the terminator block.
+ * @param compressionLevel {@code 1 <= compressionLevel <= 9}; passed to Deflater without a range check here.
+ */
+ public BlockCompressedOutputStream(final File file, final int compressionLevel) {
+ this.file = file;
+ codec = new BinaryCodec(file, true);
+ deflater = new Deflater(compressionLevel, true);
+ }
+
+ /**
+ * Constructors that take output streams
+ * file may be null
+ * @param os stream that receives the compressed data.
+ * @param file file behind os, or null; used for error messages and the check done by close().
+ */
+ public BlockCompressedOutputStream(final OutputStream os, File file) {
+ this(os, file, defaultCompressionLevel);
+ }
+
+ /**
+ * Writes compressed data to the given stream at the given compression level.
+ * @param os stream that receives the compressed data.
+ * @param file file behind os, or null; when non-null its absolute path is recorded as the codec's output file name.
+ * @param compressionLevel passed to Deflater without a range check here.
+ */
+ public BlockCompressedOutputStream(final OutputStream os, final File file, final int compressionLevel) {
+ this.file = file;
+ codec = new BinaryCodec(os);
+ if (file != null) {
+ codec.setOutputFileName(file.getAbsolutePath());
+ }
+ deflater = new Deflater(compressionLevel, true);
+ }
+
+ /**
+ * Writes b.length bytes from the specified byte array to this output stream. The general contract for write(b)
+ * is that it should have exactly the same effect as the call write(b, 0, b.length).
+ * @param bytes the data
+ * @throws IOException declared by the OutputStream contract.
+ */
+ @Override
+ public void write(final byte[] bytes) throws IOException {
+ write(bytes, 0, bytes.length);
+ }
+
+ /**
+ * Buffers {@code numBytes} bytes of {@code bytes}, starting at {@code startIndex}, and emits a
+ * gzip block each time the internal uncompressed buffer becomes full. Bytes that do not fill
+ * the buffer stay buffered until a later write, flush() or close().
+ *
+ * @param bytes the data
+ * @param startIndex the start offset in the data
+ * @param numBytes the number of bytes to write
+ * @throws IOException declared by the OutputStream contract.
+ */
+ @Override
+ public void write(final byte[] bytes, int startIndex, int numBytes) throws IOException {
+ assert(numUncompressedBytes < uncompressedBuffer.length);
+ final int capacity = uncompressedBuffer.length;
+ while (numBytes > 0) {
+ final int freeSpace = capacity - numUncompressedBytes;
+ final int chunk = (numBytes < freeSpace) ? numBytes : freeSpace;
+ System.arraycopy(bytes, startIndex, uncompressedBuffer, numUncompressedBytes, chunk);
+ numUncompressedBytes += chunk;
+ startIndex += chunk;
+ numBytes -= chunk;
+ assert(numBytes >= 0);
+ if (numUncompressedBytes == capacity) {
+ // Buffer is full: compress it and write it out as one block.
+ deflateBlock();
+ }
+ }
+ }
+
+ /**
+ * WARNING: flush() affects the output format, because it causes the current contents of uncompressedBuffer
+ * to be compressed and written, even if it isn't full. Unless you know what you're doing, don't call flush().
+ * Instead, call close(), which will flush any unwritten data before closing the underlying stream.
+ *
+ * @throws IOException if flushing the underlying stream fails.
+ */
+ @Override
+ public void flush() throws IOException {
+ // Emit whatever is buffered as a (possibly short) block, then flush the underlying stream.
+ while (numUncompressedBytes > 0) {
+ deflateBlock();
+ }
+ codec.getOutputStream().flush();
+ }
+
+ /**
+ * close() must be called in order to flush any remaining buffered bytes. An unclosed file will likely be
+ * defective. Writes the remaining data, appends the empty terminator block, closes the underlying codec
+ * and releases the native resources of both deflaters. When the output is a regular file, the file is
+ * then re-read to verify that the terminator block is present.
+ *
+ * @throws IOException if flushing fails or the terminator block is not found in the written file.
+ */
+ @Override
+ public void close() throws IOException {
+ try {
+ flush();
+ codec.writeBytes(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK);
+ codec.close();
+ } finally {
+ // A Deflater holds native memory until end() is called; release it here, even when
+ // writing failed, rather than waiting for garbage collection.
+ deflater.end();
+ noCompressionDeflater.end();
+ }
+ // Can't re-open something that is not a regular file, e.g. a named pipe or an output stream
+ if (this.file == null || !this.file.isFile()) return;
+ if (BlockCompressedInputStream.checkTermination(this.file) !=
+ BlockCompressedInputStream.FileTermination.HAS_TERMINATOR_BLOCK) {
+ throw new IOException("Terminator block not found after closing BGZF file " + this.file);
+ }
+ }
+
+ /**
+ * Writes the specified byte to this output stream. The general contract for write is that one byte is written
+ * to the output stream. The byte to be written is the eight low-order bits of the argument b.
+ * The 24 high-order bits of b are ignored.
+ * @param bite the byte to write; only its eight low-order bits are used.
+ * @throws IOException declared by the OutputStream contract.
+ */
+ @Override
+ public void write(final int bite) throws IOException {
+ // Reuses a preallocated one-element array to go through the array-based write path.
+ singleByteArray[0] = (byte)bite;
+ write(singleByteArray);
+ }
+
+ /** Encode virtual file pointer
+ * Upper 48 bits is the byte offset into the compressed stream of a block.
+ * Lower 16 bits is the byte offset into the uncompressed stream inside the block.
+ * @return virtual file pointer of the next byte to be written: the address of the block
+ * currently being filled combined with the number of bytes buffered for it so far.
+ */
+ public long getFilePointer(){
+ return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, numUncompressedBytes);
+ }
+
+ /**
+ * Writes everything currently held in uncompressedBuffer to the underlying file as one gzip block
+ * and empties the buffer. The data is first compressed with the configured deflater; if the result
+ * does not fit in compressedBuffer, it is compressed again with the NO_COMPRESSION deflater.
+ * Does nothing when the buffer is empty.
+ * @return size of gzip block that was written, or 0 if there was nothing to write.
+ * @throws IllegalStateException if even the NO_COMPRESSION output does not fit.
+ */
+ private int deflateBlock() {
+ if (numUncompressedBytes == 0) {
+ return 0;
+ }
+ final int bytesToCompress = numUncompressedBytes;
+ // Compress the input
+ deflater.reset();
+ deflater.setInput(uncompressedBuffer, 0, bytesToCompress);
+ deflater.finish();
+ int compressedSize = deflater.deflate(compressedBuffer, 0, compressedBuffer.length);
+
+ // If it didn't all fit in compressedBuffer.length, set compression level to NO_COMPRESSION
+ // and try again. This should always fit.
+ if (!deflater.finished()) {
+ noCompressionDeflater.reset();
+ noCompressionDeflater.setInput(uncompressedBuffer, 0, bytesToCompress);
+ noCompressionDeflater.finish();
+ compressedSize = noCompressionDeflater.deflate(compressedBuffer, 0, compressedBuffer.length);
+ if (!noCompressionDeflater.finished()) {
+ throw new IllegalStateException("unpossible");
+ }
+ }
+ // Data compressed small enough, so write it out.
+ crc32.reset();
+ crc32.update(uncompressedBuffer, 0, bytesToCompress);
+
+ final int totalBlockSize = writeGzipBlock(compressedSize, bytesToCompress, crc32.getValue());
+
+ // The whole buffer always goes into the block, so nothing is left to carry over.
+ numUncompressedBytes = 0;
+ mBlockAddress += totalBlockSize;
+ return totalBlockSize;
+ }
+
+ /**
+ * Writes the entire gzip block, assuming the compressed data is stored in compressedBuffer
+ * @param compressedSize number of valid bytes in compressedBuffer.
+ * @param uncompressedSize number of input bytes the compressed data represents; written in the footer.
+ * @param crc CRC of the uncompressed data; its low 32 bits are written in the footer.
+ * @return size of gzip block that was written.
+ */
+ private int writeGzipBlock(final int compressedSize, final int uncompressedSize, final long crc) {
+ // Init gzip header
+ codec.writeByte(BlockCompressedStreamConstants.GZIP_ID1);
+ codec.writeByte(BlockCompressedStreamConstants.GZIP_ID2);
+ codec.writeByte(BlockCompressedStreamConstants.GZIP_CM_DEFLATE);
+ codec.writeByte(BlockCompressedStreamConstants.GZIP_FLG);
+ codec.writeInt(0); // Modification time
+ codec.writeByte(BlockCompressedStreamConstants.GZIP_XFL);
+ codec.writeByte(BlockCompressedStreamConstants.GZIP_OS_UNKNOWN);
+ codec.writeShort(BlockCompressedStreamConstants.GZIP_XLEN);
+ // BGZF extra subfield: two id bytes, the subfield length, then the block size below.
+ codec.writeByte(BlockCompressedStreamConstants.BGZF_ID1);
+ codec.writeByte(BlockCompressedStreamConstants.BGZF_ID2);
+ codec.writeShort(BlockCompressedStreamConstants.BGZF_LEN);
+ final int totalBlockSize = compressedSize + BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH +
+ BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH;
+
+ // I don't know why we store block size - 1, but that is what the spec says
+ codec.writeShort((short)(totalBlockSize - 1));
+ codec.writeBytes(compressedBuffer, 0, compressedSize);
+ // Footer: CRC of the uncompressed data followed by its length.
+ codec.writeInt((int)crc);
+ codec.writeInt(uncompressedSize);
+ return totalBlockSize;
+ }
+}
diff --git a/net/sf/samtools/util/BlockCompressedStreamConstants.java b/net/sf/samtools/util/BlockCompressedStreamConstants.java
new file mode 100644
index 0000000..ea25733
--- /dev/null
+++ b/net/sf/samtools/util/BlockCompressedStreamConstants.java
@@ -0,0 +1,118 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+import net.sf.samtools.Defaults;
+
+/**
+ * Constants shared by BlockCompressed{Input,Output}Stream classes
+ */
+public class BlockCompressedStreamConstants {
+    /**
+     * Number of bytes in the gzip block before the deflated data.
+     * This is not the standard header size, because one optional subfield is included,
+     * but it is the standard for these classes.
+     */
+    public static final int BLOCK_HEADER_LENGTH = 18;
+
+    /** Location in the gzip block of the total block size (actually total block size - 1). */
+    public static final int BLOCK_LENGTH_OFFSET = 16;
+
+    /** Number of bytes that follow the deflated data. */
+    public static final int BLOCK_FOOTER_LENGTH = 8;
+
+    /** A compressed block, including header and footer, is required to be <= this. */
+    public static final int MAX_COMPRESSED_BLOCK_SIZE = 64 * 1024;
+
+    /** Gzip overhead is the header, the footer, and the block size (encoded as a short). */
+    public static final int GZIP_OVERHEAD = BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH + 2;
+
+    /** If Deflater has compression level == NO_COMPRESSION, 10 bytes of overhead (determined experimentally). */
+    public static final int NO_COMPRESSION_OVERHEAD = 10;
+
+    /**
+     * Push out a gzip block when this many uncompressed bytes have been accumulated.
+     * The size is chosen so that, for incompressible data and a Deflater at
+     * compression level == NO_COMPRESSION, the compressed size is still <= MAX_COMPRESSED_BLOCK_SIZE.
+     */
+    public static final int DEFAULT_UNCOMPRESSED_BLOCK_SIZE = 64 * 1024 - (GZIP_OVERHEAD + NO_COMPRESSION_OVERHEAD);
+
+    /** First gzip magic number. */
+    public static final byte GZIP_ID1 = 31;
+    /** Second gzip magic number; held as an int because 139 does not fit in a signed byte. */
+    public static final int GZIP_ID2 = 139;
+
+    /** FEXTRA flag means there are optional fields. */
+    public static final int GZIP_FLG = 4;
+
+    /** Extra flags. */
+    public static final int GZIP_XFL = 0;
+
+    /** Length of extra subfield. */
+    public static final short GZIP_XLEN = 6;
+
+    /** The deflate compression, which is customarily used by gzip. */
+    public static final byte GZIP_CM_DEFLATE = 8;
+
+    public static final int DEFAULT_COMPRESSION_LEVEL = Defaults.COMPRESSION_LEVEL;
+
+    /** OS is irrelevant here because no line terminator translation is done. */
+    public static final int GZIP_OS_UNKNOWN = 255;
+
+    /** First byte of the subfield ID. */
+    public static final byte BGZF_ID1 = 66;
+    /** Second byte of the subfield ID. */
+    public static final byte BGZF_ID2 = 67;
+
+    /** Subfield length in bytes. */
+    public static final byte BGZF_LEN = 2;
+
+    /**
+     * A complete block with a two-byte payload and zeroed CRC and uncompressed size.
+     * NOTE(review): the payload bytes {3, 0} look like an empty deflate stream -- confirm against the BGZF spec.
+     */
+    public static final byte[] EMPTY_GZIP_BLOCK = {
+            GZIP_ID1,
+            (byte) GZIP_ID2,
+            GZIP_CM_DEFLATE,
+            GZIP_FLG,
+            0, 0, 0, 0, // Modification time
+            GZIP_XFL,
+            (byte) GZIP_OS_UNKNOWN,
+            GZIP_XLEN, 0, // Little-endian short
+            BGZF_ID1,
+            BGZF_ID2,
+            BGZF_LEN, 0, // Little-endian short
+            // Total block size - 1 (the + 2 accounts for the two payload bytes)
+            BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH - 1 + 2, 0, // Little-endian short
+            // Payload
+            3, 0,
+            0, 0, 0, 0, // crc
+            0, 0, 0, 0, // uncompressedSize
+    };
+
+    /** The header bytes of a block up to, but not including, the block-size field. */
+    public static final byte[] GZIP_BLOCK_PREAMBLE = {
+            GZIP_ID1,
+            (byte) GZIP_ID2,
+            GZIP_CM_DEFLATE,
+            GZIP_FLG,
+            0, 0, 0, 0, // Modification time
+            GZIP_XFL,
+            (byte) GZIP_OS_UNKNOWN,
+            GZIP_XLEN, 0, // Little-endian short
+            BGZF_ID1,
+            BGZF_ID2,
+            BGZF_LEN, 0, // Little-endian short
+    };
+}
diff --git a/net/sf/samtools/util/BlockGunzipper.java b/net/sf/samtools/util/BlockGunzipper.java
new file mode 100644
index 0000000..365a6bd
--- /dev/null
+++ b/net/sf/samtools/util/BlockGunzipper.java
@@ -0,0 +1,115 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+import net.sf.samtools.SAMFormatException;
+
+import java.util.zip.Inflater;
+import java.util.zip.CRC32;
+import java.util.zip.DataFormatException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * Alternative to GZIPInputStream, for decompressing GZIP blocks that are already loaded into a byte[].
+ * The main advantage is that this object can be used over and over again to decompress many blocks,
+ * whereas a new GZIPInputStream and ByteArrayInputStream would otherwise need to be created for each
+ * block to be decompressed.
+ *
+ * This code requires that the GZIP header conform to the GZIP blocks written to BAM files, with
+ * a specific subfield and no other optional stuff.
+ *
+ * @author alecw at broadinstitute.org
+ */
+public class BlockGunzipper {
+ private final Inflater inflater = new Inflater(true); // GZIP mode
+ private final CRC32 crc32 = new CRC32();
+ private boolean checkCrcs = false;
+
+ /** Allows the caller to decide whether or not to check CRCs on when uncompressing blocks. */
+ public void setCheckCrcs(final boolean check) {
+ this.checkCrcs = check;
+ }
+
+ /**
+ * Decompress GZIP-compressed data
+ * @param uncompressedBlock must be big enough to hold decompressed output.
+ * @param compressedBlock compressed data starting at offset 0
+ * @param compressedLength size of compressed data, possibly less than the size of the buffer.
+ */
+ void unzipBlock(byte[] uncompressedBlock, byte[] compressedBlock, int compressedLength) {
+ try {
+ ByteBuffer byteBuffer = ByteBuffer.wrap(compressedBlock, 0, compressedLength);
+ byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
+
+ // Validate GZIP header
+ if (byteBuffer.get() != BlockCompressedStreamConstants.GZIP_ID1 ||
+ byteBuffer.get() != (byte)BlockCompressedStreamConstants.GZIP_ID2 ||
+ byteBuffer.get() != BlockCompressedStreamConstants.GZIP_CM_DEFLATE ||
+ byteBuffer.get() != BlockCompressedStreamConstants.GZIP_FLG
+ ) {
+ throw new SAMFormatException("Invalid GZIP header");
+ }
+ // Skip MTIME, XFL, OS fields
+ byteBuffer.position(byteBuffer.position() + 6);
+ if (byteBuffer.getShort() != BlockCompressedStreamConstants.GZIP_XLEN) {
+ throw new SAMFormatException("Invalid GZIP header");
+ }
+ // Skip blocksize subfield intro
+ byteBuffer.position(byteBuffer.position() + 4);
+ // Read ushort
+ final int totalBlockSize = (byteBuffer.getShort() & 0xffff) + 1;
+ if (totalBlockSize != compressedLength) {
+ throw new SAMFormatException("GZIP blocksize disagreement");
+ }
+
+ // Read expected size and CRD from end of GZIP block
+ final int deflatedSize = compressedLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH - BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH;
+ byteBuffer.position(byteBuffer.position() + deflatedSize);
+ int expectedCrc = byteBuffer.getInt();
+ int uncompressedSize = byteBuffer.getInt();
+ inflater.reset();
+
+ // Decompress
+ inflater.setInput(compressedBlock, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, deflatedSize);
+ final int inflatedBytes = inflater.inflate(uncompressedBlock, 0, uncompressedSize);
+ if (inflatedBytes != uncompressedSize) {
+ throw new SAMFormatException("Did not inflate expected amount");
+ }
+
+ // Validate CRC if so desired
+ if (this.checkCrcs) {
+ crc32.reset();
+ crc32.update(uncompressedBlock, 0, uncompressedSize);
+ final long crc = crc32.getValue();
+ if ((int)crc != expectedCrc) {
+ throw new SAMFormatException("CRC mismatch");
+ }
+ }
+ } catch (DataFormatException e)
+ {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/net/sf/samtools/util/HttpUtils.java b/net/sf/samtools/util/HttpUtils.java
new file mode 100644
index 0000000..940e51f
--- /dev/null
+++ b/net/sf/samtools/util/HttpUtils.java
@@ -0,0 +1,102 @@
+package net.sf.samtools.util;
+
+import java.net.URLConnection;
+import java.net.URL;
+import java.net.HttpURLConnection;
+import java.net.MalformedURLException;
+
+/**
+ * Small helpers for reading HTTP response headers of a URL.
+ * Every method opens its own connection, applies a read timeout and disconnects afterwards.
+ * Failures are printed with printStackTrace and reported through the return value.
+ *
+ * User: jrobinso
+ * Date: Sep 23, 2009
+ */
+public class HttpUtils {
+
+    /** Read timeout, in milliseconds, applied to every connection opened here. */
+    private static final int READ_TIMEOUT_MILLIS = 3000;
+
+    /**
+     * @return the value of the "ETag" response header, or null if it is absent or the request failed.
+     */
+    public static String getETag(final URL url) {
+        return getHeaderField(url, "ETag");
+    }
+
+    /**
+     * @param url  resource to query
+     * @param name header field name
+     * @return the value of the named response header, or null if it is absent or the request failed.
+     */
+    public static String getHeaderField(final URL url, final String name) {
+        URLConnection connection = null;
+        try {
+            connection = url.openConnection();
+            connection.setReadTimeout(READ_TIMEOUT_MILLIS);
+            return connection.getHeaderField(name);
+        } catch (Exception e) {
+            e.printStackTrace();
+            return null;
+        } finally {
+            disconnect(connection);
+        }
+    }
+
+    /** Prints every response header of the URL to System.out as name, tab, value. */
+    public static void printHeaderFields(final URL url) {
+        URLConnection connection = null;
+        try {
+            connection = url.openConnection();
+            connection.setReadTimeout(READ_TIMEOUT_MILLIS);
+
+            for (final String headerName : connection.getHeaderFields().keySet()) {
+                System.out.println(headerName + "\t" + connection.getHeaderField(headerName));
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        } finally {
+            disconnect(connection);
+        }
+    }
+
+    /**
+     * @return true if the response for the URL carries an "ETag" header; false if it does not
+     * or the request failed.
+     */
+    public static boolean resourceAvailable(final URL url) {
+        return getETag(url) != null;
+    }
+
+    /** Disconnects the connection if it is an HTTP connection; null and other kinds are ignored. */
+    private static void disconnect(final URLConnection connection) {
+        if (connection instanceof HttpURLConnection) {
+            ((HttpURLConnection) connection).disconnect();
+        }
+    }
+
+    public static void main(final String[] args) throws MalformedURLException {
+        //printHeaderFields(new URL(
+        //        "http://www.broadinstitute.org/igvdata/1KG/DCC_merged/freeze5/NA12891.pilot2.SLX.bam"));
+        final URL testUrl = new URL(
+                "http://www.broadinstitute.org/igvdata/test/sam/303KY.8.paired1.bam.tdf");
+        System.out.println(getETag(testUrl));
+        System.out.println(resourceAvailable(testUrl));
+    }
+}
diff --git a/net/sf/samtools/util/IOUtil.java b/net/sf/samtools/util/IOUtil.java
new file mode 100644
index 0000000..9665c68
--- /dev/null
+++ b/net/sf/samtools/util/IOUtil.java
@@ -0,0 +1,124 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+
+import net.sf.samtools.Defaults;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.File;
+
+/**
+ * Miscellaneous stateless static IO-oriented methods.
+ */
+public class IOUtil {
+    /**
+     * @deprecated Use Defaults.BUFFER_SIZE instead.
+     */
+    @Deprecated public static final int STANDARD_BUFFER_SIZE = Defaults.BUFFER_SIZE;
+
+    public static final long ONE_GB = 1024 * 1024 * 1024;
+    public static final long TWO_GBS = 2 * ONE_GB;
+    public static final long FIVE_GBS = 5 * ONE_GB;
+
+    /**
+     * Wrap the given stream in a BufferedInputStream, if it isn't already wrapped
+     * @param stream stream to be wrapped
+     * @return A BufferedInputStream wrapping stream, or stream itself if stream instanceof BufferedInputStream.
+     */
+    public static BufferedInputStream toBufferedStream(final InputStream stream) {
+        if (stream instanceof BufferedInputStream) {
+            return (BufferedInputStream) stream;
+        } else {
+            return new BufferedInputStream(stream, STANDARD_BUFFER_SIZE);
+        }
+    }
+
+    /**
+     * Delete a list of files, and write a warning message if one could not be deleted.
+     * @param files Files to be deleted.
+     */
+    public static void deleteFiles(final File... files) {
+        for (final File f : files) {
+            if (!f.delete()) {
+                System.err.println("Could not delete file " + f);
+            }
+        }
+    }
+
+    /**
+     * Delete a collection of files, and write a warning message if one could not be deleted.
+     * @param files Files to be deleted.
+     */
+    public static void deleteFiles(final Iterable<File> files) {
+        for (final File f : files) {
+            if (!f.delete()) {
+                System.err.println("Could not delete file " + f);
+            }
+        }
+    }
+
+
+    /**
+     * @return true if the path is not a device (e.g. /dev/null or /dev/stdin), and is not
+     * an existing directory.  I.e. it is a regular path that may correspond to an existing
+     * file, or a path that could be a regular output file.
+     */
+    public static boolean isRegularPath(final File file) {
+        return !file.exists() || file.isFile();
+    }
+
+    /**
+     * Creates a new tmp file on one of the available temp filesystems, registers it for deletion
+     * on JVM exit and then returns it.  The first directory with more than minBytesFree usable
+     * bytes is used; if none qualifies, the last directory in the array is used regardless.
+     *
+     * @param prefix       prefix passed to File.createTempFile
+     * @param suffix       suffix passed to File.createTempFile
+     * @param tmpDirs      candidate directories, in order of preference
+     * @param minBytesFree usable space a directory must exceed to be chosen ahead of the last one
+     * @return the new file, or null if tmpDirs is empty
+     * @throws IOException if the file could not be created
+     */
+    public static File newTempFile(final String prefix, final String suffix,
+                                   final File[] tmpDirs, final long minBytesFree) throws IOException {
+        File f = null;
+
+        for (int i=0; i<tmpDirs.length; ++i) {
+            if (tmpDirs[i].getUsableSpace() > minBytesFree || i == tmpDirs.length-1) {
+                f = File.createTempFile(prefix, suffix, tmpDirs[i]);
+                f.deleteOnExit();
+                break;
+            }
+        }
+
+        return f;
+    }
+
+    /** Creates a new tmp file on one of the potential filesystems that has at least 5GB free. */
+    public static File newTempFile(final String prefix, final String suffix,
+                                   final File[] tmpDirs) throws IOException {
+        return newTempFile(prefix, suffix, tmpDirs, FIVE_GBS);
+    }
+
+
+    /**
+     * Returns a default tmp directory: the java.io.tmpdir system property if its last path
+     * element is already the user name, otherwise a subdirectory of it named after the user.
+     */
+    public static File getDefaultTmpDir() {
+        final String user = System.getProperty("user.name");
+        final String tmp = System.getProperty("java.io.tmpdir");
+
+        // Check the platform separator as well as '/', so that a Windows path such as
+        // C:\tmp\alice is recognised and the user name is not appended a second time.
+        if (tmp.endsWith(File.separator + user) || tmp.endsWith("/" + user)) {
+            return new File(tmp);
+        }
+        return new File(tmp, user);
+    }
+}
diff --git a/net/sf/samtools/util/RuntimeEOFException.java b/net/sf/samtools/util/RuntimeEOFException.java
new file mode 100644
index 0000000..ae4d70d
--- /dev/null
+++ b/net/sf/samtools/util/RuntimeEOFException.java
@@ -0,0 +1,46 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+import net.sf.samtools.SAMException;
+
+/**
+ * Raised by codecs to signal end-of-file, so that their APIs need not be cluttered with throws clauses.
+ */
+public class RuntimeEOFException extends SAMException {
+    /** Creates an exception with neither a message nor a cause. */
+    public RuntimeEOFException() {
+        super();
+    }
+
+    /**
+     * @param s detail message
+     */
+    public RuntimeEOFException(final String s) {
+        super(s);
+    }
+
+    /**
+     * @param s         detail message
+     * @param throwable underlying cause
+     */
+    public RuntimeEOFException(final String s, final Throwable throwable) {
+        super(s, throwable);
+    }
+
+    /**
+     * @param throwable underlying cause
+     */
+    public RuntimeEOFException(final Throwable throwable) {
+        super(throwable);
+    }
+}
diff --git a/net/sf/samtools/util/RuntimeIOException.java b/net/sf/samtools/util/RuntimeIOException.java
new file mode 100644
index 0000000..6c2f5ad
--- /dev/null
+++ b/net/sf/samtools/util/RuntimeIOException.java
@@ -0,0 +1,46 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools.util;
+
+import net.sf.samtools.SAMException;
+
+/**
+ * Raised by IO classes in place of IOException, so that their APIs need not be cluttered with throws clauses.
+ */
+public class RuntimeIOException extends SAMException {
+    /** Creates an exception with neither a message nor a cause. */
+    public RuntimeIOException() {
+        super();
+    }
+
+    /**
+     * @param s detail message
+     */
+    public RuntimeIOException(final String s) {
+        super(s);
+    }
+
+    /**
+     * @param s         detail message
+     * @param throwable underlying cause
+     */
+    public RuntimeIOException(final String s, final Throwable throwable) {
+        super(s, throwable);
+    }
+
+    /**
+     * @param throwable underlying cause
+     */
+    public RuntimeIOException(final Throwable throwable) {
+        super(throwable);
+    }
+}
diff --git a/net/sf/samtools/util/SeekableBufferedStream.java b/net/sf/samtools/util/SeekableBufferedStream.java
new file mode 100644
index 0000000..50d0087
--- /dev/null
+++ b/net/sf/samtools/util/SeekableBufferedStream.java
@@ -0,0 +1,90 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package net.sf.samtools.util;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+
+/**
+ * A wrapper class to provide buffered read access to a SeekableStream.  Just wrapping such a stream with
+ * a BufferedInputStream will not work as it does not support seeking.  In this implementation a
+ * seek call is delegated to the wrapped stream, and the buffer reset.
+ */
+public class SeekableBufferedStream extends SeekableStream {
+
+    public static final int DEFAULT_BUFFER_SIZE = 512000;
+
+    final private int bufferSize;
+    final SeekableStream wrappedStream;
+    BufferedInputStream bufferedStream;
+    // Logical read position of this stream, i.e. the number of bytes handed to callers since
+    // the last seek plus the seek target; the wrapped stream itself may have read further ahead.
+    long position;
+
+    /**
+     * @param httpStream stream to wrap
+     * @param bufferSize size in bytes of the read buffer
+     */
+    public SeekableBufferedStream(SeekableStream httpStream, int bufferSize) {
+        this.bufferSize = bufferSize;
+        this.wrappedStream = httpStream;
+        this.position = 0;
+        bufferedStream = new BufferedInputStream(wrappedStream, bufferSize);
+    }
+
+    /** Wraps the stream using DEFAULT_BUFFER_SIZE. */
+    public SeekableBufferedStream(SeekableStream httpStream) {
+        this(httpStream, DEFAULT_BUFFER_SIZE);
+    }
+
+    /** @return the length reported by the wrapped stream. */
+    public long length() {
+        return wrappedStream.length();
+    }
+
+    /** Seeks the wrapped stream and discards the current buffer by creating a new one. */
+    public void seek(long position) throws IOException {
+        this.position = position;
+        wrappedStream.seek(position);
+        bufferedStream = new BufferedInputStream(wrappedStream, bufferSize);
+    }
+
+    /** @return the next byte as a value in 0-255, or -1 at end of stream. */
+    public int read() throws IOException {
+        final int b = bufferedStream.read();
+        // Only advance when a byte was actually delivered; -1 means end of stream.
+        if (b >= 0) {
+            position++;
+        }
+        return b;
+    }
+
+    /** @return the number of bytes read into buffer, or -1 at end of stream. */
+    public int read(byte[] buffer, int offset, int length) throws IOException {
+        int nBytesRead = bufferedStream.read(buffer, offset, length);
+        if (nBytesRead > 0) {
+            position += nBytesRead;
+        }
+        return nBytesRead;
+    }
+
+    /** Closes the wrapped stream. */
+    public void close() throws IOException {
+        wrappedStream.close();
+    }
+
+    /** @return true once the logical position has reached the wrapped stream's length. */
+    public boolean eof() throws IOException {
+        return position >= wrappedStream.length();
+    }
+
+    @Override
+    public String getSource() {
+        return wrappedStream.getSource();
+    }
+}
diff --git a/net/sf/samtools/util/SeekableFileStream.java b/net/sf/samtools/util/SeekableFileStream.java
new file mode 100644
index 0000000..ef4db1d
--- /dev/null
+++ b/net/sf/samtools/util/SeekableFileStream.java
@@ -0,0 +1,69 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package net.sf.samtools.util;
+
+import java.io.*;
+
+/**
+ * SeekableStream over a local file, implemented with a read-only RandomAccessFile.
+ *
+ * @author jrobinso
+ */
+public class SeekableFileStream extends SeekableStream {
+
+    File file;
+    RandomAccessFile fis;
+
+    /**
+     * @param file file to open for reading
+     * @throws FileNotFoundException if the file cannot be opened
+     */
+    public SeekableFileStream(final File file) throws FileNotFoundException {
+        this.file = file;
+        this.fis = new RandomAccessFile(file, "r");
+    }
+
+    /** @return the length reported by the File object. */
+    public long length() {
+        return file.length();
+    }
+
+    /** @return true when the file pointer is exactly at the length of the open file. */
+    public boolean eof() throws IOException {
+        return fis.getFilePointer() == fis.length();
+    }
+
+    public void seek(final long position) throws IOException {
+        fis.seek(position);
+    }
+
+    /**
+     * Reads until length bytes have been stored or end of file is reached.
+     *
+     * @return the number of bytes read; at end of file with nothing read, the negative value
+     *         returned by the underlying read.
+     * @throws IndexOutOfBoundsException if length is negative
+     */
+    public int read(final byte[] buffer, final int offset, final int length) throws IOException {
+        if (length < 0) {
+            throw new IndexOutOfBoundsException();
+        }
+        int total = 0;
+        while (total < length) {
+            final int got = fis.read(buffer, offset + total, length - total);
+            if (got < 0) {
+                // End of file: hand back what was collected, or the EOF marker if nothing was
+                return total > 0 ? total : got;
+            }
+            total += got;
+        }
+        return total;
+    }
+
+    public void close() throws IOException {
+        fis.close();
+    }
+
+    public int read() throws IOException {
+        return fis.read();
+    }
+
+    @Override
+    public String getSource() {
+        return file.getAbsolutePath();
+    }
+}
\ No newline at end of file
diff --git a/net/sf/samtools/util/SeekableHTTPStream.java b/net/sf/samtools/util/SeekableHTTPStream.java
new file mode 100644
index 0000000..1e240f1
--- /dev/null
+++ b/net/sf/samtools/util/SeekableHTTPStream.java
@@ -0,0 +1,153 @@
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+package net.sf.samtools.util;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.HttpURLConnection;
+import java.net.Proxy;
+import java.net.URL;
+
+/**
+ * SeekableStream over an HTTP resource.  Each read opens a new connection and requests the
+ * bytes starting at the current position with a Range header; seek only records the position.
+ *
+ * @author jrobinso
+ */
+public class SeekableHTTPStream extends SeekableStream {
+
+    private long position = 0;
+    // -1 until a length is known, either from the Content-Length header or from hitting EOF
+    private long contentLength = -1;
+    private final URL url;
+    private final Proxy proxy;
+
+    public SeekableHTTPStream(final URL url) {
+        this(url, null);
+
+    }
+
+    /**
+     * @param url   resource to read
+     * @param proxy proxy to connect through, or null for a direct connection
+     */
+    public SeekableHTTPStream(final URL url, Proxy proxy) {
+
+        this.proxy = proxy;
+        this.url = url;
+
+        // Try to get the file length
+        final String contentLengthString = HttpUtils.getHeaderField(url, "Content-Length");
+        if (contentLengthString != null) {
+            try {
+                contentLength = Long.parseLong(contentLengthString);
+            }
+            catch (NumberFormatException ignored) {
+                System.err.println("WARNING: Invalid content length (" + contentLengthString + "  for: " + url);
+                contentLength = -1;
+            }
+        }
+
+    }
+
+    /** @return the content length, or -1 if it is not known. */
+    public long length() {
+        return contentLength;
+    }
+
+    /**
+     * @return true if the content length is known and the position has reached it.  While the
+     * length is unknown (-1) this returns false, since end of file cannot be determined yet.
+     */
+    public boolean eof() throws IOException {
+        return contentLength >= 0 && position >= contentLength;
+    }
+
+    public void seek(final long position) {
+        this.position = position;
+    }
+
+    /**
+     * Reads up to len bytes starting at the current position using an HTTP range request.
+     *
+     * @return the number of bytes read, which is 0 when the requested range starts past the end
+     *         of the resource, or -1 if the response body ended before any byte was read.
+     * @throws IndexOutOfBoundsException if offset/len do not describe a region of buffer
+     * @throws IOException on any failure other than the end-of-file conditions described above
+     */
+    public int read(byte[] buffer, int offset, int len) throws IOException {
+
+        // Written as a subtraction so that offset + len cannot overflow
+        if (offset < 0 || len < 0 || len > buffer.length - offset) {
+            throw new IndexOutOfBoundsException("Offset="+offset+",len="+len+",buflen="+buffer.length);
+        }
+        if (len == 0) {
+            return 0;
+        }
+
+        HttpURLConnection connection = null;
+        InputStream is = null;
+        String byteRange = "";
+        int n = 0;
+        try {
+            connection = proxy == null ?
+                    (HttpURLConnection) url.openConnection() :
+                    (HttpURLConnection) url.openConnection(proxy);
+
+            long endRange = position + len - 1;
+            // IF we know the total content length, limit the end range to that.
+            if (contentLength > 0) {
+                endRange = Math.min(endRange, contentLength);
+            }
+            byteRange = "bytes=" + position + "-" + endRange;
+            connection.setRequestProperty("Range", byteRange);
+
+            is = connection.getInputStream();
+
+            while (n < len) {
+                int count = is.read(buffer, offset + n, len - n);
+                if (count < 0) {
+                    if (n == 0) {
+                        return -1;
+                    } else {
+                        break;
+                    }
+                }
+                n += count;
+            }
+
+            position += n;
+
+            return n;
+
+        }
+
+        catch (IOException e) {
+            // THis is a bit of a hack, but its not clear how else to handle this.  If a byte range is specified
+            // that goes past the end of the file the response code will be 416.  The MAC os translates this to
+            // an IOException with the 416 code in the message.  Windows translates the error to an EOFException.
+            //
+            //  The BAM file iterator  uses the return value to detect end of file (specifically looks for n == 0).
+            final String message = e.getMessage();
+            final boolean rangeNotSatisfiable = message != null && message.contains("416");
+            if (rangeNotSatisfiable || (e instanceof EOFException)) {
+                // n counts the bytes stored before the failure and is never negative
+                position += n;
+                // As we are at EOF, the contentLength and position are by definition =
+                contentLength = position;
+                return n;
+            } else {
+                throw e;
+            }
+
+        }
+
+        finally {
+            try {
+                if (is != null) {
+                    is.close();
+                }
+            } finally {
+                // Disconnect even if closing the stream failed
+                if (connection != null) {
+                    connection.disconnect();
+                }
+            }
+        }
+    }
+
+
+    public void close() throws IOException {
+        // Nothing to do
+    }
+
+
+    /** @return the next byte as a value in 0-255, or -1 if no byte could be read. */
+    public int read() throws IOException {
+        final byte[] tmp = new byte[1];
+        // The bulk read returns 0 or -1 at end of file; both mean no byte is available
+        if (read(tmp, 0, 1) < 1) {
+            return -1;
+        }
+        return tmp[0] & 0xFF;
+    }
+
+    @Override
+    public String getSource() {
+        return url.toString();
+    }
+}
\ No newline at end of file
diff --git a/net/sf/samtools/util/SeekableStream.java b/net/sf/samtools/util/SeekableStream.java
new file mode 100644
index 0000000..c0619f0
--- /dev/null
+++ b/net/sf/samtools/util/SeekableStream.java
@@ -0,0 +1,37 @@
+/*
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT
+ * This is copyright (2007-2009) by the Broad Institute/Massachusetts Institute
+ * of Technology. It is licensed to You under the Gnu Public License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.opensource.org/licenses/gpl-2.0.php
+ *
+ * This software is supplied without any warranty or guaranteed support
+ * whatsoever. Neither the Broad Institute nor MIT can be responsible for its
+ * use, misuse, or functionality.
+ */
+package net.sf.samtools.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+/**
+ * An InputStream that can additionally report its length, be repositioned, and say
+ * whether it is at end of file.
+ */
+public abstract class SeekableStream extends InputStream {
+
+    /** @return the length of the underlying source. */
+    public abstract long length();
+
+    /**
+     * Moves the read position.
+     * @param position position to continue reading from
+     */
+    public abstract void seek(long position) throws IOException;
+
+    /** @return true if the stream is at end of file. */
+    public abstract boolean eof() throws IOException;
+
+    /** Redeclared abstract so that every subclass must supply a bulk read. */
+    @Override
+    public abstract int read(byte[] buffer, int offset, int length) throws IOException;
+
+    /** Redeclared abstract so that every subclass must supply its own close. */
+    @Override
+    public abstract void close() throws IOException;
+
+    /**
+     * @return String representation of source (e.g. URL, file path, etc.), or null if not available.
+     * Should end with .bam if not null.
+     */
+    public abstract String getSource();
+}
\ No newline at end of file
diff --git a/net/sf/samtools/util/StringUtil.java b/net/sf/samtools/util/StringUtil.java
new file mode 100644
index 0000000..55fe342
--- /dev/null
+++ b/net/sf/samtools/util/StringUtil.java
@@ -0,0 +1,460 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+package net.sf.samtools.util;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Arrays;
+
+/**
+ * Grab-bag of stateless String-oriented utilities.
+ */
+public class StringUtil {
+ private static final byte UPPER_CASE_OFFSET = 'A' - 'a';
+
+ /**
+ * Concatenates the {@code toString()} value of every element of {@code objs}, in iteration order,
+ * placing {@code separator} between consecutive elements.
+ * @param <T> the element type
+ * @param separator text inserted between adjacent elements
+ * @param objs the elements to join
+ * @return the joined text, or the empty string when {@code objs} is empty
+ */
+ public static <T> String join(final String separator, final Collection<T> objs) {
+     if (objs.isEmpty()) {
+         return "";
+     }
+     final StringBuilder joined = new StringBuilder();
+     String pending = ""; // nothing goes in front of the first element
+     for (final Object item : objs) {
+         joined.append(pending);
+         joined.append(item.toString());
+         pending = separator;
+     }
+     return joined.toString();
+ }
+
+ public static <T> String join(final String separator, final T... objs) {
+     // Expose the varargs array as a List view and hand it to the Collection overload.
+     return join(separator, Arrays.asList(objs));
+ }
+
+
+ /**
+ * Split the string into tokens separated by the given delimiter. Profiling has
+ * revealed that the standard string.split() method typically takes {@code > 1/2}
+ * the total time when used for parsing ascii files.
+ * Note that if tokens arg is not large enough to hold all the tokens in the string, excess tokens
+ * are discarded (a zero-length array therefore yields 0).
+ * Empty leading and interior tokens are kept; an empty trailing token is dropped.
+ *
+ * @param aString the string to split
+ * @param tokens an array to hold the parsed tokens
+ * @param delim character that delimits tokens
+ * @return the number of tokens parsed
+ */
+ public static int split(final String aString, final String[] tokens, final char delim) {
+
+     final int maxTokens = tokens.length;
+     int nTokens = 0;
+     int start = 0;
+     int end = aString.indexOf(delim);
+     if (end < 0) {
+         // No delimiter: the whole string is the only token, provided there is room for it.
+         if (maxTokens > 0) {
+             tokens[nTokens++] = aString;
+         }
+         return nTokens;
+     }
+     // end >= 0 (not > 0) so that a delimiter at index 0 produces an empty first token
+     // instead of leaving the leading delimiter glued onto the remainder of the string.
+     while ((end >= 0) && (nTokens < maxTokens)) {
+         tokens[nTokens++] = aString.substring(start, end);
+         start = end + 1;
+         end = aString.indexOf(delim, start);
+     }
+     // Add the trailing string, if there is room and if it is not empty.
+     if (nTokens < maxTokens && start < aString.length()) {
+         tokens[nTokens++] = aString.substring(start);
+     }
+     return nTokens;
+ }
+
+ /**
+ * Split the string into tokens separated by the given delimiter. Profiling has
+ * revealed that the standard string.split() method typically takes {@code > 1/2}
+ * the total time when used for parsing ascii files.
+ * Note that the string is split into no more elements than tokens arg will hold, so the final tokenized
+ * element may contain delimiter chars. A zero-length tokens array yields 0; an empty leading token is kept.
+ *
+ * @param aString the string to split
+ * @param tokens an array to hold the parsed tokens
+ * @param delim character that delimits tokens
+ * @return the number of tokens parsed
+ */
+ public static int splitConcatenateExcessTokens(final String aString, final String[] tokens, final char delim) {
+
+     final int maxTokens = tokens.length;
+     if (maxTokens == 0) {
+         return 0; // no room for even one token; storing one would overrun the array
+     }
+     int nTokens = 0;
+     int start = 0;
+     int end = aString.indexOf(delim);
+     if (end < 0) {
+         tokens[nTokens++] = aString;
+         return nTokens;
+     }
+     // end >= 0 (not > 0) so that a delimiter at index 0 produces an empty first token.
+     while ((end >= 0) && (nTokens < maxTokens - 1)) {
+         tokens[nTokens++] = aString.substring(start, end);
+         start = end + 1;
+         end = aString.indexOf(delim, start);
+     }
+     // Add the trailing string (which may still contain delimiters), if it is not empty.
+     if (start < aString.length()) {
+         tokens[nTokens++] = aString.substring(start);
+     }
+     return nTokens;
+ }
+
+ /**
+ * @param b ASCII character
+ * @return {@code b} mapped to lowercase when it is in 'A'..'Z'; any other value is returned unchanged
+ */
+ public static byte toLowerCase(final byte b) {
+     final boolean isUpper = (b >= 'A') && (b <= 'Z');
+     // UPPER_CASE_OFFSET is 'A' - 'a', so subtracting it shifts into the lowercase range.
+     final int shifted = b - UPPER_CASE_OFFSET;
+     return isUpper ? (byte) shifted : b;
+ }
+
+ /**
+ * @param b ASCII character
+ * @return {@code b} mapped to uppercase when it is in 'a'..'z'; any other value is returned unchanged
+ */
+ public static byte toUpperCase(final byte b) {
+     final boolean isLower = (b >= 'a') && (b <= 'z');
+     // UPPER_CASE_OFFSET is 'A' - 'a' (negative), so adding it shifts into the uppercase range.
+     final int shifted = b + UPPER_CASE_OFFSET;
+     return isLower ? (byte) shifted : b;
+ }
+
+ /**
+ * Upper-cases, in place, every byte of the array that lies in 'a'..'z'; all other bytes are left untouched.
+ */
+ public static void toUpperCase(final byte[] bytes) {
+     for (int idx = 0; idx < bytes.length; ++idx) {
+         final byte current = bytes[idx];
+         if (current >= 'a' && current <= 'z') {
+             bytes[idx] = (byte) (current + UPPER_CASE_OFFSET);
+         }
+     }
+ }
+
+
+ /**
+ * Verifies that the given String contains none of the listed characters.
+ *
+ * @param illegalChars the String to check (despite its name, this is the text being inspected)
+ * @param chars the characters that must not occur in it
+ * @return the input String, returned for convenience
+ * @throws IllegalArgumentException if the String contains at least one of the characters
+ */
+ public static String assertCharactersNotInString(final String illegalChars, final char... chars) {
+     for (int pos = 0; pos < illegalChars.length(); ++pos) {
+         final char candidate = illegalChars.charAt(pos);
+         for (int k = 0; k < chars.length; ++k) {
+             if (chars[k] == candidate) {
+                 throw new IllegalArgumentException("Supplied String contains illegal character '" + candidate + "'.");
+             }
+         }
+     }
+     return illegalChars;
+ }
+
+ /**
+ * Return input string with newlines inserted to ensure that all lines
+ * have {@code length <= maxLineLength}. if a word is too long, it is simply broken
+ * at maxLineLength. Does not handle tabs intelligently (due to implementer laziness).
+ * Newlines already present in the input (leading, consecutive and trailing ones included) are preserved.
+ */
+ public static String wordWrap(final String s, final int maxLineLength) {
+     // A negative limit keeps empty leading/trailing segments, so blank lines survive the round trip.
+     final String[] lines = s.split("\n", -1);
+     final StringBuilder sb = new StringBuilder();
+     for (int i = 0; i < lines.length; ++i) {
+         if (i > 0) {
+             // Count segments instead of testing sb.length(): an empty first line must still be followed by "\n".
+             sb.append("\n");
+         }
+         sb.append(wordWrapSingleLine(lines[i], maxLineLength));
+     }
+     return sb.toString();
+ }
+
+ /** Inserts newlines into {@code s}, breaking after whitespace when possible, else mid-word every {@code maxLineLength} chars. */
+ public static String wordWrapSingleLine(final String s, final int maxLineLength) {
+     if (s.length() <= maxLineLength) {
+         return s;
+     }
+     final StringBuilder sb = new StringBuilder();
+     int startCopyFrom = 0;
+     while (startCopyFrom < s.length()) {
+         int i, lastSpaceIndex = startCopyFrom;
+         for (i = startCopyFrom; i < s.length() && i - startCopyFrom < maxLineLength; ++i) { // Find break point (if it exists)
+             if (Character.isWhitespace(s.charAt(i))) {
+                 lastSpaceIndex = i;
+             }
+         }
+         if (i - startCopyFrom < maxLineLength) { // reached the end of the input: the remainder fits on this line
+             lastSpaceIndex = i;
+         }
+         for (; lastSpaceIndex < s.length() && Character.isWhitespace(s.charAt(lastSpaceIndex)); ++lastSpaceIndex) {} // Include any trailing whitespace
+         if (sb.length() > 0) {
+             sb.append("\n");
+         }
+         if (lastSpaceIndex == startCopyFrom) { // No word break available: just break the word in the middle.
+             if (i == startCopyFrom) { // non-positive width: nothing can be consumed, so the loop would never end
+                 throw new IllegalArgumentException("maxLineLength must be positive: " + maxLineLength);
+             }
+             lastSpaceIndex = i;
+         }
+         sb.append(s.substring(startCopyFrom, lastSpaceIndex));
+         startCopyFrom = lastSpaceIndex;
+     }
+     return sb.toString();
+ }
+
+
+ public static String intValuesToString(final int[] intVals) {
+     // Renders the values in array order, separated by ", "; an empty array gives "".
+     final StringBuilder text = new StringBuilder(intVals.length);
+     for (int idx = 0; idx < intVals.length; ++idx) {
+         if (idx != 0) {
+             text.append(", ");
+         }
+         text.append(intVals[idx]);
+     }
+
+     return text.toString();
+ }
+
+ public static String intValuesToString(final short[] shortVals) {
+     // Renders the values in array order, separated by ", "; an empty array gives "".
+     final StringBuilder text = new StringBuilder(shortVals.length);
+     for (int idx = 0; idx < shortVals.length; ++idx) {
+         if (idx != 0) {
+             text.append(", ");
+         }
+         text.append(shortVals[idx]);
+     }
+
+     return text.toString();
+ }
+
+ ////////////////////////////////////////////////////////////////////
+ // The following methods all convert btw bytes and Strings, without
+ // using the Java character set mechanism.
+ ////////////////////////////////////////////////////////////////////
+
+ public static String bytesToString(final byte[] data) {
+     // A null array maps to a null String rather than an exception;
+     // anything else is converted in full by the three-argument overload.
+     final boolean absent = (data == null);
+     return absent ? null : bytesToString(data, 0, data.length);
+ }
+
+ @SuppressWarnings("deprecation")
+ public static String bytesToString(final byte[] buffer, final int offset, final int length) {
+/*
+ The non-deprecated way, that requires allocating char[] (note: this cast sign-extends bytes >= 0x80, unlike the live code below)
+ final char[] charBuffer = new char[length];
+ for (int i = 0; i < length; ++i) {
+ charBuffer[i] = (char)buffer[i+offset];
+ }
+ return new String(charBuffer);
+*/
+ return new String(buffer, 0, offset, length); // deprecated String(byte[] ascii, int hibyte, int offset, int count); hibyte 0 maps each byte b to the char (b & 0xff)
+ }
+
+ @SuppressWarnings("deprecation")
+ public static byte[] stringToBytes(final String s) {
+/*
+ The non-deprecated way, that requires allocating char[] (kept for reference only; it is not compiled)
+ final byte[] byteBuffer = new byte[s.length()];
+ final char[] charBuffer = s.toCharArray();
+ for (int i = 0; i < charBuffer.length; ++i) {
+ byteBuffer[i] = (byte)(charBuffer[i] & 0xff);
+ }
+ return byteBuffer;
+*/
+ final byte[] byteBuffer = new byte[s.length()]; // one output byte per char of s
+ s.getBytes(0, byteBuffer.length, byteBuffer, 0); // deprecated String.getBytes(srcBegin, srcEnd, dst, dstBegin): stores the low-order byte of each char
+ return byteBuffer;
+ }
+
+ @SuppressWarnings("deprecation")
+ public static byte[] stringToBytes(final String s, final int offset, final int length) {
+     final byte[] out = new byte[length];
+     s.getBytes(offset, offset + length, out, 0); // deprecated overload: stores the low-order byte of each char in the range
+     return out;
+ }
+
+ // Arguably belongs in BinaryCodec, but kept here so that all byte <=> char conversion lives in one place.
+ public static String readNullTerminatedString(final BinaryCodec binaryCodec) {
+     final StringBuilder text = new StringBuilder();
+     byte next;
+     while ((next = binaryCodec.readByte()) != 0) { // a zero byte ends the string and is not appended
+         text.append((char) (next & 0xff));
+     }
+     return text.toString();
+ }
+
+ /**
+ * Copies chars into a byte array, narrowing each char with a plain cast (high-order bits are dropped).
+ * @param chars source characters
+ * @param charOffset index in {@code chars} of the first char to convert
+ * @param length number of chars to convert
+ * @param bytes destination for the converted values
+ * @param byteOffset index in {@code bytes} at which the first converted value is stored
+ */
+ public static void charsToBytes(final char[] chars, final int charOffset, final int length,
+         final byte[] bytes, final int byteOffset) {
+     for (int src = charOffset, dst = byteOffset, left = length; left > 0; --left, ++src, ++dst) {
+         bytes[dst] = (byte) chars[src];
+     }
+ }
+
+ /**
+ * Convert ASCII char to byte with a narrowing cast; only the low-order 8 bits of the char are kept.
+ */
+ public static byte charToByte(final char c) {
+ return (byte)c;
+ }
+
+ /**
+ * Convert ASCII byte to ASCII char; the byte is treated as unsigned, so the result is in the range 0..255.
+ */
+ public static char byteToChar(final byte b) {
+ return (char)(b & 0xff);
+ }
+
+ /**
+ * Renders a byte array as hexadecimal text, two digits per byte, high nibble first.
+ * Digits above 9 are written as upper-case letters.
+ * @param data Input to be converted.
+ * @return String twice as long as data.length with hex representation of data.
+ */
+ public static String bytesToHexString(final byte[] data) {
+     final StringBuilder hex = new StringBuilder(2 * data.length);
+     for (final byte b : data) {
+         hex.append(toHexDigit((b >> 4) & 0xF));
+         hex.append(toHexDigit(b & 0xF));
+     }
+     return hex.toString();
+ }
+
+ /**
+ * Decodes hexadecimal text into the bytes it represents; each pair of hex chars yields one byte.
+ * @param s Hex string. Its length must be even.
+ * @return byte array with binary representation of hex string.
+ * @throws NumberFormatException if the length is odd or a character is not a hex digit
+ */
+ public static byte[] hexStringToBytes(final String s) throws NumberFormatException {
+     final int nChars = s.length();
+     if ((nChars & 1) == 1) {
+         throw new NumberFormatException("Hex representation of byte string does not have even number of hex chars: " + s);
+     }
+     final byte[] decoded = new byte[nChars / 2];
+     for (int pos = 0; pos < nChars; pos += 2) {
+         decoded[pos >> 1] = (byte) ((fromHexDigit(s.charAt(pos)) << 4) | fromHexDigit(s.charAt(pos + 1)));
+     }
+     return decoded;
+ }
+
+ public static char toHexDigit(final int value) {
+     return (char) ((value >= 10) ? ('A' + (value - 10)) : ('0' + value)); // 10 and above become upper-case letters starting at 'A'
+ }
+
+ public static int fromHexDigit(final char c) throws NumberFormatException {
+     final int digit = Character.digit(c, 16); // -1 when c is not a digit in radix 16
+     if (digit != -1) {
+         return digit;
+     }
+     throw new NumberFormatException("Not a valid hex digit: " + c);
+ }
+
+ /**
+ * Produces the character-reversed form of a string. A null argument is not checked for.
+ * @param s String to be reversed.
+ * @return New string that is the reverse of the input string.
+ */
+ public static String reverseString(final String s) {
+     // StringBuilder.reverse() does the work; a null s fails inside the constructor.
+     final StringBuilder reversed = new StringBuilder(s).reverse();
+     return reversed.toString();
+ }
+
+ /**
+ * <p>Tells whether a String is null, empty (""), or made up solely of whitespace.</p>
+ *
+ * <pre>
+ * StringUtil.isBlank(null) = true
+ * StringUtil.isBlank("") = true
+ * StringUtil.isBlank(" ") = true
+ * StringUtil.isBlank("sam") = false
+ * StringUtil.isBlank(" sam ") = false
+ * </pre>
+ *
+ * @param str the String to check, may be null
+ * @return {@code true} if the String is null, empty or whitespace
+ */
+ public static boolean isBlank(String str) {
+     if (str == null) {
+         return true;
+     }
+     int pos = 0;
+     final int len = str.length();
+     while (pos < len && Character.isWhitespace(str.charAt(pos))) {
+         ++pos;
+     }
+     // Blank exactly when the scan ran off the end without meeting a non-whitespace char.
+     return pos == len;
+ }
+
+ /**
+ * <p>Builds a string consisting of one character repeated a given number of times.</p>
+ * @param c the Character to repeat
+ * @param repeatNumber the number of times to repeat the character
+ * @return String with the character c repeated repeatNumber times
+ */
+ public static String repeatCharNTimes(char c, int repeatNumber) {
+     final char[] repeated = new char[repeatNumber];
+     Arrays.fill(repeated, 0, repeated.length, c);
+     return new String(repeated);
+ }
+
+}
diff --git a/sample/ConsumeSingleSamples.java b/sample/ConsumeSingleSamples.java
new file mode 100644
index 0000000..389aef1
--- /dev/null
+++ b/sample/ConsumeSingleSamples.java
@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import blbutil.Utilities;
+import java.util.List;
+import java.util.concurrent.BlockingQueue;
+import haplotype.HapPair;
+import haplotype.RevHapPair;
+import main.GenotypeValues;
+
+/**
+ * <p>Class {@code ConsumeSingleSamples} samples haplotype pairs conditional
+ * on the observed genotype data and a haplotype frequency model.
+ * Class {@code ConsumeSingleSamples} is designed for use as a consumer in a
+ * producer-consumer design pattern.
+ * </p>
+ * <p>Instances of class {@code ConsumeSingleSamples} are thread-safe if the
+ * synchronization requirements for the constructor are satisfied.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class ConsumeSingleSamples implements Runnable {
+
+ /**
+ * A sentinel {@code Integer}: taking it from the work queue makes {@code run()} return.
+ */
+ public static final Integer POISON = -1;
+
+ private final boolean markersAreReversed;
+ private final SingleBaumInterface baum;
+ private final BlockingQueue<Integer> qIn;
+ private final List<HapPair> sampledHaps;
+ private final GenotypeValues gv; // null when genotype probabilities are not collected
+ private final double[] gprobs; // scratch buffer passed to baum.randomSample(); null when gv is null
+
+ /**
+ * Constructs a new {@code ConsumeSingleSamples} instance that stores
+ * sampled haplotype pairs only (no genotype probabilities are recorded).
+ * @param markersAreReversed {@code true} if the {@code baum} parameter
+ * {@code randomSample()} method produces sampled haplotype pairs that have
+ * their marker order reversed and {@code false} otherwise
+ * @param baum a thread-confined instance of class
+ * {@code sample.SingleBaumInterface}
+ * @param qIn a thread-safe input work queue
+ * @param hapList a thread-safe list for storing sampled haplotype pairs
+ *
+ * @throws NullPointerException if any parameter is {@code null}
+ */
+ public ConsumeSingleSamples(boolean markersAreReversed,
+         SingleBaumInterface baum, BlockingQueue<Integer> qIn,
+         List<HapPair> hapList) {
+     if (baum == null) {
+         throw new NullPointerException("baum=null");
+     }
+     if (qIn == null) { // NullPointerException, as documented and as already thrown for baum
+         throw new NullPointerException("qIn==null");
+     }
+     if (hapList == null) {
+         throw new NullPointerException("hapList==null");
+     }
+     this.markersAreReversed = markersAreReversed;
+     this.baum = baum;
+     this.qIn = qIn;
+     this.sampledHaps = hapList;
+     this.gv = null;
+     this.gprobs = null;
+ }
+
+ /**
+ * Constructs a new {@code ConsumeSingleSamples} instance that stores
+ * sampled haplotype pairs and also adds genotype probabilities to {@code gv}.
+ * @param markersAreReversed {@code true} if the {@code baum} parameter
+ * {@code randomSample()} method produces sampled haplotype pairs that have
+ * their marker order reversed and {@code false} otherwise
+ * @param baum a thread-confined instance of class
+ * {@code sample.SingleBaumInterface}
+ * @param qIn a thread-safe input work queue
+ * @param hapList a thread-safe list for storing sampled haplotype pairs
+ * @param gv a thread-safe object which stores scaled posterior genotype
+ * probabilities
+ *
+ * @throws NullPointerException if any parameter is {@code null}
+ */
+ public ConsumeSingleSamples(boolean markersAreReversed,
+         SingleBaumInterface baum, BlockingQueue<Integer> qIn,
+         List<HapPair> hapList, GenotypeValues gv) {
+     if (baum == null) {
+         throw new NullPointerException("baum=null");
+     }
+     if (qIn == null) { // NullPointerException, as documented and as already thrown for baum
+         throw new NullPointerException("qIn==null");
+     }
+     if (hapList == null) {
+         throw new NullPointerException("hapList==null");
+     }
+     if (gv == null) {
+         throw new NullPointerException("gv==null");
+     }
+     this.markersAreReversed = markersAreReversed;
+     this.baum = baum;
+     this.qIn = qIn;
+     this.gv = gv;
+     this.sampledHaps = hapList;
+     int n = baum.gl().markers().sumGenotypes(); // buffer length: the markers' sumGenotypes() value
+     this.gprobs = new double[n];
+ }
+
+ /**
+ * Takes sample indices from the thread-safe work-queue specified at time of
+ * construction and samples haplotype pairs for each sample. The method
+ * exits when {@code ConsumeSingleSamples.POISON} is taken from
+ * the work queue. Any {@code Throwable} is passed to {@code Utilities.exit()}.
+ */
+ @Override
+ @SuppressWarnings({"BroadCatchBlock", "TooBroadCatch"})
+ public void run() {
+     try {
+         int single = qIn.take();
+         while (single != POISON) { // POISON is unboxed, so this is a numeric comparison
+             if (gv == null) {
+                 List<HapPair> newHaps = baum.randomSample(single);
+                 storeHaps(newHaps);
+             } else {
+                 List<HapPair> newHaps = baum.randomSample(single, gprobs);
+                 storeHaps(newHaps);
+                 gv.add(single, gprobs); // gprobs is handed to randomSample() first, then added to gv for this sample
+
+             }
+             single = qIn.take();
+         }
+     } catch (Throwable e) {
+         Utilities.exit("ConsumeSingleSamples: ERROR", e);
+     }
+ }
+
+ private void storeHaps(List<HapPair> newHaps) {
+     if (markersAreReversed) {
+         for (HapPair hp : newHaps) {
+             sampledHaps.add(new RevHapPair(hp)); // NOTE(review): RevHapPair presumably restores the original marker order — confirm
+         }
+     } else {
+         sampledHaps.addAll(newHaps);
+     }
+ }
+}
diff --git a/sample/DiploidStates.java b/sample/DiploidStates.java
new file mode 100644
index 0000000..47ab83f
--- /dev/null
+++ b/sample/DiploidStates.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2014 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sample;
+
+import java.util.NoSuchElementException;
+
+/**
+ * <p>Class {@code DiploidStates} represents a list of iterators
+ * (one iterator for each marker) that iterate over a subset of diploid
+ * HMM states at a marker.
+ * </p>
+ * <p>Instances of class {@code DiploidStates} are not required to be
+ * thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface DiploidStates {
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ public int nMarkers();
+
+ /**
+ * Initializes the iteration of permitted ordered edge pairs for the
+ * specified marker.
+ * @param marker a marker index
+ * @throws IllegalArgumentException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ public void setMarker(int marker);
+
+ /**
+ * Returns the current marker index.
+ * @return the current marker index
+ */
+ public int marker();
+
+ /**
+ * Returns {@code true} if the iteration of the ordered edge pairs has
+ * more elements, and returns {@code false} otherwise.
+ * @return {@code true} if the iteration of the ordered edge pairs has
+ * more elements
+ */
+ public boolean hasNext();
+
+ /**
+ * Advances the iteration of ordered edge pairs to the next element.
+ *
+ * @throws NoSuchElementException if {@code this.hasNext() == false}
+ */
+ public void next();
+
+ /**
+ * Returns the first edge of the edge pair that is the current element
+ * in the iteration, or returns {@code -1} if {@code this.next()}
+ * has not been invoked since the most recent invocation of
+ * {@code this.setMarker()}.
+ * @return the first edge of the edge pair that is the current element
+ * in the iteration
+ */
+ public int edge1();
+
+ /**
+ * Returns the second edge of the edge pair that is the current element
+ * in the iteration, or returns {@code -1} if {@code this.next()}
+ * has not been invoked since the most recent invocation of
+ * {@code this.setMarker()}.
+ * @return the second edge of the edge pair that is
+ * the current element in the iteration
+ */
+ public int edge2();
+}
\ No newline at end of file
diff --git a/sample/DuoBaumLevel.java b/sample/DuoBaumLevel.java
new file mode 100644
index 0000000..163a909
--- /dev/null
+++ b/sample/DuoBaumLevel.java
@@ -0,0 +1,641 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import dag.Dag;
+import java.util.Arrays;
+import vcf.BasicGL;
+import vcf.GL;
+
+/**
+ * <p>Class {@code DuoBaumLevel} computes forward and backward Baum
+ * values at a level of a hidden Markov model (HMM) whose states are
+ * ordered edge trios of a leveled directed acyclic graph (DAG).
+ * </p>
+ * <p>Instances of class {@code DuoBaumLevel} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class DuoBaumLevel {
+
+ private static final int INITIAL_CAPACITY = 400;
+ private static final float MIN_VALUE = 100*Float.MIN_VALUE;
+ private final Dag dag;
+ private final GL gl;
+
+ private int marker = -1;
+ private int sampleA = -1;
+ private int sampleB = -1;
+ private int size = 0;
+
+ private int capacity = INITIAL_CAPACITY;
+ private int[] edgesAB1 = new int[INITIAL_CAPACITY];
+ private int[] edgesA2 = new int[INITIAL_CAPACITY];
+ private int[] edgesB2 = new int[INITIAL_CAPACITY];
+ private float[] fwdValues = new float[INITIAL_CAPACITY];
+ private float[] bwdValues = new float[INITIAL_CAPACITY];
+ private float fwdValueSum = 0f;
+ private float bwdValueSum = 0f;
+
+ private int nGenotypes = 0;
+ private float[] gtProbsA = new float[3];
+ private float[] gtProbsB = new float[3];
+
+ /**
+ * Constructs a new {@code DuoBaumLevel} instance from the specified data.
+ * @param dag the directed acyclic graph that determines the transition
+ * probabilities
+ * @param gl the emission probabilities
+ * @throws IllegalArgumentException if
+ * {@code dag.markers().equals(gl.markers()) == false}
+ * @throws NullPointerException if {@code dag == null || gl == null}
+ */
+ public DuoBaumLevel(Dag dag, GL gl) {
+ if (dag.markers().equals(gl.markers())==false) {
+ throw new IllegalArgumentException("marker inconsistency");
+ }
+ this.dag = dag;
+ this.gl = gl;
+ }
+
+ /**
+ * Sets the Baum forward algorithm values for this level of the HMM
+ * and records the child node trio values in the specified
+ * {@code nodes} parameter. When the method call returns, the {@code nodes}
+ * parameter will be reset to the child node trio values for this level of
+ * the HMM.
+ *
+ * @param nodes child node trio values at the previous level of the HMM
+ * @param marker the level of the HMM at which the Baum forward algorithm
+ * probabilities will be computed
+ * @param sampleA the parent sample index
+ * @param sampleB the offspring sample index
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.dag().nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sampleA < 0 || sampleA >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sampleB < 0 || sampleB >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if any node in any node trio with
+ * non-zero value is not a valid parent node at the specified level of the
+ * HMM
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setForwardValues(DuoNodes nodes, int marker, int sampleA,
+ int sampleB) {
+ this.marker = marker;
+ this.sampleA = sampleA;
+ this.sampleB = sampleB;
+ this.nGenotypes = gl.marker(marker).nGenotypes();
+ this.size = 0; // discard the states stored for the previous marker
+ this.fwdValueSum = 0f;
+ this.bwdValueSum = 0f; // setBackwardValues() accumulates into this sum, so it must start at zero
+ initializeGtProbs(); // initialized here due to gtProbs() contract
+ setStates(nodes); // reads the incoming node values and stores this level's states
+ setChildNodes(nodes); // then overwrites nodes with this level's child node values
+ }
+
+ private void initializeGtProbs() {
+     if (gtProbsA.length >= nGenotypes) {
+         // Large enough already: zero only the entries that are in use at this marker.
+         for (int gt = nGenotypes - 1; gt >= 0; --gt) {
+             gtProbsA[gt] = 0f;
+             gtProbsB[gt] = 0f;
+         }
+         return;
+     }
+     // Too small: grow to about 1.5x (or straight to nGenotypes); freshly allocated arrays are already zeroed.
+     gtProbsA = new float[Math.max(nGenotypes, (3*gtProbsA.length/2 + 1))];
+     gtProbsB = new float[gtProbsA.length];
+ }
+
+ private void setStates(DuoNodes nodes) { // stores every (AB1, A2, B2) out-edge combination with positive emission probability, then normalizes
+ float valueSum = 0f;
+ for (int j=0, n=nodes.size(); j<n; ++j) {
+ int nodeAB1 = nodes.enumNodeAB1(j);
+ int nodeA2 = nodes.enumNodeA2(j);
+ int nodeB2 = nodes.enumNodeB2(j);
+ float nodeValue = nodes.enumValue(j);
+ for (int ab1=0, nAB1=dag.nOutEdges(marker, nodeAB1); ab1<nAB1; ++ab1) {
+ int edgeAB1 = dag.outEdge(marker, nodeAB1, ab1);
+ int symbolAB1 = dag.symbol(marker, edgeAB1); // this symbol enters the emission lookup of both samples
+ for (int a2=0, nA2=dag.nOutEdges(marker, nodeA2); a2<nA2; ++a2) {
+ int edgeA2 = dag.outEdge(marker, nodeA2, a2);
+ int symbolA2 = dag.symbol(marker, edgeA2);
+ float epA = gl.gl(marker, sampleA, symbolAB1, symbolA2);
+ if (epA > 0.0) { // skip the B2 loop when sample A's emission probability is zero
+ for (int b2=0, nB2=dag.nOutEdges(marker, nodeB2); b2<nB2; ++b2) {
+ int edgeB2 = dag.outEdge(marker, nodeB2, b2);
+ int symbolB2 = dag.symbol(marker, edgeB2);
+ float epB = gl.gl(marker, sampleB, symbolAB1, symbolB2);
+ if (epB > 0.0) { // only states with positive emission probability for both samples are stored
+ if (size == capacity) {
+ ensureCapacity(size+1);
+ }
+ float tpAB1 = dag.condEdgeProb(marker, edgeAB1);
+ float tpA2 = dag.condEdgeProb(marker, edgeA2);
+ float tpB2 = dag.condEdgeProb(marker, edgeB2);
+ float fwdValue = (epA * epB) * nodeValue
+ * (tpAB1 * tpA2 * tpB2);
+ if (fwdValue<MIN_VALUE && nodeValue > 0.0) { // floor the product so a positive node value is never lost below MIN_VALUE
+ fwdValue = MIN_VALUE;
+ }
+ edgesAB1[size] = edgeAB1;
+ edgesA2[size] = edgeA2;
+ edgesB2[size] = edgeB2;
+ fwdValues[size++] = fwdValue;
+ valueSum += fwdValue;
+ }
+ }
+ }
+ }
+ }
+ }
+ assert valueSum>0.0 ^ size==0; // exactly one holds: a positive total, or no stored state at all
+ for (int k=0; k<size; ++k) {
+ this.fwdValues[k] /= valueSum; // divide each stored value by the total
+ }
+ fwdValueSum = valueSum; // keeps the total from before the division
+ }
+
+ /**
+ * Clears the specified {@code DuoNodes} object and then adds to it, for
+ * each state stored at this level of the HMM, the state's forward value
+ * under the trio of child nodes of the state's three edges.
+ *
+ * @param nodes the node trio values that will be set
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setChildNodes(DuoNodes nodes) {
+     nodes.clear();
+     int state = 0;
+     while (state < size) {
+         nodes.sumUpdate(dag.childNode(marker, edgesAB1[state]), dag.childNode(marker, edgesA2[state]),
+                 dag.childNode(marker, edgesB2[state]), fwdValues[state]);
+         ++state;
+     }
+ }
+
+ /**
+ * Sets the Baum backward algorithm values for this level of the HMM
+ * and stores the parent node trio values in the specified
+ * {@code nodes} parameter. When the method call returns, the
+ * {@code nodes} parameter will be reset to the parent node trio values
+ * for this level of the HMM.
+ *
+ * @param nodes parent node trio values at the next level of HMM
+ *
+ * @throws IndexOutOfBoundsException if any node in any node trio with
+ * non-zero value is not a valid child node at this level of the HMM
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setBackwardValues(DuoNodes nodes) {
+ for (int j=0; j<size; ++j) { // first pass: read each state's value from the node trio its edges lead to
+ int nodeAB1 = dag.childNode(marker, edgesAB1[j]);
+ int nodeA2 = dag.childNode(marker, edgesA2[j]);
+ int nodeB2 = dag.childNode(marker, edgesB2[j]);
+ float backwardValue = nodes.value(nodeAB1, nodeA2, nodeB2);
+ bwdValues[j] = backwardValue;
+ bwdValueSum += backwardValue; // not reset in this method; setForwardValues() zeroes it
+ }
+ nodes.clear(); // the incoming values have been consumed; nodes is now refilled with parent node values
+ float gtProbsSum = 0f;
+ for (int j=0; j<size; ++j) {
+ bwdValues[j] /= bwdValueSum;
+ int symbolAB1 = symbolAB1(j);
+ int symbolA2 = symbolA2(j);
+ int symbolB2 = symbolB2(j);
+ float tpAB1 = dag.condEdgeProb(marker, edgesAB1[j]);
+ float tpA2 = dag.condEdgeProb(marker, edgesA2[j]);
+ float tpB2 = dag.condEdgeProb(marker, edgesB2[j]);
+
+ float stateProb = fwdValues[j] * bwdValues[j];
+ int gtIndexA = BasicGL.genotype(symbolAB1, symbolA2);
+ int gtIndexB = BasicGL.genotype(symbolAB1, symbolB2);
+ // gtProbs[AB] assumed to be initialized in setForwardValues() method
+ gtProbsA[gtIndexA] += stateProb;
+ gtProbsB[gtIndexB] += stateProb;
+ gtProbsSum += stateProb;
+
+ float epA = gl.gl(marker, sampleA, symbolAB1, symbolA2);
+ float epB = gl.gl(marker, sampleB, symbolAB1, symbolB2);
+ float bwdValue = bwdValues[j] * (tpAB1 * tpA2 * tpB2) * (epA*epB);
+ if (bwdValue < MIN_VALUE && bwdValues[j] > 0f) { // floor the product so a positive backward value is never lost below MIN_VALUE
+ bwdValue = MIN_VALUE;
+ }
+ int pnAB1 = dag.parentNode(marker, edgesAB1[j]);
+ int pnA2 = dag.parentNode(marker, edgesA2[j]);
+ int pnB2 = dag.parentNode(marker, edgesB2[j]);
+ nodes.sumUpdate(pnAB1, pnA2, pnB2, bwdValue);
+ }
+ for (int j=0; j<nGenotypes; ++j) { // divide by the total state probability accumulated above
+ gtProbsA[j] /= gtProbsSum;
+ gtProbsB[j] /= gtProbsSum;
+ }
+ }
+
+ /**
+ * Returns the directed acyclic graph that determines the transition
+ * probabilities.
+ * @return the directed acyclic graph that determines the transition
+ * probabilities
+ */
+ public Dag dag() {
+ return dag;
+ }
+
+ /**
+ * Returns the emission probabilities.
+ * @return the emission probabilities
+ */
+ public GL gl() {
+     return this.gl;
+ }
+
+ /**
+ * Returns the level of the HMM.
+ * @return the level of the HMM
+ */
+ public int marker() {
+     return this.marker;
+ }
+
+ /**
+ * Returns the number of possible genotypes at this level of the HMM.
+ * @return the number of possible genotypes at this level of the HMM
+ */
+ public int nGenotypes() {
+     return this.nGenotypes;
+ }
+
+ /**
+ * Returns the specified posterior genotype probability for the parent.
+ * Returns 0 if the Baum backward probabilities have not been set.
+ * @param gt a genotype index
+ * @return the specified posterior genotype probability for the parent
+ * @throws IndexOutOfBoundsException if
+ * {@code gt < 0 || gt >= this.nGenotypes()}
+ */
+ public float gtProbsA(int gt) {
+     // reject genotype indices beyond this level's genotype count
+     checkGT(gt);
+     final float prob = this.gtProbsA[gt];
+     return prob;
+ }
+
+ /**
+ * Returns the specified posterior genotype probability for the offspring.
+ * Returns 0 if the Baum backward probabilities have not been set.
+ * @param gt a genotype index
+ * @return the specified posterior genotype probability for the offspring
+ * @throws IndexOutOfBoundsException if
+ * {@code gt < 0 || gt >= this.nGenotypes()}
+ */
+ public float gtProbsB(int gt) {
+     // reject genotype indices beyond this level's genotype count
+     checkGT(gt);
+     final float prob = this.gtProbsB[gt];
+     return prob;
+ }
+
+ /*
+  * Verifies that the specified genotype index is valid at this level
+  * of the HMM.
+  *
+  * @param gt a genotype index
+  * @throws IndexOutOfBoundsException if
+  * gt < 0 || gt >= this.nGenotypes(), as documented by gtProbsA()
+  * and gtProbsB()
+  */
+ private void checkGT(int gt) {
+     if (gt < 0 || gt >= nGenotypes) {
+         throw new IndexOutOfBoundsException(String.valueOf(gt));
+     }
+ }
+
+ /**
+ * Returns the number of states with nonzero forward probability at
+ * this level of the HMM.
+ *
+ * @return the number of states with nonzero forward probability at
+ * this level of the HMM
+ */
+ public int size() {
+     return this.size;
+ }
+
+ /*
+  * Verifies that the specified state index refers to one of the
+  * stored states at this level of the HMM.
+  *
+  * @param state a state index
+  * @throws IndexOutOfBoundsException if
+  * state < 0 || state >= this.size(); the exception message is the
+  * offending index
+  */
+ private void checkIndex(int state) {
+     if (state < 0 || state >= size) {
+         throw new IndexOutOfBoundsException(String.valueOf(state));
+     }
+ }
+
+ /**
+ * Returns the first edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the first edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edgeAB1(int state) {
+     checkIndex(state);
+     final int edge = this.edgesAB1[state];
+     return edge;
+ }
+
+ /**
+ * Returns the second edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the second edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edgeA2(int state) {
+     checkIndex(state);
+     final int edge = this.edgesA2[state];
+     return edge;
+ }
+
+ /**
+ * Returns the third edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the third edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edgeB2(int state) {
+     checkIndex(state);
+     final int edge = this.edgesB2[state];
+     return edge;
+ }
+
+ /**
+ * Returns the parent node of the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNodeAB1(int state) {
+     checkIndex(state);
+     final int edge = edgesAB1[state];
+     return dag.parentNode(marker, edge);
+ }
+
+ /**
+ * Returns the parent node of the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNodeA2(int state) {
+     checkIndex(state);
+     final int edge = edgesA2[state];
+     return dag.parentNode(marker, edge);
+ }
+
+ /**
+ * Returns the parent node of the third edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the third edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNodeB2(int state) {
+     checkIndex(state);
+     final int edge = edgesB2[state];
+     return dag.parentNode(marker, edge);
+ }
+
+ /**
+ * Returns the child node of the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNodeAB1(int state) {
+     checkIndex(state);
+     final int edge = edgesAB1[state];
+     return dag.childNode(marker, edge);
+ }
+
+ /**
+ * Returns the child node of the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNodeA2(int state) {
+     checkIndex(state);
+     final int edge = edgesA2[state];
+     return dag.childNode(marker, edge);
+ }
+
+ /**
+ * Returns the child node of the third edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the third edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNodeB2(int state) {
+     checkIndex(state);
+     final int edge = edgesB2[state];
+     return dag.childNode(marker, edge);
+ }
+
+ /**
+ * Returns the symbol for the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbolAB1(int state) {
+     // edgeAB1() performs the bounds check on state
+     final int edge = edgeAB1(state);
+     return dag.symbol(marker, edge);
+ }
+
+ /**
+ * Returns the symbol for the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbolA2(int state) {
+     // edgeA2() performs the bounds check on state
+     final int edge = edgeA2(state);
+     return dag.symbol(marker, edge);
+ }
+
+ /**
+ * Returns the symbol for the third edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the third edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbolB2(int state) {
+     // edgeB2() performs the bounds check on state
+     final int edge = edgeB2(state);
+     return dag.symbol(marker, edge);
+ }
+
+ /**
+ * Returns the normalized forward value for the specified HMM state
+ * with nonzero forward probability.
+ * The normalized forward value is obtained by dividing the
+ * forward value by the sum of the forward values at this level
+ * of the HMM.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ *
+ * @return the normalized forward value for the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public float forwardValue(int state) {
+     checkIndex(state);
+     final float normalized = this.fwdValues[state];
+     return normalized;
+ }
+
+ /**
+ * Returns the normalized backward value for the specified HMM state
+ * with nonzero forward probability.
+ * The normalized backward value is obtained by dividing the
+ * backward value by the sum of the backward values at this level
+ * of the HMM.
+ *
+ * @param state an index of a state with nonzero forward probability
+ *
+ * @return the normalized backward value for the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public float backwardValue(int state) {
+     checkIndex(state);
+     final float normalized = this.bwdValues[state];
+     return normalized;
+ }
+
+ /**
+ * Returns the sum of the forward values at this level of the HMM
+ * when the forward values are computed using forward values
+ * from the previous level that are normalized to sum to 1.
+ * @return the sum of the forward values at this level of the HMM
+ */
+ public float forwardValuesSum() {
+     return this.fwdValueSum;
+ }
+
+ /**
+ * Returns the sum of the backward values at this level of the HMM
+ * when the backward values are computed using backward
+ * values from the next level that are normalized to sum to 1.
+ * @return the sum of the backward values at this level of the HMM
+ */
+ public float backwardValuesSum() {
+     return this.bwdValueSum;
+ }
+
+ /**
+ * Returns a string description of {@code this}. The exact details
+ * of the description are unspecified and subject to change.
+ *
+ * @return a string description of {@code this}.
+ */
+ @Override
+ public String toString() {
+     final String space = " ";
+     final String sep = " | ";
+     // header: level, number of states, and the two normalizing sums
+     StringBuilder sb = new StringBuilder(100)
+             .append("level=").append(marker)
+             .append(" size=").append(size)
+             .append(" forwardValuesSum=").append(fwdValueSum)
+             .append(" backwardSum=").append(bwdValueSum);
+     // one record per state: its three edges, then forward and backward values
+     for (int state=0; state<size; ++state) {
+         sb.append(sep).append("j=").append(state).append(": ")
+                 .append(edgeAB1(state)).append(space)
+                 .append(edgeA2(state)).append(space)
+                 .append(edgeB2(state)).append(space)
+                 .append(forwardValue(state)).append(space)
+                 .append(backwardValue(state));
+     }
+     return sb.append(sep).toString();
+ }
+
+ /*
+ * Increases the state capacity of array fields as necessary
+ * to be greater than or equal to the specified minimum capacity.
+ *
+ * @param minCapacity the desired minimum state capacity
+ */
+ private void ensureCapacity(int minCapacity) {
+     if (minCapacity <= capacity) {
+         return;     // the arrays are already large enough
+     }
+     // grow by roughly 50%, but never to less than the requested minimum
+     int grown = (capacity * 3)/2 + 1;
+     capacity = (grown < minCapacity) ? minCapacity : grown;
+     edgesAB1 = Arrays.copyOf(edgesAB1, capacity);
+     edgesA2 = Arrays.copyOf(edgesA2, capacity);
+     edgesB2 = Arrays.copyOf(edgesB2, capacity);
+     fwdValues = Arrays.copyOf(fwdValues, capacity);
+     bwdValues = Arrays.copyOf(bwdValues, capacity);
+ }
+}
diff --git a/sample/DuoNodes.java b/sample/DuoNodes.java
new file mode 100644
index 0000000..907e004
--- /dev/null
+++ b/sample/DuoNodes.java
@@ -0,0 +1,338 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+/**
+ * <p>Class {@code DuoNodes} stores ordered node trios and associated values.
+ * </p>
+ * <p>Instances of class {@code DuoNodes} are not thread safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class DuoNodes {
+
+    private static final float LOAD_FACTOR = 0.75f;
+    private static final int INITIAL_CAPACITY = (1<<10);
+
+    private int size;
+    private int capacity; // required to be a power of 2
+    private int rehashThreshold;
+
+    // index[j] is the hash table slot of the j-th stored node trio
+    private int[] index;
+    private int[] nodeAB1;
+    private int[] nodeA2;
+    private int[] nodeB2;
+    // value[slot] == 0 marks an empty slot; stored values are always > 0
+    private float[] value;
+
+    /**
+     * Creates a new instance of {@code DuoNodes} that has an
+     * initial value of 0 for each ordered node trio.
+     */
+    public DuoNodes() {
+        this.size = 0;
+        this.capacity = INITIAL_CAPACITY;
+        this.rehashThreshold = (int) (LOAD_FACTOR * capacity);
+        this.index = new int[capacity];
+        this.nodeAB1 = new int[capacity];
+        this.nodeA2 = new int[capacity];
+        this.nodeB2 = new int[capacity];
+        this.value = new float[capacity];
+    }
+
+    /* Returns the hash that selects the first slot that is probed. */
+    private static long hash1(int nodeAB1, int nodeA2, int nodeB2) {
+        long hash = 5;
+        hash = 71 * hash + nodeAB1;
+        hash = 71 * hash + nodeA2;
+        hash = 71 * hash + nodeB2;
+        return hash;
+    }
+
+    /* Returns the hash that determines the step between probed slots. */
+    private static long hash2(int nodeAB1, int nodeA2, int nodeB2) {
+        long hash = 7;
+        hash = 97 * hash + nodeAB1;
+        hash = 97 * hash + nodeA2;
+        hash = 97 * hash + nodeB2;
+        return hash;
+    }
+
+    /*
+     * Return the storage index for specified node trio. If the key is not
+     * currently stored in the hash table, the index at which the value
+     * should be stored is returned.
+     */
+    private int index(int ab1, int a2, int b2) {
+        long h1 = hash1(ab1, a2, b2);
+        long h2 = hash2(ab1, a2, b2);
+        if ((h2 & 1)==0) {
+            // h2 must be relatively prime to capacity, which is a power of 2
+            ++h2;
+        }
+        long probe = h1;
+        for (int k=0; k<capacity; ++k) {
+            int i = (int) (probe % capacity);
+            if (value[i]==0f ||
+                    (nodeAB1[i]==ab1 && nodeA2[i]==a2 && nodeB2[i]==b2)) {
+                return i;
+            }
+            probe += h2;
+        }
+        // Not expected to be reached: the table is rehashed before it fills.
+        throw new AssertionError("hash table is full");
+    }
+
+    /*
+     * Increases the capacity of the internal hash table.
+     */
+    private void rehash() {
+        assert this.size>=this.rehashThreshold;
+        int newCapacity = 2*capacity;
+        if (newCapacity<0) {
+            throw new IllegalStateException("hash table overflow");
+        }
+        int[] oldIndex = index;
+        int[] oldNodeAB1 = nodeAB1;
+        int[] oldNodeA2 = nodeA2;
+        int[] oldNodeB2 = nodeB2;
+        float[] oldValue = value;
+
+        capacity = newCapacity;
+        index = new int[newCapacity];
+        nodeAB1 = new int[newCapacity];
+        nodeA2 = new int[newCapacity];
+        nodeB2 = new int[newCapacity];
+        value = new float[newCapacity];
+
+        // re-insert the stored trios in their original enumeration order
+        for (int j=0; j<size; ++j) {
+            int oldSlot = oldIndex[j];
+            int newSlot = index(oldNodeAB1[oldSlot], oldNodeA2[oldSlot],
+                    oldNodeB2[oldSlot]);
+            index[j] = newSlot;
+            nodeAB1[newSlot] = oldNodeAB1[oldSlot];
+            nodeA2[newSlot] = oldNodeA2[oldSlot];
+            nodeB2[newSlot] = oldNodeB2[oldSlot];
+            value[newSlot] = oldValue[oldSlot];
+        }
+        rehashThreshold = (int) (LOAD_FACTOR * capacity);
+    }
+
+    /*
+     * Throws an IllegalArgumentException whose message is the first
+     * negative node, if any of the specified nodes is negative.
+     */
+    private static void checkNodes(int nodeAB1, int nodeA2, int nodeB2) {
+        if (nodeAB1 < 0) {
+            throw new IllegalArgumentException(String.valueOf(nodeAB1));
+        }
+        if (nodeA2 < 0) {
+            throw new IllegalArgumentException(String.valueOf(nodeA2));
+        }
+        if (nodeB2 < 0) {
+            throw new IllegalArgumentException(String.valueOf(nodeB2));
+        }
+    }
+
+    /**
+     * Adds the specified value to the stored value of the specified
+     * node trio.
+     *
+     * @param nodeAB1 the first node
+     * @param nodeA2 the second node
+     * @param nodeB2 the third node
+     * @param value the value
+     *
+     * @throws IllegalArgumentException if
+     * {@code (nodeAB1 < 0 || nodeA2 < 0 || nodeB2 < 0)}
+     * @throws IllegalArgumentException if
+     * {@code value <= 0 || (Float.isFinite(value) == false)}
+     */
+    public void sumUpdate(int nodeAB1, int nodeA2, int nodeB2, float value) {
+        checkNodes(nodeAB1, nodeA2, nodeB2);
+        if (value <= 0 || (Float.isFinite(value)==false) ) {
+            throw new IllegalArgumentException(String.valueOf(value));
+        }
+        int i = index(nodeAB1, nodeA2, nodeB2);
+        boolean addNode = (this.value[i]==0f);
+        this.value[i] += value;
+        if (addNode) {
+            this.index[size++] = i;
+            this.nodeAB1[i] = nodeAB1;
+            this.nodeA2[i] = nodeA2;
+            this.nodeB2[i] = nodeB2;
+            if (this.size>=this.rehashThreshold) {
+                rehash();
+            }
+        }
+    }
+
+    /**
+     * Returns the number of node trios with non-zero value.
+     * @return the number of node trios with non-zero value
+     */
+    public int size() {
+        return size;
+    }
+
+    private void checkSize(int index) {
+        if (index<0 || index>=size()) {
+            throw new IndexOutOfBoundsException(String.valueOf(index));
+        }
+    }
+
+    /**
+     * Returns the first node of the specified node trio in a list of
+     * node trios with non-zero value. Repeated invocations of this
+     * method with the same parameter will return the same value if
+     * node values are not modified between invocations. If
+     * {@code (index >= 0 && index < this.size())}, then the following
+     * expression will always evaluate to {@code true}:<br>
+     * {@code (this.value(this.enumNodeAB1(index), this.enumNodeA2(index),
+     * this.enumNodeB2(index)) == this.enumValue(index))}.
+     *
+     * @param index an index in a list of node trios with non-zero value
+     * @return the first node of the specified node trio in a list of
+     * node trios with non-zero value
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code index < 0 || index >= this.size()}
+     */
+    public int enumNodeAB1(int index) {
+        checkSize(index);
+        return nodeAB1[this.index[index]];
+    }
+
+    /**
+     * Returns the second node of the specified node trio in a list of
+     * node trios with non-zero value. Repeated invocations of this
+     * method with the same parameter will return the same value if
+     * node values are not modified between invocations. If
+     * {@code (index >= 0 && index < this.size())}, then the following
+     * expression will always evaluate to {@code true}:<br>
+     * {@code (this.value(this.enumNodeAB1(index), this.enumNodeA2(index),
+     * this.enumNodeB2(index)) == this.enumValue(index))}.
+     *
+     * @param index an index in a list of node trios with non-zero value
+     * @return the second node of the specified node trio in a list of
+     * node trios with non-zero value
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code index < 0 || index >= this.size()}
+     */
+    public int enumNodeA2(int index) {
+        checkSize(index);
+        return nodeA2[this.index[index]];
+    }
+
+    /**
+     * Returns the third node of the specified node trio in a list of
+     * node trios with non-zero value. Repeated invocations of this
+     * method with the same parameter will return the same value if
+     * node values are not modified between invocations. If
+     * {@code (index >= 0 && index < this.size())}, then the following
+     * expression will always evaluate to {@code true}:<br>
+     * {@code (this.value(this.enumNodeAB1(index), this.enumNodeA2(index),
+     * this.enumNodeB2(index)) == this.enumValue(index))}.
+     *
+     * @param index an index in a list of node trios with non-zero value
+     * @return the third node of the specified node trio in a list of
+     * node trios with non-zero value
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code index < 0 || index >= this.size()}
+     */
+    public int enumNodeB2(int index) {
+        checkSize(index);
+        return nodeB2[this.index[index]];
+    }
+
+    /**
+     * Returns the value of the specified ordered node trio in a list of
+     * node trios with non-zero value. Repeated invocations of this
+     * method with the same parameter will return the same value if
+     * node values are not modified between invocations. If
+     * {@code (index >= 0 && index < this.size())}, then the following
+     * expression will always evaluate to {@code true}:<br>
+     * {@code (this.value(this.enumNodeAB1(index), this.enumNodeA2(index),
+     * this.enumNodeB2(index)) == this.enumValue(index))}.
+     *
+     * @param index an index in a list of node trios with non-zero value
+     * @return the value of the specified ordered node trio in a list of
+     * node trios with non-zero value
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code index < 0 || index >= this.size()}
+     */
+    public float enumValue(int index) {
+        checkSize(index);
+        return value[this.index[index]];
+    }
+
+    /**
+     * Returns the specified node trio value.
+     *
+     * @param nodeAB1 the first node
+     * @param nodeA2 the second node
+     * @param nodeB2 the third node
+     * @return the specified node trio value
+     * @throws IllegalArgumentException if
+     * {@code (nodeAB1 < 0 || nodeA2 < 0 || nodeB2 < 0)}
+     */
+    public float value(int nodeAB1, int nodeA2, int nodeB2) {
+        checkNodes(nodeAB1, nodeA2, nodeB2);
+        return value[index(nodeAB1, nodeA2, nodeB2)];
+    }
+
+    /**
+     * Sets the value of each node trio to 0.
+     */
+    public void clear() {
+        // only occupied slots need to be reset
+        for (int j=0; j<this.size; ++j) {
+            value[index[j]] = 0f;
+        }
+        size = 0;
+    }
+
+    /**
+     * Returns a string representation of {@code this}. The exact
+     * details of the representation are unspecified and subject to change.
+     *
+     * @return a string representation of {@code this}.
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder(80);
+        sb.append("size=");
+        sb.append(size);
+        for (int j=0; j<size; ++j) {
+            sb.append(" (");
+            sb.append(j);
+            sb.append(": nodeAB1=");
+            sb.append(enumNodeAB1(j));
+            sb.append(" nodeA2=");
+            sb.append(enumNodeA2(j));
+            sb.append(" nodeB2=");
+            sb.append(enumNodeB2(j));
+            sb.append(" value=");
+            sb.append(enumValue(j));
+            sb.append(") ");
+        }
+        return sb.toString();
+    }
+}
diff --git a/sample/HapBaumLevel.java b/sample/HapBaumLevel.java
new file mode 100644
index 0000000..6371d93
--- /dev/null
+++ b/sample/HapBaumLevel.java
@@ -0,0 +1,445 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import dag.Dag;
+import java.util.Arrays;
+import vcf.AL;
+
+/**
+ * <p>Class {@code HapBaumLevel} computes forward and backward Baum values for a
+ * haploid hidden Markov model (HMM) whose states are edges of a leveled
+ * directed acyclic graph (DAG).
+ * </p>
+ * <p>Instances of class {@code HapBaumLevel} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class HapBaumLevel {
+
+    private static final int INITIAL_CAPACITY = 100;
+    // floor applied to forward and backward values to limit underflow
+    private static final float MIN_VALUE = 100*Float.MIN_VALUE;
+    private final Dag dag;
+    private final AL al;
+
+    private int marker = -1;
+    private int hap = -1;
+    private int size = 0;
+
+    private int capacity = INITIAL_CAPACITY;
+    private int[] edges = new int[INITIAL_CAPACITY];
+    private float[] fwdValues = new float[INITIAL_CAPACITY];
+    private float[] bwdValues = new float[INITIAL_CAPACITY];
+    private float fwdValueSum = 0f;
+    private float bwdValueSum = 0f;
+
+    private int nAlleles = 0;
+    private float[] alProbs = new float[3];
+
+    /**
+     * Constructs a new {@code HapBaumLevel} instance from the specified
+     * data.
+     *
+     * @param dag the directed acyclic graph that determines the transition
+     * probabilities
+     * @param al the emission probabilities
+     * @throws IllegalArgumentException if
+     * {@code dag.markers().equals(al.markers()) == false}
+     * @throws NullPointerException if {@code dag == null || al == null}
+     */
+    public HapBaumLevel(Dag dag, AL al) {
+        if (dag.markers().equals(al.markers())==false) {
+            throw new IllegalArgumentException("marker inconsistency");
+        }
+        this.dag = dag;
+        this.al = al;
+    }
+
+    /**
+     * Sets the Baum forward algorithm values for this level of the HMM and
+     * records the child node values in the specified {@code nodes} parameter.
+     * When the method call returns, the {@code nodes} parameter will be
+     * reset to the child node values for this level of the HMM.
+     *
+     * @param nodes child node values at the previous level of the HMM
+     * @param marker the level of the HMM at which the Baum forward algorithm
+     * values will be computed
+     * @param haplotype a haplotype index
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.dag().nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code haplotype < 0 || haplotype >= this.emissions().nHaps()}
+     * @throws IndexOutOfBoundsException if any node with non-zero value
+     * is not a valid parent node at the specified level of the HMM
+     * @throws NullPointerException if {@code nodes == null}
+     */
+    public void setForwardValues(HapNodes nodes, int marker, int haplotype) {
+        this.marker = marker;
+        this.hap = haplotype;
+        this.nAlleles = al.marker(marker).nAlleles();
+        this.size = 0;
+        this.fwdValueSum = 0f;
+        this.bwdValueSum = 0f;
+        initializeAlProbs();  // initialized here due to alProbs() contract
+        setStates(nodes);
+        setChildNodes(nodes);
+    }
+
+    /*
+     * Ensures that alProbs has room for nAlleles entries and that the
+     * first nAlleles entries are 0.
+     */
+    private void initializeAlProbs() {
+        if (alProbs.length<nAlleles) {
+            // a newly allocated array is already filled with zeros
+            int newLength=Math.max(nAlleles, (3*alProbs.length/2+1));
+            alProbs = new float[newLength];
+        }
+        else {
+            Arrays.fill(alProbs, 0, nAlleles, 0f);
+        }
+    }
+
+    /*
+     * Stores one state for each out-edge of a node with non-zero value
+     * whose symbol has positive emission probability, and normalizes
+     * the forward values of the stored states to sum to 1.
+     */
+    private void setStates(HapNodes nodes) {
+        float valueSum = 0f;
+        for (int j=0, n=nodes.size(); j<n; ++j) {
+            int node=nodes.enumNode(j);
+            for (int k=0, m=dag.nOutEdges(marker, node); k<m; ++k) {
+                int edge = dag.outEdge(marker, node, k);
+                int symbol = dag.symbol(marker, edge);
+                float ep = al.al(marker, hap, symbol);
+                if (ep > 0.0f) {
+                    if (size==capacity) {
+                        ensureCapacity(size+1);
+                    }
+                    edges[size] = edge;
+                    float tp = dag.condEdgeProb(marker, edge);
+                    float fwdValue = ep*nodes.enumValue(j)*tp;
+                    if (fwdValue < MIN_VALUE) {
+                        assert nodes.enumValue(j)>0.0;
+                        fwdValue = MIN_VALUE;
+                    }
+                    fwdValues[size++] = fwdValue;
+                    valueSum+=fwdValue;
+                }
+            }
+        }
+        assert valueSum>0.0 ^ size==0;
+        for (int k=0; k<size; ++k) {
+            this.fwdValues[k] /= valueSum;
+        }
+        fwdValueSum=valueSum;
+    }
+
+    /**
+     * Stores the Baum forward algorithm child node values for this
+     * level of the HMM in the specified {@code HapNodes} object.
+     *
+     * @param nodes the node values that will be set
+     *
+     * @throws NullPointerException if {@code nodes == null}
+     */
+    public void setChildNodes(HapNodes nodes) {
+        nodes.clear();
+        for (int k=0; k<size; ++k) {
+            int node = dag.childNode(marker, edges[k]);
+            nodes.sumUpdate(node, fwdValues[k]);
+        }
+    }
+
+    /**
+     * Sets the Baum backward algorithm values for this level of the HMM
+     * and stores the parent node values in the specified {@code nodes}
+     * parameter. When the method call returns, the {@code nodes} parameter
+     * will be reset to the parent node values for this level of the HMM.
+     *
+     * @param nodes parent node values at the next level of HMM
+     *
+     * @throws IndexOutOfBoundsException if any node with non-zero value is
+     * not a valid child node at the {@code this.marker()} level of the HMM
+     * @throws NullPointerException if {@code nodes == null}
+     */
+    public void setBackwardValues(HapNodes nodes) {
+        // read the unnormalized backward value of each state from its
+        // child node before the node values are overwritten
+        for (int j=0; j<size; ++j) {
+            int node = dag.childNode(marker, edges[j]);
+            float backwardValue = nodes.value(node);
+            bwdValues[j] = backwardValue;
+            bwdValueSum += backwardValue;
+        }
+        nodes.clear();
+        float alProbsSum = 0f;
+        for (int j=0; j<size; ++j) {
+            bwdValues[j]/=bwdValueSum;
+            int edge = edges[j];
+            int symbol = symbol(j);
+            float tp = dag.condEdgeProb(marker, edge);
+
+            // alProbs is zeroed by the setForwardValues() method
+            float stateProb = fwdValues[j]*bwdValues[j];
+            alProbs[symbol] += stateProb;
+            alProbsSum += stateProb;
+
+            float bwdValue = bwdValues[j]*tp*al.al(marker, hap, symbol);
+            if (bwdValue < MIN_VALUE && bwdValues[j] > 0.0) {
+                bwdValue = MIN_VALUE;
+            }
+            int pn = dag.parentNode(marker, edge);
+            nodes.sumUpdate(pn, bwdValue);
+        }
+        // rescale the accumulated state probabilities so they sum to 1
+        for (int j=0; j<nAlleles; ++j) {
+            alProbs[j] /= alProbsSum;
+        }
+    }
+
+    /**
+     * Returns the directed acyclic graph that determines the transition
+     * probabilities.
+     * @return the directed acyclic graph that determines the transition
+     * probabilities
+     */
+    public Dag dag() {
+        return dag;
+    }
+
+    /**
+     * Returns the emission probabilities.
+     * @return the emission probabilities
+     */
+    public AL emissions() {
+        return al;
+    }
+
+    /**
+     * Returns the level of the HMM.
+     * @return the level of the HMM
+     */
+    public int marker() {
+        return marker;
+    }
+
+    /**
+     * Returns the number of possible alleles at this level of the HMM.
+     * @return the number of possible alleles at this level of the HMM
+     */
+    public int nAlleles() {
+        return nAlleles;
+    }
+
+    /**
+     * Returns the specified posterior allele probability. Returns 0
+     * if the Baum backward probabilities have not been set.
+     * @param allele an allele index
+     * @return the specified posterior allele probability
+     * @throws IndexOutOfBoundsException if
+     * {@code allele < 0 || allele >= this.nAlleles()}
+     */
+    public float alProbs(int allele) {
+        if (allele < 0 || allele >= nAlleles) {
+            throw new IndexOutOfBoundsException(String.valueOf(allele));
+        }
+        return alProbs[allele];
+    }
+
+    /**
+     * Returns the number of states with nonzero forward probability at
+     * this level of the HMM.
+     *
+     * @return the number of states with nonzero forward probability at
+     * this level of the HMM
+     */
+    public int size() {
+        return size;
+    }
+
+    /*
+     * Throws an IndexOutOfBoundsException whose message is the specified
+     * index if state < 0 || state >= this.size().
+     */
+    private void checkIndex(int state) {
+        if (state<0 || state>=size) {
+            throw new IndexOutOfBoundsException(String.valueOf(state));
+        }
+    }
+
+    /**
+     * Returns the edge of the specified HMM state with nonzero forward
+     * probability.
+     * @param state an index of a HMM state with nonzero forward probability
+     * @return the edge of the specified HMM state with nonzero forward
+     * probability
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code state < 0 || state >= this.size()}
+     */
+    public int edge(int state) {
+        checkIndex(state);
+        return edges[state];
+    }
+
+    /**
+     * Returns the parent node of the specified HMM state with nonzero forward
+     * probability.
+     *
+     * @param state an index of a HMM state with nonzero forward probability
+     * @return the parent node of the specified HMM state with nonzero forward
+     * probability
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code state < 0 || state >= this.size()}
+     */
+    public int parentNode(int state) {
+        checkIndex(state);
+        return dag.parentNode(marker, edges[state]);
+    }
+
+    /**
+     * Returns the child node of the specified HMM state with nonzero forward
+     * probability.
+     *
+     * @param state an index of a HMM state with nonzero forward probability
+     * @return the child node of the specified HMM state with nonzero forward
+     * probability
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code state < 0 || state >= this.size()}
+     */
+    public int childNode(int state) {
+        checkIndex(state);
+        return dag.childNode(marker, edges[state]);
+    }
+
+    /**
+     * Returns the symbol of the specified HMM state with nonzero forward
+     * probability.
+     *
+     * @param state an index of a HMM state with nonzero forward probability
+     * @return the symbol of the specified HMM state with nonzero forward
+     * probability
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code state < 0 || state >= this.size()}
+     */
+    public int symbol(int state) {
+        // edge() performs the bounds check on state
+        return dag.symbol(marker, edge(state));
+    }
+
+    /**
+     * Returns the normalized forward value for the specified HMM state
+     * with nonzero forward probability.
+     * The normalized forward value is obtained by dividing the
+     * forward value by the sum of the forward values at this level
+     * of the HMM.
+     *
+     * @param state an index of a HMM state with nonzero forward probability
+     *
+     * @return the normalized forward value for the specified HMM state
+     * with nonzero forward probability
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code state < 0 || state >= this.size()}
+     */
+    public float forwardValue(int state) {
+        checkIndex(state);
+        return fwdValues[state];
+    }
+
+    /**
+     * Returns the normalized backward value for the specified HMM state
+     * with nonzero forward probability.
+     * The normalized backward value is obtained by dividing the
+     * backward value by the sum of the backward values at this level
+     * of the HMM.
+     *
+     * @param state an index of a state with nonzero forward probability
+     *
+     * @return the normalized backward value for the specified HMM state
+     * with nonzero forward probability
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code state < 0 || state >= this.size()}
+     */
+    public float backwardValue(int state) {
+        checkIndex(state);
+        return bwdValues[state];
+    }
+
+    /**
+     * Returns the sum of the forward values at this level of the HMM
+     * when the forward values are computed using forward values
+     * from the previous level that are normalized to sum to 1.
+     * @return the sum of the forward values at this level of the HMM
+     */
+    public float forwardValuesSum() {
+        return fwdValueSum;
+    }
+
+    /**
+     * Returns the sum of the backward values at this level of the HMM
+     * when the backward values are computed using backward
+     * values from the next level that are normalized to sum to 1.
+     * @return the sum of the backward values at this level of the HMM
+     */
+    public float backwardValuesSum() {
+        return bwdValueSum;
+    }
+
+    /**
+     * Returns a string description of {@code this}. The exact details of the
+     * description are unspecified and subject to change.
+     *
+     * @return a string description of {@code this}
+     */
+    @Override
+    public String toString() {
+        String space=" ";
+        String sep=" | ";
+        StringBuilder sb=new StringBuilder(100);
+        sb.append("level=");
+        sb.append(marker);
+        sb.append(" size=");
+        sb.append(size);
+        sb.append(" forwardValuesSum=");
+        sb.append(fwdValueSum);
+        sb.append(" backwardSum=");
+        sb.append(bwdValueSum);
+        for (int j=0; j<size; ++j) {
+            sb.append(sep);
+            sb.append("j=");
+            sb.append(j);
+            sb.append(": ");
+            sb.append(edge(j));
+            sb.append(space);
+            sb.append(forwardValue(j));
+            sb.append(space);
+            sb.append(backwardValue(j));
+        }
+        sb.append(sep);
+        return sb.toString();
+    }
+
+    /*
+     * Increases the state capacity of array fields as necessary
+     * to be greater than or equal to the specified minimum capacity.
+     *
+     * @param minCapacity the desired minimum state capacity
+     */
+    private void ensureCapacity(int minCapacity) {
+        if (minCapacity>capacity) {
+            capacity=(capacity*3)/2+1;
+            if (capacity<minCapacity) {
+                capacity=minCapacity;
+            }
+            edges=Arrays.copyOf(edges, capacity);
+            fwdValues=Arrays.copyOf(fwdValues, capacity);
+            bwdValues=Arrays.copyOf(bwdValues, capacity);
+        }
+    }
+}
diff --git a/sample/HapNodes.java b/sample/HapNodes.java
new file mode 100644
index 0000000..805bb83
--- /dev/null
+++ b/sample/HapNodes.java
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+/**
+ * <p>Class {@code HapNodes} stores nodes and associated values in an
+ * open-addressing hash table whose capacity is a power of 2.</p>
+ * <p>Instances of class {@code HapNodes} are not thread safe.</p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class HapNodes {
+
+ private static final float loadFactor = 0.75f;
+
+ private int size;
+ private int capacity; // required to be a power of 2
+ private int rehashThreshold;
+
+ private int[] index;
+ private int[] node;
+ private float[] value;
+
+ /**
+ * Creates a new instance of {@code HapNodes} that has an
+ * initial value of 0 for each node.
+ */
+ public HapNodes() {
+ this.size = 0;
+ this.capacity = (1<<10);
+ this.rehashThreshold = (int) (loadFactor * capacity);
+ this.index = new int[capacity];
+ this.node = new int[capacity];
+ this.value = new float[capacity];
+ }
+
+ /*
+ * Returns the storage index for the specified node. If the node is not
+ * currently stored in the hash table, the index at which the node and
+ * its value should be stored is returned. A zero value marks a free slot.
+ */
+ private int index(int node) {
+ long l = (71 * 5) + node;
+ long h2 = (97 * 7) + node;
+ if ((h2 & 1)==0) {
+ // h2 must be relatively prime to capacity, which is a power of 2
+ ++h2;
+ }
+ for (int k=0; k<capacity; ++k) {
+ int i = (int) (l % capacity);
+ if (value[i]==0.0 || (this.node[i]==node)) {
+ return i;
+ }
+ l += h2;
+ }
+ assert false;
+ return -1;
+ }
+
+ /*
+ * Doubles the capacity of the internal hash table and reinserts each node.
+ */
+ private void rehash() {
+ assert this.size>=this.rehashThreshold;
+ int newMaxSize = 2*capacity;
+ if (newMaxSize<0) {
+ throw new IllegalStateException("hash table overflow");
+ }
+ int[] oldIndex = index;
+ int[] oldNode = node;
+ float[] oldValue = value;
+
+ capacity = newMaxSize;
+ index = new int[newMaxSize];
+ node = new int[newMaxSize];
+ value = new float[newMaxSize];
+
+ for (int j=0; j<size; ++j) {
+ int oldInd = oldIndex[j];
+ int newIndex = index(oldNode[oldInd]);
+ index[j] = newIndex;
+ node[newIndex] = oldNode[oldInd];
+ value[newIndex] = oldValue[oldInd];
+ }
+ rehashThreshold = (int) (loadFactor * capacity);
+ }
+
+ /**
+ * Adds the specified value to the stored value of the specified
+ * node.
+ *
+ * @param node the node
+ * @param value the value to be added
+ *
+ * @throws IllegalArgumentException if {@code node < 0}
+ * @throws IllegalArgumentException if
+ * {@code value <= 0 || (Double.isFinite(value) == false)}
+ */
+ public void sumUpdate(int node, float value) {
+ if (node < 0) {
+ throw new IllegalArgumentException(String.valueOf(node));
+ }
+ if (value <= 0 || (Double.isFinite(value)==false) ) {
+ throw new IllegalArgumentException(String.valueOf(value));
+ }
+ int i = index(node);
+ boolean addNode = (this.value[i]==0f);
+ this.value[i] += value;
+ if (addNode) {
+ this.index[size++] = i;
+ this.node[i] = node;
+ if (this.size>=this.rehashThreshold) {
+ rehash();
+ }
+ }
+ }
+
+ /**
+ * Returns the number of nodes with non-zero value.
+ * @return the number of nodes with non-zero value
+ */
+ public int size() {
+ return size;
+ }
+
+ private void checkSize(int index) {
+ if (index>=size()) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ }
+
+ /**
+ * Returns the specified node in a list of nodes with non-zero value.
+ * Repeated invocations of this method with the same parameter will
+ * return the same value if node values are not modified between
+ * invocations. If {@code (index >= 0 && index < this.size())}, then the
+ * following expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of nodes with non-zero value
+ * @return the specified node in the list of nodes with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumNode(int index) {
+ checkSize(index);
+ return node[this.index[index]];
+ }
+
+ /**
+ * Returns the value of the specified node in a list of nodes with
+ * non-zero value. Repeated invocations of this method with the same
+ * parameter will return the same value if node values are not modified
+ * between invocations. If {@code (index >= 0 && index < this.size())}, then
+ * the following expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of nodes with non-zero value
+ * @return the value of the specified node in a list of nodes with
+ * non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public float enumValue(int index) {
+ checkSize(index);
+ return value[this.index[index]];
+ }
+
+ /**
+ * Returns the specified node value, or 0 if the node has no stored value.
+ *
+ * @param node the node
+ * @return the specified node value
+ * @throws IllegalArgumentException if {@code node < 0}
+ */
+ public float value(int node) {
+ if (node < 0) {
+ throw new IllegalArgumentException(String.valueOf(node));
+ }
+ return value[index(node)];
+ }
+
+ /**
+ * Sets the value of each node to 0.
+ */
+ public void clear() {
+ for (int j=0; j<this.size; ++j) {
+ value[index[j]] = 0f;
+ }
+ size = 0;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(80);
+ sb.append("size=");
+ sb.append(size);
+ for (int j=0; j<size; ++j) {
+ sb.append(" (");
+ sb.append(j);
+ sb.append(": node=");
+ sb.append(enumNode(j));
+ sb.append(" value=");
+ sb.append(enumValue(j));
+ sb.append(") ");
+ }
+ return sb.toString();
+ }
+}
diff --git a/sample/HaplotypeCoder.java b/sample/HaplotypeCoder.java
new file mode 100644
index 0000000..4ae3782
--- /dev/null
+++ b/sample/HaplotypeCoder.java
@@ -0,0 +1,204 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import blbutil.ByteIndexArray;
+import blbutil.ShiftedByteIndexArray;
+import blbutil.CharIndexArray;
+import blbutil.IntArray;
+import blbutil.IntList;
+import blbutil.WrappedIntArray;
+import haplotype.SampleHapPairs;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code HaplotypeCoder} indexes the observed allele sequences
+ * in reference and target haplotype pairs for a list of consecutive markers.
+ * </p>
+ * <p>Instances of class {@code HaplotypeCoder} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class HaplotypeCoder {
+
+ private final int nRefHaps;
+ private final int nHaps;
+ private final SampleHapPairs refHapPairs;
+ private final SampleHapPairs targetHapPairs;
+
+ /**
+ * Constructs a new {@code HaplotypeCoder} instance from the specified
+ * data.
+ * @param refHapPairs the reference haplotype pairs
+ * @param targetHapPairs the target haplotype pairs
+ * @throws IllegalArgumentException if
+ * {@code refHapPairs.markers().equals(targetHapPairs.markers()) == false}
+ * @throws NullPointerException if
+ * {@code refHapPairs == null || targetHapPairs == null}
+ */
+ public HaplotypeCoder(SampleHapPairs refHapPairs,
+ SampleHapPairs targetHapPairs) {
+ if (refHapPairs.markers().equals(targetHapPairs.markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ this.nRefHaps = refHapPairs.nHaps();
+ this.nHaps = nRefHaps + targetHapPairs.nHaps();
+ this.refHapPairs = refHapPairs;
+ this.targetHapPairs = targetHapPairs;
+ }
+
+ /**
+ * Returns the reference haplotype pairs used to construct this.
+ * @return the reference haplotype pairs used to construct this
+ */
+ public SampleHapPairs refHapPairs() {
+ return refHapPairs;
+ }
+
+ /**
+ * Returns the target haplotype pairs used to construct this.
+ * @return the target haplotype pairs used to construct this
+ */
+ public SampleHapPairs targetHapPairs() {
+ return targetHapPairs;
+ }
+
+ /**
+ * Returns a two element array whose first element maps each reference
+ * haplotype index to the index of the allele sequence carried by that
+ * reference haplotype, and whose second element maps each target haplotype
+ * index to the index of the allele sequence carried by that target
+ * haplotype. The size of the first element of the returned array is
+ * {@code this.refHapPairs().nHaps()}, and the size of the second
+ * element of the returned array is {@code this.targetHapPairs().nHaps()}
+ *
+ * @param start the first marker index (inclusive)
+ * @param end the last marker index (exclusive)
+ * @return the haplotype indices for the reference and target samples.
+ * @throws IllegalArgumentException if {@code start > end}
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || end >= this.refHapPairs.nMarkers()}
+ */
+ public IntArray[] run(int start, int end) {
+ if (start >= end) {
+ throw new IllegalArgumentException("start > end");
+ }
+ IntArray[] val = new IntArray[2];
+ int[] haps = initialHaps();
+ int[] alleles = new int[nHaps];
+ IntList lastEnds = new IntList(1);
+ lastEnds.add(nHaps);
+ for (int m=start; m<end; ++m) {
+ lastEnds = partition(m, alleles, haps, lastEnds);
+ }
+ setAllelesToHapIndices(alleles, lastEnds, haps);
+ int nAlleles = lastEnds.size();
+ if (nAlleles <= 128) {
+ val[0] = new ByteIndexArray(alleles, 0, nRefHaps);
+ val[1] = new ByteIndexArray(alleles, nRefHaps, nHaps);
+ }
+ else if (nAlleles <= 256) {
+ val[0] = new ShiftedByteIndexArray(alleles, 0, nRefHaps);
+ val[1] = new ShiftedByteIndexArray(alleles, nRefHaps, nHaps);
+ }
+ else if (nAlleles <= 65535) {
+ val[0] = new CharIndexArray(alleles, 0, nRefHaps);
+ val[1] = new CharIndexArray(alleles, nRefHaps, nHaps);
+ }
+ else {
+ val[0] = new WrappedIntArray(Arrays.copyOfRange(alleles, 0, nRefHaps));
+ val[1] = new WrappedIntArray(Arrays.copyOfRange(alleles, nRefHaps, nHaps));
+ }
+ return val;
+ }
+
+ private int[] initialHaps() {
+ int[] ia = new int[nHaps];
+ for (int j = 0; j < ia.length; ++j) {
+ ia[j] = j;
+ }
+ return ia;
+ }
+
+ private IntList partition(int marker, int[] alleles, int[] haps,
+ IntList lastEnds) {
+ IntList nextEnds = new IntList( (4*lastEnds.size())/3 + 1 );
+ setAlleles(marker, alleles);
+ int nAlleles = refHapPairs.marker(marker).nAlleles();
+ int lastAllele = nAlleles - 1;
+
+ int start = 0;
+ for (int j=0, n=lastEnds.size(); j<n; ++j) {
+ int end = lastEnds.get(j);
+ for (int al=0; al<lastAllele; ++al) {
+ int nextStart = partition(alleles, haps, start, end, al);
+ if (nextStart > start) {
+ nextEnds.add(nextStart);
+ start = nextStart;
+ }
+ }
+ if (end > start) {
+ nextEnds.add(end);
+ }
+ start = end;
+ }
+ return nextEnds;
+ }
+
+ private void setAlleles(int marker, int[] alleles) {
+ for (int j = 0; j < nRefHaps; ++j) {
+ alleles[j] = refHapPairs.allele(marker, j);
+ }
+ for (int j = nRefHaps; j < nHaps; ++j) {
+ alleles[j] = targetHapPairs.allele(marker, j - nRefHaps);
+ }
+ }
+
+ /* Returns the start index of second partitioned set */
+ private int partition(int[] alleles, int[] haps, int start, int end,
+ int splitAllele) {
+ int nextStart = end;
+ while (start < nextStart) {
+ int allele = alleles[haps[start]];
+ if (allele == splitAllele) {
+ ++start;
+ }
+ else {
+ --nextStart;
+ int tmp = haps[nextStart];
+ haps[nextStart] = haps[start];
+ haps[start] = tmp;
+ }
+ }
+ return nextStart;
+ }
+
+ private static void setAllelesToHapIndices(int[] alleles, IntList ends,
+ int[] haps) {
+ int start = 0;
+ for (int j=0, n=ends.size(); j<n; ++j) {
+ int end = ends.get(j);
+ for (int k=start; k<end; ++k) {
+ alleles[haps[k]] = j;
+ }
+ start = end;
+ }
+ }
+}
diff --git a/sample/ImputationData.java b/sample/ImputationData.java
new file mode 100644
index 0000000..d44b1d3
--- /dev/null
+++ b/sample/ImputationData.java
@@ -0,0 +1,341 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import beagleutil.Samples;
+import blbutil.IntArray;
+import haplotype.SampleHapPairs;
+import java.util.Arrays;
+import main.CurrentData;
+import main.GeneticMap;
+import main.Par;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code ImputationData} contains the input data that is
+ * required for imputation of ungenotyped markers in the imputation target.
+ * </p>
+ * <p>Instances of class {@code ImputationData} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class ImputationData {
+
+ private static final double MIN_CM_DIST = 1e-7;
+
+ private final SampleHapPairs refHapPairs;
+ private final SampleHapPairs targHapPairs;
+ private final RefHapSegs refHapSegs;
+ private final IntArray[] refAlleles;
+ private final IntArray[] targAlleles;
+ private final float[] errProb;
+ private final float[] pRecomb;
+ private final float[] weight;
+ private final int nClusters;
+
+ /**
+ * Constructs a new {@code ImputationData} instance from the specified data.
+ * @param par the analysis parameters
+ * @param cd the reference haplotype data for the current marker window
+ * @param targetHapPairs the target haplotype pairs
+ * @param map the genetic map
+ *
+ * @throws IllegalArgumentException if
+ * {@code cd.targetMarkers().equals(targetHapPairs.markers()) == false}
+ * @throws IllegalArgumentException if
+ * {@code cd.targetSamples().equals(targetHapPairs.samples()) == false}
+ * @throws NullPointerException if any parameter is {@code null}
+ */
+ public ImputationData(Par par, CurrentData cd,
+ SampleHapPairs targetHapPairs, GeneticMap map) {
+ if (cd.targetMarkers().equals(targetHapPairs.markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ if (cd.targetSamples().equals(targetHapPairs.samples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ int[] gtEnd = gtEnd(targetHapPairs.markers(), map, par.cluster());
+ int[] gtStart = gtStart(gtEnd);
+ this.refAlleles = new IntArray[gtStart.length];
+ this.targAlleles = new IntArray[gtStart.length];
+ setCodedAlleles(cd.restrictedRefSampleHapPairs(), targetHapPairs,
+ gtStart, gtEnd, refAlleles, targAlleles);
+ this.nClusters = gtStart.length;
+ this.refHapPairs = cd.refSampleHapPairs();
+ this.refHapSegs = refHapSegs(refHapPairs, gtStart, gtEnd,
+ cd.markerIndices(), par.nthreads());
+ this.targHapPairs = targetHapPairs;
+ this.errProb = err(par.err(), gtStart, gtEnd);
+ this.pRecomb = ImputationData.this.pRecomb(refHapSegs, map, par.ne());
+ this.weight = wts(refHapSegs, map);
+ }
+
+ private static int[] gtEnd(Markers targetMarkers, GeneticMap genMap,
+ float clusterDist) {
+ int nMarkers = targetMarkers.nMarkers();
+ int[] ends = new int[nMarkers];
+ double startPos = genMap.genPos(targetMarkers.marker(0));
+ int index = 0;
+ for (int m=1; m<nMarkers; ++m) {
+ double pos = genMap.genPos(targetMarkers.marker(m));
+ if ((pos - startPos) > clusterDist) {
+ ends[index++] = m;
+ startPos = pos;
+ }
+ }
+ ends[index++] = nMarkers;
+ return Arrays.copyOf(ends, index);
+ }
+
+ private static int[] gtStart(int[] gtEnd) {
+ int[] gtStart = new int[gtEnd.length];
+ for (int j=1; j<gtStart.length; ++j) {
+ gtStart[j] = gtEnd[j - 1];
+ }
+ return gtStart;
+ }
+
+ private static void setCodedAlleles(SampleHapPairs refHapPairs,
+ SampleHapPairs targetHapPairs, int[] gtStart,
+ int[] gtEnd, IntArray[] refAlleles, IntArray[] targAlleles) {
+ HaplotypeCoder coder = new HaplotypeCoder(refHapPairs, targetHapPairs);
+ for (int j=0; j<gtStart.length; ++j) {
+ IntArray[] ia = coder.run(gtStart[j], gtEnd[j]);
+ refAlleles[j] = ia[0];
+ targAlleles[j] = ia[1];
+ }
+ }
+
+ private static float[] err(float errRate, int[] gtStart, int[] gtEnd) {
+ float maxErrProb = 0.5f;
+ float[] err = new float[gtStart.length];
+ for (int j=0; j<err.length; ++j) {
+ err[j] = errRate * (gtEnd[j] - gtStart[j]);
+ if (err[j] > maxErrProb) {
+ err[j] = maxErrProb;
+ }
+ }
+ return err;
+ }
+
+ private static float[] pRecomb(RefHapSegs refHapSegs, GeneticMap map,
+ float ne) {
+ SampleHapPairs refHaps = refHapSegs.refHapPairs();
+ Markers refMarkers = refHaps.markers();
+ int nHaps = refHaps.nHaps();
+ int[] midPos = midPos(refMarkers, refHapSegs);
+ int chrom = refMarkers.marker(0).chromIndex();
+ return pRcomb(chrom, midPos, nHaps, map, ne);
+ }
+
+ private static int[] midPos(Markers refMarkers, RefHapSegs refHapSegs) {
+ int[] midPos = new int[refHapSegs.nClusters()];
+ for (int j=0; j<midPos.length; ++j) {
+ int startPos = refMarkers.marker(refHapSegs.clusterStart(j)).pos();
+ int endPos = refMarkers.marker(refHapSegs.clusterEnd(j) - 1).pos();
+ midPos[j] = (startPos + endPos) / 2;
+ }
+ return midPos;
+ }
+
+ private static float[] pRcomb(int chrom, int[] midPos, int nHaps,
+ GeneticMap map, float ne) {
+ float[] rr = new float[midPos.length];
+ double c = -(0.04*ne/nHaps); // 0.04 = 4/(100 cM/M)
+ double lastGenPos = map.genPos(chrom, midPos[0]);
+ rr[0] = 0f;
+ for (int j=1; j<rr.length; ++j) {
+ double genPos = map.genPos(chrom, midPos[j]);
+ double genDist = Math.max(Math.abs(genPos - lastGenPos), MIN_CM_DIST);
+ rr[j] = (float) -Math.expm1(c*genDist);
+ lastGenPos = genPos;
+ }
+ return rr;
+ }
+
+ private static float[] wts(RefHapSegs refHapSegs, GeneticMap map) {
+ Markers refMarkers = refHapSegs.refHapPairs().markers();
+ double[] cumPos = cumPos(refMarkers, map);
+ int nMarkers = refMarkers.nMarkers();
+ int nClusters = refHapSegs.nClusters();
+ float[] wts = new float[cumPos.length];
+ if (nClusters > 0) {
+ Arrays.fill(wts, 0, refHapSegs.clusterStart(0), Float.NaN);
+ }
+ for (int j = 0, jj = (nClusters - 1); j < jj; ++j) {
+ int start = refHapSegs.clusterStart(j);
+ int end = refHapSegs.clusterEnd(j);
+ int nextStart = refHapSegs.clusterStart(j+1);
+ double nextStartPos = cumPos[nextStart];
+ double totalLength = nextStartPos - cumPos[end - 1];
+ Arrays.fill(wts, start, end, 1f);
+ for (int m=end; m<nextStart; ++m) {
+ wts[m] = (float) ( (nextStartPos - cumPos[m]) / totalLength );
+ }
+ }
+ Arrays.fill(wts, refHapSegs.clusterStart(nClusters - 1), nMarkers, Float.NaN);
+ return wts;
+ }
+
+ private static double[] cumPos(Markers markers, GeneticMap map) {
+ double[] cumPos = new double[markers.nMarkers()];
+ double lastGenPos = map.genPos(markers.marker(0));
+ cumPos[0] = 0.0;
+ for (int j=1; j<cumPos.length; ++j) {
+ double genPos = map.genPos(markers.marker(j));
+ double genDist = Math.max(Math.abs(genPos - lastGenPos), MIN_CM_DIST);
+ cumPos[j] = cumPos[j-1] + genDist;
+ lastGenPos = genPos;
+ }
+ return cumPos;
+ }
+
+ private static RefHapSegs refHapSegs(SampleHapPairs refHapPairs,
+ int[] gtStart, int[] gtEnd, int[] gtIndices, int nThreads) {
+ assert gtStart.length == gtEnd.length;
+ int[] clusterStart = new int[gtStart.length];
+ int[] clusterEnd = new int[gtEnd.length];
+ for (int j=0; j<clusterStart.length; ++j) {
+ clusterStart[j] = gtIndices[gtStart[j]];
+ if (j < clusterStart.length - 1) {
+ clusterEnd[j] = gtIndices[gtEnd[j]];
+ }
+ else {
+ clusterEnd[j] = gtIndices[gtEnd[j] - 1] + 1;
+ }
+ }
+ return new RefHapSegs(refHapPairs, clusterStart, clusterEnd,
+ nThreads);
+ }
+
+ /**
+ * Returns the reference haplotype pairs.
+ * @return the reference haplotype pairs
+ */
+ public SampleHapPairs refHapPairs() {
+ return refHapPairs;
+ }
+
+ /**
+ * Returns the target haplotype pairs.
+ * @return the target haplotype pairs
+ */
+ public SampleHapPairs targHapPairs() {
+ return targHapPairs;
+ }
+
+ /**
+ * Returns the reference haplotype segments.
+ * @return the reference haplotype segments
+ */
+ public RefHapSegs refHapSegs() {
+ return refHapSegs;
+ }
+
+ /**
+ * Returns the number of target marker clusters.
+ * @return the number of target marker clusters
+ */
+ public int nClusters() {
+ return nClusters;
+ }
+
+ /**
+ * Returns the list of target samples.
+ * @return the list of target samples
+ */
+ public Samples targetSamples() {
+ return targHapPairs.samples();
+ }
+
+ /**
+ * Returns the index of the allele sequence on the specified reference haplotype.
+ * @param marker a marker cluster index
+ * @param haplotype a haplotype index
+ * @return the specified reference allele sequence index
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nClusters()}
+ * @throws IndexOutOfBoundsException if
+ * {@code haplotype < 0 || haplotype >= this.refHapPairs().nHaps()}
+ */
+ public int refAllele(int marker, int haplotype) {
+ return refAlleles[marker].get(haplotype);
+ }
+
+ /**
+ * Returns the index of the allele sequence on the specified target haplotype.
+ * @param marker a marker cluster index
+ * @param haplotype a haplotype index
+ * @return the specified target allele sequence index
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nClusters()}
+ * @throws IndexOutOfBoundsException if
+ * {@code haplotype < 0 || haplotype >= this.targHapPairs().nHaps()}
+ */
+ public int targetAllele(int marker, int haplotype) {
+ return targAlleles[marker].get(haplotype);
+ }
+
+ /**
+ * Returns the specified allele error probability.
+ * @param marker a marker cluster index
+ * @return the specified allele error probability
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nClusters()}
+ */
+ public float errProb(int marker) {
+ return errProb[marker];
+ }
+
+ /**
+ * Returns {@code (1f - this.errProb(marker))}.
+ * @param marker a marker cluster index
+ * @return {@code (1f - this.errProb(marker))}
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nClusters()}
+ */
+ public float noErrProb(int marker) {
+ return 1f - errProb[marker];
+ }
+
+ /**
+ * Returns the probability of recombination between the specified
+ * marker cluster and the previous marker cluster, or returns {@code 0}
+ * if {@code (marker == 0)}.
+ * @param marker a marker cluster index
+ * @return the specified recombination probability
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.refHapSegs().nClusters()}
+ */
+ public float pRecomb(int marker) {
+ return pRecomb[marker];
+ }
+
+ /**
+ * Returns the specified weight.
+ * @param marker a marker index
+ * @return the specified weight (may be {@code Float.NaN})
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.refHapPairs().nMarkers()}
+ */
+ public double weight(int marker) {
+ return weight[marker];
+ }
+}
diff --git a/sample/LSHapBaum.java b/sample/LSHapBaum.java
new file mode 100644
index 0000000..2454cb8
--- /dev/null
+++ b/sample/LSHapBaum.java
@@ -0,0 +1,324 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import java.util.Arrays;
+import main.HapAlleleProbs;
+import main.LowMemHapAlleleProbs;
+import vcf.Markers;
+
+/**
+ * <p>Class {@code LSHapBaum} implements the Baum hidden Markov model
+ * forward and backward algorithms for imputing missing alleles on a
+ * target haplotype.
+ * </p>
+ * <p>Instances of class {@code LSHapBaum} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class LSHapBaum {
+
+ private final ImputationData impData;
+ private final boolean lowMem;
+ private final int n; // number of reference haplotypes
+ private final Markers refMarkers;
+ private final float[] alleleProbs;
+ private final float[][] fwdVal;
+ private final float[] bwdVal;
+ private final float[] emBwdVal;
+ private final int[] fwdValueIndex2Marker;
+
+ private final RefHapSegs refHapSegs;
+ private final float[][] fwdHapProbs;
+ private final float[][] bwdHapProbs;
+
+ private float emBwdValuesSum = 0f;
+
+ private int windowIndex = -9999;
+ private int arrayIndex = -9999;
+
+ /**
+ * Creates a {@code LSHapBaum} instance from the specified data.
+ *
+ * @param impData the input data for genotype imputation
+ * @param lowMem {@code true} if a low-memory checkpoint algorithm
+ * should be used, and {@code false} otherwise
+ *
+ * @throws NullPointerException if {@code impData == null}
+ */
+ public LSHapBaum(ImputationData impData, boolean lowMem) {
+ this.impData = impData;
+ this.lowMem = lowMem;
+ this.n = impData.refHapPairs().nHaps();
+ this.refMarkers = impData.refHapPairs().markers();
+ this.alleleProbs = new float[refMarkers.sumAlleles()];
+
+ int nClusters = impData.nClusters();
+ int size = lowMem ? (int) Math.ceil(Math.sqrt(1 + 8*nClusters)/2.0) + 1
+ : nClusters;
+ this.fwdValueIndex2Marker = new int[size];
+ this.fwdVal = new float[size][n];
+ this.bwdVal = new float[n];
+ this.emBwdVal = new float[n];
+
+ this.refHapSegs = impData.refHapSegs();
+ this.fwdHapProbs = new float[impData.nClusters()][];
+ this.bwdHapProbs = new float[impData.nClusters()][];
+ for (int j=0; j < nClusters; ++j) {
+ this.fwdHapProbs[j] = new float[refHapSegs.nSeq(j+1)];
+ this.bwdHapProbs[j] = new float[refHapSegs.nSeq(j)];
+ }
+ }
+
+ /**
+ * <p>Estimates and returns allele probabilities for the specified target
+ * haplotype. Estimated allele probabilities are conditional on the hidden
+ * Markov model (HMM) and the input data represented by
+ * {@code this.imputationData()}.
+ * </p>
+ *
+ * @param hap a target data haplotype index
+ * @return allele probabilities for the specified target haplotype
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= this.imputationData().targHapPairs().nHaps()}
+ */
+ public HapAlleleProbs randomHapSample(int hap) {
+ int nMarkers = impData.nClusters();
+ Arrays.fill(alleleProbs, 0f);
+ setFwdValues(hap);
+ setInitBwdValue(hap);
+ setStateProbs(nMarkers-1, currentIndex());
+ for (int m=nMarkers-2; m>=0; --m) {
+ setBwdValue(m, hap);
+ setStateProbs(m, previousIndex(hap));
+ }
+ setAlleleProbs(alleleProbs);
+ return new LowMemHapAlleleProbs(refMarkers, impData.targetSamples(),
+ hap, alleleProbs);
+ }
+
+ /**
+ * Returns the input data for genotype imputation.
+ * @return the input data for genotype imputation
+ */
+ public ImputationData imputationData() {
+ return impData;
+ }
+
+ private void setFwdValues(int hap) {
+ int nMarkers = impData.nClusters();
+ windowIndex = 0;
+ arrayIndex = -1;
+ for (int m=0; m<nMarkers; ++m) {
+ float sum = 0f;
+ float probRec = impData.pRecomb(m);
+ int prev = currentIndex();
+ int next = nextIndex();
+ fwdValueIndex2Marker[next] = m;
+ int a = impData.targetAllele(m, hap);
+ for (int h=0; h<n; ++h) {
+ int refAllele = impData.refAllele(m, h);
+ float em = (a == refAllele) ? impData.noErrProb(m) : impData.errProb(m);
+ float x = m==0 ? 1 : (probRec/n + (1-probRec)*fwdVal[prev][h]);
+ fwdVal[next][h] = em*x;
+ sum += fwdVal[next][h];
+ }
+ scale(fwdVal[next], sum);
+ }
+ }
+
+ private static float sum(float[] fa) {
+ float sum = 0f;
+ for (float f : fa) {
+ sum += f;
+ }
+ return sum;
+ }
+
+ private static void scale(float[] fa, float divisor) {
+ for (int j=0; j<fa.length; ++j) {
+ fa[j] /= divisor;
+ }
+ }
+
+ private void setInitBwdValue(int hap) {
+ int m = impData.nClusters() - 1;
+ float f = 1f/n;
+ emBwdValuesSum = 0f;
+ int a = impData.targetAllele(m, hap);
+ for (int h=0; h<n; ++h) {
+ int refAllele = impData.refAllele(m, h);
+ float em = (a == refAllele) ? impData.noErrProb(m) : impData.errProb(m);
+ bwdVal[h] = f;
+ emBwdVal[h] = f*em;
+ emBwdValuesSum += emBwdVal[h];
+ }
+ }
+
+ private void setBwdValue(int m, int hap) {
+ float bwdValuesSum = 0f;
+ float probRec = impData.pRecomb(m + 1);
+ float commonTerm = emBwdValuesSum*probRec/n;
+ for (int h=0; h<n; ++h) {
+ bwdVal[h] = commonTerm + (1-probRec)*emBwdVal[h];
+ bwdValuesSum += bwdVal[h];
+ }
+ int a = impData.targetAllele(m, hap);
+ emBwdValuesSum = 0f;
+ for (int h=0; h<n; ++h) {
+ bwdVal[h] /= bwdValuesSum; // normalize before applying the emission probability
+ int refAllele = impData.refAllele(m, h);
+ float em = (a == refAllele) ? impData.noErrProb(m) : impData.errProb(m);
+ emBwdVal[h] = em*bwdVal[h];
+ emBwdValuesSum += emBwdVal[h];
+ }
+ }
+
+ private void setStateProbs(int m, int fwdIndex) {
+ Arrays.fill(fwdHapProbs[m], 0f);
+ Arrays.fill(bwdHapProbs[m], 0f);
+ for (int h=0; h<n; ++h) {
+ float stateProbs = fwdVal[fwdIndex][h]*bwdVal[h];
+ fwdHapProbs[m][refHapSegs.seq(m+1, h)] += stateProbs;
+ bwdHapProbs[m][refHapSegs.seq(m, h)] += stateProbs;
+ }
+ scale(fwdHapProbs[m], sum(fwdHapProbs[m]));
+ scale(bwdHapProbs[m], sum(bwdHapProbs[m]));
+ }
+
+ private static float threshold(int nSeq) {
+ return 0.5f/nSeq;
+ }
+
+ private void setAlleleProbs(float[] alleleProbs) {
+ int nClusters = refHapSegs.nClusters();
+ setFirstAlleleProbs(alleleProbs);
+ for (int cluster=1; cluster < nClusters; ++cluster) {
+ setAlleleProbs(alleleProbs, cluster);
+ }
+ setLastAlleleProbs(alleleProbs);
+ }
+
+ private void setFirstAlleleProbs(float[] alleleProbs) {
+ int segment = 0;
+ int refMarker = refHapSegs.clusterStart(segment);
+ int nSeq = refHapSegs.nSeq(segment);
+ float threshold = threshold(nSeq);
+ for (int h=0; h<nSeq; ++h) {
+ if (bwdHapProbs[segment][h] >= threshold) {
+ for (int m=0; m<refMarker; ++m) {
+ int start = refMarkers.sumAlleles(m);
+ int allele = refHapSegs.allele(segment, m, h);
+ alleleProbs[start + allele] += bwdHapProbs[segment][h];
+ }
+ }
+ }
+ }
+
+ private void setAlleleProbs(float[] alleleProbs, int cluster) {
+ assert cluster > 0;
+ int startRefMarker = refHapSegs.clusterStart(cluster-1);
+ int midRefMarker = refHapSegs.clusterEnd(cluster - 1);
+ int endRefMarker = refHapSegs.clusterStart(cluster);
+ int nSeq = refHapSegs.nSeq(cluster);
+ float threshold = threshold(nSeq);
+ for (int seq=0; seq<nSeq; ++seq) {
+ boolean useFwd = fwdHapProbs[cluster-1][seq] >= threshold;
+ boolean useBwd = bwdHapProbs[cluster][seq] >= threshold;
+ if (useFwd) {
+ for (int m=startRefMarker; m<midRefMarker; ++m) {
+ int start = refMarkers.sumAlleles(m);
+ int allele = refHapSegs.allele(cluster, m - startRefMarker, seq);
+ alleleProbs[start + allele] += fwdHapProbs[cluster-1][seq];
+ }
+ }
+ if (useFwd || useBwd) {
+ for (int m=midRefMarker; m<endRefMarker; ++m) {
+ int start = refMarkers.sumAlleles(m);
+ int allele = refHapSegs.allele(cluster, m - startRefMarker, seq);
+ double wt = impData.weight(m);
+ alleleProbs[start + allele] += wt*fwdHapProbs[cluster-1][seq];
+ alleleProbs[start + allele] += (1-wt)*bwdHapProbs[cluster][seq];
+ }
+ }
+ }
+ }
+
+ private void setLastAlleleProbs(float[] alleleProbs) {
+ int segment = refHapSegs.nClusters();
+ int cluster = segment - 1;
+ int refMarkerStart = refHapSegs.clusterStart(cluster);
+ int refMarkerEnd = refHapSegs.refHapPairs().nMarkers();
+ int nSeq = refHapSegs.nSeq(segment);
+ float threshold = threshold(nSeq);
+ for (int seq=0; seq<nSeq; ++seq) {
+ if (fwdHapProbs[cluster][seq] >= threshold) {
+ for (int m=refMarkerStart; m<refMarkerEnd; ++m) {
+ int start = refMarkers.sumAlleles(m);
+ int allele = refHapSegs.allele(segment, m - refMarkerStart, seq);
+ alleleProbs[start + allele] += fwdHapProbs[cluster][seq];
+ }
+ }
+ }
+ }
+
+ private int nextIndex() {
+ ++arrayIndex;
+ if (arrayIndex == fwdVal.length) {
+ ++windowIndex;
+ arrayIndex = windowIndex;
+ }
+ return arrayIndex;
+ }
+
+ private int currentIndex() {
+ return arrayIndex;
+ }
+
+ private int previousIndex(int hap) {
+ if (arrayIndex == windowIndex) {
+ --windowIndex;
+ arrayIndex = windowIndex;
+ int start = fwdValueIndex2Marker[arrayIndex] + 1;
+ int end = start + ( fwdVal.length - (arrayIndex + 1) );
+ for (int m=start; m<end; ++m) {
+ float sum = 0f;
+ float probRec = impData.pRecomb(m);
+ int prev = currentIndex();
+ int next = nextIndex();
+ fwdValueIndex2Marker[next] = m;
+ int a = impData.targetAllele(m, hap);
+ for (int h=0; h<n; ++h) {
+ int refAllele = impData.refAllele(m, h);
+ float em = (a == refAllele) ? impData.noErrProb(m) : impData.errProb(m);
+ float x = (probRec/n + (1-probRec)*fwdVal[prev][h]); // since m>0
+ fwdVal[next][h] = em*x;
+ sum += fwdVal[next][h];
+ }
+ scale(fwdVal[next], sum);
+ }
+ return arrayIndex;
+ }
+ else {
+ return --arrayIndex;
+ }
+ }
+}
diff --git a/sample/RecombSingleBaum.java b/sample/RecombSingleBaum.java
new file mode 100644
index 0000000..7d80f15
--- /dev/null
+++ b/sample/RecombSingleBaum.java
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2014 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sample;
+
+import dag.Dag;
+import haplotype.HapPair;
+import haplotype.BitHapPair;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import vcf.GL;
+
+/**
+ * <p>Class {@code RecombSingleBaum} implements the Baum forward and
+ * backward algorithms for a hidden Markov model (HMM) of an individual's
+ * genotype data. The HMM transition probabilities model recent
+ * genetic recombination by allowing jumps between states that are not
+ * connected by a node.
+ * </p>
+ * <p>Instances of class {@code RecombSingleBaum} are not thread-safe.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RecombSingleBaum implements SingleBaumInterface {
+
+ private final SamplerData samplerData;
+ private final Dag dag;
+ private final RestrictedDag rdag;
+ private final GL gl;
+ private final int nMarkers;
+ private final int nSamplesPerIndividual;
+ private final long seed;
+ private final Random random;
+
+ private final int[] node1;
+ private final int[] node2;
+ private final float[] baseTrProb;
+ private final float[] maxSum;
+
+ private final int[][] alleles1;
+ private final int[][] alleles2;
+
+ private final RecombSingleBaumLevel[] levels;
+ private final RecombSingleNodes fwdNodes;
+ private final RecombSingleNodes bwdNodes;
+
+ private int windowIndex = -9999;
+ private int arrayIndex = -9999;
+
+ /**
+  * Creates a new {@code RecombSingleBaum} instance from the specified
+  * data.
+  *
+  * @param samplerData the analysis data
+  * @param seed the random seed
+  * @param nSamplesPerIndividual the number of haplotype pairs that
+  * will be sampled for each individual
+  * @param lowMem {@code true} if a low memory algorithm should be used, and
+  * {@code false} otherwise
+  *
+  * @throws IllegalArgumentException if {@code nSamplesPerIndividual < 1}
+  * @throws NullPointerException if {@code samplerData == null}
+  */
+ public RecombSingleBaum(SamplerData samplerData, long seed,
+         int nSamplesPerIndividual, boolean lowMem) {
+     if (nSamplesPerIndividual < 1) {
+         throw new IllegalArgumentException(
+                 String.valueOf(nSamplesPerIndividual));
+     }
+     this.samplerData = samplerData;
+     this.dag = samplerData.rdag().dag();
+     this.rdag = samplerData.rdag();
+     this.gl = samplerData.gl();
+     this.nMarkers = samplerData.nMarkers();
+     this.nSamplesPerIndividual = nSamplesPerIndividual;
+     this.seed = seed;
+     this.random = new Random(seed);
+
+     this.node1 = new int[nSamplesPerIndividual];
+     this.node2 = new int[nSamplesPerIndividual];
+     this.baseTrProb = new float[nSamplesPerIndividual];
+     this.maxSum = new float[nSamplesPerIndividual];
+
+     this.alleles1 = new int[nSamplesPerIndividual][nMarkers];
+     this.alleles2 = new int[nSamplesPerIndividual][nMarkers];
+
+     int size = dag.nLevels();
+     if (lowMem) {
+         // Store only about sqrt(2*nLevels) levels.  The expression is
+         // evaluated in double arithmetic so that 8*nLevels cannot
+         // overflow an int for a very large number of levels.
+         size = (int) Math.ceil(Math.sqrt(1.0 + 8.0*dag.nLevels())/2.0) + 1;
+     }
+     this.levels = new RecombSingleBaumLevel[size];
+     for (int j=0; j<levels.length; ++j) {
+         levels[j] = new RecombSingleBaumLevel(samplerData);
+     }
+     this.fwdNodes = new RecombSingleNodes(dag.maxNodes());
+     this.bwdNodes = new RecombSingleNodes(dag.maxNodes());
+ }
+
+ @Override
+ public Dag dag() {
+ // obtained from rdag, the same restricted DAG the dag field was read from
+ return rdag.dag();
+ }
+
+ @Override
+ public GL gl() {
+ return gl;
+ }
+
+ @Override
+ public int nSamplesPerIndividual() {
+ return nSamplesPerIndividual;
+ }
+
+ @Override
+ public long seed() {
+ // the seed passed to the constructor, not the current Random state
+ return seed;
+ }
+
+ @Override
+ public List<HapPair> randomSample(int sample) {
+     DiploidStates states = rdag.singleStates(sample);
+     forwardAlgorithm(sample, states);
+     // sample at the last level, then once for each earlier level
+     initSampleAlleles(currentLevel(), sample);
+     for (int step=1; step<nMarkers; ++step) {
+         sampleAlleles(previousLevel(sample, states), sample);
+     }
+     pruneLevels();
+     return hapList(sample);
+ }
+
+ @Override
+ public List<HapPair> randomSample(int sample, double[] gprobs) {
+     checkGprobs(gprobs);
+     DiploidStates states = rdag.singleStates(sample);
+     forwardAlgorithm(sample, states);
+
+     // last level: sample, start the backward pass, record genotype probs
+     RecombSingleBaumLevel last = currentLevel();
+     initSampleAlleles(last, sample);
+     last.setInitialBackwardValues(bwdNodes);
+     setGprobs(last, gprobs);
+
+     // remaining levels, walking back toward the first marker
+     for (int step=1; step<nMarkers; ++step) {
+         RecombSingleBaumLevel prior = previousLevel(sample, states);
+         sampleAlleles(prior, sample);
+         prior.setBackwardValues(bwdNodes);
+         setGprobs(prior, gprobs);
+     }
+     pruneLevels();
+     return hapList(sample);
+ }
+
+ /*
+  * Shrinks every level whose capacity exceeds three times the estimated
+  * mean level size.  A shrunken level is reset, which also empties it.
+  */
+ private void pruneLevels() {
+     int meanSize = estMeanSize();
+     int maxCapacity = 3*meanSize;
+     int shrunkCapacity = 3*meanSize/2 + 1;
+     for (RecombSingleBaumLevel level : levels) {
+         if (level.capacity() > maxCapacity) {
+             level.reset(shrunkCapacity);
+         }
+     }
+ }
+
+ private int estMeanSize() {
+ int nLevelsToSample = 20;
+ long sizeSum = 0;
+ for (int j=0; j<nLevelsToSample; ++j) {
+ sizeSum += levels[random.nextInt(levels.length)].size();
+ }
+ return (int) (sizeSum / nLevelsToSample);
+ }
+
+ /*
+  * Verifies that gprobs has one entry per genotype summed over all markers.
+  * The exception message is the required length.
+  */
+ private void checkGprobs(double[] gprobs) {
+     int requiredLength = gl.markers().sumGenotypes();
+     if (gprobs.length == requiredLength) {
+         return;
+     }
+     throw new IllegalArgumentException(String.valueOf(requiredLength));
+ }
+
+ /*
+  * Copies the genotype probabilities of the specified level into the
+  * section of gprobs that belongs to the level's marker.  Does nothing
+  * if gprobs is null.
+  */
+ private void setGprobs(RecombSingleBaumLevel level, double[] gprobs) {
+     if (gprobs == null) {
+         return;
+     }
+     int marker = level.marker();
+     int nGt = gl.marker(marker).nGenotypes();
+     int offset = gl.markers().sumGenotypes(marker);
+     for (int gt=0; gt<nGt; ++gt) {
+         gprobs[offset + gt] = level.gprobs(gt);
+     }
+ }
+
+ /*
+  * Returns a list with one haplotype pair per sampled copy, built from
+  * the alleles stored in alleles1 and alleles2 for the specified sample.
+  */
+ private List<HapPair> hapList(int sample) {
+     // exactly nSamplesPerIndividual pairs are added, so size the list for
+     // that many (doubling the count over-allocates and can overflow)
+     List<HapPair> hapList = new ArrayList<>(nSamplesPerIndividual);
+     for (int j=0; j<nSamplesPerIndividual; ++j) {
+         HapPair haps = new BitHapPair(gl.markers(), gl.samples(), sample,
+                 alleles1[j], alleles2[j]);
+         hapList.add(haps);
+     }
+     return hapList;
+ }
+
+ /*
+  * Draws an initial state at the specified level for each sampled copy
+  * and saves the data for that state.
+  */
+ private void initSampleAlleles(RecombSingleBaumLevel level, int sample) {
+     int copy = 0;
+     while (copy < nSamplesPerIndividual) {
+         int state = initialRandomState(level);
+         saveCurrentData(level, sample, copy, state);
+         ++copy;
+     }
+ }
+
+ /*
+  * Draws a state index of the specified level with probability
+  * proportional to the level's forward values.
+  */
+ private int initialRandomState(RecombSingleBaumLevel level) {
+     float target = random.nextFloat();
+     int nStates = level.size();
+     float cumulative = 0.0f;
+     int state = 0;
+     while (state < nStates) {
+         cumulative += level.forwardValue(state);
+         if (target <= cumulative) {
+             return state;
+         }
+         ++state;
+     }
+     // reached only if rounding keeps the cumulative sum below the target
+     return level.size()-1;
+ }
+
+ /*
+  * Records, for the specified copy, the parent nodes, the product of the
+  * two edge probabilities, the value used to scale the next random draw,
+  * and the two alleles of the specified state of the specified level.
+  */
+ private void saveCurrentData(RecombSingleBaumLevel level, int sample,
+         int copy, int stateIndex) {
+     int marker = level.marker();
+     int firstEdge = level.edge1(stateIndex);
+     int secondEdge = level.edge2(stateIndex);
+     int firstSymbol = level.symbol1(stateIndex);
+     int secondSymbol = level.symbol2(stateIndex);
+
+     node1[copy] = level.parentNode1(stateIndex);
+     node2[copy] = level.parentNode2(stateIndex);
+     baseTrProb[copy] = dag.edgeProb(marker, firstEdge)
+             * dag.edgeProb(marker, secondEdge);
+     maxSum[copy] = level.forwardValue(stateIndex) * level.forwardValuesSum()
+             / gl.gl(marker, sample, firstSymbol, secondSymbol);
+
+     alleles1[copy][marker] = firstSymbol;
+     alleles2[copy][marker] = secondSymbol;
+ }
+
+ /*
+  * Draws, for each sampled copy, a state at the specified level given the
+  * data saved for that copy, and saves the data for the drawn state.
+  */
+ private void sampleAlleles(RecombSingleBaumLevel level, int sample) {
+     int copy = 0;
+     while (copy < nSamplesPerIndividual) {
+         int state = randomPreviousState(level, copy);
+         saveCurrentData(level, sample, copy, state);
+         ++copy;
+     }
+ }
+
+ /*
+ * Draws a state index of the specified level.  Each state is weighted by
+ * its forward value times a transition weight tp to the node pair saved
+ * for the specified copy; the draw is scaled by maxSum[copy].
+ */
+ private int randomPreviousState(RecombSingleBaumLevel level, int copy) {
+ int m = level.marker();
+ // probabilities of the saved nodes as parent nodes at the next marker
+ float np1 = dag.parentProb(m+1, node1[copy]);
+ float np2 = dag.parentProb(m+1, node2[copy]);
+ float pRecomb = samplerData.pRecomb(m+1);
+ float d = random.nextFloat() * maxSum[copy];
+ float sum = 0.0f;
+ for (int j=0, n=level.size(); j<n; ++j) {
+ float tp = 0.0f;
+ // true when the state's child node equals the saved node
+ boolean noJump1 = level.childNode1(j)==node1[copy];
+ boolean noJump2 = level.childNode2(j)==node2[copy];
+ // both child nodes match the saved nodes
+ if (noJump1 && noJump2) {
+ tp += (1-pRecomb)*(1-pRecomb)*baseTrProb[copy]/ (np1*np2);
+ }
+ // first child node matches
+ if (noJump1) {
+ tp += (1-pRecomb)*pRecomb*baseTrProb[copy] / np1;
+ }
+ // second child node matches
+ if (noJump2) {
+ tp += pRecomb*(1-pRecomb)*baseTrProb[copy] / np2;
+ }
+ // term added for every state, whether or not its child nodes match
+ tp += pRecomb*pRecomb*baseTrProb[copy];
+
+ sum += (level.forwardValue(j)*tp);
+ if (d <= sum) {
+ return j;
+ }
+ }
+ return level.size()-1; // if reached due to rounding
+ }
+
+ /*
+  * Advances arrayIndex by one slot and returns the level stored there.
+  * When the index runs off the end of the levels array, the window start
+  * is advanced by one and the index restarts there.
+  */
+ private RecombSingleBaumLevel nextLevel() {
+     if (++arrayIndex == levels.length) {
+         arrayIndex = ++windowIndex;
+     }
+     return levels[arrayIndex];
+ }
+
+ // Returns the level at the current array index without moving the index.
+ private RecombSingleBaumLevel currentLevel() {
+ return levels[arrayIndex];
+ }
+
+ /*
+ * Moves one level back and returns that level.  If arrayIndex is already
+ * at windowIndex, the window is first moved back by one slot, and the
+ * forward values for the markers that follow the marker of that slot are
+ * recomputed into the slots after it; the last recomputed level is
+ * returned.
+ */
+ private RecombSingleBaumLevel previousLevel(int sample,
+ DiploidStates permittedStates) {
+ if (arrayIndex == windowIndex) {
+ --windowIndex;
+ arrayIndex = windowIndex;
+ // reload fwdNodes with the child node values of the stored level
+ levels[arrayIndex].setChildNodes(fwdNodes);
+ int startLevel = levels[windowIndex].marker() + 1;
+ // one marker for every array slot that follows windowIndex
+ int endLevel = startLevel + (levels.length - (windowIndex + 1) );
+ for (int marker=startLevel; marker<endLevel; ++marker) {
+ nextLevel().setForwardValues(fwdNodes, permittedStates, marker, sample);
+ }
+ return currentLevel();
+ }
+ else {
+ return levels[--arrayIndex];
+ }
+ }
+
+ /*
+  * Computes the forward values for every marker, starting from a single
+  * node pair (0, 0) with value 1.
+  */
+ private void forwardAlgorithm(int sample, DiploidStates permittedStates) {
+     // positioned so that the first nextLevel() call wraps around to slot 0
+     windowIndex = -1;
+     arrayIndex = levels.length - 1;
+     fwdNodes.clear();
+     fwdNodes.sumUpdate(0, 0, 1.0f);
+     int marker = 0;
+     while (marker < nMarkers) {
+         RecombSingleBaumLevel level = nextLevel();
+         level.setForwardValues(fwdNodes, permittedStates, marker, sample);
+         ++marker;
+     }
+ }
+}
diff --git a/sample/RecombSingleBaumLevel.java b/sample/RecombSingleBaumLevel.java
new file mode 100644
index 0000000..d8ba8ad
--- /dev/null
+++ b/sample/RecombSingleBaumLevel.java
@@ -0,0 +1,613 @@
+/*
+ * Copyright 2014 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sample;
+
+import dag.Dag;
+import java.util.Arrays;
+import vcf.BasicGL;
+import vcf.GL;
+
+/**
+ * <p>Class {@code RecombSingleBaumLevel} computes forward and backward
+ * Baum values at a level of a hidden Markov model (HMM) whose states are
+ * ordered edge pairs of a leveled directed acyclic graph (DAG).
+ * </p>
+ * <p>Instances of class {@code RecombSingleBaumLevel} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RecombSingleBaumLevel {
+
+ private static final int INITIAL_CAPACITY = 400;
+ private static final float MIN_VALUE = 100*Float.MIN_VALUE;
+ private final SamplerData samplerData;
+ private final Dag dag;
+ private final GL gl;
+
+ private int marker = -1;
+ private int sample = -1;
+ private int size = 0;
+
+ private int capacity = INITIAL_CAPACITY;
+ private int[] edges1 = new int[INITIAL_CAPACITY];
+ private int[] edges2 = new int[INITIAL_CAPACITY];
+ private float[] fwdValues = new float[INITIAL_CAPACITY];
+ private float[] bwdValues = new float[INITIAL_CAPACITY];
+ private float fwdValueSum = 0.0f;
+ private float bwdValueSum = 0.0f;
+
+ private int nGenotypes = 0;
+ private float[] gtProbs = new float[3];
+
+ /**
+  * Creates a {@code RecombSingleBaumLevel} that reads its DAG and its
+  * emission probabilities from the specified analysis data.
+  * @param samplerData the analysis data
+  * @throws NullPointerException if {@code samplerData == null}
+  */
+ public RecombSingleBaumLevel(SamplerData samplerData) {
+     this.gl = samplerData.gl();
+     this.dag = samplerData.rdag().dag();
+     this.samplerData = samplerData;
+ }
+
+ /**
+ * Sets the Baum forward algorithm values for this level of the HMM
+ * and records the child node pair values in the specified
+ * {@code nodes} parameter. When the method call returns, the {@code nodes}
+ * parameter will be reset to the child node pair values for this level of
+ * the HMM.
+ *
+ * @param nodes child node pair values at the previous level of the HMM
+ * @param permittedStates the permitted diploid model states
+ * @param marker the level of the HMM at which the Baum forward algorithm
+ * values will be computed
+ * @param sample a sample index
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.dag().nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if either node in any node pair with
+ * non-zero value is not a valid parent node at the specified level of the
+ * HMM
+ * @throws NullPointerException if
+ * {@code nodes == null || permittedStates == null}
+ */
+ public void setForwardValues(RecombSingleNodes nodes,
+ DiploidStates permittedStates, int marker, int sample) {
+ // marker and sample are stored first: the helpers below read the fields
+ this.marker = marker;
+ this.sample = sample;
+ this.nGenotypes = gl.marker(marker).nGenotypes();
+ // discard the states and sums left over from any earlier use of this level
+ this.size = 0;
+ this.fwdValueSum = 0.0f;
+ this.bwdValueSum = 0.0f;
+ initializeGtProbs(); // initialized here due to gtProbs() contract
+ // setStates() reads the incoming node values; setChildNodes() then
+ // overwrites nodes with this level's child node values
+ setStates(nodes, permittedStates);
+ setChildNodes(nodes);
+ }
+
+ /*
+  * Zeroes the first nGenotypes entries of gtProbs, replacing the array
+  * with a larger zero-filled array if it is too short.
+  */
+ private void initializeGtProbs() {
+     if (gtProbs.length >= nGenotypes) {
+         Arrays.fill(gtProbs, 0, nGenotypes, 0f);
+         return;
+     }
+     int grownLength = 3*gtProbs.length/2 + 1;
+     gtProbs = new float[Math.max(nGenotypes, grownLength)];
+ }
+
+ /*
+  * Stores every permitted state that has a positive forward value, then
+  * normalizes the stored forward values so that they sum to 1.  The sum
+  * before normalization is saved in fwdValueSum.
+  */
+ private void setStates(RecombSingleNodes nodes,
+         DiploidStates permittedStates) {
+     float total = 0.0f;
+     permittedStates.setMarker(marker);
+     while (permittedStates.hasNext()) {
+         permittedStates.next();
+         int e1 = permittedStates.edge1();
+         int e2 = permittedStates.edge2();
+         float value = fwdValue(e1, e2, nodes);
+         if (!(value > 0.0)) {
+             continue;   // only states with a positive value are stored
+         }
+         if (size == capacity) {
+             ensureCapacity(size+1);
+         }
+         edges1[size] = e1;
+         edges2[size] = e2;
+         fwdValues[size] = value;
+         ++size;
+         total += value;
+     }
+     if (total <= 0f) {
+         throw new IllegalStateException(String.valueOf(total));
+     }
+     for (int k=0; k<size; ++k) {
+         fwdValues[k] /= total;
+     }
+     fwdValueSum = total;
+ }
+
+ /*
+ * Returns the unnormalized forward value of the state (edge1, edge2) at
+ * this level.  Returns 0 if the emission probability of the state is not
+ * positive; otherwise returns the sum of four terms weighted by rec0,
+ * rec1, rec1 and rec2, raised to MIN_VALUE if the sum is smaller.
+ */
+ private float fwdValue(int edge1, int edge2, RecombSingleNodes nodes) {
+ float fwdValue = 0.0f;
+ int symbol1 = dag.symbol(marker, edge1);
+ int symbol2 = dag.symbol(marker, edge2);
+ float emProb = gl.gl(marker, sample, symbol1, symbol2);
+
+ if (emProb > 0.0) {
+ float pRecom = samplerData.pRecomb(marker);
+ // weights built from pRecom: squared complement, mixed, squared
+ float rec0 = (1-pRecom)*(1-pRecom);
+ float rec1 = pRecom*(1-pRecom);
+ float rec2 = pRecom*pRecom;
+ float ep1 = dag.condEdgeProb(marker, edge1);
+ float ep2 = dag.condEdgeProb(marker, edge2);
+ float ep = ep1*ep2;
+
+ int pn1 = dag.parentNode(marker, edge1);
+ int pn2 = dag.parentNode(marker, edge2);
+ float pnp1 = dag.parentProb(marker, pn1);
+ float pnp2 = dag.parentProb(marker, pn2);
+
+ // value of the exact parent node pair
+ fwdValue = rec0*emProb*ep*nodes.value(pn1, pn2);
+ // summed values for each single parent node, scaled by the
+ // parent probability of the other node
+ fwdValue += rec1*emProb*ep*pnp2*nodes.sumNode1Value(pn1);
+ fwdValue += rec1*emProb*ep*pnp1*nodes.sumNode2Value(pn2);
+ // total of all node pair values, scaled by both parent probabilities
+ fwdValue += rec2*emProb*ep*pnp1*pnp2*nodes.sumValue();
+ // floor keeps a state with positive emission probability above 0
+ if (fwdValue<MIN_VALUE) {
+ fwdValue = MIN_VALUE;
+ }
+ }
+ return fwdValue;
+ }
+
+ /**
+  * Clears the specified {@code RecombSingleNodes} object and then adds
+  * the forward value of each state at this level of the HMM to the value
+  * of the state's child node pair.
+  *
+  * @param nodes the node pair values that will be set
+  *
+  * @throws NullPointerException if {@code nodes == null}
+  */
+ public void setChildNodes(RecombSingleNodes nodes) {
+     nodes.clear();
+     int state = 0;
+     while (state < size) {
+         nodes.sumUpdate(dag.childNode(marker, edges1[state]),
+                 dag.childNode(marker, edges2[state]),
+                 fwdValues[state]);
+         ++state;
+     }
+ }
+
+ /**
+ * Initializes the node pair values for the Baum backward algorithm.
+ * Every state at this level receives the same backward value
+ * {@code 1.0f/size}, the forward value of each state is added to the
+ * genotype probability of the state's genotype, and the specified
+ * {@code nodes} object is cleared and then filled with parent node pair
+ * values for this level.
+ *
+ * @param nodes the node pair values to be initialized
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setInitialBackwardValues(RecombSingleNodes nodes) {
+ float bwdValue = 1.0f/size;
+ // the stored sum is the number of states, not the sum of bwdValues
+ bwdValueSum = size;
+ nodes.clear();
+ for (int j=0; j<size; ++j) {
+ bwdValues[j] = bwdValue;
+ int gtIndex = BasicGL.genotype(symbol1(j), symbol2(j));
+ // forward values were normalized to sum to 1 when they were set
+ gtProbs[gtIndex] += fwdValues[j];
+
+ float nextBaseBwdValue = nextBaseBwdValue(edges1[j], edges2[j],
+ bwdValues[j]);
+ int pn1 = dag.parentNode(marker, edges1[j]);
+ int pn2 = dag.parentNode(marker, edges2[j]);
+ nodes.sumUpdate(pn1, pn2, nextBaseBwdValue);
+ }
+ }
+
+ /**
+ * Sets the Baum backward algorithm values for this level of the HMM
+ * and stores the parent node pair values in the specified
+ * {@code nodes} parameter. When the method call returns, this
+ * {@code nodes} parameter will be reset to the parent
+ * node pair values for this level of the HMM.
+ *
+ * @param nodes parent node pair values at the next level of HMM
+ *
+ * @throws IndexOutOfBoundsException if either node in any node pair with
+ * non-zero value is not a valid child node at this level of the HMM
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setBackwardValues(RecombSingleNodes nodes) {
+ // first pass: unnormalized backward value of every state, read from the
+ // incoming node values before nodes is cleared
+ for (int j=0; j<size; ++j) {
+ int child1 = dag.childNode(marker, edges1[j]);
+ int child2 = dag.childNode(marker, edges2[j]);
+ float childProb1 = dag.parentProb(marker+1, child1);
+ float childProb2 = dag.parentProb(marker+1, child2);
+
+ float pRecom = samplerData.pRecomb(marker+1);
+ float rec0 = (1-pRecom)*(1-pRecom);
+ float rec1 = pRecom*(1-pRecom);
+ float rec2 = pRecom*pRecom;
+ float bwdValue = rec0*nodes.value(child1, child2)
+ / (childProb1*childProb2);
+ bwdValue += rec1*nodes.sumNode1Value(child1)/childProb1;
+ bwdValue += rec1*nodes.sumNode2Value(child2)/childProb2;
+ bwdValue += rec2*nodes.sumValue();
+ bwdValues[j] = bwdValue;
+ // accumulates on top of the value set by setForwardValues() (0)
+ bwdValueSum += bwdValue;
+ }
+ nodes.clear();
+ float gtProbsSum = 0f;
+ // second pass: normalize, accumulate genotype probabilities, and
+ // rebuild nodes with this level's parent node pair values
+ for (int j=0; j<size; ++j) {
+ bwdValues[j] /= bwdValueSum;
+ float stateProb = (fwdValues[j] * bwdValues[j]);
+ int gtIndex = BasicGL.genotype(symbol1(j), symbol2(j));
+ // gtProbs assumed to be initialized in setForwardValues() method
+ gtProbs[gtIndex] += stateProb;
+ gtProbsSum += stateProb;
+
+ float nextBaseBwdValue = nextBaseBwdValue(edges1[j], edges2[j],
+ bwdValues[j]);
+ int pn1 = dag.parentNode(marker, edges1[j]);
+ int pn2 = dag.parentNode(marker, edges2[j]);
+ nodes.sumUpdate(pn1, pn2, nextBaseBwdValue);
+ }
+ // scale the genotype probabilities so that they sum to 1
+ for (int j=0; j<nGenotypes; ++j) {
+ gtProbs[j] /= gtProbsSum;
+ }
+ }
+
+ /*
+  * Returns the product of the emission probability of the state
+  * (edge1, edge2), the specified backward value, and the two edge
+  * probabilities.  A product below MIN_VALUE is raised to MIN_VALUE when
+  * the specified backward value is positive.
+  */
+ private float nextBaseBwdValue(int edge1, int edge2, float lastBwdValue) {
+     float edgeProb1 = dag.edgeProb(marker, edge1);
+     float edgeProb2 = dag.edgeProb(marker, edge2);
+     float emission = gl.gl(marker, sample, dag.symbol(marker, edge1),
+             dag.symbol(marker, edge2));
+     float product = emission*lastBwdValue*edgeProb1*edgeProb2;
+     boolean tooSmall = (product<MIN_VALUE && lastBwdValue>0.0);
+     return tooSmall ? MIN_VALUE : product;
+ }
+
+ /**
+ * Returns the directed acyclic graph that determines the transition
+ * probabilities.
+ * @return the directed acyclic graph that determines the transition
+ * probabilities
+ */
+ public Dag dag() {
+ return dag;
+ }
+
+ /**
+ * Returns the emission probabilities.
+ * @return the emission probabilities
+ */
+ public GL gl() {
+ return gl;
+ }
+
+ /**
+ * Returns the level of the HMM. The value is {@code -1} until
+ * forward values have been set for this level.
+ * @return the level of the HMM
+ */
+ public int marker() {
+ return marker;
+ }
+
+ /**
+ * Returns the number of possible genotypes at this level of the HMM.
+ * @return the number of possible genotypes at this level of the HMM
+ */
+ public int nGenotypes() {
+ return nGenotypes;
+ }
+
+ /**
+  * Returns the specified posterior genotype probability.  Returns 0
+  * if the Baum backward probabilities have not been set.
+  * @param gt a genotype index
+  * @return the specified posterior genotype probability
+  * @throws IndexOutOfBoundsException if
+  * {@code gt < 0 || gt >= this.nGenotypes()}
+  */
+ public float gprobs(int gt) {
+     // The backing array can be longer than nGenotypes, so the index is
+     // checked explicitly.  The exception type follows the documented
+     // contract and the other index checks in this class.
+     if (gt < 0 || gt >= nGenotypes) {
+         String s = "gt=" + gt + " nGenotypes()=" + nGenotypes;
+         throw new IndexOutOfBoundsException(s);
+     }
+     return gtProbs[gt];
+ }
+
+ /**
+ * Returns the current capacity of this level.
+ * @return the current capacity of this level
+ */
+ public int capacity() {
+ // read from the array itself; the four state arrays share one length
+ return edges1.length;
+ }
+
+ /**
+ * Resets the size of this level to 0 and resets the capacity of this
+ * level to the specified value.
+ *
+ * @param newCapacity the new capacity
+ * @throws IllegalArgumentException if {@code newCapacity < 0}
+ */
+ public void reset(int newCapacity) {
+ if (newCapacity<0) {
+ throw new IllegalArgumentException(String.valueOf(newCapacity));
+ }
+ // marker, sample, the value sums and gtProbs are left unchanged
+ size = 0;
+ capacity = newCapacity;
+ edges1 = new int[newCapacity];
+ edges2 = new int[newCapacity];
+ fwdValues = new float[newCapacity];
+ bwdValues = new float[newCapacity];
+ }
+
+ /**
+ * Returns the number of states with nonzero forward probability at
+ * this level of the HMM.
+ *
+ * @return the number of states with nonzero forward probability at
+ * this level of the HMM
+ */
+ public int size() {
+ return size;
+ }
+
+ // Rejects a state index at or beyond size().  A negative index is not
+ // checked here; callers hit the bounds check of their array access instead.
+ private void checkIndex(int state) {
+ if (state >= size) {
+ String s = "state=" + state + " size()=" + size();
+ throw new IndexOutOfBoundsException(s);
+ }
+ }
+
+ /**
+ * Returns the first edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the first edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edge1(int state) {
+ checkIndex(state);
+ return edges1[state];
+ }
+
+ /**
+ * Returns the second edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the second edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edge2(int state) {
+ checkIndex(state);
+ return edges2[state];
+ }
+
+ /**
+ * Returns the parent node of the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNode1(int state) {
+ checkIndex(state);
+ return dag.parentNode(marker, edges1[state]);
+ }
+
+ /**
+ * Returns the parent node of the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNode2(int state) {
+ checkIndex(state);
+ return dag.parentNode(marker, edges2[state]);
+ }
+
+ /**
+ * Returns the child node of the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNode1(int state) {
+ checkIndex(state);
+ return dag.childNode(marker, edges1[state]);
+ }
+
+ /**
+ * Returns the child node of the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNode2(int state) {
+ checkIndex(state);
+ return dag.childNode(marker, edges2[state]);
+ }
+
+ /**
+ * Returns the symbol for the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbol1(int state) {
+ // the index is validated inside edge1(state)
+ return dag.symbol(marker, edge1(state));
+ }
+
+ /**
+ * Returns the symbol for the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbol2(int state) {
+ // the index is validated inside edge2(state)
+ return dag.symbol(marker, edge2(state));
+ }
+
+ /**
+ * Returns the normalized forward value for the specified HMM state
+ * with nonzero forward probability.
+ * The normalized forward value is obtained by dividing the
+ * forward value by the sum of the forward values at this level
+ * of the HMM.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ *
+ * @return the normalized forward value for the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public float forwardValue(int state) {
+ checkIndex(state);
+ return fwdValues[state];
+ }
+
+ /**
+ * Returns the normalized backward value for the specified HMM state
+ * with nonzero forward probability.
+ * The normalized backward value is obtained by dividing the
+ * backward value by the sum of the backward values at this level
+ * of the HMM.
+ *
+ * @param state an index of a state with nonzero forward probability
+ *
+ * @return the normalized backward value for the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public float backwardValue(int state) {
+ checkIndex(state);
+ return bwdValues[state];
+ }
+
+ /**
+ * Returns the sum of the forward values at this level of the HMM
+ * when the forward values are computed using forward values
+ * from the previous level that are normalized to sum to 1.
+ * @return the sum of the forward values at this level of the HMM
+ */
+ public float forwardValuesSum() {
+ return fwdValueSum;
+ }
+
+ /**
+ * Returns the sum of the backward values at this level of the HMM
+ * when the backward values are computed using backward
+ * values from the next level that are normalized to sum to 1.
+ * If the backward values of this level were set with
+ * {@code setInitialBackwardValues()}, the returned value is instead
+ * the number of states at this level.
+ * @return the sum of the backward values at this level of the HMM
+ */
+ public float backwardValuesSum() {
+ return bwdValueSum;
+ }
+
+ /**
+  * Returns a string description of {@code this}: the level, the number of
+  * states, the forward and backward sums, and for each state its two
+  * edges and its forward and backward values.  The exact details
+  * of the description are unspecified and subject to change.
+  *
+  * @return a string description of {@code this}.
+  */
+ @Override
+ public String toString() {
+     final String space = " ";
+     final String sep = " | ";
+     StringBuilder sb = new StringBuilder(100);
+     sb.append("level=").append(marker);
+     sb.append(" size=").append(size);
+     sb.append(" forwardValuesSum=").append(fwdValueSum);
+     sb.append(" backwardSum=").append(bwdValueSum);
+     for (int j=0; j<size; ++j) {
+         sb.append(sep).append("j=").append(j).append(": ");
+         sb.append(edge1(j)).append(space);
+         sb.append(edge2(j)).append(space);
+         sb.append(forwardValue(j)).append(space);
+         sb.append(backwardValue(j));
+     }
+     return sb.append(sep).toString();
+ }
+
+ /*
+  * Grows the state arrays, if needed, so that they can hold at least
+  * the specified number of states.  The arrays grow by half their
+  * current capacity plus one, or to the requested minimum if that is
+  * larger.  Existing entries are preserved.
+  *
+  * @param minCapacity the desired minimum state capacity
+  */
+ private void ensureCapacity(int minCapacity) {
+     if (minCapacity <= capacity) {
+         return;
+     }
+     capacity = Math.max((capacity * 3)/2 + 1, minCapacity);
+     edges1 = Arrays.copyOf(edges1, capacity);
+     edges2 = Arrays.copyOf(edges2, capacity);
+     fwdValues = Arrays.copyOf(fwdValues, capacity);
+     bwdValues = Arrays.copyOf(bwdValues, capacity);
+ }
+}
diff --git a/sample/RecombSingleNodes.java b/sample/RecombSingleNodes.java
new file mode 100644
index 0000000..45dfa58
--- /dev/null
+++ b/sample/RecombSingleNodes.java
@@ -0,0 +1,361 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+/**
+ * <p>Class {@code RecombSingleNodes} stores ordered node pairs and
+ * associated values.
+ * </p>
+ * <p>Instances of class {@code RecombSingleNodes} are not thread safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RecombSingleNodes {
+
+ private static final double loadFactor = 0.75;
+
+ private final int nNodes;
+
+ private int size = 0;
+ private int capacity; // required to be a power of 2
+ private int rehashThreshold;
+
+ private int[] index;
+ private int[] node1;
+ private int[] node2;
+ private float[] value;
+ private float[] sumNode1Value;
+ private float[] sumNode2Value;
+ private float sumValue;
+
+ /**
+ * Creates a new instance of {@code RecombSingleNodes} that has an
+ * initial value of 0 for each ordered node pair. The first node
+ * has index 0.
+ * @param nNodes the maximum number of distinct nodes
+ * which will be paired to form ordered node pairs
+ * @throws IllegalArgumentException if {@code nNodes < 1}
+ */
+ public RecombSingleNodes(int nNodes) {
+ if (nNodes < 1) {
+ throw new IllegalArgumentException("nNodes < 1: " + nNodes);
+ }
+ this.nNodes = nNodes;
+ this.size = 0;
+ this.capacity = (1<<10);
+ this.rehashThreshold = (int) (loadFactor * capacity);
+ this.index = new int[capacity];
+ this.node1 = new int[capacity];
+ this.node2 = new int[capacity];
+ this.value = new float[capacity];
+ this.sumNode1Value = new float[nNodes];
+ this.sumNode2Value = new float[nNodes];
+ this.sumValue = 0.0f;
+ }
+
+ private static long hash1(int node1, int node2) {
+ long hash = 5;
+ hash = 71 * hash + node1;
+ hash = 71 * hash + node2;
+ return hash;
+ }
+
+ private static long hash2(int node1, int node2) {
+ long hash = 7;
+ hash = 97 * hash + node1;
+ hash = 97 * hash + node2;
+ return hash;
+ }
+ /*
+ * Return the storage index for specified node pair. If the key is not
+ * currently stored in the hash table, the index at which the value
+ * should be stored is returned.
+ */
+ private int index(int node1, int node2) {
+ long h1 = hash1(node1, node2);
+ long h2 = hash2(node1, node2);
+ if ((h2 & 1)==0) {
+ // h2 must be relatively prime to maxSize, which is a power of 2
+ ++h2;
+ }
+ long l = h1;
+ for (int k=0; k<capacity; ++k) {
+ int i = (int) (l % capacity);
+ if (value[i]==0.0
+ || (this.node1[i]==node1 && this.node2[i]==node2)) {
+ return i;
+ }
+ l += h2;
+ }
+ assert false;
+ return -1;
+ }
+
+ /*
+ * Increases the capacity of the internal hash table.
+ */
+ private void rehash() {
+ assert this.size>=this.rehashThreshold;
+ int newMaxSize = 2*capacity;
+ if (newMaxSize<0) {
+ throw new IllegalStateException("hash table overflow");
+ }
+ int[] oldIndex = index;
+ int[] oldNode1 = node1;
+ int[] oldNode2 = node2;
+ float[] oldValue = value;
+
+ capacity = newMaxSize;
+ index = new int[newMaxSize];
+ node1 = new int[newMaxSize];
+ node2 = new int[newMaxSize];
+ value = new float[newMaxSize];
+
+ for (int j=0; j<size; ++j) {
+ int oldInd = oldIndex[j];
+ int newIndex = index(oldNode1[oldInd], oldNode2[oldInd]);
+ index[j] = newIndex;
+ node1[newIndex] = oldNode1[oldInd];
+ node2[newIndex] = oldNode2[oldInd];
+ value[newIndex] = oldValue[oldInd];
+ }
+ rehashThreshold = (int) (loadFactor * capacity);
+ }
+
+ /**
+ * Adds the specified positive value to the stored value of the specified
+ * node pair.
+ *
+ * @param node1 the first node
+ * @param node2 the second node
+ * @param value the value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code node1 < 0 || node1 >= this.nNodes()}
+ * @throws IndexOutOfBoundsException if
+ * {@code node2 < 0 || node2 >= this.nNodes()}
+ * @throws IllegalArgumentException if
+ * {@code value <= 0 || (Double.isFinite(value) == false)}
+ */
+ public void sumUpdate(int node1, int node2, float value) {
+ if (value <= 0 || (Double.isFinite(value)==false) ) {
+ throw new IllegalArgumentException(String.valueOf(value));
+ }
+ int i = index(node1, node2);
+ boolean addNode = (this.value[i]==0f);
+ this.value[i] += value;
+ this.sumNode1Value[node1] += value;
+ this.sumNode2Value[node2] += value;
+ this.sumValue += value;
+ if (addNode) {
+ this.index[size++] = i;
+ this.node1[i] = node1;
+ this.node2[i] = node2;
+ if (this.size>=this.rehashThreshold) {
+ rehash();
+ }
+ }
+ }
+
+ /**
+ * Returns the number of node pairs with non-zero value.
+ * @return the number of node pairs with non-zero value
+ */
+ public int size() {
+ return size;
+ }
+
+ private void checkSize(int index) {
+ if (index>=size()) {
+ throw new IndexOutOfBoundsException(String.valueOf(index));
+ }
+ }
+
+ /**
+ * Returns the number of nodes.
+ *
+ * @return the number of nodes
+ */
+ public int nNodes() {
+ return nNodes;
+ }
+
+ /**
+ * Returns the first node of the specified node pair in the list of
+ * node pairs with non-zero value. Repeated invocations of this
+ * method with the same parameter will return the same value if
+ * node values are not modified between invocations. If
+ * {@code (index >= 0 && index < this.size())}, then the following
+ * expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode1(index),
+ * this.enumNode2(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of node pairs with non-zero
+ * value
+ * @return the first node of the specified node pair in a list of
+ * node pairs with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumNode1(int index) {
+ checkSize(index);
+ return node1[this.index[index]];
+ }
+
+ /**
+ * Returns the second node of the specified node pair in a list of
+ * node pairs with non-zero value. Repeated invocations of this
+ * method with the same parameter will return the same value if
+ * node values are not modified between invocations. If
+ * {@code (index >= 0 && index < this.size())}, then the following
+ * expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode1(index),
+ * this.enumNode2(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of node pairs with non-zero value
+ * @return the second node of the specified node pair in a list of
+ * node pairs with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumNode2(int index) {
+ checkSize(index);
+ return node2[this.index[index]];
+ }
+
+ /**
+ * Returns the value of the specified node pair in a list of
+ * node pairs with non-zero value. Repeated invocations of this
+ * method with the same parameter will return the same value if
+ * node values are not modified between invocations. If
+ * {@code (index >= 0 && index < this.size())}, then the following
+ * expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode1(index),
+ * this.enumNode2(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of node pairs with non-zero value
+ * @return the value of the specified ordered node pair in a list of
+ * node pairs with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public float enumValue(int index) {
+ checkSize(index);
+ return value[this.index[index]];
+ }
+
+ /**
+ * Returns the value of the specified node pair.
+ *
+ * @param node1 the first node
+ * @param node2 the second node
+ * @return the value of the specified node pair
+ * @throws IllegalArgumentException if {@code node1 < 0 || node2 < 0}
+ */
+ public float value(int node1, int node2) {
+ if (node1 < 0 || node1 >= nNodes) {
+ throw new IndexOutOfBoundsException(String.valueOf(node1));
+ }
+ if (node2 < 0 || node2 >= nNodes) {
+ throw new IndexOutOfBoundsException(String.valueOf(node2));
+ }
+ return value[index(node1, node2)];
+ }
+
+ /**
+ * Returns the sum of the values of the node pairs that have the specified
+ * first node
+ *
+ * @param node1 a node
+ * @return the sum of the values of the node pairs that have the specified
+ * first node
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code node1 < 0 || node1 >= this.nNodes()}
+ */
+ public float sumNode1Value(int node1) {
+ return sumNode1Value[node1];
+ }
+
+ /**
+ * Returns the sum of the values of the node pairs that have the specified
+ * second node.
+ *
+ * @param node2 a node
+ * @return the sum of the values of the node pairs that have the specified
+ * second node
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code node2 < 0 || node2 >= this.nNodes()}
+ */
+ public float sumNode2Value(int node2) {
+ return sumNode2Value[node2];
+ }
+
+ /**
+ * Returns the sum of the values of all node pairs.
+ *
+ * @return the sum of the values of all node pairs
+ */
+ public float sumValue() {
+ return sumValue;
+ }
+
+ /**
+ * Sets the value of each ordered node pair to 0.
+ */
+ public void clear() {
+ for (int j=0; j<this.size; ++j) {
+ value[index[j]] = 0f;
+ sumNode1Value[node1[index[j]]]=0f;
+ sumNode2Value[node2[index[j]]]=0f;
+ }
+ sumValue = 0.0f;
+ size = 0;
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(80);
+ sb.append("size=");
+ sb.append(size);
+ for (int j=0; j<size; ++j) {
+ sb.append(" (");
+ sb.append(j);
+ sb.append(": node1=");
+ sb.append(enumNode1(j));
+ sb.append(" node2=");
+ sb.append(enumNode2(j));
+ sb.append(" value=");
+ sb.append(enumValue(j));
+ sb.append(")");
+ }
+ return sb.toString();
+ }
+}
diff --git a/sample/RefHapSeg.java b/sample/RefHapSeg.java
new file mode 100644
index 0000000..f7f1277
--- /dev/null
+++ b/sample/RefHapSeg.java
@@ -0,0 +1,283 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import blbutil.IntArray;
+import blbutil.IntList;
+import haplotype.SampleHapPairs;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * <p>Class {@code RefHapSeg} represents a chromosome segment of
+ * reference haplotypes.
+ * </p>
+ * <p>Instances of class {@code RefHapSeg} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RefHapSeg {
+
+ private final SampleHapPairs refHapPairs;
+ private final int start; // inclusive
+ private final int end; // exclusive
+ private final IntArray hapToSeq;
+ private final int[] seqToHap;
+
+ /**
+ * Constructs a new {@code RefHapSegs} instance from the specified data.
+ * @param refHapPairs the reference haplotype pairs
+ * @param start the starting marker index (inclusive)
+ * @param end the ending marker index (exclusive)
+ * @throws IllegalArgumentException if
+ * {@code start < 0 || start >= end || end > refHapPairs.nMarkers()}
+ * @throws NullPointerException if {@code refHapPairs == null}
+ */
+ public RefHapSeg(SampleHapPairs refHapPairs, int start, int end) {
+ if (start < 0 || start >= end || end > refHapPairs.nMarkers()) {
+ throw new IllegalArgumentException();
+ }
+ HapSegData hapSegData = new HapSegData(refHapPairs, start, end);
+ this.refHapPairs = refHapPairs;
+ this.start = start;
+ this.end = end;
+ this.hapToSeq = hapSegData.hap2Seq();
+ this.seqToHap = hapSegData.seq2Hap();
+ }
+
+ /**
+ * Returns the reference haplotype pairs.
+ * @return the reference haplotype pairs
+ */
+ public SampleHapPairs refHapPairs() {
+ return refHapPairs;
+ }
+
+ /**
+ * Return the number of reference allele sequences in this segment.
+ * @return the number of reference allele sequences in this segment
+ */
+ public int nSeq() {
+ return seqToHap.length;
+ }
+
+ /**
+ * Return the index of the reference allele sequence in this segment
+ * for the specified reference haplotype.
+ * @param hap a haplotype index
+ * @return the index of the reference allele sequence in this segment
+ * for the specified reference haplotype
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= this.refHapPairs().nHaps()}
+ */
+ public int seq(int hap) {
+ return hapToSeq.get(hap);
+ }
+
+ /**
+ * Return the specified reference haplotype allele.
+ * @param marker index of a marker in this segment
+ * @param seq index of a reference allele sequence in this segment
+ * @return the specified reference haplotype allele
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= (this.end() - this.start())}
+ * @throws IndexOutOfBoundsException if
+ * {@code seq < 0 || seg >= this.nSeq(segment)}
+ */
+ public int allele(int marker, int seq) {
+ int refIndex = start + marker;
+ if (marker < 0 || refIndex >= end) {
+ throw new IllegalArgumentException(String.valueOf(marker));
+ }
+ return refHapPairs.allele(refIndex, seqToHap[seq]);
+ }
+
+ /**
+ * Returns the starting marker index (inclusive) of this segment.
+ * @return the starting marker index (inclusive) of this segment
+ */
+ public int start() {
+ return start;
+ }
+
+ /**
+ * Returns the ending marker index (exclusive) of this segment.
+ * @return the ending marker index (exclusive) of this segment
+ */
+ public int end() {
+ return end;
+ }
+
+ private static class HapSegData {
+ private final SampleHapPairs refHapPairs;
+ private final int[] hap2seq;
+ private final IntList seq2Cnt;
+
+ private final List<IntList> seq2AlleleMap;
+ private final IntList seq2NonMajorCnt;
+ private final IntList nonMajorSeq;
+
+ public HapSegData(SampleHapPairs refHapPairs, int start, int end) {
+ this.refHapPairs = refHapPairs;
+ this.hap2seq = new int[refHapPairs.nHaps()];
+ this.seq2Cnt = new IntList(200);
+ this.seq2Cnt.add(hap2seq.length);
+ this.seq2AlleleMap = new ArrayList<>(200);
+ this.seq2AlleleMap.add(new IntList(4));
+ this.seq2NonMajorCnt = new IntList(200);
+ this.nonMajorSeq = new IntList(20);
+ for (int m=start; m<end; ++m) {
+ if (refHapPairs.storesNonMajorIndices(m)) {
+ lowMafUpdate(m);
+ }
+ else {
+ highMafUpdate(m);
+ }
+ }
+ }
+
+ public IntArray hap2Seq() {
+ return IntArray.create(hap2seq, 0, seq2AlleleMap.size()-1);
+ }
+
+ public int[] seq2Hap() {
+ int[] seqToHap = new int[seq2AlleleMap.size()];
+ Arrays.fill(seqToHap, -1);
+ for (int h=0; h<hap2seq.length; ++h) {
+ int seq = hap2seq[h];
+ if (seqToHap[seq] == -1) {
+ seqToHap[seq] = h;
+ }
+ }
+ return seqToHap;
+ }
+
+ private void lowMafUpdate(int marker) {
+ setSeqToAlleleMap(marker); // major allele not in any allele map
+ int nAlleles = refHapPairs.nAlleles(marker);
+ int majorAllele = refHapPairs.majorAllele(marker);
+ for (int al=0; al<nAlleles; ++al) {
+ if (al!=majorAllele) {
+ int nCopies = refHapPairs.alleleCount(marker, al);
+ for (int c=0; c<nCopies; ++c) {
+ int h = refHapPairs.hapIndex(marker, al, c);
+ int seq = hap2seq[h];
+ IntList list = seq2AlleleMap.get(seq);
+ int index = indexOfAllele(list, al);
+ if (index < list.size()) {
+ updateHap2Seq(h, list.get(index+1));
+ }
+ }
+ }
+ }
+ }
+
+ private void setSeqToAlleleMap(int marker) {
+ int nAlleles = refHapPairs.nAlleles(marker);
+ int majorAllele = refHapPairs.majorAllele(marker);
+
+ seq2NonMajorCnt.clear();
+ for (int j=0, n=seq2Cnt.size(); j<n; ++j) {
+ seq2AlleleMap.get(j).clear();
+ seq2NonMajorCnt.add(0);
+ }
+
+ for (int al=0; al<nAlleles; ++al) {
+ if (al!=majorAllele) {
+ updateNonMajorSeq(marker, al, seq2NonMajorCnt, nonMajorSeq);
+ updateAlleleToSeqList(al, seq2NonMajorCnt, nonMajorSeq);
+ for (int j=0; j<nonMajorSeq.size(); ++j) {
+ seq2NonMajorCnt.set(nonMajorSeq.get(j), 0);
+ }
+ nonMajorSeq.clear();
+ }
+ }
+ }
+
+ private void updateNonMajorSeq(int marker, int allele,
+ IntList nonMajorSeqCnts, IntList nonMajorSeq) {
+ int nCopies = refHapPairs.alleleCount(marker, allele);
+ for (int c=0; c<nCopies; ++c) {
+ int h = refHapPairs.hapIndex(marker, allele, c);
+ int seq = hap2seq[h];
+ if (nonMajorSeqCnts.getAndIncrement(seq) == 0) {
+ nonMajorSeq.add(seq);
+ }
+ }
+ }
+
+ private void updateAlleleToSeqList(int allele, IntList nonMajorSeqCnts,
+ IntList nonMajorSeq) {
+ for (int j=0; j<nonMajorSeq.size(); ++j) {
+ int seq = nonMajorSeq.get(j);
+ if (nonMajorSeqCnts.get(seq) < seq2Cnt.get(seq)) {
+ IntList list = seq2AlleleMap.get(seq);
+ mapAlleleToNewSeq(list, allele);
+ }
+ }
+ }
+
+ private void highMafUpdate(int marker) {
+ for (int j=0, n=seq2AlleleMap.size(); j<n; ++j) {
+ seq2AlleleMap.get(j).clear();
+ }
+ for (int h=0; h<hap2seq.length; ++h) {
+ int seq = hap2seq[h];
+ int allele = refHapPairs.allele(marker, h);
+ IntList list = seq2AlleleMap.get(seq);
+ if (list.isEmpty()) {
+ list.add(allele);
+ list.add(seq);
+ }
+ else {
+ int index = indexOfAllele(list, allele);
+ if (index==list.size()) {
+ mapAlleleToNewSeq(list, allele);
+ }
+ updateHap2Seq(h, list.get(index+1));
+ }
+ }
+ }
+
+ private int indexOfAllele(IntList list, int allele) {
+ int index=0;
+ while (index < list.size() && list.get(index)!=allele) {
+ index+=2;
+ }
+ return index;
+ }
+
+ private void mapAlleleToNewSeq(IntList list, int allele) {
+ list.add(allele);
+ list.add(seq2AlleleMap.size());
+ seq2AlleleMap.add(new IntList(4));
+ seq2Cnt.add(0);
+ }
+
+ private void updateHap2Seq(int h, int seq) {
+ seq2Cnt.decrementAndGet(hap2seq[h]);
+ hap2seq[h] = seq;
+ seq2Cnt.incrementAndGet(hap2seq[h]);
+ }
+ }
+}
diff --git a/sample/RefHapSegs.java b/sample/RefHapSegs.java
new file mode 100644
index 0000000..3c71c1e
--- /dev/null
+++ b/sample/RefHapSegs.java
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import blbutil.IntPair;
+import haplotype.SampleHapPairs;
+import java.util.function.Function;
+import java.util.stream.IntStream;
+
+/**
+ * <p>Class {@code RefHapSegs} represents reference haplotypes that span
+ * segments determined by non-overlapping clusters of markers.
+ * </p>
+ * <p>Instances of class {@code RefHapSegs} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RefHapSegs {
+
+ private final int[] clusterStart;
+ private final int[] clusterEnd;
+ private final SampleHapPairs refHapPairs;
+ private final RefHapSeg[] refHapSegs;
+
+ /**
+ * Constructs a new {@code RefHapSegs} instance from the specified data.
+ * @param refHapPairs the reference haplotype pairs
+ * @param clusterStart an array whose {@code j}-th element is the
+ * starting reference marker index (inclusive) for the {@code j}-th
+ * marker cluster
+ * @param clusterEnd an array whose {@code j}-th element is the
+ * ending reference marker index (exclusive) for the {@code j}-th
+ * marker cluster
+ * @param nThreads the number of threads to use during object construction
+ * @throws IllegalArgumentException if
+ * {@code clusterStart.length != clusterEnd.length}
+ * @throws IllegalArgumentException if
+ * {@code clusterStart.length > 0 && clusterStart[0] < 0}
+ * @throws IllegalArgumentException if
+ * {@code clusterEnd.length > 0 && clusterEnd[clusterEnd.length - 1] > nMarkers}
+ * @throws IllegalArgumentException if
+ * {@code clusterStart[j] >= clusterEnd[j]} for some {@code j} satisfying
+ * {@code 0 <= j && j < clusterStart.length}
+ * @throws IllegalArgumentException if
+ * {@code clusterStart[j] < clusterEnd[j-1]} for some {@code j} satisfying
+ * {@code 1 <= j && j < clusterStart.length}
+ * @throws IllegalArgumentException if {@code nThreads < 0}
+ * @throws NullPointerException if
+ * {@code refHapPairs == null || clusterStart == null || clusterEnd == null}
+ */
+ public RefHapSegs(SampleHapPairs refHapPairs, int[] clusterStart,
+ int[] clusterEnd, int nThreads) {
+ if (nThreads <=0) {
+ throw new IllegalArgumentException(String.valueOf(nThreads));
+ }
+ int nMarkers = refHapPairs.nMarkers();
+ checkClusters(clusterStart, clusterEnd, nMarkers);
+ this.clusterStart = clusterStart.clone();
+ this.clusterEnd = clusterEnd.clone();
+ this.refHapPairs = refHapPairs;
+ this.refHapSegs = IntStream.rangeClosed(0, this.clusterStart.length)
+ .parallel()
+ .mapToObj(j -> intPair(j, this.clusterStart, this.clusterEnd,
+ nMarkers))
+ .map(ip -> new RefHapSeg(refHapPairs, ip.first(), ip.second()))
+ .toArray(RefHapSeg[]::new);
+ }
+
+ private void checkClusters(int[] starts, int[] ends, int nMarkers) {
+ if (starts.length != ends.length) {
+ throw new IllegalArgumentException("inconsistent data");
+ }
+ if (starts.length > 0 && starts[0] < 0) {
+ throw new IllegalArgumentException("inconsistent data");
+ }
+ if (ends.length > 0 && ends[ends.length - 1] > nMarkers) {
+ throw new IllegalArgumentException("inconsistent data");
+ }
+ for (int j=0; j<starts.length; ++j) {
+ if (starts[j] >= ends[j]) {
+ throw new IllegalArgumentException("inconsistent data");
+ }
+ if (j>0 && ends[j-1] > starts[j]) {
+ throw new IllegalArgumentException("inconsistent data");
+ }
+ }
+ }
+
+ private static IntPair intPair(int index, int[] starts, int[] ends,
+ int nMarkers) {
+ int start = (index == 0) ? 0 : starts[index - 1];
+ int end = (index == ends.length) ? nMarkers : ends[index];
+ return new IntPair(start, end);
+ }
+
+ /**
+ * Returns the reference haplotype pairs.
+ * @return the reference haplotype pairs
+ */
+ public SampleHapPairs refHapPairs() {
+ return refHapPairs;
+ }
+
+ /**
+ * Return the number of distinct reference allele sequences in the
+ * specified chromosome segment.
+ * @param segment index of a chromosome segment determined by
+ * the marker clusters
+ * @return the number of distinct reference allele sequences in the
+ * specified chromosome segment
+ * @throws IndexOutOfBoundsException if
+ * {@code segment < 0 || segment > this.nClusters()}
+ */
+ public int nSeq(int segment) {
+ return refHapSegs[segment].nSeq();
+ }
+
+ /**
+ * Return the number of markers in the specified chromosome segment.
+ * @param segment index of a chromosome segment determined by
+ * the marker clusters
+ * @return the number of markers in the specified chromosome segment
+ * @throws IndexOutOfBoundsException if
+ * {@code segment < 0 || segment > this.nClusters()}
+ */
+ public int nMarkers(int segment) {
+ return refHapSegs[segment].end() - refHapSegs[segment].start();
+ }
+
+ /**
+ * Return the index of the allele sequence in the specified chromosome
+ * segment for the specified reference haplotype.
+ *
+ * @param segment index of a chromosome segment determined by
+ * the marker clusters
+ * @param hap a haplotype index
+ *
+ * @return the index of the allele sequence in the specified chromosome
+ * segment for the specified reference haplotype
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code segment < 0 || segment > this.nClusters()}
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= this.refHapPairs().nHaps()}
+ */
+ public int seq(int segment, int hap) {
+ return refHapSegs[segment].seq(hap);
+ }
+
+ /**
+ * Return the specified reference haplotype allele.
+ *
+ * @param segment index of a chromosome segment determined by
+ * the marker clusters
+ * @param marker index of a marker in the specified interval
+ * @param seq index of a reference allele sequence in the specified
+ * interval
+ * @return the specified reference haplotype allele
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code segment < 0 || segment > this.nClusters()}
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers(interval)}
+ * @throws IndexOutOfBoundsException if
+ * {@code seq < 0 || seg >= this.nSeq(segment)}
+ */
+ public int allele(int segment, int marker, int seq) {
+ return refHapSegs[segment].allele(marker, seq);
+ }
+
+ /**
+ * Returns the index of the first marker (inclusive) in the specified
+ * marker cluster.
+ * @param cluster an index of a marker cluster
+ * @return the index of the first marker (inclusive) in the specified
+ * marker cluster
+ * @throws IndexOutOfBoundsException if
+ * {@code cluster < 0 || cluster >= this.nClusters}
+ */
+ public int clusterStart(int cluster) {
+ return clusterStart[cluster];
+ }
+
+ /**
+ * Returns the index of the last marker (exclusive) in the specified
+ * marker cluster.
+ * @param cluster an index of a marker cluster
+ * @return the index of the last marker (exclusive) in the specified
+ * marker cluster
+ * @throws IndexOutOfBoundsException if
+ * {@code cluster < 0 || cluster >= this.nClusters()}
+ */
+ public int clusterEnd(int cluster) {
+ return clusterEnd[cluster];
+ }
+
+ /**
+ * Returns the number of marker clusters.
+ * @return the number of marker clusters
+ */
+ public int nClusters() {
+ return clusterStart.length;
+ }
+}
diff --git a/sample/RestrictedDag.java b/sample/RestrictedDag.java
new file mode 100644
index 0000000..410316c
--- /dev/null
+++ b/sample/RestrictedDag.java
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2013 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sample;
+
+import beagleutil.CenteredIntIntervalTree;
+import beagleutil.IntIntervalTree;
+import blbutil.IndexSet;
+import dag.Dag;
+import dag.MergeableDag;
+import haplotype.HapPairs;
+import haplotype.SampleHapPairs;
+import ibd.HapSegment;
+import ibd.IbsHapSegments;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/**
+ * <p>Class {@code RestrictedDag} is a wrapper for a {@code Dag}
+ * object that stores segments of identity by descent.
+ * </p>
+ * <p>Instances of class {@code RestrictedDag} are immutable.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RestrictedDag {
+
+ private static final int END_FILTER = 1;
+
+ private final SampleHapPairs haps;
+ private final Dag dag;
+ private final int[][] hapStates;
+ private final IbsHapSegments hapSegments;
+ private final double[] pos;
+ private final double ibdExtend;
+
+ /**
+ * Constructs a {@code RestrictedDag} instance.
+ * @param haps the sample haplotypes
+ * @param weights an array of length {@code haps.nHaps()}
+ * whose {@code j}-th element is the weight for the {@code j}-th haplotype
+ * @param nInitLevels the number of initial levels to read
+ * @param scale a parameter that multiplicatively scales the node
+ * similarity threshold
+ * @param ibdLength the minimum length of an IBD segment
+ * @param ibdExtend the length by which an IBD segment will be extended
+ *
+ * @throws IllegalArgumentException if {@code haps.nMarkers() == 0}
+ * @throws IllegalArgumentException if
+ * {@code weights.length != 2*haps.nSamples()}
+ * @throws IllegalArgumentException if
+ * {@code (weights[j] <= 0 || Float.isFinite(weights[j]) == false)}
+ * for any {@code j} satisfying {@code (0 <= j && j < weights.length)}
+ * @throws IllegalArgumentException if {@code nInitLevels < 1}
+ * @throws IllegalArgumentException if
+ * {@code Double.isFinite(scale) == false || scale <= 0}
+ * @throws IllegalArgumentException if
+ * {@code ibdLength <= 0 || ibdExtend <= 0}
+ * @throws NullPointerException if
+ * {@code haps == null || weights == null}
+ */
+ public RestrictedDag(SampleHapPairs haps, float[] weights, int nInitLevels,
+ float scale, double ibdLength, double ibdExtend) {
+ if (ibdLength <= 0d) {
+ throw new IllegalArgumentException(String.valueOf(ibdLength));
+ }
+ if (ibdExtend <= 0d) {
+ throw new IllegalArgumentException(String.valueOf(ibdExtend));
+ }
+ this.haps = haps;
+ this.ibdExtend = ibdExtend;
+ this.dag = MergeableDag.dag(haps, weights, scale, nInitLevels);
+ this.pos = pos(dag);
+ this.hapStates = hapStates(dag, haps);
+ this.hapSegments = new IbsHapSegments(haps, pos, ibdLength);
+ }
+
+ private static double[] pos(Dag dag) {
+ double[] pos = dag.posArray();
+ double scaleFactor = 0.2;
+ for (int j=0; j<pos.length; ++j) {
+ pos[j] *= scaleFactor;
+ }
+ return pos;
+ }
+
+ /**
+ * Returns an int[][] array whose (j,k)-th element is the edge state
+ * at the j-th marker that is traversed by the k-th haplotype.
+ * @param dag the DAG constructed by the specified haplotypes
+ * @param haps the haplotype pairs used to construct the specified DAG
+ * @return an int[][] array whose (j,k)-th element is the edge state
+ * at the j-th marker that is traversed by the k-th haplotype
+ * @throws IllegalArgumentException if
+ * {@code dag.nLevels() != haps.nMarkers()}
+ * @throws NullPointerException if {@code dag == null || haps == null}
+ */
+ private static int[][] hapStates(Dag dag, HapPairs haps) {
+ if (dag.nLevels()!=haps.nMarkers()) {
+ throw new IllegalArgumentException("dag.nMarkers()!=haps.nMarkers()");
+ }
+ int nHaps = haps.nHaps();
+ int[][] states = new int[dag.nLevels()][nHaps];
+ for (int h=0; h<nHaps; ++h) {
+ int node = 0;
+ int symbol = haps.allele(0, h);
+ states[0][h] = dag.outEdgeBySymbol(0, node, symbol);
+ for (int j=1; j<states.length; ++j) {
+ node = dag.childNode(j-1, states[j-1][h]);
+ symbol = haps.allele(j, h);
+ states[j][h] = dag.outEdgeBySymbol(j, node, symbol);
+ assert states[j][h] != -1;
+ }
+ }
+ return states;
+ }
+
+ /**
+ * Returns the haplotypes used to construct {@code this}.
+ * @return the haplotypes used to construct {@code this}
+ */
+ public SampleHapPairs sampleHaps() {
+ return haps;
+ }
+
+ /**
+ * Returns the DAG.
+ * @return the DAG
+ */
+ public Dag dag() {
+ return dag;
+ }
+
+ /**
+ * Returns the permitted states for the specified sample.
+ * @param sample the sample index
+ * @return the permitted states for the specified sample
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= haps.nSamples()}
+ */
+ public DiploidStates singleStates(int sample) {
+ if (sample < 0 || sample >= haps.nSamples()) {
+ throw new IndexOutOfBoundsException("sample: " + sample);
+ }
+ int hap1 = 2*sample;
+ int hap2 = 2*sample + 1;
+ List<HapSegment> hapSegs1 = ibsSegs(hap1);
+ List<HapSegment> hapSegs2 = ibsSegs(hap2);
+
+ return new SinglePermittedStates(haps.nMarkers(), haps.nHaps(),
+ sample, hapSegs1, hapSegs2);
+ }
+
+ private List<HapSegment> ibsSegs(int hap) {
+ List<HapSegment> hapSegs = hapSegments.filteredFind(hap);
+ containmentFilter(hapSegs, END_FILTER);
+ return hapSegs;
+ }
+
+ /* drops segments nested in a retained segment by >= minEndDiff at both ends */
+ private void containmentFilter(List<HapSegment> ibdSegments,
+ int minEndDiff) {
+ assert minEndDiff >= 0;
+ Collections.sort(ibdSegments, modStartComparator());
+ if (ibdSegments.isEmpty()==false) {
+ List<HapSegment> list = new LinkedList<>();
+ List<HapSegment> filtered = new ArrayList<>(ibdSegments.size()/5);
+ for (int k=0, m=ibdSegments.size(); k<m; ++k) {
+ HapSegment hs = ibdSegments.get(k);
+ boolean exclude = false;
+ Iterator<HapSegment> it = list.iterator();
+ while (it.hasNext() && exclude==false) {
+ HapSegment cover = it.next();
+ int cStart = cover.start();
+ int cEnd = cover.end();
+ if (cEnd <= hs.start() ) {
+ it.remove();
+ }
+ else {
+ if ( (hs.start() - cStart) >= minEndDiff
+ && (cEnd - hs.end()) >= minEndDiff) {
+ exclude = true;
+ }
+ }
+ }
+ if (exclude==false) {
+ list.add(hs);
+ filtered.add(hs);
+ }
+ }
+ ibdSegments.clear();
+ ibdSegments.addAll(filtered);
+ }
+ }
+
+
+ // Orders by start (ascending), then end (descending), then hap (ascending),
+ // so that for a given start field the end field is sorted in reverse order.
+ private static Comparator<HapSegment> modStartComparator() {
+ return (HapSegment a, HapSegment b) -> {
+ int byStart = Integer.compare(a.start(), b.start());
+ if (byStart != 0) {
+ return byStart;
+ }
+ int byEndDesc = Integer.compare(b.end(), a.end());
+ if (byEndDesc != 0) {
+ return byEndDesc;
+ }
+ return Integer.compare(a.hap(), b.hap());
+ };
+ }
+
+ private int modifyStart(HapSegment targetHS,
+ IntIntervalTree<HapSegment> tree) {
+ int maxStart = extStartIndex(targetHS.start(), ibdExtend, pos);
+ int minEnd = targetHS.end();
+ List<HapSegment> list = new ArrayList<>(10);
+ tree.intersectAll(maxStart, minEnd, list);
+ return list.isEmpty() ? maxStart : targetHS.start();
+ }
+
+ private int modifyEnd(HapSegment targetHS,
+ IntIntervalTree<HapSegment> tree) {
+ int maxStart = targetHS.start();
+ int minEnd = extEndIndex(targetHS.end(), ibdExtend, pos);
+ List<HapSegment> list = new ArrayList<>(10);
+ tree.intersectAll(maxStart, minEnd, list);
+ return list.isEmpty() ? minEnd : targetHS.end();
+ }
+
+ private static int extStartIndex(int start, double extension, double[] pos) {
+ double target = pos[start] - extension;
+ int x = Arrays.binarySearch(pos, target);
+ return (x<0) ? -x-1 : x;
+ }
+
+ private static int extEndIndex(int end, double extension, double[] pos) {
+ double target = pos[end] + extension;
+ int x = Arrays.binarySearch(pos, target);
+ return (x<0) ? -x-2 : x; // end is inclusive
+ }
+
+ private class SinglePermittedStates implements DiploidStates {
+
+ private final int nMarkers;
+ private final IndexSet indices1;
+ private final IndexSet indices2;
+ private final IntIntervalTree<HapSegment> tree1;
+ private final IntIntervalTree<HapSegment> tree2;
+
+ private int marker = -1;
+ private int i1 = 0;
+ private int i2 = 0;
+ private int edge1 = -1;
+ private int edge2 = -1;
+ private boolean rev = false;
+
+ private SinglePermittedStates(int nMarkers, int nHaps, int sample,
+ List<HapSegment> list1, List<HapSegment> list2) {
+ int hap1 = 2*sample;
+ int hap2 = 2*sample + 1;
+ this.nMarkers = nMarkers;
+ this.indices1 = new IndexSet(nHaps);
+ this.indices2 = new IndexSet(nHaps);
+
+ List<HapSegment> extList1 = extendSegment(hap1, list1);
+ List<HapSegment> extList2 = extendSegment(hap2, list2);
+
+ tree1 = getTree(nMarkers, extList1);
+ tree2 = getTree(nMarkers, extList2);
+ }
+
+ private List<HapSegment> extendSegment(int hap, List<HapSegment> ibdSegs) {
+ List<HapSegment> extendedSegs = new ArrayList<>(ibdSegs.size());
+ IntIntervalTree<HapSegment> tree = getTree(haps.nMarkers(), ibdSegs);
+
+ // permit states traversed by hap
+ int lastMarker = haps.nMarkers()-1;
+ extendedSegs.add( new HapSegment(hap, 0, lastMarker) );
+
+ // permit states traversed by IBS haps
+ if (ibdSegs.isEmpty()==false) {
+ for (int k=0, n=ibdSegs.size(); k<n; ++k) {
+ HapSegment targetHS = ibdSegs.get(k);
+ int start = modifyStart(targetHS, tree);
+ int end = modifyEnd(targetHS, tree);
+ extendedSegs.add(new HapSegment(targetHS.hap(), start, end));
+ }
+ }
+ return extendedSegs;
+ }
+
+ private IntIntervalTree<HapSegment> getTree(int nMarkers,
+ Collection<HapSegment> c) {
+ IntIntervalTree<HapSegment> tree
+ = new CenteredIntIntervalTree<>(0, nMarkers-1);
+ c.stream().forEach((hs) -> {
+ tree.add(hs);
+ });
+ return tree;
+ }
+
+ private void convertToIndices(int marker,
+ IntIntervalTree<HapSegment> tree, IndexSet set) {
+ set.clear();
+ Collection<HapSegment> c = new ArrayList<>(30);
+ tree.intersect(marker, c);
+ c.stream().forEach((hs) -> {
+ set.add(hapStates[marker][hs.hap()]);
+ });
+ }
+
+ @Override
+ public int nMarkers() {
+ return nMarkers;
+ }
+
+ @Override
+ public int marker() {
+ return marker;
+ }
+
+ @Override
+ public void setMarker(int marker) {
+ this.marker = marker;
+ this.i1 = 0;
+ this.i2 = 0;
+ this.edge1 = -1;
+ this.edge2 = -1;
+ this.rev = false;
+ convertToIndices(marker, tree1, indices1);
+ convertToIndices(marker, tree2, indices2);
+ }
+
+ @Override
+ public boolean hasNext() {
+ return i1<indices1.size();
+ }
+
+ @Override
+ public void next() {
+ if (hasNext()==false) {
+ throw new NoSuchElementException();
+ }
+ if (rev) {
+ int tmp = edge1;
+ edge1 = edge2;
+ edge2 = tmp;
+ ++i2;
+ if (i2==indices2.size()) {
+ ++i1;
+ i2 = 0;
+ }
+ rev = false;
+ }
+ else {
+ edge1 = indices1.enumeratedValue(i1);
+ edge2 = indices2.enumeratedValue(i2);
+ if (indices1.contains(edge2)==false
+ || indices2.contains(edge1)==false) {
+ rev = true;
+ }
+ else {
+ ++i2;
+ if (i2==indices2.size()) {
+ ++i1;
+ i2 = 0;
+ }
+ }
+ }
+ }
+
+ @Override
+ public int edge1() {
+ return edge1;
+ }
+
+ @Override
+ public int edge2() {
+ return edge2;
+ }
+ }
+}
diff --git a/sample/SamplerData.java b/sample/SamplerData.java
new file mode 100644
index 0000000..615ec05
--- /dev/null
+++ b/sample/SamplerData.java
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * IBD is licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package sample;
+
+import dag.Dag;
+import haplotype.BasicSampleHapPairs;
+import haplotype.ConsensusPhaser;
+import haplotype.HapPair;
+import haplotype.RevSampleHapPairs;
+import haplotype.SampleHapPairs;
+import haplotype.Weights;
+import java.util.ArrayList;
+import java.util.List;
+import main.CurrentData;
+import main.Par;
+import main.RunStats;
+import vcf.FuzzyGL;
+import vcf.GL;
+import vcf.Markers;
+import vcf.RevGL;
+
+/**
+ * <p>Class {@code SamplerData} contains immutable input data for the
+ * current marker window.
+ * </p>
+ * <p>Instances of class {@code SamplerData} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SamplerData {
+
+ private static final double MIN_CM_DIST = 1e-7;
+ private static final int nInitLevels = 500;
+
+ private final Par par;
+ private final boolean revMarkers;
+ private final RestrictedDag rdag;
+ private final GL gl;
+ private final float[] recombRate;
+
+ /**
+ * Constructs a new {@code SamplerData} instance from the specified data.
+ * The contract for this method is undefined if the specified
+ * {@code hapPairs} is inconsistent with the input data
+ * contained in the {@code cd} parameter.
+ *
+ * @param par the analysis parameters
+ * @param cd the input data for the current marker window
+ * @param hapPairs the target haplotype pairs used to build the haplotype
+ * frequency model
+ * @param revMarkers {@code true} if the order of markers should
+ * be reversed when building the haplotype frequency model, and
+ * {@code false} otherwise
+ * @param runStats the object to which run-time statistics will be written
+ *
+ * @throws IllegalArgumentException if {@code hapPairs.isEmpty() == true}
+ * @throws NullPointerException if any parameter is {@code null}
+ */
+ public SamplerData(Par par, CurrentData cd, List<HapPair> hapPairs,
+ boolean revMarkers, RunStats runStats) {
+ if (hapPairs.isEmpty()) {
+ throw new IllegalArgumentException("hapPairs.isEmpty()");
+ }
+ this.par = par;
+ this.revMarkers = revMarkers;
+ this.rdag = restrictedDag(cd, hapPairs, revMarkers, runStats);
+ this.gl = gl(cd, par.err(), revMarkers);
+ this.recombRate = recombRate(par, cd, rdag.dag(), revMarkers);
+ }
+
+ private static float[] recombRate(Par par, CurrentData cd, Dag dag,
+ boolean revMarkers) {
+ float[] recombRate = cd.recombRate();
+ if (recombRate == null) { // no rates in the input data: derive from DAG
+ recombRate = dagRecombRate(dag, par.mapscale());
+ }
+ else if (revMarkers) { // put the supplied rates in reversed marker order
+ for (int j=1, n=(recombRate.length + 1)/2; j<n; ++j) {
+ int k = recombRate.length - j;
+ float tmp = recombRate[j];
+ recombRate[j] = recombRate[k];
+ recombRate[k] = tmp;
+ }
+ recombRate[0] = 0;
+ }
+ return recombRate;
+ }
+
+ private RestrictedDag restrictedDag(CurrentData cd, List<HapPair> hapPairs,
+ boolean revMarkers, RunStats runStats) {
+ hapPairs = new ArrayList<>(hapPairs); // xx defensive copy
+ long t0 = System.nanoTime();
+ Weights weights = cd.weights();
+ List<HapPair> haps = ConsensusPhaser.run(hapPairs);
+ cd.addRestrictedRefHapPairs(haps);
+
+ SampleHapPairs dagHaps = new BasicSampleHapPairs(cd.allSamples(), haps);
+ if (revMarkers) {
+ dagHaps = new RevSampleHapPairs(dagHaps);
+ }
+ float[] wts = weights.get(dagHaps);
+ RestrictedDag rdag = new RestrictedDag(dagHaps, wts, nInitLevels,
+ par.modelscale(), par.ibdlength(), par.ibdextend());
+ runStats.buildNanos(System.nanoTime() - t0);
+ runStats.setDagStats(rdag.dag());
+ return rdag;
+ }
+
+ private static float[] dagRecombRate(Dag dag, float xdist) {
+ double[] bglDist = dag.posArray();
+ for (int j=0; j<bglDist.length; ++j) {
+ bglDist[j] *= 0.2;
+ }
+ double c = -2.0*xdist;
+ float[] rr = new float[dag.nLevels()];
+ rr[0] = 0.0f;
+ double lastGenPos = bglDist[0];
+ for (int j=1; j<rr.length; ++j) {
+ double genPos = bglDist[j];
+ double genDist = Math.max(Math.abs(genPos - lastGenPos), MIN_CM_DIST);
+ rr[j] = (float) -Math.expm1(c*genDist);
+ lastGenPos = genPos;
+ }
+ return rr;
+ }
+
+ private static GL gl(CurrentData cd, float err, boolean markersAreReversed) {
+ GL gl = new FuzzyGL(cd.targetGL(), err);
+ if (markersAreReversed) {
+ gl = new RevGL(gl);
+ }
+ return gl;
+ }
+
+ /**
+ * Returns {@code true} if the order of markers is reversed, and
+ * {@code false} otherwise
+ * @return {@code true} if the order of markers is reversed, and
+ * {@code false} otherwise
+ */
+ public boolean markersAreReversed() {
+ return revMarkers;
+ }
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ public int nMarkers() {
+ return gl.nMarkers();
+ }
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ public int nSamples() {
+ return gl.nSamples();
+ }
+
+ /**
+ * Returns the number of haplotypes.
+ * @return the number of haplotypes
+ */
+ public int nHaps() {
+ return 2*gl.nSamples();
+ }
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ public Markers markers() {
+ return gl.markers();
+ }
+
+ /**
+ * Returns the analysis parameters.
+ * @return the analysis parameters
+ */
+ public Par par() {
+ return par;
+ }
+
+ /**
+ * Returns the DAG model.
+ * @return the DAG model
+ */
+ public RestrictedDag rdag() {
+ return rdag;
+ }
+
+ /**
+ * Returns the genotype likelihoods for the
+ * target samples at the target data markers.
+ * @return the genotype likelihoods for the
+ * target samples at the target data markers.
+ */
+ public GL gl() {
+ return gl;
+ }
+
+ /**
+ * Returns the allele error rate
+ * @return the allele error rate
+ */
+ public float err() {
+ return par.err();
+ }
+
+ /**
+ * Returns the probability of recombination between {@code (marker - 1)}
+ * and {@code marker}.
+ * @param marker a marker index
+ * @return the probability of recombination between {@code (marker - 1)}
+ * and {@code marker}
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ public float pRecomb(int marker) {
+ return recombRate[marker];
+ }
+}
+
diff --git a/sample/SingleBaum.java b/sample/SingleBaum.java
new file mode 100644
index 0000000..d15ba46
--- /dev/null
+++ b/sample/SingleBaum.java
@@ -0,0 +1,301 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import dag.Dag;
+import haplotype.HapPair;
+import haplotype.BitHapPair;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import vcf.GL;
+
+/**
+ * <p>Class {@code SingleBaum} implements the Baum forward and backward
+ * algorithms for a hidden Markov model (HMM) of an individual's genotype data.
+ * </p>
+ * Instances of class {@code SingleBaum} are not thread-safe.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SingleBaum implements SingleBaumInterface {
+
+ private final Dag dag;
+ private final GL gl;
+ private final int nMarkers;
+ private final int nSamplesPerIndividual;
+ private final long seed;
+ private final Random random;
+
+ private final int[] node1;
+ private final int[] node2;
+ private final double[] nodeValue;
+
+ private final int[][] alleles1;
+ private final int[][] alleles2;
+
+ private final SingleBaumLevel[] levels;
+ private final SingleNodes fwdNodes;
+ private final SingleNodes bwdNodes;
+
+ private int windowIndex = -9999;
+ private int arrayIndex = -9999;
+
+ /**
+ * Creates a new {@code SingleBaum} instance from the specified data.
+ *
+ * @param dag the directed acyclic graph that determines the
+ * transition probabilities
+ * @param gl the emission probabilities
+ * @param seed the random seed
+ * @param nSamplesPerIndividual the number of haplotype pairs that
+ * will be sampled for each individual
+ * @param lowMem {@code true} if a low memory algorithm should be used, and
+ * {@code false} otherwise
+ *
+ * @throws IllegalArgumentException if
+ * {@code dag.markers().equals(gl.markers()) == false}
+ * @throws IllegalArgumentException if {@code nSamplesPerIndividual < 1}
+ * @throws NullPointerException if {@code dag == null || gl == null}
+ */
+ public SingleBaum(Dag dag, GL gl, long seed, int nSamplesPerIndividual,
+ boolean lowMem) {
+ if (dag.markers().equals(gl.markers())==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ if (nSamplesPerIndividual < 1) {
+ throw new IllegalArgumentException(
+ String.valueOf(nSamplesPerIndividual));
+ }
+ this.dag = dag;
+ this.gl = gl;
+ this.nMarkers = dag.nLevels();
+ this.nSamplesPerIndividual = nSamplesPerIndividual;
+ this.seed = seed;
+ this.random = new Random(seed);
+
+ this.node1 = new int[nSamplesPerIndividual];
+ this.node2 = new int[nSamplesPerIndividual];
+ this.nodeValue = new double[nSamplesPerIndividual];
+ this.alleles1 = new int[nSamplesPerIndividual][gl.nMarkers()];
+ this.alleles2 = new int[nSamplesPerIndividual][gl.nMarkers()];
+
+ int size = dag.nLevels();
+ if (lowMem) {
+ size = (int) Math.ceil(Math.sqrt(1 + 8*dag.nLevels())/2.0) + 1;
+ }
+ this.levels = new SingleBaumLevel[size];
+ for (int j=0; j<levels.length; ++j) {
+ levels[j] = new SingleBaumLevel(dag, gl);
+ }
+ this.fwdNodes = new SingleNodes();
+ this.bwdNodes = new SingleNodes();
+ }
+
+ @Override
+ public Dag dag() {
+ return dag;
+ }
+
+ @Override
+ public GL gl() {
+ return gl;
+ }
+
+ @Override
+ public int nSamplesPerIndividual() {
+ return nSamplesPerIndividual;
+ }
+
+ @Override
+ public long seed() {
+ return seed;
+ }
+
+ @Override
+ public List<HapPair> randomSample(int sample) {
+ forwardAlgorithm(sample);
+ initSampleAlleles(currentLevel(), sample);
+ for (int j=nMarkers-2; j>=0; --j) {
+ SingleBaumLevel level = previousLevel(sample);
+ sampleAlleles(level, sample);
+ }
+ return hapList(sample);
+ }
+
+ @Override
+ public List<HapPair> randomSample(int sample, double[] gtProbs) {
+ checkGtProbs(gtProbs);
+ forwardAlgorithm(sample);
+ initSampleAlleles(currentLevel(), sample);
+ setInitialBackwardNodesValues();
+ setGtProbs(currentLevel(), gtProbs);
+ for (int j=nMarkers-2; j>=0; --j) {
+ SingleBaumLevel level = previousLevel(sample);
+ sampleAlleles(level, sample);
+ level.setBackwardValues(bwdNodes);
+ setGtProbs(level, gtProbs);
+ }
+ return hapList(sample);
+ }
+
+ private void checkGtProbs(double[] gtProbs) {
+ if (gtProbs.length != gl.markers().sumGenotypes()) {
+ String s = "gtProbs.length!=gl.markers().sumGenotypes()";
+ throw new IllegalArgumentException(s);
+ }
+ }
+
+ private void setGtProbs(SingleBaumLevel level, double[] gtProbs) {
+ int m = level.marker();
+ int nGenotypes = gl.marker(m).nGenotypes();
+ int base = gl.markers().sumGenotypes(m);
+ for (int j=0; j<nGenotypes; ++j) {
+ gtProbs[base + j] = level.gtProbs(j);
+ }
+ }
+
+ private List<HapPair> hapList(int sample) {
+ List<HapPair> hapList = new ArrayList<>(2*nSamplesPerIndividual);
+ for (int copy=0; copy<nSamplesPerIndividual; ++copy) {
+ HapPair haps = new BitHapPair(gl.markers(), gl.samples(), sample,
+ alleles1[copy], alleles2[copy]);
+ hapList.add(haps);
+ }
+ return hapList;
+ }
+
+ private void initSampleAlleles(SingleBaumLevel level, int sample) {
+ int m = level.marker();
+ for (int copy=0; copy<nSamplesPerIndividual; ++copy) {
+ int state = initialRandomState(level);
+ node1[copy] = level.parentNode1(state);
+ node2[copy] = level.parentNode2(state);
+ nodeValue[copy] = parentSum(level, sample, state);
+ alleles1[copy][m] = level.symbol1(state);
+ alleles2[copy][m] = level.symbol2(state);
+ }
+ }
+
+ private int initialRandomState(SingleBaumLevel level) {
+ double d = random.nextDouble();
+ double sum = 0.0;
+ for (int j=0, n=level.size(); j<n; ++j) {
+ sum += level.forwardValue(j);
+ if (d <= sum) {
+ return j;
+ }
+ }
+ return level.size()-1; // if reached due to rounding
+ }
+
+ private double parentSum(SingleBaumLevel level, int sample, int state) {
+ int marker = level.marker();
+ double fwdValue = level.forwardValuesSum()*level.forwardValue(state);
+ int edge1 = level.edge1(state);
+ int edge2 = level.edge2(state);
+ double tp1 = dag.condEdgeProb(marker, edge1);
+ double tp2 = dag.condEdgeProb(marker, edge2);
+ int symbol1 = dag.symbol(marker, edge1);
+ int symbol2 = dag.symbol(marker, edge2);
+ double ep = gl.gl(marker, sample, symbol1, symbol2);
+ return fwdValue / ( ep*tp1*tp2 );
+ }
+
+ private void sampleAlleles(SingleBaumLevel level, int sample) {
+ int m = level.marker();
+ for (int copy=0; copy<nSamplesPerIndividual; ++copy) {
+ int state = randomPreviousState(level, node1[copy], node2[copy],
+ nodeValue[copy]);
+ node1[copy] = level.parentNode1(state);
+ node2[copy] = level.parentNode2(state);
+ nodeValue[copy] = parentSum(level, sample, state);
+ alleles1[copy][m] = level.symbol1(state);
+ alleles2[copy][m] = level.symbol2(state);
+ }
+ }
+
+ private int randomPreviousState(SingleBaumLevel level, int node1,
+ int node2, double nodeValue) {
+ double d = random.nextDouble() * nodeValue;
+ double sum = 0.0;
+ for (int j=0, n=level.size(); j<n; ++j) {
+ if ( node1==level.childNode1(j)
+ && node2==level.childNode2(j) ) {
+ sum += level.forwardValue(j);
+ if (d <= sum) {
+ return j;
+ }
+ }
+ }
+ return level.size()-1; // rounding fallback; NOTE(review): may not lead to (node1, node2)
+ }
+
+ private SingleBaumLevel nextLevel() {
+ ++arrayIndex;
+ if (arrayIndex == levels.length) {
+ ++windowIndex;
+ arrayIndex = windowIndex;
+ }
+ return levels[arrayIndex];
+ }
+
+ private SingleBaumLevel currentLevel() {
+ return levels[arrayIndex];
+ }
+
+ private SingleBaumLevel previousLevel(int sample) {
+ if (arrayIndex == windowIndex) {
+ --windowIndex;
+ arrayIndex = windowIndex;
+ levels[arrayIndex].setChildNodes(fwdNodes);
+ int startLevel = levels[windowIndex].marker() + 1;
+ int endLevel = startLevel + (levels.length - (windowIndex + 1) );
+ for (int marker=startLevel; marker<endLevel; ++marker) {
+ nextLevel().setForwardValues(fwdNodes, marker, sample);
+ }
+ return currentLevel();
+ }
+ else {
+ return levels[--arrayIndex];
+ }
+ }
+
+ private void forwardAlgorithm(int sample) {
+ fwdNodes.clear();
+ fwdNodes.sumUpdate(0, 0, 1f);
+ this.windowIndex = -1;
+ this.arrayIndex = levels.length - 1;
+ for (int marker=0; marker<nMarkers; ++marker) {
+ nextLevel().setForwardValues(fwdNodes, marker, sample);
+ }
+ }
+
+ private void setInitialBackwardNodesValues() {
+ SingleBaumLevel level = currentLevel();
+ int marker = level.marker();
+ bwdNodes.clear();
+ for (int j=0, n=level.size(); j<n; ++j) {
+ int cn1 = dag.childNode(marker, level.edge1(j));
+ int cn2 = dag.childNode(marker, level.edge2(j));
+ bwdNodes.sumUpdate(cn1, cn2, 1f);
+ }
+ level.setBackwardValues(bwdNodes);
+ }
+}
diff --git a/sample/SingleBaumInterface.java b/sample/SingleBaumInterface.java
new file mode 100644
index 0000000..132d05a
--- /dev/null
+++ b/sample/SingleBaumInterface.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import dag.Dag;
+import haplotype.HapPair;
+import java.util.List;
+import vcf.GL;
+
+/**
+ * <p>Interface {@code SingleBaumInterface} has methods for sampling
+ * haplotype pairs.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface SingleBaumInterface {
+
+ /**
+ * Returns the directed acyclic graph that determines the transition
+ * probabilities.
+ * @return the directed acyclic graph that determines the transition
+ * probabilities
+ */
+ Dag dag();
+
+ /**
+ * Returns the emission probabilities.
+ * @return the emission probabilities
+ */
+ GL gl();
+
+ /**
+ * Returns the number of haplotype pairs that are sampled for each
+ * individual.
+ * @return the number of haplotype pairs that are sampled for each
+ * individual
+ */
+ int nSamplesPerIndividual();
+
+ /**
+ * Returns the initial random seed.
+ * @return the initial random seed
+ */
+ long seed();
+
+ /**
+ * <p>Returns a list of {@code this.nSamplesPerIndividual()} sampled
+ * haplotype pairs for the specified individual. Haplotype pairs are
+ * sampled conditional on the HMM with transition probabilities
+ * determined by {@code this.dag()} and emission probabilities
+ * determined by {@code this.gl()}.
+ * </p>
+ * <p>The contract for this method is unspecified if no haplotype pair
+ * is consistent with the HMM.
+ * </p>
+ * @param sample a sample index
+ * @return a list of {@code this.nSamplesPerIndividual()} sampled
+ * haplotype pairs for the specified individual
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.gl().nSamples()}
+ */
+ List<HapPair> randomSample(int sample);
+
+ /**
+ * <p>Returns a list of {@code this.nSamplesPerIndividual()} sampled
+ * haplotype pairs for the specified individual. Haplotype pairs are
+ * sampled conditional on the HMM with transition probabilities determined
+ * by {@code this.dag()} and emission probabilities determined by
+ * {@code this.gl()}. Posterior genotype probabilities are written to
+ * the specified array. The posterior probability of the {@code j}-th
+ * genotype for the {@code k}-th marker is stored at index
+ * {@code gl.markers().sumGenotypes(k) + j} in the {@code gtProbs} array.
+ * </p>
+ * <p>The contract for this method is unspecified if no haplotype pair
+ * is consistent with the HMM.
+ * </p>
+ * @param sample the sample index
+ * @param gtProbs an array to which posterior genotype probabilities
+ * for the sample will be written
+ * @return a list of {@code this.nSamplesPerIndividual()} sampled
+ * haplotype pairs for the specified individual
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.gl().nSamples()}
+ * @throws IllegalArgumentException if
+ * {@code gtProbs.length != this.gl().markers().sumGenotypes()}
+ * @throws NullPointerException if {@code gtProbs == null}
+ */
+ List<HapPair> randomSample(int sample, double[] gtProbs);
+}
diff --git a/sample/SingleBaumLevel.java b/sample/SingleBaumLevel.java
new file mode 100644
index 0000000..ee75939
--- /dev/null
+++ b/sample/SingleBaumLevel.java
@@ -0,0 +1,530 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+import dag.Dag;
+import java.util.Arrays;
+import vcf.BasicGL;
+import vcf.GL;
+
+/**
+ * <p>Class {@code SingleBaumLevel} computes forward and backward Baum
+ * values at a level of a hidden Markov model (HMM) whose states are
+ * ordered edge pairs of a leveled directed acyclic graph (DAG).
+ * </p>
+ * <p>Instances of class {@code SingleBaumLevel} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SingleBaumLevel {
+
+ // initial length of the parallel per-state arrays below
+ private static final int INITIAL_CAPACITY = 400;
+ // floor substituted for a forward or backward value that falls below it
+ // although the node-pair value it was computed from is positive
+ private static final float MIN_VALUE = 100*Float.MIN_VALUE;
+ private final Dag dag;
+ private final GL gl;
+
+ // marker and sample are -1 until setForwardValues() is first invoked
+ private int marker = -1;
+ private int sample = -1;
+ // number of HMM states currently stored in the parallel arrays
+ private int size = 0;
+
+ // edges1, edges2, fwdValues and bwdValues are parallel arrays indexed by
+ // state; capacity is their common length
+ private int capacity = INITIAL_CAPACITY;
+ private int[] edges1 = new int[INITIAL_CAPACITY];
+ private int[] edges2 = new int[INITIAL_CAPACITY];
+ private float[] fwdValues = new float[INITIAL_CAPACITY];
+ private float[] bwdValues = new float[INITIAL_CAPACITY];
+ // sums of the forward and backward values before normalization
+ private float fwdValueSum = 0f;
+ private float bwdValueSum = 0f;
+
+ // only the first nGenotypes entries of gtProbs are meaningful
+ private int nGenotypes = 0;
+ private float[] gtProbs = new float[3];
+
+ /**
+ * Creates a {@code SingleBaumLevel} whose transition probabilities are
+ * taken from the specified DAG and whose emission probabilities are
+ * taken from the specified {@code GL}.
+ * @param dag the directed acyclic graph that determines the transition
+ * probabilities
+ * @param gl the emission probabilities
+ * @throws IllegalArgumentException if
+ * {@code dag.markers().equals(gl.markers()) == false}
+ * @throws NullPointerException if {@code dag == null || gl == null}
+ */
+ public SingleBaumLevel(Dag dag, GL gl) {
+     // dereferencing both arguments here also provides the null checks
+     boolean sameMarkers = dag.markers().equals(gl.markers());
+     if (!sameMarkers) {
+         throw new IllegalArgumentException("inconsistent markers");
+     }
+     this.gl = gl;
+     this.dag = dag;
+ }
+
+ /**
+ * Sets the Baum forward algorithm values for this level of the HMM
+ * and records the child node pair values in the specified
+ * {@code nodes} parameter. When the method call returns, the {@code nodes}
+ * parameter will be reset to the child node pair values for this level of
+ * the HMM. Any states, value sums, and genotype probabilities stored by
+ * a previous invocation are discarded.
+ *
+ * @param nodes child node pair values at the previous level of the HMM
+ * @param marker the level of the HMM at which the Baum forward algorithm
+ * values will be computed
+ * @param sample a sample index
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.dag().nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.gl().nSamples()}
+ * @throws IndexOutOfBoundsException if either node in any node pair with
+ * non-zero value is not a valid parent node at the specified level of the
+ * HMM
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setForwardValues(SingleNodes nodes, int marker, int sample) {
+ this.marker = marker;
+ this.sample = sample;
+ this.nGenotypes = gl.marker(marker).nGenotypes();
+ // drop the states and sums left over from the previous level
+ this.size = 0;
+ this.fwdValueSum = 0f;
+ this.bwdValueSum = 0f;
+ initializeGtProbs(); // initialized here due to gtProbs() contract
+ // setStates() reads the parent node pairs; setChildNodes() then
+ // overwrites nodes with the child node pairs of the stored states
+ setStates(nodes);
+ setChildNodes(nodes);
+ }
+
+ /*
+ * Makes the first nGenotypes entries of gtProbs available and equal to 0,
+ * allocating a larger array when the current one is too short.
+ */
+ private void initializeGtProbs() {
+     if (gtProbs.length >= nGenotypes) {
+         // array is long enough: zero only the entries that will be used
+         Arrays.fill(gtProbs, 0, nGenotypes, 0f);
+         return;
+     }
+     // a freshly allocated float array is already filled with zeros
+     int grownLength = 3*gtProbs.length/2 + 1;
+     gtProbs = new float[Math.max(nGenotypes, grownLength)];
+ }
+
+ /*
+ * Enumerates, for each stored parent node pair, every ordered pair of
+ * out-edges at this marker, stores the edge pairs whose emission
+ * probability is positive as HMM states, and normalizes their forward
+ * values to sum to 1. The sum before normalization is kept in
+ * fwdValueSum.
+ */
+ private void setStates(SingleNodes nodes) {
+ float valueSum = 0f;
+ for (int j=0, n=nodes.size(); j<n; ++j) {
+ int node1 = nodes.enumNode1(j);
+ int node2 = nodes.enumNode2(j);
+ for (int i1=0, nI1=dag.nOutEdges(marker, node1); i1<nI1; ++i1) {
+ int edge1 = dag.outEdge(marker, node1, i1);
+ int symbol1 = dag.symbol(marker, edge1);
+ for (int i2=0, nI2=dag.nOutEdges(marker, node2); i2<nI2; ++i2) {
+ int edge2 = dag.outEdge(marker, node2, i2);
+ int symbol2 = dag.symbol(marker, edge2);
+ float ep = gl.gl(marker, sample, symbol1, symbol2);
+ // edge pairs with zero emission probability are not stored
+ if (ep > 0.0) {
+ if (size == capacity) {
+ ensureCapacity(size+1);
+ }
+ edges1[size] = edge1;
+ edges2[size] = edge2;
+ float tp1 = dag.condEdgeProb(marker, edge1);
+ float tp2 = dag.condEdgeProb(marker, edge2);
+ float fwdValue = ep * nodes.enumValue(j) * (tp1 * tp2);
+ // keep a state reachable from a positive parent value from
+ // falling below MIN_VALUE
+ if (fwdValue<MIN_VALUE && nodes.enumValue(j) > 0.0) {
+ fwdValue = MIN_VALUE;
+ }
+ fwdValues[size++] = fwdValue;
+ valueSum += fwdValue;
+ }
+ }
+ }
+ }
+ // exactly one holds: some state was stored with positive total value,
+ // or no state was stored
+ assert valueSum>0.0 ^ size==0;
+ for (int k=0; k<size; ++k) {
+ this.fwdValues[k] /= valueSum;
+ }
+ fwdValueSum = valueSum;
+ }
+
+ /**
+ * Replaces the contents of the specified {@code SingleNodes} object with
+ * the Baum forward algorithm child node pair values for this level of
+ * the HMM. The normalized forward value of each stored state is added to
+ * the value of the pair formed by the child nodes of its two edges.
+ *
+ * @param nodes the node pair values that will be set
+ *
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setChildNodes(SingleNodes nodes) {
+     nodes.clear();
+     for (int state=0; state<size; ++state) {
+         int child1 = dag.childNode(marker, edges1[state]);
+         int child2 = dag.childNode(marker, edges2[state]);
+         float forward = fwdValues[state];
+         nodes.sumUpdate(child1, child2, forward);
+     }
+ }
+
+ /**
+ * Sets the Baum backward algorithm values for this level of the HMM
+ * and stores the parent node pair values in the specified
+ * {@code nodes} parameter. When the method call returns, the
+ * {@code nodes} parameter will be reset to the parent
+ * node pair values for this level of the HMM. The posterior genotype
+ * probabilities returned by {@code gtProbs()} are also computed.
+ *
+ * @param nodes parent node pair values at the next level of HMM
+ *
+ * @throws IndexOutOfBoundsException if either node in any node pair with
+ * non-zero value is not a valid child node at this level of the HMM
+ * @throws NullPointerException if {@code nodes == null}
+ */
+ public void setBackwardValues(SingleNodes nodes) {
+ // NOTE(review): bwdValueSum and gtProbs are only reset in
+ // setForwardValues(), so this method accumulates onto them; it appears
+ // to be intended to run once per setForwardValues() call - confirm
+ // first pass: read the backward value of each state's child node pair
+ for (int j=0; j<size; ++j) {
+ int node1 = dag.childNode(marker, edges1[j]);
+ int node2 = dag.childNode(marker, edges2[j]);
+ float backwardValue = nodes.value(node1, node2);
+ bwdValues[j] = backwardValue;
+ bwdValueSum += backwardValue;
+ }
+ // all needed values have been copied out, so nodes can now be reused
+ // to collect the parent node pair values
+ nodes.clear();
+ float gtProbsSum = 0f;
+ for (int j=0; j<size; ++j) {
+ bwdValues[j] /= bwdValueSum;
+ int edge1 = edges1[j];
+ int edge2 = edges2[j];
+ int symbol1 = symbol1(j);
+ int symbol2 = symbol2(j);
+ float tp1 = dag.condEdgeProb(marker, edge1);
+ float tp2 = dag.condEdgeProb(marker, edge2);
+
+ // product of normalized forward and backward values, accumulated
+ // per genotype and rescaled to sum to 1 after the loop
+ float stateProb = fwdValues[j] * bwdValues[j];
+ int gtIndex = BasicGL.genotype(symbol1, symbol2);
+ // gtProbs assumed to be initialized in setForwardValues() method
+ gtProbs[gtIndex] += stateProb;
+ gtProbsSum += stateProb;
+
+ float ep = gl.gl(marker, sample, symbol1, symbol2);
+ float bwdValue = bwdValues[j] * (tp1 * tp2) * ep;
+ // keep a contribution derived from a positive backward value from
+ // falling below MIN_VALUE
+ if (bwdValue < MIN_VALUE && bwdValues[j]>0.0) {
+ bwdValue = MIN_VALUE;
+ }
+ int pn1 = dag.parentNode(marker, edge1);
+ int pn2 = dag.parentNode(marker, edge2);
+ nodes.sumUpdate(pn1, pn2, bwdValue);
+ }
+ for (int j=0; j<nGenotypes; ++j) {
+ gtProbs[j] /= gtProbsSum;
+ }
+ }
+
+ /**
+ * Returns the directed acyclic graph that determines the transition
+ * probabilities. This is the {@code dag} argument that was passed to
+ * the constructor.
+ * @return the directed acyclic graph that determines the transition
+ * probabilities
+ */
+ public Dag dag() {
+ return dag;
+ }
+
+ /**
+ * Returns the emission probabilities. This is the {@code gl} argument
+ * that was passed to the constructor.
+ * @return the emission probabilities
+ */
+ public GL gl() {
+ return gl;
+ }
+
+ /**
+ * Returns the level of the HMM. The value is {@code -1} until
+ * {@code setForwardValues()} is first invoked.
+ * @return the level of the HMM
+ */
+ public int marker() {
+ return marker;
+ }
+
+ /**
+ * Returns the number of possible genotypes at this level of the HMM.
+ * The value is {@code 0} until {@code setForwardValues()} is first
+ * invoked.
+ * @return the number of possible genotypes at this level of the HMM
+ */
+ public int nGenotypes() {
+ return nGenotypes;
+ }
+
+ /**
+ * Returns the specified posterior genotype probability. Returns 0
+ * if the Baum backward probabilities have not been set.
+ * @param gt a genotype index
+ * @return the specified posterior genotype probability
+ * @throws IndexOutOfBoundsException if
+ * {@code gt < 0 || gt >= this.nGenotypes()}
+ */
+ public float gtProbs(int gt) {
+     // The backing array can be longer than nGenotypes, so the bound is
+     // checked against nGenotypes. The exception type matches the
+     // documented contract.
+     if (gt < 0 || gt >= nGenotypes) {
+         throw new IndexOutOfBoundsException(String.valueOf(gt));
+     }
+     return gtProbs[gt];
+ }
+
+ /**
+ * Returns the number of states with nonzero forward probability at
+ * this level of the HMM. Valid state indices for the accessor methods
+ * of this class are {@code 0} through {@code this.size() - 1}.
+ *
+ * @return the number of states with nonzero forward probability at
+ * this level of the HMM
+ */
+ public int size() {
+ return size;
+ }
+
+ /*
+ * Verifies that the specified index refers to a stored HMM state.
+ *
+ * @param state a state index
+ * @throws IndexOutOfBoundsException if state < 0 || state >= size
+ */
+ private void checkIndex(int state) {
+     // Negative indices are rejected here rather than by the array access
+     // in the caller, and the message reports the offending index.
+     if (state < 0 || state >= size) {
+         throw new IndexOutOfBoundsException(String.valueOf(state));
+     }
+ }
+
+ /**
+ * Returns the first edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the first edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edge1(int state) {
+ checkIndex(state);
+ return edges1[state];
+ }
+
+ /**
+ * Returns the second edge of the specified HMM state with nonzero forward
+ * probability.
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the second edge of the specified HMM state with nonzero forward
+ * probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int edge2(int state) {
+ checkIndex(state);
+ return edges2[state];
+ }
+
+ /**
+ * Returns the parent node of the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNode1(int state) {
+ checkIndex(state);
+ return dag.parentNode(marker, edges1[state]);
+ }
+
+ /**
+ * Returns the parent node of the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the parent node of the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int parentNode2(int state) {
+ checkIndex(state);
+ return dag.parentNode(marker, edges2[state]);
+ }
+
+ /**
+ * Returns the child node of the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNode1(int state) {
+ checkIndex(state);
+ return dag.childNode(marker, edges1[state]);
+ }
+
+ /**
+ * Returns the child node of the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the child node of the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int childNode2(int state) {
+ checkIndex(state);
+ return dag.childNode(marker, edges2[state]);
+ }
+
+ /**
+ * Returns the symbol for the first edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the first edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbol1(int state) {
+ return dag.symbol(marker, edge1(state));
+ }
+
+ /**
+ * Returns the symbol for the second edge of the specified HMM state
+ * with nonzero forward probability.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ * @return the symbol for the second edge of the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public int symbol2(int state) {
+ return dag.symbol(marker, edge2(state));
+ }
+
+ /**
+ * Returns the normalized forward value for the specified HMM state
+ * with nonzero forward probability.
+ * The normalized forward value is obtained by dividing the
+ * forward value by the sum of the forward values at this level
+ * of the HMM.
+ *
+ * @param state an index of a HMM state with nonzero forward probability
+ *
+ * @return the normalized forward value for the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public float forwardValue(int state) {
+ checkIndex(state);
+ return fwdValues[state];
+ }
+
+ /**
+ * Returns the normalized backward value for the specified HMM state
+ * with nonzero forward probability.
+ * The normalized backward value is obtained by dividing the
+ * backward value by the sum of the backward values at this level
+ * of the HMM.
+ *
+ * @param state an index of a state with nonzero forward probability
+ *
+ * @return the normalized backward value for the specified HMM state
+ * with nonzero forward probability
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code state < 0 || state >= this.size()}
+ */
+ public float backwardValue(int state) {
+ checkIndex(state);
+ return bwdValues[state];
+ }
+
+ /**
+ * Returns the sum of the forward values at this level of the HMM
+ * when the forward values are computed using forward values
+ * from the previous level that are normalized to sum to 1.
+ * This is the divisor that was used to normalize the values returned
+ * by {@code forwardValue()}.
+ * @return the sum of the forward values at this level of the HMM
+ */
+ public float forwardValuesSum() {
+ return fwdValueSum;
+ }
+
+ /**
+ * Returns the sum of the backward values at this level of the HMM
+ * when the backward values are computed using backward
+ * values from the next level that are normalized to sum to 1.
+ * This is the divisor that was used to normalize the values returned
+ * by {@code backwardValue()}.
+ * @return the sum of the backward values at this level of the HMM
+ */
+ public float backwardValuesSum() {
+ return bwdValueSum;
+ }
+
+ /**
+ * Returns a string description of {@code this}: the level, the number
+ * of stored states, the forward and backward value sums, and then the
+ * edge pair and normalized forward and backward values of each state.
+ * The exact details of the description are unspecified and subject to
+ * change.
+ *
+ * @return a string description of {@code this}
+ */
+ @Override
+ public String toString() {
+     final String gap = " ";
+     final String bar = " | ";
+     StringBuilder out = new StringBuilder(100);
+     out.append("level=").append(marker)
+             .append(" size=").append(size)
+             .append(" forwardValuesSum=").append(fwdValueSum)
+             .append(" backwardSum=").append(bwdValueSum);
+     for (int state=0; state<size; ++state) {
+         out.append(bar).append("j=").append(state).append(": ")
+                 .append(edge1(state)).append(gap)
+                 .append(edge2(state)).append(gap)
+                 .append(forwardValue(state)).append(gap)
+                 .append(backwardValue(state));
+     }
+     return out.append(bar).toString();
+ }
+
+ /*
+ * Grows the four parallel state arrays so that they can hold at least
+ * the specified number of states. Does nothing if the current capacity
+ * already suffices. The new capacity is the larger of the requested
+ * minimum and (3/2 * capacity + 1).
+ *
+ * @param minCapacity the desired minimum state capacity
+ */
+ private void ensureCapacity(int minCapacity) {
+     if (minCapacity <= capacity) {
+         return;
+     }
+     int grown = (capacity * 3)/2 + 1;
+     capacity = Math.max(grown, minCapacity);
+     edges1 = Arrays.copyOf(edges1, capacity);
+     edges2 = Arrays.copyOf(edges2, capacity);
+     fwdValues = Arrays.copyOf(fwdValues, capacity);
+     bwdValues = Arrays.copyOf(bwdValues, capacity);
+ }
+}
diff --git a/sample/SingleNodes.java b/sample/SingleNodes.java
new file mode 100644
index 0000000..0801493
--- /dev/null
+++ b/sample/SingleNodes.java
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package sample;
+
+/**
+ * <p>Class {@code SingleNodes} stores ordered node pairs and associated values.
+ * </p>
+ * <p>Instances of class {@code SingleNodes} are not thread safe.</p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SingleNodes {
+
+ // fraction of capacity at which the hash table is enlarged
+ private static final float loadFactor = 0.75f;
+
+ // number of node pairs currently stored
+ private int size;
+ private int capacity; // required to be a power of 2
+ // rehash() is triggered once size reaches this value
+ private int rehashThreshold;
+
+ // index[j] is the hash-table slot of the j-th stored node pair (j < size)
+ private int[] index;
+ // node1, node2 and value are indexed by hash-table slot; a slot whose
+ // value is 0 is treated as empty
+ private int[] node1;
+ private int[] node2;
+ private float[] value;
+
+ /**
+ * Creates a new instance of {@code SingleNodes} that has an
+ * initial value of 0 for each ordered node pair. The first node
+ * has index 0.
+ */
+ public SingleNodes() {
+ this.size = 0;
+ this.capacity = (1<<10);
+ this.rehashThreshold = (int) (loadFactor * capacity);
+ this.index = new int[capacity];
+ this.node1 = new int[capacity];
+ this.node2 = new int[capacity];
+ this.value = new float[capacity];
+ }
+
+ /*
+ * First hash of an ordered node pair. The index() method uses it as
+ * the starting probe position.
+ */
+ private static long hash1(int node1, int node2) {
+     long seeded = 71L * 5L + node1;
+     return 71L * seeded + node2;
+ }
+
+ /*
+ * Second hash of an ordered node pair. The index() method uses it
+ * (forced to be odd) as the probe step.
+ */
+ private static long hash2(int node1, int node2) {
+     long seeded = 97L * 7L + node1;
+     return 97L * seeded + node2;
+ }
+
+ /*
+ * Returns the hash-table slot for the specified node pair, found by
+ * double hashing. If the pair is not currently stored, the returned
+ * slot is the empty slot (value 0) at which it should be stored.
+ */
+ private int index(int node1, int node2) {
+     long probe = hash1(node1, node2);
+     // the step must be relatively prime to capacity, which is a power
+     // of 2, so the step is forced to be odd
+     long step = hash2(node1, node2) | 1L;
+     for (int attempt=0; attempt<capacity; ++attempt) {
+         int slot = (int) (probe % capacity);
+         boolean empty = (value[slot]==0.0);
+         if (empty || (this.node1[slot]==node1 && this.node2[slot]==node2)) {
+             return slot;
+         }
+         probe += step;
+     }
+     // not expected to be reached: an empty slot is expected to exist
+     assert false;
+     return -1;
+ }
+
+ /*
+ * Doubles the capacity of the internal hash table and re-inserts every
+ * stored node pair. The enumeration order of the stored pairs
+ * (position j in the index array) is preserved.
+ *
+ * Throws IllegalStateException if doubling the capacity overflows int.
+ */
+ private void rehash() {
+ assert this.size>=this.rehashThreshold;
+ int newMaxSize = 2*capacity;
+ if (newMaxSize<0) {
+ throw new IllegalStateException("hash table overflow");
+ }
+ int[] oldIndex = index;
+ int[] oldNode1 = node1;
+ int[] oldNode2 = node2;
+ float[] oldValue = value;
+
+ // the fields must point at the new, empty arrays before index() is
+ // called below, because index() probes this.value/node1/node2 using
+ // this.capacity
+ capacity = newMaxSize;
+ index = new int[newMaxSize];
+ node1 = new int[newMaxSize];
+ node2 = new int[newMaxSize];
+ value = new float[newMaxSize];
+
+ for (int j=0; j<size; ++j) {
+ int oldInd = oldIndex[j];
+ int newIndex = index(oldNode1[oldInd], oldNode2[oldInd]);
+ index[j] = newIndex;
+ node1[newIndex] = oldNode1[oldInd];
+ node2[newIndex] = oldNode2[oldInd];
+ value[newIndex] = oldValue[oldInd];
+ }
+ rehashThreshold = (int) (loadFactor * capacity);
+ }
+
+ /**
+ * Adds the specified positive value to the stored value of the specified
+ * node pair. A node pair that is not yet stored is inserted with the
+ * specified value.
+ *
+ * @param node1 the first node
+ * @param node2 the second node
+ * @param value the value
+ *
+ * @throws IllegalArgumentException if {@code node1 < 0 || node2 < 0}
+ * @throws IllegalArgumentException if
+ * {@code value <= 0 || (Double.isFinite(value) == false)}
+ */
+ public void sumUpdate(int node1, int node2, float value) {
+     if (node1 < 0) {
+         throw new IllegalArgumentException(String.valueOf(node1));
+     }
+     if (node2 < 0) {
+         throw new IllegalArgumentException(String.valueOf(node2));
+     }
+     boolean positiveAndFinite = (value > 0) && Double.isFinite(value);
+     if (!positiveAndFinite) {
+         throw new IllegalArgumentException(String.valueOf(value));
+     }
+     int slot = index(node1, node2);
+     // a slot with value 0 is empty, so the pair is new
+     boolean isNewPair = (this.value[slot]==0f);
+     this.value[slot] += value;
+     if (!isNewPair) {
+         return;
+     }
+     this.index[size++] = slot;
+     this.node1[slot] = node1;
+     this.node2[slot] = node2;
+     if (this.size>=this.rehashThreshold) {
+         rehash();
+     }
+ }
+
+ /**
+ * Returns the number of node pairs with non-zero value. Valid indices
+ * for {@code enumNode1()}, {@code enumNode2()}, and {@code enumValue()}
+ * are {@code 0} through {@code this.size() - 1}.
+ * @return the number of node pairs with non-zero value
+ */
+ public int size() {
+ return size;
+ }
+
+ /*
+ * Verifies that the specified index refers to a stored node pair.
+ *
+ * @param index an index in the list of node pairs with non-zero value
+ * @throws IndexOutOfBoundsException if index < 0 || index >= size()
+ */
+ private void checkSize(int index) {
+     // Negative indices are rejected here so that they are reported with
+     // the offending index instead of failing later in an internal array
+     // access.
+     if (index < 0 || index>=size()) {
+         throw new IndexOutOfBoundsException(String.valueOf(index));
+     }
+ }
+
+ /**
+ * Returns the first node of the specified node pair in the list of
+ * node pairs with non-zero value. Repeated invocations of this
+ * method with the same parameter will return the same value if
+ * node values are not modified between invocations. If
+ * {@code (index >= 0 && index < this.size())}, then the following
+ * expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode1(index),
+ * this.enumNode2(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of node pairs with non-zero
+ * value
+ * @return the first node of the specified node pair in a list of
+ * node pairs with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumNode1(int index) {
+ checkSize(index);
+ return node1[this.index[index]];
+ }
+
+ /**
+ * Returns the second node of the specified node pair in a list of
+ * node pairs with non-zero value. Repeated invocations of this
+ * method with the same parameter will return the same value if
+ * node values are not modified between invocations. If
+ * {@code (index >= 0 && index < this.size())}, then the following
+ * expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode1(index),
+ * this.enumNode2(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of node pairs with non-zero value
+ * @return the second node of the specified node pair in a list of
+ * node pairs with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public int enumNode2(int index) {
+ checkSize(index);
+ return node2[this.index[index]];
+ }
+
+ /**
+ * Returns the value of the specified node pair in a list of
+ * node pairs with non-zero value. Repeated invocations of this
+ * method with the same parameter will return the same value if
+ * node values are not modified between invocations. If
+ * {@code (index >= 0 && index < this.size())}, then the following
+ * expression will always evaluate to {@code true}:<br>
+ * {@code (this.value(this.enumNode1(index),
+ * this.enumNode2(index)) == this.enumValue(index))}.
+ *
+ * @param index an index in a list of node pairs with non-zero value
+ * @return the value of the specified ordered node pair in a list of
+ * node pairs with non-zero value
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public float enumValue(int index) {
+ checkSize(index);
+ return value[this.index[index]];
+ }
+
+ /**
+ * Returns the value of the specified node pair.
+ *
+ * @param node1 the first node
+ * @param node2 the second node
+ * @return the value of the specified node pair
+ * @throws IllegalArgumentException if {@code node1 < 0 || node2 < 0}
+ */
+ public float value(int node1, int node2) {
+ if (node1 < 0) {
+ throw new IllegalArgumentException(String.valueOf(node1));
+ }
+ if (node2 < 0) {
+ throw new IllegalArgumentException(String.valueOf(node2));
+ }
+ return value[index(node1, node2)];
+ }
+
+ /**
+ * Sets the value of each ordered node pair to 0. Only the slots of
+ * the currently stored node pairs are reset, so the cost is
+ * proportional to {@code this.size()} rather than to the table capacity.
+ */
+ public void clear() {
+     // walk the list of stored pairs from the back, emptying each slot
+     while (size > 0) {
+         value[index[--size]] = 0f;
+     }
+ }
+
+ /**
+ * Returns a string representation of {@code this}: the number of
+ * stored node pairs followed by the nodes and value of each pair in
+ * enumeration order. The exact details of the representation are
+ * unspecified and subject to change.
+ *
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+     StringBuilder out = new StringBuilder(80);
+     out.append("size=").append(size);
+     for (int pair=0; pair<size; ++pair) {
+         out.append(" (").append(pair)
+                 .append(": node1=").append(enumNode1(pair))
+                 .append(" node2=").append(enumNode2(pair))
+                 .append(" value=").append(enumValue(pair))
+                 .append(") ");
+     }
+     return out.toString();
+ }
+}
diff --git a/vcf/AL.java b/vcf/AL.java
new file mode 100644
index 0000000..62f7ff4
--- /dev/null
+++ b/vcf/AL.java
@@ -0,0 +1,118 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Interface {@code AL} (Allele Likelihoods) represents allele
+ * likelihoods for a set of haplotypes.
+ * </p>
+ * Instances of class {@code AL} are required to be immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+ public interface AL {
+
+ /**
+ * Returns the probability of the observed data if the specified allele
+ * is the true allele for the specified marker and haplotype.
+ * @param marker a marker index
+ * @param haplotype a haplotype index
+ * @param allele an allele index
+ * @return the probability of the observed data if the specified allele
+ * is the true allele for the specified marker and haplotype
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ * @throws IndexOutOfBoundsException if
+ * {@code haplotype < 0 || haplotype >= this.nHaps()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.marker(marker).nAlleles()}
+ */
+ float al(int marker, int haplotype, int allele);
+
+ /**
+ * Returns the allele on the specified haplotype if the allele
+ * emission probabilities are determined by a called allele, and
+ * returns -1 otherwise.
+ * @param marker a marker index
+ * @param haplotype a haplotype index
+ * @return the allele on the specified haplotype if the allele
+ * emission probabilities are determined by a called allele, and
+ * returns -1 otherwise
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code haplotype < 0 || haplotype >= this.nHaps()}
+ */
+ int allele(int marker, int haplotype);
+
+ /**
+ * Returns the number of markers.
+ * @return the number of markers
+ */
+ int nMarkers();
+
+ /**
+ * Returns the specified marker.
+ * @param marker the marker index
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ Marker marker(int marker);
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ Markers markers();
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ int nSamples();
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ Samples samples();
+
+ /**
+ * Returns the number of haplotypes.
+ * @return the number of haplotypes
+ */
+ int nHaps();
+
+ /**
+ * Returns the allelic error probability.
+ * @return the allelic error probability
+ */
+ float errProb();
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ String toString();
+ }
diff --git a/vcf/AllData.java b/vcf/AllData.java
new file mode 100644
index 0000000..be5f132
--- /dev/null
+++ b/vcf/AllData.java
@@ -0,0 +1,355 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.SampleIds;
+import beagleutil.Samples;
+import blbutil.SampleFileIt;
+import haplotype.HapPair;
+import haplotype.RefHapPairs;
+import haplotype.SampleHapPairs;
+import haplotype.WrappedHapPair;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * <p>Class {@code AllData} represents a sliding window of
+ * reference and target VCF records.
+ * </p>
+ * <p>Instances of class {@code AllData} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class AllData implements Data {
+
+ private int window = 0;
+ private VcfEmission[] refData;
+ private SampleHapPairs refSampleHapPairs;
+ private GL refEmissions;
+ private VcfEmission[] targetData; // missing markers as null entries
+ private int[] refIndices;
+ private int[] targetIndices;
+ private GL targetEmissions;
+ private final Samples allSamples;
+
+ private final List<HapPair> refHapPairs;
+ private final List<HapPair> targetRefHapPairs; // at target markers
+ private final VcfWindow refWindow;
+ private final RestrictedVcfWindow targetWindow;
+
+ /**
+ * Constructs and returns a new {@code AllData} instance from VCF records
+ * returned by the specified {@code SampleFileIt} objects.
+ *
+ * @param refIt an iterator that returns reference VCF records
+ * @param targetIt an iterator that returns target VCF records
+ * @return a new {@code AllData} instance.
+ *
+ * @throws IllegalArgumentException if both the reference data and
+ * target data contain no samples
+ * @throws IllegalArgumentException if a format error is detected
+ * in a string VCF record
+ * @throws NullPointerException if {@code refIt == null || targetIt == null}
+ */
+ public static AllData allData(SampleFileIt<VcfEmission> refIt,
+ SampleFileIt<? extends VcfEmission> targetIt) {
+ if (refIt.samples().nSamples()==0 && targetIt.samples().nSamples()==0) {
+ throw new IllegalArgumentException("nSamples==0");
+ }
+ VcfWindow refWindow = new VcfWindow(refIt);
+ RestrictedVcfWindow targetWindow = new RestrictedVcfWindow(targetIt);
+ return new AllData(refWindow, targetWindow);
+ }
+
+ private AllData(VcfWindow refWind, RestrictedVcfWindow targetWind) {
+ checkSampleOverlap(refWind.samples(), targetWind.samples());
+ this.refWindow = refWind;
+ this.targetWindow = targetWind;
+
+ this.refData = new VcfEmission[0];
+ this.refSampleHapPairs = null;
+ this.refEmissions = new RefGL(refWind.samples(), refData);
+ this.targetData = new VcfEmission[0];
+ this.refIndices = new int[0];
+ this.targetIndices = new int[0];
+ this.targetEmissions = new BasicGL(targetWind.samples(), targetData);
+ this.allSamples = allSamples(refWind.samples(), targetWind.samples());
+
+ this.refHapPairs = new ArrayList<>(0);
+ this.targetRefHapPairs = new ArrayList<>(0);
+ }
+
+ private static Samples allSamples(Samples ref, Samples target) {
+ /*
+ Target samples are listed first so that sample indices agree
+ with sample indices in target data genotype likelihoods.
+ */
+ int nRef = ref.nSamples();
+ int nTarget = target.nSamples();
+ int[] idIndices = new int[nRef + nTarget];
+ for (int j=0; j<nTarget; ++j) {
+ idIndices[j] = target.idIndex(j);
+ }
+ for (int j=0; j<nRef; ++j) {
+ idIndices[nTarget + j] = ref.idIndex(j);
+ }
+ return new Samples(idIndices);
+ }
+
+ private static void checkSampleOverlap(Samples ref, Samples nonRef) {
+ int nRef = ref.nSamples();
+ int nNonRef = nonRef.nSamples();
+ int n = nRef + nNonRef;
+ int[] idIndices = new int[n];
+ for (int j=0; j<nRef; ++j) {
+ idIndices[j] = ref.idIndex(j);
+ }
+ for (int j=0; j<nNonRef; ++j) {
+ idIndices[nRef + j] = nonRef.idIndex(j);
+ }
+ Arrays.sort(idIndices);
+ for (int j=1; j<idIndices.length; ++j) {
+ if (idIndices[j-1]==idIndices[j]) {
+ String s = "Overlap between reference and non-reference samples: "
+ + SampleIds.instance().id(idIndices[j-1]);
+ throw new IllegalArgumentException(s);
+ }
+ }
+ }
+
+ @Override
+ public boolean lastWindowOnChrom() {
+ return refWindow.lastWindowOnChrom();
+ }
+
+ @Override
+ public boolean canAdvanceWindow() {
+ return refWindow.canAdvanceWindow();
+ }
+
+ @Override
+ public void advanceWindow(int requestedOverlap, int windowSize) {
+ Samples refSamples = refWindow.samples();
+ refData = refWindow.advanceWindow(requestedOverlap, windowSize);
+ refEmissions = new RefGL(refSamples, refData);
+ refSampleHapPairs = new RefHapPairs(refEmissions.markers(), refSamples, refData);
+ targetData = targetWindow.advanceWindow(refEmissions.markers());
+ refIndices = refIndices(targetData);
+ targetIndices = targetIndices(targetData);
+ targetEmissions = targetEmissions(targetWindow.samples(),
+ targetData, refIndices);
+ ++window;
+ setRefHaplotypes(refEmissions.markers(), refData);
+ setTargetRefHaplotypes(targetEmissions.markers(), refData, refIndices);
+ }
+
+ @Override
+ public int window() {
+ return window;
+ }
+
+ private static int[] refIndices(VcfEmission[] vma) {
+ int nonNullCnt = 0;
+ for (VcfEmission vm : vma) {
+ if (vm!=null) {
+ ++nonNullCnt;
+ }
+ }
+ int[] inclusionMap = new int[nonNullCnt];
+ int index = 0;
+ for (int j=0; j<vma.length; ++j) {
+ if (vma[j]!=null) {
+ inclusionMap[index++] = j;
+ }
+ }
+ if (index != inclusionMap.length) {
+ throw new IllegalStateException("vma modification detected");
+ }
+ return inclusionMap;
+ }
+
+ private static int[] targetIndices(VcfEmission[] vma) {
+ int[] inclusionMap = new int[vma.length];
+ int index = 0;
+ for (int j=0; j<inclusionMap.length; ++j) {
+ if (vma[j]!=null) {
+ inclusionMap[j] = index++;
+ }
+ else {
+ inclusionMap[j] = -1;
+ }
+ }
+ return inclusionMap;
+ }
+
+ private static GL targetEmissions(Samples samples,
+ VcfEmission[] vma, int[] refMarkerIndex) {
+ VcfEmission[] restricted = new VcfEmission[refMarkerIndex.length];
+ for (int j=0; j<refMarkerIndex.length; ++j) {
+ restricted[j] = vma[refMarkerIndex[j]];
+ }
+ return new BasicGL(samples, restricted);
+ }
+
+ private void setRefHaplotypes(Markers refMarkers, VcfEmission[] refData) {
+ refHapPairs.clear();
+ SampleHapPairs refHaplotypes =
+ new RefHapPairs(refMarkers, refWindow.samples(), refData);
+ for (int j=0, n=refHaplotypes.nSamples(); j<n; ++j) {
+ refHapPairs.add(new WrappedHapPair(refHaplotypes, j));
+ }
+ }
+
+ private void setTargetRefHaplotypes(Markers targetMarkers, VcfEmission[] refData,
+ int[] refMarkerIndices) {
+ assert targetMarkers.nMarkers()==refMarkerIndices.length;
+ targetRefHapPairs.clear();
+ VcfEmission[] vma = new VcfEmission[refMarkerIndices.length];
+ for (int j=0; j<refMarkerIndices.length; ++j) {
+ vma[j] = refData[refMarkerIndices[j]];
+ }
+ SampleHapPairs refHaplotypes
+ = new RefHapPairs(targetMarkers, refWindow.samples(), vma);
+ for (int j=0, n=refHaplotypes.nSamples(); j<n; ++j) {
+ targetRefHapPairs.add(new WrappedHapPair(refHaplotypes, j));
+ }
+ }
+
+ @Override
+ public int targetOverlap() {
+ return targetWindow.overlap();
+ }
+
+ @Override
+ public int overlap() {
+ return refWindow.overlap();
+ }
+
+ @Override
+ public int nTargetMarkers() {
+ return targetEmissions.markers().nMarkers();
+ }
+
+ @Override
+ public int nTargetMarkersSoFar() {
+ return targetWindow.cumMarkerCnt();
+ }
+
+ @Override
+ public Markers targetMarkers() {
+ return targetEmissions.markers();
+ }
+
+
+ @Override
+ public int nMarkers() {
+ return refEmissions.nMarkers();
+ }
+
+ @Override
+ public int nMarkersSoFar() {
+ return refWindow.cumMarkerCnt();
+ }
+
+ @Override
+ public Markers markers() {
+ return refEmissions.markers();
+ }
+
+ @Override
+ public int targetMarkerIndex(int refIndex) {
+ return targetIndices[refIndex];
+ }
+
+ @Override
+ public int markerIndex(int nonRefIndex) {
+ return refIndices[nonRefIndex];
+ }
+
+ @Override
+ public int nTargetSamples() {
+ return targetEmissions.nSamples();
+ }
+
+ @Override
+ public Samples targetSamples() {
+ return targetEmissions.samples();
+ }
+
+ @Override
+ public int nRefSamples() {
+ return refWindow.nSamples();
+ }
+
+ @Override
+ public Samples refSamples() {
+ return refWindow.samples();
+ }
+
+ @Override
+ public int nAllSamples() {
+ return allSamples.nSamples();
+ }
+
+ @Override
+ public Samples allSamples() {
+ return allSamples;
+ }
+
+
+ @Override
+ public GL targetGL() {
+ return targetEmissions;
+ }
+
+ @Override
+ public List<HapPair> restrictedRefHapPairs() {
+ return new ArrayList<>(targetRefHapPairs);
+ }
+
+ @Override
+ public List<HapPair> refHapPairs() {
+ return new ArrayList<>(refHapPairs);
+ }
+
+ @Override
+ public SampleHapPairs refSampleHapPairs() {
+ return refSampleHapPairs;
+ }
+
+ @Override
+ public void close() {
+ refWindow.close();
+ targetWindow.close();
+ }
+
+ /**
+ * Returns a string representation of {@code this}. The exact
+ * details of the representation are unspecified and subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(20);
+ sb.append(this.getClass().toString());
+ return sb.toString();
+ }
+}
diff --git a/vcf/BasicGL.java b/vcf/BasicGL.java
new file mode 100644
index 0000000..142d5c6
--- /dev/null
+++ b/vcf/BasicGL.java
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+
+/**
+ * <p>Class {@code BasicGL} represents genotype emission probabilities
+ * for a set of samples.
+ * </p>
+ * Instances of class {@code BasicGL} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BasicGL implements GL {
+
+ private final Samples samples;
+ private final Markers markers;
+ private final VcfEmission[] vma;
+ private final boolean isRefData;
+
+ /**
+ * Returns the genotype index corresponding to the
+ * specified unordered alleles.
+ * @param a1 the first allele index of an unordered genotype
+ * @param a2 the second allele index of an unordered genotype
+ * @return the genotype index corresponding to the
+ * specified unordered alleles
+ * @throws IllegalArgumentException if {@code a1 < 0 || a2 < 0}
+ */
+ public static int genotype(int a1, int a2) {
+ if (a1<=a2) {
+ if (a1 < 0) {
+ String s = "allele < 0: " + a1 + " " + a2;
+ throw new IllegalArgumentException(s);
+ }
+ return (a2*(a2+1))/2 + a1;
+ }
+ else {
+ if (a2<0) {
+ String s = "allele < 0: " + a1 + " " + a2;
+ throw new IllegalArgumentException(s);
+ }
+ return (a1*(a1+1))/2 + a2;
+ }
+ }
+
+ /**
+ * Constructs a {@code BasicGL} instance.
+ *
+ * @param samples the list of samples with genotype data
+ * @param vma genotype emission probabilities
+ *
+ * @throws IllegalArgumentException
+ * if elements of {@code vma} corresponding to the same chromosome
+ * are not contiguous and sorted in chromosome position order
+ * @throws IllegalArgumentException if any
+ * two {@code vma} elements correspond to the same genetic marker
+ * @throws IllegalArgumentException if
+ * {@code vma[j].samples().equals(samples) == false} for any {@code j}
+ * satisfying {@code 0 <= j && j < vma.length}
+ *
+ * @throws NullPointerException if {@code samples == null}
+ * @throws NullPointerException if {@code vma == null}
+ * @throws NullPointerException if {@code vma[j] == null} for any {@code j}
+ * satisfying {@code 0 <= j && j < vma.length}
+ */
+ public BasicGL(Samples samples, VcfEmission[] vma) {
+ checkSamples(samples, vma);
+ this.markers = markers(vma);
+ this.samples = samples;
+ this.vma = vma.clone();
+ this.isRefData = isRefData(vma);
+ }
+
+ private static void checkSamples(Samples samples, VcfEmission[] mla) {
+ for (int j=0; j<mla.length; ++j) {
+ if (mla[j].samples().equals(samples)==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ }
+ }
+
+ private static Markers markers(VcfEmission[] vma) {
+ Marker[] markers = new Marker[vma.length];
+ for (int j=0; j<markers.length; ++j) {
+ markers[j] = vma[j].marker();
+ }
+ return Markers.create(markers);
+ }
+
+ private static boolean isRefData(VcfEmission[] vma) {
+ boolean isRefData = true;
+ for (int j=0; j<vma.length && isRefData==true; ++j) {
+ if (vma[j].isRefData()==false) {
+ isRefData = false;
+ }
+ }
+ return isRefData;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return isRefData;
+ }
+
+ @Override
+ public float gl(int marker, int sample, int allele1, int allele2) {
+ return vma[marker].gl(sample, allele1, allele2);
+ }
+
+ @Override
+ public boolean isPhased(int marker, int sample) {
+ return vma[marker].isPhased(sample);
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ return vma[marker].allele1(sample);
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ return vma[marker].allele2(sample);
+ }
+
+ @Override
+ public int allele(int marker, int hap) {
+ return vma[marker].allele(hap);
+ }
+
+ @Override
+ public int nMarkers() {
+ return vma.length;
+ }
+
+ @Override
+ public Marker marker(int markerIndex) {
+ return markers.marker(markerIndex);
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*samples.nSamples();
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("[BasicGL: nMarkers=");
+ sb.append(nMarkers());
+ sb.append(" nSamples=");
+ sb.append(nSamples());
+ for (VcfEmission vm : vma) {
+ sb.append(Const.nl);
+ sb.append(vm);
+ }
+ sb.append(']');
+ return sb.toString();
+ }
+}
diff --git a/vcf/BasicMarker.java b/vcf/BasicMarker.java
new file mode 100644
index 0000000..b2737ef
--- /dev/null
+++ b/vcf/BasicMarker.java
@@ -0,0 +1,547 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.ChromIds;
+import blbutil.Const;
+import blbutil.StringUtil;
+import blbutil.Utilities;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * <p>Class {@code BasicMarker} represents a genetic marker.
+ * </p>
+ * <p>Instances of class {@code BasicMarker} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class BasicMarker implements Marker {
+
+ private static final int MIN_NUMBER_FIELDS = 8;
+
+ private static final String[] EMPTY_ID_ARRAY = new String[0];
+ private static final Map<String, String[]> allelesMap
+ = new HashMap<>(24);
+
+ private final int chromIndex;
+ private final int pos;
+ private final String[] ids;
+ private final String[] alleles;
+ private final int nGenotypes;
+ private final int end;
+
+ /**
+ * Constructs a new {@code BasicMarker} instance from the specified data.
+ * The {@code end()} method of the new instance will return {@code -1}.
+ * The JVM will exit with an error message if any marker identifier
+ * in the specified {@code ids} array or if any allele identifier in the
+ * specified {@code alleles} array does not conform to the VCF
+ * specification.
+ *
+ * @param chrom a chromosome index
+ * @param pos the marker position
+ * @param ids a list of marker identifiers
+ * @param alleles a list of alleles beginning with the reference allele
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ * @throws NullPointerException if {@code ids == null} or if any element
+ * of {@code ids} is {@code null}
+ * @throws NullPointerException if {@code alleles == null} or if any element
+ * of {@code alleles} is {@code null}
+ */
+ public BasicMarker(int chrom, int pos, String[] ids, String[] alleles) {
+ this(chrom, pos, ids, alleles, -1);
+ }
+
+ /**
+ * Constructs a new {@code BasicMarker} instance from the specified data.
+ * The JVM will exit with an error message if any marker identifier
+ * in the specified {@code ids} array does not conform to the VCF
+ * specification, if any allele identifier in the specified {@code alleles}
+ * array does not conform to the VCF specification, or if
+ * {@code (end != -1 && end < pos)}.
+ * @param chrom a chromosome index
+ * @param pos the marker position
+ * @param ids a list of marker identifiers
+ * @param alleles a list of alleles beginning with the reference allele
+ * @param end the INFO END field, or -1 if there is no INFO END field
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code chrom < 0 || chrom >= ChromIds.instance().size()}
+ * @throws NullPointerException if {@code ids == null} or if any element
+ * of {@code ids} is {@code null}
+ * @throws NullPointerException if {@code alleles == null} or if any element
+ * of {@code alleles} is {@code null}
+ */
+ public BasicMarker(int chrom, int pos, String[] ids, String[] alleles,
+ int end) {
+ if (chrom<0 || chrom>=ChromIds.instance().size()) {
+ throw new IndexOutOfBoundsException(String.valueOf(chrom));
+ }
+ checkIds(chrom, pos, ids);
+ checkAlleles(chrom, pos, alleles);
+ checkEnd(chrom, pos, end);
+ this.chromIndex = chrom;
+ this.pos = pos;
+ this.ids = ids.length == 0 ? EMPTY_ID_ARRAY : ids.clone();
+ this.alleles = canonicalAlleles(alleles.clone());
+ this.nGenotypes = (alleles.length*(alleles.length+1))/2;
+ this.end = end;
+ }
+
+ private static void checkIds(int chrom, int pos, String[] ids) {
+ for (String id : ids) {
+ for (int j=0, n=id.length(); j<n; ++j) {
+ if (Character.isWhitespace(id.charAt(j))) {
+ String s = "ERROR: ID field contains white-space at "
+ + coordinate(chrom, pos) + " [" + id + "]";
+ Utilities.exit(s);
+ }
+ }
+ }
+ }
+
+ private static void checkAlleles(int chrom, int pos, String[] alleles) {
+ if (alleles.length<2) {
+ String s = "ERROR: missing REF or ALT allele at "
+ + coordinate(chrom, pos);
+ throw new IllegalArgumentException(s);
+ }
+ Set<String> set = new HashSet<>(Arrays.asList(alleles));
+ if (set.size() != alleles.length) {
+ String s = "ERROR: duplicate allele at "
+ + coordinate(chrom, pos) + " " + Arrays.toString(alleles);
+ Utilities.exit(s);
+ }
+ checkREF(chrom, pos, alleles[0]);
+ for (int j=1; j<alleles.length; ++j) {
+ checkAltAllele(chrom, pos, alleles[j]);
+ }
+ }
+
+ private static void checkREF(int chrom, int pos, String ref) {
+ if (ref.isEmpty()) {
+ String s = "ERROR: missing REF field at " + coordinate(chrom, pos);
+ Utilities.exit(s);
+ }
+ for (int j=0, n=ref.length(); j<n; ++j) {
+ char c = Character.toUpperCase(ref.charAt(j));
+ if ((c=='A' || c=='C' || c=='G' || c=='T' || c=='N')==false) {
+ String s = "ERROR: REF field is not a sequence"
+ + " of A, C, T, G, or N characters at "
+ + coordinate(chrom, pos) + " [" + ref + "]" ;
+ Utilities.exit(s);
+ }
+ }
+ }
+
+ private static void checkAltAllele(int chrom, int pos, String alt) {
+ int n = alt.length();
+ if (n==1 && alt.charAt(0)=='*') {
+ return;
+ }
+ if (n >= 2 && alt.charAt(0)=='<' && alt.charAt(n-1)=='>') {
+ for (int j=1; j<n-1; ++j) {
+ char c = alt.charAt(j);
+ if (Character.isWhitespace(c) || c==Const.comma || c=='<'
+ || c=='>') {
+ String s = "ERROR: invalid ALT allele at "
+ + coordinate(chrom, pos) + " [" + alt + "]";
+ Utilities.exit(s);
+ }
+ }
+ }
+ else {
+ for (int j=0; j<n; ++j) {
+ char c = Character.toUpperCase(alt.charAt(j));
+ if ((c=='A' || c=='C' || c=='G' || c=='T' || c=='N')==false) {
+ String s = "ERROR: invalid ALT allele at "
+ + coordinate(chrom, pos) + " [" + alt + "]";
+ Utilities.exit(s);
+ }
+ }
+ }
+ }
+
+ /* Return specified String[] alleles if not a SNV */
+ private static String[] canonicalAlleles(String[] alleles) {
+ if (isSNV(alleles)) {
+ String key = alleles[0];
+ for (int j=1; j<alleles.length; ++j) {
+ key += alleles[j];
+ }
+ String[] storedAlleles = allelesMap.get(key);
+ if (storedAlleles!=null) {
+ alleles = storedAlleles;
+ }
+ else {
+ allelesMap.put(key, alleles.clone());
+ }
+ }
+ return alleles;
+ }
+
+ private static boolean isSNV(String[] alleles) {
+ for (String a : alleles) {
+ if (a.length()!=1) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private static void checkEnd(int chrom, int pos, int end) {
+ if (end != -1 && end < pos) {
+ String s = "ERROR: invalid INFO:END field at "
+ + coordinate(chrom, pos) + " [" + end + "]";
+ Utilities.exit(s);
+ }
+ }
+
+ /**
+ * Constructs a new {@code BasicMarker} instance from the specified
+ * string VCF record.
+ * @param vcfRecord a VCF record
+ * @throws IllegalArgumentException if the specified VCF
+ * record has fewer than 8 tab-delimited fields, or if a format
+ * error is detected in the specified VCF record
+ * @throws NullPointerException if {@code vcfRecord == null}
+ */
+ @SuppressWarnings("RedundantStringConstructorCall")
+ public BasicMarker(String vcfRecord) {
+ String[] fields = StringUtil.getFields(vcfRecord, Const.tab,
+ MIN_NUMBER_FIELDS+1);
+ if (fields.length < MIN_NUMBER_FIELDS) {
+ String s = "VCF record does not contain at least "
+ + MIN_NUMBER_FIELDS + " tab-delimited fields: "
+ + vcfRecord;
+ Utilities.exit(s);
+ }
+ // Store minimal required data, not entire VCF record
+ for (int j=0; j<5; ++j) {
+ fields[j] = new String(fields[j]);
+ }
+ this.chromIndex = extractChrom(fields[0], vcfRecord);
+ this.pos = extractPos(fields[1], vcfRecord);
+ this.ids = extractIds(chromIndex, pos, fields[2]);
+ this.alleles = extractAlleles(chromIndex, pos, fields[3], fields[4]);
+ this.nGenotypes = alleles.length*(alleles.length+1)/2;
+ this.end = extractEnd(chromIndex, pos, vcfRecord);
+ }
+
+ private static int extractChrom(String chrom, String vcfRecord) {
+ if (chrom.isEmpty() || chrom.equals(Const.MISSING_DATA_STRING)) {
+ String s = "ERROR: missing CHROM field: " + Const.nl +
+ vcfRecord.substring(0,80);
+ Utilities.exit(s);
+ }
+ for (int j=0, n=chrom.length(); j<n; ++j) {
+ char c = chrom.charAt(j);
+ if (c==Const.colon || Character.isWhitespace(c)) {
+ String s = "invalid character in CHROM field ['" + c
+ + "']: " + Const.nl + vcfRecord.substring(0,80);
+ Utilities.exit(s);
+ }
+ }
+ return ChromIds.instance().getIndex(chrom);
+ }
+
+ private static int extractPos(String pos, String vcfRecord) {
+ for (int j=0, n=pos.length(); j<n; ++j) {
+ if (Character.isDigit(pos.charAt(j))==false) {
+ String s = "ERROR: invalid POS field [" + pos + "]: "
+ + Const.nl + vcfRecord.substring(0,80);
+ Utilities.exit(s);
+ }
+ }
+ return Integer.parseInt(pos);
+ }
+
+ private static String[] extractIds(int chrom, int pos, String id) {
+ if (id.isEmpty()) {
+ String s = "ERROR: missing ID field at " + coordinate(chrom, pos);
+ Utilities.exit(s);
+ }
+ if (id.equals(Const.MISSING_DATA_STRING)) {
+ return EMPTY_ID_ARRAY;
+ }
+ String[] ids = StringUtil.getFields(id, Const.semicolon);
+ checkIds(chrom, pos, ids);
+ return ids;
+ }
+
+ private static String[] extractAlleles(int chrom, int pos, String ref,
+ String alt) {
+ if (ref.isEmpty()) {
+ String s = "ERROR: missing REF field at " + coordinate(chrom, pos);
+ Utilities.exit(s);
+ }
+ if (alt.isEmpty()) {
+ String s = "ERROR: missing ALT field: at " + coordinate(chrom, pos);
+ Utilities.exit(s);
+ }
+ String[] altAlleles = EMPTY_ID_ARRAY;
+ if (alt.equals(Const.MISSING_DATA_STRING)==false) {
+ altAlleles = StringUtil.getFields(alt, Const.comma);
+ }
+ String[] alleles = new String[altAlleles.length + 1];
+ alleles[0] = ref;
+ System.arraycopy(altAlleles, 0, alleles, 1, altAlleles.length);
+ checkAlleles(chrom, pos, alleles);
+ return canonicalAlleles(alleles);
+ }
+
+ /*
+ * Returns the value of the last END key in the specified INFO field, or
+ * returns -1 if there is no END key in the INFO field.
+ */
+ private static int extractEnd(int chrom, int pos, String info) {
+ String[] fields = StringUtil.getFields(info, Const.semicolon);
+ String key = "END=";
+ int end = -1;
+ for (String field : fields) {
+ if (field.startsWith(key)) {
+ String value = field.substring(4);
+ for (int j=0, n=value.length(); j<n; ++j) {
+ char c = value.charAt(j);
+ if (Character.isDigit(c)==false) {
+ String s = "ERROR: invalid INFO:END field at "
+ + coordinate(chrom, pos) + " [" + key + value
+ + "]";
+ Utilities.exit(s);
+ }
+ }
+ end = Integer.parseInt(value);
+ checkEnd(chrom, pos, end);
+ }
+ }
+ return end;
+ }
+
+ /**
+ * Constructs and returns a new marker obtained from the specified marker
+ * by changing the marker's non-symbolic alleles to the alleles on the
+ * opposite chromosome strand.
+ * @param marker a marker
+ * @return the equivalent marker on the opposite chromosome strand
+ * @throws NullPointerException if {@code marker == null}
+ */
+ public static Marker flipStrand(Marker marker) {
+ return new BasicMarker(marker);
+ }
+
+ /* Private constructor used by flipStrand(Marker) method */
+ private BasicMarker(Marker markerOnReverseStrand) {
+ Marker m = markerOnReverseStrand;
+ this.chromIndex = m.chromIndex();
+ this.pos = m.pos();
+ this.ids = new String[m.nIds()];
+ for (int j=0; j<this.ids.length; ++j) {
+ this.ids[j] = m.id(j);
+ }
+ this.alleles = new String[m.nAlleles()];
+ for (int j=0; j<this.alleles.length; ++j) {
+ if (m.allele(j).charAt(0)!='<') { // not a symbolic allele
+ this.alleles[j] = flipAllele(m.allele(j));
+ }
+ else {
+ this.alleles[j] = m.allele(j);
+ }
+ }
+ this.nGenotypes = m.nGenotypes();
+ this.end = m.end();
+ }
+
+ private static String flipAllele(String allele) {
+ char[] ca = new char[allele.length()];
+ for (int j=0; j<ca.length; ++j) {
+ ca[j] = flipBase(allele.charAt(j));
+ }
+ return new String(ca);
+ }
+
+ private static char flipBase(char c) {
+ switch (c) {
+ case 'A' : return 'T';
+ case 'C' : return 'G';
+ case 'G' : return 'C';
+ case 'T' : return 'A';
+ case 'N' : return 'N';
+ case '*' : return '*';
+ default: assert false; return 0;
+ }
+ }
+
+
+ @Override
+ public String chrom() {
+ return ChromIds.instance().id(chromIndex);
+ }
+
+ @Override
+ public int chromIndex() {
+ return chromIndex;
+ }
+
+ @Override
+ public int pos() {
+ return pos;
+ }
+
+ @Override
+ public int nIds() {
+ return ids.length;
+ }
+
+ @Override
+ public String id(int index) {
+ return ids[index];
+ }
+
+ @Override
+ public String id() {
+ return ids.length>0 ? ids[0] : coordinate(chromIndex, pos);
+ }
+
+ @Override
+ public int nAlleles() {
+ return alleles.length;
+ }
+
+ @Override
+ public String allele(int index) {
+ return alleles[index];
+ }
+
+ @Override
+ public String[] alleles() {
+ return alleles.clone();
+ }
+
+ @Override
+ public int nGenotypes() {
+ return nGenotypes;
+ }
+
+ @Override
+ public int end() {
+ return end;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(50);
+ sb.append(chrom());
+ sb.append(Const.tab);
+ sb.append(pos);
+ if (ids.length==0) {
+ sb.append(Const.tab);
+ sb.append(Const.MISSING_DATA_CHAR);
+ }
+ else {
+ for (int j=0; j<ids.length; ++j) {
+ sb.append(j==0 ? Const.tab : Const.semicolon);
+ sb.append(ids[j]);
+ }
+ }
+ if (alleles.length==1) {
+ sb.append(Const.tab);
+ sb.append(alleles[0]);
+ sb.append(Const.tab);
+ sb.append(Const.MISSING_DATA_CHAR);
+ }
+ else {
+ for (int j=0; j<alleles.length; ++j) {
+ sb.append(j<2 ? Const.tab : Const.comma);
+ sb.append(alleles[j]);
+ }
+ }
+ return sb.toString();
+ }
+
+ @Override
+ public int hashCode() {
+ int hash = 5;
+ hash = 29 * hash + chromIndex;
+ hash = 29 * hash + this.pos;
+ for (int j=0; j<alleles.length; ++j) {
+ hash = 29 * hash + alleles[j].hashCode();
+ }
+ hash = 29 * hash + end;
+ return hash;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this==obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ final BasicMarker other = (BasicMarker) obj;
+ if (this.chromIndex != other.chromIndex) {
+ return false;
+ }
+ if (this.pos != other.pos) {
+ return false;
+ }
+ if (!Arrays.equals(this.alleles, other.alleles)) {
+ return false;
+ }
+ return this.end == other.end;
+ }
+
+ @Override
+ public int compareTo(Marker other) {
+ if (this.chromIndex != other.chromIndex()) {
+ return (this.chromIndex < other.chromIndex()) ? -1 : 1;
+ }
+ if (this.pos != other.pos()) {
+ return (this.pos < other.pos()) ? -1 : 1;
+ }
+ int n = Math.min(this.alleles.length, other.nAlleles());
+ for (int j=0; j<n; ++j) {
+ int cmp = this.alleles[j].compareTo(other.allele(j));
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+ if (this.alleles.length != other.nAlleles()) {
+ return (this.alleles.length < other.nAlleles()) ? -1 : 1;
+ }
+ if (this.end != other.end()) {
+ return (this.end < other.end()) ? -1 : 1;
+ }
+ return 0;
+ }
+
+ private static String coordinate(int chrom, int pos) {
+ return ChromIds.instance().id(chrom) + Const.colon + pos;
+ }
+}
diff --git a/vcf/BitSetGT.java b/vcf/BitSetGT.java
new file mode 100644
index 0000000..67955fa
--- /dev/null
+++ b/vcf/BitSetGT.java
@@ -0,0 +1,482 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import java.util.BitSet;
+
+/**
+ * <p>Class {@code BitSetGT} represents genotype emission
+ * probabilities for a list of samples at a single marker.
+ * The genotype emission probabilities are determined by the called
+ * genotypes for the samples.
+ * </p>
+ * <p>Class {@code BitSetGT} stores alleles using
+ * {@code java.util.BitSet} objects.
+ * </p>
+ * <p>Instances of class {@code BitSetGT} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BitSetGT implements VcfEmission {
+
+ /**
+ * The VCF FORMAT code for genotype data: "GT".
+ */
+ public static final String GT_FORMAT = "GT";
+
+ private final int bitsPerAllele;
+ private final Marker marker;
+ private final Samples samples;
+ private final boolean isRefData;
+
+ private final BitSet allele1;
+ private final BitSet allele2;
+ private final BitSet isMissing1;
+ private final BitSet isMissing2;
+ private final BitSet isPhased;
+
+ /**
+ * Constructs a new {@code BitSetGT} instance representing
+ * the specified VCF record's GT format field data.
+ *
+ * @param vcfHeader meta-information lines and header line for the
+ * specified VCF record.
+ * @param vcfRecord a VCF record corresponding to the specified
+ * {@code vcfHeader} object
+ *
+ * @throws IllegalArgumentException if a format error is detected
+ * in the VCF record
+ * @throws IllegalArgumentException if {@code rec.nSamples() == 0}
+ * @throws IllegalArgumentException if the header line
+ * or VCF record does not have a "GT" format field
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRecord == null}
+ */
+ public BitSetGT(VcfHeader vcfHeader, String vcfRecord) {
+ VcfRecGTParser gtp = new VcfRecGTParser(vcfHeader, vcfRecord);
+ this.marker = gtp.marker();
+ this.samples = vcfHeader.samples();
+ this.bitsPerAllele = bitsPerAllele(marker);
+ this.allele1 = new BitSet(vcfHeader.nSamples()*bitsPerAllele);
+ this.allele2 = new BitSet(vcfHeader.nSamples()*bitsPerAllele);
+ this.isMissing1 = new BitSet(vcfHeader.nSamples());
+ this.isMissing2 = new BitSet(vcfHeader.nSamples());
+ this.isPhased = new BitSet(vcfHeader.nSamples());
+ storeAlleles(gtp, bitsPerAllele, allele1, allele2, isMissing1,
+ isMissing2, isPhased);
+ this.isRefData = isRef(vcfHeader.nSamples(), isPhased, isMissing1, isMissing2);
+ }
+
+ private static void storeAlleles(VcfRecGTParser gtp,
+ int bitsPerAllele, BitSet allele1, BitSet allele2,
+ BitSet isMissing1, BitSet isMissing2, BitSet isPhased) {
+ int nSamples = gtp.nSamples();
+ for (int sample=0; sample<nSamples; ++sample) {
+ int a1 = gtp.allele1();
+ int a2 = gtp.allele2();
+ if (gtp.isPhased()) {
+ isPhased.set(sample);
+ }
+ if (a1 == -1) {
+ isMissing1.set(sample);
+ }
+ else {
+ storeAllele(allele1, sample, bitsPerAllele, a1);
+ }
+ if (a2 == -1) {
+ isMissing2.set(sample);
+ }
+ else {
+ storeAllele(allele2, sample, bitsPerAllele, a2);
+ }
+ if (sample + 1 < nSamples) {
+ gtp.nextSample();
+ }
+ }
+ }
+
+ private static void storeAllele(BitSet alleles, int sample,
+ int bitsPerAllele, int allele) {
+ int index = sample*bitsPerAllele;
+ int mask = 1;
+ for (int k=0; k<bitsPerAllele; ++k) {
+ if ((allele & mask)==mask) {
+ alleles.set(index);
+ }
+ ++index;
+ mask <<= 1;
+ }
+ }
+
+ private static boolean isRef(int nSamples, BitSet isPhased,
+ BitSet isMissing1, BitSet isMissing2) {
+ int nMissing = isMissing1.cardinality() + isMissing2.cardinality();
+ int nUnphased = nSamples - isPhased.cardinality();
+ return nMissing==0 && nUnphased==0;
+ }
+
+// /**
+// * Constructs a new {@code LowMemGT} instance representing
+// * the specified VCF record's GT format field data.
+// *
+// * @param rec a VCF file record.
+// * @param fam parent-offspring relationships.
+// * @param usePhase {@code true} if phase information in the specified
+// * VCF file record will be used, and {@code false} if phase
+// * information in the specified VCF file record will be ignored.
+// *
+// * @throws IllegalArgumentException if
+// * {@code rec.nSamples()==0|| rec.samples().equals(fam.samples())==false}.
+// * @throws IllegalArgumentException if the VCF record does not have a
+// * GT format field.
+// * @throws NullPointerException if {@code rec==null || fam==null}.
+// */
+// public BitSetGT(VcfRecord rec, NuclearFamilies fam, boolean usePhase) {
+// this(rec);
+// if (rec.samples().equals(fam.samples())==false) {
+// throw new IllegalArgumentException("inconsistent samples");
+// }
+// setBits(rec, usePhase, bitsPerAllele, allele1, allele2, isMissing1,
+// isMissing2, isPhased);
+// removeMendelianInconsistencies(rec, fam, isPhased, isMissing1,
+// isMissing2);
+// }
+
+ private BitSetGT(VcfRecord rec) {
+ int nSamples = rec.nSamples();
+ if (nSamples==0) {
+ String s = "missing sample data: " + rec;
+ throw new IllegalArgumentException(s);
+ }
+ if (rec.hasFormat(GT_FORMAT)==false) {
+ String s = "missing GT FORMAT: " + rec;
+ throw new IllegalArgumentException(s);
+ }
+ this.bitsPerAllele = bitsPerAllele(rec.marker());
+ this.samples = rec.samples();
+ this.marker = rec.marker();
+ this.isRefData = isRef(rec);
+
+ this.allele1 = new BitSet(nSamples * bitsPerAllele);
+ this.allele2 = new BitSet(nSamples * bitsPerAllele);
+ this.isMissing1 = new BitSet(nSamples);
+ this.isMissing2 = new BitSet(nSamples);
+ this.isPhased = new BitSet(nSamples);
+ }
+
+ private static boolean isRef(VcfRecord rec) {
+ for (int j=0, n=rec.nSamples(); j<n; ++j) {
+ if (rec.isPhased(j)==false || rec.allele1(j)<0 || rec.allele2(j)<0) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private static void setBits(VcfRecord rec, boolean usePhase,
+ int bitsPerAllele, BitSet allele1, BitSet allele2,
+ BitSet isMissing1, BitSet isMissing2, BitSet isPhased) {
+ int index1 = 0;
+ int index2 = 0;
+ for (int j=0, n=rec.nSamples(); j<n; ++j) {
+ if (usePhase && rec.isPhased(j)) {
+ isPhased.set(j);
+ }
+ int a1 = rec.allele1(j);
+ int a2 = rec.allele2(j);
+ if (a1 < 0) {
+ isMissing1.set(j);
+ index1 += bitsPerAllele;
+ }
+ else {
+ int mask = 1;
+ for (int k=0; k<bitsPerAllele; ++k) {
+ if ((a1 & mask)==mask) {
+ allele1.set(index1);
+ }
+ ++index1;
+ mask <<= 1;
+ }
+ }
+
+ if (a2 < 0) {
+ isMissing2.set(j);
+ index2 += bitsPerAllele;
+ }
+ else {
+ int mask = 1;
+ for (int k=0; k<bitsPerAllele; ++k) {
+ if ((a2 & mask)==mask) {
+ allele2.set(index2);
+ }
+ ++index2;
+ mask <<= 1;
+ }
+ }
+ }
+ }
+
+ private static int bitsPerAllele(Marker marker) {
+ int nAllelesM1 = marker.nAlleles() - 1;
+ int nStorageBits = Integer.SIZE - Integer.numberOfLeadingZeros(nAllelesM1);
+ return nStorageBits;
+ }
+
+// /*
+// * Sets phase to unknown for all parent-offspring relationships, and sets
+// * all genotypes in a duo or trio genotypes to missing if a Mendelian
+// * inconsistency is found.
+// */
+// private static void removeMendelianInconsistencies(VcfRecord rec,
+// NuclearFamilies fam, BitSet isPhased, BitSet isMissing1,
+// BitSet isMissing2) {
+// for (int j=0, n=fam.nDuos(); j<n; ++j) {
+// int p = fam.duoParent(j);
+// int o = fam.duoOffspring(j);
+// isPhased.clear(p);
+// isPhased.clear(o);
+// if (duoIsConsistent(rec, p, o) == false) {
+// logDuoInconsistency(rec, p, o);
+// isMissing1.set(p);
+// isMissing2.set(p);
+// isMissing1.set(o);
+// isMissing2.set(o);
+// }
+// }
+// for (int j=0, n=fam.nTrios(); j<n; ++j) {
+// int f = fam.trioFather(j);
+// int m = fam.trioMother(j);
+// int o = fam.trioOffspring(j);
+// isPhased.clear(f);
+// isPhased.clear(m);
+// isPhased.clear(o);
+// if (trioIsConsistent(rec, f, m, o) == false) {
+// logTrioInconsistency(rec, f, m, o);
+// isMissing1.set(f);
+// isMissing2.set(f);
+// isMissing1.set(m);
+// isMissing2.set(m);
+// isMissing1.set(o);
+// isMissing2.set(o);
+// }
+// }
+// }
+//
+// private static boolean duoIsConsistent(VcfRecord rec, int parent,
+// int offspring) {
+// int p1 = rec.gt(parent, 0);
+// int p2 = rec.gt(parent, 1);
+// int o1 = rec.gt(offspring, 0);
+// int o2 = rec.gt(offspring, 1);
+// boolean alleleMissing = (p1<0 || p2<0 || o1<0 || o2<0);
+// return (alleleMissing || p1==o1 || p1==o2 || p2==o1 || p2==o2);
+// }
+//
+// private static boolean trioIsConsistent(VcfRecord rec, int father,
+// int mother, int offspring) {
+// int f1 = rec.gt(father, 0);
+// int f2 = rec.gt(father, 1);
+// int m1 = rec.gt(mother, 0);
+// int m2 = rec.gt(mother, 1);
+// int o1 = rec.gt(offspring, 0);
+// int o2 = rec.gt(offspring, 1);
+// boolean fo1 = (o1<0 || f1<0 || f2<0 || o1==f1 || o1==f2);
+// boolean mo2 = (o2<0 || m1<0 || m2<0 || o2==m1 || o2==m2);
+// if (fo1 && mo2) {
+// return true;
+// }
+// else {
+// boolean fo2 = (o2<0 || f1<0 || f2<0 || o2==f1 || o2==f2);
+// boolean mo1 = (o1<0 || m1<0 || m2<0 || o1==m1 || o1==m2);
+// return (fo2 && mo1);
+// }
+// }
+//
+// private static void logDuoInconsistency(VcfRecord rec, int parent,
+// int offspring) {
+// StringBuilder sb = new StringBuilder(80);
+// sb.append("WARNING: Inconsistent duo genotype set to missing");
+// sb.append(Const.tab);
+// sb.append(rec.marker());
+// sb.append(Const.colon);
+// sb.append(rec.samples().id(parent));
+// sb.append(Const.tab);
+// sb.append(rec.samples().id(offspring));
+// main.Logger.getInstance().println(sb.toString());
+// }
+//
+// private static void logTrioInconsistency(VcfRecord rec, int father,
+// int mother, int offspring) {
+// StringBuilder sb = new StringBuilder(80);
+// sb.append("WARNING: Inconsistent trio genotype set to missing");
+// sb.append(Const.tab);
+// sb.append(rec.marker());
+// sb.append(Const.tab);
+// sb.append(rec.samples().id(father));
+// sb.append(Const.tab);
+// sb.append(rec.samples().id(mother));
+// sb.append(Const.tab);
+// sb.append(rec.samples().id(offspring));
+// main.Logger.getInstance().println(sb.toString());
+// }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*samples.nSamples();
+ }
+
+ @Override
+ public int nHapPairs() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Marker marker() {
+ return marker;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return isRefData;
+ }
+
+ @Override
+ public boolean isPhased(int sample) {
+ return isPhased.get(sample);
+ }
+
+ @Override
+ public int allele1(int sample) {
+ return isMissing1.get(sample) ? -1 : allele(allele1, sample);
+ }
+
+ @Override
+ public int allele2(int sample) {
+ return isMissing2.get(sample) ? -1 : allele(allele2, sample);
+ }
+
+ @Override
+ public int allele(int hap) {
+ int sample = hap/2;
+ return (hap & 1) == 0 ? allele1(sample) : allele2(sample);
+ }
+
+ @Override
+ public float gl(int sample, int a1, int a2) {
+ if ( a1 < 0 || a1 >= marker.nAlleles()) {
+ String s = "invalid alleles: (" + a1 + "): " + marker;
+ throw new IllegalArgumentException(s);
+ }
+ if ( a2 < 0 || a2 >= marker.nAlleles()) {
+ String s = "invalid alleles: (" + a2 + "): " + marker;
+ throw new IllegalArgumentException(s);
+ }
+ if (isMissing1.get(sample) && isMissing2.get(sample)) {
+ return 1.0f;
+ }
+ else if (isMissing1.get(sample) ^ isMissing2.get(sample)) {
+ int obsA1 = allele1(sample);
+ int obsA2 = allele2(sample);
+ boolean consistent = (obsA1<0 || obsA1==a1) && (obsA2<0 || obsA2==a2);
+ if (isPhased.get(sample)==false && consistent==false) {
+ consistent = (obsA1<0 || obsA1==a2) && (obsA2<0 || obsA2==a1);
+ }
+ return consistent ? 1.0f : 0.0f;
+ }
+ else {
+ int obsA1 = allele(allele1, sample);
+ int obsA2 = allele(allele2, sample);
+ if (isPhased.get(sample)) {
+ return (obsA1==a1 && obsA2==a2) ? 1.0f : 0.0f;
+ }
+ else {
+ boolean isConsistent = (obsA1==a1 && obsA2==a2)
+ || (obsA1==a2 && obsA2==a1);
+ return isConsistent ? 1.0f : 0.0f;
+ }
+ }
+ }
+
+ private int allele(BitSet bits, int sample) { // decodes the sample's bit slot, least-significant bit first
+     if (sample >= samples.nSamples()) { // BitSet.get() past the end would silently decode allele 0
+         throw new IndexOutOfBoundsException(String.valueOf(sample));
+     }
+     int start = bitsPerAllele*sample;
+     int allele = 0;
+     for (int k=0; k<bitsPerAllele; ++k) {
+         if (bits.get(start + k)) {
+             allele |= (1 << k);
+         }
+     }
+     return allele;
+ }
+
+ @Override
+ public int nAlleles() {
+ return this.marker().nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices() {
+ return false;
+ }
+
+ @Override
+ public int majorAllele() {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int alleleCount(int allele) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int hapIndex(int allele, int copy) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ /**
+ * Returns the data represented by {@code this} as a VCF
+ * record with a GT format field. The returned VCF record
+ * will have missing QUAL and INFO fields, will have "PASS"
+ * in the filter field, and will have a GT format field.
+ * @return the data represented by {@code this} as a VCF
+ * record with a GT format field
+ */
+ @Override
+ public String toString() {
+ return toVcfRec();
+ }
+}
diff --git a/vcf/BitSetRefGT.java b/vcf/BitSetRefGT.java
new file mode 100644
index 0000000..bea79cd
--- /dev/null
+++ b/vcf/BitSetRefGT.java
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import java.util.BitSet;
+
+/**
+ * <p>Class {@code BitSetRefGT} represents genotype emission
+ * probabilities for a list of reference samples with phased, non-missing
+ * genotypes at a single marker.
+ * The genotype emission probabilities are determined by the called
+ * genotypes for the reference samples.
+ * </p>
+ * <p>Class {@code BitSetRefGT} stores alleles using
+ * {@code java.util.BitSet} objects.
+ * </p>
+ * <p>Instances of class {@code BitSetRefGT} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BitSetRefGT implements VcfEmission {
+
+ private final int bitsPerAllele;
+ private final Marker marker;
+ private final Samples samples;
+ private final BitSet allele1;
+ private final BitSet allele2;
+
+ /**
+ * Creates a new {@code BitSetRefGT} instance from a VCF record
+ * storing phased, non-missing reference genotypes.
+ *
+ * @param vcfHeader meta-information lines and header line for the
+ * specified VCF record
+ * @param vcfRecord a VCF record corresponding to the specified
+ * {@code vcfHeader} object
+ *
+ * @throws IllegalArgumentException if a format error is detected
+ * in the VCF record or if any allele is missing or unphased
+ * @throws IllegalArgumentException if {@code rec.nSamples() == 0}
+ * @throws IllegalArgumentException if the header line
+ * or VCF record does not have a "GT" format field
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRecord == null}
+ */
+ public BitSetRefGT(VcfHeader vcfHeader, String vcfRecord) {
+ this(new VcfRecGTParser(vcfHeader, vcfRecord));
+ }
+
+ /**
+ * Creates a new {@code VcfEmission} instance from a VCF record
+ * containing phased, non-missing genotypes for a list of reference samples.
+ * @param gtp a parser for the VCF record
+ * @throws IllegalArgumentException if a format error, a missing genotype,
+ * or an unphased genotype is detected in the VCF record
+ * @throws NullPointerException if {@code gtp==null}
+ */
+ public BitSetRefGT(VcfRecGTParser gtp) {
+ this.marker = gtp.marker();
+ this.samples = gtp.samples();
+ this.bitsPerAllele = bitsPerAllele(marker);
+ this.allele1 = new BitSet(gtp.nSamples()*bitsPerAllele);
+ this.allele2 = new BitSet(gtp.nSamples()*bitsPerAllele);
+ storeAlleles(gtp, bitsPerAllele, allele1, allele2);
+ }
+
+ private static int bitsPerAllele(Marker marker) {
+ int nAllelesM1 = marker.nAlleles() - 1;
+ int nStorageBits = Integer.SIZE - Integer.numberOfLeadingZeros(nAllelesM1);
+ return nStorageBits;
+ }
+
+ // Copies each sample's phased allele pair from gtp into the two bit sets;
+ // throws IllegalArgumentException on an unphased or missing genotype.
+ private static void storeAlleles(VcfRecGTParser gtp, int bitsPerAllele,
+         BitSet allele1, BitSet allele2) {
+     for (int sample=0, n=gtp.nSamples(); sample<n; ++sample) {
+         int a1 = gtp.allele1();
+         int a2 = gtp.allele2();
+         if (gtp.isPhased()==false || a1 < 0 || a2 < 0) { // negative index == missing allele
+             String s = "Unphased or missing reference genotype at marker: " + gtp.marker();
+             throw new IllegalArgumentException(s);
+         }
+         storeAllele(allele1, sample, bitsPerAllele, a1);
+         storeAllele(allele2, sample, bitsPerAllele, a2);
+         if (sample + 1 < n) { // nextSample() is only called while samples remain
+             gtp.nextSample();
+         }
+     }
+ }
+
+ private static void storeAllele(BitSet alleles, int sample,
+ int bitsPerAllele, int allele) {
+ int index = sample*bitsPerAllele;
+ int mask = 1;
+ for (int k=0; k<bitsPerAllele; ++k) {
+ if ((allele & mask)==mask) {
+ alleles.set(index);
+ }
+ ++index;
+ mask <<= 1;
+ }
+ }
+
+ @Override
+ public boolean isPhased(int sample) {
+ if (sample < 0 || sample >= this.nSamples()) {
+ throw new IllegalArgumentException(String.valueOf(sample));
+ }
+ return true;
+ }
+
+ /**
+ * Returns the samples. The returned samples are the filtered samples
+ * after all sample exclusions.
+ *
+ * @return the samples.
+ */
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*samples.nSamples();
+ }
+
+ @Override
+ public int nHapPairs() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Marker marker() {
+ return marker;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return true;
+ }
+
+ @Override
+ public float gl(int sample, int allele1, int allele2) {
+ if (allele1 < 0 || allele1 >= marker.nAlleles()) {
+ throw new IndexOutOfBoundsException(String.valueOf(allele1));
+ }
+ if (allele2 < 0 || allele2 >= marker.nAlleles()) {
+ throw new IndexOutOfBoundsException(String.valueOf(allele2));
+ }
+ boolean matches = (allele1==allele1(sample) && allele2==allele2(sample));
+ return matches ? 1.0f : 0.0f;
+ }
+
+ @Override
+ public int allele1(int sample) {
+ return allele(allele1, bitsPerAllele, sample);
+ }
+
+ @Override
+ public int allele2(int sample) {
+ return allele(allele2, bitsPerAllele, sample);
+ }
+
+ @Override
+ public int allele(int hap) {
+ int sample = hap/2;
+ return (hap & 1) == 0 ? allele1(sample) : allele2(sample);
+ }
+
+ private int allele(BitSet bits, int bitsPerAllele, int sample) {
+ if (sample >= samples.nSamples()) {
+ throw new IndexOutOfBoundsException(String.valueOf(sample));
+ }
+ int start = bitsPerAllele*sample;
+ int end = start + bitsPerAllele;
+ int allele = 0;
+ int mask = 1;
+ for (int j=start; j<end; ++j) {
+ if (bits.get(j)) {
+ allele += mask;
+ }
+ mask <<= 1;
+ }
+ return allele;
+ }
+
+ @Override
+ public int nAlleles() {
+ return this.marker().nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices() {
+ return false;
+ }
+
+ @Override
+ public int majorAllele() {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int alleleCount(int allele) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int hapIndex(int allele, int copy) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ /**
+ * Returns the data represented by {@code this} as a VCF
+ * record with a GT format field. The returned VCF record
+ * will have missing QUAL and INFO fields, will have "PASS"
+ * in the filter field, and will have a GT format field.
+ * @return the data represented by {@code this} as a VCF
+ * record with a GT format field
+ */
+ @Override
+ public String toString() {
+ return toVcfRec();
+ }
+}
diff --git a/vcf/Bref.java b/vcf/Bref.java
new file mode 100644
index 0000000..c6d2fbd
--- /dev/null
+++ b/vcf/Bref.java
@@ -0,0 +1,550 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either VERSION 3 of the License, or
+ * (at your option) any later VERSION.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.Const;
+import blbutil.FileIt;
+import blbutil.FileUtil;
+import blbutil.Filter;
+import blbutil.InputIt;
+import blbutil.IntArray;
+import blbutil.SampleFileIt;
+import blbutil.Utilities;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+import java.util.zip.GZIPOutputStream;
+
+/**
+ * <p>Class {@code Bref} has methods for reading and writing phased,
+ * non-missing genotypes that are stored in a "bref" binary VCF file.
+ * </p>
+ * <p>Instances of class {@code Bref} are not thread-safe.</p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class Bref {
+
+ private static final String program = "bref.__REV__.jar"; // written to the bref file header
+ private static final int BUFFER_SIZE = 1<<22; // buffer size passed to the GZIPOutputStream
+ private static final int SHIFT = 128; // byte-coded values are written as (value - SHIFT)
+ private static final int MAX_NSEQ = 255; // allow nSeq=0 as sentinel
+ private static final int STRING_BUFFER_SIZE = 300; // max VCF records per string buffer
+
+ private static final String[] bases = new String[] {"A", "C", "G", "T"};
+ private static final Set<String> basesSet = basesSet();
+ private static final String[][] snvPerms = snvPerms();
+ private static final Comparator<String[]> allelesComp = allelesComparator();
+
+ /**
+ * The initial int in a bref file created with this bref version.
+ */
+ public static final int INITIAL_NUMBER = 223579146;
+
+ /**
+ * The end-of-file marker (an int) written at the end of a bref file.
+ */
+ public static final int EOF = 0;
+
+ private final FileIt<String> it;
+ private final DataOutputStream os;
+ private final VcfHeader vcfHeader;
+ private final BlockingQueue<String[]> stringBuffers;
+ private final Function<String, VcfEmission> mapper;
+ private final List<VcfEmission> emBuffer;
+ private final VcfEmissionCompressor emCompressor;
+
+ /**
+ * The {@code main()} method is the entry point to the bref program.
+ * See the usage() method for usage instructions.
+ *
+ * @param args the command line arguments
+ */
+ public static void main(String[] args) {
+ if (args.length != 1) {
+ System.out.println(usage());
+ System.exit(0);
+ }
+ String fname = args[0];
+ if (fname.endsWith(".vcf") || fname.endsWith(".vcf.gz")) {
+ FileIt<String> it = inputIterator(fname);
+ File brefFile = brefFile(fname);
+ Bref bref = new Bref(it, brefFile);
+ }
+ else if (fname.endsWith(".bref")) {
+ File brefFile = new File(fname);
+ try (PrintWriter out = FileUtil.stdOutPrintWriter()) {
+ writeVcf(brefFile, out);
+ }
+ }
+ else {
+ System.out.println(usage());
+ System.out.println("Unrecognized filename extension");
+ System.exit(0);
+ }
+ }
+
+ private static FileIt<String> inputIterator(String fname) {
+ if (fname.endsWith(".vcf")) {
+ return InputIt.fromTextFile(new File(fname));
+ }
+ else if (fname.endsWith(".vcf.gz")) {
+ return InputIt.fromGzipFile(new File(fname));
+ }
+ else {
+ throw new IllegalArgumentException("invalid filename");
+ }
+ }
+
+ private static File brefFile(String fname) {
+ int x = fname.lastIndexOf(".vcf");
+ assert x>=0;
+ return new File(fname.substring(0, x) + ".bref");
+
+ }
+
+ /**
+ * Returns an array that is obtained by taking the first {@code length}
+ * elements of the specified permutation of "A", "C", "G", and "T".
+ * The list of 24 permutations of "A", "C", "G", and "T" is sorted
+ * in lexicographic order.
+ * @param permIndex an index of a permutation of "A", "C", "G", and "T"
+ * @param length the number of elements in the returned array; positions
+ * past the end of the permutation are filled with {@code null}
+ * @return an array that is obtained by taking the first {@code length}
+ * elements of the specified permutation of "A", "C", "G", and "T"
+ * @throws IndexOutOfBoundsException if
+ * {@code permIndex < 0 || permIndex >= 24}
+ * @throws NegativeArraySizeException if {@code length < 0}
+ */
+ public static String[] alleleString(int permIndex, int length) {
+ return Arrays.copyOf(snvPerms[permIndex], length);
+ }
+
+ /**
+ * Constructs a {@code Bref} instance and, as part of construction,
+ * writes the VCF data returned by {@code it} to {@code brefFile}.
+ *
+ * @param it a file iterator that returns lines of a VCF file
+ * @param brefFile the gzip-compressed bref file that will be written;
+ * an {@code IOException} while writing ends in {@code Utilities.exit()}
+ * @throws NullPointerException if {@code brefFile==null}
+ */
+ private Bref(FileIt<String> it, File brefFile) {
+ this.it = it;
+ this.os = dataOutputStream(brefFile);
+ this.vcfHeader = vcfHeader(it);
+ this.mapper = (String s) -> RefIt.toRef.apply(vcfHeader, s);
+ this.stringBuffers = new ArrayBlockingQueue<>(1);
+ this.emBuffer = new ArrayList<>(500);
+ this.emCompressor = new VcfEmissionCompressor(vcfHeader.samples(),
+ MAX_NSEQ);
+ try {
+ writeHeader(vcfHeader.sampleIds(), os);
+ startFileReadingThread(); // producer: fills stringBuffers on a new thread
+ writeCompressedRecords(); // consumer: runs here until the empty sentinel buffer
+ os.writeInt(EOF);
+ os.close();
+ } catch (IOException ex) {
+ Utilities.exit("Error writing file", ex);
+ }
+ }
+
+ private static DataOutputStream dataOutputStream(File file) {
+ OutputStream os = null;
+ try {
+ os = new FileOutputStream(file);
+ os = new GZIPOutputStream(os, BUFFER_SIZE);
+ } catch (FileNotFoundException ex) {
+ Utilities.exit("Error opening: " + file, ex);
+ } catch (IOException ex) {
+ Utilities.exit("IO error: " + file, ex);
+ }
+ return new DataOutputStream(os);
+ }
+
+ private static VcfHeader vcfHeader(FileIt<String> it) {
+ Filter<String> sampleFilter = Filter.acceptAllFilter();
+ return new VcfHeader(it, sampleFilter);
+ }
+
+ private static void writeHeader(String[] sampleIds, DataOutputStream os)
+ throws IOException {
+ os.writeInt(Bref.INITIAL_NUMBER);
+ os.writeUTF(Bref.program);
+ os.writeInt(sampleIds.length);
+ for (String id : sampleIds) {
+ os.writeUTF(id);
+ }
+ }
+
+ private void startFileReadingThread() { // producer side of stringBuffers
+     Runnable runnable = () -> {
+         try {
+             String line = readLine(it);
+             while (line != null) {
+                 String chromPlusTab = chromFieldPlusTab(line);
+                 String[] sa = new String[STRING_BUFFER_SIZE];
+                 int size = 0; // each buffer only holds records sharing one CHROM field
+                 while (line != null && size < sa.length
+                         && line.startsWith(chromPlusTab)) {
+                     sa[size++] = line;
+                     line = readLine(it);
+                 }
+                 putInBlockingQueue(stringBuffers, Arrays.copyOf(sa, size));
+             }
+             putInBlockingQueue(stringBuffers, new String[0]); // sentinel
+         } catch (RuntimeException ex) { // a dead reader would leave the consumer blocked in take()
+             Utilities.exit("Error reading VCF file", ex);
+         }
+     };
+     new Thread(runnable).start();
+ }
+
+ private static String readLine(FileIt<String> it) {
+ if (it.hasNext()==false) {
+ return null;
+ }
+ String line = it.next();
+ if (line.trim().isEmpty()) {
+ String s = "Blank line in VCF file: "
+ + (it.file()==null ? "stdin" : it.file());
+ throw new IllegalArgumentException(s);
+ }
+ return line;
+ }
+
+ private static String chromFieldPlusTab(String vcfRecord) {
+ int tabIndex = vcfRecord.indexOf(Const.tab);
+ if (tabIndex == -1) {
+ String s = "Missing tab delimiter: " + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ return vcfRecord.substring(0, tabIndex + 1);
+ }
+
+ private static <E> void putInBlockingQueue(BlockingQueue<E> q, E e) {
+ try {
+ q.put(e);
+ } catch (InterruptedException ex) {
+ Utilities.exit("Error: InterruptedException", ex);
+ }
+ }
+
+ private static <E> E takeFromBlockingQueue(BlockingQueue<E> q) {
+ try {
+ return q.take();
+ } catch (InterruptedException ex) {
+ Utilities.exit("Error: InterruptedException", ex);
+ }
+ assert false;
+ return null;
+ }
+
+ private void writeCompressedRecords() throws IOException { // consumer side of stringBuffers
+     String[] stringBuffer = takeFromBlockingQueue(stringBuffers);
+     while (stringBuffer.length > 0) { // an empty buffer is the reader's sentinel
+         List<VcfEmission> list = convertStringBuffer(stringBuffer);
+         String nextChrom = list.get(0).marker().chrom(); // a buffer holds a single chrom
+         if (emBuffer.isEmpty()==false && nextChrom.equals(chrom(emBuffer, emCompressor))==false) {
+             writeAndClearVcfEmissions(); // a block is written with one chrom string only
+         }
+         for (VcfEmission em : list) {
+             if (em.storesNonMajorIndices()) {
+                 emBuffer.add(em);
+             }
+             else if (em.marker().nAlleles() > MAX_NSEQ) {
+                 emBuffer.add(new LowMafRefGT(em.marker(), em.samples(), hapIndices(em)));
+             }
+             else {
+                 boolean success = emCompressor.addToCompessedList(em);
+                 if (success == false) { // compressor rejected the record: flush, then retry
+                     writeAndClearVcfEmissions();
+                     success = emCompressor.addToCompessedList(em);
+                     assert success;
+                 }
+                 emBuffer.add(null); // null marks a record held by emCompressor
+             }
+         }
+         stringBuffer = takeFromBlockingQueue(stringBuffers);
+     }
+     writeAndClearVcfEmissions();
+ }
+
+ private List<VcfEmission> convertStringBuffer(String[] stringBuffer) {
+ return Arrays.stream(stringBuffer)
+ .parallel()
+ .map(mapper)
+ .collect(Collectors.toList());
+ }
+
+ private static int[][] hapIndices(VcfEmission em) {
+ int[] alCnts = alleleCounts(em);
+ int majorAllele = majorAllele(alCnts);
+ int[][] hapIndices = new int[alCnts.length][];
+ for (int j=0; j<hapIndices.length; ++j) {
+ hapIndices[j] = (j == majorAllele) ? null : new int[alCnts[j]];
+ }
+ int[] indices = new int[alCnts.length];
+ for (int h=0, n=em.nHaps(); h<n; ++h) {
+ int a = em.allele(h);
+ if (a != majorAllele) {
+ hapIndices[a][indices[a]++] = h;
+ }
+ }
+ return hapIndices;
+ }
+
+ private static int[] alleleCounts(VcfEmission em) {
+ int[] cnts = new int[em.marker().nAlleles()];
+ for (int h = 0, n = em.nHaps(); h < n; ++h) {
+ ++cnts[em.allele(h)];
+ }
+ return cnts;
+ }
+
+ private static int majorAllele(int[] alleleCnts) {
+ int major = 0;
+ for (int j=1; j<alleleCnts.length; ++j) {
+ if (alleleCnts[j] > alleleCnts[major]) {
+ major = j;
+ }
+ }
+ return major;
+ }
+
+ /**
+  * Writes the buffered records as one block to the output stream and
+  * then empties both the record buffer and the compressor. Nothing is
+  * written if the record buffer is empty.
+  * The block consists of the record count, the chromosome, the number
+  * of distinct sequences, the haplotype-to-sequence map (one shifted
+  * byte per haplotype), and then the records in buffer order. A
+  * {@code null} buffer entry is a placeholder for the next record held
+  * by the compressor.
+  *
+  * @throws IOException if an I/O error occurs
+  */
+ private void writeAndClearVcfEmissions() throws IOException {
+     if (emBuffer.isEmpty()) {
+         return;
+     }
+     os.writeInt(emBuffer.size());
+     os.writeUTF(chrom(emBuffer, emCompressor));
+     os.writeByte(emCompressor.nSeq() - SHIFT);
+     IntArray hapToSeq = emCompressor.hapToSeq();
+     final int nHaps = hapToSeq.size();
+     for (int hap = 0; hap < nHaps; ++hap) {
+         os.writeByte(hapToSeq.get(hap) - SHIFT);
+     }
+     int compressedIndex = 0;
+     for (VcfEmission em : emBuffer) {
+         if (em != null) {
+             writeAlleleIndexRecord(em, os);
+         }
+         else {
+             writeCompressedRecord(emCompressor, compressedIndex, os);
+             ++compressedIndex;
+         }
+     }
+     emBuffer.clear();
+     emCompressor.clear();
+ }
+
+ /**
+  * Returns the chromosome of the first marker held by the compressor,
+  * or of the first buffered record if the compressor is empty.
+  *
+  * @param emBuffer the buffered records
+  * @param emCompressor the compressor
+  * @return the chromosome identifier
+  * @throws IllegalArgumentException if both the compressor and the
+  * buffer are empty
+  */
+ private static String chrom(List<VcfEmission> emBuffer,
+         VcfEmissionCompressor emCompressor) {
+     if (emCompressor.size() > 0) {
+         return emCompressor.marker(0).chrom();
+     }
+     if (emBuffer.isEmpty()) {
+         throw new IllegalArgumentException();
+     }
+     return emBuffer.get(0).marker().chrom();
+ }
+
+ /**
+  * Writes the compressor's record with the specified index: the marker,
+  * a coding flag of 0, and then the allele carried by each distinct
+  * sequence. Alleles are written as one shifted byte each if the marker
+  * has at most 256 alleles, and as unshifted ints otherwise.
+  * NOTE(review): {@code BrefIt.readSeqToAllele} always reads one shifted
+  * byte per sequence, so the int branch is only consistent with the
+  * reader if it is unreachable - confirm that compressed records never
+  * have more than 256 alleles.
+  *
+  * @param emCompressor the compressor holding the record
+  * @param index the index of the record in the compressor
+  * @param os the output stream
+  * @throws IOException if an I/O error occurs
+  */
+ private static void writeCompressedRecord(VcfEmissionCompressor emCompressor,
+         int index, DataOutputStream os) throws IOException {
+     Marker marker = emCompressor.marker(index);
+     IntArray seqToAllele = emCompressor.seqToAllele(index);
+     writeMarker(marker, os);
+     os.writeByte(0);    // coding flag 0: sequence-coded record
+     final boolean oneBytePerAllele = (marker.nAlleles() <= 256);
+     final int nSeq = seqToAllele.size();
+     for (int seq = 0; seq < nSeq; ++seq) {
+         int allele = seqToAllele.get(seq);
+         if (oneBytePerAllele) {
+             os.writeByte(allele - SHIFT);
+         }
+         else {
+             os.writeInt(allele);
+         }
+     }
+ }
+
+ /**
+  * Writes a record that stores the haplotype indices of its non-major
+  * alleles: the marker, a coding flag of 1, and then for each allele
+  * either -1 (major allele) or the allele count followed by the indices
+  * of the haplotypes carrying the allele.
+  *
+  * @param ve a record for which {@code ve.storesNonMajorIndices()}
+  * is {@code true}
+  * @param os the output stream
+  * @throws IOException if an I/O error occurs
+  */
+ private static void writeAlleleIndexRecord(VcfEmission ve, DataOutputStream os)
+         throws IOException {
+     assert ve.storesNonMajorIndices();
+     final int nAlleles = ve.nAlleles();
+     final int majorAllele = ve.majorAllele();
+     writeMarker(ve.marker(), os);
+     os.writeByte(1);    // coding flag 1: allele-index record
+     for (int allele = 0; allele < nAlleles; ++allele) {
+         if (allele == majorAllele) {
+             os.writeInt(-1);
+             continue;
+         }
+         os.writeInt(ve.alleleCount(allele));
+         int copy = 0;
+         while (copy < ve.alleleCount(allele)) {
+             os.writeInt(ve.hapIndex(allele, copy));
+             ++copy;
+         }
+     }
+ }
+
+ /**
+  * Writes the specified marker: its position, the number of identifiers
+  * (at most 255, stored as a shifted byte), the identifiers, and an
+  * allele code. For a marker whose alleles are all single bases the
+  * allele code returned by {@code snvCode()} fully describes the alleles.
+  * Otherwise the code is -1 and is followed by the allele count, the
+  * allele strings, and the marker's end coordinate.
+  *
+  * @param marker the marker to write
+  * @param os the output stream
+  * @throws IOException if an I/O error occurs
+  */
+ private static void writeMarker(Marker marker, DataOutputStream os)
+         throws IOException {
+     os.writeInt(marker.pos());
+     final int nIds = Math.min(marker.nIds(), 255);
+     os.writeByte(nIds - SHIFT);
+     for (int id = 0; id < nIds; ++id) {
+         os.writeUTF(marker.id(id));
+     }
+     byte alleleCode = -1;
+     if (isSNV(marker)) {
+         alleleCode = snvCode(marker.alleles());
+     }
+     os.writeByte(alleleCode);
+     if (alleleCode != -1) {
+         return;
+     }
+     // alleles cannot be coded in one byte: write them explicitly
+     final int nAlleles = marker.nAlleles();
+     os.writeInt(nAlleles);
+     for (int a = 0; a < nAlleles; ++a) {
+         os.writeUTF(marker.allele(a));
+     }
+     os.writeInt(marker.end());
+ }
+
+ /**
+  * Returns a one-byte code for the specified alleles. The two low-order
+  * bits store the number of alleles minus one, and the remaining bits
+  * store the position of the alleles in {@code snvPerms} as located by
+  * a binary search with {@code allelesComp} (the insertion point is
+  * used if there is no exact match).
+  *
+  * @param alleles the alleles of a marker
+  * @return the allele code
+  */
+ private static byte snvCode(String[] alleles) {
+     int permIndex = Arrays.binarySearch(snvPerms, alleles, allelesComp);
+     if (permIndex < 0) {
+         // no exact match: convert to the insertion point
+         permIndex = -(permIndex + 1);
+     }
+     return (byte) ((permIndex << 2) + (alleles.length - 1));
+ }
+
+ /**
+  * Returns {@code true} if every allele of the specified marker is an
+  * element of {@code basesSet}, and returns {@code false} otherwise.
+  *
+  * @param marker a marker
+  * @return {@code true} if every allele is contained in {@code basesSet}
+  */
+ private static boolean isSNV(Marker marker) {
+     final int nAlleles = marker.nAlleles();
+     int a = 0;
+     while (a < nAlleles && basesSet.contains(marker.allele(a))) {
+         ++a;
+     }
+     return a >= nAlleles;
+ }
+
+ /**
+  * Prints the contents of the specified bref file in VCF format.
+  * The VCF meta-information lines are printed before the first record;
+  * nothing is printed if the bref file contains no records.
+  *
+  * @param bref a bref file
+  * @param out the writer to which VCF output is printed
+  */
+ private static void writeVcf(File bref, PrintWriter out) {
+     try (SampleFileIt<VcfEmission> it = new BrefIt(bref)) {
+         boolean headerWritten = false;
+         while (it.hasNext()) {
+             VcfEmission rec = it.next();
+             if (headerWritten == false) {
+                 VcfWriter.writeMetaLinesGT(rec.samples().ids(), program, out);
+                 headerWritten = true;
+             }
+             out.println(rec.toString());
+         }
+     }
+ }
+
+ /**
+  * Returns a comparator that orders allele arrays by the first
+  * character of each allele, position by position. If one array is a
+  * prefix of the other under this comparison, the shorter array is
+  * ordered first.
+  *
+  * @return a comparator for allele arrays
+  */
+ private static Comparator<String[]> allelesComparator() {
+     return (String[] a, String[] b) -> {
+         final int shared = Math.min(a.length, b.length);
+         for (int i = 0; i < shared; ++i) {
+             int diff = Character.compare(a[i].charAt(0), b[i].charAt(0));
+             if (diff != 0) {
+                 return (diff < 0) ? -1 : 1;
+             }
+         }
+         return Integer.compare(a.length, b.length);
+     };
+ }
+
+ /**
+  * Returns every ordering of the elements of {@code bases}, in the
+  * order generated by {@code permute()}.
+  *
+  * @return the orderings of {@code bases}
+  */
+ private static String[][] snvPerms() {
+     List<String[]> orderings = new ArrayList<>(24);
+     permute(new String[0], bases, orderings);
+     String[][] result = new String[orderings.size()][];
+     return orderings.toArray(result);
+ }
+
+ /**
+  * Returns an unmodifiable set containing the elements of {@code bases}.
+  *
+  * @return an unmodifiable set of the elements of {@code bases}
+  */
+ private static Set<String> basesSet() {
+     Set<String> result = new HashSet<>(4);
+     for (String base : bases) {
+         result.add(base);
+     }
+     return Collections.unmodifiableSet(result);
+ }
+
+ /**
+  * Appends to {@code perms} every array formed by {@code start}
+  * followed by an ordering of the elements of {@code end}. Orderings
+  * are generated by choosing each element of {@code end} in turn as
+  * the next element and recursing on the remaining elements.
+  *
+  * @param start the fixed prefix
+  * @param end the elements that remain to be ordered
+  * @param perms the list to which completed arrays are added
+  */
+ private static void permute(String[] start, String[] end, List<String[]> perms) {
+     if (end.length == 0) {
+         perms.add(start);
+         return;
+     }
+     for (int pick = 0; pick < end.length; ++pick) {
+         String[] extended = new String[start.length + 1];
+         System.arraycopy(start, 0, extended, 0, start.length);
+         extended[start.length] = end[pick];
+
+         // remaining = end with the picked element removed
+         String[] remaining = new String[end.length - 1];
+         int next = 0;
+         for (int i = 0; i < end.length; ++i) {
+             if (i != pick) {
+                 remaining[next++] = end[i];
+             }
+         }
+         permute(extended, remaining, perms);
+     }
+ }
+
+ /**
+  * Returns the command line usage instructions.
+  *
+  * @return the command line usage instructions
+  */
+ private static String usage() {
+     String command = "usage: java -jar " + program;
+     String[] lines = {
+         command + " [vcf] (creates a .bref file)",
+         " or",
+         command + " [bref] (prints a .vcf file to standard out)",
+         "",
+         "where",
+         " [vcf] = A vcf file with phased, non-missing genotype data. If the VCF",
+         " file is a text file, its filename should end in \".vcf\". If the",
+         " VCF file is GZIP-compressed, its filename should end in \".vcf.gz\"",
+         " [bref] = A binary reference file. The filename should end in \".bref\""
+     };
+     StringBuilder sb = new StringBuilder(500);
+     for (String line : lines) {
+         sb.append(line);
+         sb.append(Const.nl);
+     }
+     return sb.toString();
+ }
+}
diff --git a/vcf/BrefIt.java b/vcf/BrefIt.java
new file mode 100644
index 0000000..9027546
--- /dev/null
+++ b/vcf/BrefIt.java
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.ChromIds;
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.Filter;
+import blbutil.IntArray;
+import blbutil.SampleFileIt;
+import blbutil.Utilities;
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayDeque;
+import java.util.Deque;
+import java.util.NoSuchElementException;
+import java.util.zip.GZIPInputStream;
+
+/**
+ * <p>Class {@code BrefIt} represents an iterator whose {@code next()}
+ * method returns an object storing data from a VCF record with phased,
+ * non-missing genotypes.
+ * </p>
+ * <p>Instances of class {@code BrefIt} are not thread-safe.
+ * </p>
+ * <p>Methods of this class will terminate the Java Virtual Machine with
+ * an error message if an I/O error or file format error is detected.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BrefIt implements SampleFileIt<VcfEmission> {
+
+ private static final int BUFFER_SIZE = 1<<22;
+ private static final int SHIFT = 128;
+ private static final String[] EMPTY_STRING_ARRAY = new String[0];
+ private static final String err = "Error reading file.";
+
+ private final File file;
+ private final Filter<Marker> markerFilter;
+ private final DataInputStream is;
+ private final long initNumber;
+ private final String version;
+ private final Samples samples;
+ private final int nHaps;
+
+ private final Deque<VcfEmission> emBuffer;
+
+ /**
+ * Constructs a new {@code BrefIt} instance.
+ * @param brefFile a bref file
+ *
+ * @throws IllegalArgumentException if a format error is detected in a
+ * line of the specified bref file
+ * @throws NullPointerException if {@code brefFile == null}
+ */
+ public BrefIt(File brefFile) {
+ this(brefFile, Filter.acceptAllFilter());
+ }
+
+ /**
+ * Constructs a new {@code BrefIt} instance.
+ * @param brefFile a bref file
+ * @param markerFilter a marker filter or {@code null}
+ *
+ * @throws IllegalArgumentException if a format error is detected in a
+ * line of the specified bref file
+ * @throws NullPointerException if {@code brefFile == null}
+ */
+ public BrefIt(File brefFile, Filter<Marker> markerFilter) {
+ if (markerFilter == null) {
+ markerFilter = Filter.acceptAllFilter();
+ }
+ this.file = brefFile;
+ this.markerFilter = markerFilter;
+ this.is = dataInputStream(brefFile);
+ this.initNumber = readInitialNumber(is);
+ this.version = readVersion(is);
+ this.samples = readSamples(is);
+ this.nHaps = 2*samples.nSamples();
+ this.emBuffer = new ArrayDeque<>(500);
+ fillBuffer();
+ }
+
+ /**
+  * Opens the specified GZIP-compressed file and returns a
+  * {@code DataInputStream} that reads its decompressed contents.
+  * {@code Utilities.exit()} is invoked if the file cannot be found or
+  * cannot be opened.
+  *
+  * @param file a GZIP-compressed file
+  * @return a stream that reads the decompressed file contents
+  */
+ private static DataInputStream dataInputStream(File file) {
+     InputStream in = null;
+     try {
+         InputStream fileIn = new FileInputStream(file);
+         in = fileIn;
+         in = new GZIPInputStream(fileIn, BUFFER_SIZE);
+     }
+     catch (FileNotFoundException ex) {
+         Utilities.exit("File not found: " + file, ex);
+     }
+     catch (IOException ex) {
+         Utilities.exit("Error opening: " + file, ex);
+     }
+     return new DataInputStream(in);
+ }
+
+ /**
+  * Reads the leading int of a bref file and checks that it equals
+  * {@code Bref.INITIAL_NUMBER}. {@code Utilities.exit()} is invoked if
+  * the value differs or if an I/O error occurs.
+  *
+  * @param is the input stream
+  * @return the value that was read
+  */
+ private static long readInitialNumber(DataInputStream is) {
+     long number;
+     try {
+         number = is.readInt();
+     } catch (IOException ex) {
+         Utilities.exit(err, ex);
+         Utilities.exit(err);
+         return -1;
+     }
+     if (number != Bref.INITIAL_NUMBER) {
+         String msg = "ERROR: unrecognized input file. Was file created "
+                 + Const.nl
+                 + "with a different version of the bref program?";
+         Utilities.exit(msg);
+     }
+     return number;
+ }
+
+ /**
+  * Reads the version string from the specified stream.
+  * {@code Utilities.exit()} is invoked if an I/O error occurs.
+  *
+  * @param is the input stream
+  * @return the version string
+  */
+ private static String readVersion(DataInputStream is) {
+     String versionString = null;
+     try {
+         versionString = is.readUTF();
+     } catch (IOException ex) {
+         Utilities.exit(err, ex);
+         Utilities.exit(err);
+     }
+     return versionString;
+ }
+
+ /**
+  * Reads the number of samples followed by the sample identifiers
+  * from the specified stream. {@code Utilities.exit()} is invoked if
+  * an I/O error occurs.
+  *
+  * @param is the input stream
+  * @return the samples
+  */
+ private static Samples readSamples(DataInputStream is) {
+     Samples result = null;
+     try {
+         int nSamples = is.readInt();
+         result = Samples.fromIds(readStringArray(is, nSamples));
+     } catch (IOException ex) {
+         Utilities.exit(err, ex);
+         Utilities.exit(err);
+     }
+     return result;
+ }
+
+ /**
+ * Returns {@code true} if the iteration has more elements, and returns
+ * {@code false} otherwise.
+ * @return {@code true} if the iteration has more elements
+ */
+ @Override
+ public boolean hasNext() {
+ return !emBuffer.isEmpty();
+ }
+
+ /**
+ * Returns the next element in the iteration.
+ * @return the next element in the iteration
+ * @throws NoSuchElementException if the iteration has no more elements
+ */
+ @Override
+ public VcfEmission next() {
+ if (hasNext()==false) {
+ throw new NoSuchElementException();
+ }
+ VcfEmission first = emBuffer.removeFirst();
+ if (emBuffer.isEmpty()) {
+ fillBuffer();
+ }
+ return first;
+ }
+
+ /**
+ * The {@code remove} method is not supported by this iterator.
+ * @throws UnsupportedOperationException if this method is invoked
+ */
+ @Override
+ public void remove() {
+ throw new UnsupportedOperationException(this.getClass().toString());
+ }
+
+ @Override
+ public void close() {
+ try {
+ is.close();
+ } catch (IOException ex) {
+ Utilities.exit("Error closing file", ex);
+ }
+ emBuffer.clear();
+ }
+
+ @Override
+ public File file() {
+ return file;
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ /**
+  * Returns a string consisting of the class of this object followed by
+  * the file from which data are read.
+  *
+  * @return a string description of this iterator
+  */
+ @Override
+ public String toString() {
+     return this.getClass().toString() + " : " + file;
+ }
+
+ /**
+  * Refills the empty record buffer from the input stream. Blocks are
+  * read until at least one record has passed the marker filter or a
+  * block header with a non-positive record count is read.
+  * Reading only one block per call is not sufficient: if the marker
+  * filter rejects every record of a block the buffer stays empty, and
+  * {@code hasNext()} would report the end of the iteration even though
+  * further blocks remain in the file.
+  * {@code Utilities.exit()} is invoked if an I/O error occurs.
+  */
+ private void fillBuffer() {
+     assert emBuffer.isEmpty();
+     try {
+         boolean moreBlocks = true;
+         while (moreBlocks && emBuffer.isEmpty()) {
+             int nRecords = is.readInt();
+             if (nRecords > 0) {
+                 readBlock(nRecords);
+             }
+             else {
+                 moreBlocks = false;
+             }
+         }
+     } catch (IOException ex) {
+         Utilities.exit("Error reading file", ex);
+     }
+ }
+
+ /*
+  * Reads the remainder of a block whose record count has already been
+  * read, and adds each record accepted by the marker filter to the
+  * record buffer. Every record is read even if it is filtered out,
+  * because it must be consumed from the stream.
+  */
+ private void readBlock(int nRecords) throws IOException {
+     String chrom = is.readUTF();
+     int chromIndex = ChromIds.instance().getIndex(chrom);
+     int nSeq = is.readByte() + SHIFT;
+     IntArray hapToSeq = readHapToSeq(nSeq);
+     for (int j=0; j<nRecords; ++j) {
+         Marker marker = readMarker(chromIndex);
+         byte flag = is.readByte();
+         VcfEmission em;
+         if (flag == 0) {
+             em = readSeqCodedRecord(marker, samples, hapToSeq, nSeq);
+         }
+         else if (flag == 1) {
+             em = readLowMafRecord(marker, samples);
+         }
+         else {
+             Utilities.exit("Error reading file.");
+             continue;
+         }
+         if (markerFilter.accept(marker)) {
+             emBuffer.add(em);
+         }
+     }
+ }
+
+ /*
+  * Reads the sequence-to-allele map of a sequence-coded record and
+  * returns the record.
+  */
+ private VcfEmission readSeqCodedRecord(Marker marker, Samples samples,
+         IntArray hapToSeq, int nSeq) throws IOException {
+     return new SeqCodedRefGT(marker, samples, hapToSeq,
+             readSeqToAllele(nSeq, marker.nAlleles()));
+ }
+
+ /*
+  * Reads one shifted byte per haplotype and returns the map from
+  * haplotype index to sequence index. Throws an IllegalStateException
+  * if a sequence index is not less than nSeq.
+  */
+ private IntArray readHapToSeq(int nSeq) throws IOException {
+     int[] seqIndices = new int[nHaps];
+     for (int hap = 0; hap < seqIndices.length; ++hap) {
+         int seq = is.readByte() + SHIFT;
+         seqIndices[hap] = seq;
+         if (seq >= nSeq) {
+             throw new IllegalStateException("inconsistent data");
+         }
+     }
+     return IntArray.create(seqIndices, 0, (nSeq-1));
+ }
+
+ /*
+  * Reads one shifted byte per sequence and returns the map from
+  * sequence index to allele. Throws an IllegalStateException if an
+  * allele is not less than nAlleles.
+  */
+ private IntArray readSeqToAllele(int nSeq, int nAlleles) throws IOException {
+     int[] alleles = new int[nSeq];
+     for (int seq = 0; seq < alleles.length; ++seq) {
+         int allele = is.readByte() + SHIFT;
+         alleles[seq] = allele;
+         if (allele >= nAlleles) {
+             throw new IllegalStateException("inconsistent data");
+         }
+     }
+     return IntArray.create(alleles, 0, (nAlleles-1));
+ }
+
+ /*
+  * Reads a record that stores, for each allele, either -1 or the
+  * number of haplotypes carrying the allele followed by their indices.
+  * An allele for which -1 was read has no stored indices (null entry).
+  * A diallelic marker is returned as a LowMafRefDiallelicGT built from
+  * the allele whose indices were stored.
+  */
+ private VcfEmission readLowMafRecord(Marker marker, Samples samples)
+         throws IOException {
+     final int nAlleles = marker.nAlleles();
+     int[][] hapIndices = new int[nAlleles][];
+     for (int allele = 0; allele < nAlleles; ++allele) {
+         int count = is.readInt();
+         if (count != -1) {
+             hapIndices[allele] = readIntArray(is, count);
+         }
+     }
+     if (nAlleles != 2) {
+         return new LowMafRefGT(marker, samples, hapIndices);
+     }
+     int storedAllele = (hapIndices[0] != null) ? 0 : 1;
+     return new LowMafRefDiallelicGT(marker, samples, storedAllele,
+             hapIndices[storedAllele]);
+ }
+
+ /*
+  * Reads a marker: position, number of identifiers (shifted byte),
+  * identifiers, and an allele code. An allele code of -1 is followed by
+  * the allele count, the allele strings, and the end coordinate.
+  * Any other code stores the number of alleles minus one in its two
+  * low-order bits and a permutation index in its remaining bits; the
+  * alleles are then obtained from Bref.alleleString() and the end
+  * coordinate is set to -1.
+  */
+ private Marker readMarker(int chromIndex) throws IOException {
+     final int pos = is.readInt();
+     final int nIds = is.readByte() + SHIFT;
+     final String[] ids = readStringArray(is, nIds);
+     final byte alleleCode = is.readByte();
+     if (alleleCode != -1) {
+         int codedCount = 1 + (alleleCode & 0b11);
+         String[] codedAlleles = Bref.alleleString(alleleCode >> 2, codedCount);
+         return new BasicMarker(chromIndex, pos, ids, codedAlleles, -1);
+     }
+     int nAlleles = is.readInt();
+     String[] alleles = readStringArray(is, nAlleles);
+     int end = is.readInt();
+     return new BasicMarker(chromIndex, pos, ids, alleles, end);
+ }
+
+ /* Reads the specified number of ints and returns them as an array */
+ private static int[] readIntArray(DataInputStream is, int length)
+         throws IOException {
+     int[] values = new int[length];
+     int k = 0;
+     while (k < values.length) {
+         values[k] = is.readInt();
+         ++k;
+     }
+     return values;
+ }
+
+ /*
+  * Reads the specified number of strings and returns them as an array.
+  * Returns null if length is negative, and returns a shared empty array
+  * if length is 0.
+  */
+ private static String[] readStringArray(DataInputStream is, int length)
+         throws IOException {
+     if (length < 0) {
+         return null;
+     }
+     if (length == 0) {
+         return EMPTY_STRING_ARRAY;
+     }
+     String[] strings = new String[length];
+     int k = 0;
+     while (k < length) {
+         strings[k] = is.readUTF();
+         ++k;
+     }
+     return strings;
+ }
+}
diff --git a/vcf/ByteArrayRefGT.java b/vcf/ByteArrayRefGT.java
new file mode 100644
index 0000000..d099fec
--- /dev/null
+++ b/vcf/ByteArrayRefGT.java
@@ -0,0 +1,222 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Class {@code ByteArrayRefGT} represents phased, non-missing
+ * genotypes for a list of reference samples at a single marker.
+ * Genotype emission probabilities are determined by the sample
+ * genotypes.
+ * </p>
+ * <p>Instances of class {@code ByteArrayRefGT} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class ByteArrayRefGT implements VcfEmission {
+
+ private final Marker marker;
+ private final Samples samples;
+ private final byte[] allele1;
+ private final byte[] allele2;
+
+ /**
+ * Creates a new {@code ByteArrayRefGT} instance from the
+ * specified {@code VcfHeader} and string VCF record.
+ * The VCF record must contain a GT FORMAT field, and
+ * all genotypes must have phased, non-missing genotypes.
+ *
+ * @param vcfHeader meta-information lines and header line for the
+ * specified VCF record
+ * @param vcfRecord a VCF record corresponding to the specified
+ * {@code vcfHeader} object
+ *
+ * @throws IllegalArgumentException if a format error is detected
+ * in the VCF record
+ * @throws IllegalArgumentException if any VCF record genotype
+ * is unphased or missing
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRecord == null}
+ */
+ public ByteArrayRefGT(VcfHeader vcfHeader, String vcfRecord) {
+ this(new VcfRecGTParser(vcfHeader, vcfRecord));
+ }
+
+ /**
+ * Creates a new {@code VcfEmission} instance from a VCF record
+ * containing phased, non-missing genotypes for a list of reference samples.
+ * The VCF record must contain a GT FORMAT field, and
+ * all genotypes must have phased, non-missing genotypes.
+ *
+ * @param gtp a parser for the VCF record
+ * @throws IllegalArgumentException if a format error is detected
+ * in the VCF record
+ * @throws IllegalArgumentException if any VCF record genotype
+ * is unphased or missing
+ * @throws NullPointerException if {@code gtp == null}
+ */
+ public ByteArrayRefGT(VcfRecGTParser gtp) {
+ if (gtp.marker().nAlleles() > Byte.MAX_VALUE) {
+ throw new IllegalArgumentException(gtp.marker().toString());
+ }
+ if (gtp.currentSample() > 0) {
+ throw new IllegalArgumentException(
+ String.valueOf(gtp.currentSample()));
+ }
+ this.marker = gtp.marker();
+ this.samples = gtp.samples();
+ this.allele1 = new byte[gtp.nSamples()];
+ this.allele2 = new byte[gtp.nSamples()];
+ storeAlleles(gtp, allele1, allele2);
+ }
+
+ /*
+  * Copies the two alleles of every sample from the parser into the
+  * specified arrays, advancing the parser one sample at a time.
+  * Throws an IllegalArgumentException if a genotype is unphased or if
+  * either allele is negative. The original test compared the second
+  * allele with -2 while comparing the first allele with -1, so a
+  * missing second allele was stored instead of being rejected.
+  * NOTE(review): assumes the parser reports a missing allele as a
+  * negative value (presumably -1) - confirm against VcfRecGTParser.
+  */
+ private static void storeAlleles(VcfRecGTParser gtp, byte[] allele1,
+         byte[] allele2) {
+     int nSamples = gtp.nSamples();
+     for (int sample=0; sample<nSamples; ++sample) {
+         byte a1 = (byte) gtp.allele1();
+         byte a2 = (byte) gtp.allele2();
+         if (gtp.isPhased()==false || a1 < 0 || a2 < 0) {
+             String s = "Unphased or missing reference genotype at marker: "
+                     + gtp.marker();
+             throw new IllegalArgumentException(s);
+         }
+         allele1[sample] = a1;
+         allele2[sample] = a2;
+         // the parser starts on the first sample; do not advance past the last
+         if (sample + 1 < nSamples) {
+             gtp.nextSample();
+         }
+     }
+ }
+
+ @Override
+ public boolean isPhased(int sample) {
+ if (sample < 0 || sample >= this.nSamples()) {
+ throw new IllegalArgumentException(String.valueOf(sample));
+ }
+ return true;
+ }
+
+ /**
+ * Returns the samples. The returned samples are the filtered samples
+ * after all sample exclusions.
+ *
+ * @return the samples.
+ */
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*samples.nSamples();
+ }
+
+ @Override
+ public int nHapPairs() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Marker marker() {
+ return marker;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return true;
+ }
+
+ /**
+  * Returns 1.0f if the specified ordered alleles equal the stored
+  * first and second alleles of the specified sample, and returns 0.0f
+  * otherwise.
+  *
+  * @throws IndexOutOfBoundsException if either allele is negative or
+  * is not less than {@code this.marker().nAlleles()}
+  */
+ @Override
+ public float gl(int sample, int allele1, int allele2) {
+     final int nAlleles = marker.nAlleles();
+     if (allele1 < 0 || allele1 >= nAlleles) {
+         throw new IndexOutOfBoundsException(String.valueOf(allele1));
+     }
+     if (allele2 < 0 || allele2 >= nAlleles) {
+         throw new IndexOutOfBoundsException(String.valueOf(allele2));
+     }
+     if (allele1 != allele1(sample)) {
+         return 0.0f;
+     }
+     return (allele2 == allele2(sample)) ? 1.0f : 0.0f;
+ }
+
+ @Override
+ public int allele1(int sample) {
+ return allele1[sample];
+ }
+
+ @Override
+ public int allele2(int sample) {
+ return allele2[sample];
+ }
+
+ /**
+  * Returns the allele on the specified haplotype. Haplotypes
+  * {@code 2*s} and {@code 2*s + 1} are the first and second alleles
+  * of sample {@code s}.
+  */
+ @Override
+ public int allele(int hap) {
+     final int sample = hap/2;
+     final boolean firstHap = ((hap & 1) == 0);
+     if (firstHap) {
+         return allele1(sample);
+     }
+     return allele2(sample);
+ }
+
+ @Override
+ public int nAlleles() {
+ return this.marker().nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices() {
+ return false;
+ }
+
+ @Override
+ public int majorAllele() {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int alleleCount(int allele) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int hapIndex(int allele, int copy) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ /**
+ * Returns the data represented by {@code this} as a VCF
+ * record with a GT format field. The returned VCF record
+ * will have missing QUAL and INFO fields, will have "PASS"
+ * in the filter field, and will have a GT format field.
+ * @return the data represented by {@code this} as a VCF
+ * record with a GT format field
+ */
+ @Override
+ public String toString() {
+ return toVcfRec();
+ }
+}
diff --git a/vcf/Data.java b/vcf/Data.java
new file mode 100644
index 0000000..566103e
--- /dev/null
+++ b/vcf/Data.java
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import haplotype.HapPair;
+import haplotype.SampleHapPairs;
+import java.io.Closeable;
+import java.util.List;
+
+/**
+ * Interface {@code Data} represents a sliding window of target VCF records
+ * or a sliding window of reference and target VCF records.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface Data extends Closeable {
+
+ /**
+ * Returns {@code true} if the current window of VCF records is the last
+ * window for the chromosome and returns {@code false} otherwise.
+ * @return {@code true} if the current window of VCF records is the last
+ * window for the chromosome
+ */
+ public boolean lastWindowOnChrom();
+
+ /**
+ * Returns {@code true} if the sliding window of VCF records can advance
+ * and returns {@code false} otherwise.
+ * @return {@code true} if the sliding window of VCF records can advance
+ */
+ boolean canAdvanceWindow();
+
+ /**
+ * Advances the sliding window of VCF records, and returns the advanced
+ * window. The size of the advanced window and the number of markers
+ * of overlap between the marker window immediately before method
+ * invocation and the marker window immediately after method invocation
+ * may differ from the requested values. If the advanced window size or
+ * overlap is less than the requested value, the actual value will be
+ * as large as possible. If {@code this.lastWindowOnChrom() == true}
+ * before method invocation, then there will be no overlap between the
+ * windows.
+ *
+ * @param overlap the requested number of markers of overlap
+ * @param windowSize the requested number of the markers in the window
+ * immediately after the method returns
+ *
+ * @throws IllegalArgumentException if a format error in the input data
+ * is detected
+ * @throws IllegalArgumentException if
+ * {@code overlap < 0 || overlap >= windowSize}
+ * @throws IllegalStateException if
+ * {@code this.canAdvanceWindow() == false}
+ */
+ void advanceWindow(int overlap, int windowSize);
+
+ /**
+ * Returns the current window index. The window index
+ * is the number of previous invocations of the {@code advanceWindow()}
+ * method.
+ * @return the window index
+ */
+ public int window();
+
+ /**
+ * Returns the number of target data markers in the overlap between
+ * the current marker window and the previous marker window.
+ * Returns 0 if the current marker window is the first marker window.
+ *
+ * @return the number of target data markers in the overlap between
+ * the current marker window and the previous marker window
+ */
+ int targetOverlap();
+
+ /**
+ * Returns the number of VCF records in the overlap between the current
+ * window and the previous window. Returns 0 if the current window
+ * is the first window.
+ *
+ * @return the number of VCF records in the overlap between the current
+ * window and the previous window
+ */
+ public int overlap();
+
+ /**
+ * Returns the number of target data markers in the current window.
+ * @return the number of target data markers in the current window
+ */
+ int nTargetMarkers();
+
+ /**
+ * Returns the number of target VCF records in the union of the
+ * current window and all previous windows.
+ * @return the number of target VCF records in the union of the
+ * current window and all previous windows
+ */
+ int nTargetMarkersSoFar();
+
+ /**
+ * Returns the list of target data markers in the current window.
+ * @return the list of target data markers in the current window
+ */
+ Markers targetMarkers();
+
+ /**
+ * Returns the number of markers in the current window.
+ * @return the number of markers in the current window
+ */
+ int nMarkers();
+
+ /**
+ * Returns the number of markers in the union of the current window
+ * and all previous windows.
+ * @return the number of markers in the union of the current window
+ * and all previous windows
+ */
+ int nMarkersSoFar();
+
+ /**
+ * Returns the list of markers in the current window.
+ * @return the list of markers in the current window
+ */
+ Markers markers();
+
+ /**
+ * Returns the target data marker index corresponding to the specified
+ * marker, or returns -1 if no corresponding target data marker exists.
+ * Indices are with respect to the current window.
+ * @param marker a marker index
+ * @return the target data marker index corresponding to the specified
+ * marker, or returns -1 if no corresponding target data marker exists
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ int targetMarkerIndex(int marker);
+
+ /**
+ * Returns the marker index corresponding to the
+ * specified target data marker. Indices are with
+ * respect to the current window.
+ * @param targetMarker a target data marker index
+ * @return the marker index corresponding to the specified
+ * target data marker
+ * @throws IndexOutOfBoundsException if
+ * {@code targetMarker < 0 || targetMarker >= this.nTargetMarkers()}
+ */
+ int markerIndex(int targetMarker);
+
+ /**
+ * Returns the number of target samples.
+ * @return the number of target samples
+ */
+ int nTargetSamples();
+
+ /**
+ * Returns the list of target samples.
+ * @return the list of target samples
+ */
+ Samples targetSamples();
+
+ /**
+ * Returns the number of reference samples.
+ * @return the number of reference samples
+ */
+ int nRefSamples();
+
+ /**
+ * Returns the list of reference samples, or {@code null} if
+ * there are no reference samples.
+ * @return the list of reference samples, or {@code null} if
+ * there are no reference samples
+ */
+ Samples refSamples();
+
+ /**
+ * Returns the total number of reference and target samples.
+ * @return the total number of reference and target samples
+ */
+ int nAllSamples();
+
+ /**
+ * Returns a list of all target and reference samples.
+ * Target samples are listed first in the same order as the list returned
+ * by {@code this.targetSamples()}. Reference samples are listed last
+ * in the same order as the list returned by {@code this.refSamples()}.
+ * @return a list of all target and reference samples
+ */
+ Samples allSamples();
+
+ /**
+ * Returns the genotype likelihoods for the target samples
+ * restricted to the target data markers in the current window.
+ * The returned {@code GL} instance will contain no markers if
+ * {@code this.advanceWindow()} has not yet been invoked.
+ * @return the genotype likelihoods for the target samples
+ * restricted to the target data markers in the current window
+ */
+ GL targetGL();
+
+ /**
+ * Returns a list of reference haplotype pairs that are restricted
+ * to the target data markers in the current window.
+ * The returned list will be empty if there are no reference samples
+ * or if {@code this.advanceWindow()} has not yet been invoked.
+ * @return a list of reference haplotype pairs that are restricted
+ * to the target data markers
+ */
+ List<HapPair> restrictedRefHapPairs();
+
+ /**
+ * Returns a list of the reference haplotype pairs for the current
+ * window. The returned list will be empty if there are no reference
+ * samples or if {@code this.advanceWindow()} has not yet been invoked.
+ * @return a list of the reference haplotype pairs
+ */
+ List<HapPair> refHapPairs();
+
+ /**
+ * Returns the reference haplotype pairs for the current
+ * window. Returns {@code null} if there are no reference samples or if
+ * {@code this.advanceWindow()} has not yet been invoked.
+ * @return the reference haplotype pairs or {@code null} if there
+ * are no reference haplotype pairs
+ */
+ SampleHapPairs refSampleHapPairs();
+
+ /**
+ * Releases any I/O resources controlled by this object.
+ */
+ @Override
+ void close();
+}
diff --git a/vcf/FilterUtil.java b/vcf/FilterUtil.java
new file mode 100644
index 0000000..9961645
--- /dev/null
+++ b/vcf/FilterUtil.java
@@ -0,0 +1,139 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.Filter;
+import blbutil.Utilities;
+import java.io.File;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * Class {@code FilterUtil} contains static methods for constructing
+ * marker filters.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class FilterUtil {
+
+    private FilterUtil() {
+        // private constructor to prevent instantiation
+    }
+
+    /**
+     * Returns a filter that excludes markers that have an identifier
+     * or genome coordinates that match a line of the specified file,
+     * or returns a filter that accepts all markers if the
+     * {@code excludeMarkersFile} parameter is {@code null}.
+     * Genome coordinates must be in "CHROM:POS" format.
+     * @param excludeMarkersFile a file that contains an identifier
+     * or genome coordinate of one excluded marker on each line
+     * @return a filter that excludes markers that have an identifier
+     * or genome coordinates that match a line of the specified file,
+     * or a filter that accepts all markers if the
+     * {@code excludeMarkersFile} parameter is {@code null}
+     *
+     * @throws IllegalArgumentException if the specified file does not exist
+     * @throws IllegalArgumentException if the specified file is a directory
+     * @throws IllegalArgumentException if any line of the specified
+     * file contains two non-white-space characters separated by one or
+     * more white-space characters
+     */
+    public static Filter<Marker> markerFilter(File excludeMarkersFile) {
+        if (excludeMarkersFile==null) {
+            return Filter.acceptAllFilter();
+        }
+        Set<String> excludeIds = Utilities.idSet(excludeMarkersFile);
+        return idFilter(excludeIds);
+    }
+
+    /**
+     * Returns a filter that excludes samples that have an identifier
+     * that matches a line of the specified file, or returns {@code null} if
+     * the {@code excludeSamplesFile} parameter is {@code null}.
+     * Note that this differs from {@code markerFilter()}, which returns
+     * an accept-all filter (never {@code null}) when its file is
+     * {@code null}.
+     * @param excludeSamplesFile a file which contains an identifier
+     * of one excluded sample on each line
+     * @return a filter that excludes samples that have an identifier
+     * that matches a line of the specified file, or {@code null} if
+     * the {@code excludeSamplesFile} parameter is {@code null}
+     *
+     * @throws IllegalArgumentException if the specified file does not exist
+     * @throws IllegalArgumentException if the specified file is a directory
+     * @throws IllegalArgumentException if any line of the specified
+     * file contains two non-white-space characters separated by one or
+     * more white-space characters
+     */
+    public static Filter<String> sampleFilter(File excludeSamplesFile) {
+        if (excludeSamplesFile==null) {
+            // callers are expected to handle a null sample filter
+            return null;
+        }
+        Set<String> exclude = Utilities.idSet(excludeSamplesFile);
+        return Filter.excludeFilter(exclude);
+    }
+
+    /**
+     * Returns {@code true} if any identifier of the specified marker
+     * is in the specified set, or if
+     * ({@code marker.chrom() + ":" + marker.pos()}) is in the specified
+     * set, and returns {@code false} otherwise.
+     * @param marker a marker
+     * @param set a set of marker identifiers and chromosome positions in
+     * "CHROM:POS" format
+     * @return {@code true} if any identifier of the specified marker
+     * is in the specified set or if
+     * ({@code marker.chrom() + ":" + marker.pos()}) is in the specified set
+     * @throws NullPointerException if {@code marker == null || set == null}
+     */
+    public static boolean markerIsInSet(Marker marker, Set<String> set) {
+        for (int j=0, n=marker.nIds(); j<n; ++j) {
+            if (set.contains(marker.id(j))) {
+                return true;
+            }
+        }
+        // no identifier matched: fall back to the "CHROM:POS" key
+        String posId = marker.chrom() + ':' + marker.pos();
+        return set.contains(posId);
+    }
+
+    /**
+     * Returns a filter that accepts all markers which do not have an
+     * identifier or chromosome position present in the specified
+     * collection.
+     * A marker is excluded if {@code exclude.contains(marker.id(j)) == true}
+     * for any {@code 0 <= j < marker.nIds()} or if
+     * {@code exclude.contains(marker.chrom() + ":" + marker.pos()) == true}.
+     * The collection is copied, so later changes to {@code exclude} do
+     * not affect the returned filter.
+     * @param exclude a collection of marker identifiers and chromosome
+     * positions in "CHROM:POS" format
+     * @return a filter that accepts all markers which do not have an
+     * identifier or chromosome position present in the specified
+     * collection
+     * @throws NullPointerException if {@code exclude == null}
+     */
+    public static Filter<Marker> idFilter(Collection<String> exclude) {
+        final Set<String> excludeSet = new HashSet<>(exclude);
+        if (excludeSet.isEmpty()) {
+            // nothing to exclude: skip the per-marker set lookups
+            return marker -> true;
+        }
+        return marker -> !markerIsInSet(marker, excludeSet);
+    }
+}
diff --git a/vcf/FuzzyGL.java b/vcf/FuzzyGL.java
new file mode 100644
index 0000000..bd2bd45
--- /dev/null
+++ b/vcf/FuzzyGL.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2013 Brian L. Browning
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Class {@code FuzzyGL} is a wrapper for a {@code GL}
+ * instance that incorporates a fixed error rate for the
+ * observed (emitted) allele to differ from the true allele. Allele
+ * errors are independent.
+ * </p>
+ * Instances of class {@code FuzzyGL} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class FuzzyGL implements GL {
+
+ private final float ee;
+ private final float ef;
+ private final float ff;
+
+ private final GL gl;
+
+ /**
+ * Constructs a {@code FuzzyGL} instance.
+ * @param gl the genotype likelihoods without error
+ * @param err the allele error rate
+ * @throws IllegalArgumentException if
+ * {@code Float.isNaN(err) || err < 0 || err >= 1.0}
+ * @throws NullPointerException if {@code gl == null}
+ */
+ public FuzzyGL(GL gl, float err) {
+ if (gl==null) {
+ throw new NullPointerException("gl==null");
+ }
+ if (Double.isNaN(err) || err < 0.0 || err >= 1.0) {
+ throw new IllegalArgumentException("err: " + err);
+ }
+ float e = err;
+ float f = 1.0f - err;
+ this.ee = e*e;
+ this.ef = e*f;
+ this.ff = f*f;
+ this.gl = gl;
+ }
+
+ @Override
+ public float gl(int marker, int sample, int a1, int a2) {
+ // following algorithm is for both diallelic and multi-allelic markers
+ int obs1 = gl.allele1(marker, sample);
+ int obs2 = gl.allele2(marker, sample);
+ if (obs1>=0 && obs2>=0) {
+ if (obs1==obs2 || gl.isPhased(marker, sample)) {
+ return phasedGL(obs1, obs2, a1, a2);
+ }
+ else {
+ return phasedGL(obs1, obs2, a1, a2)
+ + phasedGL(obs2, obs1, a1, a2);
+ }
+ }
+ else {
+ return gl.gl(marker, sample, a1, a2);
+ }
+ }
+
+ private float phasedGL(int obs1, int obs2, int a1, int a2) {
+ if (obs1==a1) {
+ return obs2==a2 ? ff : ef;
+ }
+ else {
+ return obs2==a2 ? ef : ee;
+ }
+ }
+
+ @Override
+ public boolean isRefData() {
+ return gl.isRefData();
+ }
+
+ @Override
+ public boolean isPhased(int marker, int sample) {
+ return gl.isPhased(marker, sample);
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ return gl.allele1(marker, sample);
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ return gl.allele2(marker, sample);
+ }
+
+ @Override
+ public int allele(int marker, int hap) {
+ return gl.allele(marker, hap);
+ }
+
+ @Override
+ public int nMarkers() {
+ return gl.nMarkers();
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return gl.marker(marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return gl.markers();
+ }
+
+ @Override
+ public int nHaps() {
+ return gl.nHaps();
+ }
+
+ @Override
+ public int nSamples() {
+ return gl.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return gl.samples();
+ }
+}
diff --git a/vcf/GL.java b/vcf/GL.java
new file mode 100644
index 0000000..d586ca5
--- /dev/null
+++ b/vcf/GL.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Interface {@code GL} (Genotype Likelihoods) represents genotype
+ * likelihoods for a set of samples.
+ * </p>
+ * <p>Instances of {@code GL} are required to be immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface GL {
+
+    /**
+     * Returns {@code true} if the observed data for each marker and sample
+     * includes a phased genotype that has no missing alleles,
+     * and returns {@code false} otherwise.
+     * @return {@code true} if the observed data for each marker and sample
+     * includes a phased genotype that has no missing alleles,
+     * and {@code false} otherwise
+     */
+    boolean isRefData();
+
+    /**
+     * Returns the probability of the observed data for the specified marker
+     * and sample if the specified pair of ordered alleles is the true
+     * ordered genotype.
+     * @param marker the marker index
+     * @param sample the sample index
+     * @param allele1 the first allele index
+     * @param allele2 the second allele index
+     * @return the probability of the observed data for the specified marker
+     * and sample if the specified pair of ordered alleles is the true
+     * ordered genotype
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code sample < 0 || sample >= this.nSamples()}
+     * @throws IndexOutOfBoundsException if
+     * {@code allele1 < 0 || allele1 >= this.marker(marker).nAlleles()}
+     * @throws IndexOutOfBoundsException if
+     * {@code allele2 < 0 || allele2 >= this.marker(marker).nAlleles()}
+     */
+    float gl(int marker, int sample, int allele1, int allele2);
+
+    /**
+     * Returns {@code true} if the observed data for the specified
+     * marker and sample includes a phased genotype, and returns {@code false}
+     * otherwise.
+     * @param marker the marker index
+     * @param sample the sample index
+     * @return {@code true} if the observed data for the specified
+     * marker and sample includes a phased genotype, and {@code false}
+     * otherwise
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code sample < 0 || sample >= this.nSamples()}
+     */
+    boolean isPhased(int marker, int sample);
+
+    /**
+     * Returns the first allele for the specified marker and sample
+     * if the observed data include a non-missing allele, and returns -1
+     * otherwise. Alleles are arbitrarily ordered if the genotype is unphased.
+     * @param marker the marker index
+     * @param sample the sample index
+     * @return the first allele for the specified marker and sample
+     * if the observed data include a non-missing allele, and
+     * -1 otherwise
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code sample < 0 || sample >= this.nSamples()}
+     */
+    int allele1(int marker, int sample);
+
+    /**
+     * Returns the second allele for the specified marker and sample
+     * if the observed data include a non-missing allele, and
+     * returns -1 otherwise.
+     * Alleles are arbitrarily ordered if the genotype is unphased.
+     * @param marker the marker index
+     * @param sample the sample index
+     * @return the second allele for the specified marker and sample
+     * if the observed data include a non-missing allele, and
+     * -1 otherwise
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code sample < 0 || sample >= this.nSamples()}
+     */
+    int allele2(int marker, int sample);
+
+    /**
+     * Returns the allele on the specified haplotype for the specified marker
+     * if the observed data include a non-missing allele, and returns
+     * -1 otherwise. Alleles are arbitrarily ordered if the genotype is
+     * unphased.
+     * @param marker the marker index
+     * @param hap the haplotype index
+     * @return the allele on the specified haplotype for the specified marker
+     * if the observed data include a non-missing allele, and -1 otherwise
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     * @throws IndexOutOfBoundsException if
+     * {@code hap < 0 || hap >= this.nHaps()}
+     */
+    int allele(int marker, int hap);
+
+    /**
+     * Returns the number of markers.
+     * @return the number of markers
+     */
+    int nMarkers();
+
+    /**
+     * Returns the specified marker.
+     * @param marker the marker index
+     * @return the specified marker
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     */
+    Marker marker(int marker);
+
+    /**
+     * Returns the list of markers.
+     * @return the list of markers
+     */
+    Markers markers();
+
+    /**
+     * Returns the number of haplotypes.
+     * @return the number of haplotypes
+     */
+    int nHaps();
+
+    /**
+     * Returns the number of samples.
+     * @return the number of samples
+     */
+    int nSamples();
+
+    /**
+     * Returns the list of samples.
+     * @return the list of samples
+     */
+    Samples samples();
+
+    /**
+     * Returns a string representation of {@code this}. The exact
+     * details of the representation are unspecified and subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    String toString();
+}
diff --git a/vcf/GprobsStatistics.java b/vcf/GprobsStatistics.java
new file mode 100644
index 0000000..d03a057
--- /dev/null
+++ b/vcf/GprobsStatistics.java
@@ -0,0 +1,313 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.Const;
+import java.text.DecimalFormat;
+import java.util.Arrays;
+import main.AlleleProbs;
+import main.GenotypeValues;
+
+/**
+ * <p>Class {@code GprobsStatistics} has methods for computing statistics
+ * from posterior genotype probabilities.
+ * </p>
+ *
+ * <p>The squared correlation statistics computed by this class can be derived
+ * using the methods found in Appendix 1 of
+ * "Browning BL and Browning SR, Am J Hum Genet 2009;84(2):210-23".
+ * If a variant has multiple ALT alleles, all ALT alleles are collapsed
+ * into a single allele when computing the squared correlation statistics.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class GprobsStatistics {
+
+    private final Marker marker;
+    private final int nSamples;
+    private final float[] alleleFreq;
+
+    // Running sums over samples. "call" is the index (0, 1, or 2) of the
+    // most probable collapsed genotype class and "expected" is the
+    // posterior mean ALT allele dose.
+    private float sumCall = 0;              // sum of call
+    private float sumSquareCall = 0;        // sum of call^2
+    private float sumExpected = 0;          // sum of E[dose]
+    private float sumExpectedSquare = 0;    // sum of E[dose^2]
+    private float sumSquareExpected = 0;    // sum of (E[dose])^2
+    private float sumCallExpected = 0;      // sum of call*E[dose]
+
+    /**
+     * Constructs a new {@code GprobsStatistics} instance from the
+     * specified scaled genotype probabilities.
+     * @param gv scaled sample posterior genotype probabilities
+     * @param marker a marker index
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= gv.nMarkers()}
+     * @throws NullPointerException if {@code gv == null}
+     */
+    public GprobsStatistics(GenotypeValues gv, int marker) {
+        this.marker = gv.marker(marker);
+        this.nSamples = gv.nSamples();
+        int nAlleles = this.marker.nAlleles();
+        this.alleleFreq = new float[nAlleles];
+        float[] alProbs = new float[nAlleles];
+        float[] gtProbs = new float[3];
+        for (int j=0; j<this.nSamples; ++j) {
+            setProbs(gv, marker, j, gtProbs, alProbs);
+            addSample(gtProbs, alProbs);
+        }
+        divideBy(alleleFreq, sum(alleleFreq));
+    }
+
+    /**
+     * Constructs a new {@code GprobsStatistics} instance from the
+     * specified allele probabilities.
+     * @param alleleProbs the allele probabilities
+     * @param marker a marker index
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= alleleProbs.nMarkers()}
+     * @throws NullPointerException if {@code alleleProbs == null}
+     */
+    public GprobsStatistics(AlleleProbs alleleProbs, int marker) {
+        this.marker = alleleProbs.marker(marker);
+        this.nSamples = alleleProbs.nSamples();
+        int nAlleles = this.marker.nAlleles();
+        this.alleleFreq = new float[nAlleles];
+        float[] alProbs = new float[nAlleles];
+        float[] gtProbs = new float[3];
+        for (int j=0; j<this.nSamples; ++j) {
+            setProbs(alleleProbs, marker, j, gtProbs, alProbs);
+            addSample(gtProbs, alProbs);
+        }
+        divideBy(alleleFreq, sum(alleleFreq));
+    }
+
+    /*
+     * Adds one sample's normalized genotype-class probabilities and
+     * allele probabilities to the running sums and to the (not yet
+     * normalized) allele frequencies.
+     */
+    private void addSample(float[] gtProbs, float[] alProbs) {
+        for (int a=0; a<alleleFreq.length; ++a) {
+            alleleFreq[a] += alProbs[a];
+        }
+        int call = maxIndex(gtProbs);
+        float exp = (gtProbs[1] + 2*gtProbs[2]);
+        float expSquare = (gtProbs[1] + 4*gtProbs[2]);
+        sumCall += call;
+        sumSquareCall += call*call;
+        sumExpected += exp;
+        sumExpectedSquare += expSquare;
+        sumSquareExpected += (exp*exp);
+        sumCallExpected += (call*exp);
+    }
+
+    /*
+     * Fills gtProbs and alProbs for one sample from unordered genotype
+     * values, which are read in the order (0,0), (0,1), (1,1), (0,2), ...
+     */
+    private static void setProbs(GenotypeValues gv, int marker, int sample,
+            float[] gtProbs, float[] alProbs) {
+        Arrays.fill(gtProbs, 0.0f);
+        Arrays.fill(alProbs, 0.0f);
+        int gt = 0;
+        for (int a2=0; a2<alProbs.length; ++a2) {
+            for (int a1=0; a1<=a2; ++a1) {
+                float gprob = gv.value(marker, sample, gt++);
+                addGenotypeProb(gprob, a1, a2, gtProbs, alProbs);
+            }
+        }
+        normalize(gtProbs, alProbs);
+    }
+
+    /*
+     * Fills gtProbs and alProbs for one sample from ordered genotype
+     * probabilities; both orderings of a heterozygote are combined.
+     */
+    private static void setProbs(AlleleProbs ap, int marker, int sample,
+            float[] gtProbs, float[] alProbs) {
+        Arrays.fill(gtProbs, 0.0f);
+        Arrays.fill(alProbs, 0.0f);
+        for (int a2=0; a2<alProbs.length; ++a2) {
+            for (int a1=0; a1<=a2; ++a1) {
+                float gprob = ap.gtProb(marker, sample, a1, a2);
+                if (a1 != a2) {
+                    gprob += ap.gtProb(marker, sample, a2, a1);
+                }
+                addGenotypeProb(gprob, a1, a2, gtProbs, alProbs);
+            }
+        }
+        normalize(gtProbs, alProbs);
+    }
+
+    /*
+     * Adds the probability of unordered genotype (a1, a2), a1 <= a2, to
+     * both allele probabilities and to the genotype class obtained by
+     * collapsing all ALT alleles: 0 = REF/REF, 1 = REF/ALT, 2 = ALT/ALT.
+     */
+    private static void addGenotypeProb(float gprob, int a1, int a2,
+            float[] gtProbs, float[] alProbs) {
+        alProbs[a1] += gprob;
+        alProbs[a2] += gprob;
+        int gtClass = (a2==0) ? 0 : ((a1==0) ? 1 : 2);
+        gtProbs[gtClass] += gprob;
+    }
+
+    /*
+     * Scales gtProbs to sum to 1, and scales alProbs by twice the same
+     * divisor because each genotype contributes to two allele entries.
+     */
+    private static void normalize(float[] gtProbs, float[] alProbs) {
+        float sum = sum(gtProbs);
+        divideBy(gtProbs, sum);
+        divideBy(alProbs, 2*sum);
+    }
+
+    /* Returns the index of the first maximal element. */
+    private static int maxIndex(float[] fa) {
+        int maxIndex = 0;
+        for (int j=1; j<fa.length; ++j) {
+            if (fa[j]>fa[maxIndex]) {
+                maxIndex = j;
+            }
+        }
+        return maxIndex;
+    }
+
+    private static float sum(float[] fa) {
+        float sum = 0.0f;
+        for (float f : fa) {
+            sum += f;
+        }
+        return sum;
+    }
+
+    private static void divideBy(float[] fa, float divisor) {
+        for (int j=0; j<fa.length; ++j) {
+            fa[j] /= divisor;
+        }
+    }
+
+    /**
+     * Returns the marker.
+     * @return the marker.
+     */
+    public Marker marker() {
+        return marker;
+    }
+
+    /**
+     * Returns an array of length {@code this.marker().nAlleles()} whose
+     * {@code j}-th element is the estimated sample frequency of allele
+     * {@code j}. The returned array is a copy.
+     * @return an array of length {@code this.marker().nAlleles()} whose
+     * {@code j}-th element is the estimated sample frequency of allele
+     * {@code j}
+     */
+    public float[] alleleFreq() {
+        return alleleFreq.clone();
+    }
+
+    /**
+     * Returns the estimated squared correlation between the most probable
+     * ALT allele dose and the true ALT allele dose.
+     * Returns 0 if the marker is monomorphic or if most probable ALT
+     * allele dose is monomorphic.
+     *
+     * @return the estimated squared correlation between the most probable
+     * ALT allele dose and the true ALT allele dose
+     */
+    public float allelicR2() {
+        float f = 1.0f / nSamples;
+        float cov = sumCallExpected - (sumCall * sumExpected * f);
+        float varBest = sumSquareCall - (sumCall * sumCall * f);
+        float varExp = sumExpectedSquare - (sumExpected * sumExpected * f);
+        float den = varBest * varExp;
+        return (den==0.0f) ? 0.0f : Math.abs( (cov*cov) / den );
+    }
+
+    /**
+     * Returns the estimated squared correlation between the estimated
+     * ALT allele dose and the true ALT allele dose. Returns 0 if the
+     * marker is monomorphic.
+     *
+     * @return the estimated squared correlation between the estimated
+     * ALT allele dose and the true ALT allele dose
+     */
+    public float doseR2() {
+        float f = 1.0f / (float) nSamples;
+        float num = sumSquareExpected - (sumExpected * sumExpected * f);
+        float den = sumExpectedSquare - (sumExpected * sumExpected * f);
+        return (den==0.0f) ? 0.0f : Math.abs(num / den);
+    }
+
+    /**
+     * Returns the estimated squared correlation between the estimated
+     * ALT allele dose and the true ALT allele dose where the variance of
+     * the true ALT allele dose is estimated from the estimated
+     * ALT allele frequency. Returns 0 if the marker is monomorphic.
+     *
+     * @return the estimated squared correlation between the estimated
+     * ALT allele dose and the true ALT allele dose
+     */
+    public float hweDoseR2() {
+        float f = 1.0f / nSamples;
+        float num = (sumSquareExpected - (sumExpected*sumExpected*f))/nSamples;
+        float altFreq = sumExpected / (2.0f * nSamples);
+        float den = 2.0f * altFreq * (1.0f - altFreq);
+        return (den==0.0f) ? 0.0f : Math.abs(num / den);
+    }
+
+    /**
+     * Returns a string representation of {@code this}. The exact
+     * details of the representation are unspecified and subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        DecimalFormat df = new DecimalFormat("0.####");
+        StringBuilder sb = new StringBuilder(80);
+        sb.append(marker);
+        sb.append(Const.tab);
+        for (int j=0; j<alleleFreq.length; ++j) {
+            sb.append( (j==0) ? "AF=" : Const.comma);
+            sb.append(alleleFreq[j]);
+        }
+        sb.append(Const.tab);
+        sb.append("AR2=");
+        sb.append(format(df, allelicR2()));
+        sb.append(Const.tab);
+        sb.append("DR2=");
+        sb.append(format(df, doseR2()));
+        sb.append(Const.tab);
+        sb.append("HDR2=");
+        sb.append(format(df, hweDoseR2()));
+        return sb.toString();
+    }
+
+    /*
+     * Formats a value with the specified format, spelling out NaN and
+     * the infinities explicitly rather than passing them to df.
+     */
+    private static String format(DecimalFormat df, float d) {
+        if (Float.isNaN(d)) {
+            return "NaN";
+        }
+        else if (d==Float.POSITIVE_INFINITY) {
+            return "Infinity";
+        }
+        else if (d==Float.NEGATIVE_INFINITY) {
+            return "-Infinity";
+        }
+        else {
+            return df.format(d);
+        }
+    }
+}
diff --git a/vcf/HapsMarker.java b/vcf/HapsMarker.java
new file mode 100644
index 0000000..d7429dc
--- /dev/null
+++ b/vcf/HapsMarker.java
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+/**
+ * <p>Interface {@code HapsMarker} represents marker alleles for a
+ * list of haplotype pairs.
+ * </p>
+ * All instances of {@code HapsMarker} are required to be
+ * immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface HapsMarker extends MarkerContainer {
+
+ /**
+ * Returns the allele on the specified haplotype.
+ * @param haplotype a haplotype index
+ * @return the allele on the specified haplotype
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code haplotype < 0 || haplotype >= this.nHaps()}
+ */
+ int allele(int haplotype);
+
+ /**
+ * Returns the first allele for the specified haplotype pair.
+ * @param hapPair a haplotype pair index
+ * @return the first allele for the specified haplotype pair
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hapPair < 0 || hapPair >= this.nHapPairs()}
+ */
+ int allele1(int hapPair);
+
+ /**
+ * Returns the second allele for the specified haplotype pair.
+ * @param hapPair a haplotype pair index
+ * @return the second allele for the specified haplotype pair
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hapPair < 0 || hapPair >= this.nHapPairs()}
+ */
+ int allele2(int hapPair);
+
+ /**
+ * Returns the marker.
+ * @return the marker
+ */
+ @Override
+ Marker marker();
+
+ /**
+ * Returns the number of haplotypes. The returned value is equal to
+ * {@code 2*this.nHapPairs()}.
+ * @return the number of haplotypes
+ */
+ int nHaps();
+
+ /**
+ * Returns the number of haplotype pairs. The returned value is
+ * equal to {@code this.nHaps()/2}.
+ * @return the number of haplotype pairs
+ */
+ int nHapPairs();
+}
diff --git a/vcf/HbdAL.java b/vcf/HbdAL.java
new file mode 100644
index 0000000..6cf6d60
--- /dev/null
+++ b/vcf/HbdAL.java
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+
+/**
+ * <p>Class {@code HbdAL} represents allele emission probabilities
+ * for a set of haplotype pairs under a homozygosity by descent (HBD) model.
+ * </p>
+ * <p>Instances of class {@code HbdAL} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class HbdAL implements AL {
+
+    private final GL gl;
+
+    /**
+     * Constructs an {@code HbdAL} instance.
+     *
+     * @param gl genotype emission probabilities
+     *
+     * @throws NullPointerException if {@code gl == null}
+     */
+    public HbdAL(GL gl) {
+        if (gl==null) {
+            throw new NullPointerException("gl==null");
+        }
+        this.gl = gl;
+    }
+
+    /**
+     * Returns the genotype likelihood of the homozygous genotype
+     * ({@code allele}, {@code allele}) for the sample with index
+     * {@code haplotype/2}.
+     *
+     * @throws IllegalArgumentException if
+     * {@code allele < 0 || allele >= this.marker(marker).nAlleles()}
+     */
+    @Override
+    public float al(int marker, int haplotype, int allele) {
+        if (allele<0 || allele >= gl.marker(marker).nAlleles()) {
+            String s = "marker=" + marker + " allele: " + allele;
+            throw new IllegalArgumentException(s);
+        }
+        // haplotypes 2k and 2k+1 both belong to sample k
+        int sample = haplotype/2;
+        return gl.gl(marker, sample, allele, allele);
+    }
+
+    /**
+     * Returns the observed allele if the two observed alleles of the
+     * sample with index {@code haplotype/2} are equal, and returns -1
+     * otherwise.
+     */
+    @Override
+    public int allele(int marker, int haplotype) {
+        int sample = haplotype/2;
+        int a1 = gl.allele1(marker, sample);
+        int a2 = gl.allele2(marker, sample);
+        return (a1!=a2) ? -1 : a1;
+    }
+
+    @Override
+    public int nMarkers() {
+        return gl.nMarkers();
+    }
+
+    @Override
+    public Marker marker(int markerIndex) {
+        return gl.marker(markerIndex);
+    }
+
+    @Override
+    public Markers markers() {
+        return gl.markers();
+    }
+
+    @Override
+    public int nSamples() {
+        return gl.nSamples();
+    }
+
+    @Override
+    public Samples samples() {
+        return gl.samples();
+    }
+
+    @Override
+    public int nHaps() {
+        return 2*gl.nSamples();
+    }
+
+    @Override
+    public float errProb() {
+        return 0f;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("[HbdAL: nMarkers=");
+        sb.append(nMarkers());
+        sb.append(" nHaps=");
+        sb.append(nHaps());
+        sb.append(Const.nl);
+        sb.append(gl);
+        sb.append(Const.nl);
+        sb.append(']');
+        return sb.toString();
+    }
+}
diff --git a/vcf/IntervalVcfIt.java b/vcf/IntervalVcfIt.java
new file mode 100644
index 0000000..783dd39
--- /dev/null
+++ b/vcf/IntervalVcfIt.java
@@ -0,0 +1,137 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.SampleFileIt;
+import beagleutil.ChromInterval;
+import beagleutil.Samples;
+import java.io.File;
+import java.util.NoSuchElementException;
+
+/**
+ * <p>Class {@code IntervalVcfIt} is a sample file iterator whose
+ * {@code next()} method returns a marker container.
+ * </p>
+ *
+ * @param <E> the type parameter
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class IntervalVcfIt<E extends MarkerContainer>
+        implements SampleFileIt<E> {
+
+    private final SampleFileIt<E> it;
+    private final ChromInterval interval;
+    // Look-ahead record; null once the iteration is exhausted.
+    private E next;
+
+    /**
+     * Constructs a new {@code IntervalVcfIt} instance that wraps the
+     * specified iterator. Records are read from the wrapped iterator
+     * until the first record whose marker lies in the specified interval
+     * is found.
+     * @param it an iterator whose {@code next()} method returns a marker
+     * container
+     * @param interval a chromosome interval
+     * @throws IllegalArgumentException if
+     * {@code it == null || interval == null}
+     */
+    public IntervalVcfIt(SampleFileIt<E> it, ChromInterval interval) {
+        if (it==null) {
+            throw new IllegalArgumentException("it==null");
+        }
+        if (interval==null) {
+            throw new IllegalArgumentException("interval==null");
+        }
+        this.it = it;
+        this.interval = interval;
+        this.next = readFirstRecord(it, interval);
+    }
+
+    @Override
+    public File file() {
+        return it.file();
+    }
+
+    @Override
+    public Samples samples() {
+        return it.samples();
+    }
+
+    /**
+     * Returns {@code true} if the iteration has more elements.
+     * @return {@code true} if the iteration has more elements.
+     */
+    @Override
+    public boolean hasNext() {
+        return next != null;
+    }
+
+    /**
+     * Returns the next element in the iteration.
+     * @return the next element in the iteration.
+     * @throws NoSuchElementException if the iteration has no more elements.
+     */
+    @Override
+    public E next() {
+        if (next == null) {
+            throw new NoSuchElementException();
+        }
+        E result = next;
+        next = readNextRecord(it, interval);
+        return result;
+    }
+
+    /*
+     * Skips records until one whose marker is inside the interval is read;
+     * returns that record, or null if the wrapped iterator runs out first.
+     */
+    private E readFirstRecord(SampleFileIt<E> it, ChromInterval interval) {
+        while (it.hasNext()) {
+            E record = it.next();
+            if (inInterval(interval, record.marker())) {
+                return record;
+            }
+        }
+        return null;
+    }
+
+    /*
+     * Reads at most one record; returns it if its marker is inside the
+     * interval, and returns null (ending the iteration) otherwise.
+     */
+    private E readNextRecord(SampleFileIt<E> it, ChromInterval interval) {
+        if (!it.hasNext()) {
+            return null;
+        }
+        E record = it.next();
+        return inInterval(interval, record.marker()) ? record : null;
+    }
+
+    /*
+     * Returns true if the marker is on the interval's chromosome and its
+     * position lies in the closed range [interval.start(), interval.end()].
+     */
+    private static boolean inInterval(ChromInterval interval, Marker marker) {
+        if (marker.chromIndex() != interval.chromIndex()) {
+            return false;
+        }
+        return interval.start() <= marker.pos()
+                && marker.pos() <= interval.end();
+    }
+
+    /**
+     * The {@code remove} method is not supported by this iterator.
+     * @throws UnsupportedOperationException if this method is invoked
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException(this.getClass().toString());
+    }
+
+    @Override
+    public void close() {
+        it.close();
+    }
+}
diff --git a/vcf/LowMafRefDiallelicGT.java b/vcf/LowMafRefDiallelicGT.java
new file mode 100644
index 0000000..17a4f03
--- /dev/null
+++ b/vcf/LowMafRefDiallelicGT.java
@@ -0,0 +1,220 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code LowMafRefDiallelicGT} represents phased,
+ * non-missing genotypes for a list of reference samples at a single diallelic
+ * marker. Genotype emission probabilities are determined by the sample
+ * genotypes.
+ * </p>
+ * <p>
+ * Class {@code LowMafRefDiallelicGT} stores the minor allele indices.
+ * </p>
+ * <p>Instances of class {@code LowMafRefDiallelicGT} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class LowMafRefDiallelicGT implements VcfEmission {
+
+    private final Marker marker;
+    private final Samples samples;
+    private final int nHaps;
+    private final int majorAllele;
+    private final int minorAllele;
+    // Sorted, duplicate-free indices of the haplotypes carrying the minor allele.
+    private final int[] minorAlleles;
+
+    /**
+     * Constructs a new {@code LowMafRefDiallelicGT} instance with phased
+     * non-missing genotypes from the specified marker, samples, and
+     * haplotype indices.
+     * @param marker the marker
+     * @param samples the samples
+     * @param minorAllele the minor allele
+     * @param minorIndices an array whose elements are indices of haplotypes
+     * that carry the minor allele
+     *
+     * @throws IllegalArgumentException {@code marker.nAlleles() != 2}
+     * @throws IllegalArgumentException
+     * {@code minorAllele < 0 || minorAllele > 1}
+     * @throws IllegalArgumentException if any element in
+     * {@code minorIndices} is negative or greater than or equal to
+     * {@code 2*samples.nSamples()}
+     * @throws IllegalArgumentException if any two elements in
+     * {@code minorIndices} are equal
+     * @throws NullPointerException if
+     * {@code marker == null || samples == null || minorIndices == null}
+     */
+    public LowMafRefDiallelicGT(Marker marker, Samples samples, int minorAllele,
+            int[] minorIndices) {
+        int[] sortedIndices = checkAndSortIndices(marker, samples, minorAllele,
+                minorIndices);
+        this.marker = marker;
+        this.samples = samples;
+        this.nHaps = 2*samples.nSamples();
+        this.majorAllele = 1 - minorAllele;
+        this.minorAllele = minorAllele;
+        this.minorAlleles = sortedIndices;
+    }
+
+    /*
+     * Validates the constructor arguments and returns a sorted copy of
+     * the haplotype indices; the caller's array is never modified.
+     */
+    private static int[] checkAndSortIndices(Marker marker, Samples samples,
+            int minorAllele, int[] minorIndices) {
+        if (marker.nAlleles() != 2 || minorAllele < 0 || minorAllele > 1) {
+            throw new IllegalArgumentException("ERROR: inconsistent data");
+        }
+        int[] sorted = minorIndices.clone();
+        Arrays.sort(sorted);
+        // After sorting, duplicates are adjacent.
+        for (int j=1; j<sorted.length; ++j) {
+            if (sorted[j] == sorted[j-1]) {
+                throw new IllegalArgumentException("ERROR: inconsistent data");
+            }
+        }
+        // After sorting, only the two extreme elements can be out of range.
+        int nHaps = 2*samples.nSamples();
+        if (sorted.length>0
+                && (sorted[0]<0 || sorted[sorted.length-1] >= nHaps)) {
+            throw new IllegalArgumentException("ERROR: inconsistent data");
+        }
+        return sorted;
+    }
+
+    @Override
+    public Samples samples() {
+        return samples;
+    }
+
+    @Override
+    public int nSamples() {
+        return samples.nSamples();
+    }
+
+    @Override
+    public int nHaps() {
+        return nHaps;
+    }
+
+    @Override
+    public int nHapPairs() {
+        return samples.nSamples();
+    }
+
+    @Override
+    public Marker marker() {
+        return marker;
+    }
+
+    @Override
+    public boolean isRefData() {
+        return true;
+    }
+
+    /**
+     * Returns {@code 1.0f} if the specified ordered allele pair equals the
+     * stored phased genotype of the specified sample, and returns
+     * {@code 0.0f} otherwise.
+     *
+     * @throws IndexOutOfBoundsException if either allele is not 0 or 1,
+     * or if a haplotype index of the sample is out of bounds
+     */
+    @Override
+    public float gl(int sample, int allele1, int allele2) {
+        if (allele1 != 0 && allele1 != 1) {
+            throw new IndexOutOfBoundsException(String.valueOf(allele1));
+        }
+        if (allele2 != 0 && allele2 != 1) {
+            throw new IndexOutOfBoundsException(String.valueOf(allele2));
+        }
+        boolean matches = (allele1==allele1(sample) && allele2==allele2(sample));
+        return matches ? 1.0f : 0.0f;
+    }
+
+    /**
+     * Returns {@code true} for every valid sample index.
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code sample < 0 || sample >= this.nSamples()}
+     */
+    @Override
+    public boolean isPhased(int sample) {
+        // Negative indices used to slip through this check.
+        if (sample < 0 || sample >= samples.nSamples()) {
+            throw new IndexOutOfBoundsException(String.valueOf(sample));
+        }
+        return true;
+    }
+
+    @Override
+    public int allele1(int sample) {
+        return allele(2*sample);
+    }
+
+    @Override
+    public int allele2(int sample) {
+        return allele(2*sample + 1);
+    }
+
+    /**
+     * Returns the minor allele if the specified haplotype index is one of
+     * the stored indices, and returns the major allele otherwise.
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code hap < 0 || hap >= this.nHaps()}
+     */
+    @Override
+    public int allele(int hap) {
+        if (hap < 0 || hap >= nHaps) {
+            throw new IndexOutOfBoundsException(String.valueOf(hap));
+        }
+        // The index array is sorted, so a binary search is valid.
+        boolean carriesMinor = Arrays.binarySearch(minorAlleles, hap) >= 0;
+        return carriesMinor ? minorAllele : majorAllele;
+    }
+
+    @Override
+    public int nAlleles() {
+        return this.marker().nAlleles();
+    }
+
+    @Override
+    public boolean storesNonMajorIndices() {
+        return true;
+    }
+
+    @Override
+    public int majorAllele() {
+        return majorAllele;
+    }
+
+    /**
+     * Returns the number of stored haplotype indices for the minor allele.
+     *
+     * @throws IndexOutOfBoundsException if {@code allele} is not 0 or 1
+     * @throws IllegalArgumentException if {@code allele} is the major allele
+     */
+    @Override
+    public int alleleCount(int allele) {
+        checkAlleleIndex(allele);
+        if (allele==majorAllele) {
+            throw new IllegalArgumentException("major allele");
+        }
+        return minorAlleles.length;
+    }
+
+    /**
+     * Returns the haplotype index of the specified copy of the minor allele.
+     * Copies are ordered by increasing haplotype index.
+     *
+     * @throws IndexOutOfBoundsException if {@code allele} is not 0 or 1,
+     * or if {@code copy} is not a valid index into the stored indices
+     * @throws IllegalArgumentException if {@code allele} is the major allele
+     */
+    @Override
+    public int hapIndex(int allele, int copy) {
+        checkAlleleIndex(allele);
+        if (allele==majorAllele) {
+            throw new IllegalArgumentException("major allele");
+        }
+        return minorAlleles[copy];
+    }
+
+    /*
+     * Rejects allele indices other than 0 and 1, matching the check in gl().
+     * Without it any index other than the major allele was silently treated
+     * as the minor allele.
+     */
+    private static void checkAlleleIndex(int allele) {
+        if (allele != 0 && allele != 1) {
+            throw new IndexOutOfBoundsException(String.valueOf(allele));
+        }
+    }
+
+    /**
+     * Returns the data represented by {@code this} as a VCF
+     * record with a GT format field. The returned VCF record
+     * will have missing QUAL and INFO fields, will have "PASS"
+     * in the filter field, and will have a GT format field.
+     * @return the data represented by {@code this} as a VCF
+     * record with a GT format field
+     */
+    @Override
+    public String toString() {
+        return toVcfRec();
+    }
+}
diff --git a/vcf/LowMafRefGT.java b/vcf/LowMafRefGT.java
new file mode 100644
index 0000000..30c825d
--- /dev/null
+++ b/vcf/LowMafRefGT.java
@@ -0,0 +1,264 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code LowMafRefGT} represents phased, non-missing
+ * genotypes for a list of reference samples at a single marker.
+ * Genotype emission probabilities are determined by the sample
+ * genotypes.
+ * </p>
+ * <p>
+ * Class {@code LowMafRefGT} stores the non-major allele indices.
+ * </p>
+ * <p>Instances of class {@code LowMafRefGT} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class LowMafRefGT implements VcfEmission {
+
+    private final Marker marker;
+    private final Samples samples;
+    private final int nHaps;
+    // hapIndices[j] is null for the major allele; otherwise it holds the
+    // sorted indices of the haplotypes that carry allele j.
+    private final int[][] hapIndices;
+    private final int majorAllele;
+
+    /**
+     * Constructs a new {@code LowMafRefGT} instance with phased,
+     * non-missing genotypes from the specified marker, samples, and haplotype
+     * indices. If a haplotype index is duplicated in the specified
+     * {@code hapIndices} array, the haplotype will be assigned the allele
+     * with the smallest index.
+     *
+     * @param marker the marker
+     * @param samples the samples
+     * @param hapIndices an array whose {@code j}-th element is {@code null}
+     * if {@code j} is the unique (or first) major allele, and is an array of
+     * indices of haplotypes that carry the {@code j}-th allele otherwise
+     *
+     * @throws IllegalArgumentException if {@code hapIndices[j] == null} and
+     * {@code j} is not the unique or first major allele
+     * @throws IllegalArgumentException if any haplotype index in
+     * {@code hapIndices} is negative or greater than or equal to
+     * {@code 2*samples.nSamples()}
+     * @throws IllegalArgumentException if
+     * {@code marker.nAlleles() != hapIndices.length}
+     * @throws NullPointerException if any parameter is {@code null}
+     */
+    public LowMafRefGT(Marker marker, Samples samples, int[][] hapIndices) {
+        int[][] sortedCopy = copyAndSortIndices(hapIndices);
+        checkSortedIndices(marker, samples, sortedCopy);
+        this.marker = marker;
+        this.samples = samples;
+        this.nHaps = 2*samples.nSamples();
+        this.hapIndices = sortedCopy;
+        // The major allele is the single null entry; the validation above
+        // guarantees that exactly one exists.
+        int nullIndex = -1;
+        for (int j=0; j<sortedCopy.length; ++j) {
+            if (sortedCopy[j] == null) {
+                nullIndex = j;
+                break;
+            }
+        }
+        assert nullIndex != -1;
+        this.majorAllele = nullIndex;
+    }
+
+    /*
+     * Returns a deep copy of the specified array in which every non-null
+     * row is sorted; null rows stay null.
+     */
+    private static int[][] copyAndSortIndices(int[][] hapIndices) {
+        int[][] sortedCopy = new int[hapIndices.length][];
+        for (int j=0; j<hapIndices.length; ++j) {
+            if (hapIndices[j] != null) {
+                sortedCopy[j] = hapIndices[j].clone();
+                Arrays.sort(sortedCopy[j]);
+            }
+        }
+        return sortedCopy;
+    }
+
+    /*
+     * Validates the sorted index rows against the marker and samples.
+     * Every failure is reported as an IllegalArgumentException, as the
+     * constructor documents.
+     */
+    private static void checkSortedIndices(Marker marker, Samples samples,
+            int[][] hapIndices) {
+        if (marker.nAlleles() != hapIndices.length) {
+            throw new IllegalArgumentException("ERROR: inconsistent data");
+        }
+        int nHaps = 2*samples.nSamples();
+        checkAlleleCounts(hapIndices, nHaps);
+        for (int[] ia : hapIndices) {
+            // Rows are sorted, so only the extreme elements can be out of range.
+            if (ia != null && ia.length > 0
+                    && (ia[0] < 0 || ia[ia.length-1] >= nHaps)) {
+                throw new IllegalArgumentException(
+                        "haplotype index out of bounds: " + Arrays.toString(ia));
+            }
+        }
+    }
+
+    /*
+     * Checks that exactly one row is null and that no non-null row with a
+     * smaller allele index ties the count implied for the null row.
+     * NOTE(review): a non-null row that is strictly longer than the implied
+     * major-allele count is not rejected here -- confirm this is intended.
+     */
+    private static void checkAlleleCounts(int[][] hapIndices, int nHaps) {
+        int nMajorAlleles = nHaps;
+        int maxIndex = -1;
+        int nullIndex = -1;
+        for (int j=0; j<hapIndices.length; ++j) {
+            if (hapIndices[j] == null) {
+                if (nullIndex != -1) {
+                    throw new IllegalArgumentException("ERROR: major allele error");
+                }
+                nullIndex = j;
+            }
+            else {
+                if (maxIndex == -1
+                        || hapIndices[j].length > hapIndices[maxIndex].length) {
+                    maxIndex = j;
+                }
+                nMajorAlleles -= hapIndices[j].length;
+            }
+        }
+        boolean majorAlleleError =
+                maxIndex != -1
+                && hapIndices[maxIndex].length == nMajorAlleles
+                && maxIndex < nullIndex;
+        if (nullIndex == -1 || majorAlleleError) {
+            throw new IllegalArgumentException("ERROR: major allele error");
+        }
+    }
+
+    @Override
+    public Samples samples() {
+        return samples;
+    }
+
+    @Override
+    public int nSamples() {
+        return samples.nSamples();
+    }
+
+    @Override
+    public int nHaps() {
+        return nHaps;
+    }
+
+    @Override
+    public int nHapPairs() {
+        return samples.nSamples();
+    }
+
+    @Override
+    public Marker marker() {
+        return marker;
+    }
+
+    @Override
+    public boolean isRefData() {
+        return true;
+    }
+
+    /**
+     * Returns {@code 1.0f} if the specified ordered allele pair equals the
+     * stored phased genotype of the specified sample, and returns
+     * {@code 0.0f} otherwise.
+     *
+     * @throws IndexOutOfBoundsException if either allele index is negative
+     * or not less than the number of alleles, or if a haplotype index of
+     * the sample is out of bounds
+     */
+    @Override
+    public float gl(int sample, int allele1, int allele2) {
+        if (allele1 < 0 || allele1 >= hapIndices.length) {
+            throw new IndexOutOfBoundsException(String.valueOf(allele1));
+        }
+        if (allele2 < 0 || allele2 >= hapIndices.length) {
+            throw new IndexOutOfBoundsException(String.valueOf(allele2));
+        }
+        boolean matches = (allele1==allele1(sample) && allele2==allele2(sample));
+        return matches ? 1.0f : 0.0f;
+    }
+
+    /**
+     * Returns {@code true} for every valid sample index.
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code sample < 0 || sample >= this.nSamples()}
+     */
+    @Override
+    public boolean isPhased(int sample) {
+        // Negative indices used to slip through this check.
+        if (sample < 0 || sample >= samples.nSamples()) {
+            throw new IndexOutOfBoundsException(String.valueOf(sample));
+        }
+        return true;
+    }
+
+    @Override
+    public int allele1(int sample) {
+        return allele(2*sample);
+    }
+
+    @Override
+    public int allele2(int sample) {
+        return allele(2*sample + 1);
+    }
+
+    /**
+     * Returns the smallest non-major allele whose index row contains the
+     * specified haplotype, or the major allele if no row contains it.
+     *
+     * @throws IndexOutOfBoundsException if
+     * {@code hap < 0 || hap >= this.nHaps()}
+     */
+    @Override
+    public int allele(int hap) {
+        if (hap < 0 || hap >= nHaps) {
+            throw new IndexOutOfBoundsException(String.valueOf(hap));
+        }
+        for (int j=0; j<hapIndices.length; ++j) {
+            // Rows are sorted, so a binary search is valid.
+            if (j != majorAllele
+                    && Arrays.binarySearch(hapIndices[j], hap) >= 0) {
+                return j;
+            }
+        }
+        return majorAllele;
+    }
+
+    @Override
+    public int nAlleles() {
+        return this.marker().nAlleles();
+    }
+
+    @Override
+    public boolean storesNonMajorIndices() {
+        return true;
+    }
+
+    @Override
+    public int majorAllele() {
+        return majorAllele;
+    }
+
+    /**
+     * Returns the number of stored haplotype indices for the specified
+     * non-major allele.
+     *
+     * @throws IllegalArgumentException if {@code allele} is the major allele
+     * @throws IndexOutOfBoundsException if {@code allele} is not a valid
+     * allele index
+     */
+    @Override
+    public int alleleCount(int allele) {
+        int[] indices = hapIndices[allele];
+        if (indices==null) {
+            throw new IllegalArgumentException("major allele");
+        }
+        return indices.length;
+    }
+
+    /**
+     * Returns the haplotype index of the specified copy of the specified
+     * non-major allele. Copies are ordered by increasing haplotype index.
+     *
+     * @throws IllegalArgumentException if {@code allele} is the major allele
+     * @throws IndexOutOfBoundsException if {@code allele} or {@code copy}
+     * is not a valid index
+     */
+    @Override
+    public int hapIndex(int allele, int copy) {
+        int[] indices = hapIndices[allele];
+        if (indices==null) {
+            throw new IllegalArgumentException("major allele");
+        }
+        return indices[copy];
+    }
+
+    /**
+     * Returns the data represented by {@code this} as a VCF
+     * record with a GT format field. The returned VCF record
+     * will have missing QUAL and INFO fields, will have "PASS"
+     * in the filter field, and will have a GT format field.
+     * @return the data represented by {@code this} as a VCF
+     * record with a GT format field
+     */
+    @Override
+    public String toString() {
+        return toVcfRec();
+    }
+}
diff --git a/vcf/Marker.java b/vcf/Marker.java
new file mode 100644
index 0000000..6e6c833
--- /dev/null
+++ b/vcf/Marker.java
@@ -0,0 +1,177 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+/**
+ * <p>Interface {@code Marker} represents a genetic marker.
+ * </p>
+ * <p>All implementations of interface {@code Marker} are required to be immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface Marker extends Comparable<Marker> {
+
+ /**
+ * Returns the chromosome.
+ * @return the chromosome
+ */
+ String chrom();
+
+ /**
+ * Returns the chromosome index.
+ * @return the chromosome index
+ */
+ int chromIndex();
+
+ /**
+ * Returns the chromosome position coordinate.
+ * @return the chromosome position coordinate
+ */
+ int pos();
+
+ /**
+ * Returns the number of marker identifiers.
+ * @return the number of marker identifiers
+ */
+ int nIds();
+
+ /**
+ * Returns the specified marker identifier.
+ * @param index a marker identifier index
+ * @return the specified marker identifier
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nIds()}
+ */
+ String id(int index);
+
+ /**
+ * Returns the first marker identifier if there is at least
+ * one identifier in the VCF record ID field, and returns
+ * {@code this.chrom() + ":" + this.pos()} otherwise.
+ *
+ * @return a marker identifier
+ */
+ String id();
+
+ /**
+ * Returns the number of alleles for the marker, including the REF
+ * allele.
+ * @return the number of alleles for the marker, including the REF
+ * allele
+ */
+ int nAlleles();
+
+ /**
+ * Returns the specified allele. The reference allele has index 0.
+ * @param index an allele index
+ * @return the specified allele
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nAlleles()}
+ */
+ String allele(int index);
+
+ /**
+ * Returns the alleles. The {@code k}-th element of the returned array
+ * is equal to {@code this.allele(k)}.
+ * @return the alleles
+ */
+ String[] alleles();
+
+ /**
+ * Returns the number of distinct genotypes, which equals
+ * {@code this.nAlleles()*(1 + this.nAlleles())/2}.
+ *
+ * @return the number of distinct genotypes
+ */
+ int nGenotypes();
+
+ /**
+ * Returns the INFO END field, or -1 if there is no INFO END field.
+ *
+ * @return the INFO END field, or -1 if there is no INFO END field
+ */
+ int end();
+
+ /**
+ * Returns {@code true} if the specified object is a
+ * {@code Marker} with the same chromosome,
+ * position, allele lists, and INFO END field, and
+ * returns {@code false} otherwise. Equality does not
+ * depend on the value of the VCF record ID field.
+ *
+ * @param obj object to be compared with {@code this} for equality
+ *
+ * @return {@code true} if the specified object is a
+ * {@code Marker} with the same chromosome,
+ * position, allele lists, and INFO END field
+ */
+ @Override
+ boolean equals(Object obj);
+
+ /**
+ * <p>Returns the hash code value for this object. The hash code does not
+ * depend on the value of the VCF record ID field.
+ * The hash code is defined by the following calculation:
+ * </p>
+ * <pre>
+ * int hash = 5;
+ * hash = 29 * hash + this.chromIndex();
+ * hash = 29 * hash + this.pos();
+ * for (int j=0, n=this.nAlleles(); j &lt; n; ++j) {
+ * hash = 29 * hash + this.allele(j).hashCode();
+ * }
+ * hash = 29 * hash + this.end();
+ * </pre>
+ *
+ * @return the hash code value for this marker
+ */
+ @Override
+ int hashCode();
+
+ /**
+ * Compares this marker with the specified marker
+ * for order, and returns a negative integer, 0, or a positive integer
+ * depending on whether this marker is less than, equal to,
+ * or greater than the specified marker. Comparison is
+ * on chromosome index, position, allele identifier lists, and end value
+ * in that order. Allele identifier lists are compared for
+ * lexicographical order, and alleles are compared using the
+ * {@code String compareTo()} method.
+ *
+ * @param other the {@code Marker} to be compared
+ * @return a negative integer, 0, or a positive integer
+ * depending on whether this marker is less than, equal to,
+ * or greater than the specified marker
+ */
+ @Override
+ int compareTo(Marker other);
+
+ /**
+ * Returns a string equal to the first five tab-delimited fields
+ * of a VCF record corresponding to this marker.
+ *
+ * @return a string equal to the first five tab-delimited fields
+ * of a VCF record corresponding to this marker
+ */
+ @Override
+ String toString();
+}
diff --git a/vcf/MarkerContainer.java b/vcf/MarkerContainer.java
new file mode 100644
index 0000000..bb0df8b
--- /dev/null
+++ b/vcf/MarkerContainer.java
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+/**
+ * Interface {@code MarkerContainer} represents an object that stores
+ * a unique {@code vcf.Marker} instance.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface MarkerContainer {
+
+    /**
+     * Returns the marker stored by this object.
+     * @return the marker stored by this object
+     */
+    Marker marker();
+}
diff --git a/vcf/Markers.java b/vcf/Markers.java
new file mode 100644
index 0000000..13b6645
--- /dev/null
+++ b/vcf/Markers.java
@@ -0,0 +1,389 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.ChromIds;
+import blbutil.Const;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * <p>Class {@code Markers} represents a list of markers in chromosome order.
+ * </p>
+ * <p>Instances of class {@code Markers} are immutable.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class Markers {
+
+ // The markers as a set; backs contains() and is used to detect duplicates.
+ private final Set<Marker> markerSet;
+
+ // Forward view: the markers in the order of this list, plus cumulative
+ // sums (element j covers the markers with index less than j) and the
+ // deep hash code of the array.
+ private final Marker[] fwdMarkerArray;
+ private final int[] fwdSumAlleles;
+ private final int[] fwdSumGenotypes;
+ private final int[] fwdSumHaplotypeBits;
+ private final int fwdHashCode;
+
+ // Backward view: the same data computed for the reversed marker order.
+ private final Marker[] bwdMarkerArray;
+ private final int[] bwdSumAlleles;
+ private final int[] bwdSumGenotypes;
+ private final int[] bwdSumHaplotypeBits;
+ private final int bwdHashCode;
+ // The instance returned by reverse(); null when this object was built
+ // directly from a marker array, in which case reverse() creates one.
+ private final Markers bwdMarkers;
+
+ /**
+ * Construct and return a new {@code Markers} instance that represents the
+ * specified list of markers.
+ * @param markers a list of markers in chromosome order
+ * @return a new {@code Markers} instance that represents the
+ * specified list of markers
+ *
+ * @throws IllegalArgumentException if markers on a chromosome are not
+ * in chromosome order
+ * @throws IllegalArgumentException if there are duplicate markers
+ * @throws IllegalArgumentException if the markers on a chromosome
+ * do not form a contiguous set of entries within the array
+ *
+ * @throws NullPointerException if
+ * {@code markers == null} or if {@code markers[j] == null}
+ * for any {@code j} satisfying {@code (0 <= j && j < markers.length)}
+ */
+ public static Markers create(Marker[] markers) {
+     // Build the reversed instance first, then wrap it, so that the
+     // returned object already holds its reversed counterpart.
+     Markers reversed = new Markers(markers).reverse();
+     return new Markers(reversed);
+ }
+
+ /**
+ * Construct a new {@code Markers} instance that represents the
+ * specified list of markers.
+ * @param markers a list of markers in chromosome order
+ *
+ * @throws IllegalArgumentException if markers on a chromosome are not
+ * in chromosome order
+ * @throws IllegalArgumentException if there are duplicate markers
+ * @throws IllegalArgumentException if the markers on a chromosome
+ * do not form a contiguous set of entries within the array
+ *
+ * @throws NullPointerException if
+ * {@code markers == null} or if {@code markers[j] == null}
+ * for any {@code j} satisfying {@code (0 <= j && j < markers.length)}
+ */
+ private Markers(Marker[] markers) {
+     checkMarkerPosOrder(markers);
+     // Defensive copy so later changes to the caller's array are not seen.
+     Marker[] fwd = markers.clone();
+     Marker[] bwd = reverse(fwd);
+     this.fwdMarkerArray = fwd;
+     this.bwdMarkerArray = bwd;
+     this.markerSet = markerSet(fwd);
+
+     // Forward-order summaries.
+     this.fwdSumAlleles = cumSumAlleles(fwd);
+     this.fwdSumGenotypes = cumSumGenotypes(fwd);
+     this.fwdSumHaplotypeBits = cumSumHaplotypeBits(fwd);
+     this.fwdHashCode = Arrays.deepHashCode(fwd);
+
+     // Reverse-order summaries.
+     this.bwdSumAlleles = cumSumAlleles(bwd);
+     this.bwdSumGenotypes = cumSumGenotypes(bwd);
+     this.bwdSumHaplotypeBits = cumSumHaplotypeBits(bwd);
+     this.bwdHashCode = Arrays.deepHashCode(bwd);
+     // No reversed instance yet; reverse() creates one when asked.
+     this.bwdMarkers = null;
+ }
+
+ /**
+ * Constructs a new {@code Markers} instance whose {@code reverse()}
+ * method returns the specified {@code Markers}
+ * @param bwdMarkers a list of markers
+ */
+ private Markers(Markers bwdMarkers) {
+     // Every forward field of the new instance is the backward field of
+     // the argument, and vice versa; nothing is recomputed.
+     this.bwdMarkers = bwdMarkers;
+     this.markerSet = bwdMarkers.markerSet;
+
+     this.fwdMarkerArray = bwdMarkers.bwdMarkerArray;
+     this.fwdSumAlleles = bwdMarkers.bwdSumAlleles;
+     this.fwdSumGenotypes = bwdMarkers.bwdSumGenotypes;
+     this.fwdSumHaplotypeBits = bwdMarkers.bwdSumHaplotypeBits;
+     this.fwdHashCode = bwdMarkers.bwdHashCode;
+
+     this.bwdMarkerArray = bwdMarkers.fwdMarkerArray;
+     this.bwdSumAlleles = bwdMarkers.fwdSumAlleles;
+     this.bwdSumGenotypes = bwdMarkers.fwdSumGenotypes;
+     this.bwdSumHaplotypeBits = bwdMarkers.fwdSumHaplotypeBits;
+     this.bwdHashCode = bwdMarkers.fwdHashCode;
+ }
+
+ private static void checkMarkerPosOrder(Marker[] markers) {
+     if (markers.length < 2) {
+         return;
+     }
+     // Chromosome indices seen so far, used for the contiguity check.
+     Set<Integer> seenChroms = new HashSet<>();
+     seenChroms.add(markers[0].chromIndex());
+     seenChroms.add(markers[1].chromIndex());
+     // Slide a window of three consecutive markers over the array.
+     for (int j=2; j<markers.length; ++j) {
+         int chrA = markers[j-2].chromIndex();
+         int chrB = markers[j-1].chromIndex();
+         int chrC = markers[j].chromIndex();
+         if (chrA == chrB && chrB == chrC) {
+             int posA = markers[j-2].pos();
+             int posB = markers[j-1].pos();
+             int posC = markers[j].pos();
+             // A strict local minimum or maximum in the middle means the
+             // positions change direction; either direction is accepted
+             // as long as it is kept.
+             boolean localMin = posB<posA && posB<posC;
+             boolean localMax = posB>posA && posB>posC;
+             if (localMin || localMax) {
+                 String s = "markers not in chromosomal order: "
+                         + Const.nl + markers[j-2]
+                         + Const.nl + markers[j-1]
+                         + Const.nl + markers[j];
+                 throw new IllegalArgumentException(s);
+             }
+         }
+         else if (chrB != chrC) {
+             // A new run starts at index j; its chromosome must be unseen.
+             if (!seenChroms.add(chrC)) {
+                 String s = "markers on chromosome are not contiguous: "
+                         + ChromIds.instance().id(chrC);
+                 throw new IllegalArgumentException(s);
+             }
+         }
+     }
+ }
+
+ private static Marker[] reverse(Marker[] markers) {
+     // Fill a new array front-to-back while reading the input back-to-front.
+     Marker[] reversed = new Marker[markers.length];
+     for (int j=0, k=markers.length-1; k>=0; ++j, --k) {
+         reversed[j] = markers[k];
+     }
+     return reversed;
+ }
+
+ private static Set<Marker> markerSet(Marker[] markers) {
+     Set<Marker> set = new HashSet<>(markers.length);
+     for (Marker m : markers) {
+         // add() returns false when an equal marker is already present.
+         if (!set.add(m)) {
+             throw new IllegalArgumentException("Duplicate marker: " + m);
+         }
+     }
+     return set;
+ }
+
+ private static int[] cumSumAlleles(Marker[] markers) {
+     // cumSum[j] is the allele count summed over markers with index < j.
+     int n = markers.length;
+     int[] cumSum = new int[n + 1];
+     for (int j=0; j<n; ++j) {
+         cumSum[j+1] = cumSum[j] + markers[j].nAlleles();
+     }
+     return cumSum;
+ }
+
+ private static int[] cumSumGenotypes(Marker[] markers) {
+     // cumSum[j] is the genotype count summed over markers with index < j.
+     int n = markers.length;
+     int[] cumSum = new int[n + 1];
+     for (int j=0; j<n; ++j) {
+         cumSum[j+1] = cumSum[j] + markers[j].nGenotypes();
+     }
+     return cumSum;
+ }
+
+ private static int[] cumSumHaplotypeBits(Marker[] markers) {
+     // cumSum[j] is the bit count summed over markers with index < j.
+     int n = markers.length;
+     int[] cumSum = new int[n + 1];
+     for (int j=0; j<n; ++j) {
+         // Bits in the binary representation of the largest allele index.
+         int maxAllele = markers[j].nAlleles() - 1;
+         int bits = Integer.SIZE - Integer.numberOfLeadingZeros(maxAllele);
+         cumSum[j+1] = cumSum[j] + bits;
+     }
+     return cumSum;
+ }
+
+ /**
+ * Returns a hash code value for the object.
+ * The returned hash code equals
+ * {@code Arrays.deepHashCode(this.markers())}.
+ * @return a hash code value for the object
+ */
+ @Override
+ public int hashCode() {
+     // Computed once at construction time.
+     return this.fwdHashCode;
+ }
+
+ /**
+ * Returns {@code true} if the specified object is a {@code Markers}
+ * instance which represents the same list of markers as {@code this},
+ * and returns {@code false} otherwise. Two lists of markers are
+ * the same if the lists have the same size and if markers with the
+ * same index in the two lists are equal.
+ *
+ * @param obj the object to be tested for equality with {@code this}
+ *
+ * @return {@code true} if the specified object is a {@code Markers}
+ * instance which represents the same list of markers as {@code this}
+ */
+ @Override
+ public boolean equals(Object obj) {
+ if (this==obj) {
+ return true;
+ }
+ if (obj == null) {
+ return false;
+ }
+ if (getClass() != obj.getClass()) {
+ return false;
+ }
+ final Markers other = (Markers) obj;
+ return Arrays.deepEquals(this.fwdMarkerArray, other.fwdMarkerArray);
+ }
+
+ /**
+ * Constructs and returns a new {@code Markers} instance that is
+ * obtained by reversing the order of markers in {@code this}.
+ * @return a new {@code Markers} instance that is obtained by
+ * reversing the order of markers in {@code this}
+ */
+ public Markers reverse() {
+     // Reuse the stored reversed instance when there is one.
+     if (bwdMarkers != null) {
+         return bwdMarkers;
+     }
+     return new Markers(this);
+ }
+
+ /**
+ * Returns the number of markers in the list of markers represented
+ * by {@code this}.
+ * @return the number of markers
+ */
+ public int nMarkers() {
+ return fwdMarkerArray.length;
+ }
+
+ /**
+ * Returns the marker with the specified index.
+ * @param marker a marker index
+ * @return the marker with the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker >= this.nMarkers()}
+ */
+ public Marker marker(int marker) {
+ // the array access performs the documented bounds check
+ return fwdMarkerArray[marker];
+ }
+
+ /**
+ * Returns the list of markers.
+ * @return the list of markers
+ */
+ public Marker[] markers() {
+ return fwdMarkerArray.clone();
+ }
+
+ /**
+ * Returns {@code true} if the specified marker is not {@code null}
+ * and is an element in the list of markers represented by {@code this},
+ * and returns {@code false} otherwise.
+ *
+ * @param marker a marker
+ *
+ * @return {@code true} if the specified marker is not {@code null} and
+ * is an element in the list of markers represented by {@code this}
+ */
+ public boolean contains(Marker marker) {
+ // membership is answered by the marker set; the list is not scanned here
+ return markerSet.contains(marker);
+ }
+
+ /**
+ * Returns a {@code Markers} instance that represents
+ * the specified range of marker indices.
+ * @param start the starting marker index (inclusive)
+ * @param end the ending marker index (exclusive)
+ * @return a {@code Markers} instance that represents
+ * the specified range of marker indices
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code start < 0 || end > this.nMarkers()}
+ * @throws IllegalArgumentException if {@code start >= end}.
+ */
+ public Markers restrict(int start, int end) {
+ if (end > fwdMarkerArray.length) {
+ throw new IndexOutOfBoundsException("end > this.nMarkers(): " + end);
+ }
+ return new Markers(Arrays.copyOfRange(fwdMarkerArray, start, end));
+ }
+
+ /**
+ * Returns the sum of the number of alleles for
+ * the markers with index less than the specified index.
+ * The index {@code this.nMarkers()} is permitted.
+ * @param marker a marker index
+ * @return the sum of the number of alleles for
+ * the markers with index less than the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker > this.nMarkers()}
+ */
+ public int sumAlleles(int marker) {
+ return fwdSumAlleles[marker];
+ }
+
+ /**
+ * Returns {@code this.sumAlleles(this.nMarkers())}, which is the
+ * cumulative allele count taken over all markers.
+ * @return {@code this.sumAlleles(this.nMarkers())}
+ */
+ public int sumAlleles() {
+ return fwdSumAlleles[fwdMarkerArray.length];
+ }
+
+ /**
+ * Returns the sum of the number of possible genotypes for the markers
+ * with index less than the specified index.
+ * The index {@code this.nMarkers()} is permitted.
+ * @param marker a marker index
+ * @return the sum of the number of possible genotypes for the markers
+ * with index less than the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker > this.nMarkers()}
+ */
+ public int sumGenotypes(int marker) {
+ return fwdSumGenotypes[marker];
+ }
+
+ /**
+ * Returns {@code this.sumGenotypes(this.nMarkers())}, which is the
+ * cumulative genotype count taken over all markers.
+ * @return {@code this.sumGenotypes(this.nMarkers())}
+ */
+ public int sumGenotypes() {
+ return fwdSumGenotypes[fwdMarkerArray.length];
+ }
+
+ /**
+ * Returns the number of bits required to store a haplotype for the
+ * markers with index less than the specified index.
+ * The index {@code this.nMarkers()} is permitted.
+ * @param marker a marker index
+ * @return the number of bits required to store a haplotype for the
+ * markers with index less than the specified index
+ * @throws IndexOutOfBoundsException if
+ * {@code marker < 0 || marker > this.nMarkers()}
+ */
+ public int sumHaplotypeBits(int marker) {
+ return fwdSumHaplotypeBits[marker];
+ }
+
+ /**
+ * Returns {@code this.sumHaplotypeBits(this.nMarkers())}, which is the
+ * cumulative haplotype bit count taken over all markers.
+ * @return {@code this.sumHaplotypeBits(this.nMarkers())}
+ */
+ public int sumHaplotypeBits() {
+ return fwdSumHaplotypeBits[fwdMarkerArray.length];
+ }
+
+ /**
+ * Returns a string representation of {@code this}.
+ * The exact details of the representation are unspecified and
+ * subject to change.
+ * @return a string representation of {@code this}
+ */
+ @Override
+ public String toString() {
+ // currently the Arrays.toString() rendering of the marker array
+ return Arrays.toString(fwdMarkerArray);
+ }
+}
diff --git a/vcf/MaskedEndsGL.java b/vcf/MaskedEndsGL.java
new file mode 100644
index 0000000..e48c715
--- /dev/null
+++ b/vcf/MaskedEndsGL.java
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Class {@code MaskedEndsGL} is a wrapper for a {@code GL}
+ * instance that masks the genotype emission probabilities for a
+ * user-specified number of starting and ending markers. The {@code gl()},
+ * {@code allele1()}, and {@code allele2()} methods return
+ * {@code 1.0f}, {@code -1}, and {@code -1} respectively if a genotype
+ * emission probability for a marker is masked.
+ * </p>
+ * <p>Instances of class {@code MaskedEndsGL} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class MaskedEndsGL implements GL {
+
+ private final GL gl;
+ private final int start;
+ private final int end;
+
+ /**
+ * Constructs a new {@code MaskedEndsGL} instance.
+ * @param gl genotype emission probabilities for all markers
+ * @param start the starting marker index (inclusive) of the markers
+ * whose genotype emission probabilities are not masked
+ * @param end the ending marker index (exclusive) of the markers
+ * whose genotype emission probabilities are not masked
+ * @throws IllegalArgumentException if
+ * {@code start < 0 || start > end || end > gl.nMarkers()}
+ * @throws NullPointerException if {@code gl == null}
+ */
+ public MaskedEndsGL(GL gl, int start, int end) {
+ if (start<0 || start>end || end>gl.nMarkers()) {
+ String s = "start=" + start + " end=" + end;
+ throw new IllegalArgumentException(s);
+ }
+ this.gl = gl;
+ this.start = start;
+ this.end = end;
+ }
+
+ @Override
+ public boolean isRefData() {
+ if ((start>0 || end<gl.nMarkers()) && start<end) {
+ return false;
+ }
+ else {
+ return gl.isRefData();
+ }
+ }
+
+ private void checkMarkerAndSample(int marker, int sample) {
+ if (marker<0 || marker>=gl.nMarkers()) {
+ throw new IndexOutOfBoundsException("marker: " + marker);
+ }
+ if (sample<0 || sample>=gl.nSamples()) {
+ throw new IndexOutOfBoundsException("sample: " + sample);
+ }
+ }
+
+ private void checkAllele(int marker, int allele) {
+ if (allele<0 || allele>=gl.marker(marker).nAlleles()) {
+ String s = "marker=" + marker + " allele=" + allele;
+ throw new IndexOutOfBoundsException(s);
+ }
+ }
+
+ @Override
+ public float gl(int marker, int sample, int allele1, int allele2) {
+ if (marker<start || marker>=end) {
+ checkMarkerAndSample(marker, sample);
+ checkAllele(marker, allele1);
+ checkAllele(marker, allele2);
+ return 1.0f;
+ }
+ else {
+ return gl.gl(marker, sample, allele1, allele2);
+ }
+ }
+
+ @Override
+ public boolean isPhased(int marker, int sample) {
+ if (marker<start || marker>=end) {
+ checkMarkerAndSample(marker, sample);
+ return false;
+ }
+ else {
+ return gl.isPhased(marker, sample);
+ }
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ if (marker<start || marker>=end) {
+ checkMarkerAndSample(marker, sample);
+ return -1;
+ }
+ else {
+ return gl.allele1(marker, sample);
+ }
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ if (marker<start || marker>=end) {
+ checkMarkerAndSample(marker, sample);
+ return -1;
+ }
+ else {
+ return gl.allele2(marker, sample);
+ }
+ }
+
+ @Override
+ public int allele(int marker, int hap) {
+ if (marker<start || marker>=end) {
+ checkMarkerAndSample(marker, hap/2);
+ return -1;
+ }
+ else {
+ return gl.allele(marker, hap);
+ }
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return gl.marker(marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return gl.markers();
+ }
+
+ @Override
+ public int nMarkers() {
+ return gl.nMarkers();
+ }
+
+ @Override
+ public int nHaps() {
+ return gl.nHaps();
+ }
+
+ @Override
+ public int nSamples() {
+ return gl.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return gl.samples();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(10000);
+ sb.append(this.getClass().toString());
+ sb.append(": nSamples=");
+ sb.append(this.nSamples());
+ return sb.toString();
+ }
+}
diff --git a/vcf/NoPhaseGL.java b/vcf/NoPhaseGL.java
new file mode 100644
index 0000000..eb0d6a3
--- /dev/null
+++ b/vcf/NoPhaseGL.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Class {@code NoPhaseGL} is a wrapper for a {@code GL}
+ * instance that hides all genotype phase data in the wrapped object.
+ * The {@code isPhased()} method always returns {@code false}, and the
+ * {@code gl()} method returns the same value for both orderings of
+ * a pair of alleles. All other methods delegate to the wrapped object.
+ * </p>
+ * <p>Instances of class {@code NoPhaseGL} are immutable.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class NoPhaseGL implements GL {
+
+    private final GL gl;
+
+    /**
+     * Constructs a new {@code NoPhaseGL} instance that wraps the
+     * specified genotype emission probabilities.
+     * @param gl genotype emission probabilities that will be wrapped by
+     * the new instance
+     * @throws NullPointerException if {@code gl == null}
+     */
+    public NoPhaseGL(GL gl) {
+        if (gl != null) {
+            this.gl = gl;
+        }
+        else {
+            throw new NullPointerException("gl==null");
+        }
+    }
+
+    /**
+     * Returns the larger of the wrapped object's genotype emission
+     * probabilities for the two orderings of the specified alleles.
+     * If the two alleles are the same, the wrapped object's value is
+     * returned unchanged.
+     * @param marker a marker index
+     * @param sample a sample index
+     * @param a1 the first allele index
+     * @param a2 the second allele index
+     * @return the order-independent genotype emission probability
+     */
+    @Override
+    public float gl(int marker, int sample, int a1, int a2) {
+        float forward = gl.gl(marker, sample, a1, a2);
+        if (a1==a2) {
+            return forward;
+        }
+        float swapped = gl.gl(marker, sample, a2, a1);
+        return Math.max(forward, swapped);
+    }
+
+    @Override
+    public boolean isRefData() {
+        return gl.isRefData();
+    }
+
+    /**
+     * Returns {@code false} for every marker and sample, without
+     * consulting the wrapped object.
+     * @param marker a marker index
+     * @param sample a sample index
+     * @return {@code false}
+     */
+    @Override
+    public boolean isPhased(int marker, int sample) {
+        return false;
+    }
+
+    @Override
+    public int allele1(int marker, int sample) {
+        return gl.allele1(marker, sample);
+    }
+
+    @Override
+    public int allele2(int marker, int sample) {
+        return gl.allele2(marker, sample);
+    }
+
+    @Override
+    public int allele(int marker, int hap) {
+        return gl.allele(marker, hap);
+    }
+
+    @Override
+    public int nMarkers() {
+        return gl.nMarkers();
+    }
+
+    @Override
+    public Marker marker(int marker) {
+        return gl.marker(marker);
+    }
+
+    @Override
+    public Markers markers() {
+        return gl.markers();
+    }
+
+    @Override
+    public int nHaps() {
+        return gl.nHaps();
+    }
+
+    @Override
+    public int nSamples() {
+        return gl.nSamples();
+    }
+
+    @Override
+    public Samples samples() {
+        return gl.samples();
+    }
+
+    /**
+     * Returns the string representation of the wrapped object.
+     * @return the string representation of the wrapped object
+     */
+    @Override
+    public String toString() {
+        return gl.toString();
+    }
+}
diff --git a/vcf/RefGL.java b/vcf/RefGL.java
new file mode 100644
index 0000000..30bd309
--- /dev/null
+++ b/vcf/RefGL.java
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+
+/**
+ * <p>Class {@code RefGL} represents genotype emission probabilities
+ * for a reference panel of phased, non-missing genotypes.
+ * </p>
+ * Instances of class {@code RefGL} are immutable.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class RefGL implements GL {
+
+    private final Samples samples;
+    private final Markers markers;
+    private final VcfEmission[] vea;
+
+    /**
+     * Constructs a {@code RefGL} instance. Each element of the
+     * specified array stores genotype emission probabilities for a single
+     * marker. Array elements corresponding to the same chromosome must be
+     * contiguous and sorted in chromosome position order.
+     *
+     * @param samples the list of samples with phased genotype data.
+     * @param vea genotype emission probabilities.
+     *
+     * @throws IllegalArgumentException
+     * if elements of {@code vea} corresponding to the same chromosome
+     * are not contiguous and sorted in chromosome position order
+     * @throws IllegalArgumentException if
+     * {@code vea[j].marker().equals(vea[k].marker()) == true}
+     * for any {@code j, k} satisfying {@code 0 <= j && j < k && k < vea.length}
+     * @throws IllegalArgumentException if
+     * {@code vea[j].samples().equals(samples) == false}
+     * for any {@code j} satisfying {@code 0 <= j && j < vea.length}
+     * @throws IllegalArgumentException if
+     * {@code vea[j].isRefData() == false} for any {@code j} satisfying
+     * {@code 0 <= j && j < vea.length}
+     *
+     * @throws NullPointerException if {@code samples == null}
+     * @throws NullPointerException if {@code vea == null}
+     * @throws NullPointerException if {@code vea[j] == null} for any
+     * {@code j} satisfying {@code 0 <= j && j < vea.length}
+     */
+    public RefGL(Samples samples, VcfEmission[] vea) {
+        // Without this check a null samples argument would surface as an
+        // IllegalArgumentException from checkData(), or go undetected
+        // when vea is empty.
+        if (samples==null) {
+            throw new NullPointerException("samples==null");
+        }
+        checkData(samples, vea);
+        this.markers = markers(vea);
+        this.samples = samples;
+        // defensive copy so that later changes to the caller's array
+        // do not affect this instance
+        this.vea = vea.clone();
+    }
+
+    /*
+     * Verifies that each element has the specified samples and stores
+     * reference data. Dereferencing pma and pma[j] produces the
+     * documented NullPointerExceptions.
+     */
+    private static void checkData(Samples samples, VcfEmission[] pma) {
+        for (int j=0; j<pma.length; ++j) {
+            if (pma[j].samples().equals(samples)==false) {
+                String s = "samples=" + samples
+                        + Const.nl + "pma[" + j + "].samples()=" + pma[j].samples();
+                throw new IllegalArgumentException(s);
+            }
+            if (pma[j].isRefData()==false) {
+                String s = "non-reference data at marker index " + j;
+                throw new IllegalArgumentException(s);
+            }
+        }
+    }
+
+    /* Collects the marker of each element into a Markers instance */
+    private static Markers markers(VcfEmission[] vea) {
+        Marker[] markers = new Marker[vea.length];
+        for (int j=0; j<markers.length; ++j) {
+            markers[j] = vea[j].marker();
+        }
+        return Markers.create(markers);
+    }
+
+    /**
+     * Returns {@code true}.
+     * @return {@code true}
+     */
+    @Override
+    public boolean isRefData() {
+        return true;
+    }
+
+    /**
+     * Returns {@code 1.0f} if the specified ordered pair of alleles
+     * equals the stored ordered pair of alleles for the specified marker
+     * and sample, and returns {@code 0.0f} otherwise.
+     * @param marker a marker index
+     * @param sample a sample index
+     * @param allele1 the first allele index
+     * @param allele2 the second allele index
+     * @return {@code 1.0f} if the specified alleles match the stored
+     * alleles, and {@code 0.0f} otherwise
+     * @throws IndexOutOfBoundsException if
+     * {@code marker < 0 || marker >= this.nMarkers()}
+     */
+    @Override
+    public float gl(int marker, int sample, int allele1, int allele2) {
+        int a1 = vea[marker].allele1(sample);
+        int a2 = vea[marker].allele2(sample);
+        return (allele1==a1 && allele2==a2) ? 1.0f : 0.0f;
+    }
+
+    /**
+     * Returns {@code true} for every marker and sample.
+     * @param marker a marker index
+     * @param sample a sample index
+     * @return {@code true}
+     */
+    @Override
+    public boolean isPhased(int marker, int sample) {
+        return true;
+    }
+
+    @Override
+    public int allele1(int marker, int sample) {
+        return vea[marker].allele1(sample);
+    }
+
+    @Override
+    public int allele2(int marker, int sample) {
+        return vea[marker].allele2(sample);
+    }
+
+    @Override
+    public int allele(int marker, int hap) {
+        return vea[marker].allele(hap);
+    }
+
+    @Override
+    public int nMarkers() {
+        return vea.length;
+    }
+
+    @Override
+    public Marker marker(int markerIndex) {
+        return markers.marker(markerIndex);
+    }
+
+    @Override
+    public Markers markers() {
+        return markers;
+    }
+
+    /**
+     * Returns twice the number of samples.
+     * @return twice the number of samples
+     */
+    @Override
+    public int nHaps() {
+        return 2*samples.nSamples();
+    }
+
+    @Override
+    public int nSamples() {
+        return samples.nSamples();
+    }
+
+    @Override
+    public Samples samples() {
+        return samples;
+    }
+
+    /**
+     * Returns a string representation of {@code this}. The exact
+     * details of the representation are unspecified and subject to change.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append('[');
+        sb.append(this.getClass().toString());
+        sb.append(": nMarkers=");
+        sb.append(nMarkers());
+        sb.append(" nSamples=");
+        sb.append(nSamples());
+        for (VcfEmission vm : vea) {
+            sb.append(Const.nl);
+            sb.append(vm);
+        }
+        sb.append(']');
+        return sb.toString();
+    }
+}
diff --git a/vcf/RefIt.java b/vcf/RefIt.java
new file mode 100644
index 0000000..f03d034
--- /dev/null
+++ b/vcf/RefIt.java
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.SampleFileIt;
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.FileIt;
+import blbutil.Filter;
+import blbutil.Utilities;
+import java.io.File;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * <p>Class {@code RefIt} represents an iterator whose {@code next()}
+ * method returns an object storing data from a VCF record with
+ * phased, non-missing genotypes.
+ * </p>
+ * <p>Lines of the VCF file are read by a separate daemon thread and are
+ * handed to the iterating thread through a blocking queue.
+ * </p>
+ * <p>Instances of class {@code RefIt} are not thread-safe.
+ * </p>
+ * <p>Methods of this class will terminate the Java Virtual Machine with
+ * an error message if an I/O error or file format error is detected.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RefIt implements SampleFileIt<VcfEmission> {
+
+    /**
+     * The default number of {@code VcfEmission} objects that are
+     * stored in a buffer.
+     */
+    public static final int DEFAULT_EM_BUFFER_SIZE = 1000;
+
+    private static final int MAX_NSEQ = 255;
+
+    private final VcfHeader vcfHeader;
+    private final FileIt<String> strIt;
+    private final Function<String, VcfEmission> mapper;
+    private final Filter<Marker> markerFilter;
+    private final int emBufferSize;
+    private final Thread fileReaderThread;
+    private volatile boolean stopFileReadingThread = false;
+
+    private final BlockingQueue<String[]> stringBuffers;
+    private final Deque<VcfEmission> emBuffer;
+
+    private final List<VcfEmission> uncompressedBuffer;
+    private final VcfEmissionCompressor emCompressor;
+
+    /**
+     * A function mapping a VCF header and a VCF record to a
+     * {@code VcfEmission} object that stores the record's phased,
+     * non-missing genotypes.
+     */
+    public static final BiFunction<VcfHeader, String, VcfEmission> toRef
+            = (VcfHeader h, String s) -> refEmission(h, s);
+
+    /**
+     * Create and returns a new {@code RefIt} instance from the specified
+     * iterator. The returned instance accepts all samples and markers
+     * and uses a buffer size of {@code RefIt.DEFAULT_EM_BUFFER_SIZE}.
+     * @param strIt an iterator that returns lines of a VCF file
+     * @return a new {@code RefIt} instance
+     * @throws IllegalArgumentException if a format error is detected in a
+     * line of a VCF file returned by {@code strIt}
+     * @throws NullPointerException if {@code strIt == null}
+     */
+    public static RefIt create(FileIt<String> strIt) {
+        return RefIt.create(strIt, Filter.acceptAllFilter(),
+                Filter.acceptAllFilter(), DEFAULT_EM_BUFFER_SIZE);
+    }
+
+    /**
+     * Create and returns a new {@code RefIt} instance from the specified
+     * objects.
+     * @param strIt an iterator that returns lines of a VCF file
+     * @param sampleFilter a sample filter or {@code null}
+     * @param markerFilter a marker filter or {@code null}
+     * @param bufferSize the buffer size. When the buffer of
+     * {@code VcfEmission} objects is refilled, records are read until
+     * the buffer holds at least this many objects or the end of the
+     * input is reached.
+     * @return a new {@code RefIt} instance
+     * @throws IllegalArgumentException if a format error is detected in a
+     * line of a VCF file returned by {@code strIt}
+     * @throws IllegalArgumentException if {@code bufferSize < 1}
+     * @throws NullPointerException if {@code strIt == null}
+     */
+    public static RefIt create(FileIt<String> strIt,
+            Filter<String> sampleFilter, Filter<Marker> markerFilter,
+            int bufferSize) {
+        RefIt refIt = new RefIt(strIt, sampleFilter, markerFilter, bufferSize);
+        // the reader thread is started outside of the constructor
+        refIt.start();
+        return refIt;
+    }
+
+    private RefIt(FileIt<String> strIt, Filter<String> sampleFilter,
+            Filter<Marker> markerFilter, int bufferSize) {
+        if (bufferSize < 1) {
+            throw new IllegalArgumentException(String.valueOf(bufferSize));
+        }
+        if (markerFilter==null) {
+            markerFilter = Filter.acceptAllFilter();
+        }
+        this.vcfHeader = new VcfHeader(strIt, sampleFilter);
+        this.strIt = strIt;
+        this.mapper = (String s) -> toRef.apply(vcfHeader, s);
+        this.markerFilter = markerFilter;
+        // honor the requested buffer size (previously validated but unused)
+        this.emBufferSize = bufferSize;
+        this.stringBuffers = new ArrayBlockingQueue<>(1);
+        // cap the initial capacity so that a very large requested size
+        // does not cause a large up-front allocation
+        this.emBuffer = new ArrayDeque<>(
+                Math.min(bufferSize, DEFAULT_EM_BUFFER_SIZE));
+        this.uncompressedBuffer = new ArrayList<>();
+        this.emCompressor = new VcfEmissionCompressor(vcfHeader.samples(),
+                MAX_NSEQ);
+        this.fileReaderThread = fileReadingThread();
+    }
+
+    /* Starts the reader thread and loads the first batch of records */
+    private void start() {
+        this.fileReaderThread.setDaemon(true);
+        this.fileReaderThread.start();
+        fillEmissionBuffer();
+        if (emBuffer.isEmpty()) {
+            noRecordFoundError(strIt);
+        }
+    }
+
+    private void noRecordFoundError(FileIt<String> it) {
+        if (it.hasNext()==false) {
+            StringBuilder sb = new StringBuilder(100);
+            sb.append("No VCF records found (data source: ");
+            sb.append(it.file()==null ? "stdin" : it.file());
+            sb.append(")");
+            sb.append(Const.nl);
+            sb.append("Check that the chromosome identifiers are the same in each input VCF");
+            sb.append(Const.nl);
+            sb.append("file and in the \'chrom=\' command line argument (if \'chrom=\' is used).");
+            throw new IllegalArgumentException(sb.toString());
+        }
+    }
+
+    /*
+     * Returns a thread that reads VCF records and puts them in the
+     * blocking queue in arrays. Each array contains records whose lines
+     * begin with the same chromosome field. A zero-length array is
+     * enqueued as a sentinel when the input is exhausted.
+     */
+    private Thread fileReadingThread() {
+        Runnable runnable = () -> {
+            String line = readLine(strIt);
+            int bufferSize = stringBufferSize(line);
+            while (line != null && stopFileReadingThread == false) {
+                String chromPlusTab = chromFieldPlusTab(line);
+                String[] sa = new String[bufferSize];
+                int size = 0;
+                while (line != null && size < bufferSize
+                        && line.startsWith(chromPlusTab)) {
+                    sa[size++] = line;
+                    line = readLine(strIt);
+                }
+                if (size < bufferSize) {
+                    sa = Arrays.copyOf(sa, size);
+                }
+                putInBlockingQueue(stringBuffers, sa);
+            }
+            if (stopFileReadingThread == false) {
+                putInBlockingQueue(stringBuffers, new String[0]);    // sentinel
+            }
+        };
+        return new Thread(runnable);
+    }
+
+    /*
+     * Returns the number of VCF records per string buffer. The value is
+     * based on the length of the first record and the maximum memory
+     * available to the JVM, and is constrained to lie between
+     * DEFAULT_EM_BUFFER_SIZE/20 and DEFAULT_EM_BUFFER_SIZE (inclusive).
+     * Returns 0 if the specified line is null.
+     */
+    private static int stringBufferSize(String line) {
+        if (line == null) {
+            return 0;
+        }
+        // long arithmetic throughout; the lower bound of 1 prevents
+        // division by zero for a zero-length line
+        long nBytesPerLine = Math.max(1L, 2L*line.length());
+        Runtime rt = Runtime.getRuntime();
+        long maxMem = rt.maxMemory();
+        if (maxMem == Long.MAX_VALUE) {
+            // no inherent limit: assume 500 GiB. The product must be
+            // computed as a long because 500 * (1 << 30) overflows an
+            // int and evaluates to 0.
+            maxMem = 500L * (1L << 30);
+        }
+        long bufferSize = maxMem / (100*nBytesPerLine);
+        if (bufferSize > DEFAULT_EM_BUFFER_SIZE) {
+            bufferSize = DEFAULT_EM_BUFFER_SIZE;
+        }
+        if (bufferSize < DEFAULT_EM_BUFFER_SIZE/20) {
+            bufferSize = DEFAULT_EM_BUFFER_SIZE/20;
+        }
+        return (int) bufferSize;
+    }
+
+    private static <E> void putInBlockingQueue(BlockingQueue<E> q, E e) {
+        try {
+            q.put(e);
+        } catch (InterruptedException ex) {
+            Utilities.exit("Error: InterruptedException", ex);
+        }
+    }
+
+    private static <E> E takeFromBlockingQueue(BlockingQueue<E> q) {
+        try {
+            return q.take();
+        } catch (InterruptedException ex) {
+            Utilities.exit("Error: InterruptedException", ex);
+        }
+        assert false;
+        return null;
+    }
+
+    /*
+     * Returns the prefix of the record up to and including the first
+     * tab character.
+     */
+    private static String chromFieldPlusTab(String vcfRecord) {
+        int tabIndex = vcfRecord.indexOf(Const.tab);
+        if (tabIndex == -1) {
+            String s = Const.nl + "ERROR: Missing tab delimiter in VCF Record:"
+                    + Const.nl + vcfRecord
+                    + Const.nl + "Exiting Program";
+            Utilities.exit(s);
+        }
+        return vcfRecord.substring(0, tabIndex + 1);
+    }
+
+    /*
+     * Parses string buffers taken from the blocking queue until the
+     * emission buffer holds at least emBufferSize records or the
+     * sentinel is encountered. Records are moved to the emission buffer
+     * only by flushToEmBuffer(), which is invoked when the compressor
+     * cannot accept another record and when the sentinel is encountered.
+     */
+    private void fillEmissionBuffer() {
+        assert emBuffer.isEmpty();
+        int lastLength = -1;
+        while (lastLength != 0 && emBuffer.size() < emBufferSize) {
+            String[] stringBuffer = takeFromBlockingQueue(stringBuffers);
+            lastLength = stringBuffer.length;
+            if (stringBuffer.length>0) {
+                List<VcfEmission> list = Arrays.stream(stringBuffer)
+                        .parallel()
+                        .map(mapper)
+                        .filter(e -> markerFilter.accept(e.marker()))
+                        .collect(Collectors.toList());
+                for (int j=0, n=list.size(); j<n; ++j) {
+                    VcfEmission e = list.get(j);
+                    if (e.storesNonMajorIndices()
+                            || e.marker().nAlleles() > MAX_NSEQ) {
+                        uncompressedBuffer.add(e);
+                    }
+                    else {
+                        boolean success = emCompressor.addToCompessedList(e);
+                        if (success == false) {
+                            flushToEmBuffer();
+                            success = emCompressor.addToCompessedList(e);
+                            assert success;
+                        }
+                        // placeholder that flushToEmBuffer() replaces
+                        // with the compressed record
+                        uncompressedBuffer.add(null);
+                    }
+                }
+            }
+            else {
+                // put sentinel element back
+                putInBlockingQueue(stringBuffers, stringBuffer);
+            }
+        }
+        if (lastLength==0) {
+            flushToEmBuffer();
+        }
+    }
+
+    /*
+     * Replaces each null placeholder in the uncompressed buffer with the
+     * next record of the compressed list, and then moves all records to
+     * the emission buffer.
+     */
+    private void flushToEmBuffer() {
+        List<VcfEmission> list = emCompressor.getCompressedList();
+        emCompressor.clear();
+        int index = 0;
+        for (int j=0, n=uncompressedBuffer.size(); j<n; ++j) {
+            VcfEmission ve = uncompressedBuffer.get(j);
+            if (ve==null) {
+                uncompressedBuffer.set(j, list.get(index++));
+            }
+        }
+        emBuffer.addAll(uncompressedBuffer);
+        uncompressedBuffer.clear();
+    }
+
+    /*
+     * Returns the next line that contains a non-whitespace character,
+     * or null if there is no such line. Blank lines at the end of the
+     * input are skipped rather than returned, so they are not reported
+     * as malformed VCF records.
+     */
+    private static String readLine(FileIt<String> it) {
+        while (it.hasNext()) {
+            String line = it.next();
+            if (line.trim().isEmpty()==false) {
+                return line;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Stops the file reading thread, waits for it to terminate, closes
+     * the underlying iterator, and discards all buffered records.
+     */
+    @Override
+    public void close() {
+        stopFileReadingThread = true;
+        stringBuffers.poll();  // unblock file reading thread
+        try {
+            fileReaderThread.join();
+        } catch (InterruptedException ex) {
+            Utilities.exit("Error: InterruptedException", ex);
+        }
+        strIt.close();
+        emBuffer.clear();
+    }
+
+    /**
+     * Returns {@code true} if the iteration has more elements, and returns
+     * {@code false} otherwise.
+     * @return {@code true} if the iteration has more elements
+     */
+    @Override
+    public boolean hasNext() {
+        return !emBuffer.isEmpty();
+    }
+
+    /**
+     * Returns the next element in the iteration.
+     * @return the next element in the iteration
+     * @throws NoSuchElementException if the iteration has no more elements
+     */
+    @Override
+    public VcfEmission next() {
+        if (!hasNext()) {
+            throw new NoSuchElementException();
+        }
+        VcfEmission first = emBuffer.removeFirst();
+        if (emBuffer.isEmpty()) {
+            // refill eagerly so that hasNext() only needs to inspect
+            // the buffer
+            fillEmissionBuffer();
+        }
+        return first;
+    }
+
+    /**
+     * The {@code remove} method is not supported by this iterator.
+     * @throws UnsupportedOperationException if this method is invoked
+     */
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException(this.getClass().toString());
+    }
+
+    @Override
+    public File file() {
+        return strIt.file();
+    }
+
+    @Override
+    public Samples samples() {
+        return vcfHeader.samples();
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder(80);
+        sb.append("RefVcfIt from file: ");
+        sb.append(strIt.file()==null ? "stdin" : strIt.file().toString());
+        return sb.toString();
+    }
+
+    /*
+     * Parses the VCF record. If the number of haplotypes that do not
+     * carry the most frequent allele is less than (1 + nHaps/200), a
+     * representation that stores the indices of those haplotypes is
+     * returned. Otherwise the parsed record is returned.
+     */
+    private static VcfEmission refEmission(VcfHeader vcfHeader,
+            String vcfRecord) {
+        VcfEmission ve = refEmission(new VcfRecGTParser(vcfHeader, vcfRecord));
+        int nHaps = 2*ve.nSamples();
+        int[] alleleCounts = alleleCounts(ve);
+        int nonMajorCnt = nHaps - max(alleleCounts);
+        if (nonMajorCnt < (1 + nHaps/200)) {
+            if (alleleCounts.length == 2) {
+                int minorAllele = 1 - majorAllele(alleleCounts);
+                int[] minorIndices = minorIndices(ve, minorAllele, nonMajorCnt);
+                return new LowMafRefDiallelicGT(ve.marker(), ve.samples(),
+                        minorAllele, minorIndices);
+            }
+            else {
+                int[][] hapIndices = hapIndices(ve, alleleCounts);
+                return new LowMafRefGT(ve.marker(), ve.samples(), hapIndices);
+            }
+        }
+        else {
+            return ve;
+        }
+    }
+
+    /* Returns the indices of the haplotypes that carry the minor allele */
+    private static int[] minorIndices(VcfEmission ve, int minorAllele, int mac) {
+        int[] minorIndices = new int[mac];
+        int index = 0;
+        for (int h = 0, n = ve.nHaps(); h < n; ++h) {
+            if (ve.allele(h) == minorAllele) {
+                minorIndices[index++] = h;
+            }
+        }
+        assert index==mac;
+        return minorIndices;
+    }
+
+    /*
+     * Returns an array whose j-th element is the array of indices of the
+     * haplotypes that carry allele j. The element for the major allele
+     * is null.
+     */
+    private static int[][] hapIndices(VcfEmission ve, int[] alCnts) {
+        int majorAllele = majorAllele(alCnts);
+        int[][] hapIndices = new int[alCnts.length][];
+        for (int j=0; j<hapIndices.length; ++j) {
+            hapIndices[j] = (j == majorAllele) ? null : new int[alCnts[j]];
+        }
+        int[] indices = new int[alCnts.length];
+        for (int h=0, n=ve.nHaps(); h<n; ++h) {
+            int a = ve.allele(h);
+            if (a != majorAllele) {
+                hapIndices[a][indices[a]++] = h;
+            }
+        }
+        return hapIndices;
+    }
+
+    /* Returns the smallest index that has the maximum count */
+    private static int majorAllele(int[] alleleCnts) {
+        int major = 0;
+        for (int j=1; j<alleleCnts.length; ++j) {
+            if (alleleCnts[j] > alleleCnts[major]) {
+                major = j;
+            }
+        }
+        return major;
+    }
+
+    /*
+     * Uses a byte array representation when the number of alleles is
+     * at most Byte.MAX_VALUE + 1, and a bit set representation otherwise.
+     */
+    private static VcfEmission refEmission(VcfRecGTParser gtp) {
+        if (gtp.marker().nAlleles() <= Byte.MAX_VALUE + 1) {
+            return new ByteArrayRefGT(gtp);
+        }
+        else {
+            return new BitSetRefGT(gtp);
+        }
+    }
+
+    /* Returns the maximum element of the specified non-empty array */
+    private static int max(int[] ia) {
+        int maxIndex = 0;
+        for (int j=1; j<ia.length; ++j) {
+            if (ia[j] > ia[maxIndex]) {
+                maxIndex = j;
+            }
+        }
+        return ia[maxIndex];
+    }
+
+    /* Returns the number of haplotypes that carry each allele */
+    private static int[] alleleCounts(VcfEmission ve) {
+        int[] cnts = new int[ve.marker().nAlleles()];
+        for (int h = 0, n = ve.nHaps(); h < n; ++h) {
+            ++cnts[ve.allele(h)];
+        }
+        return cnts;
+    }
+}
diff --git a/vcf/RestrictedVcfWindow.java b/vcf/RestrictedVcfWindow.java
new file mode 100644
index 0000000..a09043e
--- /dev/null
+++ b/vcf/RestrictedVcfWindow.java
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.SampleFileIt;
+import java.io.Closeable;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * <p>Class {@code RestrictedVcfWindow} represents a sliding window of VCF
+ * records.
+ * </p>
+ * <p>Instances of class {@code RestrictedVcfWindow} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class RestrictedVcfWindow implements Closeable {
+
+ private final SampleFileIt<? extends VcfEmission> it;
+ private final List<VcfEmission> window;
+ private Markers markers;
+ private int overlap = 0;
+ private int cumMarkerCnt;
+ private VcfEmission next;
+
+    /**
+     * Creates a {@code RestrictedVcfWindow} that reads VCF records from the
+     * specified iterator. The first record is read during construction.
+     * @param it an iterator that returns VCF records
+     * @throws IllegalArgumentException if {@code it.hasNext() == false}
+     * @throws IllegalArgumentException if a VCF record has a format error
+     * @throws NullPointerException if {@code it == null}
+     */
+    public RestrictedVcfWindow(SampleFileIt<? extends VcfEmission> it) {
+        if (!it.hasNext()) {
+            throw new IllegalArgumentException("it.hasNext()==false");
+        }
+        this.it = it;
+        this.next = it.next();
+        this.window = new ArrayList<>();
+        this.markers = null;
+        this.overlap = 0;
+        this.cumMarkerCnt = 0;
+    }
+
+ /**
+ * Advances the sliding marker window, and returns the advanced window
+ * as a {@code VcfEmission[]} object.
+ * The returned array will have length {@code nextMarkers.nMarkers()}.
+ * Markers not found in the data source will have {@code null}
+ * entries in the returned array. If the advanced window contains no
+ * VCF records, an error message is passed to
+ * {@code blbutil.Utilities.exit()}.
+ *
+ * @param nextMarkers the set of markers in the advanced window
+ * @return the advanced marker window
+ *
+ * @throws IllegalArgumentException if {@code nextMarkers.nMarkers() == 0}
+ * @throws IllegalArgumentException if the first and last specified
+ * markers are on different chromosomes
+ * @throws IllegalArgumentException if specified markers are
+ * inconsistent with a sliding marker window
+ * @throws IllegalArgumentException if the specified markers do not
+ * advance the current marker window
+ * @throws IllegalArgumentException if a format error is detected in a
+ * VCF record
+ * @throws NullPointerException if {@code nextMarkers == null}
+ */
+ public VcfEmission[] advanceWindow(Markers nextMarkers) {
+ checkMarkers(nextMarkers);
+ advanceToCurrentChrom(nextMarkers);
+ int fullOverlap = overlap(markers, nextMarkers);
+
+ List<VcfEmission> newWindow = new ArrayList<>(nextMarkers.nMarkers());
+ newWindow.addAll(window.subList(window.size() - fullOverlap, window.size()));
+ this.overlap = countNonNull(newWindow);
+ for (int j = fullOverlap, n=nextMarkers.nMarkers(); j<n; ++j) {
+ Marker m = nextMarkers.marker(j);
+ if (next!=null && next.marker().chromIndex()==m.chromIndex()) {
+ while (next != null && next.marker().pos() < m.pos()) {
+ next = it.hasNext() ? it.next() : null;
+ }
+ while (next != null && next.marker().pos() == m.pos()
+ && next.marker().equals(m)==false) {
+ next = it.hasNext() ? it.next() : null;
+ }
+ }
+ if (next != null && next.marker().equals(m)) {
+ ++cumMarkerCnt;
+ newWindow.add(next);
+ next = it.hasNext() ? it.next() : null;
+ }
+ else {
+ newWindow.add(null);
+ }
+ }
+ this.markers = nextMarkers;
+ this.window.clear();
+ this.window.addAll(newWindow);
+ if (countNonNull(newWindow) == 0) {
+ missingMarkersErr(nextMarkers);
+ }
+ return window.toArray(new VcfEmission[0]);
+ }
+
+    // Rejects an empty, multi-chromosome, or non-advancing marker list.
+    private void checkMarkers(Markers mkrs) {
+        int n = mkrs.nMarkers();
+        if (n == 0) {
+            throw new IllegalArgumentException("markers.nMarkers()==0");
+        }
+        Marker start = mkrs.marker(0);
+        Marker end = mkrs.marker(n - 1);
+        if (this.markers != null) {
+            Marker last = this.markers.marker(this.markers.nMarkers() - 1);
+            if (last.chromIndex() == end.chromIndex() && last.pos() >= end.pos()) {
+                throw new IllegalArgumentException("markers do not advance window");
+            }
+        }
+        if (start.chromIndex() != end.chromIndex()) {
+            throw new IllegalArgumentException("inconsistent chromosomes:"
+                    + Const.nl + start + Const.nl + end);
+        }
+    }
+
+    // Reads ahead until next is null or lies on the chromosome of markers.
+    private void advanceToCurrentChrom(Markers markers) {
+        int target = markers.marker(0).chromIndex();
+        for (; next != null && next.marker().chromIndex() != target;
+                next = it.hasNext() ? it.next() : null) { /* skip record */ }
+    }
+
+ private static int overlap(Markers prev, Markers next) {
+ if (prev==null
+ || prev.marker(0).chromIndex() != next.marker(0).chromIndex()) {
+ return 0;
+ }
+ Marker startMarker = next.marker(0);
+ int startPos = startMarker.pos();
+ int index = prev.nMarkers() - 1; // index of first overlap marker
+ while (index >= 0 && prev.marker(index).pos() > startPos) {
+ --index;
+ }
+ while (index >= 0 && prev.marker(index).equals(startMarker)==false) {
+ --index;
+ }
+ if (index < 0) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ int overlap = prev.nMarkers() - index;
+ for (int j = 1; j < overlap; ++j) {
+ if (prev.marker(index + j).equals(next.marker(j))==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ }
+ return overlap;
+ }
+
+    // Returns the number of elements of list that are not null.
+    // An empty list yields 0.
+    private static <E> int countNonNull(List<E> list) {
+        int nonNull = 0;
+        for (E element : list) {
+            nonNull += (element != null) ? 1 : 0;
+        }
+        return nonNull;
+    }
+
+ private static void missingMarkersErr(Markers markers) {
+ StringBuilder sb = new StringBuilder(500);
+ sb.append(Const.nl);
+ sb.append("ERROR: Reference and target files have no markers in common"
+ + " in interval: ");
+ sb.append(Const.nl);
+ sb.append(" ");
+ sb.append(interval(markers));
+ sb.append(Const.nl);
+ sb.append(Const.nl);
+ sb.append("Common markers must have identical CHROM, POS, REF, and ALT"
+ + " fields.");
+ sb.append(Const.nl);
+ sb.append("Exiting program.");
+ sb.append(Const.nl);
+ blbutil.Utilities.exit(sb.toString());
+ }
+
+ private static String interval(Markers markers) {
+ Marker a = markers.marker(0);
+ Marker b = markers.marker(markers.nMarkers()-1);
+ assert a.chromIndex() == b.chromIndex();
+ return a.chrom() + Const.colon + a.pos() + Const.hyphen + b.pos();
+ }
+
+ /**
+ * Returns the file from which VCF records are read, or returns
+ * {@code null} if the source is standard input.
+ * @return the file from which VCF records are read, or
+ * {@code null} if the source is standard input
+ */
+ public File file() {
+ return it.file();
+ }
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ public Samples samples() {
+ return it.samples();
+ }
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ public int nSamples() {
+ return it.samples().nSamples();
+ }
+
+ /**
+ * Returns the number of VCF records in the overlap between the current
+ * window and the previous window. Returns 0 if the current window
+ * is the first window.
+ *
+ * @return the number of VCF records in the overlap between the current
+ * window and the previous window
+ */
+ public int overlap() {
+ return overlap;
+ }
+
+ /**
+ * Returns the number of VCF records in the union of the current window
+ * and all previous windows.
+ *
+ * @return the number of VCF records in the union of the current window
+ * and all previous windows
+ */
+ public int cumMarkerCnt() {
+ return cumMarkerCnt;
+ }
+
+ /**
+ * Releases any I/O resources controlled by this object.
+ */
+ @Override
+ public void close() {
+ it.close();
+ }
+
+    /**
+     * Returns a string representation of {@code this}. The exact
+     * details of the representation are unspecified and subject to change.
+     * The current representation is the class description followed by the
+     * VCF record that has been read ahead but not yet added to a window.
+     * @return a string representation of {@code this}
+     */
+    @Override
+    public String toString() {
+        String className = this.getClass().toString();
+        String nextRecord = String.valueOf(next);
+        return className + " - next:" + nextRecord;
+    }
+}
diff --git a/vcf/RevGL.java b/vcf/RevGL.java
new file mode 100644
index 0000000..0368b31
--- /dev/null
+++ b/vcf/RevGL.java
@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+
+/**
+ * <p>Class {@code RevGL} is a wrapper for a {@code GL} instance. The wrapper
+ * reverses the order of markers in the wrapped object.
+ * </p>
+ * <p>Instances of class {@code RevGL} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class RevGL implements GL {
+
+ /*
+ * All instances of the {@code GL} interface are required to be immutable.
+ */
+ private final GL gl;
+ private final int lastMarker;
+
+ /**
+ * Constructs a new {@code RevGL} instance.
+ * @param gl genotype emission probabilities that will be
+ * wrapped by the new instance
+ * @throws NullPointerException if {@code gl == null}
+ */
+ public RevGL(GL gl) {
+ this.gl = gl;
+ this.lastMarker = gl.nMarkers() - 1;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return gl.isRefData();
+ }
+
+ @Override
+ public float gl(int marker, int sample, int allele1, int allele2) {
+ int revMarker = lastMarker - marker;
+ return gl.gl(revMarker, sample, allele1, allele2);
+ }
+
+ @Override
+ public boolean isPhased(int marker, int sample) {
+ int revMarker = lastMarker - marker;
+ return gl.isPhased(revMarker, sample);
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ int revMarker = lastMarker - marker;
+ return gl.allele1(revMarker, sample);
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ int revMarker = lastMarker - marker;
+ return gl.allele2(revMarker, sample);
+ }
+
+ @Override
+ public int allele(int marker, int hap) {
+ int revMarker = lastMarker - marker;
+ return gl.allele(revMarker, hap);
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ int revMarker = lastMarker - marker;
+ return gl.marker(revMarker);
+ }
+
+ @Override
+ public Markers markers() {
+ return gl.markers().reverse();
+ }
+
+ @Override
+ public int nMarkers() {
+ return gl.nMarkers();
+ }
+
+ @Override
+ public int nHaps() {
+ return gl.nHaps();
+ }
+
+ @Override
+ public int nSamples() {
+ return gl.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return gl.samples();
+ }
+
+ @Override
+ public String toString() {
+ return gl.toString();
+ }
+}
diff --git a/vcf/SeqCodedRefGT.java b/vcf/SeqCodedRefGT.java
new file mode 100644
index 0000000..f6b3abd
--- /dev/null
+++ b/vcf/SeqCodedRefGT.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2015 browning
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.IntArray;
+
+/**
+ * <p>Class {@code SeqCodedRefGT} represents phased, non-missing
+ * genotypes for a list of reference samples at a single marker.
+ * Genotype emission probabilities are determined by the sample
+ * genotypes.
+ * </p>
+ * <p>Instances of class {@code SeqCodedRefGT} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class SeqCodedRefGT implements VcfEmission {
+
+ private final Marker marker;
+ private final Samples samples;
+ private final IntArray hapToSeq;
+ private final IntArray seqToAllele;
+
+ /**
+ * Creates a new {@code SeqCodedRefGT} instance with phased,
+ * non-missing genotypes from the specified marker, samples,
+ * and haplotype alleles.
+ * @param marker the marker
+ * @param samples the samples
+ * @param hapToSeq an array whose {@code j}-th element is the index
+ * of the distinct allele sequence carried by the {@code j}-th haplotype
+ * @param seqToAllele an array whose {@code j}-th element is the marker
+ * allele carried by the {@code j}-th distinct allele sequence
+ *
+ * @throws IllegalArgumentException if
+ * {@code hapToSeq.size() != 2*samples.nSamples()}
+ * @throws IndexOutOfBoundsException if any element of {@code hapToSeq}
+ * is negative or greater than or equal to {@code seqToAllele.size()}
+ * @throws IndexOutOfBoundsException if any element of {@code seqToAllele}
+ * is negative or greater than or equal to {@code marker.nAlleles()}
+ * @throws NullPointerException if any parameter is {@code null}
+ */
+ public SeqCodedRefGT(Marker marker, Samples samples, IntArray hapToSeq,
+ IntArray seqToAllele) {
+ checkData(marker, samples, hapToSeq, seqToAllele);
+ this.marker = marker;
+ this.samples = samples;
+ this.hapToSeq = hapToSeq;
+ this.seqToAllele = seqToAllele;
+ }
+
+ private static void checkData(Marker marker, Samples samples,
+ IntArray hapToSeq, IntArray seqToAllele) {
+ String err = "inconsistent data";
+ int nHaps = hapToSeq.size();
+ if (hapToSeq.size() != 2*samples.nSamples()) {
+ throw new IllegalArgumentException(err);
+ }
+ for (int j=0; j<nHaps; ++j) {
+ marker.allele(seqToAllele.get(hapToSeq.get(j)));
+ }
+ }
+
+ @Override
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return samples;
+ }
+
+ @Override
+ public int nHaps() {
+ return hapToSeq.size();
+ }
+
+ @Override
+ public int nHapPairs() {
+ return samples.nSamples();
+ }
+
+ @Override
+ public Marker marker() {
+ return marker;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return true;
+ }
+
+ @Override
+ public float gl(int sample, int allele1, int allele2) {
+ boolean match = allele1 == allele1(sample)
+ && allele2 == allele2(sample);
+ return match ? 1f : 0f;
+ }
+
+ @Override
+ public boolean isPhased(int sample) {
+ return true;
+ }
+
+ @Override
+ public int allele1(int sample) {
+ return seqToAllele.get(hapToSeq.get(2*sample));
+ }
+
+ @Override
+ public int allele2(int sample) {
+ return seqToAllele.get(hapToSeq.get(2*sample + 1));
+ }
+
+ @Override
+ public int allele(int hap) {
+ return seqToAllele.get(hapToSeq.get(hap));
+ }
+
+ @Override
+ public int nAlleles() {
+ return this.marker().nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices() {
+ return false;
+ }
+
+ @Override
+ public int majorAllele() {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int alleleCount(int allele) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int hapIndex(int allele, int copy) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ /**
+ * Returns the data represented by {@code this} as a VCF
+ * record with a GT format field. The returned VCF record
+ * will have missing QUAL and INFO fields, will have "PASS"
+ * in the filter field, and will have a GT format field.
+ * @return the data represented by {@code this} as a VCF
+ * record with a GT format field
+ */
+ @Override
+ public String toString() {
+ return toVcfRec();
+ }
+}
diff --git a/vcf/SplicedGL.java b/vcf/SplicedGL.java
new file mode 100644
index 0000000..377a2f6
--- /dev/null
+++ b/vcf/SplicedGL.java
@@ -0,0 +1,167 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import haplotype.SampleHapPairs;
+
+/**
+ * <p>Class {@code SplicedGL} represents genotype emission probabilities
+ * for a set of samples. The genotype emission probabilities are determined
+ * by a {@code SampleHapPairs} instance for the initial markers, and are
+ * determined by a {@code GL} instance for the remaining markers.
+ * The {@code isRefData()} method of the {@code SplicedGL} class
+ * returns the same value as the {@code isRefData()} method of
+ * the {@code GL} instance.
+ * </p>
+ * <p>Instances of class {@code SplicedGL} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class SplicedGL implements GL {
+
+ private final int overlap;
+ private final SampleHapPairs haps;
+ private final GL gl;
+
+ /**
+ * Constructs a new {@code SplicedGL} instance.
+ * @param haps sample haplotype pairs for the initial
+ * markers
+ * @param gl genotype emission probabilities for all markers
+ * @throws IllegalArgumentException if
+ * {@code haps.nMarkers() >= gl.nMarkers()}
+ * @throws IllegalArgumentException if
+ * {@code haps.marker(j).equals(gl.marker(j)) == false} for any {@code j}
+ * satisfying {@code 0 <= j && j < haps.nMarkers()}
+ * @throws IllegalArgumentException if
+ * {@code haps.samples().equals(gl.samples()) == false}
+ * @throws NullPointerException if {@code haps == null || gl == null}
+ */
+ public SplicedGL(SampleHapPairs haps, GL gl) {
+ if (haps.nMarkers()>=gl.nMarkers()) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ for (int j=0, n=haps.nMarkers(); j<n; ++j) {
+ if (haps.marker(j).equals(gl.marker(j))==false) {
+ throw new IllegalArgumentException("inconsistent markers");
+ }
+ }
+ if (haps.samples().equals(gl.samples())==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ this.overlap = haps.nMarkers();
+ this.haps = haps;
+ this.gl = gl;
+ }
+
+ @Override
+ public boolean isRefData() {
+ return gl.isRefData();
+ }
+
+ @Override
+ public float gl(int marker, int sample, int allele1, int allele2) {
+ if (marker<overlap) {
+ int a1 = haps.allele1(marker, sample);
+ int a2 = haps.allele2(marker, sample);
+ return (allele1==a1 && allele2==a2) ? 1.0f : 0.0f;
+ }
+ else {
+ return gl.gl(marker, sample, allele1, allele2);
+ }
+ }
+
+ @Override
+ public boolean isPhased(int marker, int sample) {
+ if (marker<overlap) {
+ return true;
+ }
+ else {
+ return gl.isPhased(marker, sample);
+ }
+ }
+
+ @Override
+ public int allele1(int marker, int sample) {
+ if (marker<overlap) {
+ return haps.allele1(marker, sample);
+ }
+ else {
+ return gl.allele1(marker, sample);
+ }
+ }
+
+ @Override
+ public int allele2(int marker, int sample) {
+ if (marker<overlap) {
+ return haps.allele2(marker, sample);
+ }
+ else {
+ return gl.allele2(marker, sample);
+ }
+ }
+
+ @Override
+ public int allele(int marker, int hap) {
+ if (marker<overlap) {
+ return haps.allele(marker, hap);
+ }
+ else {
+ return gl.allele(marker, hap);
+ }
+ }
+
+ @Override
+ public Marker marker(int marker) {
+ return gl.marker(marker);
+ }
+
+ @Override
+ public Markers markers() {
+ return gl.markers();
+ }
+
+ @Override
+ public int nMarkers() {
+ return gl.nMarkers();
+ }
+
+ @Override
+ public int nHaps() {
+ return gl.nHaps();
+ }
+
+ @Override
+ public int nSamples() {
+ return gl.nSamples();
+ }
+
+ @Override
+ public Samples samples() {
+ return gl.samples();
+ }
+
+    /**
+     * Returns a short description of {@code this} (the number of samples).
+     */
+    @Override
+    public String toString() {
+        return "SplicedGL: nSamples=" + this.nSamples();
+    }
+}
diff --git a/vcf/TargetData.java b/vcf/TargetData.java
new file mode 100644
index 0000000..c5108d4
--- /dev/null
+++ b/vcf/TargetData.java
@@ -0,0 +1,230 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.SampleFileIt;
+import haplotype.HapPair;
+import haplotype.SampleHapPairs;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * <p>Class {@code TargetData} represents a sliding window of
+ * target VCF records.
+ * </p>
+ * <p>Instances of class {@code TargetData} are not thread-safe.
+ * </p>
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class TargetData implements Data {
+
+ private final VcfWindow vcfWindow;
+
+ private int window = 0;
+ private Markers markers;
+ private VcfEmission[] markerData;
+ private GL gl;
+
+ /**
+ * Constructs and returns a new {@code TargetData} instance from
+ * VCF records returned by the specified {@code SampleFileIt} object.
+ *
+ * @param it an iterator that returns the target VCF records
+ * @return a new {@code TargetData} instance
+ *
+ * @throws IllegalArgumentException if the data returned by
+ * the specified iterator contains no samples
+ * @throws IllegalArgumentException if a format error is detected
+ * in a string VCF record
+ * @throws NullPointerException if {@code it == null}
+ */
+ public static TargetData targetData(SampleFileIt<? extends VcfEmission> it) {
+ if (it.samples().nSamples()==0) {
+ throw new IllegalArgumentException("nSamples==0");
+ }
+ return new TargetData(new VcfWindow(it));
+ }
+
+ private TargetData(VcfWindow vcfWindow) {
+ this.vcfWindow = vcfWindow;
+ this.markers = Markers.create(new Marker[0]);
+ this.markerData = new VcfEmission[0];
+ this.gl = new BasicGL(vcfWindow.samples(), markerData);
+ }
+
+ @Override
+ public boolean lastWindowOnChrom() {
+ return vcfWindow.lastWindowOnChrom();
+ }
+
+ @Override
+ public boolean canAdvanceWindow() {
+ return vcfWindow.canAdvanceWindow();
+ }
+
+ @Override
+ public void advanceWindow(int overlap, int windowSize) {
+ markerData = vcfWindow.advanceWindow(overlap, windowSize);
+ markers = extractMarkers(markerData);
+ gl = new BasicGL(vcfWindow.samples(), markerData);
+ ++window;
+ }
+
+    // Gathers the marker of every record, in order, into a Markers object.
+    private static Markers extractMarkers(VcfEmission[] markerData) {
+        int n = markerData.length;
+        Marker[] ma = new Marker[n];
+        for (int j = 0; j < n; ++j) { ma[j] = markerData[j].marker(); }
+        return Markers.create(ma);
+    }
+
+ @Override
+ public int window() {
+ return window;
+ }
+
+
+ @Override
+ public int targetOverlap() {
+ return vcfWindow.overlap();
+ }
+
+ @Override
+ public int overlap() {
+ return vcfWindow.overlap();
+ }
+
+ @Override
+ public int nTargetMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public int nTargetMarkersSoFar() {
+ return vcfWindow.cumMarkerCnt();
+ }
+
+ @Override
+ public Markers targetMarkers() {
+ return markers;
+ }
+
+ @Override
+ public int nMarkers() {
+ return markers.nMarkers();
+ }
+
+ @Override
+ public int nMarkersSoFar() {
+ return vcfWindow.cumMarkerCnt();
+ }
+
+ @Override
+ public Markers markers() {
+ return markers;
+ }
+
+ @Override
+ public int targetMarkerIndex(int refIndex) {
+ if (refIndex < 0 || refIndex >= markers.nMarkers()) {
+ throw new ArrayIndexOutOfBoundsException(refIndex);
+ }
+ return refIndex;
+ }
+
+ @Override
+ public int markerIndex(int nonRefIndex) {
+ if (nonRefIndex < 0 || nonRefIndex >= markers.nMarkers()) {
+ throw new ArrayIndexOutOfBoundsException(nonRefIndex);
+ }
+ return nonRefIndex;
+ }
+
+ @Override
+ public int nTargetSamples() {
+ return vcfWindow.nSamples();
+ }
+
+ @Override
+ public Samples targetSamples() {
+ return vcfWindow.samples();
+ }
+
+ @Override
+ public int nRefSamples() {
+ return 0;
+ }
+
+ @Override
+ public Samples refSamples() {
+ return null;
+ }
+
+ @Override
+ public int nAllSamples() {
+ return nTargetSamples();
+ }
+
+ @Override
+ public Samples allSamples() {
+ return targetSamples();
+ }
+
+ @Override
+ public GL targetGL() {
+ return gl;
+ }
+
+ @Override
+ public List<HapPair> restrictedRefHapPairs() {
+ // no reference haplotypes to add
+ return new ArrayList<>();
+ }
+
+ @Override
+ public List<HapPair> refHapPairs() {
+ // no reference haplotypes to return
+ return new ArrayList<>();
+ }
+
+
+ @Override
+ public SampleHapPairs refSampleHapPairs() {
+ // no reference haplotypes to return
+ return null;
+ }
+
+ @Override
+ public void close() {
+ vcfWindow.close();
+ }
+
+    /**
+     * Returns a string representation of {@code this}. The exact
+     * details of the representation are unspecified and subject to change.
+     * The current representation is the qualified name of this class.
+     * @return a string representation of {@code this}.
+     */
+    @Override
+    public String toString() {
+        // Identify this class by its own name, not the stale "vcf.NonRefData".
+        return "vcf.TargetData";
+    }
+}
diff --git a/vcf/VcfEmission.java b/vcf/VcfEmission.java
new file mode 100644
index 0000000..5798dc5
--- /dev/null
+++ b/vcf/VcfEmission.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+
+/**
+ * <p>Interface {@code VcfEmission} represents genotype emission
+ * probabilities for a set of samples at a single marker.
+ * </p>
+ * <p>All instances of {@code VcfEmission} are required to be immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public interface VcfEmission extends HapsMarker {
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ int nSamples();
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ Samples samples();
+
+ /**
+ * Returns {@code true} if the genotype emission probabilities
+ * for each sample are determined by a phased called genotype
+ * that has no missing alleles, and returns {@code false} otherwise.
+ * @return {@code true} if the genotype emission probabilities
+ * for each sample are determined by a phased called genotype
+ * that has no missing alleles
+ */
+ boolean isRefData();
+
+ /**
+ * Returns the probability of the observed data if the specified pair
+ * of ordered alleles is the true genotype in the specified sample.
+ * @param sample the sample index
+ * @param allele1 the first allele index
+ * @param allele2 the second allele index
+ * @return the probability of the observed data if the specified pair
+ * of ordered alleles is the true genotype in the specified sample
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele1 < 0 || allele1 >= this.marker().nAlleles()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele2 < 0 || allele2 >= this.marker().nAlleles()}
+ */
+ float gl(int sample, int allele1, int allele2);
+
+ /**
+ * Returns {@code true} if the genotype emission probabilities for
+ * the specified sample are determined by a phased, nonmissing genotype,
+ * and returns {@code false} otherwise.
+ * @param sample the sample index
+ * @return {@code true} if the genotype emission probabilities
+ * for the specified sample are determined by a phased, nonmissing genotype
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ boolean isPhased(int sample);
+
+ /**
+ * Returns the first allele for the specified sample or -1 if the
+ * allele is missing. Alleles are arbitrarily ordered
+ * if the genotype is unphased.
+ * @param sample the sample index
+ * @return the first allele for the specified sample or -1 if the
+ * allele is missing
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ @Override
+ int allele1(int sample);
+
+ /**
+ * Returns the second allele for the specified sample or -1 if the
+ * allele is missing. Alleles are arbitrarily ordered
+ * if the genotype is unphased.
+ * @param sample the sample index
+ * @return the second allele for the specified sample or -1 if the
+ * allele is missing
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ @Override
+ int allele2(int sample);
+
+ /**
+ * Returns the allele on the specified haplotype or -1 if the
+ * allele is missing. Alleles are arbitrarily ordered if the genotype
+ * is unphased.
+ * @param hap the haplotype index
+ * @return the allele on the specified haplotype or -1 if the
+ * allele is missing
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code hap < 0 || hap >= this.nHaps()}
+ */
+ @Override
+ int allele(int hap);
+
+ /**
+ * Returns the number of marker alleles.
+ * @return the number of marker alleles.
+ */
+ int nAlleles();
+
+ /**
+ * Returns {@code true} if this instance stores the indices of haplotypes
+ * that carry non-major alleles, and returns {@code false} otherwise.
+ *
+ * @return {@code true} if this instance stores the indices of haplotypes
+ * that carry non-major alleles, and returns {@code false} otherwise
+ */
+ boolean storesNonMajorIndices();
+
+ /**
+ * Returns the index of the major allele.
+ * @return the index of the major allele
+ * @throws UnsupportedOperationException if
+ * {@code storesNonMajorIndices() == false}
+ */
+ int majorAllele();
+
+ /**
+ * Returns the number of haplotypes that carry the specified allele.
+ * @param allele an allele index
+ * @return the number of haplotypes that carry the specified allele
+ * @throws IllegalArgumentException if
+ * {@code allele == this.majorAllele()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.nAlleles()}
+ * @throws UnsupportedOperationException if
+ * {@code storesNonMajorIndices() == false}
+ */
+ int alleleCount(int allele);
+
+ /**
+ * Returns index of the haplotype that carries the specified copy of the
+ * specified allele.
+ * @param allele an allele index
+ * @param copy a copy index
+ * @return index of the haplotype that carries the specified allele
+ * @throws IllegalArgumentException if
+ * {@code allele == this.majorAllele()}
+ * @throws IndexOutOfBoundsException if
+ * {@code allele < 0 || allele >= this.nAlleles()}
+ * @throws IndexOutOfBoundsException if
+ * {@code copy < 0 || copy >= this.alleleCount(allele)}
+ * @throws UnsupportedOperationException if
+ * {@code storesNonMajorIndices() == false}
+ */
+ int hapIndex(int allele, int copy);
+
+    /**
+     * Returns a VCF record corresponding to {@code this}. The returned
+     * VCF record will have missing QUAL and INFO fields, will have "PASS"
+     * in the filter field, and will have a GT format field. An allele
+     * reported as -1 (missing) is written with
+     * {@code Const.MISSING_DATA_CHAR}.
+     * @return a VCF record corresponding to {@code this}
+     */
+    default String toVcfRec() {
+        StringBuilder sb = new StringBuilder(100);
+        sb.append(marker()).append(Const.tab);
+        sb.append(Const.MISSING_DATA_CHAR).append(Const.tab);   // QUAL
+        sb.append("PASS").append(Const.tab);                    // FILTER
+        sb.append(Const.MISSING_DATA_CHAR).append(Const.tab);   // INFO
+        sb.append("GT");                                        // FORMAT
+        for (int j=0, n=nSamples(); j<n; ++j) {
+            int a1 = allele1(j);
+            int a2 = allele2(j);
+            sb.append(Const.tab);
+            // A conditional expression mixing a char and an int variable is
+            // promoted to int and would append a number, so branch instead.
+            if (a1 == -1) { sb.append(Const.MISSING_DATA_CHAR); } else { sb.append(a1); }
+            sb.append(isPhased(j) ? Const.phasedSep : Const.unphasedSep);
+            if (a2 == -1) { sb.append(Const.MISSING_DATA_CHAR); } else { sb.append(a2); }
+        }
+        return sb.toString();
+    }
+}
diff --git a/vcf/VcfEmissionCompressor.java b/vcf/VcfEmissionCompressor.java
new file mode 100644
index 0000000..928f792
--- /dev/null
+++ b/vcf/VcfEmissionCompressor.java
@@ -0,0 +1,298 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.IntArray;
+import blbutil.IntList;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * <p>Class {@code VcfEmissionCompressor} compresses a sequence of
+ * {@code VcfEmission} objects which contain reference genotype data.
+ * Reference genotype data does not contain any unphased or missing genotypes.
+ * Compression is performed by storing the list of distinct allele sequences
+ * and the allele sequence carried by each haplotype.
+ * </p>
+ * <p>Class {@code VcfEmissionCompressor} is not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class VcfEmissionCompressor {
+
+ private final Samples samples;
+ private final int capacity;
+ private final int[] hapToSeq;
+ private final List<Marker> markers;
+ private final List<IntList> alleleToSeqList;
+ private final List<IntList> sequences;
+ private final IntList copiedSeqToSrcSeq;
+
+ private int nSeq;
+
+ /**
+ * Constructs a new {@code VcfEmissionCompressor} for the specified
+ * samples.
+ * @param samples the list of samples whose data will be compressed
+ * @param capacity the maximum number of allele sequences that is
+ * permitted to exist in the list of compressed {@code VcfEmission} objects
+ * @throws IllegalArgumentException if {@code capacity < 0}
+ * @throws NullPointerException if {@code samples == null}
+ */
+ public VcfEmissionCompressor(Samples samples, int capacity) {
+ if (samples == null) {
+ throw new NullPointerException("samples==null");
+ }
+ if (capacity < 0) {
+ throw new IllegalArgumentException(String.valueOf(capacity));
+ }
+ this.samples = samples;
+ this.capacity = capacity;
+ this.hapToSeq = new int[2*samples.nSamples()];
+ this.markers = new ArrayList<>(100);
+ this.sequences = new ArrayList<>(100);
+ this.alleleToSeqList = new ArrayList<>(100);
+ this.copiedSeqToSrcSeq = new IntList(50);
+ clear();
+ }
+
+ /**
+ * Returns the list of samples whose phased genotype data will be compressed.
+ * @return the list of samples whose phased genotype data will be compressed
+ */
+ public Samples samples() {
+ return samples;
+ }
+
+ /**
+ * Returns the maximum number of allele sequences that is
+ * permitted to exist in the list of compressed {@code VcfEmission} objects.
+ * @return the maximum number of allele sequences that is
+ * permitted to exist in the list of compressed {@code VcfEmission} objects
+ */
+ public int capacity() {
+ return capacity;
+ }
+
+ /**
+ * Attempts to add the specified {@code VcfEmission} object to the list of
+ * compressed {@code VcfEmission} objects, and returns {@code true}
+ * if the {@code VcfEmission} object was added. The object is not added
+ * if its chromosome differs from the chromosome of the first marker in
+ * the list, if its marker has more than {@code this.capacity()} alleles,
+ * or if adding it would require creating a new allele sequence when
+ * {@code this.nSeq() == this.capacity()}.
+ *
+ * @param em reference genotypes for a marker
+ * @return {@code true} if the specified {@code VcfEmission} object was
+ * added to the list of compressed markers, and {@code false}
+ * otherwise
+ * @throws IllegalArgumentException if
+ * {@code em.samples().equals(this.samples()) == false}
+ * @throws IllegalArgumentException if {@code em.isRefData() == false}
+ * @throws NullPointerException if {@code em == null}
+ */
+ public boolean addToCompessedList(VcfEmission em) {
+ checkEmission(em);
+ if (inconsistentChrom(em) || em.marker().nAlleles() > capacity) {
+ return false;
+ }
+ boolean success = true;
+ // remember the sequence count so that a failed attempt can be undone
+ int startNSeq = nSeq;
+ // the allele-to-sequence lookups are per-marker state: reset them
+ for (int j=0; j<nSeq; ++j) {
+ alleleToSeqList.get(j).clear();
+ }
+ copiedSeqToSrcSeq.clear();
+ // stop at the first haplotype that cannot be assigned to a sequence
+ for (int h=0; h<hapToSeq.length && success; ++h) {
+ success = addHaplotype(em, h);
+ }
+ if (success) {
+ markers.add(em.marker());
+ }
+ else {
+ rollBackChanges(startNSeq);
+ }
+ return success;
+ }
+
+ private void checkEmission(VcfEmission em) {
+ if (em.samples().equals(samples)==false) {
+ throw new IllegalArgumentException("inconsistent samples");
+ }
+ if (em.isRefData()==false) {
+ throw new IllegalArgumentException("unphased data");
+ }
+ }
+
+ private boolean inconsistentChrom(VcfEmission em) {
+ return (markers.isEmpty()==false
+ && em.marker().chromIndex() != markers.get(0).chromIndex());
+ }
+
+ /*
+ * Extends the allele sequence carried by the specified haplotype with the
+ * haplotype's allele at the specified marker, and returns true on success.
+ * Returns false if a new allele sequence would have to be created but
+ * nSeq already equals capacity.
+ *
+ * For each sequence, alleleToSeqList stores a flat list of
+ * (allele, sequence index) pairs for the marker being added: the element
+ * at an even position is an allele and the following element is the index
+ * of the sequence that extends the original sequence with that allele.
+ */
+ private boolean addHaplotype(VcfEmission em, int hap) {
+ int seq = hapToSeq[hap];
+ int allele = em.allele(hap);
+ IntList alleleToSeq = alleleToSeqList.get(seq);
+ if (alleleToSeq.isEmpty()) {
+ // first haplotype seen for this sequence at this marker: the sequence
+ // keeps its index and is extended in place with the allele
+ alleleToSeq.add(allele);
+ alleleToSeq.add(seq);
+ sequences.get(seq).add(allele);
+ }
+ else {
+ // search the even positions of the pair list for the allele
+ int index=0;
+ while (index < alleleToSeq.size()
+ && alleleToSeq.get(index)!=allele) {
+ index+=2;
+ }
+ if (index==alleleToSeq.size()) {
+ // allele not seen yet for this sequence: a new sequence is required
+ if (nSeq == capacity) {
+ return false;
+ }
+ else {
+ // the new sequence (index nSeq - 1 after the call) starts as a copy
+ // of the source sequence at the previously added markers
+ addCopyOfSequence(seq);
+ sequences.get(nSeq - 1).add(allele);
+ alleleToSeq.add(allele);
+ alleleToSeq.add(nSeq - 1);
+ // record the source sequence so that rollBackChanges can undo the move
+ copiedSeqToSrcSeq.add(seq);
+ }
+ }
+ // the element after the matching allele is the sequence index to use
+ hapToSeq[hap] = alleleToSeq.get(index+1);
+ }
+ return true;
+ }
+
+ /*
+ * Undoes the changes made by a failed attempt to add a marker, restoring
+ * the state that existed when there were startNSeq allele sequences and
+ * markers.size() stored markers.
+ */
+ private void rollBackChanges(int startNSeq) {
+     // discard the sequences that were created for the rejected marker
+     alleleToSeqList.subList(startNSeq, nSeq).clear();
+     sequences.subList(startNSeq, nSeq).clear();
+     nSeq = startNSeq;
+     // point each haplotype that was moved to a discarded sequence back at
+     // the sequence from which the discarded sequence was copied
+     for (int h=0; h<hapToSeq.length; ++h) {
+         if (hapToSeq[h] >= startNSeq) {
+             hapToSeq[h] = copiedSeqToSrcSeq.get(hapToSeq[h] - startNSeq);
+         }
+     }
+     // A surviving sequence may already have had the rejected marker's allele
+     // appended to it. Remove that allele so that every sequence again stores
+     // exactly one allele per accepted marker; otherwise alleles of markers
+     // added later would be stored at the wrong position.
+     int nMarkers = markers.size();
+     for (int j=0; j<nSeq; ++j) {
+         IntList seq = sequences.get(j);
+         if (seq.size() > nMarkers) {
+             IntList trimmed = new IntList(100);
+             for (int m=0; m<nMarkers; ++m) {
+                 trimmed.add(seq.get(m));
+             }
+             sequences.set(j, trimmed);
+         }
+     }
+ }
+
+ /**
+ * Returns the size of the list of compressed {@code VcfEmission} objects.
+ * @return the size of the list of compressed {@code VcfEmission} objects
+ */
+ public int size() {
+ return markers.size();
+ }
+
+ /**
+ * Returns the number of distinct allele sequences in the list of
+ * compressed {@code VcfEmission} objects.
+ * @return the number of distinct allele sequences
+ */
+ public int nSeq() {
+ return nSeq;
+ }
+
+ /**
+ * Returns the specified marker.
+ * @param index an index in the list of compressed {@code VcfEmission}
+ * objects
+ * @return the specified marker
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public Marker marker(int index) {
+ return markers.get(index);
+ }
+
+ /**
+ * Returns an array of length {@code 2*this.samples().nSamples()}, whose
+ * {@code j}-th element is the index of the allele sequence carried by the
+ * {@code j}-th haplotype in the list of compressed {@code VcfEmission}
+ * objects.
+ * @return an array mapping haplotype indices to allele sequence indices
+ */
+ public IntArray hapToSeq() {
+ if (nSeq==0) {
+ return IntArray.create(hapToSeq, 0, 0);
+ }
+ else {
+ return IntArray.create(hapToSeq, 0, nSeq-1);
+ }
+ }
+
+ /**
+ * Returns an array of length {@code this.nSeq()} whose {@code j}-th
+ * element is the allele carried by the {@code j}-th distinct allele
+ * sequence at {@code this.marker(index)}.
+ * @param index an index in the list of compressed {@code VcfEmission}
+ * objects
+ * @return an array mapping allele sequence indices to allele indices.
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.size()}
+ */
+ public IntArray seqToAllele(int index) {
+ int maxAllele = markers.get(index).nAlleles() - 1;
+ int[] seq2allele = new int[nSeq];
+ for (int j=0; j<seq2allele.length; ++j) {
+ seq2allele[j] = sequences.get(j).get(index);
+ }
+ return IntArray.create(seq2allele, 0, maxAllele);
+ }
+
+ /**
+ * Returns the list of compressed {@code VcfEmission} objects.
+ *
+ * @return the list of compressed {@code VcfEmission} objects
+ */
+ public List<VcfEmission> getCompressedList() {
+ List<VcfEmission> list = new ArrayList<>(markers.size());
+ IntArray hap2seq = hapToSeq();
+ for (int j=0, n=markers.size(); j<n; ++j) {
+ Marker marker = markers.get(j);
+ IntArray seq2allele = seqToAllele(j);
+ list.add(new SeqCodedRefGT(marker, samples, hap2seq, seq2allele));
+ }
+ return list;
+ }
+
+ /**
+ * Clears the list of compressed {@code VcfEmission} objects.
+ */
+ public final void clear() {
+ Arrays.fill(hapToSeq, 0);
+ markers.clear();
+ sequences.clear();
+ alleleToSeqList.clear();
+ copiedSeqToSrcSeq.clear();
+ nSeq = 0;
+ addEmptySequence();
+ }
+
+ private void addEmptySequence() {
+ alleleToSeqList.add(new IntList(4));
+ sequences.add(new IntList(100));
+ ++nSeq;
+ }
+
+ private void addCopyOfSequence(int seq) {
+ addEmptySequence();
+ IntList srcSeq = sequences.get(seq);
+ IntList destSeq = sequences.get(nSeq - 1);
+ assert destSeq.isEmpty();
+ for (int j=0, n=markers.size(); j<n; ++j) {
+ destSeq.add(srcSeq.get(j));
+ }
+ }
+}
\ No newline at end of file
diff --git a/vcf/VcfHeader.java b/vcf/VcfHeader.java
new file mode 100644
index 0000000..4d6a1ba
--- /dev/null
+++ b/vcf/VcfHeader.java
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.FileIt;
+import blbutil.Filter;
+import blbutil.StringUtil;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * <p>Class {@code VcfHeader} represents the Variant Call Format (VCF)
+ * meta-information lines and the Variant Call Format header line
+ * that precede the first Variant Call Format record.
+ * </p>
+ * <p>Instances of class {@code VcfHeader} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class VcfHeader {
+
+ private static final String SHORT_HEADER_PREFIX= "#CHROM" + Const.tab + "POS"
+ + Const.tab + "ID" + Const.tab + "REF" + Const.tab + "ALT"
+ + Const.tab + "QUAL" + Const.tab + "FILTER" + Const.tab + "INFO";
+
+ /**
+ * A string equal to the first nine tab-delimited fields of a VCF header
+ * line that contains sample data.
+ */
+ public static final String HEADER_PREFIX =
+ SHORT_HEADER_PREFIX + Const.tab + "FORMAT";
+
+ private static final int sampleOffset = 9;
+
+ private final File file; // null if source is standard input
+ private final VcfMetaInfo[] metaInfoLines;
+ private final String headerLine;
+ private final int nHeaderFields;
+ private final int[] includedIndices;
+ private final Samples samples;
+
+ /**
+ * Constructs a new {@code VcfHeader} object from the VCF
+ * meta-information lines and the VCF header line returned by the
+ * specified {@code FileIterator<String>}. This constructor will advance
+ * the {@code FileIterator<String>} to the point before the first VCF record
+ * in the file. The {@code VcfHeader} object will have no excluded samples.
+ * @param it an iterator that returns lines of VCF file
+ *
+ * @throws IllegalArgumentException if any of the meta-information lines
+ * returned by the specified {@code FileIterator<String>} does not conform
+ * to the VCF specification
+ * @throws IllegalArgumentException if the header lines returned by the
+ * specified {@code FileIterator<String>} does not conform to the VCF
+ * specification
+ * @throws IllegalArgumentException if no header line is returned by the
+ * specified {@code FileIterator<String>}
+ *
+ * @throws NullPointerException if {@code it == null}
+ */
+ public VcfHeader(FileIt<String> it) {
+ this(it, Filter.acceptAllFilter());
+ }
+ /**
+ * Constructs a new {@code VcfHeader} object from the VCF
+ * meta-information lines and the VCF header line returned by the
+ * specified {@code FileIterator<String>}. This constructor will advance
+ * the {@code FileIterator<String>} to the point before the first VCF record in the file.
+ * @param it an iterator that returns lines of a VCF file
+ * @param sampleFilter a sample filter or {@code null}
+ *
+ * @throws IllegalArgumentException if any of the meta-information lines
+ * returned by the specified {@code FileIterator<String>} does not conform
+ * to the VCF specification
+ * @throws IllegalArgumentException if the header lines returned by the
+ * specified {@code FileIterator<String>} does not conform to the VCF
+ * specification
+ * @throws IllegalArgumentException if no header line is returned by the
+ * specified {@code FileIterator<String>}
+ *
+ * @throws NullPointerException if {@code it == null}
+ */
+ public VcfHeader(FileIt<String> it, Filter<String> sampleFilter) {
+ if (sampleFilter==null) {
+ sampleFilter = Filter.acceptAllFilter();
+ }
+ List<VcfMetaInfo> metaInfo = new ArrayList<>(20);
+ String candidateHeader = null;
+ while (it.hasNext() && candidateHeader==null) {
+ String line = it.next().trim();
+ if (line.startsWith(VcfMetaInfo.PREFIX)) {
+ metaInfo.add(new VcfMetaInfo(line));
+ }
+ else {
+ candidateHeader = line;
+ }
+ }
+ checkHeaderLine(candidateHeader, it.file());
+ String[] headerFields = StringUtil.getFields(candidateHeader, Const.tab);
+
+ this.file = it.file();
+ this.metaInfoLines = metaInfo.toArray(new VcfMetaInfo[0]);
+ this.headerLine = candidateHeader;
+ this.nHeaderFields = headerFields.length;
+ this.includedIndices = includedIndices(headerFields, sampleFilter);
+ this.samples = samples(headerFields, includedIndices);
+ }
+
+ /*
+ * Verifies that the specified line is a VCF header line. The line must
+ * start with "#" and must either begin with HEADER_PREFIX or be equal to
+ * SHORT_HEADER_PREFIX; otherwise an IllegalArgumentException describing
+ * the data source and the offending line is thrown.
+ */
+ private static void checkHeaderLine(String line, File file) {
+     Object source = (file != null) ? file : "stdin";
+     boolean startsWithHash = (line != null) && line.startsWith("#");
+     if (!startsWithHash) {
+         throw new IllegalArgumentException(
+                 "Missing line (#CHROM ...) after meta-information lines"
+                 + Const.nl + "File source: " + source
+                 + Const.nl + line);
+     }
+     // a header without sample columns consists of the short prefix only
+     if (line.startsWith(HEADER_PREFIX) || line.equals(SHORT_HEADER_PREFIX)) {
+         return;
+     }
+     String msg = "Missing header line (file source: "
+             + source + ")"
+             + Const.nl + "The first line after the initial meta-information lines"
+             + Const.nl + "does not begin with: "
+             + Const.nl + HEADER_PREFIX
+             + Const.nl + line
+             + Const.nl + "The data fields in the header line must be tab-separated.";
+     throw new IllegalArgumentException(msg);
+ }
+
+ private static int[] includedIndices(String[] headerFields,
+ Filter<String> sampleFilter) {
+ int nUnfilteredSamples = Math.max(headerFields.length - sampleOffset, 0);
+ int[] includedIndices = new int[nUnfilteredSamples];
+ int index = 0;
+ for (int j=0; j<nUnfilteredSamples; ++j) {
+ if (sampleFilter.accept(headerFields[sampleOffset + j])) {
+ includedIndices[index++] = j;
+ }
+ }
+ if (index < includedIndices.length) {
+ includedIndices = Arrays.copyOf(includedIndices, index);
+ }
+ return includedIndices;
+ }
+
+ private Samples samples(String[] headerFields, int[] includedIndices) {
+ String[] ids = new String[includedIndices.length];
+ for (int j=0; j<ids.length; ++j) {
+ ids[j] = headerFields[sampleOffset + includedIndices[j]];
+ }
+ return Samples.fromIds(ids);
+ }
+
+ /**
+ * Returns the file from which data are read, or returns
+ * {@code null} if the source is standard input.
+ * @return the file from which data are read, or
+ * {@code null} if the source is standard input
+ */
+ public File file() {
+ return file;
+ }
+
+ /**
+ * Returns the number of VCF meta-information lines. VCF meta-information
+ * lines are lines that precede the VCF header line. A VCF meta-information
+ * line must begin with "##".
+ *
+ * @return the number of VCF meta-information lines
+ */
+ public int nMetaInfoLines() {
+ return metaInfoLines.length;
+ }
+
+ /**
+ * Returns the specified VCF meta-information line.
+
+ * @param index a VCF meta-information line index
+ * @return the specified VCF meta-information line
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code index < 0 || index >= this.nMetaInfoLines()}
+ */
+ public VcfMetaInfo metaInfoLine(int index) {
+ return metaInfoLines[index];
+ }
+
+ /**
+ * Returns the VCF header line. The VCF header line begins with "#CHROM".
+ * @return the VCF header line
+ */
+ public String headerLine() {
+ return headerLine;
+ }
+
+ /**
+ * Returns the number of fields in the VCF header line before sample
+ * exclusions.
+ * @return the number of fields in the VCF header line before sample
+ * exclusions
+ */
+ public int nHeaderFields() {
+ return nHeaderFields;
+ }
+
+ /**
+ * Returns the number of samples before sample exclusions.
+ * @return the number of samples before sample exclusions
+ */
+ public int nUnfilteredSamples() {
+ return Math.max(0, nHeaderFields - sampleOffset);
+ }
+
+ /**
+ * Returns the index of the specified sample in the original
+ * list of samples before sample exclusions.
+ * @param sample the index of a sample in the list of samples after
+ * sample exclusions
+ * @return the index of the specified sample in the original
+ * list of samples before sample exclusions
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ public int unfilteredSampleIndex(int sample) {
+ return includedIndices[sample];
+ }
+
+ /**
+ * Returns the number of samples after sample exclusions.
+ * @return the number of samples after sample exclusions
+ */
+ public int nSamples() {
+ return samples.nSamples();
+ }
+
+ /**
+ * Return the list of samples after sample exclusions.
+ * @return the list of samples after sample exclusions
+ */
+ public Samples samples() {
+ return samples;
+ }
+
+ /**
+ * Returns {@code this.samples().ids()}.
+ * @return {@code this.samples().ids()}
+ */
+ public String[] sampleIds() {
+ return samples.ids();
+ }
+
+ /**
+ * Returns the VCF meta-information lines and the VCF header line used to
+ * construct {@code this}.
+ * @return the VCF meta-information lines and the VCF header line used to
+ * construct {@code this}
+ */
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(400);
+ for (int j=0; j<metaInfoLines.length; ++j) {
+ sb.append(metaInfoLines[j]);
+ sb.append(Const.nl);
+ }
+ sb.append(headerLine);
+ sb.append(Const.nl);
+ return sb.toString();
+ }
+}
diff --git a/vcf/VcfIt.java b/vcf/VcfIt.java
new file mode 100644
index 0000000..5715395
--- /dev/null
+++ b/vcf/VcfIt.java
@@ -0,0 +1,396 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.SampleFileIt;
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.FileIt;
+import blbutil.Filter;
+import blbutil.Utilities;
+import java.io.File;
+import java.util.ArrayDeque;
+import java.util.Arrays;
+import java.util.Deque;
+import java.util.List;
+import java.util.NoSuchElementException;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.function.BiFunction;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+/**
+ * <p>Class {@code VcfIt} represents an iterator whose {@code next()}
+ * method returns an object storing data from a VCF record.
+ * </p>
+ * <p>Instances of class {@code VcfIt} are not thread-safe.
+ * </p>
+ * <p>Methods of this class will terminate the Java Virtual Machine with
+ * an error message if an I/O error or file format error is detected.
+ * </p>
+ * @param <E> the type parameter
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class VcfIt<E extends MarkerContainer> implements SampleFileIt<E> {
+
+ private static final float DEFAULT_MAX_LR = Float.MAX_VALUE;
+
+ private final VcfHeader vcfHeader;
+ private final FileIt<String> it;
+ private final Function<String, E> mapper;
+ private final Filter<Marker> markerFilter;
+ private final Thread fileReaderThread;
+ private volatile boolean stopFileReadingThread = false;
+
+ private final BlockingQueue<String[]> stringBuffers;
+ private final Deque<E> emBuffer;
+
+ /**
+ * The default number of VCF records stored in a buffer, which is 1000.
+ */
+ public static final int DEFAULT_BUFFER_SIZE = 1000;
+
+ /**
+ * A function mapping a string VCF record with GT or GL format fields
+ * to a {@code VcfRecord} object.
+ */
+ public static final BiFunction<VcfHeader, String, VcfRecord> toGTGLRec
+ = (VcfHeader h, String s) -> VcfRecord.fromGTGL(h, s, DEFAULT_MAX_LR);
+
+ /**
+ * A function mapping a string VCF record with GL format fields
+ * to a {@code VcfRecord} object.
+ */
+ public static final BiFunction<VcfHeader, String, VcfRecord> toGLRec
+ = (VcfHeader h, String s) -> VcfRecord.fromGL(h, s, DEFAULT_MAX_LR);
+
+ /**
+ * A function mapping a string VCF record with GT format fields
+ * to a {@code VcfEmission} object.
+ */
+ public static final BiFunction<VcfHeader, String, VcfEmission> toBitSetGT
+ = (VcfHeader h, String s) -> new BitSetGT(h, s);
+
+ /**
+ * Create and returns a new {@code VcfIt} instance from the specified
+ * objects.
+ * @param <R> the type returned by the returned {@code VcfIt}
+ * @param strIt an iterator that returns lines of a VCF file
+ * @param recMapper a function mapping string VCF records to
+ * {@code VcfEmission} objects
+ * @return a new {@code VcfIt} instance
+ * @throws IllegalArgumentException if a format error is detected in a
+ * line of a VCF file returned by {@code strIt}
+ * @throws NullPointerException if
+ * {@code strIt == null || recMapper == null}
+ */
+ public static <R extends VcfEmission> VcfIt<R> create(
+ FileIt<String> strIt, BiFunction<VcfHeader, String, R> recMapper) {
+ return VcfIt.create(strIt, Filter.acceptAllFilter(), recMapper);
+ }
+
+ /**
+ * Create and returns a new {@code VcfIt} instance from the specified
+ * objects.
+ * @param <R> the type returned by the returned {@code VcfIt}
+ * @param strIt an iterator that returns lines of a VCF file
+ * @param sampleFilter a sample filter or {@code null}
+ * @param recMapper a function mapping string VCF records to
+ * {@code VcfEmission} objects
+ * @return a new {@code VcfIt} instance
+ * @throws IllegalArgumentException if a format error is detected in a
+ * line of a VCF file returned by {@code strIt}
+ * @throws NullPointerException if
+ * {@code strIt == null || recMapper == null}
+ */
+ public static <R extends VcfEmission> VcfIt<R> create(
+ FileIt<String> strIt, Filter<String> sampleFilter,
+ BiFunction<VcfHeader, String, R> recMapper) {
+ return VcfIt.create(strIt, sampleFilter, Filter.acceptAllFilter(),
+ recMapper);
+ }
+
+ /**
+ * Create and returns a new {@code VcfIt} instance from the specified
+ * objects.
+ * @param <R> the type returned by the returned {@code VcfIt}
+ * @param strIt an iterator that returns lines of a VCF file
+ * @param sampleFilter a sample filter or {@code null}
+ * @param markerFilter a marker filter or {@code null}
+ * @param recMapper a function mapping string VCF records to
+ * {@code VcfEmission} objects
+ * @return a new {@code VcfIt} instance
+ * @throws IllegalArgumentException if a format error is detected in a
+ * line of a VCF file returned by {@code strIt}
+ * @throws NullPointerException if
+ * {@code strIt == null || recMapper == null}
+ */
+ public static <R extends VcfEmission> VcfIt<R> create(
+ FileIt<String> strIt, Filter<String> sampleFilter,
+ Filter<Marker> markerFilter,
+ BiFunction<VcfHeader, String, R> recMapper) {
+ return VcfIt.create(strIt, sampleFilter, markerFilter, recMapper,
+ DEFAULT_BUFFER_SIZE);
+ }
+
+ /**
+ * Create and returns a new {@code VcfIt} instance from the specified
+ * objects.
+ * @param <R> the type returned by the returned {@code VcfIt}
+ * @param strIt an iterator that returns lines of a VCF file
+ * @param sampleFilter a sample filter or {@code null}
+ * @param markerFilter a marker filter or {@code null}
+ * @param recMapper a function mapping string VCF records to
+ * {@code VcfEmission} objects
+ * @param bufferSize the buffer size
+ * @return a new {@code VcfIt} instance
+ * @throws IllegalArgumentException if a format error is detected in a
+ * line of a VCF file returned by {@code strIt}
+ * @throws IllegalArgumentException if {@code bufferSize < 1}
+ * @throws NullPointerException if
+ * {@code strIt == null || recMapper == null}
+ */
+ public static <R extends VcfEmission> VcfIt<R> create(
+ FileIt<String> strIt, Filter<String> sampleFilter,
+ Filter<Marker> markerFilter,
+ BiFunction<VcfHeader, String, R> recMapper, int bufferSize) {
+ VcfIt<R> vcfIt = new VcfIt<>(strIt, sampleFilter, markerFilter,
+ recMapper, bufferSize);
+ vcfIt.start();
+ return vcfIt;
+ }
+
+ private VcfIt(FileIt<String> it, Filter<String> sampleFilter,
+ Filter<Marker> markerFilter,
+ BiFunction<VcfHeader, String, E> recMapper, int bufferSize) {
+ if (bufferSize < 1) {
+ throw new IllegalArgumentException(String.valueOf(bufferSize));
+ }
+ if (markerFilter==null) {
+ markerFilter = Filter.acceptAllFilter();
+ }
+ this.vcfHeader = new VcfHeader(it, sampleFilter);
+ this.it = it;
+ this.mapper = (String s) -> recMapper.apply(vcfHeader, s);
+ this.markerFilter = markerFilter;
+ this.stringBuffers = new ArrayBlockingQueue<>(1);
+ this.emBuffer = new ArrayDeque<>(bufferSize);
+ this.fileReaderThread = fileReadingThread();
+ }
+
+ private void start() {
+ this.fileReaderThread.setDaemon(true);
+ this.fileReaderThread.start();
+ fillEmissionBuffer();
+ if (emBuffer.isEmpty()) {
+ noRecordFoundError(it);
+ }
+ }
+
+ private void noRecordFoundError(FileIt<String> it) {
+ if (it.hasNext()==false) {
+ StringBuilder sb = new StringBuilder(100);
+ sb.append("No VCF records found (data source: ");
+ sb.append(it.file()==null ? "stdin" : it.file());
+ sb.append(")");
+ sb.append(Const.nl);
+ sb.append("Check that the chromosome identifiers are the same in each input VCF");
+ sb.append(Const.nl);
+ sb.append("file and in the \'chrom=\' command line argument (if \'chrom=\' is used).");
+ throw new IllegalArgumentException(sb.toString());
+ }
+ }
+
+ /*
+ * Creates (but does not start) the thread that reads lines from the
+ * underlying line iterator and passes them to the consumer through the
+ * stringBuffers queue. Each array put in the queue holds between 1 and
+ * stringBufferSize(first line) consecutive lines that all begin with the
+ * same first field (including its terminating tab) as the first line
+ * of the array. When the input is exhausted an empty array is put in the
+ * queue as an end-of-data sentinel, unless the stopFileReadingThread
+ * flag has been set.
+ */
+ private Thread fileReadingThread() {
+ Runnable runnable = () -> {
+ String line = readLine(it);
+ // the buffer size is determined once, from the length of the first line
+ int bufferSize = stringBufferSize(line);
+ while (line != null && stopFileReadingThread == false) {
+ String chromPlusTab = chromFieldPlusTab(line);
+ String[] sa = new String[bufferSize];
+ int size = 0;
+ // fill the array until it is full, the input ends, or the first
+ // field changes
+ while (line != null && size < bufferSize
+ && line.startsWith(chromPlusTab)) {
+ sa[size++] = line;
+ line = readLine(it);
+ }
+ if (size < bufferSize) {
+ // trim so that the array length equals the number of lines stored
+ sa = Arrays.copyOf(sa, size);
+ }
+ putInBlockingQueue(stringBuffers, sa);
+ }
+ if (stopFileReadingThread == false) {
+ putInBlockingQueue(stringBuffers, new String[0]); // sentinel
+ }
+ };
+ return new Thread(runnable);
+ }
+
+ /*
+ * Returns the number of VCF records to store in each string buffer. The
+ * value is derived from the length of the specified record and from the
+ * maximum amount of memory reported by the Java Virtual Machine. Returns
+ * 0 if line == null, and otherwise returns a value between
+ * DEFAULT_BUFFER_SIZE/20 and DEFAULT_BUFFER_SIZE inclusive.
+ */
+ private static int stringBufferSize(String line) {
+     if (line == null) {
+         return 0;
+     }
+     // two bytes per char; use at least 1 so that the division below cannot
+     // fail with an ArithmeticException for an empty line
+     long nBytesPerLine = Math.max(1L, 2L*line.length());
+     long maxMem = Runtime.getRuntime().maxMemory();
+     if (maxMem == Long.MAX_VALUE) {
+         // No memory limit is reported: assume 500 GiB. The product must use
+         // long operands because the int expression 500 * (1 << 30) overflows
+         // and evaluates to 0.
+         maxMem = 500L * (1L << 30);
+     }
+     long bufferSize = maxMem / (100*nBytesPerLine);
+     bufferSize = Math.min(bufferSize, DEFAULT_BUFFER_SIZE);
+     bufferSize = Math.max(bufferSize, DEFAULT_BUFFER_SIZE/20);
+     return (int) bufferSize;
+ }
+
+ private static <E> void putInBlockingQueue(BlockingQueue<E> q, E e) {
+ try {
+ q.put(e);
+ } catch (InterruptedException ex) {
+ Utilities.exit("Error: InterruptedException", ex);
+ }
+ }
+
+ private static <E> E takeFromBlockingQueue(BlockingQueue<E> q) {
+ try {
+ return q.take();
+ } catch (InterruptedException ex) {
+ Utilities.exit("Error: InterruptedException", ex);
+ }
+ assert false;
+ return null;
+ }
+
+ /*
+ * Returns the first tab-delimited field of the specified VCF record
+ * followed by the tab character that terminates the field. If the record
+ * does not contain a tab character, the error is reported by calling
+ * Utilities.exit.
+ */
+ private static String chromFieldPlusTab(String vcfRecord) {
+     int tabIndex = vcfRecord.indexOf(Const.tab);
+     if (tabIndex == -1) {
+         String s = Const.nl + "ERROR: Missing tab delimiter in VCF Record:"
+                 + Const.nl + vcfRecord
+                 + Const.nl + "Exiting Program";
+         Utilities.exit(s);
+     }
+     return vcfRecord.substring(0, tabIndex + 1);
+ }
+
+ /*
+ * Refills the (empty) emission buffer from the arrays of VCF records
+ * produced by the file reading thread. Arrays are taken from the
+ * stringBuffers queue until the emission buffer holds at least
+ * DEFAULT_BUFFER_SIZE elements or the empty sentinel array is taken.
+ * Each record is converted with the mapper, and converted records whose
+ * marker is rejected by the marker filter are discarded.
+ *
+ * NOTE(review): the loop bound is DEFAULT_BUFFER_SIZE; the bufferSize
+ * constructor argument is only used as the initial capacity of the
+ * emission buffer -- confirm that this is intended.
+ */
+ private void fillEmissionBuffer() {
+ assert emBuffer.isEmpty();
+ int lastLength = -1;
+ while (lastLength != 0 && emBuffer.size() < DEFAULT_BUFFER_SIZE) {
+ // blocks until the file reading thread supplies the next array
+ String[] stringBuffer = takeFromBlockingQueue(stringBuffers);
+ lastLength = stringBuffer.length;
+ if (stringBuffer.length>0) {
+ List<E> list = Arrays.stream(stringBuffer)
+ .parallel()
+ .map(mapper)
+ .filter(e -> markerFilter.accept(e.marker()))
+ .collect(Collectors.toList());
+ emBuffer.addAll(list);
+ }
+ else {
+ // put sentinel element back
+ // so that a later call finds it again instead of blocking on an
+ // empty queue
+ putInBlockingQueue(stringBuffers, stringBuffer);
+ }
+ }
+ }
+
+ /*
+ * Returns the next line returned by the specified iterator that contains
+ * a non-white-space character, or returns null if no such line remains.
+ */
+ private static String readLine(FileIt<String> it) {
+     // Blank lines are skipped. A blank line at the end of the input must
+     // also be skipped (null is returned) so that it is not handed to the
+     // caller and treated as a malformed VCF record.
+     while (it.hasNext()) {
+         String line = it.next();
+         if (line.trim().isEmpty() == false) {
+             return line;
+         }
+     }
+     return null;
+ }
+
+ /**
+ * Requests that the file reading thread stop, waits for that thread to
+ * finish, closes the underlying line iterator, and discards any
+ * buffered elements.
+ */
+ @Override
+ public void close() {
+ // set the flag first so that the reader sees it once it is unblocked
+ stopFileReadingThread = true;
+ stringBuffers.poll(); // unblock file reading thread
+ try {
+ fileReaderThread.join();
+ } catch (InterruptedException ex) {
+ Utilities.exit("Error: InterruptedException", ex);
+ }
+ it.close();
+ emBuffer.clear();
+ }
+
+ /**
+ * Returns {@code true} if the iteration has more elements, and returns
+ * {@code false} otherwise.
+ * @return {@code true} if the iteration has more elements
+ */
+ @Override
+ public boolean hasNext() {
+ return !emBuffer.isEmpty();
+ }
+
+ /**
+ * Returns the next element in the iteration.
+ * @return the next element in the iteration
+ * @throws NoSuchElementException if the iteration has no more elements.
+ */
+ @Override
+ public E next() {
+ if (!hasNext()) {
+ throw new NoSuchElementException();
+ }
+ E first = emBuffer.removeFirst();
+ if (emBuffer.isEmpty()) {
+ fillEmissionBuffer();
+ }
+ return first;
+ }
+
+ /**
+ * The {@code remove} method is not supported by this iterator.
+ * @throws UnsupportedOperationException if this method is invoked
+ */
+ @Override
+ public void remove() {
+ String s = "remove() is not supported by VcfIterator";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public File file() {
+ return it.file();
+ }
+
+ @Override
+ public Samples samples() {
+ return vcfHeader.samples();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(80);
+ sb.append(this.getClass().toString());
+ sb.append(" : ");
+ sb.append(it.file()==null ? "stdin" : it.file().toString());
+ return sb.toString();
+ }
+}
diff --git a/vcf/VcfMetaInfo.java b/vcf/VcfMetaInfo.java
new file mode 100644
index 0000000..2aad247
--- /dev/null
+++ b/vcf/VcfMetaInfo.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+/**
+ * <p>Class {@code VcfMetaInfo} represents a VCF meta-information line.
+ * </p>
+ * <p>Instances of class {@code VcfMetaInfo} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class VcfMetaInfo {
+
+ /**
+ * The VCF meta-information line prefix: "##"
+ */
+ public static final String PREFIX = "##";
+
+ /**
+ * The VCF meta-information line key-value delimiter: '='
+ */
+ public static final char DELIMITER = '=';
+
+ private final String line;
+ private final String key;
+ private final String value;
+
+ /**
+ * Constructs a {@code VcfMetaInfo} instance representing
+ * the specified VCF meta-information line.
+ *
+ * @param line a VCF meta-information line
+ *
+ * @throws IllegalArgumentException if the specified information line,
+ * after removing any beginning and ending white-space, does not begin with
+ * {@code VcfMetaInfo.PREFIX}, and does not contain non-empty key and
+ * value strings separated by the {@code VcfMetaInfo.DELIMITER} character
+ *
+ * @throws NullPointerException if {@code line == null}
+ */
+ public VcfMetaInfo(String line) {
+ line = line.trim();
+ if (line.startsWith(PREFIX)==false) {
+ String s = "VCF meta-information line: missing starting \""
+ + PREFIX + "\": " + line;
+ throw new IllegalArgumentException(s);
+ }
+ int index = line.indexOf(DELIMITER);
+ if (index <=0 || index == line.length() - 1) {
+ String s = "VCF meta-information line: missing \""
+ + DELIMITER + "\"";
+ throw new IllegalArgumentException(s);
+ }
+ this.line = line;
+ this.key = line.substring(2, index);
+ this.value = line.substring(index+1);
+ }
+
+ /**
+ * Returns the VCF meta-information line key.
+ * @return the VCF meta-information line key
+ */
+ public String key() {
+ return key;
+ }
+
+ /**
+ * Returns the VCF meta-information line value.
+ * @return the VCF meta-information line value
+ */
+ public String value() {
+ return value;
+ }
+
+ /**
+ * Returns the VCF meta-information line represented by {@code this}.
+ *
+ * @return the VCF meta-information line represented by {@code this}
+ */
+ @Override
+ public String toString() {
+ return line;
+ }
+}
diff --git a/vcf/VcfRecGTParser.java b/vcf/VcfRecGTParser.java
new file mode 100644
index 0000000..15b0a52
--- /dev/null
+++ b/vcf/VcfRecGTParser.java
@@ -0,0 +1,300 @@
+/*
+ * Copyright (C) 2015 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.StringUtil;
+import blbutil.Utilities;
+import java.io.File;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code VcfRecGTParser} parses VCF records and extracts the GT format
+ * field.
+ * </p>
+ * <p>Instances of class {@code VcfRecGTParser} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class VcfRecGTParser {
+
+ private final VcfHeader vcfHeader;
+ private final String vcfRec;
+ private final Marker marker;
+ private final int nSamples;
+
+ private int character; // points to delimiter at start of a field
+ private int currentSample;
+ private int unfilteredSample;
+
+ private int allele1;
+ private int allele2;
+ private boolean isPhased;
+
+ /**
+ * Constructs a new {@code VcfRecGTParser} object from the specified VCF
+ * record.
+ * @param vcfHeader the VCF meta-information lines and header line
+ * @param vcfRec the VCF record
+ * @throws IllegalArgumentException if
+ * {@code vcfHeader.nSamples() == 0}
+ * @throws IllegalArgumentException if a format error is detected in the
+ * {@code vcfRecord}
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRec == null}
+ */
+ public VcfRecGTParser(VcfHeader vcfHeader, String vcfRec) {
+ if (vcfHeader.nSamples()==0) {
+ throw new IllegalArgumentException("nSamples==0");
+ }
+ this.vcfHeader = vcfHeader;
+ this.vcfRec = vcfRec;
+ this.marker = new BasicMarker(vcfRec);
+ this.nSamples = vcfHeader.nSamples();
+
+ this.unfilteredSample = -1;
+ this.character = -1;
+ this.currentSample = -1;
+ skipFixedFields();
+ nextSample();
+ }
+
+ private void skipFixedFields() {
+ for (int j=0; j<8; ++j) {
+ character = vcfRec.indexOf(Const.tab, character + 1);
+ }
+ if (vcfRec.startsWith("GT:", character + 1) == false
+ && vcfRec.startsWith("GT\t", character + 1) == false) {
+ throw new IllegalArgumentException("invalid VCF rec: " + vcfRec);
+ }
+ character = vcfRec.indexOf(Const.tab, character + 1);
+ }
+
+ /**
+ * Returns the VCF meta-information lines and header line.
+ * @return the VCF meta-information lines and header line
+ */
+ public VcfHeader vcfHeader() {
+ return vcfHeader;
+ }
+
+ /**
+ * Returns the VCF record that is being parsed.
+ * @return the VCF record that is being parsed
+ */
+ public String vcfRecord() {
+ return vcfRec;
+ }
+
+ /**
+ * Returns the marker.
+ * @return the marker
+ */
+ public Marker marker() {
+ return marker;
+ }
+
+ /**
+ * Returns the index of the current sample, or
+ * -1 if {@code this.nextSample()} has not yet been invoked.
+ * @return the index of the current sample, or
+ * -1 if {@code this.nextSample()} has not yet been invoked
+ */
+ public int currentSample() {
+ return currentSample;
+ }
+
+ /**
+ * Returns the first allele of the genotype for the current sample.
+ * @return the first allele of the genotype for the current sample
+ */
+ public int allele1() {
+ return allele1;
+ }
+
+ /**
+ * Returns the second allele of the genotype for the current sample.
+ * @return the second allele of the genotype for the current sample
+ */
+ public int allele2() {
+ return allele2;
+ }
+
+ /**
+ * Returns {@code true} if the genotype for the current sample is phased,
+ * and returns {@code false} otherwise.
+ * @return {@code true} if the genotype for the current sample is phased
+ */
+ public boolean isPhased() {
+ return isPhased;
+ }
+
+ /**
+ * Returns the list of samples.
+ * @return the list of samples
+ */
+ public Samples samples() {
+ return vcfHeader.samples();
+ }
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ public int nSamples() {
+ return nSamples;
+ }
+
+ /**
+ * Increases the current sample index by one.
+ * @throws IndexOutOfBoundsException if
+ * {@code (this.currentSample() + 1 == this.nSamples())} immediately prior
+ * to the invocation of this method
+ * @throws IllegalArgumentException if a format error is detected in
+ * {@code this.vcfRecord()}
+ */
+ public void nextSample() {
+ ++currentSample;
+ if (currentSample == nSamples) {
+ throw new IndexOutOfBoundsException(String.valueOf(currentSample));
+ }
+ int nextUnfilteredSample = vcfHeader.unfilteredSampleIndex(currentSample);
+ while (++unfilteredSample < nextUnfilteredSample) {
+ if (character == -1) {
+ throwFieldCountError();
+ }
+ character = vcfRec.indexOf(Const.tab, character + 1);
+ }
+ if (character == -1) {
+ throwFieldCountError();
+ }
+ int end1 = end1(vcfRec, character + 1);
+ int end2 = end2(vcfRec, end1 + 1);
+ this.allele1 = allele(vcfRec, marker.nAlleles(), character + 1, end1);
+ this.allele2 = allele(vcfRec, marker.nAlleles(), end1 + 1, end2);
+ this.isPhased = vcfRec.charAt(end1) != Const.unphasedSep;
+ character = vcfRec.indexOf(Const.tab, end2);
+ }
+
+ /* returns exclusive end */
+ private static int end1(String rec, int start) {
+ if (start==rec.length()) {
+ throwGTFormatError(rec, rec.length());
+ }
+ int index = start;
+ while (index < rec.length()) {
+ char c = rec.charAt(index);
+ if (c == Const.unphasedSep || c == Const.phasedSep) {
+ return index;
+ }
+ else if (c == Const.colon || c == Const.tab) {
+ throwGTFormatError(rec, index+1);
+ }
+ ++index;
+ }
+ if (index==rec.length()) {
+ throwGTFormatError(rec, rec.length());
+ }
+ return index;
+ }
+
+ /* returns exclusive end */
+ private static int end2(String rec, int start) {
+ int index = start;
+ while (index < rec.length()) {
+ char c = rec.charAt(index);
+ if (c == Const.colon || c == Const.tab) {
+ return index;
+ }
+ ++index;
+ }
+ return index;
+ }
+
+ private static int allele(String vcfRecord, int nAlleles, int start,
+ int end) {
+ if (start==end) {
+ String s = "Missing sample allele: " + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ int a;
+ if (start + 1 == end) {
+ char c = vcfRecord.charAt(start);
+ switch (c) {
+ case '.' : a = -1; break;
+ case '0' : a = 0; break;
+ case '1' : a = 1; break;
+ case '2' : a = 2; break;
+ case '3' : a = 3; break;
+ case '4' : a = 4; break;
+ case '5' : a = 5; break;
+ case '6' : a = 6; break;
+ case '7' : a = 7; break;
+ case '8' : a = 8; break;
+ case '9' : a = 9; break;
+ default: String s = "invalid allele (" + c + "): "
+ + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ }
+ else {
+ a = Integer.parseInt(vcfRecord.substring(start, end));
+ if (a < 0) {
+ String s = "invalid allele (" + a + "): " + Const.nl + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ }
+ if (a >= nAlleles) {
+ String s = "invalid allele (" + a + "): " + Const.nl + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ return a;
+ }
+
+ private static void throwGTFormatError(String rec, int index) {
+ StringBuilder sb = new StringBuilder(1000);
+ sb.append("ERROR: Missing one or both alleles for a genotype:");
+ sb.append(Const.nl);
+ sb.append(rec.substring(0, index));
+ sb.append(Const.nl);
+ sb.append("Exiting Program");
+ sb.append(Const.nl);
+ Utilities.exit(sb.toString());
+ }
+
+ private void throwFieldCountError() {
+ File f = vcfHeader.file();
+ String[] fields = StringUtil.getFields(vcfRec, Const.tab);
+ StringBuilder sb = new StringBuilder(1000);
+ sb.append("VCF header line has ");
+ sb.append(vcfHeader.nHeaderFields());
+ sb.append(" fields, but data line has ");
+ sb.append(fields.length);
+ sb.append(" fields");
+ sb.append(Const.nl);
+ sb.append("File source: ");
+ sb.append((f!=null ? f.toString() : "stdin"));
+ sb.append(Const.nl);
+ sb.append(Arrays.toString(fields));
+ sb.append(Const.nl);
+ Utilities.exit(sb.toString());
+ }
+}
diff --git a/vcf/VcfRecord.java b/vcf/VcfRecord.java
new file mode 100644
index 0000000..51dbfb0
--- /dev/null
+++ b/vcf/VcfRecord.java
@@ -0,0 +1,676 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.Const;
+import blbutil.StringUtil;
+import java.io.File;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * <p>Class {@code VcfRecord} represents a VCF record.
+ * </p>
+ * <p>Instances of class {@code VcfRecord} are immutable.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class VcfRecord implements VcfEmission {
+
+ /**
+ * The VCF FORMAT code for log-scaled genotype likelihood data: "GL".
+ */
+ public static final String GL_FORMAT = "GL";
+
+ /**
+ * The VCF FORMAT code for phred-scaled genotype likelihood data: "PL".
+ */
+ public static final String PL_FORMAT = "PL";
+
+ private static final int sampleOffset = 9;
+
+ private final VcfHeader vcfHeader;
+ private final String vcfRecord;
+ private final int[] delimiters;
+ private final Marker marker;
+
+ private final String[] formatFields;
+ private final Map<String, Integer> formatMap;
+
+ private final VcfEmission gtEm;
+ private final float[] gls;
+
+ /**
+ * Returns the VCF genotype index for the specified pair of alleles.
+ * @param a1 the first allele
+ * @param a2 the second allele
+ * @return the VCF genotype index for the specified pair of alleles
+ * @throws IllegalArgumentException if {@code a1 < 0 || a2 < 0}
+ */
+ public static int gtIndex(int a1, int a2) {
+ if (a1 < 0) {
+ throw new IllegalArgumentException("a1<0: " + a1);
+ }
+ if (a2 < 0) {
+ throw new IllegalArgumentException("a2<0: " + a2);
+ } else if (a1 < a2) {
+ return (a2 * (a2 + 1)) / 2 + a1;
+ } else {
+ return (a1 * (a1 + 1)) / 2 + a2;
+ }
+ }
+
+ private VcfRecord(VcfHeader vcfHeader, String vcfRecord, boolean useGT,
+ boolean useGL, float maxLR) {
+ this.vcfHeader = vcfHeader;
+ this.vcfRecord = vcfRecord;
+ this.delimiters = delimiters(vcfHeader, vcfRecord);
+ this.marker = new BasicMarker(vcfRecord);
+ this.formatFields = formats(format());
+ this.formatMap = formatToIndexMap(vcfHeader, vcfRecord, formatFields);
+ boolean storeGT = useGT && formatMap.containsKey("GT");
+ boolean storeGL = useGL &&
+ (formatMap.containsKey("PL") || formatMap.containsKey("GL"));
+ if (storeGT==false && storeGL==false) {
+ String s = "Missing required data: " + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ this.gtEm = storeGT ? new BitSetGT(vcfHeader, vcfRecord) : null;
+ this.gls = storeGL ? likelihoodsFromGL(maxLR) : null;
+ }
+
+ /**
+ * Constructs and returns a new {@code VcfRecord} instance from a
+ * VCF record and its GT format subfield data
+ *
+ * @param vcfHeader meta-information lines and header line for the
+ * specified VCF record.
+ * @param vcfRecord a VCF record with a GL format field corresponding to
+ * the specified {@code vcfHeader} object
+ * @return a new {@code VcfRecord} instance
+ *
+ * @throws IllegalArgumentException if the VCF record does not have a
+ * GT format field
+ * @throws IllegalArgumentException if a VCF record format error is
+ * detected
+ * @throws IllegalArgumentException if there are not
+ * {@code vcfHeader.nHeaderFields()} tab-delimited fields in the
+ * specified VCF record
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRecord == null}
+ */
+ public static VcfRecord fromGT(VcfHeader vcfHeader, String vcfRecord) {
+ boolean useGT = true;
+ boolean useGL = false;
+ float maxLR = Float.NaN;
+ return new VcfRecord(vcfHeader, vcfRecord, useGT, useGL, maxLR);
+ }
+
+ /**
+ * Constructs and returns a new {@code VcfRecord} instance from a
+ * VCF record and its GL or PL format subfield data. If both
+ * GL and PL format subfields are present, the GL format field will be used.
+ * If the maximum normalized genotype likelihood is 1.0 for a sample,
+ * then any other genotype likelihood for the sample that is less than
+ * {@code lrThreshold} is set to 0.
+ *
+ * @param vcfHeader meta-information lines and header line for the
+ * specified VCF record
+ * @param vcfRecord a VCF record with a GL format field corresponding to
+ * the specified {@code vcfHeader} object
+ * @param maxLR the maximum likelihood ratio
+ * @return a new {@code VcfRecord} instance
+ *
+ * @throws IllegalArgumentException if the VCF record does not have a
+ * GL format field
+ * @throws IllegalArgumentException if a VCF record format error is
+ * detected
+ * @throws IllegalArgumentException if there are not
+ * {@code vcfHeader.nHeaderFields()} tab-delimited fields in the
+ * specified VCF record
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRecord == null}
+ */
+ public static VcfRecord fromGL(VcfHeader vcfHeader, String vcfRecord,
+ float maxLR) {
+ boolean useGT = false;
+ boolean useGL = true;
+ return new VcfRecord(vcfHeader, vcfRecord, useGT, useGL, maxLR);
+ }
+
+ /**
+ * Constructs and returns a new {@code VcfRecord} instance from a VCF
+ * record and its GT, GL, and PL format subfield data.
+ * If the GT format subfield is present and non-missing, the
+ * GT format subfield is used to determine genotype likelihoods. Otherwise
+ * the GL or PL format subfield is used to determine genotype likelihoods.
+ * If both the GL and PL format subfields are present, only the GL format
+ * subfield will be used. If the maximum normalized genotype likelihood
+ * is 1.0 for a sample, then any other genotype likelihood for the sample
+ * that is less than {@code lrThreshold} is set to 0.
+ *
+ * @param vcfHeader meta-information lines and header line for the
+ * specified VCF record
+ * @param vcfRecord a VCF record with a GT, a GL or a PL format field
+ * corresponding to the specified {@code vcfHeader} object
+ * @param maxLR the maximum likelihood ratio
+ * @return a new {@code VcfRecord}
+ *
+ * @throws IllegalArgumentException if the VCF record does not have a
+ * GT, GL, or PL format field
+ * @throws IllegalArgumentException if a VCF record format error is
+ * detected
+ * @throws IllegalArgumentException if there are not
+ * {@code vcfHeader.nHeaderFields()} tab-delimited fields in the
+ * specified VCF record
+ * @throws NullPointerException if
+ * {@code vcfHeader == null || vcfRecord == null}
+ */
+ public static VcfRecord fromGTGL(VcfHeader vcfHeader, String vcfRecord,
+ float maxLR) {
+ boolean useGT = true;
+ boolean useGL = true;
+ return new VcfRecord(vcfHeader, vcfRecord, useGT, useGL, maxLR);
+ }
+
+ private static int[] delimiters(VcfHeader vcfHeader, String vcfRecord) {
+ int nFields = vcfHeader.nHeaderFields();
+ int[] delimiters = new int[nFields + 1];
+ delimiters[0] = -1;
+ for (int j=1; j<nFields; ++j) {
+ delimiters[j] = vcfRecord.indexOf(Const.tab, delimiters[j-1] + 1);
+ if (delimiters[j] == -1) {
+ fieldCountError(vcfHeader, vcfRecord);
+ }
+ }
+ if (vcfRecord.indexOf(Const.tab, delimiters[nFields-1] + 1) != -1) {
+ fieldCountError(vcfHeader, vcfRecord);
+ }
+ delimiters[nFields] = vcfRecord.length();
+ return delimiters;
+ }
+
+ private static void fieldCountError(VcfHeader vcfHeader, String vcfRecord) {
+ File f = vcfHeader.file();
+ String[] fields = StringUtil.getFields(vcfRecord, Const.tab);
+ String src = "File source: " + (f!=null ? f : "stdin");
+ String s = "VCF header line has " + vcfHeader.nHeaderFields()
+ + " fields, but data line has " + fields.length + " fields"
+ + Const.nl + "File source:" + src
+ + Const.nl + Arrays.toString(fields);
+ throw new IllegalArgumentException(s);
+ }
+
+ /**
+ * Return {@code true} if all characters in the specified
+ * string are letters or digits and returns {@code false} otherwise.
+ * @param s a string.
+ * @return {@code true} if all characters in the specified
+ * string are letters or digits and returns {@code false} otherwise.
+ */
+ private static boolean isAlphanumeric(String s) {
+ for (int j=0, n=s.length(); j<n; ++j) {
+ if (Character.isLetterOrDigit(s.charAt(j))==false) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private String[] formats(String formats) {
+ if (formats.equals(Const.MISSING_DATA_STRING) || formats.isEmpty()) {
+ String s = "missing format field: " + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ String[] fields = StringUtil.getFields(formats, Const.colon);
+ for (String f : fields) {
+ if (f.isEmpty()) {
+ String s = "missing format in format subfield list: " + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ // Commented-out alpha-numeric check to avoid throwing an
+ // exception when the FORMAT subfield code is not alphanumeric.
+// if (isAlphanumeric(f)==false) {
+// String s = "format subfield must be alphanumeric (" + f + "): "
+// + vcfRecord;
+// throw new IllegalArgumentException(s);
+// }
+ }
+ return fields;
+ }
+
+ private static Map<String, Integer> formatToIndexMap(VcfHeader vcfHeader,
+ String vcfRecord, String[] formatFields) {
+ if (vcfHeader.nSamples()==0) {
+ return Collections.emptyMap();
+ }
+ Map<String, Integer> map = new HashMap<>(formatFields.length);
+ for (int j=0; j<formatFields.length; ++j) {
+ map.put(formatFields[j], j);
+ }
+ if (map.containsKey("GT") && map.get("GT")!=0) {
+ String s = "GT format is not first format: " + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ return map;
+ }
+
+ /* returns exclusive end */
+ private int formatSubfieldEnd(int start) {
+ while (start < vcfRecord.length()) {
+ char c = vcfRecord.charAt(start);
+ if (c == Const.colon || c == Const.tab) {
+ return start;
+ }
+ ++start;
+ }
+ return start;
+ }
+
+ private float[] likelihoodsFromGL(float maxLR) {
+ float minLR = 1f/maxLR;
+ int nGt = this.marker.nGenotypes();
+ String[] dataGL = hasFormat(GL_FORMAT) ? formatData(GL_FORMAT) : null;
+ String[] dataPL = hasFormat(PL_FORMAT) ? formatData(PL_FORMAT) : null;
+ double[] doubleLike = new double[nGt];
+ float[] floatLike = new float[nSamples()*nGt];
+ int floatLikeIndex = 0;
+ for (int s=0, n=nSamples(); s<n; ++s) {
+ Arrays.fill(doubleLike, 0.0);
+ if (dataGL != null) {
+ String[] fields = getGL(GL_FORMAT, dataGL, s, nGt);
+ for (int k=0; k<nGt; ++k) {
+ doubleLike[k] = GL2Like(fields[k]);
+ }
+ }
+ else if (dataPL != null) {
+ String[] fields = getGL(PL_FORMAT, dataPL, s, nGt);
+ for (int k=0; k<nGt; ++k) {
+ doubleLike[k] = PL2Like(fields[k]);
+ }
+ }
+ rescaleToMax1(doubleLike);
+ for (int gt=0; gt<nGt; ++gt) {
+ if (doubleLike[gt] >= minLR) {
+ floatLike[floatLikeIndex] = (float) doubleLike[gt];
+ }
+ ++floatLikeIndex;
+ }
+ }
+ assert floatLikeIndex==floatLike.length;
+ return floatLike;
+ }
+
+ private String[] getGL(String format, String[] sampleData,
+ int sample, int nGt) {
+ if (sampleData[sample].equals(Const.MISSING_DATA_STRING)) {
+ String[] fields = new String[nGt];
+ Arrays.fill(fields, "0");
+ return fields;
+ }
+ else {
+ String[] subfields = StringUtil.getFields(sampleData[sample],
+ Const.comma);
+ if (subfields.length!=nGt) {
+ String s = "unexpected number of " + format + " subfields: "
+ + sampleData(sample, format) + Const.nl
+ + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ for (String subfield : subfields) {
+ if (subfield.equals(Const.MISSING_DATA_STRING)) {
+ String s = "missing subfield in " + format + " field: "
+ + sampleData(sample, format) + Const.nl
+ + vcfRecord;
+ throw new IllegalArgumentException(s);
+ }
+ }
+ return subfields;
+ }
+ }
+
+ private static double GL2Like(String gl) {
+ return Math.pow(10.0, Double.parseDouble(gl));
+ }
+
+ private static double PL2Like(String pl) {
+ return Math.pow(10.0, -Integer.parseInt(pl)/10.0);
+ }
+
+ private static void rescaleToMax1(double[] like) {
+ double max = max(like);
+ if (max == 0.0f) {
+ Arrays.fill(like, 1.0);
+ }
+ else {
+ for (int j=0; j<like.length; ++j) {
+ like[j] /= max;
+ }
+ }
+ }
+
+ /* returns max{double[] like, double 0.0} */
+ private static double max(double[] like) {
+ double max = 0.0;
+ for (int k=0; k<like.length; ++k) {
+ if (like[k] > max) {
+ max = like[k];
+ }
+ }
+ return max;
+ }
+
+ /**
+ * Returns the QUAL field.
+ * @return the QUAL field
+ */
+ public String qual() {
+ return vcfRecord.substring(delimiters[5] + 1, delimiters[6]);
+ }
+
+ /**
+ * Returns the FILTER field.
+ * @return the FILTER field
+ */
+ public String filter() {
+ return vcfRecord.substring(delimiters[6] + 1, delimiters[7]);
+ }
+
+ /**
+ * Returns the INFO field.
+ * @return the INFO field
+ */
+ public String info() {
+ return vcfRecord.substring(delimiters[7] + 1, delimiters[8]);
+ }
+
+ /**
+ * Returns the FORMAT field. Returns the empty string ("") if the FORMAT
+ * field is missing.
+ * @return the FORMAT field
+ */
+ public String format() {
+ if (delimiters.length > 9) {
+ return vcfRecord.substring(delimiters[8] + 1, delimiters[9]);
+ }
+ else {
+ return "";
+ }
+ }
+
+ /**
+ * Returns the number of FORMAT subfields.
+ * @return the number of FORMAT subfields
+ */
+ public int nFormatSubfields() {
+ return formatFields.length;
+ }
+
+ /**
+ * Returns the specified FORMAT subfield.
+ * @param subfieldIndex a FORMAT subfield index
+ * @return the specified FORMAT subfield
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code subfieldIndex < 0 || subfieldIndex >= this.nFormatSubfields()}
+ */
+ public String formatSubfield(int subfieldIndex) {
+ if (formatFields==null) {
+ throw new IllegalArgumentException("No format exists");
+ }
+ return formatFields[subfieldIndex];
+ }
+
+ /**
+ * Returns {@code true} if the specified FORMAT subfield is
+ * present, and returns {@code false} otherwise.
+ * @param formatCode a FORMAT subfield code
+ * @return {@code true} if the specified FORMAT subfield is
+ * present
+ */
+ public boolean hasFormat(String formatCode) {
+ return formatMap.get(formatCode)!=null;
+ }
+
+ /**
+ * Returns the index of the specified FORMAT subfield if the
+ * specified subfield is defined for this VCF record, and returns -1
+ * otherwise.
+ * @param formatCode the format subfield code
+ * @return the index of the specified FORMAT subfield if the
+ * specified subfield is defined for this VCF record, and {@code -1}
+ * otherwise
+ */
+ public int formatIndex(String formatCode) {
+ Integer index = formatMap.get(formatCode);
+ return (index==null) ? -1 : index;
+ }
+
+ /**
+ * Returns the data for the specified sample.
+ * @param sample a sample index
+ * @return the data for the specified sample
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ public String sampleData(int sample) {
+ int index = vcfHeader.unfilteredSampleIndex(sample);
+ return vcfRecord.substring(delimiters[index + sampleOffset] + 1,
+ delimiters[index + sampleOffset + 1]);
+ }
+
+ /**
+ * Returns the specified data for the specified sample.
+ * @param sample a sample index
+ * @param formatCode a FORMAT subfield code
+ * @return the specified data for the specified sample
+ *
+ * @throws IllegalArgumentException if
+ * {@code this.hasFormat(formatCode)==false}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ public String sampleData(int sample, String formatCode) {
+ Integer formatIndex = formatMap.get(formatCode);
+ if (formatIndex==null) {
+ String s = "missing format data: " + formatCode;
+ throw new IllegalArgumentException(s);
+ }
+ return VcfRecord.this.sampleData(sample, formatIndex);
+ }
+
+ /**
+ * Returns the specified data for the specified sample.
+ * @param sample a sample index
+ * @param subfieldIndex a FORMAT subfield index
+ * @return the specified data for the specified sample
+ *
+ * @throws IndexOutOfBoundsException if
+ * {@code field < 0 || field >= this.nFormatSubfields()}
+ * @throws IndexOutOfBoundsException if
+ * {@code sample < 0 || sample >= this.nSamples()}
+ */
+ public String sampleData(int sample, int subfieldIndex) {
+ if (subfieldIndex < 0 || subfieldIndex >= formatFields.length) {
+ throw new IndexOutOfBoundsException(String.valueOf(subfieldIndex));
+ }
+ int index = sampleOffset + vcfHeader.unfilteredSampleIndex(sample);
+ int start = delimiters[index] + 1;
+ for (int j = 0; j < subfieldIndex; ++j) {
+ int end = formatSubfieldEnd(start);
+ if (end==vcfRecord.length() || vcfRecord.charAt(end)==Const.tab) {
+ return ".";
+ }
+ else {
+ start = end + 1;
+ }
+ }
+ int end = formatSubfieldEnd(start);
+ if (end==start) {
+ return ".";
+ }
+ else {
+ return vcfRecord.substring(start, end);
+ }
+ }
+
+ /**
+ * Returns an array of length {@code this.nSamples()}
+ * containing the specified FORMAT subfield data for each sample. The
+ * {@code k}-th element of the array is the specified FORMAT subfield data
+ * for the {@code k}-th sample.
+ * @param formatCode a format subfield code
+ * @return an array of length {@code this.nSamples()}
+ * containing the specified FORMAT subfield data for each sample
+ *
+ * @throws IllegalArgumentException if
+ * {@code this.hasFormat(formatCode) == false}
+ */
+ public String[] formatData(String formatCode) {
+ Integer formatIndex = formatMap.get(formatCode);
+ if (formatIndex==null) {
+ String s = "missing format data: " + formatCode;
+ throw new IllegalArgumentException(s);
+ }
+ String[] sa = new String[vcfHeader.nSamples()];
+ for (int j=0; j<sa.length; ++j) {
+ sa[j] = sampleData(j, formatIndex);
+ }
+ return sa;
+ }
+
+ @Override
+ public Samples samples() {
+ return vcfHeader.samples();
+ }
+
+
+ @Override
+ public int nSamples() {
+ return vcfHeader.nSamples();
+ }
+
+ /**
+ * Returns the VCF meta-information lines and the VCF header line.
+ * @return the VCF meta-information lines and the VCF header line
+ */
+ public VcfHeader vcfHeader() {
+ return vcfHeader;
+ }
+
+ @Override
+ public Marker marker() {
+ return marker;
+ }
+
+ @Override
+ public int allele1(int sample) {
+ return gtEm == null ? -1 : gtEm.allele1(sample);
+ }
+
+ @Override
+ public int allele2(int sample) {
+ return gtEm == null ? -1 : gtEm.allele2(sample);
+ }
+
+ @Override
+ public boolean isPhased(int sample) {
+ return gtEm == null ? false : gtEm.isPhased(sample);
+ }
+
+ @Override
+ public boolean isRefData() {
+ return gtEm == null ? false : gtEm.isRefData();
+ }
+
+ @Override
+ public float gl(int sample, int allele1, int allele2) {
+ if (gtEm==null
+ || (gls!=null
+ && (gtEm.allele1(sample) == -1 || gtEm.allele2(sample) == -1))) {
+ int n = marker.nAlleles();
+ if (allele1 < 0 || allele2 < 0 || allele1 >= n || allele2 >= n) {
+ String s = allele1 + " " + allele2 + " " + n;
+ throw new ArrayIndexOutOfBoundsException(s);
+ }
+ int gtIndex = VcfRecord.gtIndex(allele1, allele2);
+ return gls[(sample*marker.nGenotypes()) + gtIndex];
+ }
+ else {
+ return gtEm.gl(sample, allele1, allele2);
+ }
+ }
+
+ @Override
+ public int allele(int hap) {
+ return gtEm == null ? -1 : gtEm.allele(hap);
+ }
+
+ @Override
+ public int nAlleles() {
+ return this.marker().nAlleles();
+ }
+
+ @Override
+ public boolean storesNonMajorIndices() {
+ return false;
+ }
+
+ @Override
+ public int majorAllele() {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int alleleCount(int allele) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int hapIndex(int allele, int copy) {
+ String s = "this.storesNonMajorIndices()==false";
+ throw new UnsupportedOperationException(s);
+ }
+
+ @Override
+ public int nHaps() {
+ return 2*vcfHeader.nSamples();
+ }
+
+ @Override
+ public int nHapPairs() {
+ return vcfHeader.nSamples();
+ }
+
+ /**
+ * Returns the VCF record.
+ * @return the VCF record
+ */
+ @Override
+ public String toString() {
+ return vcfRecord;
+ }
+}
diff --git a/vcf/VcfWindow.java b/vcf/VcfWindow.java
new file mode 100644
index 0000000..4750286
--- /dev/null
+++ b/vcf/VcfWindow.java
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import beagleutil.Samples;
+import blbutil.SampleFileIt;
+import java.io.Closeable;
+import java.io.File;
+import java.util.ArrayList;
+import java.util.List;
+import main.GeneticMap;
+
+/**
+ * <p>Class {@code VcfWindow} represents a sliding window of VCF records.
+ * </p>
+ * Instances of class {@code VcfWindow} are not thread-safe.
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public class VcfWindow implements Closeable {
+
+ // Source of VCF records; also backs file(), samples() and close().
+ private final SampleFileIt<? extends VcfEmission> it;
+ // Records in the current window; empty until advanceWindow() is first called.
+ private final List<VcfEmission> window;
+ // Number of records the current window shares with the previous window.
+ private int overlap;
+ // Running total of (window size - overlap) over all advanceWindow() calls.
+ private int cumMarkerCnt;
+ // Record already read from the iterator but not yet placed in a window;
+ // null once the iterator is exhausted.
+ private VcfEmission next;
+
+ /**
+  * Creates a {@code VcfWindow} that reads records from the specified
+  * iterator. The first record is read immediately; the window itself
+  * stays empty until {@code advanceWindow()} is invoked.
+  *
+  * @param it an iterator that returns VCF records
+  * @throws IllegalArgumentException if {@code it.hasNext() == false}
+  * @throws IllegalArgumentException if a format error is detected in
+  * a VCF record
+  * @throws NullPointerException if {@code it == null}
+  */
+ public VcfWindow(SampleFileIt<? extends VcfEmission> it) {
+     if (!it.hasNext()) {
+         throw new IllegalArgumentException("it.hasNext()==false");
+     }
+     this.window = new ArrayList<>(20000);
+     this.cumMarkerCnt = 0;
+     this.overlap = 0;
+     this.it = it;
+     this.next = it.next();
+ }
+
+ /**
+  * Returns {@code true} if the sliding window of VCF Records is the last
+  * window for the chromosome and returns {@code false} otherwise.
+  * If no window has been read yet (the window is empty) and records
+  * remain, {@code false} is returned.
+  *
+  * @return {@code true} if the sliding window of VCF Records is the last
+  * window for the chromosome
+  */
+ public boolean lastWindowOnChrom() {
+     if (next == null) {
+         return true;
+     }
+     // Before the first advanceWindow() call the window is empty, so
+     // there is no first record to compare against; window.get(0) would
+     // throw IndexOutOfBoundsException.
+     if (window.isEmpty()) {
+         return false;
+     }
+     return sameChrom(next, window.get(0)) == false;
+ }
+
+ // Returns true if the markers of the two records have the same
+ // chromosome index.
+ private boolean sameChrom(VcfEmission a, VcfEmission b) {
+     if (a.marker().chromIndex() == b.marker().chromIndex()) {
+         return true;
+     }
+     return false;
+ }
+
+ /**
+ * Returns {@code true} if the sliding window of VCF records can advance
+ * and returns {@code false} otherwise. The window can advance as long
+ * as a record has been read from the iterator that has not yet been
+ * placed in a window.
+ * @return {@code true} if the sliding window of VCF records can advance
+ */
+ public boolean canAdvanceWindow() {
+ return next!=null;
+ }
+
+ /**
+  * Advances the sliding window of VCF records and returns the new
+  * window as a {@code VcfEmission[]}. The new window starts with the
+  * trailing records of the previous window (the overlap) and is then
+  * filled from the iterator with records on the same chromosome until
+  * it holds {@code windowSize} records. Any further records at the same
+  * position as the last record are also included, so the window can be
+  * larger than requested. The actual overlap can differ from the
+  * requested overlap: it is 0 if the previous window was empty or
+  * {@code this.lastWindowOnChrom() == true} before the call, it is
+  * capped at the previous window size, and it is extended so that
+  * records sharing a position are not split. If the window size or
+  * overlap is less than requested, it is as large as possible.
+  *
+  * @param overlap the requested number of markers of overlap
+  * @param windowSize the requested number of the markers in the window
+  * immediately after the method returns
+  * @return the advanced window of VCF records
+  *
+  * @throws IllegalArgumentException if a format error is detected in
+  * a VCF record
+  * @throws IllegalArgumentException if
+  * {@code overlap < 0 || overlap >= windowSize}
+  * @throws IllegalStateException if
+  * {@code this.canAdvanceWindow() == false}
+  */
+ public VcfEmission[] advanceWindow(int overlap, int windowSize) {
+     if (!canAdvanceWindow()) {
+         throw new IllegalStateException("canAdvanceWindow()==false");
+     }
+     checkParameters(overlap, windowSize);
+     overlap = getActualOverlap(overlap);
+
+     // seed the new window with the tail of the previous one
+     int prevSize = window.size();
+     List<VcfEmission> advanced = new ArrayList<>(windowSize);
+     advanced.addAll(window.subList(prevSize - overlap, prevSize));
+
+     // NOTE(review): if the position-extended overlap already reaches
+     // windowSize, no new record is added here -- confirm callers cannot
+     // hit this case.
+     int chrom = currentChromIndex(advanced);
+     while (advanced.size() < windowSize
+             && next != null
+             && next.marker().chromIndex() == chrom) {
+         advanced.add(next);
+         if (it.hasNext()) {
+             next = it.next();
+         }
+         else {
+             next = null;
+         }
+     }
+
+     // do not split records that share the final marker position
+     VcfEmission tail = advanced.get(advanced.size() - 1);
+     while (next != null && samePosition(tail, next)) {
+         advanced.add(next);
+         if (it.hasNext()) {
+             next = it.next();
+         }
+         else {
+             next = null;
+         }
+     }
+
+     this.overlap = overlap;
+     this.window.clear();
+     this.window.addAll(advanced);
+     this.cumMarkerCnt += (window.size() - overlap);
+     return window.toArray(new VcfEmission[0]);
+ }
+
+ /**
+  * Advances the sliding window of VCF records, and returns the advanced
+  * window as a {@code VcfEmission[]} object. The size of the advanced
+  * window and the number of markers of overlap between the marker window
+  * immediately before method invocation and the marker window immediately
+  * after method invocation may differ from the requested values. If the
+  * distance the window is advanced or the overlap is less than the requested
+  * value, the actual distance or overlap will be as large as possible. If
+  * {@code this.lastWindowOnChrom() == true}
+  * before method invocation, then there will be no overlap between the
+  * advanced window and the previous window.
+  *
+  * <p>The requested distance is measured from the genetic map position
+  * of the last overlap record, or of the next unread record when there
+  * is no overlap. The advanced window always contains at least one
+  * record that was not in the previous window, even if the next record
+  * lies {@code cM} or more beyond the starting position; without this
+  * guarantee repeated calls would return the same records forever.
+  * All records at the same position as the last record in the window
+  * are also included.
+  *
+  * @param overlap the requested number of markers of overlap
+  * @param cM the requested distance in cM to advance the window
+  * @param map the genetic map
+  * @return the advanced window of VCF records
+  *
+  * @throws IllegalArgumentException if a format error is detected in
+  * a VCF record
+  * @throws IllegalArgumentException if
+  * {@code overlap < 0 || cM <= 0 || Double.isNaN(cM)}
+  * @throws IllegalStateException if
+  * {@code this.canAdvanceWindow() == false}
+  * @throws NullPointerException if {@code map == null}
+  */
+ public VcfEmission[] advanceWindow(int overlap, double cM, GeneticMap map) {
+     if (canAdvanceWindow()==false) {
+         throw new IllegalStateException("canAdvanceWindow()==false");
+     }
+     if (overlap < 0) {
+         throw new IllegalArgumentException(String.valueOf(overlap));
+     }
+     // A zero or NaN distance can never admit a record; reject it as the
+     // documented contract (cM <= 0) requires.
+     if (cM <= 0 || Double.isNaN(cM)) {
+         throw new IllegalArgumentException(String.valueOf(cM));
+     }
+
+     overlap = getActualOverlap(overlap);
+     List<VcfEmission> newWindow = new ArrayList<>(overlap + 1000);
+
+     newWindow.addAll(window.subList(window.size() - overlap, window.size()));
+     int currentChromIndex = currentChromIndex(newWindow);
+     double endMapPos = startMapPos(newWindow, map) + cM;
+     while (next != null
+             && next.marker().chromIndex()==currentChromIndex
+             && map.genPos(next.marker()) < endMapPos) {
+         newWindow.add(next);
+         next = it.hasNext() ? it.next() : null;
+     }
+     // Guarantee progress: if the gap to the next record is at least cM,
+     // the loop above adds nothing and the window would never move.
+     if (newWindow.size() == overlap && next != null) {
+         newWindow.add(next);
+         next = it.hasNext() ? it.next() : null;
+     }
+     // add all markers at the same marker position
+     VcfEmission last = newWindow.get(newWindow.size()-1);
+     while (next!=null && samePosition(last, next)) {
+         newWindow.add(next);
+         next = it.hasNext() ? it.next() : null;
+     }
+     this.overlap = overlap;
+     this.window.clear();
+     this.window.addAll(newWindow);
+     this.cumMarkerCnt += (window.size() - overlap);
+     return window.toArray(new VcfEmission[0]);
+ }
+
+ // Validates the arguments of advanceWindow(int, int): the requested
+ // overlap must be non-negative and strictly less than the window size.
+ // Throws IllegalArgumentException otherwise.
+ private void checkParameters(int overlap, int windowSize) {
+     if (overlap < 0 || overlap >= windowSize) {
+         // the two values are separated by a space so the message is readable
+         String s = "overlap=" + overlap + " windowSize=" + windowSize;
+         throw new IllegalArgumentException(s);
+     }
+ }
+
+ private int getActualOverlap(int overlap) {
+ if (window.isEmpty() || lastWindowOnChrom()) {
+ return 0;
+ }
+ int n = window.size();
+ if (overlap > n) {
+ overlap = n;
+ }
+ while (overlap > 0 && overlap < n
+ && window.get(n - overlap).marker().pos()
+ == window.get(n - overlap - 1).marker().pos()) {
+ ++overlap;
+ }
+ return overlap;
+ }
+
+ // Returns the chromosome index of the first record in the specified
+ // window; if the window is empty, the chromosome index of the next
+ // unread record; and -1 if there is no such record either.
+ private int currentChromIndex(List<VcfEmission> currentWindow) {
+     if (!currentWindow.isEmpty()) {
+         return currentWindow.get(0).marker().chromIndex();
+     }
+     return (next != null) ? next.marker().chromIndex() : -1;
+ }
+
+ // Returns the genetic map position from which a cM-based advance is
+ // measured: the position of the LAST record in the specified window;
+ // if the window is empty, the position of the next unread record; and
+ // 0 if there is no such record either.
+ private double startMapPos(List<VcfEmission> currentWindow, GeneticMap map) {
+     if (!currentWindow.isEmpty()) {
+         int lastIndex = currentWindow.size() - 1;
+         Marker lastMarker = currentWindow.get(lastIndex).marker();
+         return map.genPos(lastMarker);
+     }
+     if (next == null) {
+         return 0;
+     }
+     return map.genPos(next.marker());
+ }
+
+ // Returns true if the markers of the two records have the same
+ // chromosome index and the same position.
+ private boolean samePosition(VcfEmission a, VcfEmission b) {
+     if (a.marker().chromIndex() != b.marker().chromIndex()) {
+         return false;
+     }
+     return a.marker().pos() == b.marker().pos();
+ }
+
+ /**
+ * Returns the file from which VCF records are read, or returns
+ * {@code null} if the source is standard input. The value is obtained
+ * from the iterator passed to the constructor.
+ * @return the file from which VCF records are read, or
+ * {@code null} if the source is standard input
+ */
+ public File file() {
+ return it.file();
+ }
+
+ /**
+ * Returns the list of samples, as reported by the iterator passed to
+ * the constructor.
+ * @return the list of samples
+ */
+ public Samples samples() {
+ return it.samples();
+ }
+
+ /**
+ * Returns the number of samples.
+ * @return the number of samples
+ */
+ public int nSamples() {
+ return it.samples().nSamples();
+ }
+
+ /**
+ * Returns the number of VCF records in the overlap between the current
+ * window and the previous window. Returns 0 if the current window
+ * is the first window. The value is the actual overlap used by the most
+ * recent {@code advanceWindow()} call, which may differ from the
+ * requested overlap.
+ *
+ * @return the number of VCF records in the overlap between the current
+ * window and the previous window
+ */
+ public int overlap() {
+ return overlap;
+ }
+
+ /**
+ * Returns the number of distinct VCF records in the union of the current
+ * window and all previous windows. Each {@code advanceWindow()} call
+ * adds the size of the new window minus its overlap to this count.
+ *
+ * @return the number of distinct VCF records in the union of the current
+ * window and all previous windows
+ */
+ public int cumMarkerCnt() {
+ return cumMarkerCnt;
+ }
+
+ /**
+ * Releases any I/O resources controlled by this object by closing the
+ * iterator passed to the constructor.
+ */
+ @Override
+ public void close() {
+ it.close();
+ }
+
+ /**
+  * Returns a string representation of {@code this}. The exact
+  * details of the representation are unspecified and subject to change.
+  * The current representation is the class followed by the next unread
+  * record.
+  * @return a string representation of {@code this}
+  */
+ @Override
+ public String toString() {
+     return this.getClass().toString() + "; next: " + next;
+ }
+}
diff --git a/vcf/VcfWriter.java b/vcf/VcfWriter.java
new file mode 100644
index 0000000..7df50e2
--- /dev/null
+++ b/vcf/VcfWriter.java
@@ -0,0 +1,380 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.Const;
+import java.io.PrintWriter;
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.text.DecimalFormat;
+import java.text.DecimalFormatSymbols;
+import java.text.SimpleDateFormat;
+import java.util.Calendar;
+import java.util.Locale;
+import main.AlleleProbs;
+import main.GenotypeValues;
+
+/**
+ * <p>Class {@code VcfWriter} contains static methods for writing data in
+ * VCF 4.2 format.
+ * </p>
+ * <p>The static methods of class {@code VcfWriter} share number formatters and are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class VcfWriter {
+
+ private static final String PASS = "PASS";
+
+ // Number formats are pinned to Locale.US. DecimalFormat(String) uses the
+ // default locale, and a locale whose decimal separator is a comma would
+ // write values such as "0,5" into the comma-separated DS and GP fields.
+ private static final DecimalFormatSymbols US_SYMBOLS =
+         DecimalFormatSymbols.getInstance(Locale.US);
+ private static final DecimalFormat df2 = new DecimalFormat("#.##", US_SYMBOLS);
+ private static final DecimalFormat df3 = new DecimalFormat("#.###", US_SYMBOLS);
+ private static final DecimalFormat df2_fixed = new DecimalFormat("0.00", US_SYMBOLS);
+ // two significant digits, used when printing allele frequencies
+ private static final MathContext mathContext2 = new MathContext(2);
+
+ private static final String fileformat = "##fileformat=VCFv4.2";
+
+ private static final String afInfo = "##INFO=<ID=AF,Number=A,Type=Float,"
+         + "Description=\"Estimated Allele Frequencies\">";
+ private static final String ar2Info = "##INFO=<ID=AR2,Number=1,Type=Float,"
+         + "Description=\"Allelic R-Squared: estimated squared correlation between "
+         + "most probable REF dose and true REF dose\">";
+ private static final String dr2Info = "##INFO=<ID=DR2,Number=1,Type=Float,"
+         + "Description=\"Dosage R-Squared: estimated squared correlation between "
+         + "estimated REF dose [P(RA) + 2*P(RR)] and true REF dose\">";
+
+ private static final String gtFormat = "##FORMAT=<ID=GT,Number=1,Type=String,"
+         + "Description=\"Genotype\">";
+ // One DS value is written per ALT allele, and a homozygous ALT genotype
+ // contributes twice to the dose, hence Number=A and 2*P(AA).
+ private static final String dsFormat = "##FORMAT=<ID=DS,Number=A,Type=Float,"
+         + "Description=\"estimated ALT dose [P(RA) + 2*P(AA)]\">";
+ private static final String glFormat = "##FORMAT=<ID=GL,Number=G,Type=Float,"
+         + "Description=\"Log10-scaled Genotype Likelihood\">";
+ private static final String gpFormat = "##FORMAT=<ID=GP,Number=G,Type=Float,"
+         + "Description=\"Estimated Genotype Probability\">";
+
+ // header-line prefix without the FORMAT column
+ private static final String shortChromPrefix= "#CHROM" + Const.tab + "POS"
+         + Const.tab + "ID" + Const.tab + "REF" + Const.tab + "ALT"
+         + Const.tab + "QUAL" + Const.tab + "FILTER" + Const.tab + "INFO";
+
+ // header-line prefix including the FORMAT column
+ private static final String longChromPrefix =
+         shortChromPrefix + Const.tab + "FORMAT";
+
+
+ // All members of this class are static, so no instance is ever needed.
+ private VcfWriter() {
+ // private constructor prevents instantiation
+ }
+
+ /**
+  * Writes VCF meta-information lines and the header line to the
+  * specified {@code PrintWriter}, describing the GT subfield as the only
+  * FORMAT subfield. Equivalent to calling {@code writeMetaLines()} with
+  * {@code printGT == true}, {@code printGP == false} and
+  * {@code printGL == false}.
+  *
+  * @param sampleIds the sample identifiers
+  * @param source a description of the data source, or {@code null} if
+  * no description is to be printed
+  * @param out the {@code PrintWriter} to which VCF meta-information
+  * lines will be written
+  * @throws NullPointerException if {@code out == null}
+  * @throws NullPointerException if
+  * {@code sampleIds == null}, or if {@code sampleIds[j] == null} for any
+  * {@code j} satisfying {@code (0 <= j && j < sampleIds.length)}
+  */
+ public static void writeMetaLinesGT(String[] sampleIds, String source,
+         PrintWriter out) {
+     // arguments 3-5 are printGT, printGP, printGL
+     writeMetaLines(sampleIds, source, true, false, false, out);
+ }
+
+ /**
+  * Writes VCF meta-information lines and the header line to the
+  * specified {@code PrintWriter}. The lines are written in this order:
+  * file format, file date, source (if non-null), the AF, AR2 and DR2
+  * INFO lines (if {@code printGP}), the GT FORMAT line (if
+  * {@code printGT}), the GL FORMAT line (if {@code printGL}), the DS and
+  * GP FORMAT lines (if {@code printGP}), and finally the header line
+  * with one column per sample identifier.
+  *
+  * @param sampleIds the sample identifiers
+  * @param source a description of the data source, or {@code null} if
+  * no description is to be printed
+  * @param printGT {@code true} if the meta-information lines
+  * will describe the GT FORMAT subfield and {@code false} otherwise
+  * @param printGP {@code true} if the meta-information lines
+  * will describe the GP FORMAT subfield and {@code false} otherwise
+  * @param printGL {@code true} if the meta-information lines
+  * will describe the GL FORMAT subfield and {@code false} otherwise
+  * @param out the {@code PrintWriter} to which VCF meta-information lines
+  * will be written.
+  * @throws NullPointerException if {@code out == null}
+  * @throws NullPointerException if
+  * {@code sampleIds == null}, or if {@code sampleIds[j] == null} for any
+  * {@code j} satisfying {@code (0 <= j && j < sampleIds.length)}
+  */
+ public static void writeMetaLines(String[] sampleIds, String source,
+         boolean printGT, boolean printGP, boolean printGL, PrintWriter out) {
+     out.print(fileformat);
+     out.print(Const.nl);
+     out.print("##filedate=");
+     out.print(now());
+     out.print(Const.nl);
+     if (source != null) {
+         out.println("##source=\"" + source + "\"");
+     }
+     // INFO lines precede all FORMAT lines
+     if (printGP) {
+         out.println(afInfo);
+         out.println(ar2Info);
+         out.println(dr2Info);
+     }
+     if (printGT) {
+         out.println(gtFormat);
+     }
+     if (printGL) {
+         out.println(glFormat);
+     }
+     if (printGP) {
+         out.println(dsFormat);
+         out.println(gpFormat);
+     }
+     out.print(longChromPrefix);
+     for (int j = 0; j < sampleIds.length; ++j) {
+         String id = sampleIds[j];
+         if (id == null) {
+             throw new NullPointerException("id==null");
+         }
+         out.print(Const.tab);
+         out.print(id);
+     }
+     out.println();
+ }
+
+ // Returns the current date formatted with the pattern "yyyyMMdd".
+ private static String now() {
+     SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd");
+     return formatter.format(Calendar.getInstance().getTime());
+ }
+
+ /**
+ * Writes the specified genotype data as VCF records to the specified
+ * {@code PrintWriter}.
+ * @param gv the scaled sample posterior genotype probabilities
+ * @param start the starting marker index (inclusive)
+ * @param end the ending marker index (exclusive)
+ * @param out the {@code PrintWriter} to which VCF records will
+ * be written.
+ *
+ * @throws IllegalArgumentException if
+ * {@code haps.markers().equals(gv.markers()) == false}
+ * @throws IndexOutOfBoundsException if
+ * {@code (start < 0 || start > end || end > haps.nMarkers())}
+ * @throws NullPointerException if
+ * {@code (gv == null || out == null)}
+ */
+ public static void appendRecords(GenotypeValues gv, int start, int end,
+ PrintWriter out) {
+ if (start > end) {
+ throw new IllegalArgumentException("start=" + start + " end=" + end);
+ }
+ for (int marker=start; marker<end; ++marker) {
+ printFixedFields(gv, marker, out);
+ for (int sample=0, n=gv.nSamples(); sample<n; ++sample) {
+ print_GT_DS_GP(gv, marker, sample, out);
+ }
+ out.println();
+ }
+ }
+
+
+ // Writes one sample column in GT:DS:GP form for the specified marker.
+ // GT is the unphased genotype with the largest value (missing if no
+ // value is positive), DS is one normalized dose per ALT allele, and GP
+ // is the normalized value of every genotype. Genotypes are visited in
+ // the order (0,0), (0,1), (1,1), (0,2), ...
+ private static void print_GT_DS_GP(GenotypeValues gv, int marker, int sample,
+         PrintWriter out) {
+     int nAlleles = gv.marker(marker).nAlleles();
+     int nGenotypes = gv.marker(marker).nGenotypes();
+     float[] alleleDose = new float[nAlleles];
+     float total = 0f;
+     float bestValue = 0f;
+     int calledA1 = -1;
+     int calledA2 = -1;
+     int gtIndex = 0;
+     for (int a2 = 0; a2 < nAlleles; ++a2) {
+         for (int a1 = 0; a1 <= a2; ++a1, ++gtIndex) {
+             float v = gv.value(marker, sample, gtIndex);
+             if (v > bestValue) {
+                 calledA1 = a1;
+                 calledA2 = a2;
+                 bestValue = v;
+             }
+             // a homozygous genotype adds its value to the same allele twice
+             alleleDose[a1] += v;
+             alleleDose[a2] += v;
+             total += v;
+         }
+     }
+     // NOTE(review): if every value is 0, total is 0 and the divisions
+     // below produce NaN -- confirm gv never yields an all-zero sample.
+     out.print(Const.tab);
+     out.print(calledA1 == -1 ? Const.MISSING_DATA_STRING : calledA1);
+     out.print(Const.unphasedSep);
+     out.print(calledA2 == -1 ? Const.MISSING_DATA_STRING : calledA2);
+     for (int al = 1; al < alleleDose.length; ++al) {
+         out.print( (al==1) ? Const.colon : Const.comma);
+         out.print(df2.format(alleleDose[al]/total));
+     }
+     for (int g = 0; g < nGenotypes; ++g) {
+         out.print(g==0 ? Const.colon : Const.comma);
+         double prob = gv.value(marker, sample, g)/total;
+         out.print(df2.format(prob));
+     }
+ }
+
+ /**
+  * Writes the specified allele probability data as VCF records to the
+  * specified {@code PrintWriter}. One record is written for each marker
+  * index in {@code [start, end)}. Each sample column holds the phased
+  * genotype, followed by the dose if {@code imputed}, followed by the
+  * genotype probabilities if {@code gprobs}.
+  *
+  * @param alProbs the sample allele probabilities
+  * @param start the starting marker index (inclusive)
+  * @param end the ending marker index (exclusive)
+  * @param imputed {@code true} if there are imputed markers,
+  * and {@code false} otherwise
+  * @param gprobs {@code true} if the GP field should be printed, and
+  * {@code false} otherwise.
+  * @param out the {@code PrintWriter} to which VCF records will
+  * be written
+  *
+  * @throws IllegalArgumentException if {@code start > end}
+  * @throws NullPointerException if
+  * {@code (alProbs == null || out == null)} and {@code start < end}
+  */
+ public static void appendRecords(AlleleProbs alProbs, int start, int end,
+         boolean imputed, boolean gprobs, PrintWriter out) {
+     // NOTE(review): marker indices outside alProbs' range are presumably
+     // rejected by alProbs itself -- confirm against AlleleProbs.
+     if (end < start) {
+         throw new IllegalArgumentException("start=" + start + " end=" + end);
+     }
+     int marker = start;
+     while (marker < end) {
+         printFixedFields(alProbs, marker, imputed, gprobs, out);
+         int nSamples = alProbs.nSamples();
+         for (int sample = 0; sample < nSamples; ++sample) {
+             printGTandDose(alProbs, marker, sample, imputed, out);
+             if (gprobs) {
+                 printGP(alProbs, marker, sample, out);
+             }
+         }
+         out.println();
+         ++marker;
+     }
+ }
+
+ // Writes a tab, then the phased genotype of the specified sample and,
+ // if imputed is true, one dose per ALT allele. Each dose is the sum of
+ // the allele's probabilities on the sample's two haplotypes.
+ private static void printGTandDose(AlleleProbs alProbs, int marker, int
+         sample, boolean imputed, PrintWriter out) {
+     out.print(Const.tab);
+     out.print(alProbs.allele1(marker, sample));
+     out.append(Const.phasedSep);
+     out.print(alProbs.allele2(marker, sample));
+     if (imputed == false) {
+         return;
+     }
+     int nAlleles = alProbs.marker(marker).nAlleles();
+     for (int al = 1; al < nAlleles; ++al) {
+         float dose = alProbs.alProb1(marker, sample, al)
+                 + alProbs.alProb2(marker, sample, al);
+         out.print( (al==1) ? Const.colon : Const.comma );
+         out.print(df2.format(dose));
+     }
+ }
+
+ // Writes the GP subfield for the specified sample: a colon followed by
+ // comma-separated probabilities of the unordered genotypes in the order
+ // (0,0), (0,1), (1,1), (0,2), ... A heterozygous genotype's probability
+ // is the sum of the probabilities of its two allele orderings.
+ private static void printGP(AlleleProbs alProbs, int marker, int sample,
+         PrintWriter out) {
+     int nAlleles = alProbs.marker(marker).nAlleles();
+     boolean first = true;
+     for (int a2 = 0; a2 < nAlleles; ++a2) {
+         for (int a1 = 0; a1 <= a2; ++a1) {
+             out.print(first ? Const.colon : Const.comma);
+             first = false;
+             float prob = alProbs.gtProb(marker, sample, a1, a2);
+             if (a1 != a2) {
+                 prob += alProbs.gtProb(marker, sample, a2, a1);
+             }
+             out.print(df2.format(prob));
+         }
+     }
+ }
+
+ /**
+ * Prints the first 9 VCF record fields for the specified marker to
+ * the specified {@code PrintWriter}. Only one VCF FORMAT subfield,
+ * the GT subfield, is printed. The marker's string representation
+ * supplies the leading fields; QUAL and INFO are printed as missing
+ * data and FILTER is printed as {@code PASS}.
+ *
+ * @param marker a marker
+ * @param out the {@code PrintWriter} to which the first 9 VCF record
+ * fields will be written
+ *
+ * @throws NullPointerException if {@code marker == null || out == null}
+ */
+ public static void printFixedFieldsGT(Marker marker, PrintWriter out) {
+ // presumably marker.toString() yields the CHROM..ALT fields -- confirm in Marker
+ out.print(marker);
+ out.print(Const.tab);
+ out.print(Const.MISSING_DATA_CHAR); // QUAL
+ out.print(Const.tab);
+ out.print(PASS); // FILTER
+ out.print(Const.tab);
+ out.print(Const.MISSING_DATA_CHAR); // INFO
+ out.print(Const.tab);
+ out.print("GT");
+ }
+
+ // Writes the leading VCF fields for the specified marker: the marker
+ // itself, a missing QUAL, a PASS filter, an INFO field holding AR2, DR2
+ // and one AF value per ALT allele (rounded to two significant digits),
+ // and the FORMAT field GT:DS:GP.
+ private static void printFixedFields(GenotypeValues gv, int marker,
+         PrintWriter out) {
+     GprobsStatistics stats = new GprobsStatistics(gv, marker);
+     float[] freqs = stats.alleleFreq();
+     out.print(gv.marker(marker));
+     out.print(Const.tab);
+     out.print(Const.MISSING_DATA_CHAR);     // QUAL
+     out.print(Const.tab);
+     out.print(PASS);                        // FILTER
+     out.print(Const.tab);
+     // INFO
+     out.print("AR2=");
+     out.print(df2_fixed.format(stats.allelicR2()));
+     out.print(";DR2=");
+     out.print(df2_fixed.format(stats.doseR2()));
+     int al = 1;
+     while (al < freqs.length) {
+         out.print( (al==1) ? ";AF=" : Const.comma);
+         BigDecimal rounded = new BigDecimal(freqs[al]).round(mathContext2);
+         out.print(rounded.doubleValue());
+         ++al;
+     }
+     out.print(Const.tab);
+     out.print("GT:DS:GP");                  // FORMAT
+ }
+
+ // Writes the leading VCF fields for the specified marker: the marker
+ // itself, a missing QUAL and a PASS filter. If printR2 is true the INFO
+ // field holds AR2, DR2 and one AF value per ALT allele (rounded to two
+ // significant digits) and the FORMAT field is GT:DS, or GT:DS:GP when
+ // gprobs is true. If printR2 is false the INFO field is missing and the
+ // FORMAT field is GT.
+ private static void printFixedFields(AlleleProbs alProbs,
+         int marker, boolean printR2, boolean gprobs, PrintWriter out) {
+     out.print(alProbs.marker(marker));
+     out.print(Const.tab);
+     out.print(Const.MISSING_DATA_CHAR);     // QUAL
+     out.print(Const.tab);
+     out.print(PASS);                        // FILTER
+     out.print(Const.tab);
+     if (printR2) {
+         // The per-marker statistics are only needed for the INFO field,
+         // so they are computed only when that field is printed.
+         GprobsStatistics gpm = new GprobsStatistics(alProbs, marker);
+         float[] alleleFreq = gpm.alleleFreq();
+         out.print("AR2=");                  // INFO
+         out.print(df2_fixed.format(gpm.allelicR2()));
+         out.print(";DR2=");
+         out.print(df2_fixed.format(gpm.doseR2()));
+         for (int j=1; j<alleleFreq.length; ++j) {
+             out.print( (j==1) ? ";AF=" : Const.comma);
+             BigDecimal bd = new BigDecimal(alleleFreq[j]).round(mathContext2);
+             out.print(bd.doubleValue());
+         }
+         out.print(Const.tab);
+         out.print(gprobs ? "GT:DS:GP" : "GT:DS");   // FORMAT
+     }
+     else {
+         out.print(Const.MISSING_DATA_CHAR); // INFO
+         out.print(Const.tab);
+         out.print("GT");                    // FORMAT
+     }
+ }
+}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/beagle.git
More information about the debian-med-commit
mailing list