[med-svn] [tophat] 02/07: New upstream version 2.1.1+dfsg1
Andreas Tille
tille at debian.org
Wed Dec 6 15:16:18 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository tophat.
commit b3cee615b88f2039e2e8591e62062ef41401312c
Author: Andreas Tille <tille at debian.org>
Date: Wed Dec 6 16:01:25 2017 +0100
New upstream version 2.1.1+dfsg1
---
src/intervaltree/__init__.py | 22 -
src/intervaltree/interval.py | 302 --
src/intervaltree/intervaltree.py | 947 -----
src/intervaltree/node.py | 593 ---
src/samtools-0.1.18/AUTHORS | 20 -
src/samtools-0.1.18/COPYING | 21 -
src/samtools-0.1.18/ChangeLog | 5948 -----------------------------
src/samtools-0.1.18/INSTALL | 30 -
src/samtools-0.1.18/Makefile | 93 -
src/samtools-0.1.18/Makefile.mingw | 63 -
src/samtools-0.1.18/NEWS | 806 ----
src/samtools-0.1.18/bam.c | 362 --
src/samtools-0.1.18/bam.h | 763 ----
src/samtools-0.1.18/bam2bcf.c | 351 --
src/samtools-0.1.18/bam2bcf.h | 57 -
src/samtools-0.1.18/bam2bcf_indel.c | 475 ---
src/samtools-0.1.18/bam2depth.c | 112 -
src/samtools-0.1.18/bam_aux.c | 213 --
src/samtools-0.1.18/bam_cat.c | 184 -
src/samtools-0.1.18/bam_color.c | 127 -
src/samtools-0.1.18/bam_endian.h | 42 -
src/samtools-0.1.18/bam_import.c | 485 ---
src/samtools-0.1.18/bam_index.c | 719 ----
src/samtools-0.1.18/bam_lpileup.c | 198 -
src/samtools-0.1.18/bam_mate.c | 70 -
src/samtools-0.1.18/bam_md.c | 384 --
src/samtools-0.1.18/bam_pileup.c | 437 ---
src/samtools-0.1.18/bam_plcmd.c | 546 ---
src/samtools-0.1.18/bam_reheader.c | 61 -
src/samtools-0.1.18/bam_rmdup.c | 206 -
src/samtools-0.1.18/bam_rmdupse.c | 159 -
src/samtools-0.1.18/bam_sort.c | 438 ---
src/samtools-0.1.18/bam_stat.c | 77 -
src/samtools-0.1.18/bam_tview.c | 440 ---
src/samtools-0.1.18/bamtk.c | 109 -
src/samtools-0.1.18/bcftools/Makefile | 51 -
src/samtools-0.1.18/bcftools/README | 36 -
src/samtools-0.1.18/bcftools/bcf.c | 328 --
src/samtools-0.1.18/bcftools/bcf.h | 190 -
src/samtools-0.1.18/bcftools/bcf.tex | 77 -
src/samtools-0.1.18/bcftools/bcf2qcall.c | 91 -
src/samtools-0.1.18/bcftools/bcfutils.c | 390 --
src/samtools-0.1.18/bcftools/call1.c | 586 ---
src/samtools-0.1.18/bcftools/em.c | 310 --
src/samtools-0.1.18/bcftools/fet.c | 112 -
src/samtools-0.1.18/bcftools/index.c | 335 --
src/samtools-0.1.18/bcftools/kfunc.c | 162 -
src/samtools-0.1.18/bcftools/kmin.c | 209 -
src/samtools-0.1.18/bcftools/kmin.h | 46 -
src/samtools-0.1.18/bcftools/main.c | 190 -
src/samtools-0.1.18/bcftools/mut.c | 127 -
src/samtools-0.1.18/bcftools/prob1.c | 554 ---
src/samtools-0.1.18/bcftools/prob1.h | 42 -
src/samtools-0.1.18/bcftools/vcf.c | 244 --
src/samtools-0.1.18/bcftools/vcfutils.pl | 567 ---
src/samtools-0.1.18/bedidx.c | 162 -
src/samtools-0.1.18/bgzf.c | 714 ----
src/samtools-0.1.18/bgzf.h | 157 -
src/samtools-0.1.18/bgzip.c | 206 -
src/samtools-0.1.18/cut_target.c | 193 -
src/samtools-0.1.18/errmod.c | 130 -
src/samtools-0.1.18/errmod.h | 24 -
src/samtools-0.1.18/faidx.c | 432 ---
src/samtools-0.1.18/faidx.h | 103 -
src/samtools-0.1.18/kaln.c | 486 ---
src/samtools-0.1.18/kaln.h | 67 -
src/samtools-0.1.18/khash.h | 528 ---
src/samtools-0.1.18/klist.h | 96 -
src/samtools-0.1.18/knetfile.c | 632 ---
src/samtools-0.1.18/knetfile.h | 75 -
src/samtools-0.1.18/kprobaln.c | 278 --
src/samtools-0.1.18/kprobaln.h | 49 -
src/samtools-0.1.18/kseq.h | 224 --
src/samtools-0.1.18/ksort.h | 281 --
src/samtools-0.1.18/kstring.c | 212 -
src/samtools-0.1.18/kstring.h | 117 -
src/samtools-0.1.18/phase.c | 687 ----
src/samtools-0.1.18/razf.c | 853 -----
src/samtools-0.1.18/razf.h | 134 -
src/samtools-0.1.18/razip.c | 141 -
src/samtools-0.1.18/sam.c | 179 -
src/samtools-0.1.18/sam.h | 98 -
src/samtools-0.1.18/sam_header.c | 736 ----
src/samtools-0.1.18/sam_header.h | 24 -
src/samtools-0.1.18/sam_view.c | 406 --
src/samtools-0.1.18/sample.c | 107 -
src/samtools-0.1.18/sample.h | 17 -
src/samtools-0.1.18/samtools.1 | 994 -----
src/sortedcontainers/__init__.py | 55 -
src/sortedcontainers/sorteddict.py | 737 ----
src/sortedcontainers/sortedlist.py | 1233 ------
src/sortedcontainers/sortedlistwithkey.py | 1331 -------
src/sortedcontainers/sortedset.py | 294 --
93 files changed, 33699 deletions(-)
diff --git a/src/intervaltree/__init__.py b/src/intervaltree/__init__.py
deleted file mode 100755
index 4b8690f..0000000
--- a/src/intervaltree/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-"""
-intervaltree: A mutable, self-balancing interval tree for Python 2 and 3.
-Queries may be by point, by range overlap, or by range envelopment.
-
-Root package.
-
-Copyright 2013-2015 Chaim-Leib Halbert
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from .interval import Interval
-from .intervaltree import IntervalTree
diff --git a/src/intervaltree/interval.py b/src/intervaltree/interval.py
deleted file mode 100755
index 4c19cba..0000000
--- a/src/intervaltree/interval.py
+++ /dev/null
@@ -1,302 +0,0 @@
-"""
-intervaltree: A mutable, self-balancing interval tree for Python 2 and 3.
-Queries may be by point, by range overlap, or by range envelopment.
-
-Interval class
-
-Copyright 2013-2015 Chaim-Leib Halbert
-Modifications copyright 2014 Konstantin Tretyakov
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from numbers import Number
-from collections import namedtuple
-
-
-# noinspection PyBroadException
-class Interval(namedtuple('IntervalBase', ['begin', 'end', 'data'])):
- __slots__ = () # Saves memory, avoiding the need to create __dict__ for each interval
-
- def __new__(cls, begin, end, data=None):
- return super(Interval, cls).__new__(cls, begin, end, data)
-
- def overlaps(self, begin, end=None):
- """
- Whether the interval overlaps the given point, range or Interval.
- :param begin: beginning point of the range, or the point, or an Interval
- :param end: end point of the range. Optional if not testing ranges.
- :return: True or False
- :rtype: bool
- """
- if end is not None:
- return (
- (begin <= self.begin < end) or
- (begin < self.end <= end) or
- (self.begin <= begin < self.end) or
- (self.begin < end <= self.end)
- )
- try:
- return self.overlaps(begin.begin, begin.end)
- except:
- return self.contains_point(begin)
-
- def contains_point(self, p):
- """
- Whether the Interval contains p.
- :param p: a point
- :return: True or False
- :rtype: bool
- """
- return self.begin <= p < self.end
-
- def range_matches(self, other):
- """
- Whether the begins equal and the ends equal. Compare __eq__().
- :param other: Interval
- :return: True or False
- :rtype: bool
- """
- return (
- self.begin == other.begin and
- self.end == other.end
- )
-
- def contains_interval(self, other):
- """
- Whether other is contained in this Interval.
- :param other: Interval
- :return: True or False
- :rtype: bool
- """
- return (
- self.begin <= other.begin and
- self.end >= other.end
- )
-
- def distance_to(self, other):
- """
- Returns the size of the gap between intervals, or 0
- if they touch or overlap.
- :param other: Interval or point
- :return: distance
- :rtype: Number
- """
- if self.overlaps(other):
- return 0
- try:
- if self.begin < other.begin:
- return other.begin - self.end
- else:
- return self.begin - other.end
- except:
- if self.end < other:
- return other - self.end
- else:
- return self.begin - other
-
- def is_null(self):
- """
- Whether this equals the null interval.
- :return: True if end <= begin else False
- :rtype: bool
- """
- return self.begin >= self.end
-
- def length(self):
- """
- The distance covered by this Interval.
- :return: length
- :type: Number
- """
- if self.is_null():
- return 0
- return self.end - self.begin
-
- def __hash__(self):
- """
- Depends on begin and end only.
- :return: hash
- :rtype: Number
- """
- return hash((self.begin, self.end))
-
- def __eq__(self, other):
- """
- Whether the begins equal, the ends equal, and the data fields
- equal. Compare range_matches().
- :param other: Interval
- :return: True or False
- :rtype: bool
- """
- return (
- self.begin == other.begin and
- self.end == other.end and
- self.data == other.data
- )
-
- def __cmp__(self, other):
- """
- Tells whether other sorts before, after or equal to this
- Interval.
-
- Sorting is by begins, then by ends, then by data fields.
-
- If data fields are not both sortable types, data fields are
- compared alphabetically by type name.
- :param other: Interval
- :return: -1, 0, 1
- :rtype: int
- """
- s = self[0:2]
- try:
- o = other[0:2]
- except:
- o = (other,)
- if s != o:
- return -1 if s < o else 1
- try:
- if self.data == other.data:
- return 0
- return -1 if self.data < other.data else 1
- except TypeError:
- s = type(self.data).__name__
- o = type(other.data).__name__
- if s == o:
- return 0
- return -1 if s < o else 1
-
- def __lt__(self, other):
- """
- Less than operator. Parrots __cmp__()
- :param other: Interval or point
- :return: True or False
- :rtype: bool
- """
- return self.__cmp__(other) < 0
-
- def __gt__(self, other):
- """
- Greater than operator. Parrots __cmp__()
- :param other: Interval or point
- :return: True or False
- :rtype: bool
- """
- return self.__cmp__(other) > 0
-
- def _raise_if_null(self, other):
- """
- :raises ValueError: if either self or other is a null Interval
- """
- if self.is_null():
- raise ValueError("Cannot compare null Intervals!")
- if hasattr(other, 'is_null') and other.is_null():
- raise ValueError("Cannot compare null Intervals!")
-
- def lt(self, other):
- """
- Strictly less than. Returns True if no part of this Interval
- extends higher than or into other.
- :raises ValueError: if either self or other is a null Interval
- :param other: Interval or point
- :return: True or False
- :rtype: bool
- """
- self._raise_if_null(other)
- return self.end <= getattr(other, 'begin', other)
-
- def le(self, other):
- """
- Less than or overlaps. Returns True if no part of this Interval
- extends higher than other.
- :raises ValueError: if either self or other is a null Interval
- :param other: Interval or point
- :return: True or False
- :rtype: bool
- """
- self._raise_if_null(other)
- return self.end <= getattr(other, 'end', other)
-
- def gt(self, other):
- """
- Strictly greater than. Returns True if no part of this Interval
- extends lower than or into other.
- :raises ValueError: if either self or other is a null Interval
- :param other: Interval or point
- :return: True or False
- :rtype: bool
- """
- self._raise_if_null(other)
- if hasattr(other, 'end'):
- return self.begin >= other.end
- else:
- return self.begin > other
-
- def ge(self, other):
- """
- Greater than or overlaps. Returns True if no part of this Interval
- extends lower than other.
- :raises ValueError: if either self or other is a null Interval
- :param other: Interval or point
- :return: True or False
- :rtype: bool
- """
- self._raise_if_null(other)
- return self.begin >= getattr(other, 'begin', other)
-
- def _get_fields(self):
- """
- Used by str, unicode, repr and __reduce__.
-
- Returns only the fields necessary to reconstruct the Interval.
- :return: reconstruction info
- :rtype: tuple
- """
- if self.data is not None:
- return self.begin, self.end, self.data
- else:
- return self.begin, self.end
-
- def __repr__(self):
- """
- Executable string representation of this Interval.
- :return: string representation
- :rtype: str
- """
- if isinstance(self.begin, Number):
- s_begin = str(self.begin)
- s_end = str(self.end)
- else:
- s_begin = repr(self.begin)
- s_end = repr(self.end)
- if self.data is None:
- return "Interval({0}, {1})".format(s_begin, s_end)
- else:
- return "Interval({0}, {1}, {2})".format(s_begin, s_end, repr(self.data))
-
- __str__ = __repr__
-
- def copy(self):
- """
- Shallow copy.
- :return: copy of self
- :rtype: Interval
- """
- return Interval(self.begin, self.end, self.data)
-
- def __reduce__(self):
- """
- For pickle-ing.
- :return: pickle data
- :rtype: tuple
- """
- return Interval, self._get_fields()
diff --git a/src/intervaltree/intervaltree.py b/src/intervaltree/intervaltree.py
deleted file mode 100755
index 0366f5f..0000000
--- a/src/intervaltree/intervaltree.py
+++ /dev/null
@@ -1,947 +0,0 @@
-"""
-intervaltree: A mutable, self-balancing interval tree for Python 2 and 3.
-Queries may be by point, by range overlap, or by range envelopment.
-
-Core logic.
-
-Copyright 2013-2015 Chaim-Leib Halbert
-Modifications Copyright 2014 Konstantin Tretyakov
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from .interval import Interval
-from .node import Node
-from numbers import Number
-import collections
-from sortedcontainers import SortedDict
-from warnings import warn
-
-try:
- xrange # Python 2?
-except NameError:
- xrange = range
-
-
-# noinspection PyBroadException
-class IntervalTree(collections.MutableSet):
- """
- A binary lookup tree of intervals.
- The intervals contained in the tree are represented using ``Interval(a, b, data)`` objects.
- Each such object represents a half-open interval ``[a, b)`` with optional data.
-
- Examples:
- ---------
-
- Initialize a blank tree::
-
- >>> tree = IntervalTree()
- >>> tree
- IntervalTree()
-
- Initialize a tree from an iterable set of Intervals in O(n * log n)::
-
- >>> tree = IntervalTree([Interval(-10, 10), Interval(-20.0, -10.0)])
- >>> tree
- IntervalTree([Interval(-20.0, -10.0), Interval(-10, 10)])
- >>> len(tree)
- 2
-
- Note that this is a set, i.e. repeated intervals are ignored. However,
- Intervals with different data fields are regarded as different::
-
- >>> tree = IntervalTree([Interval(-10, 10), Interval(-10, 10), Interval(-10, 10, "x")])
- >>> tree
- IntervalTree([Interval(-10, 10), Interval(-10, 10, 'x')])
- >>> len(tree)
- 2
-
- Insertions::
- >>> tree = IntervalTree()
- >>> tree[0:1] = "data"
- >>> tree.add(Interval(10, 20))
- >>> tree.addi(19.9, 20)
- >>> tree
- IntervalTree([Interval(0, 1, 'data'), Interval(10, 20), Interval(19.9, 20)])
- >>> tree.update([Interval(19.9, 20.1), Interval(20.1, 30)])
- >>> len(tree)
- 5
-
- Inserting the same Interval twice does nothing::
- >>> tree = IntervalTree()
- >>> tree[-10:20] = "arbitrary data"
- >>> tree[-10:20] = None # Note that this is also an insertion
- >>> tree
- IntervalTree([Interval(-10, 20), Interval(-10, 20, 'arbitrary data')])
- >>> tree[-10:20] = None # This won't change anything
- >>> tree[-10:20] = "arbitrary data" # Neither will this
- >>> len(tree)
- 2
-
- Deletions::
- >>> tree = IntervalTree(Interval(b, e) for b, e in [(-10, 10), (-20, -10), (10, 20)])
- >>> tree
- IntervalTree([Interval(-20, -10), Interval(-10, 10), Interval(10, 20)])
- >>> tree.remove(Interval(-10, 10))
- >>> tree
- IntervalTree([Interval(-20, -10), Interval(10, 20)])
- >>> tree.remove(Interval(-10, 10))
- Traceback (most recent call last):
- ...
- ValueError
- >>> tree.discard(Interval(-10, 10)) # Same as remove, but no exception on failure
- >>> tree
- IntervalTree([Interval(-20, -10), Interval(10, 20)])
-
- Delete intervals, overlapping a given point::
-
- >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
- >>> tree.remove_overlap(1.1)
- >>> tree
- IntervalTree([Interval(-1.1, 1.1)])
-
- Delete intervals, overlapping an interval::
-
- >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
- >>> tree.remove_overlap(0, 0.5)
- >>> tree
- IntervalTree([Interval(0.5, 1.7)])
- >>> tree.remove_overlap(1.7, 1.8)
- >>> tree
- IntervalTree([Interval(0.5, 1.7)])
- >>> tree.remove_overlap(1.6, 1.6) # Null interval does nothing
- >>> tree
- IntervalTree([Interval(0.5, 1.7)])
- >>> tree.remove_overlap(1.6, 1.5) # Ditto
- >>> tree
- IntervalTree([Interval(0.5, 1.7)])
-
- Delete intervals, enveloped in the range::
-
- >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
- >>> tree.remove_envelop(-1.0, 1.5)
- >>> tree
- IntervalTree([Interval(-1.1, 1.1), Interval(0.5, 1.7)])
- >>> tree.remove_envelop(-1.1, 1.5)
- >>> tree
- IntervalTree([Interval(0.5, 1.7)])
- >>> tree.remove_envelop(0.5, 1.5)
- >>> tree
- IntervalTree([Interval(0.5, 1.7)])
- >>> tree.remove_envelop(0.5, 1.7)
- >>> tree
- IntervalTree()
-
- Point/interval overlap queries::
-
- >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
- >>> assert tree[-1.1] == set([Interval(-1.1, 1.1)])
- >>> assert tree.search(1.1) == set([Interval(-0.5, 1.5), Interval(0.5, 1.7)]) # Same as tree[1.1]
- >>> assert tree[-0.5:0.5] == set([Interval(-0.5, 1.5), Interval(-1.1, 1.1)]) # Interval overlap query
- >>> assert tree.search(1.5, 1.5) == set() # Same as tree[1.5:1.5]
- >>> assert tree.search(1.5) == set([Interval(0.5, 1.7)]) # Same as tree[1.5]
-
- >>> assert tree.search(1.7, 1.8) == set()
-
- Envelop queries::
-
- >>> assert tree.search(-0.5, 0.5, strict=True) == set()
- >>> assert tree.search(-0.4, 1.7, strict=True) == set([Interval(0.5, 1.7)])
-
- Membership queries::
-
- >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
- >>> Interval(-0.5, 0.5) in tree
- False
- >>> Interval(-1.1, 1.1) in tree
- True
- >>> Interval(-1.1, 1.1, "x") in tree
- False
- >>> tree.overlaps(-1.1)
- True
- >>> tree.overlaps(1.7)
- False
- >>> tree.overlaps(1.7, 1.8)
- False
- >>> tree.overlaps(-1.2, -1.1)
- False
- >>> tree.overlaps(-1.2, -1.0)
- True
-
- Sizing::
-
- >>> tree = IntervalTree([Interval(-1.1, 1.1), Interval(-0.5, 1.5), Interval(0.5, 1.7)])
- >>> len(tree)
- 3
- >>> tree.is_empty()
- False
- >>> IntervalTree().is_empty()
- True
- >>> not tree
- False
- >>> not IntervalTree()
- True
- >>> print(tree.begin()) # using print() because of floats in Python 2.6
- -1.1
- >>> print(tree.end()) # ditto
- 1.7
-
- Iteration::
-
- >>> tree = IntervalTree([Interval(-11, 11), Interval(-5, 15), Interval(5, 17)])
- >>> [iv.begin for iv in sorted(tree)]
- [-11, -5, 5]
- >>> assert tree.items() == set([Interval(-5, 15), Interval(-11, 11), Interval(5, 17)])
-
- Copy- and typecasting, pickling::
-
- >>> tree0 = IntervalTree([Interval(0, 1, "x"), Interval(1, 2, ["x"])])
- >>> tree1 = IntervalTree(tree0) # Shares Interval objects
- >>> tree2 = tree0.copy() # Shallow copy (same as above, as Intervals are singletons)
- >>> import pickle
- >>> tree3 = pickle.loads(pickle.dumps(tree0)) # Deep copy
- >>> list(tree0[1])[0].data[0] = "y" # affects shallow copies, but not deep copies
- >>> tree0
- IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])])
- >>> tree1
- IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])])
- >>> tree2
- IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['y'])])
- >>> tree3
- IntervalTree([Interval(0, 1, 'x'), Interval(1, 2, ['x'])])
-
- Equality testing::
-
- >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1)])
- True
- >>> IntervalTree([Interval(0, 1)]) == IntervalTree([Interval(0, 1, "x")])
- False
- """
- @classmethod
- def from_tuples(cls, tups):
- """
- Create a new IntervalTree from an iterable of 2- or 3-tuples,
- where the tuple lists begin, end, and optionally data.
- """
- ivs = [Interval(*t) for t in tups]
- return IntervalTree(ivs)
-
- def __init__(self, intervals=None):
- """
- Set up a tree. If intervals is provided, add all the intervals
- to the tree.
-
- Completes in O(n*log n) time.
- """
- intervals = set(intervals) if intervals is not None else set()
- for iv in intervals:
- if iv.is_null():
- raise ValueError(
- "IntervalTree: Null Interval objects not allowed in IntervalTree:"
- " {0}".format(iv)
- )
- self.all_intervals = intervals
- self.top_node = Node.from_intervals(self.all_intervals)
- self.boundary_table = SortedDict()
- for iv in self.all_intervals:
- self._add_boundaries(iv)
-
- def copy(self):
- """
- Construct a new IntervalTree using shallow copies of the
- intervals in the source tree.
-
- Completes in O(n*log n) time.
- :rtype: IntervalTree
- """
- return IntervalTree(iv.copy() for iv in self)
-
- def _add_boundaries(self, interval):
- """
- Records the boundaries of the interval in the boundary table.
- """
- begin = interval.begin
- end = interval.end
- if begin in self.boundary_table:
- self.boundary_table[begin] += 1
- else:
- self.boundary_table[begin] = 1
-
- if end in self.boundary_table:
- self.boundary_table[end] += 1
- else:
- self.boundary_table[end] = 1
-
- def _remove_boundaries(self, interval):
- """
- Removes the boundaries of the interval from the boundary table.
- """
- begin = interval.begin
- end = interval.end
- if self.boundary_table[begin] == 1:
- del self.boundary_table[begin]
- else:
- self.boundary_table[begin] -= 1
-
- if self.boundary_table[end] == 1:
- del self.boundary_table[end]
- else:
- self.boundary_table[end] -= 1
-
- def add(self, interval):
- """
- Adds an interval to the tree, if not already present.
-
- Completes in O(log n) time.
- """
- if interval in self:
- return
-
- if interval.is_null():
- raise ValueError(
- "IntervalTree: Null Interval objects not allowed in IntervalTree:"
- " {0}".format(interval)
- )
-
- if not self.top_node:
- self.top_node = Node.from_interval(interval)
- else:
- self.top_node = self.top_node.add(interval)
- self.all_intervals.add(interval)
- self._add_boundaries(interval)
- append = add
-
- def addi(self, begin, end, data=None):
- """
- Shortcut for add(Interval(begin, end, data)).
-
- Completes in O(log n) time.
- """
- return self.add(Interval(begin, end, data))
- appendi = addi
-
- def update(self, intervals):
- """
- Given an iterable of intervals, add them to the tree.
-
- Completes in O(m*log(n+m), where m = number of intervals to
- add.
- """
- for iv in intervals:
- self.add(iv)
-
- def extend(self, intervals):
- """
- Deprecated: Replaced by update().
- """
- warn("IntervalTree.extend() has been deprecated. Consider using update() instead", DeprecationWarning)
- self.update(intervals)
-
- def remove(self, interval):
- """
- Removes an interval from the tree, if present. If not, raises
- ValueError.
-
- Completes in O(log n) time.
- """
- #self.verify()
- if interval not in self:
- #print(self.all_intervals)
- raise ValueError
- self.top_node = self.top_node.remove(interval)
- self.all_intervals.remove(interval)
- self._remove_boundaries(interval)
- #self.verify()
-
- def removei(self, begin, end, data=None):
- """
- Shortcut for remove(Interval(begin, end, data)).
-
- Completes in O(log n) time.
- """
- return self.remove(Interval(begin, end, data))
-
- def discard(self, interval):
- """
- Removes an interval from the tree, if present. If not, does
- nothing.
-
- Completes in O(log n) time.
- """
- if interval not in self:
- return
- self.all_intervals.discard(interval)
- self.top_node = self.top_node.discard(interval)
- self._remove_boundaries(interval)
-
- def discardi(self, begin, end, data=None):
- """
- Shortcut for discard(Interval(begin, end, data)).
-
- Completes in O(log n) time.
- """
- return self.discard(Interval(begin, end, data))
-
- def difference(self, other):
- """
- Returns a new tree, comprising all intervals in self but not
- in other.
- """
- ivs = set()
- for iv in self:
- if iv not in other:
- ivs.add(iv)
- return IntervalTree(ivs)
-
- def difference_update(self, other):
- """
- Removes all intervals in other from self.
- """
- for iv in other:
- self.discard(iv)
-
- def union(self, other):
- """
- Returns a new tree, comprising all intervals from self
- and other.
- """
- return IntervalTree(set(self).union(other))
-
- def intersection(self, other):
- """
- Returns a new tree of all intervals common to both self and
- other.
- """
- ivs = set()
- shorter, longer = sorted([self, other], key=len)
- for iv in shorter:
- if iv in longer:
- ivs.add(iv)
- return IntervalTree(ivs)
-
- def intersection_update(self, other):
- """
- Removes intervals from self unless they also exist in other.
- """
- for iv in self:
- if iv not in other:
- self.remove(iv)
-
- def symmetric_difference(self, other):
- """
- Return a tree with elements only in self or other but not
- both.
- """
- if not isinstance(other, set): other = set(other)
- me = set(self)
- ivs = me - other + (other - me)
- return IntervalTree(ivs)
-
- def symmetric_difference_update(self, other):
- """
- Throws out all intervals except those only in self or other,
- not both.
- """
- other = set(other)
- for iv in self:
- if iv in other:
- self.remove(iv)
- other.remove(iv)
- self.update(other)
-
- def remove_overlap(self, begin, end=None):
- """
- Removes all intervals overlapping the given point or range.
-
- Completes in O((r+m)*log n) time, where:
- * n = size of the tree
- * m = number of matches
- * r = size of the search range (this is 1 for a point)
- """
- hitlist = self.search(begin, end)
- for iv in hitlist:
- self.remove(iv)
-
- def remove_envelop(self, begin, end):
- """
- Removes all intervals completely enveloped in the given range.
-
- Completes in O((r+m)*log n) time, where:
- * n = size of the tree
- * m = number of matches
- * r = size of the search range (this is 1 for a point)
- """
- hitlist = self.search(begin, end, strict=True)
- for iv in hitlist:
- self.remove(iv)
-
- def chop(self, begin, end, datafunc=None):
- """
- Like remove_envelop(), but trims back Intervals hanging into
- the chopped area so that nothing overlaps.
- """
- insertions = set()
- begin_hits = [iv for iv in self[begin] if iv.begin < begin]
- end_hits = [iv for iv in self[end] if iv.end > end]
-
- if datafunc:
- for iv in begin_hits:
- insertions.add(Interval(iv.begin, begin, datafunc(iv, True)))
- for iv in end_hits:
- insertions.add(Interval(end, iv.end, datafunc(iv, False)))
- else:
- for iv in begin_hits:
- insertions.add(Interval(iv.begin, begin, iv.data))
- for iv in end_hits:
- insertions.add(Interval(end, iv.end, iv.data))
-
- self.remove_envelop(begin, end)
- self.difference_update(begin_hits)
- self.difference_update(end_hits)
- self.update(insertions)
-
- def slice(self, point, datafunc=None):
- """
- Split Intervals that overlap point into two new Intervals. if
- specified, uses datafunc(interval, islower=True/False) to
- set the data field of the new Intervals.
- :param point: where to slice
- :param datafunc(interval, isupper): callable returning a new
- value for the interval's data field
- """
- hitlist = set(iv for iv in self[point] if iv.begin < point)
- insertions = set()
- if datafunc:
- for iv in hitlist:
- insertions.add(Interval(iv.begin, point, datafunc(iv, True)))
- insertions.add(Interval(point, iv.end, datafunc(iv, False)))
- else:
- for iv in hitlist:
- insertions.add(Interval(iv.begin, point, iv.data))
- insertions.add(Interval(point, iv.end, iv.data))
- self.difference_update(hitlist)
- self.update(insertions)
-
- def clear(self):
- """
- Empties the tree.
-
- Completes in O(1) tine.
- """
- self.__init__()
-
- def find_nested(self):
- """
- Returns a dictionary mapping parent intervals to sets of
- intervals overlapped by and contained in the parent.
-
- Completes in O(n^2) time.
- :rtype: dict of [Interval, set of Interval]
- """
- result = {}
-
- def add_if_nested():
- if parent.contains_interval(child):
- if parent not in result:
- result[parent] = set()
- result[parent].add(child)
-
- long_ivs = sorted(self.all_intervals, key=Interval.length, reverse=True)
- for i, parent in enumerate(long_ivs):
- for child in long_ivs[i + 1:]:
- add_if_nested()
- return result
-
- def overlaps(self, begin, end=None):
- """
- Returns whether some interval in the tree overlaps the given
- point or range.
-
- Completes in O(r*log n) time, where r is the size of the
- search range.
- :rtype: bool
- """
- if end is not None:
- return self.overlaps_range(begin, end)
- elif isinstance(begin, Number):
- return self.overlaps_point(begin)
- else:
- return self.overlaps_range(begin.begin, begin.end)
-
- def overlaps_point(self, p):
- """
- Returns whether some interval in the tree overlaps p.
-
- Completes in O(log n) time.
- :rtype: bool
- """
- if self.is_empty():
- return False
- return bool(self.top_node.contains_point(p))
-
- def overlaps_range(self, begin, end):
- """
- Returns whether some interval in the tree overlaps the given
- range.
-
- Completes in O(r*log n) time, where r is the range length and n
- is the table size.
- :rtype: bool
- """
- if self.is_empty():
- return False
- elif self.overlaps_point(begin):
- return True
- return any(
- self.overlaps_point(bound)
- for bound in self.boundary_table
- if begin <= bound < end
- )
-
- def split_overlaps(self):
- """
- Finds all intervals with overlapping ranges and splits them
- along the range boundaries.
-
- Completes in worst-case O(n^2*log n) time (many interval
- boundaries are inside many intervals), best-case O(n*log n)
- time (small number of overlaps << n per interval).
- """
- if not self:
- return
- if len(self.boundary_table) == 2:
- return
-
- bounds = sorted(self.boundary_table) # get bound locations
-
- new_ivs = set()
- for lbound, ubound in zip(bounds[:-1], bounds[1:]):
- for iv in self[lbound]:
- new_ivs.add(Interval(lbound, ubound, iv.data))
-
- self.__init__(new_ivs)
-
- def items(self):
- """
- Constructs and returns a set of all intervals in the tree.
-
- Completes in O(n) time.
- :rtype: set of Interval
- """
- return set(self.all_intervals)
-
- def is_empty(self):
- """
- Returns whether the tree is empty.
-
- Completes in O(1) time.
- :rtype: bool
- """
- return 0 == len(self)
-
- def search(self, begin, end=None, strict=False):
- """
- Returns a set of all intervals overlapping the given range. Or,
- if strict is True, returns the set of all intervals fully
- contained in the range [begin, end].
-
- Completes in O(m + k*log n) time, where:
- * n = size of the tree
- * m = number of matches
- * k = size of the search range (this is 1 for a point)
- :rtype: set of Interval
- """
- root = self.top_node
- if not root:
- return set()
- if end is None:
- try:
- iv = begin
- return self.search(iv.begin, iv.end, strict=strict)
- except:
- return root.search_point(begin, set())
- elif begin >= end:
- return set()
- else:
- result = root.search_point(begin, set())
-
- boundary_table = self.boundary_table
- bound_begin = boundary_table.bisect_left(begin)
- bound_end = boundary_table.bisect_left(end) # exclude final end bound
- result.update(root.search_overlap(
- # slice notation is slightly slower
- boundary_table.iloc[index] for index in xrange(bound_begin, bound_end)
- ))
-
- # TODO: improve strict search to use node info instead of less-efficient filtering
- if strict:
- result = set(
- iv for iv in result
- if iv.begin >= begin and iv.end <= end
- )
- return result
-
- def begin(self):
- """
- Returns the lower bound of the first interval in the tree.
-
- Completes in O(n) time.
- :rtype: Number
- """
- if not self.boundary_table:
- return 0
- return min(self.boundary_table)
-
- def end(self):
- """
- Returns the upper bound of the last interval in the tree.
-
- Completes in O(n) time.
- :rtype: Number
- """
- if not self.boundary_table:
- return 0
- return max(self.boundary_table)
-
- def print_structure(self, tostring=False):
- """
- ## FOR DEBUGGING ONLY ##
- Pretty-prints the structure of the tree.
- If tostring is true, prints nothing and returns a string.
- :rtype: None or str
- """
- if self.top_node:
- return self.top_node.print_structure(tostring=tostring)
- else:
- result = "<empty IntervalTree>"
- if not tostring:
- print(result)
- else:
- return result
-
- def verify(self):
- """
- ## FOR DEBUGGING ONLY ##
- Checks the table to ensure that the invariants are held.
- """
- if self.all_intervals:
- ## top_node.all_children() == self.all_intervals
- try:
- assert self.top_node.all_children() == self.all_intervals
- except AssertionError as e:
- print(
- 'Error: the tree and the membership set are out of sync!'
- )
- tivs = set(self.top_node.all_children())
- print('top_node.all_children() - all_intervals:')
- pprint(tivs - self.all_intervals)
- print('all_intervals - top_node.all_children():')
- pprint(self.all_intervals - tivs)
- raise e
-
- ## All members are Intervals
- for iv in self:
- assert isinstance(iv, Interval), (
- "Error: Only Interval objects allowed in IntervalTree:"
- " {0}".format(iv)
- )
-
- ## No null intervals
- for iv in self:
- assert not iv.is_null(), (
- "Error: Null Interval objects not allowed in IntervalTree:"
- " {0}".format(iv)
- )
-
- ## Reconstruct boundary_table
- bound_check = {}
- for iv in self:
- if iv.begin in bound_check:
- bound_check[iv.begin] += 1
- else:
- bound_check[iv.begin] = 1
- if iv.end in bound_check:
- bound_check[iv.end] += 1
- else:
- bound_check[iv.end] = 1
-
- ## Reconstructed boundary table (bound_check) ==? boundary_table
- assert set(self.boundary_table.keys()) == set(bound_check.keys()),\
- 'Error: boundary_table is out of sync with ' \
- 'the intervals in the tree!'
-
- # For efficiency reasons this should be iteritems in Py2, but we
- # don't care much for efficiency in debug methods anyway.
- for key, val in self.boundary_table.items():
- assert bound_check[key] == val, \
- 'Error: boundary_table[{0}] should be {1},' \
- ' but is {2}!'.format(
- key, bound_check[key], val)
-
- ## Internal tree structure
- self.top_node.verify(set())
- else:
- ## Verify empty tree
- assert not self.boundary_table, \
- "Error: boundary table should be empty!"
- assert self.top_node is None, \
- "Error: top_node isn't None!"
-
- def score(self, full_report=False):
- """
- Returns a number between 0 and 1, indicating how suboptimal the tree
- is. The lower, the better. Roughly, this number represents the
- fraction of flawed Intervals in the tree.
- :rtype: float
- """
- if len(self) <= 2:
- return 0.0
-
- n = len(self)
- m = self.top_node.count_nodes()
-
- def s_center_score():
- """
- Returns a normalized score, indicating roughly how many times
- intervals share s_center with other intervals. Output is full-scale
- from 0 to 1.
- :rtype: float
- """
- raw = n - m
- maximum = n - 1
- return raw / float(maximum)
-
- report = {
- "depth": self.top_node.depth_score(n, m),
- "s_center": s_center_score(),
- }
- cumulative = max(report.values())
- report["_cumulative"] = cumulative
- if full_report:
- return report
- return cumulative
-
- def __getitem__(self, index):
- """
- Returns a set of all intervals overlapping the given index or
- slice.
-
- Completes in O(k * log(n) + m) time, where:
- * n = size of the tree
- * m = number of matches
- * k = size of the search range (this is 1 for a point)
- :rtype: set of Interval
- """
- try:
- start, stop = index.start, index.stop
- if start is None:
- start = self.begin()
- if stop is None:
- return set(self)
- if stop is None:
- stop = self.end()
- return self.search(start, stop)
- except AttributeError:
- return self.search(index)
-
- def __setitem__(self, index, value):
- """
- Adds a new interval to the tree. A shortcut for
- add(Interval(index.start, index.stop, value)).
-
- If an identical Interval object with equal range and data
- already exists, does nothing.
-
- Completes in O(log n) time.
- """
- self.addi(index.start, index.stop, value)
-
- def __delitem__(self, point):
- """
- Delete all items overlapping point.
- """
- self.remove_overlap(point)
-
- def __contains__(self, item):
- """
- Returns whether item exists as an Interval in the tree.
- This method only returns True for exact matches; for
- overlaps, see the overlaps() method.
-
- Completes in O(1) time.
- :rtype: bool
- """
- # Removed point-checking code; it might trick the user into
- # thinking that this is O(1), which point-checking isn't.
- #if isinstance(item, Interval):
- return item in self.all_intervals
- #else:
- # return self.contains_point(item)
-
- def containsi(self, begin, end, data=None):
- """
- Shortcut for (Interval(begin, end, data) in tree).
-
- Completes in O(1) time.
- :rtype: bool
- """
- return Interval(begin, end, data) in self
-
- def __iter__(self):
- """
- Returns an iterator over all the intervals in the tree.
-
- Completes in O(1) time.
- :rtype: collections.Iterable[Interval]
- """
- return self.all_intervals.__iter__()
- iter = __iter__
-
- def __len__(self):
- """
- Returns how many intervals are in the tree.
-
- Completes in O(1) time.
- :rtype: int
- """
- return len(self.all_intervals)
-
- def __eq__(self, other):
- """
- Whether two IntervalTrees are equal.
-
- Completes in O(n) time if sizes are equal; O(1) time otherwise.
- :rtype: bool
- """
- return (
- isinstance(other, IntervalTree) and
- self.all_intervals == other.all_intervals
- )
-
- def __repr__(self):
- """
- :rtype: str
- """
- ivs = sorted(self)
- if not ivs:
- return "IntervalTree()"
- else:
- return "IntervalTree({0})".format(ivs)
-
- __str__ = __repr__
-
- def __reduce__(self):
- """
- For pickle-ing.
- :rtype: tuple
- """
- return IntervalTree, (sorted(self.all_intervals),)
diff --git a/src/intervaltree/node.py b/src/intervaltree/node.py
deleted file mode 100755
index e9d9170..0000000
--- a/src/intervaltree/node.py
+++ /dev/null
@@ -1,593 +0,0 @@
-"""
-intervaltree: A mutable, self-balancing interval tree for Python 2 and 3.
-Queries may be by point, by range overlap, or by range envelopment.
-
-Core logic: internal tree nodes.
-
-Copyright 2013-2015 Chaim-Leib Halbert
-Modifications Copyright 2014 Konstantin Tretyakov
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-from operator import attrgetter
-from math import floor, log
-
-
-def l2(num):
- """
- log base 2
- :rtype real
- """
- return log(num, 2)
-
-
-class Node(object):
- def __init__(self,
- x_center=None,
- s_center=set(),
- left_node=None,
- right_node=None):
- self.x_center = x_center
- self.s_center = set(s_center)
- self.left_node = left_node
- self.right_node = right_node
- self.depth = 0 # will be set when rotated
- self.balance = 0 # ditto
- self.rotate()
-
- @classmethod
- def from_interval(cls, interval):
- """
- :rtype : Node
- """
- center = interval.begin
- return Node(center, [interval])
-
- @classmethod
- def from_intervals(cls, intervals):
- """
- :rtype : Node
- """
- if not intervals:
- return None
- node = Node()
- node = node.init_from_sorted(sorted(intervals))
- return node
-
- def init_from_sorted(self, intervals):
- if not intervals:
- return None
- center_iv = intervals[len(intervals) // 2]
- self.x_center = center_iv.begin
- self.s_center = set()
- s_left = []
- s_right = []
- for k in intervals:
- if k.end <= self.x_center:
- s_left.append(k)
- elif k.begin > self.x_center:
- s_right.append(k)
- else:
- self.s_center.add(k)
- self.left_node = Node.from_intervals(s_left)
- self.right_node = Node.from_intervals(s_right)
- return self.rotate()
-
- def center_hit(self, interval):
- """Returns whether interval overlaps self.x_center."""
- return interval.contains_point(self.x_center)
-
- def hit_branch(self, interval):
- """
- Assuming not center_hit(interval), return which branch
- (left=0, right=1) interval is in.
- """
- return interval.begin > self.x_center
-
- def refresh_balance(self):
- """
- Recalculate self.balance and self.depth based on child node values.
- """
- left_depth = self.left_node.depth if self.left_node else 0
- right_depth = self.right_node.depth if self.right_node else 0
- self.depth = 1 + max(left_depth, right_depth)
- self.balance = right_depth - left_depth
-
- def compute_depth(self):
- """
- Recursively computes true depth of the subtree. Should only
- be needed for debugging. Unless something is wrong, the
- depth field should reflect the correct depth of the subtree.
- """
- left_depth = self.left_node.compute_depth() if self.left_node else 0
- right_depth = self.right_node.compute_depth() if self.right_node else 0
- return 1 + max(left_depth, right_depth)
-
- def rotate(self):
- """
- Does rotating, if necessary, to balance this node, and
- returns the new top node.
- """
- self.refresh_balance()
- if abs(self.balance) < 2:
- return self
- # balance > 0 is the heavy side
- my_heavy = self.balance > 0
- child_heavy = self[my_heavy].balance > 0
- if my_heavy == child_heavy or self[my_heavy].balance == 0:
- ## Heavy sides same
- # self save
- # save -> 1 self
- # 1
- #
- ## Heavy side balanced
- # self save save
- # save -> 1 self -> 1 self.rot()
- # 1 2 2
- return self.srotate()
- else:
- return self.drotate()
-
- def srotate(self):
- """Single rotation. Assumes that balance is +-2."""
- # self save save
- # save 3 -> 1 self -> 1 self.rot()
- # 1 2 2 3
- #
- # self save save
- # 3 save -> self 1 -> self.rot() 1
- # 2 1 3 2
-
- #assert(self.balance != 0)
- heavy = self.balance > 0
- light = not heavy
- save = self[heavy]
- #print("srotate: bal={},{}".format(self.balance, save.balance))
- #self.print_structure()
- self[heavy] = save[light] # 2
- #assert(save[light])
- save[light] = self.rotate() # Needed to ensure the 2 and 3 are balanced under new subnode
-
- # Some intervals may overlap both self.x_center and save.x_center
- # Promote those to the new tip of the tree
- promotees = [iv for iv in save[light].s_center if save.center_hit(iv)]
- if promotees:
- for iv in promotees:
- save[light] = save[light].remove(iv) # may trigger pruning
- # TODO: Use Node.add() here, to simplify future balancing improvements.
- # For now, this is the same as augmenting save.s_center, but that may
- # change.
- save.s_center.update(promotees)
- save.refresh_balance()
- return save
-
- def drotate(self):
- # First rotation
- my_heavy = self.balance > 0
- self[my_heavy] = self[my_heavy].srotate()
- self.refresh_balance()
-
- # Second rotation
- result = self.srotate()
-
- return result
-
- def add(self, interval):
- """
- Returns self after adding the interval and balancing.
- """
- if self.center_hit(interval):
- self.s_center.add(interval)
- return self
- else:
- direction = self.hit_branch(interval)
- if not self[direction]:
- self[direction] = Node.from_interval(interval)
- self.refresh_balance()
- return self
- else:
- self[direction] = self[direction].add(interval)
- return self.rotate()
-
- def remove(self, interval):
- """
- Returns self after removing the interval and balancing.
-
- If interval is not present, raise ValueError.
- """
- # since this is a list, called methods can set this to [1],
- # making it true
- done = []
- return self.remove_interval_helper(interval, done, should_raise_error=True)
-
- def discard(self, interval):
- """
- Returns self after removing interval and balancing.
-
- If interval is not present, do nothing.
- """
- done = []
- return self.remove_interval_helper(interval, done, should_raise_error=False)
-
- def remove_interval_helper(self, interval, done, should_raise_error):
- """
- Returns self after removing interval and balancing.
- If interval doesn't exist, raise ValueError.
-
- This method may set done to [1] to tell all callers that
- rebalancing has completed.
-
- See Eternally Confuzzled's jsw_remove_r function (lines 1-32)
- in his AVL tree article for reference.
- """
- #trace = interval.begin == 347 and interval.end == 353
- #if trace: print('\nRemoving from {} interval {}'.format(
- # self.x_center, interval))
- if self.center_hit(interval):
- #if trace: print('Hit at {}'.format(self.x_center))
- if not should_raise_error and interval not in self.s_center:
- done.append(1)
- #if trace: print('Doing nothing.')
- return self
- try:
- # raises error if interval not present - this is
- # desired.
- self.s_center.remove(interval)
- except:
- self.print_structure()
- raise KeyError(interval)
- if self.s_center: # keep this node
- done.append(1) # no rebalancing necessary
- #if trace: print('Removed, no rebalancing.')
- return self
-
- # If we reach here, no intervals are left in self.s_center.
- # So, prune self.
- return self.prune()
- else: # interval not in s_center
- direction = self.hit_branch(interval)
-
- if not self[direction]:
- if should_raise_error:
- raise ValueError
- done.append(1)
- return self
-
- #if trace:
- # print('Descending to {} branch'.format(
- # ['left', 'right'][direction]
- # ))
- self[direction] = self[direction].remove_interval_helper(interval, done, should_raise_error)
-
- # Clean up
- if not done:
- #if trace:
- # print('Rotating {}'.format(self.x_center))
- # self.print_structure()
- return self.rotate()
- return self
-
- def search_overlap(self, point_list):
- """
- Returns all intervals that overlap the point_list.
- """
- result = set()
- for j in point_list:
- self.search_point(j, result)
- return result
-
- def search_point(self, point, result):
- """
- Returns all intervals that contain point.
- """
- for k in self.s_center:
- if k.begin <= point < k.end:
- result.add(k)
- if point < self.x_center and self[0]:
- return self[0].search_point(point, result)
- elif point > self.x_center and self[1]:
- return self[1].search_point(point, result)
- return result
-
- def prune(self):
- """
- On a subtree where the root node's s_center is empty,
- return a new subtree with no empty s_centers.
- """
- if not self[0] or not self[1]: # if I have an empty branch
- direction = not self[0] # graft the other branch here
- #if trace:
- # print('Grafting {} branch'.format(
- # 'right' if direction else 'left'))
-
- result = self[direction]
- #if result: result.verify()
- return result
- else:
- # Replace the root node with the greatest predecessor.
- heir, self[0] = self[0].pop_greatest_child()
- #if trace:
- # print('Replacing {} with {}.'.format(
- # self.x_center, heir.x_center
- # ))
- # print('Removed greatest predecessor:')
- # self.print_structure()
-
- #if self[0]: self[0].verify()
- #if self[1]: self[1].verify()
-
- # Set up the heir as the new root node
- (heir[0], heir[1]) = (self[0], self[1])
- #if trace: print('Setting up the heir:')
- #if trace: heir.print_structure()
-
- # popping the predecessor may have unbalanced this node;
- # fix it
- heir.refresh_balance()
- heir = heir.rotate()
- #heir.verify()
- #if trace: print('Rotated the heir:')
- #if trace: heir.print_structure()
- return heir
-
- def pop_greatest_child(self):
- """
- Used when pruning a node with both a left and a right branch.
- Returns (greatest_child, node), where:
- * greatest_child is a new node to replace the removed node.
- * node is the subtree after:
- - removing the greatest child
- - balancing
- - moving overlapping nodes into greatest_child
-
- Assumes that self.s_center is not empty.
-
- See Eternally Confuzzled's jsw_remove_r function (lines 34-54)
- in his AVL tree article for reference.
- """
- #print('Popping from {}'.format(self.x_center))
- if not self.right_node: # This node is the greatest child.
- # To reduce the chances of an overlap with a parent, return
- # a child node containing the smallest possible number of
- # intervals, as close as possible to the maximum bound.
- ivs = sorted(self.s_center, key=attrgetter('end', 'begin'))
- max_iv = ivs.pop()
- new_x_center = self.x_center
- while ivs:
- next_max_iv = ivs.pop()
- if next_max_iv.end == max_iv.end: continue
- new_x_center = max(new_x_center, next_max_iv.end)
- def get_new_s_center():
- for iv in self.s_center:
- if iv.contains_point(new_x_center): yield iv
-
- # Create a new node with the largest x_center possible.
- child = Node.from_intervals(get_new_s_center())
- # [iv for iv in self.s_center if iv.contains_point(child_x_center)]
- # )
- child.x_center = new_x_center
- self.s_center -= child.s_center
-
- #print('Pop hit! Returning child = {}'.format(
- # child.print_structure(tostring=True)
- # ))
- #assert not child[0]
- #assert not child[1]
-
- if self.s_center:
- #print(' and returning newnode = {}'.format( self ))
- #self.verify()
- return child, self
- else:
- #print(' and returning newnode = {}'.format( self[0] ))
- #if self[0]: self[0].verify()
- return child, self[0] # Rotate left child up
-
- else:
- #print('Pop descent to {}'.format(self[1].x_center))
- (greatest_child, self[1]) = self[1].pop_greatest_child()
- self.refresh_balance()
- new_self = self.rotate()
-
- # Move any overlaps into greatest_child
- for iv in set(new_self.s_center):
- if iv.contains_point(greatest_child.x_center):
- new_self.s_center.remove(iv)
- greatest_child.add(iv)
-
- #print('Pop Returning child = {}'.format(
- # greatest_child.print_structure(tostring=True)
- # ))
- if new_self.s_center:
- #print('and returning newnode = {}'.format(
- # new_self.print_structure(tostring=True)
- # ))
- #new_self.verify()
- return greatest_child, new_self
- else:
- new_self = new_self.prune()
- #print('and returning prune = {}'.format(
- # new_self.print_structure(tostring=True)
- # ))
- #if new_self: new_self.verify()
- return greatest_child, new_self
-
- def contains_point(self, p):
- """
- Returns whether this node or a child overlaps p.
- """
- for iv in self.s_center:
- if iv.contains_point(p):
- return True
- branch = self[p > self.x_center]
- return branch and branch.contains_point(p)
-
- def all_children(self):
- return self.all_children_helper(set())
-
- def all_children_helper(self, result):
- result.update(self.s_center)
- if self[0]:
- self[0].all_children_helper(result)
- if self[1]:
- self[1].all_children_helper(result)
- return result
-
- def verify(self, parents=set()):
- """
- ## DEBUG ONLY ##
- Recursively ensures that the invariants of an interval subtree
- hold.
- """
- assert(isinstance(self.s_center, set))
-
- bal = self.balance
- assert abs(bal) < 2, \
- "Error: Rotation should have happened, but didn't! \n{}".format(
- self.print_structure(tostring=True)
- )
- self.refresh_balance()
- assert bal == self.balance, \
- "Error: self.balance not set correctly! \n{}".format(
- self.print_structure(tostring=True)
- )
-
- assert self.s_center, \
- "Error: s_center is empty! \n{}".format(
- self.print_structure(tostring=True)
- )
- for iv in self.s_center:
- assert hasattr(iv, 'begin')
- assert hasattr(iv, 'end')
- assert iv.begin < iv.end
- assert iv.overlaps(self.x_center)
- for parent in sorted(parents):
- assert not iv.contains_point(parent), \
- "Error: Overlaps ancestor ({})! \n{}\n\n{}".format(
- parent, iv, self.print_structure(tostring=True)
- )
- if self[0]:
- assert self[0].x_center < self.x_center, \
- "Error: Out-of-order left child! {}".format(self.x_center)
- self[0].verify(parents.union([self.x_center]))
- if self[1]:
- assert self[1].x_center > self.x_center, \
- "Error: Out-of-order right child! {}".format(self.x_center)
- self[1].verify(parents.union([self.x_center]))
-
- def __getitem__(self, index):
- """
- Returns the left child if input is equivalent to False, or
- the right side otherwise.
- """
- if index:
- return self.right_node
- else:
- return self.left_node
-
- def __setitem__(self, key, value):
- """Sets the left (0) or right (1) child."""
- if key:
- self.right_node = value
- else:
- self.left_node = value
-
- def __str__(self):
- """
- Shows info about this node.
-
- Since Nodes are internal data structures not revealed to the
- user, I'm not bothering to make this copy-paste-executable as a
- constructor.
- """
- return "Node<{0}, depth={1}, balance={2}>".format(
- self.x_center,
- self.depth,
- self.balance
- )
- #fieldcount = 'c_count,has_l,has_r = <{}, {}, {}>'.format(
- # len(self.s_center),
- # bool(self.left_node),
- # bool(self.right_node)
- #)
- #fields = [self.x_center, self.balance, fieldcount]
- #return "Node({}, b={}, {})".format(*fields)
-
- def count_nodes(self):
- """
- Count the number of Nodes in this subtree.
- :rtype: int
- """
- count = 1
- if self.left_node:
- count += self.left_node.count_nodes()
- if self.right_node:
- count += self.right_node.count_nodes()
- return count
-
- def depth_score(self, n, m):
- """
- Calculates flaws in balancing the tree.
- :param n: size of tree
- :param m: number of Nodes in tree
- :rtype: real
- """
- if n == 0:
- return 0.0
-
- # dopt is the optimal maximum depth of the tree
- dopt = 1 + int(floor(l2(m)))
- f = 1 / float(1 + n - dopt)
- return f * self.depth_score_helper(1, dopt)
-
- def depth_score_helper(self, d, dopt):
- """
- Gets a weighted count of the number of Intervals deeper than dopt.
- :param d: current depth, starting from 0
- :param dopt: optimal maximum depth of a leaf Node
- :rtype: real
- """
- # di is how may levels deeper than optimal d is
- di = d - dopt
- if di > 0:
- count = di * len(self.s_center)
- else:
- count = 0
- if self.right_node:
- count += self.right_node.depth_score_helper(d + 1, dopt)
- if self.left_node:
- count += self.left_node.depth_score_helper(d + 1, dopt)
- return count
-
- def print_structure(self, indent=0, tostring=False):
- """
- For debugging.
- """
- nl = '\n'
- sp = indent * ' '
-
- rlist = [str(self) + nl]
- if self.s_center:
- for iv in sorted(self.s_center):
- rlist.append(sp + ' ' + repr(iv) + nl)
- if self.left_node:
- rlist.append(sp + '<: ') # no CR
- rlist.append(self.left_node.print_structure(indent + 1, True))
- if self.right_node:
- rlist.append(sp + '>: ') # no CR
- rlist.append(self.right_node.print_structure(indent + 1, True))
- result = ''.join(rlist)
- if tostring:
- return result
- else:
- print(result)
diff --git a/src/samtools-0.1.18/AUTHORS b/src/samtools-0.1.18/AUTHORS
deleted file mode 100644
index 95afabb..0000000
--- a/src/samtools-0.1.18/AUTHORS
+++ /dev/null
@@ -1,20 +0,0 @@
-Heng Li from the Sanger Institute wrote most of the initial source codes
-of SAMtools and various converters.
-
-Bob Handsaker from the Broad Institute is a major contributor to the
-SAM/BAM specification. He designed and implemented the BGZF format, the
-underlying indexable compression format for the BAM format. BGZF does
-not support arithmetic between file offsets.
-
-Jue Ruan for the Beijing Genome Institute designed and implemented the
-RAZF format, an alternative indexable compression format. RAZF supports
-arithmetic between file offsets, at the cost of increased index file
-size and the full compatibility with gzip. RAZF is optional and only
-used in `faidx' for indexing RAZF compressed fasta files.
-
-Colin Hercus updated novo2sam.pl to support gapped alignment by
-novoalign.
-
-Petr Danecek contributed the header parsing library sam_header.c and
-sam2vcf.pl script and added knet support to the RAZF library.
-
diff --git a/src/samtools-0.1.18/COPYING b/src/samtools-0.1.18/COPYING
deleted file mode 100644
index 82fa2f4..0000000
--- a/src/samtools-0.1.18/COPYING
+++ /dev/null
@@ -1,21 +0,0 @@
-The MIT License
-
-Copyright (c) 2008-2009 Genome Research Ltd.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
\ No newline at end of file
diff --git a/src/samtools-0.1.18/ChangeLog b/src/samtools-0.1.18/ChangeLog
deleted file mode 100644
index a471838..0000000
--- a/src/samtools-0.1.18/ChangeLog
+++ /dev/null
@@ -1,5948 +0,0 @@
-------------------------------------------------------------------------
-r925 | lh3lh3 | 2011-02-28 15:45:17 -0500 (Mon, 28 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/phase.c
-
-minor changes to a heuristic rule
-
-------------------------------------------------------------------------
-r924 | lh3lh3 | 2011-02-28 15:24:04 -0500 (Mon, 28 Feb 2011) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bcftools/vcfutils.pl
- M /trunk/samtools/phase.c
-
- * 0.1.12-r924:126
- * fixed a bug in phase (due to recent changes)
- * fixed a bug in vcf2fq
-
-------------------------------------------------------------------------
-r923 | lh3lh3 | 2011-02-28 12:57:39 -0500 (Mon, 28 Feb 2011) | 5 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/phase.c
-
- * put version number in bam.h
- * write version to BCF
- * in phase, change the default -q to 37
- * output a little more information during phasing
-
-------------------------------------------------------------------------
-r922 | lh3lh3 | 2011-02-25 16:40:09 -0500 (Fri, 25 Feb 2011) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.tex
- M /trunk/samtools/bcftools/bcf2qcall.c
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/ld.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/vcf.c
- M /trunk/samtools/cut_target.c
-
- * change the order of PL/GL according to the latest VCF spec
- * change the type of SP to int32_t
-
-------------------------------------------------------------------------
-r921 | lh3lh3 | 2011-02-25 14:40:56 -0500 (Fri, 25 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.tex
-
-update the BCF spec
-
-------------------------------------------------------------------------
-r920 | lh3lh3 | 2011-02-25 00:59:27 -0500 (Fri, 25 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/cut_target.c
- M /trunk/samtools/errmod.h
- M /trunk/samtools/faidx.c
- M /trunk/samtools/khash.h
- M /trunk/samtools/kstring.c
- M /trunk/samtools/kstring.h
- A /trunk/samtools/phase.c
- M /trunk/samtools/samtools.1
-
-added the phase command
-
-------------------------------------------------------------------------
-r918 | lh3lh3 | 2011-02-24 10:05:54 -0500 (Thu, 24 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
-added "const" to bcf_p1_cal()
-
-------------------------------------------------------------------------
-r917 | lh3lh3 | 2011-02-24 09:36:30 -0500 (Thu, 24 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bam.c
-
-more meaningful BAM truncation message
-
-------------------------------------------------------------------------
-r916 | lh3lh3 | 2011-02-24 09:35:06 -0500 (Thu, 24 Feb 2011) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/vcf.c
-
- * automatically fix errors in GL
- * output unrecognized FORMAT as "."
-
-------------------------------------------------------------------------
-r913 | lh3lh3 | 2011-02-10 22:59:47 -0500 (Thu, 10 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/vcf.c
-
-finished VCF->BCF conversion
-
-------------------------------------------------------------------------
-r910 | petulda | 2011-02-03 03:13:48 -0500 (Thu, 03 Feb 2011) | 1 line
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-Prevent division by zero
-------------------------------------------------------------------------
-r909 | lh3lh3 | 2011-02-02 11:29:20 -0500 (Wed, 02 Feb 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
-
-fixed a typo in the VCF header
-
-------------------------------------------------------------------------
-r908 | lh3lh3 | 2011-02-02 11:28:24 -0500 (Wed, 02 Feb 2011) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam_index.c
-
- * fixed an out-of-boundary bug
- * improved sorting order checking in index
-
-------------------------------------------------------------------------
-r907 | lh3lh3 | 2011-01-29 22:59:20 -0500 (Sat, 29 Jan 2011) | 4 lines
-Changed paths:
- M /trunk/samtools/INSTALL
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/knetfile.c
-
- * avoid a segfault when network connect fails
- * update INSTALL
- * fixed a bug in tview on big-endian by Nathan Weeks
-
-------------------------------------------------------------------------
-r903 | lh3lh3 | 2011-01-27 14:50:02 -0500 (Thu, 27 Jan 2011) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_md.c
-
- * fixed a rare memory issue in bam_md.c
- * fixed a bug in indel calling related to unmapped and refskip reads
-
-------------------------------------------------------------------------
-r902 | lh3lh3 | 2011-01-23 21:46:18 -0500 (Sun, 23 Jan 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/fet.c
-
-fixed two minor bugs in Fisher's exact test
-
-------------------------------------------------------------------------
-r899 | petulda | 2011-01-19 09:28:02 -0500 (Wed, 19 Jan 2011) | 1 line
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-Skip sites with unknown ref
-------------------------------------------------------------------------
-r898 | lh3lh3 | 2011-01-15 12:56:05 -0500 (Sat, 15 Jan 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_md.c
-
-move bam_nt16_nt4_table[] from bam_maqcns.c to bam_md.c
-
-------------------------------------------------------------------------
-r896 | lh3lh3 | 2011-01-06 10:52:15 -0500 (Thu, 06 Jan 2011) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
-
- * samtools-0.1.12-10 (r896)
- * allow to exclude read groups in mpileup
-
-------------------------------------------------------------------------
-r895 | lh3lh3 | 2011-01-04 11:31:29 -0500 (Tue, 04 Jan 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.tex
-
-sorry. It is SP not ST
-
-------------------------------------------------------------------------
-r894 | lh3lh3 | 2011-01-04 11:29:06 -0500 (Tue, 04 Jan 2011) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.tex
-
-added ST
-
-------------------------------------------------------------------------
-r893 | petulda | 2011-01-04 06:55:56 -0500 (Tue, 04 Jan 2011) | 1 line
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
-
-Fixed a typo in read_samples
-------------------------------------------------------------------------
-r892 | jmarshall | 2010-12-28 08:06:49 -0500 (Tue, 28 Dec 2010) | 9 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/examples/Makefile
-
-System libraries go *after* user libraries in link commands, because
-the user libraries may themselves have dependencies that are satisfied
-by the system libraries. It's not rocket science!
-
-This makes a difference with some linkers; or with -static or --as-needed.
-
-The examples/Makefile fix is from Charles Plessy.
-See also http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=606004
-
-------------------------------------------------------------------------
-r891 | lh3lh3 | 2010-12-21 12:16:33 -0500 (Tue, 21 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
-
- * samtools-0.1.12-9 (r891)
- * allow to call SNPs from a subset of samples
-
-------------------------------------------------------------------------
-r889 | lh3lh3 | 2010-12-15 11:28:16 -0500 (Wed, 15 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.12-12 (r889)
- * set mapQ as 20 if it equals 255
-
-------------------------------------------------------------------------
-r888 | lh3lh3 | 2010-12-14 22:41:09 -0500 (Tue, 14 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
-When -B is applied to mpileup, still use paired reads only unless -A is flagged.
-
-------------------------------------------------------------------------
-r887 | lh3lh3 | 2010-12-14 22:37:05 -0500 (Tue, 14 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.12-6 (r887)
- * added a hidden option -E to mpileup/calmd. -E triggers an alternative way to apply BAQ.
-
-------------------------------------------------------------------------
-r886 | lh3lh3 | 2010-12-14 12:51:03 -0500 (Tue, 14 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
-(Arguably) improved the indel caller a tiny bit for lowCov data.
-
-------------------------------------------------------------------------
-r885 | petulda | 2010-12-14 04:55:46 -0500 (Tue, 14 Dec 2010) | 1 line
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
-
-Fixed the VCF header to pass validation
-------------------------------------------------------------------------
-r884 | lh3lh3 | 2010-12-12 23:02:19 -0500 (Sun, 12 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * samtools-0.1.12-4 (r884)
- * fixed a long-existing flaw in the INDEL calling model
-
-------------------------------------------------------------------------
-r883 | lh3lh3 | 2010-12-11 20:05:42 -0500 (Sat, 11 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
-compute max SP and max GQ from sample genotypes
-
-------------------------------------------------------------------------
-r880 | lh3lh3 | 2010-12-10 10:50:54 -0500 (Fri, 10 Dec 2010) | 2 lines
-Changed paths:
- D /trunk/samtools/bcftools/bcf-fix.pl
-
-drop bcf-fix.pl as it has been made redundant by the latest changes
-
-------------------------------------------------------------------------
-r879 | lh3lh3 | 2010-12-10 10:50:29 -0500 (Fri, 10 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/vcf.c
-
- * fixed a minor issue in printing VCFs
- * write bcftools specific INFO and FORMAT in the header
-
-------------------------------------------------------------------------
-r878 | lh3lh3 | 2010-12-10 10:09:14 -0500 (Fri, 10 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
-
-Make sure that the GT genotype field is the first
-
-------------------------------------------------------------------------
-r877 | lh3lh3 | 2010-12-08 17:27:05 -0500 (Wed, 08 Dec 2010) | 7 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.12-2 (r877)
-
- * allow to fine control the selection of indel candidates. The current
- setting is okay for lowCov and highCov with ~100 samples, but it
- skips too many indels for highCov with >250 samples.
-
-
-------------------------------------------------------------------------
-r874 | lh3lh3 | 2010-12-07 22:40:35 -0500 (Tue, 07 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-a spelling error..
-
-------------------------------------------------------------------------
-r873 | lh3lh3 | 2010-12-07 22:39:57 -0500 (Tue, 07 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.12-1 (r873)
- * added a switch to allow anomalous read pairs in calling
-
-------------------------------------------------------------------------
-r872 | lh3lh3 | 2010-12-07 14:43:54 -0500 (Tue, 07 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-fixed a bug in vcf2fq
-
-------------------------------------------------------------------------
-r869 | lh3lh3 | 2010-12-05 01:18:06 -0500 (Sun, 05 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
-added a warning for the Windows version
-
-------------------------------------------------------------------------
-r868 | lh3lh3 | 2010-12-05 01:05:51 -0500 (Sun, 05 Dec 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
-
-In ksprintf(), change "%lf" and "%lg" to "%f" and "%g", respectively.
-According to the manual page, this change is valid. However, MinGW seems
-to interpret "%lf" as "%Lf".
-
-------------------------------------------------------------------------
-r867 | lh3lh3 | 2010-12-05 00:35:43 -0500 (Sun, 05 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.mingw
- M /trunk/samtools/bam_aux.c
-
-bring back the windows support
-
-------------------------------------------------------------------------
-r866 | lh3lh3 | 2010-12-04 23:33:51 -0500 (Sat, 04 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_reheader.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
-Fixed a compiling error when knetfile is not used.
-
-------------------------------------------------------------------------
-r865 | lh3lh3 | 2010-12-04 00:13:22 -0500 (Sat, 04 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-vcf->fastq
-
-------------------------------------------------------------------------
-r864 | lh3lh3 | 2010-12-03 17:12:30 -0500 (Fri, 03 Dec 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
- * remove "-f". Instead always compute consensus quality
- * increase the upper limit of quality
-
-------------------------------------------------------------------------
-r863 | lh3lh3 | 2010-12-03 15:28:15 -0500 (Fri, 03 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
-
-more informative error message
-
-------------------------------------------------------------------------
-r862 | lh3lh3 | 2010-12-02 16:16:08 -0500 (Thu, 02 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
-
-Release samtools-0.1.12a
-
-------------------------------------------------------------------------
-r861 | lh3lh3 | 2010-12-02 15:55:06 -0500 (Thu, 02 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
-
-a possible fix to DP4=0,0,0,0; have not tested, but should have no side-effect
-
-------------------------------------------------------------------------
-r859 | lh3lh3 | 2010-12-02 11:39:57 -0500 (Thu, 02 Dec 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/NEWS
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.12
-
-------------------------------------------------------------------------
-r858 | lh3lh3 | 2010-12-02 11:24:41 -0500 (Thu, 02 Dec 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcf.c
-
- * samtools-0.1.11-1 (r858)
- * fixed a bug in mpileup which causes segfaults
- * bcftools: do not segfault when BCF contains errors
-
-------------------------------------------------------------------------
-r857 | lh3lh3 | 2010-11-30 23:52:50 -0500 (Tue, 30 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-fixed a memory leak in bam_fetch()
-
-------------------------------------------------------------------------
-r856 | lh3lh3 | 2010-11-26 00:07:31 -0500 (Fri, 26 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * fixed a memory violation
- * added splitchr to vcfutils.pl
-
-------------------------------------------------------------------------
-r854 | lh3lh3 | 2010-11-23 09:05:08 -0500 (Tue, 23 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/ld.c
-
-fixed a typo/bug in r^2 computation
-
-------------------------------------------------------------------------
-r852 | lh3lh3 | 2010-11-21 22:20:20 -0500 (Sun, 21 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
-forgot to change the version information
-
-------------------------------------------------------------------------
-r851 | lh3lh3 | 2010-11-21 22:16:52 -0500 (Sun, 21 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bcftools/bcftools.1
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.11
-
-------------------------------------------------------------------------
-r844 | lh3lh3 | 2010-11-19 23:16:08 -0500 (Fri, 19 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
- * samtools-0.1.10-9 (r844)
- * added the "folded" or reference-free mode for variant calling
-
-------------------------------------------------------------------------
-r843 | lh3lh3 | 2010-11-19 22:26:36 -0500 (Fri, 19 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/NEWS
- M /trunk/samtools/bam_sort.c
-
-In merging, if -R is specified, do not abort if the sequence dictionary is different.
-
-------------------------------------------------------------------------
-r842 | jmarshall | 2010-11-19 21:24:28 -0500 (Fri, 19 Nov 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
-
-When merging BAM headers, compare the list of target reference sequences
-strictly (and fail/abort if there is a mismatch), but allow one list to be a
-prefix of the other. (i.e., check that the lists are identical up until the
-shorter runs out, and add the excess targets from the longer to the output.)
-
-------------------------------------------------------------------------
-r841 | lh3lh3 | 2010-11-19 14:49:27 -0500 (Fri, 19 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.10 (r841)
- * fixed a bug in pileup when the first CIGAR operation is D
- * fixed a bug in view with range query
-
-------------------------------------------------------------------------
-r840 | lh3lh3 | 2010-11-19 13:45:51 -0500 (Fri, 19 Nov 2010) | 10 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.10-4 (r840)
-
- * drop the MNP caller. It is slow and does not deliver much
- benefit. Possibly I will work on it in future given more time.
-
- * there is a segfault in pileup
-
- * someone has reported segfault from view/index/sort
-
-
-------------------------------------------------------------------------
-r839 | lh3lh3 | 2010-11-18 17:30:11 -0500 (Thu, 18 Nov 2010) | 9 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.10-6 (r839)
-
- * call MNPs without realignment because it seems to me that it is not
- worthwhile to significantly slow down SNP calling.
-
- * the result looks quite different from the previous version. I have
- work to do...
-
-
-------------------------------------------------------------------------
-r838 | lh3lh3 | 2010-11-18 11:26:09 -0500 (Thu, 18 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-Apply a patch by Rob Davis, which improves fault detection.
-
-------------------------------------------------------------------------
-r836 | lh3lh3 | 2010-11-18 11:09:23 -0500 (Thu, 18 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-r836
- * initiate MNP realignment when the MNP has at least 0.2% frequency (otherwise too slow)
-
-------------------------------------------------------------------------
-r835 | lh3lh3 | 2010-11-18 00:25:13 -0500 (Thu, 18 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * modify the filtering rule: also filter SNPs around filtered indels
- * added MNP filter
-
-------------------------------------------------------------------------
-r834 | lh3lh3 | 2010-11-17 23:13:52 -0500 (Wed, 17 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.10-4 (r834)
- * fixed a silly bug in printing MNP
- * restrict to at most 1 alternative allele
-
-------------------------------------------------------------------------
-r833 | lh3lh3 | 2010-11-17 21:58:58 -0500 (Wed, 17 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bamtk.c
-
-fixed a bug in printing MNPs
-
-------------------------------------------------------------------------
-r832 | lh3lh3 | 2010-11-17 21:47:20 -0500 (Wed, 17 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
-minor change to how seqQ is applied
-
-------------------------------------------------------------------------
-r831 | lh3lh3 | 2010-11-17 21:41:12 -0500 (Wed, 17 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.10 (r831)
- * initial MNP caller
-
-------------------------------------------------------------------------
-r829 | lh3lh3 | 2010-11-16 23:14:15 -0500 (Tue, 16 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
-
-Release samtools-0.1.10 (r829)
-
-------------------------------------------------------------------------
-r828 | lh3lh3 | 2010-11-16 20:48:49 -0500 (Tue, 16 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
-update version information: samtools-0.1.9-20 (r828)
-
-------------------------------------------------------------------------
-r827 | lh3lh3 | 2010-11-16 15:32:50 -0500 (Tue, 16 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
-
-bcftools: allow to skip indels
-
-------------------------------------------------------------------------
-r826 | lh3lh3 | 2010-11-16 14:11:58 -0500 (Tue, 16 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
-
-remove ZQ if both BQ and ZQ are present
-
-------------------------------------------------------------------------
-r825 | lh3lh3 | 2010-11-16 13:51:33 -0500 (Tue, 16 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.9-18 (r825)
- * change the behaviour of calmd such that by default it does not change the base quality
-
-------------------------------------------------------------------------
-r824 | lh3lh3 | 2010-11-15 23:31:53 -0500 (Mon, 15 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.9-17 (r824)
- * added command line options to change the default parameters in indel calling
- * update the manual
-
-------------------------------------------------------------------------
-r823 | lh3lh3 | 2010-11-15 12:20:13 -0500 (Mon, 15 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-r823
- * the BQ tag is now 64 shifted, not 33 shifted
-
-------------------------------------------------------------------------
-r822 | lh3lh3 | 2010-11-15 00:30:18 -0500 (Mon, 15 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/vcfutils.pl
- M /trunk/samtools/misc/samtools.pl
-
- * samtools-0.1.9-16 (r822)
- * keep the raw depth because in indel calling, DP4 may be way off the true depth
-
-------------------------------------------------------------------------
-r821 | lh3lh3 | 2010-11-13 01:18:31 -0500 (Sat, 13 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-15 (r821)
- * calmd: write BQ
- * skip realignment if BQ is present
-
-------------------------------------------------------------------------
-r820 | lh3lh3 | 2010-11-13 01:08:26 -0500 (Sat, 13 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-14 (r820)
- * penalize reads with excessive differences in indel calling
-
-------------------------------------------------------------------------
-r819 | lh3lh3 | 2010-11-12 21:36:27 -0500 (Fri, 12 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-13 (r819)
- * fixed a bug in pileup given refskip
-
-------------------------------------------------------------------------
-r818 | lh3lh3 | 2010-11-12 13:04:53 -0500 (Fri, 12 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-r818
- * for indel calling, do two rounds of probabilistic realignments
-
-------------------------------------------------------------------------
-r817 | lh3lh3 | 2010-11-11 20:04:07 -0500 (Thu, 11 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * samtools-0.1.19-11 (r817)
- * only initiate indel calling when 0.2% of reads contain a gap
-
-------------------------------------------------------------------------
-r816 | lh3lh3 | 2010-11-11 01:22:59 -0500 (Thu, 11 Nov 2010) | 7 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-10 (r816)
-
- * I know why the forward method fails. it is because of zero base
- qualities. when that is fixed, the forward method seems to give
- better results than Viterbi, as it should be. I am tired...
-
-
-------------------------------------------------------------------------
-r815 | lh3lh3 | 2010-11-11 00:57:15 -0500 (Thu, 11 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam2bcf_indel.c
-
-effectively revert to the viterbi version. The forward realignment gives too many false positives.
-
-------------------------------------------------------------------------
-r814 | lh3lh3 | 2010-11-11 00:18:02 -0500 (Thu, 11 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-9 (r810)
- * use forward, instead of viterbi, for realignment
- * realignment is now quality aware
-
-------------------------------------------------------------------------
-r813 | lh3lh3 | 2010-11-10 22:45:24 -0500 (Wed, 10 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/kprobaln.c
- M /trunk/samtools/kprobaln.h
-
- * prepare to replace kaln with kprobaln in realignment
-
-------------------------------------------------------------------------
-r812 | lh3lh3 | 2010-11-10 17:28:50 -0500 (Wed, 10 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
-
-fixed a typo
-
-------------------------------------------------------------------------
-r811 | lh3lh3 | 2010-11-10 16:54:46 -0500 (Wed, 10 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
-
-use zlib for direct reading when BCF_LITE is in use
-
-------------------------------------------------------------------------
-r810 | lh3lh3 | 2010-11-10 16:32:13 -0500 (Wed, 10 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
-
- * do not use reads containing too many mismatches for indel calling
- * fixed a trivial bug in case of multi-allelic indels
-
-------------------------------------------------------------------------
-r809 | lh3lh3 | 2010-11-10 13:23:02 -0500 (Wed, 10 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-8 (r809)
- * fixed a bug in the indel caller
-
-------------------------------------------------------------------------
-r808 | lh3lh3 | 2010-11-10 12:24:10 -0500 (Wed, 10 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-minor change to makefile
-
-------------------------------------------------------------------------
-r807 | lh3lh3 | 2010-11-10 12:10:21 -0500 (Wed, 10 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * samtools-0.1.9-8 (r807)
- * collect indel candidates only from specified platforms (@RG-PL)
- * merge varFilter and filter4vcf in vcfutils.pl
-
-------------------------------------------------------------------------
-r806 | lh3lh3 | 2010-11-09 22:05:46 -0500 (Tue, 09 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
-bcftools: compute equal-tail (Bayesian) credible interval
-
-------------------------------------------------------------------------
-r805 | lh3lh3 | 2010-11-09 16:28:39 -0500 (Tue, 09 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-added a double-hit filter to avoid overestimated indel likelihood
-
-------------------------------------------------------------------------
-r804 | lh3lh3 | 2010-11-09 14:12:06 -0500 (Tue, 09 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-7 (r804)
- * fixed a bug in the gap caller
-
-------------------------------------------------------------------------
-r803 | lh3lh3 | 2010-11-09 10:45:33 -0500 (Tue, 09 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/prob1.c
-
- * samtools-0.1.9-6 (r803)
- * mpileup: apply homopolymer correction when calculating GL, instead of before
- * bcftools: apply a different prior to indels
-
-------------------------------------------------------------------------
-r802 | lh3lh3 | 2010-11-08 23:53:15 -0500 (Mon, 08 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-5 (r802)
- * relax tandem penalty. this will be made a command-line option in future.
-
-------------------------------------------------------------------------
-r801 | lh3lh3 | 2010-11-08 23:35:52 -0500 (Mon, 08 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-4 (r801)
- * fixed a minor issue in printing indel VCF
-
-------------------------------------------------------------------------
-r800 | lh3lh3 | 2010-11-08 15:28:14 -0500 (Mon, 08 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
-fixed another silly bug in mpileup's indel caller
-
-------------------------------------------------------------------------
-r799 | lh3lh3 | 2010-11-08 14:28:27 -0500 (Mon, 08 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
-
-fixed a silly bug in the indel caller
-
-------------------------------------------------------------------------
-r798 | lh3lh3 | 2010-11-08 14:07:33 -0500 (Mon, 08 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/sam_view.c
- M /trunk/samtools/samtools.1
-
-Incorporate patches by Marcel Martin for read counting.
-
-------------------------------------------------------------------------
-r797 | lh3lh3 | 2010-11-08 13:39:52 -0500 (Mon, 08 Nov 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-2 (r797)
- * mpileup: indel calling seems to be working
-
-------------------------------------------------------------------------
-r796 | lh3lh3 | 2010-11-08 10:54:46 -0500 (Mon, 08 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/kaln.c
-
-indel calling is apparently working, but more information needs to be collected
-
-------------------------------------------------------------------------
-r795 | lh3lh3 | 2010-11-08 00:39:18 -0500 (Mon, 08 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf_indel.c
-
-fixed a few bugs in the indel caller. Probably there are more.
-
-------------------------------------------------------------------------
-r794 | lh3lh3 | 2010-11-07 22:23:16 -0500 (Sun, 07 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- A /trunk/samtools/bam2bcf_indel.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/kaln.c
- M /trunk/samtools/kaln.h
-
-prepare for the indel caller. It is not ready yet.
-
-------------------------------------------------------------------------
-r793 | lh3lh3 | 2010-11-05 11:28:23 -0400 (Fri, 05 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
-
-Revert to r790. The recent changes are not good...
-
-------------------------------------------------------------------------
-r792 | lh3lh3 | 2010-11-05 00:19:14 -0400 (Fri, 05 Nov 2010) | 6 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
-
- * this revision is UNSTABLE
-
- * indel caller seems working, but it is very insensitive and has
- several things I do not quite understand.
-
-
-------------------------------------------------------------------------
-r791 | lh3lh3 | 2010-11-04 22:58:43 -0400 (Thu, 04 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
-
-for backup. no effective changes
-
-------------------------------------------------------------------------
-r790 | lh3lh3 | 2010-11-03 15:51:24 -0400 (Wed, 03 Nov 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
- M /trunk/samtools/kprobaln.c
-
-fixed a minor problem in the example coming with kprobaln.c
-
-------------------------------------------------------------------------
-r789 | lh3lh3 | 2010-11-02 15:41:27 -0400 (Tue, 02 Nov 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/kaln.c
- M /trunk/samtools/kaln.h
- A /trunk/samtools/kprobaln.c
- A /trunk/samtools/kprobaln.h
-
-Separate kaln and kprobaln as I am preparing further changes. At
-present, the results should be identical to the previous.
-
-
-------------------------------------------------------------------------
-r788 | petulda | 2010-11-02 12:19:04 -0400 (Tue, 02 Nov 2010) | 1 line
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-Added -b option: read file names from a file
-------------------------------------------------------------------------
-r787 | lh3lh3 | 2010-10-29 23:17:22 -0400 (Fri, 29 Oct 2010) | 7 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.9-2 (r787)
-
- * Allow to set a maximum per-sample depth to reduce memory. However,
- BAQ computation is still applied to every read. The speed is not
- improved.
-
-
-------------------------------------------------------------------------
-r786 | lh3lh3 | 2010-10-29 12:10:40 -0400 (Fri, 29 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/vcf.c
-
- * samtools-0.1.9-1 (r786)
- * samtools: optionally perform exact test for each sample
-
-------------------------------------------------------------------------
-r785 | lh3lh3 | 2010-10-29 09:42:25 -0400 (Fri, 29 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bcftools/bcf.c
-
-Optionally output "DP", the individual read depth
-
-------------------------------------------------------------------------
-r784 | lh3lh3 | 2010-10-27 23:10:27 -0400 (Wed, 27 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/samtools.1
-
-acknowledge Petr and John who have greatly contributed to the project.
-
-------------------------------------------------------------------------
-r783 | lh3lh3 | 2010-10-27 22:47:47 -0400 (Wed, 27 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.9 (r783)
-
-------------------------------------------------------------------------
-r782 | lh3lh3 | 2010-10-27 19:58:54 -0400 (Wed, 27 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-fixed a silly bug in pileup
-
-------------------------------------------------------------------------
-r781 | lh3lh3 | 2010-10-27 14:39:48 -0400 (Wed, 27 Oct 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.8-22 (r781)
- * made BAQ the default behavior of mpileup
- * updated manual
- * in merge, force to exit given inconsistent header when "-R" is not in use.
-
-------------------------------------------------------------------------
-r780 | lh3lh3 | 2010-10-27 11:01:11 -0400 (Wed, 27 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-21 (r780)
- * minor speedup to pileup
-
-------------------------------------------------------------------------
-r779 | lh3lh3 | 2010-10-27 09:58:56 -0400 (Wed, 27 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/examples/toy.sam
-
-improve pileup a little bit
-
-------------------------------------------------------------------------
-r778 | lh3lh3 | 2010-10-27 00:14:43 -0400 (Wed, 27 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-20 (r778)
- * speed up pileup, although I do not know how much is the improvement
-
-------------------------------------------------------------------------
-r777 | lh3lh3 | 2010-10-26 17:26:04 -0400 (Tue, 26 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/examples/Makefile
-
- * samtools-0.1.8-19 (r777)
- * integrate mpileup features to pileup: min_baseQ, capQ, prob_realn, paired-only and biased prior
-
-------------------------------------------------------------------------
-r776 | lh3lh3 | 2010-10-26 15:27:46 -0400 (Tue, 26 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
-
-remove local realignment (probabilistic realignment is still there)
-
-------------------------------------------------------------------------
-r774 | jmarshall | 2010-10-21 06:52:38 -0400 (Thu, 21 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/sam_view.c
-
-Add the relevant filename or region to error messages, and cause a failure
-exit status where appropriate. Based on a patch provided by Marcel Martin.
-
-------------------------------------------------------------------------
-r773 | lh3lh3 | 2010-10-19 19:44:31 -0400 (Tue, 19 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/examples/toy.sam
- M /trunk/samtools/kaln.c
-
- * Minor code changes. No real effect.
- * change quality to 30 in toy.sam
-
-------------------------------------------------------------------------
-r772 | lh3lh3 | 2010-10-18 23:40:13 -0400 (Mon, 18 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/toy.fa
- M /trunk/samtools/examples/toy.sam
-
-added another toy example
-
-------------------------------------------------------------------------
-r771 | lh3lh3 | 2010-10-13 23:32:12 -0400 (Wed, 13 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/ld.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
-improve the LD statistics
-
-------------------------------------------------------------------------
-r770 | lh3lh3 | 2010-10-12 23:49:26 -0400 (Tue, 12 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * a minor fix to the -L option
- * add ldstats to vcfutils.pl
-
-------------------------------------------------------------------------
-r769 | lh3lh3 | 2010-10-12 15:51:57 -0400 (Tue, 12 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
-
-a minor change
-
-------------------------------------------------------------------------
-r768 | lh3lh3 | 2010-10-12 15:49:06 -0400 (Tue, 12 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- A /trunk/samtools/bcftools/ld.c
-
-forgot to add the key file
-
-------------------------------------------------------------------------
-r767 | lh3lh3 | 2010-10-12 15:48:46 -0400 (Tue, 12 Oct 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * vcfutils.pl: fixed a typo in help message
- * added APIs: bcf_append_info() and bcf_cpy()
- * calculate adjacent LD
-
-------------------------------------------------------------------------
-r766 | lh3lh3 | 2010-10-11 11:06:40 -0400 (Mon, 11 Oct 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-added filter for samtools/bcftools generated VCFs
-
-------------------------------------------------------------------------
-r765 | lh3lh3 | 2010-10-05 14:05:18 -0400 (Tue, 05 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
- M /trunk/samtools/kaln.c
-
- * removed a comment line in kaln.c
- * vcfutils.pl fillac works when GT is not the first field
-
-------------------------------------------------------------------------
-r764 | petulda | 2010-10-05 08:59:36 -0400 (Tue, 05 Oct 2010) | 1 line
-Changed paths:
- A /trunk/samtools/bcftools/bcf-fix.pl
-
-Convert VCF output of "bcftools view -bgcv" to a valid VCF file
-------------------------------------------------------------------------
-r763 | lh3lh3 | 2010-10-02 22:51:03 -0400 (Sat, 02 Oct 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/bcftools/bcftools.1
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.8-18 (r763)
- * added bcftools manual page
- * minor fix to mpileup and view command lines
-
-------------------------------------------------------------------------
-r762 | lh3lh3 | 2010-10-02 21:46:25 -0400 (Sat, 02 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * vcfutils.pl qstats: calculate marginal ts/tv
- * allow to call genotypes at variant sites
-
-------------------------------------------------------------------------
-r761 | lh3lh3 | 2010-10-01 00:29:55 -0400 (Fri, 01 Oct 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/kaln.c
- M /trunk/samtools/misc/HmmGlocal.java
-
-I am changing the gap open probability back to 0.001. It seems that
-being conservative here is a good thing...
-
-------------------------------------------------------------------------
-r760 | lh3lh3 | 2010-10-01 00:11:27 -0400 (Fri, 01 Oct 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kaln.c
- A /trunk/samtools/misc/HmmGlocal.java
-
- * samtools-0.1.8-17 (r760)
- * the default gap open penalty is too small (a typo)
- * added comments on hmm_realn
- * Java implementation
-
-------------------------------------------------------------------------
-r759 | lh3lh3 | 2010-09-30 10:12:54 -0400 (Thu, 30 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
-mark samtools-0.1.8-16 (r759)
-
-------------------------------------------------------------------------
-r758 | lh3lh3 | 2010-09-30 10:12:02 -0400 (Thu, 30 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-round to the nearest integer
-
-------------------------------------------------------------------------
-r757 | lh3lh3 | 2010-09-28 17:16:43 -0400 (Tue, 28 Sep 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-I was trying to accelerate ka_prob_glocal() as this will be the
-bottleneck. After an hour, the only gain is to change division to
-multiplication. OK. I will stop.
-
-------------------------------------------------------------------------
-r756 | lh3lh3 | 2010-09-28 16:57:49 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-this is interesting. multiplication is much faster than division, at least on my Mac
-
-------------------------------------------------------------------------
-r755 | lh3lh3 | 2010-09-28 16:19:13 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-minor changes
-
-------------------------------------------------------------------------
-r754 | lh3lh3 | 2010-09-28 15:44:16 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/kaln.c
-
-prob_realn() seems working!
-
-------------------------------------------------------------------------
-r753 | lh3lh3 | 2010-09-28 12:48:23 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-minor
-
-------------------------------------------------------------------------
-r752 | lh3lh3 | 2010-09-28 12:47:41 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
- M /trunk/samtools/kaln.h
-
-Convert phredQ to probabilities
-
-------------------------------------------------------------------------
-r751 | lh3lh3 | 2010-09-28 12:32:08 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
- M /trunk/samtools/kaln.h
-
-Implement the glocal HMM; discard the extension HMM
-
-------------------------------------------------------------------------
-r750 | lh3lh3 | 2010-09-28 00:06:11 -0400 (Tue, 28 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-improve numerical stability
-
-------------------------------------------------------------------------
-r749 | lh3lh3 | 2010-09-27 23:27:54 -0400 (Mon, 27 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-more comments
-
-------------------------------------------------------------------------
-r748 | lh3lh3 | 2010-09-27 23:17:16 -0400 (Mon, 27 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-fixed a bug in banded DP
-
-------------------------------------------------------------------------
-r747 | lh3lh3 | 2010-09-27 23:05:12 -0400 (Mon, 27 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
- * fixed that weird issue.
- * the banded version is NOT working
-
-------------------------------------------------------------------------
-r746 | lh3lh3 | 2010-09-27 22:57:05 -0400 (Mon, 27 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-More comments. This version seems working, but something is a little weird...
-
-------------------------------------------------------------------------
-r745 | lh3lh3 | 2010-09-27 17:21:40 -0400 (Mon, 27 Sep 2010) | 6 lines
-Changed paths:
- M /trunk/samtools/kaln.c
-
-A little code cleanup. Now the forward and backward algorithms give
-nearly identical P(x), which means both are close to the correct
-forms. However, I have only tested on toy examples. Minor errors in
-the implementation may not be obvious.
-
-
-------------------------------------------------------------------------
-r744 | lh3lh3 | 2010-09-27 16:55:15 -0400 (Mon, 27 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/kaln.c
- M /trunk/samtools/kaln.h
-
-...
-
-------------------------------------------------------------------------
-r743 | jmarshall | 2010-09-27 08:19:06 -0400 (Mon, 27 Sep 2010) | 6 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
-
-Abort if merge -h's INH.SAM cannot be opened, just as we abort
-if any of the IN#.BAM input files cannot be opened.
-
-Also propagate any error indication returned by bam_merge_core()
-to samtools merge's exit status.
-
-------------------------------------------------------------------------
-r741 | jmarshall | 2010-09-24 11:08:24 -0400 (Fri, 24 Sep 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-Use bam_validate1() to detect garbage records in the event of a corrupt
-BAI index file that causes a bam_seek() to an invalid position. At most
-one record (namely, the bam_iter_read terminator) is tested per bam_fetch()
-call, so the cost is insignificant in the normal case.
-
-------------------------------------------------------------------------
-r740 | jmarshall | 2010-09-24 11:00:19 -0400 (Fri, 24 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
-
-Add bam_validate1().
-
-------------------------------------------------------------------------
-r739 | lh3lh3 | 2010-09-22 12:07:50 -0400 (Wed, 22 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-15 (r739)
- * allow to change capQ parameter in calmd
-
-------------------------------------------------------------------------
-r738 | jmarshall | 2010-09-22 11:15:33 -0400 (Wed, 22 Sep 2010) | 13 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/sam_view.c
-
-When bam_read1() returns an error (return value <= -2), propagate that error
-to bam_iter_read()'s own return value. Similarly, also propagate it up to
-bam_fetch()'s return value. Previously bam_fetch() always returned 0, and
-callers ignored its return value anyway. With this change, 0 continues to
-indicate success, while <= -2 (which can be written as < 0, as -1 is never
-returned) indicates corrupted input.
-
-bam_iter_read() ought also to propagate errors returned by bam_seek().
-
-main_samview() can now print an error message and fail when bam_fetch()
-detects that a .bai index file is corrupted or otherwise does not correspond
-to the .bam file it is being used with.
-
-------------------------------------------------------------------------
-r737 | jmarshall | 2010-09-22 10:47:42 -0400 (Wed, 22 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-0 is a successful return value from bam_read1(). (In practice, it never
-returns 0 anyway; but all the other callers treat 0 as successful.)
-
-------------------------------------------------------------------------
-r736 | lh3lh3 | 2010-09-20 17:43:08 -0400 (Mon, 20 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_sort.c
-
- * merge files region-by-region. work on small examples but more tests are needed.
-
-------------------------------------------------------------------------
-r735 | lh3lh3 | 2010-09-20 16:56:24 -0400 (Mon, 20 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-improve qstats by checking the alleles as well
-
-------------------------------------------------------------------------
-r734 | lh3lh3 | 2010-09-17 18:12:13 -0400 (Fri, 17 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-convert UCSC SNP SQL dump to VCF
-
-------------------------------------------------------------------------
-r733 | lh3lh3 | 2010-09-17 13:02:11 -0400 (Fri, 17 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-hapmap2vcf convertor
-
-------------------------------------------------------------------------
-r732 | lh3lh3 | 2010-09-17 10:11:37 -0400 (Fri, 17 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/vcf.c
-
- * added comments
- * VCF->BCF is not possible without knowing the sequence dictionary beforehand...
-
-------------------------------------------------------------------------
-r731 | lh3lh3 | 2010-09-17 09:15:53 -0400 (Fri, 17 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/vcf.c
-
- * put n_smpl to "bcf1_t" to simplify API a little
-
-------------------------------------------------------------------------
-r730 | lh3lh3 | 2010-09-16 21:36:01 -0400 (Thu, 16 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/index.c
-
-fixed a bug in indexing
-
-------------------------------------------------------------------------
-r729 | lh3lh3 | 2010-09-16 16:54:48 -0400 (Thu, 16 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_pileup.c
-
- * fixed a bug in capQ
- * valgrind identifies a use of uninitialised value, but I have not fixed it.
-
-------------------------------------------------------------------------
-r728 | lh3lh3 | 2010-09-16 15:03:59 -0400 (Thu, 16 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bgzip.c
- M /trunk/samtools/razip.c
-
- * fixed a bug in razip: -c will delete the input file
- * copy tabix/bgzip to here
-
-------------------------------------------------------------------------
-r727 | lh3lh3 | 2010-09-16 13:45:49 -0400 (Thu, 16 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-14 (r727)
- * allow to change the capQ parameter at the command line
-
-------------------------------------------------------------------------
-r726 | lh3lh3 | 2010-09-16 13:38:43 -0400 (Thu, 16 Sep 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bcftools/vcfutils.pl
- M /trunk/samtools/misc/samtools.pl
-
- * added varFilter to vcfutils.pl
- * reimplement realn(). now it performs a local alignment
- * added cap_mapQ() to cap mapping quality when there are many substitutions
-
-------------------------------------------------------------------------
-r724 | lh3lh3 | 2010-09-15 00:18:31 -0400 (Wed, 15 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- A /trunk/samtools/bcftools/bcf2qcall.c
- M /trunk/samtools/bcftools/call1.c
-
- * convert BCF to QCALL input
-
-------------------------------------------------------------------------
-r723 | lh3lh3 | 2010-09-14 22:41:50 -0400 (Tue, 14 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
-
-dynamic band width in realignment
-
-------------------------------------------------------------------------
-r722 | lh3lh3 | 2010-09-14 22:05:32 -0400 (Tue, 14 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
-
-fixed a bug in realignment
-
-------------------------------------------------------------------------
-r721 | lh3lh3 | 2010-09-14 20:54:09 -0400 (Tue, 14 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/prob1.c
-
-fixed a minor issue
-
-------------------------------------------------------------------------
-r720 | lh3lh3 | 2010-09-14 19:25:10 -0400 (Tue, 14 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_md.c
-
-fixed a bug in realignment
-
-------------------------------------------------------------------------
-r719 | lh3lh3 | 2010-09-14 19:18:24 -0400 (Tue, 14 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-minor changes. It is BUGGY now!
-
-------------------------------------------------------------------------
-r718 | lh3lh3 | 2010-09-14 16:32:33 -0400 (Tue, 14 Sep 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/kaln.c
- M /trunk/samtools/kaln.h
-
- * aggressive gapped aligner is implemented in calmd.
- * distinguish gap_open and gap_end_open in banded alignment
- * make tview accept alignments with leading and trailing D
-
-------------------------------------------------------------------------
-r717 | jmarshall | 2010-09-14 09:04:28 -0400 (Tue, 14 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools
-
-Add svn:ignore properties for generated files that don't appear in "make all".
-
-------------------------------------------------------------------------
-r716 | jmarshall | 2010-09-13 08:37:53 -0400 (Mon, 13 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools
- M /trunk/samtools/bcftools
- M /trunk/samtools/misc
-
-Add svn:ignore properties listing the generated files.
-(Except for *.o, which we'll assume is in global-ignores.)
-
-------------------------------------------------------------------------
-r715 | lh3lh3 | 2010-09-08 12:53:55 -0400 (Wed, 08 Sep 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/sample.c
- M /trunk/samtools/sample.h
-
- * samtools-0.1.8-13 (r715)
- * fixed a bug in identifying SM across files
- * bcftools: estimate heterozygosity
- * bcftools: allow to skip sites without reference bases
-
-------------------------------------------------------------------------
-r713 | lh3lh3 | 2010-09-03 17:19:12 -0400 (Fri, 03 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
-quite a lot of changes to the contrast caller, but I still feel something is missing...
-
-------------------------------------------------------------------------
-r711 | lh3lh3 | 2010-09-03 00:30:48 -0400 (Fri, 03 Sep 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/vcfutils.pl
-
- * changed 3.434 to 4.343 (typo!)
- * fixed a bug in the contrast caller
- * calculate heterozygosity
-
-------------------------------------------------------------------------
-r710 | lh3lh3 | 2010-09-01 23:24:47 -0400 (Wed, 01 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
-
-SNP calling from the GL field
-
-------------------------------------------------------------------------
-r709 | lh3lh3 | 2010-09-01 18:52:30 -0400 (Wed, 01 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcf.c
-
-fixed another problem
-
-------------------------------------------------------------------------
-r708 | lh3lh3 | 2010-09-01 18:31:17 -0400 (Wed, 01 Sep 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/vcf.c
-
- * fixed bugs in parsing VCF
- * parser now works with GT/GQ/DP/PL/GL
-
-------------------------------------------------------------------------
-r707 | lh3lh3 | 2010-09-01 15:28:29 -0400 (Wed, 01 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/prob1.c
-
-Do not compile _BCF_QUAD by default
-
-------------------------------------------------------------------------
-r706 | lh3lh3 | 2010-09-01 15:21:41 -0400 (Wed, 01 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/call1.c
-
-Write the correct ALT and PL in the SNP calling mode.
-
-------------------------------------------------------------------------
-r705 | lh3lh3 | 2010-09-01 12:50:33 -0400 (Wed, 01 Sep 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfutils.pl
-
-more commands for my own uses
-
-------------------------------------------------------------------------
-r704 | lh3lh3 | 2010-09-01 09:26:10 -0400 (Wed, 01 Sep 2010) | 2 lines
-Changed paths:
- A /trunk/samtools/bcftools/vcfutils.pl
-
-Utilities for processing VCF
-
-------------------------------------------------------------------------
-r703 | lh3lh3 | 2010-08-31 16:44:57 -0400 (Tue, 31 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
-preliminary contrast variant caller
-
-------------------------------------------------------------------------
-r702 | lh3lh3 | 2010-08-31 12:28:39 -0400 (Tue, 31 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/call1.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
-
-z' and z'' can be calculated
-
-------------------------------------------------------------------------
-r701 | lh3lh3 | 2010-08-31 10:20:57 -0400 (Tue, 31 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- A /trunk/samtools/bcftools/call1.c (from /trunk/samtools/bcftools/vcfout.c:699)
- M /trunk/samtools/bcftools/prob1.c
- D /trunk/samtools/bcftools/vcfout.c
-
- * rename vcfout.c as call1.c
- * prepare to add two-sample comparison
-
-------------------------------------------------------------------------
-r699 | lh3lh3 | 2010-08-24 15:28:16 -0400 (Tue, 24 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfout.c
-
-fixed a bug in calculating the t statistics
-
-------------------------------------------------------------------------
-r698 | lh3lh3 | 2010-08-24 14:05:50 -0400 (Tue, 24 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcftools/kfunc.c
- M /trunk/samtools/bcftools/vcfout.c
-
- * samtools-0.1.8-13 (r698)
- * perform one-tailed t-test for baseQ, mapQ and endDist
-
-------------------------------------------------------------------------
-r697 | lh3lh3 | 2010-08-24 12:30:13 -0400 (Tue, 24 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/kfunc.c
-
-added regularized incomplete beta function
-
-------------------------------------------------------------------------
-r695 | lh3lh3 | 2010-08-23 17:36:17 -0400 (Mon, 23 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_plcmd.c
-
-change the default correlation coefficient
-
-------------------------------------------------------------------------
-r694 | lh3lh3 | 2010-08-23 14:46:52 -0400 (Mon, 23 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/vcfout.c
-
-print QUAL as floating numbers
-
-------------------------------------------------------------------------
-r693 | lh3lh3 | 2010-08-23 14:06:07 -0400 (Mon, 23 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/examples/Makefile
- A /trunk/samtools/sample.c
- A /trunk/samtools/sample.h
-
- * samtools-0.1.8-12 (r692)
- * group data by samples in "mpileup -g"
-
-------------------------------------------------------------------------
-r692 | lh3lh3 | 2010-08-23 10:58:53 -0400 (Mon, 23 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- D /trunk/samtools/bam_mcns.c
- D /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
-
-remove VCF output in mpileup
-
-------------------------------------------------------------------------
-r691 | lh3lh3 | 2010-08-23 10:48:20 -0400 (Mon, 23 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
-
- * use the revised MAQ error model for mpileup
- * prepare to remove the independent model from mpileup
-
-------------------------------------------------------------------------
-r690 | lh3lh3 | 2010-08-20 15:46:40 -0400 (Fri, 20 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- A /trunk/samtools/errmod.c
- A /trunk/samtools/errmod.h
- M /trunk/samtools/ksort.h
-
-added revised MAQ error model
-
-------------------------------------------------------------------------
-r689 | lh3lh3 | 2010-08-18 09:55:20 -0400 (Wed, 18 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
- M /trunk/samtools/bcftools/vcfout.c
-
-allow to read the prior from the error output. EM iteration is working.
-
-------------------------------------------------------------------------
-r688 | lh3lh3 | 2010-08-17 12:12:20 -0400 (Tue, 17 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/main.c
- M /trunk/samtools/bcftools/vcf.c
-
- * write a little more VCF header
- * concatenate BCFs
-
-------------------------------------------------------------------------
-r687 | lh3lh3 | 2010-08-16 20:53:16 -0400 (Mon, 16 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcf.tex
-
-use float for QUAL
-
-------------------------------------------------------------------------
-r686 | lh3lh3 | 2010-08-14 00:11:13 -0400 (Sat, 14 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/prob1.c
-
-faster for large sample size (in principle)
-
-------------------------------------------------------------------------
-r685 | lh3lh3 | 2010-08-13 23:28:31 -0400 (Fri, 13 Aug 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bcftools/prob1.c
-
- * a numerically stable method to calculate z_{jk}
- * currently slower than the old method but will be important for large sample size
- * in principle, we can speed up for large n, but have not tried
-
-------------------------------------------------------------------------
-r684 | lh3lh3 | 2010-08-11 21:58:31 -0400 (Wed, 11 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfout.c
-
-fixed an issue in parsing integer
-
-------------------------------------------------------------------------
-r683 | lh3lh3 | 2010-08-09 13:05:07 -0400 (Mon, 09 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
-
-do not print refname if file is converted from VCF
-
-------------------------------------------------------------------------
-r682 | lh3lh3 | 2010-08-09 12:59:47 -0400 (Mon, 09 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcf.c
-
- * parse PL
- * fixed a bug in parsing VCF
-
-------------------------------------------------------------------------
-r681 | lh3lh3 | 2010-08-09 12:49:23 -0400 (Mon, 09 Aug 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/main.c
- M /trunk/samtools/bcftools/vcf.c
- M /trunk/samtools/bcftools/vcfout.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/kstring.c
-
- * fixed a bug in kstrtok at kstring.c
- * preliminary VCF parser (not parse everything for now)
- * improved view interface
-
-------------------------------------------------------------------------
-r680 | lh3lh3 | 2010-08-09 10:43:13 -0400 (Mon, 09 Aug 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/vcfout.c
- M /trunk/samtools/kstring.c
- M /trunk/samtools/kstring.h
-
- * improved kstring (added kstrtok)
- * removed the limit on the format string length in bcftools
- * use kstrtok to parse format which fixed a bug in the old code
-
-------------------------------------------------------------------------
-r679 | lh3lh3 | 2010-08-09 01:12:05 -0400 (Mon, 09 Aug 2010) | 2 lines
-Changed paths:
- A /trunk/samtools/bcftools/README
- M /trunk/samtools/bcftools/vcfout.c
-
-help messages
-
-------------------------------------------------------------------------
-r678 | lh3lh3 | 2010-08-09 00:01:52 -0400 (Mon, 09 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcfout.c
-
-perform single-tail test for ED4
-
-------------------------------------------------------------------------
-r677 | lh3lh3 | 2010-08-08 23:48:35 -0400 (Sun, 08 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/kfunc.c
- M /trunk/samtools/bcftools/vcfout.c
-
- * test depth, end distance and HWE
-
-------------------------------------------------------------------------
-r676 | lh3lh3 | 2010-08-08 02:04:15 -0400 (Sun, 08 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/kfunc.c
-
-reimplement incomplete gamma functions. no copy-paste
-
-------------------------------------------------------------------------
-r675 | lh3lh3 | 2010-08-06 22:42:54 -0400 (Fri, 06 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bcftools/fet.c
- M /trunk/samtools/bcftools/prob1.c
- M /trunk/samtools/bcftools/prob1.h
- M /trunk/samtools/bcftools/vcfout.c
-
- * bcftools: add HWE (no testing for now)
- * record end dist in a 2x2 table, not avg, std any more
-
-------------------------------------------------------------------------
-r674 | lh3lh3 | 2010-08-06 17:30:16 -0400 (Fri, 06 Aug 2010) | 3 lines
-Changed paths:
- A /trunk/samtools/bcftools/kfunc.c
-
- * Special functions: log(gamma()), erfc(), P(a,x) (incomplete gamma)
- * Not using Numerical Recipes due to licensing issues
-
-------------------------------------------------------------------------
-r673 | lh3lh3 | 2010-08-05 23:46:53 -0400 (Thu, 05 Aug 2010) | 2 lines
-Changed paths:
- A /trunk/samtools/bcftools/fet.c
-
-Fisher's exact test
-
-------------------------------------------------------------------------
-r672 | lh3lh3 | 2010-08-05 21:48:33 -0400 (Thu, 05 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/examples/Makefile
-
- * samtools-0.1.8-11 (r672)
- * collect more stats for allele balance test in bcftools (not yet)
-
-------------------------------------------------------------------------
-r671 | lh3lh3 | 2010-08-05 16:17:58 -0400 (Thu, 05 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/main.c
-
- * the code base is stabilized again.
- * I will delay the vcf parser, which is quite complicated but with little value for now
-
-------------------------------------------------------------------------
-r670 | lh3lh3 | 2010-08-05 16:03:23 -0400 (Thu, 05 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/Makefile
-
-minor
-
-------------------------------------------------------------------------
-r669 | lh3lh3 | 2010-08-05 16:03:08 -0400 (Thu, 05 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcftools/vcf.c
-
-unfinished vcf parser
-
-------------------------------------------------------------------------
-r668 | lh3lh3 | 2010-08-05 15:46:40 -0400 (Thu, 05 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bcftools/Makefile
- M /trunk/samtools/bcftools/bcf.c
- M /trunk/samtools/bcftools/bcf.h
- M /trunk/samtools/bcftools/bcfutils.c
- M /trunk/samtools/bcftools/index.c
- M /trunk/samtools/bcftools/main.c
- A /trunk/samtools/bcftools/vcf.c
- M /trunk/samtools/bcftools/vcfout.c
-
- * added preliminary VCF parser (not finished)
- * change struct a bit
-
-------------------------------------------------------------------------
-r667 | lh3lh3 | 2010-08-03 22:35:27 -0400 (Tue, 03 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bcftools/bcf.c
-
- * allow to set min base q
- * fixed a bug in mpileup -u
-
-------------------------------------------------------------------------
-r666 | lh3lh3 | 2010-08-03 22:08:44 -0400 (Tue, 03 Aug 2010) | 2 lines
-Changed paths:
- A /trunk/samtools/bcftools/bcf.tex
-
-spec
-
-------------------------------------------------------------------------
-r665 | lh3lh3 | 2010-08-03 21:18:57 -0400 (Tue, 03 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/Makefile
-
-added more examples
-
-------------------------------------------------------------------------
-r664 | lh3lh3 | 2010-08-03 21:13:00 -0400 (Tue, 03 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bcftools/Makefile
-
-fixed compilation error
-
-------------------------------------------------------------------------
-r662 | lh3lh3 | 2010-08-03 21:04:00 -0400 (Tue, 03 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- D /trunk/samtools/bcf.c
- D /trunk/samtools/bcf.h
- A /trunk/samtools/bcftools
- A /trunk/samtools/bcftools/Makefile
- A /trunk/samtools/bcftools/bcf.c
- A /trunk/samtools/bcftools/bcf.h
- A /trunk/samtools/bcftools/bcfutils.c
- A /trunk/samtools/bcftools/index.c
- A /trunk/samtools/bcftools/main.c
- A /trunk/samtools/bcftools/prob1.c
- A /trunk/samtools/bcftools/prob1.h
- A /trunk/samtools/bcftools/vcfout.c
-
-move bcftools to samtools
-
-------------------------------------------------------------------------
-r660 | lh3lh3 | 2010-08-03 15:58:32 -0400 (Tue, 03 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
-
-fixed another minor bug
-
-------------------------------------------------------------------------
-r658 | lh3lh3 | 2010-08-03 15:06:45 -0400 (Tue, 03 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bcf.c
-
- * samtools-0.1.8-10 (r658)
- * fixed a bug in bam2bcf when the reference is N
-
-------------------------------------------------------------------------
-r657 | lh3lh3 | 2010-08-03 14:50:23 -0400 (Tue, 03 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
-
- * fixed a bug
- * treat ambiguous ref base as the fifth base
-
-------------------------------------------------------------------------
-r654 | lh3lh3 | 2010-08-02 17:38:27 -0400 (Mon, 02 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/bcftools/bcf.c
- M /trunk/samtools/bcf.c
-
-missing a column in VCF output...
-
-------------------------------------------------------------------------
-r653 | lh3lh3 | 2010-08-02 17:31:33 -0400 (Mon, 02 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcf.c
-
-fixed a memory leak
-
-------------------------------------------------------------------------
-r651 | lh3lh3 | 2010-08-02 17:27:31 -0400 (Mon, 02 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bcf.c
-
-fixed a bug in bcf reader
-
-------------------------------------------------------------------------
-r650 | lh3lh3 | 2010-08-02 17:00:41 -0400 (Mon, 02 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam2bcf.c
-
-fixed a bug
-
-------------------------------------------------------------------------
-r649 | lh3lh3 | 2010-08-02 16:49:35 -0400 (Mon, 02 Aug 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam2bcf.c
- M /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-9 (r649)
- * lossless representation of PL in BCF output
-
-------------------------------------------------------------------------
-r648 | lh3lh3 | 2010-08-02 16:07:25 -0400 (Mon, 02 Aug 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- A /trunk/samtools/bam2bcf.c
- A /trunk/samtools/bam2bcf.h
- M /trunk/samtools/bam_plcmd.c
- A /trunk/samtools/bcf.c
- A /trunk/samtools/bcf.h
-
-Generate binary VCF
-
-------------------------------------------------------------------------
-r644 | lh3lh3 | 2010-07-28 11:59:19 -0400 (Wed, 28 Jul 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-8 (r644)
- * mpileup becomes a little stable again
- * the method is slightly different, but is more theoretically correct
- * snp calling is O(n^2) instead of O(n^3)
-
-------------------------------------------------------------------------
-r643 | lh3lh3 | 2010-07-28 11:54:15 -0400 (Wed, 28 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
-
- * fixed a STUPID bug, which cost me a lot of time.
- * I am going to clean up mcns a little bit
-
-------------------------------------------------------------------------
-r642 | lh3lh3 | 2010-07-27 23:23:07 -0400 (Tue, 27 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
-
-supposedly this is THE correct implementation, but more testing is needed
-
-------------------------------------------------------------------------
-r641 | lh3lh3 | 2010-07-27 22:43:39 -0400 (Tue, 27 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
-
-NOT ready yet. Going to make further changes...
-
-------------------------------------------------------------------------
-r639 | lh3lh3 | 2010-07-25 22:18:38 -0400 (Sun, 25 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-7 (r639)
- * fixed the reference allele assignment
-
-------------------------------------------------------------------------
-r638 | lh3lh3 | 2010-07-25 12:01:26 -0400 (Sun, 25 Jul 2010) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-6 (r638)
- * skip isnan/isinf in case of float underflow
- * added the flat prior
- * fixed an issue where there are no reads supporting the reference
-
-------------------------------------------------------------------------
-r637 | lh3lh3 | 2010-07-24 14:16:27 -0400 (Sat, 24 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-minor changes
-
-------------------------------------------------------------------------
-r636 | lh3lh3 | 2010-07-24 14:07:27 -0400 (Sat, 24 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
-minor tweaks
-
-------------------------------------------------------------------------
-r635 | lh3lh3 | 2010-07-24 01:49:49 -0400 (Sat, 24 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
-
-posterior expectation FINALLY working. I am so tired...
-
-------------------------------------------------------------------------
-r633 | lh3lh3 | 2010-07-23 13:50:48 -0400 (Fri, 23 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-another minor fix to mpileup
-
-------------------------------------------------------------------------
-r632 | lh3lh3 | 2010-07-23 13:43:31 -0400 (Fri, 23 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-added the format column
-
-------------------------------------------------------------------------
-r631 | lh3lh3 | 2010-07-23 13:25:44 -0400 (Fri, 23 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
-added an alternative prior
-
-------------------------------------------------------------------------
-r628 | lh3lh3 | 2010-07-23 11:48:51 -0400 (Fri, 23 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
-
-calculate posterior allele frequency
-
-------------------------------------------------------------------------
-r627 | lh3lh3 | 2010-07-22 21:39:13 -0400 (Thu, 22 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-3 (r627)
- * multi-sample snp calling appears to work. More tests needed.
-
-------------------------------------------------------------------------
-r626 | lh3lh3 | 2010-07-22 16:37:56 -0400 (Thu, 22 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_tview.c
-
- * preliminary multisample SNP caller.
- * something looks not so right, but it largely works
-
-------------------------------------------------------------------------
-r617 | lh3lh3 | 2010-07-14 16:26:27 -0400 (Wed, 14 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.8-2 (r617)
- * allele frequency calculation apparently works...
-
-------------------------------------------------------------------------
-r616 | lh3lh3 | 2010-07-14 13:33:51 -0400 (Wed, 14 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- A /trunk/samtools/bam_mcns.c
- A /trunk/samtools/bam_mcns.h
- M /trunk/samtools/bam_plcmd.c
-
- * added multi-sample framework. It is not working yet.
- * improved the mpileup interface
-
-------------------------------------------------------------------------
-r615 | lh3lh3 | 2010-07-13 14:50:12 -0400 (Tue, 13 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/Makefile
-
- * samtools-0.1.8-1 (r615)
- * allow to get mpileup at required sites
-
-------------------------------------------------------------------------
-r613 | lh3lh3 | 2010-07-11 22:40:56 -0400 (Sun, 11 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.8
-
-------------------------------------------------------------------------
-r612 | lh3lh3 | 2010-07-11 21:08:56 -0400 (Sun, 11 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-fixed a compiling issue for Windows
-
-------------------------------------------------------------------------
-r611 | lh3lh3 | 2010-07-11 20:59:15 -0400 (Sun, 11 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
-
-fixed a bug in sorting when output to stdout (by Peter Chines)
-
-------------------------------------------------------------------------
-r610 | lh3lh3 | 2010-07-09 17:05:10 -0400 (Fri, 09 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/NEWS
- M /trunk/samtools/bam_plcmd.c
-
-change the command line option of pileup
-
-------------------------------------------------------------------------
-r609 | lh3lh3 | 2010-07-09 00:39:34 -0400 (Fri, 09 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_pileup.c
- A /trunk/samtools/examples/toy.fa
- A /trunk/samtools/examples/toy.sam
-
-make pileup work with CIGAR with I/D at the beginning or in the end
-
-------------------------------------------------------------------------
-r608 | lh3lh3 | 2010-07-08 22:36:12 -0400 (Thu, 08 Jul 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_tview.c
-
- * make tview more friendly
- * a temporary remedy for an issue in indel calling
-
-------------------------------------------------------------------------
-r607 | lh3lh3 | 2010-07-08 14:43:52 -0400 (Thu, 08 Jul 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-r607
- * improved the genotype accuracy for indels
- * use the SOAPsnp model for SNP calling by default.
-
-------------------------------------------------------------------------
-r606 | lh3lh3 | 2010-07-08 01:05:19 -0400 (Thu, 08 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/Makefile
-
-removed a debugging example
-
-------------------------------------------------------------------------
-r605 | lh3lh3 | 2010-07-08 01:04:09 -0400 (Thu, 08 Jul 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-18 (r605)
- * fixed an issue when a deletion and mismatch occur at the same time
- and the base quality is higher than 40 (if -I40).
-
-------------------------------------------------------------------------
-r604 | lh3lh3 | 2010-07-02 19:32:24 -0400 (Fri, 02 Jul 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/misc/Makefile
-
-fixed a minor bug in idxstats
-
-------------------------------------------------------------------------
-r601 | lh3lh3 | 2010-06-16 09:03:59 -0400 (Wed, 16 Jun 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-fixed a minor bug in indexing
-
-------------------------------------------------------------------------
-r600 | lh3lh3 | 2010-06-15 10:17:53 -0400 (Tue, 15 Jun 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam.c
-
-change printf() to puts in exporting
-
-------------------------------------------------------------------------
-r599 | lh3lh3 | 2010-06-13 21:41:11 -0400 (Sun, 13 Jun 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
-minor fix. No actual effect.
-
-------------------------------------------------------------------------
-r598 | lh3lh3 | 2010-06-13 21:32:45 -0400 (Sun, 13 Jun 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-added Makefile targets to compile shared/dynamic library
-
-------------------------------------------------------------------------
-r596 | lh3lh3 | 2010-06-13 19:48:07 -0400 (Sun, 13 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-17 (r596)
- * also keep the number of coor-less reads in the index file
-
-------------------------------------------------------------------------
-r595 | lh3lh3 | 2010-06-13 18:54:26 -0400 (Sun, 13 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-16 (r595)
- * write additional information to bam index
-
-------------------------------------------------------------------------
-r594 | lh3lh3 | 2010-06-13 17:29:52 -0400 (Sun, 13 Jun 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-fixed a bug for unmapped sequences in indexing
-
-------------------------------------------------------------------------
-r593 | lh3lh3 | 2010-06-12 18:11:32 -0400 (Sat, 12 Jun 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/samtools.1
-
-rename iterf as iter
-
-------------------------------------------------------------------------
-r592 | lh3lh3 | 2010-06-12 18:02:38 -0400 (Sat, 12 Jun 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-15 (r592)
- * fixed a few minor memory leaks in the new pileup code
- * improved the functionality of mpileup
-
-------------------------------------------------------------------------
-r591 | lh3lh3 | 2010-06-12 14:09:22 -0400 (Sat, 12 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-14 (r591)
- * elementary multi-way pileup. More testing and more functionality to be done.
-
-------------------------------------------------------------------------
-r590 | lh3lh3 | 2010-06-12 01:00:24 -0400 (Sat, 12 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-13 (r590)
- * added mpileup APIs. No compiling errors, but not tested at all. It is late.
-
-------------------------------------------------------------------------
-r589 | lh3lh3 | 2010-06-11 22:37:09 -0400 (Fri, 11 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-12 (r589)
- * added iterator-like APIs for pileup
-
-------------------------------------------------------------------------
-r588 | lh3lh3 | 2010-06-11 17:41:13 -0400 (Fri, 11 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-11 (r588)
- * ported a few improvements from tabix back to samtools
-
-------------------------------------------------------------------------
-r587 | lh3lh3 | 2010-06-11 17:33:16 -0400 (Fri, 11 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-10 (r587)
- * added iterator interface for bam_fetch (ported back from tabix)
-
-------------------------------------------------------------------------
-r586 | lh3lh3 | 2010-06-11 13:23:53 -0400 (Fri, 11 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- A /trunk/samtools/bam_reheader.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
-
- * samtools-0.1.7-9 (r586)
- * added "reheader" to replace the BAM header
-
-------------------------------------------------------------------------
-r585 | lh3lh3 | 2010-06-11 12:22:06 -0400 (Fri, 11 Jun 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kstring.h
-
- * samtools-0.1.7-8 (r585)
- * speed up "view"
-
-------------------------------------------------------------------------
-r584 | lh3lh3 | 2010-06-11 12:00:41 -0400 (Fri, 11 Jun 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/kstring.h
- M /trunk/samtools/misc/wgsim_eval.pl
-
- * samtools-0.1.7-7 (r584)
- * ported tabix BGZF to samtools
- * flush BGZF after writing the BAM header and between alignment boundaries
-
-------------------------------------------------------------------------
-r583 | petulda | 2010-06-11 11:58:20 -0400 (Fri, 11 Jun 2010) | 1 line
-Changed paths:
- A /trunk/samtools/misc/varfilter.py
-
-Initial release on behalf of Aylwyn Scally
-------------------------------------------------------------------------
-r561 | petulda | 2010-05-07 08:41:56 -0400 (Fri, 07 May 2010) | 1 line
-Changed paths:
- M /trunk/samtools/samtools.1
-
-Added a note about the indels coordinates
-------------------------------------------------------------------------
-r551 | petulda | 2010-04-23 09:42:13 -0400 (Fri, 23 Apr 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Added the possibility to print or not to print the reference allele
-------------------------------------------------------------------------
-r546 | petulda | 2010-04-15 04:33:55 -0400 (Thu, 15 Apr 2010) | 1 line
-Changed paths:
- M /trunk/samtools/sam_header.c
-
-More descriptive message for space separated tags
-------------------------------------------------------------------------
-r545 | petulda | 2010-04-14 11:44:50 -0400 (Wed, 14 Apr 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Speedup with -i, no need to query the reference all the time
-------------------------------------------------------------------------
-r541 | petulda | 2010-03-15 10:03:51 -0400 (Mon, 15 Mar 2010) | 1 line
-Changed paths:
- M /trunk/samtools/sam_header.c
-
-Fixed the order of sequences in the header
-------------------------------------------------------------------------
-r540 | petulda | 2010-03-04 06:28:35 -0500 (Thu, 04 Mar 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Added possibility to select indels only and fixed a bug in reporting homozygous indels.
-------------------------------------------------------------------------
-r539 | jmarshall | 2010-02-27 06:48:17 -0500 (Sat, 27 Feb 2010) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.c
-
-Improve the invalid 'BAM\1' magic number error message, and also print it
-when no bytes can be read from the alleged BAM file, e.g., in the common
-user error case when a SAM file has accidentally been supplied.
-
-------------------------------------------------------------------------
-r538 | petulda | 2010-02-26 10:51:40 -0500 (Fri, 26 Feb 2010) | 1 line
-Changed paths:
- M /trunk/samtools/AUTHORS
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/sam_header.c
-
-Improved efficiency of header parsing
-------------------------------------------------------------------------
-r537 | lh3lh3 | 2010-02-23 21:08:48 -0500 (Tue, 23 Feb 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/export2sam.pl
-
-Updated export2sam.pl by Chris Saunders from Illumina.
-
-
-------------------------------------------------------------------------
-r536 | petulda | 2010-02-17 08:32:53 -0500 (Wed, 17 Feb 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
-Fixed filtering of SNPs near indels. Added min indel and SNP quality filter.
-------------------------------------------------------------------------
-r535 | petulda | 2010-02-12 04:52:37 -0500 (Fri, 12 Feb 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Print an error for pileups in simple format
-------------------------------------------------------------------------
-r534 | lh3lh3 | 2010-02-11 14:01:41 -0500 (Thu, 11 Feb 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-added a hidden option in pileup to output the base position (for Erin)
-
-------------------------------------------------------------------------
-r533 | petulda | 2010-02-09 10:12:14 -0500 (Tue, 09 Feb 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Added possibility to specify a custom column title for the data column
-------------------------------------------------------------------------
-r532 | petulda | 2010-02-09 09:46:09 -0500 (Tue, 09 Feb 2010) | 1 line
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
-
-Added the -d option to limit maximum depth for indels.
-------------------------------------------------------------------------
-r531 | petulda | 2010-02-03 07:57:27 -0500 (Wed, 03 Feb 2010) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Added VCF header
-------------------------------------------------------------------------
-r530 | lh3lh3 | 2010-02-01 09:13:19 -0500 (Mon, 01 Feb 2010) | 3 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
- M /trunk/samtools/misc/wgsim.c
-
- * samtools-0.1.7-6
- * fixed a bug in faidx
-
-------------------------------------------------------------------------
-r529 | jmarshall | 2010-01-11 18:51:49 -0500 (Mon, 11 Jan 2010) | 2 lines
-Changed paths:
- M /trunk/samtools/faidx.c
-
-Put the right filename in the error message.
-
-------------------------------------------------------------------------
-r528 | lh3lh3 | 2009-12-14 11:26:47 -0500 (Mon, 14 Dec 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-5 (r528)
- * further add new consensus generation strategy
-
-------------------------------------------------------------------------
-r527 | petulda | 2009-12-11 12:31:05 -0500 (Fri, 11 Dec 2009) | 1 line
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-Fixed a bug in knet_seek
-------------------------------------------------------------------------
-r526 | petulda | 2009-12-11 07:51:18 -0500 (Fri, 11 Dec 2009) | 1 line
-Changed paths:
- M /trunk/samtools/misc/sam2vcf.pl
-
-Small fix in VCF format: dot for the empty INFO field
-------------------------------------------------------------------------
-r525 | petulda | 2009-12-11 04:36:18 -0500 (Fri, 11 Dec 2009) | 1 line
-Changed paths:
- M /trunk/samtools/sam_header.c
-
-Allow tabs in the CO header field
-------------------------------------------------------------------------
-r524 | jmarshall | 2009-12-10 10:03:58 -0500 (Thu, 10 Dec 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/Makefile.mingw
-
-Depend on libbam.a rather than the phony target, so that samtools is not
-unnecessarily rebuilt every time. Also clean bgzip.
-
-------------------------------------------------------------------------
-r523 | jmarshall | 2009-12-10 09:45:32 -0500 (Thu, 10 Dec 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/Makefile.mingw
-
-Fix a bug in compiling bgzip: this also needs knetfile.o when _USE_KNETFILE
-is defined. Also introduce $(KNETFILE_O) which can be set to empty to
-facilitate non-knet builds.
-
-------------------------------------------------------------------------
-r522 | lh3lh3 | 2009-12-01 13:02:36 -0500 (Tue, 01 Dec 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.7-4 (r522)
- * fixed a bug in "view -r"
- * added a new option "view -R" to read required read groups from a file
-
-------------------------------------------------------------------------
-r521 | lh3lh3 | 2009-12-01 10:00:12 -0500 (Tue, 01 Dec 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.7-3 (r521)
- * calmd: optionally mask matching bases as N
-
-------------------------------------------------------------------------
-r520 | lh3lh3 | 2009-12-01 09:37:17 -0500 (Tue, 01 Dec 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
-
- * samtools-0.1.7-2 (r520)
- * fixed a few issues with compilation in Windows (on behalf of John)
- * choose a random base as the consensus (for population genetics studies)
-
-------------------------------------------------------------------------
-r519 | jmarshall | 2009-11-30 10:53:02 -0500 (Mon, 30 Nov 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-Put libraries at the end, so they can resolve references from libbam.a
-as well, even with old-fashioned linkers.
-
-Also use libbam.a explicitly rather than "-L. -lbam" to ensure that we get
-the freshly built library, not some other libbam.a lying around the system.
-
-------------------------------------------------------------------------
-r518 | jmarshall | 2009-11-30 08:44:56 -0500 (Mon, 30 Nov 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/misc/Makefile
-
-Also clean *.exe (for Cygwin users using this makefile).
-
-------------------------------------------------------------------------
-r517 | jmarshall | 2009-11-30 07:09:04 -0500 (Mon, 30 Nov 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-Index files should be opened in binary mode, not text mode.
-
-------------------------------------------------------------------------
-r516 | lh3lh3 | 2009-11-27 15:18:59 -0500 (Fri, 27 Nov 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/examples/bam2bed.c
-
-another example program
-
-------------------------------------------------------------------------
-r515 | lh3lh3 | 2009-11-27 10:44:56 -0500 (Fri, 27 Nov 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/wgsim_eval.pl
- M /trunk/samtools/sam.c
-
- * samtools-0.1.7-1 (r515)
- * report an error when .fai contains duplicated names, instead of segfault
-
-------------------------------------------------------------------------
-r514 | jmarshall | 2009-11-24 09:45:35 -0500 (Tue, 24 Nov 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam.c
-
-Format 'c'-encoded auxiliary fields correctly, as *signed* integers.
-
-------------------------------------------------------------------------
-r513 | lh3lh3 | 2009-11-16 10:13:07 -0500 (Mon, 16 Nov 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.mingw
-
-Update Makefile.mingw for the same reason
-
-------------------------------------------------------------------------
-r512 | lh3lh3 | 2009-11-16 10:00:08 -0500 (Mon, 16 Nov 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-Fixed a bug in compiling razip
-
-------------------------------------------------------------------------
-r510 | lh3lh3 | 2009-11-10 10:55:41 -0500 (Tue, 10 Nov 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.7 (r510)
-
-------------------------------------------------------------------------
-r509 | lh3lh3 | 2009-11-06 09:17:09 -0500 (Fri, 06 Nov 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-22 (r509)
- * forget to fix a similar problem in glfgen
-
-------------------------------------------------------------------------
-r508 | lh3lh3 | 2009-11-06 09:06:40 -0500 (Fri, 06 Nov 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.6-21 (r508)
- * fixed a potential bug in the indel caller towards the end of a chromosome
-
-------------------------------------------------------------------------
-r494 | lh3lh3 | 2009-10-26 11:38:00 -0400 (Mon, 26 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.6-19 (r494)
- * allow to convert Illumina quality (64 based) to the BAM quality
-
-------------------------------------------------------------------------
-r493 | lh3lh3 | 2009-10-26 10:24:39 -0400 (Mon, 26 Oct 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_header.c
-
- * samtools-0.1.6-18 (r493)
- * fixed the bugs due to improperly incorporating Petr's header parser
- * a little code clean up in sam_header.c
-
-------------------------------------------------------------------------
-r492 | petulda | 2009-10-24 09:43:25 -0400 (Sat, 24 Oct 2009) | 1 line
-Changed paths:
- M /trunk/samtools/sam_header.c
-
-Added sam_header_line_free call for sam_header_parse2
-------------------------------------------------------------------------
-r491 | lh3lh3 | 2009-10-24 00:50:16 -0400 (Sat, 24 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/sam_view.c
-
- * BUGGY VERSION
- * fixed a minor bug
-
-------------------------------------------------------------------------
-r490 | lh3lh3 | 2009-10-24 00:45:12 -0400 (Sat, 24 Oct 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/sam.c
-
- * BUGGY VERSION
- * improved the interface a bit
- * bug unfixed
-
-------------------------------------------------------------------------
-r489 | lh3lh3 | 2009-10-24 00:41:50 -0400 (Sat, 24 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/sam_header.c
- M /trunk/samtools/sam_header.h
-
- * BUGGY VERSION. Please NOT use it.
- * Fixed a minor bug, but the major bug is still there.
-
-------------------------------------------------------------------------
-r488 | lh3lh3 | 2009-10-24 00:17:10 -0400 (Sat, 24 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/kaln.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_header.c
- M /trunk/samtools/sam_header.h
- M /trunk/samtools/sam_view.c
-
- * This revision is SERIOUSLY BUGGY. Please NOT use it.
- * Start to incorporate header parsing from Petr Danecek
-
-------------------------------------------------------------------------
-r487 | petulda | 2009-10-23 11:44:32 -0400 (Fri, 23 Oct 2009) | 1 line
-Changed paths:
- M /trunk/samtools/sam_header.c
- M /trunk/samtools/sam_header.h
-
-Now possible to merge multiple HeaderDict dictionaries
-------------------------------------------------------------------------
-r486 | petulda | 2009-10-22 11:46:58 -0400 (Thu, 22 Oct 2009) | 1 line
-Changed paths:
- M /trunk/samtools/sam_header.c
-
-
-------------------------------------------------------------------------
-r485 | petulda | 2009-10-22 11:41:56 -0400 (Thu, 22 Oct 2009) | 1 line
-Changed paths:
- A /trunk/samtools/sam_header.c
- A /trunk/samtools/sam_header.h
-
-
-------------------------------------------------------------------------
-r484 | lh3lh3 | 2009-10-19 14:31:32 -0400 (Mon, 19 Oct 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/examples/Makefile
-
- * samtools-0.1.6-17 (r484)
- * fixed a memory leak in rmdupse
- * fixed a bug in parsing @RG header lines
- * test rmdup in examples/
-
-------------------------------------------------------------------------
-r483 | lh3lh3 | 2009-10-19 13:22:48 -0400 (Mon, 19 Oct 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-16 (r483)
- * unify the interface of rmdup and rmdupse
- * a new bug found in rg2lib(). Have not been fixed yet.
-
-------------------------------------------------------------------------
-r482 | lh3lh3 | 2009-10-19 13:03:34 -0400 (Mon, 19 Oct 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/klist.h
-
- * samtools-0.1.6-15 (r482)
- * rewrite rmdupse
- * rmdupse is now library aware
-
-------------------------------------------------------------------------
-r481 | lh3lh3 | 2009-10-18 00:07:21 -0400 (Sun, 18 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-14 (r480)
- * rmdup is now RG aware
-
-------------------------------------------------------------------------
-r480 | lh3lh3 | 2009-10-17 22:05:20 -0400 (Sat, 17 Oct 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
-added a small utility to parse SRA XML files
-
-------------------------------------------------------------------------
-r479 | lh3lh3 | 2009-10-17 20:57:26 -0400 (Sat, 17 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-13 (r479)
- * merge: optionally use file names as RG tags
-
-------------------------------------------------------------------------
-r478 | lh3lh3 | 2009-10-14 14:18:12 -0400 (Wed, 14 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kaln.c
-
- * samtools-0.1.6-12 (r478)
- * fixed a bug in the indel caller
-
-------------------------------------------------------------------------
-r477 | lh3lh3 | 2009-10-10 06:12:26 -0400 (Sat, 10 Oct 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-11 (r477)
- * fixed a bug due to recent change in bam_index.c (thank Nicole Washington for the patch)
-
-------------------------------------------------------------------------
-r476 | petulda | 2009-10-09 11:45:36 -0400 (Fri, 09 Oct 2009) | 1 line
-Changed paths:
- A /trunk/samtools/misc/sam2vcf.pl
-
-Added the sam2vcf.pl script.
-------------------------------------------------------------------------
-r475 | lh3lh3 | 2009-10-08 10:19:16 -0400 (Thu, 08 Oct 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/kaln.c
- A /trunk/samtools/kaln.h
-
-Unfinished modification. Please do not use this revision...
-
-------------------------------------------------------------------------
-r474 | petulda | 2009-10-08 06:39:54 -0400 (Thu, 08 Oct 2009) | 1 line
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-Removed the offending knet_seek message.
-------------------------------------------------------------------------
-r473 | petulda | 2009-10-06 09:26:35 -0400 (Tue, 06 Oct 2009) | 1 line
-Changed paths:
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/razf.c
-
-Bug fix - faidx on RAZF compressed files now working.
-------------------------------------------------------------------------
-r472 | lh3lh3 | 2009-10-02 08:42:57 -0400 (Fri, 02 Oct 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/samtools.1
-
-Clarify the meaning of a region like "chr2:1,000,000".
-
-------------------------------------------------------------------------
-r471 | lh3lh3 | 2009-10-02 05:42:19 -0400 (Fri, 02 Oct 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/novo2sam.pl
-
-Fixed minor bugs in novo2sam.pl (on behalf of Ken Chen and Colin Hercus)
-
-------------------------------------------------------------------------
-r470 | lh3lh3 | 2009-09-29 15:01:27 -0400 (Tue, 29 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile.mingw
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.6-9 (r470)
- * make knetfile.c compatible with MinGW (thank Martin Morgan for the patch)
-
-------------------------------------------------------------------------
-r469 | lh3lh3 | 2009-09-29 08:07:44 -0400 (Tue, 29 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-9 (r469)
- * refactor bam_fetch() for Python binding. On behalf of Leo Goodstadt.
-
-------------------------------------------------------------------------
-r468 | lh3lh3 | 2009-09-28 05:18:29 -0400 (Mon, 28 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
-
- * samtools-0.1.6-7 (r468)
- * make merge stable
-
-------------------------------------------------------------------------
-r467 | petulda | 2009-09-28 04:51:29 -0400 (Mon, 28 Sep 2009) | 1 line
-Changed paths:
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzip.c
- M /trunk/samtools/razf.c
- M /trunk/samtools/razip.c
-
-Changed the mode for newly created files to 0666. This allows less strict permissions with umask properly set (e.g. 0002 vs. 0022).
-------------------------------------------------------------------------
-r466 | lh3lh3 | 2009-09-24 06:29:19 -0400 (Thu, 24 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-6 (r466)
- * do not crash calmd when some sequences are absent from the reference.
-
-------------------------------------------------------------------------
-r464 | jmarshall | 2009-09-23 06:14:32 -0400 (Wed, 23 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/knetfile.c
-
-Suppress bgzf_check_EOF() messages when reading from a pipe, as there is
-no way to seek on a pipe and the messages always appear.
-
-------------------------------------------------------------------------
-r463 | petulda | 2009-09-16 07:05:41 -0400 (Wed, 16 Sep 2009) | 1 line
-Changed paths:
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/razf.c
-
-A bug fix, "samtools view" is now working again.
-------------------------------------------------------------------------
-r462 | lh3lh3 | 2009-09-16 04:51:07 -0400 (Wed, 16 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/faidx.c
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
- M /trunk/samtools/razf.c
- M /trunk/samtools/razf.h
-
- * samtools-0.1.6-5 (r462)
- * Added knetfile support in razf and faidx (on behalf of Petr Danecek)
-
-------------------------------------------------------------------------
-r460 | lh3lh3 | 2009-09-09 07:06:22 -0400 (Wed, 09 Sep 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/samtools.1
-
-fixed a formatting issue
-
-------------------------------------------------------------------------
-r459 | lh3lh3 | 2009-09-08 18:14:08 -0400 (Tue, 08 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-4 (r459)
- * make sort output the result to stdout when -o is in use
-
-------------------------------------------------------------------------
-r458 | lh3lh3 | 2009-09-07 05:10:28 -0400 (Mon, 07 Sep 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/faidx.c
- M /trunk/samtools/faidx.h
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.6-2 (r458)
- * added more interface to faidx (by Nils)
- * updated documentation
-
-------------------------------------------------------------------------
-r457 | lh3lh3 | 2009-09-05 16:12:04 -0400 (Sat, 05 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.6-2 (r457)
- * get rid of three assert() in bam_sort.c
-
-------------------------------------------------------------------------
-r456 | jmarshall | 2009-09-04 12:46:25 -0400 (Fri, 04 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/razf.c
-
-Return NULL from _razf_open() (and hence razf_open()/razf_open2())
-when opening the file fails.
-
-------------------------------------------------------------------------
-r453 | lh3lh3 | 2009-09-02 08:56:33 -0400 (Wed, 02 Sep 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
- D /trunk/samtools/source.dot
-
-Release samtools-0.1.6
-
-------------------------------------------------------------------------
-r451 | lh3lh3 | 2009-09-02 05:44:48 -0400 (Wed, 02 Sep 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.5-34 (r451)
- * applied the patch by John
- * improved the help message a little bit
-
-------------------------------------------------------------------------
-r450 | lh3lh3 | 2009-09-02 04:55:55 -0400 (Wed, 02 Sep 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_color.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-33 (r450)
- * fixed a bug in bam_color.c (on behalf of Nils Homer)
-
-------------------------------------------------------------------------
-r449 | lh3lh3 | 2009-08-29 15:36:41 -0400 (Sat, 29 Aug 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
-
- * samtools-0.1.5-32 (r449)
- * fillmd: fixed a bug in modifying MD/NM tags
- * in import, give a warning if the read is aligned but there is no CIGAR.
-
-------------------------------------------------------------------------
-r448 | lh3lh3 | 2009-08-19 04:44:28 -0400 (Wed, 19 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/wgsim_eval.pl
-
- * samtools-0.1.5-31 (r448)
- * fixed an issue when the last CIGAR is I or D
-
-------------------------------------------------------------------------
-r447 | lh3lh3 | 2009-08-17 04:34:57 -0400 (Mon, 17 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-30 (r447)
- * fixed a bug in bam_aux_get(): 'A' is not checked
-
-------------------------------------------------------------------------
-r446 | lh3lh3 | 2009-08-17 04:33:17 -0400 (Mon, 17 Aug 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bamtk.c
-
- *
-
-------------------------------------------------------------------------
-r444 | lh3lh3 | 2009-08-11 05:02:36 -0400 (Tue, 11 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-28 (r444)
- * bug in "merge -n"
-
-------------------------------------------------------------------------
-r443 | lh3lh3 | 2009-08-11 04:29:11 -0400 (Tue, 11 Aug 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-27 (r443)
- * SEQ and QUAL can be "*"
- * parse CIGAR "=" and "X" as "M"
-
-------------------------------------------------------------------------
-r442 | lh3lh3 | 2009-08-07 16:56:38 -0400 (Fri, 07 Aug 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/md5.c
- M /trunk/samtools/misc/md5.h
- M /trunk/samtools/misc/md5fa.c
-
- * samtools-0.1.5-26 (r442)
- * replace RSA Inc md5.* with ones under permissive license
- * fixed a bug in detecting unsorted bam in pileup
-
-------------------------------------------------------------------------
-r441 | bhandsaker | 2009-08-05 09:41:28 -0400 (Wed, 05 Aug 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/bgzip.c
-
-Change copyright notices now that MIT has approved open source distribution.
-
-------------------------------------------------------------------------
-r440 | lh3lh3 | 2009-08-05 05:44:24 -0400 (Wed, 05 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_stat.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-25 (r436)
- * in flagstats, do not report singletons if both ends are unmapped
-
-------------------------------------------------------------------------
-r439 | lh3lh3 | 2009-08-04 17:16:51 -0400 (Tue, 04 Aug 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/maq2sam.c
-
-fixed a SERIOUS bug in setting 0x20 flag
-
-------------------------------------------------------------------------
-r438 | lh3lh3 | 2009-08-04 16:50:43 -0400 (Tue, 04 Aug 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
-fixed two minor bugs (suggested by Tim M Storm)
-
-------------------------------------------------------------------------
-r437 | lh3lh3 | 2009-08-04 04:13:24 -0400 (Tue, 04 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-24 (r435)
- * fixed a typo
-
-------------------------------------------------------------------------
-r434 | lh3lh3 | 2009-08-03 05:40:42 -0400 (Mon, 03 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-23 (r434)
- * in tview, press 'r' to show read names rather than sequences
-
-------------------------------------------------------------------------
-r433 | lh3lh3 | 2009-08-02 14:13:35 -0400 (Sun, 02 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
-
- * tried to fix the buggy FTP random access in Windows. FAILED.
- * anyway, MinGW seems to have problem with "%lld".
-
-------------------------------------------------------------------------
-r432 | lh3lh3 | 2009-08-01 19:32:07 -0400 (Sat, 01 Aug 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/Makefile.mingw
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/faidx.c
- M /trunk/samtools/razf.c
- A /trunk/samtools/win32/libcurses.a
- A /trunk/samtools/win32/xcurses.h
-
- * samtools-0.1.5-22 (r432)
- * faidx: fixed compatibility issue with _WIN32
- * razf: fixed potential compatibility issue with _WIN32
- * PDCurses support in Windows
-
-------------------------------------------------------------------------
-r431 | lh3lh3 | 2009-08-01 18:34:54 -0400 (Sat, 01 Aug 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/win32/libz.a
-
-replace the GnuWin32 version of libz.a with my own build with MinGW.
-
-------------------------------------------------------------------------
-r430 | lh3lh3 | 2009-08-01 18:21:07 -0400 (Sat, 01 Aug 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-add comments
-
-------------------------------------------------------------------------
-r429 | lh3lh3 | 2009-08-01 17:41:19 -0400 (Sat, 01 Aug 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile.mingw
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.5-21 (r428)
- * knetfile.c is now compatible with mingw-winsock
-
-------------------------------------------------------------------------
-r428 | lh3lh3 | 2009-07-31 19:39:07 -0400 (Fri, 31 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.mingw
-
-simplify MinGW Makefile
-
-------------------------------------------------------------------------
-r427 | lh3lh3 | 2009-07-31 19:30:54 -0400 (Fri, 31 Jul 2009) | 5 lines
-Changed paths:
- A /trunk/samtools/Makefile.mingw
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/win32
- A /trunk/samtools/win32/libz.a
- A /trunk/samtools/win32/zconf.h
- A /trunk/samtools/win32/zlib.h
-
- * samtools-0.1.5-20 (r427)
- * MinGW support. At least SAM<->BAM conversion is working. Other
- functionality are not tested at the moment.
- * zlib headers and Windows version of libz.a are included in win32/
-
-------------------------------------------------------------------------
-r426 | lh3lh3 | 2009-07-31 18:32:09 -0400 (Fri, 31 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-19 (r426)
- * fixed a bug caused by recent modifications. Sorry.
-
-------------------------------------------------------------------------
-r425 | lh3lh3 | 2009-07-31 18:23:51 -0400 (Fri, 31 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bgzf.c
-
-compatible with Windows binary files
-
-------------------------------------------------------------------------
-r424 | lh3lh3 | 2009-07-31 05:19:59 -0400 (Fri, 31 Jul 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
-
- * samtools-0.1.5-18 (r423)
- * output additional information in pileup indel lines, for the purpose
- of debugging at the moment
- * in tview, optionally allow to treat reference skip as deletion
-
-------------------------------------------------------------------------
-r423 | lh3lh3 | 2009-07-30 17:00:36 -0400 (Thu, 30 Jul 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/misc/psl2sam.pl
-
-convert BLAT psl to SAM.
-
-------------------------------------------------------------------------
-r422 | lh3lh3 | 2009-07-30 06:24:39 -0400 (Thu, 30 Jul 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-17 (r422)
- * fixed a bug in knetfile.c when seek type is not SEEK_SET
- * write an empty BGZF block to every BGZF file
- * check BGZF EOF marker in bam_header_read()
- * update ChangeLog
-
-------------------------------------------------------------------------
-r421 | lh3lh3 | 2009-07-30 05:03:39 -0400 (Thu, 30 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam.h
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-16 (r421)
- * in view and pileup, load header from FASTA index if the input is SAM.
-
-------------------------------------------------------------------------
-r420 | lh3lh3 | 2009-07-29 04:18:55 -0400 (Wed, 29 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/maq2sam.c
-
-do not set "read 1" if reads are not mapped in the PE mode of maq
-
-------------------------------------------------------------------------
-r419 | lh3lh3 | 2009-07-28 04:52:33 -0400 (Tue, 28 Jul 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/samtools.pl
- M /trunk/samtools/misc/wgsim_eval.pl
-
- * samtools-0.1.5-15 (r419)
- * in sam_open(), return NULL when the file cannot be opened.
- * make wgsim_eval.pl more robust to imperfect SAM
- * add "unique" command to samtools.pl
-
-------------------------------------------------------------------------
-r418 | lh3lh3 | 2009-07-24 09:04:19 -0400 (Fri, 24 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/wgsim_eval.pl
-
-skip @header lines in SAM
-
-------------------------------------------------------------------------
-r417 | lh3lh3 | 2009-07-24 07:42:38 -0400 (Fri, 24 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-14 (r417)
- * more help in "samtools view" due to the recent changes.
-
-------------------------------------------------------------------------
-r416 | lh3lh3 | 2009-07-24 07:34:30 -0400 (Fri, 24 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam.h
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-17 (r416)
- * support import/export SAM with string tags
-
-------------------------------------------------------------------------
-r415 | lh3lh3 | 2009-07-24 06:39:26 -0400 (Fri, 24 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam.h
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.5-12 (r415)
- * FLAG now can be in HEX
-
-------------------------------------------------------------------------
-r414 | lh3lh3 | 2009-07-22 17:03:49 -0400 (Wed, 22 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/kstring.h
-
-fixed a compiling error (thank Ken for fixing it)
-
-------------------------------------------------------------------------
-r412 | lh3lh3 | 2009-07-21 17:19:40 -0400 (Tue, 21 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/kstring.c
- M /trunk/samtools/kstring.h
-
-Implemented Boyer-Moore search in the kstring library.
-
-------------------------------------------------------------------------
-r409 | lh3lh3 | 2009-07-17 12:10:20 -0400 (Fri, 17 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
-
-do not include knetfile.h when _USE_KNETFILE is not defined
-
-------------------------------------------------------------------------
-r408 | lh3lh3 | 2009-07-17 10:29:21 -0400 (Fri, 17 Jul 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
-
- * samtools-0.1.5-11 (r408)
- * force to overwrite existing MD if it is different from the one calculated
- from fillmd.
- * bgzf.c: improved the compatibility with Windows headers
-
-------------------------------------------------------------------------
-r407 | lh3lh3 | 2009-07-17 09:46:56 -0400 (Fri, 17 Jul 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.h
-
- * samtools-0.1.5-10 (r407)
- * implemented bam_aux_del() to remove a tag
- * fillmd: generate the NM tag
- * fillmd: cmd interface improvement
-
-------------------------------------------------------------------------
-r406 | lh3lh3 | 2009-07-16 18:30:40 -0400 (Thu, 16 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-Sorry. The old Makefile is for PDCurses...
-
-------------------------------------------------------------------------
-r405 | lh3lh3 | 2009-07-16 18:30:11 -0400 (Thu, 16 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-9 (r405)
- * improved the compatibility with PDCurses a little bit
-
-------------------------------------------------------------------------
-r404 | lh3lh3 | 2009-07-16 18:23:52 -0400 (Thu, 16 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-8 (r404)
- * compatible with PDCurses
-
-------------------------------------------------------------------------
-r403 | lh3lh3 | 2009-07-16 17:39:39 -0400 (Thu, 16 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kseq.h
-
- * samtools-0.1.5-7 (r403)
- * fixed a bug in kseq.h for binary files (text files are fine)
-
-------------------------------------------------------------------------
-r402 | lh3lh3 | 2009-07-16 06:49:53 -0400 (Thu, 16 Jul 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
-
- * samtools-0.1.5-6 (r402)
- * fixed compiling error when "-D_USE_NETFILE" is not applied
- * improve portability to MinGW
-
-------------------------------------------------------------------------
-r398 | lh3lh3 | 2009-07-13 05:21:36 -0400 (Mon, 13 Jul 2009) | 3 lines
-Changed paths:
- A /trunk/bam-lite/bam.h (from /trunk/samtools/bam.h:395)
- A /trunk/bam-lite/bam_lite.c (from /trunk/samtools/bam_lite.c:395)
- D /trunk/samtools/bam_lite.c
-
- * move bam_lite.c to bam-lite
- * copy bam.h to bam-lite
-
-------------------------------------------------------------------------
-r395 | lh3lh3 | 2009-07-13 05:12:57 -0400 (Mon, 13 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_lite.c
- M /trunk/samtools/bam_lpileup.c
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-5 (r395)
- * added bam_pileup_file() and removed bam_lpileup_file()
-
-------------------------------------------------------------------------
-r394 | lh3lh3 | 2009-07-12 19:35:10 -0400 (Sun, 12 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.5-4 (r394)
- * http_proxy support in knetfile library (check http_proxy ENV)
-
-------------------------------------------------------------------------
-r393 | lh3lh3 | 2009-07-12 18:57:07 -0400 (Sun, 12 Jul 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.5-3 (r393)
- * knetfile now supports HTTP (no proxy at the moment)
- * fixed a potential issue in knetfile on opening an ordinary file, although I have
- not seen the side effect so far.
-
-------------------------------------------------------------------------
-r392 | lh3lh3 | 2009-07-12 13:50:55 -0400 (Sun, 12 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/samtools.1
-
-Remove the warning in tview
-
-------------------------------------------------------------------------
-r391 | lh3lh3 | 2009-07-12 13:42:43 -0400 (Sun, 12 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-2 (r391)
- * do not show a blank screen when no reads mapped
-
-------------------------------------------------------------------------
-r390 | lh3lh3 | 2009-07-09 09:01:42 -0400 (Thu, 09 Jul 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.h
- A /trunk/samtools/bam_lite.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.5-1 (r390)
- * removed useless _IOLIB in bam.h. This should cause no change at all.
- * added bam_lite.c for light-weight BAM reading
-
-------------------------------------------------------------------------
-r385 | lh3lh3 | 2009-07-07 11:53:29 -0400 (Tue, 07 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
-
-Release samtools-0.1.5c (fixed a bug in piping)
-
-------------------------------------------------------------------------
-r383 | lh3lh3 | 2009-07-07 06:39:55 -0400 (Tue, 07 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
-
-Release samtools-0.1.5b (BUG! so embarrassing!)
-
-------------------------------------------------------------------------
-r381 | lh3lh3 | 2009-07-07 06:20:06 -0400 (Tue, 07 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bamtk.c
-
-Release samtools-0.1.5a (for compatibility with Bio::DB::Sam)
-
-------------------------------------------------------------------------
-r373 | lh3lh3 | 2009-07-07 05:26:57 -0400 (Tue, 07 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.5
-
-------------------------------------------------------------------------
-r372 | lh3lh3 | 2009-07-07 04:49:27 -0400 (Tue, 07 Jul 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
-
- * samtools-0.1.4-23 (r372)
- * keep header text if "view -t" is used (by Gerton)
-
-------------------------------------------------------------------------
-r371 | lh3lh3 | 2009-07-06 20:13:32 -0400 (Mon, 06 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/samtools.1
-
-update documentation
-
-------------------------------------------------------------------------
-r370 | bhandsaker | 2009-07-02 17:24:34 -0400 (Thu, 02 Jul 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-Introduced LIBPATH variable so this could be overridden to allow samtools to build correctly at the Broad.
-
-------------------------------------------------------------------------
-r369 | lh3lh3 | 2009-07-02 08:36:53 -0400 (Thu, 02 Jul 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-22 (r369)
- * in pileup, optionally print E2 and U2
- * remove the debugging code in bam_aux_get() (Drat!)
-
-------------------------------------------------------------------------
-r368 | lh3lh3 | 2009-07-02 06:32:26 -0400 (Thu, 02 Jul 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_lpileup.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bam_stat.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/faidx.c
- M /trunk/samtools/faidx.h
- M /trunk/samtools/glf.c
-
- * samtools-0.1.4-21 (r368)
- * propagate errors rather than exit or complain assertion failure. Assertion
- should be only used for checking internal bugs, but not for external input
- inconsistency. I was just a bit lazy.
- * small memory leak may be present on failure, though
-
-------------------------------------------------------------------------
-r367 | lh3lh3 | 2009-06-30 11:18:42 -0400 (Tue, 30 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-reduce the chance of blocking in FTP connection
-
-------------------------------------------------------------------------
-r366 | lh3lh3 | 2009-06-30 10:35:21 -0400 (Tue, 30 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
-
-minor changes to knetfile: invalid fd equals -1 rather than 0
-
-------------------------------------------------------------------------
-r365 | lh3lh3 | 2009-06-30 09:04:30 -0400 (Tue, 30 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.4-20 (r365)
- * download the BAM index file if it is not found in the current working directory.
-
-------------------------------------------------------------------------
-r364 | lh3lh3 | 2009-06-30 07:39:07 -0400 (Tue, 30 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/knetfile.c
-
- * samtools-0.1.4-19 (r364)
- * knetfile: report error when the file is not present on FTP
-
-------------------------------------------------------------------------
-r363 | lh3lh3 | 2009-06-29 18:23:32 -0400 (Mon, 29 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.4-18 (r363)
- * knetfile: do not trigger network communication in FTP seek (lazy seek)
- * bgzf: cache recent blocks (disabled by default)
-
-------------------------------------------------------------------------
-r362 | lh3lh3 | 2009-06-25 16:04:34 -0400 (Thu, 25 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bgzf.c
-
-write changelog
-
-------------------------------------------------------------------------
-r361 | lh3lh3 | 2009-06-25 16:03:10 -0400 (Thu, 25 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-17 (r361)
- * if a file is given on FTP, search locally for the BAM index
-
-------------------------------------------------------------------------
-r360 | lh3lh3 | 2009-06-25 15:44:52 -0400 (Thu, 25 Jun 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
- * samtools-0.1.4-16 (r360)
- * report more information in index when the input is not sorted
- * change the behaviour of knet_seek() such that it returns 0 on success
- * support knetfile library in BGZF
-
-------------------------------------------------------------------------
-r359 | lh3lh3 | 2009-06-25 12:10:55 -0400 (Thu, 25 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/knetfile.c
- M /trunk/samtools/knetfile.h
-
-fixed bugs in knetfile.*
-
-------------------------------------------------------------------------
-r358 | lh3lh3 | 2009-06-25 08:53:19 -0400 (Thu, 25 Jun 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/knetfile.h
-
-this is the header file
-
-------------------------------------------------------------------------
-r357 | lh3lh3 | 2009-06-25 08:52:03 -0400 (Thu, 25 Jun 2009) | 3 lines
-Changed paths:
- A /trunk/samtools/knetfile.c
-
- * open a file at FTP
- * preliminary version
-
-------------------------------------------------------------------------
-r354 | lh3lh3 | 2009-06-24 09:02:25 -0400 (Wed, 24 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-15 (r354)
- * fixed a memory leak in bam_view1(), although samtools is not using this routine.
-
-------------------------------------------------------------------------
-r351 | lh3lh3 | 2009-06-17 19:16:26 -0400 (Wed, 17 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/faidx.c
-
- * samtools-0.1.4-13 (r351)
- * make faidx more tolerant to empty lines right before or after > lines
- * hope this does not introduce new bugs...
-
-------------------------------------------------------------------------
-r350 | lh3lh3 | 2009-06-16 09:37:01 -0400 (Tue, 16 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-13 (r350)
- * fixed a small memory leak in pileup, caused by recent modifications
-
-------------------------------------------------------------------------
-r347 | lh3lh3 | 2009-06-13 16:20:49 -0400 (Sat, 13 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.4-12 (r347)
- * added `-S' to pileup, similar to `view -S'
-
-------------------------------------------------------------------------
-r346 | lh3lh3 | 2009-06-13 12:52:31 -0400 (Sat, 13 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.4-11 (r346)
- * allow to select a read group at view command-line
-
-------------------------------------------------------------------------
-r344 | lh3lh3 | 2009-06-13 09:06:24 -0400 (Sat, 13 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/calDepth.c
-
-added more comments
-
-------------------------------------------------------------------------
-r343 | lh3lh3 | 2009-06-13 09:01:22 -0400 (Sat, 13 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/calDepth.c
-
-nothing really
-
-------------------------------------------------------------------------
-r342 | lh3lh3 | 2009-06-13 08:58:48 -0400 (Sat, 13 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/Makefile
- A /trunk/samtools/examples/calDepth.c
-
-added an example of calculating read depth
-
-------------------------------------------------------------------------
-r341 | lh3lh3 | 2009-06-13 08:00:08 -0400 (Sat, 13 Jun 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- A /trunk/samtools/bam_color.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam.h
-
- * samtools-0.1.4-10 (r341)
- * only include key APIs in libbam.a
- * move color-specific routines to bam_color.c
- * update documentations
- * remove the support of -q in pileup
-
-------------------------------------------------------------------------
-r340 | lh3lh3 | 2009-06-13 06:17:14 -0400 (Sat, 13 Jun 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/INSTALL
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/razf.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.4-9 (r340)
- * added a warning to razf.c if zlib<1.2.2.1
- * fixed a compilation warning
- * fixed a segfault caused by @RG parsing
- * detect NCURSES in bam_tview.c
-
-------------------------------------------------------------------------
-r339 | lh3lh3 | 2009-06-13 05:35:19 -0400 (Sat, 13 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/INSTALL
-
-update INSTALL
-
-------------------------------------------------------------------------
-r338 | lh3lh3 | 2009-06-12 19:15:24 -0400 (Fri, 12 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kstring.h
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.4-8 (r338)
- * parse the @RG header lines and allow to choose library at the "samtools view"
- command line
-
-------------------------------------------------------------------------
-r337 | lh3lh3 | 2009-06-12 16:25:50 -0400 (Fri, 12 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.4-7 (r337)
- * bgzf.c: support mode string "wu": uncompressed output
- * "samtools view" support "-u" command-line option
-
-------------------------------------------------------------------------
-r336 | lh3lh3 | 2009-06-12 12:20:12 -0400 (Fri, 12 Jun 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/misc/Makefile
- M /trunk/samtools/razf.c
- M /trunk/samtools/razf.h
- M /trunk/samtools/razip.c
-
- * no changes to samtools itself
- * remove zlib source codes
- * make RAZF reading compatible with old version of zlib
- * on old version of zlib, writing is not available
-
-------------------------------------------------------------------------
-r335 | lh3lh3 | 2009-06-12 11:47:33 -0400 (Fri, 12 Jun 2009) | 2 lines
-Changed paths:
- D /trunk/samtools/zlib
-
-remove zlib for simplification...
-
-------------------------------------------------------------------------
-r334 | lh3lh3 | 2009-06-12 10:43:36 -0400 (Fri, 12 Jun 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-6 (r334)
- * do not export bam_aux_get_core() for Bio::DB::Sam because it has already
- been implemented in that.
- * this version works with the latest Bio::DB::Sam (20090612)
-
-------------------------------------------------------------------------
-r333 | lh3lh3 | 2009-06-12 10:33:42 -0400 (Fri, 12 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
-
-update ChangeLog
-
-------------------------------------------------------------------------
-r332 | lh3lh3 | 2009-06-12 10:21:21 -0400 (Fri, 12 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/AUTHORS
- M /trunk/samtools/Makefile
- M /trunk/samtools/misc/Makefile
-
-fixed minor things in Makefile
-
-------------------------------------------------------------------------
-r331 | lh3lh3 | 2009-06-12 10:07:05 -0400 (Fri, 12 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-5 (r331)
- * no change to samtools itself. Version number is increased to reflect the
- changes in the Makefile building system.
-
-------------------------------------------------------------------------
-r330 | lh3lh3 | 2009-06-12 10:03:38 -0400 (Fri, 12 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/AUTHORS
- D /trunk/samtools/README
-
-update information...
-
-------------------------------------------------------------------------
-r329 | lh3lh3 | 2009-06-12 09:52:21 -0400 (Fri, 12 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/novo2sam.pl
-
- * updated novoalign converter by Colin Hercus et al.
- * this version works with indels
-
-------------------------------------------------------------------------
-r328 | lh3lh3 | 2009-06-12 09:50:53 -0400 (Fri, 12 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/INSTALL
- M /trunk/samtools/Makefile
- M /trunk/samtools/misc/Makefile
- M /trunk/samtools/zlib/Makefile
-
- * update Makefile
- * update INSTALL instruction
-
-------------------------------------------------------------------------
-r327 | lh3lh3 | 2009-06-12 09:18:29 -0400 (Fri, 12 Jun 2009) | 4 lines
-Changed paths:
- A /trunk/samtools/Makefile (from /trunk/samtools/Makefile.generic:325)
- D /trunk/samtools/Makefile.am
- D /trunk/samtools/Makefile.generic
- D /trunk/samtools/Makefile.lite
- D /trunk/samtools/autogen.sh
- D /trunk/samtools/cleanup.sh
- D /trunk/samtools/configure.ac
- A /trunk/samtools/misc/Makefile (from /trunk/samtools/misc/Makefile.generic:305)
- D /trunk/samtools/misc/Makefile.am
- D /trunk/samtools/misc/Makefile.generic
- M /trunk/samtools/razf.c
- A /trunk/samtools/zlib
- A /trunk/samtools/zlib/Makefile
- A /trunk/samtools/zlib/adler32.c
- A /trunk/samtools/zlib/compress.c
- A /trunk/samtools/zlib/crc32.c
- A /trunk/samtools/zlib/crc32.h
- A /trunk/samtools/zlib/deflate.c
- A /trunk/samtools/zlib/deflate.h
- A /trunk/samtools/zlib/gzio.c
- A /trunk/samtools/zlib/infback.c
- A /trunk/samtools/zlib/inffast.c
- A /trunk/samtools/zlib/inffast.h
- A /trunk/samtools/zlib/inffixed.h
- A /trunk/samtools/zlib/inflate.c
- A /trunk/samtools/zlib/inflate.h
- A /trunk/samtools/zlib/inftrees.c
- A /trunk/samtools/zlib/inftrees.h
- A /trunk/samtools/zlib/trees.c
- A /trunk/samtools/zlib/trees.h
- A /trunk/samtools/zlib/uncompr.c
- A /trunk/samtools/zlib/zconf.h
- A /trunk/samtools/zlib/zlib.h
- A /trunk/samtools/zlib/zutil.c
- A /trunk/samtools/zlib/zutil.h
- D /trunk/samtools/zutil.h
-
- * added zlib-1.2.3 as razip requires that
- * prepare to change back to the Makefile building system
- * unfinished! (will be soon)
-
-------------------------------------------------------------------------
-r326 | lh3lh3 | 2009-06-12 09:12:03 -0400 (Fri, 12 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
-Unfinished
-
-------------------------------------------------------------------------
-r325 | lh3lh3 | 2009-06-10 11:27:59 -0400 (Wed, 10 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.4-4 (r325)
- * further avoid wrong consensus calls in repetitive regions.
-
-------------------------------------------------------------------------
-r324 | lh3lh3 | 2009-06-10 10:56:17 -0400 (Wed, 10 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam.h
-
- * samtools-0.1.4-3 (r324)
- * make maqcns generate the correct call in repetitive regions.
- * allow filtering on mapQ at the pileup command line
-
-------------------------------------------------------------------------
-r323 | lh3lh3 | 2009-06-10 05:04:21 -0400 (Wed, 10 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.3.2 (r322)
- * indels and SNPs use different mapping quality threshold
-
-------------------------------------------------------------------------
-r322 | lh3lh3 | 2009-06-10 05:03:22 -0400 (Wed, 10 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/export2sam.pl
-
-fixed a typo
-
-------------------------------------------------------------------------
-r321 | lh3lh3 | 2009-06-09 04:21:48 -0400 (Tue, 09 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
-just typo. no real change
-
-------------------------------------------------------------------------
-r320 | lh3lh3 | 2009-06-08 09:32:51 -0400 (Mon, 08 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
-a little bit of code cleanup
-
-------------------------------------------------------------------------
-r319 | lh3lh3 | 2009-06-08 09:22:33 -0400 (Mon, 08 Jun 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.3.1
- * change default parameters
- * optionally print filtered variants
-
-------------------------------------------------------------------------
-r318 | lh3lh3 | 2009-06-08 09:14:26 -0400 (Mon, 08 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.3.0
- * combine snpFilter and indelFilter
-
-------------------------------------------------------------------------
-r317 | lh3lh3 | 2009-06-08 06:31:42 -0400 (Mon, 08 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.2.3
- * change a default parameter
-
-------------------------------------------------------------------------
-r316 | lh3lh3 | 2009-06-08 06:11:06 -0400 (Mon, 08 Jun 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
-
- * samtools-0.1.4-2 (r316)
- * pileup: cap mapping quality at 60 (by default)
- * pileup: always calculate RMS mapq
- * pileup: allow to output variant sites only
-
-------------------------------------------------------------------------
-r312 | lh3lh3 | 2009-06-04 08:01:10 -0400 (Thu, 04 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.2.2
- * added pileup2fq
-
-------------------------------------------------------------------------
-r311 | lh3lh3 | 2009-06-03 04:40:40 -0400 (Wed, 03 Jun 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * in snpFilter, suppress non-SNP sites
-
-------------------------------------------------------------------------
-r310 | lh3lh3 | 2009-06-01 09:35:13 -0400 (Mon, 01 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.2.1
- * fixed a typo
-
-------------------------------------------------------------------------
-r309 | lh3lh3 | 2009-06-01 09:04:39 -0400 (Mon, 01 Jun 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.2.0
- * snpFilter
-
-------------------------------------------------------------------------
-r306 | lh3lh3 | 2009-05-28 06:49:35 -0400 (Thu, 28 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bgzf.c
-
- * minor changes to bgzf: return NULL if fd == -1
- * suggested by {kdj,jm18}@sanger.ac.uk
-
-------------------------------------------------------------------------
-r305 | lh3lh3 | 2009-05-28 06:16:08 -0400 (Thu, 28 May 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/misc/interpolate_sam.pl
-
-Script for paired-end pileup, contributed by Stephen Montgomery.
-
-------------------------------------------------------------------------
-r304 | lh3lh3 | 2009-05-28 06:08:49 -0400 (Thu, 28 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
-
- * samtools-0.1.4-1 (r304)
- * fixed a minor bug in printing headers
-
-------------------------------------------------------------------------
-r297 | lh3lh3 | 2009-05-21 11:06:16 -0400 (Thu, 21 May 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/maq2sam.c
- M /trunk/samtools/samtools.1
-
-Release samtools-0.1.4
-
-------------------------------------------------------------------------
-r296 | lh3lh3 | 2009-05-21 07:53:14 -0400 (Thu, 21 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-24 (r296)
- * another similar bug in the indel caller
-
-------------------------------------------------------------------------
-r295 | lh3lh3 | 2009-05-21 07:50:28 -0400 (Thu, 21 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-23 (r295)
- * fixed a critical bug in the indel caller
-
-------------------------------------------------------------------------
-r294 | lh3lh3 | 2009-05-20 08:00:20 -0400 (Wed, 20 May 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_stat.c
-
-added a missing header file
-
-------------------------------------------------------------------------
-r293 | lh3lh3 | 2009-05-19 18:44:25 -0400 (Tue, 19 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-22 (r293)
- * open tview in the dot-view mode by default
-
-------------------------------------------------------------------------
-r292 | lh3lh3 | 2009-05-18 16:01:23 -0400 (Mon, 18 May 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/samtools.1
-
-Added a note to the manual. Currently SAMtools uses unaligned words in
-several places. Although this does not cause bus errors to me, it may
-affect portability. Please see the "Bus error" wiki page for more
-information. Also thanks to James Bonfield for pointing this out.
-
-
-------------------------------------------------------------------------
-r286 | lh3lh3 | 2009-05-14 10:23:13 -0400 (Thu, 14 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-21 (r286)
- * declare bam_aux_get_core() in bam.h
-
-------------------------------------------------------------------------
-r276 | lh3lh3 | 2009-05-13 05:07:55 -0400 (Wed, 13 May 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-20 (r276)
- * remove bam1_t::hash again. We need to modify the Perl API anyway to
- make it work with the latest SVN.
- * As is suggested by Tim, scan "{base}.bai" and "{base}.bam.bai" for index
-
-------------------------------------------------------------------------
-r275 | lh3lh3 | 2009-05-12 16:14:10 -0400 (Tue, 12 May 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam.h
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-19 (r275)
- * a minor change to the bam1_t struct: added back "void *hash" for the
- backward compatibility with Bio::DB::Sam
-
-------------------------------------------------------------------------
-r273 | lh3lh3 | 2009-05-12 09:28:39 -0400 (Tue, 12 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-18 (r273)
- * rmdupse: do not remove unmapped reads
-
-------------------------------------------------------------------------
-r272 | lh3lh3 | 2009-05-12 09:20:00 -0400 (Tue, 12 May 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_rmdupse.c
-
-change a parameter. It does nothing
-
-------------------------------------------------------------------------
-r271 | lh3lh3 | 2009-05-12 09:17:58 -0400 (Tue, 12 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile.am
- M /trunk/samtools/Makefile.generic
- M /trunk/samtools/Makefile.lite
- A /trunk/samtools/bam_rmdupse.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/configure.ac
-
- * samtools-0.1.3-17 (r271)
- * added 'rmdupse' command
-
-------------------------------------------------------------------------
-r267 | lh3lh3 | 2009-05-05 17:31:41 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.3-16 (r267)
- * in sam_view.c, changed g_flag_on based on the suggestion by Angie Hinrichs
-
-------------------------------------------------------------------------
-r266 | lh3lh3 | 2009-05-05 17:23:27 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-15 (r266)
- * report an error if a non-* reference is present while @SQ is absent
-
-------------------------------------------------------------------------
-r265 | lh3lh3 | 2009-05-05 17:09:00 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.3-14 (r262)
- * make samopen() recognize @SQ header lines
-
-------------------------------------------------------------------------
-r261 | lh3lh3 | 2009-05-05 10:10:30 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam_view.c
-
- * samtools-0.1.3-13 (r260)
- * report error for file I/O error
-
-------------------------------------------------------------------------
-r260 | lh3lh3 | 2009-05-05 10:01:16 -0400 (Tue, 05 May 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.am
-
-update Makefile.am
-
-------------------------------------------------------------------------
-r259 | lh3lh3 | 2009-05-05 09:52:25 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/sam.c
- M /trunk/samtools/sam.h
-
- * samtools-0.1.3-12 (r259)
- * use the new I/O interface in pileup
-
-------------------------------------------------------------------------
-r258 | lh3lh3 | 2009-05-05 09:33:22 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile.generic
- M /trunk/samtools/Makefile.lite
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/sam.c
- A /trunk/samtools/sam.h
- A /trunk/samtools/sam_view.c
-
- * samtools-0.1.3-11 (r258)
- * unify the interface to BAM and SAM I/O
-
-------------------------------------------------------------------------
-r257 | lh3lh3 | 2009-05-05 04:53:35 -0400 (Tue, 05 May 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile.lite
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-10 (r257)
- * allow hex with "pileup -m"
-
-------------------------------------------------------------------------
-r256 | lh3lh3 | 2009-05-04 14:16:50 -0400 (Mon, 04 May 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_lpileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-9 (r256)
- * fixed a bug in bam_lpileup.c
- * I do not know if this also fixes the bug causing assertion failure in the tview
-
-------------------------------------------------------------------------
-r251 | lh3lh3 | 2009-04-28 08:53:23 -0400 (Tue, 28 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-8 (r251)
- * fixed a bug when there are reads without coordinates
-
-------------------------------------------------------------------------
-r250 | lh3lh3 | 2009-04-28 08:43:33 -0400 (Tue, 28 Apr 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/AUTHORS
- A /trunk/samtools/README
- M /trunk/samtools/cleanup.sh
-
-added missing files
-
-------------------------------------------------------------------------
-r249 | lh3lh3 | 2009-04-28 08:37:16 -0400 (Tue, 28 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.generic
- M /trunk/samtools/Makefile.lite
- M /trunk/samtools/configure.ac
- M /trunk/samtools/misc/Makefile.generic
-
-improve large file support in compilation
-
-------------------------------------------------------------------------
-r248 | lh3lh3 | 2009-04-28 08:33:24 -0400 (Tue, 28 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/INSTALL
-
-update INSTALL
-
-------------------------------------------------------------------------
-r247 | lh3lh3 | 2009-04-28 08:28:50 -0400 (Tue, 28 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.am
- M /trunk/samtools/autogen.sh
- M /trunk/samtools/cleanup.sh
- M /trunk/samtools/configure.ac
- A /trunk/samtools/misc/Makefile.am
-
-fixed various issues about the GNU building scripts
-
-------------------------------------------------------------------------
-r246 | lh3lh3 | 2009-04-28 08:10:23 -0400 (Tue, 28 Apr 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- D /trunk/samtools/Makefile
- A /trunk/samtools/Makefile.am
- A /trunk/samtools/Makefile.generic
- A /trunk/samtools/autogen.sh
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- A /trunk/samtools/cleanup.sh
- A /trunk/samtools/configure.ac
- D /trunk/samtools/misc/Makefile
- A /trunk/samtools/misc/Makefile.generic (from /trunk/samtools/misc/Makefile:245)
-
- * samtools-0.1.3-7 (r246)
- * incorporated revisions from Nils Homer
- * enhanced support of displaying color-space reads
-
-------------------------------------------------------------------------
-r244 | lh3lh3 | 2009-04-25 06:49:40 -0400 (Sat, 25 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-6 (r244)
- * fixed segfault for unmapped reads
-
-------------------------------------------------------------------------
-r243 | lh3lh3 | 2009-04-24 16:27:26 -0400 (Fri, 24 Apr 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-5 (r243)
- * fixed a long existing bug which may cause memory leak
- * check MD
- * consensus calling now works with "=", but indel calling not
-
-------------------------------------------------------------------------
-r242 | lh3lh3 | 2009-04-24 15:44:46 -0400 (Fri, 24 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_md.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-4 (r242)
- * fixed a memory leak
-
-------------------------------------------------------------------------
-r240 | lh3lh3 | 2009-04-24 11:40:18 -0400 (Fri, 24 Apr 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/Makefile.lite
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- A /trunk/samtools/bam_md.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-3 (r240)
- * generate MD tag
- * generate "=" bases
- * the plain pileup now support "=" bases, but consensus calling and glfgen may fail
-
-------------------------------------------------------------------------
-r239 | lh3lh3 | 2009-04-24 07:08:20 -0400 (Fri, 24 Apr 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_aux.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-2 (r239)
- * fixed bugs in bam_aux.c (these functions are never used by samtools)
- * removed bam_aux_init()/bam_aux_destroy()
- * added tagview for testing bam_aux
-
-------------------------------------------------------------------------
-r235 | lh3lh3 | 2009-04-21 18:17:39 -0400 (Tue, 21 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.3-1
- * fixed a bug in pileup: the first read in a chromosome may not be printed
-
-------------------------------------------------------------------------
-r232 | lh3lh3 | 2009-04-16 10:25:43 -0400 (Thu, 16 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.lite
-
-a missing file in Makefile.lite
-
-------------------------------------------------------------------------
-r227 | lh3lh3 | 2009-04-15 17:02:53 -0400 (Wed, 15 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
-
-Release samtools-0.1.3
-
-------------------------------------------------------------------------
-r223 | lh3lh3 | 2009-04-15 09:31:32 -0400 (Wed, 15 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-28
- * make samtools more robust to weird input such as empty file
-
-------------------------------------------------------------------------
-r222 | lh3lh3 | 2009-04-15 09:05:33 -0400 (Wed, 15 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/samtools.1
-
-prepare for release 0.1.3
-
-------------------------------------------------------------------------
-r221 | lh3lh3 | 2009-04-15 08:32:14 -0400 (Wed, 15 Apr 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/misc/blast2sam.pl
-
-convert NCBI-BLASTN to SAM
-
-------------------------------------------------------------------------
-r220 | lh3lh3 | 2009-04-15 08:18:19 -0400 (Wed, 15 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_lpileup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-27
- * fixed a small memory leak in tview
-
-------------------------------------------------------------------------
-r219 | lh3lh3 | 2009-04-15 08:00:08 -0400 (Wed, 15 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_rmdup.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-26
- * fixed a bug in rmdup when there are unmapped reads
-
-------------------------------------------------------------------------
-r218 | lh3lh3 | 2009-04-14 17:28:58 -0400 (Tue, 14 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
-
-proposed NEWS for the new release (have not yet)
-
-------------------------------------------------------------------------
-r216 | lh3lh3 | 2009-04-14 17:10:46 -0400 (Tue, 14 Apr 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/misc/samtools.pl
-
- * samtools.pl-0.1.1
- * improve indelFilter to avoid filtering true indels. The new filter relies
- on the new pileup indel line implemented in samtools-0.1.2-25
-
-------------------------------------------------------------------------
-r215 | lh3lh3 | 2009-04-14 17:04:19 -0400 (Tue, 14 Apr 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.2-25
- * change the pileup indel line to show the number of alignments actually
- containing indels
-
-------------------------------------------------------------------------
-r211 | lh3lh3 | 2009-04-13 07:07:13 -0400 (Mon, 13 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
-
-update ChangeLog from "svn log"
-
-------------------------------------------------------------------------
-r210 | lh3lh3 | 2009-04-12 15:57:05 -0400 (Sun, 12 Apr 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kseq.h
-
- * samtools-0.1.2-24
- * in merge, gives a warning rather than error if the target sequence length is different
- * allow empty header
-
-------------------------------------------------------------------------
-r209 | lh3lh3 | 2009-04-12 15:32:44 -0400 (Sun, 12 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-23
- * recognize '*' at the QUAL field
-
-------------------------------------------------------------------------
-r208 | lh3lh3 | 2009-04-12 15:08:02 -0400 (Sun, 12 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kseq.h
-
- * samtools-0.1.2-22
- * the field separator is TAB only, now
-
-------------------------------------------------------------------------
-r207 | lh3lh3 | 2009-04-08 10:18:03 -0400 (Wed, 08 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/examples/ex1.sam.gz
-
- * fixed the problem in the example alignment due to the bug in fixmate
-
-------------------------------------------------------------------------
-r206 | lh3lh3 | 2009-04-08 10:15:05 -0400 (Wed, 08 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mate.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/soap2sam.pl
-
- * samtools-0.1.2-21
- * fixed a nasty bug in `fixmate'
-
-------------------------------------------------------------------------
-r205 | lh3lh3 | 2009-04-08 05:57:08 -0400 (Wed, 08 Apr 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/bowtie2sam.pl
- M /trunk/samtools/misc/soap2sam.pl
- M /trunk/samtools/misc/wgsim_eval.pl
-
-make the script robust to the bugs in SOAP-2.1.7
-
-------------------------------------------------------------------------
-r200 | lh3lh3 | 2009-04-02 10:14:56 -0400 (Thu, 02 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_stat.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-20
- * check if file is truncated in flagstat
-
-------------------------------------------------------------------------
-r199 | lh3lh3 | 2009-04-02 10:09:10 -0400 (Thu, 02 Apr 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-19
- * print the header if requested
-
-------------------------------------------------------------------------
-r193 | lh3lh3 | 2009-03-27 11:09:50 -0400 (Fri, 27 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-18
- * fixed a minor bug reported by Nils Homer
-
-------------------------------------------------------------------------
-r185 | lh3lh3 | 2009-03-24 07:50:32 -0400 (Tue, 24 Mar 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/Makefile (from /trunk/samtools/Makefile.std:184)
- D /trunk/samtools/Makefile.std
- A /trunk/samtools/misc/Makefile (from /trunk/samtools/misc/Makefile.std:184)
- D /trunk/samtools/misc/Makefile.std
-
-rename Makefile.std as Makefile. GNU building system is not ready and may take some time...
-
-------------------------------------------------------------------------
-r184 | lh3lh3 | 2009-03-24 06:36:38 -0400 (Tue, 24 Mar 2009) | 4 lines
-Changed paths:
- D /trunk/samtools/Makefile
- A /trunk/samtools/Makefile.std (from /trunk/samtools/Makefile:183)
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bam_tview.c
- M /trunk/samtools/bamtk.c
- D /trunk/samtools/misc/Makefile
- A /trunk/samtools/misc/Makefile.std (from /trunk/samtools/misc/Makefile:182)
- M /trunk/samtools/samtools.1
-
- * samtools-0.1.2-17
- * incorporating Nils' changes
- * rename Makefile to Makefile.std and prepare to add the GNU building systems (also by Nils)
-
-------------------------------------------------------------------------
-r183 | lh3lh3 | 2009-03-24 06:30:23 -0400 (Tue, 24 Mar 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/kseq.h
- A /trunk/samtools/kstring.c
- A /trunk/samtools/kstring.h
-
- * samtools-0.1.2-16
- * made pileup take a list of proposed indels. An insertion is N at the moment.
- * added my kstring library for a bit complex parsing of the position list.
-
-------------------------------------------------------------------------
-r169 | lh3lh3 | 2009-03-12 09:40:14 -0400 (Thu, 12 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/soap2sam.pl
-
- * soap2sam.pl-0.1.2
- * more robust to truncated soap output
-
-------------------------------------------------------------------------
-r168 | lh3lh3 | 2009-03-11 06:49:00 -0400 (Wed, 11 Mar 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile.lite
-
-added bam_stat.o to Makefile.lite
-
-------------------------------------------------------------------------
-r167 | lh3lh3 | 2009-03-10 18:11:31 -0400 (Tue, 10 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-15
- * generate RMS of mapQ instead of max mapQ
-
-------------------------------------------------------------------------
-r166 | lh3lh3 | 2009-03-10 18:06:45 -0400 (Tue, 10 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/glf.c
- M /trunk/samtools/glf.h
- M /trunk/samtools/misc/Makefile
-
- * samtools-0.1.2-14
- * implemented GLFv3
-
-------------------------------------------------------------------------
-r159 | lh3lh3 | 2009-03-03 06:26:08 -0500 (Tue, 03 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-13
- * fixed a minor bug in displaying pileup
-
-------------------------------------------------------------------------
-r158 | lh3lh3 | 2009-03-03 06:24:16 -0500 (Tue, 03 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-12
- * optionally print SAM header
-
-------------------------------------------------------------------------
-r153 | lh3lh3 | 2009-03-02 05:45:28 -0500 (Mon, 02 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/glf.c
-
- * samtools-0.1.2-11
- * use "GLF\3" as the magic for GLFv3 files
-
-------------------------------------------------------------------------
-r152 | lh3lh3 | 2009-03-02 05:39:09 -0500 (Mon, 02 Mar 2009) | 5 lines
-Changed paths:
- M /trunk/samtools/Makefile
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/glf.c
- M /trunk/samtools/glf.h
-
- * samtools-0.1.2-10
- * fixed a bug in import: core.bin is undefined for unmapped reads
- * this bug can be alleviated (not completely solved) in bam_index.c
- * update to GLFv3: pos is changed to offset for better compression
-
-------------------------------------------------------------------------
-r151 | lh3lh3 | 2009-03-01 10:18:43 -0500 (Sun, 01 Mar 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/wgsim.c
-
- * wgsim-0.2.3
- * fixed a bug in simulating indels
-
-------------------------------------------------------------------------
-r145 | lh3lh3 | 2009-02-26 14:43:57 -0500 (Thu, 26 Feb 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/misc/wgsim.c
-
- * wgsim-0.2.2
- * allow to print mismatch information as fastq comment. MAQ does
- not like long read names.
-
-------------------------------------------------------------------------
-r141 | lh3lh3 | 2009-02-26 09:53:03 -0500 (Thu, 26 Feb 2009) | 6 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/misc/wgsim.c
- M /trunk/samtools/misc/wgsim_eval.pl
-
- * wgsim-0.2.1
- * fixed a bug about color read coordinates
- * fixed a bug in read names
- * wgsim_eval.pl-0.1.3
- * make the script work with color reads
-
-------------------------------------------------------------------------
-r140 | lh3lh3 | 2009-02-26 09:02:57 -0500 (Thu, 26 Feb 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/Makefile
- M /trunk/samtools/misc/wgsim.c
-
- * wgsim: added a note
-
-------------------------------------------------------------------------
-r139 | lh3lh3 | 2009-02-26 06:39:08 -0500 (Thu, 26 Feb 2009) | 7 lines
-Changed paths:
- M /trunk/samtools/misc/wgsim.c
- M /trunk/samtools/misc/wgsim_eval.pl
-
- * wgsim-0.2.0
- * considerable code clean up
- * print number of substitutions/indels/errors on each read
- * potentially support SOLiD simulation, though not tested at the moment
- * wgsim_eval.pl-0.1.2
- * change in accordance with wgsim
-
-------------------------------------------------------------------------
-r129 | lh3lh3 | 2009-02-18 17:23:27 -0500 (Wed, 18 Feb 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-9
- * fixed a bug in bam_fetch, caused by completely contained adjacent chunks
-
-------------------------------------------------------------------------
-r128 | bhandsaker | 2009-02-18 14:06:57 -0500 (Wed, 18 Feb 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bamtk.c
-
-Fix annoying segv when invalid region specified.
-
-------------------------------------------------------------------------
-r127 | lh3lh3 | 2009-02-17 05:49:55 -0500 (Tue, 17 Feb 2009) | 2 lines
-Changed paths:
- D /trunk/samtools/misc/indel_filter.pl
- A /trunk/samtools/misc/samtools.pl
-
- * move indel_filter.pl to samtools.pl
-
-------------------------------------------------------------------------
-r126 | lh3lh3 | 2009-02-14 16:22:30 -0500 (Sat, 14 Feb 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_mate.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-7
- * fixed a bug in fixmate: SE reads are flagged as BAM_FMUNMAP
-
-------------------------------------------------------------------------
-r125 | lh3lh3 | 2009-02-13 04:54:45 -0500 (Fri, 13 Feb 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_stat.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-7
- * fixed a minor bug in flagstat
-
-------------------------------------------------------------------------
-r124 | lh3lh3 | 2009-02-12 06:15:32 -0500 (Thu, 12 Feb 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/misc/indel_filter.pl
-
- * samtools-0.1.2-6
- * improve indel caller by setting maximum window size
-
-------------------------------------------------------------------------
-r123 | lh3lh3 | 2009-02-12 05:30:29 -0500 (Thu, 12 Feb 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bamtk.c
-
- * output max mapping quality in indel line
-
-------------------------------------------------------------------------
-r122 | lh3lh3 | 2009-02-11 05:59:10 -0500 (Wed, 11 Feb 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/misc/maq2sam.c
-
-fixed a bug in generating tag AM
-
-------------------------------------------------------------------------
-r121 | lh3lh3 | 2009-02-03 05:43:11 -0500 (Tue, 03 Feb 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bamtk.c
-
-fixed a potential memory problem in indexing
-
-------------------------------------------------------------------------
-r120 | bhandsaker | 2009-02-02 10:52:52 -0500 (Mon, 02 Feb 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/Makefile
-
-Pass LIBS to recursive targets to facilitate building at Broad.
-
-------------------------------------------------------------------------
-r119 | lh3lh3 | 2009-02-02 05:12:15 -0500 (Mon, 02 Feb 2009) | 4 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/bam_plcmd.c
- M /trunk/samtools/bam_stat.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-3
- * fixed a bug in generating GLFv2 for indels
- * improve flagstat report a little bit
-
-------------------------------------------------------------------------
-r118 | lh3lh3 | 2009-01-29 07:33:23 -0500 (Thu, 29 Jan 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/Makefile
- A /trunk/samtools/bam_stat.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.2-1
- * added flagstat command
-
-------------------------------------------------------------------------
-r116 | lh3lh3 | 2009-01-28 08:31:12 -0500 (Wed, 28 Jan 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/NEWS
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/samtools.1
-
-Release SAMtools-0.1.2
-
-------------------------------------------------------------------------
-r115 | lh3lh3 | 2009-01-28 07:54:08 -0500 (Wed, 28 Jan 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/misc/indel_filter.pl
-
-Script for filtering indel results
-
-------------------------------------------------------------------------
-r114 | lh3lh3 | 2009-01-25 06:45:37 -0500 (Sun, 25 Jan 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/misc/zoom2sam.pl
-
-convert ZOOM to SAM
-
-------------------------------------------------------------------------
-r113 | lh3lh3 | 2009-01-24 09:25:07 -0500 (Sat, 24 Jan 2009) | 2 lines
-Changed paths:
- A /trunk/samtools/misc/novo2sam.pl
-
-add a script to convert novo alignment to SAM
-
-------------------------------------------------------------------------
-r112 | lh3lh3 | 2009-01-23 15:57:39 -0500 (Fri, 23 Jan 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/ChangeLog
- M /trunk/samtools/ChangeLog.old
- M /trunk/samtools/samtools.1
-
-update documentation and ChangeLog
-
-------------------------------------------------------------------------
-r111 | lh3lh3 | 2009-01-23 14:22:59 -0500 (Fri, 23 Jan 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
-
- * samtools-0.1.1-19
- * fixed a bug in "merge" command line
-
-------------------------------------------------------------------------
-r110 | lh3lh3 | 2009-01-22 10:36:48 -0500 (Thu, 22 Jan 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/misc/Makefile
- A /trunk/samtools/misc/bowtie2sam.pl (from /branches/dev/samtools/misc/bowtie2sam.pl:108)
- M /trunk/samtools/misc/export2sam.pl
- A /trunk/samtools/misc/soap2sam.pl (from /branches/dev/samtools/misc/soap2sam.pl:108)
- A /trunk/samtools/misc/wgsim.c (from /branches/dev/samtools/misc/wgsim.c:108)
- A /trunk/samtools/misc/wgsim_eval.pl (from /branches/dev/samtools/misc/wgsim_eval.pl:108)
-
- * merge from branches/dev/
- * all future development will happen here
-
-------------------------------------------------------------------------
-r109 | lh3lh3 | 2009-01-22 10:14:27 -0500 (Thu, 22 Jan 2009) | 3 lines
-Changed paths:
- M /trunk/samtools/COPYING
- M /trunk/samtools/ChangeLog
- A /trunk/samtools/INSTALL (from /branches/dev/samtools/INSTALL:108)
- M /trunk/samtools/Makefile
- A /trunk/samtools/Makefile.lite (from /branches/dev/samtools/Makefile.lite:108)
- M /trunk/samtools/bam.c
- M /trunk/samtools/bam.h
- M /trunk/samtools/bam_import.c
- M /trunk/samtools/bam_index.c
- M /trunk/samtools/bam_lpileup.c
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_maqcns.h
- A /trunk/samtools/bam_mate.c (from /branches/dev/samtools/bam_mate.c:108)
- M /trunk/samtools/bam_pileup.c
- M /trunk/samtools/bam_plcmd.c
- A /trunk/samtools/bam_rmdup.c (from /branches/dev/samtools/bam_rmdup.c:108)
- M /trunk/samtools/bam_sort.c
- M /trunk/samtools/bamtk.c
- M /trunk/samtools/bgzf.h
- M /trunk/samtools/examples/00README.txt
- A /trunk/samtools/examples/Makefile (from /branches/dev/samtools/examples/Makefile:108)
- D /trunk/samtools/examples/ex1.fa.fai
- M /trunk/samtools/examples/ex1.sam.gz
- M /trunk/samtools/faidx.c
- A /trunk/samtools/glf.c (from /branches/dev/samtools/glf.c:108)
- M /trunk/samtools/glf.h
- M /trunk/samtools/misc/Makefile
- M /trunk/samtools/misc/maq2sam.c
- M /trunk/samtools/razf.c
- M /trunk/samtools/source.dot
-
- * Merge from branches/dev/
- * all future development will happen here at trunk/
-
-------------------------------------------------------------------------
-r79 | bhandsaker | 2009-01-07 16:42:15 -0500 (Wed, 07 Jan 2009) | 2 lines
-Changed paths:
- M /trunk/samtools/bam_maqcns.c
- M /trunk/samtools/bam_tview.c
-
-Fix problem with compiling without curses.
-
-------------------------------------------------------------------------
-r63 | lh3lh3 | 2008-12-22 10:58:02 -0500 (Mon, 22 Dec 2008) | 2 lines
-Changed paths:
- A /trunk/samtools (from /branches/dev/samtools:62)
-
-Create trunk copy
-
-------------------------------------------------------------------------
-r62 | lh3lh3 | 2008-12-22 10:55:13 -0500 (Mon, 22 Dec 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools/NEWS
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/samtools.1
-
-Release samtools-0.1.1
-
-------------------------------------------------------------------------
-r61 | lh3lh3 | 2008-12-22 10:46:08 -0500 (Mon, 22 Dec 2008) | 10 lines
-Changed paths:
- M /branches/dev/samtools/bam_aux.c
- M /branches/dev/samtools/bam_index.c
- M /branches/dev/samtools/bam_plcmd.c
- M /branches/dev/samtools/bam_tview.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/razf.c
- M /branches/dev/samtools/samtools.1
-
- * samtools-0.1.0-66
- * fixed a bug in razf.c: reset z_eof when razf_seek() is called
- * fixed a memory leak in parsing a region
- * changed pileup a little bit when -s is in use: output ^ and $
- * when a bam is not indexed, output more meaningful error message
- * fixed a bug in indexing for small alignment
- * fixed a bug in the viewer when we come to the end of a reference file
- * updated documentation
- * prepare to release 0.1.1
-
-------------------------------------------------------------------------
-r60 | lh3lh3 | 2008-12-22 10:10:16 -0500 (Mon, 22 Dec 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools/examples
- A /branches/dev/samtools/examples/00README.txt
- A /branches/dev/samtools/examples/ex1.fa
- A /branches/dev/samtools/examples/ex1.fa.fai
- A /branches/dev/samtools/examples/ex1.sam.gz
-
-example
-
-------------------------------------------------------------------------
-r59 | lh3lh3 | 2008-12-22 04:38:15 -0500 (Mon, 22 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/ChangeLog
-
-update ChangeLog
-
-------------------------------------------------------------------------
-r58 | lh3lh3 | 2008-12-20 18:06:00 -0500 (Sat, 20 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/misc/export2sam.pl
-
- * added comments
- * fixed several bugs
-
-------------------------------------------------------------------------
-r57 | lh3lh3 | 2008-12-20 10:44:20 -0500 (Sat, 20 Dec 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools/misc/export2sam.pl
-
-convert Export format to SAM; not thoroughly tested
-
-------------------------------------------------------------------------
-r56 | lh3lh3 | 2008-12-19 17:13:28 -0500 (Fri, 19 Dec 2008) | 6 lines
-Changed paths:
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bam_plcmd.c
- M /branches/dev/samtools/bam_tview.c
- M /branches/dev/samtools/bamtk.c
- A /branches/dev/samtools/source.dot
-
- * samtools-0.1.0-65
- * pileup: generate maq-like simple output
- * pileup: allow to output pileup at required sites
- * source.dot: source file relationship graph
- * tview: fixed a minor bug
-
-------------------------------------------------------------------------
-r55 | lh3lh3 | 2008-12-19 15:10:26 -0500 (Fri, 19 Dec 2008) | 2 lines
-Changed paths:
- D /branches/dev/samtools/misc/all2sam.pl
-
-remove all2sam.pl
-
-------------------------------------------------------------------------
-r54 | lh3lh3 | 2008-12-16 17:34:25 -0500 (Tue, 16 Dec 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools/COPYING
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/faidx.h
- M /branches/dev/samtools/khash.h
- M /branches/dev/samtools/kseq.h
- M /branches/dev/samtools/ksort.h
- M /branches/dev/samtools/samtools.1
-
-Added copyright information and a bit more documentation. No code change.
-
-------------------------------------------------------------------------
-r53 | lh3lh3 | 2008-12-16 08:40:18 -0500 (Tue, 16 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam.c
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_index.c
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-64
- * improved efficiency of the indel caller for spliced alignments
-
-------------------------------------------------------------------------
-r52 | lh3lh3 | 2008-12-16 05:28:20 -0500 (Tue, 16 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam.c
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_aux.c
- M /branches/dev/samtools/bam_index.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-63
- * a bit code cleanup: reduce the dependency between source files
-
-------------------------------------------------------------------------
-r51 | lh3lh3 | 2008-12-15 09:29:32 -0500 (Mon, 15 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bam_plcmd.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-62
- * fixed a memory leak
-
-------------------------------------------------------------------------
-r50 | lh3lh3 | 2008-12-15 09:00:13 -0500 (Mon, 15 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/ChangeLog
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/samtools.1
-
-update documentation, ChangeLog and a comment
-
-------------------------------------------------------------------------
-r49 | lh3lh3 | 2008-12-15 08:36:43 -0500 (Mon, 15 Dec 2008) | 6 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bam_maqcns.h
- M /branches/dev/samtools/bam_pileup.c
- A /branches/dev/samtools/bam_plcmd.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/samtools.1
-
- * samtools-0.1.0-61
- * moved pileup command to a separate source file
- * added indel caller
- * added bam_cal_segend(). (NOT WORKING for spliced alignment!!!)
- * updated documentation
-
-------------------------------------------------------------------------
-r48 | lh3lh3 | 2008-12-12 08:55:36 -0500 (Fri, 12 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-60
- * fixed another bug in maqcns when there is a nearby deletion
-
-------------------------------------------------------------------------
-r47 | lh3lh3 | 2008-12-12 08:42:16 -0500 (Fri, 12 Dec 2008) | 5 lines
-Changed paths:
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bam_pileup.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-59
- * pileup: outputting consensus is now optional
- * fixed a bug in glfgen. This bug also exists in maq's glfgen. However,
- I am not quite sure why the previous version may have problem.
-
-------------------------------------------------------------------------
-r46 | lh3lh3 | 2008-12-12 06:44:56 -0500 (Fri, 12 Dec 2008) | 6 lines
-Changed paths:
- M /branches/dev/samtools/bam_pileup.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-58
- * add maq consensus to pileup. However, I will move this part to a new
- command as strictly speaking, consensus calling is not part of pileup,
- and imposing it would make it harder to generate for other language
- bindings.
-
-------------------------------------------------------------------------
-r45 | bhandsaker | 2008-12-11 15:43:56 -0500 (Thu, 11 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/bgzf.c
-
-Fix bug in tell() after reads that consume to the exact end of a block.
-
-------------------------------------------------------------------------
-r44 | lh3lh3 | 2008-12-11 04:36:53 -0500 (Thu, 11 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/samtools.1
-
-update manual
-
-------------------------------------------------------------------------
-r43 | lh3lh3 | 2008-12-11 04:25:36 -0500 (Thu, 11 Dec 2008) | 4 lines
-Changed paths:
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-57
- * fixed a bug in parser when there are auxiliary fields
- * made the parser a bit more robust
-
-------------------------------------------------------------------------
-r42 | lh3lh3 | 2008-12-10 09:57:29 -0500 (Wed, 10 Dec 2008) | 5 lines
-Changed paths:
- M /branches/dev/samtools/bam_index.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/bgzf.c
-
- * samtools-0.1.0-56
- * fixed a bug in bgzf (only reading is affected)
- * fixed a typo in bam_index.c
- * in bam_index.c, check potential bugs in the underlying I/O library
-
-------------------------------------------------------------------------
-r41 | lh3lh3 | 2008-12-10 07:53:08 -0500 (Wed, 10 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/samtools.1
-
-update manual
-
-------------------------------------------------------------------------
-r40 | lh3lh3 | 2008-12-10 06:52:10 -0500 (Wed, 10 Dec 2008) | 5 lines
-Changed paths:
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_pileup.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-55
- * tried to make pileup work with clipping (previously not), though NOT tested
- * removed -v from pileup
- * made pileup take the reference sequence
-
-------------------------------------------------------------------------
-r39 | lh3lh3 | 2008-12-09 06:59:28 -0500 (Tue, 09 Dec 2008) | 4 lines
-Changed paths:
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/samtools.1
-
- * samtools-0.1.0-54
- * in parser, recognize "=", rather than ",", as a match
- * in parser, correctly parse "=" at the MRNM field.
-
-------------------------------------------------------------------------
-r38 | lh3lh3 | 2008-12-09 06:39:07 -0500 (Tue, 09 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/misc/maq2sam.c
-
-fixed a bug in handling maq flag 64 and 192
-
-------------------------------------------------------------------------
-r37 | lh3lh3 | 2008-12-09 04:53:46 -0500 (Tue, 09 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/misc/md5fa.c
-
-also calculate unordered md5sum check
-
-------------------------------------------------------------------------
-r36 | lh3lh3 | 2008-12-09 04:46:21 -0500 (Tue, 09 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/misc/md5fa.c
-
-fixed a minor bug when there are spaces in the sequence
-
-------------------------------------------------------------------------
-r35 | lh3lh3 | 2008-12-09 04:40:45 -0500 (Tue, 09 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/misc/md5fa.c
-
-fixed a potential memory leak
-
-------------------------------------------------------------------------
-r34 | lh3lh3 | 2008-12-08 09:52:17 -0500 (Mon, 08 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bam_index.c
- M /branches/dev/samtools/bamtk.c
-
- * fixed a bug in import: bin is wrongly calculated
-
-------------------------------------------------------------------------
-r33 | lh3lh3 | 2008-12-08 09:08:01 -0500 (Mon, 08 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/misc/all2sam.pl
-
-nothing, really
-
-------------------------------------------------------------------------
-r32 | lh3lh3 | 2008-12-08 07:56:02 -0500 (Mon, 08 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/kseq.h
- M /branches/dev/samtools/misc/Makefile
- A /branches/dev/samtools/misc/md5.c
- A /branches/dev/samtools/misc/md5.h
- A /branches/dev/samtools/misc/md5fa.c
-
- * fixed two warnings in kseq.h
- * added md5sum utilities
-
-------------------------------------------------------------------------
-r31 | lh3lh3 | 2008-12-08 06:35:29 -0500 (Mon, 08 Dec 2008) | 5 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bamtk.c
- A /branches/dev/samtools/kseq.h
- D /branches/dev/samtools/kstream.h
-
- * samtools-0.1.0-52
- * replace kstream with kseq. kseq is a superset of kstream. I need the
- extra functions in kseq.h.
- * also compile stand-alone faidx
-
-------------------------------------------------------------------------
-r30 | lh3lh3 | 2008-12-08 06:17:04 -0500 (Mon, 08 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_sort.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-51
- * sorting by read names is available
-
-------------------------------------------------------------------------
-r29 | lh3lh3 | 2008-12-08 05:29:02 -0500 (Mon, 08 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam.c
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bam_pileup.c
- M /branches/dev/samtools/bam_sort.c
- M /branches/dev/samtools/bam_tview.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/misc/maq2sam.c
-
- * samtools-0.1.0-50
- * format change to meet the latest specification
-
-------------------------------------------------------------------------
-r28 | lh3lh3 | 2008-12-04 11:09:21 -0500 (Thu, 04 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/misc/maq2sam.c
-
- * minor change in maqcns: special care when n==0
- * change maq2sam to meet the latest specification
-
-------------------------------------------------------------------------
-r27 | lh3lh3 | 2008-12-04 10:55:44 -0500 (Thu, 04 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/razf.c
- M /branches/dev/samtools/razf.h
-
-considerable code clean up in razf
-
-------------------------------------------------------------------------
-r26 | lh3lh3 | 2008-12-04 10:08:18 -0500 (Thu, 04 Dec 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/ChangeLog
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/faidx.c
-
-make RAZF optional in faidx.c
-
-------------------------------------------------------------------------
-r25 | lh3lh3 | 2008-12-01 10:27:22 -0500 (Mon, 01 Dec 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/bam_aux.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/samtools.1
-
- * samtools-0.1.0-49
- * added routines for retrieving aux data, NOT TESTED YET!
-
-------------------------------------------------------------------------
-r24 | lh3lh3 | 2008-12-01 09:29:43 -0500 (Mon, 01 Dec 2008) | 5 lines
-Changed paths:
- M /branches/dev/samtools/bam.c
- M /branches/dev/samtools/bam_import.c
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/bgzf.c
- M /branches/dev/samtools/samtools.1
-
- * samtools-0.1.0-48
- * bgzf: fixed a potential integer overflow on 32-bit machines
- * maqcns: set the minimum combined quality as 0
- * supporting hex strings
-
-------------------------------------------------------------------------
-r23 | lh3lh3 | 2008-11-27 12:14:37 -0500 (Thu, 27 Nov 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/bam_maqcns.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-47
- * fixed the bug in maqcns
-
-------------------------------------------------------------------------
-r22 | lh3lh3 | 2008-11-27 12:08:11 -0500 (Thu, 27 Nov 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/bam.h
- A /branches/dev/samtools/bam_maqcns.c
- A /branches/dev/samtools/bam_maqcns.h
- M /branches/dev/samtools/bam_tview.c
- M /branches/dev/samtools/bamtk.c
- A /branches/dev/samtools/glf.h
-
- * samtools-0.1.0-46
- * add MAQ consensus caller, currently BUGGY!
-
-------------------------------------------------------------------------
-r21 | lh3lh3 | 2008-11-27 08:51:28 -0500 (Thu, 27 Nov 2008) | 4 lines
-Changed paths:
- M /branches/dev/samtools/bam_pileup.c
- M /branches/dev/samtools/bam_tview.c
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-45
- * tview: display padded alignment (but not P operation)
- * better coordinates and reference sequence
-
-------------------------------------------------------------------------
-r19 | lh3lh3 | 2008-11-27 04:26:05 -0500 (Thu, 27 Nov 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools/ChangeLog
-
-new ChangeLog
-
-------------------------------------------------------------------------
-r18 | lh3lh3 | 2008-11-27 04:24:45 -0500 (Thu, 27 Nov 2008) | 3 lines
-Changed paths:
- D /branches/dev/samtools/ChangeLog
- A /branches/dev/samtools/ChangeLog.old (from /branches/dev/samtools/ChangeLog:6)
-
-Rename ChangeLog to ChangeLog.old. This old ChangeLog is generated from
-the log of my personal SVN repository.
-
-------------------------------------------------------------------------
-r17 | lh3lh3 | 2008-11-27 04:22:55 -0500 (Thu, 27 Nov 2008) | 6 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/bgzf.c
-
- * samtools-0.1.0-44
- * declare fseeko and ftello as some Linux may not do this by default and
- missing these declarations will make bgzf buggy
- * get rid of some harmless warnings
- * use BGZF by default, now
-
-------------------------------------------------------------------------
-r16 | lh3lh3 | 2008-11-26 16:19:11 -0500 (Wed, 26 Nov 2008) | 4 lines
-Changed paths:
- M /branches/dev/samtools/bam_index.c
- M /branches/dev/samtools/bamtk.c
- M /branches/dev/samtools/razf.c
-
- * samtools-0.1.0-43
- * fixed a bug in razf_read()
- * give more warnings when the file is truncated (or due to bugs in I/O library)
-
-------------------------------------------------------------------------
-r15 | lh3lh3 | 2008-11-26 15:41:39 -0500 (Wed, 26 Nov 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/bgzf.c
-
-fixed a bug in bgzf.c at the end of the file
-
-------------------------------------------------------------------------
-r14 | lh3lh3 | 2008-11-26 12:05:18 -0500 (Wed, 26 Nov 2008) | 4 lines
-Changed paths:
- M /branches/dev/samtools/bamtk.c
-
- * samtools-0.1.0-42
- * a lot happened to RAZF, although samtools itself is untouched. Better
- also update the version number anyway to avoid confusion
-
-------------------------------------------------------------------------
-r13 | lh3lh3 | 2008-11-26 12:03:48 -0500 (Wed, 26 Nov 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/razf.c
-
-a change from Jue, but I think it should not matter
-
-------------------------------------------------------------------------
-r12 | lh3lh3 | 2008-11-26 11:48:14 -0500 (Wed, 26 Nov 2008) | 3 lines
-Changed paths:
- M /branches/dev/samtools/razf.c
-
-fixed a potential bug in razf. However, it seems still buggy, just
-rarely happens, very rarely.
-
-------------------------------------------------------------------------
-r11 | lh3lh3 | 2008-11-26 09:02:56 -0500 (Wed, 26 Nov 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/razf.c
-
-fixed a bug in razf, with the help of Jue
-
-------------------------------------------------------------------------
-r10 | lh3lh3 | 2008-11-26 06:55:32 -0500 (Wed, 26 Nov 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/bam_index.c
-
-remove a comment
-
-------------------------------------------------------------------------
-r9 | lh3lh3 | 2008-11-26 06:37:05 -0500 (Wed, 26 Nov 2008) | 2 lines
-Changed paths:
- M /branches/dev/samtools/Makefile
- M /branches/dev/samtools/bam.h
- M /branches/dev/samtools/razf.c
- M /branches/dev/samtools/razf.h
-
- * Jue has updated razf to realize Bob's scheme
-
-------------------------------------------------------------------------
-r7 | lh3lh3 | 2008-11-25 15:37:37 -0500 (Tue, 25 Nov 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools/samtools.1
-
-the manual page
-
-------------------------------------------------------------------------
-r6 | lh3lh3 | 2008-11-25 15:37:16 -0500 (Tue, 25 Nov 2008) | 3 lines
-Changed paths:
- A /branches/dev/samtools/ChangeLog
- A /branches/dev/samtools/Makefile
- A /branches/dev/samtools/bam.c
- A /branches/dev/samtools/bam.h
- A /branches/dev/samtools/bam_aux.c
- A /branches/dev/samtools/bam_endian.h
- A /branches/dev/samtools/bam_import.c
- A /branches/dev/samtools/bam_index.c
- A /branches/dev/samtools/bam_lpileup.c
- A /branches/dev/samtools/bam_pileup.c
- A /branches/dev/samtools/bam_sort.c
- A /branches/dev/samtools/bam_tview.c
- A /branches/dev/samtools/bamtk.c
- A /branches/dev/samtools/bgzf.c
- A /branches/dev/samtools/bgzf.h
- A /branches/dev/samtools/bgzip.c
- A /branches/dev/samtools/faidx.c
- A /branches/dev/samtools/faidx.h
- A /branches/dev/samtools/khash.h
- A /branches/dev/samtools/ksort.h
- A /branches/dev/samtools/kstream.h
- A /branches/dev/samtools/misc
- A /branches/dev/samtools/misc/Makefile
- A /branches/dev/samtools/misc/all2sam.pl
- A /branches/dev/samtools/misc/maq2sam.c
- A /branches/dev/samtools/razf.c
- A /branches/dev/samtools/razf.h
- A /branches/dev/samtools/razip.c
- A /branches/dev/samtools/zutil.h
-
-The initial version of samtools, replicated from my local SVN repository.
-The current version is: 0.1.0-42. All future development will happen here.
-
-------------------------------------------------------------------------
-r5 | lh3lh3 | 2008-11-25 15:30:49 -0500 (Tue, 25 Nov 2008) | 2 lines
-Changed paths:
- A /branches/dev/samtools
-
-samtools (C version)
-
-------------------------------------------------------------------------
diff --git a/src/samtools-0.1.18/INSTALL b/src/samtools-0.1.18/INSTALL
deleted file mode 100644
index 37d84a9..0000000
--- a/src/samtools-0.1.18/INSTALL
+++ /dev/null
@@ -1,30 +0,0 @@
-System Requirements
-===================
-
-SAMtools depends on the zlib library <http://www.zlib.net>. Version 1.2.3+ is
-preferred and with 1.2.3+ you can compile razip and use it to compress a FASTA
-file. SAMtools' faidx is able to index a razip-compressed FASTA file to save
-diskspace. Older zlib also works with SAMtools, but razip cannot be compiled.
-
-The text-based viewer (tview) requires the GNU ncurses library
-<http://www.gnu.org/software/ncurses/>, which comes with Mac OS X and most of
-the modern Linux/Unix distributions. If you do not have this library installed,
-you can still compile the rest of SAMtools by manually changing:
-`-D_CURSES_LIB=1' to `-D_CURSES_LIB=0' at the line starting with `DFLAGS=', and
-comment out the line starting with `LIBCURSES='.
-
-
-Compilation
-===========
-
-Type `make' to compile samtools. If you have zlib >= 1.2.2.1, you can compile
-razip with `make razip'.
-
-
-Installation
-============
-
-Copy `samtools', `bcftools/bcftools' and other executables/scripts in `misc' to
-a location you want (e.g. a directory in your $PATH). You may also copy
-`samtools.1' and `bcftools/bcftools.1' to a directory in your $MANPATH such
-that the `man' command may find the manual.
diff --git a/src/samtools-0.1.18/Makefile b/src/samtools-0.1.18/Makefile
deleted file mode 100644
index 00927f8..0000000
--- a/src/samtools-0.1.18/Makefile
+++ /dev/null
@@ -1,93 +0,0 @@
-CC= gcc
-CFLAGS= -g -Wall -O2 #-m64 #-arch ppc
-DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_USE_KNETFILE -D_CURSES_LIB=0
-KNETFILE_O= knetfile.o
-LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
- bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o bedidx.o \
- $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bam_cat.o
-AOBJS= bam_tview.o bam_plcmd.o sam_view.o \
- bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \
- bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \
- cut_target.o phase.o bam2depth.o
-PROG= samtools_0.1.18
-INCLUDES= -I.
-SUBDIRS= . bcftools
-LIBPATH=
-LIBCURSES= -lcurses
-
-.SUFFIXES:.c .o
-
-.c.o:
- $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
-
-all-recur lib-recur clean-recur cleanlocal-recur install-recur:
- @target=`echo $@ | sed s/-recur//`; \
- wdir=`pwd`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- cd $$subdir; \
- $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \
- INCLUDES="$(INCLUDES)" LIBPATH="$(LIBPATH)" $$target || exit 1; \
- cd $$wdir; \
- done;
-
-all:$(PROG)
-
-.PHONY:all lib clean cleanlocal
-.PHONY:all-recur lib-recur clean-recur cleanlocal-recur install-recur
-
-lib:libbam.a
-
-libbam.a:$(LOBJS)
- $(AR) -csru $@ $(LOBJS)
-
-samtools_0.1.18:lib-recur $(AOBJS)
- $(CC) $(CFLAGS) -o $@ $(AOBJS) -Lbcftools $(LIBPATH) libbam.a -lbcf -lm -lz #$(LIBCURSES)
-
-razip:razip.o razf.o $(KNETFILE_O)
- $(CC) $(CFLAGS) -o $@ razf.o razip.o $(KNETFILE_O) -lz
-
-bgzip:bgzip.o bgzf.o $(KNETFILE_O)
- $(CC) $(CFLAGS) -o $@ bgzf.o bgzip.o $(KNETFILE_O) -lz
-
-razip.o:razf.h
-bam.o:bam.h razf.h bam_endian.h kstring.h sam_header.h
-sam.o:sam.h bam.h
-bam_import.o:bam.h kseq.h khash.h razf.h
-bam_pileup.o:bam.h razf.h ksort.h
-bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h
-bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h
-bam_lpileup.o:bam.h ksort.h
-bam_tview.o:bam.h faidx.h
-bam_sort.o:bam.h ksort.h razf.h
-bam_md.o:bam.h faidx.h
-sam_header.o:sam_header.h khash.h
-bcf.o:bcftools/bcf.h
-bam2bcf.o:bam2bcf.h errmod.h bcftools/bcf.h
-bam2bcf_indel.o:bam2bcf.h
-errmod.o:errmod.h
-phase.o:bam.h khash.h ksort.h
-bamtk.o:bam.h
-
-faidx.o:faidx.h razf.h khash.h
-faidx_main.o:faidx.h razf.h
-
-
-libbam.1.dylib-local:$(LOBJS)
- libtool -dynamic $(LOBJS) -o libbam.1.dylib -lc -lz
-
-libbam.so.1-local:$(LOBJS)
- $(CC) -shared -Wl,-soname,libbam.so -o libbam.so.1 $(LOBJS) -lc -lz
-
-dylib:
- @$(MAKE) cleanlocal; \
- case `uname` in \
- Linux) $(MAKE) CFLAGS="$(CFLAGS) -fPIC" libbam.so.1-local;; \
- Darwin) $(MAKE) CFLAGS="$(CFLAGS) -fPIC" libbam.1.dylib-local;; \
- *) echo 'Unknown OS';; \
- esac
-
-
-cleanlocal:
- rm -fr gmon.out *.o a.out *.exe *.dSYM razip bgzip $(PROG) *~ *.a *.so.* *.so *.dylib
-
-clean:cleanlocal-recur
diff --git a/src/samtools-0.1.18/Makefile.mingw b/src/samtools-0.1.18/Makefile.mingw
deleted file mode 100644
index 7a57ffc..0000000
--- a/src/samtools-0.1.18/Makefile.mingw
+++ /dev/null
@@ -1,63 +0,0 @@
-CC= gcc.exe
-AR= ar.exe
-CFLAGS= -g -Wall -O2
-DFLAGS= -D_USE_KNETFILE -D_CURSES_LIB=2
-KNETFILE_O= knetfile.o
-LOBJS= bgzf.o kstring.o bam_aux.o bam.o bam_import.o sam.o bam_index.o \
- bam_pileup.o bam_lpileup.o bam_md.o razf.o faidx.o \
- $(KNETFILE_O) bam_sort.o sam_header.o bam_reheader.o kprobaln.o bedidx.o
-AOBJS= bam_tview.o bam_plcmd.o sam_view.o \
- bam_rmdup.o bam_rmdupse.o bam_mate.o bam_stat.o bam_color.o \
- bamtk.o kaln.o bam2bcf.o bam2bcf_indel.o errmod.o sample.o \
- cut_target.o phase.o bam_cat.o bam2depth.o
-BCFOBJS= bcftools/bcf.o bcftools/fet.o bcftools/bcf2qcall.o bcftools/bcfutils.o \
- bcftools/call1.o bcftools/index.o bcftools/kfunc.o bcftools/em.o \
- bcftools/kmin.o bcftools/prob1.o bcftools/vcf.o bcftools/mut.o
-PROG= samtools.exe bcftools.exe
-INCLUDES= -I. -Iwin32
-SUBDIRS= .
-LIBPATH=
-
-.SUFFIXES:.c .o
-
-.c.o:
- $(CC) -c $(CFLAGS) $(DFLAGS) $(INCLUDES) $< -o $@
-
-all:$(PROG)
-
-.PHONY:all lib clean cleanlocal
-.PHONY:all-recur lib-recur clean-recur cleanlocal-recur install-recur
-
-lib:libbam.a
-
-libbam.a:$(LOBJS)
- $(AR) -cru $@ $(LOBJS)
-
-samtools.exe:$(AOBJS) libbam.a $(BCFOBJS)
- $(CC) $(CFLAGS) -o $@ $(AOBJS) $(BCFOBJS) $(LIBPATH) -lm -L. -lbam -Lwin32 -lz -lcurses -lws2_32
-
-bcftools.exe:$(BCFOBJS) bcftools/main.o kstring.o bgzf.o knetfile.o bedidx.o
- $(CC) $(CFLAGS) -o $@ $(BCFOBJS) bcftools/main.o kstring.o bgzf.o knetfile.o bedidx.o -lm -Lwin32 -lz -lws2_32
-
-razip.o:razf.h
-bam.o:bam.h razf.h bam_endian.h kstring.h sam_header.h
-sam.o:sam.h bam.h
-bam_import.o:bam.h kseq.h khash.h razf.h
-bam_pileup.o:bam.h razf.h ksort.h
-bam_plcmd.o:bam.h faidx.h bcftools/bcf.h bam2bcf.h
-bam_index.o:bam.h khash.h ksort.h razf.h bam_endian.h
-bam_lpileup.o:bam.h ksort.h
-bam_tview.o:bam.h faidx.h
-bam_sort.o:bam.h ksort.h razf.h
-bam_md.o:bam.h faidx.h
-sam_header.o:sam_header.h khash.h
-bcf.o:bcftools/bcf.h
-bam2bcf.o:bam2bcf.h errmod.h bcftools/bcf.h
-bam2bcf_indel.o:bam2bcf.h
-errmod.o:errmod.h
-
-faidx.o:faidx.h razf.h khash.h
-faidx_main.o:faidx.h razf.h
-
-clean:
- rm -fr gmon.out *.o a.out *.exe *.dSYM razip bgzip $(PROG) *~ *.a *.so.* *.so *.dylib
diff --git a/src/samtools-0.1.18/NEWS b/src/samtools-0.1.18/NEWS
deleted file mode 100644
index 41a6cc8..0000000
--- a/src/samtools-0.1.18/NEWS
+++ /dev/null
@@ -1,806 +0,0 @@
-Beta Release 0.1.18 (2 September, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes in samtools:
-
- * Support the new =/X CIGAR operators (by Peter Cock).
-
- * Allow to subsample BAM while keeping the pairing intact (view -s).
-
- * Implemented variant distance bias as a new filter (by Petr Danecek).
-
- * Bugfix: huge memory usage during indexing
-
- * Bugfix: use of uninitialized variable in mpileup (rare)
-
- * Bugfix: wrong BAQ probability (rare)
-
-Notable changes in bcftools:
-
- * Support indel in the contrast caller.
-
- * Bugfix: LRT2=nan in rare cases
-
-(0.1.18: 2 September 2011, r982:295)
-
-
-
-Beta Release 0.1.17 (6 July, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-With the maturity of `mpileup' and the lack of update in the `pileup' command,
-the `pileup' command is now formally dropped. Most of the pileup functionality,
-such as outputting mapping quality and read positions, have been added
-to `mpileup'.
-
-Since this release, `bcftools view' is able to perform contrast SNP calling
-(option -T) for discovering de novo and/or somatic mutations between a pair of
-samples or in a family trio. Potential mutations are scored by a log likelihood
-ratio, which is very simple in math, but should be comparable to more
-sophisticated methods. Note that getting the score is only the very first step.
-A lot more need to be done to reduce systematical errors due to mapping and
-reference errors and structural variations.
-
-Other notable changes in samtools:
-
- * Improved sorting order checking during indexing.
-
- * Improved region parsing. Colons in reference sequence names are parsed
- properly.
-
- * Fixed an issue where mpileup does not apply BAQ for the first few reads when
- a region is specified.
-
- * Fixed an issue where `faidx' does not work with FASTA files with long lines.
-
- * Bugfix: wrong SP genotype information in the BCF output.
-
-Other notable changes in bcftools:
-
- * Output the ML estimate of the allele count.
-
- * Added the HWE plus F<0 filter to varFilter. For multiple samples, it
- effectively filters false heterozygous calls around centromeres.
-
- * For association mapping, perform both 1-degree and 2-degree test. The
- 2-degree test is conservative but more robust to HWE violation.
-
-(0.1.17: 6 July 2011, r973:277)
-
-
-
-Beta Release 0.1.16 (21 April, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes in samtools:
-
- * Support the new SAM/BAM type `B' in the latest SAM spec v1.4.
-
- * When the output file of `samtools merge' exists, do not overwrite it unless
- a new command-line option `-f' is applied.
-
- * Bugfix: BED support is not working when the input BED is not sorted.
-
- * Bugfix: some reads without coordinates but given on the reverse strand are
- lost in merging.
-
-Notable changes in bcftools:
-
- * Code cleanup: separated max-likelihood inference and Bayesian inference.
-
- * Test Hardy-Weinberg equilibrium with a likelihood-ratio test.
-
- * Provided another association test P-value by likelihood-ratio test.
-
- * Use Brent's method to estimate the site allele frequency when EM converges
- slowly. The resulting ML estimate of allele frequency is more accurate.
-
- * Added the `ldpair' command, which computes r^2 between SNP pairs given in
- an input file.
-
-Also, the `pileup' command, which has been deprecated by `mpileup' since
-version 0.1.10, will be dropped in the next release. The old `pileup' command
-is substandard and causing a lot of confusion.
-
-(0.1.16: 21 April 2011, r963:234)
-
-
-
-Beta Release 0.1.15 (10 April, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes:
-
- * Allow to perform variant calling or to extract information in multiple
- regions specified by a BED file (`samtools mpileup -l', `samtools view -L'
- and `bcftools view -l').
-
- * Added the `depth' command to samtools to compute the per-base depth with a
- simpler interface. File `bam2depth.c', which implements this command, is the
- recommended example on how to use the mpileup APIs.
-
- * Estimate genotype frequencies with ML; perform chi^2 based Hardy-Weinberg
- test using this estimate.
-
- * For `samtools view', when `-R' is specified, drop read groups in the header
- that are not contained in the specified file.
-
- * For `samtools flagstat', separate QC-pass and QC-fail reads.
-
- * Improved the command line help of `samtools mpileup' and `bcftools view'.
-
- * Use a global variable to control the verbose level of samtools stderr
- output. Nonetheless, it has not been fully utilized.
-
- * Fixed an issue in association test which may report false associations,
- possibly due to floating point underflow.
-
-(0.1.15: 10 April 2011, r949:203)
-
-
-
-Beta release 0.1.14 (21 March, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This release implements a method for testing associations for case-control
-data. The method does not call genotypes but instead sums over all genotype
-configurations to compute a chi^2 based test statistics. It can be potentially
-applied to comparing a pair of samples (e.g. a tumor-normal pair), but this
-has not been evaluated on real data.
-
-Another new feature is to make X chromosome variant calls when female and male
-samples are both present. The user needs to provide a file indicating the
-ploidy of each sample (see also manual bcftools/bcftools.1).
-
-Other notable changes:
-
- * Added `bcftools view -F' to parse BCF files generated by samtools r921 or
- older which encodes PL in a different way.
-
- * Changed the behavior of `bcftools view -s'. Now when a list of samples is
- provided, the samples in the output will be reordered to match the ordering
- in the sample list. This change is mainly designed for association test.
-
- * Sped up `bcftools view -v' for target sequencing given thousands of samples.
- Also added a new option `view -d' to skip loci where only a few samples are
- covered by reads.
-
- * Dropped HWE test. This feature has never been implemented properly. An EM
- should be much better. To be implemented in future.
-
- * Added the `cat' command to samtools. This command concatenates BAMs with
- identical sequence dictionaries in an efficient way. Modified from bam_cat.c
- written by Chris Saunders.
-
- * Added `samtools view -1' to write BAMs at a low compression level but twice
- faster to create. The `sort' command generates temporary files at a low
- compression level as well.
-
- * Added `samtools mpileup -6' to accept "BAM" with Illumina 1.3+ quality
- strings (strictly speaking, such a file is not BAM).
-
- * Added `samtools mpileup -L' to skip INDEL calling in regions with
- excessively high coverage. Such regions dramatically slow down mpileup.
-
- * Updated `misc/export2sam.pl', provided by Chris Saunders from Illumina Inc.
-
-(0.1.14: 21 March 2011, r933:170)
-
-
-
-Beta release 0.1.13 (1 March, 2011)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The most important though largely invisible modification is the change of the
-order of genotypes in the PL VCF/BCF tag. This is to conform the upcoming VCF
-spec v4.1. The change means that 0.1.13 is not backward compatible with VCF/BCF
-generated by samtools older than r921 inclusive. VCF/BCF generated by the new
-samtools will contain a line `##fileformat=VCFv4.1' as well as the samtools
-version number.
-
-Single Individual Haplotyping (SIH) is added as an experimental feature. It
-originally aims to produce haploid consensus from fosmid pool sequencing, but
-also works with short-read data. For short reads, phased blocks are usually too
-short to be useful in many applications, but they can help to rule out part of
-SNPs close to INDELs or between copies of CNVs.
-
-
-Other notable changes in samtools:
-
- * Construct per-sample consensus to reduce the effect of nearby SNPs in INDEL
- calling. This reduces the power but improves specificity.
-
- * Improved sorting order checking in indexing. Now indexing is the preferred way
- to check if a BAM is sorted.
-
- * Added a switch `-E' to mpileup and calmd. This option uses an alternative way
- to apply BAQ, which increases sensitivity, especially to MNPs, at the cost of
- a little loss in specificity.
-
- * Added `mpileup -A' to allow to use reads in anomalous pairs in SNP calling.
-
- * Added `mpileup -m' to allow fine control of the collection of INDEL candidates.
-
- * Added `mpileup -S' to compute per-sample strand bias P-value.
-
- * Added `mpileup -G' to exclude read groups in variant calling.
-
- * Fixed segfault in indel calling related to unmapped and refskip reads.
-
- * Fixed an integer overflow in INDEL calling. This bug produces wrong INDEL
- genotypes for longer short INDELs, typically over 10bp.
-
- * Fixed a bug in tview on big-endian machines.
-
- * Fixed a very rare memory issue in bam_md.c
-
- * Fixed an out-of-boundary bug in mpileup when the read base is `N'.
-
- * Fixed a compiling error when the knetfile library is not used. Fixed a
- library compiling error due to the lack of bam_nt16_nt4_table[] table.
- Suppress a compiling warning related to the latest zlib.
-
-
-Other notable changes in bcftools:
-
- * Updated the BCF spec.
-
- * Added the `FQ' VCF INFO field, which gives the phred-scaled probability
- of all samples being the same (identical to the reference or all homozygous
- variants). Option `view -f' has been dropped.
-
- * Implemented "vcfutils.pl vcf2fq" to generate a consensus sequence
- similar to "samtools.pl pileup2fq".
-
- * Make sure the GT FORMAT field is always the first FORMAT to conform the VCF
- spec. Drop bcf-fix.pl.
-
- * Output bcftools specific INFO and FORMAT in the VCF header.
-
- * Added `view -s' to call variants from a subset of samples.
-
- * Properly convert VCF to BCF with a user provided sequence dictionary. Nonetheless,
- custom fields are still unparsed and will be stored as a missing value.
-
- * Fixed a minor bug in Fisher's exact test; the results are rarely changed.
-
-
-(0.1.13: 1 March 2011, r926:134)
-
-
-
-Beta release 0.1.12a (2 December, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This is another bug fix release:
-
- * Fixed a memory violation in mpileup, which causes segfault. Release
- 0.1.9 and above are affected.
-
- * Fixed a memory violation in the indel caller, which does not causes
- segfault, but may potentially affect deletion calls in an unexpected
- way. Release 0.1.10 and above are affected.
-
- * Fixed a bug in computing r-square in bcftools. Few are using this
- functionality and it only has minor effect.
-
- * Fixed a memory leak in bam_fetch().
-
- * Fixed a bug in writing meta information to the BAM index for the last
- sequence. This bug is invisible to most users, but it is a bug anyway.
-
- * Fixed a bug in bcftools which causes false "DP4=0,0,0,0" annotations.
-
-(0.1.12: 2 December 2010, r862)
-
-
-
-Beta release 0.1.11 (21 November, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This is mainly a bug fix release:
-
- * Fixed a bug in random retrieval (since 0.1.8). It occurs when reads
- are retrieved from a small region containing no reads.
-
- * Fixed a bug in pileup (since 0.1.9). The bug causes an assertion
- failure when the first CIGAR operation is a deletion.
-
- * Improved fault tolerence in remote access.
-
-One minor feature has been implemented in bcftools:
-
- * Added a reference-free variant calling mode. In this mode, a site is
- regarded as a variat iff the sample(s) contains two or more alleles;
- the meaning of the QUAL field in the VCF output is changed
- accordingly. Effectively, the reference allele is irrelevant to the
- result in the new mode, although the reference sequence has to be
- used in realignment when SAMtools computes genotype likelihoods.
-
-In addition, since 0.1.10, the `pileup' command has been deprecated by
-`mpileup' which is more powerful and more accurate. The `pileup' command
-will not be removed in the next few releases, but new features will not
-be added.
-
-(0.1.11: 21 November 2010, r851)
-
-
-
-Beta Release 0.1.10 (16 November, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This release is featured as the first major improvement to the indel
-caller. The method is similar to the old one implemented in the pileup
-command, but the details are handled more carefully both in theory and
-in practice. As a result, the new indel caller usually gives more
-accurate indel calls, though at the cost of sensitivity. The caller is
-implemented in the mpileup command and is invoked by default. It works
-with multiple samples.
-
-Other notable changes:
-
- * With the -r option, the calmd command writes the difference between
- the original base quality and the BAQ capped base quality at the BQ
- tag but does not modify the base quality. Please use -Ar to overwrite
- the original base quality (the 0.1.9 behavior).
-
- * Allow to set a maximum per-sample read depth to reduce memory. In
- 0.1.9, most of memory is wasted for the ultra high read depth in some
- regions (e.g. the chr1 centromere).
-
- * Optionally write per-sample read depth and per-sample strand bias
- P-value.
-
- * Compute equal-tail (Bayesian) credible interval of site allele
- frequency at the CI95 VCF annotation.
-
- * Merged the vcfutils.pl varFilter and filter4vcf for better SNP/indel
- filtering.
-
-(0.1.10: 16 November 2010, r829)
-
-
-
-Beta Release 0.1.9 (27 October, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This release is featured as the first major improvement to the samtools'
-SNP caller. It comes with a revised MAQ error model, the support of
-multi-sample SNP calling and the computation of base alignment quality
-(BAQ).
-
-The revised MAQ error model is based on the original model. It solves an
-issue of miscalling SNPs in repetitive regions. Althought such SNPs can
-usually be filtered at a later step, they mess up unfiltered calls. This
-is a theoretical flaw in the original model. The revised MAQ model
-deprecates the orginal MAQ model and the simplified SOAPsnp model.
-
-Multi-sample SNP calling is separated in two steps. The first is done by
-samtools mpileup and the second by a new program, bcftools, which is
-included in the samtools source code tree. Multi-sample SNP calling also
-works for single sample and has the advantage of enabling more powerful
-filtration. It is likely to deprecate pileup in future once a proper
-indel calling method is implemented.
-
-BAQ is the Phred-scaled probability of a read base being wrongly
-aligned. Capping base quality by BAQ has been shown to be very effective
-in suppressing false SNPs caused by misalignments around indels or in
-low-complexity regions with acceptable compromise on computation
-time. This strategy is highly recommended and can be used with other SNP
-callers as well.
-
-In addition to the three major improvements, other notable changes are:
-
- * Changes to the pileup format. A reference skip (the N CIGAR operator)
- is shown as '<' or '>' depending on the strand. Tview is also changed
- accordingly.
-
- * Accelerated pileup. The plain pileup is about 50% faster.
-
- * Regional merge. The merge command now accepts a new option to merge
- files in a specified region.
-
- * Fixed a bug in bgzip and razip which causes source files to be
- deleted even if option -c is applied.
-
- * In APIs, propogate errors to downstream callers and make samtools
- return non-zero values once errors occur.
-
-(0.1.9: 27 October 2010, r783)
-
-
-
-Beta Release 0.1.8 (11 July, 2010)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable functional changes:
-
- * Added the `reheader' command which replaces a BAM header with a new
- header. This command is much faster than replacing header by
- BAM->SAM->BAM conversions.
-
- * Added the `mpileup' command which computes the pileup of multiple
- alignments.
-
- * The `index' command now stores the number of mapped and unmapped
- reads in the index file. This information can be retrieved quickly by
- the new `idxstats' command.
-
- * By default, pileup used the SOAPsnp model for SNP calling. This
- avoids the floating overflow in the MAQ model which leads to spurious
- calls in repetitive regions, although these calls will be immediately
- filtered by varFilter.
-
- * The `tview' command now correctly handles CIGARs like 7I10M and
- 10M1P1I10M which cause assertion failure in earlier versions.
-
- * Tview accepts a region like `=10,000' where `=' stands for the
- current sequence name. This saves typing for long sequence names.
-
- * Added the `-d' option to `pileup' which avoids slow indel calling
- in ultradeep regions by subsampling reads locally.
-
- * Added the `-R' option to `view' which retrieves alignments in read
- groups listed in the specified file.
-
-Performance improvements:
-
- * The BAM->SAM conversion is up to twice faster, depending on the
- characteristic of the input.
-
- * Parsing SAM headers with a lot of reference sequences is now much
- faster.
-
- * The number of lseek() calls per query is reduced when the query
- region contains no read alignments.
-
-Bug fixes:
-
- * Fixed an issue in the indel caller that leads to miscall of indels.
- Note that this solution may not work well when the sequencing indel
- error rate is higher than the rate of SNPs.
-
- * Fixed another issue in the indel caller which may lead to incorrect
- genotype.
-
- * Fixed a bug in `sort' when option `-o' is applied.
-
- * Fixed a bug in `view -r'.
-
-APIs and other changes:
-
- * Added iterator interfaces to random access and pileup. The callback
- interfaces directly call the iterator interfaces.
-
- * The BGZF blocks holding the BAM header are indepedent of alignment
- BGZF blocks. Alignment records shorter than 64kB is guaranteed to be
- fully contained in one BGZF block. This change is fully compatible
- with the old version of samtools/picard.
-
-Changes in other utilities:
-
- * Updated export2sam.pl by Chris Saunders.
-
- * Improved the sam2vcf.pl script.
-
- * Added a Python version of varfilter.py by Aylwyn Scally.
-
-(0.1.8: 11 July 2010, r613)
-
-
-
-Beta Release 0.1.7 (10 November, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes:
-
- * Improved the indel caller in complex scenariors, in particular for
- long reads. The indel caller is now able to make reasonable indel
- calls from Craig Venter capillary reads.
-
- * Rewrote single-end duplicate removal with improved
- performance. Paired-end reads are not touched.
-
- * Duplicate removal is now library aware. Samtools remove potential
- PCR/optical dupliates inside a library rather than across libraries.
-
- * SAM header is now fully parsed, although this functionality is not
- used in merging and so on.
-
- * In samtools merge, optionally take the input file name as RG-ID and
- attach the RG tag to each alignment.
-
- * Added FTP support in the RAZF library. RAZF-compressed reference
- sequence can be retrieved remotely.
-
- * Improved network support for Win32.
-
- * Samtools sort and merge are now stable.
-
-Changes in other utilities:
-
- * Implemented sam2vcf.pl that converts the pileup format to the VCF
- format.
-
- * This release of samtools is known to work with the latest
- Bio-Samtools Perl module.
-
-(0.1.7: 10 November 2009, r510)
-
-
-
-Beta Release 0.1.6 (2 September, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes:
-
- * In tview, do not show a blank screen when no reads mapped to the
- corresponding region.
-
- * Implemented native HTTP support in the BGZF library. Samtools is now
- able to directly open a BAM file on HTTP. HTTP proxy is also
- supported via the "http_proxy" environmental variable.
-
- * Samtools is now compitable with the MinGW (win32) compiler and the
- PDCurses library.
-
- * The calmd (or fillmd) command now calculates the NM tag and replaces
- MD tags if they are wrong.
-
- * The view command now recognizes and optionally prints FLAG in HEXs or
- strings to make a SAM file more friendly to human eyes. This is a
- samtools-C extension, not implemented in Picard for the time
- being. Please type `samtools view -?' for more information.
-
- * BAM files now have an end-of-file (EOF) marker to facilitate
- truncation detection. A warning will be given if an on-disk BAM file
- does not have this marker. The warning will be seen on BAM files
- generated by an older version of samtools. It does NO harm.
-
- * New key bindings in tview: `r' to show read names and `s' to show
- reference skip (N operation) as deletions.
-
- * Fixed a bug in `samtools merge -n'.
-
- * Samtools merge now optionally copies the header of a user specified
- SAM file to the resultant BAM output.
-
- * Samtools pileup/tview works with a CIGAR with the first or the last
- operation is an indel.
-
- * Fixed a bug in bam_aux_get().
-
-
-Changes in other utilies:
-
- * Fixed wrong FLAG in maq2sam.
-
-
-(0.1.6: 2 September 2009, r453)
-
-
-
-Beta Release 0.1.5 (7 July, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes:
-
- * Support opening a BAM alignment on FTP. Users can now use "tview" to
- view alignments at the NCBI ftp site. Please read manual for more
- information.
-
- * In library, propagate errors rather than exit or complain assertion
- failure.
-
- * Simplified the building system and fixed compiling errors caused by
- zlib<1.2.2.1.
-
- * Fixed an issue about lost header information when a SAM is imported
- with "view -t".
-
- * Implemented "samtool.pl varFilter" which filters both SNPs and short
- indels. This command replaces "indelFilter".
-
- * Implemented "samtools.pl pileup2fq" to generate FASTQ consensus from
- pileup output.
-
- * In pileup, cap mapping quality at 60. This helps filtering when
- different aligners are in use.
-
- * In pileup, allow to output variant sites only.
-
- * Made pileup generate correct calls in repetitive region. At the same
- time, I am considering to implement a simplified model in SOAPsnp,
- although this has not happened yet.
-
- * In view, added '-u' option to output BAM without compression. This
- option is preferred when the output is piped to other commands.
-
- * In view, added '-l' and '-r' to get the alignments for one library or
- read group. The "@RG" header lines are now partially parsed.
-
- * Do not include command line utilities to libbam.a.
-
- * Fixed memory leaks in pileup and bam_view1().
-
- * Made faidx more tolerant to empty lines right before or after FASTA >
- lines.
-
-
-Changes in other utilities:
-
- * Updated novo2sam.pl by Colin Hercus, the key developer of novoalign.
-
-
-This release involves several modifications to the key code base which
-may potentially introduce new bugs even though we have tried to minimize
-this by testing on several examples. Please let us know if you catch
-bugs.
-
-(0.1.5: 7 July 2009, r373)
-
-
-
-Beta Release 0.1.4 (21 May, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes:
-
- * Added the 'rmdupse' command: removing duplicates for SE reads.
-
- * Fixed a critical bug in the indel caller: clipped alignments are not
- processed correctly.
-
- * Fixed a bug in the tview: gapped alignment may be incorrectly
- displayed.
-
- * Unified the interface to BAM and SAM I/O. This is done by
- implementing a wrapper on top of the old APIs and therefore old APIs
- are still valid. The new I/O APIs also recognize the @SQ header
- lines.
-
- * Generate the MD tag.
-
- * Generate "=" bases. However, the indel caller will not work when "="
- bases are present.
-
- * Enhanced support of color-read display (by Nils Homer).
-
- * Implemented the GNU building system. However, currently the building
- system does not generate libbam.a. We will improve this later. For
- the time being, `make -f Makefile.generic' is preferred.
-
- * Fixed a minor bug in pileup: the first read in a chromosome may be
- skipped.
-
- * Fixed bugs in bam_aux.c. These bugs do not affect other components as
- they were not used previously.
-
- * Output the 'SM' tag from maq2sam.
-
-(0.1.4: 21 May 2009, r297)
-
-
-
-Beta Release 0.1.3 (15 April, 2009)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes in SAMtools:
-
- * SAMtools is more consistent with the specification: a) '*' in the
- QUAL field is allowed; b) the field separator is TAB only and SPACE
- is treated as a character in a field; c) empty header is allowed.
-
- * Implemented GLFv3 support in pileup.
-
- * Fixed a severe bug in fixmate: strand information is wrongly
- overwritten.
-
- * Fixed a bug in alignment retrieval: alignments bridging n*16384bp are
- not correctly retrieved sometimes.
-
- * Fixed a bug in rmdup: segfault if unmapped reads are present.
-
- * Move indel_filter.pl to samtools.pl and improved the filtering by
- checking the actual number of alignments containing indels. The indel
- pileup line is also changed a little to make this filtration easier.
-
- * Fixed a minor bug in indexing: the bin number of an unmapped read is
- wrongly calculated.
-
- * Added `flagstat' command to show statistics on the FLAG field.
-
- * Improved indel caller by setting the maximum window size in local
- realignment.
-
-Changes in other utilities:
-
- * Fixed a bug in maq2sam: a tag name is obsolete.
-
- * Improvement to wgsim: a) added support for SOLiD read simulation; b)
- show the number of substitutions/indels/errors in read name; c)
- considerable code clean up.
-
- * Various converters: improved functionality in general.
-
- * Updated the example SAM due to the previous bug in fixmate.
-
-(0.1.3: 15 April 2009, r227)
-
-
-
-Beta Release 0.1.2 (28 January, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Notable changes in SAMtools:
-
- * Implemented a Bayesian indel caller. The new caller generate scores
- and genotype and is potentially more accurate than Maq's indel
- caller. The pileup format is also changed accordingly.
-
- * Implemented rmdup command: remove potential PCR duplicates. Note that
- this command ONLY works for FR orientation and requires ISIZE is
- correctly set.
-
- * Added fixmate command: fill in mate coordinates, ISIZE and mate
- related flags from a name-sorted alignment.
-
- * Fixed a bug in indexing: reads bridging 16x kbp were not retrieved.
-
- * Allow to select reads shown in the pileup output with a mask.
-
- * Generate GLFv2 from pileup.
-
- * Added two more flags for flagging PCR/optical duplicates and for QC
- failure.
-
- * Fixed a bug in sort command: name sorting for large alignment did not
- work.
-
- * Allow to completely disable RAZF (using Makefile.lite) as some people
- have problem to compile it.
-
- * Fixed a bug in import command when there are reads without
- coordinates.
-
- * Fixed a bug in tview: clipping broke the alignment viewer.
-
- * Fixed a compiling error when _NO_CURSES is applied.
-
- * Fixed a bug in merge command.
-
-Changes in other utilities:
-
- * Added wgsim, a paired-end reads simulator. Wgsim was adapted from
- maq's reads simulator. Colin Hercus further improved it to allow
- longer indels.
-
- * Added wgsim_eval.pl, a script that evaluates the accuracy of
- alignment on reads generated by wgsim.
-
- * Added soap2sam.pl, a SOAP2->SAM converter. This converter does not
- work properly when multiple hits are output.
-
- * Added bowtie2sam.pl, a Bowtie->SAM converter. Only the top hit will
- be retained when multiple hits are present.
-
- * Fixed a bug in export2sam.pl for QC reads.
-
- * Support RG tag at MAQ->SAM converter.
-
- * Added novo2sam.pl, a NovoAlign->SAM converter. Multiple hits and
- indel are not properly handled, though.
-
- * Added zoom2sam.pl, a ZOOM->SAM converter. It only works with the
- default Illumina output.
-
-(0.1.2: 28 January 2008; r116)
-
-
-
-Beta Release 0.1.1 (22 December, 2008)
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-The is the first public release of samtools. For more information,
-please check the manual page `samtools.1' and the samtools website
-http://samtools.sourceforge.net
diff --git a/src/samtools-0.1.18/bam.c b/src/samtools-0.1.18/bam.c
deleted file mode 100644
index 0055e84..0000000
--- a/src/samtools-0.1.18/bam.c
+++ /dev/null
@@ -1,362 +0,0 @@
-#include <stdio.h>
-#include <ctype.h>
-#include <errno.h>
-#include <assert.h>
-#include "bam.h"
-#include "bam_endian.h"
-#include "kstring.h"
-#include "sam_header.h"
-
-int bam_is_be = 0, bam_verbose = 2;
-char *bam_flag2char_table = "pPuUrR12sfd\0\0\0\0\0";
-
-/**************************
- * CIGAR related routines *
- **************************/
-
-uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar)
-{
- uint32_t k, end;
- end = c->pos;
- for (k = 0; k < c->n_cigar; ++k) {
- int op = cigar[k] & BAM_CIGAR_MASK;
- if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP)
- end += cigar[k] >> BAM_CIGAR_SHIFT;
- }
- return end;
-}
-
-int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar)
-{
- uint32_t k;
- int32_t l = 0;
- for (k = 0; k < c->n_cigar; ++k) {
- int op = cigar[k] & BAM_CIGAR_MASK;
- if (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP || op == BAM_CEQUAL || op == BAM_CDIFF)
- l += cigar[k] >> BAM_CIGAR_SHIFT;
- }
- return l;
-}
-
-/********************
- * BAM I/O routines *
- ********************/
-
-bam_header_t *bam_header_init()
-{
- bam_is_be = bam_is_big_endian();
- return (bam_header_t*)calloc(1, sizeof(bam_header_t));
-}
-
-void bam_header_destroy(bam_header_t *header)
-{
- int32_t i;
- extern void bam_destroy_header_hash(bam_header_t *header);
- if (header == 0) return;
- if (header->target_name) {
- for (i = 0; i < header->n_targets; ++i)
- free(header->target_name[i]);
- free(header->target_name);
- free(header->target_len);
- }
- free(header->text);
- if (header->dict) sam_header_free(header->dict);
- if (header->rg2lib) sam_tbl_destroy(header->rg2lib);
- bam_destroy_header_hash(header);
- free(header);
-}
-
-bam_header_t *bam_header_read(bamFile fp)
-{
- bam_header_t *header;
- char buf[4];
- int magic_len;
- int32_t i = 1, name_len;
- // check EOF
- i = bgzf_check_EOF(fp);
- if (i < 0) {
- // If the file is a pipe, checking the EOF marker will *always* fail
- // with ESPIPE. Suppress the error message in this case.
- if (errno != ESPIPE) perror("[bam_header_read] bgzf_check_EOF");
- }
- else if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent. The input is probably truncated.\n");
- // read "BAM1"
- magic_len = bam_read(fp, buf, 4);
- if (magic_len != 4 || strncmp(buf, "BAM\001", 4) != 0) {
- fprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n");
- return 0;
- }
- header = bam_header_init();
- // read plain text and the number of reference sequences
- bam_read(fp, &header->l_text, 4);
- if (bam_is_be) bam_swap_endian_4p(&header->l_text);
- header->text = (char*)calloc(header->l_text + 1, 1);
- bam_read(fp, header->text, header->l_text);
- bam_read(fp, &header->n_targets, 4);
- if (bam_is_be) bam_swap_endian_4p(&header->n_targets);
- // read reference sequence names and lengths
- header->target_name = (char**)calloc(header->n_targets, sizeof(char*));
- header->target_len = (uint32_t*)calloc(header->n_targets, 4);
- for (i = 0; i != header->n_targets; ++i) {
- bam_read(fp, &name_len, 4);
- if (bam_is_be) bam_swap_endian_4p(&name_len);
- header->target_name[i] = (char*)calloc(name_len, 1);
- bam_read(fp, header->target_name[i], name_len);
- bam_read(fp, &header->target_len[i], 4);
- if (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);
- }
- return header;
-}
-
-int bam_header_write(bamFile fp, const bam_header_t *header)
-{
- char buf[4];
- int32_t i, name_len, x;
- // write "BAM1"
- strncpy(buf, "BAM\001", 4);
- bam_write(fp, buf, 4);
- // write plain text and the number of reference sequences
- if (bam_is_be) {
- x = bam_swap_endian_4(header->l_text);
- bam_write(fp, &x, 4);
- if (header->l_text) bam_write(fp, header->text, header->l_text);
- x = bam_swap_endian_4(header->n_targets);
- bam_write(fp, &x, 4);
- } else {
- bam_write(fp, &header->l_text, 4);
- if (header->l_text) bam_write(fp, header->text, header->l_text);
- bam_write(fp, &header->n_targets, 4);
- }
- // write sequence names and lengths
- for (i = 0; i != header->n_targets; ++i) {
- char *p = header->target_name[i];
- name_len = strlen(p) + 1;
- if (bam_is_be) {
- x = bam_swap_endian_4(name_len);
- bam_write(fp, &x, 4);
- } else bam_write(fp, &name_len, 4);
- bam_write(fp, p, name_len);
- if (bam_is_be) {
- x = bam_swap_endian_4(header->target_len[i]);
- bam_write(fp, &x, 4);
- } else bam_write(fp, &header->target_len[i], 4);
- }
- bgzf_flush(fp);
- return 0;
-}
-
-static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
-{
- uint8_t *s;
- uint32_t i, *cigar = (uint32_t*)(data + c->l_qname);
- s = data + c->n_cigar*4 + c->l_qname + c->l_qseq + (c->l_qseq + 1)/2;
- for (i = 0; i < c->n_cigar; ++i) bam_swap_endian_4p(&cigar[i]);
- while (s < data + data_len) {
- uint8_t type;
- s += 2; // skip key
- type = toupper(*s); ++s; // skip type
- if (type == 'C' || type == 'A') ++s;
- else if (type == 'S') { bam_swap_endian_2p(s); s += 2; }
- else if (type == 'I' || type == 'F') { bam_swap_endian_4p(s); s += 4; }
- else if (type == 'D') { bam_swap_endian_8p(s); s += 8; }
- else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; }
- else if (type == 'B') {
- int32_t n, Bsize = bam_aux_type2size(*s);
- memcpy(&n, s + 1, 4);
- if (1 == Bsize) {
- } else if (2 == Bsize) {
- for (i = 0; i < n; i += 2)
- bam_swap_endian_2p(s + 5 + i);
- } else if (4 == Bsize) {
- for (i = 0; i < n; i += 4)
- bam_swap_endian_4p(s + 5 + i);
- }
- bam_swap_endian_4p(s+1);
- }
- }
-}
-
-int bam_read1(bamFile fp, bam1_t *b)
-{
- bam1_core_t *c = &b->core;
- int32_t block_len, ret, i;
- uint32_t x[8];
-
- assert(BAM_CORE_SIZE == 32);
- if ((ret = bam_read(fp, &block_len, 4)) != 4) {
- if (ret == 0) return -1; // normal end-of-file
- else return -2; // truncated
- }
- if (bam_read(fp, x, BAM_CORE_SIZE) != BAM_CORE_SIZE) return -3;
- if (bam_is_be) {
- bam_swap_endian_4p(&block_len);
- for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);
- }
- c->tid = x[0]; c->pos = x[1];
- c->bin = x[2]>>16; c->qual = x[2]>>8&0xff; c->l_qname = x[2]&0xff;
- c->flag = x[3]>>16; c->n_cigar = x[3]&0xffff;
- c->l_qseq = x[4];
- c->mtid = x[5]; c->mpos = x[6]; c->isize = x[7];
- b->data_len = block_len - BAM_CORE_SIZE;
- if (b->m_data < b->data_len) {
- b->m_data = b->data_len;
- kroundup32(b->m_data);
- b->data = (uint8_t*)realloc(b->data, b->m_data);
- }
- if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4;
- b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
- if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
- return 4 + block_len;
-}
-
-inline int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data)
-{
- uint32_t x[8], block_len = data_len + BAM_CORE_SIZE, y;
- int i;
- assert(BAM_CORE_SIZE == 32);
- x[0] = c->tid;
- x[1] = c->pos;
- x[2] = (uint32_t)c->bin<<16 | c->qual<<8 | c->l_qname;
- x[3] = (uint32_t)c->flag<<16 | c->n_cigar;
- x[4] = c->l_qseq;
- x[5] = c->mtid;
- x[6] = c->mpos;
- x[7] = c->isize;
- bgzf_flush_try(fp, 4 + block_len);
- if (bam_is_be) {
- for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);
- y = block_len;
- bam_write(fp, bam_swap_endian_4p(&y), 4);
- swap_endian_data(c, data_len, data);
- } else bam_write(fp, &block_len, 4);
- bam_write(fp, x, BAM_CORE_SIZE);
- bam_write(fp, data, data_len);
- if (bam_is_be) swap_endian_data(c, data_len, data);
- return 4 + block_len;
-}
-
-int bam_write1(bamFile fp, const bam1_t *b)
-{
- return bam_write1_core(fp, &b->core, b->data_len, b->data);
-}
-
-char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)
-{
- uint8_t *s = bam1_seq(b), *t = bam1_qual(b);
- int i;
- const bam1_core_t *c = &b->core;
- kstring_t str;
- str.l = str.m = 0; str.s = 0;
-
- kputsn(bam1_qname(b), c->l_qname-1, &str); kputc('\t', &str);
- if (of == BAM_OFDEC) { kputw(c->flag, &str); kputc('\t', &str); }
- else if (of == BAM_OFHEX) ksprintf(&str, "0x%x\t", c->flag);
- else { // BAM_OFSTR
- for (i = 0; i < 16; ++i)
- if ((c->flag & 1<<i) && bam_flag2char_table[i])
- kputc(bam_flag2char_table[i], &str);
- kputc('\t', &str);
- }
- if (c->tid < 0) kputsn("*\t", 2, &str);
- else {
- if (header) kputs(header->target_name[c->tid] , &str);
- else kputw(c->tid, &str);
- kputc('\t', &str);
- }
- kputw(c->pos + 1, &str); kputc('\t', &str); kputw(c->qual, &str); kputc('\t', &str);
- if (c->n_cigar == 0) kputc('*', &str);
- else {
- for (i = 0; i < c->n_cigar; ++i) {
- kputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);
- kputc("MIDNSHP=X"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str);
- }
- }
- kputc('\t', &str);
- if (c->mtid < 0) kputsn("*\t", 2, &str);
- else if (c->mtid == c->tid) kputsn("=\t", 2, &str);
- else {
- if (header) kputs(header->target_name[c->mtid], &str);
- else kputw(c->mtid, &str);
- kputc('\t', &str);
- }
- kputw(c->mpos + 1, &str); kputc('\t', &str); kputw(c->isize, &str); kputc('\t', &str);
- if (c->l_qseq) {
- for (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);
- kputc('\t', &str);
- if (t[0] == 0xff) kputc('*', &str);
- else for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);
- } else kputsn("*\t*", 3, &str);
- s = bam1_aux(b);
- while (s < b->data + b->data_len) {
- uint8_t type, key[2];
- key[0] = s[0]; key[1] = s[1];
- s += 2; type = *s; ++s;
- kputc('\t', &str); kputsn((char*)key, 2, &str); kputc(':', &str);
- if (type == 'A') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }
- else if (type == 'C') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }
- else if (type == 'c') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }
- else if (type == 'S') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }
- else if (type == 's') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }
- else if (type == 'I') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }
- else if (type == 'i') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }
- else if (type == 'f') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }
- else if (type == 'd') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }
- else if (type == 'Z' || type == 'H') { kputc(type, &str); kputc(':', &str); while (*s) kputc(*s++, &str); ++s; }
- else if (type == 'B') {
- uint8_t sub_type = *(s++);
- int32_t n;
- memcpy(&n, s, 4);
- s += 4; // no point to the start of the array
- kputc(type, &str); kputc(':', &str); kputc(sub_type, &str); // write the typing
- for (i = 0; i < n; ++i) {
- kputc(',', &str);
- if ('c' == sub_type || 'c' == sub_type) { kputw(*(int8_t*)s, &str); ++s; }
- else if ('C' == sub_type) { kputw(*(uint8_t*)s, &str); ++s; }
- else if ('s' == sub_type) { kputw(*(int16_t*)s, &str); s += 2; }
- else if ('S' == sub_type) { kputw(*(uint16_t*)s, &str); s += 2; }
- else if ('i' == sub_type) { kputw(*(int32_t*)s, &str); s += 4; }
- else if ('I' == sub_type) { kputuw(*(uint32_t*)s, &str); s += 4; }
- else if ('f' == sub_type) { ksprintf(&str, "%g", *(float*)s); s += 4; }
- }
- }
- }
- return str.s;
-}
-
-char *bam_format1(const bam_header_t *header, const bam1_t *b)
-{
- return bam_format1_core(header, b, BAM_OFDEC);
-}
-
-void bam_view1(const bam_header_t *header, const bam1_t *b)
-{
- char *s = bam_format1(header, b);
- puts(s);
- free(s);
-}
-
-int bam_validate1(const bam_header_t *header, const bam1_t *b)
-{
- char *s;
-
- if (b->core.tid < -1 || b->core.mtid < -1) return 0;
- if (header && (b->core.tid >= header->n_targets || b->core.mtid >= header->n_targets)) return 0;
-
- if (b->data_len < b->core.l_qname) return 0;
- s = memchr(bam1_qname(b), '\0', b->core.l_qname);
- if (s != &bam1_qname(b)[b->core.l_qname-1]) return 0;
-
- // FIXME: Other fields could also be checked, especially the auxiliary data
-
- return 1;
-}
-
-// FIXME: we should also check the LB tag associated with each alignment
-const char *bam_get_library(bam_header_t *h, const bam1_t *b)
-{
- const uint8_t *rg;
- if (h->dict == 0) h->dict = sam_header_parse2(h->text);
- if (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB");
- rg = bam_aux_get(b, "RG");
- return (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1));
-}
diff --git a/src/samtools-0.1.18/bam.h b/src/samtools-0.1.18/bam.h
deleted file mode 100644
index 346c750..0000000
--- a/src/samtools-0.1.18/bam.h
+++ /dev/null
@@ -1,763 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008-2010 Genome Research Ltd (GRL).
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Contact: Heng Li <lh3 at sanger.ac.uk> */
-
-#ifndef BAM_BAM_H
-#define BAM_BAM_H
-
-/*!
- @header
-
- BAM library provides I/O and various operations on manipulating files
- in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)
- format. It now supports importing from or exporting to SAM, sorting,
- merging, generating pileup, and quickly retrieval of reads overlapped
- with a specified region.
-
- @copyright Genome Research Ltd.
- */
-
-#define BAM_VERSION "0.1.18 (r982:295)"
-
-#include <stdint.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-
-#ifndef BAM_LITE
-#define BAM_VIRTUAL_OFFSET16
-#include "bgzf.h"
-/*! @abstract BAM file handler */
-typedef BGZF *bamFile;
-#define bam_open(fn, mode) bgzf_open(fn, mode)
-#define bam_dopen(fd, mode) bgzf_fdopen(fd, mode)
-#define bam_close(fp) bgzf_close(fp)
-#define bam_read(fp, buf, size) bgzf_read(fp, buf, size)
-#define bam_write(fp, buf, size) bgzf_write(fp, buf, size)
-#define bam_tell(fp) bgzf_tell(fp)
-#define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir)
-#else
-#define BAM_TRUE_OFFSET
-#include <zlib.h>
-typedef gzFile bamFile;
-#define bam_open(fn, mode) gzopen(fn, mode)
-#define bam_dopen(fd, mode) gzdopen(fd, mode)
-#define bam_close(fp) gzclose(fp)
-#define bam_read(fp, buf, size) gzread(fp, buf, size)
-/* no bam_write/bam_tell/bam_seek() here */
-#endif
-
-/*! @typedef
- @abstract Structure for the alignment header.
- @field n_targets number of reference sequences
- @field target_name names of the reference sequences
- @field target_len lengths of the referene sequences
- @field dict header dictionary
- @field hash hash table for fast name lookup
- @field rg2lib hash table for @RG-ID -> LB lookup
- @field l_text length of the plain text in the header
- @field text plain text
-
- @discussion Field hash points to null by default. It is a private
- member.
- */
-typedef struct {
- int32_t n_targets;
- char **target_name;
- uint32_t *target_len;
- void *dict, *hash, *rg2lib;
- size_t l_text, n_text;
- char *text;
-} bam_header_t;
-
-/*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
-#define BAM_FPAIRED 1
-/*! @abstract the read is mapped in a proper pair */
-#define BAM_FPROPER_PAIR 2
-/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
-#define BAM_FUNMAP 4
-/*! @abstract the mate is unmapped */
-#define BAM_FMUNMAP 8
-/*! @abstract the read is mapped to the reverse strand */
-#define BAM_FREVERSE 16
-/*! @abstract the mate is mapped to the reverse strand */
-#define BAM_FMREVERSE 32
-/*! @abstract this is read1 */
-#define BAM_FREAD1 64
-/*! @abstract this is read2 */
-#define BAM_FREAD2 128
-/*! @abstract not primary alignment */
-#define BAM_FSECONDARY 256
-/*! @abstract QC failure */
-#define BAM_FQCFAIL 512
-/*! @abstract optical or PCR duplicate */
-#define BAM_FDUP 1024
-
-#define BAM_OFDEC 0
-#define BAM_OFHEX 1
-#define BAM_OFSTR 2
-
-/*! @abstract defautl mask for pileup */
-#define BAM_DEF_MASK (BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP)
-
-#define BAM_CORE_SIZE sizeof(bam1_core_t)
-
-/**
- * Describing how CIGAR operation/length is packed in a 32-bit integer.
- */
-#define BAM_CIGAR_SHIFT 4
-#define BAM_CIGAR_MASK ((1 << BAM_CIGAR_SHIFT) - 1)
-
-/*
- CIGAR operations.
- */
-/*! @abstract CIGAR: M = match or mismatch*/
-#define BAM_CMATCH 0
-/*! @abstract CIGAR: I = insertion to the reference */
-#define BAM_CINS 1
-/*! @abstract CIGAR: D = deletion from the reference */
-#define BAM_CDEL 2
-/*! @abstract CIGAR: N = skip on the reference (e.g. spliced alignment) */
-#define BAM_CREF_SKIP 3
-/*! @abstract CIGAR: S = clip on the read with clipped sequence
- present in qseq */
-#define BAM_CSOFT_CLIP 4
-/*! @abstract CIGAR: H = clip on the read with clipped sequence trimmed off */
-#define BAM_CHARD_CLIP 5
-/*! @abstract CIGAR: P = padding */
-#define BAM_CPAD 6
-/*! @abstract CIGAR: equals = match */
-#define BAM_CEQUAL 7
-/*! @abstract CIGAR: X = mismatch */
-#define BAM_CDIFF 8
-
-/*! @typedef
- @abstract Structure for core alignment information.
- @field tid chromosome ID, defined by bam_header_t
- @field pos 0-based leftmost coordinate
- @field strand strand; 0 for forward and 1 otherwise
- @field bin bin calculated by bam_reg2bin()
- @field qual mapping quality
- @field l_qname length of the query name
- @field flag bitwise flag
- @field n_cigar number of CIGAR operations
- @field l_qseq length of the query sequence (read)
- */
-typedef struct {
- int32_t tid;
- int32_t pos;
- uint32_t bin:16, qual:8, l_qname:8;
- uint32_t flag:16, n_cigar:16;
- int32_t l_qseq;
- int32_t mtid;
- int32_t mpos;
- int32_t isize;
-} bam1_core_t;
-
-/*! @typedef
- @abstract Structure for one alignment.
- @field core core information about the alignment
- @field l_aux length of auxiliary data
- @field data_len current length of bam1_t::data
- @field m_data maximum length of bam1_t::data
- @field data all variable-length data, concatenated; structure: cigar-qname-seq-qual-aux
-
- @discussion Notes:
-
- 1. qname is zero tailing and core.l_qname includes the tailing '\0'.
- 2. l_qseq is calculated from the total length of an alignment block
- on reading or from CIGAR.
- */
-typedef struct {
- bam1_core_t core;
- int l_aux, data_len, m_data;
- uint8_t *data;
-} bam1_t;
-
-typedef struct __bam_iter_t *bam_iter_t;
-
-#define bam1_strand(b) (((b)->core.flag&BAM_FREVERSE) != 0)
-#define bam1_mstrand(b) (((b)->core.flag&BAM_FMREVERSE) != 0)
-
-/*! @function
- @abstract Get the CIGAR array
- @param b pointer to an alignment
- @return pointer to the CIGAR array
-
- @discussion In the CIGAR array, each element is a 32-bit integer. The
- lower 4 bits gives a CIGAR operation and the higher 28 bits keep the
- length of a CIGAR.
- */
-#define bam1_cigar(b) ((uint32_t*)((b)->data + (b)->core.l_qname))
-
-/*! @function
- @abstract Get the name of the query
- @param b pointer to an alignment
- @return pointer to the name string, null terminated
- */
-#define bam1_qname(b) ((char*)((b)->data))
-
-/*! @function
- @abstract Get query sequence
- @param b pointer to an alignment
- @return pointer to sequence
-
- @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
- 8 for T and 15 for N. Two bases are packed in one byte with the base
- at the higher 4 bits having smaller coordinate on the read. It is
- recommended to use bam1_seqi() macro to get the base.
- */
-#define bam1_seq(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname)
-
-/*! @function
- @abstract Get query quality
- @param b pointer to an alignment
- @return pointer to quality string
- */
-#define bam1_qual(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
-
-/*! @function
- @abstract Get a base on read
- @param s Query sequence returned by bam1_seq()
- @param i The i-th position, 0-based
- @return 4-bit integer representing the base.
- */
-#define bam1_seqi(s, i) ((s)[(i)/2] >> 4*(1-(i)%2) & 0xf)
-
-/*! @function
- @abstract Get query sequence and quality
- @param b pointer to an alignment
- @return pointer to the concatenated auxiliary data
- */
-#define bam1_aux(b) ((b)->data + (b)->core.n_cigar*4 + (b)->core.l_qname + (b)->core.l_qseq + ((b)->core.l_qseq + 1)/2)
-
-#ifndef kroundup32
-/*! @function
- @abstract Round an integer to the next closest power-2 integer.
- @param x integer to be rounded (in place)
- @discussion x will be modified.
- */
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-/*!
- @abstract Whether the machine is big-endian; modified only in
- bam_header_init().
- */
-extern int bam_is_be;
-
-/*!
- @abstract Verbose level between 0 and 3; 0 is supposed to disable all
- debugging information, though this may not have been implemented.
- */
-extern int bam_verbose;
-
-/*! @abstract Table for converting a nucleotide character to the 4-bit encoding. */
-extern unsigned char bam_nt16_table[256];
-
-/*! @abstract Table for converting a 4-bit encoded nucleotide to a letter. */
-extern char *bam_nt16_rev_table;
-
-extern char bam_nt16_nt4_table[];
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- /*********************
- * Low-level SAM I/O *
- *********************/
-
- /*! @abstract TAM file handler */
- typedef struct __tamFile_t *tamFile;
-
- /*!
- @abstract Open a SAM file for reading, either uncompressed or compressed by gzip/zlib.
- @param fn SAM file name
- @return SAM file handler
- */
- tamFile sam_open(const char *fn);
-
- /*!
- @abstract Close a SAM file handler
- @param fp SAM file handler
- */
- void sam_close(tamFile fp);
-
- /*!
- @abstract Read one alignment from a SAM file handler
- @param fp SAM file handler
- @param header header information (ordered names of chromosomes)
- @param b read alignment; all members in b will be updated
- @return 0 if successful; otherwise negative
- */
- int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b);
-
- /*!
- @abstract Read header information from a TAB-delimited list file.
- @param fn_list file name for the list
- @return a pointer to the header structure
-
- @discussion Each line in this file consists of chromosome name and
- the length of chromosome.
- */
- bam_header_t *sam_header_read2(const char *fn_list);
-
- /*!
- @abstract Read header from a SAM file (if present)
- @param fp SAM file handler
- @return pointer to header struct; 0 if no @SQ lines available
- */
- bam_header_t *sam_header_read(tamFile fp);
-
- /*!
- @abstract Parse @SQ lines a update a header struct
- @param h pointer to the header struct to be updated
- @return number of target sequences
-
- @discussion bam_header_t::{n_targets,target_len,target_name} will
- be destroyed in the first place.
- */
- int sam_header_parse(bam_header_t *h);
- int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
-
- /*!
- @abstract Parse @RG lines a update a header struct
- @param h pointer to the header struct to be updated
- @return number of @RG lines
-
- @discussion bam_header_t::rg2lib will be destroyed in the first
- place.
- */
- int sam_header_parse_rg(bam_header_t *h);
-
-#define sam_write1(header, b) bam_view1(header, b)
-
-
- /********************************
- * APIs for string dictionaries *
- ********************************/
-
- int bam_strmap_put(void *strmap, const char *rg, const char *lib);
- const char *bam_strmap_get(const void *strmap, const char *rg);
- void *bam_strmap_dup(const void*);
- void *bam_strmap_init();
- void bam_strmap_destroy(void *strmap);
-
-
- /*********************
- * Low-level BAM I/O *
- *********************/
-
- /*!
- @abstract Initialize a header structure.
- @return the pointer to the header structure
-
- @discussion This function also modifies the global variable
- bam_is_be.
- */
- bam_header_t *bam_header_init();
-
- /*!
- @abstract Destroy a header structure.
- @param header pointer to the header
- */
- void bam_header_destroy(bam_header_t *header);
-
- /*!
- @abstract Read a header structure from BAM.
- @param fp BAM file handler, opened by bam_open()
- @return pointer to the header structure
-
- @discussion The file position indicator must be placed at the
- beginning of the file. Upon success, the position indicator will
- be set at the start of the first alignment.
- */
- bam_header_t *bam_header_read(bamFile fp);
-
- /*!
- @abstract Write a header structure to BAM.
- @param fp BAM file handler
- @param header pointer to the header structure
- @return always 0 currently
- */
- int bam_header_write(bamFile fp, const bam_header_t *header);
-
- /*!
- @abstract Read an alignment from BAM.
- @param fp BAM file handler
- @param b read alignment; all members are updated.
- @return number of bytes read from the file
-
- @discussion The file position indicator must be
- placed right before an alignment. Upon success, this function
- will set the position indicator to the start of the next
- alignment. This function is not affected by the machine
- endianness.
- */
- int bam_read1(bamFile fp, bam1_t *b);
-
- /*!
- @abstract Write an alignment to BAM.
- @param fp BAM file handler
- @param c pointer to the bam1_core_t structure
- @param data_len total length of variable size data related to
- the alignment
- @param data pointer to the concatenated data
- @return number of bytes written to the file
-
- @discussion This function is not affected by the machine
- endianness.
- */
- int bam_write1_core(bamFile fp, const bam1_core_t *c, int data_len, uint8_t *data);
-
- /*!
- @abstract Write an alignment to BAM.
- @param fp BAM file handler
- @param b alignment to write
- @return number of bytes written to the file
-
- @abstract It is equivalent to:
- bam_write1_core(fp, &b->core, b->data_len, b->data)
- */
- int bam_write1(bamFile fp, const bam1_t *b);
-
- /*! @function
- @abstract Initiate a pointer to bam1_t struct
- */
-#define bam_init1() ((bam1_t*)calloc(1, sizeof(bam1_t)))
-
- /*! @function
- @abstract Free the memory allocated for an alignment.
- @param b pointer to an alignment
- */
-#define bam_destroy1(b) do { \
- if (b) { free((b)->data); free(b); } \
- } while (0)
-
- /*!
- @abstract Format a BAM record in the SAM format
- @param header pointer to the header structure
- @param b alignment to print
- @return a pointer to the SAM string
- */
- char *bam_format1(const bam_header_t *header, const bam1_t *b);
-
- char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of);
-
- /*!
- @abstract Check whether a BAM record is plausibly valid
- @param header associated header structure, or NULL if unavailable
- @param b alignment to validate
- @return 0 if the alignment is invalid; non-zero otherwise
-
- @discussion Simple consistency check of some of the fields of the
- alignment record. If the header is provided, several additional checks
- are made. Not all fields are checked, so a non-zero result is not a
- guarantee that the record is valid. However it is usually good enough
- to detect when bam_seek() has been called with a virtual file offset
- that is not the offset of an alignment record.
- */
- int bam_validate1(const bam_header_t *header, const bam1_t *b);
-
- const char *bam_get_library(bam_header_t *header, const bam1_t *b);
-
-
- /***************
- * pileup APIs *
- ***************/
-
- /*! @typedef
- @abstract Structure for one alignment covering the pileup position.
- @field b pointer to the alignment
- @field qpos position of the read base at the pileup site, 0-based
- @field indel indel length; 0 for no indel, positive for ins and negative for del
- @field is_del 1 iff the base on the padded read is a deletion
- @field level the level of the read in the "viewer" mode
-
- @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The
- difference between the two functions is that the former does not
- set bam_pileup1_t::level, while the later does. Level helps the
- implementation of alignment viewers, but calculating this has some
- overhead.
- */
- typedef struct {
- bam1_t *b;
- int32_t qpos;
- int indel, level;
- uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
- } bam_pileup1_t;
-
- typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
-
- struct __bam_plp_t;
- typedef struct __bam_plp_t *bam_plp_t;
-
- bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
- int bam_plp_push(bam_plp_t iter, const bam1_t *b);
- const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
- const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
- void bam_plp_set_mask(bam_plp_t iter, int mask);
- void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
- void bam_plp_reset(bam_plp_t iter);
- void bam_plp_destroy(bam_plp_t iter);
-
- struct __bam_mplp_t;
- typedef struct __bam_mplp_t *bam_mplp_t;
-
- bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
- void bam_mplp_destroy(bam_mplp_t iter);
- void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
- int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
-
- /*! @typedef
- @abstract Type of function to be called by bam_plbuf_push().
- @param tid chromosome ID as is defined in the header
- @param pos start coordinate of the alignment, 0-based
- @param n number of elements in pl array
- @param pl array of alignments
- @param data user provided data
- @discussion See also bam_plbuf_push(), bam_plbuf_init() and bam_pileup1_t.
- */
- typedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data);
-
- typedef struct {
- bam_plp_t iter;
- bam_pileup_f func;
- void *data;
- } bam_plbuf_t;
-
- void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask);
- void bam_plbuf_reset(bam_plbuf_t *buf);
- bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data);
- void bam_plbuf_destroy(bam_plbuf_t *buf);
- int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf);
-
- int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data);
-
- struct __bam_lplbuf_t;
- typedef struct __bam_lplbuf_t bam_lplbuf_t;
-
- void bam_lplbuf_reset(bam_lplbuf_t *buf);
-
- /*! @abstract bam_plbuf_init() equivalent with level calculated. */
- bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data);
-
- /*! @abstract bam_plbuf_destroy() equivalent with level calculated. */
- void bam_lplbuf_destroy(bam_lplbuf_t *tv);
-
- /*! @abstract bam_plbuf_push() equivalent with level calculated. */
- int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *buf);
-
-
- /*********************
- * BAM indexing APIs *
- *********************/
-
- struct __bam_index_t;
- typedef struct __bam_index_t bam_index_t;
-
- /*!
- @abstract Build index for a BAM file.
- @discussion Index file "fn.bai" will be created.
- @param fn name of the BAM file
- @return always 0 currently
- */
- int bam_index_build(const char *fn);
-
- /*!
- @abstract Load index from file "fn.bai".
- @param fn name of the BAM file (NOT the index file)
- @return pointer to the index structure
- */
- bam_index_t *bam_index_load(const char *fn);
-
- /*!
- @abstract Destroy an index structure.
- @param idx pointer to the index structure
- */
- void bam_index_destroy(bam_index_t *idx);
-
- /*! @typedef
- @abstract Type of function to be called by bam_fetch().
- @param b the alignment
- @param data user provided data
- */
- typedef int (*bam_fetch_f)(const bam1_t *b, void *data);
-
- /*!
- @abstract Retrieve the alignments that are overlapped with the
- specified region.
-
- @discussion A user defined function will be called for each
- retrieved alignment ordered by its start position.
-
- @param fp BAM file handler
- @param idx pointer to the alignment index
- @param tid chromosome ID as is defined in the header
- @param beg start coordinate, 0-based
- @param end end coordinate, 0-based
- @param data user provided data (will be transferred to func)
- @param func user defined function
- */
- int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func);
-
- bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end);
- int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);
- void bam_iter_destroy(bam_iter_t iter);
-
- /*!
- @abstract Parse a region in the format: "chr2:100,000-200,000".
- @discussion bam_header_t::hash will be initialized if empty.
- @param header pointer to the header structure
- @param str string to be parsed
- @param ref_id the returned chromosome ID
- @param begin the returned start coordinate
- @param end the returned end coordinate
- @return 0 on success; -1 on failure
- */
- int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);
-
-
- /**************************
- * APIs for optional tags *
- **************************/
-
- /*!
- @abstract Retrieve data of a tag
- @param b pointer to an alignment struct
- @param tag two-character tag to be retrieved
-
- @return pointer to the type and data. The first character is the
- type that can be 'iIsScCdfAZH'.
-
- @discussion Use bam_aux2?() series to convert the returned data to
- the corresponding type.
- */
- uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);
-
- int32_t bam_aux2i(const uint8_t *s);
- float bam_aux2f(const uint8_t *s);
- double bam_aux2d(const uint8_t *s);
- char bam_aux2A(const uint8_t *s);
- char *bam_aux2Z(const uint8_t *s);
-
- int bam_aux_del(bam1_t *b, uint8_t *s);
- void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);
- uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()
-
-
- /*****************
- * Miscellaneous *
- *****************/
-
- /*!
- @abstract Calculate the rightmost coordinate of an alignment on the
- reference genome.
-
- @param c pointer to the bam1_core_t structure
- @param cigar the corresponding CIGAR array (from bam1_t::cigar)
- @return the rightmost coordinate, 0-based
- */
- uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar);
-
- /*!
- @abstract Calculate the length of the query sequence from CIGAR.
- @param c pointer to the bam1_core_t structure
- @param cigar the corresponding CIGAR array (from bam1_t::cigar)
- @return length of the query sequence
- */
- int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar);
-
-#ifdef __cplusplus
-}
-#endif
-
-/*!
- @abstract Calculate the minimum bin that contains a region [beg,end).
- @param beg start of the region, 0-based
- @param end end of the region, 0-based
- @return bin
- */
-static inline int bam_reg2bin(uint32_t beg, uint32_t end)
-{
- --end;
- if (beg>>14 == end>>14) return 4681 + (beg>>14);
- if (beg>>17 == end>>17) return 585 + (beg>>17);
- if (beg>>20 == end>>20) return 73 + (beg>>20);
- if (beg>>23 == end>>23) return 9 + (beg>>23);
- if (beg>>26 == end>>26) return 1 + (beg>>26);
- return 0;
-}
-
-/*!
- @abstract Copy an alignment
- @param bdst destination alignment struct
- @param bsrc source alignment struct
- @return pointer to the destination alignment struct
- */
-static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc)
-{
- uint8_t *data = bdst->data;
- int m_data = bdst->m_data; // backup data and m_data
- if (m_data < bsrc->data_len) { // double the capacity
- m_data = bsrc->data_len; kroundup32(m_data);
- data = (uint8_t*)realloc(data, m_data);
- }
- memcpy(data, bsrc->data, bsrc->data_len); // copy var-len data
- *bdst = *bsrc; // copy the rest
- // restore the backup
- bdst->m_data = m_data;
- bdst->data = data;
- return bdst;
-}
-
-/*!
- @abstract Duplicate an alignment
- @param src source alignment struct
- @return pointer to the destination alignment struct
- */
-static inline bam1_t *bam_dup1(const bam1_t *src)
-{
- bam1_t *b;
- b = bam_init1();
- *b = *src;
- b->m_data = b->data_len;
- b->data = (uint8_t*)calloc(b->data_len, 1);
- memcpy(b->data, src->data, b->data_len);
- return b;
-}
-
-static inline int bam_aux_type2size(int x)
-{
- if (x == 'C' || x == 'c' || x == 'A') return 1;
- else if (x == 'S' || x == 's') return 2;
- else if (x == 'I' || x == 'i' || x == 'f') return 4;
- else return 0;
-}
-
-
-#endif
diff --git a/src/samtools-0.1.18/bam2bcf.c b/src/samtools-0.1.18/bam2bcf.c
deleted file mode 100644
index dec3305..0000000
--- a/src/samtools-0.1.18/bam2bcf.c
+++ /dev/null
@@ -1,351 +0,0 @@
-#include <math.h>
-#include <stdint.h>
-#include "bam.h"
-#include "kstring.h"
-#include "bam2bcf.h"
-#include "errmod.h"
-#include "bcftools/bcf.h"
-
-extern void ks_introsort_uint32_t(size_t n, uint32_t a[]);
-
-#define CALL_ETA 0.03f
-#define CALL_MAX 256
-#define CALL_DEFTHETA 0.83f
-#define DEF_MAPQ 20
-
-#define CAP_DIST 25
-
-bcf_callaux_t *bcf_call_init(double theta, int min_baseQ)
-{
- bcf_callaux_t *bca;
- if (theta <= 0.) theta = CALL_DEFTHETA;
- bca = calloc(1, sizeof(bcf_callaux_t));
- bca->capQ = 60;
- bca->openQ = 40; bca->extQ = 20; bca->tandemQ = 100;
- bca->min_baseQ = min_baseQ;
- bca->e = errmod_init(1. - theta);
- bca->min_frac = 0.002;
- bca->min_support = 1;
- return bca;
-}
-
-void bcf_call_destroy(bcf_callaux_t *bca)
-{
- if (bca == 0) return;
- errmod_destroy(bca->e);
- free(bca->bases); free(bca->inscns); free(bca);
-}
-/* ref_base is the 4-bit representation of the reference base. It is
- * negative if we are looking at an indel. */
-int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r)
-{
- static int *var_pos = NULL, nvar_pos = 0;
- int i, n, ref4, is_indel, ori_depth = 0;
- memset(r, 0, sizeof(bcf_callret1_t));
- if (ref_base >= 0) {
- ref4 = bam_nt16_nt4_table[ref_base];
- is_indel = 0;
- } else ref4 = 4, is_indel = 1;
- if (_n == 0) return -1;
- // enlarge the bases array if necessary
- if (bca->max_bases < _n) {
- bca->max_bases = _n;
- kroundup32(bca->max_bases);
- bca->bases = (uint16_t*)realloc(bca->bases, 2 * bca->max_bases);
- }
- // fill the bases array
- memset(r, 0, sizeof(bcf_callret1_t));
- for (i = n = 0; i < _n; ++i) {
- const bam_pileup1_t *p = pl + i;
- int q, b, mapQ, baseQ, is_diff, min_dist, seqQ;
- // set base
- if (p->is_del || p->is_refskip || (p->b->core.flag&BAM_FUNMAP)) continue;
- ++ori_depth;
- baseQ = q = is_indel? p->aux&0xff : (int)bam1_qual(p->b)[p->qpos]; // base/indel quality
- seqQ = is_indel? (p->aux>>8&0xff) : 99;
- if (q < bca->min_baseQ) continue;
- if (q > seqQ) q = seqQ;
- mapQ = p->b->core.qual < 255? p->b->core.qual : DEF_MAPQ; // special case for mapQ==255
- mapQ = mapQ < bca->capQ? mapQ : bca->capQ;
- if (q > mapQ) q = mapQ;
- if (q > 63) q = 63;
- if (q < 4) q = 4;
- if (!is_indel) {
- b = bam1_seqi(bam1_seq(p->b), p->qpos); // base
- b = bam_nt16_nt4_table[b? b : ref_base]; // b is the 2-bit base
- is_diff = (ref4 < 4 && b == ref4)? 0 : 1;
- } else {
- b = p->aux>>16&0x3f;
- is_diff = (b != 0);
- }
- bca->bases[n++] = q<<5 | (int)bam1_strand(p->b)<<4 | b;
- // collect annotations
- if (b < 4) r->qsum[b] += q;
- ++r->anno[0<<2|is_diff<<1|bam1_strand(p->b)];
- min_dist = p->b->core.l_qseq - 1 - p->qpos;
- if (min_dist > p->qpos) min_dist = p->qpos;
- if (min_dist > CAP_DIST) min_dist = CAP_DIST;
- r->anno[1<<2|is_diff<<1|0] += baseQ;
- r->anno[1<<2|is_diff<<1|1] += baseQ * baseQ;
- r->anno[2<<2|is_diff<<1|0] += mapQ;
- r->anno[2<<2|is_diff<<1|1] += mapQ * mapQ;
- r->anno[3<<2|is_diff<<1|0] += min_dist;
- r->anno[3<<2|is_diff<<1|1] += min_dist * min_dist;
- }
- r->depth = n; r->ori_depth = ori_depth;
- // glfgen
- errmod_cal(bca->e, n, 5, bca->bases, r->p);
-
- // Calculate the Variant Distance Bias (make it optional?)
- if ( nvar_pos < _n ) {
- nvar_pos = _n;
- var_pos = realloc(var_pos,sizeof(int)*nvar_pos);
- }
- int alt_dp=0, read_len=0;
- for (i=0; i<_n; i++) {
- const bam_pileup1_t *p = pl + i;
- if ( bam1_seqi(bam1_seq(p->b),p->qpos) == ref_base )
- continue;
-
- var_pos[alt_dp] = p->qpos;
- if ( (bam1_cigar(p->b)[0]&BAM_CIGAR_MASK)==4 )
- var_pos[alt_dp] -= bam1_cigar(p->b)[0]>>BAM_CIGAR_SHIFT;
-
- alt_dp++;
- read_len += p->b->core.l_qseq;
- }
- float mvd=0;
- int j;
- n=0;
- for (i=0; i<alt_dp; i++) {
- for (j=0; j<i; j++) {
- mvd += abs(var_pos[i] - var_pos[j]);
- n++;
- }
- }
- r->mvd[0] = n ? mvd/n : 0;
- r->mvd[1] = alt_dp;
- r->mvd[2] = alt_dp ? read_len/alt_dp : 0;
-
- return r->depth;
-}
-
-
-void calc_vdb(int n, const bcf_callret1_t *calls, bcf_call_t *call)
-{
- // Variant distance bias. Samples merged by means of DP-weighted average.
-
- float weight=0, tot_prob=0;
-
- int i;
- for (i=0; i<n; i++)
- {
- int mvd = calls[i].mvd[0];
- int dp = calls[i].mvd[1];
- int read_len = calls[i].mvd[2];
-
- if ( dp<2 ) continue;
-
- float prob = 0;
- if ( dp==2 )
- {
- // Exact formula
- prob = (mvd==0) ? 1.0/read_len : (read_len-mvd)*2.0/read_len/read_len;
- }
- else if ( dp==3 )
- {
- // Sin, quite accurate approximation
- float mu = read_len/2.9;
- prob = mvd>2*mu ? 0 : sin(mvd*3.14/2/mu) / (4*mu/3.14);
- }
- else
- {
- // Scaled gaussian curve, crude approximation, but behaves well. Using fixed depth for bigger depths.
- if ( dp>5 )
- dp = 5;
- float sigma2 = (read_len/1.9/(dp+1)) * (read_len/1.9/(dp+1));
- float norm = 1.125*sqrt(2*3.14*sigma2);
- float mu = read_len/2.9;
- if ( mvd < mu )
- prob = exp(-(mvd-mu)*(mvd-mu)/2/sigma2)/norm;
- else
- prob = exp(-(mvd-mu)*(mvd-mu)/3.125/sigma2)/norm;
- }
-
- //fprintf(stderr,"dp=%d mvd=%d read_len=%d -> prob=%f\n", dp,mvd,read_len,prob);
- tot_prob += prob*dp;
- weight += dp;
- }
- tot_prob = weight ? tot_prob/weight : 1;
- //fprintf(stderr,"prob=%f\n", tot_prob);
- call->vdb = tot_prob;
-}
-
-int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call)
-{
- int ref4, i, j, qsum[4];
- int64_t tmp;
- if (ref_base >= 0) {
- call->ori_ref = ref4 = bam_nt16_nt4_table[ref_base];
- if (ref4 > 4) ref4 = 4;
- } else call->ori_ref = -1, ref4 = 0;
- // calculate qsum
- memset(qsum, 0, 4 * sizeof(int));
- for (i = 0; i < n; ++i)
- for (j = 0; j < 4; ++j)
- qsum[j] += calls[i].qsum[j];
- for (j = 0; j < 4; ++j) qsum[j] = qsum[j] << 2 | j;
- // find the top 2 alleles
- for (i = 1; i < 4; ++i) // insertion sort
- for (j = i; j > 0 && qsum[j] < qsum[j-1]; --j)
- tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
- // set the reference allele and alternative allele(s)
- for (i = 0; i < 5; ++i) call->a[i] = -1;
- call->unseen = -1;
- call->a[0] = ref4;
- for (i = 3, j = 1; i >= 0; --i) {
- if ((qsum[i]&3) != ref4) {
- if (qsum[i]>>2 != 0) call->a[j++] = qsum[i]&3;
- else break;
- }
- }
- if (ref_base >= 0) { // for SNPs, find the "unseen" base
- if (((ref4 < 4 && j < 4) || (ref4 == 4 && j < 5)) && i >= 0)
- call->unseen = j, call->a[j++] = qsum[i]&3;
- call->n_alleles = j;
- } else {
- call->n_alleles = j;
- if (call->n_alleles == 1) return -1; // no reliable supporting read. stop doing anything
- }
- // set the PL array
- if (call->n < n) {
- call->n = n;
- call->PL = realloc(call->PL, 15 * n);
- }
- {
- int x, g[15], z;
- double sum_min = 0.;
- x = call->n_alleles * (call->n_alleles + 1) / 2;
- // get the possible genotypes
- for (i = z = 0; i < call->n_alleles; ++i)
- for (j = 0; j <= i; ++j)
- g[z++] = call->a[j] * 5 + call->a[i];
- for (i = 0; i < n; ++i) {
- uint8_t *PL = call->PL + x * i;
- const bcf_callret1_t *r = calls + i;
- float min = 1e37;
- for (j = 0; j < x; ++j)
- if (min > r->p[g[j]]) min = r->p[g[j]];
- sum_min += min;
- for (j = 0; j < x; ++j) {
- int y;
- y = (int)(r->p[g[j]] - min + .499);
- if (y > 255) y = 255;
- PL[j] = y;
- }
- }
-// if (ref_base < 0) fprintf(stderr, "%d,%d,%f,%d\n", call->n_alleles, x, sum_min, call->unseen);
- call->shift = (int)(sum_min + .499);
- }
- // combine annotations
- memset(call->anno, 0, 16 * sizeof(int));
- for (i = call->depth = call->ori_depth = 0, tmp = 0; i < n; ++i) {
- call->depth += calls[i].depth;
- call->ori_depth += calls[i].ori_depth;
- for (j = 0; j < 16; ++j) call->anno[j] += calls[i].anno[j];
- }
-
- calc_vdb(n, calls, call);
-
- return 0;
-}
-
-int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP,
- const bcf_callaux_t *bca, const char *ref)
-{
- extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two);
- kstring_t s;
- int i, j;
- b->n_smpl = bc->n;
- b->tid = tid; b->pos = pos; b->qual = 0;
- s.s = b->str; s.m = b->m_str; s.l = 0;
- kputc('\0', &s);
- if (bc->ori_ref < 0) { // an indel
- // write REF
- kputc(ref[pos], &s);
- for (j = 0; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s);
- kputc('\0', &s);
- // write ALT
- kputc(ref[pos], &s);
- for (i = 1; i < 4; ++i) {
- if (bc->a[i] < 0) break;
- if (i > 1) {
- kputc(',', &s); kputc(ref[pos], &s);
- }
- if (bca->indel_types[bc->a[i]] < 0) { // deletion
- for (j = -bca->indel_types[bc->a[i]]; j < bca->indelreg; ++j)
- kputc(ref[pos+1+j], &s);
- } else { // insertion; cannot be a reference unless a bug
- char *inscns = &bca->inscns[bc->a[i] * bca->maxins];
- for (j = 0; j < bca->indel_types[bc->a[i]]; ++j)
- kputc("ACGTN"[(int)inscns[j]], &s);
- for (j = 0; j < bca->indelreg; ++j) kputc(ref[pos+1+j], &s);
- }
- }
- kputc('\0', &s);
- } else { // a SNP
- kputc("ACGTN"[bc->ori_ref], &s); kputc('\0', &s);
- for (i = 1; i < 5; ++i) {
- if (bc->a[i] < 0) break;
- if (i > 1) kputc(',', &s);
- kputc(bc->unseen == i? 'X' : "ACGT"[bc->a[i]], &s);
- }
- kputc('\0', &s);
- }
- kputc('\0', &s);
- // INFO
- if (bc->ori_ref < 0) kputs("INDEL;", &s);
- kputs("DP=", &s); kputw(bc->ori_depth, &s); kputs(";I16=", &s);
- for (i = 0; i < 16; ++i) {
- if (i) kputc(',', &s);
- kputw(bc->anno[i], &s);
- }
- if ( bc->vdb!=1 )
- {
- ksprintf(&s, ";VDB=%.4f", bc->vdb);
- }
- kputc('\0', &s);
- // FMT
- kputs("PL", &s);
- if (bcr) {
- kputs(":DP", &s);
- if (is_SP) kputs(":SP", &s);
- }
- kputc('\0', &s);
- b->m_str = s.m; b->str = s.s; b->l_str = s.l;
- bcf_sync(b);
- memcpy(b->gi[0].data, bc->PL, b->gi[0].len * bc->n);
- if (bcr) {
- uint16_t *dp = (uint16_t*)b->gi[1].data;
- int32_t *sp = is_SP? b->gi[2].data : 0;
- for (i = 0; i < bc->n; ++i) {
- bcf_callret1_t *p = bcr + i;
- dp[i] = p->depth < 0xffff? p->depth : 0xffff;
- if (is_SP) {
- if (p->anno[0] + p->anno[1] < 2 || p->anno[2] + p->anno[3] < 2
- || p->anno[0] + p->anno[2] < 2 || p->anno[1] + p->anno[3] < 2)
- {
- sp[i] = 0;
- } else {
- double left, right, two;
- int x;
- kt_fisher_exact(p->anno[0], p->anno[1], p->anno[2], p->anno[3], &left, &right, &two);
- x = (int)(-4.343 * log(two) + .499);
- if (x > 255) x = 255;
- sp[i] = x;
- }
- }
- }
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam2bcf.h b/src/samtools-0.1.18/bam2bcf.h
deleted file mode 100644
index 4af080c..0000000
--- a/src/samtools-0.1.18/bam2bcf.h
+++ /dev/null
@@ -1,57 +0,0 @@
-#ifndef BAM2BCF_H
-#define BAM2BCF_H
-
-#include <stdint.h>
-#include "errmod.h"
-#include "bcftools/bcf.h"
-
-#define B2B_INDEL_NULL 10000
-
-typedef struct __bcf_callaux_t {
- int capQ, min_baseQ;
- int openQ, extQ, tandemQ; // for indels
- int min_support; // for collecting indel candidates
- double min_frac; // for collecting indel candidates
- // for internal uses
- int max_bases;
- int indel_types[4];
- int maxins, indelreg;
- char *inscns;
- uint16_t *bases;
- errmod_t *e;
- void *rghash;
-} bcf_callaux_t;
-
-typedef struct {
- int depth, ori_depth, qsum[4];
- int anno[16];
- float p[25];
- int mvd[3]; // mean variant distance, number of variant reads, average read length
-} bcf_callret1_t;
-
-typedef struct {
- int a[5]; // alleles: ref, alt, alt2, alt3
- int n, n_alleles, shift, ori_ref, unseen;
- int anno[16], depth, ori_depth;
- uint8_t *PL;
- float vdb; // variant distance bias
-} bcf_call_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- bcf_callaux_t *bcf_call_init(double theta, int min_baseQ);
- void bcf_call_destroy(bcf_callaux_t *bca);
- int bcf_call_glfgen(int _n, const bam_pileup1_t *pl, int ref_base, bcf_callaux_t *bca, bcf_callret1_t *r);
- int bcf_call_combine(int n, const bcf_callret1_t *calls, int ref_base /*4-bit*/, bcf_call_t *call);
- int bcf_call2bcf(int tid, int pos, bcf_call_t *bc, bcf1_t *b, bcf_callret1_t *bcr, int is_SP,
- const bcf_callaux_t *bca, const char *ref);
- int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
- const void *rghash);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/bam2bcf_indel.c b/src/samtools-0.1.18/bam2bcf_indel.c
deleted file mode 100644
index 5142b3e..0000000
--- a/src/samtools-0.1.18/bam2bcf_indel.c
+++ /dev/null
@@ -1,475 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <string.h>
-#include "bam.h"
-#include "bam2bcf.h"
-#include "kaln.h"
-#include "kprobaln.h"
-#include "khash.h"
-KHASH_SET_INIT_STR(rg)
-
-#include "ksort.h"
-KSORT_INIT_GENERIC(uint32_t)
-
-#define MINUS_CONST 0x10000000
-#define INDEL_WINDOW_SIZE 50
-
-void *bcf_call_add_rg(void *_hash, const char *hdtext, const char *list)
-{
- const char *s, *p, *q, *r, *t;
- khash_t(rg) *hash;
- if (list == 0 || hdtext == 0) return _hash;
- if (_hash == 0) _hash = kh_init(rg);
- hash = (khash_t(rg)*)_hash;
- if ((s = strstr(hdtext, "@RG\t")) == 0) return hash;
- do {
- t = strstr(s + 4, "@RG\t"); // the next @RG
- if ((p = strstr(s, "\tID:")) != 0) p += 4;
- if ((q = strstr(s, "\tPL:")) != 0) q += 4;
- if (p && q && (t == 0 || (p < t && q < t))) { // ID and PL are both present
- int lp, lq;
- char *x;
- for (r = p; *r && *r != '\t' && *r != '\n'; ++r); lp = r - p;
- for (r = q; *r && *r != '\t' && *r != '\n'; ++r); lq = r - q;
- x = calloc((lp > lq? lp : lq) + 1, 1);
- for (r = q; *r && *r != '\t' && *r != '\n'; ++r) x[r-q] = *r;
- if (strstr(list, x)) { // insert ID to the hash table
- khint_t k;
- int ret;
- for (r = p; *r && *r != '\t' && *r != '\n'; ++r) x[r-p] = *r;
- x[r-p] = 0;
- k = kh_get(rg, hash, x);
- if (k == kh_end(hash)) k = kh_put(rg, hash, x, &ret);
- else free(x);
- } else free(x);
- }
- s = t;
- } while (s);
- return hash;
-}
-
-void bcf_call_del_rghash(void *_hash)
-{
- khint_t k;
- khash_t(rg) *hash = (khash_t(rg)*)_hash;
- if (hash == 0) return;
- for (k = kh_begin(hash); k < kh_end(hash); ++k)
- if (kh_exist(hash, k))
- free((char*)kh_key(hash, k));
- kh_destroy(rg, hash);
-}
-
-static int tpos2qpos(const bam1_core_t *c, const uint32_t *cigar, int32_t tpos, int is_left, int32_t *_tpos)
-{
- int k, x = c->pos, y = 0, last_y = 0;
- *_tpos = c->pos;
- for (k = 0; k < c->n_cigar; ++k) {
- int op = cigar[k] & BAM_CIGAR_MASK;
- int l = cigar[k] >> BAM_CIGAR_SHIFT;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- if (c->pos > tpos) return y;
- if (x + l > tpos) {
- *_tpos = tpos;
- return y + (tpos - x);
- }
- x += l; y += l;
- last_y = y;
- } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
- else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
- if (x + l > tpos) {
- *_tpos = is_left? x : x + l;
- return y;
- }
- x += l;
- }
- }
- *_tpos = x;
- return last_y;
-}
-// FIXME: check if the inserted sequence is consistent with the homopolymer run
-// l is the relative gap length and l_run is the length of the homopolymer on the reference
-static inline int est_seqQ(const bcf_callaux_t *bca, int l, int l_run)
-{
- int q, qh;
- q = bca->openQ + bca->extQ * (abs(l) - 1);
- qh = l_run >= 3? (int)(bca->tandemQ * (double)abs(l) / l_run + .499) : 1000;
- return q < qh? q : qh;
-}
-
-static inline int est_indelreg(int pos, const char *ref, int l, char *ins4)
-{
- int i, j, max = 0, max_i = pos, score = 0;
- l = abs(l);
- for (i = pos + 1, j = 0; ref[i]; ++i, ++j) {
- if (ins4) score += (toupper(ref[i]) != "ACGTN"[(int)ins4[j%l]])? -10 : 1;
- else score += (toupper(ref[i]) != toupper(ref[pos+1+j%l]))? -10 : 1;
- if (score < 0) break;
- if (max < score) max = score, max_i = i;
- }
- return max_i - pos;
-}
-
-int bcf_call_gap_prep(int n, int *n_plp, bam_pileup1_t **plp, int pos, bcf_callaux_t *bca, const char *ref,
- const void *rghash)
-{
- int i, s, j, k, t, n_types, *types, max_rd_len, left, right, max_ins, *score1, *score2, max_ref2;
- int N, K, l_run, ref_type, n_alt;
- char *inscns = 0, *ref2, *query, **ref_sample;
- khash_t(rg) *hash = (khash_t(rg)*)rghash;
- if (ref == 0 || bca == 0) return -1;
- // mark filtered reads
- if (rghash) {
- N = 0;
- for (s = N = 0; s < n; ++s) {
- for (i = 0; i < n_plp[s]; ++i) {
- bam_pileup1_t *p = plp[s] + i;
- const uint8_t *rg = bam_aux_get(p->b, "RG");
- p->aux = 1; // filtered by default
- if (rg) {
- khint_t k = kh_get(rg, hash, (const char*)(rg + 1));
- if (k != kh_end(hash)) p->aux = 0, ++N; // not filtered
- }
- }
- }
- if (N == 0) return -1; // no reads left
- }
- // determine if there is a gap
- for (s = N = 0; s < n; ++s) {
- for (i = 0; i < n_plp[s]; ++i)
- if (plp[s][i].indel != 0) break;
- if (i < n_plp[s]) break;
- }
- if (s == n) return -1; // there is no indel at this position.
- for (s = N = 0; s < n; ++s) N += n_plp[s]; // N is the total number of reads
- { // find out how many types of indels are present
- int m, n_alt = 0, n_tot = 0;
- uint32_t *aux;
- aux = calloc(N + 1, 4);
- m = max_rd_len = 0;
- aux[m++] = MINUS_CONST; // zero indel is always a type
- for (s = 0; s < n; ++s) {
- for (i = 0; i < n_plp[s]; ++i) {
- const bam_pileup1_t *p = plp[s] + i;
- if (rghash == 0 || p->aux == 0) {
- ++n_tot;
- if (p->indel != 0) {
- ++n_alt;
- aux[m++] = MINUS_CONST + p->indel;
- }
- }
- j = bam_cigar2qlen(&p->b->core, bam1_cigar(p->b));
- if (j > max_rd_len) max_rd_len = j;
- }
- }
- ks_introsort(uint32_t, m, aux);
- // squeeze out identical types
- for (i = 1, n_types = 1; i < m; ++i)
- if (aux[i] != aux[i-1]) ++n_types;
- if (n_types == 1 || (double)n_alt / n_tot < bca->min_frac || n_alt < bca->min_support) { // then skip
- free(aux); return -1;
- }
- if (n_types >= 64) {
- free(aux);
- if (bam_verbose >= 2)
- fprintf(stderr, "[%s] excessive INDEL alleles at position %d. Skip the position.\n", __func__, pos + 1);
- return -1;
- }
- types = (int*)calloc(n_types, sizeof(int));
- t = 0;
- types[t++] = aux[0] - MINUS_CONST;
- for (i = 1; i < m; ++i)
- if (aux[i] != aux[i-1])
- types[t++] = aux[i] - MINUS_CONST;
- free(aux);
- for (t = 0; t < n_types; ++t)
- if (types[t] == 0) break;
- ref_type = t; // the index of the reference type (0)
- }
- { // calculate left and right boundary
- left = pos > INDEL_WINDOW_SIZE? pos - INDEL_WINDOW_SIZE : 0;
- right = pos + INDEL_WINDOW_SIZE;
- if (types[0] < 0) right -= types[0];
- // in case the alignments stand out the reference
- for (i = pos; i < right; ++i)
- if (ref[i] == 0) break;
- right = i;
- }
- /* The following block fixes a long-existing flaw in the INDEL
- * calling model: the interference of nearby SNPs. However, it also
- * reduces the power because sometimes, substitutions caused by
- * indels are not distinguishable from true mutations. Multiple
- * sequence realignment helps to increase the power.
- */
- { // construct per-sample consensus
- int L = right - left + 1, max_i, max2_i;
- uint32_t *cns, max, max2;
- char *ref0, *r;
- ref_sample = calloc(n, sizeof(void*));
- cns = calloc(L, 4);
- ref0 = calloc(L, 1);
- for (i = 0; i < right - left; ++i)
- ref0[i] = bam_nt16_table[(int)ref[i+left]];
- for (s = 0; s < n; ++s) {
- r = ref_sample[s] = calloc(L, 1);
- memset(cns, 0, sizeof(int) * L);
- // collect ref and non-ref counts
- for (i = 0; i < n_plp[s]; ++i) {
- bam_pileup1_t *p = plp[s] + i;
- bam1_t *b = p->b;
- uint32_t *cigar = bam1_cigar(b);
- uint8_t *seq = bam1_seq(b);
- int x = b->core.pos, y = 0;
- for (k = 0; k < b->core.n_cigar; ++k) {
- int op = cigar[k]&0xf;
- int j, l = cigar[k]>>4;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- for (j = 0; j < l; ++j)
- if (x + j >= left && x + j < right)
- cns[x+j-left] += (bam1_seqi(seq, y+j) == ref0[x+j-left])? 1 : 0x10000;
- x += l; y += l;
- } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
- else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
- }
- }
- // determine the consensus
- for (i = 0; i < right - left; ++i) r[i] = ref0[i];
- max = max2 = 0; max_i = max2_i = -1;
- for (i = 0; i < right - left; ++i) {
- if (cns[i]>>16 >= max>>16) max2 = max, max2_i = max_i, max = cns[i], max_i = i;
- else if (cns[i]>>16 >= max2>>16) max2 = cns[i], max2_i = i;
- }
- if ((double)(max&0xffff) / ((max&0xffff) + (max>>16)) >= 0.7) max_i = -1;
- if ((double)(max2&0xffff) / ((max2&0xffff) + (max2>>16)) >= 0.7) max2_i = -1;
- if (max_i >= 0) r[max_i] = 15;
- if (max2_i >= 0) r[max2_i] = 15;
-// for (i = 0; i < right - left; ++i) fputc("=ACMGRSVTWYHKDBN"[(int)r[i]], stderr); fputc('\n', stderr);
- }
- free(ref0); free(cns);
- }
- { // the length of the homopolymer run around the current position
- int c = bam_nt16_table[(int)ref[pos + 1]];
- if (c == 15) l_run = 1;
- else {
- for (i = pos + 2; ref[i]; ++i)
- if (bam_nt16_table[(int)ref[i]] != c) break;
- l_run = i;
- for (i = pos; i >= 0; --i)
- if (bam_nt16_table[(int)ref[i]] != c) break;
- l_run -= i + 1;
- }
- }
- // construct the consensus sequence
- max_ins = types[n_types - 1]; // max_ins is at least 0
- if (max_ins > 0) {
- int *inscns_aux = calloc(4 * n_types * max_ins, sizeof(int));
- // count the number of occurrences of each base at each position for each type of insertion
- for (t = 0; t < n_types; ++t) {
- if (types[t] > 0) {
- for (s = 0; s < n; ++s) {
- for (i = 0; i < n_plp[s]; ++i) {
- bam_pileup1_t *p = plp[s] + i;
- if (p->indel == types[t]) {
- uint8_t *seq = bam1_seq(p->b);
- for (k = 1; k <= p->indel; ++k) {
- int c = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos + k)];
- if (c < 4) ++inscns_aux[(t*max_ins+(k-1))*4 + c];
- }
- }
- }
- }
- }
- }
- // use the majority rule to construct the consensus
- inscns = calloc(n_types * max_ins, 1);
- for (t = 0; t < n_types; ++t) {
- for (j = 0; j < types[t]; ++j) {
- int max = 0, max_k = -1, *ia = &inscns_aux[(t*max_ins+j)*4];
- for (k = 0; k < 4; ++k)
- if (ia[k] > max)
- max = ia[k], max_k = k;
- inscns[t*max_ins + j] = max? max_k : 4;
- }
- }
- free(inscns_aux);
- }
- // compute the likelihood given each type of indel for each read
- max_ref2 = right - left + 2 + 2 * (max_ins > -types[0]? max_ins : -types[0]);
- ref2 = calloc(max_ref2, 1);
- query = calloc(right - left + max_rd_len + max_ins + 2, 1);
- score1 = calloc(N * n_types, sizeof(int));
- score2 = calloc(N * n_types, sizeof(int));
- bca->indelreg = 0;
- for (t = 0; t < n_types; ++t) {
- int l, ir;
- kpa_par_t apf1 = { 1e-4, 1e-2, 10 }, apf2 = { 1e-6, 1e-3, 10 };
- apf1.bw = apf2.bw = abs(types[t]) + 3;
- // compute indelreg
- if (types[t] == 0) ir = 0;
- else if (types[t] > 0) ir = est_indelreg(pos, ref, types[t], &inscns[t*max_ins]);
- else ir = est_indelreg(pos, ref, -types[t], 0);
- if (ir > bca->indelreg) bca->indelreg = ir;
-// fprintf(stderr, "%d, %d, %d\n", pos, types[t], ir);
- // realignment
- for (s = K = 0; s < n; ++s) {
- // write ref2
- for (k = 0, j = left; j <= pos; ++j)
- ref2[k++] = bam_nt16_nt4_table[(int)ref_sample[s][j-left]];
- if (types[t] <= 0) j += -types[t];
- else for (l = 0; l < types[t]; ++l)
- ref2[k++] = inscns[t*max_ins + l];
- for (; j < right && ref[j]; ++j)
- ref2[k++] = bam_nt16_nt4_table[(int)ref_sample[s][j-left]];
- for (; k < max_ref2; ++k) ref2[k] = 4;
- if (j < right) right = j;
- // align each read to ref2
- for (i = 0; i < n_plp[s]; ++i, ++K) {
- bam_pileup1_t *p = plp[s] + i;
- int qbeg, qend, tbeg, tend, sc, kk;
- uint8_t *seq = bam1_seq(p->b);
- uint32_t *cigar = bam1_cigar(p->b);
- if (p->b->core.flag&4) continue; // unmapped reads
- // FIXME: the following loop should be better moved outside; nonetheless, realignment should be much slower anyway.
- for (kk = 0; kk < p->b->core.n_cigar; ++kk)
- if ((cigar[kk]&BAM_CIGAR_MASK) == BAM_CREF_SKIP) break;
- if (kk < p->b->core.n_cigar) continue;
- // FIXME: the following skips soft clips, but using them may be more sensitive.
- // determine the start and end of sequences for alignment
- qbeg = tpos2qpos(&p->b->core, bam1_cigar(p->b), left, 0, &tbeg);
- qend = tpos2qpos(&p->b->core, bam1_cigar(p->b), right, 1, &tend);
- if (types[t] < 0) {
- int l = -types[t];
- tbeg = tbeg - l > left? tbeg - l : left;
- }
- // write the query sequence
- for (l = qbeg; l < qend; ++l)
- query[l - qbeg] = bam_nt16_nt4_table[bam1_seqi(seq, l)];
- { // do realignment; this is the bottleneck
- const uint8_t *qual = bam1_qual(p->b), *bq;
- uint8_t *qq;
- qq = calloc(qend - qbeg, 1);
- bq = (uint8_t*)bam_aux_get(p->b, "ZQ");
- if (bq) ++bq; // skip type
- for (l = qbeg; l < qend; ++l) {
- qq[l - qbeg] = bq? qual[l] + (bq[l] - 64) : qual[l];
- if (qq[l - qbeg] > 30) qq[l - qbeg] = 30;
- if (qq[l - qbeg] < 7) qq[l - qbeg] = 7;
- }
- sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
- (uint8_t*)query, qend - qbeg, qq, &apf1, 0, 0);
- l = (int)(100. * sc / (qend - qbeg) + .499); // used for adjusting indelQ below
- if (l > 255) l = 255;
- score1[K*n_types + t] = score2[K*n_types + t] = sc<<8 | l;
- if (sc > 5) {
- sc = kpa_glocal((uint8_t*)ref2 + tbeg - left, tend - tbeg + abs(types[t]),
- (uint8_t*)query, qend - qbeg, qq, &apf2, 0, 0);
- l = (int)(100. * sc / (qend - qbeg) + .499);
- if (l > 255) l = 255;
- score2[K*n_types + t] = sc<<8 | l;
- }
- free(qq);
- }
-/*
- for (l = 0; l < tend - tbeg + abs(types[t]); ++l)
- fputc("ACGTN"[(int)ref2[tbeg-left+l]], stderr);
- fputc('\n', stderr);
- for (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[(int)query[l]], stderr);
- fputc('\n', stderr);
- fprintf(stderr, "pos=%d type=%d read=%d:%d name=%s qbeg=%d tbeg=%d score=%d\n", pos, types[t], s, i, bam1_qname(p->b), qbeg, tbeg, sc);
-*/
- }
- }
- }
- free(ref2); free(query);
- { // compute indelQ
- int *sc, tmp, *sumq;
- sc = alloca(n_types * sizeof(int));
- sumq = alloca(n_types * sizeof(int));
- memset(sumq, 0, sizeof(int) * n_types);
- for (s = K = 0; s < n; ++s) {
- for (i = 0; i < n_plp[s]; ++i, ++K) {
- bam_pileup1_t *p = plp[s] + i;
- int *sct = &score1[K*n_types], indelQ1, indelQ2, seqQ, indelQ;
- for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
- for (t = 1; t < n_types; ++t) // insertion sort
- for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
- tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
- /* errmod_cal() assumes that if the call is wrong, the
- * likelihoods of other events are equal. This is about
- * right for substitutions, but is not desired for
- * indels. To reuse errmod_cal(), I have to make
- * compromise for multi-allelic indels.
- */
- if ((sc[0]&0x3f) == ref_type) {
- indelQ1 = (sc[1]>>14) - (sc[0]>>14);
- seqQ = est_seqQ(bca, types[sc[1]&0x3f], l_run);
- } else {
- for (t = 0; t < n_types; ++t) // look for the reference type
- if ((sc[t]&0x3f) == ref_type) break;
- indelQ1 = (sc[t]>>14) - (sc[0]>>14);
- seqQ = est_seqQ(bca, types[sc[0]&0x3f], l_run);
- }
- tmp = sc[0]>>6 & 0xff;
- indelQ1 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ1 + .499); // reduce indelQ
- sct = &score2[K*n_types];
- for (t = 0; t < n_types; ++t) sc[t] = sct[t]<<6 | t;
- for (t = 1; t < n_types; ++t) // insertion sort
- for (j = t; j > 0 && sc[j] < sc[j-1]; --j)
- tmp = sc[j], sc[j] = sc[j-1], sc[j-1] = tmp;
- if ((sc[0]&0x3f) == ref_type) {
- indelQ2 = (sc[1]>>14) - (sc[0]>>14);
- } else {
- for (t = 0; t < n_types; ++t) // look for the reference type
- if ((sc[t]&0x3f) == ref_type) break;
- indelQ2 = (sc[t]>>14) - (sc[0]>>14);
- }
- tmp = sc[0]>>6 & 0xff;
- indelQ2 = tmp > 111? 0 : (int)((1. - tmp/111.) * indelQ2 + .499);
- // pick the smaller between indelQ1 and indelQ2
- indelQ = indelQ1 < indelQ2? indelQ1 : indelQ2;
- if (indelQ > 255) indelQ = 255;
- if (seqQ > 255) seqQ = 255;
- p->aux = (sc[0]&0x3f)<<16 | seqQ<<8 | indelQ; // use 22 bits in total
- sumq[sc[0]&0x3f] += indelQ < seqQ? indelQ : seqQ;
-// fprintf(stderr, "pos=%d read=%d:%d name=%s call=%d indelQ=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), types[sc[0]&0x3f], indelQ, seqQ);
- }
- }
- // determine bca->indel_types[] and bca->inscns
- bca->maxins = max_ins;
- bca->inscns = realloc(bca->inscns, bca->maxins * 4);
- for (t = 0; t < n_types; ++t)
- sumq[t] = sumq[t]<<6 | t;
- for (t = 1; t < n_types; ++t) // insertion sort
- for (j = t; j > 0 && sumq[j] > sumq[j-1]; --j)
- tmp = sumq[j], sumq[j] = sumq[j-1], sumq[j-1] = tmp;
- for (t = 0; t < n_types; ++t) // look for the reference type
- if ((sumq[t]&0x3f) == ref_type) break;
- if (t) { // then move the reference type to the first
- tmp = sumq[t];
- for (; t > 0; --t) sumq[t] = sumq[t-1];
- sumq[0] = tmp;
- }
- for (t = 0; t < 4; ++t) bca->indel_types[t] = B2B_INDEL_NULL;
- for (t = 0; t < 4 && t < n_types; ++t) {
- bca->indel_types[t] = types[sumq[t]&0x3f];
- memcpy(&bca->inscns[t * bca->maxins], &inscns[(sumq[t]&0x3f) * max_ins], bca->maxins);
- }
- // update p->aux
- for (s = n_alt = 0; s < n; ++s) {
- for (i = 0; i < n_plp[s]; ++i) {
- bam_pileup1_t *p = plp[s] + i;
- int x = types[p->aux>>16&0x3f];
- for (j = 0; j < 4; ++j)
- if (x == bca->indel_types[j]) break;
- p->aux = j<<16 | (j == 4? 0 : (p->aux&0xffff));
- if ((p->aux>>16&0x3f) > 0) ++n_alt;
-// fprintf(stderr, "X pos=%d read=%d:%d name=%s call=%d type=%d q=%d seqQ=%d\n", pos, s, i, bam1_qname(p->b), p->aux>>16&63, bca->indel_types[p->aux>>16&63], p->aux&0xff, p->aux>>8&0xff);
- }
- }
- }
- free(score1); free(score2);
- // free
- for (i = 0; i < n; ++i) free(ref_sample[i]);
- free(ref_sample);
- free(types); free(inscns);
- return n_alt > 0? 0 : -1;
-}
diff --git a/src/samtools-0.1.18/bam2depth.c b/src/samtools-0.1.18/bam2depth.c
deleted file mode 100644
index ca36b89..0000000
--- a/src/samtools-0.1.18/bam2depth.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/* This program demonstrates how to generate pileup from multiple BAMs
- * simutaneously, to achieve random access and to use the BED interface.
- * To compile this program separately, you may:
- *
- * gcc -g -O2 -Wall -o bam2depth -D_MAIN_BAM2DEPTH bam2depth.c -L. -lbam -lz
- */
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include "bam.h"
-
-typedef struct { // auxiliary data structure
- bamFile fp; // the file handler
- bam_iter_t iter; // NULL if a region not specified
- int min_mapQ; // mapQ filter
-} aux_t;
-
-void *bed_read(const char *fn); // read a BED or position list file
-void bed_destroy(void *_h); // destroy the BED data structure
-int bed_overlap(const void *_h, const char *chr, int beg, int end); // test if chr:beg-end overlaps
-
-// This function reads a BAM alignment from one BAM file.
-static int read_bam(void *data, bam1_t *b) // read level filters better go here to avoid pileup
-{
- aux_t *aux = (aux_t*)data; // data in fact is a pointer to an auxiliary structure
- int ret = aux->iter? bam_iter_read(aux->fp, aux->iter, b) : bam_read1(aux->fp, b);
- if ((int)b->core.qual < aux->min_mapQ) b->core.flag |= BAM_FUNMAP;
- return ret;
-}
-
-#ifdef _MAIN_BAM2DEPTH
-int main(int argc, char *argv[])
-#else
-int main_depth(int argc, char *argv[])
-#endif
-{
- int i, n, tid, beg, end, pos, *n_plp, baseQ = 0, mapQ = 0;
- const bam_pileup1_t **plp;
- char *reg = 0; // specified region
- void *bed = 0; // BED data structure
- bam_header_t *h = 0; // BAM header of the 1st input
- aux_t **data;
- bam_mplp_t mplp;
-
- // parse the command line
- while ((n = getopt(argc, argv, "r:b:q:Q:")) >= 0) {
- switch (n) {
- case 'r': reg = strdup(optarg); break; // parsing a region requires a BAM header
- case 'b': bed = bed_read(optarg); break; // BED or position list file can be parsed now
- case 'q': baseQ = atoi(optarg); break; // base quality threshold
- case 'Q': mapQ = atoi(optarg); break; // mapping quality threshold
- }
- }
- if (optind == argc) {
- fprintf(stderr, "Usage: bam2depth [-r reg] [-q baseQthres] [-Q mapQthres] [-b in.bed] <in1.bam> [...]\n");
- return 1;
- }
-
- // initialize the auxiliary data structures
- n = argc - optind; // the number of BAMs on the command line
- data = calloc(n, sizeof(void*)); // data[i] for the i-th input
- beg = 0; end = 1<<30; tid = -1; // set the default region
- for (i = 0; i < n; ++i) {
- bam_header_t *htmp;
- data[i] = calloc(1, sizeof(aux_t));
- data[i]->fp = bam_open(argv[optind+i], "r"); // open BAM
- data[i]->min_mapQ = mapQ; // set the mapQ filter
- htmp = bam_header_read(data[i]->fp); // read the BAM header
- if (i == 0) {
- h = htmp; // keep the header of the 1st BAM
- if (reg) bam_parse_region(h, reg, &tid, &beg, &end); // also parse the region
- } else bam_header_destroy(htmp); // if not the 1st BAM, trash the header
- if (tid >= 0) { // if a region is specified and parsed successfully
- bam_index_t *idx = bam_index_load(argv[optind+i]); // load the index
- data[i]->iter = bam_iter_query(idx, tid, beg, end); // set the iterator
- bam_index_destroy(idx); // the index is not needed any more; phase out of the memory
- }
- }
-
- // the core multi-pileup loop
- mplp = bam_mplp_init(n, read_bam, (void**)data); // initialization
- n_plp = calloc(n, sizeof(int)); // n_plp[i] is the number of covering reads from the i-th BAM
- plp = calloc(n, sizeof(void*)); // plp[i] points to the array of covering reads (internal in mplp)
- while (bam_mplp_auto(mplp, &tid, &pos, n_plp, plp) > 0) { // come to the next covered position
- if (pos < beg || pos >= end) continue; // out of range; skip
- if (bed && bed_overlap(bed, h->target_name[tid], pos, pos + 1) == 0) continue; // not in BED; skip
- fputs(h->target_name[tid], stdout); printf("\t%d", pos+1); // a customized printf() would be faster
- for (i = 0; i < n; ++i) { // base level filters have to go here
- int j, m = 0;
- for (j = 0; j < n_plp[i]; ++j) {
- const bam_pileup1_t *p = plp[i] + j; // DON'T modfity plp[][] unless you really know
- if (p->is_del || p->is_refskip) ++m; // having dels or refskips at tid:pos
- else if (bam1_qual(p->b)[p->qpos] < baseQ) ++m; // low base quality
- }
- printf("\t%d", n_plp[i] - m); // this the depth to output
- }
- putchar('\n');
- }
- free(n_plp); free(plp);
- bam_mplp_destroy(mplp);
-
- bam_header_destroy(h);
- for (i = 0; i < n; ++i) {
- bam_close(data[i]->fp);
- if (data[i]->iter) bam_iter_destroy(data[i]->iter);
- free(data[i]);
- }
- free(data); free(reg);
- if (bed) bed_destroy(bed);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_aux.c b/src/samtools-0.1.18/bam_aux.c
deleted file mode 100644
index 28b22e3..0000000
--- a/src/samtools-0.1.18/bam_aux.c
+++ /dev/null
@@ -1,213 +0,0 @@
-#include <ctype.h>
-#include "bam.h"
-#include "khash.h"
-typedef char *str_p;
-KHASH_MAP_INIT_STR(s, int)
-KHASH_MAP_INIT_STR(r2l, str_p)
-
-void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data)
-{
- int ori_len = b->data_len;
- b->data_len += 3 + len;
- b->l_aux += 3 + len;
- if (b->m_data < b->data_len) {
- b->m_data = b->data_len;
- kroundup32(b->m_data);
- b->data = (uint8_t*)realloc(b->data, b->m_data);
- }
- b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1];
- b->data[ori_len + 2] = type;
- memcpy(b->data + ori_len + 3, data, len);
-}
-
-uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2])
-{
- return bam_aux_get(b, tag);
-}
-
-#define __skip_tag(s) do { \
- int type = toupper(*(s)); \
- ++(s); \
- if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \
- else if (type == 'B') (s) += 5 + bam_aux_type2size(*(s)) * (*(int32_t*)((s)+1)); \
- else (s) += bam_aux_type2size(type); \
- } while(0)
-
-uint8_t *bam_aux_get(const bam1_t *b, const char tag[2])
-{
- uint8_t *s;
- int y = tag[0]<<8 | tag[1];
- s = bam1_aux(b);
- while (s < b->data + b->data_len) {
- int x = (int)s[0]<<8 | s[1];
- s += 2;
- if (x == y) return s;
- __skip_tag(s);
- }
- return 0;
-}
-// s MUST BE returned by bam_aux_get()
-int bam_aux_del(bam1_t *b, uint8_t *s)
-{
- uint8_t *p, *aux;
- aux = bam1_aux(b);
- p = s - 2;
- __skip_tag(s);
- memmove(p, s, b->l_aux - (s - aux));
- b->data_len -= s - p;
- b->l_aux -= s - p;
- return 0;
-}
-
-int bam_aux_drop_other(bam1_t *b, uint8_t *s)
-{
- if (s) {
- uint8_t *p, *aux;
- aux = bam1_aux(b);
- p = s - 2;
- __skip_tag(s);
- memmove(aux, p, s - p);
- b->data_len -= b->l_aux - (s - p);
- b->l_aux = s - p;
- } else {
- b->data_len -= b->l_aux;
- b->l_aux = 0;
- }
- return 0;
-}
-
-void bam_init_header_hash(bam_header_t *header)
-{
- if (header->hash == 0) {
- int ret, i;
- khiter_t iter;
- khash_t(s) *h;
- header->hash = h = kh_init(s);
- for (i = 0; i < header->n_targets; ++i) {
- iter = kh_put(s, h, header->target_name[i], &ret);
- kh_value(h, iter) = i;
- }
- }
-}
-
-void bam_destroy_header_hash(bam_header_t *header)
-{
- if (header->hash)
- kh_destroy(s, (khash_t(s)*)header->hash);
-}
-
-int32_t bam_get_tid(const bam_header_t *header, const char *seq_name)
-{
- khint_t k;
- khash_t(s) *h = (khash_t(s)*)header->hash;
- k = kh_get(s, h, seq_name);
- return k == kh_end(h)? -1 : kh_value(h, k);
-}
-
-int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *beg, int *end)
-{
- char *s;
- int i, l, k, name_end;
- khiter_t iter;
- khash_t(s) *h;
-
- bam_init_header_hash(header);
- h = (khash_t(s)*)header->hash;
-
- *ref_id = *beg = *end = -1;
- name_end = l = strlen(str);
- s = (char*)malloc(l+1);
- // remove space
- for (i = k = 0; i < l; ++i)
- if (!isspace(str[i])) s[k++] = str[i];
- s[k] = 0; l = k;
- // determine the sequence name
- for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end
- if (i >= 0) name_end = i;
- if (name_end < l) { // check if this is really the end
- int n_hyphen = 0;
- for (i = name_end + 1; i < l; ++i) {
- if (s[i] == '-') ++n_hyphen;
- else if (!isdigit(s[i]) && s[i] != ',') break;
- }
- if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name
- s[name_end] = 0;
- iter = kh_get(s, h, s);
- if (iter == kh_end(h)) { // cannot find the sequence name
- iter = kh_get(s, h, str); // try str as the name
- if (iter == kh_end(h)) {
- if (bam_verbose >= 2) fprintf(stderr, "[%s] fail to determine the sequence name.\n", __func__);
- free(s); return -1;
- } else s[name_end] = ':', name_end = l;
- }
- } else iter = kh_get(s, h, str);
- *ref_id = kh_val(h, iter);
- // parse the interval
- if (name_end < l) {
- for (i = k = name_end + 1; i < l; ++i)
- if (s[i] != ',') s[k++] = s[i];
- s[k] = 0;
- *beg = atoi(s + name_end + 1);
- for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break;
- *end = i < k? atoi(s + i + 1) : 1<<29;
- if (*beg > 0) --*beg;
- } else *beg = 0, *end = 1<<29;
- free(s);
- return *beg <= *end? 0 : -1;
-}
-
-int32_t bam_aux2i(const uint8_t *s)
-{
- int type;
- if (s == 0) return 0;
- type = *s++;
- if (type == 'c') return (int32_t)*(int8_t*)s;
- else if (type == 'C') return (int32_t)*(uint8_t*)s;
- else if (type == 's') return (int32_t)*(int16_t*)s;
- else if (type == 'S') return (int32_t)*(uint16_t*)s;
- else if (type == 'i' || type == 'I') return *(int32_t*)s;
- else return 0;
-}
-
-float bam_aux2f(const uint8_t *s)
-{
- int type;
- type = *s++;
- if (s == 0) return 0.0;
- if (type == 'f') return *(float*)s;
- else return 0.0;
-}
-
-double bam_aux2d(const uint8_t *s)
-{
- int type;
- type = *s++;
- if (s == 0) return 0.0;
- if (type == 'd') return *(double*)s;
- else return 0.0;
-}
-
-char bam_aux2A(const uint8_t *s)
-{
- int type;
- type = *s++;
- if (s == 0) return 0;
- if (type == 'A') return *(char*)s;
- else return 0;
-}
-
-char *bam_aux2Z(const uint8_t *s)
-{
- int type;
- type = *s++;
- if (s == 0) return 0;
- if (type == 'Z' || type == 'H') return (char*)s;
- else return 0;
-}
-
-#ifdef _WIN32
-double drand48()
-{
- return (double)rand() / RAND_MAX;
-}
-#endif
diff --git a/src/samtools-0.1.18/bam_cat.c b/src/samtools-0.1.18/bam_cat.c
deleted file mode 100644
index 0fde045..0000000
--- a/src/samtools-0.1.18/bam_cat.c
+++ /dev/null
@@ -1,184 +0,0 @@
-/*
-
-bam_cat -- efficiently concatenates bam files
-
-bam_cat can be used to concatenate BAM files. Under special
-circumstances, it can be used as an alternative to 'samtools merge' to
-concatenate multiple sorted files into a single sorted file. For this
-to work each file must be sorted, and the sorted files must be given
-as command line arguments in order such that the final read in file i
-is less than or equal to the first read in file i+1.
-
-This code is derived from the bam_reheader function in samtools 0.1.8
-and modified to perform concatenation by Chris Saunders on behalf of
-Illumina.
-
-
-########## License:
-
-The MIT License
-
-Original SAMtools work copyright (c) 2008-2009 Genome Research Ltd.
-Modified SAMtools work copyright (c) 2010 Illumina, Inc.
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
-
-*/
-
-
-/*
-makefile:
-"""
-CC=gcc
-CFLAGS+=-g -Wall -O2 -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE -I$(SAMTOOLS_DIR)
-LDFLAGS+=-L$(SAMTOOLS_DIR)
-LDLIBS+=-lbam -lz
-
-all:bam_cat
-"""
-*/
-
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "bgzf.h"
-#include "bam.h"
-
-#define BUF_SIZE 0x10000
-
-#define GZIPID1 31
-#define GZIPID2 139
-
-#define BGZF_EMPTY_BLOCK_SIZE 28
-
-
-int bam_cat(int nfn, char * const *fn, const bam_header_t *h, const char* outbam)
-{
- BGZF *fp;
- FILE* fp_file;
- uint8_t *buf;
- uint8_t ebuf[BGZF_EMPTY_BLOCK_SIZE];
- const int es=BGZF_EMPTY_BLOCK_SIZE;
- int i;
-
- fp = strcmp(outbam, "-")? bgzf_open(outbam, "w") : bgzf_fdopen(fileno(stdout), "w");
- if (fp == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open output file '%s'.\n", __func__, outbam);
- return 1;
- }
- if (h) bam_header_write(fp, h);
-
- buf = (uint8_t*) malloc(BUF_SIZE);
- for(i = 0; i < nfn; ++i){
- BGZF *in;
- bam_header_t *old;
- int len,j;
-
- in = strcmp(fn[i], "-")? bam_open(fn[i], "r") : bam_dopen(fileno(stdin), "r");
- if (in == 0) {
- fprintf(stderr, "[%s] ERROR: fail to open file '%s'.\n", __func__, fn[i]);
- return -1;
- }
- if (in->open_mode != 'r') return -1;
-
- old = bam_header_read(in);
- if (h == 0 && i == 0) bam_header_write(fp, old);
-
- if (in->block_offset < in->block_length) {
- bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
- bgzf_flush(fp);
- }
-
- j=0;
-#ifdef _USE_KNETFILE
- fp_file=fp->x.fpw;
- while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) {
-#else
- fp_file=fp->file;
- while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) {
-#endif
- if(len<es){
- int diff=es-len;
- if(j==0) {
- fprintf(stderr, "[%s] ERROR: truncated file?: '%s'.\n", __func__, fn[i]);
- return -1;
- }
- fwrite(ebuf, 1, len, fp_file);
- memcpy(ebuf,ebuf+len,diff);
- memcpy(ebuf+diff,buf,len);
- } else {
- if(j!=0) fwrite(ebuf, 1, es, fp_file);
- len-= es;
- memcpy(ebuf,buf+len,es);
- fwrite(buf, 1, len, fp_file);
- }
- j=1;
- }
-
- /* check final gzip block */
- {
- const uint8_t gzip1=ebuf[0];
- const uint8_t gzip2=ebuf[1];
- const uint32_t isize=*((uint32_t*)(ebuf+es-4));
- if(((gzip1!=GZIPID1) || (gzip2!=GZIPID2)) || (isize!=0)) {
- fprintf(stderr, "[%s] WARNING: Unexpected block structure in file '%s'.", __func__, fn[i]);
- fprintf(stderr, " Possible output corruption.\n");
- fwrite(ebuf, 1, es, fp_file);
- }
- }
- bam_header_destroy(old);
- bgzf_close(in);
- }
- free(buf);
- bgzf_close(fp);
- return 0;
-}
-
-
-
-int main_cat(int argc, char *argv[])
-{
- bam_header_t *h = 0;
- char *outfn = 0;
- int c, ret;
- while ((c = getopt(argc, argv, "h:o:")) >= 0) {
- switch (c) {
- case 'h': {
- tamFile fph = sam_open(optarg);
- if (fph == 0) {
- fprintf(stderr, "[%s] ERROR: fail to read the header from '%s'.\n", __func__, argv[1]);
- return 1;
- }
- h = sam_header_read(fph);
- sam_close(fph);
- break;
- }
- case 'o': outfn = strdup(optarg); break;
- }
- }
- if (argc - optind < 2) {
- fprintf(stderr, "Usage: samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [...]\n");
- return 1;
- }
- ret = bam_cat(argc - optind, argv + optind, h, outfn? outfn : "-");
- free(outfn);
- return ret;
-}
diff --git a/src/samtools-0.1.18/bam_color.c b/src/samtools-0.1.18/bam_color.c
deleted file mode 100644
index ce637f7..0000000
--- a/src/samtools-0.1.18/bam_color.c
+++ /dev/null
@@ -1,127 +0,0 @@
-#include <ctype.h>
-#include "bam.h"
-
-/*!
- @abstract Get the color encoding the previous and current base
- @param b pointer to an alignment
- @param i The i-th position, 0-based
- @return color
-
- @discussion Returns 0 no color information is found.
- */
-char bam_aux_getCSi(bam1_t *b, int i)
-{
- uint8_t *c = bam_aux_get(b, "CS");
- char *cs = NULL;
-
- // return the base if the tag was not found
- if(0 == c) return 0;
-
- cs = bam_aux2Z(c);
- // adjust for strandedness and leading adaptor
- if(bam1_strand(b)) i = strlen(cs) - 1 - i;
- else i++;
- return cs[i];
-}
-
-/*!
- @abstract Get the color quality of the color encoding the previous and current base
- @param b pointer to an alignment
- @param i The i-th position, 0-based
- @return color quality
-
- @discussion Returns 0 no color information is found.
- */
-char bam_aux_getCQi(bam1_t *b, int i)
-{
- uint8_t *c = bam_aux_get(b, "CQ");
- char *cq = NULL;
-
- // return the base if the tag was not found
- if(0 == c) return 0;
-
- cq = bam_aux2Z(c);
- // adjust for strandedness
- if(bam1_strand(b)) i = strlen(cq) - 1 - i;
- return cq[i];
-}
-
-char bam_aux_nt2int(char a)
-{
- switch(toupper(a)) {
- case 'A':
- return 0;
- break;
- case 'C':
- return 1;
- break;
- case 'G':
- return 2;
- break;
- case 'T':
- return 3;
- break;
- default:
- return 4;
- break;
- }
-}
-
-char bam_aux_ntnt2cs(char a, char b)
-{
- a = bam_aux_nt2int(a);
- b = bam_aux_nt2int(b);
- if(4 == a || 4 == b) return '4';
- return "0123"[(int)(a ^ b)];
-}
-
-/*!
- @abstract Get the color error profile at the give position
- @param b pointer to an alignment
- @return the original color if the color was an error, '-' (dash) otherwise
-
- @discussion Returns 0 no color information is found.
- */
-char bam_aux_getCEi(bam1_t *b, int i)
-{
- int cs_i;
- uint8_t *c = bam_aux_get(b, "CS");
- char *cs = NULL;
- char prev_b, cur_b;
- char cur_color, cor_color;
-
- // return the base if the tag was not found
- if(0 == c) return 0;
-
- cs = bam_aux2Z(c);
-
- // adjust for strandedness and leading adaptor
- if(bam1_strand(b)) { //reverse strand
- cs_i = strlen(cs) - 1 - i;
- // get current color
- cur_color = cs[cs_i];
- // get previous base. Note: must rc adaptor
- prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)];
- // get current base
- cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];
- }
- else {
- cs_i=i+1;
- // get current color
- cur_color = cs[cs_i];
- // get previous base
- prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)];
- // get current base
- cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)];
- }
-
- // corrected color
- cor_color = bam_aux_ntnt2cs(prev_b, cur_b);
-
- if(cur_color == cor_color) {
- return '-';
- }
- else {
- return cur_color;
- }
-}
diff --git a/src/samtools-0.1.18/bam_endian.h b/src/samtools-0.1.18/bam_endian.h
deleted file mode 100644
index 0fc74a8..0000000
--- a/src/samtools-0.1.18/bam_endian.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef BAM_ENDIAN_H
-#define BAM_ENDIAN_H
-
-#include <stdint.h>
-
-static inline int bam_is_big_endian()
-{
- long one= 1;
- return !(*((char *)(&one)));
-}
-static inline uint16_t bam_swap_endian_2(uint16_t v)
-{
- return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
-}
-static inline void *bam_swap_endian_2p(void *x)
-{
- *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x);
- return x;
-}
-static inline uint32_t bam_swap_endian_4(uint32_t v)
-{
- v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
- return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
-}
-static inline void *bam_swap_endian_4p(void *x)
-{
- *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x);
- return x;
-}
-static inline uint64_t bam_swap_endian_8(uint64_t v)
-{
- v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);
- v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);
- return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);
-}
-static inline void *bam_swap_endian_8p(void *x)
-{
- *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x);
- return x;
-}
-
-#endif
diff --git a/src/samtools-0.1.18/bam_import.c b/src/samtools-0.1.18/bam_import.c
deleted file mode 100644
index 5518a9c..0000000
--- a/src/samtools-0.1.18/bam_import.c
+++ /dev/null
@@ -1,485 +0,0 @@
-#include <zlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-#ifdef _WIN32
-#include <fcntl.h>
-#endif
-#include "kstring.h"
-#include "bam.h"
-#include "sam_header.h"
-#include "kseq.h"
-#include "khash.h"
-
-KSTREAM_INIT(gzFile, gzread, 16384)
-KHASH_MAP_INIT_STR(ref, uint64_t)
-
-void bam_init_header_hash(bam_header_t *header);
-void bam_destroy_header_hash(bam_header_t *header);
-int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
-
-unsigned char bam_nt16_table[256] = {
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
- 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
- 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,
- 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
- 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
- 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
-};
-
-unsigned short bam_char2flag_table[256] = {
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,BAM_FREAD1,BAM_FREAD2,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- BAM_FPROPER_PAIR,0,BAM_FMREVERSE,0, 0,BAM_FMUNMAP,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, BAM_FDUP,0,BAM_FQCFAIL,0, 0,0,0,0, 0,0,0,0,
- BAM_FPAIRED,0,BAM_FREVERSE,BAM_FSECONDARY, 0,BAM_FUNMAP,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
- 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0
-};
-
-char *bam_nt16_rev_table = "=ACMGRSVTWYHKDBN";
-
-struct __tamFile_t {
- gzFile fp;
- kstream_t *ks;
- kstring_t *str;
- uint64_t n_lines;
- int is_first;
-};
-
-char **__bam_get_lines(const char *fn, int *_n) // for bam_plcmd.c only
-{
- char **list = 0, *s;
- int n = 0, dret, m = 0;
- gzFile fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");
- kstream_t *ks;
- kstring_t *str;
- str = (kstring_t*)calloc(1, sizeof(kstring_t));
- ks = ks_init(fp);
- while (ks_getuntil(ks, '\n', str, &dret) > 0) {
- if (n == m) {
- m = m? m << 1 : 16;
- list = (char**)realloc(list, m * sizeof(char*));
- }
- if (str->s[str->l-1] == '\r')
- str->s[--str->l] = '\0';
- s = list[n++] = (char*)calloc(str->l + 1, 1);
- strcpy(s, str->s);
- }
- ks_destroy(ks);
- gzclose(fp);
- free(str->s); free(str);
- *_n = n;
- return list;
-}
-
-static bam_header_t *hash2header(const kh_ref_t *hash)
-{
- bam_header_t *header;
- khiter_t k;
- header = bam_header_init();
- header->n_targets = kh_size(hash);
- header->target_name = (char**)calloc(kh_size(hash), sizeof(char*));
- header->target_len = (uint32_t*)calloc(kh_size(hash), 4);
- for (k = kh_begin(hash); k != kh_end(hash); ++k) {
- if (kh_exist(hash, k)) {
- int i = (int)kh_value(hash, k);
- header->target_name[i] = (char*)kh_key(hash, k);
- header->target_len[i] = kh_value(hash, k)>>32;
- }
- }
- bam_init_header_hash(header);
- return header;
-}
-bam_header_t *sam_header_read2(const char *fn)
-{
- bam_header_t *header;
- int c, dret, ret, error = 0;
- gzFile fp;
- kstream_t *ks;
- kstring_t *str;
- kh_ref_t *hash;
- khiter_t k;
- if (fn == 0) return 0;
- fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");
- if (fp == 0) return 0;
- hash = kh_init(ref);
- ks = ks_init(fp);
- str = (kstring_t*)calloc(1, sizeof(kstring_t));
- while (ks_getuntil(ks, 0, str, &dret) > 0) {
- char *s = strdup(str->s);
- int len, i;
- i = kh_size(hash);
- ks_getuntil(ks, 0, str, &dret);
- len = atoi(str->s);
- k = kh_put(ref, hash, s, &ret);
- if (ret == 0) {
- fprintf(stderr, "[sam_header_read2] duplicated sequence name: %s\n", s);
- error = 1;
- }
- kh_value(hash, k) = (uint64_t)len<<32 | i;
- if (dret != '\n')
- while ((c = ks_getc(ks)) != '\n' && c != -1);
- }
- ks_destroy(ks);
- gzclose(fp);
- free(str->s); free(str);
- fprintf(stderr, "[sam_header_read2] %d sequences loaded.\n", kh_size(hash));
- if (error) return 0;
- header = hash2header(hash);
- kh_destroy(ref, hash);
- return header;
-}
-static inline uint8_t *alloc_data(bam1_t *b, int size)
-{
- if (b->m_data < size) {
- b->m_data = size;
- kroundup32(b->m_data);
- b->data = (uint8_t*)realloc(b->data, b->m_data);
- }
- return b->data;
-}
-static inline void parse_error(int64_t n_lines, const char * __restrict msg)
-{
- fprintf(stderr, "Parse error at line %lld: %s\n", (long long)n_lines, msg);
- abort();
-}
-static inline void append_text(bam_header_t *header, kstring_t *str)
-{
- size_t x = header->l_text, y = header->l_text + str->l + 2; // 2 = 1 byte dret + 1 byte null
- kroundup32(x); kroundup32(y);
- if (x < y)
- {
- header->n_text = y;
- header->text = (char*)realloc(header->text, y);
- if ( !header->text )
- {
- fprintf(stderr,"realloc failed to alloc %ld bytes\n", y);
- abort();
- }
- }
- // Sanity check
- if ( header->l_text+str->l+1 >= header->n_text )
- {
- fprintf(stderr,"append_text FIXME: %ld>=%ld, x=%ld,y=%ld\n", header->l_text+str->l+1,header->n_text,x,y);
- abort();
- }
- strncpy(header->text + header->l_text, str->s, str->l+1); // we cannot use strcpy() here.
- header->l_text += str->l + 1;
- header->text[header->l_text] = 0;
-}
-
-int sam_header_parse(bam_header_t *h)
-{
- char **tmp;
- int i;
- free(h->target_len); free(h->target_name);
- h->n_targets = 0; h->target_len = 0; h->target_name = 0;
- if (h->l_text < 3) return 0;
- if (h->dict == 0) h->dict = sam_header_parse2(h->text);
- tmp = sam_header2list(h->dict, "SQ", "SN", &h->n_targets);
- if (h->n_targets == 0) return 0;
- h->target_name = calloc(h->n_targets, sizeof(void*));
- for (i = 0; i < h->n_targets; ++i)
- h->target_name[i] = strdup(tmp[i]);
- free(tmp);
- tmp = sam_header2list(h->dict, "SQ", "LN", &h->n_targets);
- h->target_len = calloc(h->n_targets, 4);
- for (i = 0; i < h->n_targets; ++i)
- h->target_len[i] = atoi(tmp[i]);
- free(tmp);
- return h->n_targets;
-}
-
-bam_header_t *sam_header_read(tamFile fp)
-{
- int ret, dret;
- bam_header_t *header = bam_header_init();
- kstring_t *str = fp->str;
- while ((ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret)) >= 0 && str->s[0] == '@') { // skip header
- str->s[str->l] = dret; // note that str->s is NOT null terminated!!
- append_text(header, str);
- if (dret != '\n') {
- ret = ks_getuntil(fp->ks, '\n', str, &dret);
- str->s[str->l] = '\n'; // NOT null terminated!!
- append_text(header, str);
- }
- ++fp->n_lines;
- }
- sam_header_parse(header);
- bam_init_header_hash(header);
- fp->is_first = 1;
- return header;
-}
-
-int sam_read1(tamFile fp, bam_header_t *header, bam1_t *b)
-{
- int ret, doff, doff0, dret, z = 0;
- bam1_core_t *c = &b->core;
- kstring_t *str = fp->str;
- kstream_t *ks = fp->ks;
-
- if (fp->is_first) {
- fp->is_first = 0;
- ret = str->l;
- } else {
- do { // special consideration for empty lines
- ret = ks_getuntil(fp->ks, KS_SEP_TAB, str, &dret);
- if (ret >= 0) z += str->l + 1;
- } while (ret == 0);
- }
- if (ret < 0) return -1;
- ++fp->n_lines;
- doff = 0;
-
- { // name
- c->l_qname = strlen(str->s) + 1;
- memcpy(alloc_data(b, doff + c->l_qname) + doff, str->s, c->l_qname);
- doff += c->l_qname;
- }
- { // flag
- long flag;
- char *s;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
- flag = strtol((char*)str->s, &s, 0);
- if (*s) { // not the end of the string
- flag = 0;
- for (s = str->s; *s; ++s)
- flag |= bam_char2flag_table[(int)*s];
- }
- c->flag = flag;
- }
- { // tid, pos, qual
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->tid = bam_get_tid(header, str->s);
- if (c->tid < 0 && strcmp(str->s, "*")) {
- if (header->n_targets == 0) {
- fprintf(stderr, "[sam_read1] missing header? Abort!\n");
- exit(1);
- } else fprintf(stderr, "[sam_read1] reference '%s' is recognized as '*'.\n", str->s);
- }
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->pos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1; c->qual = isdigit(str->s[0])? atoi(str->s) : 0;
- if (ret < 0) return -2;
- }
- { // cigar
- char *s, *t;
- int i, op;
- long x;
- c->n_cigar = 0;
- if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -3;
- z += str->l + 1;
- if (str->s[0] != '*') {
- for (s = str->s; *s; ++s) {
- if ((isalpha(*s)) || (*s=='=')) ++c->n_cigar;
- else if (!isdigit(*s)) parse_error(fp->n_lines, "invalid CIGAR character");
- }
- b->data = alloc_data(b, doff + c->n_cigar * 4);
- for (i = 0, s = str->s; i != c->n_cigar; ++i) {
- x = strtol(s, &t, 10);
- op = toupper(*t);
- if (op == 'M') op = BAM_CMATCH;
- else if (op == 'I') op = BAM_CINS;
- else if (op == 'D') op = BAM_CDEL;
- else if (op == 'N') op = BAM_CREF_SKIP;
- else if (op == 'S') op = BAM_CSOFT_CLIP;
- else if (op == 'H') op = BAM_CHARD_CLIP;
- else if (op == 'P') op = BAM_CPAD;
- else if (op == '=') op = BAM_CEQUAL;
- else if (op == 'X') op = BAM_CDIFF;
- else parse_error(fp->n_lines, "invalid CIGAR operation");
- s = t + 1;
- bam1_cigar(b)[i] = x << BAM_CIGAR_SHIFT | op;
- }
- if (*s) parse_error(fp->n_lines, "unmatched CIGAR operation");
- c->bin = bam_reg2bin(c->pos, bam_calend(c, bam1_cigar(b)));
- doff += c->n_cigar * 4;
- } else {
- if (!(c->flag&BAM_FUNMAP)) {
- fprintf(stderr, "Parse warning at line %lld: mapped sequence without CIGAR\n", (long long)fp->n_lines);
- c->flag |= BAM_FUNMAP;
- }
- c->bin = bam_reg2bin(c->pos, c->pos + 1);
- }
- }
- { // mtid, mpos, isize
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
- c->mtid = strcmp(str->s, "=")? bam_get_tid(header, str->s) : c->tid;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
- c->mpos = isdigit(str->s[0])? atoi(str->s) - 1 : -1;
- ret = ks_getuntil(ks, KS_SEP_TAB, str, &dret); z += str->l + 1;
- c->isize = (str->s[0] == '-' || isdigit(str->s[0]))? atoi(str->s) : 0;
- if (ret < 0) return -4;
- }
- { // seq and qual
- int i;
- uint8_t *p = 0;
- if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -5; // seq
- z += str->l + 1;
- if (strcmp(str->s, "*")) {
- c->l_qseq = strlen(str->s);
- if (c->n_cigar && c->l_qseq != (int32_t)bam_cigar2qlen(c, bam1_cigar(b))) {
- fprintf(stderr, "Line %ld, sequence length %i vs %i from CIGAR\n",
- (long)fp->n_lines, c->l_qseq, (int32_t)bam_cigar2qlen(c, bam1_cigar(b)));
- parse_error(fp->n_lines, "CIGAR and sequence length are inconsistent");
- }
- p = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;
- memset(p, 0, (c->l_qseq+1)/2);
- for (i = 0; i < c->l_qseq; ++i)
- p[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);
- } else c->l_qseq = 0;
- if (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual
- z += str->l + 1;
- if (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))
- parse_error(fp->n_lines, "sequence and quality are inconsistent");
- p += (c->l_qseq+1)/2;
- if (strcmp(str->s, "*") == 0) for (i = 0; i < c->l_qseq; ++i) p[i] = 0xff;
- else for (i = 0; i < c->l_qseq; ++i) p[i] = str->s[i] - 33;
- doff += c->l_qseq + (c->l_qseq+1)/2;
- }
- doff0 = doff;
- if (dret != '\n' && dret != '\r') { // aux
- while (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) {
- uint8_t *s, type, key[2];
- z += str->l + 1;
- if (str->l < 6 || str->s[2] != ':' || str->s[4] != ':')
- parse_error(fp->n_lines, "missing colon in auxiliary data");
- key[0] = str->s[0]; key[1] = str->s[1];
- type = str->s[3];
- s = alloc_data(b, doff + 3) + doff;
- s[0] = key[0]; s[1] = key[1]; s += 2; doff += 2;
- if (type == 'A' || type == 'a' || type == 'c' || type == 'C') { // c and C for backward compatibility
- s = alloc_data(b, doff + 2) + doff;
- *s++ = 'A'; *s = str->s[5];
- doff += 2;
- } else if (type == 'I' || type == 'i') {
- long long x;
- s = alloc_data(b, doff + 5) + doff;
- x = (long long)atoll(str->s + 5);
- if (x < 0) {
- if (x >= -127) {
- *s++ = 'c'; *(int8_t*)s = (int8_t)x;
- s += 1; doff += 2;
- } else if (x >= -32767) {
- *s++ = 's'; *(int16_t*)s = (int16_t)x;
- s += 2; doff += 3;
- } else {
- *s++ = 'i'; *(int32_t*)s = (int32_t)x;
- s += 4; doff += 5;
- if (x < -2147483648ll)
- fprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.",
- (long long)fp->n_lines, x);
- }
- } else {
- if (x <= 255) {
- *s++ = 'C'; *s++ = (uint8_t)x;
- doff += 2;
- } else if (x <= 65535) {
- *s++ = 'S'; *(uint16_t*)s = (uint16_t)x;
- s += 2; doff += 3;
- } else {
- *s++ = 'I'; *(uint32_t*)s = (uint32_t)x;
- s += 4; doff += 5;
- if (x > 4294967295ll)
- fprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.",
- (long long)fp->n_lines, x);
- }
- }
- } else if (type == 'f') {
- s = alloc_data(b, doff + 5) + doff;
- *s++ = 'f';
- *(float*)s = (float)atof(str->s + 5);
- s += 4; doff += 5;
- } else if (type == 'd') {
- s = alloc_data(b, doff + 9) + doff;
- *s++ = 'd';
- *(float*)s = (float)atof(str->s + 9);
- s += 8; doff += 9;
- } else if (type == 'Z' || type == 'H') {
- int size = 1 + (str->l - 5) + 1;
- if (type == 'H') { // check whether the hex string is valid
- int i;
- if ((str->l - 5) % 2 == 1) parse_error(fp->n_lines, "length of the hex string not even");
- for (i = 0; i < str->l - 5; ++i) {
- int c = toupper(str->s[5 + i]);
- if (!((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F')))
- parse_error(fp->n_lines, "invalid hex character");
- }
- }
- s = alloc_data(b, doff + size) + doff;
- *s++ = type;
- memcpy(s, str->s + 5, str->l - 5);
- s[str->l - 5] = 0;
- doff += size;
- } else if (type == 'B') {
- int32_t n = 0, Bsize, k = 0, size;
- char *p;
- if (str->l < 8) parse_error(fp->n_lines, "too few values in aux type B");
- Bsize = bam_aux_type2size(str->s[5]); // the size of each element
- for (p = (char*)str->s + 6; *p; ++p) // count the number of elements in the array
- if (*p == ',') ++n;
- p = str->s + 7; // now p points to the first number in the array
- size = 6 + Bsize * n; // total number of bytes allocated to this tag
- s = alloc_data(b, doff + 6 * Bsize * n) + doff; // allocate memory
- *s++ = 'B'; *s++ = str->s[5];
- memcpy(s, &n, 4); s += 4; // write the number of elements
- if (str->s[5] == 'c') while (p < str->s + str->l) ((int8_t*)s)[k++] = (int8_t)strtol(p, &p, 0), ++p;
- else if (str->s[5] == 'C') while (p < str->s + str->l) ((uint8_t*)s)[k++] = (uint8_t)strtol(p, &p, 0), ++p;
- else if (str->s[5] == 's') while (p < str->s + str->l) ((int16_t*)s)[k++] = (int16_t)strtol(p, &p, 0), ++p; // FIXME: avoid unaligned memory
- else if (str->s[5] == 'S') while (p < str->s + str->l) ((uint16_t*)s)[k++] = (uint16_t)strtol(p, &p, 0), ++p;
- else if (str->s[5] == 'i') while (p < str->s + str->l) ((int32_t*)s)[k++] = (int32_t)strtol(p, &p, 0), ++p;
- else if (str->s[5] == 'I') while (p < str->s + str->l) ((uint32_t*)s)[k++] = (uint32_t)strtol(p, &p, 0), ++p;
- else if (str->s[5] == 'f') while (p < str->s + str->l) ((float*)s)[k++] = (float)strtod(p, &p), ++p;
- else parse_error(fp->n_lines, "unrecognized array type");
- s += Bsize * n; doff += size;
- } else parse_error(fp->n_lines, "unrecognized type");
- if (dret == '\n' || dret == '\r') break;
- }
- }
- b->l_aux = doff - doff0;
- b->data_len = doff;
- return z;
-}
-
-tamFile sam_open(const char *fn)
-{
- tamFile fp;
- gzFile gzfp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "rb") : gzopen(fn, "rb");
- if (gzfp == 0) return 0;
- fp = (tamFile)calloc(1, sizeof(struct __tamFile_t));
- fp->str = (kstring_t*)calloc(1, sizeof(kstring_t));
- fp->fp = gzfp;
- fp->ks = ks_init(fp->fp);
- return fp;
-}
-
-void sam_close(tamFile fp)
-{
- if (fp) {
- ks_destroy(fp->ks);
- gzclose(fp->fp);
- free(fp->str->s); free(fp->str);
- free(fp);
- }
-}
diff --git a/src/samtools-0.1.18/bam_index.c b/src/samtools-0.1.18/bam_index.c
deleted file mode 100644
index 9610a26..0000000
--- a/src/samtools-0.1.18/bam_index.c
+++ /dev/null
@@ -1,719 +0,0 @@
-#include <ctype.h>
-#include <assert.h>
-#include "bam.h"
-#include "khash.h"
-#include "ksort.h"
-#include "bam_endian.h"
-#ifdef _USE_KNETFILE
-#include "knetfile.h"
-#endif
-
-/*!
- @header
-
- Alignment indexing. Before indexing, BAM must be sorted based on the
- leftmost coordinate of alignments. In indexing, BAM uses two indices:
- a UCSC binning index and a simple linear index. The binning index is
- efficient for alignments spanning long distance, while the auxiliary
- linear index helps to reduce unnecessary seek calls especially for
- short alignments.
-
- The UCSC binning scheme was suggested by Richard Durbin and Lincoln
- Stein and is explained by Kent et al. (2002). In this scheme, each bin
- represents a contiguous genomic region which can be fully contained in
- another bin; each alignment is associated with a bin which represents
- the smallest region containing the entire alignment. The binning
- scheme is essentially another representation of R-tree. A distinct bin
- uniquely corresponds to a distinct internal node in a R-tree. Bin A is
- a child of Bin B if region A is contained in B.
-
- In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin
- 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp,
- 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to
- find the alignments overlapped with a region [rbeg,rend), we need to
- calculate the list of bins that may be overlapped the region and test
- the alignments in the bins to confirm the overlaps. If the specified
- region is short, typically only a few alignments in six bins need to
- be retrieved. The overlapping alignments can be quickly fetched.
-
- */
-
-#define BAM_MIN_CHUNK_GAP 32768
-// 1<<14 is the size of minimum bin.
-#define BAM_LIDX_SHIFT 14
-
-#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1
-
-typedef struct {
- uint64_t u, v;
-} pair64_t;
-
-#define pair64_lt(a,b) ((a).u < (b).u)
-KSORT_INIT(off, pair64_t, pair64_lt)
-
-typedef struct {
- uint32_t m, n;
- pair64_t *list;
-} bam_binlist_t;
-
-typedef struct {
- int32_t n, m;
- uint64_t *offset;
-} bam_lidx_t;
-
-KHASH_MAP_INIT_INT(i, bam_binlist_t)
-
-struct __bam_index_t {
- int32_t n;
- uint64_t n_no_coor; // unmapped reads without coordinate
- khash_t(i) **index;
- bam_lidx_t *index2;
-};
-
-// requirement: len <= LEN_MASK
-static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)
-{
- khint_t k;
- bam_binlist_t *l;
- int ret;
- k = kh_put(i, h, bin, &ret);
- l = &kh_value(h, k);
- if (ret) { // not present
- l->m = 1; l->n = 0;
- l->list = (pair64_t*)calloc(l->m, 16);
- }
- if (l->n == l->m) {
- l->m <<= 1;
- l->list = (pair64_t*)realloc(l->list, l->m * 16);
- }
- l->list[l->n].u = beg; l->list[l->n++].v = end;
-}
-
-static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset)
-{
- int i, beg, end;
- beg = b->core.pos >> BAM_LIDX_SHIFT;
- end = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT;
- if (index2->m < end + 1) {
- int old_m = index2->m;
- index2->m = end + 1;
- kroundup32(index2->m);
- index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
- memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
- }
- if (beg == end) {
- if (index2->offset[beg] == 0) index2->offset[beg] = offset;
- } else {
- for (i = beg; i <= end; ++i)
- if (index2->offset[i] == 0) index2->offset[i] = offset;
- }
- index2->n = end + 1;
-}
-
-static void merge_chunks(bam_index_t *idx)
-{
-#if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
- khash_t(i) *index;
- int i, l, m;
- khint_t k;
- for (i = 0; i < idx->n; ++i) {
- index = idx->index[i];
- for (k = kh_begin(index); k != kh_end(index); ++k) {
- bam_binlist_t *p;
- if (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;
- p = &kh_value(index, k);
- m = 0;
- for (l = 1; l < p->n; ++l) {
-#ifdef BAM_TRUE_OFFSET
- if (p->list[m].v + BAM_MIN_CHUNK_GAP > p->list[l].u) p->list[m].v = p->list[l].v;
-#else
- if (p->list[m].v>>16 == p->list[l].u>>16) p->list[m].v = p->list[l].v;
-#endif
- else p->list[++m] = p->list[l];
- } // ~for(l)
- p->n = m + 1;
- } // ~for(k)
- } // ~for(i)
-#endif // defined(BAM_TRUE_OFFSET) || defined(BAM_BGZF)
-}
-
-static void fill_missing(bam_index_t *idx)
-{
- int i, j;
- for (i = 0; i < idx->n; ++i) {
- bam_lidx_t *idx2 = &idx->index2[i];
- for (j = 1; j < idx2->n; ++j)
- if (idx2->offset[j] == 0)
- idx2->offset[j] = idx2->offset[j-1];
- }
-}
-
-bam_index_t *bam_index_core(bamFile fp)
-{
- bam1_t *b;
- bam_header_t *h;
- int i, ret;
- bam_index_t *idx;
- uint32_t last_bin, save_bin;
- int32_t last_coor, last_tid, save_tid;
- bam1_core_t *c;
- uint64_t save_off, last_off, n_mapped, n_unmapped, off_beg, off_end, n_no_coor;
-
- idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
- b = (bam1_t*)calloc(1, sizeof(bam1_t));
- h = bam_header_read(fp);
- c = &b->core;
-
- idx->n = h->n_targets;
- bam_header_destroy(h);
- idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
- for (i = 0; i < idx->n; ++i) idx->index[i] = kh_init(i);
- idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
-
- save_bin = save_tid = last_tid = last_bin = 0xffffffffu;
- save_off = last_off = bam_tell(fp); last_coor = 0xffffffffu;
- n_mapped = n_unmapped = n_no_coor = off_end = 0;
- off_beg = off_end = bam_tell(fp);
- while ((ret = bam_read1(fp, b)) >= 0) {
- if (c->tid < 0) ++n_no_coor;
- if (last_tid < c->tid || (last_tid >= 0 && c->tid < 0)) { // change of chromosomes
- last_tid = c->tid;
- last_bin = 0xffffffffu;
- } else if ((uint32_t)last_tid > (uint32_t)c->tid) {
- fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %d-th chr > %d-th chr\n",
- bam1_qname(b), last_tid+1, c->tid+1);
- return NULL;
- } else if ((int32_t)c->tid >= 0 && last_coor > c->pos) {
- fprintf(stderr, "[bam_index_core] the alignment is not sorted (%s): %u > %u in %d-th chr\n",
- bam1_qname(b), last_coor, c->pos, c->tid+1);
- return NULL;
- }
- if (c->tid >= 0 && !(c->flag & BAM_FUNMAP)) insert_offset2(&idx->index2[b->core.tid], b, last_off);
- if (c->bin != last_bin) { // then possibly write the binning index
- if (save_bin != 0xffffffffu) // save_bin==0xffffffffu only happens to the first record
- insert_offset(idx->index[save_tid], save_bin, save_off, last_off);
- if (last_bin == 0xffffffffu && save_tid != 0xffffffffu) { // write the meta element
- off_end = last_off;
- insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, off_end);
- insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
- n_mapped = n_unmapped = 0;
- off_beg = off_end;
- }
- save_off = last_off;
- save_bin = last_bin = c->bin;
- save_tid = c->tid;
- if (save_tid < 0) break;
- }
- if (bam_tell(fp) <= last_off) {
- fprintf(stderr, "[bam_index_core] bug in BGZF/RAZF: %llx < %llx\n",
- (unsigned long long)bam_tell(fp), (unsigned long long)last_off);
- return NULL;
- }
- if (c->flag & BAM_FUNMAP) ++n_unmapped;
- else ++n_mapped;
- last_off = bam_tell(fp);
- last_coor = b->core.pos;
- }
- if (save_tid >= 0) {
- insert_offset(idx->index[save_tid], save_bin, save_off, bam_tell(fp));
- insert_offset(idx->index[save_tid], BAM_MAX_BIN, off_beg, bam_tell(fp));
- insert_offset(idx->index[save_tid], BAM_MAX_BIN, n_mapped, n_unmapped);
- }
- merge_chunks(idx);
- fill_missing(idx);
- if (ret >= 0) {
- while ((ret = bam_read1(fp, b)) >= 0) {
- ++n_no_coor;
- if (c->tid >= 0 && n_no_coor) {
- fprintf(stderr, "[bam_index_core] the alignment is not sorted: reads without coordinates prior to reads with coordinates.\n");
- return NULL;
- }
- }
- }
- if (ret < -1) fprintf(stderr, "[bam_index_core] truncated file? Continue anyway. (%d)\n", ret);
- free(b->data); free(b);
- idx->n_no_coor = n_no_coor;
- return idx;
-}
-
-void bam_index_destroy(bam_index_t *idx)
-{
- khint_t k;
- int i;
- if (idx == 0) return;
- for (i = 0; i < idx->n; ++i) {
- khash_t(i) *index = idx->index[i];
- bam_lidx_t *index2 = idx->index2 + i;
- for (k = kh_begin(index); k != kh_end(index); ++k) {
- if (kh_exist(index, k))
- free(kh_value(index, k).list);
- }
- kh_destroy(i, index);
- free(index2->offset);
- }
- free(idx->index); free(idx->index2);
- free(idx);
-}
-
-void bam_index_save(const bam_index_t *idx, FILE *fp)
-{
- int32_t i, size;
- khint_t k;
- fwrite("BAI\1", 1, 4, fp);
- if (bam_is_be) {
- uint32_t x = idx->n;
- fwrite(bam_swap_endian_4p(&x), 4, 1, fp);
- } else fwrite(&idx->n, 4, 1, fp);
- for (i = 0; i < idx->n; ++i) {
- khash_t(i) *index = idx->index[i];
- bam_lidx_t *index2 = idx->index2 + i;
- // write binning index
- size = kh_size(index);
- if (bam_is_be) { // big endian
- uint32_t x = size;
- fwrite(bam_swap_endian_4p(&x), 4, 1, fp);
- } else fwrite(&size, 4, 1, fp);
- for (k = kh_begin(index); k != kh_end(index); ++k) {
- if (kh_exist(index, k)) {
- bam_binlist_t *p = &kh_value(index, k);
- if (bam_is_be) { // big endian
- uint32_t x;
- x = kh_key(index, k); fwrite(bam_swap_endian_4p(&x), 4, 1, fp);
- x = p->n; fwrite(bam_swap_endian_4p(&x), 4, 1, fp);
- for (x = 0; (int)x < p->n; ++x) {
- bam_swap_endian_8p(&p->list[x].u);
- bam_swap_endian_8p(&p->list[x].v);
- }
- fwrite(p->list, 16, p->n, fp);
- for (x = 0; (int)x < p->n; ++x) {
- bam_swap_endian_8p(&p->list[x].u);
- bam_swap_endian_8p(&p->list[x].v);
- }
- } else {
- fwrite(&kh_key(index, k), 4, 1, fp);
- fwrite(&p->n, 4, 1, fp);
- fwrite(p->list, 16, p->n, fp);
- }
- }
- }
- // write linear index (index2)
- if (bam_is_be) {
- int x = index2->n;
- fwrite(bam_swap_endian_4p(&x), 4, 1, fp);
- } else fwrite(&index2->n, 4, 1, fp);
- if (bam_is_be) { // big endian
- int x;
- for (x = 0; (int)x < index2->n; ++x)
- bam_swap_endian_8p(&index2->offset[x]);
- fwrite(index2->offset, 8, index2->n, fp);
- for (x = 0; (int)x < index2->n; ++x)
- bam_swap_endian_8p(&index2->offset[x]);
- } else fwrite(index2->offset, 8, index2->n, fp);
- }
- { // write the number of reads coor-less records.
- uint64_t x = idx->n_no_coor;
- if (bam_is_be) bam_swap_endian_8p(&x);
- fwrite(&x, 8, 1, fp);
- }
- fflush(fp);
-}
-
-static bam_index_t *bam_index_load_core(FILE *fp)
-{
- int i;
- char magic[4];
- bam_index_t *idx;
- if (fp == 0) {
- fprintf(stderr, "[bam_index_load_core] fail to load index.\n");
- return 0;
- }
- fread(magic, 1, 4, fp);
- if (strncmp(magic, "BAI\1", 4)) {
- fprintf(stderr, "[bam_index_load] wrong magic number.\n");
- fclose(fp);
- return 0;
- }
- idx = (bam_index_t*)calloc(1, sizeof(bam_index_t));
- fread(&idx->n, 4, 1, fp);
- if (bam_is_be) bam_swap_endian_4p(&idx->n);
- idx->index = (khash_t(i)**)calloc(idx->n, sizeof(void*));
- idx->index2 = (bam_lidx_t*)calloc(idx->n, sizeof(bam_lidx_t));
- for (i = 0; i < idx->n; ++i) {
- khash_t(i) *index;
- bam_lidx_t *index2 = idx->index2 + i;
- uint32_t key, size;
- khint_t k;
- int j, ret;
- bam_binlist_t *p;
- index = idx->index[i] = kh_init(i);
- // load binning index
- fread(&size, 4, 1, fp);
- if (bam_is_be) bam_swap_endian_4p(&size);
- for (j = 0; j < (int)size; ++j) {
- fread(&key, 4, 1, fp);
- if (bam_is_be) bam_swap_endian_4p(&key);
- k = kh_put(i, index, key, &ret);
- p = &kh_value(index, k);
- fread(&p->n, 4, 1, fp);
- if (bam_is_be) bam_swap_endian_4p(&p->n);
- p->m = p->n;
- p->list = (pair64_t*)malloc(p->m * 16);
- fread(p->list, 16, p->n, fp);
- if (bam_is_be) {
- int x;
- for (x = 0; x < p->n; ++x) {
- bam_swap_endian_8p(&p->list[x].u);
- bam_swap_endian_8p(&p->list[x].v);
- }
- }
- }
- // load linear index
- fread(&index2->n, 4, 1, fp);
- if (bam_is_be) bam_swap_endian_4p(&index2->n);
- index2->m = index2->n;
- index2->offset = (uint64_t*)calloc(index2->m, 8);
- fread(index2->offset, index2->n, 8, fp);
- if (bam_is_be)
- for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
- }
- if (fread(&idx->n_no_coor, 8, 1, fp) == 0) idx->n_no_coor = 0;
- if (bam_is_be) bam_swap_endian_8p(&idx->n_no_coor);
- return idx;
-}
-
-bam_index_t *bam_index_load_local(const char *_fn)
-{
- FILE *fp;
- char *fnidx, *fn;
-
- if (strstr(_fn, "ftp://") == _fn || strstr(_fn, "http://") == _fn) {
- const char *p;
- int l = strlen(_fn);
- for (p = _fn + l - 1; p >= _fn; --p)
- if (*p == '/') break;
- fn = strdup(p + 1);
- } else fn = strdup(_fn);
- fnidx = (char*)calloc(strlen(fn) + 5, 1);
- strcpy(fnidx, fn); strcat(fnidx, ".bai");
- fp = fopen(fnidx, "rb");
- if (fp == 0) { // try "{base}.bai"
- char *s = strstr(fn, "bam");
- if (s == fn + strlen(fn) - 3) {
- strcpy(fnidx, fn);
- fnidx[strlen(fn)-1] = 'i';
- fp = fopen(fnidx, "rb");
- }
- }
- free(fnidx); free(fn);
- if (fp) {
- bam_index_t *idx = bam_index_load_core(fp);
- fclose(fp);
- return idx;
- } else return 0;
-}
-
-#ifdef _USE_KNETFILE
-static void download_from_remote(const char *url)
-{
- const int buf_size = 1 * 1024 * 1024;
- char *fn;
- FILE *fp;
- uint8_t *buf;
- knetFile *fp_remote;
- int l;
- if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return;
- l = strlen(url);
- for (fn = (char*)url + l - 1; fn >= url; --fn)
- if (*fn == '/') break;
- ++fn; // fn now points to the file name
- fp_remote = knet_open(url, "r");
- if (fp_remote == 0) {
- fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
- return;
- }
- if ((fp = fopen(fn, "wb")) == 0) {
- fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
- knet_close(fp_remote);
- return;
- }
- buf = (uint8_t*)calloc(buf_size, 1);
- while ((l = knet_read(fp_remote, buf, buf_size)) != 0)
- fwrite(buf, 1, l, fp);
- free(buf);
- fclose(fp);
- knet_close(fp_remote);
-}
-#else
-static void download_from_remote(const char *url)
-{
- return;
-}
-#endif
-
-bam_index_t *bam_index_load(const char *fn)
-{
- bam_index_t *idx;
- idx = bam_index_load_local(fn);
- if (idx == 0 && (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)) {
- char *fnidx = calloc(strlen(fn) + 5, 1);
- strcat(strcpy(fnidx, fn), ".bai");
- fprintf(stderr, "[bam_index_load] attempting to download the remote index file.\n");
- download_from_remote(fnidx);
- idx = bam_index_load_local(fn);
- }
- if (idx == 0) fprintf(stderr, "[bam_index_load] fail to load BAM index.\n");
- return idx;
-}
-
-int bam_index_build2(const char *fn, const char *_fnidx)
-{
- char *fnidx;
- FILE *fpidx;
- bamFile fp;
- bam_index_t *idx;
- if ((fp = bam_open(fn, "r")) == 0) {
- fprintf(stderr, "[bam_index_build2] fail to open the BAM file.\n");
- return -1;
- }
- idx = bam_index_core(fp);
- bam_close(fp);
- if(idx == 0) {
- fprintf(stderr, "[bam_index_build2] fail to index the BAM file.\n");
- return -1;
- }
- if (_fnidx == 0) {
- fnidx = (char*)calloc(strlen(fn) + 5, 1);
- strcpy(fnidx, fn); strcat(fnidx, ".bai");
- } else fnidx = strdup(_fnidx);
- fpidx = fopen(fnidx, "wb");
- if (fpidx == 0) {
- fprintf(stderr, "[bam_index_build2] fail to create the index file.\n");
- free(fnidx);
- return -1;
- }
- bam_index_save(idx, fpidx);
- bam_index_destroy(idx);
- fclose(fpidx);
- free(fnidx);
- return 0;
-}
-
-int bam_index_build(const char *fn)
-{
- return bam_index_build2(fn, 0);
-}
-
-int bam_index(int argc, char *argv[])
-{
- if (argc < 2) {
- fprintf(stderr, "Usage: samtools index <in.bam> [out.index]\n");
- return 1;
- }
- if (argc >= 3) bam_index_build2(argv[1], argv[2]);
- else bam_index_build(argv[1]);
- return 0;
-}
-
-int bam_idxstats(int argc, char *argv[])
-{
- bam_index_t *idx;
- bam_header_t *header;
- bamFile fp;
- int i;
- if (argc < 2) {
- fprintf(stderr, "Usage: samtools idxstats <in.bam>\n");
- return 1;
- }
- fp = bam_open(argv[1], "r");
- if (fp == 0) { fprintf(stderr, "[%s] fail to open BAM.\n", __func__); return 1; }
- header = bam_header_read(fp);
- bam_close(fp);
- idx = bam_index_load(argv[1]);
- if (idx == 0) { fprintf(stderr, "[%s] fail to load the index.\n", __func__); return 1; }
- for (i = 0; i < idx->n; ++i) {
- khint_t k;
- khash_t(i) *h = idx->index[i];
- printf("%s\t%d", header->target_name[i], header->target_len[i]);
- k = kh_get(i, h, BAM_MAX_BIN);
- if (k != kh_end(h))
- printf("\t%llu\t%llu", (long long)kh_val(h, k).list[1].u, (long long)kh_val(h, k).list[1].v);
- else printf("\t0\t0");
- putchar('\n');
- }
- printf("*\t0\t0\t%llu\n", (long long)idx->n_no_coor);
- bam_header_destroy(header);
- bam_index_destroy(idx);
- return 0;
-}
-
-static inline int reg2bins(uint32_t beg, uint32_t end, uint16_t list[BAM_MAX_BIN])
-{
- int i = 0, k;
- if (beg >= end) return 0;
- if (end >= 1u<<29) end = 1u<<29;
- --end;
- list[i++] = 0;
- for (k = 1 + (beg>>26); k <= 1 + (end>>26); ++k) list[i++] = k;
- for (k = 9 + (beg>>23); k <= 9 + (end>>23); ++k) list[i++] = k;
- for (k = 73 + (beg>>20); k <= 73 + (end>>20); ++k) list[i++] = k;
- for (k = 585 + (beg>>17); k <= 585 + (end>>17); ++k) list[i++] = k;
- for (k = 4681 + (beg>>14); k <= 4681 + (end>>14); ++k) list[i++] = k;
- return i;
-}
-
-static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
-{
- uint32_t rbeg = b->core.pos;
- uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
- return (rend > beg && rbeg < end);
-}
-
-struct __bam_iter_t {
- int from_first; // read from the first record; no random access
- int tid, beg, end, n_off, i, finished;
- uint64_t curr_off;
- pair64_t *off;
-};
-
-// bam_fetch helper function retrieves
-bam_iter_t bam_iter_query(const bam_index_t *idx, int tid, int beg, int end)
-{
- uint16_t *bins;
- int i, n_bins, n_off;
- pair64_t *off;
- khint_t k;
- khash_t(i) *index;
- uint64_t min_off;
- bam_iter_t iter = 0;
-
- if (beg < 0) beg = 0;
- if (end < beg) return 0;
- // initialize iter
- iter = calloc(1, sizeof(struct __bam_iter_t));
- iter->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;
- //
- bins = (uint16_t*)calloc(BAM_MAX_BIN, 2);
- n_bins = reg2bins(beg, end, bins);
- index = idx->index[tid];
- if (idx->index2[tid].n > 0) {
- min_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]
- : idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];
- if (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4
- int n = beg>>BAM_LIDX_SHIFT;
- if (n > idx->index2[tid].n) n = idx->index2[tid].n;
- for (i = n - 1; i >= 0; --i)
- if (idx->index2[tid].offset[i] != 0) break;
- if (i >= 0) min_off = idx->index2[tid].offset[i];
- }
- } else min_off = 0; // tabix 0.1.2 may produce such index files
- for (i = n_off = 0; i < n_bins; ++i) {
- if ((k = kh_get(i, index, bins[i])) != kh_end(index))
- n_off += kh_value(index, k).n;
- }
- if (n_off == 0) {
- free(bins); return iter;
- }
- off = (pair64_t*)calloc(n_off, 16);
- for (i = n_off = 0; i < n_bins; ++i) {
- if ((k = kh_get(i, index, bins[i])) != kh_end(index)) {
- int j;
- bam_binlist_t *p = &kh_value(index, k);
- for (j = 0; j < p->n; ++j)
- if (p->list[j].v > min_off) off[n_off++] = p->list[j];
- }
- }
- free(bins);
- if (n_off == 0) {
- free(off); return iter;
- }
- {
- bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
- int l;
- ks_introsort(off, n_off, off);
- // resolve completely contained adjacent blocks
- for (i = 1, l = 0; i < n_off; ++i)
- if (off[l].v < off[i].v)
- off[++l] = off[i];
- n_off = l + 1;
- // resolve overlaps between adjacent blocks; this may happen due to the merge in indexing
- for (i = 1; i < n_off; ++i)
- if (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;
- { // merge adjacent blocks
-#if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)
- for (i = 1, l = 0; i < n_off; ++i) {
-#ifdef BAM_TRUE_OFFSET
- if (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;
-#else
- if (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;
-#endif
- else off[++l] = off[i];
- }
- n_off = l + 1;
-#endif
- }
- bam_destroy1(b);
- }
- iter->n_off = n_off; iter->off = off;
- return iter;
-}
-
-pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)
-{ // for pysam compatibility
- bam_iter_t iter;
- pair64_t *off;
- iter = bam_iter_query(idx, tid, beg, end);
- off = iter->off; *cnt_off = iter->n_off;
- free(iter);
- return off;
-}
-
-void bam_iter_destroy(bam_iter_t iter)
-{
- if (iter) { free(iter->off); free(iter); }
-}
-
-int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)
-{
- int ret;
- if (iter && iter->finished) return -1;
- if (iter == 0 || iter->from_first) {
- ret = bam_read1(fp, b);
- if (ret < 0 && iter) iter->finished = 1;
- return ret;
- }
- if (iter->off == 0) return -1;
- for (;;) {
- if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk
- if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks
- if (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug
- if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek
- bam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);
- iter->curr_off = bam_tell(fp);
- }
- ++iter->i;
- }
- if ((ret = bam_read1(fp, b)) >= 0) {
- iter->curr_off = bam_tell(fp);
- if (b->core.tid != iter->tid || b->core.pos >= iter->end) { // no need to proceed
- ret = bam_validate1(NULL, b)? -1 : -5; // determine whether end of region or error
- break;
- }
- else if (is_overlap(iter->beg, iter->end, b)) return ret;
- } else break; // end of file or error
- }
- iter->finished = 1;
- return ret;
-}
-
-int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)
-{
- int ret;
- bam_iter_t iter;
- bam1_t *b;
- b = bam_init1();
- iter = bam_iter_query(idx, tid, beg, end);
- while ((ret = bam_iter_read(fp, iter, b)) >= 0) func(b, data);
- bam_iter_destroy(iter);
- bam_destroy1(b);
- return (ret == -1)? 0 : ret;
-}
diff --git a/src/samtools-0.1.18/bam_lpileup.c b/src/samtools-0.1.18/bam_lpileup.c
deleted file mode 100644
index d4dd63b..0000000
--- a/src/samtools-0.1.18/bam_lpileup.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <assert.h>
-#include "bam.h"
-#include "ksort.h"
-
-#define TV_GAP 2
-
-typedef struct __freenode_t {
- uint32_t level:28, cnt:4;
- struct __freenode_t *next;
-} freenode_t, *freenode_p;
-
-#define freenode_lt(a,b) ((a)->cnt < (b)->cnt || ((a)->cnt == (b)->cnt && (a)->level < (b)->level))
-KSORT_INIT(node, freenode_p, freenode_lt)
-
-/* Memory pool, similar to the one in bam_pileup.c */
-typedef struct {
- int cnt, n, max;
- freenode_t **buf;
-} mempool_t;
-
-static mempool_t *mp_init()
-{
- return (mempool_t*)calloc(1, sizeof(mempool_t));
-}
-static void mp_destroy(mempool_t *mp)
-{
- int k;
- for (k = 0; k < mp->n; ++k) free(mp->buf[k]);
- free(mp->buf); free(mp);
-}
-static inline freenode_t *mp_alloc(mempool_t *mp)
-{
- ++mp->cnt;
- if (mp->n == 0) return (freenode_t*)calloc(1, sizeof(freenode_t));
- else return mp->buf[--mp->n];
-}
-static inline void mp_free(mempool_t *mp, freenode_t *p)
-{
- --mp->cnt; p->next = 0; p->cnt = TV_GAP;
- if (mp->n == mp->max) {
- mp->max = mp->max? mp->max<<1 : 256;
- mp->buf = (freenode_t**)realloc(mp->buf, sizeof(freenode_t*) * mp->max);
- }
- mp->buf[mp->n++] = p;
-}
-
-/* core part */
-struct __bam_lplbuf_t {
- int max, n_cur, n_pre;
- int max_level, *cur_level, *pre_level;
- mempool_t *mp;
- freenode_t **aux, *head, *tail;
- int n_nodes, m_aux;
- bam_pileup_f func;
- void *user_data;
- bam_plbuf_t *plbuf;
-};
-
-void bam_lplbuf_reset(bam_lplbuf_t *buf)
-{
- freenode_t *p, *q;
- bam_plbuf_reset(buf->plbuf);
- for (p = buf->head; p->next;) {
- q = p->next;
- mp_free(buf->mp, p);
- p = q;
- }
- buf->head = buf->tail;
- buf->max_level = 0;
- buf->n_cur = buf->n_pre = 0;
- buf->n_nodes = 0;
-}
-
-static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
-{
- bam_lplbuf_t *tv = (bam_lplbuf_t*)data;
- freenode_t *p;
- int i, l, max_level;
- // allocate memory if necessary
- if (tv->max < n) { // enlarge
- tv->max = n;
- kroundup32(tv->max);
- tv->cur_level = (int*)realloc(tv->cur_level, sizeof(int) * tv->max);
- tv->pre_level = (int*)realloc(tv->pre_level, sizeof(int) * tv->max);
- }
- tv->n_cur = n;
- // update cnt
- for (p = tv->head; p->next; p = p->next)
- if (p->cnt > 0) --p->cnt;
- // calculate cur_level[]
- max_level = 0;
- for (i = l = 0; i < n; ++i) {
- const bam_pileup1_t *p = pl + i;
- if (p->is_head) {
- if (tv->head->next && tv->head->cnt == 0) { // then take a free slot
- freenode_t *p = tv->head->next;
- tv->cur_level[i] = tv->head->level;
- mp_free(tv->mp, tv->head);
- tv->head = p;
- --tv->n_nodes;
- } else tv->cur_level[i] = ++tv->max_level;
- } else {
- tv->cur_level[i] = tv->pre_level[l++];
- if (p->is_tail) { // then return a free slot
- tv->tail->level = tv->cur_level[i];
- tv->tail->next = mp_alloc(tv->mp);
- tv->tail = tv->tail->next;
- ++tv->n_nodes;
- }
- }
- if (tv->cur_level[i] > max_level) max_level = tv->cur_level[i];
- ((bam_pileup1_t*)p)->level = tv->cur_level[i];
- }
- assert(l == tv->n_pre);
- tv->func(tid, pos, n, pl, tv->user_data);
- // sort the linked list
- if (tv->n_nodes) {
- freenode_t *q;
- if (tv->n_nodes + 1 > tv->m_aux) { // enlarge
- tv->m_aux = tv->n_nodes + 1;
- kroundup32(tv->m_aux);
- tv->aux = (freenode_t**)realloc(tv->aux, sizeof(void*) * tv->m_aux);
- }
- for (p = tv->head, i = l = 0; p->next;) {
- if (p->level > max_level) { // then discard this entry
- q = p->next;
- mp_free(tv->mp, p);
- p = q;
- } else {
- tv->aux[i++] = p;
- p = p->next;
- }
- }
- tv->aux[i] = tv->tail; // add a proper tail for the loop below
- tv->n_nodes = i;
- if (tv->n_nodes) {
- ks_introsort(node, tv->n_nodes, tv->aux);
- for (i = 0; i < tv->n_nodes; ++i) tv->aux[i]->next = tv->aux[i+1];
- tv->head = tv->aux[0];
- } else tv->head = tv->tail;
- }
- // clean up
- tv->max_level = max_level;
- memcpy(tv->pre_level, tv->cur_level, tv->n_cur * 4);
- // squeeze out terminated levels
- for (i = l = 0; i < n; ++i) {
- const bam_pileup1_t *p = pl + i;
- if (!p->is_tail)
- tv->pre_level[l++] = tv->pre_level[i];
- }
- tv->n_pre = l;
-/*
- fprintf(stderr, "%d\t", pos+1);
- for (i = 0; i < n; ++i) {
- const bam_pileup1_t *p = pl + i;
- if (p->is_head) fprintf(stderr, "^");
- if (p->is_tail) fprintf(stderr, "$");
- fprintf(stderr, "%d,", p->level);
- }
- fprintf(stderr, "\n");
-*/
- return 0;
-}
-
-bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data)
-{
- bam_lplbuf_t *tv;
- tv = (bam_lplbuf_t*)calloc(1, sizeof(bam_lplbuf_t));
- tv->mp = mp_init();
- tv->head = tv->tail = mp_alloc(tv->mp);
- tv->func = func;
- tv->user_data = data;
- tv->plbuf = bam_plbuf_init(tview_func, tv);
- return (bam_lplbuf_t*)tv;
-}
-
-void bam_lplbuf_destroy(bam_lplbuf_t *tv)
-{
- freenode_t *p, *q;
- free(tv->cur_level); free(tv->pre_level);
- bam_plbuf_destroy(tv->plbuf);
- free(tv->aux);
- for (p = tv->head; p->next;) {
- q = p->next;
- mp_free(tv->mp, p); p = q;
- }
- mp_free(tv->mp, p);
- assert(tv->mp->cnt == 0);
- mp_destroy(tv->mp);
- free(tv);
-}
-
-int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *tv)
-{
- return bam_plbuf_push(b, tv->plbuf);
-}
diff --git a/src/samtools-0.1.18/bam_mate.c b/src/samtools-0.1.18/bam_mate.c
deleted file mode 100644
index 61f808a..0000000
--- a/src/samtools-0.1.18/bam_mate.c
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include "bam.h"
-
-// currently, this function ONLY works if each read has one hit
-void bam_mating_core(bamFile in, bamFile out)
-{
- bam_header_t *header;
- bam1_t *b[2];
- int curr, has_prev;
-
- header = bam_header_read(in);
- bam_header_write(out, header);
-
- b[0] = bam_init1();
- b[1] = bam_init1();
- curr = 0; has_prev = 0;
- while (bam_read1(in, b[curr]) >= 0) {
- bam1_t *cur = b[curr], *pre = b[1-curr];
- if (has_prev) {
- if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
- cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
- pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
- if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
- && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP)))
- {
- uint32_t cur5, pre5;
- cur5 = (cur->core.flag&BAM_FREVERSE)? bam_calend(&cur->core, bam1_cigar(cur)) : cur->core.pos;
- pre5 = (pre->core.flag&BAM_FREVERSE)? bam_calend(&pre->core, bam1_cigar(pre)) : pre->core.pos;
- cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
- } else cur->core.isize = pre->core.isize = 0;
- if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
- else cur->core.flag &= ~BAM_FMREVERSE;
- if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
- else pre->core.flag &= ~BAM_FMREVERSE;
- if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
- if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
- bam_write1(out, pre);
- bam_write1(out, cur);
- has_prev = 0;
- } else { // unpaired or singleton
- pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
- if (pre->core.flag & BAM_FPAIRED) {
- pre->core.flag |= BAM_FMUNMAP;
- pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
- }
- bam_write1(out, pre);
- }
- } else has_prev = 1;
- curr = 1 - curr;
- }
- if (has_prev) bam_write1(out, b[1-curr]);
- bam_header_destroy(header);
- bam_destroy1(b[0]);
- bam_destroy1(b[1]);
-}
-
-int bam_mating(int argc, char *argv[])
-{
- bamFile in, out;
- if (argc < 3) {
- fprintf(stderr, "samtools fixmate <in.nameSrt.bam> <out.nameSrt.bam>\n");
- return 1;
- }
- in = (strcmp(argv[1], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[1], "r");
- out = (strcmp(argv[2], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[2], "w");
- bam_mating_core(in, out);
- bam_close(in); bam_close(out);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_md.c b/src/samtools-0.1.18/bam_md.c
deleted file mode 100644
index d42aa8f..0000000
--- a/src/samtools-0.1.18/bam_md.c
+++ /dev/null
@@ -1,384 +0,0 @@
-#include <unistd.h>
-#include <assert.h>
-#include <string.h>
-#include <ctype.h>
-#include <math.h>
-#include "faidx.h"
-#include "sam.h"
-#include "kstring.h"
-#include "kaln.h"
-#include "kprobaln.h"
-
-#define USE_EQUAL 1
-#define DROP_TAG 2
-#define BIN_QUAL 4
-#define UPDATE_NM 8
-#define UPDATE_MD 16
-#define HASH_QNM 32
-
-char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
-
-int bam_aux_drop_other(bam1_t *b, uint8_t *s);
-
-void bam_fillmd1_core(bam1_t *b, char *ref, int flag, int max_nm)
-{
- uint8_t *seq = bam1_seq(b);
- uint32_t *cigar = bam1_cigar(b);
- bam1_core_t *c = &b->core;
- int i, x, y, u = 0;
- kstring_t *str;
- int32_t old_nm_i = -1, nm = 0;
-
- str = (kstring_t*)calloc(1, sizeof(kstring_t));
- for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
- int j, l = cigar[i]>>4, op = cigar[i]&0xf;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- for (j = 0; j < l; ++j) {
- int z = y + j;
- int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]];
- if (ref[x+j] == 0) break; // out of boundary
- if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
- if (flag&USE_EQUAL) seq[z/2] &= (z&1)? 0xf0 : 0x0f;
- ++u;
- } else {
- kputw(u, str); kputc(ref[x+j], str);
- u = 0; ++nm;
- }
- }
- if (j < l) break;
- x += l; y += l;
- } else if (op == BAM_CDEL) {
- kputw(u, str); kputc('^', str);
- for (j = 0; j < l; ++j) {
- if (ref[x+j] == 0) break;
- kputc(ref[x+j], str);
- }
- u = 0;
- if (j < l) break;
- x += l; nm += l;
- } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) {
- y += l;
- if (op == BAM_CINS) nm += l;
- } else if (op == BAM_CREF_SKIP) {
- x += l;
- }
- }
- kputw(u, str);
- // apply max_nm
- if (max_nm > 0 && nm >= max_nm) {
- for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
- int j, l = cigar[i]>>4, op = cigar[i]&0xf;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- for (j = 0; j < l; ++j) {
- int z = y + j;
- int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]];
- if (ref[x+j] == 0) break; // out of boundary
- if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match
- seq[z/2] |= (z&1)? 0x0f : 0xf0;
- bam1_qual(b)[z] = 0;
- }
- }
- if (j < l) break;
- x += l; y += l;
- } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l;
- else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;
- }
- }
- // update NM
- if (flag & UPDATE_NM) {
- uint8_t *old_nm = bam_aux_get(b, "NM");
- if (c->flag & BAM_FUNMAP) return;
- if (old_nm) old_nm_i = bam_aux2i(old_nm);
- if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
- else if (nm != old_nm_i) {
- fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm);
- bam_aux_del(b, old_nm);
- bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm);
- }
- }
- // update MD
- if (flag & UPDATE_MD) {
- uint8_t *old_md = bam_aux_get(b, "MD");
- if (c->flag & BAM_FUNMAP) return;
- if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
- else {
- int is_diff = 0;
- if (strlen((char*)old_md+1) == str->l) {
- for (i = 0; i < str->l; ++i)
- if (toupper(old_md[i+1]) != toupper(str->s[i]))
- break;
- if (i < str->l) is_diff = 1;
- } else is_diff = 1;
- if (is_diff) {
- fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam1_qname(b), old_md+1, str->s);
- bam_aux_del(b, old_md);
- bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s);
- }
- }
- }
- // drop all tags but RG
- if (flag&DROP_TAG) {
- uint8_t *q = bam_aux_get(b, "RG");
- bam_aux_drop_other(b, q);
- }
- // reduce the resolution of base quality
- if (flag&BIN_QUAL) {
- uint8_t *qual = bam1_qual(b);
- for (i = 0; i < b->core.l_qseq; ++i)
- if (qual[i] >= 3) qual[i] = qual[i]/10*10 + 7;
- }
- free(str->s); free(str);
-}
-
-void bam_fillmd1(bam1_t *b, char *ref, int flag)
-{
- bam_fillmd1_core(b, ref, flag, 0);
-}
-
-int bam_cap_mapQ(bam1_t *b, char *ref, int thres)
-{
- uint8_t *seq = bam1_seq(b), *qual = bam1_qual(b);
- uint32_t *cigar = bam1_cigar(b);
- bam1_core_t *c = &b->core;
- int i, x, y, mm, q, len, clip_l, clip_q;
- double t;
- if (thres < 0) thres = 40; // set the default
- mm = q = len = clip_l = clip_q = 0;
- for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) {
- int j, l = cigar[i]>>4, op = cigar[i]&0xf;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- for (j = 0; j < l; ++j) {
- int z = y + j;
- int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]];
- if (ref[x+j] == 0) break; // out of boundary
- if (c2 != 15 && c1 != 15 && qual[z] >= 13) { // not ambiguous
- ++len;
- if (c1 && c1 != c2 && qual[z] >= 13) { // mismatch
- ++mm;
- q += qual[z] > 33? 33 : qual[z];
- }
- }
- }
- if (j < l) break;
- x += l; y += l; len += l;
- } else if (op == BAM_CDEL) {
- for (j = 0; j < l; ++j)
- if (ref[x+j] == 0) break;
- if (j < l) break;
- x += l;
- } else if (op == BAM_CSOFT_CLIP) {
- for (j = 0; j < l; ++j) clip_q += qual[y+j];
- clip_l += l;
- y += l;
- } else if (op == BAM_CHARD_CLIP) {
- clip_q += 13 * l;
- clip_l += l;
- } else if (op == BAM_CINS) y += l;
- else if (op == BAM_CREF_SKIP) x += l;
- }
- for (i = 0, t = 1; i < mm; ++i)
- t *= (double)len / (i+1);
- t = q - 4.343 * log(t) + clip_q / 5.;
- if (t > thres) return -1;
- if (t < 0) t = 0;
- t = sqrt((thres - t) / thres) * thres;
-// fprintf(stderr, "%s %lf %d\n", bam1_qname(b), t, q);
- return (int)(t + .499);
-}
-
-int bam_prob_realn_core(bam1_t *b, const char *ref, int flag)
-{
- int k, i, bw, x, y, yb, ye, xb, xe, apply_baq = flag&1, extend_baq = flag>>1&1;
- uint32_t *cigar = bam1_cigar(b);
- bam1_core_t *c = &b->core;
- kpa_par_t conf = kpa_par_def;
- uint8_t *bq = 0, *zq = 0, *qual = bam1_qual(b);
- if ((c->flag & BAM_FUNMAP) || b->core.l_qseq == 0) return -1; // do nothing
- // test if BQ or ZQ is present
- if ((bq = bam_aux_get(b, "BQ")) != 0) ++bq;
- if ((zq = bam_aux_get(b, "ZQ")) != 0 && *zq == 'Z') ++zq;
- if (bq && zq) { // remove the ZQ tag
- bam_aux_del(b, zq-1);
- zq = 0;
- }
- if (bq || zq) {
- if ((apply_baq && zq) || (!apply_baq && bq)) return -3; // in both cases, do nothing
- if (bq && apply_baq) { // then convert BQ to ZQ
- for (i = 0; i < c->l_qseq; ++i)
- qual[i] = qual[i] + 64 < bq[i]? 0 : qual[i] - ((int)bq[i] - 64);
- *(bq - 3) = 'Z';
- } else if (zq && !apply_baq) { // then convert ZQ to BQ
- for (i = 0; i < c->l_qseq; ++i)
- qual[i] += (int)zq[i] - 64;
- *(zq - 3) = 'B';
- }
- return 0;
- }
- // find the start and end of the alignment
- x = c->pos, y = 0, yb = ye = xb = xe = -1;
- for (k = 0; k < c->n_cigar; ++k) {
- int op, l;
- op = cigar[k]&0xf; l = cigar[k]>>4;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- if (yb < 0) yb = y;
- if (xb < 0) xb = x;
- ye = y + l; xe = x + l;
- x += l; y += l;
- } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
- else if (op == BAM_CDEL) x += l;
- else if (op == BAM_CREF_SKIP) return -1; // do nothing if there is a reference skip
- }
- // set bandwidth and the start and the end
- bw = 7;
- if (abs((xe - xb) - (ye - yb)) > bw)
- bw = abs((xe - xb) - (ye - yb)) + 3;
- conf.bw = bw;
- xb -= yb + bw/2; if (xb < 0) xb = 0;
- xe += c->l_qseq - ye + bw/2;
- if (xe - xb - c->l_qseq > bw)
- xb += (xe - xb - c->l_qseq - bw) / 2, xe -= (xe - xb - c->l_qseq - bw) / 2;
- { // glocal
- uint8_t *s, *r, *q, *seq = bam1_seq(b), *bq;
- int *state;
- bq = calloc(c->l_qseq + 1, 1);
- memcpy(bq, qual, c->l_qseq);
- s = calloc(c->l_qseq, 1);
- for (i = 0; i < c->l_qseq; ++i) s[i] = bam_nt16_nt4_table[bam1_seqi(seq, i)];
- r = calloc(xe - xb, 1);
- for (i = xb; i < xe; ++i) {
- if (ref[i] == 0) { xe = i; break; }
- r[i-xb] = bam_nt16_nt4_table[bam_nt16_table[(int)ref[i]]];
- }
- state = calloc(c->l_qseq, sizeof(int));
- q = calloc(c->l_qseq, 1);
- kpa_glocal(r, xe-xb, s, c->l_qseq, qual, &conf, state, q);
- if (!extend_baq) { // in this block, bq[] is capped by base quality qual[]
- for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
- int op = cigar[k]&0xf, l = cigar[k]>>4;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- for (i = y; i < y + l; ++i) {
- if ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y)) bq[i] = 0;
- else bq[i] = bq[i] < q[i]? bq[i] : q[i];
- }
- x += l; y += l;
- } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
- else if (op == BAM_CDEL) x += l;
- }
- for (i = 0; i < c->l_qseq; ++i) bq[i] = qual[i] - bq[i] + 64; // finalize BQ
- } else { // in this block, bq[] is BAQ that can be larger than qual[] (different from the above!)
- uint8_t *left, *rght;
- left = calloc(c->l_qseq, 1); rght = calloc(c->l_qseq, 1);
- for (k = 0, x = c->pos, y = 0; k < c->n_cigar; ++k) {
- int op = cigar[k]&0xf, l = cigar[k]>>4;
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- for (i = y; i < y + l; ++i)
- bq[i] = ((state[i]&3) != 0 || state[i]>>2 != x - xb + (i - y))? 0 : q[i];
- for (left[y] = bq[y], i = y + 1; i < y + l; ++i)
- left[i] = bq[i] > left[i-1]? bq[i] : left[i-1];
- for (rght[y+l-1] = bq[y+l-1], i = y + l - 2; i >= y; --i)
- rght[i] = bq[i] > rght[i+1]? bq[i] : rght[i+1];
- for (i = y; i < y + l; ++i)
- bq[i] = left[i] < rght[i]? left[i] : rght[i];
- x += l; y += l;
- } else if (op == BAM_CSOFT_CLIP || op == BAM_CINS) y += l;
- else if (op == BAM_CDEL) x += l;
- }
- for (i = 0; i < c->l_qseq; ++i) bq[i] = 64 + (qual[i] <= bq[i]? 0 : qual[i] - bq[i]); // finalize BQ
- free(left); free(rght);
- }
- if (apply_baq) {
- for (i = 0; i < c->l_qseq; ++i) qual[i] -= bq[i] - 64; // modify qual
- bam_aux_append(b, "ZQ", 'Z', c->l_qseq + 1, bq);
- } else bam_aux_append(b, "BQ", 'Z', c->l_qseq + 1, bq);
- free(bq); free(s); free(r); free(q); free(state);
- }
- return 0;
-}
-
-int bam_prob_realn(bam1_t *b, const char *ref)
-{
- return bam_prob_realn_core(b, ref, 1);
-}
-
-int bam_fillmd(int argc, char *argv[])
-{
- int c, flt_flag, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed, max_nm, is_realn, capQ, baq_flag;
- samfile_t *fp, *fpout = 0;
- faidx_t *fai;
- char *ref = 0, mode_w[8], mode_r[8];
- bam1_t *b;
-
- flt_flag = UPDATE_NM | UPDATE_MD;
- is_bam_out = is_sam_in = is_uncompressed = is_realn = max_nm = capQ = baq_flag = 0;
- mode_w[0] = mode_r[0] = 0;
- strcpy(mode_r, "r"); strcpy(mode_w, "w");
- while ((c = getopt(argc, argv, "EqreuNhbSC:n:Ad")) >= 0) {
- switch (c) {
- case 'r': is_realn = 1; break;
- case 'e': flt_flag |= USE_EQUAL; break;
- case 'd': flt_flag |= DROP_TAG; break;
- case 'q': flt_flag |= BIN_QUAL; break;
- case 'h': flt_flag |= HASH_QNM; break;
- case 'N': flt_flag &= ~(UPDATE_MD|UPDATE_NM); break;
- case 'b': is_bam_out = 1; break;
- case 'u': is_uncompressed = is_bam_out = 1; break;
- case 'S': is_sam_in = 1; break;
- case 'n': max_nm = atoi(optarg); break;
- case 'C': capQ = atoi(optarg); break;
- case 'A': baq_flag |= 1; break;
- case 'E': baq_flag |= 2; break;
- default: fprintf(stderr, "[bam_fillmd] unrecognized option '-%c'\n", c); return 1;
- }
- }
- if (!is_sam_in) strcat(mode_r, "b");
- if (is_bam_out) strcat(mode_w, "b");
- else strcat(mode_w, "h");
- if (is_uncompressed) strcat(mode_w, "u");
- if (optind + 1 >= argc) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: samtools fillmd [-eubrS] <aln.bam> <ref.fasta>\n\n");
- fprintf(stderr, "Options: -e change identical bases to '='\n");
- fprintf(stderr, " -u uncompressed BAM output (for piping)\n");
- fprintf(stderr, " -b compressed BAM output\n");
- fprintf(stderr, " -S the input is SAM with header\n");
- fprintf(stderr, " -A modify the quality string\n");
- fprintf(stderr, " -r compute the BQ tag (without -A) or cap baseQ by BAQ (with -A)\n");
- fprintf(stderr, " -E extended BAQ for better sensitivity but lower specificity\n\n");
- return 1;
- }
- fp = samopen(argv[optind], mode_r, 0);
- if (fp == 0) return 1;
- if (is_sam_in && (fp->header == 0 || fp->header->n_targets == 0)) {
- fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n");
- return 1;
- }
- fpout = samopen("-", mode_w, fp->header);
- fai = fai_load(argv[optind+1]);
-
- b = bam_init1();
- while ((ret = samread(fp, b)) >= 0) {
- if (b->core.tid >= 0) {
- if (tid != b->core.tid) {
- free(ref);
- ref = fai_fetch(fai, fp->header->target_name[b->core.tid], &len);
- tid = b->core.tid;
- if (ref == 0)
- fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n",
- fp->header->target_name[tid]);
- }
- if (is_realn) bam_prob_realn_core(b, ref, baq_flag);
- if (capQ > 10) {
- int q = bam_cap_mapQ(b, ref, capQ);
- if (b->core.qual > q) b->core.qual = q;
- }
- if (ref) bam_fillmd1_core(b, ref, flt_flag, max_nm);
- }
- samwrite(fpout, b);
- }
- bam_destroy1(b);
-
- free(ref);
- fai_destroy(fai);
- samclose(fp); samclose(fpout);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_pileup.c b/src/samtools-0.1.18/bam_pileup.c
deleted file mode 100644
index 57434e0..0000000
--- a/src/samtools-0.1.18/bam_pileup.c
+++ /dev/null
@@ -1,437 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include <assert.h>
-#include "sam.h"
-
-typedef struct {
- int k, x, y, end;
-} cstate_t;
-
-static cstate_t g_cstate_null = { -1, 0, 0, 0 };
-
-typedef struct __linkbuf_t {
- bam1_t b;
- uint32_t beg, end;
- cstate_t s;
- struct __linkbuf_t *next;
-} lbnode_t;
-
-/* --- BEGIN: Memory pool */
-
-typedef struct {
- int cnt, n, max;
- lbnode_t **buf;
-} mempool_t;
-
-static mempool_t *mp_init()
-{
- mempool_t *mp;
- mp = (mempool_t*)calloc(1, sizeof(mempool_t));
- return mp;
-}
-static void mp_destroy(mempool_t *mp)
-{
- int k;
- for (k = 0; k < mp->n; ++k) {
- free(mp->buf[k]->b.data);
- free(mp->buf[k]);
- }
- free(mp->buf);
- free(mp);
-}
-static inline lbnode_t *mp_alloc(mempool_t *mp)
-{
- ++mp->cnt;
- if (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));
- else return mp->buf[--mp->n];
-}
-static inline void mp_free(mempool_t *mp, lbnode_t *p)
-{
- --mp->cnt; p->next = 0; // clear lbnode_t::next here
- if (mp->n == mp->max) {
- mp->max = mp->max? mp->max<<1 : 256;
- mp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);
- }
- mp->buf[mp->n++] = p;
-}
-
-/* --- END: Memory pool */
-
-/* --- BEGIN: Auxiliary functions */
-
-/* s->k: the index of the CIGAR operator that has just been processed.
- s->x: the reference coordinate of the start of s->k
- s->y: the query coordiante of the start of s->k
- */
-static inline int resolve_cigar2(bam_pileup1_t *p, uint32_t pos, cstate_t *s)
-{
-#define _cop(c) ((c)&BAM_CIGAR_MASK)
-#define _cln(c) ((c)>>BAM_CIGAR_SHIFT)
-
- bam1_t *b = p->b;
- bam1_core_t *c = &b->core;
- uint32_t *cigar = bam1_cigar(b);
- int k, is_head = 0;
- // determine the current CIGAR operation
-// fprintf(stderr, "%s\tpos=%d\tend=%d\t(%d,%d,%d)\n", bam1_qname(b), pos, s->end, s->k, s->x, s->y);
- if (s->k == -1) { // never processed
- is_head = 1;
- if (c->n_cigar == 1) { // just one operation, save a loop
- if (_cop(cigar[0]) == BAM_CMATCH || _cop(cigar[0]) == BAM_CEQUAL || _cop(cigar[0]) == BAM_CDIFF) s->k = 0, s->x = c->pos, s->y = 0;
- } else { // find the first match or deletion
- for (k = 0, s->x = c->pos, s->y = 0; k < c->n_cigar; ++k) {
- int op = _cop(cigar[k]);
- int l = _cln(cigar[k]);
- if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CEQUAL || op == BAM_CDIFF) break;
- else if (op == BAM_CREF_SKIP) s->x += l;
- else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l;
- }
- assert(k < c->n_cigar);
- s->k = k;
- }
- } else { // the read has been processed before
- int op, l = _cln(cigar[s->k]);
- if (pos - s->x >= l) { // jump to the next operation
- assert(s->k < c->n_cigar); // otherwise a bug: this function should not be called in this case
- op = _cop(cigar[s->k+1]);
- if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) { // jump to the next without a loop
- if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l;
- s->x += l;
- ++s->k;
- } else { // find the next M/D/N/=/X
- if (_cop(cigar[s->k]) == BAM_CMATCH|| _cop(cigar[s->k]) == BAM_CEQUAL || _cop(cigar[s->k]) == BAM_CDIFF) s->y += l;
- s->x += l;
- for (k = s->k + 1; k < c->n_cigar; ++k) {
- op = _cop(cigar[k]), l = _cln(cigar[k]);
- if (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP || op == BAM_CEQUAL || op == BAM_CDIFF) break;
- else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) s->y += l;
- }
- s->k = k;
- }
- assert(s->k < c->n_cigar); // otherwise a bug
- } // else, do nothing
- }
- { // collect pileup information
- int op, l;
- op = _cop(cigar[s->k]); l = _cln(cigar[s->k]);
- p->is_del = p->indel = p->is_refskip = 0;
- if (s->x + l - 1 == pos && s->k + 1 < c->n_cigar) { // peek the next operation
- int op2 = _cop(cigar[s->k+1]);
- int l2 = _cln(cigar[s->k+1]);
- if (op2 == BAM_CDEL) p->indel = -(int)l2;
- else if (op2 == BAM_CINS) p->indel = l2;
- else if (op2 == BAM_CPAD && s->k + 2 < c->n_cigar) { // no working for adjacent padding
- int l3 = 0;
- for (k = s->k + 2; k < c->n_cigar; ++k) {
- op2 = _cop(cigar[k]); l2 = _cln(cigar[k]);
- if (op2 == BAM_CINS) l3 += l2;
- else if (op2 == BAM_CDEL || op2 == BAM_CMATCH || op2 == BAM_CREF_SKIP || op2 == BAM_CEQUAL || op2 == BAM_CDIFF) break;
- }
- if (l3 > 0) p->indel = l3;
- }
- }
- if (op == BAM_CMATCH || op == BAM_CEQUAL || op == BAM_CDIFF) {
- p->qpos = s->y + (pos - s->x);
- } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) {
- p->is_del = 1; p->qpos = s->y; // FIXME: distinguish D and N!!!!!
- p->is_refskip = (op == BAM_CREF_SKIP);
- } // cannot be other operations; otherwise a bug
- p->is_head = (pos == c->pos); p->is_tail = (pos == s->end);
- }
- return 1;
-}
-
-/* --- END: Auxiliary functions */
-
-/*******************
- * pileup iterator *
- *******************/
-
-struct __bam_plp_t {
- mempool_t *mp;
- lbnode_t *head, *tail, *dummy;
- int32_t tid, pos, max_tid, max_pos;
- int is_eof, flag_mask, max_plp, error, maxcnt;
- bam_pileup1_t *plp;
- // for the "auto" interface only
- bam1_t *b;
- bam_plp_auto_f func;
- void *data;
-};
-
-bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data)
-{
- bam_plp_t iter;
- iter = calloc(1, sizeof(struct __bam_plp_t));
- iter->mp = mp_init();
- iter->head = iter->tail = mp_alloc(iter->mp);
- iter->dummy = mp_alloc(iter->mp);
- iter->max_tid = iter->max_pos = -1;
- iter->flag_mask = BAM_DEF_MASK;
- iter->maxcnt = 8000;
- if (func) {
- iter->func = func;
- iter->data = data;
- iter->b = bam_init1();
- }
- return iter;
-}
-
-void bam_plp_destroy(bam_plp_t iter)
-{
- mp_free(iter->mp, iter->dummy);
- mp_free(iter->mp, iter->head);
- if (iter->mp->cnt != 0)
- fprintf(stderr, "[bam_plp_destroy] memory leak: %d. Continue anyway.\n", iter->mp->cnt);
- mp_destroy(iter->mp);
- if (iter->b) bam_destroy1(iter->b);
- free(iter->plp);
- free(iter);
-}
-
-const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
-{
- if (iter->error) { *_n_plp = -1; return 0; }
- *_n_plp = 0;
- if (iter->is_eof && iter->head->next == 0) return 0;
- while (iter->is_eof || iter->max_tid > iter->tid || (iter->max_tid == iter->tid && iter->max_pos > iter->pos)) {
- int n_plp = 0;
- lbnode_t *p, *q;
- // write iter->plp at iter->pos
- iter->dummy->next = iter->head;
- for (p = iter->head, q = iter->dummy; p->next; q = p, p = p->next) {
- if (p->b.core.tid < iter->tid || (p->b.core.tid == iter->tid && p->end <= iter->pos)) { // then remove
- q->next = p->next; mp_free(iter->mp, p); p = q;
- } else if (p->b.core.tid == iter->tid && p->beg <= iter->pos) { // here: p->end > pos; then add to pileup
- if (n_plp == iter->max_plp) { // then double the capacity
- iter->max_plp = iter->max_plp? iter->max_plp<<1 : 256;
- iter->plp = (bam_pileup1_t*)realloc(iter->plp, sizeof(bam_pileup1_t) * iter->max_plp);
- }
- iter->plp[n_plp].b = &p->b;
- if (resolve_cigar2(iter->plp + n_plp, iter->pos, &p->s)) ++n_plp; // actually always true...
- }
- }
- iter->head = iter->dummy->next; // dummy->next may be changed
- *_n_plp = n_plp; *_tid = iter->tid; *_pos = iter->pos;
- // update iter->tid and iter->pos
- if (iter->head->next) {
- if (iter->tid > iter->head->b.core.tid) {
- fprintf(stderr, "[%s] unsorted input. Pileup aborts.\n", __func__);
- iter->error = 1;
- *_n_plp = -1;
- return 0;
- }
- }
- if (iter->tid < iter->head->b.core.tid) { // come to a new reference sequence
- iter->tid = iter->head->b.core.tid; iter->pos = iter->head->beg; // jump to the next reference
- } else if (iter->pos < iter->head->beg) { // here: tid == head->b.core.tid
- iter->pos = iter->head->beg; // jump to the next position
- } else ++iter->pos; // scan contiguously
- // return
- if (n_plp) return iter->plp;
- if (iter->is_eof && iter->head->next == 0) break;
- }
- return 0;
-}
-
-int bam_plp_push(bam_plp_t iter, const bam1_t *b)
-{
- if (iter->error) return -1;
- if (b) {
- if (b->core.tid < 0) return 0;
- if (b->core.flag & iter->flag_mask) return 0;
- if (iter->tid == b->core.tid && iter->pos == b->core.pos && iter->mp->cnt > iter->maxcnt) return 0;
- bam_copy1(&iter->tail->b, b);
- iter->tail->beg = b->core.pos; iter->tail->end = bam_calend(&b->core, bam1_cigar(b));
- iter->tail->s = g_cstate_null; iter->tail->s.end = iter->tail->end - 1; // initialize cstate_t
- if (b->core.tid < iter->max_tid) {
- fprintf(stderr, "[bam_pileup_core] the input is not sorted (chromosomes out of order)\n");
- iter->error = 1;
- return -1;
- }
- if ((b->core.tid == iter->max_tid) && (iter->tail->beg < iter->max_pos)) {
- fprintf(stderr, "[bam_pileup_core] the input is not sorted (reads out of order)\n");
- iter->error = 1;
- return -1;
- }
- iter->max_tid = b->core.tid; iter->max_pos = iter->tail->beg;
- if (iter->tail->end > iter->pos || iter->tail->b.core.tid > iter->tid) {
- iter->tail->next = mp_alloc(iter->mp);
- iter->tail = iter->tail->next;
- }
- } else iter->is_eof = 1;
- return 0;
-}
-
-const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)
-{
- const bam_pileup1_t *plp;
- if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }
- if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
- else { // no pileup line can be obtained; read alignments
- *_n_plp = 0;
- if (iter->is_eof) return 0;
- while (iter->func(iter->data, iter->b) >= 0) {
- if (bam_plp_push(iter, iter->b) < 0) {
- *_n_plp = -1;
- return 0;
- }
- if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
- // otherwise no pileup line can be returned; read the next alignment.
- }
- bam_plp_push(iter, 0);
- if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;
- return 0;
- }
-}
-
-void bam_plp_reset(bam_plp_t iter)
-{
- lbnode_t *p, *q;
- iter->max_tid = iter->max_pos = -1;
- iter->tid = iter->pos = 0;
- iter->is_eof = 0;
- for (p = iter->head; p->next;) {
- q = p->next;
- mp_free(iter->mp, p);
- p = q;
- }
- iter->head = iter->tail;
-}
-
-void bam_plp_set_mask(bam_plp_t iter, int mask)
-{
- iter->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask);
-}
-
-void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt)
-{
- iter->maxcnt = maxcnt;
-}
-
-/*****************
- * callback APIs *
- *****************/
-
-int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)
-{
- bam_plbuf_t *buf;
- int ret;
- bam1_t *b;
- b = bam_init1();
- buf = bam_plbuf_init(func, func_data);
- bam_plbuf_set_mask(buf, mask);
- while ((ret = bam_read1(fp, b)) >= 0)
- bam_plbuf_push(b, buf);
- bam_plbuf_push(0, buf);
- bam_plbuf_destroy(buf);
- bam_destroy1(b);
- return 0;
-}
-
-void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask)
-{
- bam_plp_set_mask(buf->iter, mask);
-}
-
-void bam_plbuf_reset(bam_plbuf_t *buf)
-{
- bam_plp_reset(buf->iter);
-}
-
-bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data)
-{
- bam_plbuf_t *buf;
- buf = calloc(1, sizeof(bam_plbuf_t));
- buf->iter = bam_plp_init(0, 0);
- buf->func = func;
- buf->data = data;
- return buf;
-}
-
-void bam_plbuf_destroy(bam_plbuf_t *buf)
-{
- bam_plp_destroy(buf->iter);
- free(buf);
-}
-
-int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf)
-{
- int ret, n_plp, tid, pos;
- const bam_pileup1_t *plp;
- ret = bam_plp_push(buf->iter, b);
- if (ret < 0) return ret;
- while ((plp = bam_plp_next(buf->iter, &tid, &pos, &n_plp)) != 0)
- buf->func(tid, pos, n_plp, plp, buf->data);
- return 0;
-}
-
-/***********
- * mpileup *
- ***********/
-
-struct __bam_mplp_t {
- int n;
- uint64_t min, *pos;
- bam_plp_t *iter;
- int *n_plp;
- const bam_pileup1_t **plp;
-};
-
-bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data)
-{
- int i;
- bam_mplp_t iter;
- iter = calloc(1, sizeof(struct __bam_mplp_t));
- iter->pos = calloc(n, 8);
- iter->n_plp = calloc(n, sizeof(int));
- iter->plp = calloc(n, sizeof(void*));
- iter->iter = calloc(n, sizeof(void*));
- iter->n = n;
- iter->min = (uint64_t)-1;
- for (i = 0; i < n; ++i) {
- iter->iter[i] = bam_plp_init(func, data[i]);
- iter->pos[i] = iter->min;
- }
- return iter;
-}
-
-void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt)
-{
- int i;
- for (i = 0; i < iter->n; ++i)
- iter->iter[i]->maxcnt = maxcnt;
-}
-
-void bam_mplp_destroy(bam_mplp_t iter)
-{
- int i;
- for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]);
- free(iter->iter); free(iter->pos); free(iter->n_plp); free(iter->plp);
- free(iter);
-}
-
-int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp)
-{
- int i, ret = 0;
- uint64_t new_min = (uint64_t)-1;
- for (i = 0; i < iter->n; ++i) {
- if (iter->pos[i] == iter->min) {
- int tid, pos;
- iter->plp[i] = bam_plp_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]);
- iter->pos[i] = (uint64_t)tid<<32 | pos;
- }
- if (iter->plp[i] && iter->pos[i] < new_min) new_min = iter->pos[i];
- }
- iter->min = new_min;
- if (new_min == (uint64_t)-1) return 0;
- *_tid = new_min>>32; *_pos = (uint32_t)new_min;
- for (i = 0; i < iter->n; ++i) {
- if (iter->pos[i] == iter->min) { // FIXME: valgrind reports "uninitialised value(s) at this line"
- n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i];
- ++ret;
- } else n_plp[i] = 0, plp[i] = 0;
- }
- return ret;
-}
diff --git a/src/samtools-0.1.18/bam_plcmd.c b/src/samtools-0.1.18/bam_plcmd.c
deleted file mode 100644
index cbf6ae8..0000000
--- a/src/samtools-0.1.18/bam_plcmd.c
+++ /dev/null
@@ -1,546 +0,0 @@
-#include <math.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <ctype.h>
-#include <string.h>
-#include <errno.h>
-#include "sam.h"
-#include "faidx.h"
-#include "kstring.h"
-
-static inline int printw(int c, FILE *fp)
-{
- char buf[16];
- int l, x;
- if (c == 0) return fputc('0', fp);
- for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
- if (c < 0) buf[l++] = '-';
- buf[l] = 0;
- for (x = 0; x < l/2; ++x) {
- int y = buf[x]; buf[x] = buf[l-1-x]; buf[l-1-x] = y;
- }
- fputs(buf, fp);
- return 0;
-}
-
-static inline void pileup_seq(const bam_pileup1_t *p, int pos, int ref_len, const char *ref)
-{
- int j;
- if (p->is_head) {
- putchar('^');
- putchar(p->b->core.qual > 93? 126 : p->b->core.qual + 33);
- }
- if (!p->is_del) {
- int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
- if (ref) {
- int rb = pos < ref_len? ref[pos] : 'N';
- if (c == '=' || bam_nt16_table[c] == bam_nt16_table[rb]) c = bam1_strand(p->b)? ',' : '.';
- else c = bam1_strand(p->b)? tolower(c) : toupper(c);
- } else {
- if (c == '=') c = bam1_strand(p->b)? ',' : '.';
- else c = bam1_strand(p->b)? tolower(c) : toupper(c);
- }
- putchar(c);
- } else putchar(p->is_refskip? (bam1_strand(p->b)? '<' : '>') : '*');
- if (p->indel > 0) {
- putchar('+'); printw(p->indel, stdout);
- for (j = 1; j <= p->indel; ++j) {
- int c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
- putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
- }
- } else if (p->indel < 0) {
- printw(p->indel, stdout);
- for (j = 1; j <= -p->indel; ++j) {
- int c = (ref && (int)pos+j < ref_len)? ref[pos+j] : 'N';
- putchar(bam1_strand(p->b)? tolower(c) : toupper(c));
- }
- }
- if (p->is_tail) putchar('$');
-}
-
-#include <assert.h>
-#include "bam2bcf.h"
-#include "sample.h"
-
-#define MPLP_GLF 0x10
-#define MPLP_NO_COMP 0x20
-#define MPLP_NO_ORPHAN 0x40
-#define MPLP_REALN 0x80
-#define MPLP_FMT_DP 0x100
-#define MPLP_FMT_SP 0x200
-#define MPLP_NO_INDEL 0x400
-#define MPLP_EXT_BAQ 0x800
-#define MPLP_ILLUMINA13 0x1000
-#define MPLP_IGNORE_RG 0x2000
-#define MPLP_PRINT_POS 0x4000
-#define MPLP_PRINT_MAPQ 0x8000
-
-void *bed_read(const char *fn);
-void bed_destroy(void *_h);
-int bed_overlap(const void *_h, const char *chr, int beg, int end);
-
-typedef struct {
- int max_mq, min_mq, flag, min_baseQ, capQ_thres, max_depth, max_indel_depth;
- int openQ, extQ, tandemQ, min_support; // for indels
- double min_frac; // for indels
- char *reg, *pl_list;
- faidx_t *fai;
- void *bed, *rghash;
-} mplp_conf_t;
-
-typedef struct {
- bamFile fp;
- bam_iter_t iter;
- bam_header_t *h;
- int ref_id;
- char *ref;
- const mplp_conf_t *conf;
-} mplp_aux_t;
-
-typedef struct {
- int n;
- int *n_plp, *m_plp;
- bam_pileup1_t **plp;
-} mplp_pileup_t;
-
-static int mplp_func(void *data, bam1_t *b)
-{
- extern int bam_realn(bam1_t *b, const char *ref);
- extern int bam_prob_realn_core(bam1_t *b, const char *ref, int);
- extern int bam_cap_mapQ(bam1_t *b, char *ref, int thres);
- mplp_aux_t *ma = (mplp_aux_t*)data;
- int ret, skip = 0;
- do {
- int has_ref;
- ret = ma->iter? bam_iter_read(ma->fp, ma->iter, b) : bam_read1(ma->fp, b);
- if (ret < 0) break;
- if (b->core.tid < 0 || (b->core.flag&BAM_FUNMAP)) { // exclude unmapped reads
- skip = 1;
- continue;
- }
- if (ma->conf->bed) { // test overlap
- skip = !bed_overlap(ma->conf->bed, ma->h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b)));
- if (skip) continue;
- }
- if (ma->conf->rghash) { // exclude read groups
- uint8_t *rg = bam_aux_get(b, "RG");
- skip = (rg && bcf_str2id(ma->conf->rghash, (const char*)(rg+1)) >= 0);
- if (skip) continue;
- }
- if (ma->conf->flag & MPLP_ILLUMINA13) {
- int i;
- uint8_t *qual = bam1_qual(b);
- for (i = 0; i < b->core.l_qseq; ++i)
- qual[i] = qual[i] > 31? qual[i] - 31 : 0;
- }
- has_ref = (ma->ref && ma->ref_id == b->core.tid)? 1 : 0;
- skip = 0;
- if (has_ref && (ma->conf->flag&MPLP_REALN)) bam_prob_realn_core(b, ma->ref, (ma->conf->flag & MPLP_EXT_BAQ)? 3 : 1);
- if (has_ref && ma->conf->capQ_thres > 10) {
- int q = bam_cap_mapQ(b, ma->ref, ma->conf->capQ_thres);
- if (q < 0) skip = 1;
- else if (b->core.qual > q) b->core.qual = q;
- }
- else if (b->core.qual < ma->conf->min_mq) skip = 1;
- else if ((ma->conf->flag&MPLP_NO_ORPHAN) && (b->core.flag&1) && !(b->core.flag&2)) skip = 1;
- } while (skip);
- return ret;
-}
-
-static void group_smpl(mplp_pileup_t *m, bam_sample_t *sm, kstring_t *buf,
- int n, char *const*fn, int *n_plp, const bam_pileup1_t **plp, int ignore_rg)
-{
- int i, j;
- memset(m->n_plp, 0, m->n * sizeof(int));
- for (i = 0; i < n; ++i) {
- for (j = 0; j < n_plp[i]; ++j) {
- const bam_pileup1_t *p = plp[i] + j;
- uint8_t *q;
- int id = -1;
- q = ignore_rg? 0 : bam_aux_get(p->b, "RG");
- if (q) id = bam_smpl_rg2smid(sm, fn[i], (char*)q+1, buf);
- if (id < 0) id = bam_smpl_rg2smid(sm, fn[i], 0, buf);
- if (id < 0 || id >= m->n) {
- assert(q); // otherwise a bug
- fprintf(stderr, "[%s] Read group %s used in file %s but absent from the header or an alignment missing read group.\n", __func__, (char*)q+1, fn[i]);
- exit(1);
- }
- if (m->n_plp[id] == m->m_plp[id]) {
- m->m_plp[id] = m->m_plp[id]? m->m_plp[id]<<1 : 8;
- m->plp[id] = realloc(m->plp[id], sizeof(bam_pileup1_t) * m->m_plp[id]);
- }
- m->plp[id][m->n_plp[id]++] = *p;
- }
- }
-}
-
-static int mpileup(mplp_conf_t *conf, int n, char **fn)
-{
- extern void *bcf_call_add_rg(void *rghash, const char *hdtext, const char *list);
- extern void bcf_call_del_rghash(void *rghash);
- mplp_aux_t **data;
- int i, tid, pos, *n_plp, tid0 = -1, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid = -1, max_depth, max_indel_depth;
- const bam_pileup1_t **plp;
- bam_mplp_t iter;
- bam_header_t *h = 0;
- char *ref;
- void *rghash = 0;
-
- bcf_callaux_t *bca = 0;
- bcf_callret1_t *bcr = 0;
- bcf_call_t bc;
- bcf_t *bp = 0;
- bcf_hdr_t *bh = 0;
-
- bam_sample_t *sm = 0;
- kstring_t buf;
- mplp_pileup_t gplp;
-
- memset(&gplp, 0, sizeof(mplp_pileup_t));
- memset(&buf, 0, sizeof(kstring_t));
- memset(&bc, 0, sizeof(bcf_call_t));
- data = calloc(n, sizeof(void*));
- plp = calloc(n, sizeof(void*));
- n_plp = calloc(n, sizeof(int*));
- sm = bam_smpl_init();
-
- // read the header and initialize data
- for (i = 0; i < n; ++i) {
- bam_header_t *h_tmp;
- data[i] = calloc(1, sizeof(mplp_aux_t));
- data[i]->fp = strcmp(fn[i], "-") == 0? bam_dopen(fileno(stdin), "r") : bam_open(fn[i], "r");
- data[i]->conf = conf;
- h_tmp = bam_header_read(data[i]->fp);
- data[i]->h = i? h : h_tmp; // for i==0, "h" has not been set yet
- bam_smpl_add(sm, fn[i], (conf->flag&MPLP_IGNORE_RG)? 0 : h_tmp->text);
- rghash = bcf_call_add_rg(rghash, h_tmp->text, conf->pl_list);
- if (conf->reg) {
- int beg, end;
- bam_index_t *idx;
- idx = bam_index_load(fn[i]);
- if (idx == 0) {
- fprintf(stderr, "[%s] fail to load index for %d-th input.\n", __func__, i+1);
- exit(1);
- }
- if (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) {
- fprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\n", __func__, i+1);
- exit(1);
- }
- if (i == 0) tid0 = tid, beg0 = beg, end0 = end;
- data[i]->iter = bam_iter_query(idx, tid, beg, end);
- bam_index_destroy(idx);
- }
- if (i == 0) h = h_tmp;
- else {
- // FIXME: to check consistency
- bam_header_destroy(h_tmp);
- }
- }
- gplp.n = sm->n;
- gplp.n_plp = calloc(sm->n, sizeof(int));
- gplp.m_plp = calloc(sm->n, sizeof(int));
- gplp.plp = calloc(sm->n, sizeof(void*));
-
- fprintf(stderr, "[%s] %d samples in %d input files\n", __func__, sm->n, n);
- // write the VCF header
- if (conf->flag & MPLP_GLF) {
- kstring_t s;
- bh = calloc(1, sizeof(bcf_hdr_t));
- s.l = s.m = 0; s.s = 0;
- bp = bcf_open("-", (conf->flag&MPLP_NO_COMP)? "wu" : "w");
- for (i = 0; i < h->n_targets; ++i) {
- kputs(h->target_name[i], &s);
- kputc('\0', &s);
- }
- bh->l_nm = s.l;
- bh->name = malloc(s.l);
- memcpy(bh->name, s.s, s.l);
- s.l = 0;
- for (i = 0; i < sm->n; ++i) {
- kputs(sm->smpl[i], &s); kputc('\0', &s);
- }
- bh->l_smpl = s.l;
- bh->sname = malloc(s.l);
- memcpy(bh->sname, s.s, s.l);
- bh->txt = malloc(strlen(BAM_VERSION) + 64);
- bh->l_txt = 1 + sprintf(bh->txt, "##samtoolsVersion=%s\n", BAM_VERSION);
- free(s.s);
- bcf_hdr_sync(bh);
- bcf_hdr_write(bp, bh);
- bca = bcf_call_init(-1., conf->min_baseQ);
- bcr = calloc(sm->n, sizeof(bcf_callret1_t));
- bca->rghash = rghash;
- bca->openQ = conf->openQ, bca->extQ = conf->extQ, bca->tandemQ = conf->tandemQ;
- bca->min_frac = conf->min_frac;
- bca->min_support = conf->min_support;
- }
- if (tid0 >= 0 && conf->fai) { // region is set
- ref = faidx_fetch_seq(conf->fai, h->target_name[tid0], 0, 0x7fffffff, &ref_len);
- ref_tid = tid0;
- for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid0;
- } else ref_tid = -1, ref = 0;
- iter = bam_mplp_init(n, mplp_func, (void**)data);
- max_depth = conf->max_depth;
- if (max_depth * sm->n > 1<<20)
- fprintf(stderr, "(%s) Max depth is above 1M. Potential memory hog!\n", __func__);
- if (max_depth * sm->n < 8000) {
- max_depth = 8000 / sm->n;
- fprintf(stderr, "<%s> Set max per-file depth to %d\n", __func__, max_depth);
- }
- max_indel_depth = conf->max_indel_depth * sm->n;
- bam_mplp_set_maxcnt(iter, max_depth);
- while (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {
- if (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested
- if (conf->bed && tid >= 0 && !bed_overlap(conf->bed, h->target_name[tid], pos, pos+1)) continue;
- if (tid != ref_tid) {
- free(ref); ref = 0;
- if (conf->fai) ref = faidx_fetch_seq(conf->fai, h->target_name[tid], 0, 0x7fffffff, &ref_len);
- for (i = 0; i < n; ++i) data[i]->ref = ref, data[i]->ref_id = tid;
- ref_tid = tid;
- }
- if (conf->flag & MPLP_GLF) {
- int total_depth, _ref0, ref16;
- bcf1_t *b = calloc(1, sizeof(bcf1_t));
- for (i = total_depth = 0; i < n; ++i) total_depth += n_plp[i];
- group_smpl(&gplp, sm, &buf, n, fn, n_plp, plp, conf->flag & MPLP_IGNORE_RG);
- _ref0 = (ref && pos < ref_len)? ref[pos] : 'N';
- ref16 = bam_nt16_table[_ref0];
- for (i = 0; i < gplp.n; ++i)
- bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], ref16, bca, bcr + i);
- bcf_call_combine(gplp.n, bcr, ref16, &bc);
- bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
- (conf->flag&MPLP_FMT_SP), 0, 0);
- bcf_write(bp, bh, b);
- bcf_destroy(b);
- // call indels
- if (!(conf->flag&MPLP_NO_INDEL) && total_depth < max_indel_depth && bcf_call_gap_prep(gplp.n, gplp.n_plp, gplp.plp, pos, bca, ref, rghash) >= 0) {
- for (i = 0; i < gplp.n; ++i)
- bcf_call_glfgen(gplp.n_plp[i], gplp.plp[i], -1, bca, bcr + i);
- if (bcf_call_combine(gplp.n, bcr, -1, &bc) >= 0) {
- b = calloc(1, sizeof(bcf1_t));
- bcf_call2bcf(tid, pos, &bc, b, (conf->flag&(MPLP_FMT_DP|MPLP_FMT_SP))? bcr : 0,
- (conf->flag&MPLP_FMT_SP), bca, ref);
- bcf_write(bp, bh, b);
- bcf_destroy(b);
- }
- }
- } else {
- printf("%s\t%d\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? ref[pos] : 'N');
- for (i = 0; i < n; ++i) {
- int j;
- printf("\t%d\t", n_plp[i]);
- if (n_plp[i] == 0) {
- printf("*\t*"); // FIXME: printf() is very slow...
- if (conf->flag & MPLP_PRINT_POS) printf("\t*");
- } else {
- for (j = 0; j < n_plp[i]; ++j)
- pileup_seq(plp[i] + j, pos, ref_len, ref);
- putchar('\t');
- for (j = 0; j < n_plp[i]; ++j) {
- const bam_pileup1_t *p = plp[i] + j;
- int c = bam1_qual(p->b)[p->qpos] + 33;
- if (c > 126) c = 126;
- putchar(c);
- }
- if (conf->flag & MPLP_PRINT_MAPQ) {
- putchar('\t');
- for (j = 0; j < n_plp[i]; ++j) {
- int c = plp[i][j].b->core.qual + 33;
- if (c > 126) c = 126;
- putchar(c);
- }
- }
- if (conf->flag & MPLP_PRINT_POS) {
- putchar('\t');
- for (j = 0; j < n_plp[i]; ++j) {
- if (j > 0) putchar(',');
- printf("%d", plp[i][j].qpos + 1); // FIXME: printf() is very slow...
- }
- }
- }
- }
- putchar('\n');
- }
- }
-
- bcf_close(bp);
- bam_smpl_destroy(sm); free(buf.s);
- for (i = 0; i < gplp.n; ++i) free(gplp.plp[i]);
- free(gplp.plp); free(gplp.n_plp); free(gplp.m_plp);
- bcf_call_del_rghash(rghash);
- bcf_hdr_destroy(bh); bcf_call_destroy(bca); free(bc.PL); free(bcr);
- bam_mplp_destroy(iter);
- bam_header_destroy(h);
- for (i = 0; i < n; ++i) {
- bam_close(data[i]->fp);
- if (data[i]->iter) bam_iter_destroy(data[i]->iter);
- free(data[i]);
- }
- free(data); free(plp); free(ref); free(n_plp);
- return 0;
-}
-
-#define MAX_PATH_LEN 1024
-static int read_file_list(const char *file_list,int *n,char **argv[])
-{
- char buf[MAX_PATH_LEN];
- int len, nfiles;
- char **files;
-
- FILE *fh = fopen(file_list,"r");
- if ( !fh )
- {
- fprintf(stderr,"%s: %s\n", file_list,strerror(errno));
- return 1;
- }
-
- // Speed is not an issue here, determine the number of files by reading the file twice
- nfiles = 0;
- while ( fgets(buf,MAX_PATH_LEN,fh) ) nfiles++;
-
- if ( fseek(fh, 0L, SEEK_SET) )
- {
- fprintf(stderr,"%s: %s\n", file_list,strerror(errno));
- return 1;
- }
-
- files = calloc(nfiles,sizeof(char*));
- nfiles = 0;
- while ( fgets(buf,MAX_PATH_LEN,fh) )
- {
- len = strlen(buf);
- while ( len>0 && isspace(buf[len-1]) ) len--;
- if ( !len ) continue;
-
- files[nfiles] = malloc(sizeof(char)*(len+1));
- strncpy(files[nfiles],buf,len);
- files[nfiles][len] = 0;
- nfiles++;
- }
- fclose(fh);
- if ( !nfiles )
- {
- fprintf(stderr,"No files read from %s\n", file_list);
- return 1;
- }
- *argv = files;
- *n = nfiles;
- return 0;
-}
-#undef MAX_PATH_LEN
-
-int bam_mpileup(int argc, char *argv[])
-{
- int c;
- const char *file_list = NULL;
- char **fn = NULL;
- int nfiles = 0, use_orphan = 0;
- mplp_conf_t mplp;
- memset(&mplp, 0, sizeof(mplp_conf_t));
- #define MPLP_PRINT_POS 0x4000
- mplp.max_mq = 60;
- mplp.min_baseQ = 13;
- mplp.capQ_thres = 0;
- mplp.max_depth = 250; mplp.max_indel_depth = 250;
- mplp.openQ = 40; mplp.extQ = 20; mplp.tandemQ = 100;
- mplp.min_frac = 0.002; mplp.min_support = 1;
- mplp.flag = MPLP_NO_ORPHAN | MPLP_REALN;
- while ((c = getopt(argc, argv, "Agf:r:l:M:q:Q:uaRC:BDSd:L:b:P:o:e:h:Im:F:EG:6Os")) >= 0) {
- switch (c) {
- case 'f':
- mplp.fai = fai_load(optarg);
- if (mplp.fai == 0) return 1;
- break;
- case 'd': mplp.max_depth = atoi(optarg); break;
- case 'r': mplp.reg = strdup(optarg); break;
- case 'l': mplp.bed = bed_read(optarg); break;
- case 'P': mplp.pl_list = strdup(optarg); break;
- case 'g': mplp.flag |= MPLP_GLF; break;
- case 'u': mplp.flag |= MPLP_NO_COMP | MPLP_GLF; break;
- case 'a': mplp.flag |= MPLP_NO_ORPHAN | MPLP_REALN; break;
- case 'B': mplp.flag &= ~MPLP_REALN; break;
- case 'D': mplp.flag |= MPLP_FMT_DP; break;
- case 'S': mplp.flag |= MPLP_FMT_SP; break;
- case 'I': mplp.flag |= MPLP_NO_INDEL; break;
- case 'E': mplp.flag |= MPLP_EXT_BAQ; break;
- case '6': mplp.flag |= MPLP_ILLUMINA13; break;
- case 'R': mplp.flag |= MPLP_IGNORE_RG; break;
- case 's': mplp.flag |= MPLP_PRINT_MAPQ; break;
- case 'O': mplp.flag |= MPLP_PRINT_POS; break;
- case 'C': mplp.capQ_thres = atoi(optarg); break;
- case 'M': mplp.max_mq = atoi(optarg); break;
- case 'q': mplp.min_mq = atoi(optarg); break;
- case 'Q': mplp.min_baseQ = atoi(optarg); break;
- case 'b': file_list = optarg; break;
- case 'o': mplp.openQ = atoi(optarg); break;
- case 'e': mplp.extQ = atoi(optarg); break;
- case 'h': mplp.tandemQ = atoi(optarg); break;
- case 'A': use_orphan = 1; break;
- case 'F': mplp.min_frac = atof(optarg); break;
- case 'm': mplp.min_support = atoi(optarg); break;
- case 'L': mplp.max_indel_depth = atoi(optarg); break;
- case 'G': {
- FILE *fp_rg;
- char buf[1024];
- mplp.rghash = bcf_str2id_init();
- if ((fp_rg = fopen(optarg, "r")) == 0)
- fprintf(stderr, "(%s) Fail to open file %s. Continue anyway.\n", __func__, optarg);
- while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but forgive me...
- bcf_str2id_add(mplp.rghash, strdup(buf));
- fclose(fp_rg);
- }
- break;
- }
- }
- if (use_orphan) mplp.flag &= ~MPLP_NO_ORPHAN;
- if (argc == 1) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: samtools mpileup [options] in1.bam [in2.bam [...]]\n\n");
- fprintf(stderr, "Input options:\n\n");
- fprintf(stderr, " -6 assume the quality is in the Illumina-1.3+ encoding\n");
- fprintf(stderr, " -A count anomalous read pairs\n");
- fprintf(stderr, " -B disable BAQ computation\n");
- fprintf(stderr, " -b FILE list of input BAM files [null]\n");
- fprintf(stderr, " -C INT parameter for adjusting mapQ; 0 to disable [0]\n");
- fprintf(stderr, " -d INT max per-BAM depth to avoid excessive memory usage [%d]\n", mplp.max_depth);
- fprintf(stderr, " -E extended BAQ for higher sensitivity but lower specificity\n");
- fprintf(stderr, " -f FILE faidx indexed reference sequence file [null]\n");
- fprintf(stderr, " -G FILE exclude read groups listed in FILE [null]\n");
- fprintf(stderr, " -l FILE list of positions (chr pos) or regions (BED) [null]\n");
- fprintf(stderr, " -M INT cap mapping quality at INT [%d]\n", mplp.max_mq);
- fprintf(stderr, " -r STR region in which pileup is generated [null]\n");
- fprintf(stderr, " -R ignore RG tags\n");
- fprintf(stderr, " -q INT skip alignments with mapQ smaller than INT [%d]\n", mplp.min_mq);
- fprintf(stderr, " -Q INT skip bases with baseQ/BAQ smaller than INT [%d]\n", mplp.min_baseQ);
- fprintf(stderr, "\nOutput options:\n\n");
- fprintf(stderr, " -D output per-sample DP in BCF (require -g/-u)\n");
- fprintf(stderr, " -g generate BCF output (genotype likelihoods)\n");
- fprintf(stderr, " -O output base positions on reads (disabled by -g/-u)\n");
- fprintf(stderr, " -s output mapping quality (disabled by -g/-u)\n");
- fprintf(stderr, " -S output per-sample strand bias P-value in BCF (require -g/-u)\n");
- fprintf(stderr, " -u generate uncompress BCF output\n");
- fprintf(stderr, "\nSNP/INDEL genotype likelihoods options (effective with `-g' or `-u'):\n\n");
- fprintf(stderr, " -e INT Phred-scaled gap extension seq error probability [%d]\n", mplp.extQ);
- fprintf(stderr, " -F FLOAT minimum fraction of gapped reads for candidates [%g]\n", mplp.min_frac);
- fprintf(stderr, " -h INT coefficient for homopolymer errors [%d]\n", mplp.tandemQ);
- fprintf(stderr, " -I do not perform indel calling\n");
- fprintf(stderr, " -L INT max per-sample depth for INDEL calling [%d]\n", mplp.max_indel_depth);
- fprintf(stderr, " -m INT minimum gapped reads for indel candidates [%d]\n", mplp.min_support);
- fprintf(stderr, " -o INT Phred-scaled gap open sequencing error probability [%d]\n", mplp.openQ);
- fprintf(stderr, " -P STR comma separated list of platforms for indels [all]\n");
- fprintf(stderr, "\n");
- fprintf(stderr, "Notes: Assuming diploid individuals.\n\n");
- return 1;
- }
- if (file_list) {
- if ( read_file_list(file_list,&nfiles,&fn) ) return 1;
- mpileup(&mplp,nfiles,fn);
- for (c=0; c<nfiles; c++) free(fn[c]);
- free(fn);
- } else mpileup(&mplp, argc - optind, argv + optind);
- if (mplp.rghash) bcf_str2id_thorough_destroy(mplp.rghash);
- free(mplp.reg); free(mplp.pl_list);
- if (mplp.fai) fai_destroy(mplp.fai);
- if (mplp.bed) bed_destroy(mplp.bed);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_reheader.c b/src/samtools-0.1.18/bam_reheader.c
deleted file mode 100644
index 0b52267..0000000
--- a/src/samtools-0.1.18/bam_reheader.c
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include "bgzf.h"
-#include "bam.h"
-
-#define BUF_SIZE 0x10000
-
-int bam_reheader(BGZF *in, const bam_header_t *h, int fd)
-{
- BGZF *fp;
- bam_header_t *old;
- int len;
- uint8_t *buf;
- if (in->open_mode != 'r') return -1;
- buf = malloc(BUF_SIZE);
- old = bam_header_read(in);
- fp = bgzf_fdopen(fd, "w");
- bam_header_write(fp, h);
- if (in->block_offset < in->block_length) {
- bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset);
- bgzf_flush(fp);
- }
-#ifdef _USE_KNETFILE
- while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0)
- fwrite(buf, 1, len, fp->x.fpw);
-#else
- while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0)
- fwrite(buf, 1, len, fp->file);
-#endif
- free(buf);
- fp->block_offset = in->block_offset = 0;
- bgzf_close(fp);
- return 0;
-}
-
-int main_reheader(int argc, char *argv[])
-{
- bam_header_t *h;
- BGZF *in;
- if (argc != 3) {
- fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n");
- return 1;
- }
- { // read the header
- tamFile fph = sam_open(argv[1]);
- if (fph == 0) {
- fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]);
- return 1;
- }
- h = sam_header_read(fph);
- sam_close(fph);
- }
- in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r");
- if (in == 0) {
- fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]);
- return 1;
- }
- bam_reheader(in, h, fileno(stdout));
- bgzf_close(in);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_rmdup.c b/src/samtools-0.1.18/bam_rmdup.c
deleted file mode 100644
index f0d2b5d..0000000
--- a/src/samtools-0.1.18/bam_rmdup.c
+++ /dev/null
@@ -1,206 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <zlib.h>
-#include <unistd.h>
-#include "sam.h"
-
-typedef bam1_t *bam1_p;
-
-#include "khash.h"
-KHASH_SET_INIT_STR(name)
-KHASH_MAP_INIT_INT64(pos, bam1_p)
-
-#define BUFFER_SIZE 0x40000
-
-typedef struct {
- uint64_t n_checked, n_removed;
- khash_t(pos) *best_hash;
-} lib_aux_t;
-KHASH_MAP_INIT_STR(lib, lib_aux_t)
-
-typedef struct {
- int n, max;
- bam1_t **a;
-} tmp_stack_t;
-
-static inline void stack_insert(tmp_stack_t *stack, bam1_t *b)
-{
- if (stack->n == stack->max) {
- stack->max = stack->max? stack->max<<1 : 0x10000;
- stack->a = (bam1_t**)realloc(stack->a, sizeof(bam1_t*) * stack->max);
- }
- stack->a[stack->n++] = b;
-}
-
-static inline void dump_best(tmp_stack_t *stack, samfile_t *out)
-{
- int i;
- for (i = 0; i != stack->n; ++i) {
- samwrite(out, stack->a[i]);
- bam_destroy1(stack->a[i]);
- }
- stack->n = 0;
-}
-
-static void clear_del_set(khash_t(name) *del_set)
-{
- khint_t k;
- for (k = kh_begin(del_set); k < kh_end(del_set); ++k)
- if (kh_exist(del_set, k))
- free((char*)kh_key(del_set, k));
- kh_clear(name, del_set);
-}
-
-static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib)
-{
- khint_t k = kh_get(lib, aux, lib);
- if (k == kh_end(aux)) {
- int ret;
- char *p = strdup(lib);
- lib_aux_t *q;
- k = kh_put(lib, aux, p, &ret);
- q = &kh_val(aux, k);
- q->n_checked = q->n_removed = 0;
- q->best_hash = kh_init(pos);
- return q;
- } else return &kh_val(aux, k);
-}
-
-static void clear_best(khash_t(lib) *aux, int max)
-{
- khint_t k;
- for (k = kh_begin(aux); k != kh_end(aux); ++k) {
- if (kh_exist(aux, k)) {
- lib_aux_t *q = &kh_val(aux, k);
- if (kh_size(q->best_hash) >= max)
- kh_clear(pos, q->best_hash);
- }
- }
-}
-
-static inline int sum_qual(const bam1_t *b)
-{
- int i, q;
- uint8_t *qual = bam1_qual(b);
- for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i];
- return q;
-}
-
-void bam_rmdup_core(samfile_t *in, samfile_t *out)
-{
- bam1_t *b;
- int last_tid = -1, last_pos = -1;
- tmp_stack_t stack;
- khint_t k;
- khash_t(lib) *aux;
- khash_t(name) *del_set;
-
- aux = kh_init(lib);
- del_set = kh_init(name);
- b = bam_init1();
- memset(&stack, 0, sizeof(tmp_stack_t));
-
- kh_resize(name, del_set, 4 * BUFFER_SIZE);
- while (samread(in, b) >= 0) {
- bam1_core_t *c = &b->core;
- if (c->tid != last_tid || last_pos != c->pos) {
- dump_best(&stack, out); // write the result
- clear_best(aux, BUFFER_SIZE);
- if (c->tid != last_tid) {
- clear_best(aux, 0);
- if (kh_size(del_set)) { // check
- fprintf(stderr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set));
- clear_del_set(del_set);
- }
- if ((int)c->tid == -1) { // append unmapped reads
- samwrite(out, b);
- while (samread(in, b) >= 0) samwrite(out, b);
- break;
- }
- last_tid = c->tid;
- fprintf(stderr, "[bam_rmdup_core] processing reference %s...\n", in->header->target_name[c->tid]);
- }
- }
- if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) {
- samwrite(out, b);
- } else if (c->isize > 0) { // paired, head
- uint64_t key = (uint64_t)c->pos<<32 | c->isize;
- const char *lib;
- lib_aux_t *q;
- int ret;
- lib = bam_get_library(in->header, b);
- q = lib? get_aux(aux, lib) : get_aux(aux, "\t");
- ++q->n_checked;
- k = kh_put(pos, q->best_hash, key, &ret);
- if (ret == 0) { // found in best_hash
- bam1_t *p = kh_val(q->best_hash, k);
- ++q->n_removed;
- if (sum_qual(p) < sum_qual(b)) { // the current alignment is better; this can be accelerated in principle
- kh_put(name, del_set, strdup(bam1_qname(p)), &ret); // p will be removed
- bam_copy1(p, b); // replaced as b
- } else kh_put(name, del_set, strdup(bam1_qname(b)), &ret); // b will be removed
- if (ret == 0)
- fprintf(stderr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. Continue anyway.\n", bam1_qname(b));
- } else { // not found in best_hash
- kh_val(q->best_hash, k) = bam_dup1(b);
- stack_insert(&stack, kh_val(q->best_hash, k));
- }
- } else { // paired, tail
- k = kh_get(name, del_set, bam1_qname(b));
- if (k != kh_end(del_set)) {
- free((char*)kh_key(del_set, k));
- kh_del(name, del_set, k);
- } else samwrite(out, b);
- }
- last_pos = c->pos;
- }
-
- for (k = kh_begin(aux); k != kh_end(aux); ++k) {
- if (kh_exist(aux, k)) {
- lib_aux_t *q = &kh_val(aux, k);
- dump_best(&stack, out);
- fprintf(stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
- (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
- kh_destroy(pos, q->best_hash);
- free((char*)kh_key(aux, k));
- }
- }
- kh_destroy(lib, aux);
-
- clear_del_set(del_set);
- kh_destroy(name, del_set);
- free(stack.a);
- bam_destroy1(b);
-}
-
-void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se);
-
-int bam_rmdup(int argc, char *argv[])
-{
- int c, is_se = 0, force_se = 0;
- samfile_t *in, *out;
- while ((c = getopt(argc, argv, "sS")) >= 0) {
- switch (c) {
- case 's': is_se = 1; break;
- case 'S': force_se = is_se = 1; break;
- }
- }
- if (optind + 2 > argc) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n");
- fprintf(stderr, "Option: -s rmdup for SE reads\n");
- fprintf(stderr, " -S treat PE reads as SE in rmdup (force -s)\n\n");
- return 1;
- }
- in = samopen(argv[optind], "rb", 0);
- out = samopen(argv[optind+1], "wb", in->header);
- if (in == 0 || out == 0) {
- fprintf(stderr, "[bam_rmdup] fail to read/write input files\n");
- return 1;
- }
- if (is_se) bam_rmdupse_core(in, out, force_se);
- else bam_rmdup_core(in, out);
- samclose(in); samclose(out);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_rmdupse.c b/src/samtools-0.1.18/bam_rmdupse.c
deleted file mode 100644
index e7dbdc7..0000000
--- a/src/samtools-0.1.18/bam_rmdupse.c
+++ /dev/null
@@ -1,159 +0,0 @@
-#include <math.h>
-#include "sam.h"
-#include "khash.h"
-#include "klist.h"
-
-#define QUEUE_CLEAR_SIZE 0x100000
-#define MAX_POS 0x7fffffff
-
-typedef struct {
- int endpos;
- uint32_t score:31, discarded:1;
- bam1_t *b;
-} elem_t, *elem_p;
-#define __free_elem(p) bam_destroy1((p)->data.b)
-KLIST_INIT(q, elem_t, __free_elem)
-typedef klist_t(q) queue_t;
-
-KHASH_MAP_INIT_INT(best, elem_p)
-typedef khash_t(best) besthash_t;
-
-typedef struct {
- uint64_t n_checked, n_removed;
- besthash_t *left, *rght;
-} lib_aux_t;
-KHASH_MAP_INIT_STR(lib, lib_aux_t)
-
-static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib)
-{
- khint_t k = kh_get(lib, aux, lib);
- if (k == kh_end(aux)) {
- int ret;
- char *p = strdup(lib);
- lib_aux_t *q;
- k = kh_put(lib, aux, p, &ret);
- q = &kh_val(aux, k);
- q->left = kh_init(best);
- q->rght = kh_init(best);
- q->n_checked = q->n_removed = 0;
- return q;
- } else return &kh_val(aux, k);
-}
-
-static inline int sum_qual(const bam1_t *b)
-{
- int i, q;
- uint8_t *qual = bam1_qual(b);
- for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i];
- return q;
-}
-
-static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, int score)
-{
- elem_t *p = kl_pushp(q, queue);
- p->discarded = 0;
- p->endpos = endpos; p->score = score;
- if (p->b == 0) p->b = bam_init1();
- bam_copy1(p->b, b);
- return p;
-}
-
-static void clear_besthash(besthash_t *h, int32_t pos)
-{
- khint_t k;
- for (k = kh_begin(h); k != kh_end(h); ++k)
- if (kh_exist(h, k) && kh_val(h, k)->endpos <= pos)
- kh_del(best, h, k);
-}
-
-static void dump_alignment(samfile_t *out, queue_t *queue, int32_t pos, khash_t(lib) *h)
-{
- if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) {
- khint_t k;
- while (1) {
- elem_t *q;
- if (queue->head == queue->tail) break;
- q = &kl_val(queue->head);
- if (q->discarded) {
- q->b->data_len = 0;
- kl_shift(q, queue, 0);
- continue;
- }
- if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break;
- samwrite(out, q->b);
- q->b->data_len = 0;
- kl_shift(q, queue, 0);
- }
- for (k = kh_begin(h); k != kh_end(h); ++k) {
- if (kh_exist(h, k)) {
- clear_besthash(kh_val(h, k).left, pos);
- clear_besthash(kh_val(h, k).rght, pos);
- }
- }
- }
-}
-
-void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se)
-{
- bam1_t *b;
- queue_t *queue;
- khint_t k;
- int last_tid = -2;
- khash_t(lib) *aux;
-
- aux = kh_init(lib);
- b = bam_init1();
- queue = kl_init(q);
- while (samread(in, b) >= 0) {
- bam1_core_t *c = &b->core;
- int endpos = bam_calend(c, bam1_cigar(b));
- int score = sum_qual(b);
-
- if (last_tid != c->tid) {
- if (last_tid >= 0) dump_alignment(out, queue, MAX_POS, aux);
- last_tid = c->tid;
- } else dump_alignment(out, queue, c->pos, aux);
- if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) {
- push_queue(queue, b, endpos, score);
- } else {
- const char *lib;
- lib_aux_t *q;
- besthash_t *h;
- uint32_t key;
- int ret;
- lib = bam_get_library(in->header, b);
- q = lib? get_aux(aux, lib) : get_aux(aux, "\t");
- ++q->n_checked;
- h = (c->flag&BAM_FREVERSE)? q->rght : q->left;
- key = (c->flag&BAM_FREVERSE)? endpos : c->pos;
- k = kh_put(best, h, key, &ret);
- if (ret == 0) { // in the hash table
- elem_t *p = kh_val(h, k);
- ++q->n_removed;
- if (p->score < score) {
- if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue
- p->discarded = 1;
- kh_val(h, k) = push_queue(queue, b, endpos, score);
- } else { // replace
- p->score = score; p->endpos = endpos;
- bam_copy1(p->b, b);
- }
- } // otherwise, discard the alignment
- } else kh_val(h, k) = push_queue(queue, b, endpos, score);
- }
- }
- dump_alignment(out, queue, MAX_POS, aux);
-
- for (k = kh_begin(aux); k != kh_end(aux); ++k) {
- if (kh_exist(aux, k)) {
- lib_aux_t *q = &kh_val(aux, k);
- fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed,
- (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k));
- kh_destroy(best, q->left); kh_destroy(best, q->rght);
- free((char*)kh_key(aux, k));
- }
- }
- kh_destroy(lib, aux);
- bam_destroy1(b);
- kl_destroy(q, queue);
-}
diff --git a/src/samtools-0.1.18/bam_sort.c b/src/samtools-0.1.18/bam_sort.c
deleted file mode 100644
index abf8d4f..0000000
--- a/src/samtools-0.1.18/bam_sort.c
+++ /dev/null
@@ -1,438 +0,0 @@
-#include <stdlib.h>
-#include <ctype.h>
-#include <assert.h>
-#include <errno.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include "bam.h"
-#include "ksort.h"
-
-static int g_is_by_qname = 0;
-
-static inline int strnum_cmp(const char *a, const char *b)
-{
- char *pa, *pb;
- pa = (char*)a; pb = (char*)b;
- while (*pa && *pb) {
- if (isdigit(*pa) && isdigit(*pb)) {
- long ai, bi;
- ai = strtol(pa, &pa, 10);
- bi = strtol(pb, &pb, 10);
- if (ai != bi) return ai<bi? -1 : ai>bi? 1 : 0;
- } else {
- if (*pa != *pb) break;
- ++pa; ++pb;
- }
- }
- if (*pa == *pb)
- return (pa-a) < (pb-b)? -1 : (pa-a) > (pb-b)? 1 : 0;
- return *pa<*pb? -1 : *pa>*pb? 1 : 0;
-}
-
-#define HEAP_EMPTY 0xffffffffffffffffull
-
-typedef struct {
- int i;
- uint64_t pos, idx;
- bam1_t *b;
-} heap1_t;
-
-#define __pos_cmp(a, b) ((a).pos > (b).pos || ((a).pos == (b).pos && ((a).i > (b).i || ((a).i == (b).i && (a).idx > (b).idx))))
-
-static inline int heap_lt(const heap1_t a, const heap1_t b)
-{
- if (g_is_by_qname) {
- int t;
- if (a.b == 0 || b.b == 0) return a.b == 0? 1 : 0;
- t = strnum_cmp(bam1_qname(a.b), bam1_qname(b.b));
- return (t > 0 || (t == 0 && __pos_cmp(a, b)));
- } else return __pos_cmp(a, b);
-}
-
-KSORT_INIT(heap, heap1_t, heap_lt)
-
-static void swap_header_targets(bam_header_t *h1, bam_header_t *h2)
-{
- bam_header_t t;
- t.n_targets = h1->n_targets, h1->n_targets = h2->n_targets, h2->n_targets = t.n_targets;
- t.target_name = h1->target_name, h1->target_name = h2->target_name, h2->target_name = t.target_name;
- t.target_len = h1->target_len, h1->target_len = h2->target_len, h2->target_len = t.target_len;
-}
-
-static void swap_header_text(bam_header_t *h1, bam_header_t *h2)
-{
- int tempi;
- char *temps;
- tempi = h1->l_text, h1->l_text = h2->l_text, h2->l_text = tempi;
- temps = h1->text, h1->text = h2->text, h2->text = temps;
-}
-
-#define MERGE_RG 1
-#define MERGE_UNCOMP 2
-#define MERGE_LEVEL1 4
-#define MERGE_FORCE 8
-
-/*!
- @abstract Merge multiple sorted BAM.
- @param is_by_qname whether to sort by query name
- @param out output BAM file name
- @param headers name of SAM file from which to copy '@' header lines,
- or NULL to copy them from the first file to be merged
- @param n number of files to be merged
- @param fn names of files to be merged
-
- @discussion Padding information may NOT correctly maintained. This
- function is NOT thread safe.
- */
-int bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn,
- int flag, const char *reg)
-{
- bamFile fpout, *fp;
- heap1_t *heap;
- bam_header_t *hout = 0;
- bam_header_t *hheaders = NULL;
- int i, j, *RG_len = 0;
- uint64_t idx = 0;
- char **RG = 0;
- bam_iter_t *iter = 0;
-
- if (headers) {
- tamFile fpheaders = sam_open(headers);
- if (fpheaders == 0) {
- const char *message = strerror(errno);
- fprintf(stderr, "[bam_merge_core] cannot open '%s': %s\n", headers, message);
- return -1;
- }
- hheaders = sam_header_read(fpheaders);
- sam_close(fpheaders);
- }
-
- g_is_by_qname = by_qname;
- fp = (bamFile*)calloc(n, sizeof(bamFile));
- heap = (heap1_t*)calloc(n, sizeof(heap1_t));
- iter = (bam_iter_t*)calloc(n, sizeof(bam_iter_t));
- // prepare RG tag
- if (flag & MERGE_RG) {
- RG = (char**)calloc(n, sizeof(void*));
- RG_len = (int*)calloc(n, sizeof(int));
- for (i = 0; i != n; ++i) {
- int l = strlen(fn[i]);
- const char *s = fn[i];
- if (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4;
- for (j = l - 1; j >= 0; --j) if (s[j] == '/') break;
- ++j; l -= j;
- RG[i] = calloc(l + 1, 1);
- RG_len[i] = l;
- strncpy(RG[i], s + j, l);
- }
- }
- // read the first
- for (i = 0; i != n; ++i) {
- bam_header_t *hin;
- fp[i] = bam_open(fn[i], "r");
- if (fp[i] == 0) {
- int j;
- fprintf(stderr, "[bam_merge_core] fail to open file %s\n", fn[i]);
- for (j = 0; j < i; ++j) bam_close(fp[j]);
- free(fp); free(heap);
- // FIXME: possible memory leak
- return -1;
- }
- hin = bam_header_read(fp[i]);
- if (i == 0) { // the first BAM
- hout = hin;
- } else { // validate multiple baf
- int min_n_targets = hout->n_targets;
- if (hin->n_targets < min_n_targets) min_n_targets = hin->n_targets;
-
- for (j = 0; j < min_n_targets; ++j)
- if (strcmp(hout->target_name[j], hin->target_name[j]) != 0) {
- fprintf(stderr, "[bam_merge_core] different target sequence name: '%s' != '%s' in file '%s'\n",
- hout->target_name[j], hin->target_name[j], fn[i]);
- return -1;
- }
-
- // If this input file has additional target reference sequences,
- // add them to the headers to be output
- if (hin->n_targets > hout->n_targets) {
- swap_header_targets(hout, hin);
- // FIXME Possibly we should also create @SQ text headers
- // for the newly added reference sequences
- }
-
- bam_header_destroy(hin);
- }
- }
-
- if (hheaders) {
- // If the text headers to be swapped in include any @SQ headers,
- // check that they are consistent with the existing binary list
- // of reference information.
- if (hheaders->n_targets > 0) {
- if (hout->n_targets != hheaders->n_targets) {
- fprintf(stderr, "[bam_merge_core] number of @SQ headers in '%s' differs from number of target sequences\n", headers);
- if (!reg) return -1;
- }
- for (j = 0; j < hout->n_targets; ++j)
- if (strcmp(hout->target_name[j], hheaders->target_name[j]) != 0) {
- fprintf(stderr, "[bam_merge_core] @SQ header '%s' in '%s' differs from target sequence\n", hheaders->target_name[j], headers);
- if (!reg) return -1;
- }
- }
-
- swap_header_text(hout, hheaders);
- bam_header_destroy(hheaders);
- }
-
- if (reg) {
- int tid, beg, end;
- if (bam_parse_region(hout, reg, &tid, &beg, &end) < 0) {
- fprintf(stderr, "[%s] Malformated region string or undefined reference name\n", __func__);
- return -1;
- }
- for (i = 0; i < n; ++i) {
- bam_index_t *idx;
- idx = bam_index_load(fn[i]);
- iter[i] = bam_iter_query(idx, tid, beg, end);
- bam_index_destroy(idx);
- }
- }
-
- for (i = 0; i < n; ++i) {
- heap1_t *h = heap + i;
- h->i = i;
- h->b = (bam1_t*)calloc(1, sizeof(bam1_t));
- if (bam_iter_read(fp[i], iter[i], h->b) >= 0) {
- h->pos = ((uint64_t)h->b->core.tid<<32) | (uint32_t)((int32_t)h->b->core.pos+1)<<1 | bam1_strand(h->b);
- h->idx = idx++;
- }
- else h->pos = HEAP_EMPTY;
- }
- if (flag & MERGE_UNCOMP) fpout = strcmp(out, "-")? bam_open(out, "wu") : bam_dopen(fileno(stdout), "wu");
- else if (flag & MERGE_LEVEL1) fpout = strcmp(out, "-")? bam_open(out, "w1") : bam_dopen(fileno(stdout), "w1");
- else fpout = strcmp(out, "-")? bam_open(out, "w") : bam_dopen(fileno(stdout), "w");
- if (fpout == 0) {
- fprintf(stderr, "[%s] fail to create the output file.\n", __func__);
- return -1;
- }
- bam_header_write(fpout, hout);
- bam_header_destroy(hout);
-
- ks_heapmake(heap, n, heap);
- while (heap->pos != HEAP_EMPTY) {
- bam1_t *b = heap->b;
- if (flag & MERGE_RG) {
- uint8_t *rg = bam_aux_get(b, "RG");
- if (rg) bam_aux_del(b, rg);
- bam_aux_append(b, "RG", 'Z', RG_len[heap->i] + 1, (uint8_t*)RG[heap->i]);
- }
- bam_write1_core(fpout, &b->core, b->data_len, b->data);
- if ((j = bam_iter_read(fp[heap->i], iter[heap->i], b)) >= 0) {
- heap->pos = ((uint64_t)b->core.tid<<32) | (uint32_t)((int)b->core.pos+1)<<1 | bam1_strand(b);
- heap->idx = idx++;
- } else if (j == -1) {
- heap->pos = HEAP_EMPTY;
- free(heap->b->data); free(heap->b);
- heap->b = 0;
- } else fprintf(stderr, "[bam_merge_core] '%s' is truncated. Continue anyway.\n", fn[heap->i]);
- ks_heapadjust(heap, 0, n, heap);
- }
-
- if (flag & MERGE_RG) {
- for (i = 0; i != n; ++i) free(RG[i]);
- free(RG); free(RG_len);
- }
- for (i = 0; i != n; ++i) {
- bam_iter_destroy(iter[i]);
- bam_close(fp[i]);
- }
- bam_close(fpout);
- free(fp); free(heap); free(iter);
- return 0;
-}
-
-int bam_merge(int argc, char *argv[])
-{
- int c, is_by_qname = 0, flag = 0, ret = 0;
- char *fn_headers = NULL, *reg = 0;
-
- while ((c = getopt(argc, argv, "h:nru1R:f")) >= 0) {
- switch (c) {
- case 'r': flag |= MERGE_RG; break;
- case 'f': flag |= MERGE_FORCE; break;
- case 'h': fn_headers = strdup(optarg); break;
- case 'n': is_by_qname = 1; break;
- case '1': flag |= MERGE_LEVEL1; break;
- case 'u': flag |= MERGE_UNCOMP; break;
- case 'R': reg = strdup(optarg); break;
- }
- }
- if (optind + 2 >= argc) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: samtools merge [-nr] [-h inh.sam] <out.bam> <in1.bam> <in2.bam> [...]\n\n");
- fprintf(stderr, "Options: -n sort by read names\n");
- fprintf(stderr, " -r attach RG tag (inferred from file names)\n");
- fprintf(stderr, " -u uncompressed BAM output\n");
- fprintf(stderr, " -f overwrite the output BAM if exist\n");
- fprintf(stderr, " -1 compress level 1\n");
- fprintf(stderr, " -R STR merge file in the specified region STR [all]\n");
- fprintf(stderr, " -h FILE copy the header in FILE to <out.bam> [in1.bam]\n\n");
- fprintf(stderr, "Note: Samtools' merge does not reconstruct the @RG dictionary in the header. Users\n");
- fprintf(stderr, " must provide the correct header with -h, or uses Picard which properly maintains\n");
- fprintf(stderr, " the header dictionary in merging.\n\n");
- return 1;
- }
- if (!(flag & MERGE_FORCE) && strcmp(argv[optind], "-")) {
- FILE *fp = fopen(argv[optind], "rb");
- if (fp != NULL) {
- fclose(fp);
- fprintf(stderr, "[%s] File '%s' exists. Please apply '-f' to overwrite. Abort.\n", __func__, argv[optind]);
- return 1;
- }
- }
- if (bam_merge_core(is_by_qname, argv[optind], fn_headers, argc - optind - 1, argv + optind + 1, flag, reg) < 0) ret = 1;
- free(reg);
- free(fn_headers);
- return ret;
-}
-
-typedef bam1_t *bam1_p;
-
-static inline int bam1_lt(const bam1_p a, const bam1_p b)
-{
- if (g_is_by_qname) {
- int t = strnum_cmp(bam1_qname(a), bam1_qname(b));
- return (t < 0 || (t == 0 && (((uint64_t)a->core.tid<<32|(a->core.pos+1)) < ((uint64_t)b->core.tid<<32|(b->core.pos+1)))));
- } else return (((uint64_t)a->core.tid<<32|(a->core.pos+1)) < ((uint64_t)b->core.tid<<32|(b->core.pos+1)));
-}
-KSORT_INIT(sort, bam1_p, bam1_lt)
-
-static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h, int is_stdout)
-{
- char *name, mode[3];
- int i;
- bamFile fp;
- ks_mergesort(sort, k, buf, 0);
- name = (char*)calloc(strlen(prefix) + 20, 1);
- if (n >= 0) {
- sprintf(name, "%s.%.4d.bam", prefix, n);
- strcpy(mode, "w1");
- } else {
- sprintf(name, "%s.bam", prefix);
- strcpy(mode, "w");
- }
- fp = is_stdout? bam_dopen(fileno(stdout), mode) : bam_open(name, mode);
- if (fp == 0) {
- fprintf(stderr, "[sort_blocks] fail to create file %s.\n", name);
- free(name);
- // FIXME: possible memory leak
- return;
- }
- free(name);
- bam_header_write(fp, h);
- for (i = 0; i < k; ++i)
- bam_write1_core(fp, &buf[i]->core, buf[i]->data_len, buf[i]->data);
- bam_close(fp);
-}
-
-/*!
- @abstract Sort an unsorted BAM file based on the chromosome order
- and the leftmost position of an alignment
-
- @param is_by_qname whether to sort by query name
- @param fn name of the file to be sorted
- @param prefix prefix of the output and the temporary files; upon
- sucessess, prefix.bam will be written.
- @param max_mem approxiate maximum memory (very inaccurate)
-
- @discussion It may create multiple temporary subalignment files
- and then merge them by calling bam_merge_core(). This function is
- NOT thread safe.
- */
-void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t max_mem, int is_stdout)
-{
- int n, ret, k, i;
- size_t mem;
- bam_header_t *header;
- bamFile fp;
- bam1_t *b, **buf;
-
- g_is_by_qname = is_by_qname;
- n = k = 0; mem = 0;
- fp = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
- if (fp == 0) {
- fprintf(stderr, "[bam_sort_core] fail to open file %s\n", fn);
- return;
- }
- header = bam_header_read(fp);
- buf = (bam1_t**)calloc(max_mem / BAM_CORE_SIZE, sizeof(bam1_t*));
- // write sub files
- for (;;) {
- if (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));
- b = buf[k];
- if ((ret = bam_read1(fp, b)) < 0) break;
- mem += ret;
- ++k;
- if (mem >= max_mem) {
- sort_blocks(n++, k, buf, prefix, header, 0);
- mem = 0; k = 0;
- }
- }
- if (ret != -1)
- fprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\n");
- if (n == 0) sort_blocks(-1, k, buf, prefix, header, is_stdout);
- else { // then merge
- char **fns, *fnout;
- fprintf(stderr, "[bam_sort_core] merging from %d files...\n", n+1);
- sort_blocks(n++, k, buf, prefix, header, 0);
- fnout = (char*)calloc(strlen(prefix) + 20, 1);
- if (is_stdout) sprintf(fnout, "-");
- else sprintf(fnout, "%s.bam", prefix);
- fns = (char**)calloc(n, sizeof(char*));
- for (i = 0; i < n; ++i) {
- fns[i] = (char*)calloc(strlen(prefix) + 20, 1);
- sprintf(fns[i], "%s.%.4d.bam", prefix, i);
- }
- bam_merge_core(is_by_qname, fnout, 0, n, fns, 0, 0);
- free(fnout);
- for (i = 0; i < n; ++i) {
- unlink(fns[i]);
- free(fns[i]);
- }
- free(fns);
- }
- for (k = 0; k < max_mem / BAM_CORE_SIZE; ++k) {
- if (buf[k]) {
- free(buf[k]->data);
- free(buf[k]);
- }
- }
- free(buf);
- bam_header_destroy(header);
- bam_close(fp);
-}
-
-void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem)
-{
- bam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0);
-}
-
-int bam_sort(int argc, char *argv[])
-{
- size_t max_mem = 500000000;
- int c, is_by_qname = 0, is_stdout = 0;
- while ((c = getopt(argc, argv, "nom:")) >= 0) {
- switch (c) {
- case 'o': is_stdout = 1; break;
- case 'n': is_by_qname = 1; break;
- case 'm': max_mem = atol(optarg); break;
- }
- }
- if (optind + 2 > argc) {
- fprintf(stderr, "Usage: samtools sort [-on] [-m <maxMem>] <in.bam> <out.prefix>\n");
- return 1;
- }
- bam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_stat.c b/src/samtools-0.1.18/bam_stat.c
deleted file mode 100644
index f2de0f1..0000000
--- a/src/samtools-0.1.18/bam_stat.c
+++ /dev/null
@@ -1,77 +0,0 @@
-#include <unistd.h>
-#include <assert.h>
-#include "bam.h"
-
-typedef struct {
- long long n_reads[2], n_mapped[2], n_pair_all[2], n_pair_map[2], n_pair_good[2];
- long long n_sgltn[2], n_read1[2], n_read2[2];
- long long n_dup[2];
- long long n_diffchr[2], n_diffhigh[2];
-} bam_flagstat_t;
-
-#define flagstat_loop(s, c) do { \
- int w = ((c)->flag & BAM_FQCFAIL)? 1 : 0; \
- ++(s)->n_reads[w]; \
- if ((c)->flag & BAM_FPAIRED) { \
- ++(s)->n_pair_all[w]; \
- if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good[w]; \
- if ((c)->flag & BAM_FREAD1) ++(s)->n_read1[w]; \
- if ((c)->flag & BAM_FREAD2) ++(s)->n_read2[w]; \
- if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn[w]; \
- if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \
- ++(s)->n_pair_map[w]; \
- if ((c)->mtid != (c)->tid) { \
- ++(s)->n_diffchr[w]; \
- if ((c)->qual >= 5) ++(s)->n_diffhigh[w]; \
- } \
- } \
- } \
- if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped[w]; \
- if ((c)->flag & BAM_FDUP) ++(s)->n_dup[w]; \
- } while (0)
-
-bam_flagstat_t *bam_flagstat_core(bamFile fp)
-{
- bam_flagstat_t *s;
- bam1_t *b;
- bam1_core_t *c;
- int ret;
- s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t));
- b = bam_init1();
- c = &b->core;
- while ((ret = bam_read1(fp, b)) >= 0)
- flagstat_loop(s, c);
- bam_destroy1(b);
- if (ret != -1)
- fprintf(stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n");
- return s;
-}
-int bam_flagstat(int argc, char *argv[])
-{
- bamFile fp;
- bam_header_t *header;
- bam_flagstat_t *s;
- if (argc == optind) {
- fprintf(stderr, "Usage: samtools flagstat <in.bam>\n");
- return 1;
- }
- fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
- assert(fp);
- header = bam_header_read(fp);
- s = bam_flagstat_core(fp);
- printf("%lld + %lld in total (QC-passed reads + QC-failed reads)\n", s->n_reads[0], s->n_reads[1]);
- printf("%lld + %lld duplicates\n", s->n_dup[0], s->n_dup[1]);
- printf("%lld + %lld mapped (%.2f%%:%.2f%%)\n", s->n_mapped[0], s->n_mapped[1], (float)s->n_mapped[0] / s->n_reads[0] * 100.0, (float)s->n_mapped[1] / s->n_reads[1] * 100.0);
- printf("%lld + %lld paired in sequencing\n", s->n_pair_all[0], s->n_pair_all[1]);
- printf("%lld + %lld read1\n", s->n_read1[0], s->n_read1[1]);
- printf("%lld + %lld read2\n", s->n_read2[0], s->n_read2[1]);
- printf("%lld + %lld properly paired (%.2f%%:%.2f%%)\n", s->n_pair_good[0], s->n_pair_good[1], (float)s->n_pair_good[0] / s->n_pair_all[0] * 100.0, (float)s->n_pair_good[1] / s->n_pair_all[1] * 100.0);
- printf("%lld + %lld with itself and mate mapped\n", s->n_pair_map[0], s->n_pair_map[1]);
- printf("%lld + %lld singletons (%.2f%%:%.2f%%)\n", s->n_sgltn[0], s->n_sgltn[1], (float)s->n_sgltn[0] / s->n_pair_all[0] * 100.0, (float)s->n_sgltn[1] / s->n_pair_all[1] * 100.0);
- printf("%lld + %lld with mate mapped to a different chr\n", s->n_diffchr[0], s->n_diffchr[1]);
- printf("%lld + %lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh[0], s->n_diffhigh[1]);
- free(s);
- bam_header_destroy(header);
- bam_close(fp);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bam_tview.c b/src/samtools-0.1.18/bam_tview.c
deleted file mode 100644
index 4eea955..0000000
--- a/src/samtools-0.1.18/bam_tview.c
+++ /dev/null
@@ -1,440 +0,0 @@
-#undef _HAVE_CURSES
-
-#if _CURSES_LIB == 0
-#elif _CURSES_LIB == 1
-#include <curses.h>
-#ifndef NCURSES_VERSION
-#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled"
-#else
-#define _HAVE_CURSES
-#endif
-#elif _CURSES_LIB == 2
-#include <xcurses.h>
-#define _HAVE_CURSES
-#else
-#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled"
-#endif
-
-#ifdef _HAVE_CURSES
-#include <ctype.h>
-#include <assert.h>
-#include <string.h>
-#include <math.h>
-#include "bam.h"
-#include "faidx.h"
-#include "bam2bcf.h"
-
-char bam_aux_getCEi(bam1_t *b, int i);
-char bam_aux_getCSi(bam1_t *b, int i);
-char bam_aux_getCQi(bam1_t *b, int i);
-
-#define TV_MIN_ALNROW 2
-#define TV_MAX_GOTO 40
-#define TV_LOW_MAPQ 10
-
-#define TV_COLOR_MAPQ 0
-#define TV_COLOR_BASEQ 1
-#define TV_COLOR_NUCL 2
-#define TV_COLOR_COL 3
-#define TV_COLOR_COLQ 4
-
-#define TV_BASE_NUCL 0
-#define TV_BASE_COLOR_SPACE 1
-
-typedef struct {
- int mrow, mcol;
- WINDOW *wgoto, *whelp;
-
- bam_index_t *idx;
- bam_lplbuf_t *lplbuf;
- bam_header_t *header;
- bamFile fp;
- int curr_tid, left_pos;
- faidx_t *fai;
- bcf_callaux_t *bca;
-
- int ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name;
- char *ref;
-} tview_t;
-
-int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
-{
- extern unsigned char bam_nt16_table[256];
- tview_t *tv = (tview_t*)data;
- int i, j, c, rb, attr, max_ins = 0;
- uint32_t call = 0;
- if (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen
- // print referece
- rb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N';
- for (i = tv->last_pos + 1; i < pos; ++i) {
- if (i%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", i+1);
- c = tv->ref? tv->ref[i - tv->left_pos] : 'N';
- mvaddch(1, tv->ccol++, c);
- }
- if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1);
- { // call consensus
- bcf_callret1_t bcr;
- int qsum[4], a1, a2, tmp;
- double p[3], prior = 30;
- bcf_call_glfgen(n, pl, bam_nt16_table[rb], tv->bca, &bcr);
- for (i = 0; i < 4; ++i) qsum[i] = bcr.qsum[i]<<2 | i;
- for (i = 1; i < 4; ++i) // insertion sort
- for (j = i; j > 0 && qsum[j] > qsum[j-1]; --j)
- tmp = qsum[j], qsum[j] = qsum[j-1], qsum[j-1] = tmp;
- a1 = qsum[0]&3; a2 = qsum[1]&3;
- p[0] = bcr.p[a1*5+a1]; p[1] = bcr.p[a1*5+a2] + prior; p[2] = bcr.p[a2*5+a2];
- if ("ACGT"[a1] != toupper(rb)) p[0] += prior + 3;
- if ("ACGT"[a2] != toupper(rb)) p[2] += prior + 3;
- if (p[0] < p[1] && p[0] < p[2]) call = (1<<a1)<<16 | (int)((p[1]<p[2]?p[1]:p[2]) - p[0] + .499);
- else if (p[2] < p[1] && p[2] < p[0]) call = (1<<a2)<<16 | (int)((p[0]<p[1]?p[0]:p[1]) - p[2] + .499);
- else call = (1<<a1|1<<a2)<<16 | (int)((p[0]<p[2]?p[0]:p[2]) - p[1] + .499);
- }
- attr = A_UNDERLINE;
- c = ",ACMGRSVTWYHKDBN"[call>>16&0xf];
- i = (call&0xffff)/10+1;
- if (i > 4) i = 4;
- attr |= COLOR_PAIR(i);
- if (c == toupper(rb)) c = '.';
- attron(attr);
- mvaddch(2, tv->ccol, c);
- attroff(attr);
- if(tv->ins) {
- // calculate maximum insert
- for (i = 0; i < n; ++i) {
- const bam_pileup1_t *p = pl + i;
- if (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;
- }
- }
- // core loop
- for (j = 0; j <= max_ins; ++j) {
- for (i = 0; i < n; ++i) {
- const bam_pileup1_t *p = pl + i;
- int row = TV_MIN_ALNROW + p->level - tv->row_shift;
- if (j == 0) {
- if (!p->is_del) {
- if (tv->base_for == TV_BASE_COLOR_SPACE &&
- (c = bam_aux_getCSi(p->b, p->qpos))) {
- c = bam_aux_getCSi(p->b, p->qpos);
- // assume that if we found one color, we will be able to get the color error
- if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? ',' : '.';
- } else {
- if (tv->show_name) {
- char *name = bam1_qname(p->b);
- c = (p->qpos + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos];
- } else {
- c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];
- if (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
- }
- }
- } else c = p->is_refskip? (bam1_strand(p->b)? '<' : '>') : '*';
- } else { // padding
- if (j > p->indel) c = '*';
- else { // insertion
- if (tv->base_for == TV_BASE_NUCL) {
- if (tv->show_name) {
- char *name = bam1_qname(p->b);
- c = (p->qpos + j + 1 >= p->b->core.l_qname)? ' ' : name[p->qpos + j];
- } else {
- c = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];
- if (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? ',' : '.';
- }
- } else {
- c = bam_aux_getCSi(p->b, p->qpos + j);
- if (tv->is_dot && '-' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? ',' : '.';
- }
- }
- }
- if (row > TV_MIN_ALNROW && row < tv->mrow) {
- int x;
- attr = 0;
- if (((p->b->core.flag&BAM_FPAIRED) && !(p->b->core.flag&BAM_FPROPER_PAIR))
- || (p->b->core.flag & BAM_FSECONDARY)) attr |= A_UNDERLINE;
- if (tv->color_for == TV_COLOR_BASEQ) {
- x = bam1_qual(p->b)[p->qpos]/10 + 1;
- if (x > 4) x = 4;
- attr |= COLOR_PAIR(x);
- } else if (tv->color_for == TV_COLOR_MAPQ) {
- x = p->b->core.qual/10 + 1;
- if (x > 4) x = 4;
- attr |= COLOR_PAIR(x);
- } else if (tv->color_for == TV_COLOR_NUCL) {
- x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)] + 5;
- attr |= COLOR_PAIR(x);
- } else if(tv->color_for == TV_COLOR_COL) {
- x = 0;
- switch(bam_aux_getCSi(p->b, p->qpos)) {
- case '0': x = 0; break;
- case '1': x = 1; break;
- case '2': x = 2; break;
- case '3': x = 3; break;
- case '4': x = 4; break;
- default: x = bam_nt16_nt4_table[bam1_seqi(bam1_seq(p->b), p->qpos)]; break;
- }
- x+=5;
- attr |= COLOR_PAIR(x);
- } else if(tv->color_for == TV_COLOR_COLQ) {
- x = bam_aux_getCQi(p->b, p->qpos);
- if(0 == x) x = bam1_qual(p->b)[p->qpos];
- x = x/10 + 1;
- if (x > 4) x = 4;
- attr |= COLOR_PAIR(x);
- }
- attron(attr);
- mvaddch(row, tv->ccol, bam1_strand(p->b)? tolower(c) : toupper(c));
- attroff(attr);
- }
- }
- c = j? '*' : rb;
- if (c == '*') {
- attr = COLOR_PAIR(8);
- attron(attr);
- mvaddch(1, tv->ccol++, c);
- attroff(attr);
- } else mvaddch(1, tv->ccol++, c);
- }
- tv->last_pos = pos;
- return 0;
-}
-
-tview_t *tv_init(const char *fn, const char *fn_fa)
-{
- tview_t *tv = (tview_t*)calloc(1, sizeof(tview_t));
- tv->is_dot = 1;
- tv->fp = bam_open(fn, "r");
- bgzf_set_cache_size(tv->fp, 8 * 1024 *1024);
- assert(tv->fp);
- tv->header = bam_header_read(tv->fp);
- tv->idx = bam_index_load(fn);
- if (tv->idx == 0) exit(1);
- tv->lplbuf = bam_lplbuf_init(tv_pl_func, tv);
- if (fn_fa) tv->fai = fai_load(fn_fa);
- tv->bca = bcf_call_init(0.83, 13);
- tv->ins = 1;
-
- initscr();
- keypad(stdscr, TRUE);
- clear();
- noecho();
- cbreak();
- tv->mrow = 24; tv->mcol = 80;
- getmaxyx(stdscr, tv->mrow, tv->mcol);
- tv->wgoto = newwin(3, TV_MAX_GOTO + 10, 10, 5);
- tv->whelp = newwin(29, 40, 5, 5);
- tv->color_for = TV_COLOR_MAPQ;
- start_color();
- init_pair(1, COLOR_BLUE, COLOR_BLACK);
- init_pair(2, COLOR_GREEN, COLOR_BLACK);
- init_pair(3, COLOR_YELLOW, COLOR_BLACK);
- init_pair(4, COLOR_WHITE, COLOR_BLACK);
- init_pair(5, COLOR_GREEN, COLOR_BLACK);
- init_pair(6, COLOR_CYAN, COLOR_BLACK);
- init_pair(7, COLOR_YELLOW, COLOR_BLACK);
- init_pair(8, COLOR_RED, COLOR_BLACK);
- init_pair(9, COLOR_BLUE, COLOR_BLACK);
- return tv;
-}
-
-void tv_destroy(tview_t *tv)
-{
- delwin(tv->wgoto); delwin(tv->whelp);
- endwin();
-
- bam_lplbuf_destroy(tv->lplbuf);
- bcf_call_destroy(tv->bca);
- bam_index_destroy(tv->idx);
- if (tv->fai) fai_destroy(tv->fai);
- free(tv->ref);
- bam_header_destroy(tv->header);
- bam_close(tv->fp);
- free(tv);
-}
-
-int tv_fetch_func(const bam1_t *b, void *data)
-{
- tview_t *tv = (tview_t*)data;
- if (tv->no_skip) {
- uint32_t *cigar = bam1_cigar(b); // this is cheating...
- int i;
- for (i = 0; i <b->core.n_cigar; ++i) {
- if ((cigar[i]&0xf) == BAM_CREF_SKIP)
- cigar[i] = cigar[i]>>4<<4 | BAM_CDEL;
- }
- }
- bam_lplbuf_push(b, tv->lplbuf);
- return 0;
-}
-
-int tv_draw_aln(tview_t *tv, int tid, int pos)
-{
- // reset
- clear();
- tv->curr_tid = tid; tv->left_pos = pos;
- tv->last_pos = tv->left_pos - 1;
- tv->ccol = 0;
- // print ref and consensus
- if (tv->fai) {
- char *str;
- if (tv->ref) free(tv->ref);
- str = (char*)calloc(strlen(tv->header->target_name[tv->curr_tid]) + 30, 1);
- sprintf(str, "%s:%d-%d", tv->header->target_name[tv->curr_tid], tv->left_pos + 1, tv->left_pos + tv->mcol);
- tv->ref = fai_fetch(tv->fai, str, &tv->l_ref);
- free(str);
- }
- // draw aln
- bam_lplbuf_reset(tv->lplbuf);
- bam_fetch(tv->fp, tv->idx, tv->curr_tid, tv->left_pos, tv->left_pos + tv->mcol, tv, tv_fetch_func);
- bam_lplbuf_push(0, tv->lplbuf);
-
- while (tv->ccol < tv->mcol) {
- int pos = tv->last_pos + 1;
- if (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1);
- mvaddch(1, tv->ccol++, (tv->ref && pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : 'N');
- ++tv->last_pos;
- }
- return 0;
-}
-
-static void tv_win_goto(tview_t *tv, int *tid, int *pos)
-{
- char str[256], *p;
- int i, l = 0;
- wborder(tv->wgoto, '|', '|', '-', '-', '+', '+', '+', '+');
- mvwprintw(tv->wgoto, 1, 2, "Goto: ");
- for (;;) {
- int c = wgetch(tv->wgoto);
- wrefresh(tv->wgoto);
- if (c == KEY_BACKSPACE || c == '\010' || c == '\177') {
- --l;
- } else if (c == KEY_ENTER || c == '\012' || c == '\015') {
- int _tid = -1, _beg, _end;
- if (str[0] == '=') {
- _beg = strtol(str+1, &p, 10) - 1;
- if (_beg > 0) {
- *pos = _beg;
- return;
- }
- } else {
- bam_parse_region(tv->header, str, &_tid, &_beg, &_end);
- if (_tid >= 0) {
- *tid = _tid; *pos = _beg;
- return;
- }
- }
- } else if (isgraph(c)) {
- if (l < TV_MAX_GOTO) str[l++] = c;
- } else if (c == '\027') l = 0;
- else if (c == '\033') return;
- str[l] = '\0';
- for (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, ' ');
- mvwprintw(tv->wgoto, 1, 8, "%s", str);
- }
-}
-
-static void tv_win_help(tview_t *tv) {
- int r = 1;
- WINDOW *win = tv->whelp;
- wborder(win, '|', '|', '-', '-', '+', '+', '+', '+');
- mvwprintw(win, r++, 2, " -=- Help -=- ");
- r++;
- mvwprintw(win, r++, 2, "? This window");
- mvwprintw(win, r++, 2, "Arrows Small scroll movement");
- mvwprintw(win, r++, 2, "h,j,k,l Small scroll movement");
- mvwprintw(win, r++, 2, "H,J,K,L Large scroll movement");
- mvwprintw(win, r++, 2, "ctrl-H Scroll 1k left");
- mvwprintw(win, r++, 2, "ctrl-L Scroll 1k right");
- mvwprintw(win, r++, 2, "space Scroll one screen");
- mvwprintw(win, r++, 2, "backspace Scroll back one screen");
- mvwprintw(win, r++, 2, "g Go to specific location");
- mvwprintw(win, r++, 2, "m Color for mapping qual");
- mvwprintw(win, r++, 2, "n Color for nucleotide");
- mvwprintw(win, r++, 2, "b Color for base quality");
- mvwprintw(win, r++, 2, "c Color for cs color");
- mvwprintw(win, r++, 2, "z Color for cs qual");
- mvwprintw(win, r++, 2, ". Toggle on/off dot view");
- mvwprintw(win, r++, 2, "s Toggle on/off ref skip");
- mvwprintw(win, r++, 2, "r Toggle on/off rd name");
- mvwprintw(win, r++, 2, "N Turn on nt view");
- mvwprintw(win, r++, 2, "C Turn on cs view");
- mvwprintw(win, r++, 2, "i Toggle on/off ins");
- mvwprintw(win, r++, 2, "q Exit");
- r++;
- mvwprintw(win, r++, 2, "Underline: Secondary or orphan");
- mvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19");
- mvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30");
- wrefresh(win);
- wgetch(win);
-}
-
-void tv_loop(tview_t *tv)
-{
- int tid, pos;
- tid = tv->curr_tid; pos = tv->left_pos;
- while (1) {
- int c = getch();
- switch (c) {
- case '?': tv_win_help(tv); break;
- case '\033':
- case 'q': goto end_loop;
- case '/':
- case 'g': tv_win_goto(tv, &tid, &pos); break;
- case 'm': tv->color_for = TV_COLOR_MAPQ; break;
- case 'b': tv->color_for = TV_COLOR_BASEQ; break;
- case 'n': tv->color_for = TV_COLOR_NUCL; break;
- case 'c': tv->color_for = TV_COLOR_COL; break;
- case 'z': tv->color_for = TV_COLOR_COLQ; break;
- case 's': tv->no_skip = !tv->no_skip; break;
- case 'r': tv->show_name = !tv->show_name; break;
- case KEY_LEFT:
- case 'h': --pos; break;
- case KEY_RIGHT:
- case 'l': ++pos; break;
- case KEY_SLEFT:
- case 'H': pos -= 20; break;
- case KEY_SRIGHT:
- case 'L': pos += 20; break;
- case '.': tv->is_dot = !tv->is_dot; break;
- case 'N': tv->base_for = TV_BASE_NUCL; break;
- case 'C': tv->base_for = TV_BASE_COLOR_SPACE; break;
- case 'i': tv->ins = !tv->ins; break;
- case '\010': pos -= 1000; break;
- case '\014': pos += 1000; break;
- case ' ': pos += tv->mcol; break;
- case KEY_UP:
- case 'j': --tv->row_shift; break;
- case KEY_DOWN:
- case 'k': ++tv->row_shift; break;
- case KEY_BACKSPACE:
- case '\177': pos -= tv->mcol; break;
- case KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;
- default: continue;
- }
- if (pos < 0) pos = 0;
- if (tv->row_shift < 0) tv->row_shift = 0;
- tv_draw_aln(tv, tid, pos);
- }
-end_loop:
- return;
-}
-
-int bam_tview_main(int argc, char *argv[])
-{
- tview_t *tv;
- if (argc == 1) {
- fprintf(stderr, "Usage: bamtk tview <aln.bam> [ref.fasta]\n");
- return 1;
- }
- tv = tv_init(argv[1], (argc == 2)? 0 : argv[2]);
- tv_draw_aln(tv, 0, 0);
- tv_loop(tv);
- tv_destroy(tv);
- return 0;
-}
-#else // #ifdef _HAVE_CURSES
-#include <stdio.h>
-#warning "No curses library is available; tview is disabled."
-int bam_tview_main(int argc, char *argv[])
-{
- fprintf(stderr, "[bam_tview_main] The ncurses library is unavailable; tview is not compiled.\n");
- return 1;
-}
-#endif // #ifdef _HAVE_CURSES
diff --git a/src/samtools-0.1.18/bamtk.c b/src/samtools-0.1.18/bamtk.c
deleted file mode 100644
index 8ba2581..0000000
--- a/src/samtools-0.1.18/bamtk.c
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <stdio.h>
-#include <unistd.h>
-#include <assert.h>
-#include <fcntl.h>
-#include "bam.h"
-
-#ifdef _USE_KNETFILE
-#include "knetfile.h"
-#endif
-
-int bam_taf2baf(int argc, char *argv[]);
-int bam_mpileup(int argc, char *argv[]);
-int bam_merge(int argc, char *argv[]);
-int bam_index(int argc, char *argv[]);
-int bam_sort(int argc, char *argv[]);
-int bam_tview_main(int argc, char *argv[]);
-int bam_mating(int argc, char *argv[]);
-int bam_rmdup(int argc, char *argv[]);
-int bam_flagstat(int argc, char *argv[]);
-int bam_fillmd(int argc, char *argv[]);
-int bam_idxstats(int argc, char *argv[]);
-int main_samview(int argc, char *argv[]);
-int main_import(int argc, char *argv[]);
-int main_reheader(int argc, char *argv[]);
-int main_cut_target(int argc, char *argv[]);
-int main_phase(int argc, char *argv[]);
-int main_cat(int argc, char *argv[]);
-int main_depth(int argc, char *argv[]);
-int main_bam2fq(int argc, char *argv[]);
-
-int faidx_main(int argc, char *argv[]);
-
-static int usage()
-{
- fprintf(stderr, "\n");
- fprintf(stderr, "Program: samtools (Tools for alignments in the SAM format)\n");
- fprintf(stderr, "Version: %s\n\n", BAM_VERSION);
- fprintf(stderr, "Usage: samtools <command> [options]\n\n");
- fprintf(stderr, "Command: view SAM<->BAM conversion\n");
- fprintf(stderr, " sort sort alignment file\n");
- fprintf(stderr, " mpileup multi-way pileup\n");
- fprintf(stderr, " depth compute the depth\n");
- fprintf(stderr, " faidx index/extract FASTA\n");
-#if _CURSES_LIB != 0
- fprintf(stderr, " tview text alignment viewer\n");
-#endif
- fprintf(stderr, " index index alignment\n");
- fprintf(stderr, " idxstats BAM index stats (r595 or later)\n");
- fprintf(stderr, " fixmate fix mate information\n");
- fprintf(stderr, " flagstat simple stats\n");
- fprintf(stderr, " calmd recalculate MD/NM tags and '=' bases\n");
- fprintf(stderr, " merge merge sorted alignments\n");
- fprintf(stderr, " rmdup remove PCR duplicates\n");
- fprintf(stderr, " reheader replace BAM header\n");
- fprintf(stderr, " cat concatenate BAMs\n");
- fprintf(stderr, " targetcut cut fosmid regions (for fosmid pool only)\n");
- fprintf(stderr, " phase phase heterozygotes\n");
- fprintf(stderr, "\n");
-#ifdef _WIN32
- fprintf(stderr, "\
-Note: The Windows version of SAMtools is mainly designed for read-only\n\
- operations, such as viewing the alignments and generating the pileup.\n\
- Binary files generated by the Windows version may be buggy.\n\n");
-#endif
- return 1;
-}
-
-int main(int argc, char *argv[])
-{
-#ifdef _WIN32
- setmode(fileno(stdout), O_BINARY);
- setmode(fileno(stdin), O_BINARY);
-#ifdef _USE_KNETFILE
- knet_win32_init();
-#endif
-#endif
- if (argc < 2) return usage();
- if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);
- else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);
- else if (strcmp(argv[1], "mpileup") == 0) return bam_mpileup(argc-1, argv+1);
- else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);
- else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);
- else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1);
- else if (strcmp(argv[1], "idxstats") == 0) return bam_idxstats(argc-1, argv+1);
- else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1);
- else if (strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1);
- else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);
- else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);
- else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);
- else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);
- else if (strcmp(argv[1], "reheader") == 0) return main_reheader(argc-1, argv+1);
- else if (strcmp(argv[1], "cat") == 0) return main_cat(argc-1, argv+1);
- else if (strcmp(argv[1], "targetcut") == 0) return main_cut_target(argc-1, argv+1);
- else if (strcmp(argv[1], "phase") == 0) return main_phase(argc-1, argv+1);
- else if (strcmp(argv[1], "depth") == 0) return main_depth(argc-1, argv+1);
- else if (strcmp(argv[1], "bam2fq") == 0) return main_bam2fq(argc-1, argv+1);
- else if (strcmp(argv[1], "pileup") == 0) {
- fprintf(stderr, "[main] The `pileup' command has been removed. Please use `mpileup' instead.\n");
- return 1;
- }
-#if _CURSES_LIB != 0
- else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1);
-#endif
- else {
- fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
- return 1;
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/Makefile b/src/samtools-0.1.18/bcftools/Makefile
deleted file mode 100644
index 9b6f863..0000000
--- a/src/samtools-0.1.18/bcftools/Makefile
+++ /dev/null
@@ -1,51 +0,0 @@
-CC= gcc
-CFLAGS= -g -Wall -O2 #-m64 #-arch ppc
-DFLAGS= -D_FILE_OFFSET_BITS=64 -D_USE_KNETFILE
-LOBJS= bcf.o vcf.o bcfutils.o prob1.o em.o kfunc.o kmin.o index.o fet.o mut.o bcf2qcall.o
-OMISC= ..
-AOBJS= call1.o main.o $(OMISC)/kstring.o $(OMISC)/bgzf.o $(OMISC)/knetfile.o $(OMISC)/bedidx.o
-PROG= bcftools
-INCLUDES=
-SUBDIRS= .
-
-.SUFFIXES:.c .o
-
-.c.o:
- $(CC) -c $(CFLAGS) $(DFLAGS) -I.. $(INCLUDES) $< -o $@
-
-all-recur lib-recur clean-recur cleanlocal-recur install-recur:
- @target=`echo $@ | sed s/-recur//`; \
- wdir=`pwd`; \
- list='$(SUBDIRS)'; for subdir in $$list; do \
- cd $$subdir; \
- $(MAKE) CC="$(CC)" DFLAGS="$(DFLAGS)" CFLAGS="$(CFLAGS)" \
- INCLUDES="$(INCLUDES)" LIBPATH="$(LIBPATH)" $$target || exit 1; \
- cd $$wdir; \
- done;
-
-all:$(PROG)
-
-lib:libbcf.a
-
-libbcf.a:$(LOBJS)
- $(AR) -csru $@ $(LOBJS)
-
-bcftools:lib $(AOBJS)
- $(CC) $(CFLAGS) -o $@ $(AOBJS) -L. $(LIBPATH) -lbcf -lm -lz
-
-bcf.o:bcf.h
-vcf.o:bcf.h
-index.o:bcf.h
-bcfutils.o:bcf.h
-prob1.o:prob1.h bcf.h
-call1.o:prob1.h bcf.h
-bcf2qcall.o:bcf.h
-main.o:bcf.h
-
-bcf.pdf:bcf.tex
- pdflatex bcf
-
-cleanlocal:
- rm -fr gmon.out *.o a.out *.dSYM $(PROG) *~ *.a bcf.aux bcf.log bcf.pdf *.class libbcf.*.dylib libbcf.so*
-
-clean:cleanlocal-recur
diff --git a/src/samtools-0.1.18/bcftools/README b/src/samtools-0.1.18/bcftools/README
deleted file mode 100644
index 1d7159d..0000000
--- a/src/samtools-0.1.18/bcftools/README
+++ /dev/null
@@ -1,36 +0,0 @@
-The view command of bcftools calls variants, tests Hardy-Weinberg
-equilibrium (HWE), tests allele balances and estimates allele frequency.
-
-This command calls a site as a potential variant if P(ref|D,F) is below
-0.9 (controlled by the -p option), where D is data and F is the prior
-allele frequency spectrum (AFS).
-
-The view command performs two types of allele balance tests, both based
-on Fisher's exact test for 2x2 contingency tables with the row variable
-being reference allele or not. In the first table, the column variable
-is strand. Two-tail P-value is taken. We test if variant bases tend to
-come from one strand. In the second table, the column variable is
-whether a base appears in the first or the last 11bp of the read.
-One-tail P-value is taken. We test if variant bases tend to occur
-towards the end of reads, which is usually an indication of
-misalignment.
-
-Site allele frequency is estimated in two ways. In the first way, the
-frequency is esimated as \argmax_f P(D|f) under the assumption of
-HWE. Prior AFS is not used. In the second way, the frequency is
-estimated as the posterior expectation of allele counts \sum_k
-kP(k|D,F), dividied by the total number of haplotypes. HWE is not
-assumed, but the estimate depends on the prior AFS. The two estimates
-largely agree when the signal is strong, but may differ greatly on weak
-sites as in this case, the prior plays an important role.
-
-To test HWE, we calculate the posterior distribution of genotypes
-(ref-hom, het and alt-hom). Chi-square test is performed. It is worth
-noting that the model used here is prior dependent and assumes HWE,
-which is different from both models for allele frequency estimate. The
-new model actually yields a third estimate of site allele frequency.
-
-The estimate allele frequency spectrum is printed to stderr per 64k
-sites. The estimate is in fact only the first round of a EM
-procedure. The second model (not the model for HWE testing) is used to
-estimate the AFS.
\ No newline at end of file
diff --git a/src/samtools-0.1.18/bcftools/bcf.c b/src/samtools-0.1.18/bcftools/bcf.c
deleted file mode 100644
index 84a8e76..0000000
--- a/src/samtools-0.1.18/bcftools/bcf.c
+++ /dev/null
@@ -1,328 +0,0 @@
-#include <string.h>
-#include <ctype.h>
-#include <stdio.h>
-#include "kstring.h"
-#include "bcf.h"
-
-bcf_t *bcf_open(const char *fn, const char *mode)
-{
- bcf_t *b;
- b = calloc(1, sizeof(bcf_t));
- if (strchr(mode, 'w')) {
- b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdout), mode);
- } else {
- b->fp = strcmp(fn, "-")? bgzf_open(fn, mode) : bgzf_fdopen(fileno(stdin), mode);
- }
-#ifndef BCF_LITE
- b->fp->owned_file = 1;
-#endif
- return b;
-}
-
-int bcf_close(bcf_t *b)
-{
- int ret;
- if (b == 0) return 0;
- ret = bgzf_close(b->fp);
- free(b);
- return ret;
-}
-
-int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h)
-{
- if (b == 0 || h == 0) return -1;
- bgzf_write(b->fp, "BCF\4", 4);
- bgzf_write(b->fp, &h->l_nm, 4);
- bgzf_write(b->fp, h->name, h->l_nm);
- bgzf_write(b->fp, &h->l_smpl, 4);
- bgzf_write(b->fp, h->sname, h->l_smpl);
- bgzf_write(b->fp, &h->l_txt, 4);
- bgzf_write(b->fp, h->txt, h->l_txt);
- bgzf_flush(b->fp);
- return 16 + h->l_nm + h->l_smpl + h->l_txt;
-}
-
-bcf_hdr_t *bcf_hdr_read(bcf_t *b)
-{
- uint8_t magic[4];
- bcf_hdr_t *h;
- if (b == 0) return 0;
- h = calloc(1, sizeof(bcf_hdr_t));
- bgzf_read(b->fp, magic, 4);
- bgzf_read(b->fp, &h->l_nm, 4);
- h->name = malloc(h->l_nm);
- bgzf_read(b->fp, h->name, h->l_nm);
- bgzf_read(b->fp, &h->l_smpl, 4);
- h->sname = malloc(h->l_smpl);
- bgzf_read(b->fp, h->sname, h->l_smpl);
- bgzf_read(b->fp, &h->l_txt, 4);
- h->txt = malloc(h->l_txt);
- bgzf_read(b->fp, h->txt, h->l_txt);
- bcf_hdr_sync(h);
- return h;
-}
-
-void bcf_hdr_destroy(bcf_hdr_t *h)
-{
- if (h == 0) return;
- free(h->name); free(h->sname); free(h->txt); free(h->ns); free(h->sns);
- free(h);
-}
-
-static inline char **cnt_null(int l, char *str, int *_n)
-{
- int n = 0;
- char *p, **list;
- *_n = 0;
- if (l == 0 || str == 0) return 0;
- for (p = str; p != str + l; ++p)
- if (*p == 0) ++n;
- *_n = n;
- list = calloc(n, sizeof(void*));
- list[0] = str;
- for (p = str, n = 1; p < str + l - 1; ++p)
- if (*p == 0) list[n++] = p + 1;
- return list;
-}
-
-int bcf_hdr_sync(bcf_hdr_t *b)
-{
- if (b == 0) return -1;
- if (b->ns) free(b->ns);
- if (b->sns) free(b->sns);
- if (b->l_nm) b->ns = cnt_null(b->l_nm, b->name, &b->n_ref);
- else b->ns = 0, b->n_ref = 0;
- b->sns = cnt_null(b->l_smpl, b->sname, &b->n_smpl);
- return 0;
-}
-
-int bcf_sync(bcf1_t *b)
-{
- char *p, *tmp[5];
- int i, n, n_smpl = b->n_smpl;
- ks_tokaux_t aux;
- // set ref, alt, flt, info, fmt
- b->ref = b->alt = b->flt = b->info = b->fmt = 0;
- for (p = b->str, n = 0; p < b->str + b->l_str; ++p) {
- if (*p == 0 && p+1 != b->str + b->l_str) {
- if (n == 5) {
- ++n;
- break;
- } else tmp[n++] = p + 1;
- }
- }
- if (n != 5) {
- fprintf(stderr, "[%s] incorrect number of fields (%d != 5) at %d:%d\n", __func__, n, b->tid, b->pos);
- return -1;
- }
- b->ref = tmp[0]; b->alt = tmp[1]; b->flt = tmp[2]; b->info = tmp[3]; b->fmt = tmp[4];
- // set n_alleles
- if (*b->alt == 0) b->n_alleles = 1;
- else {
- for (p = b->alt, n = 1; *p; ++p)
- if (*p == ',') ++n;
- b->n_alleles = n + 1;
- }
- // set n_gi and gi[i].fmt
- for (p = b->fmt, n = 1; *p; ++p)
- if (*p == ':') ++n;
- if (n > b->m_gi) {
- int old_m = b->m_gi;
- b->m_gi = n;
- kroundup32(b->m_gi);
- b->gi = realloc(b->gi, b->m_gi * sizeof(bcf_ginfo_t));
- memset(b->gi + old_m, 0, (b->m_gi - old_m) * sizeof(bcf_ginfo_t));
- }
- b->n_gi = n;
- for (p = kstrtok(b->fmt, ":", &aux), n = 0; p; p = kstrtok(0, 0, &aux))
- b->gi[n++].fmt = bcf_str2int(p, aux.p - p);
- // set gi[i].len
- for (i = 0; i < b->n_gi; ++i) {
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
- b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2) || b->gi[i].fmt == bcf_str2int("HQ", 2)) {
- b->gi[i].len = 2;
- } else if (b->gi[i].fmt == bcf_str2int("GQ", 2) || b->gi[i].fmt == bcf_str2int("GT", 2)) {
- b->gi[i].len = 1;
- } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
- b->gi[i].len = 4;
- } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
- b->gi[i].len = b->n_alleles * (b->n_alleles + 1) / 2 * 4;
- }
- b->gi[i].data = realloc(b->gi[i].data, n_smpl * b->gi[i].len);
- }
- return 0;
-}
-
-int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b)
-{
- int i, l = 0;
- if (b == 0) return -1;
- bgzf_write(bp->fp, &b->tid, 4);
- bgzf_write(bp->fp, &b->pos, 4);
- bgzf_write(bp->fp, &b->qual, 4);
- bgzf_write(bp->fp, &b->l_str, 4);
- bgzf_write(bp->fp, b->str, b->l_str);
- l = 12 + b->l_str;
- for (i = 0; i < b->n_gi; ++i) {
- bgzf_write(bp->fp, b->gi[i].data, b->gi[i].len * h->n_smpl);
- l += b->gi[i].len * h->n_smpl;
- }
- return l;
-}
-
-int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b)
-{
- int i, l = 0;
- if (b == 0) return -1;
- if (bgzf_read(bp->fp, &b->tid, 4) == 0) return -1;
- b->n_smpl = h->n_smpl;
- bgzf_read(bp->fp, &b->pos, 4);
- bgzf_read(bp->fp, &b->qual, 4);
- bgzf_read(bp->fp, &b->l_str, 4);
- if (b->l_str > b->m_str) {
- b->m_str = b->l_str;
- kroundup32(b->m_str);
- b->str = realloc(b->str, b->m_str);
- }
- bgzf_read(bp->fp, b->str, b->l_str);
- l = 12 + b->l_str;
- if (bcf_sync(b) < 0) return -2;
- for (i = 0; i < b->n_gi; ++i) {
- bgzf_read(bp->fp, b->gi[i].data, b->gi[i].len * h->n_smpl);
- l += b->gi[i].len * h->n_smpl;
- }
- return l;
-}
-
-int bcf_destroy(bcf1_t *b)
-{
- int i;
- if (b == 0) return -1;
- free(b->str);
- for (i = 0; i < b->m_gi; ++i)
- free(b->gi[i].data);
- free(b->gi);
- free(b);
- return 0;
-}
-
-static inline void fmt_str(const char *p, kstring_t *s)
-{
- if (*p == 0) kputc('.', s);
- else kputs(p, s);
-}
-
-void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s)
-{
- int i, j, x;
- s->l = 0;
- if (h->n_ref) kputs(h->ns[b->tid], s);
- else kputw(b->tid, s);
- kputc('\t', s);
- kputw(b->pos + 1, s); kputc('\t', s);
- fmt_str(b->str, s); kputc('\t', s);
- fmt_str(b->ref, s); kputc('\t', s);
- fmt_str(b->alt, s); kputc('\t', s);
- ksprintf(s, "%.3g", b->qual); kputc('\t', s);
- fmt_str(b->flt, s); kputc('\t', s);
- fmt_str(b->info, s);
- if (b->fmt[0]) {
- kputc('\t', s);
- fmt_str(b->fmt, s);
- }
- x = b->n_alleles * (b->n_alleles + 1) / 2;
- if (b->n_gi == 0) return;
- for (j = 0; j < h->n_smpl; ++j) {
- kputc('\t', s);
- for (i = 0; i < b->n_gi; ++i) {
- if (i) kputc(':', s);
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
- uint8_t *d = (uint8_t*)b->gi[i].data + j * x;
- int k;
- for (k = 0; k < x; ++k) {
- if (k > 0) kputc(',', s);
- kputw(d[k], s);
- }
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
- kputw(((uint16_t*)b->gi[i].data)[j], s);
- } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
- kputw(((uint8_t*)b->gi[i].data)[j], s);
- } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
- kputw(((int32_t*)b->gi[i].data)[j], s);
- } else if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
- int y = ((uint8_t*)b->gi[i].data)[j];
- if (y>>7&1) {
- kputsn("./.", 3, s);
- } else {
- kputc('0' + (y>>3&7), s);
- kputc("/|"[y>>6&1], s);
- kputc('0' + (y&7), s);
- }
- } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
- float *d = (float*)b->gi[i].data + j * x;
- int k;
- //printf("- %lx\n", d);
- for (k = 0; k < x; ++k) {
- if (k > 0) kputc(',', s);
- ksprintf(s, "%.2f", d[k]);
- }
- } else kputc('.', s); // custom fields
- }
- }
-}
-
-char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b)
-{
- kstring_t s;
- s.l = s.m = 0; s.s = 0;
- bcf_fmt_core(h, b, &s);
- return s.s;
-}
-
-int bcf_append_info(bcf1_t *b, const char *info, int l)
-{
- int shift = b->fmt - b->str;
- int l_fmt = b->l_str - shift;
- char *ori = b->str;
- if (b->l_str + l > b->m_str) { // enlarge if necessary
- b->m_str = b->l_str + l;
- kroundup32(b->m_str);
- b->str = realloc(b->str, b->m_str);
- }
- memmove(b->str + shift + l, b->str + shift, l_fmt); // move the FORMAT field
- memcpy(b->str + shift - 1, info, l); // append to the INFO field
- b->str[shift + l - 1] = '\0';
- b->fmt = b->str + shift + l;
- b->l_str += l;
- if (ori != b->str) bcf_sync(b); // synchronize when realloc changes the pointer
- return 0;
-}
-
-int bcf_cpy(bcf1_t *r, const bcf1_t *b)
-{
- char *t1 = r->str;
- bcf_ginfo_t *t2 = r->gi;
- int i, t3 = r->m_str, t4 = r->m_gi;
- *r = *b;
- r->str = t1; r->gi = t2; r->m_str = t3; r->m_gi = t4;
- if (r->m_str < b->m_str) {
- r->m_str = b->m_str;
- r->str = realloc(r->str, r->m_str);
- }
- memcpy(r->str, b->str, r->m_str);
- bcf_sync(r); // calling bcf_sync() is simple but inefficient
- for (i = 0; i < r->n_gi; ++i)
- memcpy(r->gi[i].data, b->gi[i].data, r->n_smpl * r->gi[i].len);
- return 0;
-}
-
-int bcf_is_indel(const bcf1_t *b)
-{
- char *p;
- if (strlen(b->ref) > 1) return 1;
- for (p = b->alt; *p; ++p)
- if (*p != ',' && p[1] != ',' && p[1] != '\0')
- return 1;
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/bcf.h b/src/samtools-0.1.18/bcftools/bcf.h
deleted file mode 100644
index 822ae5c..0000000
--- a/src/samtools-0.1.18/bcftools/bcf.h
+++ /dev/null
@@ -1,190 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2010 Broad Institute
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Contact: Heng Li <lh3 at live.co.uk> */
-
-#ifndef BCF_H
-#define BCF_H
-
-#define BCF_VERSION "0.1.17-dev (r973:277)"
-
-#include <stdint.h>
-#include <zlib.h>
-
-#ifndef BCF_LITE
-#include "bgzf.h"
-typedef BGZF *bcfFile;
-#else
-typedef gzFile bcfFile;
-#define bgzf_open(fn, mode) gzopen(fn, mode)
-#define bgzf_fdopen(fd, mode) gzdopen(fd, mode)
-#define bgzf_close(fp) gzclose(fp)
-#define bgzf_read(fp, buf, len) gzread(fp, buf, len)
-#define bgzf_write(fp, buf, len)
-#define bgzf_flush(fp)
-#endif
-
-/*
- A member in the structs below is said to "primary" if its content
- cannot be inferred from other members in any of structs below; a
- member is said to be "derived" if its content can be derived from
- other members. For example, bcf1_t::str is primary as this comes from
- the input data, while bcf1_t::info is derived as it can always be
- correctly set if we know bcf1_t::str. Derived members are for quick
- access to the content and must be synchronized with the primary data.
- */
-
-typedef struct {
- uint32_t fmt; // format of the block, set by bcf_str2int().
- int len; // length of data for each individual
- void *data; // concatenated data
- // derived info: fmt, len (<-bcf1_t::fmt)
-} bcf_ginfo_t;
-
-typedef struct {
- int32_t tid, pos; // refID and 0-based position
- int32_t l_str, m_str; // length and the allocated size of ->str
- float qual; // SNP quality
- char *str; // concatenated string of variable length strings in VCF (from col.2 to col.7)
- char *ref, *alt, *flt, *info, *fmt; // they all point to ->str; no memory allocation
- int n_gi, m_gi; // number and the allocated size of geno fields
- bcf_ginfo_t *gi; // array of geno fields
- int n_alleles, n_smpl; // number of alleles and samples
- // derived info: ref, alt, flt, info, fmt (<-str), n_gi (<-fmt), n_alleles (<-alt), n_smpl (<-bcf_hdr_t::n_smpl)
-} bcf1_t;
-
-typedef struct {
- int32_t n_ref, n_smpl; // number of reference sequences and samples
- int32_t l_nm; // length of concatenated sequence names; 0 padded
- int32_t l_smpl; // length of concatenated sample names; 0 padded
- int32_t l_txt; // length of header text (lines started with ##)
- char *name, *sname, *txt; // concatenated sequence names, sample names and header text
- char **ns, **sns; // array of sequence and sample names; point to name and sname, respectively
- // derived info: n_ref (<-name), n_smpl (<-sname), ns (<-name), sns (<-sname)
-} bcf_hdr_t;
-
-typedef struct {
- int is_vcf; // if the file in operation is a VCF
- void *v; // auxillary data structure for VCF
- bcfFile fp; // file handler for BCF
-} bcf_t;
-
-struct __bcf_idx_t;
-typedef struct __bcf_idx_t bcf_idx_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- // open a BCF file; for BCF file only
- bcf_t *bcf_open(const char *fn, const char *mode);
- // close file
- int bcf_close(bcf_t *b);
- // read one record from BCF; return -1 on end-of-file, and <-1 for errors
- int bcf_read(bcf_t *bp, const bcf_hdr_t *h, bcf1_t *b);
- // call this function if b->str is changed
- int bcf_sync(bcf1_t *b);
- // write a BCF record
- int bcf_write(bcf_t *bp, const bcf_hdr_t *h, const bcf1_t *b);
- // read the BCF header; BCF only
- bcf_hdr_t *bcf_hdr_read(bcf_t *b);
- // write the BCF header
- int bcf_hdr_write(bcf_t *b, const bcf_hdr_t *h);
- // set bcf_hdr_t::ns and bcf_hdr_t::sns
- int bcf_hdr_sync(bcf_hdr_t *b);
- // destroy the header
- void bcf_hdr_destroy(bcf_hdr_t *h);
- // destroy a record
- int bcf_destroy(bcf1_t *b);
- // BCF->VCF conversion
- char *bcf_fmt(const bcf_hdr_t *h, bcf1_t *b);
- // append more info
- int bcf_append_info(bcf1_t *b, const char *info, int l);
- // copy
- int bcf_cpy(bcf1_t *r, const bcf1_t *b);
-
- // open a VCF or BCF file if "b" is set in "mode"
- bcf_t *vcf_open(const char *fn, const char *mode);
- // close a VCF/BCF file
- int vcf_close(bcf_t *bp);
- // read the VCF/BCF header
- bcf_hdr_t *vcf_hdr_read(bcf_t *bp);
- // read the sequence dictionary from a separate file; required for VCF->BCF conversion
- int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn);
- // read a VCF/BCF record; return -1 on end-of-file and <-1 for errors
- int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
- // write the VCF header
- int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h);
- // write a VCF record
- int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b);
-
- // keep the first n alleles and discard the rest
- int bcf_shrink_alt(bcf1_t *b, int n);
- // convert GL to PL
- int bcf_gl2pl(bcf1_t *b);
- // if the site is an indel
- int bcf_is_indel(const bcf1_t *b);
- bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list);
- int bcf_subsam(int n_smpl, int *list, bcf1_t *b);
- // move GT to the first FORMAT field
- int bcf_fix_gt(bcf1_t *b);
- // update PL generated by old samtools
- int bcf_fix_pl(bcf1_t *b);
- // convert PL to GLF-like 10-likelihood GL
- int bcf_gl10(const bcf1_t *b, uint8_t *gl);
- // convert up to 4 INDEL alleles to GLF-like 10-likelihood GL
- int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl);
-
- // string hash table
- void *bcf_build_refhash(bcf_hdr_t *h);
- void bcf_str2id_destroy(void *_hash);
- void bcf_str2id_thorough_destroy(void *_hash);
- int bcf_str2id_add(void *_hash, const char *str);
- int bcf_str2id(void *_hash, const char *str);
- void *bcf_str2id_init();
-
- // indexing related functions
- int bcf_idx_build(const char *fn);
- uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg);
- int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end);
- bcf_idx_t *bcf_idx_load(const char *fn);
- void bcf_idx_destroy(bcf_idx_t *idx);
-
-#ifdef __cplusplus
-}
-#endif
-
-static inline uint32_t bcf_str2int(const char *str, int l)
-{
- int i;
- uint32_t x = 0;
- for (i = 0; i < l && i < 4; ++i) {
- if (str[i] == 0) return x;
- x = x<<8 | str[i];
- }
- return x;
-}
-
-#endif
diff --git a/src/samtools-0.1.18/bcftools/bcf.tex b/src/samtools-0.1.18/bcftools/bcf.tex
deleted file mode 100644
index 442fc2a..0000000
--- a/src/samtools-0.1.18/bcftools/bcf.tex
+++ /dev/null
@@ -1,77 +0,0 @@
-\documentclass[10pt,pdftex]{article}
-\usepackage{color}
-\definecolor{gray}{rgb}{0.7,0.7,0.7}
-
-\setlength{\topmargin}{0.0cm}
-\setlength{\textheight}{21.5cm}
-\setlength{\oddsidemargin}{0cm}
-\setlength{\textwidth}{16.5cm}
-\setlength{\columnsep}{0.6cm}
-
-\begin{document}
-
-\begin{center}
-\begin{tabular}{|l|l|l|l|l|}
-\hline
-\multicolumn{2}{|c|}{\bf Field} & \multicolumn{1}{c|}{\bf Descrption} & \multicolumn{1}{c|}{\bf Type} & \multicolumn{1}{c|}{\bf Value} \\\hline\hline
-\multicolumn{2}{|l|}{\sf magic} & Magic string & {\tt char[4]} & {\tt BCF\char92 4} \\\hline
-\multicolumn{2}{|l|}{\sf l\_seqnm} & Length of concatenated sequence names & {\tt int32\_t} & \\\hline
-\multicolumn{2}{|l|}{\sf seqnm} & Concatenated names, {\tt NULL} padded & {\tt char[{\sf l\_seqnm}]} & \\\hline
-\multicolumn{2}{|l|}{\sf l\_smpl} & Length of concatenated sample names & {\tt int32\_t} & \\\hline
-\multicolumn{2}{|l|}{\sf smpl} & Concatenated sample names & {\tt char[{\sf l\_smpl}]} & \\\hline
-\multicolumn{2}{|l|}{\sf l\_meta} & Length of the meta text (double-hash lines)& {\tt int32\_t} & \\\hline
-\multicolumn{2}{|l|}{\sf meta} & Meta text, {\tt NULL} terminated & {\tt char[{\sf l\_meta}]} & \\\hline
-\multicolumn{5}{|c|}{\it \color{gray}{List of records until the end of the file}}\\\cline{2-5}
-& {\sf seq\_id} & Reference sequence ID & {\tt int32\_t} & \\\cline{2-5}
-& {\sf pos} & Position & {\tt int32\_t} & \\\cline{2-5}
-& {\sf qual} & Variant quality & {\tt float} & \\\cline{2-5}
-& {\sf l\_str} & Length of {\sf str} & {\tt int32\_t} & \\\cline{2-5}
-& {\sf str} & {\tt ID+REF+ALT+FILTER+INFO+FORMAT}, {\tt NULL} padded & {\tt char[{\sf l\_str}]} &\\\cline{2-5}
-& \multicolumn{4}{c|}{Blocks of data; \#blocks and formats defined by {\tt FORMAT} (table below)}\\
-\hline
-\end{tabular}
-\end{center}
-
-\begin{center}
-\begin{tabular}{clp{9cm}}
-\hline
-\multicolumn{1}{l}{\bf Field} & \multicolumn{1}{l}{\bf Type} & \multicolumn{1}{l}{\bf Description} \\\hline
-{\tt DP} & {\tt uint16\_t[n]} & Read depth \\
-{\tt GL} & {\tt float[n*G]} & Log10 likelihood of data; $G=\frac{A(A+1)}{2}$, $A=\#\{alleles\}$\\
-{\tt GT} & {\tt uint8\_t[n]} & {\tt missing\char60\char60 7 | phased\char60\char60 6 | allele1\char60\char60 3 | allele2} \\
-{\tt \_GT} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic GT; the first int equals the max ploidy $P$. If the highest bit is set,
- the allele is not present (e.g. due to different ploidy between samples).} \\
-{\tt GQ} & {\tt uint8\_t[n]} & {Genotype quality}\\
-{\tt HQ} & {\tt uint8\_t[n*2]} & {Haplotype quality}\\
-{\tt \_HQ} & {\tt uint8\_t+uint8\_t[n*P]} & {Generic HQ}\\
-{\tt IBD} & {\tt uint32\_t[n*2]} & {IBD}\\
-{\tt \_IBD} & {\tt uint8\_t+uint32\_t[n*P]} & {Generic IBD}\\
-{\tt PL} & {\tt uint8\_t[n*G]} & {Phred-scaled likelihood of data}\\
-{\tt PS} & {\tt uint32\_t[n]} & {Phase set}\\
-%{\tt SP} & {\tt uint8\_t[n]} & {Strand bias P-value (bcftools only)}\\
-\emph{Integer} & {\tt int32\_t[n*X]} & {Fix-sized custom Integer; $X$ defined in the header}\\
-\emph{Numeric} & {\tt double[n*X]} & {Fix-sized custom Numeric}\\
-\emph{String} & {\tt uint32\_t+char*} & {\tt NULL} padded concat. strings (int equals to the length) \\
-\hline
-\end{tabular}
-\end{center}
-
-\begin{itemize}
-\item A BCF file is in the {\tt BGZF} format.
-\item All multi-byte numbers are little-endian.
-\item In a string, a missing value `.' is an empty C string ``{\tt
- \char92 0}'' (not ``{\tt .\char92 0}'')
-\item For {\tt GL} and {\tt PL}, likelihoods of genotypes appear in the
- order of alleles in {\tt REF} and then {\tt ALT}. For example, if {\tt
- REF=C}, {\tt ALT=T,A}, likelihoods appear in the order of {\tt
- CC,CT,TT,CA,TA,AA} (NB: the ordering is different from the one in the original
- BCF proposal).
-\item Predefined {\tt FORMAT} fields can be missing from VCF headers, but custom {\tt FORMAT} fields
- are required to be explicitly defined in the headers.
-\item A {\tt FORMAT} field with its name starting with `{\tt \_}' is specific to BCF only.
- It gives an alternative binary representation of the corresponding VCF field, in case
- the default representation is unable to keep the genotype information,
- for example, when the ploidy is not 2 or there are more than 8 alleles.
-\end{itemize}
-
-\end{document}
diff --git a/src/samtools-0.1.18/bcftools/bcf2qcall.c b/src/samtools-0.1.18/bcftools/bcf2qcall.c
deleted file mode 100644
index a86bac2..0000000
--- a/src/samtools-0.1.18/bcftools/bcf2qcall.c
+++ /dev/null
@@ -1,91 +0,0 @@
-#include <errno.h>
-#include <math.h>
-#include <string.h>
-#include <stdlib.h>
-#include "bcf.h"
-
-static int8_t nt4_table[256] = {
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 /*'-'*/, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
-};
-
-static int read_I16(bcf1_t *b, int anno[16])
-{
- char *p;
- int i;
- if ((p = strstr(b->info, "I16=")) == 0) return -1;
- p += 4;
- for (i = 0; i < 16; ++i) {
- anno[i] = strtol(p, &p, 10);
- if (anno[i] == 0 && (errno == EINVAL || errno == ERANGE)) return -2;
- ++p;
- }
- return 0;
-}
-
-int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b)
-{
- int a[4], k, g[10], l, map[4], k1, j, i, i0, anno[16], dp, mq, d_rest;
- char *s;
- if (b->ref[1] != 0 || b->n_alleles > 4) return -1; // ref is not a single base
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
- if (i == b->n_gi) return -1; // no PL
- if (read_I16(b, anno) != 0) return -1; // no I16; FIXME: can be improved
- d_rest = dp = anno[0] + anno[1] + anno[2] + anno[3];
- if (dp == 0) return -1; // depth is zero
- mq = (int)(sqrt((double)(anno[9] + anno[11]) / dp) + .499);
- i0 = i;
- a[0] = nt4_table[(int)b->ref[0]];
- if (a[0] > 3) return -1; // ref is not A/C/G/T
- a[1] = a[2] = a[3] = -2; // -1 has a special meaning
- if (b->alt[0] == 0) return -1; // no alternate allele
- map[0] = map[1] = map[2] = map[3] = -2;
- map[a[0]] = 0;
- for (k = 0, s = b->alt, k1 = -1; k < 3 && *s; ++k, s += 2) {
- if (s[1] != ',' && s[1] != 0) return -1; // ALT is not single base
- a[k+1] = nt4_table[(int)*s];
- if (a[k+1] >= 0) map[a[k+1]] = k+1;
- else k1 = k+1;
- if (s[1] == 0) break;
- }
- for (k = 0; k < 4; ++k)
- if (map[k] < 0) map[k] = k1;
- for (i = 0; i < h->n_smpl; ++i) {
- int d;
- uint8_t *p = b->gi[i0].data + i * b->gi[i0].len;
- for (j = 0; j < b->gi[i0].len; ++j)
- if (p[j]) break;
- d = (int)((double)d_rest / (h->n_smpl - i) + .499);
- if (d == 0) d = 1;
- if (j == b->gi[i0].len) d = 0;
- d_rest -= d;
- for (k = j = 0; k < 4; ++k) {
- for (l = k; l < 4; ++l) {
- int t, x = map[k], y = map[l];
- if (x > y) t = x, x = y, y = t; // swap
- g[j++] = p[y * (y+1) / 2 + x];
- }
- }
- printf("%s\t%d\t%c", h->ns[b->tid], b->pos+1, *b->ref);
- printf("\t%d\t%d\t0", d, mq);
- for (j = 0; j < 10; ++j)
- printf("\t%d", g[j]);
- printf("\t%s\n", h->sns[i]);
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/bcfutils.c b/src/samtools-0.1.18/bcftools/bcfutils.c
deleted file mode 100644
index 0eab4c1..0000000
--- a/src/samtools-0.1.18/bcftools/bcfutils.c
+++ /dev/null
@@ -1,390 +0,0 @@
-#include <string.h>
-#include <math.h>
-#include "bcf.h"
-#include "kstring.h"
-#include "khash.h"
-KHASH_MAP_INIT_STR(str2id, int)
-
-#ifdef _WIN32
-#define srand48(x) srand(x)
-#define drand48() ((double)rand() / RAND_MAX)
-#endif
-
-// FIXME: valgrind report a memory leak in this function. Probably it does not get deallocated...
-void *bcf_build_refhash(bcf_hdr_t *h)
-{
- khash_t(str2id) *hash;
- int i, ret;
- hash = kh_init(str2id);
- for (i = 0; i < h->n_ref; ++i) {
- khint_t k;
- k = kh_put(str2id, hash, h->ns[i], &ret); // FIXME: check ret
- kh_val(hash, k) = i;
- }
- return hash;
-}
-
-void *bcf_str2id_init()
-{
- return kh_init(str2id);
-}
-
-void bcf_str2id_destroy(void *_hash)
-{
- khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
- if (hash) kh_destroy(str2id, hash); // Note that strings are not freed.
-}
-
-void bcf_str2id_thorough_destroy(void *_hash)
-{
- khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
- khint_t k;
- if (hash == 0) return;
- for (k = 0; k < kh_end(hash); ++k)
- if (kh_exist(hash, k)) free((char*)kh_key(hash, k));
- kh_destroy(str2id, hash);
-}
-
-int bcf_str2id(void *_hash, const char *str)
-{
- khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
- khint_t k;
- if (!hash) return -1;
- k = kh_get(str2id, hash, str);
- return k == kh_end(hash)? -1 : kh_val(hash, k);
-}
-
-int bcf_str2id_add(void *_hash, const char *str)
-{
- khint_t k;
- int ret;
- khash_t(str2id) *hash = (khash_t(str2id)*)_hash;
- if (!hash) return -1;
- k = kh_put(str2id, hash, str, &ret);
- if (ret == 0) return kh_val(hash, k);
- kh_val(hash, k) = kh_size(hash) - 1;
- return kh_val(hash, k);
-}
-
-int bcf_shrink_alt(bcf1_t *b, int n)
-{
- char *p;
- int i, j, k, n_smpl = b->n_smpl;
- if (b->n_alleles <= n) return -1;
- // update ALT
- if (n > 1) {
- for (p = b->alt, k = 1; *p; ++p)
- if (*p == ',' && ++k == n) break;
- *p = '\0';
- } else p = b->alt, *p = '\0';
- ++p;
- memmove(p, b->flt, b->str + b->l_str - b->flt);
- b->l_str -= b->flt - p;
- // update PL
- for (i = 0; i < b->n_gi; ++i) {
- bcf_ginfo_t *g = b->gi + i;
- if (g->fmt == bcf_str2int("PL", 2)) {
- int l, x = b->n_alleles * (b->n_alleles + 1) / 2;
- uint8_t *d = (uint8_t*)g->data;
- g->len = n * (n + 1) / 2;
- for (l = k = 0; l < n_smpl; ++l) {
- uint8_t *dl = d + l * x;
- for (j = 0; j < g->len; ++j) d[k++] = dl[j];
- }
- } // FIXME: to add GL
- }
- b->n_alleles = n;
- bcf_sync(b);
- return 0;
-}
-
-int bcf_gl2pl(bcf1_t *b)
-{
- char *p;
- int i, n_smpl = b->n_smpl;
- bcf_ginfo_t *g;
- float *d0;
- uint8_t *d1;
- if (strstr(b->fmt, "PL")) return -1;
- if ((p = strstr(b->fmt, "GL")) == 0) return -1;
- *p = 'P';
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("GL", 2))
- break;
- g = b->gi + i;
- g->fmt = bcf_str2int("PL", 2);
- g->len /= 4; // 4 == sizeof(float)
- d0 = (float*)g->data; d1 = (uint8_t*)g->data;
- for (i = 0; i < n_smpl * g->len; ++i) {
- int x = (int)(-10. * d0[i] + .499);
- if (x > 255) x = 255;
- if (x < 0) x = 0;
- d1[i] = x;
- }
- return 0;
-}
-/* FIXME: this function will fail given AB:GTX:GT. BCFtools never
- * produces such FMT, but others may do. */
-int bcf_fix_gt(bcf1_t *b)
-{
- char *s;
- int i;
- uint32_t tmp;
- bcf_ginfo_t gt;
- // check the presence of the GT FMT
- if ((s = strstr(b->fmt, ":GT")) == 0) return 0; // no GT or GT is already the first
- if (s[3] != '\0' && s[3] != ':') return 0; // :GTX in fact
- tmp = bcf_str2int("GT", 2);
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == tmp) break;
- if (i == b->n_gi) return 0; // no GT in b->gi; probably a bug...
- gt = b->gi[i];
- // move GT to the first
- for (; i > 0; --i) b->gi[i] = b->gi[i-1];
- b->gi[0] = gt;
- memmove(b->fmt + 3, b->fmt, s + 1 - b->fmt);
- b->fmt[0] = 'G'; b->fmt[1] = 'T'; b->fmt[2] = ':';
- return 0;
-}
-
-int bcf_fix_pl(bcf1_t *b)
-{
- int i;
- uint32_t tmp;
- uint8_t *PL, *swap;
- bcf_ginfo_t *gi;
- // pinpoint PL
- tmp = bcf_str2int("PL", 2);
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == tmp) break;
- if (i == b->n_gi) return 0;
- // prepare
- gi = b->gi + i;
- PL = (uint8_t*)gi->data;
- swap = alloca(gi->len);
- // loop through individuals
- for (i = 0; i < b->n_smpl; ++i) {
- int k, l, x;
- uint8_t *PLi = PL + i * gi->len;
- memcpy(swap, PLi, gi->len);
- for (k = x = 0; k < b->n_alleles; ++k)
- for (l = k; l < b->n_alleles; ++l)
- PLi[l*(l+1)/2 + k] = swap[x++];
- }
- return 0;
-}
-
-int bcf_smpl_covered(const bcf1_t *b)
-{
- int i, j, n = 0;
- uint32_t tmp;
- bcf_ginfo_t *gi;
- // pinpoint PL
- tmp = bcf_str2int("PL", 2);
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == tmp) break;
- if (i == b->n_gi) return 0;
- // count how many samples having PL!=[0..0]
- gi = b->gi + i;
- for (i = 0; i < b->n_smpl; ++i) {
- uint8_t *PLi = ((uint8_t*)gi->data) + i * gi->len;
- for (j = 0; j < gi->len; ++j)
- if (PLi[j]) break;
- if (j < gi->len) ++n;
- }
- return n;
-}
-
-static void *locate_field(const bcf1_t *b, const char *fmt, int l)
-{
- int i;
- uint32_t tmp;
- tmp = bcf_str2int(fmt, l);
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == tmp) break;
- return i == b->n_gi? 0 : b->gi[i].data;
-}
-
-int bcf_anno_max(bcf1_t *b)
-{
- int k, max_gq, max_sp, n_het;
- kstring_t str;
- uint8_t *gt, *gq;
- int32_t *sp;
- max_gq = max_sp = n_het = 0;
- gt = locate_field(b, "GT", 2);
- if (gt == 0) return -1;
- gq = locate_field(b, "GQ", 2);
- sp = locate_field(b, "SP", 2);
- if (sp)
- for (k = 0; k < b->n_smpl; ++k)
- if (gt[k]&0x3f)
- max_sp = max_sp > (int)sp[k]? max_sp : sp[k];
- if (gq)
- for (k = 0; k < b->n_smpl; ++k)
- if (gt[k]&0x3f)
- max_gq = max_gq > (int)gq[k]? max_gq : gq[k];
- for (k = 0; k < b->n_smpl; ++k) {
- int a1, a2;
- a1 = gt[k]&7; a2 = gt[k]>>3&7;
- if ((!a1 && a2) || (!a2 && a1)) { // a het
- if (gq == 0) ++n_het;
- else if (gq[k] >= 20) ++n_het;
- }
- }
- if (n_het) max_sp -= (int)(4.343 * log(n_het) + .499);
- if (max_sp < 0) max_sp = 0;
- memset(&str, 0, sizeof(kstring_t));
- if (*b->info) kputc(';', &str);
- ksprintf(&str, "MXSP=%d;MXGQ=%d", max_sp, max_gq);
- bcf_append_info(b, str.s, str.l);
- free(str.s);
- return 0;
-}
-
-// FIXME: only data are shuffled; the header is NOT
-int bcf_shuffle(bcf1_t *b, int seed)
-{
- int i, j, *a;
- if (seed > 0) srand48(seed);
- a = malloc(b->n_smpl * sizeof(int));
- for (i = 0; i < b->n_smpl; ++i) a[i] = i;
- for (i = b->n_smpl; i > 1; --i) {
- int tmp;
- j = (int)(drand48() * i);
- tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp;
- }
- for (j = 0; j < b->n_gi; ++j) {
- bcf_ginfo_t *gi = b->gi + j;
- uint8_t *swap, *data = (uint8_t*)gi->data;
- swap = malloc(gi->len * b->n_smpl);
- for (i = 0; i < b->n_smpl; ++i)
- memcpy(swap + gi->len * a[i], data + gi->len * i, gi->len);
- free(gi->data);
- gi->data = swap;
- }
- free(a);
- return 0;
-}
-
-bcf_hdr_t *bcf_hdr_subsam(const bcf_hdr_t *h0, int n, char *const* samples, int *list)
-{
- int i, ret, j;
- khint_t k;
- bcf_hdr_t *h;
- khash_t(str2id) *hash;
- kstring_t s;
- s.l = s.m = 0; s.s = 0;
- hash = kh_init(str2id);
- for (i = 0; i < h0->n_smpl; ++i) {
- k = kh_put(str2id, hash, h0->sns[i], &ret);
- kh_val(hash, k) = i;
- }
- for (i = j = 0; i < n; ++i) {
- k = kh_get(str2id, hash, samples[i]);
- if (k != kh_end(hash)) {
- list[j++] = kh_val(hash, k);
- kputs(samples[i], &s); kputc('\0', &s);
- }
- }
- if (j < n) fprintf(stderr, "<%s> %d samples in the list but not in BCF.", __func__, n - j);
- kh_destroy(str2id, hash);
- h = calloc(1, sizeof(bcf_hdr_t));
- *h = *h0;
- h->ns = 0; h->sns = 0;
- h->name = malloc(h->l_nm); memcpy(h->name, h0->name, h->l_nm);
- h->txt = calloc(1, h->l_txt + 1); memcpy(h->txt, h0->txt, h->l_txt);
- h->l_smpl = s.l; h->sname = s.s;
- bcf_hdr_sync(h);
- return h;
-}
-
-int bcf_subsam(int n_smpl, int *list, bcf1_t *b)
-{
- int i, j;
- for (j = 0; j < b->n_gi; ++j) {
- bcf_ginfo_t *gi = b->gi + j;
- uint8_t *swap;
- swap = malloc(gi->len * b->n_smpl);
- for (i = 0; i < n_smpl; ++i)
- memcpy(swap + i * gi->len, (uint8_t*)gi->data + list[i] * gi->len, gi->len);
- free(gi->data);
- gi->data = swap;
- }
- b->n_smpl = n_smpl;
- return 0;
-}
-
-static int8_t nt4_table[128] = {
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 /*'-'*/, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 4, 4, 4, -1, 4, 4, 4, 4, 4, 4, 4
-};
-
-int bcf_gl10(const bcf1_t *b, uint8_t *gl)
-{
- int a[4], k, l, map[4], k1, j, i;
- const bcf_ginfo_t *PL;
- char *s;
- if (b->ref[1] != 0 || b->n_alleles > 4) return -1; // ref is not a single base or >4 alleles
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
- if (i == b->n_gi) return -1; // no PL
- PL = b->gi + i;
- a[0] = nt4_table[(int)b->ref[0]];
- if (a[0] > 3 || a[0] < 0) return -1; // ref is not A/C/G/T
- a[1] = a[2] = a[3] = -2; // -1 has a special meaning
- if (b->alt[0] == 0) return -1; // no alternate allele
- map[0] = map[1] = map[2] = map[3] = -2;
- map[a[0]] = 0;
- for (k = 0, s = b->alt, k1 = -1; k < 3 && *s; ++k, s += 2) {
- if (s[1] != ',' && s[1] != 0) return -1; // ALT is not single base
- a[k+1] = nt4_table[(int)*s];
- if (a[k+1] >= 0) map[a[k+1]] = k+1;
- else k1 = k + 1;
- if (s[1] == 0) break; // the end of the ALT string
- }
- for (k = 0; k < 4; ++k)
- if (map[k] < 0) map[k] = k1;
- for (i = 0; i < b->n_smpl; ++i) {
- const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual
- uint8_t *g = gl + 10 * i;
- for (k = j = 0; k < 4; ++k) {
- for (l = k; l < 4; ++l) {
- int t, x = map[k], y = map[l];
- if (x > y) t = x, x = y, y = t; // make sure x is the smaller
- g[j++] = p[y * (y+1) / 2 + x];
- }
- }
- }
- return 0;
-}
-
-int bcf_gl10_indel(const bcf1_t *b, uint8_t *gl)
-{
- int k, l, j, i;
- const bcf_ginfo_t *PL;
- if (b->alt[0] == 0) return -1; // no alternate allele
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
- if (i == b->n_gi) return -1; // no PL
- PL = b->gi + i;
- for (i = 0; i < b->n_smpl; ++i) {
- const uint8_t *p = PL->data + i * PL->len; // the PL for the i-th individual
- uint8_t *g = gl + 10 * i;
- for (k = j = 0; k < 4; ++k) {
- for (l = k; l < 4; ++l) {
- int t, x = k, y = l;
- if (x > y) t = x, x = y, y = t; // make sure x is the smaller
- x = y * (y+1) / 2 + x;
- g[j++] = x < PL->len? p[x] : 255;
- }
- }
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/call1.c b/src/samtools-0.1.18/bcftools/call1.c
deleted file mode 100644
index 3cc4649..0000000
--- a/src/samtools-0.1.18/bcftools/call1.c
+++ /dev/null
@@ -1,586 +0,0 @@
-#include <unistd.h>
-#include <stdlib.h>
-#include <math.h>
-#include <zlib.h>
-#include <errno.h>
-#include "bcf.h"
-#include "prob1.h"
-#include "kstring.h"
-#include "time.h"
-
-#ifdef _WIN32
-#define srand48(x) srand(x)
-#define lrand48() rand()
-#endif
-
-#include "kseq.h"
-KSTREAM_INIT(gzFile, gzread, 16384)
-
-#define VC_NO_GENO 2
-#define VC_BCFOUT 4
-#define VC_CALL 8
-#define VC_VARONLY 16
-#define VC_VCFIN 32
-#define VC_UNCOMP 64
-#define VC_KEEPALT 256
-#define VC_ACGT_ONLY 512
-#define VC_QCALL 1024
-#define VC_CALL_GT 2048
-#define VC_ADJLD 4096
-#define VC_NO_INDEL 8192
-#define VC_ANNO_MAX 16384
-#define VC_FIX_PL 32768
-#define VC_EM 0x10000
-#define VC_PAIRCALL 0x20000
-#define VC_QCNT 0x40000
-
-typedef struct {
- int flag, prior_type, n1, n_sub, *sublist, n_perm;
- uint32_t *trio_aux;
- char *prior_file, **subsam, *fn_dict;
- uint8_t *ploidy;
- double theta, pref, indel_frac, min_perm_p, min_smpl_frac, min_lrt;
- void *bed;
-} viewconf_t;
-
-void *bed_read(const char *fn);
-void bed_destroy(void *_h);
-int bed_overlap(const void *_h, const char *chr, int beg, int end);
-
-typedef struct {
- double p[4];
- int mq, depth, is_tested, d[4];
-} anno16_t;
-
-static double ttest(int n1, int n2, int a[4])
-{
- extern double kf_betai(double a, double b, double x);
- double t, v, u1, u2;
- if (n1 == 0 || n2 == 0 || n1 + n2 < 3) return 1.0;
- u1 = (double)a[0] / n1; u2 = (double)a[2] / n2;
- if (u1 <= u2) return 1.;
- t = (u1 - u2) / sqrt(((a[1] - n1 * u1 * u1) + (a[3] - n2 * u2 * u2)) / (n1 + n2 - 2) * (1./n1 + 1./n2));
- v = n1 + n2 - 2;
-// printf("%d,%d,%d,%d,%lf,%lf,%lf\n", a[0], a[1], a[2], a[3], t, u1, u2);
- return t < 0.? 1. : .5 * kf_betai(.5*v, .5, v/(v+t*t));
-}
-
-static int test16_core(int anno[16], anno16_t *a)
-{
- extern double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two);
- double left, right;
- int i;
- a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.;
- memcpy(a->d, anno, 4 * sizeof(int));
- a->depth = anno[0] + anno[1] + anno[2] + anno[3];
- a->is_tested = (anno[0] + anno[1] > 0 && anno[2] + anno[3] > 0);
- if (a->depth == 0) return -1;
- a->mq = (int)(sqrt((anno[9] + anno[11]) / a->depth) + .499);
- kt_fisher_exact(anno[0], anno[1], anno[2], anno[3], &left, &right, &a->p[0]);
- for (i = 1; i < 4; ++i)
- a->p[i] = ttest(anno[0] + anno[1], anno[2] + anno[3], anno+4*i);
- return 0;
-}
-
-static int test16(bcf1_t *b, anno16_t *a)
-{
- char *p;
- int i, anno[16];
- a->p[0] = a->p[1] = a->p[2] = a->p[3] = 1.;
- a->d[0] = a->d[1] = a->d[2] = a->d[3] = 0.;
- a->mq = a->depth = a->is_tested = 0;
- if ((p = strstr(b->info, "I16=")) == 0) return -1;
- p += 4;
- for (i = 0; i < 16; ++i) {
- errno = 0; anno[i] = strtol(p, &p, 10);
- if (anno[i] == 0 && (errno == EINVAL || errno == ERANGE)) return -2;
- ++p;
- }
- return test16_core(anno, a);
-}
-
-static void rm_info(bcf1_t *b, const char *key)
-{
- char *p, *q;
- if ((p = strstr(b->info, key)) == 0) return;
- for (q = p; *q && *q != ';'; ++q);
- if (p > b->info && *(p-1) == ';') --p;
- memmove(p, q, b->l_str - (q - b->str));
- b->l_str -= q - p;
- bcf_sync(b);
-}
-
-static int update_bcf1(bcf1_t *b, const bcf_p1aux_t *pa, const bcf_p1rst_t *pr, double pref, int flag, double em[10], int cons_llr, int64_t cons_gt)
-{
- kstring_t s;
- int has_I16, is_var;
- double fq, r;
- anno16_t a;
-
- has_I16 = test16(b, &a) >= 0? 1 : 0;
- rm_info(b, "I16="); // FIXME: probably this function has a bug. If I move it below, I16 will not be removed!
-
- memset(&s, 0, sizeof(kstring_t));
- kputc('\0', &s); kputs(b->ref, &s); kputc('\0', &s);
- kputs(b->alt, &s); kputc('\0', &s); kputc('\0', &s);
- kputs(b->info, &s);
- if (b->info[0]) kputc(';', &s);
- { // print EM
- if (em[0] >= 0) ksprintf(&s, "AF1=%.4g", 1 - em[0]);
- if (em[4] >= 0 && em[4] <= 0.05) ksprintf(&s, ";G3=%.4g,%.4g,%.4g;HWE=%.3g", em[3], em[2], em[1], em[4]);
- if (em[5] >= 0 && em[6] >= 0) ksprintf(&s, ";AF2=%.4g,%.4g", 1 - em[5], 1 - em[6]);
- if (em[7] >= 0) ksprintf(&s, ";LRT=%.3g", em[7]);
- if (em[8] >= 0) ksprintf(&s, ";LRT2=%.3g", em[8]);
- }
- if (cons_llr > 0) {
- ksprintf(&s, ";CLR=%d", cons_llr);
- if (cons_gt > 0)
- ksprintf(&s, ";UGT=%c%c%c;CGT=%c%c%c", cons_gt&0xff, cons_gt>>8&0xff, cons_gt>>16&0xff,
- cons_gt>>32&0xff, cons_gt>>40&0xff, cons_gt>>48&0xff);
- }
- if (pr == 0) { // if pr is unset, return
- kputc('\0', &s); kputs(b->fmt, &s); kputc('\0', &s);
- free(b->str);
- b->m_str = s.m; b->l_str = s.l; b->str = s.s;
- bcf_sync(b);
- return 1;
- }
-
- is_var = (pr->p_ref < pref);
- r = is_var? pr->p_ref : pr->p_var;
-
-// ksprintf(&s, ";CI95=%.4g,%.4g", pr->cil, pr->cih); // FIXME: when EM is not used, ";" should be omitted!
- ksprintf(&s, ";AC1=%d", pr->ac);
- if (has_I16) ksprintf(&s, ";DP4=%d,%d,%d,%d;MQ=%d", a.d[0], a.d[1], a.d[2], a.d[3], a.mq);
- fq = pr->p_ref_folded < 0.5? -4.343 * log(pr->p_ref_folded) : 4.343 * log(pr->p_var_folded);
- if (fq < -999) fq = -999;
- if (fq > 999) fq = 999;
- ksprintf(&s, ";FQ=%.3g", fq);
- if (pr->cmp[0] >= 0.) { // two sample groups
- int i, q[3];
- for (i = 1; i < 3; ++i) {
- double x = pr->cmp[i] + pr->cmp[0]/2.;
- q[i] = x == 0? 255 : (int)(-4.343 * log(x) + .499);
- if (q[i] > 255) q[i] = 255;
- }
- if (pr->perm_rank >= 0) ksprintf(&s, ";PR=%d", pr->perm_rank);
- // ksprintf(&s, ";LRT3=%.3g", pr->lrt);
- ksprintf(&s, ";PCHI2=%.3g;PC2=%d,%d", q[1], q[2], pr->p_chi2);
- }
- if (has_I16 && a.is_tested) ksprintf(&s, ";PV4=%.2g,%.2g,%.2g,%.2g", a.p[0], a.p[1], a.p[2], a.p[3]);
- kputc('\0', &s);
- kputs(b->fmt, &s); kputc('\0', &s);
- free(b->str);
- b->m_str = s.m; b->l_str = s.l; b->str = s.s;
- b->qual = r < 1e-100? 999 : -4.343 * log(r);
- if (b->qual > 999) b->qual = 999;
- bcf_sync(b);
- if (!is_var) bcf_shrink_alt(b, 1);
- else if (!(flag&VC_KEEPALT))
- bcf_shrink_alt(b, pr->rank0 < 2? 2 : pr->rank0+1);
- if (is_var && (flag&VC_CALL_GT)) { // call individual genotype
- int i, x, old_n_gi = b->n_gi;
- s.m = b->m_str; s.l = b->l_str - 1; s.s = b->str;
- kputs(":GT:GQ", &s); kputc('\0', &s);
- b->m_str = s.m; b->l_str = s.l; b->str = s.s;
- bcf_sync(b);
- for (i = 0; i < b->n_smpl; ++i) {
- x = bcf_p1_call_gt(pa, pr->f_exp, i);
- ((uint8_t*)b->gi[old_n_gi].data)[i] = (x&3) == 0? 1<<3|1 : (x&3) == 1? 1 : 0;
- ((uint8_t*)b->gi[old_n_gi+1].data)[i] = x>>2;
- }
- }
- return is_var;
-}
-
-static char **read_samples(const char *fn, int *_n)
-{
- gzFile fp;
- kstream_t *ks;
- kstring_t s;
- int dret, n = 0, max = 0;
- char **sam = 0;
- *_n = 0;
- s.l = s.m = 0; s.s = 0;
- fp = gzopen(fn, "r");
- if (fp == 0) return 0; // fail to open file
- ks = ks_init(fp);
- while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
- int l;
- if (max == n) {
- max = max? max<<1 : 4;
- sam = realloc(sam, sizeof(void*)*max);
- }
- l = s.l;
- sam[n] = malloc(s.l + 2);
- strcpy(sam[n], s.s);
- sam[n][l+1] = 2; // by default, diploid
- if (dret != '\n') {
- if (ks_getuntil(ks, 0, &s, &dret) >= 0) { // read ploidy, 1 or 2
- int x = (int)s.s[0] - '0';
- if (x == 1 || x == 2) sam[n][l+1] = x;
- else fprintf(stderr, "(%s) ploidy can only be 1 or 2; assume diploid\n", __func__);
- }
- if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
- }
- ++n;
- }
- ks_destroy(ks);
- gzclose(fp);
- free(s.s);
- *_n = n;
- return sam;
-}
-
-static void write_header(bcf_hdr_t *h)
-{
- kstring_t str;
- str.l = h->l_txt? h->l_txt - 1 : 0;
- str.m = str.l + 1; str.s = h->txt;
- if (!strstr(str.s, "##INFO=<ID=DP,"))
- kputs("##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Raw read depth\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=DP4,"))
- kputs("##INFO=<ID=DP4,Number=4,Type=Integer,Description=\"# high-quality ref-forward bases, ref-reverse, alt-forward and alt-reverse bases\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=MQ,"))
- kputs("##INFO=<ID=MQ,Number=1,Type=Integer,Description=\"Root-mean-square mapping quality of covering reads\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=FQ,"))
- kputs("##INFO=<ID=FQ,Number=1,Type=Float,Description=\"Phred probability of all samples being the same\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=AF1,"))
- kputs("##INFO=<ID=AF1,Number=1,Type=Float,Description=\"Max-likelihood estimate of the first ALT allele frequency (assuming HWE)\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=AC1,"))
- kputs("##INFO=<ID=AC1,Number=1,Type=Float,Description=\"Max-likelihood estimate of the first ALT allele count (no HWE assumption)\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=G3,"))
- kputs("##INFO=<ID=G3,Number=3,Type=Float,Description=\"ML estimate of genotype frequencies\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=HWE,"))
- kputs("##INFO=<ID=HWE,Number=1,Type=Float,Description=\"Chi^2 based HWE test P-value based on G3\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=CLR,"))
- kputs("##INFO=<ID=CLR,Number=1,Type=Integer,Description=\"Log ratio of genotype likelihoods with and without the constraint\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=UGT,"))
- kputs("##INFO=<ID=UGT,Number=1,Type=String,Description=\"The most probable unconstrained genotype configuration in the trio\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=CGT,"))
- kputs("##INFO=<ID=CGT,Number=1,Type=String,Description=\"The most probable constrained genotype configuration in the trio\">\n", &str);
-// if (!strstr(str.s, "##INFO=<ID=CI95,"))
-// kputs("##INFO=<ID=CI95,Number=2,Type=Float,Description=\"Equal-tail Bayesian credible interval of the site allele frequency at the 95% level\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=PV4,"))
- kputs("##INFO=<ID=PV4,Number=4,Type=Float,Description=\"P-values for strand bias, baseQ bias, mapQ bias and tail distance bias\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=INDEL,"))
- kputs("##INFO=<ID=INDEL,Number=0,Type=Flag,Description=\"Indicates that the variant is an INDEL.\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=PC2,"))
- kputs("##INFO=<ID=PC2,Number=2,Type=Integer,Description=\"Phred probability of the nonRef allele frequency in group1 samples being larger (,smaller) than in group2.\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=PCHI2,"))
- kputs("##INFO=<ID=PCHI2,Number=1,Type=Float,Description=\"Posterior weighted chi^2 P-value for testing the association between group1 and group2 samples.\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=QCHI2,"))
- kputs("##INFO=<ID=QCHI2,Number=1,Type=Integer,Description=\"Phred scaled PCHI2.\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=RP,"))
- kputs("##INFO=<ID=PR,Number=1,Type=Integer,Description=\"# permutations yielding a smaller PCHI2.\">\n", &str);
- if (!strstr(str.s, "##INFO=<ID=VDB,"))
- kputs("##INFO=<ID=VDB,Number=1,Type=Float,Description=\"Variant Distance Bias\">\n", &str);
- if (!strstr(str.s, "##FORMAT=<ID=GT,"))
- kputs("##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n", &str);
- if (!strstr(str.s, "##FORMAT=<ID=GQ,"))
- kputs("##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\"Genotype Quality\">\n", &str);
- if (!strstr(str.s, "##FORMAT=<ID=GL,"))
- kputs("##FORMAT=<ID=GL,Number=3,Type=Float,Description=\"Likelihoods for RR,RA,AA genotypes (R=ref,A=alt)\">\n", &str);
- if (!strstr(str.s, "##FORMAT=<ID=DP,"))
- kputs("##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"# high-quality bases\">\n", &str);
- if (!strstr(str.s, "##FORMAT=<ID=SP,"))
- kputs("##FORMAT=<ID=SP,Number=1,Type=Integer,Description=\"Phred-scaled strand bias P-value\">\n", &str);
- if (!strstr(str.s, "##FORMAT=<ID=PL,"))
- kputs("##FORMAT=<ID=PL,Number=G,Type=Integer,Description=\"List of Phred-scaled genotype likelihoods\">\n", &str);
- h->l_txt = str.l + 1; h->txt = str.s;
-}
-
-double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
-
-int bcfview(int argc, char *argv[])
-{
- extern int bcf_2qcall(bcf_hdr_t *h, bcf1_t *b);
- extern void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x);
- extern int bcf_fix_gt(bcf1_t *b);
- extern int bcf_anno_max(bcf1_t *b);
- extern int bcf_shuffle(bcf1_t *b, int seed);
- extern uint32_t *bcf_trio_prep(int is_x, int is_son);
- extern int bcf_trio_call(uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt);
- extern int bcf_pair_call(const bcf1_t *b);
- extern int bcf_min_diff(const bcf1_t *b);
-
- bcf_t *bp, *bout = 0;
- bcf1_t *b, *blast;
- int c, *seeds = 0;
- uint64_t n_processed = 0, qcnt[256];
- viewconf_t vc;
- bcf_p1aux_t *p1 = 0;
- bcf_hdr_t *hin, *hout;
- int tid, begin, end;
- char moder[4], modew[4];
-
- tid = begin = end = -1;
- memset(&vc, 0, sizeof(viewconf_t));
- vc.prior_type = vc.n1 = -1; vc.theta = 1e-3; vc.pref = 0.5; vc.indel_frac = -1.; vc.n_perm = 0; vc.min_perm_p = 0.01; vc.min_smpl_frac = 0; vc.min_lrt = 1;
- memset(qcnt, 0, 8 * 256);
- while ((c = getopt(argc, argv, "FN1:l:cC:eHAGvbSuP:t:p:QgLi:IMs:D:U:X:d:T:Y")) >= 0) {
- switch (c) {
- case '1': vc.n1 = atoi(optarg); break;
- case 'l': vc.bed = bed_read(optarg); break;
- case 'D': vc.fn_dict = strdup(optarg); break;
- case 'F': vc.flag |= VC_FIX_PL; break;
- case 'N': vc.flag |= VC_ACGT_ONLY; break;
- case 'G': vc.flag |= VC_NO_GENO; break;
- case 'A': vc.flag |= VC_KEEPALT; break;
- case 'b': vc.flag |= VC_BCFOUT; break;
- case 'S': vc.flag |= VC_VCFIN; break;
- case 'c': vc.flag |= VC_CALL; break;
- case 'e': vc.flag |= VC_EM; break;
- case 'v': vc.flag |= VC_VARONLY | VC_CALL; break;
- case 'u': vc.flag |= VC_UNCOMP | VC_BCFOUT; break;
- case 'g': vc.flag |= VC_CALL_GT | VC_CALL; break;
- case 'I': vc.flag |= VC_NO_INDEL; break;
- case 'M': vc.flag |= VC_ANNO_MAX; break;
- case 'Y': vc.flag |= VC_QCNT; break;
- case 't': vc.theta = atof(optarg); break;
- case 'p': vc.pref = atof(optarg); break;
- case 'i': vc.indel_frac = atof(optarg); break;
- case 'Q': vc.flag |= VC_QCALL; break;
- case 'L': vc.flag |= VC_ADJLD; break;
- case 'U': vc.n_perm = atoi(optarg); break;
- case 'C': vc.min_lrt = atof(optarg); break;
- case 'X': vc.min_perm_p = atof(optarg); break;
- case 'd': vc.min_smpl_frac = atof(optarg); break;
- case 's': vc.subsam = read_samples(optarg, &vc.n_sub);
- vc.ploidy = calloc(vc.n_sub + 1, 1);
- for (tid = 0; tid < vc.n_sub; ++tid) vc.ploidy[tid] = vc.subsam[tid][strlen(vc.subsam[tid]) + 1];
- tid = -1;
- break;
- case 'T':
- if (strcmp(optarg, "trioauto") == 0) vc.trio_aux = bcf_trio_prep(0, 0);
- else if (strcmp(optarg, "trioxd") == 0) vc.trio_aux = bcf_trio_prep(1, 0);
- else if (strcmp(optarg, "trioxs") == 0) vc.trio_aux = bcf_trio_prep(1, 1);
- else if (strcmp(optarg, "pair") == 0) vc.flag |= VC_PAIRCALL;
- else {
- fprintf(stderr, "[%s] Option '-T' can only take value trioauto, trioxd or trioxs.\n", __func__);
- return 1;
- }
- break;
- case 'P':
- if (strcmp(optarg, "full") == 0) vc.prior_type = MC_PTYPE_FULL;
- else if (strcmp(optarg, "cond2") == 0) vc.prior_type = MC_PTYPE_COND2;
- else if (strcmp(optarg, "flat") == 0) vc.prior_type = MC_PTYPE_FLAT;
- else vc.prior_file = strdup(optarg);
- break;
- }
- }
- if (argc == optind) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: bcftools view [options] <in.bcf> [reg]\n\n");
- fprintf(stderr, "Input/output options:\n\n");
- fprintf(stderr, " -A keep all possible alternate alleles at variant sites\n");
- fprintf(stderr, " -b output BCF instead of VCF\n");
- fprintf(stderr, " -D FILE sequence dictionary for VCF->BCF conversion [null]\n");
- fprintf(stderr, " -F PL generated by r921 or before (which generate old ordering)\n");
- fprintf(stderr, " -G suppress all individual genotype information\n");
- fprintf(stderr, " -l FILE list of sites (chr pos) or regions (BED) to output [all sites]\n");
- fprintf(stderr, " -L calculate LD for adjacent sites\n");
- fprintf(stderr, " -N skip sites where REF is not A/C/G/T\n");
- fprintf(stderr, " -Q output the QCALL likelihood format\n");
- fprintf(stderr, " -s FILE list of samples to use [all samples]\n");
- fprintf(stderr, " -S input is VCF\n");
- fprintf(stderr, " -u uncompressed BCF output (force -b)\n");
- fprintf(stderr, "\nConsensus/variant calling options:\n\n");
- fprintf(stderr, " -c SNP calling (force -e)\n");
- fprintf(stderr, " -d FLOAT skip loci where less than FLOAT fraction of samples covered [0]\n");
- fprintf(stderr, " -e likelihood based analyses\n");
- fprintf(stderr, " -g call genotypes at variant sites (force -c)\n");
- fprintf(stderr, " -i FLOAT indel-to-substitution ratio [%.4g]\n", vc.indel_frac);
- fprintf(stderr, " -I skip indels\n");
- fprintf(stderr, " -p FLOAT variant if P(ref|D)<FLOAT [%.3g]\n", vc.pref);
- fprintf(stderr, " -P STR type of prior: full, cond2, flat [full]\n");
- fprintf(stderr, " -t FLOAT scaled substitution mutation rate [%.4g]\n", vc.theta);
- fprintf(stderr, " -T STR constrained calling; STR can be: pair, trioauto, trioxd and trioxs (see manual) [null]\n");
- fprintf(stderr, " -v output potential variant sites only (force -c)\n");
- fprintf(stderr, "\nContrast calling and association test options:\n\n");
- fprintf(stderr, " -1 INT number of group-1 samples [0]\n");
- fprintf(stderr, " -C FLOAT posterior constrast for LRT<FLOAT and P(ref|D)<0.5 [%g]\n", vc.min_lrt);
- fprintf(stderr, " -U INT number of permutations for association testing (effective with -1) [0]\n");
- fprintf(stderr, " -X FLOAT only perform permutations for P(chi^2)<FLOAT [%g]\n", vc.min_perm_p);
- fprintf(stderr, "\n");
- return 1;
- }
-
- if (vc.flag & VC_CALL) vc.flag |= VC_EM;
- if ((vc.flag & VC_VCFIN) && (vc.flag & VC_BCFOUT) && vc.fn_dict == 0) {
- fprintf(stderr, "[%s] For VCF->BCF conversion please specify the sequence dictionary with -D\n", __func__);
- return 1;
- }
- if (vc.n1 <= 0) vc.n_perm = 0; // TODO: give a warning here!
- if (vc.n_perm > 0) {
- seeds = malloc(vc.n_perm * sizeof(int));
- srand48(time(0));
- for (c = 0; c < vc.n_perm; ++c) seeds[c] = lrand48();
- }
- b = calloc(1, sizeof(bcf1_t));
- blast = calloc(1, sizeof(bcf1_t));
- strcpy(moder, "r");
- if (!(vc.flag & VC_VCFIN)) strcat(moder, "b");
- strcpy(modew, "w");
- if (vc.flag & VC_BCFOUT) strcat(modew, "b");
- if (vc.flag & VC_UNCOMP) strcat(modew, "u");
- bp = vcf_open(argv[optind], moder);
- hin = hout = vcf_hdr_read(bp);
- if (vc.fn_dict && (vc.flag & VC_VCFIN))
- vcf_dictread(bp, hin, vc.fn_dict);
- bout = vcf_open("-", modew);
- if (!(vc.flag & VC_QCALL)) {
- if (vc.n_sub) {
- vc.sublist = calloc(vc.n_sub, sizeof(int));
- hout = bcf_hdr_subsam(hin, vc.n_sub, vc.subsam, vc.sublist);
- }
- if (vc.flag & VC_CALL) write_header(hout);
- vcf_hdr_write(bout, hout);
- }
- if (vc.flag & VC_CALL) {
- p1 = bcf_p1_init(hout->n_smpl, vc.ploidy);
- if (vc.prior_file) {
- if (bcf_p1_read_prior(p1, vc.prior_file) < 0) {
- fprintf(stderr, "[%s] fail to read the prior AFS.\n", __func__);
- return 1;
- }
- } else bcf_p1_init_prior(p1, vc.prior_type, vc.theta);
- if (vc.n1 > 0 && vc.min_lrt > 0.) { // set n1
- bcf_p1_set_n1(p1, vc.n1);
- bcf_p1_init_subprior(p1, vc.prior_type, vc.theta);
- }
- if (vc.indel_frac > 0.) bcf_p1_indel_prior(p1, vc.indel_frac); // otherwise use the default indel_frac
- }
- if (optind + 1 < argc && !(vc.flag&VC_VCFIN)) {
- void *str2id = bcf_build_refhash(hout);
- if (bcf_parse_region(str2id, argv[optind+1], &tid, &begin, &end) >= 0) {
- bcf_idx_t *idx;
- idx = bcf_idx_load(argv[optind]);
- if (idx) {
- uint64_t off;
- off = bcf_idx_query(idx, tid, begin);
- if (off == 0) {
- fprintf(stderr, "[%s] no records in the query region.\n", __func__);
- return 1; // FIXME: a lot of memory leaks...
- }
- bgzf_seek(bp->fp, off, SEEK_SET);
- bcf_idx_destroy(idx);
- }
- }
- }
- while (vcf_read(bp, hin, b) > 0) {
- int is_indel, cons_llr = -1;
- int64_t cons_gt = -1;
- double em[10];
- if ((vc.flag & VC_VARONLY) && strcmp(b->alt, "X") == 0) continue;
- if ((vc.flag & VC_VARONLY) && vc.min_smpl_frac > 0.) {
- extern int bcf_smpl_covered(const bcf1_t *b);
- int n = bcf_smpl_covered(b);
- if ((double)n / b->n_smpl < vc.min_smpl_frac) continue;
- }
- if (vc.n_sub) bcf_subsam(vc.n_sub, vc.sublist, b);
- if (vc.flag & VC_FIX_PL) bcf_fix_pl(b);
- is_indel = bcf_is_indel(b);
- if ((vc.flag & VC_NO_INDEL) && is_indel) continue;
- if ((vc.flag & VC_ACGT_ONLY) && !is_indel) {
- int x;
- if (b->ref[0] == 0 || b->ref[1] != 0) continue;
- x = toupper(b->ref[0]);
- if (x != 'A' && x != 'C' && x != 'G' && x != 'T') continue;
- }
- if (vc.bed && !bed_overlap(vc.bed, hin->ns[b->tid], b->pos, b->pos + strlen(b->ref))) continue;
- if (tid >= 0) {
- int l = strlen(b->ref);
- l = b->pos + (l > 0? l : 1);
- if (b->tid != tid || b->pos >= end) break;
- if (!(l > begin && end > b->pos)) continue;
- }
- ++n_processed;
- if ((vc.flag & VC_QCNT) && !is_indel) { // summarize the difference
- int x = bcf_min_diff(b);
- if (x > 255) x = 255;
- if (x >= 0) ++qcnt[x];
- }
- if (vc.flag & VC_QCALL) { // output QCALL format; STOP here
- bcf_2qcall(hout, b);
- continue;
- }
- if (vc.trio_aux) // do trio calling
- bcf_trio_call(vc.trio_aux, b, &cons_llr, &cons_gt);
- else if (vc.flag & VC_PAIRCALL)
- cons_llr = bcf_pair_call(b);
- if (vc.flag & (VC_CALL|VC_ADJLD|VC_EM)) bcf_gl2pl(b);
- if (vc.flag & VC_EM) bcf_em1(b, vc.n1, 0x1ff, em);
- else {
- int i;
- for (i = 0; i < 9; ++i) em[i] = -1.;
- }
- if (vc.flag & VC_CALL) { // call variants
- bcf_p1rst_t pr;
- int calret = bcf_p1_cal(b, (em[7] >= 0 && em[7] < vc.min_lrt), p1, &pr);
- if (n_processed % 100000 == 0) {
- fprintf(stderr, "[%s] %ld sites processed.\n", __func__, (long)n_processed);
- bcf_p1_dump_afs(p1);
- }
- if (pr.p_ref >= vc.pref && (vc.flag & VC_VARONLY)) continue;
- if (vc.n_perm && vc.n1 > 0 && pr.p_chi2 < vc.min_perm_p) { // permutation test
- bcf_p1rst_t r;
- int i, n = 0;
- for (i = 0; i < vc.n_perm; ++i) {
-#ifdef BCF_PERM_LRT // LRT based permutation is much faster but less robust to artifacts
- double x[10];
- bcf_shuffle(b, seeds[i]);
- bcf_em1(b, vc.n1, 1<<7, x);
- if (x[7] < em[7]) ++n;
-#else
- bcf_shuffle(b, seeds[i]);
- bcf_p1_cal(b, 1, p1, &r);
- if (pr.p_chi2 >= r.p_chi2) ++n;
-#endif
- }
- pr.perm_rank = n;
- }
- if (calret >= 0) update_bcf1(b, p1, &pr, vc.pref, vc.flag, em, cons_llr, cons_gt);
- } else if (vc.flag & VC_EM) update_bcf1(b, 0, 0, 0, vc.flag, em, cons_llr, cons_gt);
- if (vc.flag & VC_ADJLD) { // compute LD
- double f[4], r2;
- if ((r2 = bcf_pair_freq(blast, b, f)) >= 0) {
- kstring_t s;
- s.m = s.l = 0; s.s = 0;
- if (*b->info) kputc(';', &s);
- ksprintf(&s, "NEIR=%.3f;NEIF4=%.3f,%.3f,%.3f,%.3f", r2, f[0], f[1], f[2], f[3]);
- bcf_append_info(b, s.s, s.l);
- free(s.s);
- }
- bcf_cpy(blast, b);
- }
- if (vc.flag & VC_ANNO_MAX) bcf_anno_max(b);
- if (vc.flag & VC_NO_GENO) { // do not output GENO fields
- b->n_gi = 0;
- b->fmt[0] = '\0';
- b->l_str = b->fmt - b->str + 1;
- } else bcf_fix_gt(b);
- vcf_write(bout, hout, b);
- }
- if (vc.prior_file) free(vc.prior_file);
- if (vc.flag & VC_CALL) bcf_p1_dump_afs(p1);
- if (hin != hout) bcf_hdr_destroy(hout);
- bcf_hdr_destroy(hin);
- bcf_destroy(b); bcf_destroy(blast);
- vcf_close(bp); vcf_close(bout);
- if (vc.fn_dict) free(vc.fn_dict);
- if (vc.ploidy) free(vc.ploidy);
- if (vc.trio_aux) free(vc.trio_aux);
- if (vc.n_sub) {
- int i;
- for (i = 0; i < vc.n_sub; ++i) free(vc.subsam[i]);
- free(vc.subsam); free(vc.sublist);
- }
- if (vc.bed) bed_destroy(vc.bed);
- if (vc.flag & VC_QCNT)
- for (c = 0; c < 256; ++c)
- fprintf(stderr, "QT\t%d\t%lld\n", c, (long long)qcnt[c]);
- if (seeds) free(seeds);
- if (p1) bcf_p1_destroy(p1);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/em.c b/src/samtools-0.1.18/bcftools/em.c
deleted file mode 100644
index b7dfe1a..0000000
--- a/src/samtools-0.1.18/bcftools/em.c
+++ /dev/null
@@ -1,310 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include "bcf.h"
-#include "kmin.h"
-
-static double g_q2p[256];
-
-#define ITER_MAX 50
-#define ITER_TRY 10
-#define EPS 1e-5
-
-extern double kf_gammaq(double, double);
-
-/*
- Generic routines
- */
-// get the 3 genotype likelihoods
-static double *get_pdg3(const bcf1_t *b)
-{
- double *pdg;
- const uint8_t *PL = 0;
- int i, PL_len = 0;
- // initialize g_q2p if necessary
- if (g_q2p[0] == 0.)
- for (i = 0; i < 256; ++i)
- g_q2p[i] = pow(10., -i / 10.);
- // set PL and PL_len
- for (i = 0; i < b->n_gi; ++i) {
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
- PL = (const uint8_t*)b->gi[i].data;
- PL_len = b->gi[i].len;
- break;
- }
- }
- if (i == b->n_gi) return 0; // no PL
- // fill pdg
- pdg = malloc(3 * b->n_smpl * sizeof(double));
- for (i = 0; i < b->n_smpl; ++i) {
- const uint8_t *pi = PL + i * PL_len;
- double *p = pdg + i * 3;
- p[0] = g_q2p[pi[2]]; p[1] = g_q2p[pi[1]]; p[2] = g_q2p[pi[0]];
- }
- return pdg;
-}
-
-// estimate site allele frequency in a very naive and inaccurate way
-static double est_freq(int n, const double *pdg)
-{
- int i, gcnt[3], tmp1;
- // get a rough estimate of the genotype frequency
- gcnt[0] = gcnt[1] = gcnt[2] = 0;
- for (i = 0; i < n; ++i) {
- const double *p = pdg + i * 3;
- if (p[0] != 1. || p[1] != 1. || p[2] != 1.) {
- int which = p[0] > p[1]? 0 : 1;
- which = p[which] > p[2]? which : 2;
- ++gcnt[which];
- }
- }
- tmp1 = gcnt[0] + gcnt[1] + gcnt[2];
- return (tmp1 == 0)? -1.0 : (.5 * gcnt[1] + gcnt[2]) / tmp1;
-}
-
-/*
- Single-locus EM
- */
-
-typedef struct {
- int beg, end;
- const double *pdg;
-} minaux1_t;
-
-static double prob1(double f, void *data)
-{
- minaux1_t *a = (minaux1_t*)data;
- double p = 1., l = 0., f3[3];
- int i;
-// printf("brent %lg\n", f);
- if (f < 0 || f > 1) return 1e300;
- f3[0] = (1.-f)*(1.-f); f3[1] = 2.*f*(1.-f); f3[2] = f*f;
- for (i = a->beg; i < a->end; ++i) {
- const double *pdg = a->pdg + i * 3;
- p *= pdg[0] * f3[0] + pdg[1] * f3[1] + pdg[2] * f3[2];
- if (p < 1e-200) l -= log(p), p = 1.;
- }
- return l - log(p);
-}
-
-// one EM iteration for allele frequency estimate
-static double freq_iter(double *f, const double *_pdg, int beg, int end)
-{
- double f0 = *f, f3[3], err;
- int i;
-// printf("em %lg\n", *f);
- f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0;
- for (i = beg, f0 = 0.; i < end; ++i) {
- const double *pdg = _pdg + i * 3;
- f0 += (pdg[1] * f3[1] + 2. * pdg[2] * f3[2])
- / (pdg[0] * f3[0] + pdg[1] * f3[1] + pdg[2] * f3[2]);
- }
- f0 /= (end - beg) * 2;
- err = fabs(f0 - *f);
- *f = f0;
- return err;
-}
-
-/* The following function combines EM and Brent's method. When the signal from
- * the data is strong, EM is faster but sometimes, EM may converge very slowly.
- * When this happens, we switch to Brent's method. The idea is learned from
- * Rasmus Nielsen.
- */
-static double freqml(double f0, int beg, int end, const double *pdg)
-{
- int i;
- double f;
- for (i = 0, f = f0; i < ITER_TRY; ++i)
- if (freq_iter(&f, pdg, beg, end) < EPS) break;
- if (i == ITER_TRY) { // haven't converged yet; try Brent's method
- minaux1_t a;
- a.beg = beg; a.end = end; a.pdg = pdg;
- kmin_brent(prob1, f0 == f? .5*f0 : f0, f, (void*)&a, EPS, &f);
- }
- return f;
-}
-
-// one EM iteration for genotype frequency estimate
-static double g3_iter(double g[3], const double *_pdg, int beg, int end)
-{
- double err, gg[3];
- int i;
- gg[0] = gg[1] = gg[2] = 0.;
-// printf("%lg,%lg,%lg\n", g[0], g[1], g[2]);
- for (i = beg; i < end; ++i) {
- double sum, tmp[3];
- const double *pdg = _pdg + i * 3;
- tmp[0] = pdg[0] * g[0]; tmp[1] = pdg[1] * g[1]; tmp[2] = pdg[2] * g[2];
- sum = (tmp[0] + tmp[1] + tmp[2]) * (end - beg);
- gg[0] += tmp[0] / sum; gg[1] += tmp[1] / sum; gg[2] += tmp[2] / sum;
- }
- err = fabs(gg[0] - g[0]) > fabs(gg[1] - g[1])? fabs(gg[0] - g[0]) : fabs(gg[1] - g[1]);
- err = err > fabs(gg[2] - g[2])? err : fabs(gg[2] - g[2]);
- g[0] = gg[0]; g[1] = gg[1]; g[2] = gg[2];
- return err;
-}
-
-// perform likelihood ratio test
-static double lk_ratio_test(int n, int n1, const double *pdg, double f3[3][3])
-{
- double r;
- int i;
- for (i = 0, r = 1.; i < n1; ++i) {
- const double *p = pdg + i * 3;
- r *= (p[0] * f3[1][0] + p[1] * f3[1][1] + p[2] * f3[1][2])
- / (p[0] * f3[0][0] + p[1] * f3[0][1] + p[2] * f3[0][2]);
- }
- for (; i < n; ++i) {
- const double *p = pdg + i * 3;
- r *= (p[0] * f3[2][0] + p[1] * f3[2][1] + p[2] * f3[2][2])
- / (p[0] * f3[0][0] + p[1] * f3[0][1] + p[2] * f3[0][2]);
- }
- return r;
-}
-
-// x[0]: ref frequency
-// x[1..3]: alt-alt, alt-ref, ref-ref frequenc
-// x[4]: HWE P-value
-// x[5..6]: group1 freq, group2 freq
-// x[7]: 1-degree P-value
-// x[8]: 2-degree P-value
-int bcf_em1(const bcf1_t *b, int n1, int flag, double x[10])
-{
- double *pdg;
- int i, n, n2;
- if (b->n_alleles < 2) return -1; // one allele only
- // initialization
- if (n1 < 0 || n1 > b->n_smpl) n1 = 0;
- if (flag & 1<<7) flag |= 7<<5; // compute group freq if LRT is required
- if (flag & 0xf<<1) flag |= 0xf<<1;
- n = b->n_smpl; n2 = n - n1;
- pdg = get_pdg3(b);
- if (pdg == 0) return -1;
- for (i = 0; i < 10; ++i) x[i] = -1.; // set to negative
- {
- if ((x[0] = est_freq(n, pdg)) < 0.) {
- free(pdg);
- return -1; // no data
- }
- x[0] = freqml(x[0], 0, n, pdg);
- }
- if (flag & (0xf<<1|3<<8)) { // estimate the genotype frequency and test HWE
- double *g = x + 1, f3[3], r;
- f3[0] = g[0] = (1 - x[0]) * (1 - x[0]);
- f3[1] = g[1] = 2 * x[0] * (1 - x[0]);
- f3[2] = g[2] = x[0] * x[0];
- for (i = 0; i < ITER_MAX; ++i)
- if (g3_iter(g, pdg, 0, n) < EPS) break;
- // Hardy-Weinberg equilibrium (HWE)
- for (i = 0, r = 1.; i < n; ++i) {
- double *p = pdg + i * 3;
- r *= (p[0] * g[0] + p[1] * g[1] + p[2] * g[2]) / (p[0] * f3[0] + p[1] * f3[1] + p[2] * f3[2]);
- }
- x[4] = kf_gammaq(.5, log(r));
- }
- if ((flag & 7<<5) && n1 > 0 && n1 < n) { // group frequency
- x[5] = freqml(x[0], 0, n1, pdg);
- x[6] = freqml(x[0], n1, n, pdg);
- }
- if ((flag & 1<<7) && n1 > 0 && n1 < n) { // 1-degree P-value
- double f[3], f3[3][3], tmp;
- f[0] = x[0]; f[1] = x[5]; f[2] = x[6];
- for (i = 0; i < 3; ++i)
- f3[i][0] = (1-f[i])*(1-f[i]), f3[i][1] = 2*f[i]*(1-f[i]), f3[i][2] = f[i]*f[i];
- tmp = log(lk_ratio_test(n, n1, pdg, f3));
- if (tmp < 0) tmp = 0;
- x[7] = kf_gammaq(.5, tmp);
- }
- if ((flag & 3<<8) && n1 > 0 && n1 < n) { // 2-degree P-value
- double g[3][3], tmp;
- for (i = 0; i < 3; ++i) memcpy(g[i], x + 1, 3 * sizeof(double));
- for (i = 0; i < ITER_MAX; ++i)
- if (g3_iter(g[1], pdg, 0, n1) < EPS) break;
- for (i = 0; i < ITER_MAX; ++i)
- if (g3_iter(g[2], pdg, n1, n) < EPS) break;
- tmp = log(lk_ratio_test(n, n1, pdg, g));
- if (tmp < 0) tmp = 0;
- x[8] = kf_gammaq(1., tmp);
- }
- // free
- free(pdg);
- return 0;
-}
-
-/*
- Two-locus EM (LD)
- */
-
-#define _G1(h, k) ((h>>1&1) + (k>>1&1))
-#define _G2(h, k) ((h&1) + (k&1))
-
-// 0: the previous site; 1: the current site
-static int pair_freq_iter(int n, double *pdg[2], double f[4])
-{
- double ff[4];
- int i, k, h;
-// printf("%lf,%lf,%lf,%lf\n", f[0], f[1], f[2], f[3]);
- memset(ff, 0, 4 * sizeof(double));
- for (i = 0; i < n; ++i) {
- double *p[2], sum, tmp;
- p[0] = pdg[0] + i * 3; p[1] = pdg[1] + i * 3;
- for (k = 0, sum = 0.; k < 4; ++k)
- for (h = 0; h < 4; ++h)
- sum += f[k] * f[h] * p[0][_G1(k,h)] * p[1][_G2(k,h)];
- for (k = 0; k < 4; ++k) {
- tmp = f[0] * (p[0][_G1(0,k)] * p[1][_G2(0,k)] + p[0][_G1(k,0)] * p[1][_G2(k,0)])
- + f[1] * (p[0][_G1(1,k)] * p[1][_G2(1,k)] + p[0][_G1(k,1)] * p[1][_G2(k,1)])
- + f[2] * (p[0][_G1(2,k)] * p[1][_G2(2,k)] + p[0][_G1(k,2)] * p[1][_G2(k,2)])
- + f[3] * (p[0][_G1(3,k)] * p[1][_G2(3,k)] + p[0][_G1(k,3)] * p[1][_G2(k,3)]);
- ff[k] += f[k] * tmp / sum;
- }
- }
- for (k = 0; k < 4; ++k) f[k] = ff[k] / (2 * n);
- return 0;
-}
-
-double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4])
-{
- const bcf1_t *b[2];
- int i, j, n_smpl;
- double *pdg[2], flast[4], r, f0[2];
- // initialize others
- if (b0->n_smpl != b1->n_smpl) return -1; // different number of samples
- n_smpl = b0->n_smpl;
- b[0] = b0; b[1] = b1;
- f[0] = f[1] = f[2] = f[3] = -1.;
- if (b[0]->n_alleles < 2 || b[1]->n_alleles < 2) return -1; // one allele only
- pdg[0] = get_pdg3(b0); pdg[1] = get_pdg3(b1);
- if (pdg[0] == 0 || pdg[1] == 0) {
- free(pdg[0]); free(pdg[1]);
- return -1;
- }
- // set the initial value
- f0[0] = est_freq(n_smpl, pdg[0]);
- f0[1] = est_freq(n_smpl, pdg[1]);
- f[0] = (1 - f0[0]) * (1 - f0[1]); f[3] = f0[0] * f0[1];
- f[1] = (1 - f0[0]) * f0[1]; f[2] = f0[0] * (1 - f0[1]);
- // iteration
- for (j = 0; j < ITER_MAX; ++j) {
- double eps = 0;
- memcpy(flast, f, 4 * sizeof(double));
- pair_freq_iter(n_smpl, pdg, f);
- for (i = 0; i < 4; ++i) {
- double x = fabs(f[i] - flast[i]);
- if (x > eps) eps = x;
- }
- if (eps < EPS) break;
- }
- // free
- free(pdg[0]); free(pdg[1]);
- { // calculate r^2
- double p[2], q[2], D;
- p[0] = f[0] + f[1]; q[0] = 1 - p[0];
- p[1] = f[0] + f[2]; q[1] = 1 - p[1];
- D = f[0] * f[3] - f[1] * f[2];
- r = sqrt(D * D / (p[0] * p[1] * q[0] * q[1]));
-// printf("R(%lf,%lf,%lf,%lf)=%lf\n", f[0], f[1], f[2], f[3], r);
- if (isnan(r)) r = -1.;
- }
- return r;
-}
diff --git a/src/samtools-0.1.18/bcftools/fet.c b/src/samtools-0.1.18/bcftools/fet.c
deleted file mode 100644
index 5812517..0000000
--- a/src/samtools-0.1.18/bcftools/fet.c
+++ /dev/null
@@ -1,112 +0,0 @@
-#include <math.h>
-#include <stdlib.h>
-
-/* This program is implemented with ideas from this web page:
- *
- * http://www.langsrud.com/fisher.htm
- */
-
-// log\binom{n}{k}
-static double lbinom(int n, int k)
-{
- if (k == 0 || n == k) return 0;
- return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);
-}
-
-// n11 n12 | n1_
-// n21 n22 | n2_
-//-----------+----
-// n_1 n_2 | n
-
-// hypergeometric distribution
-static double hypergeo(int n11, int n1_, int n_1, int n)
-{
- return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1));
-}
-
-typedef struct {
- int n11, n1_, n_1, n;
- double p;
-} hgacc_t;
-
-// incremental version of hypergenometric distribution
-static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)
-{
- if (n1_ || n_1 || n) {
- aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;
- } else { // then only n11 changed; the rest fixed
- if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {
- if (n11 == aux->n11 + 1) { // incremental
- aux->p *= (double)(aux->n1_ - aux->n11) / n11
- * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);
- aux->n11 = n11;
- return aux->p;
- }
- if (n11 == aux->n11 - 1) { // incremental
- aux->p *= (double)aux->n11 / (aux->n1_ - n11)
- * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);
- aux->n11 = n11;
- return aux->p;
- }
- }
- aux->n11 = n11;
- }
- aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);
- return aux->p;
-}
-
-double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)
-{
- int i, j, max, min;
- double p, q, left, right;
- hgacc_t aux;
- int n1_, n_1, n;
-
- n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n
- max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail
- min = n1_ + n_1 - n;
- if (min < 0) min = 0; // min n11, for left tail
- *two = *_left = *_right = 1.;
- if (min == max) return 1.; // no need to do test
- q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table
- // left tail
- p = hypergeo_acc(min, 0, 0, 0, &aux);
- for (left = 0., i = min + 1; p < 0.99999999 * q; ++i) // loop until underflow
- left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);
- --i;
- if (p < 1.00000001 * q) left += p;
- else --i;
- // right tail
- p = hypergeo_acc(max, 0, 0, 0, &aux);
- for (right = 0., j = max - 1; p < 0.99999999 * q; --j) // loop until underflow
- right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);
- ++j;
- if (p < 1.00000001 * q) right += p;
- else ++j;
- // two-tail
- *two = left + right;
- if (*two > 1.) *two = 1.;
- // adjust left and right
- if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;
- else left = 1.0 - right + q;
- *_left = left; *_right = right;
- return q;
-}
-
-#ifdef FET_MAIN
-#include <stdio.h>
-
-int main(int argc, char *argv[])
-{
- char id[1024];
- int n11, n12, n21, n22;
- double left, right, twotail, prob;
-
- while (scanf("%s%d%d%d%d", id, &n11, &n12, &n21, &n22) == 5) {
- prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail);
- printf("%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", id, n11, n12, n21, n22,
- prob, left, right, twotail);
- }
- return 0;
-}
-#endif
diff --git a/src/samtools-0.1.18/bcftools/index.c b/src/samtools-0.1.18/bcftools/index.c
deleted file mode 100644
index 014856d..0000000
--- a/src/samtools-0.1.18/bcftools/index.c
+++ /dev/null
@@ -1,335 +0,0 @@
-#include <assert.h>
-#include <ctype.h>
-#include <sys/stat.h>
-#include "bam_endian.h"
-#include "kstring.h"
-#include "bcf.h"
-#ifdef _USE_KNETFILE
-#include "knetfile.h"
-#endif
-
-#define TAD_LIDX_SHIFT 13
-
-typedef struct {
- int32_t n, m;
- uint64_t *offset;
-} bcf_lidx_t;
-
-struct __bcf_idx_t {
- int32_t n;
- bcf_lidx_t *index2;
-};
-
-/************
- * indexing *
- ************/
-
-static inline void insert_offset2(bcf_lidx_t *index2, int _beg, int _end, uint64_t offset)
-{
- int i, beg, end;
- beg = _beg >> TAD_LIDX_SHIFT;
- end = (_end - 1) >> TAD_LIDX_SHIFT;
- if (index2->m < end + 1) {
- int old_m = index2->m;
- index2->m = end + 1;
- kroundup32(index2->m);
- index2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);
- memset(index2->offset + old_m, 0, 8 * (index2->m - old_m));
- }
- if (beg == end) {
- if (index2->offset[beg] == 0) index2->offset[beg] = offset;
- } else {
- for (i = beg; i <= end; ++i)
- if (index2->offset[i] == 0) index2->offset[i] = offset;
- }
- if (index2->n < end + 1) index2->n = end + 1;
-}
-
-bcf_idx_t *bcf_idx_core(bcf_t *bp, bcf_hdr_t *h)
-{
- bcf_idx_t *idx;
- int32_t last_coor, last_tid;
- uint64_t last_off;
- kstring_t *str;
- BGZF *fp = bp->fp;
- bcf1_t *b;
- int ret;
-
- b = calloc(1, sizeof(bcf1_t));
- str = calloc(1, sizeof(kstring_t));
- idx = (bcf_idx_t*)calloc(1, sizeof(bcf_idx_t));
- idx->n = h->n_ref;
- idx->index2 = calloc(h->n_ref, sizeof(bcf_lidx_t));
-
- last_tid = 0xffffffffu;
- last_off = bgzf_tell(fp); last_coor = 0xffffffffu;
- while ((ret = bcf_read(bp, h, b)) > 0) {
- int end, tmp;
- if (last_tid != b->tid) { // change of chromosomes
- last_tid = b->tid;
- } else if (last_coor > b->pos) {
- fprintf(stderr, "[bcf_idx_core] the input is out of order\n");
- free(str->s); free(str); free(idx); bcf_destroy(b);
- return 0;
- }
- tmp = strlen(b->ref);
- end = b->pos + (tmp > 0? tmp : 1);
- insert_offset2(&idx->index2[b->tid], b->pos, end, last_off);
- last_off = bgzf_tell(fp);
- last_coor = b->pos;
- }
- free(str->s); free(str); bcf_destroy(b);
- return idx;
-}
-
-void bcf_idx_destroy(bcf_idx_t *idx)
-{
- int i;
- if (idx == 0) return;
- for (i = 0; i < idx->n; ++i) free(idx->index2[i].offset);
- free(idx->index2);
- free(idx);
-}
-
-/******************
- * index file I/O *
- ******************/
-
-void bcf_idx_save(const bcf_idx_t *idx, BGZF *fp)
-{
- int32_t i, ti_is_be;
- ti_is_be = bam_is_big_endian();
- bgzf_write(fp, "BCI\4", 4);
- if (ti_is_be) {
- uint32_t x = idx->n;
- bgzf_write(fp, bam_swap_endian_4p(&x), 4);
- } else bgzf_write(fp, &idx->n, 4);
- for (i = 0; i < idx->n; ++i) {
- bcf_lidx_t *index2 = idx->index2 + i;
- // write linear index (index2)
- if (ti_is_be) {
- int x = index2->n;
- bgzf_write(fp, bam_swap_endian_4p(&x), 4);
- } else bgzf_write(fp, &index2->n, 4);
- if (ti_is_be) { // big endian
- int x;
- for (x = 0; (int)x < index2->n; ++x)
- bam_swap_endian_8p(&index2->offset[x]);
- bgzf_write(fp, index2->offset, 8 * index2->n);
- for (x = 0; (int)x < index2->n; ++x)
- bam_swap_endian_8p(&index2->offset[x]);
- } else bgzf_write(fp, index2->offset, 8 * index2->n);
- }
-}
-
-static bcf_idx_t *bcf_idx_load_core(BGZF *fp)
-{
- int i, ti_is_be;
- char magic[4];
- bcf_idx_t *idx;
- ti_is_be = bam_is_big_endian();
- if (fp == 0) {
- fprintf(stderr, "[%s] fail to load index.\n", __func__);
- return 0;
- }
- bgzf_read(fp, magic, 4);
- if (strncmp(magic, "BCI\4", 4)) {
- fprintf(stderr, "[%s] wrong magic number.\n", __func__);
- return 0;
- }
- idx = (bcf_idx_t*)calloc(1, sizeof(bcf_idx_t));
- bgzf_read(fp, &idx->n, 4);
- if (ti_is_be) bam_swap_endian_4p(&idx->n);
- idx->index2 = (bcf_lidx_t*)calloc(idx->n, sizeof(bcf_lidx_t));
- for (i = 0; i < idx->n; ++i) {
- bcf_lidx_t *index2 = idx->index2 + i;
- int j;
- bgzf_read(fp, &index2->n, 4);
- if (ti_is_be) bam_swap_endian_4p(&index2->n);
- index2->m = index2->n;
- index2->offset = (uint64_t*)calloc(index2->m, 8);
- bgzf_read(fp, index2->offset, index2->n * 8);
- if (ti_is_be)
- for (j = 0; j < index2->n; ++j) bam_swap_endian_8p(&index2->offset[j]);
- }
- return idx;
-}
-
-bcf_idx_t *bcf_idx_load_local(const char *fnidx)
-{
- BGZF *fp;
- fp = bgzf_open(fnidx, "r");
- if (fp) {
- bcf_idx_t *idx = bcf_idx_load_core(fp);
- bgzf_close(fp);
- return idx;
- } else return 0;
-}
-
-#ifdef _USE_KNETFILE
-static void download_from_remote(const char *url)
-{
- const int buf_size = 1 * 1024 * 1024;
- char *fn;
- FILE *fp;
- uint8_t *buf;
- knetFile *fp_remote;
- int l;
- if (strstr(url, "ftp://") != url && strstr(url, "http://") != url) return;
- l = strlen(url);
- for (fn = (char*)url + l - 1; fn >= url; --fn)
- if (*fn == '/') break;
- ++fn; // fn now points to the file name
- fp_remote = knet_open(url, "r");
- if (fp_remote == 0) {
- fprintf(stderr, "[download_from_remote] fail to open remote file.\n");
- return;
- }
- if ((fp = fopen(fn, "w")) == 0) {
- fprintf(stderr, "[download_from_remote] fail to create file in the working directory.\n");
- knet_close(fp_remote);
- return;
- }
- buf = (uint8_t*)calloc(buf_size, 1);
- while ((l = knet_read(fp_remote, buf, buf_size)) != 0)
- fwrite(buf, 1, l, fp);
- free(buf);
- fclose(fp);
- knet_close(fp_remote);
-}
-#else
-static void download_from_remote(const char *url)
-{
- return;
-}
-#endif
-
-static char *get_local_version(const char *fn)
-{
- struct stat sbuf;
- char *fnidx = (char*)calloc(strlen(fn) + 5, 1);
- strcat(strcpy(fnidx, fn), ".bci");
- if ((strstr(fnidx, "ftp://") == fnidx || strstr(fnidx, "http://") == fnidx)) {
- char *p, *url;
- int l = strlen(fnidx);
- for (p = fnidx + l - 1; p >= fnidx; --p)
- if (*p == '/') break;
- url = fnidx; fnidx = strdup(p + 1);
- if (stat(fnidx, &sbuf) == 0) {
- free(url);
- return fnidx;
- }
- fprintf(stderr, "[%s] downloading the index file...\n", __func__);
- download_from_remote(url);
- free(url);
- }
- if (stat(fnidx, &sbuf) == 0) return fnidx;
- free(fnidx); return 0;
-}
-
-bcf_idx_t *bcf_idx_load(const char *fn)
-{
- bcf_idx_t *idx;
- char *fname = get_local_version(fn);
- if (fname == 0) return 0;
- idx = bcf_idx_load_local(fname);
- free(fname);
- return idx;
-}
-
-int bcf_idx_build2(const char *fn, const char *_fnidx)
-{
- char *fnidx;
- BGZF *fpidx;
- bcf_t *bp;
- bcf_idx_t *idx;
- bcf_hdr_t *h;
- if ((bp = bcf_open(fn, "r")) == 0) {
- fprintf(stderr, "[bcf_idx_build2] fail to open the BAM file.\n");
- return -1;
- }
- h = bcf_hdr_read(bp);
- idx = bcf_idx_core(bp, h);
- bcf_close(bp);
- if (_fnidx == 0) {
- fnidx = (char*)calloc(strlen(fn) + 5, 1);
- strcpy(fnidx, fn); strcat(fnidx, ".bci");
- } else fnidx = strdup(_fnidx);
- fpidx = bgzf_open(fnidx, "w");
- if (fpidx == 0) {
- fprintf(stderr, "[bcf_idx_build2] fail to create the index file.\n");
- free(fnidx);
- return -1;
- }
- bcf_idx_save(idx, fpidx);
- bcf_idx_destroy(idx);
- bgzf_close(fpidx);
- free(fnidx);
- return 0;
-}
-
-int bcf_idx_build(const char *fn)
-{
- return bcf_idx_build2(fn, 0);
-}
-
-/********************************************
- * parse a region in the format chr:beg-end *
- ********************************************/
-
-int bcf_parse_region(void *str2id, const char *str, int *tid, int *begin, int *end)
-{
- char *s, *p;
- int i, l, k;
- l = strlen(str);
- p = s = (char*)malloc(l+1);
- /* squeeze out "," */
- for (i = k = 0; i != l; ++i)
- if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i];
- s[k] = 0;
- for (i = 0; i != k; ++i) if (s[i] == ':') break;
- s[i] = 0;
- if ((*tid = bcf_str2id(str2id, s)) < 0) {
- free(s);
- return -1;
- }
- if (i == k) { /* dump the whole sequence */
- *begin = 0; *end = 1<<29; free(s);
- return 0;
- }
- for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break;
- *begin = atoi(p);
- if (i < k) {
- p = s + i + 1;
- *end = atoi(p);
- } else *end = 1<<29;
- if (*begin > 0) --*begin;
- free(s);
- if (*begin > *end) return -1;
- return 0;
-}
-
-/*******************************
- * retrieve a specified region *
- *******************************/
-
-uint64_t bcf_idx_query(const bcf_idx_t *idx, int tid, int beg)
-{
- uint64_t min_off, *offset;
- int i;
- if (beg < 0) beg = 0;
- offset = idx->index2[tid].offset;
- for (i = beg>>TAD_LIDX_SHIFT; i < idx->index2[tid].n && offset[i] == 0; ++i);
- min_off = (i == idx->index2[tid].n)? offset[idx->index2[tid].n-1] : offset[i];
- return min_off;
-}
-
-int bcf_main_index(int argc, char *argv[])
-{
- if (argc == 1) {
- fprintf(stderr, "Usage: bcftools index <in.bcf>\n");
- return 1;
- }
- bcf_idx_build(argv[1]);
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/kfunc.c b/src/samtools-0.1.18/bcftools/kfunc.c
deleted file mode 100644
index a637b6c..0000000
--- a/src/samtools-0.1.18/bcftools/kfunc.c
+++ /dev/null
@@ -1,162 +0,0 @@
-#include <math.h>
-
-
-/* Log gamma function
- * \log{\Gamma(z)}
- * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245
- */
-double kf_lgamma(double z)
-{
- double x = 0;
- x += 0.1659470187408462e-06 / (z+7);
- x += 0.9934937113930748e-05 / (z+6);
- x -= 0.1385710331296526 / (z+5);
- x += 12.50734324009056 / (z+4);
- x -= 176.6150291498386 / (z+3);
- x += 771.3234287757674 / (z+2);
- x -= 1259.139216722289 / (z+1);
- x += 676.5203681218835 / z;
- x += 0.9999999999995183;
- return log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5);
-}
-
-/* complementary error function
- * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt
- * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66
- */
-double kf_erfc(double x)
-{
- const double p0 = 220.2068679123761;
- const double p1 = 221.2135961699311;
- const double p2 = 112.0792914978709;
- const double p3 = 33.912866078383;
- const double p4 = 6.37396220353165;
- const double p5 = .7003830644436881;
- const double p6 = .03526249659989109;
- const double q0 = 440.4137358247522;
- const double q1 = 793.8265125199484;
- const double q2 = 637.3336333788311;
- const double q3 = 296.5642487796737;
- const double q4 = 86.78073220294608;
- const double q5 = 16.06417757920695;
- const double q6 = 1.755667163182642;
- const double q7 = .08838834764831844;
- double expntl, z, p;
- z = fabs(x) * M_SQRT2;
- if (z > 37.) return x > 0.? 0. : 2.;
- expntl = exp(z * z * - .5);
- if (z < 10. / M_SQRT2) // for small z
- p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0)
- / (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0);
- else p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65)))));
- return x > 0.? 2. * p : 2. * (1. - p);
-}
-
-/* The following computes regularized incomplete gamma functions.
- * Formulas are taken from Wiki, with additional input from Numerical
- * Recipes in C (for modified Lentz's algorithm) and AS245
- * (http://lib.stat.cmu.edu/apstat/245).
- *
- * A good online calculator is available at:
- *
- * http://www.danielsoper.com/statcalc/calc23.aspx
- *
- * It calculates upper incomplete gamma function, which equals
- * kf_gammaq(s,z)*tgamma(s).
- */
-
-#define KF_GAMMA_EPS 1e-14
-#define KF_TINY 1e-290
-
-// regularized lower incomplete gamma function, by series expansion
-static double _kf_gammap(double s, double z)
-{
- double sum, x;
- int k;
- for (k = 1, sum = x = 1.; k < 100; ++k) {
- sum += (x *= z / (s + k));
- if (x / sum < KF_GAMMA_EPS) break;
- }
- return exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum));
-}
-// regularized upper incomplete gamma function, by continued fraction
-static double _kf_gammaq(double s, double z)
-{
- int j;
- double C, D, f;
- f = 1. + z - s; C = f; D = 0.;
- // Modified Lentz's algorithm for computing continued fraction
- // See Numerical Recipes in C, 2nd edition, section 5.2
- for (j = 1; j < 100; ++j) {
- double a = j * (s - j), b = (j<<1) + 1 + z - s, d;
- D = b + a * D;
- if (D < KF_TINY) D = KF_TINY;
- C = b + a / C;
- if (C < KF_TINY) C = KF_TINY;
- D = 1. / D;
- d = C * D;
- f *= d;
- if (fabs(d - 1.) < KF_GAMMA_EPS) break;
- }
- return exp(s * log(z) - z - kf_lgamma(s) - log(f));
-}
-
-double kf_gammap(double s, double z)
-{
- return z <= 1. || z < s? _kf_gammap(s, z) : 1. - _kf_gammaq(s, z);
-}
-
-double kf_gammaq(double s, double z)
-{
- return z <= 1. || z < s? 1. - _kf_gammap(s, z) : _kf_gammaq(s, z);
-}
-
-/* Regularized incomplete beta function. The method is taken from
- * Numerical Recipe in C, 2nd edition, section 6.4. The following web
- * page calculates the incomplete beta function, which equals
- * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b):
- *
- * http://www.danielsoper.com/statcalc/calc36.aspx
- */
-static double kf_betai_aux(double a, double b, double x)
-{
- double C, D, f;
- int j;
- if (x == 0.) return 0.;
- if (x == 1.) return 1.;
- f = 1.; C = f; D = 0.;
- // Modified Lentz's algorithm for computing continued fraction
- for (j = 1; j < 200; ++j) {
- double aa, d;
- int m = j>>1;
- aa = (j&1)? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1))
- : m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m));
- D = 1. + aa * D;
- if (D < KF_TINY) D = KF_TINY;
- C = 1. + aa / C;
- if (C < KF_TINY) C = KF_TINY;
- D = 1. / D;
- d = C * D;
- f *= d;
- if (fabs(d - 1.) < KF_GAMMA_EPS) break;
- }
- return exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f;
-}
-double kf_betai(double a, double b, double x)
-{
- return x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. - x);
-}
-
-#ifdef KF_MAIN
-#include <stdio.h>
-int main(int argc, char *argv[])
-{
- double x = 5.5, y = 3;
- double a, b;
- printf("erfc(%lg): %lg, %lg\n", x, erfc(x), kf_erfc(x));
- printf("upper-gamma(%lg,%lg): %lg\n", x, y, kf_gammaq(y, x)*tgamma(y));
- a = 2; b = 2; x = 0.5;
- printf("incomplete-beta(%lg,%lg,%lg): %lg\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b)));
- return 0;
-}
-#endif
diff --git a/src/samtools-0.1.18/bcftools/kmin.c b/src/samtools-0.1.18/bcftools/kmin.c
deleted file mode 100644
index 5b8193b..0000000
--- a/src/samtools-0.1.18/bcftools/kmin.c
+++ /dev/null
@@ -1,209 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008, 2010 by Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Hooke-Jeeves algorithm for nonlinear minimization
-
- Based on the pseudocodes by Bell and Pike (CACM 9(9):684-685), and
- the revision by Tomlin and Smith (CACM 12(11):637-638). Both of the
- papers are comments on Kaupe's Algorithm 178 "Direct Search" (ACM
- 6(6):313-314). The original algorithm was designed by Hooke and
- Jeeves (ACM 8:212-229). This program is further revised according to
- Johnson's implementation at Netlib (opt/hooke.c).
-
- Hooke-Jeeves algorithm is very simple and it works quite well on a
- few examples. However, it might fail to converge due to its heuristic
- nature. A possible improvement, as is suggested by Johnson, may be to
- choose a small r at the beginning to quickly approach to the minimum
- and a large r at later step to hit the minimum.
- */
-
-#include <stdlib.h>
-#include <string.h>
-#include <math.h>
-#include "kmin.h"
-
-static double __kmin_hj_aux(kmin_f func, int n, double *x1, void *data, double fx1, double *dx, int *n_calls)
-{
- int k, j = *n_calls;
- double ftmp;
- for (k = 0; k != n; ++k) {
- x1[k] += dx[k];
- ftmp = func(n, x1, data); ++j;
- if (ftmp < fx1) fx1 = ftmp;
- else { /* search the opposite direction */
- dx[k] = 0.0 - dx[k];
- x1[k] += dx[k] + dx[k];
- ftmp = func(n, x1, data); ++j;
- if (ftmp < fx1) fx1 = ftmp;
- else x1[k] -= dx[k]; /* back to the original x[k] */
- }
- }
- *n_calls = j;
- return fx1; /* here: fx1=f(n,x1) */
-}
-
-double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls)
-{
- double fx, fx1, *x1, *dx, radius;
- int k, n_calls = 0;
- x1 = (double*)calloc(n, sizeof(double));
- dx = (double*)calloc(n, sizeof(double));
- for (k = 0; k != n; ++k) { /* initial directions, based on MGJ */
- dx[k] = fabs(x[k]) * r;
- if (dx[k] == 0) dx[k] = r;
- }
- radius = r;
- fx1 = fx = func(n, x, data); ++n_calls;
- for (;;) {
- memcpy(x1, x, n * sizeof(double)); /* x1 = x */
- fx1 = __kmin_hj_aux(func, n, x1, data, fx, dx, &n_calls);
- while (fx1 < fx) {
- for (k = 0; k != n; ++k) {
- double t = x[k];
- dx[k] = x1[k] > x[k]? fabs(dx[k]) : 0.0 - fabs(dx[k]);
- x[k] = x1[k];
- x1[k] = x1[k] + x1[k] - t;
- }
- fx = fx1;
- if (n_calls >= max_calls) break;
- fx1 = func(n, x1, data); ++n_calls;
- fx1 = __kmin_hj_aux(func, n, x1, data, fx1, dx, &n_calls);
- if (fx1 >= fx) break;
- for (k = 0; k != n; ++k)
- if (fabs(x1[k] - x[k]) > .5 * fabs(dx[k])) break;
- if (k == n) break;
- }
- if (radius >= eps) {
- if (n_calls >= max_calls) break;
- radius *= r;
- for (k = 0; k != n; ++k) dx[k] *= r;
- } else break; /* converge */
- }
- free(x1); free(dx);
- return fx1;
-}
-
-// I copied this function somewhere several years ago with some of my modifications, but I forgot the source.
-double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin)
-{
- double bound, u, r, q, fu, tmp, fa, fb, fc, c;
- const double gold1 = 1.6180339887;
- const double gold2 = 0.3819660113;
- const double tiny = 1e-20;
- const int max_iter = 100;
-
- double e, d, w, v, mid, tol1, tol2, p, eold, fv, fw;
- int iter;
-
- fa = func(a, data); fb = func(b, data);
- if (fb > fa) { // swap, such that f(a) > f(b)
- tmp = a; a = b; b = tmp;
- tmp = fa; fa = fb; fb = tmp;
- }
- c = b + gold1 * (b - a), fc = func(c, data); // golden section extrapolation
- while (fb > fc) {
- bound = b + 100.0 * (c - b); // the farthest point where we want to go
- r = (b - a) * (fb - fc);
- q = (b - c) * (fb - fa);
- if (fabs(q - r) < tiny) { // avoid 0 denominator
- tmp = q > r? tiny : 0.0 - tiny;
- } else tmp = q - r;
- u = b - ((b - c) * q - (b - a) * r) / (2.0 * tmp); // u is the parabolic extrapolation point
- if ((b > u && u > c) || (b < u && u < c)) { // u lies between b and c
- fu = func(u, data);
- if (fu < fc) { // (b,u,c) bracket the minimum
- a = b; b = u; fa = fb; fb = fu;
- break;
- } else if (fu > fb) { // (a,b,u) bracket the minimum
- c = u; fc = fu;
- break;
- }
- u = c + gold1 * (c - b); fu = func(u, data); // golden section extrapolation
- } else if ((c > u && u > bound) || (c < u && u < bound)) { // u lies between c and bound
- fu = func(u, data);
- if (fu < fc) { // fb > fc > fu
- b = c; c = u; u = c + gold1 * (c - b);
- fb = fc; fc = fu; fu = func(u, data);
- } else { // (b,c,u) bracket the minimum
- a = b; b = c; c = u;
- fa = fb; fb = fc; fc = fu;
- break;
- }
- } else if ((u > bound && bound > c) || (u < bound && bound < c)) { // u goes beyond the bound
- u = bound; fu = func(u, data);
- } else { // u goes the other way around, use golden section extrapolation
- u = c + gold1 * (c - b); fu = func(u, data);
- }
- a = b; b = c; c = u;
- fa = fb; fb = fc; fc = fu;
- }
- if (a > c) u = a, a = c, c = u; // swap
-
- // now, a<b<c, fa>fb and fb<fc, move on to Brent's algorithm
- e = d = 0.0;
- w = v = b; fv = fw = fb;
- for (iter = 0; iter != max_iter; ++iter) {
- mid = 0.5 * (a + c);
- tol2 = 2.0 * (tol1 = tol * fabs(b) + tiny);
- if (fabs(b - mid) <= (tol2 - 0.5 * (c - a))) {
- *xmin = b; return fb; // found
- }
- if (fabs(e) > tol1) {
- // related to parabolic interpolation
- r = (b - w) * (fb - fv);
- q = (b - v) * (fb - fw);
- p = (b - v) * q - (b - w) * r;
- q = 2.0 * (q - r);
- if (q > 0.0) p = 0.0 - p;
- else q = 0.0 - q;
- eold = e; e = d;
- if (fabs(p) >= fabs(0.5 * q * eold) || p <= q * (a - b) || p >= q * (c - b)) {
- d = gold2 * (e = (b >= mid ? a - b : c - b));
- } else {
- d = p / q; u = b + d; // actual parabolic interpolation happens here
- if (u - a < tol2 || c - u < tol2)
- d = (mid > b)? tol1 : 0.0 - tol1;
- }
- } else d = gold2 * (e = (b >= mid ? a - b : c - b)); // golden section interpolation
- u = fabs(d) >= tol1 ? b + d : b + (d > 0.0? tol1 : -tol1);
- fu = func(u, data);
- if (fu <= fb) { // u is the minimum point so far
- if (u >= b) a = b;
- else c = b;
- v = w; w = b; b = u; fv = fw; fw = fb; fb = fu;
- } else { // adjust (a,c) and (u,v,w)
- if (u < b) a = u;
- else c = u;
- if (fu <= fw || w == b) {
- v = w; w = u;
- fv = fw; fw = fu;
- } else if (fu <= fv || v == b || v == w) {
- v = u; fv = fu;
- }
- }
- }
- *xmin = b;
- return fb;
-}
diff --git a/src/samtools-0.1.18/bcftools/kmin.h b/src/samtools-0.1.18/bcftools/kmin.h
deleted file mode 100644
index 6feba45..0000000
--- a/src/samtools-0.1.18/bcftools/kmin.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- Copyright (c) 2008, 2010 by Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-#ifndef KMIN_H
-#define KMIN_H
-
-#define KMIN_RADIUS 0.5
-#define KMIN_EPS 1e-7
-#define KMIN_MAXCALL 50000
-
-typedef double (*kmin_f)(int, double*, void*);
-typedef double (*kmin1_f)(double, void*);
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- double kmin_hj(kmin_f func, int n, double *x, void *data, double r, double eps, int max_calls);
- double kmin_brent(kmin1_f func, double a, double b, void *data, double tol, double *xmin);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/bcftools/main.c b/src/samtools-0.1.18/bcftools/main.c
deleted file mode 100644
index fcd94b8..0000000
--- a/src/samtools-0.1.18/bcftools/main.c
+++ /dev/null
@@ -1,190 +0,0 @@
-#include <string.h>
-#include <stdlib.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include "bcf.h"
-
-#include "kseq.h"
-KSTREAM_INIT(gzFile, gzread, 0x10000)
-
-int bcfview(int argc, char *argv[]);
-int bcf_main_index(int argc, char *argv[]);
-
-#define BUF_SIZE 0x10000
-
-int bcf_cat(int n, char * const *fn)
-{
- int i;
- bcf_t *out;
- uint8_t *buf;
- buf = malloc(BUF_SIZE);
- out = bcf_open("-", "w");
- for (i = 0; i < n; ++i) {
- bcf_t *in;
- bcf_hdr_t *h;
- off_t end;
- struct stat s;
- in = bcf_open(fn[i], "r");
- h = bcf_hdr_read(in);
- if (i == 0) bcf_hdr_write(out, h);
- bcf_hdr_destroy(h);
-#ifdef _USE_KNETFILE
- fstat(knet_fileno(in->fp->x.fpr), &s);
- end = s.st_size - 28;
- while (knet_tell(in->fp->x.fpr) < end) {
- int size = knet_tell(in->fp->x.fpr) + BUF_SIZE < end? BUF_SIZE : end - knet_tell(in->fp->x.fpr);
- knet_read(in->fp->x.fpr, buf, size);
- fwrite(buf, 1, size, out->fp->x.fpw);
- }
-#else
- abort(); // FIXME: not implemented
-#endif
- bcf_close(in);
- }
- bcf_close(out);
- free(buf);
- return 0;
-}
-
-extern double bcf_pair_freq(const bcf1_t *b0, const bcf1_t *b1, double f[4]);
-
-int bcf_main_ldpair(int argc, char *argv[])
-{
- bcf_t *fp;
- bcf_hdr_t *h;
- bcf1_t *b0, *b1;
- bcf_idx_t *idx;
- kstring_t str;
- void *str2id;
- gzFile fplist;
- kstream_t *ks;
- int dret, lineno = 0;
- if (argc < 3) {
- fprintf(stderr, "Usage: bcftools ldpair <in.bcf> <in.list>\n");
- return 1;
- }
- fplist = gzopen(argv[2], "rb");
- ks = ks_init(fplist);
- memset(&str, 0, sizeof(kstring_t));
- fp = bcf_open(argv[1], "rb");
- h = bcf_hdr_read(fp);
- str2id = bcf_build_refhash(h);
- idx = bcf_idx_load(argv[1]);
- if (idx == 0) {
- fprintf(stderr, "[%s] No bcf index is found. Abort!\n", __func__);
- return 1;
- }
- b0 = calloc(1, sizeof(bcf1_t));
- b1 = calloc(1, sizeof(bcf1_t));
- while (ks_getuntil(ks, '\n', &str, &dret) >= 0) {
- char *p, *q;
- int k;
- int tid0 = -1, tid1 = -1, pos0 = -1, pos1 = -1;
- ++lineno;
- for (p = q = str.s, k = 0; *p; ++p) {
- if (*p == ' ' || *p == '\t') {
- *p = '\0';
- if (k == 0) tid0 = bcf_str2id(str2id, q);
- else if (k == 1) pos0 = atoi(q) - 1;
- else if (k == 2) tid1 = strcmp(q, "=")? bcf_str2id(str2id, q) : tid0;
- else if (k == 3) pos1 = atoi(q) - 1;
- q = p + 1;
- ++k;
- }
- }
- if (k == 3) pos1 = atoi(q) - 1;
- if (tid0 >= 0 && tid1 >= 0 && pos0 >= 0 && pos1 >= 0) {
- uint64_t off;
- double r, f[4];
- off = bcf_idx_query(idx, tid0, pos0);
- bgzf_seek(fp->fp, off, SEEK_SET);
- while (bcf_read(fp, h, b0) >= 0 && b0->pos != pos0);
- off = bcf_idx_query(idx, tid1, pos1);
- bgzf_seek(fp->fp, off, SEEK_SET);
- while (bcf_read(fp, h, b1) >= 0 && b1->pos != pos1);
- r = bcf_pair_freq(b0, b1, f);
- r *= r;
- printf("%s\t%d\t%s\t%d\t%.4g\t%.4g\t%.4g\t%.4g\t%.4g\n", h->ns[tid0], pos0+1, h->ns[tid1], pos1+1,
- r, f[0], f[1], f[2], f[3]);
- } //else fprintf(stderr, "[%s] Parse error at line %d.\n", __func__, lineno);
- }
- bcf_destroy(b0); bcf_destroy(b1);
- bcf_idx_destroy(idx);
- bcf_str2id_destroy(str2id);
- bcf_hdr_destroy(h);
- bcf_close(fp);
- free(str.s);
- ks_destroy(ks);
- gzclose(fplist);
- return 0;
-}
-
-int bcf_main_ld(int argc, char *argv[])
-{
- bcf_t *fp;
- bcf_hdr_t *h;
- bcf1_t **b, *b0;
- int i, j, m, n;
- double f[4];
- if (argc == 1) {
- fprintf(stderr, "Usage: bcftools ld <in.bcf>\n");
- return 1;
- }
- fp = bcf_open(argv[1], "rb");
- h = bcf_hdr_read(fp);
- // read the entire BCF
- m = n = 0; b = 0;
- b0 = calloc(1, sizeof(bcf1_t));
- while (bcf_read(fp, h, b0) >= 0) {
- if (m == n) {
- m = m? m<<1 : 16;
- b = realloc(b, sizeof(void*) * m);
- }
- b[n] = calloc(1, sizeof(bcf1_t));
- bcf_cpy(b[n++], b0);
- }
- bcf_destroy(b0);
- // compute pair-wise r^2
- printf("%d\n", n); // the number of loci
- for (i = 0; i < n; ++i) {
- printf("%s:%d", h->ns[b[i]->tid], b[i]->pos + 1);
- for (j = 0; j < i; ++j) {
- double r = bcf_pair_freq(b[i], b[j], f);
- printf("\t%.3f", r*r);
- }
- printf("\t1.000\n");
- }
- // free
- for (i = 0; i < n; ++i) bcf_destroy(b[i]);
- free(b);
- bcf_hdr_destroy(h);
- bcf_close(fp);
- return 0;
-}
-
-int main(int argc, char *argv[])
-{
- if (argc == 1) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Program: bcftools (Tools for data in the VCF/BCF formats)\n");
- fprintf(stderr, "Version: %s\n\n", BCF_VERSION);
- fprintf(stderr, "Usage: bcftools <command> <arguments>\n\n");
- fprintf(stderr, "Command: view print, extract, convert and call SNPs from BCF\n");
- fprintf(stderr, " index index BCF\n");
- fprintf(stderr, " cat concatenate BCFs\n");
- fprintf(stderr, " ld compute all-pair r^2\n");
- fprintf(stderr, " ldpair compute r^2 between requested pairs\n");
- fprintf(stderr, "\n");
- return 1;
- }
- if (strcmp(argv[1], "view") == 0) return bcfview(argc-1, argv+1);
- else if (strcmp(argv[1], "index") == 0) return bcf_main_index(argc-1, argv+1);
- else if (strcmp(argv[1], "ld") == 0) return bcf_main_ld(argc-1, argv+1);
- else if (strcmp(argv[1], "ldpair") == 0) return bcf_main_ldpair(argc-1, argv+1);
- else if (strcmp(argv[1], "cat") == 0) return bcf_cat(argc-2, argv+2); // cat is different ...
- else {
- fprintf(stderr, "[main] Unrecognized command.\n");
- return 1;
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/bcftools/mut.c b/src/samtools-0.1.18/bcftools/mut.c
deleted file mode 100644
index 15ef265..0000000
--- a/src/samtools-0.1.18/bcftools/mut.c
+++ /dev/null
@@ -1,127 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include "bcf.h"
-
-#define MAX_GENO 359
-
-int8_t seq_bitcnt[] = { 4, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
-char *seq_nt16rev = "XACMGRSVTWYHKDBN";
-
-uint32_t *bcf_trio_prep(int is_x, int is_son)
-{
- int i, j, k, n, map[10];
- uint32_t *ret;
- ret = calloc(MAX_GENO, 4);
- for (i = 0, k = 0; i < 4; ++i)
- for (j = i; j < 4; ++j)
- map[k++] = 1<<i|1<<j;
- for (i = 0, n = 1; i < 10; ++i) { // father
- if (is_x && seq_bitcnt[map[i]] != 1) continue;
- if (is_x && is_son) {
- for (j = 0; j < 10; ++j) // mother
- for (k = 0; k < 10; ++k) // child
- if (seq_bitcnt[map[k]] == 1 && (map[j]&map[k]))
- ret[n++] = j<<16 | i<<8 | k;
- } else {
- for (j = 0; j < 10; ++j) // mother
- for (k = 0; k < 10; ++k) // child
- if ((map[i]&map[k]) && (map[j]&map[k]) && ((map[i]|map[j])&map[k]) == map[k])
- ret[n++] = j<<16 | i<<8 | k;
- }
- }
- ret[0] = n - 1;
- return ret;
-}
-
-
-int bcf_trio_call(const uint32_t *prep, const bcf1_t *b, int *llr, int64_t *gt)
-{
- int i, j, k;
- const bcf_ginfo_t *PL;
- uint8_t *gl10;
- int map[10];
- if (b->n_smpl != 3) return -1; // not a trio
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
- if (i == b->n_gi) return -1; // no PL
- gl10 = alloca(10 * b->n_smpl);
- if (bcf_gl10(b, gl10) < 0) {
- if (bcf_gl10_indel(b, gl10) < 0) return -1;
- }
- PL = b->gi + i;
- for (i = 0, k = 0; i < 4; ++i)
- for (j = i; j < 4; ++j)
- map[k++] = seq_nt16rev[1<<i|1<<j];
- for (j = 0; j < 3; ++j) // check if ref hom is the most probable in all members
- if (((uint8_t*)PL->data)[j * PL->len] != 0) break;
- if (j < 3) { // we need to go through the complex procedure
- uint8_t *g[3];
- int minc = 1<<30, minc_j = -1, minf = 0, gtf = 0, gtc = 0;
- g[0] = gl10;
- g[1] = gl10 + 10;
- g[2] = gl10 + 20;
- for (j = 1; j <= (int)prep[0]; ++j) { // compute LK with constraint
- int sum = g[0][prep[j]&0xff] + g[1][prep[j]>>8&0xff] + g[2][prep[j]>>16&0xff];
- if (sum < minc) minc = sum, minc_j = j;
- }
- gtc |= map[prep[minc_j]&0xff]; gtc |= map[prep[minc_j]>>8&0xff]<<8; gtc |= map[prep[minc_j]>>16]<<16;
- for (j = 0; j < 3; ++j) { // compute LK without constraint
- int min = 1<<30, min_k = -1;
- for (k = 0; k < 10; ++k)
- if (g[j][k] < min) min = g[j][k], min_k = k;
- gtf |= map[min_k]<<(j*8);
- minf += min;
- }
- *llr = minc - minf; *gt = (int64_t)gtc<<32 | gtf;
- } else *llr = 0, *gt = -1;
- return 0;
-}
-
-int bcf_pair_call(const bcf1_t *b)
-{
- int i, j, k;
- const bcf_ginfo_t *PL;
- if (b->n_smpl != 2) return -1; // not a pair
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
- if (i == b->n_gi) return -1; // no PL
- PL = b->gi + i;
- for (j = 0; j < 2; ++j) // check if ref hom is the most probable in all members
- if (((uint8_t*)PL->data)[j * PL->len] != 0) break;
- if (j < 2) { // we need to go through the complex procedure
- uint8_t *g[2];
- int minc = 1<<30, minf = 0;
- g[0] = PL->data;
- g[1] = (uint8_t*)PL->data + PL->len;
- for (j = 0; j < PL->len; ++j) // compute LK with constraint
- minc = minc < g[0][j] + g[1][j]? minc : g[0][j] + g[1][j];
- for (j = 0; j < 2; ++j) { // compute LK without constraint
- int min = 1<<30;
- for (k = 0; k < PL->len; ++k)
- min = min < g[j][k]? min : g[j][k];
- minf += min;
- }
- return minc - minf;
- } else return 0;
-}
-
-int bcf_min_diff(const bcf1_t *b)
-{
- int i, min = 1<<30;
- const bcf_ginfo_t *PL;
- for (i = 0; i < b->n_gi; ++i)
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) break;
- if (i == b->n_gi) return -1; // no PL
- PL = b->gi + i;
- for (i = 0; i < b->n_smpl; ++i) {
- int m1, m2, j;
- const uint8_t *p = (uint8_t*)PL->data;
- m1 = m2 = 1<<30;
- for (j = 0; j < PL->len; ++j) {
- if ((int)p[j] < m1) m2 = m1, m1 = p[j];
- else if ((int)p[j] < m2) m2 = p[j];
- }
- min = min < m2 - m1? min : m2 - m1;
- }
- return min;
-}
diff --git a/src/samtools-0.1.18/bcftools/prob1.c b/src/samtools-0.1.18/bcftools/prob1.c
deleted file mode 100644
index a380484..0000000
--- a/src/samtools-0.1.18/bcftools/prob1.c
+++ /dev/null
@@ -1,554 +0,0 @@
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <errno.h>
-#include <assert.h>
-#include "prob1.h"
-
-#include "kseq.h"
-KSTREAM_INIT(gzFile, gzread, 16384)
-
-#define MC_MAX_EM_ITER 16
-#define MC_EM_EPS 1e-5
-#define MC_DEF_INDEL 0.15
-
-unsigned char seq_nt4_table[256] = {
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 /*'-'*/, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
- 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
-};
-
-struct __bcf_p1aux_t {
- int n, M, n1, is_indel;
- uint8_t *ploidy; // haploid or diploid ONLY
- double *q2p, *pdg; // pdg -> P(D|g)
- double *phi, *phi_indel;
- double *z, *zswap; // aux for afs
- double *z1, *z2, *phi1, *phi2; // only calculated when n1 is set
- double **hg; // hypergeometric distribution
- double *lf; // log factorial
- double t, t1, t2;
- double *afs, *afs1; // afs: accumulative AFS; afs1: site posterior distribution
- const uint8_t *PL; // point to PL
- int PL_len;
-};
-
-void bcf_p1_indel_prior(bcf_p1aux_t *ma, double x)
-{
- int i;
- for (i = 0; i < ma->M; ++i)
- ma->phi_indel[i] = ma->phi[i] * x;
- ma->phi_indel[ma->M] = 1. - ma->phi[ma->M] * x;
-}
-
-static void init_prior(int type, double theta, int M, double *phi)
-{
- int i;
- if (type == MC_PTYPE_COND2) {
- for (i = 0; i <= M; ++i)
- phi[i] = 2. * (i + 1) / (M + 1) / (M + 2);
- } else if (type == MC_PTYPE_FLAT) {
- for (i = 0; i <= M; ++i)
- phi[i] = 1. / (M + 1);
- } else {
- double sum;
- for (i = 0, sum = 0.; i < M; ++i)
- sum += (phi[i] = theta / (M - i));
- phi[M] = 1. - sum;
- }
-}
-
-void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta)
-{
- init_prior(type, theta, ma->M, ma->phi);
- bcf_p1_indel_prior(ma, MC_DEF_INDEL);
-}
-
-void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta)
-{
- if (ma->n1 <= 0 || ma->n1 >= ma->M) return;
- init_prior(type, theta, 2*ma->n1, ma->phi1);
- init_prior(type, theta, 2*(ma->n - ma->n1), ma->phi2);
-}
-
-int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn)
-{
- gzFile fp;
- kstring_t s;
- kstream_t *ks;
- long double sum;
- int dret, k;
- memset(&s, 0, sizeof(kstring_t));
- fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
- ks = ks_init(fp);
- memset(ma->phi, 0, sizeof(double) * (ma->M + 1));
- while (ks_getuntil(ks, '\n', &s, &dret) >= 0) {
- if (strstr(s.s, "[afs] ") == s.s) {
- char *p = s.s + 6;
- for (k = 0; k <= ma->M; ++k) {
- int x;
- double y;
- x = strtol(p, &p, 10);
- if (x != k && (errno == EINVAL || errno == ERANGE)) return -1;
- ++p;
- y = strtod(p, &p);
- if (y == 0. && (errno == EINVAL || errno == ERANGE)) return -1;
- ma->phi[ma->M - k] += y;
- }
- }
- }
- ks_destroy(ks);
- gzclose(fp);
- free(s.s);
- for (sum = 0., k = 0; k <= ma->M; ++k) sum += ma->phi[k];
- fprintf(stderr, "[prior]");
- for (k = 0; k <= ma->M; ++k) ma->phi[k] /= sum;
- for (k = 0; k <= ma->M; ++k) fprintf(stderr, " %d:%.3lg", k, ma->phi[ma->M - k]);
- fputc('\n', stderr);
- for (sum = 0., k = 1; k < ma->M; ++k) sum += ma->phi[ma->M - k] * (2.* k * (ma->M - k) / ma->M / (ma->M - 1));
- fprintf(stderr, "[%s] heterozygosity=%lf, ", __func__, (double)sum);
- for (sum = 0., k = 1; k <= ma->M; ++k) sum += k * ma->phi[ma->M - k] / ma->M;
- fprintf(stderr, "theta=%lf\n", (double)sum);
- bcf_p1_indel_prior(ma, MC_DEF_INDEL);
- return 0;
-}
-
-bcf_p1aux_t *bcf_p1_init(int n, uint8_t *ploidy)
-{
- bcf_p1aux_t *ma;
- int i;
- ma = calloc(1, sizeof(bcf_p1aux_t));
- ma->n1 = -1;
- ma->n = n; ma->M = 2 * n;
- if (ploidy) {
- ma->ploidy = malloc(n);
- memcpy(ma->ploidy, ploidy, n);
- for (i = 0, ma->M = 0; i < n; ++i) ma->M += ploidy[i];
- if (ma->M == 2 * n) {
- free(ma->ploidy);
- ma->ploidy = 0;
- }
- }
- ma->q2p = calloc(256, sizeof(double));
- ma->pdg = calloc(3 * ma->n, sizeof(double));
- ma->phi = calloc(ma->M + 1, sizeof(double));
- ma->phi_indel = calloc(ma->M + 1, sizeof(double));
- ma->phi1 = calloc(ma->M + 1, sizeof(double));
- ma->phi2 = calloc(ma->M + 1, sizeof(double));
- ma->z = calloc(ma->M + 1, sizeof(double));
- ma->zswap = calloc(ma->M + 1, sizeof(double));
- ma->z1 = calloc(ma->M + 1, sizeof(double)); // actually we do not need this large
- ma->z2 = calloc(ma->M + 1, sizeof(double));
- ma->afs = calloc(ma->M + 1, sizeof(double));
- ma->afs1 = calloc(ma->M + 1, sizeof(double));
- ma->lf = calloc(ma->M + 1, sizeof(double));
- for (i = 0; i < 256; ++i)
- ma->q2p[i] = pow(10., -i / 10.);
- for (i = 0; i <= ma->M; ++i) ma->lf[i] = lgamma(i + 1);
- bcf_p1_init_prior(ma, MC_PTYPE_FULL, 1e-3); // the simplest prior
- return ma;
-}
-
-int bcf_p1_set_n1(bcf_p1aux_t *b, int n1)
-{
- if (n1 == 0 || n1 >= b->n) return -1;
- if (b->M != b->n * 2) {
- fprintf(stderr, "[%s] unable to set `n1' when there are haploid samples.\n", __func__);
- return -1;
- }
- b->n1 = n1;
- return 0;
-}
-
-void bcf_p1_destroy(bcf_p1aux_t *ma)
-{
- if (ma) {
- int k;
- free(ma->lf);
- if (ma->hg && ma->n1 > 0) {
- for (k = 0; k <= 2*ma->n1; ++k) free(ma->hg[k]);
- free(ma->hg);
- }
- free(ma->ploidy); free(ma->q2p); free(ma->pdg);
- free(ma->phi); free(ma->phi_indel); free(ma->phi1); free(ma->phi2);
- free(ma->z); free(ma->zswap); free(ma->z1); free(ma->z2);
- free(ma->afs); free(ma->afs1);
- free(ma);
- }
-}
-
-static int cal_pdg(const bcf1_t *b, bcf_p1aux_t *ma)
-{
- int i, j;
- long *p, tmp;
- p = alloca(b->n_alleles * sizeof(long));
- memset(p, 0, sizeof(long) * b->n_alleles);
- for (j = 0; j < ma->n; ++j) {
- const uint8_t *pi = ma->PL + j * ma->PL_len;
- double *pdg = ma->pdg + j * 3;
- pdg[0] = ma->q2p[pi[2]]; pdg[1] = ma->q2p[pi[1]]; pdg[2] = ma->q2p[pi[0]];
- for (i = 0; i < b->n_alleles; ++i)
- p[i] += (int)pi[(i+1)*(i+2)/2-1];
- }
- for (i = 0; i < b->n_alleles; ++i) p[i] = p[i]<<4 | i;
- for (i = 1; i < b->n_alleles; ++i) // insertion sort
- for (j = i; j > 0 && p[j] < p[j-1]; --j)
- tmp = p[j], p[j] = p[j-1], p[j-1] = tmp;
- for (i = b->n_alleles - 1; i >= 0; --i)
- if ((p[i]&0xf) == 0) break;
- return i;
-}
-
-int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k)
-{
- double sum, g[3];
- double max, f3[3], *pdg = ma->pdg + k * 3;
- int q, i, max_i, ploidy;
- ploidy = ma->ploidy? ma->ploidy[k] : 2;
- if (ploidy == 2) {
- f3[0] = (1.-f0)*(1.-f0); f3[1] = 2.*f0*(1.-f0); f3[2] = f0*f0;
- } else {
- f3[0] = 1. - f0; f3[1] = 0; f3[2] = f0;
- }
- for (i = 0, sum = 0.; i < 3; ++i)
- sum += (g[i] = pdg[i] * f3[i]);
- for (i = 0, max = -1., max_i = 0; i < 3; ++i) {
- g[i] /= sum;
- if (g[i] > max) max = g[i], max_i = i;
- }
- max = 1. - max;
- if (max < 1e-308) max = 1e-308;
- q = (int)(-4.343 * log(max) + .499);
- if (q > 99) q = 99;
- return q<<2|max_i;
-}
-
-#define TINY 1e-20
-
-static void mc_cal_y_core(bcf_p1aux_t *ma, int beg)
-{
- double *z[2], *tmp, *pdg;
- int _j, last_min, last_max;
- assert(beg == 0 || ma->M == ma->n*2);
- z[0] = ma->z;
- z[1] = ma->zswap;
- pdg = ma->pdg;
- memset(z[0], 0, sizeof(double) * (ma->M + 1));
- memset(z[1], 0, sizeof(double) * (ma->M + 1));
- z[0][0] = 1.;
- last_min = last_max = 0;
- ma->t = 0.;
- if (ma->M == ma->n * 2) {
- int M = 0;
- for (_j = beg; _j < ma->n; ++_j) {
- int k, j = _j - beg, _min = last_min, _max = last_max, M0;
- double p[3], sum;
- M0 = M; M += 2;
- pdg = ma->pdg + _j * 3;
- p[0] = pdg[0]; p[1] = 2. * pdg[1]; p[2] = pdg[2];
- for (; _min < _max && z[0][_min] < TINY; ++_min) z[0][_min] = z[1][_min] = 0.;
- for (; _max > _min && z[0][_max] < TINY; --_max) z[0][_max] = z[1][_max] = 0.;
- _max += 2;
- if (_min == 0) k = 0, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k];
- if (_min <= 1) k = 1, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1];
- for (k = _min < 2? 2 : _min; k <= _max; ++k)
- z[1][k] = (M0-k+1)*(M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1] + k*(k-1)* p[2] * z[0][k-2];
- for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
- ma->t += log(sum / (M * (M - 1.)));
- for (k = _min; k <= _max; ++k) z[1][k] /= sum;
- if (_min >= 1) z[1][_min-1] = 0.;
- if (_min >= 2) z[1][_min-2] = 0.;
- if (j < ma->n - 1) z[1][_max+1] = z[1][_max+2] = 0.;
- if (_j == ma->n1 - 1) { // set pop1; ma->n1==-1 when unset
- ma->t1 = ma->t;
- memcpy(ma->z1, z[1], sizeof(double) * (ma->n1 * 2 + 1));
- }
- tmp = z[0]; z[0] = z[1]; z[1] = tmp;
- last_min = _min; last_max = _max;
- }
- //for (_j = 0; _j < last_min; ++_j) z[0][_j] = 0.; // TODO: are these necessary?
- //for (_j = last_max + 1; _j < ma->M; ++_j) z[0][_j] = 0.;
- } else { // this block is very similar to the block above; these two might be merged in future
- int j, M = 0;
- for (j = 0; j < ma->n; ++j) {
- int k, M0, _min = last_min, _max = last_max;
- double p[3], sum;
- pdg = ma->pdg + j * 3;
- for (; _min < _max && z[0][_min] < TINY; ++_min) z[0][_min] = z[1][_min] = 0.;
- for (; _max > _min && z[0][_max] < TINY; --_max) z[0][_max] = z[1][_max] = 0.;
- M0 = M;
- M += ma->ploidy[j];
- if (ma->ploidy[j] == 1) {
- p[0] = pdg[0]; p[1] = pdg[2];
- _max++;
- if (_min == 0) k = 0, z[1][k] = (M0+1-k) * p[0] * z[0][k];
- for (k = _min < 1? 1 : _min; k <= _max; ++k)
- z[1][k] = (M0+1-k) * p[0] * z[0][k] + k * p[1] * z[0][k-1];
- for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
- ma->t += log(sum / M);
- for (k = _min; k <= _max; ++k) z[1][k] /= sum;
- if (_min >= 1) z[1][_min-1] = 0.;
- if (j < ma->n - 1) z[1][_max+1] = 0.;
- } else if (ma->ploidy[j] == 2) {
- p[0] = pdg[0]; p[1] = 2 * pdg[1]; p[2] = pdg[2];
- _max += 2;
- if (_min == 0) k = 0, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k];
- if (_min <= 1) k = 1, z[1][k] = (M0-k+1) * (M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1];
- for (k = _min < 2? 2 : _min; k <= _max; ++k)
- z[1][k] = (M0-k+1)*(M0-k+2) * p[0] * z[0][k] + k*(M0-k+2) * p[1] * z[0][k-1] + k*(k-1)* p[2] * z[0][k-2];
- for (k = _min, sum = 0.; k <= _max; ++k) sum += z[1][k];
- ma->t += log(sum / (M * (M - 1.)));
- for (k = _min; k <= _max; ++k) z[1][k] /= sum;
- if (_min >= 1) z[1][_min-1] = 0.;
- if (_min >= 2) z[1][_min-2] = 0.;
- if (j < ma->n - 1) z[1][_max+1] = z[1][_max+2] = 0.;
- }
- tmp = z[0]; z[0] = z[1]; z[1] = tmp;
- last_min = _min; last_max = _max;
- }
- }
- if (z[0] != ma->z) memcpy(ma->z, z[0], sizeof(double) * (ma->M + 1));
-}
-
-static void mc_cal_y(bcf_p1aux_t *ma)
-{
- if (ma->n1 > 0 && ma->n1 < ma->n && ma->M == ma->n * 2) { // NB: ma->n1 is ineffective when there are haploid samples
- int k;
- long double x;
- memset(ma->z1, 0, sizeof(double) * (2 * ma->n1 + 1));
- memset(ma->z2, 0, sizeof(double) * (2 * (ma->n - ma->n1) + 1));
- ma->t1 = ma->t2 = 0.;
- mc_cal_y_core(ma, ma->n1);
- ma->t2 = ma->t;
- memcpy(ma->z2, ma->z, sizeof(double) * (2 * (ma->n - ma->n1) + 1));
- mc_cal_y_core(ma, 0);
- // rescale z
- x = expl(ma->t - (ma->t1 + ma->t2));
- for (k = 0; k <= ma->M; ++k) ma->z[k] *= x;
- } else mc_cal_y_core(ma, 0);
-}
-
-#define CONTRAST_TINY 1e-30
-
-extern double kf_gammaq(double s, double z); // incomplete gamma function for chi^2 test
-
-static inline double chi2_test(int a, int b, int c, int d)
-{
- double x, z;
- x = (double)(a+b) * (c+d) * (b+d) * (a+c);
- if (x == 0.) return 1;
- z = a * d - b * c;
- return kf_gammaq(.5, .5 * z * z * (a+b+c+d) / x);
-}
-
-// chi2=(a+b+c+d)(ad-bc)^2/[(a+b)(c+d)(a+c)(b+d)]
-static inline double contrast2_aux(const bcf_p1aux_t *p1, double sum, int k1, int k2, double x[3])
-{
- double p = p1->phi[k1+k2] * p1->z1[k1] * p1->z2[k2] / sum * p1->hg[k1][k2];
- int n1 = p1->n1, n2 = p1->n - p1->n1;
- if (p < CONTRAST_TINY) return -1;
- if (.5*k1/n1 < .5*k2/n2) x[1] += p;
- else if (.5*k1/n1 > .5*k2/n2) x[2] += p;
- else x[0] += p;
- return p * chi2_test(k1, k2, (n1<<1) - k1, (n2<<1) - k2);
-}
-
-static double contrast2(bcf_p1aux_t *p1, double ret[3])
-{
- int k, k1, k2, k10, k20, n1, n2;
- double sum;
- // get n1 and n2
- n1 = p1->n1; n2 = p1->n - p1->n1;
- if (n1 <= 0 || n2 <= 0) return 0.;
- if (p1->hg == 0) { // initialize the hypergeometric distribution
- /* NB: the hg matrix may take a lot of memory when there are many samples. There is a way
- to avoid precomputing this matrix, but it is slower and quite intricate. The following
- computation in this block can be accelerated with a similar strategy, but perhaps this
- is not a serious concern for now. */
- double tmp = lgamma(2*(n1+n2)+1) - (lgamma(2*n1+1) + lgamma(2*n2+1));
- p1->hg = calloc(2*n1+1, sizeof(void*));
- for (k1 = 0; k1 <= 2*n1; ++k1) {
- p1->hg[k1] = calloc(2*n2+1, sizeof(double));
- for (k2 = 0; k2 <= 2*n2; ++k2)
- p1->hg[k1][k2] = exp(lgamma(k1+k2+1) + lgamma(p1->M-k1-k2+1) - (lgamma(k1+1) + lgamma(k2+1) + lgamma(2*n1-k1+1) + lgamma(2*n2-k2+1) + tmp));
- }
- }
- { // compute
- long double suml = 0;
- for (k = 0; k <= p1->M; ++k) suml += p1->phi[k] * p1->z[k];
- sum = suml;
- }
- { // get the max k1 and k2
- double max;
- int max_k;
- for (k = 0, max = 0, max_k = -1; k <= 2*n1; ++k) {
- double x = p1->phi1[k] * p1->z1[k];
- if (x > max) max = x, max_k = k;
- }
- k10 = max_k;
- for (k = 0, max = 0, max_k = -1; k <= 2*n2; ++k) {
- double x = p1->phi2[k] * p1->z2[k];
- if (x > max) max = x, max_k = k;
- }
- k20 = max_k;
- }
- { // We can do the following with one nested loop, but that is an O(N^2) thing. The following code block is much faster for large N.
- double x[3], y;
- long double z = 0., L[2];
- x[0] = x[1] = x[2] = 0; L[0] = L[1] = 0;
- for (k1 = k10; k1 >= 0; --k1) {
- for (k2 = k20; k2 >= 0; --k2) {
- if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
- else z += y;
- }
- for (k2 = k20 + 1; k2 <= 2*n2; ++k2) {
- if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
- else z += y;
- }
- }
- ret[0] = x[0]; ret[1] = x[1]; ret[2] = x[2];
- x[0] = x[1] = x[2] = 0;
- for (k1 = k10 + 1; k1 <= 2*n1; ++k1) {
- for (k2 = k20; k2 >= 0; --k2) {
- if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
- else z += y;
- }
- for (k2 = k20 + 1; k2 <= 2*n2; ++k2) {
- if ((y = contrast2_aux(p1, sum, k1, k2, x)) < 0) break;
- else z += y;
- }
- }
- ret[0] += x[0]; ret[1] += x[1]; ret[2] += x[2];
- if (ret[0] + ret[1] + ret[2] < 0.95) { // in case of bad things happened
- ret[0] = ret[1] = ret[2] = 0; L[0] = L[1] = 0;
- for (k1 = 0, z = 0.; k1 <= 2*n1; ++k1)
- for (k2 = 0; k2 <= 2*n2; ++k2)
- if ((y = contrast2_aux(p1, sum, k1, k2, ret)) >= 0) z += y;
- if (ret[0] + ret[1] + ret[2] < 0.95) // It seems that this may be caused by floating point errors. I do not really understand why...
- z = 1.0, ret[0] = ret[1] = ret[2] = 1./3;
- }
- return (double)z;
- }
-}
-
-static double mc_cal_afs(bcf_p1aux_t *ma, double *p_ref_folded, double *p_var_folded)
-{
- int k;
- long double sum = 0., sum2;
- double *phi = ma->is_indel? ma->phi_indel : ma->phi;
- memset(ma->afs1, 0, sizeof(double) * (ma->M + 1));
- mc_cal_y(ma);
- // compute AFS
- for (k = 0, sum = 0.; k <= ma->M; ++k)
- sum += (long double)phi[k] * ma->z[k];
- for (k = 0; k <= ma->M; ++k) {
- ma->afs1[k] = phi[k] * ma->z[k] / sum;
- if (isnan(ma->afs1[k]) || isinf(ma->afs1[k])) return -1.;
- }
- // compute folded variant probability
- for (k = 0, sum = 0.; k <= ma->M; ++k)
- sum += (long double)(phi[k] + phi[ma->M - k]) / 2. * ma->z[k];
- for (k = 1, sum2 = 0.; k < ma->M; ++k)
- sum2 += (long double)(phi[k] + phi[ma->M - k]) / 2. * ma->z[k];
- *p_var_folded = sum2 / sum;
- *p_ref_folded = (phi[k] + phi[ma->M - k]) / 2. * (ma->z[ma->M] + ma->z[0]) / sum;
- // the expected frequency
- for (k = 0, sum = 0.; k <= ma->M; ++k) {
- ma->afs[k] += ma->afs1[k];
- sum += k * ma->afs1[k];
- }
- return sum / ma->M;
-}
-
-int bcf_p1_cal(const bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst)
-{
- int i, k;
- long double sum = 0.;
- ma->is_indel = bcf_is_indel(b);
- rst->perm_rank = -1;
- // set PL and PL_len
- for (i = 0; i < b->n_gi; ++i) {
- if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
- ma->PL = (uint8_t*)b->gi[i].data;
- ma->PL_len = b->gi[i].len;
- break;
- }
- }
- if (i == b->n_gi) return -1; // no PL
- if (b->n_alleles < 2) return -1; // FIXME: find a better solution
- //
- rst->rank0 = cal_pdg(b, ma);
- rst->f_exp = mc_cal_afs(ma, &rst->p_ref_folded, &rst->p_var_folded);
- rst->p_ref = ma->afs1[ma->M];
- for (k = 0, sum = 0.; k < ma->M; ++k)
- sum += ma->afs1[k];
- rst->p_var = (double)sum;
- { // compute the allele count
- double max = -1;
- rst->ac = -1;
- for (k = 0; k <= ma->M; ++k)
- if (max < ma->z[k]) max = ma->z[k], rst->ac = k;
- rst->ac = ma->M - rst->ac;
- }
- // calculate f_flat and f_em
- for (k = 0, sum = 0.; k <= ma->M; ++k)
- sum += (long double)ma->z[k];
- rst->f_flat = 0.;
- for (k = 0; k <= ma->M; ++k) {
- double p = ma->z[k] / sum;
- rst->f_flat += k * p;
- }
- rst->f_flat /= ma->M;
- { // estimate equal-tail credible interval (95% level)
- int l, h;
- double p;
- for (i = 0, p = 0.; i <= ma->M; ++i)
- if (p + ma->afs1[i] > 0.025) break;
- else p += ma->afs1[i];
- l = i;
- for (i = ma->M, p = 0.; i >= 0; --i)
- if (p + ma->afs1[i] > 0.025) break;
- else p += ma->afs1[i];
- h = i;
- rst->cil = (double)(ma->M - h) / ma->M; rst->cih = (double)(ma->M - l) / ma->M;
- }
- if (ma->n1 > 0) { // compute LRT
- double max0, max1, max2;
- for (k = 0, max0 = -1; k <= ma->M; ++k)
- if (max0 < ma->z[k]) max0 = ma->z[k];
- for (k = 0, max1 = -1; k <= ma->n1 * 2; ++k)
- if (max1 < ma->z1[k]) max1 = ma->z1[k];
- for (k = 0, max2 = -1; k <= ma->M - ma->n1 * 2; ++k)
- if (max2 < ma->z2[k]) max2 = ma->z2[k];
- rst->lrt = log(max1 * max2 / max0);
- rst->lrt = rst->lrt < 0? 1 : kf_gammaq(.5, rst->lrt);
- } else rst->lrt = -1.0;
- rst->cmp[0] = rst->cmp[1] = rst->cmp[2] = rst->p_chi2 = -1.0;
- if (do_contrast && rst->p_var > 0.5) // skip contrast2() if the locus is a strong non-variant
- rst->p_chi2 = contrast2(ma, rst->cmp);
- return 0;
-}
-
-void bcf_p1_dump_afs(bcf_p1aux_t *ma)
-{
- int k;
- fprintf(stderr, "[afs]");
- for (k = 0; k <= ma->M; ++k)
- fprintf(stderr, " %d:%.3lf", k, ma->afs[ma->M - k]);
- fprintf(stderr, "\n");
- memset(ma->afs, 0, sizeof(double) * (ma->M + 1));
-}
diff --git a/src/samtools-0.1.18/bcftools/prob1.h b/src/samtools-0.1.18/bcftools/prob1.h
deleted file mode 100644
index 0a51a0a..0000000
--- a/src/samtools-0.1.18/bcftools/prob1.h
+++ /dev/null
@@ -1,42 +0,0 @@
-#ifndef BCF_PROB1_H
-#define BCF_PROB1_H
-
-#include "bcf.h"
-
-struct __bcf_p1aux_t;
-typedef struct __bcf_p1aux_t bcf_p1aux_t;
-
-typedef struct {
- int rank0, perm_rank; // NB: perm_rank is always set to -1 by bcf_p1_cal()
- int ac; // ML alternative allele count
- double f_exp, f_flat, p_ref_folded, p_ref, p_var_folded, p_var;
- double cil, cih;
- double cmp[3], p_chi2, lrt; // used by contrast2()
-} bcf_p1rst_t;
-
-#define MC_PTYPE_FULL 1
-#define MC_PTYPE_COND2 2
-#define MC_PTYPE_FLAT 3
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- bcf_p1aux_t *bcf_p1_init(int n, uint8_t *ploidy);
- void bcf_p1_init_prior(bcf_p1aux_t *ma, int type, double theta);
- void bcf_p1_init_subprior(bcf_p1aux_t *ma, int type, double theta);
- void bcf_p1_destroy(bcf_p1aux_t *ma);
- int bcf_p1_cal(const bcf1_t *b, int do_contrast, bcf_p1aux_t *ma, bcf_p1rst_t *rst);
- int bcf_p1_call_gt(const bcf_p1aux_t *ma, double f0, int k);
- void bcf_p1_dump_afs(bcf_p1aux_t *ma);
- int bcf_p1_read_prior(bcf_p1aux_t *ma, const char *fn);
- int bcf_p1_set_n1(bcf_p1aux_t *b, int n1);
- void bcf_p1_set_folded(bcf_p1aux_t *p1a); // only effective when set_n1() is not called
-
- int bcf_em1(const bcf1_t *b, int n1, int flag, double x[10]);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/bcftools/vcf.c b/src/samtools-0.1.18/bcftools/vcf.c
deleted file mode 100644
index 9daa845..0000000
--- a/src/samtools-0.1.18/bcftools/vcf.c
+++ /dev/null
@@ -1,244 +0,0 @@
-#include <zlib.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include "bcf.h"
-#include "kstring.h"
-#include "kseq.h"
-KSTREAM_INIT(gzFile, gzread, 4096)
-
-typedef struct {
- gzFile fp;
- FILE *fpout;
- kstream_t *ks;
- void *refhash;
- kstring_t line;
- int max_ref;
-} vcf_t;
-
-bcf_hdr_t *vcf_hdr_read(bcf_t *bp)
-{
- kstring_t meta, smpl;
- int dret;
- vcf_t *v;
- bcf_hdr_t *h;
- if (!bp->is_vcf) return bcf_hdr_read(bp);
- h = calloc(1, sizeof(bcf_hdr_t));
- v = (vcf_t*)bp->v;
- v->line.l = 0;
- memset(&meta, 0, sizeof(kstring_t));
- memset(&smpl, 0, sizeof(kstring_t));
- while (ks_getuntil(v->ks, '\n', &v->line, &dret) >= 0) {
- if (v->line.l < 2) continue;
- if (v->line.s[0] != '#') return 0; // no sample line
- if (v->line.s[0] == '#' && v->line.s[1] == '#') {
- kputsn(v->line.s, v->line.l, &meta); kputc('\n', &meta);
- } else if (v->line.s[0] == '#') {
- int k;
- ks_tokaux_t aux;
- char *p;
- for (p = kstrtok(v->line.s, "\t\n", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) {
- if (k >= 9) {
- kputsn(p, aux.p - p, &smpl);
- kputc('\0', &smpl);
- }
- }
- break;
- }
- }
- kputc('\0', &meta);
- h->name = 0;
- h->sname = smpl.s; h->l_smpl = smpl.l;
- h->txt = meta.s; h->l_txt = meta.l;
- bcf_hdr_sync(h);
- return h;
-}
-
-bcf_t *vcf_open(const char *fn, const char *mode)
-{
- bcf_t *bp;
- vcf_t *v;
- if (strchr(mode, 'b')) return bcf_open(fn, mode);
- bp = calloc(1, sizeof(bcf_t));
- v = calloc(1, sizeof(vcf_t));
- bp->is_vcf = 1;
- bp->v = v;
- v->refhash = bcf_str2id_init();
- if (strchr(mode, 'r')) {
- v->fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
- v->ks = ks_init(v->fp);
- } else if (strchr(mode, 'w'))
- v->fpout = strcmp(fn, "-")? fopen(fn, "w") : stdout;
- return bp;
-}
-
-int vcf_dictread(bcf_t *bp, bcf_hdr_t *h, const char *fn)
-{
- vcf_t *v;
- gzFile fp;
- kstream_t *ks;
- kstring_t s, rn;
- int dret;
- if (bp == 0) return -1;
- if (!bp->is_vcf) return 0;
- s.l = s.m = 0; s.s = 0;
- rn.m = rn.l = h->l_nm; rn.s = h->name;
- v = (vcf_t*)bp->v;
- fp = gzopen(fn, "r");
- ks = ks_init(fp);
- while (ks_getuntil(ks, 0, &s, &dret) >= 0) {
- bcf_str2id_add(v->refhash, strdup(s.s));
- kputs(s.s, &rn); kputc('\0', &rn);
- if (dret != '\n') ks_getuntil(ks, '\n', &s, &dret);
- }
- ks_destroy(ks);
- gzclose(fp);
- h->l_nm = rn.l; h->name = rn.s;
- bcf_hdr_sync(h);
- free(s.s);
- return 0;
-}
-
-int vcf_close(bcf_t *bp)
-{
- vcf_t *v;
- if (bp == 0) return -1;
- if (!bp->is_vcf) return bcf_close(bp);
- v = (vcf_t*)bp->v;
- if (v->fp) {
- ks_destroy(v->ks);
- gzclose(v->fp);
- }
- if (v->fpout) fclose(v->fpout);
- free(v->line.s);
- bcf_str2id_thorough_destroy(v->refhash);
- free(v);
- free(bp);
- return 0;
-}
-
-int vcf_hdr_write(bcf_t *bp, const bcf_hdr_t *h)
-{
- vcf_t *v = (vcf_t*)bp->v;
- int i, has_ver = 0;
- if (!bp->is_vcf) return bcf_hdr_write(bp, h);
- if (h->l_txt > 0) {
- if (strstr(h->txt, "##fileformat=")) has_ver = 1;
- if (has_ver == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n");
- fwrite(h->txt, 1, h->l_txt - 1, v->fpout);
- }
- if (h->l_txt == 0) fprintf(v->fpout, "##fileformat=VCFv4.1\n");
- fprintf(v->fpout, "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT");
- for (i = 0; i < h->n_smpl; ++i)
- fprintf(v->fpout, "\t%s", h->sns[i]);
- fputc('\n', v->fpout);
- return 0;
-}
-
-int vcf_write(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
-{
- vcf_t *v = (vcf_t*)bp->v;
- extern void bcf_fmt_core(const bcf_hdr_t *h, bcf1_t *b, kstring_t *s);
- if (!bp->is_vcf) return bcf_write(bp, h, b);
- bcf_fmt_core(h, b, &v->line);
- fwrite(v->line.s, 1, v->line.l, v->fpout);
- fputc('\n', v->fpout);
- return v->line.l + 1;
-}
-
-int vcf_read(bcf_t *bp, bcf_hdr_t *h, bcf1_t *b)
-{
- int dret, k, i, sync = 0;
- vcf_t *v = (vcf_t*)bp->v;
- char *p, *q;
- kstring_t str, rn;
- ks_tokaux_t aux, a2;
- if (!bp->is_vcf) return bcf_read(bp, h, b);
- v->line.l = 0;
- str.l = 0; str.m = b->m_str; str.s = b->str;
- rn.l = rn.m = h->l_nm; rn.s = h->name;
- if (ks_getuntil(v->ks, '\n', &v->line, &dret) < 0) return -1;
- b->n_smpl = h->n_smpl;
- for (p = kstrtok(v->line.s, "\t", &aux), k = 0; p; p = kstrtok(0, 0, &aux), ++k) {
- *(char*)aux.p = 0;
- if (k == 0) { // ref
- int tid = bcf_str2id(v->refhash, p);
- if (tid < 0) {
- tid = bcf_str2id_add(v->refhash, strdup(p));
- kputs(p, &rn); kputc('\0', &rn);
- sync = 1;
- }
- b->tid = tid;
- } else if (k == 1) { // pos
- b->pos = atoi(p) - 1;
- } else if (k == 5) { // qual
- b->qual = (p[0] >= '0' && p[0] <= '9')? atof(p) : 0;
- } else if (k <= 8) { // variable length strings
- kputs(p, &str); kputc('\0', &str);
- b->l_str = str.l; b->m_str = str.m; b->str = str.s;
- if (k == 8) bcf_sync(b);
- } else { // k > 9
- if (strncmp(p, "./.", 3) == 0) {
- for (i = 0; i < b->n_gi; ++i) {
- if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
- ((uint8_t*)b->gi[i].data)[k-9] = 1<<7;
- } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
- ((uint8_t*)b->gi[i].data)[k-9] = 0;
- } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
- ((int32_t*)b->gi[i].data)[k-9] = 0;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
- ((uint16_t*)b->gi[i].data)[k-9] = 0;
- } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
- int y = b->n_alleles * (b->n_alleles + 1) / 2;
- memset((uint8_t*)b->gi[i].data + (k - 9) * y, 0, y);
- } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
- int y = b->n_alleles * (b->n_alleles + 1) / 2;
- memset((float*)b->gi[i].data + (k - 9) * y, 0, y * 4);
- }
- }
- goto endblock;
- }
- for (q = kstrtok(p, ":", &a2), i = 0; q && i < b->n_gi; q = kstrtok(0, 0, &a2), ++i) {
- if (b->gi[i].fmt == bcf_str2int("GT", 2)) {
- ((uint8_t*)b->gi[i].data)[k-9] = (q[0] - '0')<<3 | (q[2] - '0') | (q[1] == '/'? 0 : 1) << 6;
- } else if (b->gi[i].fmt == bcf_str2int("GQ", 2)) {
- double _x = strtod(q, &q);
- int x = (int)(_x + .499);
- if (x > 255) x = 255;
- ((uint8_t*)b->gi[i].data)[k-9] = x;
- } else if (b->gi[i].fmt == bcf_str2int("SP", 2)) {
- int x = strtol(q, &q, 10);
- if (x > 0xffff) x = 0xffff;
- ((uint32_t*)b->gi[i].data)[k-9] = x;
- } else if (b->gi[i].fmt == bcf_str2int("DP", 2)) {
- int x = strtol(q, &q, 10);
- if (x > 0xffff) x = 0xffff;
- ((uint16_t*)b->gi[i].data)[k-9] = x;
- } else if (b->gi[i].fmt == bcf_str2int("PL", 2)) {
- int x, y, j;
- uint8_t *data = (uint8_t*)b->gi[i].data;
- y = b->n_alleles * (b->n_alleles + 1) / 2;
- for (j = 0; j < y; ++j) {
- x = strtol(q, &q, 10);
- if (x > 255) x = 255;
- data[(k-9) * y + j] = x;
- ++q;
- }
- } else if (b->gi[i].fmt == bcf_str2int("GL", 2)) {
- int j, y;
- float x, *data = (float*)b->gi[i].data;
- y = b->n_alleles * (b->n_alleles + 1) / 2;
- for (j = 0; j < y; ++j) {
- x = strtod(q, &q);
- data[(k-9) * y + j] = x > 0? -x/10. : x;
- ++q;
- }
- }
- }
- endblock: i = i;
- }
- }
- h->l_nm = rn.l; h->name = rn.s;
- if (sync) bcf_hdr_sync(h);
- return v->line.l + 1;
-}
diff --git a/src/samtools-0.1.18/bcftools/vcfutils.pl b/src/samtools-0.1.18/bcftools/vcfutils.pl
deleted file mode 100755
index 2b7ba0b..0000000
--- a/src/samtools-0.1.18/bcftools/vcfutils.pl
+++ /dev/null
@@ -1,567 +0,0 @@
-#!/usr/bin/perl -w
-
-# Author: lh3
-
-use strict;
-use warnings;
-use Getopt::Std;
-
-&main;
-exit;
-
-sub main {
- &usage if (@ARGV < 1);
- my $command = shift(@ARGV);
- my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter,
- hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats,
- gapstats=>\&gapstats, splitchr=>\&splitchr, vcf2fq=>\&vcf2fq);
- die("Unknown command \"$command\".\n") if (!defined($func{$command}));
- &{$func{$command}};
-}
-
-sub splitchr {
- my %opts = (l=>5000000);
- getopts('l:', \%opts);
- my $l = $opts{l};
- die(qq/Usage: vcfutils.pl splitchr [-l $opts{l}] <in.fa.fai>\n/) if (@ARGV == 0 && -t STDIN);
- while (<>) {
- my @t = split;
- my $last = 0;
- for (my $i = 0; $i < $t[1];) {
- my $e = ($t[1] - $i) / $l < 1.1? $t[1] : $i + $l;
- print "$t[0]:".($i+1)."-$e\n";
- $i = $e;
- }
- }
-}
-
-sub subsam {
- die(qq/Usage: vcfutils.pl subsam <in.vcf> [samples]\n/) if (@ARGV == 0);
- my ($fh, %h);
- my $fn = shift(@ARGV);
- my @col;
- open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
- $h{$_} = 1 for (@ARGV);
- while (<$fh>) {
- if (/^##/) {
- print;
- } elsif (/^#/) {
- my @t = split;
- my @s = @t[0..8]; # all fixed fields + FORMAT
- for (9 .. $#t) {
- if ($h{$t[$_]}) {
- push(@s, $t[$_]);
- push(@col, $_);
- }
- }
- pop(@s) if (@s == 9); # no sample selected; remove the FORMAT field
- print join("\t", @s), "\n";
- } else {
- my @t = split;
- if (@col == 0) {
- print join("\t", @t[0..7]), "\n";
- } else {
- print join("\t", @t[0..8], map {$t[$_]} @col), "\n";
- }
- }
- }
- close($fh);
-}
-
-sub listsam {
- die(qq/Usage: vcfutils.pl listsam <in.vcf>\n/) if (@ARGV == 0 && -t STDIN);
- while (<>) {
- if (/^#/ && !/^##/) {
- my @t = split;
- print join("\n", @t[9..$#t]), "\n";
- exit;
- }
- }
-}
-
-sub fillac {
- die(qq/Usage: vcfutils.pl fillac <in.vcf>\n\nNote: The GT field MUST BE present and always appear as the first field.\n/) if (@ARGV == 0 && -t STDIN);
- while (<>) {
- if (/^#/) {
- print;
- } else {
- my @t = split;
- my @c = (0, 0);
- my $n = 0;
- my $s = -1;
- @_ = split(":", $t[8]);
- for (0 .. $#_) {
- if ($_[$_] eq 'GT') { $s = $_; last; }
- }
- if ($s < 0) {
- print join("\t", @t), "\n";
- next;
- }
- for (9 .. $#t) {
- if ($t[$_] =~ /^0,0,0/) {
- } elsif ($t[$_] =~ /^([^\s:]+:){$s}(\d+).(\d+)/) {
- ++$c[$2]; ++$c[$3];
- $n += 2;
- }
- }
- my $AC = "AC=" . join("\t", @c[1..$#c]) . ";AN=$n";
- my $info = $t[7];
- $info =~ s/(;?)AC=(\d+)//;
- $info =~ s/(;?)AN=(\d+)//;
- if ($info eq '.') {
- $info = $AC;
- } else {
- $info .= ";$AC";
- }
- $t[7] = $info;
- print join("\t", @t), "\n";
- }
- }
-}
-
-sub ldstats {
- my %opts = (t=>0.9);
- getopts('t:', \%opts);
- die("Usage: vcfutils.pl ldstats [-t $opts{t}] <in.vcf>\n") if (@ARGV == 0 && -t STDIN);
- my $cutoff = $opts{t};
- my ($last, $lastchr) = (0x7fffffff, '');
- my ($x, $y, $n) = (0, 0, 0);
- while (<>) {
- if (/^([^#\s]+)\s(\d+)/) {
- my ($chr, $pos) = ($1, $2);
- if (/NEIR=([\d\.]+)/) {
- ++$n;
- ++$y, $x += $pos - $last if ($lastchr eq $chr && $pos > $last && $1 > $cutoff);
- }
- $last = $pos; $lastchr = $chr;
- }
- }
- print "Number of SNP intervals in strong LD (r > $opts{t}): $y\n";
- print "Fraction: ", $y/$n, "\n";
- print "Length: $x\n";
-}
-
-sub qstats {
- my %opts = (r=>'', s=>0.02, v=>undef);
- getopts('r:s:v', \%opts);
- die("Usage: vcfutils.pl qstats [-r ref.vcf] <in.vcf>\n
-Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions #joint ts/tv #joint/#ref #joint/#non-indel \n") if (@ARGV == 0 && -t STDIN);
- my %ts = (AG=>1, GA=>1, CT=>1, TC=>1);
- my %h = ();
- my $is_vcf = defined($opts{v})? 1 : 0;
- if ($opts{r}) { # read the reference positions
- my $fh;
- open($fh, $opts{r}) || die;
- while (<$fh>) {
- next if (/^#/);
- if ($is_vcf) {
- my @t = split;
- $h{$t[0],$t[1]} = $t[4];
- } else {
- $h{$1,$2} = 1 if (/^(\S+)\s+(\d+)/);
- }
- }
- close($fh);
- }
- my $hsize = scalar(keys %h);
- my @a;
- while (<>) {
- next if (/^#/);
- my @t = split;
- next if (length($t[3]) != 1 || uc($t[3]) eq 'N');
- $t[3] = uc($t[3]); $t[4] = uc($t[4]);
- my @s = split(',', $t[4]);
- $t[5] = 3 if ($t[5] eq '.' || $t[5] < 0);
- next if (length($s[0]) != 1);
- my $hit;
- if ($is_vcf) {
- $hit = 0;
- my $aa = $h{$t[0],$t[1]};
- if (defined($aa)) {
- my @aaa = split(",", $aa);
- for (@aaa) {
- $hit = 1 if ($_ eq $s[0]);
- }
- }
- } else {
- $hit = defined($h{$t[0],$t[1]})? 1 : 0;
- }
- push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $hit]);
- }
- push(@a, [-1, 0, 0, 0]); # end marker
- die("[qstats] No SNP data!\n") if (@a == 0);
- @a = sort {$b->[0]<=>$a->[0]} @a;
- my $next = $opts{s};
- my $last = $a[0];
- my @c = (0, 0, 0, 0);
- my @lc;
- $lc[1] = $lc[2] = 0;
- for my $p (@a) {
- if ($p->[0] == -1 || ($p->[0] != $last && $c[0]/@a > $next)) {
- my @x;
- $x[0] = sprintf("%.4f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100);
- $x[1] = sprintf("%.4f", $hsize? $c[3] / $hsize : 0);
- $x[2] = sprintf("%.4f", $c[3] / $c[1]);
- my $a = $c[1] - $lc[1];
- my $b = $c[2] - $lc[2];
- $x[3] = sprintf("%.4f", $a-$b? $b / ($a-$b) : 100);
- print join("\t", $last, @c, @x), "\n";
- $next = $c[0]/@a + $opts{s};
- $lc[1] = $c[1]; $lc[2] = $c[2];
- }
- ++$c[0]; $c[1] += $p->[1]; $c[2] += $p->[2]; $c[3] += $p->[3];
- $last = $p->[0];
- }
-}
-
-sub varFilter {
- my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4);
- getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts);
- die(qq/
-Usage: vcfutils.pl varFilter [options] <in.vcf>
-
-Options: -Q INT minimum RMS mapping quality for SNPs [$opts{Q}]
- -d INT minimum read depth [$opts{d}]
- -D INT maximum read depth [$opts{D}]
- -a INT minimum number of alternate bases [$opts{a}]
- -w INT SNP within INT bp around a gap to be filtered [$opts{w}]
- -W INT window size for filtering adjacent gaps [$opts{W}]
- -1 FLOAT min P-value for strand bias (given PV4) [$opts{1}]
- -2 FLOAT min P-value for baseQ bias [$opts{2}]
- -3 FLOAT min P-value for mapQ bias [$opts{3}]
- -4 FLOAT min P-value for end distance bias [$opts{4}]
- -e FLOAT min P-value for HWE (plus F<0) [$opts{e}]
- -p print filtered variants
-
-Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools.
-\n/) if (@ARGV == 0 && -t STDIN);
-
- # calculate the window size
- my ($ol, $ow) = ($opts{W}, $opts{w});
- my $max_dist = $ol > $ow? $ol : $ow;
- # the core loop
- my @staging; # (indel_filtering_score, flt_tag, indel_span; chr, pos, ...)
- while (<>) {
- my @t = split;
- if (/^#/) {
- print; next;
- }
- next if ($t[4] eq '.'); # skip non-var sites
- next if ($t[3] eq 'N'); # skip sites with unknown ref ('N')
- # check if the site is a SNP
- my $type = 1; # SNP
- if (length($t[3]) > 1) {
- $type = 2; # MNP
- my @s = split(',', $t[4]);
- for (@s) {
- $type = 3 if (length != length($t[3]));
- }
- } else {
- my @s = split(',', $t[4]);
- for (@s) {
- $type = 3 if (length > 1);
- }
- }
- # clear the out-of-range elements
- while (@staging) {
- # Still on the same chromosome and the first element's window still affects this position?
- last if ($staging[0][3] eq $t[0] && $staging[0][4] + $staging[0][2] + $max_dist >= $t[1]);
- varFilter_aux(shift(@staging), $opts{p}); # calling a function is a bit slower, not much
- }
- my $flt = 0;
- # parse annotations
- my ($dp, $mq, $dp_alt) = (-1, -1, -1);
- if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
- $dp = $1 + $2 + $3 + $4;
- $dp_alt = $3 + $4;
- }
- if ($t[7] =~ /DP=(\d+)/i) {
- $dp = $1;
- }
- $mq = $1 if ($t[7] =~ /MQ=(\d+)/i);
- # the depth and mapQ filter
- if ($dp >= 0) {
- if ($dp < $opts{d}) {
- $flt = 2;
- } elsif ($dp > $opts{D}) {
- $flt = 3;
- }
- }
- $flt = 4 if ($dp_alt >= 0 && $dp_alt < $opts{a});
- $flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q});
- $flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/
- && ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4}));
- $flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S})));
- # HWE filter
- if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) {
- my $p = 2*$1 + $2;
- my $f = ($p > 0 && $p < 1)? 1 - $2 / ($p * (1-$p)) : 0;
- $flt = 9 if ($f < 0);
- }
-
- my $score = $t[5] * 100 + $dp_alt;
- my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs
- if ($flt == 0) {
- if ($type == 3) { # an indel
- # filtering SNPs and MNPs
- for my $x (@staging) {
- next if (($x->[0]&3) == 3 || $x->[1] || $x->[4] + $x->[2] + $ow < $t[1]);
- $x->[1] = 5;
- }
- # check the staging list for indel filtering
- for my $x (@staging) {
- next if (($x->[0]&3) != 3 || $x->[1] || $x->[4] + $x->[2] + $ol < $t[1]);
- if ($x->[0]>>2 < $score) {
- $x->[1] = 6;
- } else {
- $flt = 6; last;
- }
- }
- } else { # SNP or MNP
- for my $x (@staging) {
- next if (($x->[0]&3) != 3 || $x->[4] + $x->[2] + $ow < $t[1]);
- if ($x->[4] + length($x->[7]) - 1 == $t[1] && substr($x->[7], -1, 1) eq substr($t[4], 0, 1)
- && length($x->[7]) - length($x->[6]) == 1) {
- $x->[1] = 5;
- } else { $flt = 5; }
- last;
- }
- # check MNP
- for my $x (@staging) {
- next if (($x->[0]&3) == 3 || $x->[4] + $x->[2] < $t[1]);
- if ($x->[0]>>2 < $score) {
- $x->[1] = 8;
- } else {
- $flt = 8; last;
- }
- }
- }
- }
- push(@staging, [$score<<2|$type, $flt, $rlen, @t]);
- }
- # output the last few elements in the staging list
- while (@staging) {
- varFilter_aux(shift @staging, $opts{p});
- }
-}
-
-sub varFilter_aux {
- my ($first, $is_print) = @_;
- if ($first->[1] == 0) {
- print join("\t", @$first[3 .. @$first-1]), "\n";
- } elsif ($is_print) {
- print STDERR join("\t", substr("UQdDaGgPMS", $first->[1], 1), @$first[3 .. @$first-1]), "\n";
- }
-}
-
-sub gapstats {
- my (@c0, @c1);
- $c0[$_] = $c1[$_] = 0 for (0 .. 10000);
- while (<>) {
- next if (/^#/);
- my @t = split;
- next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel
- my @s = split(',', $t[4]);
- for my $x (@s) {
- my $l = length($x) - length($t[3]) + 5000;
- if ($x =~ /^-/) {
- $l = -(length($x) - 1) + 5000;
- } elsif ($x =~ /^\+/) {
- $l = length($x) - 1 + 5000;
- }
- $c0[$l] += 1 / @s;
- }
- }
- for (my $i = 0; $i < 10000; ++$i) {
- next if ($c0[$i] == 0);
- $c1[0] += $c0[$i];
- $c1[1] += $c0[$i] if (($i-5000)%3 == 0);
- printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]);
- }
- printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]);
-}
-
-sub ucscsnp2vcf {
- die("Usage: vcfutils.pl <in.ucsc.snp>\n") if (@ARGV == 0 && -t STDIN);
- print "##fileformat=VCFv4.0\n";
- print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"), "\n";
- while (<>) {
- my @t = split("\t");
- my $indel = ($t[9] =~ /^[ACGT](\/[ACGT])+$/)? 0 : 1;
- my $pos = $t[2] + 1;
- my @alt;
- push(@alt, $t[7]);
- if ($t[6] eq '-') {
- $t[9] = reverse($t[9]);
- $t[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/;
- }
- my @a = split("/", $t[9]);
- for (@a) {
- push(@alt, $_) if ($_ ne $alt[0]);
- }
- if ($indel) {
- --$pos;
- for (0 .. $#alt) {
- $alt[$_] =~ tr/-//d;
- $alt[$_] = "N$alt[$_]";
- }
- }
- my $ref = shift(@alt);
- my $af = $t[13] > 0? ";AF=$t[13]" : '';
- my $valid = ($t[12] eq 'unknown')? '' : ";valid=$t[12]";
- my $info = "molType=$t[10];class=$t[11]$valid$af";
- print join("\t", $t[1], $pos, $t[4], $ref, join(",", @alt), 0, '.', $info), "\n";
- }
-}
-
-sub hapmap2vcf {
- die("Usage: vcfutils.pl <in.ucsc.snp> <in.hapmap>\n") if (@ARGV == 0);
- my $fn = shift(@ARGV);
- # parse UCSC SNP
- warn("Parsing UCSC SNPs...\n");
- my ($fh, %map);
- open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
- while (<$fh>) {
- my @t = split;
- next if ($t[3] - $t[2] != 1); # not SNP
- @{$map{$t[4]}} = @t[1,3,7];
- }
- close($fh);
- # write VCF
- warn("Writing VCF...\n");
- print "##fileformat=VCFv4.0\n";
- while (<>) {
- my @t = split;
- if ($t[0] eq 'rs#') { # the first line
- print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", @t[11..$#t]), "\n";
- } else {
- next unless ($map{$t[0]});
- next if (length($t[1]) != 3); # skip non-SNPs
- my $a = \@{$map{$t[0]}};
- my $ref = $a->[2];
- my @u = split('/', $t[1]);
- if ($u[1] eq $ref) {
- $u[1] = $u[0]; $u[0] = $ref;
- } elsif ($u[0] ne $ref) { next; }
- my $alt = $u[1];
- my %w;
- $w{$u[0]} = 0; $w{$u[1]} = 1;
- my @s = (@$a[0,1], $t[0], $ref, $alt, 0, '.', '.', 'GT');
- my $is_tri = 0;
- for (@t[11..$#t]) {
- if ($_ eq 'NN') {
- push(@s, './.');
- } else {
- my @a = ($w{substr($_,0,1)}, $w{substr($_,1,1)});
- if (!defined($a[0]) || !defined($a[1])) {
- $is_tri = 1;
- last;
- }
- push(@s, "$a[0]/$a[1]");
- }
- }
- next if ($is_tri);
- print join("\t", @s), "\n";
- }
- }
-}
-
-sub vcf2fq {
- my %opts = (d=>3, D=>100000, Q=>10, l=>5);
- getopts('d:D:Q:l:', \%opts);
- die(qq/
-Usage: vcfutils.pl vcf2fq [options] <all-site.vcf>
-
-Options: -d INT minimum depth [$opts{d}]
- -D INT maximum depth [$opts{D}]
- -Q INT min RMS mapQ [$opts{Q}]
- -l INT INDEL filtering window [$opts{l}]
-\n/) if (@ARGV == 0 && -t STDIN);
-
- my ($last_chr, $seq, $qual, $last_pos, @gaps);
- my $_Q = $opts{Q};
- my $_d = $opts{d};
- my $_D = $opts{D};
-
- my %het = (AC=>'M', AG=>'R', AT=>'W', CA=>'M', CG=>'S', CT=>'Y',
- GA=>'R', GC=>'S', GT=>'K', TA=>'W', TC=>'Y', TG=>'K');
-
- $last_chr = '';
- while (<>) {
- next if (/^#/);
- my @t = split;
- if ($last_chr ne $t[0]) {
- &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr);
- ($last_chr, $last_pos) = ($t[0], 0);
- $seq = $qual = '';
- @gaps = ();
- }
- die("[vcf2fq] unsorted input\n") if ($t[1] - $last_pos < 0);
- if ($t[1] - $last_pos > 1) {
- $seq .= 'n' x ($t[1] - $last_pos - 1);
- $qual .= '!' x ($t[1] - $last_pos - 1);
- }
- if (length($t[3]) == 1 && $t[7] !~ /INDEL/ && $t[4] =~ /^([A-Za-z.])(,[A-Za-z])*$/) { # a SNP or reference
- my ($ref, $alt) = ($t[3], $1);
- my ($b, $q);
- $q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/);
- if ($q < 0) {
- $_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0;
- $b = ($_ < .5 || $alt eq '.')? $ref : $alt;
- $q = -$q;
- } else {
- $b = $het{"$ref$alt"};
- $b ||= 'N';
- }
- $b = lc($b);
- $b = uc($b) if (($t[7] =~ /MQ=(\d+)/ && $1 >= $_Q) && ($t[7] =~ /DP=(\d+)/ && $1 >= $_d && $1 <= $_D));
- $q = int($q + 33 + .499);
- $q = chr($q <= 126? $q : 126);
- $seq .= $b;
- $qual .= $q;
- } elsif ($t[4] ne '.') { # an INDEL
- push(@gaps, [$t[1], length($t[3])]);
- }
- $last_pos = $t[1];
- }
- &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l});
-}
-
-sub v2q_post_process {
- my ($chr, $seq, $qual, $gaps, $l) = @_;
- for my $g (@$gaps) {
- my $beg = $g->[0] > $l? $g->[0] - $l : 0;
- my $end = $g->[0] + $g->[1] + $l;
- $end = length($$seq) if ($end > length($$seq));
- substr($$seq, $beg, $end - $beg) = lc(substr($$seq, $beg, $end - $beg));
- }
- print "\@$chr\n"; &v2q_print_str($seq);
- print "+\n"; &v2q_print_str($qual);
-}
-
-sub v2q_print_str {
- my ($s) = @_;
- my $l = length($$s);
- for (my $i = 0; $i < $l; $i += 60) {
- print substr($$s, $i, 60), "\n";
- }
-}
-
-sub usage {
- die(qq/
-Usage: vcfutils.pl <command> [<arguments>]\n
-Command: subsam get a subset of samples
- listsam list the samples
- fillac fill the allele count field
- qstats SNP stats stratified by QUAL
-
- hapmap2vcf convert the hapmap format to VCF
- ucscsnp2vcf convert UCSC SNP SQL dump to VCF
-
- varFilter filtering short variants (*)
- vcf2fq VCF->fastq (**)
-
-Notes: Commands with description endting with (*) may need bcftools
- specific annotations.
-\n/);
-}
diff --git a/src/samtools-0.1.18/bedidx.c b/src/samtools-0.1.18/bedidx.c
deleted file mode 100644
index ec75a10..0000000
--- a/src/samtools-0.1.18/bedidx.c
+++ /dev/null
@@ -1,162 +0,0 @@
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <stdio.h>
-#include <zlib.h>
-
-#ifdef _WIN32
-#define drand48() ((double)rand() / RAND_MAX)
-#endif
-
-#include "ksort.h"
-KSORT_INIT_GENERIC(uint64_t)
-
-#include "kseq.h"
-KSTREAM_INIT(gzFile, gzread, 8192)
-
-typedef struct {
- int n, m;
- uint64_t *a;
- int *idx;
-} bed_reglist_t;
-
-#include "khash.h"
-KHASH_MAP_INIT_STR(reg, bed_reglist_t)
-
-#define LIDX_SHIFT 13
-
-typedef kh_reg_t reghash_t;
-
-int *bed_index_core(int n, uint64_t *a, int *n_idx)
-{
- int i, j, m, *idx;
- m = *n_idx = 0; idx = 0;
- for (i = 0; i < n; ++i) {
- int beg, end;
- beg = a[i]>>32 >> LIDX_SHIFT; end = ((uint32_t)a[i]) >> LIDX_SHIFT;
- if (m < end + 1) {
- int oldm = m;
- m = end + 1;
- kroundup32(m);
- idx = realloc(idx, m * sizeof(int));
- for (j = oldm; j < m; ++j) idx[j] = -1;
- }
- if (beg == end) {
- if (idx[beg] < 0) idx[beg] = i;
- } else {
- for (j = beg; j <= end; ++j)
- if (idx[j] < 0) idx[j] = i;
- }
- *n_idx = end + 1;
- }
- return idx;
-}
-
-void bed_index(void *_h)
-{
- reghash_t *h = (reghash_t*)_h;
- khint_t k;
- for (k = 0; k < kh_end(h); ++k) {
- if (kh_exist(h, k)) {
- bed_reglist_t *p = &kh_val(h, k);
- if (p->idx) free(p->idx);
- ks_introsort(uint64_t, p->n, p->a);
- p->idx = bed_index_core(p->n, p->a, &p->m);
- }
- }
-}
-
-int bed_overlap_core(const bed_reglist_t *p, int beg, int end)
-{
- int i, min_off;
- if (p->n == 0) return 0;
- min_off = (beg>>LIDX_SHIFT >= p->n)? p->idx[p->n-1] : p->idx[beg>>LIDX_SHIFT];
- if (min_off < 0) { // TODO: this block can be improved, but speed should not matter too much here
- int n = beg>>LIDX_SHIFT;
- if (n > p->n) n = p->n;
- for (i = n - 1; i >= 0; --i)
- if (p->idx[i] >= 0) break;
- min_off = i >= 0? p->idx[i] : 0;
- }
- for (i = min_off; i < p->n; ++i) {
- if ((int)(p->a[i]>>32) >= end) break; // out of range; no need to proceed
- if ((int32_t)p->a[i] > beg && (int32_t)(p->a[i]>>32) < end)
- return 1; // find the overlap; return
- }
- return 0;
-}
-
-int bed_overlap(const void *_h, const char *chr, int beg, int end)
-{
- const reghash_t *h = (const reghash_t*)_h;
- khint_t k;
- if (!h) return 0;
- k = kh_get(reg, h, chr);
- if (k == kh_end(h)) return 0;
- return bed_overlap_core(&kh_val(h, k), beg, end);
-}
-
-void *bed_read(const char *fn)
-{
- reghash_t *h = kh_init(reg);
- gzFile fp;
- kstream_t *ks;
- int dret;
- kstring_t *str;
- // read the list
- fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
- if (fp == 0) return 0;
- str = calloc(1, sizeof(kstring_t));
- ks = ks_init(fp);
- while (ks_getuntil(ks, 0, str, &dret) >= 0) { // read the chr name
- int beg = -1, end = -1;
- bed_reglist_t *p;
- khint_t k = kh_get(reg, h, str->s);
- if (k == kh_end(h)) { // absent from the hash table
- int ret;
- char *s = strdup(str->s);
- k = kh_put(reg, h, s, &ret);
- memset(&kh_val(h, k), 0, sizeof(bed_reglist_t));
- }
- p = &kh_val(h, k);
- if (dret != '\n') { // if the lines has other characters
- if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
- beg = atoi(str->s); // begin
- if (dret != '\n') {
- if (ks_getuntil(ks, 0, str, &dret) > 0 && isdigit(str->s[0])) {
- end = atoi(str->s); // end
- if (end < beg) end = -1;
- }
- }
- }
- }
- if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n'); // skip the rest of the line
- if (end < 0 && beg > 0) end = beg, beg = beg - 1; // if there is only one column
- if (beg >= 0 && end > beg) {
- if (p->n == p->m) {
- p->m = p->m? p->m<<1 : 4;
- p->a = realloc(p->a, p->m * 8);
- }
- p->a[p->n++] = (uint64_t)beg<<32 | end;
- }
- }
- ks_destroy(ks);
- gzclose(fp);
- free(str->s); free(str);
- bed_index(h);
- return h;
-}
-
-void bed_destroy(void *_h)
-{
- reghash_t *h = (reghash_t*)_h;
- khint_t k;
- for (k = 0; k < kh_end(h); ++k) {
- if (kh_exist(h, k)) {
- free(kh_val(h, k).a);
- free(kh_val(h, k).idx);
- free((char*)kh_key(h, k));
- }
- }
- kh_destroy(reg, h);
-}
diff --git a/src/samtools-0.1.18/bgzf.c b/src/samtools-0.1.18/bgzf.c
deleted file mode 100644
index 216cd04..0000000
--- a/src/samtools-0.1.18/bgzf.c
+++ /dev/null
@@ -1,714 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-
-/*
- 2009-06-29 by lh3: cache recent uncompressed blocks.
- 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.
- 2009-06-12 by lh3: support a mode string like "wu" where 'u' for uncompressed output */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include "bgzf.h"
-
-#include "khash.h"
-typedef struct {
- int size;
- uint8_t *block;
- int64_t end_offset;
-} cache_t;
-KHASH_MAP_INIT_INT64(cache, cache_t)
-
-#if defined(_WIN32) || defined(_MSC_VER)
-#define ftello(fp) ftell(fp)
-#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
-#else
-extern off_t ftello(FILE *stream);
-extern int fseeko(FILE *stream, off_t offset, int whence);
-#endif
-
-typedef int8_t bgzf_byte_t;
-
-static const int DEFAULT_BLOCK_SIZE = 64 * 1024;
-static const int MAX_BLOCK_SIZE = 64 * 1024;
-
-static const int BLOCK_HEADER_LENGTH = 18;
-static const int BLOCK_FOOTER_LENGTH = 8;
-
-static const int GZIP_ID1 = 31;
-static const int GZIP_ID2 = 139;
-static const int CM_DEFLATE = 8;
-static const int FLG_FEXTRA = 4;
-static const int OS_UNKNOWN = 255;
-static const int BGZF_ID1 = 66; // 'B'
-static const int BGZF_ID2 = 67; // 'C'
-static const int BGZF_LEN = 2;
-static const int BGZF_XLEN = 6; // BGZF_LEN+4
-
-static const int GZIP_WINDOW_BITS = -15; // no zlib header
-static const int Z_DEFAULT_MEM_LEVEL = 8;
-
-
-inline
-void
-packInt16(uint8_t* buffer, uint16_t value)
-{
- buffer[0] = value;
- buffer[1] = value >> 8;
-}
-
-inline
-int
-unpackInt16(const uint8_t* buffer)
-{
- return (buffer[0] | (buffer[1] << 8));
-}
-
-inline
-void
-packInt32(uint8_t* buffer, uint32_t value)
-{
- buffer[0] = value;
- buffer[1] = value >> 8;
- buffer[2] = value >> 16;
- buffer[3] = value >> 24;
-}
-
-static inline
-int
-bgzf_min(int x, int y)
-{
- return (x < y) ? x : y;
-}
-
-static
-void
-report_error(BGZF* fp, const char* message) {
- fp->error = message;
-}
-
-int bgzf_check_bgzf(const char *fn)
-{
- BGZF *fp;
- uint8_t buf[10],magic[10]="\037\213\010\4\0\0\0\0\0\377";
- int n;
-
- if ((fp = bgzf_open(fn, "r")) == 0)
- {
- fprintf(stderr, "[bgzf_check_bgzf] failed to open the file: %s\n",fn);
- return -1;
- }
-
-#ifdef _USE_KNETFILE
- n = knet_read(fp->x.fpr, buf, 10);
-#else
- n = fread(buf, 1, 10, fp->file);
-#endif
- bgzf_close(fp);
-
- if ( n!=10 )
- return -1;
-
- if ( !memcmp(magic, buf, 10) ) return 1;
- return 0;
-}
-
-static BGZF *bgzf_read_init()
-{
- BGZF *fp;
- fp = calloc(1, sizeof(BGZF));
- fp->uncompressed_block_size = MAX_BLOCK_SIZE;
- fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);
- fp->compressed_block_size = MAX_BLOCK_SIZE;
- fp->compressed_block = malloc(MAX_BLOCK_SIZE);
- fp->cache_size = 0;
- fp->cache = kh_init(cache);
- return fp;
-}
-
-static
-BGZF*
-open_read(int fd)
-{
-#ifdef _USE_KNETFILE
- knetFile *file = knet_dopen(fd, "r");
-#else
- FILE* file = fdopen(fd, "r");
-#endif
- BGZF* fp;
- if (file == 0) return 0;
- fp = bgzf_read_init();
- fp->file_descriptor = fd;
- fp->open_mode = 'r';
-#ifdef _USE_KNETFILE
- fp->x.fpr = file;
-#else
- fp->file = file;
-#endif
- return fp;
-}
-
-static
-BGZF*
-open_write(int fd, int compress_level) // compress_level==-1 for the default level
-{
- FILE* file = fdopen(fd, "w");
- BGZF* fp;
- if (file == 0) return 0;
- fp = malloc(sizeof(BGZF));
- fp->file_descriptor = fd;
- fp->open_mode = 'w';
- fp->owned_file = 0;
- fp->compress_level = compress_level < 0? Z_DEFAULT_COMPRESSION : compress_level; // Z_DEFAULT_COMPRESSION==-1
- if (fp->compress_level > 9) fp->compress_level = Z_DEFAULT_COMPRESSION;
-#ifdef _USE_KNETFILE
- fp->x.fpw = file;
-#else
- fp->file = file;
-#endif
- fp->uncompressed_block_size = DEFAULT_BLOCK_SIZE;
- fp->uncompressed_block = NULL;
- fp->compressed_block_size = MAX_BLOCK_SIZE;
- fp->compressed_block = malloc(MAX_BLOCK_SIZE);
- fp->block_address = 0;
- fp->block_offset = 0;
- fp->block_length = 0;
- fp->error = NULL;
- return fp;
-}
-
-BGZF*
-bgzf_open(const char* __restrict path, const char* __restrict mode)
-{
- BGZF* fp = NULL;
- if (strchr(mode, 'r') || strchr(mode, 'R')) { /* The reading mode is preferred. */
-#ifdef _USE_KNETFILE
- knetFile *file = knet_open(path, mode);
- if (file == 0) return 0;
- fp = bgzf_read_init();
- fp->file_descriptor = -1;
- fp->open_mode = 'r';
- fp->x.fpr = file;
-#else
- int fd, oflag = O_RDONLY;
-#ifdef _WIN32
- oflag |= O_BINARY;
-#endif
- fd = open(path, oflag);
- if (fd == -1) return 0;
- fp = open_read(fd);
-#endif
- } else if (strchr(mode, 'w') || strchr(mode, 'W')) {
- int fd, compress_level = -1, oflag = O_WRONLY | O_CREAT | O_TRUNC;
-#ifdef _WIN32
- oflag |= O_BINARY;
-#endif
- fd = open(path, oflag, 0666);
- if (fd == -1) return 0;
- { // set compress_level
- int i;
- for (i = 0; mode[i]; ++i)
- if (mode[i] >= '0' && mode[i] <= '9') break;
- if (mode[i]) compress_level = (int)mode[i] - '0';
- if (strchr(mode, 'u')) compress_level = 0;
- }
- fp = open_write(fd, compress_level);
- }
- if (fp != NULL) fp->owned_file = 1;
- return fp;
-}
-
-BGZF*
-bgzf_fdopen(int fd, const char * __restrict mode)
-{
- if (fd == -1) return 0;
- if (mode[0] == 'r' || mode[0] == 'R') {
- return open_read(fd);
- } else if (mode[0] == 'w' || mode[0] == 'W') {
- int i, compress_level = -1;
- for (i = 0; mode[i]; ++i)
- if (mode[i] >= '0' && mode[i] <= '9') break;
- if (mode[i]) compress_level = (int)mode[i] - '0';
- if (strchr(mode, 'u')) compress_level = 0;
- return open_write(fd, compress_level);
- } else {
- return NULL;
- }
-}
-
-static
-int
-deflate_block(BGZF* fp, int block_length)
-{
- // Deflate the block in fp->uncompressed_block into fp->compressed_block.
- // Also adds an extra field that stores the compressed block length.
-
- bgzf_byte_t* buffer = fp->compressed_block;
- int buffer_size = fp->compressed_block_size;
-
- // Init gzip header
- buffer[0] = GZIP_ID1;
- buffer[1] = GZIP_ID2;
- buffer[2] = CM_DEFLATE;
- buffer[3] = FLG_FEXTRA;
- buffer[4] = 0; // mtime
- buffer[5] = 0;
- buffer[6] = 0;
- buffer[7] = 0;
- buffer[8] = 0;
- buffer[9] = OS_UNKNOWN;
- buffer[10] = BGZF_XLEN;
- buffer[11] = 0;
- buffer[12] = BGZF_ID1;
- buffer[13] = BGZF_ID2;
- buffer[14] = BGZF_LEN;
- buffer[15] = 0;
- buffer[16] = 0; // placeholder for block length
- buffer[17] = 0;
-
- // loop to retry for blocks that do not compress enough
- int input_length = block_length;
- int compressed_length = 0;
- while (1) {
- z_stream zs;
- zs.zalloc = NULL;
- zs.zfree = NULL;
- zs.next_in = fp->uncompressed_block;
- zs.avail_in = input_length;
- zs.next_out = (void*)&buffer[BLOCK_HEADER_LENGTH];
- zs.avail_out = buffer_size - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
-
- int status = deflateInit2(&zs, fp->compress_level, Z_DEFLATED,
- GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY);
- if (status != Z_OK) {
- report_error(fp, "deflate init failed");
- return -1;
- }
- status = deflate(&zs, Z_FINISH);
- if (status != Z_STREAM_END) {
- deflateEnd(&zs);
- if (status == Z_OK) {
- // Not enough space in buffer.
- // Can happen in the rare case the input doesn't compress enough.
- // Reduce the amount of input until it fits.
- input_length -= 1024;
- if (input_length <= 0) {
- // should never happen
- report_error(fp, "input reduction failed");
- return -1;
- }
- continue;
- }
- report_error(fp, "deflate failed");
- return -1;
- }
- status = deflateEnd(&zs);
- if (status != Z_OK) {
- report_error(fp, "deflate end failed");
- return -1;
- }
- compressed_length = zs.total_out;
- compressed_length += BLOCK_HEADER_LENGTH + BLOCK_FOOTER_LENGTH;
- if (compressed_length > MAX_BLOCK_SIZE) {
- // should never happen
- report_error(fp, "deflate overflow");
- return -1;
- }
- break;
- }
-
- packInt16((uint8_t*)&buffer[16], compressed_length-1);
- uint32_t crc = crc32(0L, NULL, 0L);
- crc = crc32(crc, fp->uncompressed_block, input_length);
- packInt32((uint8_t*)&buffer[compressed_length-8], crc);
- packInt32((uint8_t*)&buffer[compressed_length-4], input_length);
-
- int remaining = block_length - input_length;
- if (remaining > 0) {
- if (remaining > input_length) {
- // should never happen (check so we can use memcpy)
- report_error(fp, "remainder too large");
- return -1;
- }
- memcpy(fp->uncompressed_block,
- fp->uncompressed_block + input_length,
- remaining);
- }
- fp->block_offset = remaining;
- return compressed_length;
-}
-
-static
-int
-inflate_block(BGZF* fp, int block_length)
-{
- // Inflate the block in fp->compressed_block into fp->uncompressed_block
-
- z_stream zs;
- int status;
- zs.zalloc = NULL;
- zs.zfree = NULL;
- zs.next_in = fp->compressed_block + 18;
- zs.avail_in = block_length - 16;
- zs.next_out = fp->uncompressed_block;
- zs.avail_out = fp->uncompressed_block_size;
-
- status = inflateInit2(&zs, GZIP_WINDOW_BITS);
- if (status != Z_OK) {
- report_error(fp, "inflate init failed");
- return -1;
- }
- status = inflate(&zs, Z_FINISH);
- if (status != Z_STREAM_END) {
- inflateEnd(&zs);
- report_error(fp, "inflate failed");
- return -1;
- }
- status = inflateEnd(&zs);
- if (status != Z_OK) {
- report_error(fp, "inflate failed");
- return -1;
- }
- return zs.total_out;
-}
-
-static
-int
-check_header(const bgzf_byte_t* header)
-{
- return (header[0] == GZIP_ID1 &&
- header[1] == (bgzf_byte_t) GZIP_ID2 &&
- header[2] == Z_DEFLATED &&
- (header[3] & FLG_FEXTRA) != 0 &&
- unpackInt16((uint8_t*)&header[10]) == BGZF_XLEN &&
- header[12] == BGZF_ID1 &&
- header[13] == BGZF_ID2 &&
- unpackInt16((uint8_t*)&header[14]) == BGZF_LEN);
-}
-
-static void free_cache(BGZF *fp)
-{
- khint_t k;
- khash_t(cache) *h = (khash_t(cache)*)fp->cache;
- if (fp->open_mode != 'r') return;
- for (k = kh_begin(h); k < kh_end(h); ++k)
- if (kh_exist(h, k)) free(kh_val(h, k).block);
- kh_destroy(cache, h);
-}
-
-static int load_block_from_cache(BGZF *fp, int64_t block_address)
-{
- khint_t k;
- cache_t *p;
- khash_t(cache) *h = (khash_t(cache)*)fp->cache;
- k = kh_get(cache, h, block_address);
- if (k == kh_end(h)) return 0;
- p = &kh_val(h, k);
- if (fp->block_length != 0) fp->block_offset = 0;
- fp->block_address = block_address;
- fp->block_length = p->size;
- memcpy(fp->uncompressed_block, p->block, MAX_BLOCK_SIZE);
-#ifdef _USE_KNETFILE
- knet_seek(fp->x.fpr, p->end_offset, SEEK_SET);
-#else
- fseeko(fp->file, p->end_offset, SEEK_SET);
-#endif
- return p->size;
-}
-
-static void cache_block(BGZF *fp, int size)
-{
- int ret;
- khint_t k;
- cache_t *p;
- khash_t(cache) *h = (khash_t(cache)*)fp->cache;
- if (MAX_BLOCK_SIZE >= fp->cache_size) return;
- if ((kh_size(h) + 1) * MAX_BLOCK_SIZE > fp->cache_size) {
- /* A better way would be to remove the oldest block in the
- * cache, but here we remove a random one for simplicity. This
- * should not have a big impact on performance. */
- for (k = kh_begin(h); k < kh_end(h); ++k)
- if (kh_exist(h, k)) break;
- if (k < kh_end(h)) {
- free(kh_val(h, k).block);
- kh_del(cache, h, k);
- }
- }
- k = kh_put(cache, h, fp->block_address, &ret);
- if (ret == 0) return; // if this happens, a bug!
- p = &kh_val(h, k);
- p->size = fp->block_length;
- p->end_offset = fp->block_address + size;
- p->block = malloc(MAX_BLOCK_SIZE);
- memcpy(kh_val(h, k).block, fp->uncompressed_block, MAX_BLOCK_SIZE);
-}
-
-int
-bgzf_read_block(BGZF* fp)
-{
- bgzf_byte_t header[BLOCK_HEADER_LENGTH];
- int count, size = 0, block_length, remaining;
-#ifdef _USE_KNETFILE
- int64_t block_address = knet_tell(fp->x.fpr);
- if (load_block_from_cache(fp, block_address)) return 0;
- count = knet_read(fp->x.fpr, header, sizeof(header));
-#else
- int64_t block_address = ftello(fp->file);
- if (load_block_from_cache(fp, block_address)) return 0;
- count = fread(header, 1, sizeof(header), fp->file);
-#endif
- if (count == 0) {
- fp->block_length = 0;
- return 0;
- }
- size = count;
- if (count != sizeof(header)) {
- report_error(fp, "read failed");
- return -1;
- }
- if (!check_header(header)) {
- report_error(fp, "invalid block header");
- return -1;
- }
- block_length = unpackInt16((uint8_t*)&header[16]) + 1;
- bgzf_byte_t* compressed_block = (bgzf_byte_t*) fp->compressed_block;
- memcpy(compressed_block, header, BLOCK_HEADER_LENGTH);
- remaining = block_length - BLOCK_HEADER_LENGTH;
-#ifdef _USE_KNETFILE
- count = knet_read(fp->x.fpr, &compressed_block[BLOCK_HEADER_LENGTH], remaining);
-#else
- count = fread(&compressed_block[BLOCK_HEADER_LENGTH], 1, remaining, fp->file);
-#endif
- if (count != remaining) {
- report_error(fp, "read failed");
- return -1;
- }
- size += count;
- count = inflate_block(fp, block_length);
- if (count < 0) return -1;
- if (fp->block_length != 0) {
- // Do not reset offset if this read follows a seek.
- fp->block_offset = 0;
- }
- fp->block_address = block_address;
- fp->block_length = count;
- cache_block(fp, size);
- return 0;
-}
-
-int
-bgzf_read(BGZF* fp, void* data, int length)
-{
- if (length <= 0) {
- return 0;
- }
- if (fp->open_mode != 'r') {
- report_error(fp, "file not open for reading");
- return -1;
- }
-
- int bytes_read = 0;
- bgzf_byte_t* output = data;
- while (bytes_read < length) {
- int copy_length, available = fp->block_length - fp->block_offset;
- bgzf_byte_t *buffer;
- if (available <= 0) {
- if (bgzf_read_block(fp) != 0) {
- return -1;
- }
- available = fp->block_length - fp->block_offset;
- if (available <= 0) {
- break;
- }
- }
- copy_length = bgzf_min(length-bytes_read, available);
- buffer = fp->uncompressed_block;
- memcpy(output, buffer + fp->block_offset, copy_length);
- fp->block_offset += copy_length;
- output += copy_length;
- bytes_read += copy_length;
- }
- if (fp->block_offset == fp->block_length) {
-#ifdef _USE_KNETFILE
- fp->block_address = knet_tell(fp->x.fpr);
-#else
- fp->block_address = ftello(fp->file);
-#endif
- fp->block_offset = 0;
- fp->block_length = 0;
- }
- return bytes_read;
-}
-
-int bgzf_flush(BGZF* fp)
-{
- while (fp->block_offset > 0) {
- int count, block_length;
- block_length = deflate_block(fp, fp->block_offset);
- if (block_length < 0) return -1;
-#ifdef _USE_KNETFILE
- count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
-#else
- count = fwrite(fp->compressed_block, 1, block_length, fp->file);
-#endif
- if (count != block_length) {
- report_error(fp, "write failed");
- return -1;
- }
- fp->block_address += block_length;
- }
- return 0;
-}
-
-int bgzf_flush_try(BGZF *fp, int size)
-{
- if (fp->block_offset + size > fp->uncompressed_block_size)
- return bgzf_flush(fp);
- return -1;
-}
-
-int bgzf_write(BGZF* fp, const void* data, int length)
-{
- const bgzf_byte_t *input = data;
- int block_length, bytes_written;
- if (fp->open_mode != 'w') {
- report_error(fp, "file not open for writing");
- return -1;
- }
-
- if (fp->uncompressed_block == NULL)
- fp->uncompressed_block = malloc(fp->uncompressed_block_size);
-
- input = data;
- block_length = fp->uncompressed_block_size;
- bytes_written = 0;
- while (bytes_written < length) {
- int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);
- bgzf_byte_t* buffer = fp->uncompressed_block;
- memcpy(buffer + fp->block_offset, input, copy_length);
- fp->block_offset += copy_length;
- input += copy_length;
- bytes_written += copy_length;
- if (fp->block_offset == block_length) {
- if (bgzf_flush(fp) != 0) {
- break;
- }
- }
- }
- return bytes_written;
-}
-
-int bgzf_close(BGZF* fp)
-{
- if (fp->open_mode == 'w') {
- if (bgzf_flush(fp) != 0) return -1;
- { // add an empty block
- int count, block_length = deflate_block(fp, 0);
-#ifdef _USE_KNETFILE
- count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);
-#else
- count = fwrite(fp->compressed_block, 1, block_length, fp->file);
-#endif
- }
-#ifdef _USE_KNETFILE
- if (fflush(fp->x.fpw) != 0) {
-#else
- if (fflush(fp->file) != 0) {
-#endif
- report_error(fp, "flush failed");
- return -1;
- }
- }
- if (fp->owned_file) {
-#ifdef _USE_KNETFILE
- int ret;
- if (fp->open_mode == 'w') ret = fclose(fp->x.fpw);
- else ret = knet_close(fp->x.fpr);
- if (ret != 0) return -1;
-#else
- if (fclose(fp->file) != 0) return -1;
-#endif
- }
- free(fp->uncompressed_block);
- free(fp->compressed_block);
- free_cache(fp);
- free(fp);
- return 0;
-}
-
-void bgzf_set_cache_size(BGZF *fp, int cache_size)
-{
- if (fp) fp->cache_size = cache_size;
-}
-
-int bgzf_check_EOF(BGZF *fp)
-{
- static uint8_t magic[28] = "\037\213\010\4\0\0\0\0\0\377\6\0\102\103\2\0\033\0\3\0\0\0\0\0\0\0\0\0";
- uint8_t buf[28];
- off_t offset;
-#ifdef _USE_KNETFILE
- offset = knet_tell(fp->x.fpr);
- if (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;
- knet_read(fp->x.fpr, buf, 28);
- knet_seek(fp->x.fpr, offset, SEEK_SET);
-#else
- offset = ftello(fp->file);
- if (fseeko(fp->file, -28, SEEK_END) != 0) return -1;
- fread(buf, 1, 28, fp->file);
- fseeko(fp->file, offset, SEEK_SET);
-#endif
- return (memcmp(magic, buf, 28) == 0)? 1 : 0;
-}
-
-int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)
-{
- int block_offset;
- int64_t block_address;
-
- if (fp->open_mode != 'r') {
- report_error(fp, "file not open for read");
- return -1;
- }
- if (where != SEEK_SET) {
- report_error(fp, "unimplemented seek option");
- return -1;
- }
- block_offset = pos & 0xFFFF;
- block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;
-#ifdef _USE_KNETFILE
- if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {
-#else
- if (fseeko(fp->file, block_address, SEEK_SET) != 0) {
-#endif
- report_error(fp, "seek failed");
- return -1;
- }
- fp->block_length = 0; // indicates current block is not loaded
- fp->block_address = block_address;
- fp->block_offset = block_offset;
- return 0;
-}
diff --git a/src/samtools-0.1.18/bgzf.h b/src/samtools-0.1.18/bgzf.h
deleted file mode 100644
index 7295f37..0000000
--- a/src/samtools-0.1.18/bgzf.h
+++ /dev/null
@@ -1,157 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-
-#ifndef __BGZF_H
-#define __BGZF_H
-
-#include <stdint.h>
-#include <stdio.h>
-#include <zlib.h>
-#ifdef _USE_KNETFILE
-#include "knetfile.h"
-#endif
-
-//typedef int8_t bool;
-
-typedef struct {
- int file_descriptor;
- char open_mode; // 'r' or 'w'
- int16_t owned_file, compress_level;
-#ifdef _USE_KNETFILE
- union {
- knetFile *fpr;
- FILE *fpw;
- } x;
-#else
- FILE* file;
-#endif
- int uncompressed_block_size;
- int compressed_block_size;
- void* uncompressed_block;
- void* compressed_block;
- int64_t block_address;
- int block_length;
- int block_offset;
- int cache_size;
- const char* error;
- void *cache; // a pointer to a hash table
-} BGZF;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Open an existing file descriptor for reading or writing.
- * Mode must be either "r" or "w".
- * A subsequent bgzf_close will not close the file descriptor.
- * Returns null on error.
- */
-BGZF* bgzf_fdopen(int fd, const char* __restrict mode);
-
-/*
- * Open the specified file for reading or writing.
- * Mode must be either "r" or "w".
- * Returns null on error.
- */
-BGZF* bgzf_open(const char* path, const char* __restrict mode);
-
-/*
- * Close the BGZ file and free all associated resources.
- * Does not close the underlying file descriptor if created with bgzf_fdopen.
- * Returns zero on success, -1 on error.
- */
-int bgzf_close(BGZF* fp);
-
-/*
- * Read up to length bytes from the file storing into data.
- * Returns the number of bytes actually read.
- * Returns zero on end of file.
- * Returns -1 on error.
- */
-int bgzf_read(BGZF* fp, void* data, int length);
-
-/*
- * Write length bytes from data to the file.
- * Returns the number of bytes written.
- * Returns -1 on error.
- */
-int bgzf_write(BGZF* fp, const void* data, int length);
-
-/*
- * Return a virtual file pointer to the current location in the file.
- * No interpetation of the value should be made, other than a subsequent
- * call to bgzf_seek can be used to position the file at the same point.
- * Return value is non-negative on success.
- * Returns -1 on error.
- */
-#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF))
-
-/*
- * Set the file to read from the location specified by pos, which must
- * be a value previously returned by bgzf_tell for this file (but not
- * necessarily one returned by this file handle).
- * The where argument must be SEEK_SET.
- * Seeking on a file opened for write is not supported.
- * Returns zero on success, -1 on error.
- */
-int64_t bgzf_seek(BGZF* fp, int64_t pos, int where);
-
-/*
- * Set the cache size. Zero to disable. By default, caching is
- * disabled. The recommended cache size for frequent random access is
- * about 8M bytes.
- */
-void bgzf_set_cache_size(BGZF *fp, int cache_size);
-
-int bgzf_check_EOF(BGZF *fp);
-int bgzf_read_block(BGZF* fp);
-int bgzf_flush(BGZF* fp);
-int bgzf_flush_try(BGZF *fp, int size);
-int bgzf_check_bgzf(const char *fn);
-
-#ifdef __cplusplus
-}
-#endif
-
-static inline int bgzf_getc(BGZF *fp)
-{
- int c;
- if (fp->block_offset >= fp->block_length) {
- if (bgzf_read_block(fp) != 0) return -2; /* error */
- if (fp->block_length == 0) return -1; /* end-of-file */
- }
- c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++];
- if (fp->block_offset == fp->block_length) {
-#ifdef _USE_KNETFILE
- fp->block_address = knet_tell(fp->x.fpr);
-#else
- fp->block_address = ftello(fp->file);
-#endif
- fp->block_offset = 0;
- fp->block_length = 0;
- }
- return c;
-}
-
-#endif
diff --git a/src/samtools-0.1.18/bgzip.c b/src/samtools-0.1.18/bgzip.c
deleted file mode 100644
index ebcafa2..0000000
--- a/src/samtools-0.1.18/bgzip.c
+++ /dev/null
@@ -1,206 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- THE SOFTWARE.
-*/
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/select.h>
-#include <sys/stat.h>
-#include "bgzf.h"
-
-static const int WINDOW_SIZE = 64 * 1024;
-
-static int bgzip_main_usage()
-{
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: bgzip [options] [file] ...\n\n");
- fprintf(stderr, "Options: -c write on standard output, keep original files unchanged\n");
- fprintf(stderr, " -d decompress\n");
- fprintf(stderr, " -f overwrite files without asking\n");
- fprintf(stderr, " -b INT decompress at virtual file pointer INT\n");
- fprintf(stderr, " -s INT decompress INT bytes in the uncompressed file\n");
- fprintf(stderr, " -h give this help\n");
- fprintf(stderr, "\n");
- return 1;
-}
-
-static int write_open(const char *fn, int is_forced)
-{
- int fd = -1;
- char c;
- if (!is_forced) {
- if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) {
- fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? ", fn);
- scanf("%c", &c);
- if (c != 'Y' && c != 'y') {
- fprintf(stderr, "[bgzip] not overwritten\n");
- exit(1);
- }
- }
- }
- if (fd < 0) {
- if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {
- fprintf(stderr, "[bgzip] %s: Fail to write\n", fn);
- exit(1);
- }
- }
- return fd;
-}
-
-static void fail(BGZF* fp)
-{
- fprintf(stderr, "Error: %s\n", fp->error);
- exit(1);
-}
-
-int main(int argc, char **argv)
-{
- int c, compress, pstdout, is_forced;
- BGZF *fp;
- void *buffer;
- long start, end, size;
-
- compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
- while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){
- switch(c){
- case 'h': return bgzip_main_usage();
- case 'd': compress = 0; break;
- case 'c': pstdout = 1; break;
- case 'b': start = atol(optarg); break;
- case 's': size = atol(optarg); break;
- case 'f': is_forced = 1; break;
- }
- }
- if (size >= 0) end = start + size;
- if (end >= 0 && end < start) {
- fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end);
- return 1;
- }
- if (compress == 1) {
- struct stat sbuf;
- int f_src = fileno(stdin);
- int f_dst = fileno(stdout);
-
- if ( argc>optind )
- {
- if ( stat(argv[optind],&sbuf)<0 )
- {
- fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
- return 1;
- }
-
- if ((f_src = open(argv[optind], O_RDONLY)) < 0) {
- fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
- return 1;
- }
-
- if (pstdout)
- f_dst = fileno(stdout);
- else
- {
- char *name = malloc(strlen(argv[optind]) + 5);
- strcpy(name, argv[optind]);
- strcat(name, ".gz");
- f_dst = write_open(name, is_forced);
- if (f_dst < 0) return 1;
- free(name);
- }
- }
- else if (!pstdout && isatty(fileno((FILE *)stdout)) )
- return bgzip_main_usage();
-
- fp = bgzf_fdopen(f_dst, "w");
- buffer = malloc(WINDOW_SIZE);
- while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0)
- if (bgzf_write(fp, buffer, c) < 0) fail(fp);
- // f_dst will be closed here
- if (bgzf_close(fp) < 0) fail(fp);
- if (argc > optind && !pstdout) unlink(argv[optind]);
- free(buffer);
- close(f_src);
- return 0;
- } else {
- struct stat sbuf;
- int f_dst;
-
- if ( argc>optind )
- {
- if ( stat(argv[optind],&sbuf)<0 )
- {
- fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]);
- return 1;
- }
- char *name;
- int len = strlen(argv[optind]);
- if ( strcmp(argv[optind]+len-3,".gz") )
- {
- fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]);
- return 1;
- }
- fp = bgzf_open(argv[optind], "r");
- if (fp == NULL) {
- fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]);
- return 1;
- }
-
- if (pstdout) {
- f_dst = fileno(stdout);
- }
- else {
- name = strdup(argv[optind]);
- name[strlen(name) - 3] = '\0';
- f_dst = write_open(name, is_forced);
- free(name);
- }
- }
- else if (!pstdout && isatty(fileno((FILE *)stdin)) )
- return bgzip_main_usage();
- else
- {
- f_dst = fileno(stdout);
- fp = bgzf_fdopen(fileno(stdin), "r");
- if (fp == NULL) {
- fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno));
- return 1;
- }
- }
- buffer = malloc(WINDOW_SIZE);
- if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp);
- while (1) {
- if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);
- else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
- if (c == 0) break;
- if (c < 0) fail(fp);
- start += c;
- write(f_dst, buffer, c);
- if (end >= 0 && start >= end) break;
- }
- free(buffer);
- if (bgzf_close(fp) < 0) fail(fp);
- if (!pstdout) unlink(argv[optind]);
- return 0;
- }
-}
diff --git a/src/samtools-0.1.18/cut_target.c b/src/samtools-0.1.18/cut_target.c
deleted file mode 100644
index 26f434f..0000000
--- a/src/samtools-0.1.18/cut_target.c
+++ /dev/null
@@ -1,193 +0,0 @@
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include "bam.h"
-#include "errmod.h"
-#include "faidx.h"
-
-#define ERR_DEP 0.83f
-
-typedef struct {
- int e[2][3], p[2][2];
-} score_param_t;
-
-/* Note that although the two matrics have 10 parameters in total, only 4
- * (probably 3) are free. Changing the scoring matrices in a sort of symmetric
- * way will not change the result. */
-static score_param_t g_param = { {{0,0,0},{-4,1,6}}, {{0,-14000}, {0,0}} };
-
-typedef struct {
- int min_baseQ, tid, max_bases;
- uint16_t *bases;
- bamFile fp;
- bam_header_t *h;
- char *ref;
- faidx_t *fai;
- errmod_t *em;
-} ct_t;
-
-static uint16_t gencns(ct_t *g, int n, const bam_pileup1_t *plp)
-{
- int i, j, ret, tmp, k, sum[4], qual;
- float q[16];
- if (n > g->max_bases) { // enlarge g->bases
- g->max_bases = n;
- kroundup32(g->max_bases);
- g->bases = realloc(g->bases, g->max_bases * 2);
- }
- for (i = k = 0; i < n; ++i) {
- const bam_pileup1_t *p = plp + i;
- uint8_t *seq;
- int q, baseQ, b;
- if (p->is_refskip || p->is_del) continue;
- baseQ = bam1_qual(p->b)[p->qpos];
- if (baseQ < g->min_baseQ) continue;
- seq = bam1_seq(p->b);
- b = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos)];
- if (b > 3) continue;
- q = baseQ < p->b->core.qual? baseQ : p->b->core.qual;
- if (q < 4) q = 4;
- if (q > 63) q = 63;
- g->bases[k++] = q<<5 | bam1_strand(p->b)<<4 | b;
- }
- if (k == 0) return 0;
- errmod_cal(g->em, k, 4, g->bases, q);
- for (i = 0; i < 4; ++i) sum[i] = (int)(q[i<<2|i] + .499) << 2 | i;
- for (i = 1; i < 4; ++i) // insertion sort
- for (j = i; j > 0 && sum[j] < sum[j-1]; --j)
- tmp = sum[j], sum[j] = sum[j-1], sum[j-1] = tmp;
- qual = (sum[1]>>2) - (sum[0]>>2);
- k = k < 256? k : 255;
- ret = (qual < 63? qual : 63) << 2 | (sum[0]&3);
- return ret<<8|k;
-}
-
-static void process_cns(bam_header_t *h, int tid, int l, uint16_t *cns)
-{
- int i, f[2][2], *prev, *curr, *swap_tmp, s;
- uint8_t *b; // backtrack array
- b = calloc(l, 1);
- f[0][0] = f[0][1] = 0;
- prev = f[0]; curr = f[1];
- // fill the backtrack matrix
- for (i = 0; i < l; ++i) {
- int c = (cns[i] == 0)? 0 : (cns[i]>>8 == 0)? 1 : 2;
- int tmp0, tmp1;
- // compute f[0]
- tmp0 = prev[0] + g_param.e[0][c] + g_param.p[0][0]; // (s[i+1],s[i])=(0,0)
- tmp1 = prev[1] + g_param.e[0][c] + g_param.p[1][0]; // (0,1)
- if (tmp0 > tmp1) curr[0] = tmp0, b[i] = 0;
- else curr[0] = tmp1, b[i] = 1;
- // compute f[1]
- tmp0 = prev[0] + g_param.e[1][c] + g_param.p[0][1]; // (s[i+1],s[i])=(1,0)
- tmp1 = prev[1] + g_param.e[1][c] + g_param.p[1][1]; // (1,1)
- if (tmp0 > tmp1) curr[1] = tmp0, b[i] |= 0<<1;
- else curr[1] = tmp1, b[i] |= 1<<1;
- // swap
- swap_tmp = prev; prev = curr; curr = swap_tmp;
- }
- // backtrack
- s = prev[0] > prev[1]? 0 : 1;
- for (i = l - 1; i > 0; --i) {
- b[i] |= s<<2;
- s = b[i]>>s&1;
- }
- // print
- for (i = 0, s = -1; i <= l; ++i) {
- if (i == l || ((b[i]>>2&3) == 0 && s >= 0)) {
- if (s >= 0) {
- int j;
- printf("%s:%d-%d\t0\t%s\t%d\t60\t%dM\t*\t0\t0\t", h->target_name[tid], s+1, i, h->target_name[tid], s+1, i-s);
- for (j = s; j < i; ++j) {
- int c = cns[j]>>8;
- if (c == 0) putchar('N');
- else putchar("ACGT"[c&3]);
- }
- putchar('\t');
- for (j = s; j < i; ++j)
- putchar(33 + (cns[j]>>8>>2));
- putchar('\n');
- }
- //if (s >= 0) printf("%s\t%d\t%d\t%d\n", h->target_name[tid], s, i, i - s);
- s = -1;
- } else if ((b[i]>>2&3) && s < 0) s = i;
- }
- free(b);
-}
-
-static int read_aln(void *data, bam1_t *b)
-{
- extern int bam_prob_realn_core(bam1_t *b, const char *ref, int flag);
- ct_t *g = (ct_t*)data;
- int ret, len;
- ret = bam_read1(g->fp, b);
- if (ret >= 0 && g->fai && b->core.tid >= 0 && (b->core.flag&4) == 0) {
- if (b->core.tid != g->tid) { // then load the sequence
- free(g->ref);
- g->ref = fai_fetch(g->fai, g->h->target_name[b->core.tid], &len);
- g->tid = b->core.tid;
- }
- bam_prob_realn_core(b, g->ref, 1<<1|1);
- }
- return ret;
-}
-
-int main_cut_target(int argc, char *argv[])
-{
- int c, tid, pos, n, lasttid = -1, lastpos = -1, l, max_l;
- const bam_pileup1_t *p;
- bam_plp_t plp;
- uint16_t *cns;
- ct_t g;
-
- memset(&g, 0, sizeof(ct_t));
- g.min_baseQ = 13; g.tid = -1;
- while ((c = getopt(argc, argv, "f:Q:i:o:0:1:2:")) >= 0) {
- switch (c) {
- case 'Q': g.min_baseQ = atoi(optarg); break; // quality cutoff
- case 'i': g_param.p[0][1] = -atoi(optarg); break; // 0->1 transition (in) PENALTY
- case '0': g_param.e[1][0] = atoi(optarg); break; // emission SCORE
- case '1': g_param.e[1][1] = atoi(optarg); break;
- case '2': g_param.e[1][2] = atoi(optarg); break;
- case 'f': g.fai = fai_load(optarg);
- if (g.fai == 0) fprintf(stderr, "[%s] fail to load the fasta index.\n", __func__);
- break;
- }
- }
- if (argc == optind) {
- fprintf(stderr, "Usage: samtools targetcut [-Q minQ] [-i inPen] [-0 em0] [-1 em1] [-2 em2] [-f ref] <in.bam>\n");
- return 1;
- }
- l = max_l = 0; cns = 0;
- g.fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
- g.h = bam_header_read(g.fp);
- g.em = errmod_init(1 - ERR_DEP);
- plp = bam_plp_init(read_aln, &g);
- while ((p = bam_plp_auto(plp, &tid, &pos, &n)) != 0) {
- if (tid < 0) break;
- if (tid != lasttid) { // change of chromosome
- if (cns) process_cns(g.h, lasttid, l, cns);
- if (max_l < g.h->target_len[tid]) {
- max_l = g.h->target_len[tid];
- kroundup32(max_l);
- cns = realloc(cns, max_l * 2);
- }
- l = g.h->target_len[tid];
- memset(cns, 0, max_l * 2);
- lasttid = tid;
- }
- cns[pos] = gencns(&g, n, p);
- lastpos = pos;
- }
- process_cns(g.h, lasttid, l, cns);
- free(cns);
- bam_header_destroy(g.h);
- bam_plp_destroy(plp);
- bam_close(g.fp);
- if (g.fai) {
- fai_destroy(g.fai); free(g.ref);
- }
- errmod_destroy(g.em);
- free(g.bases);
- return 0;
-}
diff --git a/src/samtools-0.1.18/errmod.c b/src/samtools-0.1.18/errmod.c
deleted file mode 100644
index fba9a8d..0000000
--- a/src/samtools-0.1.18/errmod.c
+++ /dev/null
@@ -1,130 +0,0 @@
-#include <math.h>
-#include "errmod.h"
-#include "ksort.h"
-KSORT_INIT_GENERIC(uint16_t)
-
-typedef struct __errmod_coef_t {
- double *fk, *beta, *lhet;
-} errmod_coef_t;
-
-typedef struct {
- double fsum[16], bsum[16];
- uint32_t c[16];
-} call_aux_t;
-
-static errmod_coef_t *cal_coef(double depcorr, double eta)
-{
- int k, n, q;
- long double sum, sum1;
- double *lC;
- errmod_coef_t *ec;
-
- ec = calloc(1, sizeof(errmod_coef_t));
- // initialize ->fk
- ec->fk = (double*)calloc(256, sizeof(double));
- ec->fk[0] = 1.0;
- for (n = 1; n != 256; ++n)
- ec->fk[n] = pow(1. - depcorr, n) * (1.0 - eta) + eta;
- // initialize ->coef
- ec->beta = (double*)calloc(256 * 256 * 64, sizeof(double));
- lC = (double*)calloc(256 * 256, sizeof(double));
- for (n = 1; n != 256; ++n) {
- double lgn = lgamma(n+1);
- for (k = 1; k <= n; ++k)
- lC[n<<8|k] = lgn - lgamma(k+1) - lgamma(n-k+1);
- }
- for (q = 1; q != 64; ++q) {
- double e = pow(10.0, -q/10.0);
- double le = log(e);
- double le1 = log(1.0 - e);
- for (n = 1; n <= 255; ++n) {
- double *beta = ec->beta + (q<<16|n<<8);
- sum1 = sum = 0.0;
- for (k = n; k >= 0; --k, sum1 = sum) {
- sum = sum1 + expl(lC[n<<8|k] + k*le + (n-k)*le1);
- beta[k] = -10. / M_LN10 * logl(sum1 / sum);
- }
- }
- }
- // initialize ->lhet
- ec->lhet = (double*)calloc(256 * 256, sizeof(double));
- for (n = 0; n < 256; ++n)
- for (k = 0; k < 256; ++k)
- ec->lhet[n<<8|k] = lC[n<<8|k] - M_LN2 * n;
- free(lC);
- return ec;
-}
-
-errmod_t *errmod_init(float depcorr)
-{
- errmod_t *em;
- em = (errmod_t*)calloc(1, sizeof(errmod_t));
- em->depcorr = depcorr;
- em->coef = cal_coef(depcorr, 0.03);
- return em;
-}
-
-void errmod_destroy(errmod_t *em)
-{
- if (em == 0) return;
- free(em->coef->lhet); free(em->coef->fk); free(em->coef->beta);
- free(em->coef); free(em);
-}
-// qual:6, strand:1, base:4
-int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q)
-{
- call_aux_t aux;
- int i, j, k, w[32];
-
- if (m > m) return -1;
- memset(q, 0, m * m * sizeof(float));
- if (n == 0) return 0;
- // calculate aux.esum and aux.fsum
- if (n > 255) { // then sample 255 bases
- ks_shuffle(uint16_t, n, bases);
- n = 255;
- }
- ks_introsort(uint16_t, n, bases);
- memset(w, 0, 32 * sizeof(int));
- memset(&aux, 0, sizeof(call_aux_t));
- for (j = n - 1; j >= 0; --j) { // calculate esum and fsum
- uint16_t b = bases[j];
- int q = b>>5 < 4? 4 : b>>5;
- if (q > 63) q = 63;
- k = b&0x1f;
- aux.fsum[k&0xf] += em->coef->fk[w[k]];
- aux.bsum[k&0xf] += em->coef->fk[w[k]] * em->coef->beta[q<<16|n<<8|aux.c[k&0xf]];
- ++aux.c[k&0xf];
- ++w[k];
- }
- // generate likelihood
- for (j = 0; j != m; ++j) {
- float tmp1, tmp3;
- int tmp2, bar_e;
- // homozygous
- for (k = 0, tmp1 = tmp3 = 0.0, tmp2 = 0; k != m; ++k) {
- if (k == j) continue;
- tmp1 += aux.bsum[k]; tmp2 += aux.c[k]; tmp3 += aux.fsum[k];
- }
- if (tmp2) {
- bar_e = (int)(tmp1 / tmp3 + 0.499);
- if (bar_e > 63) bar_e = 63;
- q[j*m+j] = tmp1;
- }
- // heterozygous
- for (k = j + 1; k < m; ++k) {
- int cjk = aux.c[j] + aux.c[k];
- for (i = 0, tmp2 = 0, tmp1 = tmp3 = 0.0; i < m; ++i) {
- if (i == j || i == k) continue;
- tmp1 += aux.bsum[i]; tmp2 += aux.c[i]; tmp3 += aux.fsum[i];
- }
- if (tmp2) {
- bar_e = (int)(tmp1 / tmp3 + 0.499);
- if (bar_e > 63) bar_e = 63;
- q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]] + tmp1;
- } else q[j*m+k] = q[k*m+j] = -4.343 * em->coef->lhet[cjk<<8|aux.c[k]]; // all the bases are either j or k
- }
- for (k = 0; k != m; ++k) if (q[j*m+k] < 0.0) q[j*m+k] = 0.0;
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/errmod.h b/src/samtools-0.1.18/errmod.h
deleted file mode 100644
index 32c07b6..0000000
--- a/src/samtools-0.1.18/errmod.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef ERRMOD_H
-#define ERRMOD_H
-
-#include <stdint.h>
-
-struct __errmod_coef_t;
-
-typedef struct {
- double depcorr;
- struct __errmod_coef_t *coef;
-} errmod_t;
-
-errmod_t *errmod_init(float depcorr);
-void errmod_destroy(errmod_t *em);
-
-/*
- n: number of bases
- m: maximum base
- bases[i]: qual:6, strand:1, base:4
- q[i*m+j]: phred-scaled likelihood of (i,j)
- */
-int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q);
-
-#endif
diff --git a/src/samtools-0.1.18/faidx.c b/src/samtools-0.1.18/faidx.c
deleted file mode 100644
index f0798fc..0000000
--- a/src/samtools-0.1.18/faidx.c
+++ /dev/null
@@ -1,432 +0,0 @@
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include "faidx.h"
-#include "khash.h"
-
-typedef struct {
- int32_t line_len, line_blen;
- int64_t len;
- uint64_t offset;
-} faidx1_t;
-KHASH_MAP_INIT_STR(s, faidx1_t)
-
-#ifndef _NO_RAZF
-#include "razf.h"
-#else
-#ifdef _WIN32
-#define ftello(fp) ftell(fp)
-#define fseeko(fp, offset, whence) fseek(fp, offset, whence)
-#else
-extern off_t ftello(FILE *stream);
-extern int fseeko(FILE *stream, off_t offset, int whence);
-#endif
-#define RAZF FILE
-#define razf_read(fp, buf, size) fread(buf, 1, size, fp)
-#define razf_open(fn, mode) fopen(fn, mode)
-#define razf_close(fp) fclose(fp)
-#define razf_seek(fp, offset, whence) fseeko(fp, offset, whence)
-#define razf_tell(fp) ftello(fp)
-#endif
-#ifdef _USE_KNETFILE
-#include "knetfile.h"
-#endif
-
-struct __faidx_t {
- RAZF *rz;
- int n, m;
- char **name;
- khash_t(s) *hash;
-};
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-static inline void fai_insert_index(faidx_t *idx, const char *name, int len, int line_len, int line_blen, uint64_t offset)
-{
- khint_t k;
- int ret;
- faidx1_t t;
- if (idx->n == idx->m) {
- idx->m = idx->m? idx->m<<1 : 16;
- idx->name = (char**)realloc(idx->name, sizeof(void*) * idx->m);
- }
- idx->name[idx->n] = strdup(name);
- k = kh_put(s, idx->hash, idx->name[idx->n], &ret);
- t.len = len; t.line_len = line_len; t.line_blen = line_blen; t.offset = offset;
- kh_value(idx->hash, k) = t;
- ++idx->n;
-}
-
-faidx_t *fai_build_core(RAZF *rz)
-{
- char c, *name;
- int l_name, m_name, ret;
- int line_len, line_blen, state;
- int l1, l2;
- faidx_t *idx;
- uint64_t offset;
- int64_t len;
-
- idx = (faidx_t*)calloc(1, sizeof(faidx_t));
- idx->hash = kh_init(s);
- name = 0; l_name = m_name = 0;
- len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;
- while (razf_read(rz, &c, 1)) {
- if (c == '\n') { // an empty line
- if (state == 1) {
- offset = razf_tell(rz);
- continue;
- } else if ((state == 0 && len < 0) || state == 2) continue;
- }
- if (c == '>') { // fasta header
- if (len >= 0)
- fai_insert_index(idx, name, len, line_len, line_blen, offset);
- l_name = 0;
- while ((ret = razf_read(rz, &c, 1)) != 0 && !isspace(c)) {
- if (m_name < l_name + 2) {
- m_name = l_name + 2;
- kroundup32(m_name);
- name = (char*)realloc(name, m_name);
- }
- name[l_name++] = c;
- }
- name[l_name] = '\0';
- if (ret == 0) {
- fprintf(stderr, "[fai_build_core] the last entry has no sequence\n");
- free(name); fai_destroy(idx);
- return 0;
- }
- if (c != '\n') while (razf_read(rz, &c, 1) && c != '\n');
- state = 1; len = 0;
- offset = razf_tell(rz);
- } else {
- if (state == 3) {
- fprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence '%s'.\n", name);
- free(name); fai_destroy(idx);
- return 0;
- }
- if (state == 2) state = 3;
- l1 = l2 = 0;
- do {
- ++l1;
- if (isgraph(c)) ++l2;
- } while ((ret = razf_read(rz, &c, 1)) && c != '\n');
- if (state == 3 && l2) {
- fprintf(stderr, "[fai_build_core] different line length in sequence '%s'.\n", name);
- free(name); fai_destroy(idx);
- return 0;
- }
- ++l1; len += l2;
- if (state == 1) line_len = l1, line_blen = l2, state = 0;
- else if (state == 0) {
- if (l1 != line_len || l2 != line_blen) state = 2;
- }
- }
- }
- fai_insert_index(idx, name, len, line_len, line_blen, offset);
- free(name);
- return idx;
-}
-
-void fai_save(const faidx_t *fai, FILE *fp)
-{
- khint_t k;
- int i;
- for (i = 0; i < fai->n; ++i) {
- faidx1_t x;
- k = kh_get(s, fai->hash, fai->name[i]);
- x = kh_value(fai->hash, k);
-#ifdef _WIN32
- fprintf(fp, "%s\t%d\t%ld\t%d\t%d\n", fai->name[i], (int)x.len, (long)x.offset, (int)x.line_blen, (int)x.line_len);
-#else
- fprintf(fp, "%s\t%d\t%lld\t%d\t%d\n", fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len);
-#endif
- }
-}
-
-faidx_t *fai_read(FILE *fp)
-{
- faidx_t *fai;
- char *buf, *p;
- int len, line_len, line_blen;
-#ifdef _WIN32
- long offset;
-#else
- long long offset;
-#endif
- fai = (faidx_t*)calloc(1, sizeof(faidx_t));
- fai->hash = kh_init(s);
- buf = (char*)calloc(0x10000, 1);
- while (!feof(fp) && fgets(buf, 0x10000, fp)) {
- for (p = buf; *p && isgraph(*p); ++p);
- *p = 0; ++p;
-#ifdef _WIN32
- sscanf(p, "%d%ld%d%d", &len, &offset, &line_blen, &line_len);
-#else
- sscanf(p, "%d%lld%d%d", &len, &offset, &line_blen, &line_len);
-#endif
- fai_insert_index(fai, buf, len, line_len, line_blen, offset);
- }
- free(buf);
- return fai;
-}
-
-void fai_destroy(faidx_t *fai)
-{
- int i;
- for (i = 0; i < fai->n; ++i) free(fai->name[i]);
- free(fai->name);
- kh_destroy(s, fai->hash);
- if (fai->rz) razf_close(fai->rz);
- free(fai);
-}
-
-int fai_build(const char *fn)
-{
- char *str;
- RAZF *rz;
- FILE *fp;
- faidx_t *fai;
- str = (char*)calloc(strlen(fn) + 5, 1);
- sprintf(str, "%s.fai", fn);
- rz = razf_open(fn, "r");
- if (rz == 0) {
- fprintf(stderr, "[fai_build] fail to open the FASTA file %s\n",fn);
- free(str);
- return -1;
- }
- fai = fai_build_core(rz);
- razf_close(rz);
- fp = fopen(str, "wb");
- if (fp == 0) {
- fprintf(stderr, "[fai_build] fail to write FASTA index %s\n",str);
- fai_destroy(fai); free(str);
- return -1;
- }
- fai_save(fai, fp);
- fclose(fp);
- free(str);
- fai_destroy(fai);
- return 0;
-}
-
-#ifdef _USE_KNETFILE
-FILE *download_and_open(const char *fn)
-{
- const int buf_size = 1 * 1024 * 1024;
- uint8_t *buf;
- FILE *fp;
- knetFile *fp_remote;
- const char *url = fn;
- const char *p;
- int l = strlen(fn);
- for (p = fn + l - 1; p >= fn; --p)
- if (*p == '/') break;
- fn = p + 1;
-
- // First try to open a local copy
- fp = fopen(fn, "r");
- if (fp)
- return fp;
-
- // If failed, download from remote and open
- fp_remote = knet_open(url, "rb");
- if (fp_remote == 0) {
- fprintf(stderr, "[download_from_remote] fail to open remote file %s\n",url);
- return NULL;
- }
- if ((fp = fopen(fn, "wb")) == 0) {
- fprintf(stderr, "[download_from_remote] fail to create file in the working directory %s\n",fn);
- knet_close(fp_remote);
- return NULL;
- }
- buf = (uint8_t*)calloc(buf_size, 1);
- while ((l = knet_read(fp_remote, buf, buf_size)) != 0)
- fwrite(buf, 1, l, fp);
- free(buf);
- fclose(fp);
- knet_close(fp_remote);
-
- return fopen(fn, "r");
-}
-#endif
-
-faidx_t *fai_load(const char *fn)
-{
- char *str;
- FILE *fp;
- faidx_t *fai;
- str = (char*)calloc(strlen(fn) + 5, 1);
- sprintf(str, "%s.fai", fn);
-
-#ifdef _USE_KNETFILE
- if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)
- {
- fp = download_and_open(str);
- if ( !fp )
- {
- fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\n", str);
- free(str);
- return 0;
- }
- }
- else
-#endif
- fp = fopen(str, "rb");
- if (fp == 0) {
- fprintf(stderr, "[fai_load] build FASTA index.\n");
- fai_build(fn);
- fp = fopen(str, "rb");
- if (fp == 0) {
- fprintf(stderr, "[fai_load] fail to open FASTA index.\n");
- free(str);
- return 0;
- }
- }
-
- fai = fai_read(fp);
- fclose(fp);
-
- fai->rz = razf_open(fn, "rb");
- free(str);
- if (fai->rz == 0) {
- fprintf(stderr, "[fai_load] fail to open FASTA file.\n");
- return 0;
- }
- return fai;
-}
-
-char *fai_fetch(const faidx_t *fai, const char *str, int *len)
-{
- char *s, c;
- int i, l, k, name_end;
- khiter_t iter;
- faidx1_t val;
- khash_t(s) *h;
- int beg, end;
-
- beg = end = -1;
- h = fai->hash;
- name_end = l = strlen(str);
- s = (char*)malloc(l+1);
- // remove space
- for (i = k = 0; i < l; ++i)
- if (!isspace(str[i])) s[k++] = str[i];
- s[k] = 0; l = k;
- // determine the sequence name
- for (i = l - 1; i >= 0; --i) if (s[i] == ':') break; // look for colon from the end
- if (i >= 0) name_end = i;
- if (name_end < l) { // check if this is really the end
- int n_hyphen = 0;
- for (i = name_end + 1; i < l; ++i) {
- if (s[i] == '-') ++n_hyphen;
- else if (!isdigit(s[i]) && s[i] != ',') break;
- }
- if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name
- s[name_end] = 0;
- iter = kh_get(s, h, s);
- if (iter == kh_end(h)) { // cannot find the sequence name
- iter = kh_get(s, h, str); // try str as the name
- if (iter == kh_end(h)) {
- *len = 0;
- free(s); return 0;
- } else s[name_end] = ':', name_end = l;
- }
- } else iter = kh_get(s, h, str);
- val = kh_value(h, iter);
- // parse the interval
- if (name_end < l) {
- for (i = k = name_end + 1; i < l; ++i)
- if (s[i] != ',') s[k++] = s[i];
- s[k] = 0;
- beg = atoi(s + name_end + 1);
- for (i = name_end + 1; i != k; ++i) if (s[i] == '-') break;
- end = i < k? atoi(s + i + 1) : val.len;
- if (beg > 0) --beg;
- } else beg = 0, end = val.len;
- if (beg >= val.len) beg = val.len;
- if (end >= val.len) end = val.len;
- if (beg > end) beg = end;
- free(s);
-
- // now retrieve the sequence
- l = 0;
- s = (char*)malloc(end - beg + 2);
- razf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);
- while (razf_read(fai->rz, &c, 1) == 1 && l < end - beg && !fai->rz->z_err)
- if (isgraph(c)) s[l++] = c;
- s[l] = '\0';
- *len = l;
- return s;
-}
-
-int faidx_main(int argc, char *argv[])
-{
- if (argc == 1) {
- fprintf(stderr, "Usage: faidx <in.fasta> [<reg> [...]]\n");
- return 1;
- } else {
- if (argc == 2) fai_build(argv[1]);
- else {
- int i, j, k, l;
- char *s;
- faidx_t *fai;
- fai = fai_load(argv[1]);
- if (fai == 0) return 1;
- for (i = 2; i != argc; ++i) {
- printf(">%s\n", argv[i]);
- s = fai_fetch(fai, argv[i], &l);
- for (j = 0; j < l; j += 60) {
- for (k = 0; k < 60 && k < l - j; ++k)
- putchar(s[j + k]);
- putchar('\n');
- }
- free(s);
- }
- fai_destroy(fai);
- }
- }
- return 0;
-}
-
-int faidx_fetch_nseq(const faidx_t *fai)
-{
- return fai->n;
-}
-
-char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len)
-{
- int l;
- char c;
- khiter_t iter;
- faidx1_t val;
- char *seq=NULL;
-
- // Adjust position
- iter = kh_get(s, fai->hash, c_name);
- if(iter == kh_end(fai->hash)) return 0;
- val = kh_value(fai->hash, iter);
- if(p_end_i < p_beg_i) p_beg_i = p_end_i;
- if(p_beg_i < 0) p_beg_i = 0;
- else if(val.len <= p_beg_i) p_beg_i = val.len - 1;
- if(p_end_i < 0) p_end_i = 0;
- else if(val.len <= p_end_i) p_end_i = val.len - 1;
-
- // Now retrieve the sequence
- l = 0;
- seq = (char*)malloc(p_end_i - p_beg_i + 2);
- razf_seek(fai->rz, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET);
- while (razf_read(fai->rz, &c, 1) == 1 && l < p_end_i - p_beg_i + 1)
- if (isgraph(c)) seq[l++] = c;
- seq[l] = '\0';
- *len = l;
- return seq;
-}
-
-#ifdef FAIDX_MAIN
-int main(int argc, char *argv[]) { return faidx_main(argc, argv); }
-#endif
diff --git a/src/samtools-0.1.18/faidx.h b/src/samtools-0.1.18/faidx.h
deleted file mode 100644
index 1fb1b1f..0000000
--- a/src/samtools-0.1.18/faidx.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008 Genome Research Ltd (GRL).
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Contact: Heng Li <lh3 at sanger.ac.uk> */
-
-#ifndef FAIDX_H
-#define FAIDX_H
-
-/*!
- @header
-
- Index FASTA files and extract subsequence.
-
- @copyright The Wellcome Trust Sanger Institute.
- */
-
-struct __faidx_t;
-typedef struct __faidx_t faidx_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- /*!
- @abstract Build index for a FASTA or razip compressed FASTA file.
- @param fn FASTA file name
- @return 0 on success; or -1 on failure
- @discussion File "fn.fai" will be generated.
- */
- int fai_build(const char *fn);
-
- /*!
- @abstract Distroy a faidx_t struct.
- @param fai Pointer to the struct to be destroyed
- */
- void fai_destroy(faidx_t *fai);
-
- /*!
- @abstract Load index from "fn.fai".
- @param fn File name of the FASTA file
- */
- faidx_t *fai_load(const char *fn);
-
- /*!
- @abstract Fetch the sequence in a region.
- @param fai Pointer to the faidx_t struct
- @param reg Region in the format "chr2:20,000-30,000"
- @param len Length of the region
- @return Pointer to the sequence; null on failure
-
- @discussion The returned sequence is allocated by malloc family
- and should be destroyed by end users by calling free() on it.
- */
- char *fai_fetch(const faidx_t *fai, const char *reg, int *len);
-
- /*!
- @abstract Fetch the number of sequences.
- @param fai Pointer to the faidx_t struct
- @return The number of sequences
- */
- int faidx_fetch_nseq(const faidx_t *fai);
-
- /*!
- @abstract Fetch the sequence in a region.
- @param fai Pointer to the faidx_t struct
- @param c_name Region name
- @param p_beg_i Beginning position number (zero-based)
- @param p_end_i End position number (zero-based)
- @param len Length of the region
- @return Pointer to the sequence; null on failure
-
- @discussion The returned sequence is allocated by malloc family
- and should be destroyed by end users by calling free() on it.
- */
- char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/kaln.c b/src/samtools-0.1.18/kaln.c
deleted file mode 100644
index 9c0bbaa..0000000
--- a/src/samtools-0.1.18/kaln.c
+++ /dev/null
@@ -1,486 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2003-2006, 2008, 2009, by Heng Li <lh3lh3 at gmail.com>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-#include <math.h>
-#include "kaln.h"
-
-#define FROM_M 0
-#define FROM_I 1
-#define FROM_D 2
-
-typedef struct {
- int i, j;
- unsigned char ctype;
-} path_t;
-
-int aln_sm_blosum62[] = {
-/* A R N D C Q E G H I L K M F P S T W Y V * X */
- 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,
- -1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,
- -2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,
- -2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,
- 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,
- -1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,
- -1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,
- 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,
- -2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,
- -1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,
- -1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,
- -1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,
- -1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,
- -2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,
- -1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,
- 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,
- 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,
- -3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,
- -2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,
- 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,
- -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,
- 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1
-};
-
-int aln_sm_blast[] = {
- 1, -3, -3, -3, -2,
- -3, 1, -3, -3, -2,
- -3, -3, 1, -3, -2,
- -3, -3, -3, 1, -2,
- -2, -2, -2, -2, -2
-};
-
-int aln_sm_qual[] = {
- 0, -23, -23, -23, 0,
- -23, 0, -23, -23, 0,
- -23, -23, 0, -23, 0,
- -23, -23, -23, 0, 0,
- 0, 0, 0, 0, 0
-};
-
-ka_param_t ka_param_blast = { 5, 2, 5, 2, aln_sm_blast, 5, 50 };
-ka_param_t ka_param_aa2aa = { 10, 2, 10, 2, aln_sm_blosum62, 22, 50 };
-
-ka_param2_t ka_param2_qual = { 37, 11, 37, 11, 37, 11, 0, 0, aln_sm_qual, 5, 50 };
-
-static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)
-{
- int i, n;
- uint32_t *cigar;
- unsigned char last_type;
-
- if (path_len == 0 || path == 0) {
- *n_cigar = 0;
- return 0;
- }
-
- last_type = path->ctype;
- for (i = n = 1; i < path_len; ++i) {
- if (last_type != path[i].ctype) ++n;
- last_type = path[i].ctype;
- }
- *n_cigar = n;
- cigar = (uint32_t*)calloc(*n_cigar, 4);
-
- cigar[0] = 1u << 4 | path[path_len-1].ctype;
- last_type = path[path_len-1].ctype;
- for (i = path_len - 2, n = 0; i >= 0; --i) {
- if (path[i].ctype == last_type) cigar[n] += 1u << 4;
- else {
- cigar[++n] = 1u << 4 | path[i].ctype;
- last_type = path[i].ctype;
- }
- }
-
- return cigar;
-}
-
-/***************************/
-/* START OF common_align.c */
-/***************************/
-
-#define SET_INF(s) (s).M = (s).I = (s).D = MINOR_INF;
-
-#define set_M(MM, cur, p, sc) \
-{ \
- if ((p)->M >= (p)->I) { \
- if ((p)->M >= (p)->D) { \
- (MM) = (p)->M + (sc); (cur)->Mt = FROM_M; \
- } else { \
- (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
- } \
- } else { \
- if ((p)->I > (p)->D) { \
- (MM) = (p)->I + (sc); (cur)->Mt = FROM_I; \
- } else { \
- (MM) = (p)->D + (sc); (cur)->Mt = FROM_D; \
- } \
- } \
-}
-#define set_I(II, cur, p) \
-{ \
- if ((p)->M - gap_open > (p)->I) { \
- (cur)->It = FROM_M; \
- (II) = (p)->M - gap_open - gap_ext; \
- } else { \
- (cur)->It = FROM_I; \
- (II) = (p)->I - gap_ext; \
- } \
-}
-#define set_end_I(II, cur, p) \
-{ \
- if (gap_end_ext >= 0) { \
- if ((p)->M - gap_end_open > (p)->I) { \
- (cur)->It = FROM_M; \
- (II) = (p)->M - gap_end_open - gap_end_ext; \
- } else { \
- (cur)->It = FROM_I; \
- (II) = (p)->I - gap_end_ext; \
- } \
- } else set_I(II, cur, p); \
-}
-#define set_D(DD, cur, p) \
-{ \
- if ((p)->M - gap_open > (p)->D) { \
- (cur)->Dt = FROM_M; \
- (DD) = (p)->M - gap_open - gap_ext; \
- } else { \
- (cur)->Dt = FROM_D; \
- (DD) = (p)->D - gap_ext; \
- } \
-}
-#define set_end_D(DD, cur, p) \
-{ \
- if (gap_end_ext >= 0) { \
- if ((p)->M - gap_end_open > (p)->D) { \
- (cur)->Dt = FROM_M; \
- (DD) = (p)->M - gap_end_open - gap_end_ext; \
- } else { \
- (cur)->Dt = FROM_D; \
- (DD) = (p)->D - gap_end_ext; \
- } \
- } else set_D(DD, cur, p); \
-}
-
-typedef struct {
- uint8_t Mt:3, It:2, Dt:3;
-} dpcell_t;
-
-typedef struct {
- int M, I, D;
-} dpscore_t;
-
-/***************************
- * banded global alignment *
- ***************************/
-uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap, int *_score, int *n_cigar)
-{
- int i, j;
- dpcell_t **dpcell, *q;
- dpscore_t *curr, *last, *s;
- int b1, b2, tmp_end;
- int *mat, end, max = 0;
- uint8_t type, ctype;
- uint32_t *cigar = 0;
-
- int gap_open, gap_ext, gap_end_open, gap_end_ext, b;
- int *score_matrix, N_MATRIX_ROW;
-
- /* initialize some align-related parameters. just for compatibility */
- gap_open = ap->gap_open;
- gap_ext = ap->gap_ext;
- gap_end_open = ap->gap_end_open;
- gap_end_ext = ap->gap_end_ext;
- b = ap->band_width;
- score_matrix = ap->matrix;
- N_MATRIX_ROW = ap->row;
-
- if (n_cigar) *n_cigar = 0;
- if (len1 == 0 || len2 == 0) return 0;
-
- /* calculate b1 and b2 */
- if (len1 > len2) {
- b1 = len1 - len2 + b;
- b2 = b;
- } else {
- b1 = b;
- b2 = len2 - len1 + b;
- }
- if (b1 > len1) b1 = len1;
- if (b2 > len2) b2 = len2;
- --seq1; --seq2;
-
- /* allocate memory */
- end = (b1 + b2 <= len1)? (b1 + b2 + 1) : (len1 + 1);
- dpcell = (dpcell_t**)malloc(sizeof(dpcell_t*) * (len2 + 1));
- for (j = 0; j <= len2; ++j)
- dpcell[j] = (dpcell_t*)malloc(sizeof(dpcell_t) * end);
- for (j = b2 + 1; j <= len2; ++j)
- dpcell[j] -= j - b2;
- curr = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
- last = (dpscore_t*)malloc(sizeof(dpscore_t) * (len1 + 1));
-
- /* set first row */
- SET_INF(*curr); curr->M = 0;
- for (i = 1, s = curr + 1; i < b1; ++i, ++s) {
- SET_INF(*s);
- set_end_D(s->D, dpcell[0] + i, s - 1);
- }
- s = curr; curr = last; last = s;
-
- /* core dynamic programming, part 1 */
- tmp_end = (b2 < len2)? b2 : len2 - 1;
- for (j = 1; j <= tmp_end; ++j) {
- q = dpcell[j]; s = curr; SET_INF(*s);
- set_end_I(s->I, q, last);
- end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
- mat = score_matrix + seq2[j] * N_MATRIX_ROW;
- ++s; ++q;
- for (i = 1; i != end; ++i, ++s, ++q) {
- set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
- set_I(s->I, q, last + i);
- set_D(s->D, q, s - 1);
- }
- set_M(s->M, q, last + i - 1, mat[seq1[i]]);
- set_D(s->D, q, s - 1);
- if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
- set_end_I(s->I, q, last + i);
- } else s->I = MINOR_INF;
- s = curr; curr = last; last = s;
- }
- /* last row for part 1, use set_end_D() instead of set_D() */
- if (j == len2 && b2 != len2 - 1) {
- q = dpcell[j]; s = curr; SET_INF(*s);
- set_end_I(s->I, q, last);
- end = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;
- mat = score_matrix + seq2[j] * N_MATRIX_ROW;
- ++s; ++q;
- for (i = 1; i != end; ++i, ++s, ++q) {
- set_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */
- set_I(s->I, q, last + i);
- set_end_D(s->D, q, s - 1);
- }
- set_M(s->M, q, last + i - 1, mat[seq1[i]]);
- set_end_D(s->D, q, s - 1);
- if (j + b1 - 1 > len1) { /* bug fixed, 040227 */
- set_end_I(s->I, q, last + i);
- } else s->I = MINOR_INF;
- s = curr; curr = last; last = s;
- ++j;
- }
-
- /* core dynamic programming, part 2 */
- for (; j <= len2 - b2 + 1; ++j) {
- SET_INF(curr[j - b2]);
- mat = score_matrix + seq2[j] * N_MATRIX_ROW;
- end = j + b1 - 1;
- for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {
- set_M(s->M, q, last + i - 1, mat[seq1[i]]);
- set_I(s->I, q, last + i);
- set_D(s->D, q, s - 1);
- }
- set_M(s->M, q, last + i - 1, mat[seq1[i]]);
- set_D(s->D, q, s - 1);
- s->I = MINOR_INF;
- s = curr; curr = last; last = s;
- }
-
- /* core dynamic programming, part 3 */
- for (; j < len2; ++j) {
- SET_INF(curr[j - b2]);
- mat = score_matrix + seq2[j] * N_MATRIX_ROW;
- for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
- set_M(s->M, q, last + i - 1, mat[seq1[i]]);
- set_I(s->I, q, last + i);
- set_D(s->D, q, s - 1);
- }
- set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
- set_end_I(s->I, q, last + i);
- set_D(s->D, q, s - 1);
- s = curr; curr = last; last = s;
- }
- /* last row */
- if (j == len2) {
- SET_INF(curr[j - b2]);
- mat = score_matrix + seq2[j] * N_MATRIX_ROW;
- for (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {
- set_M(s->M, q, last + i - 1, mat[seq1[i]]);
- set_I(s->I, q, last + i);
- set_end_D(s->D, q, s - 1);
- }
- set_M(s->M, q, last + len1 - 1, mat[seq1[i]]);
- set_end_I(s->I, q, last + i);
- set_end_D(s->D, q, s - 1);
- s = curr; curr = last; last = s;
- }
-
- *_score = last[len1].M;
- if (n_cigar) { /* backtrace */
- path_t *p, *path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 2));
- i = len1; j = len2;
- q = dpcell[j] + i;
- s = last + len1;
- max = s->M; type = q->Mt; ctype = FROM_M;
- if (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }
- if (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }
-
- p = path;
- p->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */
- ++p;
- do {
- switch (ctype) {
- case FROM_M: --i; --j; break;
- case FROM_I: --j; break;
- case FROM_D: --i; break;
- }
- q = dpcell[j] + i;
- ctype = type;
- switch (type) {
- case FROM_M: type = q->Mt; break;
- case FROM_I: type = q->It; break;
- case FROM_D: type = q->Dt; break;
- }
- p->ctype = ctype; p->i = i; p->j = j;
- ++p;
- } while (i || j);
- cigar = ka_path2cigar32(path, p - path - 1, n_cigar);
- free(path);
- }
-
- /* free memory */
- for (j = b2 + 1; j <= len2; ++j)
- dpcell[j] += j - b2;
- for (j = 0; j <= len2; ++j)
- free(dpcell[j]);
- free(dpcell);
- free(curr); free(last);
-
- return cigar;
-}
-
-typedef struct {
- int M, I, D;
-} score_aux_t;
-
-#define MINUS_INF -0x40000000
-
-// matrix: len2 rows and len1 columns
-int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap)
-{
-
-#define __score_aux(_p, _q0, _sc, _io, _ie, _do, _de) { \
- int t1, t2; \
- score_aux_t *_q; \
- _q = _q0; \
- _p->M = _q->M >= _q->I? _q->M : _q->I; \
- _p->M = _p->M >= _q->D? _p->M : _q->D; \
- _p->M += (_sc); \
- ++_q; t1 = _q->M - _io - _ie; t2 = _q->I - _ie; _p->I = t1 >= t2? t1 : t2; \
- _q = _p-1; t1 = _q->M - _do - _de; t2 = _q->D - _de; _p->D = t1 >= t2? t1 : t2; \
- }
-
- int i, j, bw, scmat_size = ap->row, *scmat = ap->matrix, ret;
- const uint8_t *seq1, *seq2;
- score_aux_t *curr, *last, *swap;
- bw = abs(len1 - len2) + ap->band_width;
- i = len1 > len2? len1 : len2;
- if (bw > i + 1) bw = i + 1;
- seq1 = _seq1 - 1; seq2 = _seq2 - 1;
- curr = calloc(len1 + 2, sizeof(score_aux_t));
- last = calloc(len1 + 2, sizeof(score_aux_t));
- { // the zero-th row
- int x, end = len1;
- score_aux_t *p;
- j = 0;
- x = j + bw; end = len1 < x? len1 : x; // band end
- p = curr;
- p->M = 0; p->I = p->D = MINUS_INF;
- for (i = 1, p = &curr[1]; i <= end; ++i, ++p)
- p->M = p->I = MINUS_INF, p->D = -(ap->edo + ap->ede * i);
- p->M = p->I = p->D = MINUS_INF;
- swap = curr; curr = last; last = swap;
- }
- for (j = 1; j < len2; ++j) {
- int x, beg = 0, end = len1, *scrow, col_end;
- score_aux_t *p;
- x = j - bw; beg = 0 > x? 0 : x; // band start
- x = j + bw; end = len1 < x? len1 : x; // band end
- if (beg == 0) { // from zero-th column
- p = curr;
- p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
- ++beg; // then beg = 1
- }
- scrow = scmat + seq2[j] * scmat_size;
- if (end == len1) col_end = 1, --end;
- else col_end = 0;
- for (i = beg, p = &curr[beg]; i <= end; ++i, ++p)
- __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->ido, ap->ide);
- if (col_end) {
- __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->ido, ap->ide);
- ++p;
- }
- p->M = p->I = p->D = MINUS_INF;
-// for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
- swap = curr; curr = last; last = swap;
- }
- { // the last row
- int x, beg = 0, *scrow;
- score_aux_t *p;
- j = len2;
- x = j - bw; beg = 0 > x? 0 : x; // band start
- if (beg == 0) { // from zero-th column
- p = curr;
- p->M = p->D = MINUS_INF; p->I = -(ap->eio + ap->eie * j);
- ++beg; // then beg = 1
- }
- scrow = scmat + seq2[j] * scmat_size;
- for (i = beg, p = &curr[beg]; i < len1; ++i, ++p)
- __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->iio, ap->iie, ap->edo, ap->ede);
- __score_aux(p, &last[i-1], scrow[(int)seq1[i]], ap->eio, ap->eie, ap->edo, ap->ede);
-// for (i = 0; i <= len1; ++i) printf("(%d,%d,%d) ", curr[i].M, curr[i].I, curr[i].D); putchar('\n');
- }
- ret = curr[len1].M >= curr[len1].I? curr[len1].M : curr[len1].I;
- ret = ret >= curr[len1].D? ret : curr[len1].D;
- free(curr); free(last);
- return ret;
-}
-
-#ifdef _MAIN
-int main(int argc, char *argv[])
-{
-// int len1 = 35, len2 = 35;
-// uint8_t *seq1 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\0\1";
-// uint8_t *seq2 = (uint8_t*)"\0\0\3\3\2\0\0\0\1\0\2\1\2\1\3\2\3\3\3\0\2\3\2\1\1\3\3\3\2\3\3\1\0\1\0";
- int len1 = 4, len2 = 4;
- uint8_t *seq1 = (uint8_t*)"\1\0\0\1";
- uint8_t *seq2 = (uint8_t*)"\1\0\1\0";
- int sc;
-// ka_global_core(seq1, 2, seq2, 1, &ka_param_qual, &sc, 0);
- sc = ka_global_score(seq1, len1, seq2, len2, &ka_param2_qual);
- printf("%d\n", sc);
- return 0;
-}
-#endif
diff --git a/src/samtools-0.1.18/kaln.h b/src/samtools-0.1.18/kaln.h
deleted file mode 100644
index 1ece132..0000000
--- a/src/samtools-0.1.18/kaln.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2003-2006, 2008, 2009 by Heng Li <lh3 at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-#ifndef LH3_KALN_H_
-#define LH3_KALN_H_
-
-#include <stdint.h>
-
-#define MINOR_INF -1073741823
-
-typedef struct {
- int gap_open;
- int gap_ext;
- int gap_end_open;
- int gap_end_ext;
-
- int *matrix;
- int row;
- int band_width;
-} ka_param_t;
-
-typedef struct {
- int iio, iie, ido, ide;
- int eio, eie, edo, ede;
- int *matrix;
- int row;
- int band_width;
-} ka_param2_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap,
- int *_score, int *n_cigar);
- int ka_global_score(const uint8_t *_seq1, int len1, const uint8_t *_seq2, int len2, const ka_param2_t *ap);
-#ifdef __cplusplus
-}
-#endif
-
-extern ka_param_t ka_param_blast; /* = { 5, 2, 5, 2, aln_sm_blast, 5, 50 }; */
-extern ka_param_t ka_param_qual; // only use this for global alignment!!!
-extern ka_param2_t ka_param2_qual; // only use this for global alignment!!!
-
-#endif
diff --git a/src/samtools-0.1.18/khash.h b/src/samtools-0.1.18/khash.h
deleted file mode 100644
index a7e8056..0000000
--- a/src/samtools-0.1.18/khash.h
+++ /dev/null
@@ -1,528 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/*
- An example:
-
-#include "khash.h"
-KHASH_MAP_INIT_INT(32, char)
-int main() {
- int ret, is_missing;
- khiter_t k;
- khash_t(32) *h = kh_init(32);
- k = kh_put(32, h, 5, &ret);
- if (!ret) kh_del(32, h, k);
- kh_value(h, k) = 10;
- k = kh_get(32, h, 10);
- is_missing = (k == kh_end(h));
- k = kh_get(32, h, 5);
- kh_del(32, h, k);
- for (k = kh_begin(h); k != kh_end(h); ++k)
- if (kh_exist(h, k)) kh_value(h, k) = 1;
- kh_destroy(32, h);
- return 0;
-}
-*/
-
-/*
- 2011-02-14 (0.2.5):
-
- * Allow to declare global functions.
-
- 2009-09-26 (0.2.4):
-
- * Improve portability
-
- 2008-09-19 (0.2.3):
-
- * Corrected the example
- * Improved interfaces
-
- 2008-09-11 (0.2.2):
-
- * Improved speed a little in kh_put()
-
- 2008-09-10 (0.2.1):
-
- * Added kh_clear()
- * Fixed a compiling error
-
- 2008-09-02 (0.2.0):
-
- * Changed to token concatenation which increases flexibility.
-
- 2008-08-31 (0.1.2):
-
- * Fixed a bug in kh_get(), which has not been tested previously.
-
- 2008-08-31 (0.1.1):
-
- * Added destructor
-*/
-
-
-#ifndef __AC_KHASH_H
-#define __AC_KHASH_H
-
-/*!
- @header
-
- Generic hash table library.
-
- @copyright Heng Li
- */
-
-#define AC_VERSION_KHASH_H "0.2.5"
-
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-
-/* compipler specific configuration */
-
-#if UINT_MAX == 0xffffffffu
-typedef unsigned int khint32_t;
-#elif ULONG_MAX == 0xffffffffu
-typedef unsigned long khint32_t;
-#endif
-
-#if ULONG_MAX == ULLONG_MAX
-typedef unsigned long khint64_t;
-#else
-typedef unsigned long long khint64_t;
-#endif
-
-#ifdef _MSC_VER
-#define inline __inline
-#endif
-
-typedef khint32_t khint_t;
-typedef khint_t khiter_t;
-
-#define __ac_HASH_PRIME_SIZE 32
-static const khint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
-{
- 0ul, 3ul, 11ul, 23ul, 53ul,
- 97ul, 193ul, 389ul, 769ul, 1543ul,
- 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,
- 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,
- 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,
- 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,
- 3221225473ul, 4294967291ul
-};
-
-#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
-#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
-#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
-#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
-#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
-#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
-#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
-
-static const double __ac_HASH_UPPER = 0.77;
-
-#define KHASH_DECLARE(name, khkey_t, khval_t) \
- typedef struct { \
- khint_t n_buckets, size, n_occupied, upper_bound; \
- khint32_t *flags; \
- khkey_t *keys; \
- khval_t *vals; \
- } kh_##name##_t; \
- extern kh_##name##_t *kh_init_##name(); \
- extern void kh_destroy_##name(kh_##name##_t *h); \
- extern void kh_clear_##name(kh_##name##_t *h); \
- extern khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key); \
- extern void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets); \
- extern khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret); \
- extern void kh_del_##name(kh_##name##_t *h, khint_t x);
-
-#define KHASH_INIT2(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
- typedef struct { \
- khint_t n_buckets, size, n_occupied, upper_bound; \
- khint32_t *flags; \
- khkey_t *keys; \
- khval_t *vals; \
- } kh_##name##_t; \
- SCOPE kh_##name##_t *kh_init_##name() { \
- return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
- } \
- SCOPE void kh_destroy_##name(kh_##name##_t *h) \
- { \
- if (h) { \
- free(h->keys); free(h->flags); \
- free(h->vals); \
- free(h); \
- } \
- } \
- SCOPE void kh_clear_##name(kh_##name##_t *h) \
- { \
- if (h && h->flags) { \
- memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(khint32_t)); \
- h->size = h->n_occupied = 0; \
- } \
- } \
- SCOPE khint_t kh_get_##name(const kh_##name##_t *h, khkey_t key) \
- { \
- if (h->n_buckets) { \
- khint_t inc, k, i, last; \
- k = __hash_func(key); i = k % h->n_buckets; \
- inc = 1 + k % (h->n_buckets - 1); last = i; \
- while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
- if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
- else i += inc; \
- if (i == last) return h->n_buckets; \
- } \
- return __ac_iseither(h->flags, i)? h->n_buckets : i; \
- } else return 0; \
- } \
- SCOPE void kh_resize_##name(kh_##name##_t *h, khint_t new_n_buckets) \
- { \
- khint32_t *new_flags = 0; \
- khint_t j = 1; \
- { \
- khint_t t = __ac_HASH_PRIME_SIZE - 1; \
- while (__ac_prime_list[t] > new_n_buckets) --t; \
- new_n_buckets = __ac_prime_list[t+1]; \
- if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; \
- else { \
- new_flags = (khint32_t*)malloc(((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
- memset(new_flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(khint32_t)); \
- if (h->n_buckets < new_n_buckets) { \
- h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (kh_is_map) \
- h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
- } \
- } \
- } \
- if (j) { \
- for (j = 0; j != h->n_buckets; ++j) { \
- if (__ac_iseither(h->flags, j) == 0) { \
- khkey_t key = h->keys[j]; \
- khval_t val; \
- if (kh_is_map) val = h->vals[j]; \
- __ac_set_isdel_true(h->flags, j); \
- while (1) { \
- khint_t inc, k, i; \
- k = __hash_func(key); \
- i = k % new_n_buckets; \
- inc = 1 + k % (new_n_buckets - 1); \
- while (!__ac_isempty(new_flags, i)) { \
- if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets; \
- else i += inc; \
- } \
- __ac_set_isempty_false(new_flags, i); \
- if (i < h->n_buckets && __ac_iseither(h->flags, i) == 0) { \
- { khkey_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; } \
- if (kh_is_map) { khval_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; } \
- __ac_set_isdel_true(h->flags, i); \
- } else { \
- h->keys[i] = key; \
- if (kh_is_map) h->vals[i] = val; \
- break; \
- } \
- } \
- } \
- } \
- if (h->n_buckets > new_n_buckets) { \
- h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
- if (kh_is_map) \
- h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
- } \
- free(h->flags); \
- h->flags = new_flags; \
- h->n_buckets = new_n_buckets; \
- h->n_occupied = h->size; \
- h->upper_bound = (khint_t)(h->n_buckets * __ac_HASH_UPPER + 0.5); \
- } \
- } \
- SCOPE khint_t kh_put_##name(kh_##name##_t *h, khkey_t key, int *ret) \
- { \
- khint_t x; \
- if (h->n_occupied >= h->upper_bound) { \
- if (h->n_buckets > (h->size<<1)) kh_resize_##name(h, h->n_buckets - 1); \
- else kh_resize_##name(h, h->n_buckets + 1); \
- } \
- { \
- khint_t inc, k, i, site, last; \
- x = site = h->n_buckets; k = __hash_func(key); i = k % h->n_buckets; \
- if (__ac_isempty(h->flags, i)) x = i; \
- else { \
- inc = 1 + k % (h->n_buckets - 1); last = i; \
- while (!__ac_isempty(h->flags, i) && (__ac_isdel(h->flags, i) || !__hash_equal(h->keys[i], key))) { \
- if (__ac_isdel(h->flags, i)) site = i; \
- if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets; \
- else i += inc; \
- if (i == last) { x = site; break; } \
- } \
- if (x == h->n_buckets) { \
- if (__ac_isempty(h->flags, i) && site != h->n_buckets) x = site; \
- else x = i; \
- } \
- } \
- } \
- if (__ac_isempty(h->flags, x)) { \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; ++h->n_occupied; \
- *ret = 1; \
- } else if (__ac_isdel(h->flags, x)) { \
- h->keys[x] = key; \
- __ac_set_isboth_false(h->flags, x); \
- ++h->size; \
- *ret = 2; \
- } else *ret = 0; \
- return x; \
- } \
- SCOPE void kh_del_##name(kh_##name##_t *h, khint_t x) \
- { \
- if (x != h->n_buckets && !__ac_iseither(h->flags, x)) { \
- __ac_set_isdel_true(h->flags, x); \
- --h->size; \
- } \
- }
-
-#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \
- KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal)
-
-/* --- BEGIN OF HASH FUNCTIONS --- */
-
-/*! @function
- @abstract Integer hash function
- @param key The integer [khint32_t]
- @return The hash value [khint_t]
- */
-#define kh_int_hash_func(key) (khint32_t)(key)
-/*! @function
- @abstract Integer comparison function
- */
-#define kh_int_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract 64-bit integer hash function
- @param key The integer [khint64_t]
- @return The hash value [khint_t]
- */
-#define kh_int64_hash_func(key) (khint32_t)((key)>>33^(key)^(key)<<11)
-/*! @function
- @abstract 64-bit integer comparison function
- */
-#define kh_int64_hash_equal(a, b) ((a) == (b))
-/*! @function
- @abstract const char* hash function
- @param s Pointer to a null terminated string
- @return The hash value
- */
-static inline khint_t __ac_X31_hash_string(const char *s)
-{
- khint_t h = *s;
- if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
- return h;
-}
-/*! @function
- @abstract Another interface to const char* hash function
- @param key Pointer to a null terminated string [const char*]
- @return The hash value [khint_t]
- */
-#define kh_str_hash_func(key) __ac_X31_hash_string(key)
-/*! @function
- @abstract Const char* comparison function
- */
-#define kh_str_hash_equal(a, b) (strcmp(a, b) == 0)
-
-/* --- END OF HASH FUNCTIONS --- */
-
-/* Other necessary macros... */
-
-/*!
- @abstract Type of the hash table.
- @param name Name of the hash table [symbol]
- */
-#define khash_t(name) kh_##name##_t
-
-/*! @function
- @abstract Initiate a hash table.
- @param name Name of the hash table [symbol]
- @return Pointer to the hash table [khash_t(name)*]
- */
-#define kh_init(name) kh_init_##name()
-
-/*! @function
- @abstract Destroy a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define kh_destroy(name, h) kh_destroy_##name(h)
-
-/*! @function
- @abstract Reset a hash table without deallocating memory.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- */
-#define kh_clear(name, h) kh_clear_##name(h)
-
-/*! @function
- @abstract Resize a hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param s New size [khint_t]
- */
-#define kh_resize(name, h, s) kh_resize_##name(h, s)
-
-/*! @function
- @abstract Insert a key to the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @param r Extra return code: 0 if the key is present in the hash table;
- 1 if the bucket is empty (never used); 2 if the element in
- the bucket has been deleted [int*]
- @return Iterator to the inserted element [khint_t]
- */
-#define kh_put(name, h, k, r) kh_put_##name(h, k, r)
-
-/*! @function
- @abstract Retrieve a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Key [type of keys]
- @return Iterator to the found element, or kh_end(h) is the element is absent [khint_t]
- */
-#define kh_get(name, h, k) kh_get_##name(h, k)
-
-/*! @function
- @abstract Remove a key from the hash table.
- @param name Name of the hash table [symbol]
- @param h Pointer to the hash table [khash_t(name)*]
- @param k Iterator to the element to be deleted [khint_t]
- */
-#define kh_del(name, h, k) kh_del_##name(h, k)
-
-
-/*! @function
- @abstract Test whether a bucket contains data.
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [khint_t]
- @return 1 if containing data; 0 otherwise [int]
- */
-#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))
-
-/*! @function
- @abstract Get key given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [khint_t]
- @return Key [type of keys]
- */
-#define kh_key(h, x) ((h)->keys[x])
-
-/*! @function
- @abstract Get value given an iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @param x Iterator to the bucket [khint_t]
- @return Value [type of values]
- @discussion For hash sets, calling this results in segfault.
- */
-#define kh_val(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Alias of kh_val()
- */
-#define kh_value(h, x) ((h)->vals[x])
-
-/*! @function
- @abstract Get the start iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The start iterator [khint_t]
- */
-#define kh_begin(h) (khint_t)(0)
-
-/*! @function
- @abstract Get the end iterator
- @param h Pointer to the hash table [khash_t(name)*]
- @return The end iterator [khint_t]
- */
-#define kh_end(h) ((h)->n_buckets)
-
-/*! @function
- @abstract Get the number of elements in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of elements in the hash table [khint_t]
- */
-#define kh_size(h) ((h)->size)
-
-/*! @function
- @abstract Get the number of buckets in the hash table
- @param h Pointer to the hash table [khash_t(name)*]
- @return Number of buckets in the hash table [khint_t]
- */
-#define kh_n_buckets(h) ((h)->n_buckets)
-
-/* More conenient interfaces */
-
-/*! @function
- @abstract Instantiate a hash set containing integer keys
- @param name Name of the hash table [symbol]
- */
-#define KHASH_SET_INIT_INT(name) \
- KHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing integer keys
- @param name Name of the hash table [symbol]
- @param khval_t Type of values [type]
- */
-#define KHASH_MAP_INIT_INT(name, khval_t) \
- KHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- */
-#define KHASH_SET_INIT_INT64(name) \
- KHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing 64-bit integer keys
- @param name Name of the hash table [symbol]
- @param khval_t Type of values [type]
- */
-#define KHASH_MAP_INIT_INT64(name, khval_t) \
- KHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)
-
-typedef const char *kh_cstr_t;
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- */
-#define KHASH_SET_INIT_STR(name) \
- KHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)
-
-/*! @function
- @abstract Instantiate a hash map containing const char* keys
- @param name Name of the hash table [symbol]
- @param khval_t Type of values [type]
- */
-#define KHASH_MAP_INIT_STR(name, khval_t) \
- KHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)
-
-#endif /* __AC_KHASH_H */
diff --git a/src/samtools-0.1.18/klist.h b/src/samtools-0.1.18/klist.h
deleted file mode 100644
index 2f17016..0000000
--- a/src/samtools-0.1.18/klist.h
+++ /dev/null
@@ -1,96 +0,0 @@
-#ifndef _LH3_KLIST_H
-#define _LH3_KLIST_H
-
-#include <stdlib.h>
-
-#define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \
- typedef struct { \
- size_t cnt, n, max; \
- kmptype_t **buf; \
- } kmp_##name##_t; \
- static inline kmp_##name##_t *kmp_init_##name() { \
- return calloc(1, sizeof(kmp_##name##_t)); \
- } \
- static inline void kmp_destroy_##name(kmp_##name##_t *mp) { \
- size_t k; \
- for (k = 0; k < mp->n; ++k) { \
- kmpfree_f(mp->buf[k]); free(mp->buf[k]); \
- } \
- free(mp->buf); free(mp); \
- } \
- static inline kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \
- ++mp->cnt; \
- if (mp->n == 0) return calloc(1, sizeof(kmptype_t)); \
- return mp->buf[--mp->n]; \
- } \
- static inline void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
- --mp->cnt; \
- if (mp->n == mp->max) { \
- mp->max = mp->max? mp->max<<1 : 16; \
- mp->buf = realloc(mp->buf, sizeof(void*) * mp->max); \
- } \
- mp->buf[mp->n++] = p; \
- }
-
-#define kmempool_t(name) kmp_##name##_t
-#define kmp_init(name) kmp_init_##name()
-#define kmp_destroy(name, mp) kmp_destroy_##name(mp)
-#define kmp_alloc(name, mp) kmp_alloc_##name(mp)
-#define kmp_free(name, mp, p) kmp_free_##name(mp, p)
-
-#define KLIST_INIT(name, kltype_t, kmpfree_t) \
- struct __kl1_##name { \
- kltype_t data; \
- struct __kl1_##name *next; \
- }; \
- typedef struct __kl1_##name kl1_##name; \
- KMEMPOOL_INIT(name, kl1_##name, kmpfree_t) \
- typedef struct { \
- kl1_##name *head, *tail; \
- kmp_##name##_t *mp; \
- size_t size; \
- } kl_##name##_t; \
- static inline kl_##name##_t *kl_init_##name() { \
- kl_##name##_t *kl = calloc(1, sizeof(kl_##name##_t)); \
- kl->mp = kmp_init(name); \
- kl->head = kl->tail = kmp_alloc(name, kl->mp); \
- kl->head->next = 0; \
- return kl; \
- } \
- static inline void kl_destroy_##name(kl_##name##_t *kl) { \
- kl1_##name *p; \
- for (p = kl->head; p != kl->tail; p = p->next) \
- kmp_free(name, kl->mp, p); \
- kmp_free(name, kl->mp, p); \
- kmp_destroy(name, kl->mp); \
- free(kl); \
- } \
- static inline kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \
- kl1_##name *q, *p = kmp_alloc(name, kl->mp); \
- q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \
- ++kl->size; \
- return &q->data; \
- } \
- static inline int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \
- kl1_##name *p; \
- if (kl->head->next == 0) return -1; \
- --kl->size; \
- p = kl->head; kl->head = kl->head->next; \
- if (d) *d = p->data; \
- kmp_free(name, kl->mp, p); \
- return 0; \
- }
-
-#define kliter_t(name) kl1_##name
-#define klist_t(name) kl_##name##_t
-#define kl_val(iter) ((iter)->data)
-#define kl_next(iter) ((iter)->next)
-#define kl_begin(kl) ((kl)->head)
-#define kl_end(kl) ((kl)->tail)
-
-#define kl_init(name) kl_init_##name()
-#define kl_destroy(name, kl) kl_destroy_##name(kl)
-#define kl_pushp(name, kl) kl_pushp_##name(kl)
-#define kl_shift(name, kl, d) kl_shift_##name(kl, d)
-
-#endif
diff --git a/src/samtools-0.1.18/knetfile.c b/src/samtools-0.1.18/knetfile.c
deleted file mode 100644
index af09146..0000000
--- a/src/samtools-0.1.18/knetfile.c
+++ /dev/null
@@ -1,632 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008 by Genome Research Ltd (GRL).
- 2010 by Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Probably I will not do socket programming in the next few years and
- therefore I decide to heavily annotate this file, for Linux and
- Windows as well. -ac */
-
-#include <time.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <unistd.h>
-#include <sys/types.h>
-
-#ifndef _WIN32
-#include <netdb.h>
-#include <arpa/inet.h>
-#include <sys/socket.h>
-#endif
-
-#include "knetfile.h"
-
-/* In winsock.h, the type of a socket is SOCKET, which is: "typedef
- * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed
- * integer -1. In knetfile.c, I use "int" for socket type
- * throughout. This should be improved to avoid confusion.
- *
- * In Linux/Mac, recv() and read() do almost the same thing. You can see
- * in the header file that netread() is simply an alias of read(). In
- * Windows, however, they are different and using recv() is mandatory.
- */
-
-/* This function tests if the file handler is ready for reading (or
- * writing if is_read==0). */
-static int socket_wait(int fd, int is_read)
-{
- fd_set fds, *fdr = 0, *fdw = 0;
- struct timeval tv;
- int ret;
- tv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out
- FD_ZERO(&fds);
- FD_SET(fd, &fds);
- if (is_read) fdr = &fds;
- else fdw = &fds;
- ret = select(fd+1, fdr, fdw, 0, &tv);
-#ifndef _WIN32
- if (ret == -1) perror("select");
-#else
- if (ret == 0)
- fprintf(stderr, "select time-out\n");
- else if (ret == SOCKET_ERROR)
- fprintf(stderr, "select: %d\n", WSAGetLastError());
-#endif
- return ret;
-}
-
-#ifndef _WIN32
-/* This function does not work with Windows due to the lack of
- * getaddrinfo() in winsock. It is addapted from an example in "Beej's
- * Guide to Network Programming" (http://beej.us/guide/bgnet/). */
-static int socket_connect(const char *host, const char *port)
-{
-#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)
-
- int on = 1, fd;
- struct linger lng = { 0, 0 };
- struct addrinfo hints, *res = 0;
- memset(&hints, 0, sizeof(struct addrinfo));
- hints.ai_family = AF_UNSPEC;
- hints.ai_socktype = SOCK_STREAM;
- /* In Unix/Mac, getaddrinfo() is the most convenient way to get
- * server information. */
- if (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");
- if ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");
- /* The following two setsockopt() are used by ftplib
- * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they
- * necessary. */
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");
- if (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");
- if (connect(fd, res->ai_addr, res->ai_addrlen) != 0) __err_connect("connect");
- freeaddrinfo(res);
- return fd;
-}
-#else
-/* MinGW's printf has problem with "%lld" */
-char *int64tostr(char *buf, int64_t x)
-{
- int cnt;
- int i = 0;
- do {
- buf[i++] = '0' + x % 10;
- x /= 10;
- } while (x);
- buf[i] = 0;
- for (cnt = i, i = 0; i < cnt/2; ++i) {
- int c = buf[i]; buf[i] = buf[cnt-i-1]; buf[cnt-i-1] = c;
- }
- return buf;
-}
-
-int64_t strtoint64(const char *buf)
-{
- int64_t x;
- for (x = 0; *buf != '\0'; ++buf)
- x = x * 10 + ((int64_t) *buf - 48);
- return x;
-}
-/* In windows, the first thing is to establish the TCP connection. */
-int knet_win32_init()
-{
- WSADATA wsaData;
- return WSAStartup(MAKEWORD(2, 2), &wsaData);
-}
-void knet_win32_destroy()
-{
- WSACleanup();
-}
-/* A slightly modfied version of the following function also works on
- * Mac (and presummably Linux). However, this function is not stable on
- * my Mac. It sometimes works fine but sometimes does not. Therefore for
- * non-Windows OS, I do not use this one. */
-static SOCKET socket_connect(const char *host, const char *port)
-{
-#define __err_connect(func) \
- do { \
- fprintf(stderr, "%s: %d\n", func, WSAGetLastError()); \
- return -1; \
- } while (0)
-
- int on = 1;
- SOCKET fd;
- struct linger lng = { 0, 0 };
- struct sockaddr_in server;
- struct hostent *hp = 0;
- // open socket
- if ((fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == INVALID_SOCKET) __err_connect("socket");
- if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char*)&on, sizeof(on)) == -1) __err_connect("setsockopt");
- if (setsockopt(fd, SOL_SOCKET, SO_LINGER, (char*)&lng, sizeof(lng)) == -1) __err_connect("setsockopt");
- // get host info
- if (isalpha(host[0])) hp = gethostbyname(host);
- else {
- struct in_addr addr;
- addr.s_addr = inet_addr(host);
- hp = gethostbyaddr((char*)&addr, 4, AF_INET);
- }
- if (hp == 0) __err_connect("gethost");
- // connect
- server.sin_addr.s_addr = *((unsigned long*)hp->h_addr);
- server.sin_family= AF_INET;
- server.sin_port = htons(atoi(port));
- if (connect(fd, (struct sockaddr*)&server, sizeof(server)) != 0) __err_connect("connect");
- // freehostent(hp); // strangely in MSDN, hp is NOT freed (memory leak?!)
- return fd;
-}
-#endif
-
-static off_t my_netread(int fd, void *buf, off_t len)
-{
- off_t rest = len, curr, l = 0;
- /* recv() and read() may not read the required length of data with
- * one call. They have to be called repeatedly. */
- while (rest) {
- if (socket_wait(fd, 1) <= 0) break; // socket is not ready for reading
- curr = netread(fd, buf + l, rest);
- /* According to the glibc manual, section 13.2, a zero returned
- * value indicates end-of-file (EOF), which should mean that
- * read() will not return zero if EOF has not been met but data
- * are not immediately available. */
- if (curr == 0) break;
- l += curr; rest -= curr;
- }
- return l;
-}
-
-/*************************
- * FTP specific routines *
- *************************/
-
-static int kftp_get_response(knetFile *ftp)
-{
-#ifndef _WIN32
- unsigned char c;
-#else
- char c;
-#endif
- int n = 0;
- char *p;
- if (socket_wait(ftp->ctrl_fd, 1) <= 0) return 0;
- while (netread(ftp->ctrl_fd, &c, 1)) { // FIXME: this is *VERY BAD* for unbuffered I/O
- //fputc(c, stderr);
- if (n >= ftp->max_response) {
- ftp->max_response = ftp->max_response? ftp->max_response<<1 : 256;
- ftp->response = realloc(ftp->response, ftp->max_response);
- }
- ftp->response[n++] = c;
- if (c == '\n') {
- if (n >= 4 && isdigit(ftp->response[0]) && isdigit(ftp->response[1]) && isdigit(ftp->response[2])
- && ftp->response[3] != '-') break;
- n = 0;
- continue;
- }
- }
- if (n < 2) return -1;
- ftp->response[n-2] = 0;
- return strtol(ftp->response, &p, 0);
-}
-
-static int kftp_send_cmd(knetFile *ftp, const char *cmd, int is_get)
-{
- if (socket_wait(ftp->ctrl_fd, 0) <= 0) return -1; // socket is not ready for writing
- netwrite(ftp->ctrl_fd, cmd, strlen(cmd));
- return is_get? kftp_get_response(ftp) : 0;
-}
-
-static int kftp_pasv_prep(knetFile *ftp)
-{
- char *p;
- int v[6];
- kftp_send_cmd(ftp, "PASV\r\n", 1);
- for (p = ftp->response; *p && *p != '('; ++p);
- if (*p != '(') return -1;
- ++p;
- sscanf(p, "%d,%d,%d,%d,%d,%d", &v[0], &v[1], &v[2], &v[3], &v[4], &v[5]);
- memcpy(ftp->pasv_ip, v, 4 * sizeof(int));
- ftp->pasv_port = (v[4]<<8&0xff00) + v[5];
- return 0;
-}
-
-
-static int kftp_pasv_connect(knetFile *ftp)
-{
- char host[80], port[10];
- if (ftp->pasv_port == 0) {
- fprintf(stderr, "[kftp_pasv_connect] kftp_pasv_prep() is not called before hand.\n");
- return -1;
- }
- sprintf(host, "%d.%d.%d.%d", ftp->pasv_ip[0], ftp->pasv_ip[1], ftp->pasv_ip[2], ftp->pasv_ip[3]);
- sprintf(port, "%d", ftp->pasv_port);
- ftp->fd = socket_connect(host, port);
- if (ftp->fd == -1) return -1;
- return 0;
-}
-
-int kftp_connect(knetFile *ftp)
-{
- ftp->ctrl_fd = socket_connect(ftp->host, ftp->port);
- if (ftp->ctrl_fd == -1) return -1;
- kftp_get_response(ftp);
- kftp_send_cmd(ftp, "USER anonymous\r\n", 1);
- kftp_send_cmd(ftp, "PASS kftp@\r\n", 1);
- kftp_send_cmd(ftp, "TYPE I\r\n", 1);
- return 0;
-}
-
-int kftp_reconnect(knetFile *ftp)
-{
- if (ftp->ctrl_fd != -1) {
- netclose(ftp->ctrl_fd);
- ftp->ctrl_fd = -1;
- }
- netclose(ftp->fd);
- ftp->fd = -1;
- return kftp_connect(ftp);
-}
-
-// initialize ->type, ->host, ->retr and ->size
-knetFile *kftp_parse_url(const char *fn, const char *mode)
-{
- knetFile *fp;
- char *p;
- int l;
- if (strstr(fn, "ftp://") != fn) return 0;
- for (p = (char*)fn + 6; *p && *p != '/'; ++p);
- if (*p != '/') return 0;
- l = p - fn - 6;
- fp = calloc(1, sizeof(knetFile));
- fp->type = KNF_TYPE_FTP;
- fp->fd = -1;
- /* the Linux/Mac version of socket_connect() also recognizes a port
- * like "ftp", but the Windows version does not. */
- fp->port = strdup("21");
- fp->host = calloc(l + 1, 1);
- if (strchr(mode, 'c')) fp->no_reconnect = 1;
- strncpy(fp->host, fn + 6, l);
- fp->retr = calloc(strlen(p) + 8, 1);
- sprintf(fp->retr, "RETR %s\r\n", p);
- fp->size_cmd = calloc(strlen(p) + 8, 1);
- sprintf(fp->size_cmd, "SIZE %s\r\n", p);
- fp->seek_offset = 0;
- return fp;
-}
-// place ->fd at offset off
-int kftp_connect_file(knetFile *fp)
-{
- int ret;
- long long file_size;
- if (fp->fd != -1) {
- netclose(fp->fd);
- if (fp->no_reconnect) kftp_get_response(fp);
- }
- kftp_pasv_prep(fp);
- kftp_send_cmd(fp, fp->size_cmd, 1);
-#ifndef _WIN32
- if ( sscanf(fp->response,"%*d %lld", &file_size) != 1 )
- {
- fprintf(stderr,"[kftp_connect_file] %s\n", fp->response);
- return -1;
- }
-#else
- const char *p = fp->response;
- while (*p != ' ') ++p;
- while (*p < '0' || *p > '9') ++p;
- file_size = strtoint64(p);
-#endif
- fp->file_size = file_size;
- if (fp->offset>=0) {
- char tmp[32];
-#ifndef _WIN32
- sprintf(tmp, "REST %lld\r\n", (long long)fp->offset);
-#else
- strcpy(tmp, "REST ");
- int64tostr(tmp + 5, fp->offset);
- strcat(tmp, "\r\n");
-#endif
- kftp_send_cmd(fp, tmp, 1);
- }
- kftp_send_cmd(fp, fp->retr, 0);
- kftp_pasv_connect(fp);
- ret = kftp_get_response(fp);
- if (ret != 150) {
- fprintf(stderr, "[kftp_connect_file] %s\n", fp->response);
- netclose(fp->fd);
- fp->fd = -1;
- return -1;
- }
- fp->is_ready = 1;
- return 0;
-}
-
-
-/**************************
- * HTTP specific routines *
- **************************/
-
-knetFile *khttp_parse_url(const char *fn, const char *mode)
-{
- knetFile *fp;
- char *p, *proxy, *q;
- int l;
- if (strstr(fn, "http://") != fn) return 0;
- // set ->http_host
- for (p = (char*)fn + 7; *p && *p != '/'; ++p);
- l = p - fn - 7;
- fp = calloc(1, sizeof(knetFile));
- fp->http_host = calloc(l + 1, 1);
- strncpy(fp->http_host, fn + 7, l);
- fp->http_host[l] = 0;
- for (q = fp->http_host; *q && *q != ':'; ++q);
- if (*q == ':') *q++ = 0;
- // get http_proxy
- proxy = getenv("http_proxy");
- // set ->host, ->port and ->path
- if (proxy == 0) {
- fp->host = strdup(fp->http_host); // when there is no proxy, server name is identical to http_host name.
- fp->port = strdup(*q? q : "80");
- fp->path = strdup(*p? p : "/");
- } else {
- fp->host = (strstr(proxy, "http://") == proxy)? strdup(proxy + 7) : strdup(proxy);
- for (q = fp->host; *q && *q != ':'; ++q);
- if (*q == ':') *q++ = 0;
- fp->port = strdup(*q? q : "80");
- fp->path = strdup(fn);
- }
- fp->type = KNF_TYPE_HTTP;
- fp->ctrl_fd = fp->fd = -1;
- fp->seek_offset = 0;
- return fp;
-}
-
-int khttp_connect_file(knetFile *fp)
-{
- int ret, l = 0;
- char *buf, *p;
- if (fp->fd != -1) netclose(fp->fd);
- fp->fd = socket_connect(fp->host, fp->port);
- buf = calloc(0x10000, 1); // FIXME: I am lazy... But in principle, 64KB should be large enough.
- l += sprintf(buf + l, "GET %s HTTP/1.0\r\nHost: %s\r\n", fp->path, fp->http_host);
- l += sprintf(buf + l, "Range: bytes=%lld-\r\n", (long long)fp->offset);
- l += sprintf(buf + l, "\r\n");
- netwrite(fp->fd, buf, l);
- l = 0;
- while (netread(fp->fd, buf + l, 1)) { // read HTTP header; FIXME: bad efficiency
- if (buf[l] == '\n' && l >= 3)
- if (strncmp(buf + l - 3, "\r\n\r\n", 4) == 0) break;
- ++l;
- }
- buf[l] = 0;
- if (l < 14) { // prematured header
- netclose(fp->fd);
- fp->fd = -1;
- return -1;
- }
- ret = strtol(buf + 8, &p, 0); // HTTP return code
- if (ret == 200 && fp->offset>0) { // 200 (complete result); then skip beginning of the file
- off_t rest = fp->offset;
- while (rest) {
- off_t l = rest < 0x10000? rest : 0x10000;
- rest -= my_netread(fp->fd, buf, l);
- }
- } else if (ret != 206 && ret != 200) {
- free(buf);
- fprintf(stderr, "[khttp_connect_file] fail to open file (HTTP code: %d).\n", ret);
- netclose(fp->fd);
- fp->fd = -1;
- return -1;
- }
- free(buf);
- fp->is_ready = 1;
- return 0;
-}
-
-/********************
- * Generic routines *
- ********************/
-
-knetFile *knet_open(const char *fn, const char *mode)
-{
- knetFile *fp = 0;
- if (mode[0] != 'r') {
- fprintf(stderr, "[kftp_open] only mode \"r\" is supported.\n");
- return 0;
- }
- if (strstr(fn, "ftp://") == fn) {
- fp = kftp_parse_url(fn, mode);
- if (fp == 0) return 0;
- if (kftp_connect(fp) == -1) {
- knet_close(fp);
- return 0;
- }
- kftp_connect_file(fp);
- } else if (strstr(fn, "http://") == fn) {
- fp = khttp_parse_url(fn, mode);
- if (fp == 0) return 0;
- khttp_connect_file(fp);
- } else { // local file
-#ifdef _WIN32
- /* In windows, O_BINARY is necessary. In Linux/Mac, O_BINARY may
- * be undefined on some systems, although it is defined on my
- * Mac and the Linux I have tested on. */
- int fd = open(fn, O_RDONLY | O_BINARY);
-#else
- int fd = open(fn, O_RDONLY);
-#endif
- if (fd == -1) {
- perror("open");
- return 0;
- }
- fp = (knetFile*)calloc(1, sizeof(knetFile));
- fp->type = KNF_TYPE_LOCAL;
- fp->fd = fd;
- fp->ctrl_fd = -1;
- }
- if (fp && fp->fd == -1) {
- knet_close(fp);
- return 0;
- }
- return fp;
-}
-
-knetFile *knet_dopen(int fd, const char *mode)
-{
- knetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));
- fp->type = KNF_TYPE_LOCAL;
- fp->fd = fd;
- return fp;
-}
-
-off_t knet_read(knetFile *fp, void *buf, off_t len)
-{
- off_t l = 0;
- if (fp->fd == -1) return 0;
- if (fp->type == KNF_TYPE_FTP) {
- if (fp->is_ready == 0) {
- if (!fp->no_reconnect) kftp_reconnect(fp);
- kftp_connect_file(fp);
- }
- } else if (fp->type == KNF_TYPE_HTTP) {
- if (fp->is_ready == 0)
- khttp_connect_file(fp);
- }
- if (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX
- off_t rest = len, curr;
- while (rest) {
- do {
- curr = read(fp->fd, buf + l, rest);
- } while (curr < 0 && EINTR == errno);
- if (curr < 0) return -1;
- if (curr == 0) break;
- l += curr; rest -= curr;
- }
- } else l = my_netread(fp->fd, buf, len);
- fp->offset += l;
- return l;
-}
-
-off_t knet_seek(knetFile *fp, int64_t off, int whence)
-{
- if (whence == SEEK_SET && off == fp->offset) return 0;
- if (fp->type == KNF_TYPE_LOCAL) {
- /* Be aware that lseek() returns the offset after seeking,
- * while fseek() returns zero on success. */
- off_t offset = lseek(fp->fd, off, whence);
- if (offset == -1) {
- // Be silent, it is OK for knet_seek to fail when the file is streamed
- // fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
- return -1;
- }
- fp->offset = offset;
- return 0;
- }
- else if (fp->type == KNF_TYPE_FTP)
- {
- if (whence==SEEK_CUR)
- fp->offset += off;
- else if (whence==SEEK_SET)
- fp->offset = off;
- else if ( whence==SEEK_END)
- fp->offset = fp->file_size+off;
- fp->is_ready = 0;
- return 0;
- }
- else if (fp->type == KNF_TYPE_HTTP)
- {
- if (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?
- fprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\n");
- errno = ESPIPE;
- return -1;
- }
- if (whence==SEEK_CUR)
- fp->offset += off;
- else if (whence==SEEK_SET)
- fp->offset = off;
- fp->is_ready = 0;
- return 0;
- }
- errno = EINVAL;
- fprintf(stderr,"[knet_seek] %s\n", strerror(errno));
- return -1;
-}
-
-int knet_close(knetFile *fp)
-{
- if (fp == 0) return 0;
- if (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific
- if (fp->fd != -1) {
- /* On Linux/Mac, netclose() is an alias of close(), but on
- * Windows, it is an alias of closesocket(). */
- if (fp->type == KNF_TYPE_LOCAL) close(fp->fd);
- else netclose(fp->fd);
- }
- free(fp->host); free(fp->port);
- free(fp->response); free(fp->retr); // FTP specific
- free(fp->path); free(fp->http_host); // HTTP specific
- free(fp);
- return 0;
-}
-
-#ifdef KNETFILE_MAIN
-int main(void)
-{
- char *buf;
- knetFile *fp;
- int type = 4, l;
-#ifdef _WIN32
- knet_win32_init();
-#endif
- buf = calloc(0x100000, 1);
- if (type == 0) {
- fp = knet_open("knetfile.c", "r");
- knet_seek(fp, 1000, SEEK_SET);
- } else if (type == 1) { // NCBI FTP, large file
- fp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");
- knet_seek(fp, 2500000000ll, SEEK_SET);
- l = knet_read(fp, buf, 255);
- } else if (type == 2) {
- fp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");
- knet_seek(fp, 1000, SEEK_SET);
- } else if (type == 3) {
- fp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");
- knet_seek(fp, 1000, SEEK_SET);
- } else if (type == 4) {
- fp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");
- knet_read(fp, buf, 10000);
- knet_seek(fp, 20000, SEEK_SET);
- knet_seek(fp, 10000, SEEK_SET);
- l = knet_read(fp, buf+10000, 10000000) + 10000;
- }
- if (type != 4 && type != 1) {
- knet_read(fp, buf, 255);
- buf[255] = 0;
- printf("%s\n", buf);
- } else write(fileno(stdout), buf, l);
- knet_close(fp);
- free(buf);
- return 0;
-}
-#endif
diff --git a/src/samtools-0.1.18/knetfile.h b/src/samtools-0.1.18/knetfile.h
deleted file mode 100644
index 0a0e66f..0000000
--- a/src/samtools-0.1.18/knetfile.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef KNETFILE_H
-#define KNETFILE_H
-
-#include <stdint.h>
-#include <fcntl.h>
-
-#ifndef _WIN32
-#define netread(fd, ptr, len) read(fd, ptr, len)
-#define netwrite(fd, ptr, len) write(fd, ptr, len)
-#define netclose(fd) close(fd)
-#else
-#include <winsock2.h>
-#define netread(fd, ptr, len) recv(fd, ptr, len, 0)
-#define netwrite(fd, ptr, len) send(fd, ptr, len, 0)
-#define netclose(fd) closesocket(fd)
-#endif
-
-// FIXME: currently I/O is unbuffered
-
-#define KNF_TYPE_LOCAL 1
-#define KNF_TYPE_FTP 2
-#define KNF_TYPE_HTTP 3
-
-typedef struct knetFile_s {
- int type, fd;
- int64_t offset;
- char *host, *port;
-
- // the following are for FTP only
- int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
- char *response, *retr, *size_cmd;
- int64_t seek_offset; // for lazy seek
- int64_t file_size;
-
- // the following are for HTTP only
- char *path, *http_host;
-} knetFile;
-
-#define knet_tell(fp) ((fp)->offset)
-#define knet_fileno(fp) ((fp)->fd)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef _WIN32
- int knet_win32_init();
- void knet_win32_destroy();
-#endif
-
- knetFile *knet_open(const char *fn, const char *mode);
-
- /*
- This only works with local files.
- */
- knetFile *knet_dopen(int fd, const char *mode);
-
- /*
- If ->is_ready==0, this routine updates ->fd; otherwise, it simply
- reads from ->fd.
- */
- off_t knet_read(knetFile *fp, void *buf, off_t len);
-
- /*
- This routine only sets ->offset and ->is_ready=0. It does not
- communicate with the FTP server.
- */
- off_t knet_seek(knetFile *fp, int64_t off, int whence);
- int knet_close(knetFile *fp);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/kprobaln.c b/src/samtools-0.1.18/kprobaln.c
deleted file mode 100644
index 894a2ae..0000000
--- a/src/samtools-0.1.18/kprobaln.c
+++ /dev/null
@@ -1,278 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2003-2006, 2008-2010, by Heng Li <lh3lh3 at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <stdint.h>
-#include <math.h>
-#include "kprobaln.h"
-
-/*****************************************
- * Probabilistic banded glocal alignment *
- *****************************************/
-
-#define EI .25
-#define EM .33333333333
-
-static float g_qual2prob[256];
-
-#define set_u(u, b, i, k) { int x=(i)-(b); x=x>0?x:0; (u)=((k)-x+1)*3; }
-
-kpa_par_t kpa_par_def = { 0.001, 0.1, 10 };
-kpa_par_t kpa_par_alt = { 0.0001, 0.01, 10 };
-
-/*
- The topology of the profile HMM:
-
- /\ /\ /\ /\
- I[1] I[k-1] I[k] I[L]
- ^ \ \ ^ \ ^ \ \ ^
- | \ \ | \ | \ \ |
- M[0] M[1] -> ... -> M[k-1] -> M[k] -> ... -> M[L] M[L+1]
- \ \/ \/ \/ /
- \ /\ /\ /\ /
- -> D[k-1] -> D[k] ->
-
- M[0] points to every {M,I}[k] and every {M,I}[k] points M[L+1].
-
- On input, _ref is the reference sequence and _query is the query
- sequence. Both are sequences of 0/1/2/3/4 where 4 stands for an
- ambiguous residue. iqual is the base quality. c sets the gap open
- probability, gap extension probability and band width.
-
- On output, state and q are arrays of length l_query. The higher 30
- bits give the reference position the query base is matched to and the
- lower two bits can be 0 (an alignment match) or 1 (an
- insertion). q[i] gives the phred scaled posterior probability of
- state[i] being wrong.
- */
-int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual,
- const kpa_par_t *c, int *state, uint8_t *q)
-{
- double **f, **b = 0, *s, m[9], sI, sM, bI, bM, pb;
- float *qual, *_qual;
- const uint8_t *ref, *query;
- int bw, bw2, i, k, is_diff = 0, is_backward = 1, Pr;
-
- /*** initialization ***/
- is_backward = state && q? 1 : 0;
- ref = _ref - 1; query = _query - 1; // change to 1-based coordinate
- bw = l_ref > l_query? l_ref : l_query;
- if (bw > c->bw) bw = c->bw;
- if (bw < abs(l_ref - l_query)) bw = abs(l_ref - l_query);
- bw2 = bw * 2 + 1;
- // allocate the forward and backward matrices f[][] and b[][] and the scaling array s[]
- f = calloc(l_query+1, sizeof(void*));
- if (is_backward) b = calloc(l_query+1, sizeof(void*));
- for (i = 0; i <= l_query; ++i) {
- f[i] = calloc(bw2 * 3 + 6, sizeof(double)); // FIXME: this is over-allocated for very short seqs
- if (is_backward) b[i] = calloc(bw2 * 3 + 6, sizeof(double));
- }
- s = calloc(l_query+2, sizeof(double)); // s[] is the scaling factor to avoid underflow
- // initialize qual
- _qual = calloc(l_query, sizeof(float));
- if (g_qual2prob[0] == 0)
- for (i = 0; i < 256; ++i)
- g_qual2prob[i] = pow(10, -i/10.);
- for (i = 0; i < l_query; ++i) _qual[i] = g_qual2prob[iqual? iqual[i] : 30];
- qual = _qual - 1;
- // initialize transition probability
- sM = sI = 1. / (2 * l_query + 2); // the value here seems not to affect results; FIXME: need proof
- m[0*3+0] = (1 - c->d - c->d) * (1 - sM); m[0*3+1] = m[0*3+2] = c->d * (1 - sM);
- m[1*3+0] = (1 - c->e) * (1 - sI); m[1*3+1] = c->e * (1 - sI); m[1*3+2] = 0.;
- m[2*3+0] = 1 - c->e; m[2*3+1] = 0.; m[2*3+2] = c->e;
- bM = (1 - c->d) / l_ref; bI = c->d / l_ref; // (bM+bI)*l_ref==1
- /*** forward ***/
- // f[0]
- set_u(k, bw, 0, 0);
- f[0][k] = s[0] = 1.;
- { // f[1]
- double *fi = f[1], sum;
- int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1, _beg, _end;
- for (k = beg, sum = 0.; k <= end; ++k) {
- int u;
- double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
- set_u(u, bw, 1, k);
- fi[u+0] = e * bM; fi[u+1] = EI * bI;
- sum += fi[u] + fi[u+1];
- }
- // rescale
- s[1] = sum;
- set_u(_beg, bw, 1, beg); set_u(_end, bw, 1, end); _end += 2;
- for (k = _beg; k <= _end; ++k) fi[k] /= sum;
- }
- // f[2..l_query]
- for (i = 2; i <= l_query; ++i) {
- double *fi = f[i], *fi1 = f[i-1], sum, qli = qual[i];
- int beg = 1, end = l_ref, x, _beg, _end;
- uint8_t qyi = query[i];
- x = i - bw; beg = beg > x? beg : x; // band start
- x = i + bw; end = end < x? end : x; // band end
- for (k = beg, sum = 0.; k <= end; ++k) {
- int u, v11, v01, v10;
- double e;
- e = (ref[k] > 3 || qyi > 3)? 1. : ref[k] == qyi? 1. - qli : qli * EM;
- set_u(u, bw, i, k); set_u(v11, bw, i-1, k-1); set_u(v10, bw, i-1, k); set_u(v01, bw, i, k-1);
- fi[u+0] = e * (m[0] * fi1[v11+0] + m[3] * fi1[v11+1] + m[6] * fi1[v11+2]);
- fi[u+1] = EI * (m[1] * fi1[v10+0] + m[4] * fi1[v10+1]);
- fi[u+2] = m[2] * fi[v01+0] + m[8] * fi[v01+2];
- sum += fi[u] + fi[u+1] + fi[u+2];
-// fprintf(stderr, "F (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, fi[u], fi[u+1], fi[u+2]); // DEBUG
- }
- // rescale
- s[i] = sum;
- set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
- for (k = _beg, sum = 1./sum; k <= _end; ++k) fi[k] *= sum;
- }
- { // f[l_query+1]
- double sum;
- for (k = 1, sum = 0.; k <= l_ref; ++k) {
- int u;
- set_u(u, bw, l_query, k);
- if (u < 3 || u >= bw2*3+3) continue;
- sum += f[l_query][u+0] * sM + f[l_query][u+1] * sI;
- }
- s[l_query+1] = sum; // the last scaling factor
- }
- { // compute likelihood
- double p = 1., Pr1 = 0.;
- for (i = 0; i <= l_query + 1; ++i) {
- p *= s[i];
- if (p < 1e-100) Pr1 += -4.343 * log(p), p = 1.;
- }
- Pr1 += -4.343 * log(p * l_ref * l_query);
- Pr = (int)(Pr1 + .499);
- if (!is_backward) { // skip backward and MAP
- for (i = 0; i <= l_query; ++i) free(f[i]);
- free(f); free(s); free(_qual);
- return Pr;
- }
- }
- /*** backward ***/
- // b[l_query] (b[l_query+1][0]=1 and thus \tilde{b}[][]=1/s[l_query+1]; this is where s[l_query+1] comes from)
- for (k = 1; k <= l_ref; ++k) {
- int u;
- double *bi = b[l_query];
- set_u(u, bw, l_query, k);
- if (u < 3 || u >= bw2*3+3) continue;
- bi[u+0] = sM / s[l_query] / s[l_query+1]; bi[u+1] = sI / s[l_query] / s[l_query+1];
- }
- // b[l_query-1..1]
- for (i = l_query - 1; i >= 1; --i) {
- int beg = 1, end = l_ref, x, _beg, _end;
- double *bi = b[i], *bi1 = b[i+1], y = (i > 1), qli1 = qual[i+1];
- uint8_t qyi1 = query[i+1];
- x = i - bw; beg = beg > x? beg : x;
- x = i + bw; end = end < x? end : x;
- for (k = end; k >= beg; --k) {
- int u, v11, v01, v10;
- double e;
- set_u(u, bw, i, k); set_u(v11, bw, i+1, k+1); set_u(v10, bw, i+1, k); set_u(v01, bw, i, k+1);
- e = (k >= l_ref? 0 : (ref[k+1] > 3 || qyi1 > 3)? 1. : ref[k+1] == qyi1? 1. - qli1 : qli1 * EM) * bi1[v11];
- bi[u+0] = e * m[0] + EI * m[1] * bi1[v10+1] + m[2] * bi[v01+2]; // bi1[v11] has been foled into e.
- bi[u+1] = e * m[3] + EI * m[4] * bi1[v10+1];
- bi[u+2] = (e * m[6] + m[8] * bi[v01+2]) * y;
-// fprintf(stderr, "B (%d,%d;%d): %lg,%lg,%lg\n", i, k, u, bi[u], bi[u+1], bi[u+2]); // DEBUG
- }
- // rescale
- set_u(_beg, bw, i, beg); set_u(_end, bw, i, end); _end += 2;
- for (k = _beg, y = 1./s[i]; k <= _end; ++k) bi[k] *= y;
- }
- { // b[0]
- int beg = 1, end = l_ref < bw + 1? l_ref : bw + 1;
- double sum = 0.;
- for (k = end; k >= beg; --k) {
- int u;
- double e = (ref[k] > 3 || query[1] > 3)? 1. : ref[k] == query[1]? 1. - qual[1] : qual[1] * EM;
- set_u(u, bw, 1, k);
- if (u < 3 || u >= bw2*3+3) continue;
- sum += e * b[1][u+0] * bM + EI * b[1][u+1] * bI;
- }
- set_u(k, bw, 0, 0);
- pb = b[0][k] = sum / s[0]; // if everything works as is expected, pb == 1.0
- }
- is_diff = fabs(pb - 1.) > 1e-7? 1 : 0;
- /*** MAP ***/
- for (i = 1; i <= l_query; ++i) {
- double sum = 0., *fi = f[i], *bi = b[i], max = 0.;
- int beg = 1, end = l_ref, x, max_k = -1;
- x = i - bw; beg = beg > x? beg : x;
- x = i + bw; end = end < x? end : x;
- for (k = beg; k <= end; ++k) {
- int u;
- double z;
- set_u(u, bw, i, k);
- z = fi[u+0] * bi[u+0]; if (z > max) max = z, max_k = (k-1)<<2 | 0; sum += z;
- z = fi[u+1] * bi[u+1]; if (z > max) max = z, max_k = (k-1)<<2 | 1; sum += z;
- }
- max /= sum; sum *= s[i]; // if everything works as is expected, sum == 1.0
- if (state) state[i-1] = max_k;
- if (q) k = (int)(-4.343 * log(1. - max) + .499), q[i-1] = k > 100? 99 : k;
-#ifdef _MAIN
- fprintf(stderr, "(%.10lg,%.10lg) (%d,%d:%c,%c:%d) %lg\n", pb, sum, i-1, max_k>>2,
- "ACGT"[query[i]], "ACGT"[ref[(max_k>>2)+1]], max_k&3, max); // DEBUG
-#endif
- }
- /*** free ***/
- for (i = 0; i <= l_query; ++i) {
- free(f[i]); free(b[i]);
- }
- free(f); free(b); free(s); free(_qual);
- return Pr;
-}
-
-#ifdef _MAIN
-#include <unistd.h>
-int main(int argc, char *argv[])
-{
- uint8_t conv[256], *iqual, *ref, *query;
- int c, l_ref, l_query, i, q = 30, b = 10, P;
- while ((c = getopt(argc, argv, "b:q:")) >= 0) {
- switch (c) {
- case 'b': b = atoi(optarg); break;
- case 'q': q = atoi(optarg); break;
- }
- }
- if (optind + 2 > argc) {
- fprintf(stderr, "Usage: %s [-q %d] [-b %d] <ref> <query>\n", argv[0], q, b); // example: acttc attc
- return 1;
- }
- memset(conv, 4, 256);
- conv['a'] = conv['A'] = 0; conv['c'] = conv['C'] = 1;
- conv['g'] = conv['G'] = 2; conv['t'] = conv['T'] = 3;
- ref = (uint8_t*)argv[optind]; query = (uint8_t*)argv[optind+1];
- l_ref = strlen((char*)ref); l_query = strlen((char*)query);
- for (i = 0; i < l_ref; ++i) ref[i] = conv[ref[i]];
- for (i = 0; i < l_query; ++i) query[i] = conv[query[i]];
- iqual = malloc(l_query);
- memset(iqual, q, l_query);
- kpa_par_def.bw = b;
- P = kpa_glocal(ref, l_ref, query, l_query, iqual, &kpa_par_alt, 0, 0);
- fprintf(stderr, "%d\n", P);
- free(iqual);
- return 0;
-}
-#endif
diff --git a/src/samtools-0.1.18/kprobaln.h b/src/samtools-0.1.18/kprobaln.h
deleted file mode 100644
index 0357dcc..0000000
--- a/src/samtools-0.1.18/kprobaln.h
+++ /dev/null
@@ -1,49 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2003-2006, 2008, 2009 by Heng Li <lh3 at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-#ifndef LH3_KPROBALN_H_
-#define LH3_KPROBALN_H_
-
-#include <stdint.h>
-
-typedef struct {
- float d, e;
- int bw;
-} kpa_par_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- int kpa_glocal(const uint8_t *_ref, int l_ref, const uint8_t *_query, int l_query, const uint8_t *iqual,
- const kpa_par_t *c, int *state, uint8_t *q);
-
-#ifdef __cplusplus
-}
-#endif
-
-extern kpa_par_t kpa_par_def, kpa_par_alt;
-
-#endif
diff --git a/src/samtools-0.1.18/kseq.h b/src/samtools-0.1.18/kseq.h
deleted file mode 100644
index 0bbc7dc..0000000
--- a/src/samtools-0.1.18/kseq.h
+++ /dev/null
@@ -1,224 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor at live.co.uk>
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Last Modified: 18AUG2011 */
-
-#ifndef AC_KSEQ_H
-#define AC_KSEQ_H
-
-#include <ctype.h>
-#include <string.h>
-#include <stdlib.h>
-
-#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
-#define KS_SEP_TAB 1 // isspace() && !' '
-#define KS_SEP_MAX 1
-
-#define __KS_TYPE(type_t) \
- typedef struct __kstream_t { \
- unsigned char *buf; \
- int begin, end, is_eof; \
- type_t f; \
- } kstream_t;
-
-#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
-#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
-
-#define __KS_BASIC(type_t, __bufsize) \
- static inline kstream_t *ks_init(type_t f) \
- { \
- kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \
- ks->f = f; \
- ks->buf = malloc(__bufsize); \
- return ks; \
- } \
- static inline void ks_destroy(kstream_t *ks) \
- { \
- if (ks) { \
- free(ks->buf); \
- free(ks); \
- } \
- }
-
-#define __KS_GETC(__read, __bufsize) \
- static inline int ks_getc(kstream_t *ks) \
- { \
- if (ks->is_eof && ks->begin >= ks->end) return -1; \
- if (ks->begin >= ks->end) { \
- ks->begin = 0; \
- ks->end = __read(ks->f, ks->buf, __bufsize); \
- if (ks->end < __bufsize) ks->is_eof = 1; \
- if (ks->end == 0) return -1; \
- } \
- return (int)ks->buf[ks->begin++]; \
- }
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
- size_t l, m;
- char *s;
-} kstring_t;
-#endif
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#define __KS_GETUNTIL(__read, __bufsize) \
- static int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \
- { \
- if (dret) *dret = 0; \
- str->l = append? str->l : 0; \
- if (ks->begin >= ks->end && ks->is_eof) return -1; \
- for (;;) { \
- int i; \
- if (ks->begin >= ks->end) { \
- if (!ks->is_eof) { \
- ks->begin = 0; \
- ks->end = __read(ks->f, ks->buf, __bufsize); \
- if (ks->end < __bufsize) ks->is_eof = 1; \
- if (ks->end == 0) break; \
- } else break; \
- } \
- if (delimiter > KS_SEP_MAX) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (ks->buf[i] == delimiter) break; \
- } else if (delimiter == KS_SEP_SPACE) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (isspace(ks->buf[i])) break; \
- } else if (delimiter == KS_SEP_TAB) { \
- for (i = ks->begin; i < ks->end; ++i) \
- if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
- } else i = 0; /* never come to here! */ \
- if (str->m - str->l < i - ks->begin + 1) { \
- str->m = str->l + (i - ks->begin) + 1; \
- kroundup32(str->m); \
- str->s = (char*)realloc(str->s, str->m); \
- } \
- memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
- str->l = str->l + (i - ks->begin); \
- ks->begin = i + 1; \
- if (i < ks->end) { \
- if (dret) *dret = ks->buf[i]; \
- break; \
- } \
- } \
- if (str->s == 0) { \
- str->m = 1; \
- str->s = (char*)calloc(1, 1); \
- } \
- str->s[str->l] = '\0'; \
- return str->l; \
- } \
- static inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
- { return ks_getuntil2(ks, delimiter, str, dret, 0); }
-
-#define KSTREAM_INIT(type_t, __read, __bufsize) \
- __KS_TYPE(type_t) \
- __KS_BASIC(type_t, __bufsize) \
- __KS_GETC(__read, __bufsize) \
- __KS_GETUNTIL(__read, __bufsize)
-
-#define __KSEQ_BASIC(type_t) \
- static inline kseq_t *kseq_init(type_t fd) \
- { \
- kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t)); \
- s->f = ks_init(fd); \
- return s; \
- } \
- static inline void kseq_rewind(kseq_t *ks) \
- { \
- ks->last_char = 0; \
- ks->f->is_eof = ks->f->begin = ks->f->end = 0; \
- } \
- static inline void kseq_destroy(kseq_t *ks) \
- { \
- if (!ks) return; \
- free(ks->name.s); free(ks->comment.s); free(ks->seq.s); free(ks->qual.s); \
- ks_destroy(ks->f); \
- free(ks); \
- }
-
-/* Return value:
- >=0 length of the sequence (normal)
- -1 end-of-file
- -2 truncated quality string
- */
-#define __KSEQ_READ \
- static int kseq_read(kseq_t *seq) \
- { \
- int c; \
- kstream_t *ks = seq->f; \
- if (seq->last_char == 0) { /* then jump to the next header line */ \
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \
- if (c == -1) return -1; /* end of file */ \
- seq->last_char = c; \
- } /* else: the first header char has been read in the previous call */ \
- seq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \
- if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \
- if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0); /* read FASTA/Q comment */ \
- if (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \
- seq->seq.m = 256; \
- seq->seq.s = (char*)malloc(seq->seq.m); \
- } \
- while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
- seq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \
- ks_getuntil2(ks, '\n', &seq->seq, 0, 1); /* read the rest of the line */ \
- } \
- if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */ \
- if (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \
- seq->seq.m = seq->seq.l + 2; \
- kroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \
- seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
- } \
- seq->seq.s[seq->seq.l] = 0; /* null terminated string */ \
- if (c != '+') return seq->seq.l; /* FASTA */ \
- if (seq->qual.m < seq->seq.m) { /* allocate memory for qual in case insufficient */ \
- seq->qual.m = seq->seq.m; \
- seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \
- } \
- while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
- if (c == -1) return -2; /* error: no quality string */ \
- while (ks_getuntil2(ks, '\n', &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \
- seq->last_char = 0; /* we have not come to the next header line */ \
- if (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \
- return seq->seq.l; \
- }
-
-#define __KSEQ_TYPE(type_t) \
- typedef struct { \
- kstring_t name, comment, seq, qual; \
- int last_char; \
- kstream_t *f; \
- } kseq_t;
-
-#define KSEQ_INIT(type_t, __read) \
- KSTREAM_INIT(type_t, __read, 16384) \
- __KSEQ_TYPE(type_t) \
- __KSEQ_BASIC(type_t) \
- __KSEQ_READ
-
-#endif
diff --git a/src/samtools-0.1.18/ksort.h b/src/samtools-0.1.18/ksort.h
deleted file mode 100644
index fa850ab..0000000
--- a/src/samtools-0.1.18/ksort.h
+++ /dev/null
@@ -1,281 +0,0 @@
-/* The MIT License
-
- Copyright (c) 2008 Genome Research Ltd (GRL).
-
- Permission is hereby granted, free of charge, to any person obtaining
- a copy of this software and associated documentation files (the
- "Software"), to deal in the Software without restriction, including
- without limitation the rights to use, copy, modify, merge, publish,
- distribute, sublicense, and/or sell copies of the Software, and to
- permit persons to whom the Software is furnished to do so, subject to
- the following conditions:
-
- The above copyright notice and this permission notice shall be
- included in all copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
-*/
-
-/* Contact: Heng Li <lh3 at sanger.ac.uk> */
-
-/*
- 2008-11-16 (0.1.4):
-
- * Fixed a bug in introsort() that happens in rare cases.
-
- 2008-11-05 (0.1.3):
-
- * Fixed a bug in introsort() for complex comparisons.
-
- * Fixed a bug in mergesort(). The previous version is not stable.
-
- 2008-09-15 (0.1.2):
-
- * Accelerated introsort. On my Mac (not on another Linux machine),
- my implementation is as fast as std::sort on random input.
-
- * Added combsort and in introsort, switch to combsort if the
- recursion is too deep.
-
- 2008-09-13 (0.1.1):
-
- * Added k-small algorithm
-
- 2008-09-05 (0.1.0):
-
- * Initial version
-
-*/
-
-#ifndef AC_KSORT_H
-#define AC_KSORT_H
-
-#include <stdlib.h>
-#include <string.h>
-
-typedef struct {
- void *left, *right;
- int depth;
-} ks_isort_stack_t;
-
-#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }
-
-#define KSORT_INIT(name, type_t, __sort_lt) \
- void ks_mergesort_##name(size_t n, type_t array[], type_t temp[]) \
- { \
- type_t *a2[2], *a, *b; \
- int curr, shift; \
- \
- a2[0] = array; \
- a2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n); \
- for (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) { \
- a = a2[curr]; b = a2[1-curr]; \
- if (shift == 0) { \
- type_t *p = b, *i, *eb = a + n; \
- for (i = a; i < eb; i += 2) { \
- if (i == eb - 1) *p++ = *i; \
- else { \
- if (__sort_lt(*(i+1), *i)) { \
- *p++ = *(i+1); *p++ = *i; \
- } else { \
- *p++ = *i; *p++ = *(i+1); \
- } \
- } \
- } \
- } else { \
- size_t i, step = 1ul<<shift; \
- for (i = 0; i < n; i += step<<1) { \
- type_t *p, *j, *k, *ea, *eb; \
- if (n < i + step) { \
- ea = a + n; eb = a; \
- } else { \
- ea = a + i + step; \
- eb = a + (n < i + (step<<1)? n : i + (step<<1)); \
- } \
- j = a + i; k = a + i + step; p = b + i; \
- while (j < ea && k < eb) { \
- if (__sort_lt(*k, *j)) *p++ = *k++; \
- else *p++ = *j++; \
- } \
- while (j < ea) *p++ = *j++; \
- while (k < eb) *p++ = *k++; \
- } \
- } \
- curr = 1 - curr; \
- } \
- if (curr == 1) { \
- type_t *p = a2[0], *i = a2[1], *eb = array + n; \
- for (; p < eb; ++i) *p++ = *i; \
- } \
- if (temp == 0) free(a2[1]); \
- } \
- void ks_heapadjust_##name(size_t i, size_t n, type_t l[]) \
- { \
- size_t k = i; \
- type_t tmp = l[i]; \
- while ((k = (k << 1) + 1) < n) { \
- if (k != n - 1 && __sort_lt(l[k], l[k+1])) ++k; \
- if (__sort_lt(l[k], tmp)) break; \
- l[i] = l[k]; i = k; \
- } \
- l[i] = tmp; \
- } \
- void ks_heapmake_##name(size_t lsize, type_t l[]) \
- { \
- size_t i; \
- for (i = (lsize >> 1) - 1; i != (size_t)(-1); --i) \
- ks_heapadjust_##name(i, lsize, l); \
- } \
- void ks_heapsort_##name(size_t lsize, type_t l[]) \
- { \
- size_t i; \
- for (i = lsize - 1; i > 0; --i) { \
- type_t tmp; \
- tmp = *l; *l = l[i]; l[i] = tmp; ks_heapadjust_##name(0, i, l); \
- } \
- } \
- inline void __ks_insertsort_##name(type_t *s, type_t *t) \
- { \
- type_t *i, *j, swap_tmp; \
- for (i = s + 1; i < t; ++i) \
- for (j = i; j > s && __sort_lt(*j, *(j-1)); --j) { \
- swap_tmp = *j; *j = *(j-1); *(j-1) = swap_tmp; \
- } \
- } \
- void ks_combsort_##name(size_t n, type_t a[]) \
- { \
- const double shrink_factor = 1.2473309501039786540366528676643; \
- int do_swap; \
- size_t gap = n; \
- type_t tmp, *i, *j; \
- do { \
- if (gap > 2) { \
- gap = (size_t)(gap / shrink_factor); \
- if (gap == 9 || gap == 10) gap = 11; \
- } \
- do_swap = 0; \
- for (i = a; i < a + n - gap; ++i) { \
- j = i + gap; \
- if (__sort_lt(*j, *i)) { \
- tmp = *i; *i = *j; *j = tmp; \
- do_swap = 1; \
- } \
- } \
- } while (do_swap || gap > 2); \
- if (gap != 1) __ks_insertsort_##name(a, a + n); \
- } \
- void ks_introsort_##name(size_t n, type_t a[]) \
- { \
- int d; \
- ks_isort_stack_t *top, *stack; \
- type_t rp, swap_tmp; \
- type_t *s, *t, *i, *j, *k; \
- \
- if (n < 1) return; \
- else if (n == 2) { \
- if (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \
- return; \
- } \
- for (d = 2; 1ul<<d < n; ++d); \
- stack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \
- top = stack; s = a; t = a + (n-1); d <<= 1; \
- while (1) { \
- if (s < t) { \
- if (--d == 0) { \
- ks_combsort_##name(t - s + 1, s); \
- t = s; \
- continue; \
- } \
- i = s; j = t; k = i + ((j-i)>>1) + 1; \
- if (__sort_lt(*k, *i)) { \
- if (__sort_lt(*k, *j)) k = j; \
- } else k = __sort_lt(*j, *i)? i : j; \
- rp = *k; \
- if (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; } \
- for (;;) { \
- do ++i; while (__sort_lt(*i, rp)); \
- do --j; while (i <= j && __sort_lt(rp, *j)); \
- if (j <= i) break; \
- swap_tmp = *i; *i = *j; *j = swap_tmp; \
- } \
- swap_tmp = *i; *i = *t; *t = swap_tmp; \
- if (i-s > t-i) { \
- if (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \
- s = t-i > 16? i+1 : t; \
- } else { \
- if (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \
- t = i-s > 16? i-1 : s; \
- } \
- } else { \
- if (top == stack) { \
- free(stack); \
- __ks_insertsort_##name(a, a+n); \
- return; \
- } else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \
- } \
- } \
- } \
- /* This function is adapted from: http://ndevilla.free.fr/median/ */ \
- /* 0 <= kk < n */ \
- type_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk) \
- { \
- type_t *low, *high, *k, *ll, *hh, *mid; \
- low = arr; high = arr + n - 1; k = arr + kk; \
- for (;;) { \
- if (high <= low) return *k; \
- if (high == low + 1) { \
- if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
- return *k; \
- } \
- mid = low + (high - low) / 2; \
- if (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \
- if (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \
- if (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low); \
- KSORT_SWAP(type_t, *mid, *(low+1)); \
- ll = low + 1; hh = high; \
- for (;;) { \
- do ++ll; while (__sort_lt(*ll, *low)); \
- do --hh; while (__sort_lt(*low, *hh)); \
- if (hh < ll) break; \
- KSORT_SWAP(type_t, *ll, *hh); \
- } \
- KSORT_SWAP(type_t, *low, *hh); \
- if (hh <= k) low = ll; \
- if (hh >= k) high = hh - 1; \
- } \
- } \
- void ks_shuffle_##name(size_t n, type_t a[]) \
- { \
- int i, j; \
- for (i = n; i > 1; --i) { \
- type_t tmp; \
- j = (int)(drand48() * i); \
- tmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp; \
- } \
- }
-
-#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)
-#define ks_introsort(name, n, a) ks_introsort_##name(n, a)
-#define ks_combsort(name, n, a) ks_combsort_##name(n, a)
-#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)
-#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)
-#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)
-#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)
-#define ks_shuffle(name, n, a) ks_shuffle_##name(n, a)
-
-#define ks_lt_generic(a, b) ((a) < (b))
-#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)
-
-typedef const char *ksstr_t;
-
-#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)
-#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)
-
-#endif
diff --git a/src/samtools-0.1.18/kstring.c b/src/samtools-0.1.18/kstring.c
deleted file mode 100644
index b2a0dab..0000000
--- a/src/samtools-0.1.18/kstring.c
+++ /dev/null
@@ -1,212 +0,0 @@
-#include <stdarg.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-#include <stdint.h>
-#include "kstring.h"
-
-int ksprintf(kstring_t *s, const char *fmt, ...)
-{
- va_list ap;
- int l;
- va_start(ap, fmt);
- l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'.
- va_end(ap);
- if (l + 1 > s->m - s->l) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- va_start(ap, fmt);
- l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap);
- }
- va_end(ap);
- s->l += l;
- return l;
-}
-
-char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux)
-{
- const char *p, *start;
- if (sep) { // set up the table
- if (str == 0 && (aux->tab[0]&1)) return 0; // no need to set up if we have finished
- aux->finished = 0;
- if (sep[1]) {
- aux->sep = -1;
- aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0;
- for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f);
- } else aux->sep = sep[0];
- }
- if (aux->finished) return 0;
- else if (str) aux->p = str - 1, aux->finished = 0;
- if (aux->sep < 0) {
- for (p = start = aux->p + 1; *p; ++p)
- if (aux->tab[*p>>6]>>(*p&0x3f)&1) break;
- } else {
- for (p = start = aux->p + 1; *p; ++p)
- if (*p == aux->sep) break;
- }
- aux->p = p; // end of token
- if (*p == 0) aux->finished = 1; // no more tokens
- return (char*)start;
-}
-
-// s MUST BE a null terminated string; l = strlen(s)
-int ksplit_core(char *s, int delimiter, int *_max, int **_offsets)
-{
- int i, n, max, last_char, last_start, *offsets, l;
- n = 0; max = *_max; offsets = *_offsets;
- l = strlen(s);
-
-#define __ksplit_aux do { \
- if (_offsets) { \
- s[i] = 0; \
- if (n == max) { \
- max = max? max<<1 : 2; \
- offsets = (int*)realloc(offsets, sizeof(int) * max); \
- } \
- offsets[n++] = last_start; \
- } else ++n; \
- } while (0)
-
- for (i = 0, last_char = last_start = 0; i <= l; ++i) {
- if (delimiter == 0) {
- if (isspace(s[i]) || s[i] == 0) {
- if (isgraph(last_char)) __ksplit_aux; // the end of a field
- } else {
- if (isspace(last_char) || last_char == 0) last_start = i;
- }
- } else {
- if (s[i] == delimiter || s[i] == 0) {
- if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field
- } else {
- if (last_char == delimiter || last_char == 0) last_start = i;
- }
- }
- last_char = s[i];
- }
- *_max = max; *_offsets = offsets;
- return n;
-}
-
-/**********************
- * Boyer-Moore search *
- **********************/
-
-typedef unsigned char ubyte_t;
-
-// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html
-static int *ksBM_prep(const ubyte_t *pat, int m)
-{
- int i, *suff, *prep, *bmGs, *bmBc;
- prep = calloc(m + 256, sizeof(int));
- bmGs = prep; bmBc = prep + m;
- { // preBmBc()
- for (i = 0; i < 256; ++i) bmBc[i] = m;
- for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1;
- }
- suff = calloc(m, sizeof(int));
- { // suffixes()
- int f = 0, g;
- suff[m - 1] = m;
- g = m - 1;
- for (i = m - 2; i >= 0; --i) {
- if (i > g && suff[i + m - 1 - f] < i - g)
- suff[i] = suff[i + m - 1 - f];
- else {
- if (i < g) g = i;
- f = i;
- while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g;
- suff[i] = f - g;
- }
- }
- }
- { // preBmGs()
- int j = 0;
- for (i = 0; i < m; ++i) bmGs[i] = m;
- for (i = m - 1; i >= 0; --i)
- if (suff[i] == i + 1)
- for (; j < m - 1 - i; ++j)
- if (bmGs[j] == m)
- bmGs[j] = m - 1 - i;
- for (i = 0; i <= m - 2; ++i)
- bmGs[m - 1 - suff[i]] = m - 1 - i;
- }
- free(suff);
- return prep;
-}
-
-void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep)
-{
- int i, j, *prep = 0, *bmGs, *bmBc;
- const ubyte_t *str, *pat;
- str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat;
- prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep;
- if (_prep && *_prep == 0) *_prep = prep;
- bmGs = prep; bmBc = prep + m;
- j = 0;
- while (j <= n - m) {
- for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i);
- if (i >= 0) {
- int max = bmBc[str[i+j]] - m + 1 + i;
- if (max < bmGs[i]) max = bmGs[i];
- j += max;
- } else return (void*)(str + j);
- }
- if (_prep == 0) free(prep);
- return 0;
-}
-
-char *kstrstr(const char *str, const char *pat, int **_prep)
-{
- return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep);
-}
-
-char *kstrnstr(const char *str, const char *pat, int n, int **_prep)
-{
- return (char*)kmemmem(str, n, pat, strlen(pat), _prep);
-}
-
-/***********************
- * The main() function *
- ***********************/
-
-#ifdef KSTRING_MAIN
-#include <stdio.h>
-int main()
-{
- kstring_t *s;
- int *fields, n, i;
- ks_tokaux_t aux;
- char *p;
- s = (kstring_t*)calloc(1, sizeof(kstring_t));
- // test ksprintf()
- ksprintf(s, " abcdefg: %d ", 100);
- printf("'%s'\n", s->s);
- // test ksplit()
- fields = ksplit(s, 0, &n);
- for (i = 0; i < n; ++i)
- printf("field[%d] = '%s'\n", i, s->s + fields[i]);
- // test kstrtok()
- s->l = 0;
- for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) {
- kputsn(p, aux.p - p, s);
- kputc('\n', s);
- }
- printf("%s", s->s);
- // free
- free(s->s); free(s); free(fields);
-
- {
- static char *str = "abcdefgcdgcagtcakcdcd";
- static char *pat = "cd";
- char *ret, *s = str;
- int *prep = 0;
- while ((ret = kstrstr(s, pat, &prep)) != 0) {
- printf("match: %s\n", ret);
- s = ret + prep[0];
- }
- free(prep);
- }
- return 0;
-}
-#endif
diff --git a/src/samtools-0.1.18/kstring.h b/src/samtools-0.1.18/kstring.h
deleted file mode 100644
index ec5775b..0000000
--- a/src/samtools-0.1.18/kstring.h
+++ /dev/null
@@ -1,117 +0,0 @@
-#ifndef KSTRING_H
-#define KSTRING_H
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#ifndef kroundup32
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-#endif
-
-#ifndef KSTRING_T
-#define KSTRING_T kstring_t
-typedef struct __kstring_t {
- size_t l, m;
- char *s;
-} kstring_t;
-#endif
-
-typedef struct {
- uint64_t tab[4];
- int sep, finished;
- const char *p; // end of the current token
-} ks_tokaux_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- int ksprintf(kstring_t *s, const char *fmt, ...);
- int ksplit_core(char *s, int delimiter, int *_max, int **_offsets);
- char *kstrstr(const char *str, const char *pat, int **_prep);
- char *kstrnstr(const char *str, const char *pat, int n, int **_prep);
- void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep);
-
- /* kstrtok() is similar to strtok_r() except that str is not
- * modified and both str and sep can be NULL. For efficiency, it is
- * actually recommended to set both to NULL in the subsequent calls
- * if sep is not changed. */
- char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux);
-
-#ifdef __cplusplus
-}
-#endif
-
-static inline int kputsn(const char *p, int l, kstring_t *s)
-{
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- memcpy(s->s + s->l, p, l);
- s->l += l;
- s->s[s->l] = 0;
- return l;
-}
-
-static inline int kputs(const char *p, kstring_t *s)
-{
- return kputsn(p, strlen(p), s);
-}
-
-static inline int kputc(int c, kstring_t *s)
-{
- if (s->l + 1 >= s->m) {
- s->m = s->l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- s->s[s->l++] = c;
- s->s[s->l] = 0;
- return c;
-}
-
-static inline int kputw(int c, kstring_t *s)
-{
- char buf[16];
- int l, x;
- if (c == 0) return kputc('0', s);
- for (l = 0, x = c < 0? -c : c; x > 0; x /= 10) buf[l++] = x%10 + '0';
- if (c < 0) buf[l++] = '-';
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x];
- s->s[s->l] = 0;
- return 0;
-}
-
-static inline int kputuw(unsigned c, kstring_t *s)
-{
- char buf[16];
- int l, i;
- unsigned x;
- if (c == 0) return kputc('0', s);
- for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0';
- if (s->l + l + 1 >= s->m) {
- s->m = s->l + l + 2;
- kroundup32(s->m);
- s->s = (char*)realloc(s->s, s->m);
- }
- for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
- s->s[s->l] = 0;
- return 0;
-}
-
-static inline int *ksplit(kstring_t *s, int delimiter, int *n)
-{
- int max = 0, *offsets = 0;
- *n = ksplit_core(s->s, delimiter, &max, &offsets);
- return offsets;
-}
-
-#endif
diff --git a/src/samtools-0.1.18/phase.c b/src/samtools-0.1.18/phase.c
deleted file mode 100644
index ef4eff9..0000000
--- a/src/samtools-0.1.18/phase.c
+++ /dev/null
@@ -1,687 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <math.h>
-#include <zlib.h>
-#include "bam.h"
-#include "errmod.h"
-
-#include "kseq.h"
-KSTREAM_INIT(gzFile, gzread, 16384)
-
-#define MAX_VARS 256
-#define FLIP_PENALTY 2
-#define FLIP_THRES 4
-#define MASK_THRES 3
-
-#define FLAG_FIX_CHIMERA 0x1
-#define FLAG_LIST_EXCL 0x4
-#define FLAG_DROP_AMBI 0x8
-
-typedef struct {
- // configurations, initialized in the main function
- int flag, k, min_baseQ, min_varLOD, max_depth;
- // other global variables
- int vpos_shift;
- bamFile fp;
- char *pre;
- bamFile out[3];
- // alignment queue
- int n, m;
- bam1_t **b;
-} phaseg_t;
-
-typedef struct {
- int8_t seq[MAX_VARS]; // TODO: change to dynamic memory allocation!
- int vpos, beg, end;
- uint32_t vlen:16, single:1, flip:1, phase:1, phased:1, ambig:1;
- uint32_t in:16, out:16; // in-phase and out-phase
-} frag_t, *frag_p;
-
-#define rseq_lt(a,b) ((a)->vpos < (b)->vpos)
-
-#include "khash.h"
-KHASH_SET_INIT_INT64(set64)
-KHASH_MAP_INIT_INT64(64, frag_t)
-
-typedef khash_t(64) nseq_t;
-
-#include "ksort.h"
-KSORT_INIT(rseq, frag_p, rseq_lt)
-
-static char nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };
-
-static inline uint64_t X31_hash_string(const char *s)
-{
- uint64_t h = *s;
- if (h) for (++s ; *s; ++s) h = (h << 5) - h + *s;
- return h;
-}
-
-static void count1(int l, const uint8_t *seq, int *cnt)
-{
- int i, j, n_ambi;
- uint32_t z, x;
- if (seq[l-1] == 0) return; // do nothing is the last base is ambiguous
- for (i = n_ambi = 0; i < l; ++i) // collect ambiguous bases
- if (seq[i] == 0) ++n_ambi;
- if (l - n_ambi <= 1) return; // only one SNP
- for (x = 0; x < 1u<<n_ambi; ++x) { // count
- for (i = j = 0, z = 0; i < l; ++i) {
- int c;
- if (seq[i]) c = seq[i] - 1;
- else {
- c = x>>j&1;
- ++j;
- }
- z = z<<1 | c;
- }
- ++cnt[z];
- }
-}
-
-static int **count_all(int l, int vpos, nseq_t *hash)
-{
- khint_t k;
- int i, j, **cnt;
- uint8_t *seq;
- seq = calloc(l, 1);
- cnt = calloc(vpos, sizeof(void*));
- for (i = 0; i < vpos; ++i) cnt[i] = calloc(1<<l, sizeof(int));
- for (k = 0; k < kh_end(hash); ++k) {
- if (kh_exist(hash, k)) {
- frag_t *f = &kh_val(hash, k);
- if (f->vpos >= vpos || f->single) continue; // out of region; or singleton
- if (f->vlen == 1) { // such reads should be flagged as deleted previously if everything is right
- f->single = 1;
- continue;
- }
- for (j = 1; j < f->vlen; ++j) {
- for (i = 0; i < l; ++i)
- seq[i] = j < l - 1 - i? 0 : f->seq[j - (l - 1 - i)];
- count1(l, seq, cnt[f->vpos + j]);
- }
- }
- }
- free(seq);
- return cnt;
-}
-
-// phasing
-static int8_t *dynaprog(int l, int vpos, int **w)
-{
- int *f[2], *curr, *prev, max, i;
- int8_t **b, *h = 0;
- uint32_t x, z = 1u<<(l-1), mask = (1u<<l) - 1;
- f[0] = calloc(z, sizeof(int));
- f[1] = calloc(z, sizeof(int));
- b = calloc(vpos, sizeof(void*));
- prev = f[0]; curr = f[1];
- // fill the backtrack matrix
- for (i = 0; i < vpos; ++i) {
- int *wi = w[i], *tmp;
- int8_t *bi;
- bi = b[i] = calloc(z, 1);
- /* In the following, x is the current state, which is the
- * lexicographically smaller local haplotype. xc is the complement of
- * x, or the larger local haplotype; y0 and y1 are the two predecessors
- * of x. */
- for (x = 0; x < z; ++x) { // x0 is the smaller
- uint32_t y0, y1, xc;
- int c0, c1;
- xc = ~x&mask; y0 = x>>1; y1 = xc>>1;
- c0 = prev[y0] + wi[x] + wi[xc];
- c1 = prev[y1] + wi[x] + wi[xc];
- if (c0 > c1) bi[x] = 0, curr[x] = c0;
- else bi[x] = 1, curr[x] = c1;
- }
- tmp = prev; prev = curr; curr = tmp; // swap
- }
- { // backtrack
- uint32_t max_x = 0;
- int which = 0;
- h = calloc(vpos, 1);
- for (x = 0, max = 0, max_x = 0; x < z; ++x)
- if (prev[x] > max) max = prev[x], max_x = x;
- for (i = vpos - 1, x = max_x; i >= 0; --i) {
- h[i] = which? (~x&1) : (x&1);
- which = b[i][x]? !which : which;
- x = b[i][x]? (~x&mask)>>1 : x>>1;
- }
- }
- // free
- for (i = 0; i < vpos; ++i) free(b[i]);
- free(f[0]); free(f[1]); free(b);
- return h;
-}
-
-// phase each fragment
-static uint64_t *fragphase(int vpos, const int8_t *path, nseq_t *hash, int flip)
-{
- khint_t k;
- uint64_t *pcnt;
- uint32_t *left, *rght, max;
- left = rght = 0; max = 0;
- pcnt = calloc(vpos, 8);
- for (k = 0; k < kh_end(hash); ++k) {
- if (kh_exist(hash, k)) {
- int i, c[2];
- frag_t *f = &kh_val(hash, k);
- if (f->vpos >= vpos) continue;
- // get the phase
- c[0] = c[1] = 0;
- for (i = 0; i < f->vlen; ++i) {
- if (f->seq[i] == 0) continue;
- ++c[f->seq[i] == path[f->vpos + i] + 1? 0 : 1];
- }
- f->phase = c[0] > c[1]? 0 : 1;
- f->in = c[f->phase]; f->out = c[1 - f->phase];
- f->phased = f->in == f->out? 0 : 1;
- f->ambig = (f->in && f->out && f->out < 3 && f->in <= f->out + 1)? 1 : 0;
- // fix chimera
- f->flip = 0;
- if (flip && c[0] >= 3 && c[1] >= 3) {
- int sum[2], m, mi, md;
- if (f->vlen > max) { // enlarge the array
- max = f->vlen;
- kroundup32(max);
- left = realloc(left, max * 4);
- rght = realloc(rght, max * 4);
- }
- for (i = 0, sum[0] = sum[1] = 0; i < f->vlen; ++i) { // get left counts
- if (f->seq[i]) {
- int c = f->phase? 2 - f->seq[i] : f->seq[i] - 1;
- ++sum[c == path[f->vpos + i]? 0 : 1];
- }
- left[i] = sum[1]<<16 | sum[0];
- }
- for (i = f->vlen - 1, sum[0] = sum[1] = 0; i >= 0; --i) { // get right counts
- if (f->seq[i]) {
- int c = f->phase? 2 - f->seq[i] : f->seq[i] - 1;
- ++sum[c == path[f->vpos + i]? 0 : 1];
- }
- rght[i] = sum[1]<<16 | sum[0];
- }
- // find the best flip point
- for (i = m = 0, mi = -1, md = -1; i < f->vlen - 1; ++i) {
- int a[2];
- a[0] = (left[i]&0xffff) + (rght[i+1]>>16&0xffff) - (rght[i+1]&0xffff) * FLIP_PENALTY;
- a[1] = (left[i]>>16&0xffff) + (rght[i+1]&0xffff) - (rght[i+1]>>16&0xffff) * FLIP_PENALTY;
- if (a[0] > a[1]) {
- if (a[0] > m) m = a[0], md = 0, mi = i;
- } else {
- if (a[1] > m) m = a[1], md = 1, mi = i;
- }
- }
- if (m - c[0] >= FLIP_THRES && m - c[1] >= FLIP_THRES) { // then flip
- f->flip = 1;
- if (md == 0) { // flip the tail
- for (i = mi + 1; i < f->vlen; ++i)
- if (f->seq[i] == 1) f->seq[i] = 2;
- else if (f->seq[i] == 2) f->seq[i] = 1;
- } else { // flip the head
- for (i = 0; i <= mi; ++i)
- if (f->seq[i] == 1) f->seq[i] = 2;
- else if (f->seq[i] == 2) f->seq[i] = 1;
- }
- }
- }
- // update pcnt[]
- if (!f->single) {
- for (i = 0; i < f->vlen; ++i) {
- int c;
- if (f->seq[i] == 0) continue;
- c = f->phase? 2 - f->seq[i] : f->seq[i] - 1;
- if (c == path[f->vpos + i]) {
- if (f->phase == 0) ++pcnt[f->vpos + i];
- else pcnt[f->vpos + i] += 1ull<<32;
- } else {
- if (f->phase == 0) pcnt[f->vpos + i] += 1<<16;
- else pcnt[f->vpos + i] += 1ull<<48;
- }
- }
- }
- }
- }
- free(left); free(rght);
- return pcnt;
-}
-
-static uint64_t *genmask(int vpos, const uint64_t *pcnt, int *_n)
-{
- int i, max = 0, max_i = -1, m = 0, n = 0, beg = 0, score = 0;
- uint64_t *list = 0;
- for (i = 0; i < vpos; ++i) {
- uint64_t x = pcnt[i];
- int c[4], pre = score, s;
- c[0] = x&0xffff; c[1] = x>>16&0xffff; c[2] = x>>32&0xffff; c[3] = x>>48&0xffff;
- s = (c[1] + c[3] == 0)? -(c[0] + c[2]) : (c[1] + c[3] - 1);
- if (c[3] > c[2]) s += c[3] - c[2];
- if (c[1] > c[0]) s += c[1] - c[0];
- score += s;
- if (score < 0) score = 0;
- if (pre == 0 && score > 0) beg = i; // change from zero to non-zero
- if ((i == vpos - 1 || score == 0) && max >= MASK_THRES) {
- if (n == m) {
- m = m? m<<1 : 4;
- list = realloc(list, m * 8);
- }
- list[n++] = (uint64_t)beg<<32 | max_i;
- i = max_i; // reset i to max_i
- score = 0;
- } else if (score > max) max = score, max_i = i;
- if (score == 0) max = 0;
- }
- *_n = n;
- return list;
-}
-
-// trim heading and tailing ambiguous bases; mark deleted and remove sequence
-static int clean_seqs(int vpos, nseq_t *hash)
-{
- khint_t k;
- int ret = 0;
- for (k = 0; k < kh_end(hash); ++k) {
- if (kh_exist(hash, k)) {
- frag_t *f = &kh_val(hash, k);
- int beg, end, i;
- if (f->vpos >= vpos) {
- ret = 1;
- continue;
- }
- for (i = 0; i < f->vlen; ++i)
- if (f->seq[i] != 0) break;
- beg = i;
- for (i = f->vlen - 1; i >= 0; --i)
- if (f->seq[i] != 0) break;
- end = i + 1;
- if (end - beg <= 0) kh_del(64, hash, k);
- else {
- if (beg != 0) memmove(f->seq, f->seq + beg, end - beg);
- f->vpos += beg; f->vlen = end - beg;
- f->single = f->vlen == 1? 1 : 0;
- }
- }
- }
- return ret;
-}
-
-static void dump_aln(phaseg_t *g, int min_pos, const nseq_t *hash)
-{
- int i, is_flip, drop_ambi;
- drop_ambi = g->flag & FLAG_DROP_AMBI;
- is_flip = (drand48() < 0.5);
- for (i = 0; i < g->n; ++i) {
- int end, which;
- uint64_t key;
- khint_t k;
- bam1_t *b = g->b[i];
- key = X31_hash_string(bam1_qname(b));
- end = bam_calend(&b->core, bam1_cigar(b));
- if (end > min_pos) break;
- k = kh_get(64, hash, key);
- if (k == kh_end(hash)) which = 3;
- else {
- frag_t *f = &kh_val(hash, k);
- if (f->ambig) which = drop_ambi? 2 : 3;
- else if (f->phased && f->flip) which = 2;
- else if (f->phased == 0) which = 3;
- else { // phased and not flipped
- char c = 'Y';
- which = f->phase;
- bam_aux_append(b, "ZP", 'A', 1, (uint8_t*)&c);
- }
- if (which < 2 && is_flip) which = 1 - which; // increase the randomness
- }
- if (which == 3) which = (drand48() < 0.5);
- bam_write1(g->out[which], b);
- bam_destroy1(b);
- g->b[i] = 0;
- }
- memmove(g->b, g->b + i, (g->n - i) * sizeof(void*));
- g->n -= i;
-}
-
-static int phase(phaseg_t *g, const char *chr, int vpos, uint64_t *cns, nseq_t *hash)
-{
- int i, j, n_seqs = kh_size(hash), n_masked = 0, min_pos;
- khint_t k;
- frag_t **seqs;
- int8_t *path, *sitemask;
- uint64_t *pcnt, *regmask;
-
- if (vpos == 0) return 0;
- i = clean_seqs(vpos, hash); // i is true if hash has an element with its vpos >= vpos
- min_pos = i? cns[vpos]>>32 : 0x7fffffff;
- if (vpos == 1) {
- printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1);
- printf("M0\t%s\t%d\t%d\t%c\t%c\t%d\t0\t0\t0\t0\n//\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[0]>>32) + 1,
- "ACGTX"[cns[0]&3], "ACGTX"[cns[0]>>16&3], g->vpos_shift + 1);
- for (k = 0; k < kh_end(hash); ++k) {
- if (kh_exist(hash, k)) {
- frag_t *f = &kh_val(hash, k);
- if (f->vpos) continue;
- f->flip = 0;
- if (f->seq[0] == 0) f->phased = 0;
- else f->phased = 1, f->phase = f->seq[0] - 1;
- }
- }
- dump_aln(g, min_pos, hash);
- ++g->vpos_shift;
- return 1;
- }
- { // phase
- int **cnt;
- uint64_t *mask;
- printf("PS\t%s\t%d\t%d\n", chr, (int)(cns[0]>>32) + 1, (int)(cns[vpos-1]>>32) + 1);
- sitemask = calloc(vpos, 1);
- cnt = count_all(g->k, vpos, hash);
- path = dynaprog(g->k, vpos, cnt);
- for (i = 0; i < vpos; ++i) free(cnt[i]);
- free(cnt);
- pcnt = fragphase(vpos, path, hash, 0); // do not fix chimeras when masking
- mask = genmask(vpos, pcnt, &n_masked);
- regmask = calloc(n_masked, 8);
- for (i = 0; i < n_masked; ++i) {
- regmask[i] = cns[mask[i]>>32]>>32<<32 | cns[(uint32_t)mask[i]]>>32;
- for (j = mask[i]>>32; j <= (int32_t)mask[i]; ++j)
- sitemask[j] = 1;
- }
- free(mask);
- if (g->flag & FLAG_FIX_CHIMERA) {
- free(pcnt);
- pcnt = fragphase(vpos, path, hash, 1);
- }
- }
- for (i = 0; i < n_masked; ++i)
- printf("FL\t%s\t%d\t%d\n", chr, (int)(regmask[i]>>32) + 1, (int)regmask[i] + 1);
- for (i = 0; i < vpos; ++i) {
- uint64_t x = pcnt[i];
- int8_t c[2];
- c[0] = (cns[i]&0xffff)>>2 == 0? 4 : (cns[i]&3);
- c[1] = (cns[i]>>16&0xffff)>>2 == 0? 4 : (cns[i]>>16&3);
- printf("M%d\t%s\t%d\t%d\t%c\t%c\t%d\t%d\t%d\t%d\t%d\n", sitemask[i]+1, chr, (int)(cns[0]>>32) + 1, (int)(cns[i]>>32) + 1, "ACGTX"[c[path[i]]], "ACGTX"[c[1-path[i]]],
- i + g->vpos_shift + 1, (int)(x&0xffff), (int)(x>>16&0xffff), (int)(x>>32&0xffff), (int)(x>>48&0xffff));
- }
- free(path); free(pcnt); free(regmask); free(sitemask);
- seqs = calloc(n_seqs, sizeof(void*));
- for (k = 0, i = 0; k < kh_end(hash); ++k)
- if (kh_exist(hash, k) && kh_val(hash, k).vpos < vpos && !kh_val(hash, k).single)
- seqs[i++] = &kh_val(hash, k);
- n_seqs = i;
- ks_introsort_rseq(n_seqs, seqs);
- for (i = 0; i < n_seqs; ++i) {
- frag_t *f = seqs[i];
- printf("EV\t0\t%s\t%d\t40\t%dM\t*\t0\t0\t", chr, f->vpos + 1 + g->vpos_shift, f->vlen);
- for (j = 0; j < f->vlen; ++j) {
- uint32_t c = cns[f->vpos + j];
- if (f->seq[j] == 0) putchar('N');
- else putchar("ACGT"[f->seq[j] == 1? (c&3) : (c>>16&3)]);
- }
- printf("\t*\tYP:i:%d\tYF:i:%d\tYI:i:%d\tYO:i:%d\tYS:i:%d\n", f->phase, f->flip, f->in, f->out, f->beg+1);
- }
- free(seqs);
- printf("//\n");
- fflush(stdout);
- g->vpos_shift += vpos;
- dump_aln(g, min_pos, hash);
- return vpos;
-}
-
-static void update_vpos(int vpos, nseq_t *hash)
-{
- khint_t k;
- for (k = 0; k < kh_end(hash); ++k) {
- if (kh_exist(hash, k)) {
- frag_t *f = &kh_val(hash, k);
- if (f->vpos < vpos) kh_del(64, hash, k); // TODO: if frag_t::seq is allocated dynamically, free it
- else f->vpos -= vpos;
- }
- }
-}
-
-static nseq_t *shrink_hash(nseq_t *hash) // TODO: to implement
-{
- return hash;
-}
-
-static int readaln(void *data, bam1_t *b)
-{
- phaseg_t *g = (phaseg_t*)data;
- int ret;
- ret = bam_read1(g->fp, b);
- if (ret < 0) return ret;
- if (!(b->core.flag & (BAM_FUNMAP|BAM_FSECONDARY|BAM_FQCFAIL|BAM_FDUP)) && g->pre) {
- if (g->n == g->m) {
- g->m = g->m? g->m<<1 : 16;
- g->b = realloc(g->b, g->m * sizeof(void*));
- }
- g->b[g->n++] = bam_dup1(b);
- }
- return ret;
-}
-
-static khash_t(set64) *loadpos(const char *fn, bam_header_t *h)
-{
- gzFile fp;
- kstream_t *ks;
- int ret, dret;
- kstring_t *str;
- khash_t(set64) *hash;
-
- hash = kh_init(set64);
- str = calloc(1, sizeof(kstring_t));
- fp = strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
- ks = ks_init(fp);
- while (ks_getuntil(ks, 0, str, &dret) >= 0) {
- int tid = bam_get_tid(h, str->s);
- if (tid >= 0 && dret != '\n') {
- if (ks_getuntil(ks, 0, str, &dret) >= 0) {
- uint64_t x = (uint64_t)tid<<32 | (atoi(str->s) - 1);
- kh_put(set64, hash, x, &ret);
- } else break;
- }
- if (dret != '\n') while ((dret = ks_getc(ks)) > 0 && dret != '\n');
- if (dret < 0) break;
- }
- ks_destroy(ks);
- gzclose(fp);
- free(str->s); free(str);
- return hash;
-}
-
-static int gl2cns(float q[16])
-{
- int i, j, min_ij;
- float min, min2;
- min = min2 = 1e30; min_ij = -1;
- for (i = 0; i < 4; ++i) {
- for (j = i; j < 4; ++j) {
- if (q[i<<2|j] < min) min_ij = i<<2|j, min2 = min, min = q[i<<2|j];
- else if (q[i<<2|j] < min2) min2 = q[i<<2|j];
- }
- }
- return (min_ij>>2&3) == (min_ij&3)? 0 : 1<<18 | (min_ij>>2&3)<<16 | (min_ij&3) | (int)(min2 - min + .499) << 2;
-}
-
-int main_phase(int argc, char *argv[])
-{
- extern void bam_init_header_hash(bam_header_t *header);
- int c, tid, pos, vpos = 0, n, lasttid = -1, max_vpos = 0;
- const bam_pileup1_t *plp;
- bam_plp_t iter;
- bam_header_t *h;
- nseq_t *seqs;
- uint64_t *cns = 0;
- phaseg_t g;
- char *fn_list = 0;
- khash_t(set64) *set = 0;
- errmod_t *em;
- uint16_t *bases;
-
- memset(&g, 0, sizeof(phaseg_t));
- g.flag = FLAG_FIX_CHIMERA;
- g.min_varLOD = 37; g.k = 13; g.min_baseQ = 13; g.max_depth = 256;
- while ((c = getopt(argc, argv, "Q:eFq:k:b:l:D:A:")) >= 0) {
- switch (c) {
- case 'D': g.max_depth = atoi(optarg); break;
- case 'q': g.min_varLOD = atoi(optarg); break;
- case 'Q': g.min_baseQ = atoi(optarg); break;
- case 'k': g.k = atoi(optarg); break;
- case 'F': g.flag &= ~FLAG_FIX_CHIMERA; break;
- case 'e': g.flag |= FLAG_LIST_EXCL; break;
- case 'A': g.flag |= FLAG_DROP_AMBI; break;
- case 'b': g.pre = strdup(optarg); break;
- case 'l': fn_list = strdup(optarg); break;
- }
- }
- if (argc == optind) {
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: samtools phase [options] <in.bam>\n\n");
- fprintf(stderr, "Options: -k INT block length [%d]\n", g.k);
- fprintf(stderr, " -b STR prefix of BAMs to output [null]\n");
- fprintf(stderr, " -q INT min het phred-LOD [%d]\n", g.min_varLOD);
- fprintf(stderr, " -Q INT min base quality in het calling [%d]\n", g.min_baseQ);
- fprintf(stderr, " -D INT max read depth [%d]\n", g.max_depth);
-// fprintf(stderr, " -l FILE list of sites to phase [null]\n");
- fprintf(stderr, " -F do not attempt to fix chimeras\n");
- fprintf(stderr, " -A drop reads with ambiguous phase\n");
-// fprintf(stderr, " -e do not discover SNPs (effective with -l)\n");
- fprintf(stderr, "\n");
- return 1;
- }
- g.fp = strcmp(argv[optind], "-")? bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r");
- h = bam_header_read(g.fp);
- if (fn_list) { // read the list of sites to phase
- bam_init_header_hash(h);
- set = loadpos(fn_list, h);
- free(fn_list);
- } else g.flag &= ~FLAG_LIST_EXCL;
- if (g.pre) { // open BAMs to write
- char *s = malloc(strlen(g.pre) + 20);
- strcpy(s, g.pre); strcat(s, ".0.bam"); g.out[0] = bam_open(s, "w");
- strcpy(s, g.pre); strcat(s, ".1.bam"); g.out[1] = bam_open(s, "w");
- strcpy(s, g.pre); strcat(s, ".chimera.bam"); g.out[2] = bam_open(s, "w");
- for (c = 0; c <= 2; ++c) bam_header_write(g.out[c], h);
- free(s);
- }
-
- iter = bam_plp_init(readaln, &g);
- g.vpos_shift = 0;
- seqs = kh_init(64);
- em = errmod_init(1. - 0.83);
- bases = calloc(g.max_depth, 2);
- printf("CC\n");
- printf("CC\tDescriptions:\nCC\n");
- printf("CC\t CC comments\n");
- printf("CC\t PS start of a phase set\n");
- printf("CC\t FL filtered region\n");
- printf("CC\t M[012] markers; 0 for singletons, 1 for phased and 2 for filtered\n");
- printf("CC\t EV supporting reads; SAM format\n");
- printf("CC\t // end of a phase set\nCC\n");
- printf("CC\tFormats of PS, FL and M[012] lines (1-based coordinates):\nCC\n");
- printf("CC\t PS chr phaseSetStart phaseSetEnd\n");
- printf("CC\t FL chr filterStart filterEnd\n");
- printf("CC\t M? chr PS pos allele0 allele1 hetIndex #supports0 #errors0 #supp1 #err1\n");
- printf("CC\nCC\n");
- fflush(stdout);
- while ((plp = bam_plp_auto(iter, &tid, &pos, &n)) != 0) {
- int i, k, c, tmp, dophase = 1, in_set = 0;
- float q[16];
- if (tid < 0) break;
- if (tid != lasttid) { // change of chromosome
- g.vpos_shift = 0;
- if (lasttid >= 0) {
- seqs = shrink_hash(seqs);
- phase(&g, h->target_name[lasttid], vpos, cns, seqs);
- update_vpos(0x7fffffff, seqs);
- }
- lasttid = tid;
- vpos = 0;
- }
- if (set && kh_get(set64, set, (uint64_t)tid<<32 | pos) != kh_end(set)) in_set = 1;
- if (n > g.max_depth) continue; // do not proceed if the depth is too high
- // fill the bases array and check if there is a variant
- for (i = k = 0; i < n; ++i) {
- const bam_pileup1_t *p = plp + i;
- uint8_t *seq;
- int q, baseQ, b;
- if (p->is_del || p->is_refskip) continue;
- baseQ = bam1_qual(p->b)[p->qpos];
- if (baseQ < g.min_baseQ) continue;
- seq = bam1_seq(p->b);
- b = bam_nt16_nt4_table[bam1_seqi(seq, p->qpos)];
- if (b > 3) continue;
- q = baseQ < p->b->core.qual? baseQ : p->b->core.qual;
- if (q < 4) q = 4;
- if (q > 63) q = 63;
- bases[k++] = q<<5 | (int)bam1_strand(p->b)<<4 | b;
- }
- if (k == 0) continue;
- errmod_cal(em, k, 4, bases, q); // compute genotype likelihood
- c = gl2cns(q); // get the consensus
- // tell if to proceed
- if (set && (g.flag&FLAG_LIST_EXCL) && !in_set) continue; // not in the list
- if (!in_set && (c&0xffff)>>2 < g.min_varLOD) continue; // not a variant
- // add the variant
- if (vpos == max_vpos) {
- max_vpos = max_vpos? max_vpos<<1 : 128;
- cns = realloc(cns, max_vpos * 8);
- }
- cns[vpos] = (uint64_t)pos<<32 | c;
- for (i = 0; i < n; ++i) {
- const bam_pileup1_t *p = plp + i;
- uint64_t key;
- khint_t k;
- uint8_t *seq = bam1_seq(p->b);
- frag_t *f;
- if (p->is_del || p->is_refskip) continue;
- if (p->b->core.qual == 0) continue;
- // get the base code
- c = nt16_nt4_table[(int)bam1_seqi(seq, p->qpos)];
- if (c == (cns[vpos]&3)) c = 1;
- else if (c == (cns[vpos]>>16&3)) c = 2;
- else c = 0;
- // write to seqs
- key = X31_hash_string(bam1_qname(p->b));
- k = kh_put(64, seqs, key, &tmp);
- f = &kh_val(seqs, k);
- if (tmp == 0) { // present in the hash table
- if (vpos - f->vpos + 1 < MAX_VARS) {
- f->vlen = vpos - f->vpos + 1;
- f->seq[f->vlen-1] = c;
- f->end = bam_calend(&p->b->core, bam1_cigar(p->b));
- }
- dophase = 0;
- } else { // absent
- memset(f->seq, 0, MAX_VARS);
- f->beg = p->b->core.pos;
- f->end = bam_calend(&p->b->core, bam1_cigar(p->b));
- f->vpos = vpos, f->vlen = 1, f->seq[0] = c, f->single = f->phased = f->flip = f->ambig = 0;
- }
- }
- if (dophase) {
- seqs = shrink_hash(seqs);
- phase(&g, h->target_name[tid], vpos, cns, seqs);
- update_vpos(vpos, seqs);
- cns[0] = cns[vpos];
- vpos = 0;
- }
- ++vpos;
- }
- if (tid >= 0) phase(&g, h->target_name[tid], vpos, cns, seqs);
- bam_header_destroy(h);
- bam_plp_destroy(iter);
- bam_close(g.fp);
- kh_destroy(64, seqs);
- kh_destroy(set64, set);
- free(cns);
- errmod_destroy(em);
- free(bases);
- if (g.pre) {
- for (c = 0; c <= 2; ++c) bam_close(g.out[c]);
- free(g.pre); free(g.b);
- }
- return 0;
-}
diff --git a/src/samtools-0.1.18/razf.c b/src/samtools-0.1.18/razf.c
deleted file mode 100644
index e7499f9..0000000
--- a/src/samtools-0.1.18/razf.c
+++ /dev/null
@@ -1,853 +0,0 @@
-/*
- * RAZF : Random Access compressed(Z) File
- * Version: 1.0
- * Release Date: 2008-10-27
- *
- * Copyright 2008, Jue Ruan <ruanjue at gmail.com>, Heng Li <lh3 at sanger.ac.uk>
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-#ifndef _NO_RAZF
-
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include "razf.h"
-
-
-#if ZLIB_VERNUM < 0x1221
-struct _gz_header_s {
- int text;
- uLong time;
- int xflags;
- int os;
- Bytef *extra;
- uInt extra_len;
- uInt extra_max;
- Bytef *name;
- uInt name_max;
- Bytef *comment;
- uInt comm_max;
- int hcrc;
- int done;
-};
-#warning "zlib < 1.2.2.1; RAZF writing is disabled."
-#endif
-
-#define DEF_MEM_LEVEL 8
-
-static inline uint32_t byte_swap_4(uint32_t v){
- v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
- return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
-}
-
-static inline uint64_t byte_swap_8(uint64_t v){
- v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);
- v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);
- return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);
-}
-
-static inline int is_big_endian(){
- int x = 0x01;
- char *c = (char*)&x;
- return (c[0] != 0x01);
-}
-
-#ifndef _RZ_READONLY
-static void add_zindex(RAZF *rz, int64_t in, int64_t out){
- if(rz->index->size == rz->index->cap){
- rz->index->cap = rz->index->cap * 1.5 + 2;
- rz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);
- rz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * (rz->index->cap/RZ_BIN_SIZE + 1));
- }
- if(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;
- rz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];
- rz->index->size ++;
-}
-
-static void save_zindex(RAZF *rz, int fd){
- int32_t i, v32;
- int is_be;
- is_be = is_big_endian();
- if(is_be) write(fd, &rz->index->size, sizeof(int));
- else {
- v32 = byte_swap_4((uint32_t)rz->index->size);
- write(fd, &v32, sizeof(uint32_t));
- }
- v32 = rz->index->size / RZ_BIN_SIZE + 1;
- if(!is_be){
- for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
- for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
- }
- write(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
- write(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size);
-}
-#endif
-
-#ifdef _USE_KNETFILE
-static void load_zindex(RAZF *rz, knetFile *fp){
-#else
-static void load_zindex(RAZF *rz, int fd){
-#endif
- int32_t i, v32;
- int is_be;
- if(!rz->load_index) return;
- if(rz->index == NULL) rz->index = malloc(sizeof(ZBlockIndex));
- is_be = is_big_endian();
-#ifdef _USE_KNETFILE
- knet_read(fp, &rz->index->size, sizeof(int));
-#else
- read(fd, &rz->index->size, sizeof(int));
-#endif
- if(!is_be) rz->index->size = byte_swap_4((uint32_t)rz->index->size);
- rz->index->cap = rz->index->size;
- v32 = rz->index->size / RZ_BIN_SIZE + 1;
- rz->index->bin_offsets = malloc(sizeof(int64_t) * v32);
-#ifdef _USE_KNETFILE
- knet_read(fp, rz->index->bin_offsets, sizeof(int64_t) * v32);
-#else
- read(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);
-#endif
- rz->index->cell_offsets = malloc(sizeof(int) * rz->index->size);
-#ifdef _USE_KNETFILE
- knet_read(fp, rz->index->cell_offsets, sizeof(int) * rz->index->size);
-#else
- read(fd, rz->index->cell_offsets, sizeof(int) * rz->index->size);
-#endif
- if(!is_be){
- for(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);
- for(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);
- }
-}
-
-#ifdef _RZ_READONLY
-static RAZF* razf_open_w(int fd)
-{
- fprintf(stderr, "[razf_open_w] Writing is not available with zlib ver < 1.2.2.1\n");
- return 0;
-}
-#else
-static RAZF* razf_open_w(int fd){
- RAZF *rz;
-#ifdef _WIN32
- setmode(fd, O_BINARY);
-#endif
- rz = calloc(1, sizeof(RAZF));
- rz->mode = 'w';
-#ifdef _USE_KNETFILE
- rz->x.fpw = fd;
-#else
- rz->filedes = fd;
-#endif
- rz->stream = calloc(sizeof(z_stream), 1);
- rz->inbuf = malloc(RZ_BUFFER_SIZE);
- rz->outbuf = malloc(RZ_BUFFER_SIZE);
- rz->index = calloc(sizeof(ZBlockIndex), 1);
- deflateInit2(rz->stream, RZ_COMPRESS_LEVEL, Z_DEFLATED, WINDOW_BITS + 16, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
- rz->stream->avail_out = RZ_BUFFER_SIZE;
- rz->stream->next_out = rz->outbuf;
- rz->header = calloc(sizeof(gz_header), 1);
- rz->header->os = 0x03; //Unix
- rz->header->text = 0;
- rz->header->time = 0;
- rz->header->extra = malloc(7);
- strncpy((char*)rz->header->extra, "RAZF", 4);
- rz->header->extra[4] = 1; // obsolete field
- // block size = RZ_BLOCK_SIZE, Big-Endian
- rz->header->extra[5] = RZ_BLOCK_SIZE >> 8;
- rz->header->extra[6] = RZ_BLOCK_SIZE & 0xFF;
- rz->header->extra_len = 7;
- rz->header->name = rz->header->comment = 0;
- rz->header->hcrc = 0;
- deflateSetHeader(rz->stream, rz->header);
- rz->block_pos = rz->block_off = 0;
- return rz;
-}
-
-static void _razf_write(RAZF* rz, const void *data, int size){
- int tout;
- rz->stream->avail_in = size;
- rz->stream->next_in = (void*)data;
- while(1){
- tout = rz->stream->avail_out;
- deflate(rz->stream, Z_NO_FLUSH);
- rz->out += tout - rz->stream->avail_out;
- if(rz->stream->avail_out) break;
-#ifdef _USE_KNETFILE
- write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#else
- write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#endif
- rz->stream->avail_out = RZ_BUFFER_SIZE;
- rz->stream->next_out = rz->outbuf;
- if(rz->stream->avail_in == 0) break;
- };
- rz->in += size - rz->stream->avail_in;
- rz->block_off += size - rz->stream->avail_in;
-}
-
-static void razf_flush(RAZF *rz){
- uint32_t tout;
- if(rz->buf_len){
- _razf_write(rz, rz->inbuf, rz->buf_len);
- rz->buf_off = rz->buf_len = 0;
- }
- if(rz->stream->avail_out){
-#ifdef _USE_KNETFILE
- write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#else
- write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#endif
- rz->stream->avail_out = RZ_BUFFER_SIZE;
- rz->stream->next_out = rz->outbuf;
- }
- while(1){
- tout = rz->stream->avail_out;
- deflate(rz->stream, Z_FULL_FLUSH);
- rz->out += tout - rz->stream->avail_out;
- if(rz->stream->avail_out == 0){
-#ifdef _USE_KNETFILE
- write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#else
- write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#endif
- rz->stream->avail_out = RZ_BUFFER_SIZE;
- rz->stream->next_out = rz->outbuf;
- } else break;
- }
- rz->block_pos = rz->out;
- rz->block_off = 0;
-}
-
-static void razf_end_flush(RAZF *rz){
- uint32_t tout;
- if(rz->buf_len){
- _razf_write(rz, rz->inbuf, rz->buf_len);
- rz->buf_off = rz->buf_len = 0;
- }
- while(1){
- tout = rz->stream->avail_out;
- deflate(rz->stream, Z_FINISH);
- rz->out += tout - rz->stream->avail_out;
- if(rz->stream->avail_out < RZ_BUFFER_SIZE){
-#ifdef _USE_KNETFILE
- write(rz->x.fpw, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#else
- write(rz->filedes, rz->outbuf, RZ_BUFFER_SIZE - rz->stream->avail_out);
-#endif
- rz->stream->avail_out = RZ_BUFFER_SIZE;
- rz->stream->next_out = rz->outbuf;
- } else break;
- }
-}
-
-static void _razf_buffered_write(RAZF *rz, const void *data, int size){
- int i, n;
- while(1){
- if(rz->buf_len == RZ_BUFFER_SIZE){
- _razf_write(rz, rz->inbuf, rz->buf_len);
- rz->buf_len = 0;
- }
- if(size + rz->buf_len < RZ_BUFFER_SIZE){
- for(i=0;i<size;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
- rz->buf_len += size;
- return;
- } else {
- n = RZ_BUFFER_SIZE - rz->buf_len;
- for(i=0;i<n;i++) ((char*)rz->inbuf + rz->buf_len)[i] = ((char*)data)[i];
- size -= n;
- data += n;
- rz->buf_len += n;
- }
- }
-}
-
-int razf_write(RAZF* rz, const void *data, int size){
- int ori_size, n;
- int64_t next_block;
- ori_size = size;
- next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
- while(rz->in + rz->buf_len + size >= next_block){
- n = next_block - rz->in - rz->buf_len;
- _razf_buffered_write(rz, data, n);
- data += n;
- size -= n;
- razf_flush(rz);
- add_zindex(rz, rz->in, rz->out);
- next_block = ((rz->in / RZ_BLOCK_SIZE) + 1) * RZ_BLOCK_SIZE;
- }
- _razf_buffered_write(rz, data, size);
- return ori_size;
-}
-#endif
-
-/* gzip flag byte */
-#define ASCII_FLAG 0x01 /* bit 0 set: file probably ascii text */
-#define HEAD_CRC 0x02 /* bit 1 set: header CRC present */
-#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
-#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
-#define COMMENT 0x10 /* bit 4 set: file comment present */
-#define RESERVED 0xE0 /* bits 5..7: reserved */
-
-static int _read_gz_header(unsigned char *data, int size, int *extra_off, int *extra_len){
- int method, flags, n, len;
- if(size < 2) return 0;
- if(data[0] != 0x1f || data[1] != 0x8b) return 0;
- if(size < 4) return 0;
- method = data[2];
- flags = data[3];
- if(method != Z_DEFLATED || (flags & RESERVED)) return 0;
- n = 4 + 6; // Skip 6 bytes
- *extra_off = n + 2;
- *extra_len = 0;
- if(flags & EXTRA_FIELD){
- if(size < n + 2) return 0;
- len = ((int)data[n + 1] << 8) | data[n];
- n += 2;
- *extra_off = n;
- while(len){
- if(n >= size) return 0;
- n ++;
- len --;
- }
- *extra_len = n - (*extra_off);
- }
- if(flags & ORIG_NAME) while(n < size && data[n++]);
- if(flags & COMMENT) while(n < size && data[n++]);
- if(flags & HEAD_CRC){
- if(n + 2 > size) return 0;
- n += 2;
- }
- return n;
-}
-
-#ifdef _USE_KNETFILE
-static RAZF* razf_open_r(knetFile *fp, int _load_index){
-#else
-static RAZF* razf_open_r(int fd, int _load_index){
-#endif
- RAZF *rz;
- int ext_off, ext_len;
- int n, is_be, ret;
- int64_t end;
- unsigned char c[] = "RAZF";
- rz = calloc(1, sizeof(RAZF));
- rz->mode = 'r';
-#ifdef _USE_KNETFILE
- rz->x.fpr = fp;
-#else
-#ifdef _WIN32
- setmode(fd, O_BINARY);
-#endif
- rz->filedes = fd;
-#endif
- rz->stream = calloc(sizeof(z_stream), 1);
- rz->inbuf = malloc(RZ_BUFFER_SIZE);
- rz->outbuf = malloc(RZ_BUFFER_SIZE);
- rz->end = rz->src_end = 0x7FFFFFFFFFFFFFFFLL;
-#ifdef _USE_KNETFILE
- n = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
-#else
- n = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
-#endif
- ret = _read_gz_header(rz->inbuf, n, &ext_off, &ext_len);
- if(ret == 0){
- PLAIN_FILE:
- rz->in = n;
- rz->file_type = FILE_TYPE_PLAIN;
- memcpy(rz->outbuf, rz->inbuf, n);
- rz->buf_len = n;
- free(rz->stream);
- rz->stream = NULL;
- return rz;
- }
- rz->header_size = ret;
- ret = inflateInit2(rz->stream, -WINDOW_BITS);
- if(ret != Z_OK){ inflateEnd(rz->stream); goto PLAIN_FILE;}
- rz->stream->avail_in = n - rz->header_size;
- rz->stream->next_in = rz->inbuf + rz->header_size;
- rz->stream->avail_out = RZ_BUFFER_SIZE;
- rz->stream->next_out = rz->outbuf;
- rz->file_type = FILE_TYPE_GZ;
- rz->in = rz->header_size;
- rz->block_pos = rz->header_size;
- rz->next_block_pos = rz->header_size;
- rz->block_off = 0;
- if(ext_len < 7 || memcmp(rz->inbuf + ext_off, c, 4) != 0) return rz;
- if(((((unsigned char*)rz->inbuf)[ext_off + 5] << 8) | ((unsigned char*)rz->inbuf)[ext_off + 6]) != RZ_BLOCK_SIZE){
- fprintf(stderr, " -- WARNING: RZ_BLOCK_SIZE is not %d, treat source as gz file. in %s -- %s:%d --\n", RZ_BLOCK_SIZE, __FUNCTION__, __FILE__, __LINE__);
- return rz;
- }
- rz->load_index = _load_index;
- rz->file_type = FILE_TYPE_RZ;
-#ifdef _USE_KNETFILE
- if(knet_seek(fp, -16, SEEK_END) == -1){
-#else
- if(lseek(fd, -16, SEEK_END) == -1){
-#endif
- UNSEEKABLE:
- rz->seekable = 0;
- rz->index = NULL;
- rz->src_end = rz->end = 0x7FFFFFFFFFFFFFFFLL;
- } else {
- is_be = is_big_endian();
- rz->seekable = 1;
-#ifdef _USE_KNETFILE
- knet_read(fp, &end, sizeof(int64_t));
-#else
- read(fd, &end, sizeof(int64_t));
-#endif
- if(!is_be) rz->src_end = (int64_t)byte_swap_8((uint64_t)end);
- else rz->src_end = end;
-
-#ifdef _USE_KNETFILE
- knet_read(fp, &end, sizeof(int64_t));
-#else
- read(fd, &end, sizeof(int64_t));
-#endif
- if(!is_be) rz->end = (int64_t)byte_swap_8((uint64_t)end);
- else rz->end = end;
- if(n > rz->end){
- rz->stream->avail_in -= n - rz->end;
- n = rz->end;
- }
- if(rz->end > rz->src_end){
-#ifdef _USE_KNETFILE
- knet_seek(fp, rz->in, SEEK_SET);
-#else
- lseek(fd, rz->in, SEEK_SET);
-#endif
- goto UNSEEKABLE;
- }
-#ifdef _USE_KNETFILE
- knet_seek(fp, rz->end, SEEK_SET);
- if(knet_tell(fp) != rz->end){
- knet_seek(fp, rz->in, SEEK_SET);
-#else
- if(lseek(fd, rz->end, SEEK_SET) != rz->end){
- lseek(fd, rz->in, SEEK_SET);
-#endif
- goto UNSEEKABLE;
- }
-#ifdef _USE_KNETFILE
- load_zindex(rz, fp);
- knet_seek(fp, n, SEEK_SET);
-#else
- load_zindex(rz, fd);
- lseek(fd, n, SEEK_SET);
-#endif
- }
- return rz;
-}
-
-#ifdef _USE_KNETFILE
-RAZF* razf_dopen(int fd, const char *mode){
- if (strstr(mode, "r")) fprintf(stderr,"[razf_dopen] implement me\n");
- else if(strstr(mode, "w")) return razf_open_w(fd);
- return NULL;
-}
-
-RAZF* razf_dopen2(int fd, const char *mode)
-{
- fprintf(stderr,"[razf_dopen2] implement me\n");
- return NULL;
-}
-#else
-RAZF* razf_dopen(int fd, const char *mode){
- if(strstr(mode, "r")) return razf_open_r(fd, 1);
- else if(strstr(mode, "w")) return razf_open_w(fd);
- else return NULL;
-}
-
-RAZF* razf_dopen2(int fd, const char *mode)
-{
- if(strstr(mode, "r")) return razf_open_r(fd, 0);
- else if(strstr(mode, "w")) return razf_open_w(fd);
- else return NULL;
-}
-#endif
-
-static inline RAZF* _razf_open(const char *filename, const char *mode, int _load_index){
- int fd;
- RAZF *rz;
- if(strstr(mode, "r")){
-#ifdef _USE_KNETFILE
- knetFile *fd = knet_open(filename, "r");
- if (fd == 0) {
- fprintf(stderr, "[_razf_open] fail to open %s\n", filename);
- return NULL;
- }
-#else
-#ifdef _WIN32
- fd = open(filename, O_RDONLY | O_BINARY);
-#else
- fd = open(filename, O_RDONLY);
-#endif
-#endif
- if(fd < 0) return NULL;
- rz = razf_open_r(fd, _load_index);
- } else if(strstr(mode, "w")){
-#ifdef _WIN32
- fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666);
-#else
- fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0666);
-#endif
- if(fd < 0) return NULL;
- rz = razf_open_w(fd);
- } else return NULL;
- return rz;
-}
-
-RAZF* razf_open(const char *filename, const char *mode){
- return _razf_open(filename, mode, 1);
-}
-
-RAZF* razf_open2(const char *filename, const char *mode){
- return _razf_open(filename, mode, 0);
-}
-
-int razf_get_data_size(RAZF *rz, int64_t *u_size, int64_t *c_size){
- int64_t n;
- if(rz->mode != 'r' && rz->mode != 'R') return 0;
- switch(rz->file_type){
- case FILE_TYPE_PLAIN:
- if(rz->end == 0x7fffffffffffffffLL){
-#ifdef _USE_KNETFILE
- if(knet_seek(rz->x.fpr, 0, SEEK_CUR) == -1) return 0;
- n = knet_tell(rz->x.fpr);
- knet_seek(rz->x.fpr, 0, SEEK_END);
- rz->end = knet_tell(rz->x.fpr);
- knet_seek(rz->x.fpr, n, SEEK_SET);
-#else
- if((n = lseek(rz->filedes, 0, SEEK_CUR)) == -1) return 0;
- rz->end = lseek(rz->filedes, 0, SEEK_END);
- lseek(rz->filedes, n, SEEK_SET);
-#endif
- }
- *u_size = *c_size = rz->end;
- return 1;
- case FILE_TYPE_GZ:
- return 0;
- case FILE_TYPE_RZ:
- if(rz->src_end == rz->end) return 0;
- *u_size = rz->src_end;
- *c_size = rz->end;
- return 1;
- default:
- return 0;
- }
-}
-
-static int _razf_read(RAZF* rz, void *data, int size){
- int ret, tin;
- if(rz->z_eof || rz->z_err) return 0;
- if (rz->file_type == FILE_TYPE_PLAIN) {
-#ifdef _USE_KNETFILE
- ret = knet_read(rz->x.fpr, data, size);
-#else
- ret = read(rz->filedes, data, size);
-#endif
- if (ret == 0) rz->z_eof = 1;
- return ret;
- }
- rz->stream->avail_out = size;
- rz->stream->next_out = data;
- while(rz->stream->avail_out){
- if(rz->stream->avail_in == 0){
- if(rz->in >= rz->end){ rz->z_eof = 1; break; }
- if(rz->end - rz->in < RZ_BUFFER_SIZE){
-#ifdef _USE_KNETFILE
- rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, rz->end -rz->in);
-#else
- rz->stream->avail_in = read(rz->filedes, rz->inbuf, rz->end -rz->in);
-#endif
- } else {
-#ifdef _USE_KNETFILE
- rz->stream->avail_in = knet_read(rz->x.fpr, rz->inbuf, RZ_BUFFER_SIZE);
-#else
- rz->stream->avail_in = read(rz->filedes, rz->inbuf, RZ_BUFFER_SIZE);
-#endif
- }
- if(rz->stream->avail_in == 0){
- rz->z_eof = 1;
- break;
- }
- rz->stream->next_in = rz->inbuf;
- }
- tin = rz->stream->avail_in;
- ret = inflate(rz->stream, Z_BLOCK);
- rz->in += tin - rz->stream->avail_in;
- if(ret == Z_NEED_DICT || ret == Z_MEM_ERROR || ret == Z_DATA_ERROR){
- fprintf(stderr, "[_razf_read] inflate error: %d %s (at %s:%d)\n", ret, rz->stream->msg ? rz->stream->msg : "", __FILE__, __LINE__);
- rz->z_err = 1;
- break;
- }
- if(ret == Z_STREAM_END){
- rz->z_eof = 1;
- break;
- }
- if ((rz->stream->data_type&128) && !(rz->stream->data_type&64)){
- rz->buf_flush = 1;
- rz->next_block_pos = rz->in;
- break;
- }
- }
- return size - rz->stream->avail_out;
-}
-
-int razf_read(RAZF *rz, void *data, int size){
- int ori_size, i;
- ori_size = size;
- while(size > 0){
- if(rz->buf_len){
- if(size < rz->buf_len){
- for(i=0;i<size;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
- rz->buf_off += size;
- rz->buf_len -= size;
- data += size;
- rz->block_off += size;
- size = 0;
- break;
- } else {
- for(i=0;i<rz->buf_len;i++) ((char*)data)[i] = ((char*)rz->outbuf + rz->buf_off)[i];
- data += rz->buf_len;
- size -= rz->buf_len;
- rz->block_off += rz->buf_len;
- rz->buf_off = 0;
- rz->buf_len = 0;
- if(rz->buf_flush){
- rz->block_pos = rz->next_block_pos;
- rz->block_off = 0;
- rz->buf_flush = 0;
- }
- }
- } else if(rz->buf_flush){
- rz->block_pos = rz->next_block_pos;
- rz->block_off = 0;
- rz->buf_flush = 0;
- }
- if(rz->buf_flush) continue;
- rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
- if(rz->z_eof && rz->buf_len == 0) break;
- }
- rz->out += ori_size - size;
- return ori_size - size;
-}
-
-int razf_skip(RAZF* rz, int size){
- int ori_size;
- ori_size = size;
- while(size > 0){
- if(rz->buf_len){
- if(size < rz->buf_len){
- rz->buf_off += size;
- rz->buf_len -= size;
- rz->block_off += size;
- size = 0;
- break;
- } else {
- size -= rz->buf_len;
- rz->buf_off = 0;
- rz->buf_len = 0;
- rz->block_off += rz->buf_len;
- if(rz->buf_flush){
- rz->block_pos = rz->next_block_pos;
- rz->block_off = 0;
- rz->buf_flush = 0;
- }
- }
- } else if(rz->buf_flush){
- rz->block_pos = rz->next_block_pos;
- rz->block_off = 0;
- rz->buf_flush = 0;
- }
- if(rz->buf_flush) continue;
- rz->buf_len = _razf_read(rz, rz->outbuf, RZ_BUFFER_SIZE);
- if(rz->z_eof || rz->z_err) break;
- }
- rz->out += ori_size - size;
- return ori_size - size;
-}
-
-static void _razf_reset_read(RAZF *rz, int64_t in, int64_t out){
-#ifdef _USE_KNETFILE
- knet_seek(rz->x.fpr, in, SEEK_SET);
-#else
- lseek(rz->filedes, in, SEEK_SET);
-#endif
- rz->in = in;
- rz->out = out;
- rz->block_pos = in;
- rz->next_block_pos = in;
- rz->block_off = 0;
- rz->buf_flush = 0;
- rz->z_eof = rz->z_err = 0;
- inflateReset(rz->stream);
- rz->stream->avail_in = 0;
- rz->buf_off = rz->buf_len = 0;
-}
-
-int64_t razf_jump(RAZF *rz, int64_t block_start, int block_offset){
- int64_t pos;
- rz->z_eof = 0;
- if(rz->file_type == FILE_TYPE_PLAIN){
- rz->buf_off = rz->buf_len = 0;
- pos = block_start + block_offset;
-#ifdef _USE_KNETFILE
- knet_seek(rz->x.fpr, pos, SEEK_SET);
- pos = knet_tell(rz->x.fpr);
-#else
- pos = lseek(rz->filedes, pos, SEEK_SET);
-#endif
- rz->out = rz->in = pos;
- return pos;
- }
- if(block_start == rz->block_pos && block_offset >= rz->block_off) {
- block_offset -= rz->block_off;
- goto SKIP; // Needn't reset inflate
- }
- if(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start
- _razf_reset_read(rz, block_start, 0);
- SKIP:
- if(block_offset) razf_skip(rz, block_offset);
- return rz->block_off;
-}
-
-int64_t razf_seek(RAZF* rz, int64_t pos, int where){
- int64_t idx;
- int64_t seek_pos, new_out;
- rz->z_eof = 0;
- if (where == SEEK_CUR) pos += rz->out;
- else if (where == SEEK_END) pos += rz->src_end;
- if(rz->file_type == FILE_TYPE_PLAIN){
-#ifdef _USE_KNETFILE
- knet_seek(rz->x.fpr, pos, SEEK_SET);
- seek_pos = knet_tell(rz->x.fpr);
-#else
- seek_pos = lseek(rz->filedes, pos, SEEK_SET);
-#endif
- rz->buf_off = rz->buf_len = 0;
- rz->out = rz->in = seek_pos;
- return seek_pos;
- } else if(rz->file_type == FILE_TYPE_GZ){
- if(pos >= rz->out) goto SKIP;
- return rz->out;
- }
- if(pos == rz->out) return pos;
- if(pos > rz->src_end) return rz->out;
- if(!rz->seekable || !rz->load_index){
- if(pos >= rz->out) goto SKIP;
- }
- idx = pos / RZ_BLOCK_SIZE - 1;
- seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
- new_out = (idx + 1) * RZ_BLOCK_SIZE;
- if(pos > rz->out && new_out <= rz->out) goto SKIP;
- _razf_reset_read(rz, seek_pos, new_out);
- SKIP:
- razf_skip(rz, (int)(pos - rz->out));
- return rz->out;
-}
-
-uint64_t razf_tell2(RAZF *rz)
-{
- /*
- if (rz->load_index) {
- int64_t idx, seek_pos;
- idx = rz->out / RZ_BLOCK_SIZE - 1;
- seek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);
- if (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)
- fprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\n",
- (long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);
- }
- */
- return (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);
-}
-
-int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)
-{
- if (where != SEEK_SET) return -1;
- return razf_jump(rz, voffset>>16, voffset&0xffff);
-}
-
-void razf_close(RAZF *rz){
- if(rz->mode == 'w'){
-#ifndef _RZ_READONLY
- razf_end_flush(rz);
- deflateEnd(rz->stream);
-#ifdef _USE_KNETFILE
- save_zindex(rz, rz->x.fpw);
- if(is_big_endian()){
- write(rz->x.fpw, &rz->in, sizeof(int64_t));
- write(rz->x.fpw, &rz->out, sizeof(int64_t));
- } else {
- uint64_t v64 = byte_swap_8((uint64_t)rz->in);
- write(rz->x.fpw, &v64, sizeof(int64_t));
- v64 = byte_swap_8((uint64_t)rz->out);
- write(rz->x.fpw, &v64, sizeof(int64_t));
- }
-#else
- save_zindex(rz, rz->filedes);
- if(is_big_endian()){
- write(rz->filedes, &rz->in, sizeof(int64_t));
- write(rz->filedes, &rz->out, sizeof(int64_t));
- } else {
- uint64_t v64 = byte_swap_8((uint64_t)rz->in);
- write(rz->filedes, &v64, sizeof(int64_t));
- v64 = byte_swap_8((uint64_t)rz->out);
- write(rz->filedes, &v64, sizeof(int64_t));
- }
-#endif
-#endif
- } else if(rz->mode == 'r'){
- if(rz->stream) inflateEnd(rz->stream);
- }
- if(rz->inbuf) free(rz->inbuf);
- if(rz->outbuf) free(rz->outbuf);
- if(rz->header){
- free(rz->header->extra);
- free(rz->header->name);
- free(rz->header->comment);
- free(rz->header);
- }
- if(rz->index){
- free(rz->index->bin_offsets);
- free(rz->index->cell_offsets);
- free(rz->index);
- }
- free(rz->stream);
-#ifdef _USE_KNETFILE
- if (rz->mode == 'r')
- knet_close(rz->x.fpr);
- if (rz->mode == 'w')
- close(rz->x.fpw);
-#else
- close(rz->filedes);
-#endif
- free(rz);
-}
-
-#endif
diff --git a/src/samtools-0.1.18/razf.h b/src/samtools-0.1.18/razf.h
deleted file mode 100644
index 60a0c96..0000000
--- a/src/samtools-0.1.18/razf.h
+++ /dev/null
@@ -1,134 +0,0 @@
- /*-
- * RAZF : Random Access compressed(Z) File
- * Version: 1.0
- * Release Date: 2008-10-27
- *
- * Copyright 2008, Jue Ruan <ruanjue at gmail.com>, Heng Li <lh3 at sanger.ac.uk>
- *
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- */
-
-
-#ifndef __RAZF_RJ_H
-#define __RAZF_RJ_H
-
-#include <stdint.h>
-#include <stdio.h>
-#include "zlib.h"
-
-#ifdef _USE_KNETFILE
-#include "knetfile.h"
-#endif
-
-#if ZLIB_VERNUM < 0x1221
-#define _RZ_READONLY
-struct _gz_header_s;
-typedef struct _gz_header_s _gz_header;
-#define gz_header _gz_header
-#endif
-
-#define WINDOW_BITS 15
-
-#ifndef RZ_BLOCK_SIZE
-#define RZ_BLOCK_SIZE (1<<WINDOW_BITS)
-#endif
-
-#ifndef RZ_BUFFER_SIZE
-#define RZ_BUFFER_SIZE 4096
-#endif
-
-#ifndef RZ_COMPRESS_LEVEL
-#define RZ_COMPRESS_LEVEL 6
-#endif
-
-#define RZ_BIN_SIZE ((1LLU << 32) / RZ_BLOCK_SIZE)
-
-typedef struct {
- uint32_t *cell_offsets; // i
- int64_t *bin_offsets; // i / BIN_SIZE
- int size;
- int cap;
-} ZBlockIndex;
-/* When storing index, output bytes in Big-Endian everywhere */
-
-#define FILE_TYPE_RZ 1
-#define FILE_TYPE_PLAIN 2
-#define FILE_TYPE_GZ 3
-
-typedef struct RandomAccessZFile {
- char mode; /* 'w' : write mode; 'r' : read mode */
- int file_type;
- /* plain file or rz file, razf_read support plain file as input too, in this case, razf_read work as buffered fread */
-#ifdef _USE_KNETFILE
- union {
- knetFile *fpr;
- int fpw;
- } x;
-#else
- int filedes; /* the file descriptor */
-#endif
- z_stream *stream;
- ZBlockIndex *index;
- int64_t in, out, end, src_end;
- /* in: n bytes total in; out: n bytes total out; */
- /* end: the end of all data blocks, while the start of index; src_end: the true end position in uncompressed file */
- int buf_flush; // buffer should be flush, suspend inflate util buffer is empty
- int64_t block_pos, block_off, next_block_pos;
- /* block_pos: the start postiion of current block in compressed file */
- /* block_off: tell how many bytes have been read from current block */
- void *inbuf, *outbuf;
- int header_size;
- gz_header *header;
- /* header is used to transfer inflate_state->mode from HEAD to TYPE after call inflateReset */
- int buf_off, buf_len;
- int z_err, z_eof;
- int seekable;
- /* Indice where the source is seekable */
- int load_index;
- /* set has_index to 0 in mode 'w', then index will be discarded */
-} RAZF;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- RAZF* razf_dopen(int data_fd, const char *mode);
- RAZF *razf_open(const char *fn, const char *mode);
- int razf_write(RAZF* rz, const void *data, int size);
- int razf_read(RAZF* rz, void *data, int size);
- int64_t razf_seek(RAZF* rz, int64_t pos, int where);
- void razf_close(RAZF* rz);
-
-#define razf_tell(rz) ((rz)->out)
-
- RAZF* razf_open2(const char *filename, const char *mode);
- RAZF* razf_dopen2(int fd, const char *mode);
- uint64_t razf_tell2(RAZF *rz);
- int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/razip.c b/src/samtools-0.1.18/razip.c
deleted file mode 100644
index 825e732..0000000
--- a/src/samtools-0.1.18/razip.c
+++ /dev/null
@@ -1,141 +0,0 @@
-#include <stdio.h>
-#include <fcntl.h>
-#include <unistd.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include "razf.h"
-
-#define WINDOW_SIZE 4096
-
-static int razf_main_usage()
-{
- printf("\n");
- printf("Usage: razip [options] [file] ...\n\n");
- printf("Options: -c write on standard output, keep original files unchanged\n");
- printf(" -d decompress\n");
- printf(" -l list compressed file contents\n");
- printf(" -b INT decompress at INT position in the uncompressed file\n");
- printf(" -s INT decompress INT bytes in the uncompressed file\n");
- printf(" -h give this help\n");
- printf("\n");
- return 0;
-}
-
-static int write_open(const char *fn, int is_forced)
-{
- int fd = -1;
- char c;
- if (!is_forced) {
- if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) {
- printf("razip: %s already exists; do you wish to overwrite (y or n)? ", fn);
- scanf("%c", &c);
- if (c != 'Y' && c != 'y') {
- printf("razip: not overwritten\n");
- exit(1);
- }
- }
- }
- if (fd < 0) {
- if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {
- fprintf(stderr, "razip: %s: Fail to write\n", fn);
- exit(1);
- }
- }
- return fd;
-}
-
-int main(int argc, char **argv)
-{
- int c, compress, pstdout, is_forced;
- RAZF *rz;
- void *buffer;
- long start, end, size;
-
- compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0;
- while((c = getopt(argc, argv, "cdlhfb:s:")) >= 0){
- switch(c){
- case 'h': return razf_main_usage();
- case 'd': compress = 0; break;
- case 'c': pstdout = 1; break;
- case 'l': compress = 2; break;
- case 'b': start = atol(optarg); break;
- case 's': size = atol(optarg); break;
- case 'f': is_forced = 1; break;
- }
- }
- if (size >= 0) end = start + size;
- if(end >= 0 && end < start){
- fprintf(stderr, " -- Illegal region: [%ld, %ld] --\n", start, end);
- return 1;
- }
- if(compress == 1){
- int f_src, f_dst = -1;
- if(argc > optind){
- if((f_src = open(argv[optind], O_RDONLY)) < 0){
- fprintf(stderr, " -- Cannot open file: %s --\n", argv[optind]);
- return 1;
- }
- if(pstdout){
- f_dst = fileno(stdout);
- } else {
- char *name = malloc(sizeof(strlen(argv[optind]) + 5));
- strcpy(name, argv[optind]);
- strcat(name, ".rz");
- f_dst = write_open(name, is_forced);
- if (f_dst < 0) return 1;
- free(name);
- }
- } else if(pstdout){
- f_src = fileno(stdin);
- f_dst = fileno(stdout);
- } else return razf_main_usage();
- rz = razf_dopen(f_dst, "w");
- buffer = malloc(WINDOW_SIZE);
- while((c = read(f_src, buffer, WINDOW_SIZE)) > 0) razf_write(rz, buffer, c);
- razf_close(rz); // f_dst will be closed here
- if (argc > optind && !pstdout) unlink(argv[optind]);
- free(buffer);
- close(f_src);
- return 0;
- } else {
- if(argc <= optind) return razf_main_usage();
- if(compress == 2){
- rz = razf_open(argv[optind], "r");
- if(rz->file_type == FILE_TYPE_RZ) {
- printf("%20s%20s%7s %s\n", "compressed", "uncompressed", "ratio", "name");
- printf("%20lld%20lld%6.1f%% %s\n", (long long)rz->end, (long long)rz->src_end, rz->end * 100.0f / rz->src_end,
- argv[optind]);
- } else fprintf(stdout, "%s is not a regular rz file\n", argv[optind]);
- } else {
- int f_dst;
- if (argc > optind && !pstdout) {
- char *name;
- if (strstr(argv[optind], ".rz") - argv[optind] != strlen(argv[optind]) - 3) {
- printf("razip: %s: unknown suffix -- ignored\n", argv[optind]);
- return 1;
- }
- name = strdup(argv[optind]);
- name[strlen(name) - 3] = '\0';
- f_dst = write_open(name, is_forced);
- free(name);
- } else f_dst = fileno(stdout);
- rz = razf_open(argv[optind], "r");
- buffer = malloc(WINDOW_SIZE);
- razf_seek(rz, start, SEEK_SET);
- while(1){
- if(end < 0) c = razf_read(rz, buffer, WINDOW_SIZE);
- else c = razf_read(rz, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));
- if(c <= 0) break;
- start += c;
- write(f_dst, buffer, c);
- if(end >= 0 && start >= end) break;
- }
- free(buffer);
- if (!pstdout) unlink(argv[optind]);
- }
- razf_close(rz);
- return 0;
- }
-}
-
diff --git a/src/samtools-0.1.18/sam.c b/src/samtools-0.1.18/sam.c
deleted file mode 100644
index f026bc8..0000000
--- a/src/samtools-0.1.18/sam.c
+++ /dev/null
@@ -1,179 +0,0 @@
-#include <string.h>
-#include <unistd.h>
-#include "faidx.h"
-#include "sam.h"
-
-#define TYPE_BAM 1
-#define TYPE_READ 2
-
-bam_header_t *bam_header_dup(const bam_header_t *h0)
-{
- bam_header_t *h;
- int i;
- h = bam_header_init();
- *h = *h0;
- h->hash = h->dict = h->rg2lib = 0;
- h->text = (char*)calloc(h->l_text + 1, 1);
- memcpy(h->text, h0->text, h->l_text);
- h->target_len = (uint32_t*)calloc(h->n_targets, 4);
- h->target_name = (char**)calloc(h->n_targets, sizeof(void*));
- for (i = 0; i < h->n_targets; ++i) {
- h->target_len[i] = h0->target_len[i];
- h->target_name[i] = strdup(h0->target_name[i]);
- }
- return h;
-}
-static void append_header_text(bam_header_t *header, char* text, int len)
-{
- int x = header->l_text + 1;
- int y = header->l_text + len + 1; // 1 byte null
- if (text == 0) return;
- kroundup32(x);
- kroundup32(y);
- if (x < y) header->text = (char*)realloc(header->text, y);
- strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here.
- header->l_text += len;
- header->text[header->l_text] = 0;
-}
-
-samfile_t *samopen(const char *fn, const char *mode, const void *aux)
-{
- samfile_t *fp;
- fp = (samfile_t*)calloc(1, sizeof(samfile_t));
- if (strchr(mode, 'r')) { // read
- fp->type |= TYPE_READ;
- if (strchr(mode, 'b')) { // binary
- fp->type |= TYPE_BAM;
- fp->x.bam = strcmp(fn, "-")? bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");
- if (fp->x.bam == 0) goto open_err_ret;
- fp->header = bam_header_read(fp->x.bam);
- } else { // text
- fp->x.tamr = sam_open(fn);
- if (fp->x.tamr == 0) goto open_err_ret;
- fp->header = sam_header_read(fp->x.tamr);
- if (fp->header->n_targets == 0) { // no @SQ fields
- if (aux) { // check if aux is present
- bam_header_t *textheader = fp->header;
- fp->header = sam_header_read2((const char*)aux);
- if (fp->header == 0) goto open_err_ret;
- append_header_text(fp->header, textheader->text, textheader->l_text);
- bam_header_destroy(textheader);
- }
- if (fp->header->n_targets == 0 && bam_verbose >= 1)
- fprintf(stderr, "[samopen] no @SQ lines in the header.\n");
- } else if (bam_verbose >= 2) fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets);
- }
- } else if (strchr(mode, 'w')) { // write
- fp->header = bam_header_dup((const bam_header_t*)aux);
- if (strchr(mode, 'b')) { // binary
- char bmode[3];
- int i, compress_level = -1;
- for (i = 0; mode[i]; ++i) if (mode[i] >= '0' && mode[i] <= '9') break;
- if (mode[i]) compress_level = mode[i] - '0';
- if (strchr(mode, 'u')) compress_level = 0;
- bmode[0] = 'w'; bmode[1] = compress_level < 0? 0 : compress_level + '0'; bmode[2] = 0;
- fp->type |= TYPE_BAM;
- fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode);
- if (fp->x.bam == 0) goto open_err_ret;
- bam_header_write(fp->x.bam, fp->header);
- } else { // text
- // open file
- fp->x.tamw = strcmp(fn, "-")? fopen(fn, "w") : stdout;
- if (fp->x.tamr == 0) goto open_err_ret;
- if (strchr(mode, 'X')) fp->type |= BAM_OFSTR<<2;
- else if (strchr(mode, 'x')) fp->type |= BAM_OFHEX<<2;
- else fp->type |= BAM_OFDEC<<2;
- // write header
- if (strchr(mode, 'h')) {
- int i;
- bam_header_t *alt;
- // parse the header text
- alt = bam_header_init();
- alt->l_text = fp->header->l_text; alt->text = fp->header->text;
- sam_header_parse(alt);
- alt->l_text = 0; alt->text = 0;
- // check if there are @SQ lines in the header
- fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); // FIXME: better to skip the trailing NULL
- if (alt->n_targets) { // then write the header text without dumping ->target_{name,len}
- if (alt->n_targets != fp->header->n_targets && bam_verbose >= 1)
- fprintf(stderr, "[samopen] inconsistent number of target sequences. Output the text header.\n");
- } else { // then dump ->target_{name,len}
- for (i = 0; i < fp->header->n_targets; ++i)
- fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]);
- }
- bam_header_destroy(alt);
- }
- }
- }
- return fp;
-
-open_err_ret:
- free(fp);
- return 0;
-}
-
-void samclose(samfile_t *fp)
-{
- if (fp == 0) return;
- if (fp->header) bam_header_destroy(fp->header);
- if (fp->type & TYPE_BAM) bam_close(fp->x.bam);
- else if (fp->type & TYPE_READ) sam_close(fp->x.tamr);
- else fclose(fp->x.tamw);
- free(fp);
-}
-
-int samread(samfile_t *fp, bam1_t *b)
-{
- if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading
- if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b);
- else return sam_read1(fp->x.tamr, fp->header, b);
-}
-
-int samwrite(samfile_t *fp, const bam1_t *b)
-{
- if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing
- if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b);
- else {
- char *s = bam_format1_core(fp->header, b, fp->type>>2&3);
- int l = strlen(s);
- fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw);
- free(s);
- return l + 1;
- }
-}
-
-int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *func_data)
-{
- bam_plbuf_t *buf;
- int ret;
- bam1_t *b;
- b = bam_init1();
- buf = bam_plbuf_init(func, func_data);
- bam_plbuf_set_mask(buf, mask);
- while ((ret = samread(fp, b)) >= 0)
- bam_plbuf_push(b, buf);
- bam_plbuf_push(0, buf);
- bam_plbuf_destroy(buf);
- bam_destroy1(b);
- return 0;
-}
-
-char *samfaipath(const char *fn_ref)
-{
- char *fn_list = 0;
- if (fn_ref == 0) return 0;
- fn_list = calloc(strlen(fn_ref) + 5, 1);
- strcat(strcpy(fn_list, fn_ref), ".fai");
- if (access(fn_list, R_OK) == -1) { // fn_list is unreadable
- if (access(fn_ref, R_OK) == -1) {
- fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref);
- } else {
- if (bam_verbose >= 3) fprintf(stderr, "[samfaipath] build FASTA index...\n");
- if (fai_build(fn_ref) == -1) {
- fprintf(stderr, "[samfaipath] fail to build FASTA index.\n");
- free(fn_list); fn_list = 0;
- }
- }
- }
- return fn_list;
-}
diff --git a/src/samtools-0.1.18/sam.h b/src/samtools-0.1.18/sam.h
deleted file mode 100644
index 0b87194..0000000
--- a/src/samtools-0.1.18/sam.h
+++ /dev/null
@@ -1,98 +0,0 @@
-#ifndef BAM_SAM_H
-#define BAM_SAM_H
-
-#include "bam.h"
-
-/*!
- @header
-
- This file provides higher level of I/O routines and unifies the APIs
- for SAM and BAM formats. These APIs are more convenient and
- recommended.
-
- @copyright Genome Research Ltd.
- */
-
-/*! @typedef
- @abstract SAM/BAM file handler
- @field type type of the handler; bit 1 for BAM, 2 for reading and bit 3-4 for flag format
- @field bam BAM file handler; valid if (type&1) == 1
- @field tamr SAM file handler for reading; valid if type == 2
- @field tamw SAM file handler for writing; valid if type == 0
- @field header header struct
- */
-typedef struct {
- int type;
- union {
- tamFile tamr;
- bamFile bam;
- FILE *tamw;
- } x;
- bam_header_t *header;
-} samfile_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- /*!
- @abstract Open a SAM/BAM file
-
- @param fn SAM/BAM file name; "-" is recognized as stdin (for
- reading) or stdout (for writing).
-
- @param mode open mode /[rw](b?)(u?)(h?)([xX]?)/: 'r' for reading,
- 'w' for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output,
- 'h' for outputing header in SAM, 'x' for HEX flag and 'X' for
- string flag. If 'b' present, it must immediately follow 'r' or
- 'w'. Valid modes are "r", "w", "wh", "wx", "whx", "wX", "whX",
- "rb", "wb" and "wbu" exclusively.
-
- @param aux auxiliary data; if mode[0]=='w', aux points to
- bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM
- are absent, aux points the file name of the list of the reference;
- aux is not used otherwise. If @SQ header lines are present in SAM,
- aux is not used, either.
-
- @return SAM/BAM file handler
- */
- samfile_t *samopen(const char *fn, const char *mode, const void *aux);
-
- /*!
- @abstract Close a SAM/BAM handler
- @param fp file handler to be closed
- */
- void samclose(samfile_t *fp);
-
- /*!
- @abstract Read one alignment
- @param fp file handler
- @param b alignment
- @return bytes read
- */
- int samread(samfile_t *fp, bam1_t *b);
-
- /*!
- @abstract Write one alignment
- @param fp file handler
- @param b alignment
- @return bytes written
- */
- int samwrite(samfile_t *fp, const bam1_t *b);
-
- /*!
- @abstract Get the pileup for a whole alignment file
- @param fp file handler
- @param mask mask transferred to bam_plbuf_set_mask()
- @param func user defined function called in the pileup process
- #param data user provided data for func()
- */
- int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data);
-
- char *samfaipath(const char *fn_ref);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/sam_header.c b/src/samtools-0.1.18/sam_header.c
deleted file mode 100644
index f4c8a3b..0000000
--- a/src/samtools-0.1.18/sam_header.c
+++ /dev/null
@@ -1,736 +0,0 @@
-#include "sam_header.h"
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdarg.h>
-
-#include "khash.h"
-KHASH_MAP_INIT_STR(str, const char *)
-
-struct _HeaderList
-{
- struct _HeaderList *last; // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only.
- struct _HeaderList *next;
- void *data;
-};
-typedef struct _HeaderList list_t;
-typedef list_t HeaderDict;
-
-typedef struct
-{
- char key[2];
- char *value;
-}
-HeaderTag;
-
-typedef struct
-{
- char type[2];
- list_t *tags;
-}
-HeaderLine;
-
-const char *o_hd_tags[] = {"SO","GO",NULL};
-const char *r_hd_tags[] = {"VN",NULL};
-
-const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL};
-const char *r_sq_tags[] = {"SN","LN",NULL};
-const char *u_sq_tags[] = {"SN",NULL};
-
-const char *o_rg_tags[] = {"CN","DS","DT","FO","KS","LB","PG","PI","PL","PU","SM",NULL};
-const char *r_rg_tags[] = {"ID",NULL};
-const char *u_rg_tags[] = {"ID",NULL};
-
-const char *o_pg_tags[] = {"VN","CL",NULL};
-const char *r_pg_tags[] = {"ID",NULL};
-
-const char *types[] = {"HD","SQ","RG","PG","CO",NULL};
-const char **optional_tags[] = {o_hd_tags,o_sq_tags,o_rg_tags,o_pg_tags,NULL,NULL};
-const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NULL};
-const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL};
-
-
-static void debug(const char *format, ...)
-{
- va_list ap;
- va_start(ap, format);
- vfprintf(stderr, format, ap);
- va_end(ap);
-}
-
-#if 0
-// Replaced by list_append_to_end
-static list_t *list_prepend(list_t *root, void *data)
-{
- list_t *l = malloc(sizeof(list_t));
- l->next = root;
- l->data = data;
- return l;
-}
-#endif
-
-// Relies on the root->last being correct. Do not use with the other list_*
-// routines unless they are fixed to modify root->last as well.
-static list_t *list_append_to_end(list_t *root, void *data)
-{
- list_t *l = malloc(sizeof(list_t));
- l->last = l;
- l->next = NULL;
- l->data = data;
-
- if ( !root )
- return l;
-
- root->last->next = l;
- root->last = l;
- return root;
-}
-
-static list_t *list_append(list_t *root, void *data)
-{
- list_t *l = root;
- while (l && l->next)
- l = l->next;
- if ( l )
- {
- l->next = malloc(sizeof(list_t));
- l = l->next;
- }
- else
- {
- l = malloc(sizeof(list_t));
- root = l;
- }
- l->data = data;
- l->next = NULL;
- return root;
-}
-
-static void list_free(list_t *root)
-{
- list_t *l = root;
- while (root)
- {
- l = root;
- root = root->next;
- free(l);
- }
-}
-
-
-
-// Look for a tag "XY" in a predefined const char *[] array.
-static int tag_exists(const char *tag, const char **tags)
-{
- int itag=0;
- if ( !tags ) return -1;
- while ( tags[itag] )
- {
- if ( tags[itag][0]==tag[0] && tags[itag][1]==tag[1] ) return itag;
- itag++;
- }
- return -1;
-}
-
-
-
-// Mimics the behaviour of getline, except it returns pointer to the next chunk of the text
-// or NULL if everything has been read. The lineptr should be freed by the caller. The
-// newline character is stripped.
-static const char *nextline(char **lineptr, size_t *n, const char *text)
-{
- int len;
- const char *to = text;
-
- if ( !*to ) return NULL;
-
- while ( *to && *to!='\n' && *to!='\r' ) to++;
- len = to - text + 1;
-
- if ( *to )
- {
- // Advance the pointer for the next call
- if ( *to=='\n' ) to++;
- else if ( *to=='\r' && *(to+1)=='\n' ) to+=2;
- }
- if ( !len )
- return to;
-
- if ( !*lineptr )
- {
- *lineptr = malloc(len);
- *n = len;
- }
- else if ( *n<len )
- {
- *lineptr = realloc(*lineptr, len);
- *n = len;
- }
- if ( !*lineptr ) {
- debug("[nextline] Insufficient memory!\n");
- return 0;
- }
-
- memcpy(*lineptr,text,len);
- (*lineptr)[len-1] = 0;
-
- return to;
-}
-
-// name points to "XY", value_from points to the first character of the value string and
-// value_to points to the last character of the value string.
-static HeaderTag *new_tag(const char *name, const char *value_from, const char *value_to)
-{
- HeaderTag *tag = malloc(sizeof(HeaderTag));
- int len = value_to-value_from+1;
-
- tag->key[0] = name[0];
- tag->key[1] = name[1];
- tag->value = malloc(len+1);
- memcpy(tag->value,value_from,len+1);
- tag->value[len] = 0;
- return tag;
-}
-
-static HeaderTag *header_line_has_tag(HeaderLine *hline, const char *key)
-{
- list_t *tags = hline->tags;
- while (tags)
- {
- HeaderTag *tag = tags->data;
- if ( tag->key[0]==key[0] && tag->key[1]==key[1] ) return tag;
- tags = tags->next;
- }
- return NULL;
-}
-
-
-// Return codes:
-// 0 .. different types or unique tags differ or conflicting tags, cannot be merged
-// 1 .. all tags identical -> no need to merge, drop one
-// 2 .. the unique tags match and there are some conflicting tags (same tag, different value) -> error, cannot be merged nor duplicated
-// 3 .. there are some missing complementary tags and no unique conflict -> can be merged into a single line
-static int sam_header_compare_lines(HeaderLine *hline1, HeaderLine *hline2)
-{
- HeaderTag *t1, *t2;
-
- if ( hline1->type[0]!=hline2->type[0] || hline1->type[1]!=hline2->type[1] )
- return 0;
-
- int itype = tag_exists(hline1->type,types);
- if ( itype==-1 ) {
- debug("[sam_header_compare_lines] Unknown type [%c%c]\n", hline1->type[0],hline1->type[1]);
- return -1; // FIXME (lh3): error; I do not know how this will be handled in Petr's code
- }
-
- if ( unique_tags[itype] )
- {
- t1 = header_line_has_tag(hline1,unique_tags[itype][0]);
- t2 = header_line_has_tag(hline2,unique_tags[itype][0]);
- if ( !t1 || !t2 ) // this should never happen, the unique tags are required
- return 2;
-
- if ( strcmp(t1->value,t2->value) )
- return 0; // the unique tags differ, cannot be merged
- }
- if ( !required_tags[itype] && !optional_tags[itype] )
- {
- t1 = hline1->tags->data;
- t2 = hline2->tags->data;
- if ( !strcmp(t1->value,t2->value) ) return 1; // identical comments
- return 0;
- }
-
- int missing=0, itag=0;
- while ( required_tags[itype] && required_tags[itype][itag] )
- {
- t1 = header_line_has_tag(hline1,required_tags[itype][itag]);
- t2 = header_line_has_tag(hline2,required_tags[itype][itag]);
- if ( !t1 && !t2 )
- return 2; // this should never happen
- else if ( !t1 || !t2 )
- missing = 1; // there is some tag missing in one of the hlines
- else if ( strcmp(t1->value,t2->value) )
- {
- if ( unique_tags[itype] )
- return 2; // the lines have a matching unique tag but have a conflicting tag
-
- return 0; // the lines contain conflicting tags, cannot be merged
- }
- itag++;
- }
- itag = 0;
- while ( optional_tags[itype] && optional_tags[itype][itag] )
- {
- t1 = header_line_has_tag(hline1,optional_tags[itype][itag]);
- t2 = header_line_has_tag(hline2,optional_tags[itype][itag]);
- if ( !t1 && !t2 )
- {
- itag++;
- continue;
- }
- if ( !t1 || !t2 )
- missing = 1; // there is some tag missing in one of the hlines
- else if ( strcmp(t1->value,t2->value) )
- {
- if ( unique_tags[itype] )
- return 2; // the lines have a matching unique tag but have a conflicting tag
-
- return 0; // the lines contain conflicting tags, cannot be merged
- }
- itag++;
- }
- if ( missing ) return 3; // there are some missing complementary tags with no conflicts, can be merged
- return 1;
-}
-
-
-static HeaderLine *sam_header_line_clone(const HeaderLine *hline)
-{
- list_t *tags;
- HeaderLine *out = malloc(sizeof(HeaderLine));
- out->type[0] = hline->type[0];
- out->type[1] = hline->type[1];
- out->tags = NULL;
-
- tags = hline->tags;
- while (tags)
- {
- HeaderTag *old = tags->data;
-
- HeaderTag *new = malloc(sizeof(HeaderTag));
- new->key[0] = old->key[0];
- new->key[1] = old->key[1];
- new->value = strdup(old->value);
- out->tags = list_append(out->tags, new);
-
- tags = tags->next;
- }
- return out;
-}
-
-static int sam_header_line_merge_with(HeaderLine *out_hline, const HeaderLine *tmpl_hline)
-{
- list_t *tmpl_tags;
-
- if ( out_hline->type[0]!=tmpl_hline->type[0] || out_hline->type[1]!=tmpl_hline->type[1] )
- return 0;
-
- tmpl_tags = tmpl_hline->tags;
- while (tmpl_tags)
- {
- HeaderTag *tmpl_tag = tmpl_tags->data;
- HeaderTag *out_tag = header_line_has_tag(out_hline, tmpl_tag->key);
- if ( !out_tag )
- {
- HeaderTag *tag = malloc(sizeof(HeaderTag));
- tag->key[0] = tmpl_tag->key[0];
- tag->key[1] = tmpl_tag->key[1];
- tag->value = strdup(tmpl_tag->value);
- out_hline->tags = list_append(out_hline->tags,tag);
- }
- tmpl_tags = tmpl_tags->next;
- }
- return 1;
-}
-
-
-static HeaderLine *sam_header_line_parse(const char *headerLine)
-{
- HeaderLine *hline;
- HeaderTag *tag;
- const char *from, *to;
- from = headerLine;
-
- if ( *from != '@' ) {
- debug("[sam_header_line_parse] expected '@', got [%s]\n", headerLine);
- return 0;
- }
- to = ++from;
-
- while (*to && *to!='\t') to++;
- if ( to-from != 2 ) {
- debug("[sam_header_line_parse] expected '@XY', got [%s]\nHint: The header tags must be tab-separated.\n", headerLine);
- return 0;
- }
-
- hline = malloc(sizeof(HeaderLine));
- hline->type[0] = from[0];
- hline->type[1] = from[1];
- hline->tags = NULL;
-
- int itype = tag_exists(hline->type, types);
-
- from = to;
- while (*to && *to=='\t') to++;
- if ( to-from != 1 ) {
- debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
- return 0;
- }
- from = to;
- while (*from)
- {
- while (*to && *to!='\t') to++;
-
- if ( !required_tags[itype] && !optional_tags[itype] )
- {
- // CO is a special case, it can contain anything, including tabs
- if ( *to ) { to++; continue; }
- tag = new_tag(" ",from,to-1);
- }
- else
- tag = new_tag(from,from+3,to-1);
-
- if ( header_line_has_tag(hline,tag->key) )
- debug("The tag '%c%c' present (at least) twice on line [%s]\n", tag->key[0],tag->key[1], headerLine);
- hline->tags = list_append(hline->tags, tag);
-
- from = to;
- while (*to && *to=='\t') to++;
- if ( *to && to-from != 1 ) {
- debug("[sam_header_line_parse] multiple tabs on line [%s] (%d)\n", headerLine,(int)(to-from));
- return 0;
- }
-
- from = to;
- }
- return hline;
-}
-
-
-// Must be of an existing type, all tags must be recognised and all required tags must be present
-static int sam_header_line_validate(HeaderLine *hline)
-{
- list_t *tags;
- HeaderTag *tag;
- int itype, itag;
-
- // Is the type correct?
- itype = tag_exists(hline->type, types);
- if ( itype==-1 )
- {
- debug("The type [%c%c] not recognised.\n", hline->type[0],hline->type[1]);
- return 0;
- }
-
- // Has all required tags?
- itag = 0;
- while ( required_tags[itype] && required_tags[itype][itag] )
- {
- if ( !header_line_has_tag(hline,required_tags[itype][itag]) )
- {
- debug("The tag [%c%c] required for [%c%c] not present.\n", required_tags[itype][itag][0],required_tags[itype][itag][1],
- hline->type[0],hline->type[1]);
- return 0;
- }
- itag++;
- }
-
- // Are all tags recognised?
- tags = hline->tags;
- while ( tags )
- {
- tag = tags->data;
- if ( !tag_exists(tag->key,required_tags[itype]) && !tag_exists(tag->key,optional_tags[itype]) )
- {
- debug("Unknown tag [%c%c] for [%c%c].\n", tag->key[0],tag->key[1], hline->type[0],hline->type[1]);
- return 0;
- }
- tags = tags->next;
- }
-
- return 1;
-}
-
-
-static void print_header_line(FILE *fp, HeaderLine *hline)
-{
- list_t *tags = hline->tags;
- HeaderTag *tag;
-
- fprintf(fp, "@%c%c", hline->type[0],hline->type[1]);
- while (tags)
- {
- tag = tags->data;
-
- fprintf(fp, "\t");
- if ( tag->key[0]!=' ' || tag->key[1]!=' ' )
- fprintf(fp, "%c%c:", tag->key[0],tag->key[1]);
- fprintf(fp, "%s", tag->value);
-
- tags = tags->next;
- }
- fprintf(fp,"\n");
-}
-
-
-static void sam_header_line_free(HeaderLine *hline)
-{
- list_t *tags = hline->tags;
- while (tags)
- {
- HeaderTag *tag = tags->data;
- free(tag->value);
- free(tag);
- tags = tags->next;
- }
- list_free(hline->tags);
- free(hline);
-}
-
-void sam_header_free(void *_header)
-{
- HeaderDict *header = (HeaderDict*)_header;
- list_t *hlines = header;
- while (hlines)
- {
- sam_header_line_free(hlines->data);
- hlines = hlines->next;
- }
- list_free(header);
-}
-
-HeaderDict *sam_header_clone(const HeaderDict *dict)
-{
- HeaderDict *out = NULL;
- while (dict)
- {
- HeaderLine *hline = dict->data;
- out = list_append(out, sam_header_line_clone(hline));
- dict = dict->next;
- }
- return out;
-}
-
-// Returns a newly allocated string
-char *sam_header_write(const void *_header)
-{
- const HeaderDict *header = (const HeaderDict*)_header;
- char *out = NULL;
- int len=0, nout=0;
- const list_t *hlines;
-
- // Calculate the length of the string to allocate
- hlines = header;
- while (hlines)
- {
- len += 4; // @XY and \n
-
- HeaderLine *hline = hlines->data;
- list_t *tags = hline->tags;
- while (tags)
- {
- HeaderTag *tag = tags->data;
- len += strlen(tag->value) + 1; // \t
- if ( tag->key[0]!=' ' || tag->key[1]!=' ' )
- len += strlen(tag->value) + 3; // XY:
- tags = tags->next;
- }
- hlines = hlines->next;
- }
-
- nout = 0;
- out = malloc(len+1);
- hlines = header;
- while (hlines)
- {
- HeaderLine *hline = hlines->data;
-
- nout += sprintf(out+nout,"@%c%c",hline->type[0],hline->type[1]);
-
- list_t *tags = hline->tags;
- while (tags)
- {
- HeaderTag *tag = tags->data;
- nout += sprintf(out+nout,"\t");
- if ( tag->key[0]!=' ' || tag->key[1]!=' ' )
- nout += sprintf(out+nout,"%c%c:", tag->key[0],tag->key[1]);
- nout += sprintf(out+nout,"%s", tag->value);
- tags = tags->next;
- }
- hlines = hlines->next;
- nout += sprintf(out+nout,"\n");
- }
- out[len] = 0;
- return out;
-}
-
-void *sam_header_parse2(const char *headerText)
-{
- list_t *hlines = NULL;
- HeaderLine *hline;
- const char *text;
- char *buf=NULL;
- size_t nbuf = 0;
- int tovalidate = 0;
-
- if ( !headerText )
- return 0;
-
- text = headerText;
- while ( (text=nextline(&buf, &nbuf, text)) )
- {
- hline = sam_header_line_parse(buf);
- if ( hline && (!tovalidate || sam_header_line_validate(hline)) )
- // With too many (~250,000) reference sequences the header parsing was too slow with list_append.
- hlines = list_append_to_end(hlines, hline);
- else
- {
- if (hline) sam_header_line_free(hline);
- sam_header_free(hlines);
- if ( buf ) free(buf);
- return NULL;
- }
- }
- if ( buf ) free(buf);
-
- return hlines;
-}
-
-void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2])
-{
- const HeaderDict *dict = (const HeaderDict*)_dict;
- const list_t *l = dict;
- khash_t(str) *tbl = kh_init(str);
- khiter_t k;
- int ret;
-
- if (_dict == 0) return tbl; // return an empty (not null) hash table
- while (l)
- {
- HeaderLine *hline = l->data;
- if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] )
- {
- l = l->next;
- continue;
- }
-
- HeaderTag *key, *value;
- key = header_line_has_tag(hline,key_tag);
- value = header_line_has_tag(hline,value_tag);
- if ( !key || !value )
- {
- l = l->next;
- continue;
- }
-
- k = kh_get(str, tbl, key->value);
- if ( k != kh_end(tbl) )
- debug("[sam_header_lookup_table] They key %s not unique.\n", key->value);
- k = kh_put(str, tbl, key->value, &ret);
- kh_value(tbl, k) = value->value;
-
- l = l->next;
- }
- return tbl;
-}
-
-char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n)
-{
- const HeaderDict *dict = (const HeaderDict*)_dict;
- const list_t *l = dict;
- int max, n;
- char **ret;
-
- ret = 0; *_n = max = n = 0;
- while (l)
- {
- HeaderLine *hline = l->data;
- if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] )
- {
- l = l->next;
- continue;
- }
-
- HeaderTag *key;
- key = header_line_has_tag(hline,key_tag);
- if ( !key )
- {
- l = l->next;
- continue;
- }
-
- if (n == max) {
- max = max? max<<1 : 4;
- ret = realloc(ret, max * sizeof(void*));
- }
- ret[n++] = key->value;
-
- l = l->next;
- }
- *_n = n;
- return ret;
-}
-
-const char *sam_tbl_get(void *h, const char *key)
-{
- khash_t(str) *tbl = (khash_t(str)*)h;
- khint_t k;
- k = kh_get(str, tbl, key);
- return k == kh_end(tbl)? 0 : kh_val(tbl, k);
-}
-
-int sam_tbl_size(void *h)
-{
- khash_t(str) *tbl = (khash_t(str)*)h;
- return h? kh_size(tbl) : 0;
-}
-
-void sam_tbl_destroy(void *h)
-{
- khash_t(str) *tbl = (khash_t(str)*)h;
- kh_destroy(str, tbl);
-}
-
-void *sam_header_merge(int n, const void **_dicts)
-{
- const HeaderDict **dicts = (const HeaderDict**)_dicts;
- HeaderDict *out_dict;
- int idict, status;
-
- if ( n<2 ) return NULL;
-
- out_dict = sam_header_clone(dicts[0]);
-
- for (idict=1; idict<n; idict++)
- {
- const list_t *tmpl_hlines = dicts[idict];
-
- while ( tmpl_hlines )
- {
- list_t *out_hlines = out_dict;
- int inserted = 0;
- while ( out_hlines )
- {
- status = sam_header_compare_lines(tmpl_hlines->data, out_hlines->data);
- if ( status==0 )
- {
- out_hlines = out_hlines->next;
- continue;
- }
-
- if ( status==2 )
- {
- print_header_line(stderr,tmpl_hlines->data);
- print_header_line(stderr,out_hlines->data);
- debug("Conflicting lines, cannot merge the headers.\n");
- return 0;
- }
- if ( status==3 )
- sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data);
-
- inserted = 1;
- break;
- }
- if ( !inserted )
- out_dict = list_append(out_dict, sam_header_line_clone(tmpl_hlines->data));
-
- tmpl_hlines = tmpl_hlines->next;
- }
- }
-
- return out_dict;
-}
-
-
diff --git a/src/samtools-0.1.18/sam_header.h b/src/samtools-0.1.18/sam_header.h
deleted file mode 100644
index e5c754f..0000000
--- a/src/samtools-0.1.18/sam_header.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __SAM_HEADER_H__
-#define __SAM_HEADER_H__
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
- void *sam_header_parse2(const char *headerText);
- void *sam_header_merge(int n, const void **dicts);
- void sam_header_free(void *header);
- char *sam_header_write(const void *headerDict); // returns a newly allocated string
-
- char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n);
-
- void *sam_header2tbl(const void *dict, char type[2], char key_tag[2], char value_tag[2]);
- const char *sam_tbl_get(void *h, const char *key);
- int sam_tbl_size(void *h);
- void sam_tbl_destroy(void *h);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/samtools-0.1.18/sam_view.c b/src/samtools-0.1.18/sam_view.c
deleted file mode 100644
index efda4e8..0000000
--- a/src/samtools-0.1.18/sam_view.c
+++ /dev/null
@@ -1,406 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <math.h>
-#include "sam_header.h"
-#include "sam.h"
-#include "faidx.h"
-#include "kstring.h"
-#include "khash.h"
-KHASH_SET_INIT_STR(rg)
-
-// When counting records instead of printing them,
-// data passed to the bam_fetch callback is encapsulated in this struct.
-typedef struct {
- bam_header_t *header;
- int *count;
-} count_func_data_t;
-
-typedef khash_t(rg) *rghash_t;
-
-// FIXME: we'd better use no global variables...
-static rghash_t g_rghash = 0;
-static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;
-static float g_subsam = -1;
-static char *g_library, *g_rg;
-static void *g_bed;
-
-void *bed_read(const char *fn);
-void bed_destroy(void *_h);
-int bed_overlap(const void *_h, const char *chr, int beg, int end);
-
-static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)
-{
- if (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))
- return 1;
- if (g_bed && b->core.tid >= 0 && !bed_overlap(g_bed, h->target_name[b->core.tid], b->core.pos, bam_calend(&b->core, bam1_cigar(b))))
- return 1;
- if (g_subsam > 0.) {
- int x = (int)(g_subsam + .499);
- uint32_t k = __ac_X31_hash_string(bam1_qname(b)) + x;
- if (k%1024 / 1024.0 >= g_subsam - x) return 1;
- }
- if (g_rg || g_rghash) {
- uint8_t *s = bam_aux_get(b, "RG");
- if (s) {
- if (g_rg) return (strcmp(g_rg, (char*)(s + 1)) == 0)? 0 : 1;
- if (g_rghash) {
- khint_t k = kh_get(rg, g_rghash, (char*)(s + 1));
- return (k != kh_end(g_rghash))? 0 : 1;
- }
- }
- }
- if (g_library) {
- const char *p = bam_get_library((bam_header_t*)h, b);
- return (p && strcmp(p, g_library) == 0)? 0 : 1;
- }
- return 0;
-}
-
-static char *drop_rg(char *hdtxt, rghash_t h, int *len)
-{
- char *p = hdtxt, *q, *r, *s;
- kstring_t str;
- memset(&str, 0, sizeof(kstring_t));
- while (1) {
- int toprint = 0;
- q = strchr(p, '\n');
- if (q == 0) q = p + strlen(p);
- if (q - p < 3) break; // the line is too short; then stop
- if (strncmp(p, "@RG\t", 4) == 0) {
- int c;
- khint_t k;
- if ((r = strstr(p, "\tID:")) != 0) {
- r += 4;
- for (s = r; *s != '\0' && *s != '\n' && *s != '\t'; ++s);
- c = *s; *s = '\0';
- k = kh_get(rg, h, r);
- *s = c;
- if (k != kh_end(h)) toprint = 1;
- }
- } else toprint = 1;
- if (toprint) {
- kputsn(p, q - p, &str); kputc('\n', &str);
- }
- p = q + 1;
- }
- *len = str.l;
- return str.s;
-}
-
-// callback function for bam_fetch() that prints nonskipped records
-static int view_func(const bam1_t *b, void *data)
-{
- if (!__g_skip_aln(((samfile_t*)data)->header, b))
- samwrite((samfile_t*)data, b);
- return 0;
-}
-
-// callback function for bam_fetch() that counts nonskipped records
-static int count_func(const bam1_t *b, void *data)
-{
- if (!__g_skip_aln(((count_func_data_t*)data)->header, b)) {
- (*((count_func_data_t*)data)->count)++;
- }
- return 0;
-}
-
-static int usage(int is_long_help);
-
-int main_samview(int argc, char *argv[])
-{
- int c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, compress_level = -1, is_bamout = 0, is_count = 0;
- int of_type = BAM_OFDEC, is_long_help = 0;
- int count = 0;
- samfile_t *in = 0, *out = 0;
- char in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0, *fn_rg = 0;
-
- /* parse command-line options */
- strcpy(in_mode, "r"); strcpy(out_mode, "w");
- while ((c = getopt(argc, argv, "Sbct:h1Ho:q:f:F:ul:r:xX?T:R:L:s:")) >= 0) {
- switch (c) {
- case 's': g_subsam = atof(optarg); break;
- case 'c': is_count = 1; break;
- case 'S': is_bamin = 0; break;
- case 'b': is_bamout = 1; break;
- case 't': fn_list = strdup(optarg); is_bamin = 0; break;
- case 'h': is_header = 1; break;
- case 'H': is_header_only = 1; break;
- case 'o': fn_out = strdup(optarg); break;
- case 'f': g_flag_on = strtol(optarg, 0, 0); break;
- case 'F': g_flag_off = strtol(optarg, 0, 0); break;
- case 'q': g_min_mapQ = atoi(optarg); break;
- case 'u': compress_level = 0; break;
- case '1': compress_level = 1; break;
- case 'l': g_library = strdup(optarg); break;
- case 'L': g_bed = bed_read(optarg); break;
- case 'r': g_rg = strdup(optarg); break;
- case 'R': fn_rg = strdup(optarg); break;
- case 'x': of_type = BAM_OFHEX; break;
- case 'X': of_type = BAM_OFSTR; break;
- case '?': is_long_help = 1; break;
- case 'T': fn_ref = strdup(optarg); is_bamin = 0; break;
- default: return usage(is_long_help);
- }
- }
- if (compress_level >= 0) is_bamout = 1;
- if (is_header_only) is_header = 1;
- if (is_bamout) strcat(out_mode, "b");
- else {
- if (of_type == BAM_OFHEX) strcat(out_mode, "x");
- else if (of_type == BAM_OFSTR) strcat(out_mode, "X");
- }
- if (is_bamin) strcat(in_mode, "b");
- if (is_header) strcat(out_mode, "h");
- if (compress_level >= 0) {
- char tmp[2];
- tmp[0] = compress_level + '0'; tmp[1] = '\0';
- strcat(out_mode, tmp);
- }
- if (argc == optind) return usage(is_long_help); // potential memory leak...
-
- // read the list of read groups
- if (fn_rg) {
- FILE *fp_rg;
- char buf[1024];
- int ret;
- g_rghash = kh_init(rg);
- fp_rg = fopen(fn_rg, "r");
- while (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but bear me...
- kh_put(rg, g_rghash, strdup(buf), &ret); // we'd better check duplicates...
- fclose(fp_rg);
- }
-
- // generate the fn_list if necessary
- if (fn_list == 0 && fn_ref) fn_list = samfaipath(fn_ref);
- // open file handlers
- if ((in = samopen(argv[optind], in_mode, fn_list)) == 0) {
- fprintf(stderr, "[main_samview] fail to open \"%s\" for reading.\n", argv[optind]);
- ret = 1;
- goto view_end;
- }
- if (in->header == 0) {
- fprintf(stderr, "[main_samview] fail to read the header from \"%s\".\n", argv[optind]);
- ret = 1;
- goto view_end;
- }
- if (g_rghash) { // FIXME: I do not know what "bam_header_t::n_text" is for...
- char *tmp;
- int l;
- tmp = drop_rg(in->header->text, g_rghash, &l);
- free(in->header->text);
- in->header->text = tmp;
- in->header->l_text = l;
- }
- if (!is_count && (out = samopen(fn_out? fn_out : "-", out_mode, in->header)) == 0) {
- fprintf(stderr, "[main_samview] fail to open \"%s\" for writing.\n", fn_out? fn_out : "standard output");
- ret = 1;
- goto view_end;
- }
- if (is_header_only) goto view_end; // no need to print alignments
-
- if (argc == optind + 1) { // convert/print the entire file
- bam1_t *b = bam_init1();
- int r;
- while ((r = samread(in, b)) >= 0) { // read one alignment from `in'
- if (!__g_skip_aln(in->header, b)) {
- if (!is_count) samwrite(out, b); // write the alignment to `out'
- count++;
- }
- }
- if (r < -1) {
- fprintf(stderr, "[main_samview] truncated file.\n");
- ret = 1;
- }
- bam_destroy1(b);
- } else { // retrieve alignments in specified regions
- int i;
- bam_index_t *idx = 0;
- if (is_bamin) idx = bam_index_load(argv[optind]); // load BAM index
- if (idx == 0) { // index is unavailable
- fprintf(stderr, "[main_samview] random alignment retrieval only works for indexed BAM files.\n");
- ret = 1;
- goto view_end;
- }
- for (i = optind + 1; i < argc; ++i) {
- int tid, beg, end, result;
- bam_parse_region(in->header, argv[i], &tid, &beg, &end); // parse a region in the format like `chr2:100-200'
- if (tid < 0) { // reference name is not found
- fprintf(stderr, "[main_samview] region \"%s\" specifies an unknown reference name. Continue anyway.\n", argv[i]);
- continue;
- }
- // fetch alignments
- if (is_count) {
- count_func_data_t count_data = { in->header, &count };
- result = bam_fetch(in->x.bam, idx, tid, beg, end, &count_data, count_func);
- } else
- result = bam_fetch(in->x.bam, idx, tid, beg, end, out, view_func);
- if (result < 0) {
- fprintf(stderr, "[main_samview] retrieval of region \"%s\" failed due to truncated file or corrupt BAM index file\n", argv[i]);
- ret = 1;
- break;
- }
- }
- bam_index_destroy(idx); // destroy the BAM index
- }
-
-view_end:
- if (is_count && ret == 0) {
- printf("%d\n", count);
- }
- // close files, free and return
- free(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg); free(fn_rg);
- if (g_bed) bed_destroy(g_bed);
- if (g_rghash) {
- khint_t k;
- for (k = 0; k < kh_end(g_rghash); ++k)
- if (kh_exist(g_rghash, k)) free((char*)kh_key(g_rghash, k));
- kh_destroy(rg, g_rghash);
- }
- samclose(in);
- if (!is_count)
- samclose(out);
- return ret;
-}
-
-static int usage(int is_long_help)
-{
- fprintf(stderr, "\n");
- fprintf(stderr, "Usage: samtools view [options] <in.bam>|<in.sam> [region1 [...]]\n\n");
- fprintf(stderr, "Options: -b output BAM\n");
- fprintf(stderr, " -h print header for the SAM output\n");
- fprintf(stderr, " -H print header only (no alignments)\n");
- fprintf(stderr, " -S input is SAM\n");
- fprintf(stderr, " -u uncompressed BAM output (force -b)\n");
- fprintf(stderr, " -1 fast compression (force -b)\n");
- fprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\n");
- fprintf(stderr, " -X output FLAG in string (samtools-C specific)\n");
- fprintf(stderr, " -c print only the count of matching records\n");
- fprintf(stderr, " -L FILE output alignments overlapping the input BED FILE [null]\n");
- fprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\n");
- fprintf(stderr, " -T FILE reference sequence file (force -S) [null]\n");
- fprintf(stderr, " -o FILE output file name [stdout]\n");
- fprintf(stderr, " -R FILE list of read groups to be outputted [null]\n");
- fprintf(stderr, " -f INT required flag, 0 for unset [0]\n");
- fprintf(stderr, " -F INT filtering flag, 0 for unset [0]\n");
- fprintf(stderr, " -q INT minimum mapping quality [0]\n");
- fprintf(stderr, " -l STR only output reads in library STR [null]\n");
- fprintf(stderr, " -r STR only output reads in read group STR [null]\n");
- fprintf(stderr, " -s FLOAT fraction of templates to subsample; integer part as seed [-1]\n");
- fprintf(stderr, " -? longer help\n");
- fprintf(stderr, "\n");
- if (is_long_help)
- fprintf(stderr, "Notes:\n\
-\n\
- 1. By default, this command assumes the file on the command line is in\n\
- the BAM format and it prints the alignments in SAM. If `-t' is\n\
- applied, the input file is assumed to be in the SAM format. The\n\
- file supplied with `-t' is SPACE/TAB delimited with the first two\n\
- fields of each line consisting of the reference name and the\n\
- corresponding sequence length. The `.fai' file generated by `faidx'\n\
- can be used here. This file may be empty if reads are unaligned.\n\
-\n\
- 2. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz'.\n\
-\n\
- 3. BAM->SAM conversion: `samtools view in.bam'.\n\
-\n\
- 4. A region should be presented in one of the following formats:\n\
- `chr1', `chr2:1,000' and `chr3:1000-2,000'. When a region is\n\
- specified, the input alignment file must be an indexed BAM file.\n\
-\n\
- 5. Option `-u' is preferred over `-b' when the output is piped to\n\
- another samtools command.\n\
-\n\
- 6. In a string FLAG, each character represents one bit with\n\
- p=0x1 (paired), P=0x2 (properly paired), u=0x4 (unmapped),\n\
- U=0x8 (mate unmapped), r=0x10 (reverse), R=0x20 (mate reverse)\n\
- 1=0x40 (first), 2=0x80 (second), s=0x100 (not primary), \n\
- f=0x200 (failure) and d=0x400 (duplicate). Note that `-x' and\n\
- `-X' are samtools-C specific. Picard and older samtools do not\n\
- support HEX or string flags.\n\
-\n");
- return 1;
-}
-
-int main_import(int argc, char *argv[])
-{
- int argc2, ret;
- char **argv2;
- if (argc != 4) {
- fprintf(stderr, "Usage: bamtk import <in.ref_list> <in.sam> <out.bam>\n");
- return 1;
- }
- argc2 = 6;
- argv2 = calloc(6, sizeof(char*));
- argv2[0] = "import", argv2[1] = "-o", argv2[2] = argv[3], argv2[3] = "-bt", argv2[4] = argv[1], argv2[5] = argv[2];
- ret = main_samview(argc2, argv2);
- free(argv2);
- return ret;
-}
-
-int8_t seq_comp_table[16] = { 0, 8, 4, 12, 2, 10, 9, 14, 1, 6, 5, 13, 3, 11, 7, 15 };
-
-int main_bam2fq(int argc, char *argv[])
-{
- bamFile fp;
- bam_header_t *h;
- bam1_t *b;
- int8_t *buf;
- int max_buf;
- if (argc == 1) {
- fprintf(stderr, "Usage: samtools bam2fq <in.bam>\n");
- return 1;
- }
- fp = strcmp(argv[1], "-")? bam_open(argv[1], "r") : bam_dopen(fileno(stdin), "r");
- if (fp == 0) return 1;
- h = bam_header_read(fp);
- b = bam_init1();
- buf = 0;
- max_buf = 0;
- while (bam_read1(fp, b) >= 0) {
- int i, qlen = b->core.l_qseq;
- uint8_t *seq;
- putchar('@'); fputs(bam1_qname(b), stdout);
- if ((b->core.flag & 0x40) && !(b->core.flag & 0x80)) puts("/1");
- else if ((b->core.flag & 0x80) && !(b->core.flag & 0x40)) puts("/2");
- else putchar('\n');
- if (max_buf < qlen + 1) {
- max_buf = qlen + 1;
- kroundup32(max_buf);
- buf = realloc(buf, max_buf);
- }
- buf[qlen] = 0;
- seq = bam1_seq(b);
- for (i = 0; i < qlen; ++i)
- buf[i] = bam1_seqi(seq, i);
- if (b->core.flag & 16) { // reverse complement
- for (i = 0; i < qlen>>1; ++i) {
- int8_t t = seq_comp_table[buf[qlen - 1 - i]];
- buf[qlen - 1 - i] = seq_comp_table[buf[i]];
- buf[i] = t;
- }
- if (qlen&1) buf[i] = seq_comp_table[buf[i]];
- }
- for (i = 0; i < qlen; ++i)
- buf[i] = bam_nt16_rev_table[buf[i]];
- puts((char*)buf);
- puts("+");
- seq = bam1_qual(b);
- for (i = 0; i < qlen; ++i)
- buf[i] = 33 + seq[i];
- if (b->core.flag & 16) { // reverse
- for (i = 0; i < qlen>>1; ++i) {
- int8_t t = buf[qlen - 1 - i];
- buf[qlen - 1 - i] = buf[i];
- buf[i] = t;
- }
- }
- puts((char*)buf);
- }
- free(buf);
- bam_destroy1(b);
- bam_header_destroy(h);
- bam_close(fp);
- return 0;
-}
diff --git a/src/samtools-0.1.18/sample.c b/src/samtools-0.1.18/sample.c
deleted file mode 100644
index 830b9d1..0000000
--- a/src/samtools-0.1.18/sample.c
+++ /dev/null
@@ -1,107 +0,0 @@
-#include <stdlib.h>
-#include <string.h>
-#include "sample.h"
-#include "khash.h"
-KHASH_MAP_INIT_STR(sm, int)
-
-bam_sample_t *bam_smpl_init(void)
-{
- bam_sample_t *s;
- s = calloc(1, sizeof(bam_sample_t));
- s->rg2smid = kh_init(sm);
- s->sm2id = kh_init(sm);
- return s;
-}
-
-void bam_smpl_destroy(bam_sample_t *sm)
-{
- int i;
- khint_t k;
- khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid;
- if (sm == 0) return;
- for (i = 0; i < sm->n; ++i) free(sm->smpl[i]);
- free(sm->smpl);
- for (k = kh_begin(rg2smid); k != kh_end(rg2smid); ++k)
- if (kh_exist(rg2smid, k)) free((char*)kh_key(rg2smid, k));
- kh_destroy(sm, sm->rg2smid);
- kh_destroy(sm, sm->sm2id);
- free(sm);
-}
-
-static void add_pair(bam_sample_t *sm, khash_t(sm) *sm2id, const char *key, const char *val)
-{
- khint_t k_rg, k_sm;
- int ret;
- khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid;
- k_rg = kh_get(sm, rg2smid, key);
- if (k_rg != kh_end(rg2smid)) return; // duplicated @RG-ID
- k_rg = kh_put(sm, rg2smid, strdup(key), &ret);
- k_sm = kh_get(sm, sm2id, val);
- if (k_sm == kh_end(sm2id)) { // absent
- if (sm->n == sm->m) {
- sm->m = sm->m? sm->m<<1 : 1;
- sm->smpl = realloc(sm->smpl, sizeof(void*) * sm->m);
- }
- sm->smpl[sm->n] = strdup(val);
- k_sm = kh_put(sm, sm2id, sm->smpl[sm->n], &ret);
- kh_val(sm2id, k_sm) = sm->n++;
- }
- kh_val(rg2smid, k_rg) = kh_val(sm2id, k_sm);
-}
-
-int bam_smpl_add(bam_sample_t *sm, const char *fn, const char *txt)
-{
- const char *p = txt, *q, *r;
- kstring_t buf, first_sm;
- int n = 0;
- khash_t(sm) *sm2id = (khash_t(sm)*)sm->sm2id;
- if (txt == 0) {
- add_pair(sm, sm2id, fn, fn);
- return 0;
- }
- memset(&buf, 0, sizeof(kstring_t));
- memset(&first_sm, 0, sizeof(kstring_t));
- while ((q = strstr(p, "@RG")) != 0) {
- p = q + 3;
- r = q = 0;
- if ((q = strstr(p, "\tID:")) != 0) q += 4;
- if ((r = strstr(p, "\tSM:")) != 0) r += 4;
- if (r && q) {
- char *u, *v;
- int oq, or;
- for (u = (char*)q; *u && *u != '\t' && *u != '\n'; ++u);
- for (v = (char*)r; *v && *v != '\t' && *v != '\n'; ++v);
- oq = *u; or = *v; *u = *v = '\0';
- buf.l = 0; kputs(fn, &buf); kputc('/', &buf); kputs(q, &buf);
- add_pair(sm, sm2id, buf.s, r);
- if ( !first_sm.s )
- kputs(r,&first_sm);
- *u = oq; *v = or;
- } else break;
- p = q > r? q : r;
- ++n;
- }
- if (n == 0) add_pair(sm, sm2id, fn, fn);
- // If there is only one RG tag present in the header and reads are not annotated, don't refuse to work but
- // use the tag instead.
- else if ( n==1 && first_sm.s )
- add_pair(sm,sm2id,fn,first_sm.s);
- if ( first_sm.s )
- free(first_sm.s);
-
-// add_pair(sm, sm2id, fn, fn);
- free(buf.s);
- return 0;
-}
-
-int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str)
-{
- khint_t k;
- khash_t(sm) *rg2smid = (khash_t(sm)*)sm->rg2smid;
- if (rg) {
- str->l = 0;
- kputs(fn, str); kputc('/', str); kputs(rg, str);
- k = kh_get(sm, rg2smid, str->s);
- } else k = kh_get(sm, rg2smid, fn);
- return k == kh_end(rg2smid)? -1 : kh_val(rg2smid, k);
-}
diff --git a/src/samtools-0.1.18/sample.h b/src/samtools-0.1.18/sample.h
deleted file mode 100644
index 85fe499..0000000
--- a/src/samtools-0.1.18/sample.h
+++ /dev/null
@@ -1,17 +0,0 @@
-#ifndef BAM_SAMPLE_H
-#define BAM_SAMPLE_H
-
-#include "kstring.h"
-
-typedef struct {
- int n, m;
- char **smpl;
- void *rg2smid, *sm2id;
-} bam_sample_t;
-
-bam_sample_t *bam_smpl_init(void);
-int bam_smpl_add(bam_sample_t *sm, const char *abs, const char *txt);
-int bam_smpl_rg2smid(const bam_sample_t *sm, const char *fn, const char *rg, kstring_t *str);
-void bam_smpl_destroy(bam_sample_t *sm);
-
-#endif
diff --git a/src/samtools-0.1.18/samtools.1 b/src/samtools-0.1.18/samtools.1
deleted file mode 100644
index 98ce9d0..0000000
--- a/src/samtools-0.1.18/samtools.1
+++ /dev/null
@@ -1,994 +0,0 @@
-.TH samtools 1 "05 July 2011" "samtools-0.1.17" "Bioinformatics tools"
-.SH NAME
-.PP
-samtools - Utilities for the Sequence Alignment/Map (SAM) format
-
-bcftools - Utilities for the Binary Call Format (BCF) and VCF
-.SH SYNOPSIS
-.PP
-samtools view -bt ref_list.txt -o aln.bam aln.sam.gz
-.PP
-samtools sort aln.bam aln.sorted
-.PP
-samtools index aln.sorted.bam
-.PP
-samtools idxstats aln.sorted.bam
-.PP
-samtools view aln.sorted.bam chr2:20,100,000-20,200,000
-.PP
-samtools merge out.bam in1.bam in2.bam in3.bam
-.PP
-samtools faidx ref.fasta
-.PP
-samtools pileup -vcf ref.fasta aln.sorted.bam
-.PP
-samtools mpileup -C50 -gf ref.fasta -r chr3:1,000-2,000 in1.bam in2.bam
-.PP
-samtools tview aln.sorted.bam ref.fasta
-.PP
-bcftools index in.bcf
-.PP
-bcftools view in.bcf chr2:100-200 > out.vcf
-.PP
-bcftools view -vc in.bcf > out.vcf 2> out.afs
-
-.SH DESCRIPTION
-.PP
-Samtools is a set of utilities that manipulate alignments in the BAM
-format. It imports from and exports to the SAM (Sequence Alignment/Map)
-format, does sorting, merging and indexing, and allows to retrieve reads
-in any regions swiftly.
-
-Samtools is designed to work on a stream. It regards an input file `-'
-as the standard input (stdin) and an output file `-' as the standard
-output (stdout). Several commands can thus be combined with Unix
-pipes. Samtools always output warning and error messages to the standard
-error output (stderr).
-
-Samtools is also able to open a BAM (not SAM) file on a remote FTP or
-HTTP server if the BAM file name starts with `ftp://' or `http://'.
-Samtools checks the current working directory for the index file and
-will download the index upon absence. Samtools does not retrieve the
-entire alignment file unless it is asked to do so.
-
-.SH SAMTOOLS COMMANDS AND OPTIONS
-
-.TP 10
-.B view
-samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag] [-F
-skipFlag] [-q minMapQ] [-l library] [-r readGroup] [-R rgFile] <in.bam>|<in.sam> [region1 [...]]
-
-Extract/print all or sub alignments in SAM or BAM format. If no region
-is specified, all the alignments will be printed; otherwise only
-alignments overlapping the specified regions will be output. An
-alignment may be given multiple times if it is overlapping several
-regions. A region can be presented, for example, in the following
-format: `chr2' (the whole chr2), `chr2:1000000' (region starting from
-1,000,000bp) or `chr2:1,000,000-2,000,000' (region between 1,000,000 and
-2,000,000bp including the end points). The coordinate is 1-based.
-
-.B OPTIONS:
-.RS
-.TP 8
-.B -b
-Output in the BAM format.
-.TP
-.BI -f \ INT
-Only output alignments with all bits in INT present in the FLAG
-field. INT can be in hex in the format of /^0x[0-9A-F]+/ [0]
-.TP
-.BI -F \ INT
-Skip alignments with bits present in INT [0]
-.TP
-.B -h
-Include the header in the output.
-.TP
-.B -H
-Output the header only.
-.TP
-.BI -l \ STR
-Only output reads in library STR [null]
-.TP
-.BI -o \ FILE
-Output file [stdout]
-.TP
-.BI -q \ INT
-Skip alignments with MAPQ smaller than INT [0]
-.TP
-.BI -r \ STR
-Only output reads in read group STR [null]
-.TP
-.BI -R \ FILE
-Output reads in read groups listed in
-.I FILE
-[null]
-.TP
-.B -S
-Input is in SAM. If @SQ header lines are absent, the
-.B `-t'
-option is required.
-.TP
-.B -c
-Instead of printing the alignments, only count them and print the
-total number. All filter options, such as
-.B `-f',
-.B `-F'
-and
-.B `-q'
-, are taken into account.
-.TP
-.BI -t \ FILE
-This file is TAB-delimited. Each line must contain the reference name
-and the length of the reference, one line for each distinct reference;
-additional fields are ignored. This file also defines the order of the
-reference sequences in sorting. If you run `samtools faidx <ref.fa>',
-the resultant index file
-.I <ref.fa>.fai
-can be used as this
-.I <in.ref_list>
-file.
-.TP
-.B -u
-Output uncompressed BAM. This option saves time spent on
-compression/decomprssion and is thus preferred when the output is piped
-to another samtools command.
-.RE
-
-.TP
-.B tview
-samtools tview <in.sorted.bam> [ref.fasta]
-
-Text alignment viewer (based on the ncurses library). In the viewer,
-press `?' for help and press `g' to check the alignment start from a
-region in the format like `chr10:10,000,000' or `=10,000,000' when
-viewing the same reference sequence.
-
-.TP
-.B mpileup
-.B samtools mpileup
-.RB [ \-EBug ]
-.RB [ \-C
-.IR capQcoef ]
-.RB [ \-r
-.IR reg ]
-.RB [ \-f
-.IR in.fa ]
-.RB [ \-l
-.IR list ]
-.RB [ \-M
-.IR capMapQ ]
-.RB [ \-Q
-.IR minBaseQ ]
-.RB [ \-q
-.IR minMapQ ]
-.I in.bam
-.RI [ in2.bam
-.RI [ ... ]]
-
-Generate BCF or pileup for one or multiple BAM files. Alignment records
-are grouped by sample identifiers in @RG header lines. If sample
-identifiers are absent, each input file is regarded as one sample.
-
-In the pileup format (without
-.BR -u or -g ),
-each
-line represents a genomic position, consisting of chromosome name,
-coordinate, reference base, read bases, read qualities and alignment
-mapping qualities. Information on match, mismatch, indel, strand,
-mapping quality and start and end of a read are all encoded at the read
-base column. At this column, a dot stands for a match to the reference
-base on the forward strand, a comma for a match on the reverse strand,
-a '>' or '<' for a reference skip, `ACGTN' for a mismatch on the forward
-strand and `acgtn' for a mismatch on the reverse strand. A pattern
-`\\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this
-reference position and the next reference position. The length of the
-insertion is given by the integer in the pattern, followed by the
-inserted sequence. Similarly, a pattern `-[0-9]+[ACGTNacgtn]+'
-represents a deletion from the reference. The deleted bases will be
-presented as `*' in the following lines. Also at the read base column, a
-symbol `^' marks the start of a read. The ASCII of the character
-following `^' minus 33 gives the mapping quality. A symbol `$' marks the
-end of a read segment.
-
-.B Input Options:
-.RS
-.TP 10
-.B -6
-Assume the quality is in the Illumina 1.3+ encoding.
-.B -A
-Do not skip anomalous read pairs in variant calling.
-.TP
-.B -B
-Disable probabilistic realignment for the computation of base alignment
-quality (BAQ). BAQ is the Phred-scaled probability of a read base being
-misaligned. Applying this option greatly helps to reduce false SNPs
-caused by misalignments.
-.TP
-.BI -b \ FILE
-List of input BAM files, one file per line [null]
-.TP
-.BI -C \ INT
-Coefficient for downgrading mapping quality for reads containing
-excessive mismatches. Given a read with a phred-scaled probability q of
-being generated from the mapped position, the new mapping quality is
-about sqrt((INT-q)/INT)*INT. A zero value disables this
-functionality; if enabled, the recommended value for BWA is 50. [0]
-.TP
-.BI -d \ INT
-At a position, read maximally
-.I INT
-reads per input BAM. [250]
-.TP
-.B -E
-Extended BAQ computation. This option helps sensitivity especially for MNPs, but may hurt
-specificity a little bit.
-.TP
-.BI -f \ FILE
-The
-.BR faidx -indexed
-reference file in the FASTA format. The file can be optionally compressed by
-.BR razip .
-[null]
-.TP
-.BI -l \ FILE
-BED or position list file containing a list of regions or sites where pileup or BCF should be generated [null]
-.TP
-.BI -q \ INT
-Minimum mapping quality for an alignment to be used [0]
-.TP
-.BI -Q \ INT
-Minimum base quality for a base to be considered [13]
-.TP
-.BI -r \ STR
-Only generate pileup in region
-.I STR
-[all sites]
-.TP
-.B Output Options:
-
-.TP
-.B -D
-Output per-sample read depth
-.TP
-.B -g
-Compute genotype likelihoods and output them in the binary call format (BCF).
-.TP
-.B -S
-Output per-sample Phred-scaled strand bias P-value
-.TP
-.B -u
-Similar to
-.B -g
-except that the output is uncompressed BCF, which is preferred for piping.
-
-.TP
-.B Options for Genotype Likelihood Computation (for -g or -u):
-
-.TP
-.BI -e \ INT
-Phred-scaled gap extension sequencing error probability. Reducing
-.I INT
-leads to longer indels. [20]
-.TP
-.BI -h \ INT
-Coefficient for modeling homopolymer errors. Given an
-.IR l -long
-homopolymer
-run, the sequencing error of an indel of size
-.I s
-is modeled as
-.IR INT * s / l .
-[100]
-.TP
-.B -I
-Do not perform INDEL calling
-.TP
-.BI -L \ INT
-Skip INDEL calling if the average per-sample depth is above
-.IR INT .
-[250]
-.TP
-.BI -o \ INT
-Phred-scaled gap open sequencing error probability. Reducing
-.I INT
-leads to more indel calls. [40]
-.TP
-.BI -P \ STR
-Comma dilimited list of platforms (determined by
-.BR @RG-PL )
-from which indel candidates are obtained. It is recommended to collect
-indel candidates from sequencing technologies that have low indel error
-rate such as ILLUMINA. [all]
-.RE
-
-.TP
-.B reheader
-samtools reheader <in.header.sam> <in.bam>
-
-Replace the header in
-.I in.bam
-with the header in
-.I in.header.sam.
-This command is much faster than replacing the header with a
-BAM->SAM->BAM conversion.
-
-.TP
-.B cat
-samtools cat [-h header.sam] [-o out.bam] <in1.bam> <in2.bam> [ ... ]
-
-Concatenate BAMs. The sequence dictionary of each input BAM must be identical,
-although this command does not check this. This command uses a similar trick
-to
-.B reheader
-which enables fast BAM concatenation.
-
-.TP
-.B sort
-samtools sort [-no] [-m maxMem] <in.bam> <out.prefix>
-
-Sort alignments by leftmost coordinates. File
-.I <out.prefix>.bam
-will be created. This command may also create temporary files
-.I <out.prefix>.%d.bam
-when the whole alignment cannot be fitted into memory (controlled by
-option -m).
-
-.B OPTIONS:
-.RS
-.TP 8
-.B -o
-Output the final alignment to the standard output.
-.TP
-.B -n
-Sort by read names rather than by chromosomal coordinates
-.TP
-.BI -m \ INT
-Approximately the maximum required memory. [500000000]
-.RE
-
-.TP
-.B merge
-samtools merge [-nur1f] [-h inh.sam] [-R reg] <out.bam> <in1.bam> <in2.bam> [...]
-
-Merge multiple sorted alignments.
-The header reference lists of all the input BAM files, and the @SQ headers of
-.IR inh.sam ,
-if any, must all refer to the same set of reference sequences.
-The header reference list and (unless overridden by
-.BR -h )
-`@' headers of
-.I in1.bam
-will be copied to
-.IR out.bam ,
-and the headers of other files will be ignored.
-
-.B OPTIONS:
-.RS
-.TP 8
-.B -1
-Use zlib compression level 1 to comrpess the output
-.TP
-.B -f
-Force to overwrite the output file if present.
-.TP 8
-.BI -h \ FILE
-Use the lines of
-.I FILE
-as `@' headers to be copied to
-.IR out.bam ,
-replacing any header lines that would otherwise be copied from
-.IR in1.bam .
-.RI ( FILE
-is actually in SAM format, though any alignment records it may contain
-are ignored.)
-.TP
-.B -n
-The input alignments are sorted by read names rather than by chromosomal
-coordinates
-.TP
-.BI -R \ STR
-Merge files in the specified region indicated by
-.I STR
-[null]
-.TP
-.B -r
-Attach an RG tag to each alignment. The tag value is inferred from file names.
-.TP
-.B -u
-Uncompressed BAM output
-.RE
-
-.TP
-.B index
-samtools index <aln.bam>
-
-Index sorted alignment for fast random access. Index file
-.I <aln.bam>.bai
-will be created.
-
-.TP
-.B idxstats
-samtools idxstats <aln.bam>
-
-Retrieve and print stats in the index file. The output is TAB delimited
-with each line consisting of reference sequence name, sequence length, #
-mapped reads and # unmapped reads.
-
-.TP
-.B faidx
-samtools faidx <ref.fasta> [region1 [...]]
-
-Index reference sequence in the FASTA format or extract subsequence from
-indexed reference sequence. If no region is specified,
-.B faidx
-will index the file and create
-.I <ref.fasta>.fai
-on the disk. If regions are speficified, the subsequences will be
-retrieved and printed to stdout in the FASTA format. The input file can
-be compressed in the
-.B RAZF
-format.
-
-.TP
-.B fixmate
-samtools fixmate <in.nameSrt.bam> <out.bam>
-
-Fill in mate coordinates, ISIZE and mate related flags from a
-name-sorted alignment.
-
-.TP
-.B rmdup
-samtools rmdup [-sS] <input.srt.bam> <out.bam>
-
-Remove potential PCR duplicates: if multiple read pairs have identical
-external coordinates, only retain the pair with highest mapping quality.
-In the paired-end mode, this command
-.B ONLY
-works with FR orientation and requires ISIZE is correctly set. It does
-not work for unpaired reads (e.g. two ends mapped to different
-chromosomes or orphan reads).
-
-.B OPTIONS:
-.RS
-.TP 8
-.B -s
-Remove duplicate for single-end reads. By default, the command works for
-paired-end reads only.
-.TP 8
-.B -S
-Treat paired-end reads and single-end reads.
-.RE
-
-.TP
-.B calmd
-samtools calmd [-EeubSr] [-C capQcoef] <aln.bam> <ref.fasta>
-
-Generate the MD tag. If the MD tag is already present, this command will
-give a warning if the MD tag generated is different from the existing
-tag. Output SAM by default.
-
-.B OPTIONS:
-.RS
-.TP 8
-.B -A
-When used jointly with
-.B -r
-this option overwrites the original base quality.
-.TP 8
-.B -e
-Convert a the read base to = if it is identical to the aligned reference
-base. Indel caller does not support the = bases at the moment.
-.TP
-.B -u
-Output uncompressed BAM
-.TP
-.B -b
-Output compressed BAM
-.TP
-.B -S
-The input is SAM with header lines
-.TP
-.BI -C \ INT
-Coefficient to cap mapping quality of poorly mapped reads. See the
-.B pileup
-command for details. [0]
-.TP
-.B -r
-Compute the BQ tag (without -A) or cap base quality by BAQ (with -A).
-.TP
-.B -E
-Extended BAQ calculation. This option trades specificity for sensitivity, though the
-effect is minor.
-.RE
-
-.TP
-.B targetcut
-samtools targetcut [-Q minBaseQ] [-i inPenalty] [-0 em0] [-1 em1] [-2 em2] [-f ref] <in.bam>
-
-This command identifies target regions by examining the continuity of read depth, computes
-haploid consensus sequences of targets and outputs a SAM with each sequence corresponding
-to a target. When option
-.B -f
-is in use, BAQ will be applied. This command is
-.B only
-designed for cutting fosmid clones from fosmid pool sequencing [Ref. Kitzman et al. (2010)].
-.RE
-
-.TP
-.B phase
-samtools phase [-AF] [-k len] [-b prefix] [-q minLOD] [-Q minBaseQ] <in.bam>
-
-Call and phase heterozygous SNPs.
-.B OPTIONS:
-.RS
-.TP 8
-.B -A
-Drop reads with ambiguous phase.
-.TP 8
-.BI -b \ STR
-Prefix of BAM output. When this option is in use, phase-0 reads will be saved in file
-.BR STR .0.bam
-and phase-1 reads in
-.BR STR .1.bam.
-Phase unknown reads will be randomly allocated to one of the two files. Chimeric reads
-with switch errors will be saved in
-.BR STR .chimeric.bam.
-[null]
-.TP
-.B -F
-Do not attempt to fix chimeric reads.
-.TP
-.BI -k \ INT
-Maximum length for local phasing. [13]
-.TP
-.BI -q \ INT
-Minimum Phred-scaled LOD to call a heterozygote. [40]
-.TP
-.BI -Q \ INT
-Minimum base quality to be used in het calling. [13]
-.RE
-
-.SH BCFTOOLS COMMANDS AND OPTIONS
-
-.TP 10
-.B view
-.B bcftools view
-.RB [ \-AbFGNQSucgv ]
-.RB [ \-D
-.IR seqDict ]
-.RB [ \-l
-.IR listLoci ]
-.RB [ \-s
-.IR listSample ]
-.RB [ \-i
-.IR gapSNPratio ]
-.RB [ \-t
-.IR mutRate ]
-.RB [ \-p
-.IR varThres ]
-.RB [ \-P
-.IR prior ]
-.RB [ \-1
-.IR nGroup1 ]
-.RB [ \-d
-.IR minFrac ]
-.RB [ \-U
-.IR nPerm ]
-.RB [ \-X
-.IR permThres ]
-.RB [ \-T
-.IR trioType ]
-.I in.bcf
-.RI [ region ]
-
-Convert between BCF and VCF, call variant candidates and estimate allele
-frequencies.
-
-.RS
-.TP
-.B Input/Output Options:
-.TP 10
-.B -A
-Retain all possible alternate alleles at variant sites. By default, the view
-command discards unlikely alleles.
-.TP 10
-.B -b
-Output in the BCF format. The default is VCF.
-.TP
-.BI -D \ FILE
-Sequence dictionary (list of chromosome names) for VCF->BCF conversion [null]
-.TP
-.B -F
-Indicate PL is generated by r921 or before (ordering is different).
-.TP
-.B -G
-Suppress all individual genotype information.
-.TP
-.BI -l \ FILE
-List of sites at which information are outputted [all sites]
-.TP
-.B -N
-Skip sites where the REF field is not A/C/G/T
-.TP
-.B -Q
-Output the QCALL likelihood format
-.TP
-.BI -s \ FILE
-List of samples to use. The first column in the input gives the sample names
-and the second gives the ploidy, which can only be 1 or 2. When the 2nd column
-is absent, the sample ploidy is assumed to be 2. In the output, the ordering of
-samples will be identical to the one in
-.IR FILE .
-[null]
-.TP
-.B -S
-The input is VCF instead of BCF.
-.TP
-.B -u
-Uncompressed BCF output (force -b).
-.TP
-.B Consensus/Variant Calling Options:
-.TP 10
-.B -c
-Call variants using Bayesian inference. This option automatically invokes option
-.BR -e .
-.TP
-.BI -d \ FLOAT
-When
-.B -v
-is in use, skip loci where the fraction of samples covered by reads is below FLOAT. [0]
-.TP
-.B -e
-Perform max-likelihood inference only, including estimating the site allele frequency,
-testing Hardy-Weinberg equlibrium and testing associations with LRT.
-.TP
-.B -g
-Call per-sample genotypes at variant sites (force -c)
-.TP
-.BI -i \ FLOAT
-Ratio of INDEL-to-SNP mutation rate [0.15]
-.TP
-.BI -p \ FLOAT
-A site is considered to be a variant if P(ref|D)<FLOAT [0.5]
-.TP
-.BI -P \ STR
-Prior or initial allele frequency spectrum. If STR can be
-.IR full ,
-.IR cond2 ,
-.I flat
-or the file consisting of error output from a previous variant calling
-run.
-.TP
-.BI -t \ FLOAT
-Scaled muttion rate for variant calling [0.001]
-.TP
-.BI -T \ STR
-Enable pair/trio calling. For trio calling, option
-.B -s
-is usually needed to be applied to configure the trio members and their ordering.
-In the file supplied to the option
-.BR -s ,
-the first sample must be the child, the second the father and the third the mother.
-The valid values of
-.I STR
-are `pair', `trioauto', `trioxd' and `trioxs', where `pair' calls differences between two input samples, and `trioxd' (`trioxs') specifies that the input
-is from the X chromosome non-PAR regions and the child is a female (male). [null]
-.TP
-.B -v
-Output variant sites only (force -c)
-.TP
-.B Contrast Calling and Association Test Options:
-.TP
-.BI -1 \ INT
-Number of group-1 samples. This option is used for dividing the samples into
-two groups for contrast SNP calling or association test.
-When this option is in use, the following VCF INFO will be outputted:
-PC2, PCHI2 and QCHI2. [0]
-.TP
-.BI -U \ INT
-Number of permutations for association test (effective only with
-.BR -1 )
-[0]
-.TP
-.BI -X \ FLOAT
-Only perform permutations for P(chi^2)<FLOAT (effective only with
-.BR -U )
-[0.01]
-.RE
-
-.TP
-.B index
-.B bcftools index
-.I in.bcf
-
-Index sorted BCF for random access.
-.RE
-
-.TP
-.B cat
-.B bcftools cat
-.I in1.bcf
-.RI [ "in2.bcf " [ ... "]]]"
-
-Concatenate BCF files. The input files are required to be sorted and
-have identical samples appearing in the same order.
-.RE
-.SH SAM FORMAT
-
-Sequence Alignment/Map (SAM) format is TAB-delimited. Apart from the header lines, which are started
-with the `@' symbol, each alignment line consists of:
-
-.TS
-center box;
-cb | cb | cb
-n | l | l .
-Col Field Description
-_
-1 QNAME Query template/pair NAME
-2 FLAG bitwise FLAG
-3 RNAME Reference sequence NAME
-4 POS 1-based leftmost POSition/coordinate of clipped sequence
-5 MAPQ MAPping Quality (Phred-scaled)
-6 CIAGR extended CIGAR string
-7 MRNM Mate Reference sequence NaMe (`=' if same as RNAME)
-8 MPOS 1-based Mate POSistion
-9 TLEN inferred Template LENgth (insert size)
-10 SEQ query SEQuence on the same strand as the reference
-11 QUAL query QUALity (ASCII-33 gives the Phred base quality)
-12+ OPT variable OPTional fields in the format TAG:VTYPE:VALUE
-.TE
-
-.PP
-Each bit in the FLAG field is defined as:
-
-.TS
-center box;
-cb | cb | cb
-l | c | l .
-Flag Chr Description
-_
-0x0001 p the read is paired in sequencing
-0x0002 P the read is mapped in a proper pair
-0x0004 u the query sequence itself is unmapped
-0x0008 U the mate is unmapped
-0x0010 r strand of the query (1 for reverse)
-0x0020 R strand of the mate
-0x0040 1 the read is the first read in a pair
-0x0080 2 the read is the second read in a pair
-0x0100 s the alignment is not primary
-0x0200 f the read fails platform/vendor quality checks
-0x0400 d the read is either a PCR or an optical duplicate
-.TE
-
-where the second column gives the string representation of the FLAG field.
-
-.SH VCF FORMAT
-
-The Variant Call Format (VCF) is a TAB-delimited format with each data line consists of the following fields:
-.TS
-center box;
-cb | cb | cb
-n | l | l .
-Col Field Description
-_
-1 CHROM CHROMosome name
-2 POS the left-most POSition of the variant
-3 ID unique variant IDentifier
-4 REF the REFerence allele
-5 ALT the ALTernate allele(s), separated by comma
-6 QUAL variant/reference QUALity
-7 FILTER FILTers applied
-8 INFO INFOrmation related to the variant, separated by semi-colon
-9 FORMAT FORMAT of the genotype fields, separated by colon (optional)
-10+ SAMPLE SAMPLE genotypes and per-sample information (optional)
-.TE
-
-.PP
-The following table gives the
-.B INFO
-tags used by samtools and bcftools.
-
-.TS
-center box;
-cb | cb | cb
-l | l | l .
-Tag Format Description
-_
-AF1 double Max-likelihood estimate of the site allele frequency (AF) of the first ALT allele
-DP int Raw read depth (without quality filtering)
-DP4 int[4] # high-quality reference forward bases, ref reverse, alternate for and alt rev bases
-FQ int Consensus quality. Positive: sample genotypes different; negative: otherwise
-MQ int Root-Mean-Square mapping quality of covering reads
-PC2 int[2] Phred probability of AF in group1 samples being larger (,smaller) than in group2
-PCHI2 double Posterior weighted chi^2 P-value between group1 and group2 samples
-PV4 double[4] P-value for strand bias, baseQ bias, mapQ bias and tail distance bias
-QCHI2 int Phred-scaled PCHI2
-RP int # permutations yielding a smaller PCHI2
-CLR int Phred log ratio of genotype likelihoods with and without the trio/pair constraint
-UGT string Most probable genotype configuration without the trio constraint
-CGT string Most probable configuration with the trio constraint
-.TE
-
-.SH EXAMPLES
-.IP o 2
-Import SAM to BAM when
-.B @SQ
-lines are present in the header:
-
- samtools view -bS aln.sam > aln.bam
-
-If
-.B @SQ
-lines are absent:
-
- samtools faidx ref.fa
- samtools view -bt ref.fa.fai aln.sam > aln.bam
-
-where
-.I ref.fa.fai
-is generated automatically by the
-.B faidx
-command.
-
-.IP o 2
-Attach the
-.B RG
-tag while merging sorted alignments:
-
- perl -e 'print "@RG\\tID:ga\\tSM:hs\\tLB:ga\\tPL:Illumina\\n@RG\\tID:454\\tSM:hs\\tLB:454\\tPL:454\\n"' > rg.txt
- samtools merge -rh rg.txt merged.bam ga.bam 454.bam
-
-The value in a
-.B RG
-tag is determined by the file name the read is coming from. In this
-example, in the
-.IR merged.bam ,
-reads from
-.I ga.bam
-will be attached
-.IR RG:Z:ga ,
-while reads from
-.I 454.bam
-will be attached
-.IR RG:Z:454 .
-
-.IP o 2
-Call SNPs and short INDELs for one diploid individual:
-
- samtools mpileup -ugf ref.fa aln.bam | bcftools view -bvcg - > var.raw.bcf
- bcftools view var.raw.bcf | vcfutils.pl varFilter -D 100 > var.flt.vcf
-
-The
-.B -D
-option of varFilter controls the maximum read depth, which should be
-adjusted to about twice the average read depth. One may consider to add
-.B -C50
-to
-.B mpileup
-if mapping quality is overestimated for reads containing excessive
-mismatches. Applying this option usually helps
-.B BWA-short
-but may not other mappers.
-
-.IP o 2
-Generate the consensus sequence for one diploid individual:
-
- samtools mpileup -uf ref.fa aln.bam | bcftools view -cg - | vcfutils.pl vcf2fq > cns.fq
-
-.IP o 2
-Call somatic mutations from a pair of samples:
-
- samtools mpileup -DSuf ref.fa aln.bam | bcftools view -bvcgT pair - > var.bcf
-
-In the output INFO field,
-.I CLR
-gives the Phred-log ratio between the likelihood by treating the
-two samples independently, and the likelihood by requiring the genotype to be identical.
-This
-.I CLR
-is effectively a score measuring the confidence of somatic calls. The higher the better.
-
-.IP o 2
-Call de novo and somatic mutations from a family trio:
-
- samtools mpileup -DSuf ref.fa aln.bam | bcftools view -bvcgT pair -s samples.txt - > var.bcf
-
-File
-.I samples.txt
-should consist of three lines specifying the member and order of samples (in the order of child-father-mother).
-Similarly,
-.I CLR
-gives the Phred-log likelihood ratio with and without the trio constraint.
-.I UGT
-shows the most likely genotype configuration without the trio constraint, and
-.I CGT
-gives the most likely genotype configuration satisfying the trio constraint.
-
-.IP o 2
-Phase one individual:
-
- samtools calmd -AEur aln.bam ref.fa | samtools phase -b prefix - > phase.out
-
-The
-.B calmd
-command is used to reduce false heterozygotes around INDELs.
-
-.IP o 2
-Call SNPs and short indels for multiple diploid individuals:
-
- samtools mpileup -P ILLUMINA -ugf ref.fa *.bam | bcftools view -bcvg - > var.raw.bcf
- bcftools view var.raw.bcf | vcfutils.pl varFilter -D 2000 > var.flt.vcf
-
-Individuals are identified from the
-.B SM
-tags in the
-.B @RG
-header lines. Individuals can be pooled in one alignment file; one
-individual can also be separated into multiple files. The
-.B -P
-option specifies that indel candidates should be collected only from
-read groups with the
-.B @RG-PL
-tag set to
-.IR ILLUMINA .
-Collecting indel candidates from reads sequenced by an indel-prone
-technology may affect the performance of indel calling.
-
-.IP o 2
-Derive the allele frequency spectrum (AFS) on a list of sites from multiple individuals:
-
- samtools mpileup -Igf ref.fa *.bam > all.bcf
- bcftools view -bl sites.list all.bcf > sites.bcf
- bcftools view -cGP cond2 sites.bcf > /dev/null 2> sites.1.afs
- bcftools view -cGP sites.1.afs sites.bcf > /dev/null 2> sites.2.afs
- bcftools view -cGP sites.2.afs sites.bcf > /dev/null 2> sites.3.afs
- ......
-
-where
-.I sites.list
-contains the list of sites with each line consisting of the reference
-sequence name and position. The following
-.B bcftools
-commands estimate AFS by EM.
-
-.IP o 2
-Dump BAQ applied alignment for other SNP callers:
-
- samtools calmd -bAr aln.bam > aln.baq.bam
-
-It adds and corrects the
-.B NM
-and
-.B MD
-tags at the same time. The
-.B calmd
-command also comes with the
-.B -C
-option, the same as the one in
-.B pileup
-and
-.BR mpileup .
-Apply if it helps.
-
-.SH LIMITATIONS
-.PP
-.IP o 2
-Unaligned words used in bam_import.c, bam_endian.h, bam.c and bam_aux.c.
-.IP o 2
-Samtools paired-end rmdup does not work for unpaired reads (e.g. orphan
-reads or ends mapped to different chromosomes). If this is a concern,
-please use Picard's MarkDuplicate which correctly handles these cases,
-although a little slower.
-
-.SH AUTHOR
-.PP
-Heng Li from the Sanger Institute wrote the C version of samtools. Bob
-Handsaker from the Broad Institute implemented the BGZF library and Jue
-Ruan from Beijing Genomics Institute wrote the RAZF library. John
-Marshall and Petr Danecek contribute to the source code and various
-people from the 1000 Genomes Project have contributed to the SAM format
-specification.
-
-.SH SEE ALSO
-.PP
-Samtools website: <http://samtools.sourceforge.net>
diff --git a/src/sortedcontainers/__init__.py b/src/sortedcontainers/__init__.py
deleted file mode 100755
index 98e04cd..0000000
--- a/src/sortedcontainers/__init__.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# -*- coding: utf-8 -*-
-
-"""
-sortedcontainers Sorted Container Types Library
-===============================================
-
-SortedContainers is an Apache2 licensed containers library, written in
-pure-Python, and fast as C-extensions.
-
-Python's standard library is great until you need a sorted container type. Many
-will attest that you can get really far without one, but the moment you
-**really need** a sorted list, dict, or set, you're faced with a dozen
-different implementations, most using C-extensions without great documentation
-and benchmarking.
-
-Things shouldn't be this way. Not in Python.
-
-::
-
- >>> from sortedcontainers import SortedList, SortedDict, SortedSet
- >>> sl = SortedList(xrange(10000000))
- >>> 1234567 in sl
- True
- >>> sl[7654321]
- 7654321
- >>> sl.add(1234567)
- >>> sl.count(1234567)
- 2
- >>> sl *= 3
- >>> len(sl)
- 30000003
-
-SortedContainers takes all of the work out of Python sorted types - making your
-deployment and use of Python easy. There's no need to install a C compiler or
-pre-build and distribute custom extensions. Performance is a feature and
-testing has 100% coverage with unit tests and hours of stress.
-
-:copyright: (c) 2014 by Grant Jenks.
-:license: Apache 2.0, see LICENSE for more details.
-
-"""
-
-__title__ = 'sortedcontainers'
-__version__ = '0.9.4'
-__build__ = 0x000904
-__author__ = 'Grant Jenks'
-__license__ = 'Apache 2.0'
-__copyright__ = 'Copyright 2014 Grant Jenks'
-
-from .sortedlist import SortedList
-from .sortedset import SortedSet
-from .sorteddict import SortedDict
-from .sortedlistwithkey import SortedListWithKey
-
-__all__ = ['SortedList', 'SortedSet', 'SortedDict', 'SortedListWithKey']
diff --git a/src/sortedcontainers/sorteddict.py b/src/sortedcontainers/sorteddict.py
deleted file mode 100755
index 0282ad5..0000000
--- a/src/sortedcontainers/sorteddict.py
+++ /dev/null
@@ -1,737 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Sorted dict implementation.
-
-from .sortedset import SortedSet
-from .sortedlist import SortedList, recursive_repr
-from .sortedlistwithkey import SortedListWithKey
-from collections import Set, Sequence
-from collections import KeysView as AbstractKeysView
-from collections import ValuesView as AbstractValuesView
-from collections import ItemsView as AbstractItemsView
-
-from functools import wraps
-from sys import hexversion
-
-_NotGiven = object()
-
-def not26(func):
- """Function decorator for methods not implemented in Python 2.6."""
-
- @wraps(func)
- def errfunc(*args, **kwargs):
- raise NotImplementedError
-
- if hexversion < 0x02070000:
- return errfunc
- else:
- return func
-
-class _IlocWrapper:
- def __init__(self, _dict):
- self._dict = _dict
- def __len__(self):
- return len(self._dict)
- def __getitem__(self, index):
- """
- Very efficiently return the key at index *index* in iteration. Supports
- negative indices and slice notation. Raises IndexError on invalid
- *index*.
- """
- return self._dict._list[index]
- def __delitem__(self, index):
- """
- Remove the ``sdict[sdict.iloc[index]]`` from *sdict*. Supports negative
- indices and slice notation. Raises IndexError on invalid *index*.
- """
- _temp = self._dict
- _list = _temp._list
- _delitem = _temp._delitem
-
- if isinstance(index, slice):
- keys = _list[index]
- del _list[index]
- for key in keys:
- _delitem(key)
- else:
- key = _list[index]
- del _list[index]
- _delitem(key)
-
-class SortedDict(dict):
- """
- A SortedDict provides the same methods as a dict. Additionally, a
- SortedDict efficiently maintains its keys in sorted order. Consequently, the
- keys method will return the keys in sorted order, the popitem method will
- remove the item with the highest key, etc.
- """
- def __init__(self, *args, **kwargs):
- """
- A SortedDict provides the same methods as a dict. Additionally, a
- SortedDict efficiently maintains its keys in sorted order. Consequently,
- the keys method will return the keys in sorted order, the popitem method
- will remove the item with the highest key, etc.
-
- An optional *key* argument defines a callable that, like the `key`
- argument to Python's `sorted` function, extracts a comparison key from
- each dict key. If no function is specified, the default compares the
- dict keys directly. The `key` argument must be provided as a positional
- argument and must come before all other arguments.
-
- An optional *load* argument defines the load factor of the internal list
- used to maintain sort order. If present, this argument must come before
- an iterable. The default load factor of '1000' works well for lists from
- tens to tens of millions of elements. Good practice is to use a value
- that is the cube root of the list size. With billions of elements, the
- best load factor depends on your usage. It's best to leave the load
- factor at the default until you start benchmarking.
-
- An optional *iterable* argument provides an initial series of items to
- populate the SortedDict. Each item in the series must itself contain
- two items. The first is used as a key in the new dictionary, and the
- second as the key's value. If a given key is seen more than once, the
- last value associated with it is retained in the new dictionary.
-
- If keyword arguments are given, the keywords themselves with their
- associated values are added as items to the dictionary. If a key is
- specified both in the positional argument and as a keyword argument, the
- value associated with the keyword is retained in the dictionary. For
- example, these all return a dictionary equal to ``{"one": 2, "two":
- 3}``:
-
- * ``SortedDict(one=2, two=3)``
- * ``SortedDict({'one': 2, 'two': 3})``
- * ``SortedDict(zip(('one', 'two'), (2, 3)))``
- * ``SortedDict([['two', 3], ['one', 2]])``
-
- The first example only works for keys that are valid Python
- identifiers; the others work with any valid keys.
- """
- if len(args) > 0 and (args[0] is None or callable(args[0])):
- self._key = args[0]
- args = args[1:]
- else:
- self._key = None
-
- if len(args) > 0 and type(args[0]) == int:
- self._load = args[0]
- args = args[1:]
- else:
- self._load = 1000
-
- if self._key is None:
- self._list = SortedList(load=self._load)
- else:
- self._list = SortedListWithKey(key=self._key, load=self._load)
-
- # Cache function pointers to dict methods.
-
- _dict = super(SortedDict, self)
- self._dict = _dict
- self._clear = _dict.clear
- self._delitem = _dict.__delitem__
- self._iter = _dict.__iter__
- self._pop = _dict.pop
- self._setdefault = _dict.setdefault
- self._setitem = _dict.__setitem__
- self._update = _dict.update
-
- # Cache function pointers to SortedList methods.
-
- self._list_add = self._list.add
- self._list_bisect_left = self._list.bisect_left
- self._list_bisect_right = self._list.bisect_right
- self._list_clear = self._list.clear
- self._list_index = self._list.index
- self._list_pop = self._list.pop
- self._list_remove = self._list.remove
- self._list_update = self._list.update
-
- self.iloc = _IlocWrapper(self)
-
- self.update(*args, **kwargs)
-
- def clear(self):
- """Remove all elements from the dictionary."""
- self._clear()
- self._list_clear()
-
- def __delitem__(self, key):
- """
- Remove ``d[key]`` from *d*. Raises a KeyError if *key* is not in the
- dictionary.
- """
- self._delitem(key)
- self._list_remove(key)
-
- def __iter__(self):
- """Create an iterator over the sorted keys of the dictionary."""
- return iter(self._list)
-
- def __reversed__(self):
- """
- Create a reversed iterator over the sorted keys of the dictionary.
- """
- return reversed(self._list)
-
- def __setitem__(self, key, value):
- """Set `d[key]` to *value*."""
- if key not in self:
- self._list_add(key)
- self._setitem(key, value)
-
- def copy(self):
- """Return a shallow copy of the sorted dictionary."""
- return self.__class__(self._key, self._load, self.iteritems())
-
- __copy__ = copy
-
- @classmethod
- def fromkeys(cls, seq, value=None):
- """
- Create a new dictionary with keys from *seq* and values set to *value*.
- """
- return cls((key, value) for key in seq)
-
- if hexversion < 0x03000000:
- def items(self):
- """
- Return a list of the dictionary's items (``(key, value)`` pairs).
- """
- return list(self.iteritems())
- else:
- def items(self):
- """
- Return a new ItemsView of the dictionary's items. In addition to
- the methods provided by the built-in `view` the ItemsView is
- indexable (e.g. ``d.items()[5]``).
- """
- return ItemsView(self)
-
- def iteritems(self):
- """Return an iterable over the items (``(key, value)`` pairs)."""
- return iter((key, self[key]) for key in self._list)
-
- if hexversion < 0x03000000:
- def keys(self):
- """Return a SortedSet of the dictionary's keys."""
- return SortedSet(self._list, key=self._key, load=self._load)
- else:
- def keys(self):
- """
- Return a new KeysView of the dictionary's keys. In addition to the
- methods provided by the built-in `view` the KeysView is indexable
- (e.g. ``d.keys()[5]``).
- """
- return KeysView(self)
-
- def iterkeys(self):
- """Return an iterable over the keys of the dictionary."""
- return iter(self._list)
-
- if hexversion < 0x03000000:
- def values(self):
- """Return a list of the dictionary's values."""
- return list(self.itervalues())
- else:
- def values(self):
- """
- Return a new :class:`ValuesView` of the dictionary's values.
- In addition to the methods provided by the built-in `view` the
- ValuesView is indexable (e.g., ``d.values()[5]``).
- """
- return ValuesView(self)
-
- def itervalues(self):
- """Return an iterable over the values of the dictionary."""
- return iter(self[key] for key in self._list)
-
- def pop(self, key, default=_NotGiven):
- """
- If *key* is in the dictionary, remove it and return its value,
- else return *default*. If *default* is not given and *key* is not in
- the dictionary, a KeyError is raised.
- """
- if key in self:
- self._list_remove(key)
- return self._pop(key)
- else:
- if default is _NotGiven:
- raise KeyError(key)
- else:
- return default
-
- def popitem(self):
- """
- Remove and return the ``(key, value)`` pair with the greatest *key*
- from the dictionary.
-
- If the dictionary is empty, calling `popitem` raises a
- KeyError`.
- """
- if not len(self):
- raise KeyError('popitem(): dictionary is empty')
-
- key = self._list_pop()
- value = self._pop(key)
-
- return (key, value)
-
- def setdefault(self, key, default=None):
- """
- If *key* is in the dictionary, return its value. If not, insert *key*
- with a value of *default* and return *default*. *default* defaults to
- ``None``.
- """
- if key in self:
- return self[key]
- else:
- self._setitem(key, default)
- self._list_add(key)
- return default
-
- def update(self, *args, **kwargs):
- """
- Update the dictionary with the key/value pairs from *other*, overwriting
- existing keys.
-
- *update* accepts either another dictionary object or an iterable of
- key/value pairs (as a tuple or other iterable of length two). If
- keyword arguments are specified, the dictionary is then updated with
- those key/value pairs: ``d.update(red=1, blue=2)``.
- """
- if not len(self):
- self._update(*args, **kwargs)
- self._list_update(self._iter())
- return
-
- if (len(kwargs) == 0 and len(args) == 1 and isinstance(args[0], dict)):
- pairs = args[0]
- else:
- pairs = dict(*args, **kwargs)
-
- if (10 * len(pairs)) > len(self):
- self._update(pairs)
- self._list_clear()
- self._list_update(self._iter())
- else:
- for key in pairs:
- self[key] = pairs[key]
-
- def index(self, key, start=None, stop=None):
- """
- Return the smallest *k* such that `d.iloc[k] == key` and `i <= k < j`.
- Raises `ValueError` if *key* is not present. *stop* defaults to the end
- of the set. *start* defaults to the beginning. Negative indexes are
- supported, as for slice indices.
- """
- return self._list_index(key, start, stop)
-
- def bisect_left(self, key):
- """
- Similar to the ``bisect`` module in the standard library, this returns
- an appropriate index to insert *key* in SortedDict. If *key* is
- already present in SortedDict, the insertion point will be before (to
- the left of) any existing entries.
- """
- return self._list_bisect_left(key)
-
- def bisect(self, key):
- """Same as bisect_right."""
- return self._list_bisect_right(key)
-
- def bisect_right(self, key):
- """
- Same as `bisect_left`, but if *key* is already present in SortedDict,
- the insertion point will be after (to the right of) any existing
- entries.
- """
- return self._list_bisect_right(key)
-
- @not26
- def viewkeys(self):
- """
- In Python 2.7 and later, return a new `KeysView` of the dictionary's
- keys.
-
- In Python 2.6, raise a NotImplementedError.
- """
- return KeysView(self)
-
- @not26
- def viewvalues(self):
- """
- In Python 2.7 and later, return a new `ValuesView` of the dictionary's
- values.
-
- In Python 2.6, raise a NotImplementedError.
- """
- return ValuesView(self)
-
- @not26
- def viewitems(self):
- """
- In Python 2.7 and later, return a new `ItemsView` of the dictionary's
- items.
-
- In Python 2.6, raise a NotImplementedError.
- """
- return ItemsView(self)
-
- def __reduce__(self):
- return (self.__class__, (self._key, self._load, list(self.iteritems())))
-
- @recursive_repr
- def __repr__(self):
- temp = '{0}({1}, {2}, {{{3}}})'
- items = ', '.join('{0}: {1}'.format(repr(key), repr(self[key]))
- for key in self._list)
- return temp.format(
- self.__class__.__name__,
- repr(self._key),
- repr(self._load),
- items
- )
-
- def _check(self):
- self._list._check()
- assert len(self) == len(self._list)
- assert all(val in self for val in self._list)
-
-class KeysView(AbstractKeysView, Set, Sequence):
- """
- A KeysView object is a dynamic view of the dictionary's keys, which
- means that when the dictionary's keys change, the view reflects
- those changes.
-
- The KeysView class implements the Set and Sequence Abstract Base Classes.
- """
- if hexversion < 0x03000000:
- def __init__(self, sorted_dict):
- """
- Initialize a KeysView from a SortedDict container as *sorted_dict*.
- """
- self._list = sorted_dict._list
- self._view = sorted_dict._dict.viewkeys()
- else:
- def __init__(self, sorted_dict):
- """
- Initialize a KeysView from a SortedDict container as *sorted_dict*.
- """
- self._list = sorted_dict._list
- self._view = sorted_dict._dict.keys()
- def __len__(self):
- """Return the number of entries in the dictionary."""
- return len(self._view)
- def __contains__(self, key):
- """
- Return True if and only if *key* is one of the underlying dictionary's
- keys.
- """
- return key in self._view
- def __iter__(self):
- """
- Return an iterable over the keys in the dictionary. Keys are iterated
- over in their sorted order.
-
- Iterating views while adding or deleting entries in the dictionary may
- raise a RuntimeError or fail to iterate over all entries.
- """
- return iter(self._list)
- def __getitem__(self, index):
- """Return the key at position *index*."""
- return self._list[index]
- def __reversed__(self):
- """
- Return a reversed iterable over the keys in the dictionary. Keys are
- iterated over in their reverse sort order.
-
- Iterating views while adding or deleting entries in the dictionary may
- raise a RuntimeError or fail to iterate over all entries.
- """
- return reversed(self._list)
- def index(self, value, start=None, stop=None):
- """
- Return the smallest *k* such that `keysview[k] == value` and `start <= k
- < end`. Raises `KeyError` if *value* is not present. *stop* defaults
- to the end of the set. *start* defaults to the beginning. Negative
- indexes are supported, as for slice indices.
- """
- return self._list.index(value, start, stop)
- def count(self, value):
- """Return the number of occurrences of *value* in the set."""
- return 1 if value in self._view else 0
- def __eq__(self, that):
- """Test set-like equality with *that*."""
- return self._view == that
- def __ne__(self, that):
- """Test set-like inequality with *that*."""
- return self._view != that
- def __lt__(self, that):
- """Test whether self is a proper subset of *that*."""
- return self._view < that
- def __gt__(self, that):
- """Test whether self is a proper superset of *that*."""
- return self._view > that
- def __le__(self, that):
- """Test whether self is contained within *that*."""
- return self._view <= that
- def __ge__(self, that):
- """Test whether *that* is contained within self."""
- return self._view >= that
- def __and__(self, that):
- """Return a SortedSet of the intersection of self and *that*."""
- return SortedSet(self._view & that)
- def __or__(self, that):
- """Return a SortedSet of the union of self and *that*."""
- return SortedSet(self._view | that)
- def __sub__(self, that):
- """Return a SortedSet of the difference of self and *that*."""
- return SortedSet(self._view - that)
- def __xor__(self, that):
- """Return a SortedSet of the symmetric difference of self and *that*."""
- return SortedSet(self._view ^ that)
- if hexversion < 0x03000000:
- def isdisjoint(self, that):
- """Return True if and only if *that* is disjoint with self."""
- return not any(key in self._list for key in that)
- else:
- def isdisjoint(self, that):
- """Return True if and only if *that* is disjoint with self."""
- return self._view.isdisjoint(that)
- @recursive_repr
- def __repr__(self):
- return 'SortedDict_keys({0})'.format(repr(list(self)))
-
-class ValuesView(AbstractValuesView, Sequence):
- """
- A ValuesView object is a dynamic view of the dictionary's values, which
- means that when the dictionary's values change, the view reflects those
- changes.
-
- The ValuesView class implements the Sequence Abstract Base Class.
- """
- if hexversion < 0x03000000:
- def __init__(self, sorted_dict):
- """
- Initialize a ValuesView from a SortedDict container as
- *sorted_dict*.
- """
- self._dict = sorted_dict
- self._list = sorted_dict._list
- self._view = sorted_dict._dict.viewvalues()
- else:
- def __init__(self, sorted_dict):
- """
- Initialize a ValuesView from a SortedDict container as
- *sorted_dict*.
- """
- self._dict = sorted_dict
- self._list = sorted_dict._list
- self._view = sorted_dict._dict.values()
- def __len__(self):
- """Return the number of entries in the dictionary."""
- return len(self._dict)
- def __contains__(self, value):
- """
- Return True if and only if *value* is on the underlying dictionary's
- values.
- """
- return value in self._view
- def __iter__(self):
- """
- Return an iterator over the values in the dictionary. Values are
- iterated over in sorted order of the keys.
-
- Iterating views while adding or deleting entries in the dictionary may
- raise a `RuntimeError` or fail to iterate over all entries.
- """
- _dict = self._dict
- return iter(_dict[key] for key in self._list)
- def __getitem__(self, index):
- """
- Efficiently return value at *index* in iteration.
-
- Supports slice notation and negative indexes.
- """
- _dict, _list = self._dict, self._list
- if isinstance(index, slice):
- return [_dict[key] for key in _list[index]]
- else:
- return _dict[_list[index]]
- def __reversed__(self):
- """
- Return a reverse iterator over the values in the dictionary. Values are
- iterated over in reverse sort order of the keys.
-
- Iterating views while adding or deleting entries in the dictionary may
- raise a `RuntimeError` or fail to iterate over all entries.
- """
- _dict = self._dict
- return iter(_dict[key] for key in reversed(self._list))
- def index(self, value):
- """
- Return index of *value* in self.
-
- Raises ValueError if *value* is not found.
- """
- for idx, val in enumerate(self):
- if value == val:
- return idx
- else:
- raise ValueError('{0} is not in dict'.format(repr(value)))
- if hexversion < 0x03000000:
- def count(self, value):
- """Return the number of occurrences of *value* in self."""
- return sum(1 for val in self._dict.itervalues() if val == value)
- else:
- def count(self, value):
- """Return the number of occurrences of *value* in self."""
- return sum(1 for val in _dict.values() if val == value)
- def __lt__(self, that):
- raise TypeError
- def __gt__(self, that):
- raise TypeError
- def __le__(self, that):
- raise TypeError
- def __ge__(self, that):
- raise TypeError
- def __and__(self, that):
- raise TypeError
- def __or__(self, that):
- raise TypeError
- def __sub__(self, that):
- raise TypeError
- def __xor__(self, that):
- raise TypeError
- @recursive_repr
- def __repr__(self):
- return 'SortedDict_values({0})'.format(repr(list(self)))
-
-class ItemsView(AbstractItemsView, Set, Sequence):
- """
- An ItemsView object is a dynamic view of the dictionary's ``(key,
- value)`` pairs, which means that when the dictionary changes, the
- view reflects those changes.
-
- The ItemsView class implements the Set and Sequence Abstract Base Classes.
- However, the set-like operations (``&``, ``|``, ``-``, ``^``) will only
- operate correctly if all of the dictionary's values are hashable.
- """
- if hexversion < 0x03000000:
- def __init__(self, sorted_dict):
- """
- Initialize an ItemsView from a SortedDict container as
- *sorted_dict*.
- """
- self._dict = sorted_dict
- self._list = sorted_dict._list
- self._view = sorted_dict._dict.viewitems()
- else:
- def __init__(self, sorted_dict):
- """
- Initialize an ItemsView from a SortedDict container as
- *sorted_dict*.
- """
- self._dict = sorted_dict
- self._list = sorted_dict._list
- self._view = sorted_dict._dict.items()
- def __len__(self):
- """Return the number of entries in the dictionary."""
- return len(self._view)
- def __contains__(self, key):
- """
- Return True if and only if *key* is one of the underlying dictionary's
- items.
- """
- return key in self._view
- def __iter__(self):
- """
- Return an iterable over the items in the dictionary. Items are iterated
- over in their sorted order.
-
- Iterating views while adding or deleting entries in the dictionary may
- raise a RuntimeError or fail to iterate over all entries.
- """
- _dict = self._dict
- return iter((key, _dict[key]) for key in self._list)
- def __getitem__(self, index):
- """Return the item as position *index*."""
- _dict, _list = self._dict, self._list
- if isinstance(index, slice):
- return [(key, _dict[key]) for key in _list[index]]
- else:
- key = _list[index]
- return (key, _dict[key])
- def __reversed__(self):
- """
- Return a reversed iterable over the items in the dictionary. Items are
- iterated over in their reverse sort order.
-
- Iterating views while adding or deleting entries in the dictionary may
- raise a RuntimeError or fail to iterate over all entries.
- """
- _dict = self._dict
- return iter((key, _dict[key]) for key in reversed(self._list))
- def index(self, key, start=None, stop=None):
- """
- Return the smallest *k* such that `itemssview[k] == key` and `start <= k
- < end`. Raises `KeyError` if *key* is not present. *stop* defaults
- to the end of the set. *start* defaults to the beginning. Negative
- indexes are supported, as for slice indices.
- """
- temp, value = key
- pos = self._list.index(temp, start, stop)
- if value == self._dict[temp]:
- return pos
- else:
- raise ValueError('{0} is not in dict'.format(repr(key)))
- def count(self, item):
- """Return the number of occurrences of *item* in the set."""
- key, value = item
- return 1 if key in self._dict and self._dict[key] == value else 0
- def __eq__(self, that):
- """Test set-like equality with *that*."""
- return self._view == that
- def __ne__(self, that):
- """Test set-like inequality with *that*."""
- return self._view != that
- def __lt__(self, that):
- """Test whether self is a proper subset of *that*."""
- return self._view < that
- def __gt__(self, that):
- """Test whether self is a proper superset of *that*."""
- return self._view > that
- def __le__(self, that):
- """Test whether self is contained within *that*."""
- return self._view <= that
- def __ge__(self, that):
- """Test whether *that* is contained within self."""
- return self._view >= that
- def __and__(self, that):
- """Return a SortedSet of the intersection of self and *that*."""
- return SortedSet(self._view & that)
- def __or__(self, that):
- """Return a SortedSet of the union of self and *that*."""
- return SortedSet(self._view | that)
- def __sub__(self, that):
- """Return a SortedSet of the difference of self and *that*."""
- return SortedSet(self._view - that)
- def __xor__(self, that):
- """Return a SortedSet of the symmetric difference of self and *that*."""
- return SortedSet(self._view ^ that)
- if hexversion < 0x03000000:
- def isdisjoint(self, that):
- """Return True if and only if *that* is disjoint with self."""
- _dict = self._dict
- for key, value in that:
- if key in _dict and _dict[key] == value:
- return False
- return True
- else:
- def isdisjoint(self, that):
- """Return True if and only if *that* is disjoint with self."""
- return self._view.isdisjoint(that)
- @recursive_repr
- def __repr__(self):
- return 'SortedDict_items({0})'.format(repr(list(self)))
diff --git a/src/sortedcontainers/sortedlist.py b/src/sortedcontainers/sortedlist.py
deleted file mode 100755
index 8549cb4..0000000
--- a/src/sortedcontainers/sortedlist.py
+++ /dev/null
@@ -1,1233 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Sorted list implementation.
-
-from __future__ import print_function
-from sys import hexversion
-
-from bisect import bisect_left, bisect_right, insort
-from itertools import chain, repeat, starmap
-from collections import MutableSequence
-from operator import iadd, add
-from functools import wraps
-from math import log
-
-if hexversion < 0x03000000:
- from itertools import izip as zip
- from itertools import imap as map
- try:
- from thread import get_ident
- except ImportError:
- from dummy_thread import get_ident
-else:
- from functools import reduce
- try:
- from _thread import get_ident
- except ImportError:
- from _dummy_thread import get_ident
-
-def recursive_repr(func):
- """Decorator to prevent infinite repr recursion."""
- repr_running = set()
-
- @wraps(func)
- def wrapper(self):
- key = id(self), get_ident()
-
- if key in repr_running:
- return '...'
-
- repr_running.add(key)
-
- try:
- return func(self)
- finally:
- repr_running.discard(key)
-
- return wrapper
-
-class SortedList(MutableSequence):
- """
- SortedList provides most of the same methods as a list but keeps the items
- in sorted order.
- """
-
- def __init__(self, iterable=None, load=1000):
- """
- SortedList provides most of the same methods as a list but keeps the
- items in sorted order.
-
- An optional *iterable* provides an initial series of items to populate
- the SortedList.
-
- An optional *load* specifies the load-factor of the list. The default
- load factor of '1000' works well for lists from tens to tens of millions
- of elements. Good practice is to use a value that is the cube root of
- the list size. With billions of elements, the best load factor depends
- on your usage. It's best to leave the load factor at the default until
- you start benchmarking.
- """
- self._len, self._maxes, self._lists, self._index = 0, [], [], []
- self._load, self._twice, self._half = load, load * 2, load >> 1
- self._offset = 0
-
- if iterable is not None:
- self.update(iterable)
-
- def clear(self):
- """Remove all the elements from the list."""
- self._len = 0
- del self._maxes[:]
- del self._lists[:]
- del self._index[:]
-
- def add(self, val):
- """Add the element *val* to the list."""
- _maxes, _lists = self._maxes, self._lists
-
- if _maxes:
- pos = bisect_right(_maxes, val)
-
- if pos == len(_maxes):
- pos -= 1
- _maxes[pos] = val
- _lists[pos].append(val)
- else:
- insort(_lists[pos], val)
-
- self._expand(pos)
- else:
- _maxes.append(val)
- _lists.append([val])
-
- self._len += 1
-
- def _expand(self, pos):
- """Splits sublists that are more than double the load level.
-
- Updates the index when the sublist length is less than double the load
- level. This requires incrementing the nodes in a traversal from the leaf
- node to the root. For an example traversal see self._loc.
- """
- _lists, _index = self._lists, self._index
-
- if len(_lists[pos]) > self._twice:
- _maxes, _load = self._maxes, self._load
- half = _lists[pos][_load:]
- del _lists[pos][_load:]
- _maxes[pos] = _lists[pos][-1]
- _maxes.insert(pos + 1, half[-1])
- _lists.insert(pos + 1, half)
- del _index[:]
- else:
- if len(_index) > 0:
- child = self._offset + pos
- while child > 0:
- _index[child] += 1
- child = (child - 1) >> 1
- _index[0] += 1
-
- def update(self, iterable):
- """Update the list by adding all elements from *iterable*."""
- _maxes, _lists = self._maxes, self._lists
- values = sorted(iterable)
-
- if _maxes:
- if len(values) * 4 >= self._len:
- values.extend(chain.from_iterable(_lists))
- values.sort()
- self.clear()
- else:
- _add = self.add
- for val in values:
- _add(val)
- return
-
- _load, _index = self._load, self._index
- _lists.extend(values[pos:(pos + _load)]
- for pos in range(0, len(values), _load))
- _maxes.extend(sublist[-1] for sublist in _lists)
- self._len = len(values)
- del _index[:]
-
- def __contains__(self, val):
- """Return True if and only if *val* is an element in the list."""
- _maxes = self._maxes
-
- if not _maxes:
- return False
-
- pos = bisect_left(_maxes, val)
-
- if pos == len(_maxes):
- return False
-
- _lists = self._lists
- idx = bisect_left(_lists[pos], val)
- return _lists[pos][idx] == val
-
- def discard(self, val):
- """
- Remove the first occurrence of *val*.
-
- If *val* is not a member, does nothing.
- """
- _maxes = self._maxes
-
- if not _maxes:
- return
-
- pos = bisect_left(_maxes, val)
-
- if pos == len(_maxes):
- return
-
- _lists = self._lists
- idx = bisect_left(_lists[pos], val)
- if _lists[pos][idx] == val:
- self._delete(pos, idx)
-
- def remove(self, val):
- """
- Remove first occurrence of *val*.
-
- Raises ValueError if *val* is not present.
- """
- _maxes = self._maxes
-
- if not _maxes:
- raise ValueError('{0} not in list'.format(repr(val)))
-
- pos = bisect_left(_maxes, val)
-
- if pos == len(_maxes):
- raise ValueError('{0} not in list'.format(repr(val)))
-
- _lists = self._lists
- idx = bisect_left(_lists[pos], val)
- if _lists[pos][idx] == val:
- self._delete(pos, idx)
- else:
- raise ValueError('{0} not in list'.format(repr(val)))
-
- def _delete(self, pos, idx):
- """Delete the item at the given (pos, idx).
-
- Combines lists that are less than half the load level.
-
- Updates the index when the sublist length is more than half the load
- level. This requires decrementing the nodes in a traversal from the leaf
- node to the root. For an example traversal see self._loc.
- """
- _maxes, _lists, _index = self._maxes, self._lists, self._index
-
- lists_pos = _lists[pos]
-
- del lists_pos[idx]
- self._len -= 1
-
- len_lists_pos = len(lists_pos)
-
- if len_lists_pos > self._half:
-
- _maxes[pos] = lists_pos[-1]
-
- if len(_index) > 0:
- child = self._offset + pos
- while child > 0:
- _index[child] -= 1
- child = (child - 1) >> 1
- _index[0] -= 1
-
- elif len(_lists) > 1:
-
- if not pos:
- pos += 1
-
- prev = pos - 1
- _lists[prev].extend(_lists[pos])
- _maxes[prev] = _lists[prev][-1]
-
- del _maxes[pos]
- del _lists[pos]
- del _index[:]
-
- self._expand(prev)
-
- elif len_lists_pos:
-
- _maxes[pos] = lists_pos[-1]
-
- else:
-
- del _maxes[pos]
- del _lists[pos]
- del _index[:]
-
- def _loc(self, pos, idx):
- """Convert an index pair (alpha, beta) into a single index that corresponds to
- the position of the value in the sorted list.
-
- Most queries require the index be built. Details of the index are
- described in self._build_index.
-
- Indexing requires traversing the tree from a leaf node to the root. The
- parent of each node is easily computable at (pos - 1) // 2.
-
- Left-child nodes are always at odd indices and right-child nodes are
- always at even indices.
-
- When traversing up from a right-child node, increment the total by the
- left-child node.
-
- The final index is the sum from traversal and the index in the sublist.
-
- For example, using the index from self._build_index:
-
- _index = 14 5 9 3 2 4 5
- _offset = 3
-
- Tree:
-
- 14
- 5 9
- 3 2 4 5
-
- Converting index pair (2, 3) into a single index involves iterating like
- so:
-
- 1. Starting at the leaf node: offset + alpha = 3 + 2 = 5. We identify
- the node as a left-child node. At such nodes, we simply traverse to
- the parent.
-
- 2. At node 9, position 2, we recognize the node as a right-child node
- and accumulate the left-child in our total. Total is now 5 and we
- traverse to the parent at position 0.
-
- 3. Iteration ends at the root.
-
- Computing the index is the sum of the total and beta: 5 + 3 = 8.
- """
- if not pos:
- return idx
-
- _index = self._index
-
- if not len(_index):
- self._build_index()
-
- total = 0
-
- # Increment pos to point in the index to len(self._lists[pos]).
-
- pos += self._offset
-
- # Iterate until reaching the root of the index tree at pos = 0.
-
- while pos:
-
- # Right-child nodes are at odd indices. At such indices
- # account the total below the left child node.
-
- if not (pos & 1):
- total += _index[pos - 1]
-
- # Advance pos to the parent node.
-
- pos = (pos - 1) >> 1
-
- return total + idx
-
- def _pos(self, idx):
- """Convert an index into a pair (alpha, beta) that can be used to access
- the corresponding _lists[alpha][beta] position.
-
- Most queries require the index be built. Details of the index are
- described in self._build_index.
-
- Indexing requires traversing the tree to a leaf node. Each node has
- two children which are easily computable. Given an index, pos, the
- left-child is at pos * 2 + 1 and the right-child is at pos * 2 + 2.
-
- When the index is less than the left-child, traversal moves to the
- left sub-tree. Otherwise, the index is decremented by the left-child
- and traversal moves to the right sub-tree.
-
- At a child node, the indexing pair is computed from the relative
- position of the child node as compared with the offset and the remaining
- index.
-
- For example, using the index from self._build_index:
-
- _index = 14 5 9 3 2 4 5
- _offset = 3
-
- Tree:
-
- 14
- 5 9
- 3 2 4 5
-
- Indexing position 8 involves iterating like so:
-
- 1. Starting at the root, position 0, 8 is compared with the left-child
- node (5) which it is greater than. When greater the index is
- decremented and the position is updated to the right child node.
-
- 2. At node 9 with index 3, we again compare the index to the left-child
- node with value 4. Because the index is the less than the left-child
- node, we simply traverse to the left.
-
- 3. At node 4 with index 3, we recognize that we are at a leaf node and
- stop iterating.
-
- 4. To compute the sublist index, we subtract the offset from the index
- of the leaf node: 5 - 3 = 2. To compute the index in the sublist, we
- simply use the index remaining from iteration. In this case, 3.
-
- The final index pair from our example is (2, 3) which corresponds to
- index 8 in the sorted list.
- """
- _len, _lists = self._len, self._lists
-
- if idx < 0:
- last_len = len(_lists[-1])
- if (-idx) <= last_len:
- return len(_lists) - 1, last_len + idx
- idx += _len
- if idx < 0:
- raise IndexError('list index out of range')
- elif idx >= _len:
- raise IndexError('list index out of range')
-
- if idx < len(_lists[0]):
- return 0, idx
-
- _index = self._index
-
- if not len(_index):
- self._build_index()
-
- pos = 0
- len_index = len(_index)
- child = (pos << 1) + 1
-
- while child < len_index:
- index_child = _index[child]
-
- if idx < index_child:
- pos = child
- else:
- idx -= index_child
- pos = child + 1
-
- child = (pos << 1) + 1
-
- return (pos - self._offset, idx)
-
- def _build_index(self):
- """Build an index for indexing the sorted list.
-
- Indexes are represented as binary trees in a dense array notation
- similar to a binary heap.
-
- For example, given a _lists representation storing integers:
-
- [0]: 1 2 3
- [1]: 4 5
- [2]: 6 7 8 9
- [3]: 10 11 12 13 14
-
- The first transformation maps the sub-lists by their length. The
- first row of the index is the length of the sub-lists.
-
- [0]: 3 2 4 5
-
- Each row after that is the sum of consecutive pairs of the previous row:
-
- [1]: 5 9
- [2]: 14
-
- Finally, the index is built by concatenating these lists together:
-
- _index = 14 5 9 3 2 4 5
-
- An offset storing the start of the first row is also stored:
-
- _offset = 3
-
- When built, the index can be used for efficient indexing into the list.
- See the comment and notes on self._pos for details.
- """
- row0 = list(map(len, self._lists))
-
- if len(row0) == 1:
- self._index[:] = row0
- self._offset = 0
- return
-
- head = iter(row0)
- tail = iter(head)
- row1 = list(starmap(add, zip(head, tail)))
-
- if len(row0) & 1:
- row1.append(row0[-1])
-
- if len(row1) == 1:
- self._index[:] = row1 + row0
- self._offset = 1
- return
-
- size = 2 ** (int(log(len(row1) - 1, 2)) + 1)
- row1.extend(repeat(0, size - len(row1)))
- tree = [row0, row1]
-
- while len(tree[-1]) > 1:
- head = iter(tree[-1])
- tail = iter(head)
- row = list(starmap(add, zip(head, tail)))
- tree.append(row)
-
- reduce(iadd, reversed(tree), self._index)
- self._offset = size * 2 - 1
-
- def _slice(self, slc):
- start, stop, step = slc.start, slc.stop, slc.step
-
- if step == 0:
- raise ValueError('slice step cannot be zero')
-
- # Set defaults for missing values.
-
- if step is None:
- step = 1
-
- if step > 0:
- if start is None:
- start = 0
-
- if stop is None:
- stop = len(self)
- elif stop < 0:
- stop += len(self)
- else:
- if start is None:
- start = len(self) - 1
-
- if stop is None:
- stop = -1
- elif stop < 0:
- stop += len(self)
-
- if start < 0:
- start += len(self)
-
- # Fix indices that are too big or too small.
- # Slice notation is surprisingly permissive
- # where normal indexing would raise IndexError.
-
- if step > 0:
- if start < 0:
- start = 0
- elif start > len(self):
- start = len(self)
-
- if stop < 0:
- stop = 0
- elif stop > len(self):
- stop = len(self)
- else:
- if start < 0:
- start = -1
- elif start >= len(self):
- start = len(self) - 1
-
- if stop < 0:
- stop = -1
- elif stop > len(self):
- stop = len(self)
-
- return start, stop, step
-
- def __delitem__(self, idx):
- """Remove the element at *idx*. Supports slicing."""
- if isinstance(idx, slice):
- start, stop, step = self._slice(idx)
-
- if ((step == 1) and (start < stop)
- and ((stop - start) * 8 >= self._len)):
-
- values = self[:start]
- if stop < self._len:
- values += self[stop:]
- self.clear()
- self.update(values)
- return
-
- indices = range(start, stop, step)
-
- # Delete items from greatest index to least so
- # that the indices remain valid throughout iteration.
-
- if step > 0:
- indices = reversed(indices)
-
- _pos, _delete = self._pos, self._delete
-
- for index in indices:
- pos, idx = _pos(index)
- _delete(pos, idx)
- else:
- pos, idx = self._pos(idx)
- self._delete(pos, idx)
-
- def __getitem__(self, idx):
- """Return the element at *idx*. Supports slicing."""
- _lists = self._lists
-
- if isinstance(idx, slice):
- start, stop, step = self._slice(idx)
-
- if step == 1 and start < stop:
- if start == 0 and stop == self._len:
- return self.as_list()
-
- start_pos, start_idx = self._pos(start)
-
- if stop == self._len:
- stop_pos = len(_lists) - 1
- stop_idx = len(_lists[stop_pos])
- else:
- stop_pos, stop_idx = self._pos(stop)
-
- if start_pos == stop_pos:
- return _lists[start_pos][start_idx:stop_idx]
-
- prefix = _lists[start_pos][start_idx:]
- middle = _lists[(start_pos + 1):stop_pos]
- result = reduce(iadd, middle, prefix)
- result += _lists[stop_pos][:stop_idx]
-
- return result
-
- if step == -1 and start > stop:
- result = self[(stop + 1):(start + 1)]
- result.reverse()
- return result
-
- # Return a list because a negative step could
- # reverse the order of the items and this could
- # be the desired behavior.
-
- indices = range(start, stop, step)
- return list(self[index] for index in indices)
- else:
- pos, idx = self._pos(idx)
- return _lists[pos][idx]
-
- def _check_order(self, idx, val):
- _lists, _len = self._lists, self._len
-
- pos, loc = self._pos(idx)
-
- if idx < 0:
- idx += _len
-
- # Check that the inserted value is not less than the
- # previous value.
-
- if idx > 0:
- idx_prev = loc - 1
- pos_prev = pos
-
- if idx_prev < 0:
- pos_prev -= 1
- idx_prev = len(_lists[pos_prev]) - 1
-
- if _lists[pos_prev][idx_prev] > val:
- msg = '{0} not in sort order at index {1}'.format(repr(val), idx)
- raise ValueError(msg)
-
- # Check that the inserted value is not greater than
- # the previous value.
-
- if idx < (_len - 1):
- idx_next = loc + 1
- pos_next = pos
-
- if idx_next == len(_lists[pos_next]):
- pos_next += 1
- idx_next = 0
-
- if _lists[pos_next][idx_next] < val:
- msg = '{0} not in sort order at index {1}'.format(repr(val), idx)
- raise ValueError(msg)
-
- def __setitem__(self, index, value):
- """
- Replace the item at position *index* with *value*.
-
- Supports slice notation. Raises a :exc:`ValueError` if the sort order
- would be violated. When used with a slice and iterable, the
- :exc:`ValueError` is raised before the list is mutated if the sort order
- would be violated by the operation.
- """
- _maxes, _lists, _pos = self._maxes, self._lists, self._pos
- _check_order = self._check_order
-
- if isinstance(index, slice):
- start, stop, step = self._slice(index)
- indices = range(start, stop, step)
-
- if step != 1:
- if not hasattr(value, '__len__'):
- value = list(value)
-
- indices = list(indices)
-
- if len(value) != len(indices):
- raise ValueError(
- 'attempt to assign sequence of size {0}'
- ' to extended slice of size {1}'
- .format(len(value), len(indices)))
-
- # Keep a log of values that are set so that we can
- # roll back changes if ordering is violated.
-
- log = []
- _append = log.append
-
- for idx, val in zip(indices, value):
- pos, loc = _pos(idx)
- _append((idx, _lists[pos][loc], val))
- _lists[pos][loc] = val
- if len(_lists[pos]) == (loc + 1):
- _maxes[pos] = val
-
- try:
- # Validate ordering of new values.
-
- for idx, oldval, newval in log:
- _check_order(idx, newval)
-
- except ValueError:
-
- # Roll back changes from log.
-
- for idx, oldval, newval in log:
- pos, loc = _pos(idx)
- _lists[pos][loc] = oldval
- if len(_lists[pos]) == (loc + 1):
- _maxes[pos] = oldval
-
- raise
- else:
- # Test ordering using indexing. If the value given
- # doesn't support getitem, convert it to a list.
-
- if not hasattr(value, '__getitem__'):
- value = list(value)
-
- # Check that the given values are ordered properly.
-
- ordered = all(value[pos - 1] <= value[pos]
- for pos in range(1, len(value)))
-
- if not ordered:
- raise ValueError('given sequence not in sort order')
-
- # Check ordering in context of sorted list.
-
- if not start or not len(value):
- # Nothing to check on the lhs.
- pass
- else:
- if self[start - 1] > value[0]:
- msg = '{0} not in sort order at index {1}'.format(repr(value[0]), start)
- raise ValueError(msg)
-
- if stop == len(self) or not len(value):
- # Nothing to check on the rhs.
- pass
- else:
- # "stop" is exclusive so we don't need
- # to add one for the index.
- if self[stop] < value[-1]:
- msg = '{0} not in sort order at index {1}'.format(repr(value[-1]), stop)
- raise ValueError(msg)
-
- # Delete the existing values.
-
- del self[index]
-
- # Insert the new values.
-
- _insert = self.insert
- for idx, val in enumerate(value):
- _insert(start + idx, val)
- else:
- pos, loc = _pos(index)
- _check_order(index, value)
- _lists[pos][loc] = value
- if len(_lists[pos]) == (loc + 1):
- _maxes[pos] = value
-
- def __iter__(self):
- """Create an iterator over the list."""
- return chain.from_iterable(self._lists)
-
- def __reversed__(self):
- """Create an iterator to traverse the list in reverse."""
- return chain.from_iterable(map(reversed, reversed(self._lists)))
-
- def __len__(self):
- """Return the number of elements in the list."""
- return self._len
-
- def bisect_left(self, val):
- """
- Similar to the *bisect* module in the standard library, this returns an
- appropriate index to insert *val*. If *val* is already present, the
- insertion point will be before (to the left of) any existing entries.
- """
- _maxes = self._maxes
-
- if not _maxes:
- return 0
-
- pos = bisect_left(_maxes, val)
-
- if pos == len(_maxes):
- return self._len
-
- idx = bisect_left(self._lists[pos], val)
-
- return self._loc(pos, idx)
-
- def bisect_right(self, val):
- """
- Same as *bisect_left*, but if *val* is already present, the insertion
- point will be after (to the right of) any existing entries.
- """
- _maxes = self._maxes
-
- if not _maxes:
- return 0
-
- pos = bisect_right(_maxes, val)
-
- if pos == len(_maxes):
- return self._len
-
- idx = bisect_right(self._lists[pos], val)
-
- return self._loc(pos, idx)
-
- bisect = bisect_right
-
- def count(self, val):
- """Return the number of occurrences of *val* in the list."""
- _maxes = self._maxes
-
- if not _maxes:
- return 0
-
- pos_left = bisect_left(_maxes, val)
-
- if pos_left == len(_maxes):
- return 0
-
- _lists = self._lists
- idx_left = bisect_left(_lists[pos_left], val)
- pos_right = bisect_right(_maxes, val)
-
- if pos_right == len(_maxes):
- return self._len - self._loc(pos_left, idx_left)
-
- idx_right = bisect_right(_lists[pos_right], val)
-
- if pos_left == pos_right:
- return idx_right - idx_left
-
- right = self._loc(pos_right, idx_right)
- left = self._loc(pos_left, idx_left)
-
- return right - left
-
- def copy(self):
- """Return a shallow copy of the sorted list."""
- return self.__class__(self, load=self._load)
-
- __copy__ = copy
-
- def append(self, val):
- """
- Append the element *val* to the list. Raises a ValueError if the *val*
- would violate the sort order.
- """
- _maxes, _lists = self._maxes, self._lists
-
- if not _maxes:
- _maxes.append(val)
- _lists.append([val])
- self._len = 1
- return
-
- pos = len(_lists) - 1
-
- if val < _lists[pos][-1]:
- msg = '{0} not in sort order at index {1}'.format(repr(val), self._len)
- raise ValueError(msg)
-
- _maxes[pos] = val
- _lists[pos].append(val)
- self._len += 1
- self._expand(pos)
-
- def extend(self, values):
- """
- Extend the list by appending all elements from the *values*. Raises a
- ValueError if the sort order would be violated.
- """
- _maxes, _lists, _load = self._maxes, self._lists, self._load
-
- if not isinstance(values, list):
- values = list(values)
-
- if any(values[pos - 1] > values[pos]
- for pos in range(1, len(values))):
- raise ValueError('given sequence not in sort order')
-
- offset = 0
-
- if _maxes:
- if values[0] < _lists[-1][-1]:
- msg = '{0} not in sort order at index {1}'.format(repr(values[0]), self._len)
- raise ValueError(msg)
-
- if len(_lists[-1]) < self._half:
- _lists[-1].extend(values[:_load])
- _maxes[-1] = _lists[-1][-1]
- offset = _load
-
- len_lists = len(_lists)
-
- for idx in range(offset, len(values), _load):
- _lists.append(values[idx:(idx + _load)])
- _maxes.append(_lists[-1][-1])
-
- _index = self._index
-
- if len_lists == len(_lists):
- len_index = len(_index)
- if len_index > 0:
- len_values = len(values)
- child = len_index - 1
- while child:
- _index[child] += len_values
- child = (child - 1) >> 1
- _index[0] += len_values
- else:
- del _index[:]
-
- self._len += len(values)
-
- def insert(self, idx, val):
- """
- Insert the element *val* into the list at *idx*. Raises a ValueError if
- the *val* at *idx* would violate the sort order.
- """
- _maxes, _lists, _len = self._maxes, self._lists, self._len
-
- if idx < 0:
- idx += _len
- if idx < 0:
- idx = 0
- if idx > _len:
- idx = _len
-
- if not _maxes:
- # The idx must be zero by the inequalities above.
- _maxes.append(val)
- _lists.append([val])
- self._len = 1
- return
-
- if not idx:
- if val > _lists[0][0]:
- msg = '{0} not in sort order at index {1}'.format(repr(val), 0)
- raise ValueError(msg)
- else:
- _lists[0].insert(0, val)
- self._expand(0)
- self._len += 1
- return
-
- if idx == _len:
- pos = len(_lists) - 1
- if _lists[pos][-1] > val:
- msg = '{0} not in sort order at index {1}'.format(repr(val), _len)
- raise ValueError(msg)
- else:
- _lists[pos].append(val)
- _maxes[pos] = _lists[pos][-1]
- self._expand(pos)
- self._len += 1
- return
-
- pos, idx = self._pos(idx)
- idx_before = idx - 1
- if idx_before < 0:
- pos_before = pos - 1
- idx_before = len(_lists[pos_before]) - 1
- else:
- pos_before = pos
-
- before = _lists[pos_before][idx_before]
- if before <= val <= _lists[pos][idx]:
- _lists[pos].insert(idx, val)
- self._expand(pos)
- self._len += 1
- else:
- msg = '{0} not in sort order at index {1}'.format(repr(val), idx)
- raise ValueError(msg)
-
- def pop(self, idx=-1):
- """
- Remove and return item at *idx* (default last). Raises IndexError if
- list is empty or index is out of range. Negative indices are supported,
- as for slice indices.
- """
- if (idx < 0 and -idx > self._len) or (idx >= self._len):
- raise IndexError('pop index out of range')
-
- pos, idx = self._pos(idx)
- val = self._lists[pos][idx]
- self._delete(pos, idx)
-
- return val
-
- def index(self, val, start=None, stop=None):
- """
- Return the smallest *k* such that L[k] == val and i <= k < j`. Raises
- ValueError if *val* is not present. *stop* defaults to the end of the
- list. *start* defaults to the beginning. Negative indices are supported,
- as for slice indices.
- """
- _len, _maxes = self._len, self._maxes
-
- if not _maxes:
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- if start is None:
- start = 0
- if start < 0:
- start += _len
- if start < 0:
- start = 0
-
- if stop is None:
- stop = _len
- if stop < 0:
- stop += _len
- if stop > _len:
- stop = _len
-
- if stop <= start:
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- stop -= 1
- pos_left = bisect_left(_maxes, val)
-
- if pos_left == len(_maxes):
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- _lists = self._lists
- idx_left = bisect_left(_lists[pos_left], val)
-
- if _lists[pos_left][idx_left] != val:
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- left = self._loc(pos_left, idx_left)
-
- if start <= left:
- if left <= stop:
- return left
- else:
- right = self.bisect_right(val) - 1
-
- if start <= right:
- return start
-
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- def as_list(self):
- """Very efficiently convert the SortedList to a list."""
- return reduce(iadd, self._lists, [])
-
- def __add__(self, that):
- """
- Return a new sorted list containing all the elements in *self* and
- *that*. Elements in *that* do not need to be properly ordered with
- respect to *self*.
- """
- values = self.as_list()
- values.extend(that)
- return self.__class__(values, load=self._load)
-
- def __iadd__(self, that):
- """
- Update *self* to include all values in *that*. Elements in *that* do not
- need to be properly ordered with respect to *self*.
- """
- self.update(that)
- return self
-
- def __mul__(self, that):
- """
- Return a new sorted list containing *that* shallow copies of each item
- in SortedList.
- """
- values = self.as_list() * that
- return self.__class__(values, load=self._load)
-
- def __imul__(self, that):
- """
- Increase the length of the list by appending *that* shallow copies of
- each item.
- """
- values = self.as_list() * that
- self.clear()
- self.update(values)
- return self
-
- def __eq__(self, that):
- """Compare two Sequences for equality."""
- return ((self._len == len(that))
- and all(lhs == rhs for lhs, rhs in zip(self, that)))
-
- def __ne__(self, that):
- """Compare two Sequences for inequality."""
- return ((self._len != len(that))
- or any(lhs != rhs for lhs, rhs in zip(self, that)))
-
- def __lt__(self, that):
- """Compare two Sequences for less than."""
- return ((self._len <= len(that))
- and all(lhs < rhs for lhs, rhs in zip(self, that)))
-
- def __le__(self, that):
- """Compare two Sequences for less than equal."""
- return ((self._len <= len(that))
- and all(lhs <= rhs for lhs, rhs in zip(self, that)))
-
- def __gt__(self, that):
- """Compare two Sequences for greater than."""
- return ((self._len >= len(that))
- and all(lhs > rhs for lhs, rhs in zip(self, that)))
-
- def __ge__(self, that):
- """Compare two Sequences for greater than equal."""
- return ((self._len >= len(that))
- and all(lhs >= rhs for lhs, rhs in zip(self, that)))
-
- @recursive_repr
- def __repr__(self):
- """Return string representation of SortedList."""
- temp = '{0}({1}, load={2})'
- return temp.format(
- self.__class__.__name__,
- repr(list(self)),
- repr(self._load)
- )
-
- def _check(self):
- try:
- # Check load parameters.
-
- assert self._load >= 4
- assert self._half == (self._load >> 1)
- assert self._twice == (self._load * 2)
-
- # Check empty sorted list case.
-
- if self._maxes == []:
- assert self._lists == []
- return
-
- assert len(self._maxes) > 0 and len(self._lists) > 0
-
- # Check all sublists are sorted.
-
- assert all(sublist[pos - 1] <= sublist[pos]
- for sublist in self._lists
- for pos in range(1, len(sublist)))
-
- # Check beginning/end of sublists are sorted.
-
- for pos in range(1, len(self._lists)):
- assert self._lists[pos - 1][-1] <= self._lists[pos][0]
-
- # Check length of _maxes and _lists match.
-
- assert len(self._maxes) == len(self._lists)
-
- # Check _maxes is a map of _lists.
-
- assert all(self._maxes[pos] == self._lists[pos][-1]
- for pos in range(len(self._maxes)))
-
- # Check load level is less than _twice.
-
- assert all(len(sublist) <= self._twice for sublist in self._lists)
-
- # Check load level is greater than _half for all
- # but the last sublist.
-
- assert all(len(self._lists[pos]) >= self._half
- for pos in range(0, len(self._lists) - 1))
-
- # Check length.
-
- assert self._len == sum(len(sublist) for sublist in self._lists)
-
- # Check index.
-
- if len(self._index):
- assert len(self._index) == self._offset + len(self._lists)
- assert self._len == self._index[0]
-
- def test_offset_pos(pos):
- from_index = self._index[self._offset + pos]
- return from_index == len(self._lists[pos])
-
- assert all(test_offset_pos(pos)
- for pos in range(len(self._lists)))
-
- for pos in range(self._offset):
- child = (pos << 1) + 1
- if self._index[pos] == 0:
- assert child >= len(self._index)
- elif child + 1 == len(self._index):
- assert self._index[pos] == self._index[child]
- else:
- child_sum = self._index[child] + self._index[child + 1]
- assert self._index[pos] == child_sum
-
- except:
- import sys
- import traceback
-
- traceback.print_exc(file=sys.stdout)
-
- print('len', self._len)
- print('load', self._load, self._half, self._twice)
- print('offset', self._offset)
- print('len_index', len(self._index))
- print('index', self._index)
- print('len_maxes', len(self._maxes))
- print('maxes', self._maxes)
- print('len_lists', len(self._lists))
- print('lists', self._lists)
-
- raise
diff --git a/src/sortedcontainers/sortedlistwithkey.py b/src/sortedcontainers/sortedlistwithkey.py
deleted file mode 100755
index 688cf1f..0000000
--- a/src/sortedcontainers/sortedlistwithkey.py
+++ /dev/null
@@ -1,1331 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Sorted list implementation.
-
-from __future__ import print_function
-from sys import hexversion
-
-from .sortedlist import recursive_repr
-from bisect import bisect_left, bisect_right, insort
-from itertools import chain, repeat, starmap
-from collections import MutableSequence
-from operator import iadd, add
-from functools import wraps
-from math import log
-
-if hexversion < 0x03000000:
- from itertools import izip as zip
- from itertools import imap as map
-else:
- from functools import reduce
-
-def identity(value):
- return value
-
-class SortedListWithKey(MutableSequence):
- """
- SortedList provides most of the same methods as a list but keeps the items
- in sorted order.
- """
-
- def __init__(self, iterable=None, key=identity, load=1000):
- """
- SortedList provides most of the same methods as a list but keeps the
- items in sorted order.
-
- An optional *iterable* provides an initial series of items to populate
- the SortedList.
-
- An optional *load* specifies the load-factor of the list. The default
- load factor of '1000' works well for lists from tens to tens of millions
- of elements. Good practice is to use a value that is the cube root of
- the list size. With billions of elements, the best load factor depends
- on your usage. It's best to leave the load factor at the default until
- you start benchmarking.
- """
- self._len, self._maxes, self._lists, self._keys, self._index = 0, [], [], [], []
- self._key, self._load, self._twice, self._half = key, load, load * 2, load >> 1
- self._offset = 0
-
- if iterable is not None:
- self.update(iterable)
-
- def clear(self):
- """Remove all the elements from the list."""
- self._len = 0
- del self._maxes[:]
- del self._lists[:]
- del self._keys[:]
- del self._index[:]
-
- def add(self, val):
- """Add the element *val* to the list."""
- _maxes, _lists, _keys = self._maxes, self._lists, self._keys
-
- key = self._key(val)
-
- if _maxes:
- pos = bisect_right(_maxes, key)
-
- if pos == len(_maxes):
- pos -= 1
- _maxes[pos] = key
- _keys[pos].append(key)
- _lists[pos].append(val)
- else:
- idx = bisect_right(_keys[pos], key)
- _keys[pos].insert(idx, key)
- _lists[pos].insert(idx, val)
-
- self._expand(pos)
- else:
- _maxes.append(key)
- _keys.append([key])
- _lists.append([val])
-
- self._len += 1
-
- def _expand(self, pos):
- """
- Splits sublists that are more than double the load level.
-
- Updates the index when the sublist length is less than double the load
- level. This requires incrementing the nodes in a traversal from the leaf
- node to the root. For an example traversal see self._loc.
- """
- _lists, _keys, _index = self._lists, self._keys, self._index
-
- if len(_keys[pos]) > self._twice:
- _maxes, _load = self._maxes, self._load
-
- half = _keys[pos][_load:]
- half_list = _lists[pos][_load:]
- del _keys[pos][_load:]
- del _lists[pos][_load:]
- _maxes[pos] = _keys[pos][-1]
-
- _maxes.insert(pos + 1, half[-1])
- _keys.insert(pos + 1, half)
- _lists.insert(pos + 1, half_list)
-
- del _index[:]
- else:
- if len(_index) > 0:
- child = self._offset + pos
- while child > 0:
- _index[child] += 1
- child = (child - 1) >> 1
- _index[0] += 1
-
- def update(self, iterable):
- """Update the list by adding all elements from *iterable*."""
- _maxes, _lists, _keys = self._maxes, self._lists, self._keys
- values = sorted(iterable, key=self._key)
-
- if _maxes:
- if len(values) * 4 >= self._len:
- values.extend(chain.from_iterable(_lists))
- values.sort(key=self._key)
- self.clear()
- else:
- _add = self.add
- for val in values:
- _add(val)
- return
-
- _load, _index = self._load, self._index
- _lists.extend(values[pos:(pos + _load)]
- for pos in range(0, len(values), _load))
- _keys.extend(list(map(self._key, _list)) for _list in _lists)
- _maxes.extend(sublist[-1] for sublist in _keys)
- self._len = len(values)
- del _index[:]
-
- def __contains__(self, val):
- """Return True if and only if *val* is an element in the list."""
- _maxes = self._maxes
-
- if not _maxes:
- return False
-
- key = self._key(val)
- pos = bisect_left(_maxes, key)
-
- if pos == len(_maxes):
- return False
-
- _keys = self._keys
- _lists = self._lists
-
- idx = bisect_left(_keys[pos], key)
-
- len_keys = len(_keys)
- len_sublist = len(_keys[pos])
-
- while True:
- if _keys[pos][idx] != key:
- return False
- if _lists[pos][idx] == val:
- return True
- idx += 1
- if idx == len_sublist:
- pos += 1
- if pos == len_keys:
- return False
- len_sublist = len(_keys[pos])
- idx = 0
-
- def discard(self, val):
- """
- Remove the first occurrence of *val*.
-
- If *val* is not a member, does nothing.
- """
- _maxes = self._maxes
-
- if not _maxes:
- return
-
- key = self._key(val)
- pos = bisect_left(_maxes, key)
-
- if pos == len(_maxes):
- return
-
- _keys = self._keys
- _lists = self._lists
- idx = bisect_left(_keys[pos], key)
-
- len_keys = len(_keys)
- len_sublist = len(_keys[pos])
-
- while True:
- if _keys[pos][idx] != key:
- return
- if _lists[pos][idx] == val:
- self._delete(pos, idx)
- return
- idx += 1
- if idx == len_sublist:
- pos += 1
- if pos == len_keys:
- return
- len_sublist = len(_keys[pos])
- idx = 0
-
- def remove(self, val):
- """
- Remove first occurrence of *val*.
-
- Raises ValueError if *val* is not present.
- """
- _maxes = self._maxes
-
- if not _maxes:
- raise ValueError('{0} not in list'.format(repr(val)))
-
- key = self._key(val)
- pos = bisect_left(_maxes, key)
-
- if pos == len(_maxes):
- raise ValueError('{0} not in list'.format(repr(val)))
-
- _keys = self._keys
- _lists = self._lists
- idx = bisect_left(_keys[pos], key)
-
- len_keys = len(_keys)
- len_sublist = len(_keys[pos])
-
- while True:
- if _keys[pos][idx] != key:
- raise ValueError('{0} not in list'.format(repr(val)))
- if _lists[pos][idx] == val:
- self._delete(pos, idx)
- return
- idx += 1
- if idx == len_sublist:
- pos += 1
- if pos == len_keys:
- raise ValueError('{0} not in list'.format(repr(val)))
- len_sublist = len(_keys[pos])
- idx = 0
-
- def _delete(self, pos, idx):
- """
- Delete the item at the given (pos, idx).
-
- Combines lists that are less than half the load level.
-
- Updates the index when the sublist length is more than half the load
- level. This requires decrementing the nodes in a traversal from the leaf
- node to the root. For an example traversal see self._loc.
- """
- _maxes, _lists, _keys, _index = self._maxes, self._lists, self._keys, self._index
-
- keys_pos = _keys[pos]
- lists_pos = _lists[pos]
-
- del keys_pos[idx]
- del lists_pos[idx]
- self._len -= 1
-
- len_keys_pos = len(keys_pos)
-
- if len_keys_pos > self._half:
-
- _maxes[pos] = keys_pos[-1]
-
- if len(_index) > 0:
- child = self._offset + pos
- while child > 0:
- _index[child] -= 1
- child = (child - 1) >> 1
- _index[0] -= 1
-
- elif len(_keys) > 1:
-
- if not pos:
- pos += 1
-
- prev = pos - 1
- _keys[prev].extend(_keys[pos])
- _lists[prev].extend(_lists[pos])
- _maxes[prev] = _keys[prev][-1]
-
- del _keys[pos]
- del _lists[pos]
- del _maxes[pos]
- del _index[:]
-
- self._expand(prev)
-
- elif len_keys_pos:
-
- _maxes[pos] = keys_pos[-1]
-
- else:
-
- del _keys[pos]
- del _lists[pos]
- del _maxes[pos]
- del _index[:]
-
- def _loc(self, pos, idx):
- """Convert an index pair (alpha, beta) into a single index that corresponds to
- the position of the value in the sorted list.
-
- Most queries require the index be built. Details of the index are
- described in self._build_index.
-
- Indexing requires traversing the tree from a leaf node to the root. The
- parent of each node is easily computable at (pos - 1) // 2.
-
- Left-child nodes are always at odd indices and right-child nodes are
- always at even indices.
-
- When traversing up from a right-child node, increment the total by the
- left-child node.
-
- The final index is the sum from traversal and the index in the sublist.
-
- For example, using the index from self._build_index:
-
- _index = 14 5 9 3 2 4 5
- _offset = 3
-
- Tree:
-
- 14
- 5 9
- 3 2 4 5
-
- Converting index pair (2, 3) into a single index involves iterating like
- so:
-
- 1. Starting at the leaf node: offset + alpha = 3 + 2 = 5. We identify
- the node as a left-child node. At such nodes, we simply traverse to
- the parent.
-
- 2. At node 9, position 2, we recognize the node as a right-child node
- and accumulate the left-child in our total. Total is now 5 and we
- traverse to the parent at position 0.
-
- 3. Iteration ends at the root.
-
- Computing the index is the sum of the total and beta: 5 + 3 = 8.
- """
- if not pos:
- return idx
-
- _index = self._index
-
- if not len(_index):
- self._build_index()
-
- total = 0
-
- # Increment pos to point in the index to len(self._lists[pos]).
-
- pos += self._offset
-
- # Iterate until reaching the root of the index tree at pos = 0.
-
- while pos:
-
- # Right-child nodes are at odd indices. At such indices
- # account the total below the left child node.
-
- if not (pos & 1):
- total += _index[pos - 1]
-
- # Advance pos to the parent node.
-
- pos = (pos - 1) >> 1
-
- return total + idx
-
- def _pos(self, idx):
- """Convert an index into a pair (alpha, beta) that can be used to access
- the corresponding _lists[alpha][beta] position.
-
- Most queries require the index be built. Details of the index are
- described in self._build_index.
-
- Indexing requires traversing the tree to a leaf node. Each node has
- two children which are easily computable. Given an index, pos, the
- left-child is at pos * 2 + 1 and the right-child is at pos * 2 + 2.
-
- When the index is less than the left-child, traversal moves to the
- left sub-tree. Otherwise, the index is decremented by the left-child
- and traversal moves to the right sub-tree.
-
- At a child node, the indexing pair is computed from the relative
- position of the child node as compared with the offset and the remaining
- index.
-
- For example, using the index from self._build_index:
-
- _index = 14 5 9 3 2 4 5
- _offset = 3
-
- Tree:
-
- 14
- 5 9
- 3 2 4 5
-
- Indexing position 8 involves iterating like so:
-
- 1. Starting at the root, position 0, 8 is compared with the left-child
- node (5) which it is greater than. When greater the index is
- decremented and the position is updated to the right child node.
-
- 2. At node 9 with index 3, we again compare the index to the left-child
- node with value 4. Because the index is the less than the left-child
- node, we simply traverse to the left.
-
- 3. At node 4 with index 3, we recognize that we are at a leaf node and
- stop iterating.
-
- 4. To compute the sublist index, we subtract the offset from the index
- of the leaf node: 5 - 3 = 2. To compute the index in the sublist, we
- simply use the index remaining from iteration. In this case, 3.
-
- The final index pair from our example is (2, 3) which corresponds to
- index 8 in the sorted list.
- """
- _len, _lists = self._len, self._lists
-
- if idx < 0:
- last_len = len(_lists[-1])
- if (-idx) <= last_len:
- return len(_lists) - 1, last_len + idx
- idx += _len
- if idx < 0:
- raise IndexError('list index out of range')
- elif idx >= _len:
- raise IndexError('list index out of range')
-
- if idx < len(_lists[0]):
- return 0, idx
-
- _index = self._index
-
- if not len(_index):
- self._build_index()
-
- pos = 0
- len_index = len(_index)
- child = (pos << 1) + 1
-
- while child < len_index:
- index_child = _index[child]
-
- if idx < index_child:
- pos = child
- else:
- idx -= index_child
- pos = child + 1
-
- child = (pos << 1) + 1
-
- return (pos - self._offset, idx)
-
- def _build_index(self):
- """Build an index for indexing the sorted list.
-
- Indexes are represented as binary trees in a dense array notation
- similar to a binary heap.
-
- For example, given a _lists representation storing integers:
-
- [0]: 1 2 3
- [1]: 4 5
- [2]: 6 7 8 9
- [3]: 10 11 12 13 14
-
- The first transformation maps the sub-lists by their length. The
- first row of the index is the length of the sub-lists.
-
- [0]: 3 2 4 5
-
- Each row after that is the sum of consecutive pairs of the previous row:
-
- [1]: 5 9
- [2]: 14
-
- Finally, the index is built by concatenating these lists together:
-
- _index = 14 5 9 3 2 4 5
-
- An offset storing the start of the first row is also stored:
-
- _offset = 3
-
- When built, the index can be used for efficient indexing into the list.
- See the comment and notes on self._pos for details.
- """
- row0 = list(map(len, self._lists))
-
- if len(row0) == 1:
- self._index[:] = row0
- self._offset = 0
- return
-
- head = iter(row0)
- tail = iter(head)
- row1 = list(starmap(add, zip(head, tail)))
-
- if len(row0) & 1:
- row1.append(row0[-1])
-
- if len(row1) == 1:
- self._index[:] = row1 + row0
- self._offset = 1
- return
-
- size = 2 ** (int(log(len(row1) - 1, 2)) + 1)
- row1.extend(repeat(0, size - len(row1)))
- tree = [row0, row1]
-
- while len(tree[-1]) > 1:
- head = iter(tree[-1])
- tail = iter(head)
- row = list(starmap(add, zip(head, tail)))
- tree.append(row)
-
- reduce(iadd, reversed(tree), self._index)
- self._offset = size * 2 - 1
-
- def _slice(self, slc):
- start, stop, step = slc.start, slc.stop, slc.step
-
- if step == 0:
- raise ValueError('slice step cannot be zero')
-
- # Set defaults for missing values.
-
- if step is None:
- step = 1
-
- if step > 0:
- if start is None:
- start = 0
-
- if stop is None:
- stop = len(self)
- elif stop < 0:
- stop += len(self)
- else:
- if start is None:
- start = len(self) - 1
-
- if stop is None:
- stop = -1
- elif stop < 0:
- stop += len(self)
-
- if start < 0:
- start += len(self)
-
- # Fix indices that are too big or too small.
- # Slice notation is surprisingly permissive
- # where normal indexing would raise IndexError.
-
- if step > 0:
- if start < 0:
- start = 0
- elif start > len(self):
- start = len(self)
-
- if stop < 0:
- stop = 0
- elif stop > len(self):
- stop = len(self)
- else:
- if start < 0:
- start = -1
- elif start >= len(self):
- start = len(self) - 1
-
- if stop < 0:
- stop = -1
- elif stop > len(self):
- stop = len(self)
-
- return start, stop, step
-
- def __delitem__(self, idx):
- """Remove the element at *idx*. Supports slicing."""
- if isinstance(idx, slice):
- start, stop, step = self._slice(idx)
-
- if ((step == 1) and (start < stop)
- and ((stop - start) * 8 >= self._len)):
-
- values = self[:start]
- if stop < self._len:
- values += self[stop:]
- self.clear()
- self.update(values)
- return
-
- indices = range(start, stop, step)
-
- # Delete items from greatest index to least so
- # that the indices remain valid throughout iteration.
-
- if step > 0:
- indices = reversed(indices)
-
- _pos, _delete = self._pos, self._delete
-
- for index in indices:
- pos, idx = _pos(index)
- _delete(pos, idx)
- else:
- pos, idx = self._pos(idx)
- self._delete(pos, idx)
-
- def __getitem__(self, idx):
- """Return the element at *idx*. Supports slicing."""
- _lists = self._lists
-
- if isinstance(idx, slice):
- start, stop, step = self._slice(idx)
-
- if step == 1 and start < stop:
- if start == 0 and stop == self._len:
- return self.as_list()
-
- start_pos, start_idx = self._pos(start)
-
- if stop == self._len:
- stop_pos = len(_lists) - 1
- stop_idx = len(_lists[stop_pos])
- else:
- stop_pos, stop_idx = self._pos(stop)
-
- if start_pos == stop_pos:
- return _lists[start_pos][start_idx:stop_idx]
-
- prefix = _lists[start_pos][start_idx:]
- middle = _lists[(start_pos + 1):stop_pos]
- result = reduce(iadd, middle, prefix)
- result += _lists[stop_pos][:stop_idx]
-
- return result
-
- if step == -1 and start > stop:
- result = self[(stop + 1):(start + 1)]
- result.reverse()
- return result
-
- # Return a list because a negative step could
- # reverse the order of the items and this could
- # be the desired behavior.
-
- indices = range(start, stop, step)
- return list(self[index] for index in indices)
- else:
- pos, idx = self._pos(idx)
- return _lists[pos][idx]
-
- def _check_order(self, idx, key, val):
- _keys, _len = self._keys, self._len
-
- pos, loc = self._pos(idx)
-
- if idx < 0:
- idx += _len
-
- # Check that the inserted value is not less than the
- # previous value.
-
- if idx > 0:
- idx_prev = loc - 1
- pos_prev = pos
-
- if idx_prev < 0:
- pos_prev -= 1
- idx_prev = len(_keys[pos_prev]) - 1
-
- if _keys[pos_prev][idx_prev] > key:
- msg = '{0} not in sort order at index {1}'.format(repr(val), idx)
- raise ValueError(msg)
-
- # Check that the inserted value is not greater than
- # the previous value.
-
- if idx < (_len - 1):
- idx_next = loc + 1
- pos_next = pos
-
- if idx_next == len(_keys[pos_next]):
- pos_next += 1
- idx_next = 0
-
- if _keys[pos_next][idx_next] < key:
- msg = '{0} not in sort order at index {1}'.format(repr(val), idx)
- raise ValueError(msg)
-
- def __setitem__(self, index, value):
- """
- Replace the item at position *index* with *value*.
-
- Supports slice notation. Raises a :exc:`ValueError` if the sort order
- would be violated. When used with a slice and iterable, the
- :exc:`ValueError` is raised before the list is mutated if the sort order
- would be violated by the operation.
- """
- _maxes, _lists, _keys, _pos = self._maxes, self._lists, self._keys, self._pos
- _check_order = self._check_order
-
- if isinstance(index, slice):
- start, stop, step = self._slice(index)
- indices = range(start, stop, step)
-
- if step != 1:
- if not hasattr(value, '__len__'):
- value = list(value)
-
- indices = list(indices)
-
- if len(value) != len(indices):
- raise ValueError(
- 'attempt to assign sequence of size {0}'
- ' to extended slice of size {1}'
- .format(len(value), len(indices)))
-
- # Keep a log of values that are set so that we can
- # roll back changes if ordering is violated.
-
- log = []
- _append = log.append
-
- for idx, val in zip(indices, value):
- pos, loc = _pos(idx)
- key = self._key(val)
- _append((idx, _keys[pos][loc], key, _lists[pos][loc], val))
- _keys[pos][loc] = key
- _lists[pos][loc] = val
- if len(_keys[pos]) == (loc + 1):
- _maxes[pos] = key
-
- try:
- # Validate ordering of new values.
-
- for idx, oldkey, newkey, oldval, newval in log:
- _check_order(idx, newkey, newval)
-
- except ValueError:
-
- # Roll back changes from log.
-
- for idx, oldkey, newkey, oldval, newval in log:
- pos, loc = _pos(idx)
- _keys[pos][loc] = oldkey
- _lists[pos][loc] = oldval
- if len(_keys[pos]) == (loc + 1):
- _maxes[pos] = oldkey
-
- raise
- else:
- # Test ordering using indexing. If the value given
- # doesn't support getitem, convert it to a list.
-
- if not hasattr(value, '__getitem__'):
- value = list(value)
-
- # Check that the given values are ordered properly.
-
- keys = list(map(self._key, value))
- ordered = all(keys[pos - 1] <= keys[pos]
- for pos in range(1, len(keys)))
-
- if not ordered:
- raise ValueError('given sequence not in sort order')
-
- # Check ordering in context of sorted list.
-
- if not start or not len(value):
- # Nothing to check on the lhs.
- pass
- else:
- pos, loc = _pos(start - 1)
- if _keys[pos][loc] > keys[0]:
- msg = '{0} not in sort order at index {1}'.format(repr(value[0]), start)
- raise ValueError(msg)
-
- if stop == len(self) or not len(value):
- # Nothing to check on the rhs.
- pass
- else:
- # "stop" is exclusive so we don't need
- # to add one for the index.
- pos, loc = _pos(stop)
- if _keys[pos][loc] < keys[-1]:
- msg = '{0} not in sort order at index {1}'.format(repr(value[-1]), stop)
- raise ValueError(msg)
-
- # Delete the existing values.
-
- del self[index]
-
- # Insert the new values.
-
- _insert = self.insert
- for idx, val in enumerate(value):
- _insert(start + idx, val)
- else:
- pos, loc = _pos(index)
- key = self._key(value)
- _check_order(index, key, value)
- _keys[pos][loc] = key
- _lists[pos][loc] = value
- if len(_lists[pos]) == (loc + 1):
- _maxes[pos] = key
-
- def __iter__(self):
- """Create an iterator over the list."""
- return chain.from_iterable(self._lists)
-
- def __reversed__(self):
- """Create an iterator to traverse the list in reverse."""
- return chain.from_iterable(map(reversed, reversed(self._lists)))
-
- def __len__(self):
- """Return the number of elements in the list."""
- return self._len
-
- def bisect_left(self, val):
- """
- Similar to the *bisect* module in the standard library, this returns an
- appropriate index to insert *val*. If *val* is already present, the
- insertion point will be before (to the left of) any existing entries.
- """
- _maxes = self._maxes
-
- if not _maxes:
- return 0
-
- key = self._key(val)
- pos = bisect_left(_maxes, key)
-
- if pos == len(_maxes):
- return self._len
-
- idx = bisect_left(self._keys[pos], key)
-
- return self._loc(pos, idx)
-
- def bisect_right(self, val):
- """
- Same as *bisect_left*, but if *val* is already present, the insertion
- point will be after (to the right of) any existing entries.
- """
- _maxes = self._maxes
-
- if not _maxes:
- return 0
-
- key = self._key(val)
- pos = bisect_right(_maxes, key)
-
- if pos == len(_maxes):
- return self._len
-
- idx = bisect_right(self._keys[pos], key)
-
- return self._loc(pos, idx)
-
- bisect = bisect_right
-
- def count(self, val):
- """Return the number of occurrences of *val* in the list."""
- _maxes = self._maxes
-
- if not _maxes:
- return 0
-
- key = self._key(val)
- pos = bisect_left(_maxes, key)
-
- if pos == len(_maxes):
- return 0
-
- _keys = self._keys
- _lists = self._lists
-
- idx = bisect_left(_keys[pos], key)
-
- total = 0
- len_keys = len(_keys)
- len_sublist = len(_keys[pos])
-
- while True:
- if _keys[pos][idx] != key:
- return total
- if _lists[pos][idx] == val:
- total += 1
- idx += 1
- if idx == len_sublist:
- pos += 1
- if pos == len_keys:
- return total
- len_sublist = len(_keys[pos])
- idx = 0
-
- def copy(self):
- """Return a shallow copy of the sorted list."""
- return self.__class__(self, key=self._key, load=self._load)
-
- __copy__ = copy
-
- def append(self, val):
- """
- Append the element *val* to the list. Raises a ValueError if the *val*
- would violate the sort order.
- """
- _maxes, _lists, _keys = self._maxes, self._lists, self._keys
-
- key = self._key(val)
-
- if not _maxes:
- _maxes.append(key)
- _keys.append([key])
- _lists.append([val])
- self._len = 1
- return
-
- pos = len(_keys) - 1
-
- if key < _keys[pos][-1]:
- msg = '{0} not in sort order at index {1}'.format(repr(val), self._len)
- raise ValueError(msg)
-
- _maxes[pos] = key
- _keys[pos].append(key)
- _lists[pos].append(val)
- self._len += 1
- self._expand(pos)
-
- def extend(self, values):
- """
- Extend the list by appending all elements from the *values*. Raises a
- ValueError if the sort order would be violated.
- """
- _maxes, _keys, _lists, _load = self._maxes, self._keys, self._lists, self._load
-
- if not isinstance(values, list):
- values = list(values)
-
- keys = list(map(self._key, values))
-
- if any(keys[pos - 1] > keys[pos]
- for pos in range(1, len(keys))):
- raise ValueError('given sequence not in sort order')
-
- offset = 0
-
- if _maxes:
- if keys[0] < _keys[-1][-1]:
- msg = '{0} not in sort order at index {1}'.format(repr(values[0]), self._len)
- raise ValueError(msg)
-
- if len(_keys[-1]) < self._half:
- _lists[-1].extend(values[:_load])
- _keys[-1].extend(keys[:_load])
- _maxes[-1] = _keys[-1][-1]
- offset = _load
-
- len_keys = len(_keys)
-
- for idx in range(offset, len(keys), _load):
- _lists.append(values[idx:(idx + _load)])
- _keys.append(keys[idx:(idx + _load)])
- _maxes.append(_keys[-1][-1])
-
- _index = self._index
-
- if len_keys == len(_keys):
- len_index = len(_index)
- if len_index > 0:
- len_values = len(values)
- child = len_index - 1
- while child:
- _index[child] += len_values
- child = (child - 1) >> 1
- _index[0] += len_values
- else:
- del _index[:]
-
- self._len += len(values)
-
- def insert(self, idx, val):
- """
- Insert the element *val* into the list at *idx*. Raises a ValueError if
- the *val* at *idx* would violate the sort order.
- """
- _maxes, _lists, _keys, _len = self._maxes, self._lists, self._keys, self._len
-
- if idx < 0:
- idx += _len
- if idx < 0:
- idx = 0
- if idx > _len:
- idx = _len
-
- key = self._key(val)
-
- if not _maxes:
- # The idx must be zero by the inequalities above.
- _maxes.append(key)
- _lists.append([val])
- _keys.append([key])
- self._len = 1
- return
-
- if not idx:
- if key > _keys[0][0]:
- msg = '{0} not in sort order at index {1}'.format(repr(val), 0)
- raise ValueError(msg)
- else:
- _keys[0].insert(0, key)
- _lists[0].insert(0, val)
- self._expand(0)
- self._len += 1
- return
-
- if idx == _len:
- pos = len(_keys) - 1
- if _keys[pos][-1] > key:
- msg = '{0} not in sort order at index {1}'.format(repr(val), _len)
- raise ValueError(msg)
- else:
- _keys[pos].append(key)
- _lists[pos].append(val)
- _maxes[pos] = _keys[pos][-1]
- self._expand(pos)
- self._len += 1
- return
-
- pos, idx = self._pos(idx)
- idx_before = idx - 1
- if idx_before < 0:
- pos_before = pos - 1
- idx_before = len(_keys[pos_before]) - 1
- else:
- pos_before = pos
-
- before = _keys[pos_before][idx_before]
- if before <= key <= _keys[pos][idx]:
- _lists[pos].insert(idx, val)
- _keys[pos].insert(idx, key)
- self._expand(pos)
- self._len += 1
- else:
- msg = '{0} not in sort order at index {1}'.format(repr(val), idx)
- raise ValueError(msg)
-
- def pop(self, idx=-1):
- """
- Remove and return item at *idx* (default last). Raises IndexError if
- list is empty or index is out of range. Negative indices are supported,
- as for slice indices.
- """
- if (idx < 0 and -idx > self._len) or (idx >= self._len):
- raise IndexError('pop index out of range')
-
- pos, idx = self._pos(idx)
- val = self._lists[pos][idx]
- self._delete(pos, idx)
-
- return val
-
- def index(self, val, start=None, stop=None):
- """
- Return the smallest *k* such that L[k] == val and i <= k < j`. Raises
- ValueError if *val* is not present. *stop* defaults to the end of the
- list. *start* defaults to the beginning. Negative indices are supported,
- as for slice indices.
- """
- _len, _maxes = self._len, self._maxes
-
- if not _maxes:
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- if start is None:
- start = 0
- if start < 0:
- start += _len
- if start < 0:
- start = 0
-
- if stop is None:
- stop = _len
- if stop < 0:
- stop += _len
- if stop > _len:
- stop = _len
-
- if stop <= start:
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- stop -= 1
- key = self._key(val)
- pos = bisect_left(_maxes, key)
-
- if pos == len(_maxes):
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- _keys = self._keys
- _lists = self._lists
-
- idx = bisect_left(_keys[pos], key)
-
- len_keys = len(_keys)
- len_sublist = len(_keys[pos])
-
- while True:
- if _keys[pos][idx] != key:
- raise ValueError('{0} is not in list'.format(repr(val)))
- if _lists[pos][idx] == val:
- loc = self._loc(pos, idx)
- if start <= loc <= stop:
- return loc
- elif loc > stop:
- break
- idx += 1
- if idx == len_sublist:
- pos += 1
- if pos == len_keys:
- raise ValueError('{0} is not in list'.format(repr(val)))
- len_sublist = len(_keys[pos])
- idx = 0
-
- raise ValueError('{0} is not in list'.format(repr(val)))
-
- def as_list(self):
- """Very efficiently convert the SortedList to a list."""
- return reduce(iadd, self._lists, [])
-
- def __add__(self, that):
- """
- Return a new sorted list containing all the elements in *self* and
- *that*. Elements in *that* do not need to be properly ordered with
- respect to *self*.
- """
- values = self.as_list()
- values.extend(that)
- return self.__class__(values, key=self._key, load=self._load)
-
- def __iadd__(self, that):
- """
- Update *self* to include all values in *that*. Elements in *that* do not
- need to be properly ordered with respect to *self*.
- """
- self.update(that)
- return self
-
- def __mul__(self, that):
- """
- Return a new sorted list containing *that* shallow copies of each item
- in SortedList.
- """
- values = self.as_list() * that
- return self.__class__(values, key=self._key, load=self._load)
-
- def __imul__(self, that):
- """
- Increase the length of the list by appending *that* shallow copies of
- each item.
- """
- values = self.as_list() * that
- self.clear()
- self.update(values)
- return self
-
- def __eq__(self, that):
- """Compare two Sequences for equality."""
- return ((self._len == len(that))
- and all(lhs == rhs for lhs, rhs in zip(self, that)))
-
- def __ne__(self, that):
- """Compare two Sequences for inequality."""
- return ((self._len != len(that))
- or any(lhs != rhs for lhs, rhs in zip(self, that)))
-
- def __lt__(self, that):
- """Compare two Sequences for less than."""
- return ((self._len <= len(that))
- and all(lhs < rhs for lhs, rhs in zip(self, that)))
-
- def __le__(self, that):
- """Compare two Sequences for less than equal."""
- return ((self._len <= len(that))
- and all(lhs <= rhs for lhs, rhs in zip(self, that)))
-
- def __gt__(self, that):
- """Compare two Sequences for greater than."""
- return ((self._len >= len(that))
- and all(lhs > rhs for lhs, rhs in zip(self, that)))
-
- def __ge__(self, that):
- """Compare two Sequences for greater than equal."""
- return ((self._len >= len(that))
- and all(lhs >= rhs for lhs, rhs in zip(self, that)))
-
- @recursive_repr
- def __repr__(self):
- """Return string representation of SortedListWithKey."""
- temp = '{0}({1}, key={2}, load={3})'
- return temp.format(
- self.__class__.__name__,
- repr(list(self)),
- repr(self._key),
- repr(self._load)
- )
-
- def _check(self):
- try:
- # Check load parameters.
-
- assert self._load >= 4
- assert self._half == (self._load >> 1)
- assert self._twice == (self._load * 2)
-
- # Check empty sorted list case.
-
- if self._maxes == []:
- assert self._keys == []
- assert self._lists == []
- return
-
- assert len(self._maxes) > 0 and len(self._keys) > 0 and len(self._lists) > 0
-
- # Check all sublists are sorted.
-
- assert all(sublist[pos - 1] <= sublist[pos]
- for sublist in self._keys
- for pos in range(1, len(sublist)))
-
- # Check beginning/end of sublists are sorted.
-
- for pos in range(1, len(self._keys)):
- assert self._keys[pos - 1][-1] <= self._keys[pos][0]
-
- # Check length of _maxes and _lists match.
-
- assert len(self._maxes) == len(self._lists) == len(self._keys)
-
- # Check _keys matches _key mapped to _lists.
-
- assert all(len(val_list) == len(key_list)
- for val_list, key_list in zip(self._lists, self._keys))
- assert all(self._key(val) == key for val, key in
- zip((_val for _val_list in self._lists for _val in _val_list),
- (_key for _key_list in self._keys for _key in _key_list)))
-
- # Check _maxes is a map of _keys.
-
- assert all(self._maxes[pos] == self._keys[pos][-1]
- for pos in range(len(self._maxes)))
-
- # Check load level is less than _twice.
-
- assert all(len(sublist) <= self._twice for sublist in self._lists)
-
- # Check load level is greater than _half for all
- # but the last sublist.
-
- assert all(len(self._lists[pos]) >= self._half
- for pos in range(0, len(self._lists) - 1))
-
- # Check length.
-
- assert self._len == sum(len(sublist) for sublist in self._lists)
-
- # Check index.
-
- if len(self._index):
- assert len(self._index) == self._offset + len(self._lists)
- assert self._len == self._index[0]
-
- def test_offset_pos(pos):
- from_index = self._index[self._offset + pos]
- return from_index == len(self._lists[pos])
-
- assert all(test_offset_pos(pos)
- for pos in range(len(self._lists)))
-
- for pos in range(self._offset):
- child = (pos << 1) + 1
- if self._index[pos] == 0:
- assert child >= len(self._index)
- elif child + 1 == len(self._index):
- assert self._index[pos] == self._index[child]
- else:
- child_sum = self._index[child] + self._index[child + 1]
- assert self._index[pos] == child_sum
-
- except:
- import sys
- import traceback
-
- traceback.print_exc(file=sys.stdout)
-
- print('len', self._len)
- print('load', self._load, self._half, self._twice)
- print('offset', self._offset)
- print('len_index', len(self._index))
- print('index', self._index)
- print('len_maxes', len(self._maxes))
- print('maxes', self._maxes)
- print('len_keys', len(self._keys))
- print('keys', self._keys)
- print('len_lists', len(self._lists))
- print('lists', self._lists)
-
- raise
diff --git a/src/sortedcontainers/sortedset.py b/src/sortedcontainers/sortedset.py
deleted file mode 100755
index d9ab42c..0000000
--- a/src/sortedcontainers/sortedset.py
+++ /dev/null
@@ -1,294 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# Sorted set implementation.
-
-from .sortedlist import SortedList, recursive_repr
-from .sortedlistwithkey import SortedListWithKey
-from collections import Set, MutableSet, Sequence
-from itertools import chain
-import operator as op
-
-class SortedSet(MutableSet, Sequence):
- """
- A `SortedSet` provides the same methods as a `set`. Additionally, a
- `SortedSet` maintains its items in sorted order, allowing the `SortedSet` to
- be indexed.
-
- Unlike a `set`, a `SortedSet` requires items be hashable and comparable.
- """
- def __init__(self, iterable=None, key=None, load=1000, _set=None):
- """
- A `SortedSet` provides the same methods as a `set`. Additionally, a
- `SortedSet` maintains its items in sorted order, allowing the
- `SortedSet` to be indexed.
-
- An optional *iterable* provides an initial series of items to populate
- the `SortedSet`.
-
- An optional *key* argument defines a callable that, like the `key`
- argument to Python's `sorted` function, extracts a comparison key from
- each set item. If no function is specified, the default compares the
- set items directly.
-
- An optional *load* specifies the load-factor of the set. The default
- load factor of '1000' works well for sets from tens to tens of millions
- of elements. Good practice is to use a value that is the cube root of
- the set size. With billions of elements, the best load factor depends
- on your usage. It's best to leave the load factor at the default until
- you start benchmarking.
- """
- self._key = key
- self._load = load
-
- self._set = set() if _set is None else _set
-
- _set = self._set
- self.isdisjoint = _set.isdisjoint
- self.issubset = _set.issubset
- self.issuperset = _set.issuperset
-
- if key is None:
- self._list = SortedList(self._set, load=load)
- else:
- self._list = SortedListWithKey(self._set, key=key, load=load)
-
- _list = self._list
- self.bisect_left = _list.bisect_left
- self.bisect = _list.bisect
- self.bisect_right = _list.bisect_right
- self.index = _list.index
-
- if iterable is not None:
- self.update(iterable)
-
- def __contains__(self, value):
- """Return True if and only if *value* is an element in the set."""
- return (value in self._set)
-
- def __getitem__(self, index):
- """
- Return the element at position *index*.
-
- Supports slice notation and negative indexes.
- """
- return self._list[index]
-
- def __delitem__(self, index):
- """
- Remove the element at position *index*.
-
- Supports slice notation and negative indexes.
- """
- _list = self._list
- if isinstance(index, slice):
- values = _list[index]
- self._set.difference_update(values)
- else:
- value = _list[index]
- self._set.remove(value)
- del _list[index]
-
- def _make_cmp(set_op, doc):
- def comparer(self, that):
- if isinstance(that, SortedSet):
- return set_op(self._set, that._set)
- elif isinstance(that, Set):
- return set_op(self._set, that)
- else:
- raise TypeError('can only compare to a Set')
-
- comparer.__name__ = '__{0}__'.format(set_op.__name__)
- comparer.__doc__ = 'Return True if and only if ' + doc
-
- return comparer
-
- __eq__ = _make_cmp(op.eq, 'self and *that* are equal sets.')
- __ne__ = _make_cmp(op.ne, 'self and *that* are inequal sets.')
- __lt__ = _make_cmp(op.lt, 'self is a proper subset of *that*.')
- __gt__ = _make_cmp(op.gt, 'self is a proper superset of *that*.')
- __le__ = _make_cmp(op.le, 'self is a subset of *that*.')
- __ge__ = _make_cmp(op.ge, 'self is a superset of *that*.')
-
- def __len__(self):
- """Return the number of elements in the set."""
- return len(self._set)
-
- def __iter__(self):
- """
- Return an iterator over the SortedSet. Elements are iterated over
- in their sorted order.
- """
- return iter(self._list)
-
- def __reversed__(self):
- """
- Return an iterator over the SortedSet. Elements are iterated over
- in their reversed sorted order.
- """
- return reversed(self._list)
-
- def add(self, value):
- """Add the element *value* to the set."""
- if value not in self._set:
- self._set.add(value)
- self._list.add(value)
-
- def clear(self):
- """Remove all elements from the set."""
- self._set.clear()
- self._list.clear()
-
- def copy(self):
- """Create a shallow copy of the sorted set."""
- return self.__class__(key=self._key, load=self._load, _set=set(self._set))
-
- __copy__ = copy
-
- def count(self, value):
- """Return the number of occurrences of *value* in the set."""
- return 1 if value in self._set else 0
-
- def discard(self, value):
- """
- Remove the first occurrence of *value*. If *value* is not a member,
- does nothing.
- """
- if value in self._set:
- self._set.remove(value)
- self._list.discard(value)
-
- def pop(self, index=-1):
- """
- Remove and return item at *index* (default last). Raises IndexError if
- set is empty or index is out of range. Negative indexes are supported,
- as for slice indices.
- """
- value = self._list.pop(index)
- self._set.remove(value)
- return value
-
- def remove(self, value):
- """
- Remove first occurrence of *value*. Raises ValueError if
- *value* is not present.
- """
- self._set.remove(value)
- self._list.remove(value)
-
- def difference(self, *iterables):
- """
- Return a new set with elements in the set that are not in the
- *iterables*.
- """
- diff = self._set.difference(*iterables)
- new_set = self.__class__(key=self._key, load=self._load, _set=diff)
- return new_set
-
- __sub__ = difference
- __rsub__ = __sub__
-
- def difference_update(self, *iterables):
- """
- Update the set, removing elements found in keeping only elements
- found in any of the *iterables*.
- """
- values = set(chain(*iterables))
- if (4 * len(values)) > len(self):
- self._set.difference_update(values)
- self._list.clear()
- self._list.update(self._set)
- else:
- _discard = self.discard
- for value in values:
- _discard(value)
- return self
-
- __isub__ = difference_update
-
- def intersection(self, *iterables):
- """
- Return a new set with elements common to the set and all *iterables*.
- """
- comb = self._set.intersection(*iterables)
- new_set = self.__class__(key=self._key, load=self._load, _set=comb)
- return new_set
-
- __and__ = intersection
- __rand__ = __and__
-
- def intersection_update(self, *iterables):
- """
- Update the set, keeping only elements found in it and all *iterables*.
- """
- self._set.intersection_update(*iterables)
- self._list.clear()
- self._list.update(self._set)
- return self
-
- __iand__ = intersection_update
-
- def symmetric_difference(self, that):
- """
- Return a new set with elements in either *self* or *that* but not both.
- """
- diff = self._set.symmetric_difference(that)
- new_set = self.__class__(key=self._key, load=self._load, _set=diff)
- return new_set
-
- __xor__ = symmetric_difference
- __rxor__ = __xor__
-
- def symmetric_difference_update(self, that):
- """
- Update the set, keeping only elements found in either *self* or *that*,
- but not in both.
- """
- self._set.symmetric_difference_update(that)
- self._list.clear()
- self._list.update(self._set)
- return self
-
- __ixor__ = symmetric_difference_update
-
- def union(self, *iterables):
- """
- Return a new SortedSet with elements from the set and all *iterables*.
- """
- return self.__class__(chain(iter(self), *iterables), key=self._key, load=self._load)
-
- __or__ = union
- __ror__ = __or__
-
- def update(self, *iterables):
- """Update the set, adding elements from all *iterables*."""
- values = set(chain(*iterables))
- if (4 * len(values)) > len(self):
- self._set.update(values)
- self._list.clear()
- self._list.update(self._set)
- else:
- _add = self.add
- for value in values:
- _add(value)
- return self
-
- __ior__ = union
-
- def __reduce__(self):
- return (self.__class__, ((), self._key, self._load, self._set))
-
- @recursive_repr
- def __repr__(self):
- temp = '{0}({1}, key={2}, load={3})'
- return temp.format(
- self.__class__.__name__,
- repr(list(self)),
- repr(self._key),
- repr(self._load)
- )
-
- def _check(self):
- self._list._check()
- assert len(self._set) == len(self._list)
- _set = self._set
- assert all(val in _set for val in self._list)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/tophat.git
More information about the debian-med-commit
mailing list