[med-svn] [freebayes] 02/05: Only keep the manpages in this branch
Andreas Tille
tille at debian.org
Sat Jan 14 06:51:03 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch manpages
in repository freebayes.
commit aee5684babf6d22d880fc54de4f9b3a652a7a4e7
Author: Andreas Tille <tille at debian.org>
Date: Sat Jan 14 07:46:56 2017 +0100
Only keep the manpages in this branch
.gitignore | 5 -
.gitmodules | 15 -
.travis.yml | 14 -
LICENSE | 19 -
Makefile | 33 -
README.md | 532 -
bin/.keep | 0
debian/bash-tap/bash-tap | 369 -
debian/bash-tap/bash-tap-bootstrap | 28 -
debian/bash-tap/bash-tap-mock | 106 -
debian/changelog | 5 -
debian/compat | 1 -
debian/control | 31 -
debian/copyright | 64 -
debian/install | 2 -
debian/manpages | 1 -
debian/patches/fix_test.patch | 15 -
debian/patches/series | 3 -
debian/patches/use_debian_packaged_bamtools.patch | 89 -
debian/patches/use_debian_packaged_vcflib.patch | 292 -
debian/rules | 13 -
debian/source/format | 1 -
debian/upstream/metadata | 8 -
debian/watch | 3 -
examples/pipeline.sh | 29 -
paper/1000G_performance_comparison.png | Bin 61887 -> 0 bytes
paper/100samples10x_0_25_both.png | Bin 89862 -> 0 bytes
paper/Makefile | 10 -
paper/genome_research.bst | 1620 -
paper/haplotype_calling.png | Bin 175436 -> 0 bytes
paper/indel_error.png | Bin 44097 -> 0 bytes
paper/low_frequency_sensitivity.png | Bin 138914 -> 0 bytes
paper/main.aux | 121 -
paper/main.bbl | 170 -
paper/main.blg | 49 -
paper/main.tex | 664 -
paper/miseq.png | Bin 80796 -> 0 bytes
paper/omni_errors.png | Bin 94711 -> 0 bytes
paper/references.bib | 321 -
python/.gitignore | 1 -
python/README | 1 -
python/allelebayes.py | 373 -
python/dirichlet.py | 58 -
python/factorialln.py | 11 -
python/hwe.py | 64 -
python/logsumexp.py | 23 -
python/multiset.py | 140 -
python/phred.py | 17 -
scripts/coverage_to_regions.py | 51 -
scripts/fasta_generate_regions.py | 35 -
scripts/freebayes-parallel | 40 -
scripts/generate_freebayes_region_scripts.sh | 25 -
scripts/sam_add_rg.pl | 36 -
scripts/samples.cnv | 9 -
src/.gitignore | 4 -
src/Allele.cpp | 1534 -
src/Allele.h | 393 -
src/AlleleParser.cpp | 3976 -
src/AlleleParser.h | 356 -
src/BGZF.cpp | 398 -
src/BGZF.h | 320 -
src/BedReader.cpp | 80 -
src/BedReader.h | 75 -
src/Bias.cpp | 56 -
src/Bias.h | 30 -
src/CNV.cpp | 57 -
src/CNV.h | 33 -
src/Contamination.cpp | 76 -
src/Contamination.h | 39 -
src/DataLikelihood.cpp | 299 -
src/DataLikelihood.h | 65 -
src/Dirichlet.cpp | 62 -
src/Dirichlet.h | 8 -
src/Ewens.cpp | 51 -
src/Ewens.h | 25 -
src/Fasta.cpp | 308 -
src/Fasta.h | 73 -
src/Genotype.cpp | 1894 -
src/Genotype.h | 453 -
src/GenotypePriors.cpp | 188 -
src/GenotypePriors.h | 54 -
src/IndelAllele.cpp | 48 -
src/IndelAllele.h | 36 -
src/LargeFileSupport.h | 15 -
src/LeftAlign.cpp | 398 -
src/LeftAlign.h | 37 -
src/Makefile | 268 -
src/Marginals.cpp | 198 -
src/Marginals.h | 26 -
src/Multinomial.cpp | 49 -
src/Multinomial.h | 13 -
src/NonCall.cpp | 86 -
src/NonCall.h | 48 -
src/Parameters.cpp | 1105 -
src/Parameters.h | 139 -
src/Product.h | 19 -
src/Result.cpp | 6 -
src/Result.h | 25 -
src/ResultData.cpp | 698 -
src/ResultData.h | 75 -
src/Sample.cpp | 472 -
src/Sample.h | 135 -
src/SegfaultHandler.cpp | 17 -
src/SegfaultHandler.h | 11 -
src/Sum.h | 15 -
src/TryCatch.h | 16 -
src/Utility.cpp | 100758 -------------------
src/Utility.h | 154 -
src/Version.h | 2 -
src/alleles.cpp | 92 -
src/bamfiltertech.cpp | 109 -
src/bamleftalign.cpp | 197 -
src/convert.h | 22 -
src/dummy.cpp | 46 -
src/fastlz.c | 559 -
src/fastlz.h | 100 -
src/freebayes.cpp | 712 -
src/join.h | 24 -
src/levenshtein.cpp | 93 -
src/multichoose.h | 79 -
src/multipermute.h | 185 -
src/split.cpp | 33 -
src/split.h | 20 -
src/version_release.txt | 5 -
test/Makefile | 15 -
test/region-and-target-handling.t | 104 -
test/splice/1:883884-887618.bam | Bin 22589 -> 0 bytes
test/splice/1:883884-887618.bam.bai | Bin 288 -> 0 bytes
test/splice/1:883884-887618.fa | 64 -
test/splice/1:883884-887618.fa.fai | 1 -
test/t/01_call_variants.t | 110 -
test/tiny/1read.bam | Bin 636 -> 0 bytes
test/tiny/1read.bam.bai | Bin 96 -> 0 bytes
test/tiny/NA12878.chr22.tiny.bam | Bin 287213 -> 0 bytes
test/tiny/NA12878.chr22.tiny.bam.bai | Bin 96 -> 0 bytes
test/tiny/NA12878.chr22.tiny.giab.vcf | 89 -
test/tiny/q with spaces.fa | 207 -
test/tiny/q with spaces.fa.fai | 1 -
test/tiny/q with spaces.regions | 3 -
test/tiny/q.fa | 207 -
test/tiny/q.fa.fai | 1 -
test/tiny/q.regions | 3 -
test/tiny/q.vcf.gz | Bin 4305 -> 0 bytes
test/tiny/q.vcf.gz.tbi | Bin 102 -> 0 bytes
test/tiny/q_spiked.vcf.gz | Bin 2378 -> 0 bytes
test/tiny/q_spiked.vcf.gz.tbi | Bin 91 -> 0 bytes
ttmath/COPYRIGHT | 28 -
ttmath/ttmath.h | 2853 -
ttmath/ttmathbig.h | 6045 --
ttmath/ttmathdec.h | 419 -
ttmath/ttmathint.h | 1922 -
ttmath/ttmathmisc.h | 250 -
ttmath/ttmathobjects.h | 809 -
ttmath/ttmathparser.h | 2777 -
ttmath/ttmaththreads.h | 250 -
ttmath/ttmathtypes.h | 676 -
ttmath/ttmathuint.h | 4165 -
ttmath/ttmathuint_noasm.h | 1017 -
ttmath/ttmathuint_x86.h | 1602 -
ttmath/ttmathuint_x86_64.h | 1146 -
ttmath/ttmathuint_x86_64_msvc.asm | 548 -
162 files changed, 148888 deletions(-)
diff --git a/.gitignore b/.gitignore
deleted file mode 100644
index 81dbe06..0000000
--- a/.gitignore
+++ /dev/null
@@ -1,5 +0,0 @@
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 6341f4d..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,15 +0,0 @@
-[submodule "vcflib"]
- path = vcflib
- url = https://github.com/ekg/vcflib.git
-[submodule "bamtools"]
- path = bamtools
- url = https://github.com/ekg/bamtools.git
-[submodule "intervaltree"]
- path = intervaltree
- url = https://github.com/ekg/intervaltree.git
-[submodule "test/test-simple-bash"]
- path = test/test-simple-bash
- url = https://github.com/ingydotnet/test-simple-bash.git
-[submodule "bash-tap"]
- path = test/bash-tap
- url = https://github.com/illusori/bash-tap.git
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 8acf75c..0000000
--- a/.travis.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-# Control file for continuous integration testing at http://travis-ci.org/
-language: cpp
-compiler: gcc
- - git submodule update --init --recursive
- - sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y
- - sudo apt-get update -qq
- - sudo apt-get install -qq gcc-4.8 g++-4.8
- - sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-4.8 60 --slave /usr/bin/g++ g++ /usr/bin/g++-4.8
- - gcc --version && g++ --version
- - sudo apt-get install -qq bc samtools parallel
-#install: make get-deps
-script: make && make test
deleted file mode 100644
index bb8a23f..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-Erik Garrison
-Thomas Sibley
-Dillon Lee
-Patrick Marks
-Noah Spies
-Joshua Randall
-Jeremy Anderson
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 9e3322e..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-Copyright (c) 2010 Erik Garrison, Gabor Marth
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
diff --git a/Makefile b/Makefile
deleted file mode 100644
index 6848dcd..0000000
--- a/Makefile
+++ /dev/null
@@ -1,33 +0,0 @@
-all: vcflib/Makefile log
- cd src && $(MAKE)
-log: src/version_git.h
- wget -q http://hypervolu.me/freebayes/build/$(shell cat src/version_git.h | grep v | cut -f 3 -d\ | sed s/\"//g) &
- cd src && $(MAKE) autoversion
- touch src/version_git.h
- @echo "To build freebayes you must use git to also download its submodules."
- @echo "Do so by downloading freebayes again using this command (note --recursive flag):"
- @echo " git clone --recursive git://github.com/ekg/freebayes.git"
- @error
- cd src && $(MAKE) debug
- cp bin/freebayes bin/bamleftalign /usr/local/bin/
- rm /usr/local/bin/freebayes /usr/local/bin/bamleftalign
- cd test && make test
- cd src && $(MAKE) clean
- rm -fr bin/*
-.PHONY: all install uninstall clean test
diff --git a/README.md b/README.md
deleted file mode 100644
index f233549..0000000
--- a/README.md
+++ /dev/null
@@ -1,532 +0,0 @@
-# *freebayes*, a haplotype-based variant detector
-## user manual and guide
-## Overview
-[*FreeBayes*](http://arxiv.org/abs/1207.3907) is a
-[Bayesian](http://en.wikipedia.org/wiki/Bayesian_inference) genetic variant
-detector designed to find small polymorphisms, specifically SNPs
-(single-nucleotide polymorphisms), indels (insertions and deletions), MNPs
-(multi-nucleotide polymorphisms), and complex events (composite insertion and
-substitution events) smaller than the length of a short-read sequencing
-*FreeBayes* is haplotype-based, in the sense that it calls variants based on
-the literal sequences of reads aligned to a particular target, not their
-precise alignment. This model is a straightforward generalization of previous
-ones (e.g. PolyBayes, samtools, GATK) which detect or report variants based on
-alignments. This method avoids one of the core problems with alignment-based
-variant detection--- that identical sequences may have multiple possible
-<img src="http://hypervolu.me/freebayes/figures/haplotype_calling.png"
-*FreeBayes* uses short-read alignments
-([BAM](http://samtools.sourceforge.net/SAMv1.pdf) files with
-[Phred+33](http://en.wikipedia.org/wiki/Phred_quality_score) encoded quality
-scores, now standard) for any number of individuals from a population and a
-[reference genome](http://en.wikipedia.org/wiki/Reference_genome) (in
-[FASTA](http://en.wikipedia.org/wiki/FASTA_format) format)
-to determine the most-likely combination of genotypes for the population at
-each position in the reference. It reports positions which it finds putatively
-polymorphic in variant call file ([VCF](http://www.1000genomes.org/node/101))
-format. It can also use an input set of variants (VCF) as a source of prior
-information, and a copy number variant map (BED) to define non-uniform ploidy
-variation across the samples under analysis.
-## Citing freebayes
-A preprint [Haplotype-based variant detection from short-read
-sequencing](http://arxiv.org/abs/1207.3907) provides an overview of the
-statistical models
-used in FreeBayes. We ask that you cite this paper if you use FreeBayes in
-work that leads to publication.
-Please use this citation format:
-Garrison E, Marth G. Haplotype-based variant detection from short-read sequencing.
-*arXiv preprint arXiv:1207.3907 [q-bio.GN]* 2012
-If possible, please also refer to the version number provided by freebayes when
-it is run without arguments or with the `--help` option. For example, you
-should see something like this:
- version: v0.9.10-3-g47a713e-dirty
-This provides both a point release number and a git commit id, which will
-ensure precise reproducibility of results.
-## Obtaining
-To download FreeBayes, please use git to download the most recent development
-tree. Currently, the tree is hosted on github, and can be obtained via:
- git clone --recursive git://github.com/ekg/freebayes.git
-Note the use of --recursive. This is required in order to download all
-nested git submodules for external repositories.
-After you've already done the above to clone the most recent development
-version, if you wish to compile a specific version of FreeBayes, you
-can then do something like the following:
- git checkout v0.9.20 && git submodule update --recursive
-### Resolving proxy issues with git
-Depending on your local network configuration, you may have problems obtaining
-freebayes via git. If you see something like this you may be behind a proxy
-that blocks access to standard git:// port (9418).
- $ git clone --recursive git://github.com/ekg/freebayes.git
- Cloning into 'freebayes'...
- fatal: Unable to look up github.com (port 9418) (Name or service not known)
-Luckily, if you have access to https:// on port 443, then you can use this
-'magic' command as a workaround to enable download of the submodules:
- git config --global url.https://github.com/.insteadOf git://github.com/
-## Compilation
-FreeBayes requires g++ and the standard C and C++ development libraries.
-Additionally, cmake is required for building the BamTools API.
- make
-Will build the executable freebayes, as well as the utilities bamfiltertech and
-bamleftalign. These executables can be found in the `bin/` directory in the
-Users may wish to install to e.g. /usr/local/bin (default), which is
-accomplished via
- sudo make install
-## Usage
-In its simplest operation, freebayes requires only two inputs: a FASTA reference
-sequence, and a BAM-format alignment file sorted by reference position. For
- freebayes --fasta-reference h.sapiens.fasta NA20504.bam
-... produces a VCF file on standard output describing
-all SNPs, INDELs, MNPs, and Complex events between the reference and the
-alignments in NA20504.bam. In order to produce correct output, the reference
-supplied must be the reference to which NA20504.bam was aligned.
-Users may specify any number of BAM files on the command line. FreeBayes uses
-the [BamTools API](http://github.com/pezmaster31/bamtools) to open and parse
-these files in parallel, virtually merging them at runtime into one logical
-file with a merged header.
-For a description of available command-line options and their defaults, run:
- freebayes --help
-## Examples
-Call variants assuming a diploid sample:
- freebayes -f ref.fa aln.bam >var.vcf
-Require at least 5 supporting observations to consider a variant:
- freebayes -f ref.fa -C 5 aln.bam >var.vcf
-Use a different ploidy:
- freebayes -f ref.fa -p 4 aln.bam >var.vcf
-Assume a pooled sample with a known number of genome copies. Note that this
-means that each sample identified in the BAM file is assumed to have 32 genome
-copies. When running with high --ploidy settings, it may be required to set
-`--use-best-n-alleles` to a low number to limit memory usage.
- freebayes -f ref.fa -p 32 --use-best-n-alleles 4 --pooled-discrete aln.bam >var.vcf
-Generate frequency-based calls for all variants passing input thresholds. You'd do
-this in the case that you didn't know the number of samples in the pool.
- freebayes -f ref.fa -F 0.01 -C 1 --pooled-continuous aln.bam >var.vcf
-Use an input VCF (bgzipped + tabix indexed) to force calls at particular alleles:
- freebayes -f ref.fa -@ in.vcf.gz aln.bam >var.vcf
-Generate long haplotype calls over known variants:
- freebayes -f ref.fa --haplotype-basis-alleles in.vcf.gz \
- --haplotype-length 50 aln.bam
-Naive variant calling: simply annotate observation counts of SNPs and indels:
- freebayes -f ref.fa --haplotype-length 0 --min-alternate-count 1 \
- --min-alternate-fraction 0 --pooled-continuous --report-monomorphic >var.vcf
-Parallel operation (use 36 cores in this case):
- freebayes-parallel <(fasta_generate_regions.py ref.fa.fai 100000) 36 \
- -f ref.fa aln.bam >var.vcf
-Note that any of the above examples can be made parallel by using the
-scripts/freebayes-parallel script. If you find freebayes to be slow, you
-should probably be running it in parallel using this script to run on a single
-host, or generating a series of scripts, one per region, and run them on a
-## Calling variants: from fastq to VCF
-You've sequenced some samples. You have a reference genome or assembled set of
-contigs, and you'd like to determine reference-relative variants in your
-samples. You can use freebayes to detect the variants, following these steps:
-* **Align** your reads to a suitable reference (e.g. with
-[bwa](http://bio-bwa.sourceforge.net/) or
-* Ensure your alignments have **read groups** attached so their sample may be
-identified by freebayes. Aligners allow you to do this, but you can also use
-[bamaddrg](http://github.com/ekg/bamaddrg) to do so post-alignment.
-* **Sort** the alignments (e.g. bamtools sort).
-* **Mark duplicates**, for instance with [samtools
-rmdup](http://samtools.sourceforge.net/) (if PCR was used in the preparation of
-your sequencing library)
-* ***Run freebayes*** on all your alignment data simultaneously, generating a
-VCF. The default settings should work for most use cases, but if your samples
-are not diploid, set the `--ploidy` and adjust the `--min-alternate-fraction`
-* **Filter** the output e.g. using reported QUAL and/or depth (DP) or
-observation count (AO).
-* **Interpret** your results.
-* (possibly, **Iterate** the variant detection process in response to insight
-gained from your interpretation)
-FreeBayes emits a standard VCF 4.1 output stream. This format is designed for the
-probabilistic description of allelic variants within a population of samples,
-but it is equally suited to describing the probability of variation in a single
-Of primary interest to most users is the QUAL field, which estimates the
-probability that there is a polymorphism at the loci described by the record.
-In freebayes, this value can be understood as 1 - P(locus is homozygous given
-the data). It is recommended that users use this value to filter their
-results, rather than accepting anything output by freebayes as ground truth.
-By default, records are output even if they have very low probability of
-variation, in expectation that the VCF will be filtered using tools such as
-[vcffilter](http://github.com/ekg/vcflib#vcffilter) in
-[vcflib](http://github.com/ekg/vcflib), which is also included in the
-repository under `vcflib/`. For instance,
- freebayes -f ref.fa aln.bam | vcffilter -f "QUAL > 20" >results.vcf
-removes any sites with estimated probability of not being polymorphic less than
-phred 20 (aka 0.01), or probability of polymorphism > 0.99.
-In simulation, the [receiver-operator
- (ROC) tends to have a very sharp inflection between Q1 and Q30, depending on
-input data characteristics, and a filter setting in this range should provide
-decent performance. Users are encouraged to examine their output and both
-variants which are retained and those they filter out. Most problems tend to
-occur in low-depth areas, and so users may wish to remove these as well, which
-can also be done by filtering on the DP flag.
-## Calling variants in a population
-FreeBayes is designed to be run on many individuals from the same population
-(e.g. many human individuals) simultaneously. The algorithm exploits a neutral
-model of allele diffusion to impute most-confident genotypings
-across the entire population. In practice, the discriminant power of the
-method will improve if you run multiple samples simultaneously. In other
-words, if your
-study has multiple individuals, you should run freebayes against them at the
-same time. This also ensures consistent reporting of information about
-evidence for all samples at any locus where any are apparently polymorphic.
-To call variants in a population of samples, each alignment must have a read
-group identifier attached to it (RG tag), and the header of the BAM file in
-which it resides must map the RG tags to sample names (SM). Furthermore, read
-group IDs must be unique across all the files used in the analysis. One read
-group cannot map to multiple samples. The reason this is required is that
-freebayes operates on a virtually merged BAM stream provided by the BamTools
-API. If merging the files in your analysis using bamtools merge would generate
-a file in which multiple samples map to the same RG, the files are not suitable
-for use in population calling, and they must be modified.
-Users may add RG tags to BAM files which were generated without this
-information by using (as mentioned in "Calling variants" above)
-If you have many files corresponding to
-many individuals, add a unique read group and sample name to each, and then
-open them all simultaneously with freebayes. The VCF output will have one
-column per sample in the input.
-## Performance tuning
-If you find freebayes to be slow, or use large amounts of memory, consider the
-following options:
-- Set `--use-best-n-alleles 4`: this will reduce the number of alleles that are
- considered, which will decrease runtime at the cost of sensitivity to
-lower-frequency alleles at multiallelic loci. Calculating site qualities
-requires O(samples\*genotypes) runtime, and the number of genotypes is
-exponential in ploidy and the number of alleles that are considered, so this is
-very important when working with high ploidy samples (and also
-`--pooled-discrete`). By default, freebayes puts no limit on this.
-- Remove `--genotype-qualities`: calculating genotype qualities requires
- O(samples\*genotypes) memory.
-- Set higher input thresholds. Require that N reads in one sample support an
- allele in order to consider it: `--min-alternate-count N`, or that the allele
-fraction in one sample is M: `--min-alternate-fraction M`. This will filter
-noisy alleles. The defaults, `--min-alternate-count 2 --min-alternate-fraction
-0.2`, are most-suitable for diploid, moderate-to-high depth samples, and should
-be changed when working with different ploidy samples. Alternatively,
-`--min-alternate-qsum` can be used to set a specific quality sum, which may be
-more flexible than setting a hard count on the number of observations.
-## Observation filters and qualities
-### Input filters
-FreeBayes filters its input so as to ignore low-confidence alignments and
-alleles which are only supported by low-quality sequencing observations (see
-`--min-mapping-quality` and `--min-base-quality`). It also will only evaluate a
-position if at least one read has mapping quality of
-`--min-supporting-mapping-quality` and one allele has quality of at least
-Reads with more than a fixed number of high-quality mismatches can be excluded
-by specifying `--read-mismatch-limit`. This is meant as a workaround when
-mapping quality estimates are not appropriately calibrated.
-Reads marked as duplicates in the BAM file are ignored, but this can be
-disabled for testing purposes by providing `--use-duplicate-reads`. FreeBayes
-does not mark duplicates on its own, you must use another process to do this.
-### Observation thresholds
-As a guard against spurious variation caused by sequencing artifacts, positions
-are skipped when no more than `--min-alternate-count` or
-`--min-alternate-fraction` non-clonal observations of an alternate are found in one sample. These default
-to 2 and 0.2 respectively. The default setting of `--min-alternate-fraction
-0.2` is suitable for diploid samples but should be changed for ploidy > 2.
-### Allele type exclusion
-FreeBayes provides a few methods to ignore certain classes of allele, e.g.
-`--no-indels` and `--no-mnps`. Users are *strongly cautioned against using
-these*, because removing this information is very likely to reduce detection
-power. To generate a report only including SNPs, use vcffilter post-call as
- freebayes ... | vcffilter -f "TYPE = snp"
-### Observation qualities
-FreeBayes estimates observation quality using several simple heuristics based
-on manipulations of the phred-scaled base qualities:
-* For single-base observations, *mismatches* and *reference observations*: the
-un-adjusted base quality provided in the BAM alignment record.
-* For *insertions*: the mean quality of the bases inside of the putatively
-inserted sequence.
-* For *deletions*: the mean quality of the bases flanking the putatively
-deleted sequence.
-* For *haplotypes*: the mean quality of allele observations within the
-### Effective base depth
-By default, filters are left completely open.
-Use `--experimental-gls` if you would like to integrate both base and mapping
-quality into the reported site quality (QUAL in the VCF) and
-genotype quality (GQ, when supplying `--genotype-qualities`). This integration
-is driven by the "Effective Base Depth" metric first developed in
-[snpTools](http://www.hgsc.bcm.edu/software/snptools), which scales observation
-quality by mapping quality. When `--experimental-gls` is given, *P(Obs|Genotype) ~
-## Stream processing
-FreeBayes can read BAM from standard input `--stdin` instead of directly from
-files. This allows the application of any number of streaming BAM filters and
-calibrators to its input.
- bam_merger.sh | streaming_filter_or_process.sh | freebayes --stdin ...
-This pattern allows the adjustment of alignments without rewriting BAM files,
-which could be expensive depending on context and available storage. A prime
-example of this would be graph-based realignment of reads to known variants as
-implemented in [glia](http://github.com/ekg/glia).
-Using this pattern, you can filter out reads with certain criteria using
-bamtools filter without having to modify the input BAM file. You can also use
-the bamtools API to write your own custom filters in C++. An example filter is
-bamfiltertech (src/bamfiltertech.cpp), which could be used to filter out
-technologies which have characteristic errors which may frustrate certain types
-of variant detection.
-## INDELs
-In principle, any gapped aligner which is sensitive to indels will
-produce satisfactory input for use by freebayes. Due to potential ambiguity,
-indels are
-not parsed when they overlap the beginning or end of alignment boundaries.
-When calling indels, it is important to homogenize the positional distribution
-of insertions and deletions in the input by using left realignment. This is
-now done automatically by freebayes, but the behavior can be turned off via
-`--dont-left-align-indels` flag. You probably don't want to do this.
-Left realignment will place all indels in homopolymer and microsatellite
-repeats at the same position, provided that doing so does not introduce
-mismatches between the read and reference other than the indel. This method
-is computationally inexpensive and handles the most common classes of alignment
-## Haplotype calls
-As freebayes is haplotype-based, left-alignment is necessary only for the
-determination of candidate polymorphic loci. Once such loci are determined,
-haplotype observations are extracted from reads where:
-1. putative variants lie within `--haplotype-window` bases of each other
-(default 3bp),
-2. the reference sequence has repeats (e.g. microsatellites or STRs are called
-as one haplotype),
-3. the haplotype which is called has Shannon entropy less than
-`--min-repeat-entropy`, which is off by default but can be set to ~1 for
-optimal genotyping of indels in lower-complexity sequence.
-After a haplotype window is determined by greedily expanding the window across
-overlapping haplotype observations, all reads overlapping the window are used
-to establish data likelihoods, *P(Observations|Genotype)*, for all haplotypes
-which have sufficient support to pass the input filters.
-Partial observations are considered to support those haplotypes which they
-could match exactly. For expedience, only haplotypes which are contiguously
-observed by the reads are considered as putative alleles in this process. This
-differs from other haplotype-based methods, such as
-[Platypus](http://www.well.ox.ac.uk/platypus), which consider all possible
-haplotypes composed of observed component alleles (SNPs, indels) in a given
-region when generating likelihoods.
-The primary advantages of this approach are conceptual simplicity and
-performance, and it is primarily limited in the case of short reads, an issue
-that is mitigated by increasing read lengths. Also, a hybrid approach must be
-used to call haplotypes from high-error rate long reads.
-### Re-genotyping known variants and calling long haplotypes
-For longer reads with higher error rates, it is possible to generate long
-haplotypes in two passes over the data. For instance, if we had very long
-reads (e.g. >10kb) at moderate depth and high error rate (>5%) such as might be
-produced by PacBio, we could do something like:
- freebayes -f ref.fa aln.bam | vcffilter -f "QUAL > 20" >vars.vcf
-... thus generating candidate variants of suitable quality using the default
-detection window. We can then use these as "basis alleles" for the observation
-of haplotypes, considering all other putative variants supported by the
-alignment to be sequencing errors:
- freebayes -f ref.fa --haplotype-window 500 \
- --haplotype-basis-alleles vars.vcf aln.bam >haps.vcf
-These steps should allow us to read long haplotypes directly from input data
-with high error rates.
-The high error rate means that beyond a small window each read will contain a
-completely different literal haplotype. To a point, this property improves our
-signal to noise ratio and can effectively filter out sequencing errors at the
-point of the input filters, but it also decreases the effective observation
-depth and will prevent the generation of any calls if a long `--haplotype-window`
-is combined with a high sequencing error rate.
-## Best practices and design philosophy
-FreeBayes follows the patterns suggested by the [Unix
-philosophy](https://en.wikipedia.org/wiki/Unix_philosophy), which promotes the
-development of simple, modular systems that perform a single function, and can
-be combined into more complex systems using stream processing of common
-interchange formats.
-FreeBayes incorporates a number of features in order to reduce the complexity
-of variant detection for researchers and developers:
-* **Indel realignment is accomplished internally** using a read-independent
-method, and issues resulting from discordant alignments are dramatically
-reduced through the direct detection of haplotypes.
-* The need for **base quality recalibration is avoided** through the direct
-detection of haplotypes. Sequencing platform errors tend to cluster (e.g. at
-the ends of reads), and generate unique, non-repeating haplotypes at a given
-* **Variant quality recalibration is avoided** by incorporating a number of
-metrics, such as read placement bias and allele balance, directly into the
-Bayesian model. (Our upcoming publication will discuss this in more detail.)
-A minimal pre-processing pipeline similar to that described in "Calling
-variants" should be sufficient for most uses. For more information, please
-refer to a recent post by Brad Chapman [on minimal BAM preprocessing
-For a push-button solution to variant detection, from reads to variant calls,
-look no further than the [gkno genome analysis platform](http://gkno.me/).
-## Contributors
-FreeBayes is made by:
-- Erik Garrison
-- Thomas Sibley
-- Dillon Lee
-- Patrick Marks
-- Noah Spies
-- Joshua Randall
-- Jeremy Anderson
-## Support
-### email
-Please report any issues or questions to the [freebayes mailing
-list](https://groups.google.com/forum/#!forum/freebayes), [freebayes issue
-tracker](https://github.com/ekg/freebayes/issues), or by email to
-<erik.garrison at gmail.com>.
-### IRC
-If you would like to chat real-time about freebayes, join #freebayes on
-freenode. A gitter.im chat is also available.
-### reversion
-Note that if you encounter issues with the development HEAD and you would like
-a quick workaround for an issue that is likely to have been reintroduced
-recently, you can use `git checkout` to step back a few revisions.
- git checkout [git-commit-id]
-It will also help with debugging to know if a problem has arisen in recent
diff --git a/bin/.keep b/bin/.keep
deleted file mode 100644
index e69de29..0000000
diff --git a/debian/bash-tap/bash-tap b/debian/bash-tap/bash-tap
deleted file mode 100755
index e71fe9b..0000000
--- a/debian/bash-tap/bash-tap
+++ /dev/null
@@ -1,369 +0,0 @@
-# Our state.
-# Our test results so far
-unset _bt_test_ok
-unset _bt_test_actual_ok
-unset _bt_test_name
-unset _bt_test_type
-unset _bt_test_reason
-# Cleanup stuff.
-declare -a _bt_on_exit_cmds
-trap "_bt_on_exit" EXIT
-# Planning functions.
-function _bt_output_plan() {
- local num_tests="$1"
- local directive="$2"
- local reason="$3"
- if [ "$_bt_has_output_plan" = 1 ]; then
- _caller_error "The plan was already output"
- fi
- _bt_clear_out
- _bt_out "1..$num_tests"
- if [ -n "$directive" ]; then
- _bt_out " # $directive"
- fi
- if [ -n "$reason" ]; then
- _bt_out " $reason"
- fi
- _bt_print_out
- _bt_has_output_plan=1
-function plan() {
- local plan="$1"
- case "$plan" in
- no_plan) no_plan ;;
- skip_all) skip_all "$2" ;;
- tests) expected_tests "$2" ;;
- *) _bt_die "Unknown or missing plan: '$plan'" ;;
- esac
-function expected_tests() {
- local num="$1"
- if [ -z "$num" ]; then
- echo $_bt_expected_tests
- else
- if [ -n "$_bt_plan" ]; then
- _bt_caller_error "Plan is already defined"
- fi
- # TODO: validate
- _bt_plan="$num"
- _bt_expected_tests="$num"
- _bt_output_plan "$_bt_expected_tests"
- fi
-function no_plan() {
- if [ -n "$_bt_plan" ]; then
- _bt_caller_error "Plan is already defined"
- fi
- _bt_plan="no plan"
-function done_testing() {
- local num_tests="$1"
- if [ -z "$num_tests" ]; then
- num_tests="$_bt_current_test"
- fi
- if [ "$_bt_done_testing" = 1 ]; then
- _bt_caller_error "done_testing was already called"
- fi
- if [ "$_bt_expected_tests" != 0 -a "$num_tests" != "$_bt_expected_tests" ]; then
- ok 0 "planned to run $_bt_expected_tests but done_testing expects $num_tests"
- else
- _bt_expected_tests="$num_tests"
- fi
- if [ "$_bt_has_output_plan" = 0 ]; then
- _bt_plan="done testing"
- _bt_output_plan "$num_tests"
- fi
-function has_plan() {
- test -n "$_bt_plan"
-function skip_all() {
- local reason="${*:?}"
- _bt_output_plan 0 SKIP "$reason"
-# Test functions.
-function ok() {
- local result="$1"
- local name="$2"
- _bt_current_test=$((_bt_current_test + 1))
- # TODO: validate $name
- if [ -z "$name" ]; then
- name='unnamed test'
- fi
- name="${name//#/\\#}"
- _bt_clear_out
- if [ "$result" = 0 ]; then
- _bt_out "not ok"
- if [ -n "$TODO" ]; then
- _bt_test_ok[$_bt_current_test]=1
- else
- _bt_test_ok[$_bt_current_test]=0
- fi
- _bt_test_actual_ok[$_bt_current_test]=0
- else
- _bt_out "ok"
- _bt_test_ok[$_bt_current_test]=1
- _bt_test_actual_ok[$_bt_current_test]="$result"
- fi
- _bt_out " $_bt_current_test - $name"
- _bt_test_name[$_bt_current_test]="$name"
- if [ -n "$TODO" ]; then
- _bt_out " # TODO $TODO"
- _bt_test_reason[$_bt_current_test]="$TODO"
- _bt_test_type[$_bt_current_test]="todo"
- else
- _bt_test_reason[$_bt_current_test]=''
- _bt_test_type[$_bt_current_test]=''
- fi
- _bt_print_out
-function _is_diag() {
- local result="$1"
- local expected="$2"
- diag " got: '$result'"
- diag " expected: '$expected'"
-function is() {
- local result="$1"
- local expected="$2"
- local name="$3"
- if [ "$result" = "$expected" ]; then
- ok 1 "$name"
- else
- ok 0 "$name"
- _is_diag "$result" "$expected"
- fi
-function _isnt_diag() {
- local result="$1"
- local expected="$2"
- diag " got: '$result'"
- diag " expected: anything else"
-function isnt() {
- local result="$1"
- local expected="$2"
- local name="$3"
- if [ "$result" != "$expected" ]; then
- ok 1 "$name"
- else
- ok 0 "$name"
- _isnt_diag "$result" "$expected"
- fi
-function like() {
- local result="$1"
- local pattern="$2"
- local name="$3"
- # NOTE: leave $pattern unquoted, see http://stackoverflow.com/a/218217/870000
- if [[ "$result" =~ $pattern ]]; then
- ok 1 "$name"
- else
- ok 0 "$name"
- diag " got: '$result'"
- diag " expected: match for '$pattern'"
- fi
-function unlike() {
- local result="$1"
- local pattern="$2"
- local name="$3"
- # NOTE: leave $pattern unquoted, see http://stackoverflow.com/a/218217/870000
- if [[ ! "$result" =~ $pattern ]]; then
- ok 1 "$name"
- else
- ok 0 "$name"
- diag " got: '$result'"
- diag " expected: no match for '$pattern'"
- fi
-function cmp_ok() {
- echo TODO
-# Other helper functions
-function BAIL_OUT() {
- echo TODO
-function skip() {
- echo TODO
-function todo_skip() {
- echo TODO
-function todo_start() {
- echo TODO
-function todo_end() {
- echo TODO
-# Output
-function diag() {
- local message="$1"
- if [ -n "$message" ]; then
- _bt_escaped_echo "# $message"
- fi
-# Util functions for output capture within current shell
-function start_output_capture() {
- if [ $_bt_output_capture = 1 ]; then
- finish_output_capture
- _bt_caller_error "Can't start output capture while already active"
- fi
- local stdout_tmpfile="/tmp/bash-itunes-test-out.$$"
- local stderr_tmpfile="/tmp/bash-itunes-test-err.$$"
- _bt_add_on_exit_cmd "rm -f '$stdout_tmpfile' '$stderr_tmpfile'"
- _bt_output_capture=1
- exec 3>&1 >$stdout_tmpfile 4>&2 2>$stderr_tmpfile
-function finish_output_capture() {
- local capture_stdout_varname="$1"
- local capture_stderr_varname="$2"
- if [ $_bt_output_capture != 1 ]; then
- _bt_caller_error "Can't finish output capture when it wasn't started"
- fi
- exec 1>&3 3>&- 2>&4 4>&-
- _bt_output_capture=0
- if [ -n "$capture_stdout_varname" ]; then
- local stdout_tmpfile="/tmp/bash-itunes-test-out.$$"
- eval "$capture_stdout_varname=\$(< $stdout_tmpfile)"
- fi
- if [ -n "$capture_stderr_varname" ]; then
- local stderr_tmpfile="/tmp/bash-itunes-test-err.$$"
- eval "$capture_stderr_varname=\$(< $stderr_tmpfile)"
- fi
-# Internals
-function _bt_stdout() {
- echo "$@"
-function _bt_stderr() {
- echo "$@" >&2
-function _bt_die() {
- _bt_stderr "$@"
- exit 255
-# Report an error from the POV of the first calling point outside this file
-function _bt_caller_error() {
- local message="$*"
- local thisfile="${BASH_SOURCE[0]}"
- local file="$thisfile"
- local frame_num=2
- until [ "$file" != "$thisfile" ]; do
- frame=$(caller "$frame_num")
- IFS=' ' read line func file <<<"$frame"
- done
- _bt_die "Error: $message, on line $line of $file"
-# Echo the supplied message with lines after the
-# first escaped as TAP comments.
-function _bt_escaped_echo() {
- local message="$*"
- local output=''
- while IFS= read -r line; do
- output="$output\n# $line"
- done <<<"$message"
- echo -e "${output:4}"
-function _bt_clear_out() {
- _bt_tap_output=""
-function _bt_out() {
- _bt_tap_output="$_bt_tap_output$*"
-function _bt_print_out() {
- _bt_escaped_echo "$_bt_tap_output"
-# Cleanup stuff
-function _bt_add_on_exit_cmd() {
- _bt_on_exit_cmds[${#_bt_on_exit_cmds[*]}]="$*"
-function _bt_on_exit() {
- if [ $_bt_output_capture = 1 ]; then
- finish_output_capture
- fi
- for exit_cmd in "${_bt_on_exit_cmds[@]}"; do
- diag "cleanup: $exit_cmd"
- eval "$exit_cmd"
- done
- # TODO: check that we've output a plan/results
diff --git a/debian/bash-tap/bash-tap-bootstrap b/debian/bash-tap/bash-tap-bootstrap
deleted file mode 100755
index 23074de..0000000
--- a/debian/bash-tap/bash-tap-bootstrap
+++ /dev/null
@@ -1,28 +0,0 @@
-# Bash TAP Bootstrap:
-# Copy this file into your project tests dir and source it
-# from each test file with:
-# . $(dirname $0)/bash-tap-bootstrap
-# It takes care of finding bash-tap or outputing a usage message.
-if [ "${BASH_SOURCE[0]}" = "$0" ]; then
- # Being run directly, probably by test harness running entire dir.
- echo "1..0 # SKIP bash-tap-bootstrap isn't a test file"
- exit 0
-if [ -z "$BASH_TAP_ROOT" ]; then
- # TODO: search likely locations.
- BASH_TAP_ROOT="$(dirname ${BASH_SOURCE[0]})/../../bash-tap"
-if [ -f "$BASH_TAP_ROOT/bash-tap" ]; then
- . "$BASH_TAP_ROOT/bash-tap"
- echo "Bail out! Unable to find bash-tap. Install from https://github.com/illusori/bash-tap or set \$BASH_TAP_ROOT if you have it installed somewhere unusual."
- exit 255
diff --git a/debian/bash-tap/bash-tap-mock b/debian/bash-tap/bash-tap-mock
deleted file mode 100755
index 1800198..0000000
--- a/debian/bash-tap/bash-tap-mock
+++ /dev/null
@@ -1,106 +0,0 @@
-# While not directly TAP-specific, being able to mock stuff
-# in tests is pretty useful.
-# If you're using bash-tap-bootstrap, then just source this
-# file in your tests from the bash-tap directory found by
-# the bootstrap by including this line after you've sourced
-# bash-tap-bootstrap:
-# . "$BASH_TAP_ROOT/bash-tap-mock"
-# If you're not using bash-tap-bootstrap then copy this file
-# to your test directory and source it with:
-# . $(dirname $0)/bash-tap-mock
-# It's important to note that if you're capturing the arguments
-# passed to your mock function in a variable, and want that
-# variable to be accessible to your tests, you must ensure that
-# the mocked function is executed in the current shell and not
-# a subshell. In particular, this means you cannot use $() or
-# `` to capture output of the function at the same time, as these
-# invoke a subshell - the mock will happen, but any variables you
-# set within your mock will only exist within the subshell.
-# If you wish to capture output at the same time, you need to
-# make use of the start_output_capture and finish_output_capture
-# helper functons in bash-tap, or manually use file-descriptor
-# redirects yourself to achieve the same effect.
-if [ "${BASH_SOURCE[0]}" = "$0" ]; then
- # Being run directly, probably by test harness running entire dir.
- echo "1..0 # SKIP bash-tap-mock isn't a test file"
- exit 0
-function mock_function() {
- local original_name="$1"
- local mock_name="$2"
- local save_original_as="_btm_mocked_${original_name}"
- if [ -z $(declare -F "$save_original_as") ]; then
- _btm_copy_function "$original_name" "$save_original_as"
- fi
- _btm_copy_function "$mock_name" "$original_name"
-function restore_mocked_function() {
- local original_name="$1"
- local save_original_as="_btm_mocked_${original_name}"
- if [ ! -z $(declare -F "$save_original_as") ]; then
- _btm_copy_function "$save_original_as" "$original_name"
- unset -f "$save_original_as"
- else
- _btm_caller_error "Can't find saved original function '$original_name' to restore"
- fi
-function mock_command() {
- local command_name="$1"
- local mock_name="$2"
- if [ ! -z $(declare -F "$command_name") ]; then
- # It's not actually a command, it's a function, mock that
- mock_function "$command_name" "$mock_name"
- else
- _btm_copy_function "$mock_name" "$command_name"
- fi
-function restore_mocked_command() {
- local command_name="$1"
- local save_original_as="_btm_mocked_${command_name}"
- if [ ! -z $(declare -F "$save_original_as") ]; then
- # Was actually a function mock not a command mock.
- restore_mocked_function "$command_name"
- else
- unset -f "$command_name" >/dev/null
- fi
-# Copied from http://stackoverflow.com/a/1203628/870000
-function _btm_copy_function() {
- declare -F $1 >/dev/null || _btm_caller_error "Can't find function '$1' to copy"
- eval "$(echo "${2}()"; declare -f ${1} | tail -n +2)"
-# Report an error from the POV of the first calling point outside this file
-function _btm_caller_error() {
- local message="$*"
- local thisfile="${BASH_SOURCE[0]}"
- local file="$thisfile"
- local frame_num=2
- until [ "$file" != "$thisfile" ]; do
- frame=$(caller "$frame_num")
- IFS=' ' read line func file <<<"$frame"
- done
- echo "Error: $message, on line $line of $file" >&2
- exit 255
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index eaeaad5..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,5 +0,0 @@
-freebayes (1.0.2-1) unstable; urgency=low
- * Initial packaging (Closes: #851306)
- -- Andreas Tille <tille at debian.org> Tue, 08 Nov 2016 11:40:28 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index f11c82a..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
\ No newline at end of file
diff --git a/debian/control b/debian/control
deleted file mode 100644
index 301577b..0000000
--- a/debian/control
+++ /dev/null
@@ -1,31 +0,0 @@
-Source: freebayes
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Brad Chapman <chapmanb at 50mail.com>,
- Andreas Tille <tille at debian.org>
-Section: science
-Priority: optional
-Build-Depends: debhelper (>= 9),
- cmake,
- pkg-config,
- zlib1g-dev,
- libbamtools-dev,
- libvcflib-dev,
- libtabixpp-dev,
- bc,
- samtools
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/cgit/debian-med/freebayes.git
-Vcs-Git: https://anonscm.debian.org/git/debian-med/freebayes.git
-Homepage: https://github.com/ekg/freebayes
-Package: freebayes
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends}
-Description: Bayesian haplotype-based polymorphism discovery and genotyping
- FreeBayes is a Bayesian genetic variant detector designed to find
- small polymorphisms, specifically SNPs (single-nucleotide
- polymorphisms), indels (insertions and deletions), MNPs
- (multi-nucleotide polymorphisms), and complex events (composite
- insertion and substitution events) smaller than the length of a
- short-read sequencing alignment.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index 6e67c9b..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,64 +0,0 @@
-Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: freebayes
-Source: https://github.com/ekg/freebayes
-Files: *
-Copyright: 2010-2014 Erik Garrison, Gabor Marth
-License: MIT
-Files: debian/bash-tap/*
-Copyright: 2012-2016 Sam Graham
-License: MIT
-Files: ttmath/*
-Copyright: 2006-2012, Tomasz Sowa
-License: BSD-3-clause
- Redistribution and use in source and binary forms, with or without
- modification, are permitted provided that the following conditions are met:
- .
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- .
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- .
- * Neither the name Tomasz Sowa nor the names of contributors to this
- project may be used to endorse or promote products derived
- from this software without specific prior written permission.
- .
-Files: debian/*
-Copyright: 2014 Brad Chapman <chapmanb at 50mail.com>
-License: MIT
-License: MIT
- Permission is hereby granted, free of charge, to any person obtaining a
- copy of this software and associated documentation files (the "Software"),
- to deal in the Software without restriction, including without limitation
- the rights to use, copy, modify, merge, publish, distribute, sublicense,
- and/or sell copies of the Software, and to permit persons to whom the
- Software is furnished to do so, subject to the following conditions:
- .
- The above copyright notice and this permission notice shall be included
- in all copies or substantial portions of the Software.
- .
diff --git a/debian/install b/debian/install
deleted file mode 100644
index f82faed..0000000
--- a/debian/install
+++ /dev/null
@@ -1,2 +0,0 @@
-bin/freebayes usr/bin
-bin/bamleftalign usr/bin
\ No newline at end of file
diff --git a/debian/manpages b/debian/manpages
deleted file mode 100644
index 0f65186..0000000
--- a/debian/manpages
+++ /dev/null
@@ -1 +0,0 @@
diff --git a/debian/patches/fix_test.patch b/debian/patches/fix_test.patch
deleted file mode 100644
index a5af80c..0000000
--- a/debian/patches/fix_test.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Tue, 08 Nov 2016 11:40:28 +0100
-Description: vcflib is in a different package - do not test this
---- a/test/Makefile
-+++ b/test/Makefile
-@@ -5,7 +5,7 @@ vcfuniq=../vcflib/bin/vcfuniq
- all: test
--test: $(freebayes) $(vcfuniq)
-+test: $(freebayes)
- prove -v t
- $(freebayes):
diff --git a/debian/patches/series b/debian/patches/series
deleted file mode 100644
index e9ae68a..0000000
--- a/debian/patches/series
+++ /dev/null
@@ -1,3 +0,0 @@
diff --git a/debian/patches/use_debian_packaged_bamtools.patch b/debian/patches/use_debian_packaged_bamtools.patch
deleted file mode 100644
index 23f035e..0000000
--- a/debian/patches/use_debian_packaged_bamtools.patch
+++ /dev/null
@@ -1,89 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Wed, 28 May 2014 21:23:38 +0200
-Description: Use Debian packaged bamtools
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -12,11 +12,10 @@ C=gcc
- #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2
- VCFLIB_ROOT=../vcflib
--LIBS = -L./ -L$(VCFLIB_ROOT)/tabixpp/ -L$(BAMTOOLS_ROOT)/lib -ltabix -lz -lm
-+LIBS = -L./ -L$(VCFLIB_ROOT)/tabixpp/ -lbamtools -ltabixpp -lz -lm
-+INCLUDE = -I/usr/include/bamtools -I../ttmath -I$(VCFLIB_ROOT)/src -I$(VCFLIB_ROOT)/
- all: autoversion ../bin/freebayes ../bin/bamleftalign
-@@ -34,10 +33,6 @@ gprof:
- .PHONY: all static debug profiling gprof
--# builds bamtools static lib, and copies into root
-- cd $(BAMTOOLS_ROOT) && mkdir -p build && cd build && cmake .. && $(MAKE)
- OBJECTS=BedReader.o \
- CNV.o \
-@@ -70,8 +65,7 @@ OBJECTS=BedReader.o \
- ../vcflib/smithwaterman/LeftAlign.o \
- ../vcflib/smithwaterman/Repeats.o \
- ../vcflib/smithwaterman/IndelAllele.o \
-- Variant.o \
-- $(BAMTOOLS_ROOT)/lib/libbamtools.a
-+ Variant.o
- HEADERS=multichoose.h version_git.h
-@@ -86,10 +80,10 @@ alleles ../bin/alleles: alleles.o $(OBJE
- dummy ../bin/dummy: dummy.o $(OBJECTS) $(HEADERS)
- $(CXX) $(CFLAGS) $(INCLUDE) dummy.o $(OBJECTS) -o ../bin/dummy $(LIBS)
--bamleftalign ../bin/bamleftalign: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o
-- $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o $(BAMTOOLS_ROOT)/lib/libbamtools.a -o ../bin/bamleftalign $(LIBS)
-+bamleftalign ../bin/bamleftalign: bamleftalign.o Fasta.o
-+ $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o -o ../bin/bamleftalign -lbamtools
--bamfiltertech ../bin/bamfiltertech: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamfiltertech.o $(OBJECTS) $(HEADERS)
-+bamfiltertech ../bin/bamfiltertech: bamfiltertech.o $(OBJECTS) $(HEADERS)
- $(CXX) $(CFLAGS) $(INCLUDE) bamfiltertech.o $(OBJECTS) -o ../bin/bamfiltertech $(LIBS)
-@@ -104,7 +98,7 @@ alleles.o: alleles.cpp AlleleParser.o Al
- dummy.o: dummy.cpp AlleleParser.o Allele.o
- $(CXX) $(CFLAGS) $(INCLUDE) -c dummy.cpp
--freebayes.o: freebayes.cpp TryCatch.h $(BAMTOOLS_ROOT)/lib/libbamtools.a
-+freebayes.o: freebayes.cpp TryCatch.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c freebayes.cpp
- fastlz.o: fastlz.c fastlz.h
-@@ -125,7 +119,7 @@ Genotype.o: Genotype.cpp Genotype.h Alle
- Ewens.o: Ewens.cpp Ewens.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Ewens.cpp
--AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(BAMTOOLS_ROOT)/lib/libbamtools.a
-+AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c AlleleParser.cpp
- Utility.o: Utility.cpp Utility.h Sum.h Product.h
-@@ -173,7 +167,7 @@ bamleftalign.o: bamleftalign.cpp LeftAli
- bamfiltertech.o: bamfiltertech.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c bamfiltertech.cpp
--LeftAlign.o: LeftAlign.h LeftAlign.cpp $(BAMTOOLS_ROOT)/lib/libbamtools.a
-+LeftAlign.o: LeftAlign.h LeftAlign.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c LeftAlign.cpp
- IndelAllele.o: IndelAllele.cpp IndelAllele.h
-@@ -263,6 +257,5 @@ autoversion:
- clean:
- rm -rf *.o *.cgh *~ freebayes alleles ../bin/freebayes ../bin/alleles ../vcflib/*.o ../vcflib/tabixpp/*.{o,a}
-- cd $(BAMTOOLS_ROOT)/build && make clean
- cd ../vcflib/smithwaterman && make clean
diff --git a/debian/patches/use_debian_packaged_vcflib.patch b/debian/patches/use_debian_packaged_vcflib.patch
deleted file mode 100644
index dd907a8..0000000
--- a/debian/patches/use_debian_packaged_vcflib.patch
+++ /dev/null
@@ -1,292 +0,0 @@
-Author: Andreas Tille <tille at debian.org>
-Last-Update: Wed, 22 Jun 2016 14:36:13 +0200
-Description: vcflib needs to be packaged separately
---- a/src/Makefile
-+++ b/src/Makefile
-@@ -12,10 +12,8 @@ C=gcc
- #CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2
--LIBS = -L./ -L$(VCFLIB_ROOT)/tabixpp/ -lbamtools -ltabixpp -lz -lm
--INCLUDE = -I/usr/include/bamtools -I../ttmath -I$(VCFLIB_ROOT)/src -I$(VCFLIB_ROOT)/
-+LIBS = -lbamtools -ltabixpp -lz -lm -lvcflib `pkg-config --libs libsmithwaterman`
-+INCLUDE = -I/usr/include/bamtools -I../ttmath -I/usr/include/vcflib -I/usr/include/intervaltree `pkg-config --cflags libsmithwaterman` -ldisorder
- all: autoversion ../bin/freebayes ../bin/bamleftalign
-@@ -57,15 +55,7 @@ OBJECTS=BedReader.o \
- Bias.o \
- Contamination.o \
- NonCall.o \
-- SegfaultHandler.o \
-- ../vcflib/tabixpp/tabix.o \
-- ../vcflib/tabixpp/bgzf.o \
-- ../vcflib/smithwaterman/SmithWatermanGotoh.o \
-- ../vcflib/smithwaterman/disorder.c \
-- ../vcflib/smithwaterman/LeftAlign.o \
-- ../vcflib/smithwaterman/Repeats.o \
-- ../vcflib/smithwaterman/IndelAllele.o \
-- Variant.o
-+ SegfaultHandler.o
- HEADERS=multichoose.h version_git.h
-@@ -173,17 +163,6 @@ LeftAlign.o: LeftAlign.h LeftAlign.cpp
- IndelAllele.o: IndelAllele.cpp IndelAllele.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c IndelAllele.cpp
--Variant.o: $(VCFLIB_ROOT)/src/Variant.h $(VCFLIB_ROOT)/src/Variant.cpp
-- $(CXX) $(CFLAGS) $(INCLUDE) -c $(VCFLIB_ROOT)/src/Variant.cpp
--../vcflib/tabixpp/tabix.o: ../vcflib/tabixpp/tabix.hpp ../vcflib/tabixpp/tabix.cpp
--../vcflib/tabixpp/bgzf.o: ../vcflib/tabixpp/bgzf.c ../vcflib/tabixpp/bgzf.h
-- cd ../vcflib/tabixpp && $(MAKE)
--../vcflib/smithwaterman/SmithWatermanGotoh.o: ../vcflib/smithwaterman/SmithWatermanGotoh.h ../vcflib/smithwaterman/SmithWatermanGotoh.cpp
-- cd ../vcflib/smithwaterman && $(MAKE)
- VERSION_FILE=./version_git.h
- RELEASED_VERSION_FILE=./version_release.txt
-@@ -257,5 +236,4 @@ autoversion:
- clean:
- rm -rf *.o *.cgh *~ freebayes alleles ../bin/freebayes ../bin/alleles ../vcflib/*.o ../vcflib/tabixpp/*.{o,a}
-- cd ../vcflib/smithwaterman && make clean
---- a/Makefile
-+++ b/Makefile
-@@ -1,4 +1,4 @@
--all: vcflib/Makefile log
- cd src && $(MAKE)
- log: src/version_git.h
---- a/src/AlleleParser.cpp
-+++ b/src/AlleleParser.cpp
-@@ -485,7 +485,7 @@ void AlleleParser::setupVCFInput(void) {
- // variant input for analysis and targeting
- if (!parameters.variantPriorsFile.empty()) {
- variantCallInputFile.open(parameters.variantPriorsFile);
-- currentVariant = new vcf::Variant(variantCallInputFile);
-+ currentVariant = new vcflib::Variant(variantCallInputFile);
- usingVariantInputAlleles = true;
- // get sample names from VCF input file
-@@ -1108,7 +1108,7 @@ void AlleleParser::updateHaplotypeBasisA
- pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1)) {
- //cerr << "the vcf line " << haplotypeVariantInputFile.line << endl;
- // get the variants in the target region
-- vcf::Variant var(haplotypeVariantInputFile);
-+ vcflib::Variant var(haplotypeVariantInputFile);
- while (haplotypeVariantInputFile.getNextVariant(var)) {
- //cerr << "input variant: " << var << endl;
-@@ -1122,9 +1122,9 @@ void AlleleParser::updateHaplotypeBasisA
- }
- */
-- map<string, vector<vcf::VariantAllele> > variants = var.parsedAlternates();
-- for (map<string, vector<vcf::VariantAllele> >::iterator a = variants.begin(); a != variants.end(); ++a) {
-- for (vector<vcf::VariantAllele>::iterator v = a->second.begin(); v != a->second.end(); ++v) {
-+ map<string, vector<vcflib::VariantAllele> > variants = var.parsedAlternates();
-+ for (map<string, vector<vcflib::VariantAllele> >::iterator a = variants.begin(); a != variants.end(); ++a) {
-+ for (vector<vcflib::VariantAllele>::iterator v = a->second.begin(); v != a->second.end(); ++v) {
- //cerr << v->ref << "/" << v->alt << endl;
- if (v->ref != v->alt) {
- //cerr << "basis allele " << v->position << " " << v->ref << "/" << v->alt << endl;
-@@ -2107,7 +2107,7 @@ void AlleleParser::getInputVariantsInReg
- if (!usingVariantInputAlleles) return;
- // get the variants in the target region
-- vcf::Variant var(variantCallInputFile);
-+ vcflib::Variant var(variantCallInputFile);
- if (!seq.empty()) {
- variantCallInputFile.setRegion(seq, start, end);
- }
-@@ -2117,10 +2117,10 @@ void AlleleParser::getInputVariantsInReg
- long int pos = currentVariant->position - 1;
- // get alternate alleles
- bool includePreviousBaseForIndels = true;
-- map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
-+ map<string, vector<vcflib::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
- // TODO this would be a nice option: why does it not work?
-- //map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
-- vector< vector<vcf::VariantAllele> > orderedVariantAlleles;
-+ //map<string, vector<vcflib::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
-+ vector< vector<vcflib::VariantAllele> > orderedVariantAlleles;
- for (vector<string>::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) {
- orderedVariantAlleles.push_back(variantAlleles[*a]);
- }
-@@ -2128,14 +2128,14 @@ void AlleleParser::getInputVariantsInReg
- vector<Allele> genotypeAlleles;
- set<long int> alternatePositions;
-- for (vector< vector<vcf::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
-+ for (vector< vector<vcflib::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
-- vector<vcf::VariantAllele>& altAllele = *g;
-+ vector<vcflib::VariantAllele>& altAllele = *g;
- vector<Allele> alleles;
-- for (vector<vcf::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
-- vcf::VariantAllele& variant = *v;
-+ for (vector<vcflib::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
-+ vcflib::VariantAllele& variant = *v;
- long int allelePos = variant.position - 1;
- AlleleType type;
- string alleleSequence = variant.alt;
-@@ -2240,7 +2240,7 @@ void AlleleParser::updateInputVariants(l
- if (gotRegion) {
- // get the variants in the target region
-- vcf::Variant var(variantCallInputFile);
-+ vcflib::Variant var(variantCallInputFile);
- bool ok;
- while (ok = variantCallInputFile.getNextVariant(*currentVariant)) {
-@@ -2248,10 +2248,10 @@ void AlleleParser::updateInputVariants(l
- long int pos = currentVariant->position - 1;
- // get alternate alleles
- bool includePreviousBaseForIndels = true;
-- map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
-+ map<string, vector<vcflib::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
- // TODO this would be a nice option: why does it not work?
-- //map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
-- vector< vector<vcf::VariantAllele> > orderedVariantAlleles;
-+ //map<string, vector<vcflib::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
-+ vector< vector<vcflib::VariantAllele> > orderedVariantAlleles;
- for (vector<string>::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) {
- orderedVariantAlleles.push_back(variantAlleles[*a]);
- }
-@@ -2259,14 +2259,14 @@ void AlleleParser::updateInputVariants(l
- vector<Allele> genotypeAlleles;
- set<long int> alternatePositions;
-- for (vector< vector<vcf::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
-+ for (vector< vector<vcflib::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
-- vector<vcf::VariantAllele>& altAllele = *g;
-+ vector<vcflib::VariantAllele>& altAllele = *g;
- vector<Allele> alleles;
-- for (vector<vcf::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
-- vcf::VariantAllele& variant = *v;
-+ for (vector<vcflib::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
-+ vcflib::VariantAllele& variant = *v;
- long int allelePos = variant.position - 1;
- AlleleType type;
- string alleleSequence = variant.alt;
---- a/src/AlleleParser.h
-+++ b/src/AlleleParser.h
-@@ -163,9 +163,9 @@ public:
- BedReader bedReader;
- // VCF
-- vcf::VariantCallFile variantCallFile;
-- vcf::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis
-- vcf::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles
-+ vcflib::VariantCallFile variantCallFile;
-+ vcflib::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis
-+ vcflib::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles
- // input haplotype alleles
- //
-@@ -349,7 +349,7 @@ private:
- int currentRefID;
- BamAlignment currentAlignment;
-- vcf::Variant* currentVariant;
-+ vcflib::Variant* currentVariant;
- };
---- a/src/ResultData.cpp
-+++ b/src/ResultData.cpp
-@@ -5,8 +5,8 @@ using namespace std;
--vcf::Variant& Results::vcf(
-- vcf::Variant& var, // variant to update
-+vcflib::Variant& Results::vcf(
-+ vcflib::Variant& var, // variant to update
- BigFloat pHom,
- long double bestComboOddsRatio,
- //long double alleleSamplingProb,
-@@ -630,8 +630,8 @@ vcf::Variant& Results::vcf(
- }
--vcf::Variant& Results::gvcf(
-- vcf::Variant& var,
-+vcflib::Variant& Results::gvcf(
-+ vcflib::Variant& var,
- NonCalls& nonCalls,
- AlleleParser* parser) {
---- a/src/ResultData.h
-+++ b/src/ResultData.h
-@@ -41,8 +41,8 @@ public:
- }
- }
-- vcf::Variant& vcf(
-- vcf::Variant& var, // variant to update
-+ vcflib::Variant& vcf(
-+ vcflib::Variant& var, // variant to update
- BigFloat pHom,
- long double bestComboOddsRatio,
- //long double alleleSamplingProb,
-@@ -61,8 +61,8 @@ public:
- vector<string>& sequencingTechnologies,
- AlleleParser* parser);
-- vcf::Variant& gvcf(
-- vcf::Variant& var,
-+ vcflib::Variant& gvcf(
-+ vcflib::Variant& var,
- NonCalls& noncalls,
- AlleleParser* parser);
- };
---- a/src/freebayes.cpp
-+++ b/src/freebayes.cpp
-@@ -144,7 +144,7 @@ int main (int argc, char *argv[]) {
- || (parameters.gVCFchunk &&
- nonCalls.lastPos().second - nonCalls.firstPos().second
- > parameters.gVCFchunk))) {
-- vcf::Variant var(parser->variantCallFile);
-+ vcflib::Variant var(parser->variantCallFile);
- out << results.gvcf(var, nonCalls, parser) << endl;
- nonCalls.clear();
- }
-@@ -658,12 +658,12 @@ int main (int argc, char *argv[]) {
- // write the last gVCF record(s)
- if (parameters.gVCFout && !nonCalls.empty()) {
-- vcf::Variant var(parser->variantCallFile);
-+ vcflib::Variant var(parser->variantCallFile);
- out << results.gvcf(var, nonCalls, parser) << endl;
- nonCalls.clear();
- }
-- vcf::Variant var(parser->variantCallFile);
-+ vcflib::Variant var(parser->variantCallFile);
- out << results.vcf(
- var,
-@@ -696,7 +696,7 @@ int main (int argc, char *argv[]) {
- // write the last gVCF record
- if (parameters.gVCFout && !nonCalls.empty()) {
- Results results;
-- vcf::Variant var(parser->variantCallFile);
-+ vcflib::Variant var(parser->variantCallFile);
- out << results.gvcf(var, nonCalls, parser) << endl;
- nonCalls.clear();
- }
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 8045fdf..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/usr/bin/make -f
- dh $@
- echo "Skip autoinstall process - files are moved around by dh_install"
- mkdir -p $(CURDIR)/test/bash-tap/
- for bt in $(CURDIR)/debian/bash-tap/* ; do ln -s $${bt} $(CURDIR)/test/bash-tap/`basename $${bt}` ; done
- export PATH=/usr/lib/vcflib/binaries/:$(PATH) dh_auto_test && echo "Tests were running successfully"
- rm -rf $(CURDIR)/test/bash-tap
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/upstream/metadata b/debian/upstream/metadata
deleted file mode 100644
index eb3d836..0000000
--- a/debian/upstream/metadata
+++ /dev/null
@@ -1,8 +0,0 @@
- Author: Erik Garrison and Gabor Marth
- Title: Haplotype-based variant detection from short-read sequencing
- Journal: arXiv
- Year: 2012
- DOI: arXiv:1207.3907
- URL: http://arxiv.org/abs/1207.3907
- eprint: http://arxiv.org/pdf/1207.3907v2.pdf
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index d167ca1..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,3 +0,0 @@
-opts="uversionmangle=s/^9/0.9/" \
- https://github.com/ekg/freebayes/tags .*/archive/v?(\d\S*)\.tar\.gz
diff --git a/examples/pipeline.sh b/examples/pipeline.sh
deleted file mode 100644
index f6c8037..0000000
--- a/examples/pipeline.sh
+++ /dev/null
@@ -1,29 +0,0 @@
-mkdir -p $outdir
-time /share/home/erik/bin/bamtools merge -region $region \
- $(for file in $(cat $bamlist); do echo " -in "$file; done) \
- | time /share/home/erik/bin/ogap -z -R 25 -C 20 -Q 20 -S 0 -f $reference \
- | time /share/home/erik/bin/bamleftalign -f $reference \
- | time /share/home/erik/bin/samtools calmd -EAru - $reference 2>/dev/null \
- | time /share/home/erik/bin/freebayes \
- --min-alternate-count 2 \
- --min-alternate-qsum 40 \
- --pvar 0.0001 \
- --use-mapping-quality \
- --posterior-integration-limits 1,3 \
- --genotype-variant-threshold 4 \
- --site-selection-max-iterations 3 \
- --genotyping-max-iterations 25 \
- --max-complex-gap 3 \
- --cnv-map $cnvmap \
- --stdin \
- --region $region \
- -f $reference \
- | gzip >$outdir/$region.vcf.gz
diff --git a/paper/1000G_performance_comparison.png b/paper/1000G_performance_comparison.png
deleted file mode 100644
index 7661678..0000000
Binary files a/paper/1000G_performance_comparison.png and /dev/null differ
diff --git a/paper/100samples10x_0_25_both.png b/paper/100samples10x_0_25_both.png
deleted file mode 100644
index 11eaa23..0000000
Binary files a/paper/100samples10x_0_25_both.png and /dev/null differ
diff --git a/paper/Makefile b/paper/Makefile
deleted file mode 100644
index d46e331..0000000
--- a/paper/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-all: paper
- pdflatex main
- bibtex main
- pdflatex main
- rm -f main.{aux,bbl,blg,log,pdf}
diff --git a/paper/genome_research.bst b/paper/genome_research.bst
deleted file mode 100644
index 0c5e2dc..0000000
--- a/paper/genome_research.bst
+++ /dev/null
@@ -1,1620 +0,0 @@
-%% This is file `genome_research.bst',
-%% generated with the docstrip utility.
-%% The original source files were:
-%% merlin.mbs (with options: `ay,nat,vonx,nm-rvx,nmlm,x10,x0,m10,m0,keyxyr,dt-beg,yr-blk,note-yr,vol-bf,vnum-x,volp-sp,num-xser,jnm-x,edby-par,edbyx,pp,ed,ord,jabr,nfss,')
-%% ----------------------------------------
-%% *** Genome Research ***
-%% Copyright 1994-2007 Patrick W Daly
- % ===============================================================
- % This bibliographic style (bst) file has been generated from one or
- % more master bibliographic style (mbs) files, listed above.
- %
- % This generated file can be redistributed and/or modified under the terms
- % of the LaTeX Project Public License Distributed from CTAN
- % archives in directory macros/latex/base/lppl.txt; either
- % version 1 of the License, or any later version.
- % ===============================================================
- % Name and version information of the main mbs file:
- % \ProvidesFile{merlin.mbs}[2007/04/24 4.20 (PWD, AO, DPC)]
- % For use with BibTeX version 0.99a or later
- %-------------------------------------------------------------------
- % This bibliography style file is intended for texts in ENGLISH
- % This is an author-year citation style bibliography. As such, it is
- % non-standard LaTeX, and requires a special package file to function properly.
- % Such a package is natbib.sty by Patrick W. Daly
- % The form of the \bibitem entries is
- % \bibitem[Jones et al.(1990)]{key}...
- % \bibitem[Jones et al.(1990)Jones, Baker, and Smith]{key}...
- % The essential feature is that the label (the part in brackets) consists
- % of the author names, as they should appear in the citation, with the year
- % in parentheses following. There must be no space before the opening
- % parenthesis!
- % With natbib v5.3, a full list of authors may also follow the year.
- % In natbib.sty, it is possible to define the type of enclosures that is
- % really wanted (brackets or parentheses), but in either case, there must
- % be parentheses in the label.
- % The \cite command functions as follows:
- % \citet{key} ==>> Jones et al. (1990)
- % \citet*{key} ==>> Jones, Baker, and Smith (1990)
- % \citep{key} ==>> (Jones et al., 1990)
- % \citep*{key} ==>> (Jones, Baker, and Smith, 1990)
- % \citep[chap. 2]{key} ==>> (Jones et al., 1990, chap. 2)
- % \citep[e.g.][]{key} ==>> (e.g. Jones et al., 1990)
- % \citep[e.g.][p. 32]{key} ==>> (e.g. Jones et al., p. 32)
- % \citeauthor{key} ==>> Jones et al.
- % \citeauthor*{key} ==>> Jones, Baker, and Smith
- % \citeyear{key} ==>> 1990
- %---------------------------------------------------------------------
- { address
- author
- booktitle
- chapter
- edition
- editor
- eid
- howpublished
- institution
- journal
- key
- month
- note
- number
- organization
- pages
- publisher
- school
- series
- title
- type
- volume
- year
- }
- {}
- { label extra.label sort.label short.list }
-INTEGERS { output.state before.all mid.sentence after.sentence after.block }
-FUNCTION {init.state.consts}
-{ #0 'before.all :=
- #1 'mid.sentence :=
- #2 'after.sentence :=
- #3 'after.block :=
-STRINGS { s t}
-FUNCTION {output.nonnull}
-{ 's :=
- output.state mid.sentence =
- { ", " * write$ }
- { output.state after.block =
- { add.period$ write$
- newline$
- "\newblock " write$
- }
- { output.state before.all =
- 'write$
- { add.period$ " " * write$ }
- if$
- }
- if$
- mid.sentence 'output.state :=
- }
- if$
- s
-FUNCTION {output}
-{ duplicate$ empty$
- 'pop$
- 'output.nonnull
- if$
-FUNCTION {output.check}
-{ 't :=
- duplicate$ empty$
- { pop$ "empty " t * " in " * cite$ * warning$ }
- 'output.nonnull
- if$
-FUNCTION {fin.entry}
-{ add.period$
- write$
- newline$
-FUNCTION {new.block}
-{ output.state before.all =
- 'skip$
- { after.block 'output.state := }
- if$
-FUNCTION {new.sentence}
-{ output.state after.block =
- 'skip$
- { output.state before.all =
- 'skip$
- { after.sentence 'output.state := }
- if$
- }
- if$
-FUNCTION {add.blank}
-{ " " * before.all 'output.state :=
-FUNCTION {date.block}
- new.block
-{ { #0 }
- { #1 }
- if$
-{ 'skip$
- { pop$ #0 }
- if$
-{ { pop$ #1 }
- 'skip$
- if$
-FUNCTION {remove.dots}
-{ 'z :=
- ""
- { z empty$ not }
- { z #1 #1 substring$
- z #2 global.max$ substring$ 'z :=
- duplicate$ "." = 'pop$
- { * }
- if$
- }
- while$
-FUNCTION {new.block.checkb}
-{ empty$
- swap$ empty$
- and
- 'skip$
- 'new.block
- if$
-FUNCTION {field.or.null}
-{ duplicate$ empty$
- { pop$ "" }
- 'skip$
- if$
-FUNCTION {emphasize}
-{ duplicate$ empty$
- { pop$ "" }
- { "\emph{" swap$ * "}" * }
- if$
-FUNCTION {bolden}
-{ duplicate$ empty$
- { pop$ "" }
- { "\textbf{" swap$ * "}" * }
- if$
-FUNCTION {tie.or.space.prefix}
-{ duplicate$ text.length$ #3 <
- { "~" }
- { " " }
- if$
- swap$
-FUNCTION {capitalize}
-{ "u" change.case$ "t" change.case$ }
-FUNCTION {space.word}
-{ " " swap$ * " " * }
- % Here are the language-specific definitions for explicit words.
- % Each function has a name bbl.xxx where xxx is the English word.
- % The language selected here is ENGLISH
-FUNCTION {bbl.and}
-{ "and"}
-FUNCTION {bbl.etal}
-{ "et~al." }
-FUNCTION {bbl.editors}
-{ "eds." }
-FUNCTION {bbl.editor}
-{ "ed." }
-FUNCTION {bbl.edby}
-{ "edited by" }
-FUNCTION {bbl.edition}
-{ "edition" }
-FUNCTION {bbl.volume}
-{ "volume" }
-FUNCTION {bbl.of}
-{ "of" }
-FUNCTION {bbl.number}
-{ "number" }
-FUNCTION {bbl.nr}
-{ "no." }
-FUNCTION {bbl.in}
-{ "in" }
-FUNCTION {bbl.pages}
-{ "pp." }
-FUNCTION {bbl.page}
-{ "p." }
-FUNCTION {bbl.chapter}
-{ "chapter" }
-FUNCTION {bbl.techrep}
-{ "Technical Report" }
-FUNCTION {bbl.mthesis}
-{ "Master's thesis" }
-FUNCTION {bbl.phdthesis}
-{ "Ph.D. thesis" }
-FUNCTION {bbl.first}
-{ "1st" }
-FUNCTION {bbl.second}
-{ "2nd" }
-FUNCTION {bbl.third}
-{ "3rd" }
-FUNCTION {bbl.fourth}
-{ "4th" }
-FUNCTION {bbl.fifth}
-{ "5th" }
-FUNCTION {bbl.st}
-{ "st" }
-FUNCTION {bbl.nd}
-{ "nd" }
-FUNCTION {bbl.rd}
-{ "rd" }
-FUNCTION {bbl.th}
-{ "th" }
-MACRO {jan} {"January"}
-MACRO {feb} {"February"}
-MACRO {mar} {"March"}
-MACRO {apr} {"April"}
-MACRO {may} {"May"}
-MACRO {jun} {"June"}
-MACRO {jul} {"July"}
-MACRO {aug} {"August"}
-MACRO {sep} {"September"}
-MACRO {oct} {"October"}
-MACRO {nov} {"November"}
-MACRO {dec} {"December"}
-FUNCTION {eng.ord}
-{ duplicate$ "1" swap$ *
- #-2 #1 substring$ "1" =
- { bbl.th * }
- { duplicate$ #-1 #1 substring$
- duplicate$ "1" =
- { pop$ bbl.st * }
- { duplicate$ "2" =
- { pop$ bbl.nd * }
- { "3" =
- { bbl.rd * }
- { bbl.th * }
- if$
- }
- if$
- }
- if$
- }
- if$
-MACRO {acmcs} {"ACM Comput. Surv."}
-MACRO {acta} {"Acta Inf."}
-MACRO {cacm} {"Commun. ACM"}
-MACRO {ibmjrd} {"IBM J. Res. Dev."}
-MACRO {ibmsj} {"IBM Syst.~J."}
-MACRO {ieeese} {"IEEE Trans. Software Eng."}
-MACRO {ieeetc} {"IEEE Trans. Comput."}
-MACRO {ieeetcad}
- {"IEEE Trans. Comput. Aid. Des."}
-MACRO {ipl} {"Inf. Process. Lett."}
-MACRO {jacm} {"J.~ACM"}
-MACRO {jcss} {"J.~Comput. Syst. Sci."}
-MACRO {scp} {"Sci. Comput. Program."}
-MACRO {sicomp} {"SIAM J. Comput."}
-MACRO {tocs} {"ACM Trans. Comput. Syst."}
-MACRO {tods} {"ACM Trans. Database Syst."}
-MACRO {tog} {"ACM Trans. Graphic."}
-MACRO {toms} {"ACM Trans. Math. Software"}
-MACRO {toois} {"ACM Trans. Office Inf. Syst."}
-MACRO {toplas} {"ACM Trans. Progr. Lang. Syst."}
-MACRO {tcs} {"Theor. Comput. Sci."}
-FUNCTION {bibinfo.check}
-{ swap$
- duplicate$ missing$
- {
- pop$ pop$
- ""
- }
- { duplicate$ empty$
- {
- swap$ pop$
- }
- { swap$
- pop$
- }
- if$
- }
- if$
-FUNCTION {bibinfo.warn}
-{ swap$
- duplicate$ missing$
- {
- swap$ "missing " swap$ * " in " * cite$ * warning$ pop$
- ""
- }
- { duplicate$ empty$
- {
- swap$ "empty " swap$ * " in " * cite$ * warning$
- }
- { swap$
- pop$
- }
- if$
- }
- if$
-INTEGERS { nameptr namesleft numnames }
-STRINGS { bibinfo}
-FUNCTION {format.names}
-{ 'bibinfo :=
- duplicate$ empty$ 'skip$ {
- 's :=
- "" 't :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{vv~}{ll}{ jj}{ f{}}"
- format.name$
- remove.dots
- bibinfo bibinfo.check
- 't :=
- nameptr #1 >
- {
- nameptr #0
- #10 +
- #1 + =
- numnames #0
- #10 +
- > and
- { "others" 't :=
- #1 'namesleft := }
- 'skip$
- if$
- namesleft #1 >
- { ", " * t * }
- {
- s nameptr "{ll}" format.name$ duplicate$ "others" =
- { 't := }
- { pop$ }
- if$
- numnames #2 >
- { "," * }
- 'skip$
- if$
- t "others" =
- {
- " " * bbl.etal *
- }
- {
- bbl.and
- space.word * t *
- }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
- } if$
-FUNCTION {format.names.ed}
- 'bibinfo :=
- duplicate$ empty$ 'skip$ {
- 's :=
- "" 't :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{f{}~}{vv~}{ll}{ jj}"
- format.name$
- remove.dots
- bibinfo bibinfo.check
- 't :=
- nameptr #1 >
- {
- namesleft #1 >
- { ", " * t * }
- {
- s nameptr "{ll}" format.name$ duplicate$ "others" =
- { 't := }
- { pop$ }
- if$
- numnames #2 >
- { "," * }
- 'skip$
- if$
- t "others" =
- {
- " " * bbl.etal *
- }
- {
- bbl.and
- space.word * t *
- }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
- } if$
-FUNCTION {format.key}
-{ empty$
- { key field.or.null }
- { "" }
- if$
-FUNCTION {format.authors}
-{ author "author" format.names * "." %added to give period at end of author list
-FUNCTION {get.bbl.editor}
-{ editor num.names$ #1 > 'bbl.editors 'bbl.editor if$ }
-FUNCTION {format.editors}
-{ editor "editor" format.names duplicate$ empty$ 'skip$
- {
- "," *
- " " *
- get.bbl.editor
- *
- }
- if$
-FUNCTION {format.note}
- note empty$
- { "" }
- { note #1 #1 substring$
- duplicate$ "{" =
- 'skip$
- { output.state mid.sentence =
- { "l" }
- { "u" }
- if$
- change.case$
- }
- if$
- note #2 global.max$ substring$ * "note" bibinfo.check
- }
- if$
-FUNCTION {format.title}
-{ title
- duplicate$ empty$ 'skip$
- { "t" change.case$ }
- if$
- "title" bibinfo.check
-FUNCTION {format.full.names}
-{'s :=
- "" 't :=
- #1 'nameptr :=
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{vv~}{ll}" format.name$
- 't :=
- nameptr #1 >
- {
- nameptr #0
- #10 +
- #1 + =
- numnames #0
- #10 +
- > and
- { "others" 't :=
- #1 'namesleft := }
- 'skip$
- if$
- namesleft #1 >
- { ", " * t * }
- {
- s nameptr "{ll}" format.name$ duplicate$ "others" =
- { 't := }
- { pop$ }
- if$
- t "others" =
- {
- " " * bbl.etal *
- }
- {
- numnames #2 >
- { "," * }
- 'skip$
- if$
- bbl.and
- space.word * t *
- }
- if$
- }
- if$
- }
- 't
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-FUNCTION {author.editor.key.full}
-{ author empty$
- { editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.full.names }
- if$
- }
- { author format.full.names }
- if$
-FUNCTION {author.key.full}
-{ author empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { author format.full.names }
- if$
-FUNCTION {editor.key.full}
-{ editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.full.names }
- if$
-FUNCTION {make.full.names}
-{ type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.key.full
- { type$ "proceedings" =
- 'editor.key.full
- 'author.key.full
- if$
- }
- if$
-FUNCTION {output.bibitem}
-{ newline$
- "\bibitem[{" write$
- label write$
- ")" make.full.names duplicate$ short.list =
- { pop$ }
- { * }
- if$
- "}]{" * write$
- cite$ write$
- "}" write$
- newline$
- ""
- before.all 'output.state :=
-FUNCTION {n.dashify}
- 't :=
- ""
- { t empty$ not }
- { t #1 #1 substring$ "-" =
- { t #1 #2 substring$ "--" = not
- { "--" *
- t #2 global.max$ substring$ 't :=
- }
- { { t #1 #1 substring$ "-" = }
- { "-" *
- t #2 global.max$ substring$ 't :=
- }
- while$
- }
- if$
- }
- { t #1 #1 substring$ *
- t #2 global.max$ substring$ 't :=
- }
- if$
- }
- while$
-FUNCTION {word.in}
-{ bbl.in capitalize
- " " * }
-FUNCTION {format.date}
-{ year "year" bibinfo.check duplicate$ empty$
- {
- "empty year in " cite$ * "; set to ????" * warning$
- pop$ "????"
- }
- 'skip$
- if$
- extra.label *
- before.all 'output.state :=
- " " swap$ *
-FUNCTION {format.btitle}
-{ title "title" bibinfo.check
- duplicate$ empty$ 'skip$
- {
- emphasize
- }
- if$
-FUNCTION {either.or.check}
-{ empty$
- 'pop$
- { "can't use both " swap$ * " fields in " * cite$ * warning$ }
- if$
-FUNCTION {format.bvolume}
-{ volume empty$
- { "" }
- { bbl.volume volume tie.or.space.prefix
- "volume" bibinfo.check * *
- series "series" bibinfo.check
- duplicate$ empty$ 'pop$
- { swap$ bbl.of space.word * swap$
- emphasize * }
- if$
- "volume and number" number either.or.check
- }
- if$
-FUNCTION {format.number.series}
-{ volume empty$
- { number empty$
- { series field.or.null }
- { series empty$
- { number "number" bibinfo.check }
- { output.state mid.sentence =
- { bbl.number }
- { bbl.number capitalize }
- if$
- number tie.or.space.prefix "number" bibinfo.check * *
- bbl.in space.word *
- series "series" bibinfo.check *
- }
- if$
- }
- if$
- }
- { "" }
- if$
-FUNCTION {is.num}
-{ chr.to.int$
- duplicate$ "0" chr.to.int$ < not
- swap$ "9" chr.to.int$ > not and
-FUNCTION {extract.num}
-{ duplicate$ 't :=
- "" 's :=
- { t empty$ not }
- { t #1 #1 substring$
- t #2 global.max$ substring$ 't :=
- duplicate$ is.num
- { s swap$ * 's := }
- { pop$ "" 't := }
- if$
- }
- while$
- s empty$
- 'skip$
- { pop$ s }
- if$
-FUNCTION {convert.edition}
-{ extract.num "l" change.case$ 's :=
- s "first" = s "1" = or
- { bbl.first 't := }
- { s "second" = s "2" = or
- { bbl.second 't := }
- { s "third" = s "3" = or
- { bbl.third 't := }
- { s "fourth" = s "4" = or
- { bbl.fourth 't := }
- { s "fifth" = s "5" = or
- { bbl.fifth 't := }
- { s #1 #1 substring$ is.num
- { s eng.ord 't := }
- { edition 't := }
- if$
- }
- if$
- }
- if$
- }
- if$
- }
- if$
- }
- if$
- t
-FUNCTION {format.edition}
-{ edition duplicate$ empty$ 'skip$
- {
- convert.edition
- output.state mid.sentence =
- { "l" }
- { "t" }
- if$ change.case$
- "edition" bibinfo.check
- " " * bbl.edition *
- }
- if$
-INTEGERS { multiresult }
-FUNCTION {multi.page.check}
-{ 't :=
- #0 'multiresult :=
- { multiresult not
- t empty$ not
- and
- }
- { t #1 #1 substring$
- duplicate$ "-" =
- swap$ duplicate$ "," =
- swap$ "+" =
- or or
- { #1 'multiresult := }
- { t #2 global.max$ substring$ 't := }
- if$
- }
- while$
- multiresult
-FUNCTION {format.pages}
-{ pages duplicate$ empty$ 'skip$
- { duplicate$ multi.page.check
- {
- bbl.pages swap$
- n.dashify
- }
- {
- bbl.page swap$
- }
- if$
- tie.or.space.prefix
- "pages" bibinfo.check
- * *
- }
- if$
-FUNCTION {format.journal.pages}
-{ pages duplicate$ empty$ 'pop$
- { swap$ duplicate$ empty$
- { pop$ pop$ format.pages }
- {
- ": " *
- swap$
- n.dashify
- "pages" bibinfo.check
- *
- }
- if$
- }
- if$
-FUNCTION {format.journal.eid}
-{ eid "eid" bibinfo.check
- duplicate$ empty$ 'pop$
- { swap$ duplicate$ empty$ 'skip$
- {
- ": " *
- }
- if$
- swap$ *
- }
- if$
-FUNCTION {format.vol.num.pages}
-{ volume field.or.null
- duplicate$ empty$ 'skip$
- {
- "volume" bibinfo.check
- }
- if$
- bolden
- eid empty$
- { format.journal.pages }
- { format.journal.eid }
- if$
-FUNCTION {format.chapter.pages}
-{ chapter empty$
- 'format.pages
- { type empty$
- { bbl.chapter }
- { type "l" change.case$
- "type" bibinfo.check
- }
- if$
- chapter tie.or.space.prefix
- "chapter" bibinfo.check
- * *
- pages empty$
- 'skip$
- { ", " * format.pages * }
- if$
- }
- if$
-FUNCTION {format.booktitle}
- booktitle "booktitle" bibinfo.check
- emphasize
-FUNCTION {format.in.ed.booktitle}
-{ format.booktitle duplicate$ empty$ 'skip$
- {
- editor "editor" format.names.ed duplicate$ empty$ 'pop$
- {
- get.bbl.editor
- " " * swap$ *
- "(" swap$ * ")" *
- swap$
- " " * swap$
- * }
- if$
- word.in swap$ *
- }
- if$
-FUNCTION {format.thesis.type}
-{ type duplicate$ empty$
- 'pop$
- { swap$ pop$
- "t" change.case$ "type" bibinfo.check
- }
- if$
-FUNCTION {format.tr.number}
-{ number "number" bibinfo.check
- type duplicate$ empty$
- { pop$ bbl.techrep }
- 'skip$
- if$
- "type" bibinfo.check
- swap$ duplicate$ empty$
- { pop$ "t" change.case$ }
- { tie.or.space.prefix * * }
- if$
-FUNCTION {format.article.crossref}
- word.in
- " \cite{" * crossref * "}" *
-FUNCTION {format.book.crossref}
-{ volume duplicate$ empty$
- { "empty volume in " cite$ * "'s crossref of " * crossref * warning$
- pop$ word.in
- }
- { bbl.volume
- capitalize
- swap$ tie.or.space.prefix "volume" bibinfo.check * * bbl.of space.word *
- }
- if$
- " \cite{" * crossref * "}" *
-FUNCTION {format.incoll.inproc.crossref}
- word.in
- " \cite{" * crossref * "}" *
-FUNCTION {format.org.or.pub}
-{ 't :=
- ""
- address empty$ t empty$ and
- 'skip$
- {
- t empty$
- { address "address" bibinfo.check *
- }
- { t *
- address empty$
- 'skip$
- { ", " * address "address" bibinfo.check * }
- if$
- }
- if$
- }
- if$
-FUNCTION {format.publisher.address}
-{ publisher "publisher" bibinfo.warn format.org.or.pub
-FUNCTION {format.organization.address}
-{ organization "organization" bibinfo.check format.org.or.pub
-FUNCTION {article}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- crossref missing$
- {
- journal
- "journal" bibinfo.check
- emphasize
- "journal" output.check
- add.blank
- format.vol.num.pages output
- }
- { format.article.crossref output.nonnull
- format.pages output
- }
- if$
- new.block
- format.note output
- fin.entry
-FUNCTION {book}
-{ output.bibitem
- author empty$
- { format.editors "author and editor" output.check
- editor format.key output
- }
- { format.authors output.nonnull
- crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
- if$
- }
- if$
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- crossref missing$
- { format.bvolume output
- new.block
- format.number.series output
- new.sentence
- format.publisher.address output
- }
- {
- new.block
- format.book.crossref output.nonnull
- }
- if$
- format.edition output
- new.block
- format.note output
- fin.entry
-FUNCTION {booklet}
-{ output.bibitem
- format.authors output
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- howpublished "howpublished" bibinfo.check output
- address "address" bibinfo.check output
- new.block
- format.note output
- fin.entry
-FUNCTION {inbook}
-{ output.bibitem
- author empty$
- { format.editors "author and editor" output.check
- editor format.key output
- }
- { format.authors output.nonnull
- crossref missing$
- { "author and editor" editor either.or.check }
- 'skip$
- if$
- }
- if$
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- crossref missing$
- {
- format.bvolume output
- format.chapter.pages "chapter and pages" output.check
- new.block
- format.number.series output
- new.sentence
- format.publisher.address output
- }
- {
- format.chapter.pages "chapter and pages" output.check
- new.block
- format.book.crossref output.nonnull
- }
- if$
- format.edition output
- new.block
- format.note output
- fin.entry
-FUNCTION {incollection}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- crossref missing$
- { format.in.ed.booktitle "booktitle" output.check
- format.bvolume output
- format.number.series output
- format.chapter.pages output
- new.sentence
- format.publisher.address output
- format.edition output
- }
- { format.incoll.inproc.crossref output.nonnull
- format.chapter.pages output
- }
- if$
- new.block
- format.note output
- fin.entry
-FUNCTION {inproceedings}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- crossref missing$
- { format.in.ed.booktitle "booktitle" output.check
- format.bvolume output
- format.number.series output
- format.pages output
- new.sentence
- publisher empty$
- { format.organization.address output }
- { organization "organization" bibinfo.check output
- format.publisher.address output
- }
- if$
- }
- { format.incoll.inproc.crossref output.nonnull
- format.pages output
- }
- if$
- new.block
- format.note output
- fin.entry
-FUNCTION {conference} { inproceedings }
-FUNCTION {manual}
-{ output.bibitem
- format.authors output
- author format.key output
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- organization address new.block.checkb
- organization "organization" bibinfo.check output
- address "address" bibinfo.check output
- format.edition output
- new.block
- format.note output
- fin.entry
-FUNCTION {mastersthesis}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.btitle
- "title" output.check
- new.block
- bbl.mthesis format.thesis.type output.nonnull
- school "school" bibinfo.warn output
- address "address" bibinfo.check output
- new.block
- format.note output
- fin.entry
-FUNCTION {misc}
-{ output.bibitem
- format.authors output
- author format.key output
- format.date "year" output.check
- date.block
- format.title output
- new.block
- howpublished "howpublished" bibinfo.check output
- new.block
- format.note output
- fin.entry
-FUNCTION {phdthesis}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.btitle
- "title" output.check
- new.block
- bbl.phdthesis format.thesis.type output.nonnull
- school "school" bibinfo.warn output
- address "address" bibinfo.check output
- new.block
- format.note output
- fin.entry
-FUNCTION {proceedings}
-{ output.bibitem
- format.editors output
- editor format.key output
- format.date "year" output.check
- date.block
- format.btitle "title" output.check
- format.bvolume output
- format.number.series output
- new.sentence
- publisher empty$
- { format.organization.address output }
- { organization "organization" bibinfo.check output
- format.publisher.address output
- }
- if$
- new.block
- format.note output
- fin.entry
-FUNCTION {techreport}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title
- "title" output.check
- new.block
- format.tr.number output.nonnull
- institution "institution" bibinfo.warn output
- address "address" bibinfo.check output
- new.block
- format.note output
- fin.entry
-FUNCTION {unpublished}
-{ output.bibitem
- format.authors "author" output.check
- author format.key output
- format.date "year" output.check
- date.block
- format.title "title" output.check
- new.block
- format.note "note" output.check
- fin.entry
-FUNCTION {default.type} { misc }
-FUNCTION {sortify}
-{ purify$
- "l" change.case$
-INTEGERS { len }
-FUNCTION {chop.word}
-{ 's :=
- 'len :=
- s #1 len substring$ =
- { s len #1 + global.max$ substring$ }
- 's
- if$
-FUNCTION {format.lab.names}
-{ 's :=
- "" 't :=
- s #1 "{vv~}{ll}" format.name$
- s num.names$ duplicate$
- #2 >
- { pop$
- " " * bbl.etal *
- }
- { #2 <
- 'skip$
- { s #2 "{ff }{vv }{ll}{ jj}" format.name$ "others" =
- {
- " " * bbl.etal *
- }
- { bbl.and space.word * s #2 "{vv~}{ll}" format.name$
- * }
- if$
- }
- if$
- }
- if$
-FUNCTION {author.key.label}
-{ author empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { author format.lab.names }
- if$
-FUNCTION {author.editor.key.label}
-{ author empty$
- { editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.lab.names }
- if$
- }
- { author format.lab.names }
- if$
-FUNCTION {editor.key.label}
-{ editor empty$
- { key empty$
- { cite$ #1 #3 substring$ }
- 'key
- if$
- }
- { editor format.lab.names }
- if$
-FUNCTION {calc.short.authors}
-{ type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.key.label
- { type$ "proceedings" =
- 'editor.key.label
- 'author.key.label
- if$
- }
- if$
- 'short.list :=
-FUNCTION {calc.label}
-{ calc.short.authors
- short.list
- "("
- *
- year duplicate$ empty$
- short.list key field.or.null = or
- { pop$ "" }
- 'skip$
- if$
- *
- 'label :=
-FUNCTION {sort.format.names}
-{ 's :=
- #1 'nameptr :=
- ""
- s num.names$ 'numnames :=
- numnames 'namesleft :=
- { namesleft #0 > }
- { s nameptr
- "{ll{ }}{ f{ }}{ jj{ }}"
- format.name$ 't :=
- nameptr #1 >
- {
- nameptr #0
- #10 +
- #1 + =
- numnames #0
- #10 +
- > and
- { "others" 't :=
- #1 'namesleft := }
- 'skip$
- if$
- " " *
- namesleft #1 = t "others" = and
- { "zzzzz" * }
- { t sortify * }
- if$
- }
- { t sortify * }
- if$
- nameptr #1 + 'nameptr :=
- namesleft #1 - 'namesleft :=
- }
- while$
-FUNCTION {sort.format.title}
-{ 't :=
- "A " #2
- "An " #3
- "The " #4 t chop.word
- chop.word
- chop.word
- sortify
- #1 global.max$ substring$
-FUNCTION {author.sort}
-{ author empty$
- { key empty$
- { "to sort, need author or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { author sort.format.names }
- if$
-FUNCTION {author.editor.sort}
-{ author empty$
- { editor empty$
- { key empty$
- { "to sort, need author, editor, or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { editor sort.format.names }
- if$
- }
- { author sort.format.names }
- if$
-FUNCTION {editor.sort}
-{ editor empty$
- { key empty$
- { "to sort, need editor or key in " cite$ * warning$
- ""
- }
- { key sortify }
- if$
- }
- { editor sort.format.names }
- if$
-FUNCTION {presort}
-{ calc.label
- label sortify
- " "
- *
- type$ "book" =
- type$ "inbook" =
- or
- 'author.editor.sort
- { type$ "proceedings" =
- 'editor.sort
- 'author.sort
- if$
- }
- if$
- #1 entry.max$ substring$
- 'sort.label :=
- sort.label
- *
- " "
- *
- title field.or.null
- sort.format.title
- *
- #1 entry.max$ substring$
- 'sort.key$ :=
-ITERATE {presort}
-STRINGS { last.label next.extra }
-INTEGERS { last.extra.num number.label }
-FUNCTION {initialize.extra.label.stuff}
-{ #0 int.to.chr$ 'last.label :=
- "" 'next.extra :=
- #0 'last.extra.num :=
- #0 'number.label :=
-FUNCTION {forward.pass}
-{ last.label label =
- { last.extra.num #1 + 'last.extra.num :=
- last.extra.num int.to.chr$ 'extra.label :=
- }
- { "a" chr.to.int$ 'last.extra.num :=
- "" 'extra.label :=
- label 'last.label :=
- }
- if$
- number.label #1 + 'number.label :=
-FUNCTION {reverse.pass}
-{ next.extra "b" =
- { "a" 'extra.label := }
- 'skip$
- if$
- extra.label 'next.extra :=
- extra.label
- duplicate$ empty$
- 'skip$
- { "{\natexlab{" swap$ * "}}" * }
- if$
- 'extra.label :=
- label extra.label * 'label :=
-EXECUTE {initialize.extra.label.stuff}
-ITERATE {forward.pass}
-REVERSE {reverse.pass}
-FUNCTION {bib.sort.order}
-{ sort.label
- " "
- *
- year field.or.null sortify
- *
- " "
- *
- title field.or.null
- sort.format.title
- *
- #1 entry.max$ substring$
- 'sort.key$ :=
-ITERATE {bib.sort.order}
-FUNCTION {begin.bib}
-{ preamble$ empty$
- 'skip$
- { preamble$ write$ newline$ }
- if$
- "\begin{thebibliography}{" number.label int.to.str$ * "}" *
- write$ newline$
- "\providecommand{\natexlab}[1]{#1}"
- write$ newline$
-EXECUTE {begin.bib}
-EXECUTE {init.state.consts}
-ITERATE {call.type$}
-FUNCTION {end.bib}
-{ newline$
- "\end{thebibliography}" write$ newline$
-EXECUTE {end.bib}
-%% End of customized bst file
-%% End of file `genome_research.bst'.
\ No newline at end of file
diff --git a/paper/haplotype_calling.png b/paper/haplotype_calling.png
deleted file mode 100644
index bf0bd3c..0000000
Binary files a/paper/haplotype_calling.png and /dev/null differ
diff --git a/paper/indel_error.png b/paper/indel_error.png
deleted file mode 100644
index 9d110dc..0000000
Binary files a/paper/indel_error.png and /dev/null differ
diff --git a/paper/low_frequency_sensitivity.png b/paper/low_frequency_sensitivity.png
deleted file mode 100644
index 485396d..0000000
Binary files a/paper/low_frequency_sensitivity.png and /dev/null differ
diff --git a/paper/main.aux b/paper/main.aux
deleted file mode 100644
index c89a8ea..0000000
--- a/paper/main.aux
+++ /dev/null
@@ -1,121 +0,0 @@
-\@writefile{toc}{\contentsline {section}{\numberline {1}Motivation}{1}}
-\@writefile{toc}{\contentsline {section}{\numberline {2}Results}{2}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}Small variant detection in simulated data}{2}}
-\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces Receiver-operator characteristics (ROCs) for FreeBayes, GATK HaplotypeCaller and UnifiedGenotyper, and samtools on 100 samples at 10x simulated sequencing depth. FreeBayes achieves the highest area under the curve (AUC) 1\hbox {}, with the HaplotypeCaller and samtools each performing next-best for indels and SNPs, respectively.}}{3}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Using simulation to assess the direct detection of haplotypes}{3}}
-\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Performance of FreeBayes, GATK HaplotypeCaller and UnifiedGenotyper, and samtools against simulated data. }}{4}}
-\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces A known error mode of Illumina sequencing methods generates a 1bp insertion artifact that is detected by standard mapping-based variant calling methods. The artifact results in a relative over-abundance of 1bp insertions. Here, we characterize the ability of our method to remove this artifact by detecting variants in a larger detection window. As the calling window size is increased beyond 10bp, the artifact is effecti [...]
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Using haplotype-based variant detection to improve the signal to noise ratio of candidate variants}{4}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Using haplotype-based variant detection to understand genotyping array design failure}{5}}
-\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces The Omni 2.5 genotyping array includes a number of alleles which consistently report as non-polymorphic (monomorphic) in the 1000 Genomes cohort in which they were originally detected. By detecting variants using our method at a 10bp variant calling window, we demonstrate that more than 90\% of the apparently monomorphic loci are not biallelic SNPs, and thus the array design does not match the local variant structure i [...]
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.5}The importance of accurately modeling copy number variations on sex chromosomes}{6}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.6}Comparing to other methods in low-coverage sequencing data}{6}}
-\@writefile{lot}{\contentsline {table}{\numberline {2}{\ignorespaces Performance of various variant detection pipelines tested as part of the 1000 Genomes Project. Sets are Boston College; non-haplotype-based method (BC), haplotype-based method described in this paper (BC2), Baylor College of Medicine (BCM), Broad Institute GATK UnifiedGenotyper (BI1), Sanger Institute Samtools (SI1), University of Michigan GlfMultiples (UM), Broad Institute GATK HaplotypeCaller (BI2), Oxford Platypus (O [...]
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.7}Indel detection performance}{7}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.8}Sensitivity to low-frequency variation}{7}}
-\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Performance of indel detection methods in 1000 Genomes project on the AFR191 sample set as assesed via high-depth resequencing validation. Sets are Boston College FreeBayes (BC), Broad Institute GATK UnifiedGenotyper (BI1), Sanger Institute Samtools (SI1), Broad Institute GATK HaplotypeCaller (BI2), Oxford Platypus (OX1), Oxford Cortex (OX2).}}{8}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {2.9}Haplotype-based consolidation of small variant calls}{8}}
-\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Sensitivity to low-frequency variants of various detection methods, as assessed in 191 samples of African ancestry in the 1000 Genomes low-coverage cohort. BC2 is FreeBayes, BI1 is the GATK UnifiedGenotyper, BI2 is the GATK HaplotypeCaller, and SI2 is the global assembler SGA.}}{9}}
-\@writefile{toc}{\contentsline {section}{\numberline {3}Methods}{10}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Definitions}{10}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}A Bayesian approach}{10}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Estimating the probability of sequencing observations given an underlying genotype, $P(R_i|G)$}{11}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Genotype combination priors, $P(G_1,\ldots ,G_n)$}{11}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.1}Decomposition of prior probability of genotype combination}{11}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.2}Genotype combination sampling probability $P(G_1,\ldots ,G_n | f_1,\ldots ,f_k)$}{12}}
-\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.3}Derivation of $P(f_1,\ldots ,f_k)$ by Ewens' sampling formula}{13}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {3.5}Expanding the model to incorporate the observability of the locus and alleles}{13}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {3.6}Estimation of the probability that the locus is sequencable $P(S)$}{14}}
-\@writefile{toc}{\contentsline {section}{\numberline {4}Direct detection of phase from short-read sequencing}{16}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Parsing haplotype observations from sequencing data}{16}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Determining a window over which to assemble haplotype observations}{16}}
-\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces The direct detection of phase from short-read sequencing traces and counting of haplotypes across dynamically-determined windows.}}{17}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Detection and genotyping of local haplotypes}{17}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.4}Probability of polymorphism}{18}}
-\@writefile{toc}{\contentsline {subsection}{\numberline {4.5}Marginal likelihoods of individual genotypes}{18}}
-\bibcite{1000Gphase1}{{1}{2012}{{1000 Genomes Project~Participants}}{{}}}
-\bibcite{dindel}{{2}{2011}{{Albers et~al.}}{{Albers, Lunter, MacArthur, McVean, Ouwehand, and Durbin}}}
-\bibcite{branton2008}{{3}{2008}{{Branton et~al.}}{{Branton, Deamer, Marziali, Bayley, Benner, Butler, Di~Ventra, Garaj, Hibbs, Huang et~al.}}}
-\bibcite{browning2007}{{4}{2007}{{Browning and Browning}}{{}}}
-\bibcite{clarke2009}{{5}{2009}{{Clarke et~al.}}{{Clarke, Wu, Jayasinghe, Patel, Reid, and Bayley}}}
-\bibcite{blmash}{{6}{2003}{{Cleary et~al.}}{{Cleary, Zhang, Di~Nicola, Aronson, Aube, Steinman, Haddad, Redston, Gallinger, Narod et~al.}}}
-\bibcite{delaneau2012}{{7}{2012}{{Delaneau et~al.}}{{Delaneau, Marchini, and Zagury}}}
-\bibcite{gatk2011}{{8}{2011}{{DePristo et~al.}}{{DePristo, Banks, Poplin, Garimella, Maguire, Hartl, Philippakis, del Angel, Rivas, Hanna et~al.}}}
-\bibcite{howie2011}{{14}{2011}{{Howie et~al.}}{{Howie, Marchini, and Stephens}}}
-\bibcite{recurrentTERT2013}{{15}{2013}{{Huang et~al.}}{{Huang, Hodis, Xu, Kryukov, Chin, and Garraway}}}
-\bibcite{cortex}{{16}{2012}{{Iqbal et~al.}}{{Iqbal, Caccamo, Turner, Flicek, and McVean}}}
-\bibcite{mosaik}{{17}{2012}{{Lee and Str{\"{o}}mberg}}{{}}}
-\bibcite{samtools}{{19}{2009}{{Li et~al.}}{{Li, Handsaker, Wysoker, Fennell, Ruan, Homer, Marth, Abecasis, and Durbin}}}
-\bibcite{maq}{{20}{2008}{{Li et~al.}}{{Li, Ruan, and Durbin}}}
-\bibcite{mach2010}{{21}{2010}{{Li et~al.}}{{Li, Willer, Ding, Scheet, and Abecasis}}}
-\bibcite{marth99}{{22}{1999}{{Marth et~al.}}{{Marth, Korf, Yandell, Yeh, Gu, Zakeri, Stitziel, Hillier, Kwok, and Gish}}}
-\bibcite{snpsvm}{{23}{2013}{{O'Fallon et~al.}}{{O'Fallon, Wooderchak-Donahue, and Crockett}}}
-\bibcite{opmac99}{{24}{1999}{{Opitz and Maclin}}{{}}}
-\bibcite{snptools}{{26}{2013}{{Wang et~al.}}{{Wang, Lu, Yu, Gibbs, and Yu}}}
diff --git a/paper/main.bbl b/paper/main.bbl
deleted file mode 100644
index 0e685a3..0000000
--- a/paper/main.bbl
+++ /dev/null
@@ -1,170 +0,0 @@
-\bibitem[{1000 Genomes Project~Participants(2012)}]{1000Gphase1}
-1000 Genomes Project~Participants T. 2012.
-\newblock {{A}n integrated map of genetic variation from 1,092 human genomes}.
-\newblock \emph{Nature} \textbf{491}: 56--65.
-\bibitem[{Albers et~al.(2011)Albers, Lunter, MacArthur, McVean, Ouwehand, and
- Durbin}]{dindel}
-Albers CA, Lunter G, MacArthur DG, McVean G, Ouwehand WH, and Durbin R. 2011.
-\newblock {{D}indel: accurate indel calls from short-read data}.
-\newblock \emph{Genome Res.} \textbf{21}: 961--973.
-\bibitem[{Branton et~al.(2008)Branton, Deamer, Marziali, Bayley, Benner,
- Butler, Di~Ventra, Garaj, Hibbs, Huang et~al.}]{branton2008}
-Branton D, Deamer DW, Marziali A, Bayley H, Benner SA, Butler T, Di~Ventra M,
- Garaj S, Hibbs A, Huang X, et~al.. 2008.
-\newblock {{T}he potential and challenges of nanopore sequencing}.
-\newblock \emph{Nat. Biotechnol.} \textbf{26}: 1146--1153.
-\bibitem[{Browning and Browning(2007)}]{browning2007}
-Browning SR and Browning BL. 2007.
-\newblock {{R}apid and accurate haplotype phasing and missing-data inference
- for whole-genome association studies by use of localized haplotype
- clustering}.
-\newblock \emph{Am. J. Hum. Genet.} \textbf{81}: 1084--1097.
-\bibitem[{Clarke et~al.(2009)Clarke, Wu, Jayasinghe, Patel, Reid, and
- Bayley}]{clarke2009}
-Clarke J, Wu HC, Jayasinghe L, Patel A, Reid S, and Bayley H. 2009.
-\newblock {{C}ontinuous base identification for single-molecule nanopore
- {D}{N}{A} sequencing}.
-\newblock \emph{Nat Nanotechnol} \textbf{4}: 265--270.
-\bibitem[{Cleary et~al.(2003)Cleary, Zhang, Di~Nicola, Aronson, Aube, Steinman,
- Haddad, Redston, Gallinger, Narod et~al.}]{blmash}
-Cleary SP, Zhang W, Di~Nicola N, Aronson M, Aube J, Steinman A, Haddad R,
- Redston M, Gallinger S, Narod SA, et~al.. 2003.
-\newblock {{H}eterozygosity for the {B}{L}{M}({A}sh) mutation and cancer risk}.
-\newblock \emph{Cancer Res.} \textbf{63}: 1769--1771.
-\bibitem[{Delaneau et~al.(2012)Delaneau, Marchini, and Zagury}]{delaneau2012}
-Delaneau O, Marchini J, and Zagury JF. 2012.
-\newblock {{A} linear complexity phasing method for thousands of genomes}.
-\newblock \emph{Nat. Methods} \textbf{9}: 179--181.
-\bibitem[{DePristo et~al.(2011)DePristo, Banks, Poplin, Garimella, Maguire,
- Hartl, Philippakis, del Angel, Rivas, Hanna et~al.}]{gatk2011}
-DePristo MA, Banks E, Poplin R, Garimella KV, Maguire JR, Hartl C, Philippakis
- AA, del Angel G, Rivas MA, Hanna M, et~al.. 2011.
-\newblock {{A} framework for variation discovery and genotyping using
- next-generation {D}{N}{A} sequencing data}.
-\newblock \emph{Nat. Genet.} \textbf{43}: 491--498.
-Ewens WJ. 1972.
-\newblock {{T}he sampling theory of selectively neutral alleles}.
-\newblock \emph{Theor Popul Biol} \textbf{3}: 87--112.
-Garrison E. 2012{\natexlab{a}}.
-\newblock {FreeBayes source repository}.
-\newblock \url{https://github.com/ekg/freebayes}.
-Garrison E. 2012{\natexlab{b}}.
-\newblock {mutatrix population genome simulator}.
-\newblock \url{https://github.com/ekg/mutatrix}.
-Garrison E. 2012{\natexlab{c}}.
-\newblock {vcflib: variant call file processing and manipulation utilities}.
-\newblock \url{https://github.com/ekg/vcflib}.
-Holtgrewe M. 2010.
-\newblock Mason – a read simulator for second generation sequencing data.
-\newblock Technical Report TR-B-10-06, Institut für Mathematik und Informatik,
- Freie Universität Berlin.
-\bibitem[{Howie et~al.(2011)Howie, Marchini, and Stephens}]{howie2011}
-Howie B, Marchini J, and Stephens M. 2011.
-\newblock {{G}enotype imputation with thousands of genomes}.
-\newblock \emph{G3 (Bethesda)} \textbf{1}: 457--470.
-\bibitem[{Huang et~al.(2013)Huang, Hodis, Xu, Kryukov, Chin, and
- Garraway}]{recurrentTERT2013}
-Huang FW, Hodis E, Xu MJ, Kryukov GV, Chin L, and Garraway LA. 2013.
-\newblock {{H}ighly recurrent {T}{E}{R}{T} promoter mutations in human
- melanoma}.
-\newblock \emph{Science} \textbf{339}: 957--959.
-\bibitem[{Iqbal et~al.(2012)Iqbal, Caccamo, Turner, Flicek, and
- McVean}]{cortex}
-Iqbal Z, Caccamo M, Turner I, Flicek P, and McVean G. 2012.
-\newblock {{D}e novo assembly and genotyping of variants using colored de
- {B}ruijn graphs}.
-\newblock \emph{Nat. Genet.} \textbf{44}: 226--232.
-\bibitem[{Lee and Str{\"{o}}mberg(2012)}]{mosaik}
-Lee WP and Str{\"{o}}mberg M. 2012.
-\newblock {MOSAIK reference-guided aligner for next-generation sequencing
- technologies}.
-\newblock \url{https://github.com/wanpinglee/MOSAIK}.
-Li H. 2011.
-\newblock {{A} statistical framework for {S}{N}{P} calling, mutation discovery,
- association mapping and population genetical parameter estimation from
- sequencing data}.
-\newblock \emph{Bioinformatics} \textbf{27}: 2987--2993.
-\bibitem[{Li et~al.(2009)Li, Handsaker, Wysoker, Fennell, Ruan, Homer, Marth,
- Abecasis, and Durbin}]{samtools}
-Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G,
- and Durbin R. 2009.
-\newblock {{T}he {S}equence {A}lignment/{M}ap format and {S}{A}{M}tools}.
-\newblock \emph{Bioinformatics} \textbf{25}: 2078--2079.
-\bibitem[{Li et~al.(2008)Li, Ruan, and Durbin}]{maq}
-Li H, Ruan J, and Durbin R. 2008.
-\newblock {{M}apping short {D}{N}{A} sequencing reads and calling variants
- using mapping quality scores}.
-\newblock \emph{Genome Res.} \textbf{18}: 1851--1858.
-\bibitem[{Li et~al.(2010)Li, Willer, Ding, Scheet, and Abecasis}]{mach2010}
-Li Y, Willer CJ, Ding J, Scheet P, and Abecasis GR. 2010.
-\newblock {{M}a{C}{H}: using sequence and genotype data to estimate haplotypes
- and unobserved genotypes}.
-\newblock \emph{Genet. Epidemiol.} \textbf{34}: 816--834.
-\bibitem[{Marth et~al.(1999)Marth, Korf, Yandell, Yeh, Gu, Zakeri, Stitziel,
- Hillier, Kwok, and Gish}]{marth99}
-Marth GT, Korf I, Yandell MD, Yeh RT, Gu Z, Zakeri H, Stitziel NO, Hillier L,
- Kwok PY, and Gish WR. 1999.
-\newblock {{A} general approach to single-nucleotide polymorphism discovery}.
-\newblock \emph{Nat. Genet.} \textbf{23}: 452--456.
-\bibitem[{O'Fallon et~al.(2013)O'Fallon, Wooderchak-Donahue, and
- Crockett}]{snpsvm}
-O'Fallon BD, Wooderchak-Donahue W, and Crockett DK. 2013.
-\newblock {{A} support vector machine for identification of single-nucleotide
- polymorphisms from next-generation sequencing data}.
-\newblock \emph{Bioinformatics} \textbf{29}: 1361--1366.
-\bibitem[{Opitz and Maclin(1999)}]{opmac99}
-Opitz DW and Maclin R. 1999.
-\newblock Popular ensemble methods: An empirical study.
-\newblock \emph{J. Artif. Intell. Res. (JAIR)} \textbf{11}: 169--198.
-Tajima F. 1983.
-\newblock {{E}volutionary relationship of {D}{N}{A} sequences in finite
- populations}.
-\newblock \emph{Genetics} \textbf{105}: 437--460.
-\bibitem[{Wang et~al.(2013)Wang, Lu, Yu, Gibbs, and Yu}]{snptools}
-Wang Y, Lu J, Yu J, Gibbs RA, and Yu F. 2013.
-\newblock {{A}n integrative variant analysis pipeline for accurate
- genotype/haplotype inference in population {N}{G}{S} data}.
-\newblock \emph{Genome Res.} \textbf{23}: 833--842.
-Watterson GA. 1975.
-\newblock {{O}n the number of segregating sites in genetical models without
- recombination}.
-\newblock \emph{Theor Popul Biol} \textbf{7}: 256--276.
diff --git a/paper/main.blg b/paper/main.blg
deleted file mode 100644
index 91a5948..0000000
--- a/paper/main.blg
+++ /dev/null
@@ -1,49 +0,0 @@
-This is BibTeX, Version 0.99d (TeX Live 2015/dev/Debian)
-Capacity: max_strings=35307, hash_size=35307, hash_prime=30011
-The top-level auxiliary file: main.aux
-The style file: genome_research.bst
-Database file #1: references.bib
-Warning--I didn't find a database entry for "1000GPhaseI"
-Reallocated wiz_functions (elt_size=4) to 6000 items from 3000.
-You've used 27 entries,
- 3195 wiz_defined-function locations,
- 811 strings with 9619 characters,
-and the built_in function-call counts, 29266 in all, are:
-= -- 2663
-> -- 1238
-< -- 13
-+ -- 1130
-- -- 336
-* -- 2419
-:= -- 4009
-add.period$ -- 81
-call.type$ -- 27
-change.case$ -- 244
-chr.to.int$ -- 25
-cite$ -- 27
-duplicate$ -- 2114
-empty$ -- 2240
-format.name$ -- 405
-if$ -- 5958
-int.to.chr$ -- 3
-int.to.str$ -- 1
-missing$ -- 262
-newline$ -- 139
-num.names$ -- 108
-pop$ -- 634
-preamble$ -- 1
-purify$ -- 217
-quote$ -- 0
-skip$ -- 670
-stack$ -- 0
-substring$ -- 2840
-swap$ -- 632
-text.length$ -- 1
-text.prefix$ -- 0
-top$ -- 0
-type$ -- 243
-warning$ -- 0
-while$ -- 236
-width$ -- 0
-write$ -- 350
-(There was 1 warning)
diff --git a/paper/main.tex b/paper/main.tex
deleted file mode 100644
index 31682f3..0000000
--- a/paper/main.tex
+++ /dev/null
@@ -1,664 +0,0 @@
-\usepackage[square, comma, sort&compress]{natbib}
-\DeclareMathOperator*{\E}{\mbox{\large E}}
- \offinterlineskip\halign{\hfil$##$\cr
- #1\propto\cr\noalign{\kern2pt}#1\sim\cr\noalign{\kern-2pt}}}}}
- \mathrel{%
- \setbox0=\hbox{$#1\sim$}%
- \setbox2=\hbox{%
- \rlap{\hbox{$#1\propto$}}%
- \lower1.1\ht0\box0%
- }%
- \raise0.25\ht2\box2%
- }%
-\title{Haplotype-based variant detection from short-read sequencing}
-%\title{Characterization of complex variants by haplotype-based variant detection}
-\author{Erik Garrison and Gabor Marth}
-With genomic variant detection methods, we can determine point-wise differences against a reference genome. Widely-used methods fail to reliably characterize the relative phase of proximal alleles. This information is essential for the functional interpretation of genomic material, and can be determined directly from the primary sequence reads. Here we propose such a method, and demonstrate that the use of haplotypes does not only improve our ability to interpret genomic information, [...]
-write introduction / motivation statement to lead into results
-GTM: Biological motivations:
-(1) Haplotype phasing: [Result: resolution of compound hets. e.g. BLM allele-like situations such as the frame-restoring indels, and the 6 SNPs in a row. This improves functional impact prediction] [Display items: example of phase-restoring INDELs]
-(2) Complex variant detection: [Result: Alleles are reported in a consistent fashion, allowing e.g. improved genotyping chip design] [Display items: OMNI results]
-(3) Accuracy of SNP and INDEL detection improves because haplotype-level detection improves signal to noise ratio [Result: comparison to other SNP calls and INDEL calls; 1bp INDEL detection accuracy as a function of clump size] [Display items: SNP AUC table; INDEL size as a function of clump size; INDEL caller ROC comparison]
-(4) Enables integration of multiple independent callsets. [Results: comparison to other integration strategies; Quoting number of inconsistent allele locations in Phase 1.] [Display items: cartoon of inconsistent calls; table comparing performance of variant consolidation methods]
-(5) Ploidy framework allows accurate genotype determination in haploid regions. [Result: Chromosome X calling]. [Display items: in-text chromosome X results]
-(6) Physical haplotype method gives better sensitivity than assembly based methods (local or global), but comparable to mapping methods, while achieving better specificity. [Results: SNP AFS comparison across mapping, assembly methods] [Display items: AFS comparison; call comparison table]
-Gabor's logical ordering: 1, 2, 4, 3, 6, 5
-Literature/competing method review:
-How other methods deal with the same issues?
-Other approaches for variant calling (per-site mapping based, assembly, local assembly)
-How our approach promises to solve the problems.
-While \emph{statistical phasing} approaches are necessary for the determination of large-scale haplotype structure \citep{browning2007, mach2010, delaneau2012, howie2011}, sequencing traces provide short-range phasing information that may be employed directly in primary variant detection to establish phase between proximal alleles. Present read lengths and error rates limit this \emph{physical phasing} approach to variants clustered within tens to hundreds of bases, but as the cost of o [...]
-Haplotype-based variant detection methods, in which short haplotypes are read directly from sequencing traces, offer a number of benefits over methods which operate on a single position at a time. Haplotype-based methods ensure semantic consistency among described variants by simultaneously evaluating all classes of alleles in the same context.
-Locally phased genotypes can be used to improve genotyping accuracy in the context of rare variations that can be difficult to impute due to sparse linkage information.
-Similarly, they can assist in the design of genotyping
-assays, which can fail in the context of undescribed variation at the assayed locus. These methods can provide the direct detection of complex variants of clinical significance, such as the BLM\textsuperscript{Ash} allele, a complex block substitution in a helicase gene related to cancer risk \citep{blmash} or recurrent multi-nucleotide polymorphisms often found in particular cancer types \citep{recurrentTERT2013}. Directly detecting such alleles from sequencing data decreases the cost [...]
-The use of longer haplotypes in variant detection can improve detection by increasing the signal to noise ratio of the genotype likelihood space that is used in analysis, provided some degree of independence between sequencing errors. This follows from the fact that the space of possible erroneous haplotypes expands dramatically with haplotype length, while the space of true variation remains constant, with the number of true alleles less than or equal to the ploidy of the sample at a g [...]
-The direct detection of haplotypes from alignment data presents several challenges to existing variant detection methods. As the length of a haplotype increases, so does the number of possible alleles within the haplotype, and thus methods designed to detect genetic variation over haplotypes in a unified context must be able to model multiallelism. However, most variant detection methods establish estimates of the likelihood of polymorphism at a given loci using statistical models whic [...]
-To enable the application of population-level inference methods to the detection of haplotypes, we generalize the Bayesian statistical method described by \citet{marth99} to allow multiallelic loci and non-uniform copy number across the samples under consideration. We have implemented this model in FreeBayes \citep{freebayesgit}. In addition to extensions enabling haplotype-based detection, we have incorporated a model of the capacity for the alignments to characterize the locus and al [...]
-\subsection{Small variant detection in simulated data}
-%\emph{GTM: This section presents results for Biological problem 3. The table for SNPs, and the figure for INDELs.}
-To assess the performance of our method, we used the population genome simulator mutatrix \citep{mutatrixgit} to simulate variation in 100 samples over 100 kilobases of human chromosome 20, and the mason read simulator \citep{holtgrewe2010} to generate simulated Illumina-like 70bp reads at 10x depth per sample.
-%several sets of simulated data: 10 samples at 100x coverage, 1000 samples at 10x coverage, and a single sample at 20x, 50x, 100x, and 200x coverage, over 100 kilobases of human chromosome 20.
-%This simulator employs a $1/i$ allele frequency model to simulate SNPs and indels across a number of samples.
-The data were aligned with Mosaik \citep{mosaik}, and variants were called using several popular detection methods capable of simultaneously detecting SNPs and short indels: GATK HaplotypeCaller and UnifiedGenotyper (version 2.7.4) \citep{gatk2011}, samtools (version 0.1.19-44428cd) \citep{samtools}, and FreeBayes (version
-%In order to improve statistical power, each simulation was run 100 times and the results were merged.
-To assess each caller's detection performance we generated receiver-operator characteristics (ROCs) using vcfroc \citep{vcflibgit}. We provide results in terms of area under the curve (AUC) for all tested variant callers in table \ref{tab:simROCs}.% (See supplement for ROC plots for each simulation.)
-These results indicate that FreeBayes provides superior performance to the GATK and samtools at all assayed depths and numbers of samples.
-%For both SNPs and indels, the relative performance difference between FreeBayes and these other methods is largest at low sequencing depth and numbers of samples, and diminishes as sequencing depth or number of samples increases.
-We observe that the difference in the AUC metric is dominated by both minimum distance from perfect discrimination (perfect sensitivity and perfect specificity), in which FreeBayes consistently outperforms the other methods, and by apparent hard limitation on sensitivity imposed by the other methods. We hypothesize that the difference in performance for indels, which is larger than that for SNPs, reflects our method's detection of alleles on haplotypes, which improves the signal to nois [...]
-\caption{Receiver-operator characteristics (ROCs) for FreeBayes, GATK HaplotypeCaller and UnifiedGenotyper, and samtools on 100 samples at 10x simulated sequencing depth. FreeBayes achieves the highest area under the curve (AUC; see table \ref{tab:simROCs}), with the HaplotypeCaller and samtools each performing next-best for indels and SNPs, respectively.}
- \begin{tabular}{|l||l|l|l|l|}
- \hline
- variant detector & depth & samples & AUC SNPs & AUC indels \\ \hline
- FreeBayes & 10 & 100 & 0.9594 & 0.9400 \\
- \hline
- GATK HaplotypeCaller & 10 & 100 & 0.8155 & 0.7765 \\
- \hline
- GATK UnifiedGenotyper & 10 & 100 & 0.8907 & 0.7073 \\
- \hline
- samtools & 10 & 100 & 0.9056 & 0.4698 \\ \hline
- \end{tabular}
-\caption{Performance of FreeBayes, GATK HaplotypeCaller and UnifiedGenotyper, and samtools against simulated data.
-%FreeBayes provides the best area under the curve (AUC) at all sequencing depths and numbers of samples for both SNPs and indels.}
-\subsection{Using simulation to assess the direct detection of haplotypes}
-In order to facilitate our assessment of the method at determining phase between clusters of alleles, we set a mutation rate sufficient to generate many clusters of variants in these simulated samples. We then simulated reads at 20x coverage from the resulting simulated chromosomes using wgsim \citep{samtools}, aligned the results using Mosaik \citep{mosaik} and ran freebayes on the resulting alignments specifying a haplotype detection length of 10bp. The results were compared to the t [...]
-%For the evaluation of complex alleles detection, we ignored haplotypes which could not be detected by the algorithm given the maximum haplotype length constraint of the detection method.
-Our results agree with those obtained for other classes of small variants in section \ref{sec:simulation}, showing high performance against SNPs (AUC of 0.979) and indels (AUC of 0.948). For complex variants composed of multiple small variants, direct detection provides an AUC of 0.919.
-% TODO rerun using newest freebayes version
-% humu.bc.edu:haplotypecomparisons/1mb_10x_10samples
-%> abs(trapz(c(1, roc$snpsfpr), c(1, roc$snpstpr)))
-%[1] 0.9795195
-%> abs(trapz(c(1, roc$complexfpr), c(1, roc$complextpr)))
-%[1] 0.9033818
-%> abs(trapz(c(1, roc$indelsfpr), c(1, roc$indelstpr)))
-%[1] 0.7524788
-%> abs(trapz(c(1, roc$mnpsfpr), c(1, roc$mnpstpr)))
-%[1] 0.9480347
-%\section{Comparison of haplotype-based calls to statistically phased genotypes}
-\caption{A known error mode of Illumina sequencing methods generates a 1bp insertion artifact that is detected by standard mapping-based variant calling methods. The artifact results in a relative over-abundance of 1bp insertions. Here, we characterize the ability of our method to remove this artifact by detecting variants in a larger detection window. As the calling window size is increased beyond 10bp, the artifact is effectively removed, and the balance between insertions and delet [...]
-\subsection{Using haplotype-based variant detection to improve the signal to noise ratio of candidate variants}
-The fluorescence-based imaging utilized by Illumina sequencing machines is susceptible to errors generated by air bubbles introduced into the flowcell in which the sequencing reaction takes place. Bubble errors tend to manifest themselves as high-quality 1bp insertions in sequencing traces derived from spots in the affected regions of the sequencing flowcell. These errors are randomly distributed with respect to reference position, but their high frequency in some sequencing runs means [...]
-To assess the ability of our haplotype-based method to overcome this characteristic error, we detected variants in the previously described AFR191 sample set using a number of different haplotype lengths. The indel detection results (figure \ref{fig:indelerror}) indicate that this error mode can be effectively removed from small variant calls by increasing the detection window size to 10bp or greater.
-As we increase the length of detected haplotypes, we increase the number of possible erroneous haplotypes without increasing the number of true haplotypes. This effect results in an improved signal to noise ratio for detected variants at larger haplotype sizes. As such, increasing window size in our algorithm allows us to exclude likely insertion artifacts from consideration, as the recurrence of an erroneous haplotype diminishes rapidly with haplotype length. We hypothesize that this [...]
-%\caption{Omni errors.}
-\caption{The Omni 2.5 genotyping array includes a number of alleles which consistently report as non-polymorphic (monomorphic) in the 1000 Genomes cohort in which they were originally detected. By detecting variants using our method at a 10bp variant calling window, we demonstrate that more than 90\% of the apparently monomorphic loci are not biallelic SNPs, and thus the array design does not match the local variant structure in these samples. By using a haplotype-based approach, group [...]
-\subsection{Using haplotype-based variant detection to understand genotyping array design failure}
-Variant calls generated during the pilot phase of the 1000 Genomes Project \citep{1000Gphase1} were used to design a genotyping array (the Illumina OMNI2.5). Subsequently, many of the alleles on this array (approximately 10\%) were found to be putatively monomorphic in the same set of samples, suggesting they resulted from variant detection error.
-We investigated these loci using whole-genome calls in the low-coverage cohort in Phase I of the 1000 Genomes Project. We ran freebayes using a haplotype window of 10 base pairs. On comparison with the monomorphic array loci, we found that approximately 90\% of the array-monomorphic loci overlap non-SNP or non-biallelic variation in these samples within 10bp of the target SNP, whereas the opposite is true of polymorphic loci--- greater than 90\% of loci assayed as polymorphic overlap b [...]
-We observe that many of the apparent failures in variant detection are actually caused by an inability of methods to assess local clusters of variation. The accurate design of genotyping arrays and their use in cross-validation of sequencing-based genotyping performance thus requires information about local haplotype structure.
-\subsection{The importance of accurately modeling copy number variations on sex chromosomes}
-Our method is currently the only variant detector in common use which provides the ability to call males and females jointly on chromosome X with correct copy number. To evaluate the benefits of this approach, we detected variants in chromosome X for 191 low-coverage 1000 Genomes samples of African ancestry using FreeBayes both with and without copy-number awareness. Comparison of our results to the genotyping array calls (excluding cases of likely array failure due to non-SNP, non-bia [...]
-\subsection{Comparing to other methods in low-coverage sequencing data}
-In the testing phase of the 1000 Genomes Project, participating groups submitted callsets based on 191 samples of African ancestry (AFR191). Results are characterized in table \ref{table:1000Gcomparisons}. Unlike other haplotype-based and assembly methods, the approach described in this paper (BC2) provides sensitivity to known variants equivalent to mapping-based methods (BCM, BC1, SI1, UM). Furthermore, the method's ability to characterize haplotypes in loci which appeared to be mon [...]
-call set & BC & BCM & BI1 & SI1 & UM & BC2 & BI2 & OX1 & SI2 & OX2 & Union & 2/9 & 3/9 & 4/9 & BC cons \\
-SNPs [K] & 459 & 512 & 481 & 480 & 491 & 495 & 362 & 452 & 252 & 101 & 621 & 548 & 518 & 487 & 543 \\
-Omni poly [\%] & 91.6 & 98.9 & 96.5 & 95.2 & 97.6 & 97.4 & 88.4 & 87 & 83.1 & 44.6 & 99.3 & 98.9 & 98.6 & 97.6 & 98.7 \\
-Hapmap [\%] & 94.5 & 99.4 & 98 & 95.6 & 98.9 & 98.3 & 93.6 & 90.3 & 91.1 & 53.7 & 99.4 & 99.4 & 99.3 & 99 & 98.6 \\
-Omni mono [\%] & 1.39 & 1.63 & 0.29 & 0.62 & 0.77 & 0.56 & 0.14 & 1.1 & 0.72 & 0.1 & 3.73 & 0.97 & 0.67 & 0.48 & 0.65 \\
-\caption{Performance of various variant detection pipelines tested as part of the 1000 Genomes Project. Sets are Boston College; non-haplotype-based method (BC), haplotype-based method described in this paper (BC2), Baylor College of Medicine (BCM), Broad Institute GATK UnifiedGenotyper (BI1), Sanger Institute Samtools (SI1), University of Michigan GlfMultiples (UM), Broad Institute GATK HaplotypeCaller (BI2), Oxford Platypus (OX1), Sanger SGA (SI2), Oxford Cortex (OX2). Union: combina [...]
-\subsection{Indel detection performance}
-center & specificity & sensitivity & caller & optimality & AUC \\
-Oxford Cortex & 98 & 27 & OX2 & 73.02739 & 0.2646 \\
-Pindel & 90 & 52 & Pindel & 49.03060 & 0.4680 \\
-BC & 83 & 66 & BC & 38.01316 & 0.5478 \\
-Broad assembly & 80 & 67 & BI2 & 38.58756 & 0.5360 \\
-Sanger & 76 & 69 & SI1 & 39.20459 & 0.5244 \\
-Broad mapping & 65 & 74 & BI1 & 43.60046 & 0.4810 \\
-Oxford Platypus & 60 & 55 & OX1 & 60.20797 & 0.3300 \\ \hline
-\caption{Performance of indel detection methods in 1000 Genomes project on the AFR191 sample set as assessed via high-depth resequencing validation. Sets are Boston College FreeBayes (BC), Broad Institute GATK UnifiedGenotyper (BI1), Sanger Institute Samtools (SI1), Broad Institute GATK HaplotypeCaller (BI2), Oxford Platypus (OX1), Oxford Cortex (OX2).}
-\subsection{Sensitivity to low-frequency variation}
-Current methods for haplotype-based variant detection rely on assembly methods, which can be applied globally \citep{cortex} or locally \citep{dindel}. These methods remove reference bias from the analysis of short-read sequencing data, but the generation of assemblies of large genomes requires pruning of low-frequency kmer observations. While low-frequency kmers are often generated by sequencing error, in many cases they represent true variation, and thus this pruning reduces the sens [...]
-Results from the experiments described in \ref{sec:1000Gcomparisons} demonstrate that our method, while acting as a form of local assembly, does not incur the same sensitivity penalties seen in both local and global assembly methods. We assess this using the count of minor alternate alleles as reported by each caller (figure \ref{fig:lowfreqsens}). These results indicate that both global and local assembly methods suffer significant decrease in sensitivity to low-frequency variants, al [...]
-\caption{Sensitivity to low-frequency variants of various detection methods, as assessed in 191 samples of African ancestry in the 1000 Genomes low-coverage cohort. BC2 is FreeBayes, BI1 is the GATK UnifiedGenotyper, BI2 is the GATK HaplotypeCaller, and SI2 is the global assembler SGA.}
-\subsection{Haplotype-based consolidation of small variant calls}
-Ensemble methods have been shown to provide superior performance to component inference methods in many contexts \citep{opmac99}. We hypothesize that ensemble approaches to variant detection from short-read sequencing may provide improved performance in the context of variant detection. While ensemble approaches have already been successfully applied to SNPs in large-scale resequencing projects \citep{1000GPhaseI}, their application to other variant classes is problematic because detec [...]
-%\section{Generalizing variant detection to multiallelic loci and non-uniform copy number}
-At a given genetic locus we have $n$ samples drawn from a population, each of which has a copy number or multiplicity of $m$ within the locus. We denote the number of copies of the locus present within our set of samples as $M = \sum_{i=1}^n m_i$. Among these $M$ copies we have $K$ distinct alleles, $b_1,\ldots,b_K$ with allele counts $c_1,\ldots,c_K$ and frequencies $f_1,\ldots,f_K$. Each individual has an unphased genotype $G_i$ comprised of $k_i$ distinct alleles $b_{i_1},\ldots,b_ [...]
-\subsection{A Bayesian approach}
-To genotype the samples at a specific locus, we could simply apply a Bayesian statistic relating $P(G_i|R_i)$ to the likelihood of sequencing errors in our reads and the prior likelihood of specific genotypes. However, this maximum-likelihood approach limits our ability to incorporate information from other individuals in the population under analysis, which can improve detection power.
-Given a set of genotypes $G_1,\ldots,G_n$ and a set of observations $R_1,\ldots,R_n$ for all individuals at the current genetic locus, we can use Bayes' theorem to relate the probability of a specific combination of genotypes to both the quality of sequencing observations and \emph{a priori} expectations about the distribution of alleles within a set of individuals sampled from the same population:
-P(G_1,\ldots,G_n|R_1,\ldots,R_n) = { P(G_1,\ldots,G_n) P(R_1,\ldots,R_n|G_1,\ldots,G_n) \over P(R_1,\ldots,R_n)} \\
-P(G_1,\ldots,G_n|R_1,\ldots,R_n) = { P(G_1,\ldots,G_n) \prod_{i=1}^n P(R_i|G_i) \over
-\sum_{\forall{G_1,\ldots,G_n}} P(G_1,\ldots,G_n) \prod_{i=1}^n P(R_i|G_i) }
-In this formulation, $P(R_1,\ldots,R_n|G_1,\ldots,G_n) = \prod_{i=1}^n P(R_i|G_i)$ represents the likelihood that our observations match a given genotype combination (our data likelihood), and $P(G_1,\ldots,G_n)$ represents the prior likelihood of observing a specific genotype combination. We estimate the data likelihood as the joint probability that the observations for a specific individual support a given genotype. We use a neutral model of allele diffusion conditioned on an estimat [...]
-Except for situations with small numbers of samples and potential alleles, we avoid the explicit evaluation of the posterior distribution as implied by (\ref{eq:bayesian}), instead using a number of optimizations to make the algorithm tractable to apply to very large datasets (see section \ref{sec:genotyping}).
-\subsection{Estimating the probability of sequencing observations given an underlying genotype, $P(R_i|G)$}
-Given a set of reads $R_i = r_{i_1},\ldots,r_{i_{s_i}}$ from a sample at a given locus, we can extract a set of $k_i$ observed alleles $B'_i = b'_1,\ldots,b'_{k_i}$ corresponding to underlying alleles $b_1,\ldots,b_i$ which encapsulate the potential set of represented variants at the locus in the given sample, including erroneous observations. Each of these observed alleles $b'_i$ has a count $o_f$ within the observations of the individual sample $: \sum_{j=1}^{k_i} o_j = s_i$ and corre [...]
-The probability of obtaining a single observation $b_i'$ provided a genotype in a single sample is:
-P(b'_i|G) = \sum_{\forall(b_i \in G)} { f_i P(b'_i|b_i) }
-Here $f_i$ is the genotype allele frequency of $b_i$ in $G$. We observe that the process generating reads from a given locus in a given sample is a multinomial process in which the sampling probabilities for each allele are governed by both the counts of alleles in the genotype and the error process that generates $b'_i$ from underlying $b_i$. However, for the case that the base observation agrees with the underlying genotype, sampling probability dominates the probability that the obs [...]
-P(b'|b) = \left\{
- \begin{array}{ll}
- 1 & \mbox{if } b' = b \\
- P(error) & \mbox{if } b' \neq b
- \end{array} \right.
-Here $P(error)$ is the probability that the base is erroneous as determined by the sequencing process used to generate the reads from the sample. Provided this approximation, we can estimate the probability of a given set of reads conditioned on an underlying genotype by using the multinomial sampling probability to estimate the probability of obtaining the observations that support the genotype scaled by the probability that the observations that disagree with the genotype are erroneous:
-P(R_i|G) \approx {s_i \choose o_1,\ldots,o_{k_i} }
-\prod_{j=1}^{k_i} { f_{i_j}^{o_j} }
-\prod_{l=1}^{s_i} { P(b'_l | b_l) }
-%which define $G_i$, $f_{i_1},\ldots,f_{i_{k_i}}$.
-%If we had perfect observations of a locus, $P(R_i|G_i)$ for any individual would approximate the probability of sampling observations $R_i$ out of $G_i$ with replacement. This probability is given by the multinomial distribution in $s_i$ over the probability $P(b_l)$ of obtaining each allele from the given genotype, which is ${f_{i_j} \over m_i}$ for each allele frequency in the frequencies which define $G_i$, $f_{i_1},\ldots,f_{i_{k_i}}$.
-% TODO f_k_i
-% Furthermore, we must sum $P(R_i|G_i)$ for all possible $R_i$ combinations $\forall(R_i \in G_i : | R_i | = k_i)$ drawn from our genotype to obtain the joint probability of $R_i$ given $G_i$, as each $\prod_{l=1}^{s_i} { P(b'_l | b_l) }$ only accounts for the marginal probability of the a specific $R_i$ given $B'_i$.
-P(R_i | G_i)
-P(B'_i | G_i) =
-{ s_i! \over {
-\prod_{j=1}^{k_i} o'_j !
-} }
-\prod_{j=1}^{k_i} { \left({f_{i_j} \over m_i}\right)^{o'_j} }
-\prod_{l=1}^{s_i} { P(b'_l | b_l) }
-This extends $P(R_i|G_i)$ as follows:
-P(R_i | G_i) =
-\sum_{\forall(R_i \in G_i)} \left(
-{ s_i! \over {
-\prod_{j=1}^{k_i} o'_j !
-} }
-\prod_{j=1}^{k_i} { \left({f_{i_j} \over m_i}\right)^{o'_j} }
-\prod_{l=1}^{s_i} { P(b'_l | b_l) } \right)
-\subsection{Genotype combination priors, $P(G_1,\ldots,G_n)$}
-\subsubsection{Decomposition of prior probability of genotype combination}
-Let $G_1,\ldots,G_n$ denote the set of genotypes at the locus and $f_1,\ldots,f_k$ denote the set of allele frequencies which corresponds to these genotypes. We estimate the prior likelihood of observing a specific combination of genotypes within a given locus by decomposition into resolvable terms:
-P(G_1,\ldots,G_n) = P(G_1,\ldots,G_n \cap f_1,\ldots,f_k)
-The probability of a given genotype combination is equivalent to the intersection of that probability and the probability of the corresponding set of allele frequencies. This identity follows from the fact that the allele frequencies are derived from the set of genotypes and we always will have the same $f_1,\ldots,f_k$ for any equivalent $G_1,\ldots,G_n$.
-Following Bayes' Rule, this identity further decomposes to:
-P(G_1,\ldots,G_n \cap f_1,\ldots,f_k) = P(G_1,\ldots,G_n | f_1,\ldots,f_k) P(f_1,\ldots,f_k)
-We now can estimate the prior probability of $G_1,\ldots,G_n$ in terms of the genotype combination sampling probability, $P(G_1,\ldots,G_n | f_1,\ldots,f_k)$, and the probability of observing a given allele frequency in our population, $P(f_1,\ldots,f_k)$.
-\subsubsection{Genotype combination sampling probability $P(G_1,\ldots,G_n | f_1,\ldots,f_k)$}
-The multinomial coefficient ${M \choose c_1,\ldots,c_k }$ gives the number of ways in which a set of alleles with frequencies $f_1,\ldots,f_k : f_i = c_i/M$ may be distributed among $M$ copies of a locus. For phased genotypes $\hat{G_i}$ the probability of sampling a specific $\hat{G_1},\ldots,\hat{G_n}$ given allele frequencies $f_1,\ldots,f_k$ is thus provided by the inverse of this term:
-P(\hat{G_1},\ldots,\hat{G_n} | f_1,\ldots,f_k) =
-{M \choose
- c_1,\ldots,c_k }^{-1}
-However, our model is limited to unphased genotypes because our primary data only allows phasing within a limited context. Consequently, we must adjust (\ref{eq:phasedsampling}) to reflect the number of phased genotypes which correspond to the unphased genotyping $G_1,\ldots,G_n$. Each unphased genotype corresponds to as many phased genotypes as there are permutations of the alleles in $G_i$. Thus, for a given unphased genotyping $G_1,\ldots,G_n$, there are $\prod_{i=1}^n { m_i \choose [...]
-In conjunction, these two terms provide the probability of sampling a particular unphased genotype combination given a set of allele frequencies:
-P(G_1,\ldots,G_n | f_1,\ldots,f_k) =
-{ M \choose c_1,\ldots,c_k }^{-1}
-\prod_{i=1}^n { m_i \choose c_{i_1}, \ldots, c_{i_{k_i}}}
-% =
-%\prod_{l=1}^k f_l!
-%\prod_{i=1}^n \frac{m_i!}{\prod_{j=1}^{k_i} f_{i_j}!}
-In the case of a fully diploid population, the product of all possible multiset permutations of all genotypes reduces to $2^h$, where $h$ is the number of heterozygous genotypes, simplifying (\ref{eq:unphasedsampling}) to:
-P(G_1,\ldots,G_n | f_1,\ldots,f_k) =
-2^h { M \choose c_1,\ldots,c_k }^{-1}
-\subsubsection{Derivation of $P(f_1,\ldots,f_k)$ by Ewens' sampling formula}
-Provided our sample size $n$ is small relative to the population which it samples, and the population is in equilibrium under mutation and genetic drift, the probability of observing a given set of allele frequencies at a locus is given by Ewens' sampling formula \citep{ewens72}. Ewens' sampling formula is based on an infinite alleles coalescent model, and relates the probability of observing a given set of allele frequencies to the number of sampled chromosomes at the locus ($M$) and t [...]
-The application of Ewens' formula to our context is straightforward. Let $a_f$ be the number of alleles among $b_1,\ldots,b_k$ whose allele count within our set of samples is $c$. We can thus transform our set of frequencies $f_1,\ldots,f_k$ (equivalently, allele counts, $c_1,\ldots,c_k$) into a set of non-negative frequency counts $a_1,\ldots,a_M : \sum_{c=1}^M{ca_c} = M$. As many $c_1,\ldots,c_k$ can map to the same $a_1,\ldots,a_M$, this transformation is not invertible, but it is [...]
-Having transformed a set of frequencies over alleles to a set of frequency counts over frequencies, we can now use Ewens' sampling formula to approximate $P(f_1,\ldots,f_k)$ given $\theta$:
-P(f_1,\ldots,f_k) = P(a_1,\ldots,a_M) = {M! \over \theta(\theta+1)\cdots(\theta+M-1)}\prod_{j=1}^M{\theta^{a_j} \over j^{a_j} a_j!}
-P(f_1,\ldots,f_k) =
-P(a_1,\ldots,a_M) =
-{M! \over \theta \prod_{z=1}^{M-1}(\theta+z)}
-\prod_{j=1}^M{\theta^{a_j} \over j^{a_j} a_j!}
-In the bi-allelic case in which our set of samples has two alleles with frequencies $f_1$ and $f_2$ such that $f_1 + f_2 = M$:
-P(a_{f_1} = 1, a_{f_2} = 1) =
-{M! \over \prod_{z=1}^{M-1}(\theta+z)}
-{\theta \over f_1 f_2}
-While in the monomorphic case, where only a single allele is represented at this locus in our population, this term reduces to:
-P(a_M = 1) =
-{(M-1)! \over \prod_{z=1}^{M-1}(\theta+z)}
-In this case, $P(f_1,\ldots,f_k) = 1/(1+\theta) \approx 1 - \theta$ when $M = 2$ and $\theta$ is small. This is sensible as $\theta$ represents the population mutation rate, which can be estimated from the pairwise heterozygosity rate of any two chromosomes in the population \citep{watterson1975, tajima1983}.
-\subsection{Expanding the model to incorporate the observability of the locus and alleles}
-The bayesian model described in section \ref{sec:modeloverview} can generate posterior estimates based on sequencing quality information and genotype distribution in a panel of samples. However, this estimate can incorporate only information captured in base quality information and read counts. This may fail to assess the ability of the sequencing and alignment methods to accurately characterize the locus and alleles that we genotype, which is an important consideration for downstream [...]
-Previous authors have addressed this limitation by adding post-processing steps to recalibrate the estimated quality of variants using training sets of known variants and known artifacts. Once variant calls have been made we can annotate them with a variety of features and apply standard machine learning methods to ``recalibrate'' the quality estimates produced from genotype distribution, allele frequency, observation counts, and base quality. For instance, \cite{gatk2011} apply a guas [...]
-Problematically, such an approach requires a training set, which may not be applicable in contexts with limited validation data, such as is commonly the case in non-model organisms. Furthermore, the training set may bias our results towards established patterns, decreasing sensitivity to novel variation that might have been previously uncharacterized due to technological limitations.
-In contrast, we address the issue of loci sequencability in a general, \emph{a priori} fashion by extending the traditional Bayesian variant detection model to incorporate an indicator, $S$, which describes the ability of our sequencing and alignment methods to characterize the locus we are considering. We define $S = true$ when we can sequence the locus and alleles and $S = false$ otherwise, and redefine our model (\ref{eq:bayesian}) to estimate the posterior probability of a particula [...]
-P(G_1,\ldots,G_n, S | R_1,\ldots,R_n) = { P(G_1,\ldots,G_n) P(S) \prod_{i=1}^n P(R_i|G_i) \over
-\sum_{\forall{G_1,\ldots,G_n}} ( P(G_1,\ldots,G_n) P(S) \prod_{i=1}^n P(R_i|G_i) ) }
-We will describe the development of $P(S)$ using aggregate statistics built from the read evidence overlapping the locus in section \ref{sec:sequencable}.
-\subsection{Estimation of the probability that the locus is sequencable $P(S)$}
-For accurate variant detection via resequencing, we require that the locus in question is sequencable. That is, we require that the reference is accurate, that we have an accurate model of copy number at the locus, that we have genomic coverage, and that reads can be aligned to the alleles of interest in the region. In a case where these conditions are met, we assume $S = true$. Where they are not, $S = false$.
-% TODO cleanup, remove duplication with previous section
-The sequenceability of a locus and its alleles is assumed under previous Bayesian variant detection models \citep{marth99, samtools, li2011stats}. Uncertainty about the genomic model characterization has been incorporated into data likelihoods or detection thresholds using read mapping quality \citep{snptools, maq}. In practice, the incorporation of confidence in the characterizability of the locus and alleles requires the reclassification of variant calls on the basis of aggregate met [...]
-A quality score recalibrator utilizes training data, particularly as sets of known variants or validated errors, to describe the distribution of true events and errors across the space of possible annotations in the data set to be recalibrated. The variant calling error function as described by these aggregate metrics can then be approximated using a variety of machine learning methods, such as support vector machines \citep{snpsvm} or a gaussian mixture model as implemented in the GATK [...]
-% Feature-based recalibration and detection methods have wide utility in variant detection, and we can expect them to become more common in practice, but they can also be problematic in that they require adequate training sets, which may not be available in a specific experimental context.
-We observe that $S$ is proportional to a number of variables which can be estimated directly from the observations covering a genomic locus. For instance, if the locus and alleles are observable without bias, we expect the count of observations of a sample supporting a particular alternate allele $R_i \equiv b$ to approximate its frequency in the correct genotype $G_i$ for the sample, $|R_i \equiv b|/|R_i| \approx |b \in G_i|/m_i$. Deviation from this expectation which is observed acro [...]
-In an unbiased context, we expect half of our reads to place to either side of the locus (placement bias $B_p$):
-P(B_p = 0) \propto \binom{|R|}{|R_{left}|} 0.5^{|R|}
-We expect half to contain the allele in the first half of their length (cycle bias $B_c$):
-P(B_c = 0) \propto \binom{|R|}{|R_{start}|} 0.5^{|R|}
-Half should be derived from each strand of DNA (strand bias $B_s$):
-P(B_s = 0) \propto \binom{|R|}{|R_{forward}|} 0.5^{|R|}
-And, the aggregate fraction of reads supporting a particular allele in samples with a particular genotype should approximate the frequency of the allele in that particular genotype (allele balance, $B_a$). Recall that the distinct alleles in a particular set of genotypes are $b_1,\ldots,b_K$, the corresponding allele frequencies in the set are $f_1,\ldots,f_K$, and the observation counts are represented by $o_1,\ldots,o_K$:
-%o_i = |\{r \in R : r \equiv b_i\}|
-P(B_a = 0) \propto
-\prod_{\forall g \in \{G\}}
-\binom{ |R| }{ o_1,\ldots,o_K }
-\prod_{j=1}^{K} f_j^{o_j}
-We use these relationships to determine relationships in $P(S)$ under various configurations of alleles and genotypes in the samples:
-P(S) \propto P(B_p = 0) P(B_c = 0) P(B_s = 0) P(B_a = 0)
-P(S) \propto & & multinom([ |\{R \equiv b\}| \forall b \in K] ; \sum_{i=1}^{n} |R_i|, f_1,\ldots,f_K) \\
-& \times \prod_{\forall b \in K} & binom(|forwardStrand(\{R \equiv b\})|; |\{R \equiv b\}|, 1/2) \\
-& & \times binom(|placedLeft(\{R \equiv b\})|; |\{R \equiv b\}|, 1/2) \\
-& & \times binom(|placedRight(\{R \equiv b\})|; |\{R \equiv b\}|, 1/2)
-Here $binom(k; n, p)$ is the binomial probability mass function of $k$ successes in $n$ trials with probability $p$. Similarly, $multinom(k_1,\ldots,k_i; n, p_1,\ldots,p_i)$ provides the multinomial PMF for results $k_1,\ldots,k_i$ in $n$ trials with probabilities $p_1,\ldots,p_i$. As defined before, we have a set of reads $\{R\}$ at the locus and a set of alleles $K = b_1,\ldots,b_K$ in a given genotyping across all the samples.
-%% TODO
-\section{Direct detection of phase from short-read sequencing}
-By modeling multiallelic loci, this Bayesian statistical framework provides the foundation for the direct detection of longer, multi-base alleles from sequence alignments. In this section we describe our implementation of a haplotype-based variant detection method based on this model.
-Our method assembles haplotype observations over minimal, dynamically-determined, reference-relative windows which contain multiple segregating alleles. To be used in the analysis, haplotype observations must be derived from aligned reads which are anchored by reference-matching sequence at both ends of the detection window. These haplotype observations have derived quality estimations which allow their incorporation into the general statistical model described in section \ref{sec:mode [...]
-\subsection{Parsing haplotype observations from sequencing data}
-In order to establish a range of sequence in which multiple polymorphisms segregate in the population under analysis, it is necessary to first determine potentially polymorphic windows in order to bound the analysis. This determination is complicated by the fact that a strict windowing can inappropriately break clusters of alleles into multiple variant calls. We employ a dynamic windowing approach that is driven by the observation of multiple proximal reference-relative variations (SNP [...]
-Where reference-relative variations are separated by less than a configurable number of non-polymorphic bases in an aligned sequence trace, our method combines them into a single haplotype allele observation, $H_i$. The observational quality of these haplotype alleles is given as $\min ( q_l \, \forall \, b'_i \in H_i , \, Q_i)$, or the minimum of the supporting read's mapping quality and the minimum base quality of the haplotype's component variant allele observations.
-\caption{The direct detection of phase from short-read sequencing traces and counting of haplotypes across dynamically-determined windows.}
-\subsection{Determining a window over which to assemble haplotype observations}
-At each position in the reference, we collect allele observations derived from alignments as described in \ref{sec:parsing}. To improve performance, we apply a set of input filters to exclude alleles from the analysis which are highly unlikely to be true. These filters require a minimum number of alternate observations and a minimum sum of base qualities in a single sample in order to incorporate a putative allele and its observations into the analysis.
-We then determine a haplotype length over which to genotype samples by a bounded iterative process. We first determine the allele passing the input filters which is longest relative to the reference. For instance, a longer allele could be a multi-base indel or a composite haplotype observation flanked by SNPs. Then, we parse haplotype observations from all the alignments which fully overlap this window, finding the rightmost end of the longest haplotype allele which begins within the [...]
-\subsection{Detection and genotyping of local haplotypes}
-Once a window for analysis has been determined, we parse all fully-overlapping reads into haplotype observations which are anchored at the boundaries of the window. Given these sets of sequencing observations $r_{i_1},\ldots,r_{i_{s_i}} = R_i$ and data likelihoods $P(R_i|G_i)$ for each sample and possible genotype derived from the putative alleles, we then determine the probability of polymorphism at the locus given the Bayesian model described in section \ref{sec:model}.
-To establish a maximum \emph{a posteriori} estimate of the genotype for each sample, we employ a convergent gradient ascent approach to the posterior probability distribution over the mutual genotyping across all samples under our Bayesian model. This process begins at the genotyping across all samples $G_1,\ldots,G_n$ where each sample's genotype is the maximum-likelihood genotype given the data likelihood $P(R_i|G_i)$:
-G_1,\ldots,G_n =
-\underset{G_i}{\operatorname{argmax}} \; P(R_i|G_i)
-%:= \{ G_i | \forall G : P(R_i|G_i) >= P(R_i|G) \}
-The posterior search then attempts to find a genotyping $G_1,\ldots,G_n$ in the local space of genotypings which has higher posterior probability under the model than this initial genotyping. In practice, this step is done by searching through all genotypings in which a single sample has up to the $N$th best genotype when ranked by $P(R_i|G_i)$, and $N$ is a small number (e.g. 2). This search starts with some set of genotypes $G_1,\ldots,G_n = \{G\}$ and attempts to find a genotyping $ [...]
-P(\{G\}'|R_1,\ldots,R_n) > P(\{G\}|R_1,\ldots,R_n)
-$\{G\}'$ is then used as a basis for the next update step. This search iterates until convergence, but in practice must be bounded at a fixed number of steps in order to ensure optimal performance. As the quality of input data increases in coverage and confidence, this search will converge more quickly because the maximum-likelihood estimate will lie closer to the maximum \emph{a posteriori} estimate under the model.
-This method incorporates a basic form of genotype imputation into the detection method, which in practice improves the quality of raw genotypes produced in primary allele detection and genotyping relative to methods which only utilize a maximum-likelihood method to determine genotypes. Furthermore, this method allows for the determination of marginal genotype likelihoods via the marginalization of assigned genotypes for each sample over the posterior probability distribution.
-\subsection{Probability of polymorphism}
-Provided a maximum \emph{a posteriori} estimate of the genotyping of all the individuals in our sample, we might like to establish an estimate of the quality of the genotyping. For this, we can use the probability that the locus is polymorphic, which means that the number of distinct alleles at the locus, $K$, is greater than 1. While in practice the space of possible genotypings is too great to integrate over, it is possible to derive the probability that the locus is polymorphic in our s [...]
-P(K > 1 | R_1,\ldots,R_n)
-= 1 - P(K = 1 | R_1,\ldots,R_n)
-%1 - \sum_{\forall(G_i,\ldots,G_n : K = 1)} P(G_i,\ldots,G_n|R_i,\ldots,R_n)
-Equation (\ref{eq:probpoly}) thus provides the probability of polymorphism at the site, which is provided as a quality estimate for each evaluated locus in the output of FreeBayes.
-\subsection{Marginal likelihoods of individual genotypes}
-Similarly, we can establish a quality estimate for a single genotype by summing over the marginal probability of that specific genotype and sample combination under the model. The marginal probability of a given genotype is thus:
-P(G_j | R_1,\ldots,R_n) = \sum_{\forall(\{G\} : G_j \in \{G\})} P(\{G\} | R_1,\ldots,R_n)
-In implementation, the estimation of this term requires us to sample enough genotypings from the posterior in order to obtain well-normalized marginal likelihoods. In practice, we marginalize from the local space of genotypings in which each individual genotype is no more than a small number of steps in one sample from the maximum \emph{a posteriori} estimate of $G_i,\ldots,G_n$. This space is similar to that used during the posterior search described in section \ref{sec:genotypin [...]
-%\subsection{Extensions to the method}
-%\subsection{Using prior empirical information to improve detection}
diff --git a/paper/miseq.png b/paper/miseq.png
deleted file mode 100644
index 5a72618..0000000
Binary files a/paper/miseq.png and /dev/null differ
diff --git a/paper/omni_errors.png b/paper/omni_errors.png
deleted file mode 100644
index c815b85..0000000
Binary files a/paper/omni_errors.png and /dev/null differ
diff --git a/paper/references.bib b/paper/references.bib
deleted file mode 100644
index 8e4421a..0000000
--- a/paper/references.bib
+++ /dev/null
@@ -1,321 +0,0 @@
- at Article{marth99,
- Author="Marth, G. T. and Korf, I. and Yandell, M. D. and Yeh, R. T. and Gu, Z. and Zakeri, H. and Stitziel, N. O. and Hillier, L. and Kwok, P. Y. and Gish, W. R. ",
- Title="{{A} general approach to single-nucleotide polymorphism discovery}",
- Journal="Nat. Genet.",
- Year="1999",
- Volume="23",
- Pages="452--456",
- Month="Dec"
- at misc{freebayeshome,
-Author="Garrison, E.",
- at misc{freebayesgit,
-Author="Garrison, E.",
-Title="{FreeBayes source repository}",
- at misc{mutatrixgit,
-Author="Garrison, E.",
-Title="{mutatrix population genome simulator}",
- at misc{ogapgit,
-Author="Garrison, E.",
-Title="{ogap: a local indel realigner}",
- at misc{vcflibgit,
-Author="Garrison, E.",
-Title="{vcflib: variant call file processing and manipulation utilities}",
- at misc{bamleftaligngit,
-Author="Garrison, E.",
-Title="{bamleftalign: BAM indel left-realigner}",
- at misc{mosaik,
-Author="Lee, W. P. and M. Str{\"{o}}mberg",
-Title="{MOSAIK reference-guided aligner for next-generation sequencing technologies}",
- at Article{ewens72,
- Author="Ewens, W. J. ",
- Title="{{T}he sampling theory of selectively neutral alleles}",
- Journal="Theor Popul Biol",
- Year="1972",
- Volume="3",
- Pages="87--112",
- Month="Mar"
- at Article{li2011stats,
- Author="Li, H. ",
- Title="{{A} statistical framework for {S}{N}{P} calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data}",
- Journal="Bioinformatics",
- Year="2011",
- Volume="27",
- Number="21",
- Pages="2987--2993",
- Month="Nov"
- at Article{samtools,
- Author="Li, H. and Handsaker, B. and Wysoker, A. and Fennell, T. and Ruan, J. and Homer, N. and Marth, G. and Abecasis, G. and Durbin, R. ",
- Title="{{T}he {S}equence {A}lignment/{M}ap format and {S}{A}{M}tools}",
- Journal="Bioinformatics",
- Year="2009",
- Volume="25",
- Number="16",
- Pages="2078--2079",
- Month="Aug"
- at Article{libaq,
- Author="Li, H. ",
- Title="{{I}mproving {S}{N}{P} discovery by base alignment quality}",
- Journal="Bioinformatics",
- Year="2011",
- Volume="27",
- Number="8",
- Pages="1157--1158",
- Month="Apr"
- at Article{gatk2011,
- Author="DePristo, M. A. and Banks, E. and Poplin, R. and Garimella, K. V. and Maguire, J. R. and Hartl, C. and Philippakis, A. A. and del Angel, G. and Rivas, M. A. and Hanna, M. and McKenna, A. and Fennell, T. J. and Kernytsky, A. M. and Sivachenko, A. Y. and Cibulskis, K. and Gabriel, S. B. and Altshuler, D. and Daly, M. J. ",
- Title="{{A} framework for variation discovery and genotyping using next-generation {D}{N}{A} sequencing data}",
- Journal="Nat. Genet.",
- Year="2011",
- Volume="43",
- Number="5",
- Pages="491--498",
- Month="May"
- at Article{browning2007,
- Author="Browning, S. R. and Browning, B. L. ",
- Title="{{R}apid and accurate haplotype phasing and missing-data inference for whole-genome association studies by use of localized haplotype clustering}",
- Journal="Am. J. Hum. Genet.",
- Year="2007",
- Volume="81",
- Number="5",
- Pages="1084--1097",
- Month="Nov"
- at Article{mach2010,
- Author="Li, Y. and Willer, C. J. and Ding, J. and Scheet, P. and Abecasis, G. R. ",
- Title="{{M}a{C}{H}: using sequence and genotype data to estimate haplotypes and unobserved genotypes}",
- Journal="Genet. Epidemiol.",
- Year="2010",
- Volume="34",
- Number="8",
- Pages="816--834",
- Month="Dec"
- at Article{delaneau2012,
- Author="Delaneau, O. and Marchini, J. and Zagury, J. F. ",
- Title="{{A} linear complexity phasing method for thousands of genomes}",
- Journal="Nat. Methods",
- Year="2012",
- Volume="9",
- Number="2",
- Pages="179--181",
- Month="Feb"
- at Article{howie2009,
- Author="Howie, B. N. and Donnelly, P. and Marchini, J. ",
- Title="{{A} flexible and accurate genotype imputation method for the next generation of genome-wide association studies}",
- Journal="PLoS Genet.",
- Year="2009",
- Volume="5",
- Number="6",
- Pages="e1000529",
- Month="Jun"
- at Article{howie2011,
- Author="Howie, B. and Marchini, J. and Stephens, M. ",
- Title="{{G}enotype imputation with thousands of genomes}",
- Journal="G3 (Bethesda)",
- Year="2011",
- Volume="1",
- Number="6",
- Pages="457--470",
- Month="Nov"
- at Article{clarke2009,
- Author="Clarke, J. and Wu, H. C. and Jayasinghe, L. and Patel, A. and Reid, S. and Bayley, H. ",
- Title="{{C}ontinuous base identification for single-molecule nanopore {D}{N}{A} sequencing}",
- Journal="Nat Nanotechnol",
- Year="2009",
- Volume="4",
- Number="4",
- Pages="265--270",
- Month="Apr"
- at Article{branton2008,
- Author="Branton, D. and Deamer, D. W. and Marziali, A. and Bayley, H. and Benner, S. A. and Butler, T. and Di Ventra, M. and Garaj, S. and Hibbs, A. and Huang, X. and Jovanovich, S. B. and Krstic, P. S. and Lindsay, S. and Ling, X. S. and Mastrangelo, C. H. and Meller, A. and Oliver, J. S. and Pershin, Y. V. and Ramsey, J. M. and Riehn, R. and Soni, G. V. and Tabard-Cossa, V. and Wanunu, M. and Wiggin, M. and Schloss, J. A. ",
- Title="{{T}he potential and challenges of nanopore sequencing}",
- Journal="Nat. Biotechnol.",
- Year="2008",
- Volume="26",
- Number="10",
- Pages="1146--1153",
- Month="Oct"
- at Article{tajima1983,
- Author="Tajima, F. ",
- Title="{{E}volutionary relationship of {D}{N}{A} sequences in finite populations}",
- Journal="Genetics",
- Year="1983",
- Volume="105",
- Number="2",
- Pages="437--460",
- Month="Oct"
- at Article{watterson1975,
- Author="Watterson, G. A. ",
- Title="{{O}n the number of segregating sites in genetical models without recombination}",
- Journal="Theor Popul Biol",
- Year="1975",
- Volume="7",
- Number="2",
- Pages="256--276",
- Month="Apr"
- at Techreport{holtgrewe2010,
- Author="Holtgrewe, M.",
- Year="2010",
- Title="Mason – a read simulator for second generation sequencing data",
- Journal="Technical Report",
- Number="TR-B-10-06",
- Institution="Institut für Mathematik und Informatik, Freie Universität Berlin"
-% 23128226
- at Article{1000Gphase1,
- Title="{{A}n integrated map of genetic variation from 1,092 human genomes}",
- Journal="Nature",
- Year="2012",
- Volume="491",
- Number="7422",
- Pages="56--65",
- Month="Nov",
- Author="1000 Genomes Project Participants, The"
- at Article{cortex,
- Author="Iqbal, Z. and Caccamo, M. and Turner, I. and Flicek, P. and McVean, G. ",
- Title="{{D}e novo assembly and genotyping of variants using colored de {B}ruijn graphs}",
- Journal="Nat. Genet.",
- Year="2012",
- Volume="44",
- Number="2",
- Pages="226--232",
- Month="Feb"
- at Article{dindel,
- Author="Albers, C. A. and Lunter, G. and MacArthur, D. G. and McVean, G. and Ouwehand, W. H. and Durbin, R. ",
- Title="{{D}indel: accurate indel calls from short-read data}",
- Journal="Genome Res.",
- Year="2011",
- Volume="21",
- Number="6",
- Pages="961--973",
- Month="Jun"
- at article{opmac99,
- author = {David W. Opitz and
- Richard Maclin},
- title = {Popular Ensemble Methods: An Empirical Study},
- journal = {J. Artif. Intell. Res. (JAIR)},
- volume = {11},
- year = {1999},
- pages = {169-198},
- at Article{blmash,
- Author="Cleary, S. P. and Zhang, W. and Di Nicola, N. and Aronson, M. and Aube, J. and Steinman, A. and Haddad, R. and Redston, M. and Gallinger, S. and Narod, S. A. and Gryfe, R. ",
- Title="{{H}eterozygosity for the {B}{L}{M}({A}sh) mutation and cancer risk}",
- Journal="Cancer Res.",
- Year="2003",
- Volume="63",
- Number="8",
- Pages="1769--1771",
- Month="Apr"
- at Article{recurrentTERT2013,
- Author="Huang, F. W. and Hodis, E. and Xu, M. J. and Kryukov, G. V. and Chin, L. and Garraway, L. A. ",
- Title="{{H}ighly recurrent {T}{E}{R}{T} promoter mutations in human melanoma}",
- Journal="Science",
- Year="2013",
- Volume="339",
- Number="6122",
- Pages="957--959",
- Month="Feb"
- at Article{snpsvm,
- Author="O'Fallon, B. D. and Wooderchak-Donahue, W. and Crockett, D. K. ",
- Title="{{A} support vector machine for identification of single-nucleotide polymorphisms from next-generation sequencing data}",
- Journal="Bioinformatics",
- Year="2013",
- Volume="29",
- Number="11",
- Pages="1361--1366",
- Month="Jun"
- at Article{snptools,
- Author="Wang, Y. and Lu, J. and Yu, J. and Gibbs, R. A. and Yu, F. ",
- Title="{{A}n integrative variant analysis pipeline for accurate genotype/haplotype inference in population {N}{G}{S} data}",
- Journal="Genome Res.",
- Year="2013",
- Volume="23",
- Number="5",
- Pages="833--842",
- Month="May"
- at Article{maq,
- Author="Li, H. and Ruan, J. and Durbin, R. ",
- Title="{{M}apping short {D}{N}{A} sequencing reads and calling variants using mapping quality scores}",
- Journal="Genome Res.",
- Year="2008",
- Volume="18",
- Number="11",
- Pages="1851--1858",
- Month="Nov"
\ No newline at end of file
diff --git a/python/.gitignore b/python/.gitignore
deleted file mode 100644
index 72723e5..0000000
--- a/python/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
diff --git a/python/README b/python/README
deleted file mode 100644
index f2454fc..0000000
--- a/python/README
+++ /dev/null
@@ -1 +0,0 @@
-This is a python implementation of the Bayes-Ewens variant caller algorithm.
diff --git a/python/allelebayes.py b/python/allelebayes.py
deleted file mode 100644
index 6bf6ba1..0000000
--- a/python/allelebayes.py
+++ /dev/null
@@ -1,373 +0,0 @@
-# calculates data likelihoods for sets of alleles
-import multiset
-import sys
-import cjson
-import phred
-import json
-import math
-import operator
-from logsumexp import logsumexp
-from dirichlet import dirichlet_maximum_likelihood_ratio, dirichlet, multinomial, multinomialln
-from factorialln import factorialln
-This module attempts to find the best method to approximate the integration of
-data likelihoods for the bayesian variant caller we're currently working on.
-stdin should be a stream of newline-delimited json records each encoding a list
-of alleles which have been parsed out of alignment records. alleles.cpp in
-this distribution provides such a stream.
-Erik Garrison <erik.garrison at bc.edu> 2010-07-15
-#potential_alleles = [
-# {'type':'reference'},
-# {'type':'snp', 'alt':'A'},
-# {'type':'snp', 'alt':'T'},
-# {'type':'snp', 'alt':'G'},
-# {'type':'snp', 'alt':'C'}
-# ]
-def list_genotypes_to_count_genotypes(genotypes):
- count_genotypes = []
- for genotype in genotypes:
- counts = {}
- for allele in genotype:
- if counts.has_key(allele):
- counts[allele] += 1
- else:
- counts[allele] = 1
- count_genotypes.append(counts.items())
- return count_genotypes
-ploidy = 2
-potential_alleles = ['A','T','G','C']
-# genotypes are expressed as sets of allele frequencies
-genotypes = list_genotypes_to_count_genotypes(list(multiset.multichoose(ploidy, potential_alleles)))
-# update this so that we aren't just using the 'alternate' field from the alleles
-# and are also incorporating the type of allele (ins, deletion, ref, snp)
-def group_alleles(alleles):
- groups = {}
- for allele in alleles:
- alt = allele['alt']
- if groups.has_key(alt):
- groups[alt].append(allele)
- else:
- groups[alt] = [allele]
- return groups
-def alleles_quality_to_lnprob(alleles):
- for allele in alleles:
- allele['quality'] = phred.phred2ln(allele['quality'])
- return alleles
-def product(l):
- return reduce(operator.mul, l)
-def observed_alleles_in_genotype(genotype, allele_groups):
- in_genotype = {}
- not_in_genotype = {}
- for key in allele_groups.keys():
- found = False
- for allele, count in genotype:
- if allele == key:
- in_genotype[key] = allele_groups[key]
- found = True
- break
- if not found:
- not_in_genotype[key] = allele_groups[key]
- return in_genotype, not_in_genotype
-#def scaled_sampling_prob(genotype, alleles):
-# """The probability of drawing the observations in the allele_groups out of
-# the given genotype, scaled by the number of possible multiset permutations
-# of the genotype (we scale because we don't phase our genotypes under
-# evaluation)."""
-# allele_groups = group_alleles(alleles)
-# if len(allele_groups.items()) == 0:
-# return 0
-# genotype_allele_frequencies = [x[1] for x in genotype]
-# multiplicity = sum(genotype_allele_frequencies)
-# genotype_allele_probabilities = [float(x)/multiplicity for x in genotype_allele_frequencies]
-# observed_allele_frequencies = [len(x) for x in allele_groups.items()]
-# observation_product = 1
-# for allele, count in genotype:
-# if allele_groups.has_key(allele):
-# observation_product *= math.pow(float(count) / multiplicity, len(allele_groups[allele]))
-# return float(math.pow(math.factorial(multiplicity), 2)) \
-# / (product([math.factorial(x) for x in genotype_allele_frequencies]) *
-# sum([math.factorial(x) for x in observed_allele_frequencies])) \
-# * observation_product
-# Yes, this is the sampling probability. It is the multinomial sampling
-# probability, which is the specific probability of a specific set of
-# categorical outcomes. Unfortunately, this is not what we really want here.
-# What we want is the prior probability that a given set of draws come out of a
-# given multiset (genotype, in our case). I believe that this is given by the
-# Dirichlet distribution. Investigate.
-def sampling_prob(genotype, alleles):
- """The specific probability of drawing the observations in alleles out of the given
- genotype, follows the multinomial probability distribution."""
- allele_groups = group_alleles(alleles)
- multiplicity = sum([x[1] for x in genotype])
- print genotype, multiplicity, alleles
- for allele, count in genotype:
- if allele_groups.has_key(allele):
- print allele, count, math.pow(float(count) / multiplicity, len(allele_groups[allele]))
- print product([math.factorial(len(obs)) for obs in allele_groups.values()])
- print allele_groups.values()
- return float(math.factorial(len(alleles))) \
- / product([math.factorial(len(obs)) for obs in allele_groups.values()]) \
- * product([math.pow(float(count) / multiplicity, len(allele_groups[allele])) \
- for allele, count in genotype if allele_groups.has_key(allele)])
-def likelihood_given_true_alleles(observed_alleles, true_alleles):
- prob = 0
- for o, t in zip(observed_alleles, true_alleles):
- if o['alt'] == t['alt']:
- prob += math.log(1 - math.exp(o['quality']))
- else:
- prob += o['quality']
- return prob
-def data_likelihood_exact(genotype, observed_alleles):
- """'Exact' data likelihood, sum of sampling probability * join Q score for
- the observed alleles over all possible underlying 'true allele'
- combinations."""
- #print "probability that observations", [o['alt'] for o in observed_alleles], "arise from genotype", genotype
- observation_count = len(observed_alleles)
- ploidy = sum([count for allele, count in genotype])
- allele_probs = [count / float(ploidy) for allele, count in genotype]
- probs = []
- # for all true allele combinations X permutations
- for true_allele_combination in multiset.multichoose(observation_count, [x[0] for x in genotype]):
- for true_allele_permutation in multiset.permutations(true_allele_combination):
- # this mapping allows us to use sampling_prob the same way as we do when we use JSON allele observation records
- true_alleles = [{'alt':allele} for allele in true_allele_permutation]
- allele_groups = group_alleles(true_alleles)
- observations = []
- for allele, count in genotype:
- if allele_groups.has_key(allele):
- observations.append(len(allele_groups[allele]))
- else:
- observations.append(0)
- #sprob = dirichlet_maximum_likelihood_ratio(allele_probs, observations) # distribution parameter here
- lnsampling_prob = multinomialln(allele_probs, observations)
- prob = lnsampling_prob + likelihood_given_true_alleles(observed_alleles, true_alleles)
- #print math.exp(prob), sprob, genotype, true_allele_permutation
- #print genotype, math.exp(prob), sprob, true_allele_permutation, [o['alt'] for o in observed_alleles]
- probs.append(prob)
- # sum the individual probability of all combinations
- p = logsumexp(probs)
- #print math.exp(p)
- return p
-def data_likelihood_estimate(genotype, alleles):
- """Estimates the data likelihood, which is a sum over all possible error
- profiles, or underlying 'true alleles', motivating the observations."""
- # for up to error_depth errors
- pass
-def genotype_combination_sampling_probability(genotype_combination, observed_alleles):
- multiplicity = math.log(ploidy * len(genotype_combination))
- result = 1 - multiplicity
- allele_groups = group_alleles(observed_alleles)
- for allele, observations in allele_groups.iteritems():
- result += math.log(math.factorial(len(observations)))
- # scale by product of multiset permutations of all genotypes in combo
- for combo in genotype_combination:
- for genotype in combo:
- m_i = sum([a[1] for a in genotype])
- result += math.log(math.factorial(m_i))
- result -= sum([math.log(math.factorial(allele[1])) for allele in genotype])
- return result
-def count_frequencies(genotype_combo):
- counts = {}
- alleles = {}
- for genotype in genotype_combo:
- for allele, count in genotype:
- if alleles.has_key(allele):
- alleles[allele] += count
- else:
- alleles[allele] = count
- for allele, count in alleles.iteritems():
- if counts.has_key(count):
- counts[count] += 1
- else:
- counts[count] = 1
- return counts
-def allele_frequency_probability(allele_frequency_counts, theta=0.001):
- """Implements Ewens' Sampling Formula. allele_frequency_counts is a
- dictionary mapping count -> number of alleles with this count in the
- population."""
- M = sum([frequency * count for frequency, count in allele_frequency_counts.iteritems()])
- return math.factorial(M) \
- / (theta * product([theta + h for h in range(1, M)])) \
- * product([math.pow(theta, count) / math.pow(frequency, count) * math.factorial(count) \
- for frequency, count in allele_frequency_counts.iteritems()])
-def powln(n, m):
- """Power of number in log space"""
- return sum([n] * m)
-def allele_frequency_probabilityln(allele_frequency_counts, theta=0.001):
- """Log space version to avoid inevitable overflows with coverage >100.
- Implements Ewens' Sampling Formula. allele_frequency_counts is a
- dictionary mapping count -> number of alleles with this count in the
- population."""
- thetaln = math.log(theta)
- M = sum([frequency * count for frequency, count in allele_frequency_counts.iteritems()])
- return factorialln(M) \
- - (thetaln + sum([math.log(theta + h) for h in range(1, M)])) \
- + sum([powln(thetaln, count) - powln(math.log(frequency), count) + factorialln(count) \
- for frequency, count in allele_frequency_counts.iteritems()])
-def genotype_probabilities(genotypes, alleles):
- return [[str(genotype), data_likelihood_exact(genotype, alleles)] for genotype in genotypes]
def genotype_probabilities_heuristic(genotypes, alleles):
    """Unfinished heuristic: groups the observed alleles and then does nothing.

    Both branches below are placeholders, so the function always returns
    None. The group-count tests use == because `is` compares object identity,
    which is not guaranteed to hold for equal integers.
    """
    groups = group_alleles(alleles)
    # group genotypes relative to the groups of observed alleles
    # take the first member of each group and apply our data likelihood calculation
    # then apply it to the rest
    if len(groups.keys()) == 1:
        # we can cleanly do all-right, part-right, all-wrong
        pass
    if len(groups.keys()) == 2:
        # we can do all-right, two types of 'part-right', and all-wrong
        pass
def multiset_banded_genotype_combinations(sample_genotypes, bandwidth):
    """Yield one genotype per sample for every index assignment in the band.

    sample_genotypes holds one indexable genotype list per sample; bandwidth
    is the number of leading indexes (0 .. bandwidth - 1) to draw from.
    Every multiset of len(sample_genotypes) indexes is expanded into all of
    its permutations, and each permutation selects genotypes[index] for the
    sample at the same position.
    """
    # Size the index multisets from the argument itself rather than from a
    # module-level `samples` name, which only exists while the script's main
    # loop is running.
    for index_combo in multiset.multichoose(len(sample_genotypes), range(bandwidth)):
        for index_permutation in multiset.permutations(index_combo):
            yield [genotypes[index]
                   for index, genotypes in zip(index_permutation, sample_genotypes)]
-# TODO you should implement gabor's banding solution; the above multiset method
-# is comically large and produces incorrect results despite the computational load
-def banded_genotype_combinations(sample_genotypes, bandwidth, band_depth):
- # always provide the 'best' case
- yield [(sample, genotypes[0]) for sample, genotypes in sample_genotypes]
- for i in range(1, bandwidth):
- for j in range(1, band_depth): # band_depth is the depth to which we explore the bandwith... TODO explain better
- indexes = j * [i] + (len(sample_genotypes) - j) * [0]
- for index_permutation in multiset.permutations(indexes):
- yield [(sample, genotypes[index]) for index, (sample, genotypes) in zip(index_permutation, sample_genotypes)]
-def genotype_str(genotype):
- return reduce(operator.add, [allele * count for allele, count in genotype])
-if __name__ == '__main__':
- ploidy = 2 # assume ploidy 2 for all individuals and all positions
- potential_alleles = ['A','T','G','C']
- # genotypes are expressed as sets of allele frequencies
- genotypes = list_genotypes_to_count_genotypes(list(multiset.multichoose(ploidy, potential_alleles)))
- for line in sys.stdin:
- position = cjson.decode(line)
- #print position['position']
- samples = position['samples']
- position['coverage'] = sum([len(sample['alleles']) for samplename, sample in samples.iteritems()])
- #potential_alleles = ['A','T','G','C']
- potential_alleles = set()
- for samplename, sample in samples.items():
- # only process snps and reference alleles
- alleles = [allele for allele in sample['alleles'] if allele['type'] in ['reference', 'snp']]
- alleles = alleles_quality_to_lnprob(alleles)
- sample['alleles'] = alleles
- potential_alleles = potential_alleles.union(set([allele['alt'] for allele in alleles]))
- position['filtered coverage'] = sum([len(sample['alleles']) for samplename, sample in samples.iteritems()])
- # genotypes are expressed as sets of allele frequencies
- #genotypes = list_genotypes_to_count_genotypes(list(multiset.multichoose(ploidy, list(potential_alleles))))
- for samplename, sample in samples.items():
- alleles = sample['alleles']
- groups = group_alleles(alleles)
- sample['genotypes'] = [[genotype, data_likelihood_exact(genotype, alleles)] for genotype in genotypes]
- #sample['genotypes_estimate'] = [[str(genotype), data_likelihood_estimate(genotype, alleles)] for genotype in genotypes]
- # estimate the posterior over all genotype combinations within some indexed bandwidth of optimal
- # TODO preserve sample names in the genotype combos
- sample_genotypes = [(name, sorted(sample['genotypes'], key=lambda genotype: genotype[1], reverse=True)) for name, sample in samples.iteritems()]
- genotype_combo_probs = []
- #for combo in multiset_banded_genotype_combinations(sample_genotypes, 2):
- #for combo in banded_genotype_combinations(sample_genotypes, min(len(genotypes), 2), len(samples)):
- # now marginals time...
- marginals = {}
- for name, sample in samples.iteritems():
- marginals[name] = {}
- combos_tested = 0
- for combo in banded_genotype_combinations(sample_genotypes, min(len(genotypes), 2), 2):
- combos_tested += 1
- probability_observations_given_genotypes = sum([prob for name, (genotype, prob) in combo])
- frequency_counts = count_frequencies([genotype for name, (genotype, prob) in combo])
- prior_probability_of_genotype = allele_frequency_probabilityln(frequency_counts)
- combo_prob = prior_probability_of_genotype + probability_observations_given_genotypes
- for name, (genotype, prob) in combo:
- gstr = genotype_str(genotype)
- if marginals[name].has_key(gstr):
- marginals[name][gstr].append(combo_prob)
- else:
- marginals[name][gstr] = [combo_prob]
- genotype_combo_probs.append([combo, combo_prob])
- genotype_combo_probs = sorted(genotype_combo_probs, key=lambda c: c[1], reverse=True)
- #for line in [json.dumps({'prob':prior_probability_of_genotype, 'combo':combo}) for combo, prior_probability_of_genotype in genotype_combo_probs]:
- # print line
- # sum, use to normalize
- # apply bayes rule
- #print genotype_combo_probs
- #print [prob for combo, prob in genotype_combo_probs]
- #for combo, prob in genotype_combo_probs:
- # print prob
- posterior_normalizer = logsumexp([prob for combo, prob in genotype_combo_probs])
- # handle marginals
- for sample, genotype_probs in marginals.iteritems():
- for genotype, probs in genotype_probs.iteritems():
- marginals[sample][genotype] = logsumexp(probs) - posterior_normalizer
- best_genotype_combo = genotype_combo_probs[0][0]
- best_genotype_combo_prob = genotype_combo_probs[0][1]
- #best_genotype_probability = math.exp(sum([prob for name, (genotype, prob) in best_genotype_combo]) \
- # + allele_frequency_probabilityln(count_frequencies([genotype for name, (genotype, prob) in best_genotype_combo])) \
- # - posterior_normalizer)
- best_genotype_probability = math.exp(best_genotype_combo_prob - posterior_normalizer)
- position['best_genotype_combo'] = [[name, genotype_str(genotype), math.exp(marginals[name][genotype_str(genotype)])]
- for name, (genotype, prob) in best_genotype_combo]
- position['best_genotype_combo_prob'] = best_genotype_probability
- position['posterior_normalizer'] = math.exp(posterior_normalizer)
- position['combos_tested'] = combos_tested
- #position['genotype_combo_probs'] = genotype_combo_probs
- # TODO estimate marginal probabilities of genotypings
- # here we cast everything into float-space
- for samplename, sample in samples.items():
- sample['genotypes'] = sorted([[genotype_str(genotype), math.exp(prob)] for genotype, prob in sample['genotypes']],
- key=lambda c: c[1], reverse=True)
- print cjson.encode(position)
- #print position['position']
diff --git a/python/dirichlet.py b/python/dirichlet.py
deleted file mode 100644
index 1c9d416..0000000
--- a/python/dirichlet.py
+++ /dev/null
@@ -1,58 +0,0 @@
-from scipy.special import gamma, gammaln
-import operator
-import math
-from logsumexp import logsumexp
-from factorialln import factorialln
def product(l):
    """Return the product of all items in the iterable l.

    The empty product is 1, so an empty list is accepted instead of raising.
    """
    result = 1
    for factor in l:
        result *= factor
    return result
-def beta(alphas):
- """Multivariate beta function"""
- return math.exp(sum(map(gammaln, alphas)) - gammaln(sum(alphas)))
-def dirichlet(probs, obs, s=1):
- """Dirichlet probability density for a given set probabilities and prior
- observation counts. s serves as a concentration parameter between 0 and 1,
- with smaller s yielding progressively more diffuse probability density
- distributions."""
- alphas = [(a + 1) * float(s) for a in obs]
- return 1 / beta(alphas) * product([math.pow(x, a - 1) for x, a in zip(probs, alphas)])
-def dirichlet_maximum_likelihood_ratio(probs, obs, s=1):
- """Ratio between the dirichlet for the specific probs and obs and the
- maximum likelihood value for the dirichlet over the given probs (this can
- be determined by partitioning the observations according to the
- probabilities)"""
- maximum_likelihood = dirichlet(probs, [float(sum(obs)) / len(obs) for o in obs], s)
- return dirichlet(probs, obs, s) / float(maximum_likelihood)
-def multinomial(probs, obs):
- return math.factorial(sum(obs)) / product(map(math.factorial, obs)) * product([math.pow(p, x) for p,x in zip(probs, obs)])
def multinomialln(probs, obs):
    """Natural log of the multinomial probability of the counts in obs.

    probs and obs are parallel sequences: probs[i] is the probability of
    category i and obs[i] is how many times it was observed.
    """
    # x * ln(p) rather than ln(p ** x): the power underflows to 0.0 for small
    # p or large x, and ln(0.0) then raises even though the log is finite.
    log_likelihood = 0.0
    for p, x in zip(probs, obs):
        if x != 0:  # p ** 0 == 1 contributes nothing, even when p == 0
            log_likelihood += x * math.log(p)
    return factorialln(sum(obs)) - sum(map(factorialln, obs)) + log_likelihood
-def multinomial_coefficientln(n, counts):
- return factorialln(n) - sum(map(factorialln, counts))
-def multinomial_coefficient(n, counts):
- return math.exp(multinomial_coefficientln(n, counts))
-def multinomial_dirichlet(probs, obs): return multinomial(probs, obs) * dirichlet(probs, obs)
-# NOTE:
-# I started exploring the multinomial_maximum_likelihood_ratio to see if the
-# same maximum likelihood estimation approach could be applied to multinomials.
-# It *can't* for the obvious reason that you can't have non-integral
-# observation counts while the dirichlet distribution is defined across
-# non-integral alphas! I see no clean way to resolve this using the
-# multinomial distribution and now have a better understanding of the use and
-# abuse of the dirichlet distribution as a conjugate prior for multinomial
-# posteriors.
-def multinomial_maximum_likelihood_ratio(probs, obs):
- maximum_likelihood = multinomial(probs, [float(sum(obs)) / len(obs) for o in obs])
- return multinomial(probs, obs) / float(maximum_likelihood)
-def binomial(successes, trials, prob):
- return math.factorial(trials) / (math.factorial(successes) * math.factorial(trials - successes)) * math.pow(prob, successes) * math.pow(1 - prob, trials - successes)
diff --git a/python/factorialln.py b/python/factorialln.py
deleted file mode 100644
index b9151d2..0000000
--- a/python/factorialln.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from scipy.special import gamma, gammaln
-def factorialln(n):
- if n == 1:
- return 0
- elif n == 0:
- return 0
- elif n < 0:
- raise Exception("factorial is not defined for n < 0")
- else:
- return gammaln(n + 1)
diff --git a/python/hwe.py b/python/hwe.py
deleted file mode 100644
index e7e96e5..0000000
--- a/python/hwe.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from dirichlet import multinomial, multinomialln, multinomial_coefficient, multinomial_coefficientln
-import math
-import operator
-def hwe_expectation(genotype, allele_counts):
- """@genotype is counts of A,B,C etc. alleles, e.g. (2,0) is AA and (1,1) is AB
- @allele_counts is the counts of the alleles in the population"""
- population_total_alleles = sum(allele_counts)
- ploidy = sum(genotype)
- genotype_coeff = multinomial_coefficient(ploidy, genotype)
- allele_frequencies = [count / float(population_total_alleles) for count in allele_counts]
- genotype_expected_frequency = genotype_coeff * reduce(operator.mul, [math.pow(freq, p) for freq, p in zip(allele_frequencies, genotype)])
- return genotype_expected_frequency
-def add_tuple(a, b):
- l = []
- for i,j in zip(a,b):
- l.append(i + j)
- return tuple(l)
-# TODO handle NULL case, genotype has frequency 0
-def hwe_sampling_probln(genotype, genotypes, ploidy):
- """@genotype: counts of A,B,C etc. alleles, e.g. (2,0) is AA and (1,1) is AB
- @genotypes: counts of the genotypes in the population,
- e.g. (((2,0),1), ((1,1),2), ((0,2),1)) would be 1xAA, 2xAB, 1xBB
- @return: the probability that there are exactly as many "genotype" in the
- population as suggested by the genotype counts, given HWE"""
- population_total_alleles = sum([sum(g[0]) * g[1] for g in genotypes])
- #print "population_total_alleles", population_total_alleles
- allele_counts = reduce(add_tuple, [[a * g[1] for a in g[0]] for g in genotypes])
- #print "allele_counts", allele_counts
- genotype_counts = [g[1] for g in genotypes]
- #print "genotype_counts", genotype_counts
- population_total_genotypes = sum([gtc[1] for gtc in genotypes])
- #print "total genotypes:", population_total_genotypes
- #print "ploidy", ploidy
- # number of arrangements of the alleles in the sample
- arrangements_of_alleles_in_sample = multinomial_coefficientln(population_total_alleles, allele_counts)
- #print "arrangements_of_alleles_in_sample", math.exp(arrangements_of_alleles_in_sample)
- # number of arrangements which contain exactly count genotypes
- #arrangements_with_exactly_count_genotype = multinomial_coefficientln(population_total_genotypes, genotype_counts)
- #print math.exp(multinomial_coefficientln(ploidy, genotype))
- arrangements_with_exactly_count_genotype = \
- multinomial_coefficientln(ploidy, genotype) \
- + multinomial_coefficientln(population_total_genotypes, genotype_counts)
- #print "arrangements_with_exactly_count_genotype", math.exp(arrangements_with_exactly_count_genotype)
- return arrangements_with_exactly_count_genotype - arrangements_of_alleles_in_sample;
-def inbreeding_coefficient(genotype, genotypes):
- population_total_alleles = sum([sum(g[0]) * g[1] for g in genotypes])
- allele_counts = reduce(add_tuple, [[a * g[1] for a in g[0]] for g in genotypes])
- genotype_counts = [g[1] for g in genotypes]
- population_total_genotypes = sum([gtc[1] for gtc in genotypes])
- expected = hwe_expectation(genotype, allele_counts) * population_total_genotypes
- observed = 0
- for g in genotypes:
- if g[0] == genotype:
- observed = g[1]
- break
- if observed == 0:
- print "error, no observations of genotype, cannot calculate inbreeding coefficient"
- return 0
- return 1 - (observed / expected)
diff --git a/python/logsumexp.py b/python/logsumexp.py
deleted file mode 100644
index 1aadc65..0000000
--- a/python/logsumexp.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import math
def logsumexp(lnv):
    """Return ln(sum(exp(item) for item in lnv)) without overflow.

    lnv is a non-empty sequence of log-space values. The largest value is
    factored out before exponentiating, so every exponent is <= 0 and exp()
    cannot overflow. Shifting by the minimum instead makes exp(max - min)
    overflow as soon as the values span more than about 709.

    Raises IndexError when lnv is empty.
    """
    shift = lnv[0]
    for item in lnv[1:]:
        if item > shift:
            shift = item
    if shift == float('-inf'):
        # every term is exp(-inf) == 0; avoid computing (-inf) - (-inf)
        return shift
    return shift + math.log(sum([math.exp(item - shift) for item in lnv]))
diff --git a/python/multiset.py b/python/multiset.py
deleted file mode 100644
index 4a4942b..0000000
--- a/python/multiset.py
+++ /dev/null
@@ -1,140 +0,0 @@
-# -*- coding: utf-8 -*-
-multiset.py -- non-recursive n multichoose k and
- non-recursive multiset permutations
- for python lists
-author: Erik Garrison <erik.garrison at bc.edu>
-last revised: 2010-07-15
-Copyright (c) 2010 by Erik Garrison
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following conditions:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-def multichoose(k, objects):
- """n multichoose k multisets from the list of objects. n is the size of
- the objects."""
- j,j_1,q = k,k,k # init here for scoping
- r = len(objects) - 1
- a = [0 for i in range(k)] # initial multiset indexes
- while True:
- yield [objects[a[i]] for i in range(0,k)] # emit result
- j = k - 1
- while j >= 0 and a[j] == r: j -= 1
- if j < 0: break # check for end condition
- j_1 = j
- while j_1 <= k - 1:
- a[j_1] = a[j_1] + 1 # increment
- q = j_1
- while q < k - 1:
- a[q+1] = a[q] # shift left
- q += 1
- q += 1
- j_1 = q
-Permutations of a multiset:
-Algorithm 1
-Visits the permutations of multiset E. The permutations are stored
-in a singly-linked list pointed to by head pointer h. Each node in the linked
-list has a value field v and a next field n. The init(E) call creates a
-singly-linked list storing the elements of E in non-increasing order with h, i,
-and j pointing to its first, second-last, and last nodes, respectively. The
-null pointer is given by φ. Note: If E is empty, then init(E) should exit.
-Also, if E contains only one element, then init(E) does not need to provide a
-value for i.
-[h, i, j] ← init(E)
-while j.n ≠ φ or j.v < h.v do
- if j.n ≠ φ and i.v ≥ j.n.v then
- s←j
- else
- s←i
- end if
- t←s.n
- s.n ← t.n
- t.n ← h
- if t.v < h.v then
- i←t
- end if
- j←i.n
- h←t
- visit(h)
-end while
-... from "Loopless Generation of Multiset Permutations using a Constant Number
-of Variables by Prefix Shifts." Aaron Williams, 2009
class ListElement:
    """Node of the singly-linked list the permutation algorithm works on."""

    def __init__(self, value, next):
        self.value = value
        self.next = next

    def nth(self, n):
        """Return the node n links after this one, stopping early at the tail."""
        o = self
        i = 0
        while i < n and o.next is not None:
            o = o.next
            i += 1
        return o


def __init(multiset):
    """Build the linked list for a non-empty multiset.

    Returns (head, second-last node, last node) of a list holding the items
    in non-increasing order.
    """
    # sorted() works on a copy; sorting in place would silently reorder the
    # list the caller passed in.
    ordered = sorted(multiset)
    # Prepending the ascending items one by one leaves the list descending.
    h = ListElement(ordered[0], None)
    for item in ordered[1:]:
        h = ListElement(item, h)
    return h, h.nth(len(ordered) - 2), h.nth(len(ordered) - 1)


def __visit(h):
    """Converts our bespoke linked list to a python list."""
    o = h
    l = []
    while o is not None:
        l.append(o.value)
        o = o.next
    return l


def permutations(multiset):
    """Generator providing all multiset permutations of a multiset.

    Each distinct ordering is yielded exactly once as a new list; the
    argument itself is left unmodified. An empty multiset yields its single
    (empty) permutation.
    """
    ordered = sorted(multiset)
    if not ordered:
        yield []
        return
    h, i, j = __init(ordered)
    yield __visit(h)
    # Prefix-shift loop: each pass unlinks one node and moves it to the front.
    while j.next is not None or j.value < h.value:
        if j.next is not None and i.value >= j.next.value:
            s = j
        else:
            s = i
        t = s.next
        s.next = t.next
        t.next = h
        if t.value < h.value:
            i = t
        j = i.next
        h = t
        yield __visit(h)
diff --git a/python/phred.py b/python/phred.py
deleted file mode 100644
index 354d067..0000000
--- a/python/phred.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import math
-M_LN10 = math.log(10)
-M_LOG10E = math.log10(math.e)
-def phred2ln(qual):
- return M_LN10 * qual * -.1
-def ln2phred(prob):
- return -10 * M_LOG10E * prob
-def phred2float(qual):
- return math.pow(10, qual * -.1)
def float2phred(prob):
    """Convert a probability to a Phred-scaled quality, capped at 99.

    A probability of exactly 0 has an unbounded Phred score, so the cap is
    returned directly instead of letting log10(0) raise.
    """
    if prob == 0:
        return 99
    return min(-10 * math.log10(prob), 99)
diff --git a/scripts/coverage_to_regions.py b/scripts/coverage_to_regions.py
deleted file mode 100755
index e4dff3e..0000000
--- a/scripts/coverage_to_regions.py
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/usr/bin/env python
-import sys
-if len(sys.argv) < 3:
- print "usage: <bamtools_coverage_output ", sys.argv[0], " fasta_index num_regions >regions.bed"
- print "Generates regions with even sequencing coverage, provided an input of coverage per-position as"
- print "generated by bamtools coverage. In other words, generates regions such that the integral of"
- print "coverage is approximately equal for each. These can be used when variant calling to reduce"
- print "variance in job runtime."
- exit(1)
-lengths = {}
-fai = open(sys.argv[1])
-for line in fai.readlines():
- c, l = line.split("\t")[:2]
- lengths[c] = int(l)
-positions = []
-total_coverage = 0
-for line in sys.stdin:
- chrom, pos, depth = line.strip().split("\t")
- pos = int(pos)
- depth = int(depth)
- positions.append([chrom, pos, depth])
- total_coverage += depth
-bp_per_region = total_coverage / int(sys.argv[2])
-lchrom = None
-lpos = 0
-bp_in_region = 0
-for line in positions:
- chrom, pos, depth = line #line.strip().split("\t")
- if lchrom != chrom:
- if lchrom:
- print lchrom+":"+str(lpos)+"-"+str(lengths[lchrom])
- lpos = 0
- lchrom = chrom
- bp_in_region = 0
- else:
- lchrom = chrom
- bp_in_region += depth
- if bp_in_region > bp_per_region:
- print chrom+":"+str(lpos)+"-"+str(pos) #, pos - lpos
- lpos = pos
- bp_in_region = 0
-print lchrom+":"+str(lpos)+"-"+str(lengths[lchrom])
diff --git a/scripts/fasta_generate_regions.py b/scripts/fasta_generate_regions.py
deleted file mode 100755
index 0433410..0000000
--- a/scripts/fasta_generate_regions.py
+++ /dev/null
@@ -1,35 +0,0 @@
import sys

# Splits every sequence listed in a FASTA index (.fai) into consecutive
# regions of at most <region size> bases and prints one "name:start-end"
# specifier per region on stdout.

# Both arguments are required: argv[2] is read unconditionally below.
if len(sys.argv) < 3:
    print("usage:  " + sys.argv[0] + "  <fasta file or index file> <region size>")
    print("generates a list of freebayes/bamtools region specifiers on stdout")
    print("intended for use in creating cluster jobs")
    sys.exit(1)

fasta_index_path = sys.argv[1]
if not fasta_index_path.endswith(".fai"):
    fasta_index_path = fasta_index_path + ".fai"

region_size = int(sys.argv[2])
if region_size <= 0:
    # a non-positive step would never advance region_start
    sys.stderr.write("error: region size must be a positive integer\n")
    sys.exit(1)

with open(fasta_index_path) as fasta_index_file:
    for line in fasta_index_file:
        if not line.strip():
            continue  # tolerate blank lines in the index
        fields = line.strip().split("\t")
        chrom_name = fields[0]
        chrom_length = int(fields[1])
        region_start = 0
        while region_start < chrom_length:
            # the last region of a sequence is clipped to the sequence length
            end = min(region_start + region_size, chrom_length)
            print(chrom_name + ":" + str(region_start) + "-" + str(end))
            region_start = end
diff --git a/scripts/freebayes-parallel b/scripts/freebayes-parallel
deleted file mode 100755
index 63bdc62..0000000
--- a/scripts/freebayes-parallel
+++ /dev/null
@@ -1,40 +0,0 @@
-if [ $# -lt 3 ];
- echo "usage: $0 [regions file] [ncpus] [freebayes arguments]"
- echo
- echo "Run freebayes in parallel over regions listed in regions file, using ncpus processors."
- echo "Will merge and sort output, producing a uniform VCF stream on stdout. Flags to freebayes"
- echo "which would write to e.g. a particular file will obviously cause problms, so caution is"
- echo "encouraged when using this script."
- echo
- echo "examples:"
- echo
- echo "Run freebayes in parallel on 100000bp chunks of the ref (fasta_generate_regions.py is also"
- echo "located in the scripts/ directory in the freebayes distribution). Use 36 threads."
- echo
- echo " freebayes-parallel <(fasta_generate_regions.py ref.fa.fai 100000) 36 -f ref.fa aln.bam >out.vcf"
- echo
- echo "Generate regions that are equal in terms of data content, and thus have lower variance"
- echo "in runtime. This will yield better resource utilization."
- echo
- echo " bamtools coverage -in aln.bam | coverage_to_regions.py ref.fa 500 >ref.fa.500.regions"
- echo " freebayes-parallel ref.fa.500.regions 36 -f ref.fa aln.bam >out.vcf"
- echo
- exit
-command=("freebayes" "$@")
-#$command | head -100 | grep "^#" # generate header
-# iterate over regions using gnu parallel to dispatch jobs
-cat "$regionsfile" | parallel -k -j "$ncpus" "${command[@]}" --region {}
-) | vcffirstheader \
- | vcfstreamsort -w 1000 | vcfuniq # remove duplicates at region edges
diff --git a/scripts/generate_freebayes_region_scripts.sh b/scripts/generate_freebayes_region_scripts.sh
deleted file mode 100755
index e9b3762..0000000
--- a/scripts/generate_freebayes_region_scripts.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-if [ $# -ne 4 ]
- echo "usage: $0 [region list] [stub script] [output directory] [script directory]"
- echo "Writes single-region scripts using the list of regions in [region list]."
- echo "The scripts will write their output to [output directory]."
- echo "The stub script has REGION in the place of the region and OUTPUT in the"
- echo "place of the output directory."
- exit
-mkdir -p $scriptdir
-mkdir -p $outputdir
-for region in $(cat $regionfile)
- echo writing script for $region with output to $outputdir
- cat $stub | sed -e "s/REGION/$region/g" | sed -e "s%OUTPUT%$outputdir%g" >$scriptdir/$region.sh
diff --git a/scripts/sam_add_rg.pl b/scripts/sam_add_rg.pl
deleted file mode 100644
index cf5892a..0000000
--- a/scripts/sam_add_rg.pl
+++ /dev/null
@@ -1,36 +0,0 @@
-$argc = @ARGV;
-if ($argc == 0) {
- print "usage: cat aln1.sam | sam_add_rg.pl <read group id> <sample name>\n";
- print "changes all alignments to have RG tags matching the new sample name.\n";
- print "prints SAM format out stdout\n";
- exit(0);
-$ID = $ARGV[0];
-$SM = $ARGV[1];
-$in_header = 1;
-while (<STDIN>) {
- if ($_ =~ /^@/) {
- print $_;
- next;
- } else {
- # add the new RG group to the end of the header
- if ($in_header) {
- print "\@RG\tID:$ID\tSM:$SM\n";
- $in_header = 0;
- }
- if ($_ =~ /\tRG:Z:(.+?)/) {
- $_ =~ s/\tRG:Z:.+?([\t\n])/\tRG:Z:$ID$1/;
- } else {
- $_ =~ s/\n/\tRG:Z:$ID\n/;
- }
- print $_;
- }
diff --git a/scripts/samples.cnv b/scripts/samples.cnv
deleted file mode 100644
index 989089f..0000000
--- a/scripts/samples.cnv
+++ /dev/null
@@ -1,9 +0,0 @@
-chrX -1 -1 NA12878 2 # means NA12878 has copy number 2 in all of chrX
-chrX -1 -1 NA12879 1 # ... copy number 1
-chrX -1 -1 NA13469 2
-chrX -1 -1 NA21328 2
-chrX -1 -1 NA12970 2
-chrY -1 -1 NA12877 1
-chrX -1 -1 NA12877 1
-chr20 20000 30000 NA12877 1 # NA12877 has one copy of chr20:20000..30000
-chr20 20000 30000 NA12879 3 # NA12879 has three copies of chr20:20000..30000
diff --git a/src/.gitignore b/src/.gitignore
deleted file mode 100644
index 51e352a..0000000
--- a/src/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
diff --git a/src/Allele.cpp b/src/Allele.cpp
deleted file mode 100644
index 2c58c79..0000000
--- a/src/Allele.cpp
+++ /dev/null
@@ -1,1534 +0,0 @@
-#include "Allele.h"
-#include "multichoose.h"
-#include "TryCatch.h"
-int Allele::referenceOffset(void) const {
- /*cout << readID << " offset checked " << referencePosition - position << " against position " << position
- << " allele length " << length << " str length " << referenceSequence.size() << " qstr size " << qualityString.size() << endl;
- */
- return *currentReferencePosition - position;
-void Allele::setQuality(void) {
- quality = currentQuality();
- lnquality = phred2ln(quality);
-int Allele::bpLeft(void) {
- return position - alignmentStart;
-int Allele::bpRight(void) {
- return alignmentEnd - (position + referenceLength);
-// called prior to using the allele in analysis
-// called again when haplotype alleles are built, in which case the "currentBase" is set to the alternate sequence of the allele
-void Allele::update(int haplotypeLength) {
- if (haplotypeLength == 1) {
- if (type == ALLELE_REFERENCE) {
- currentBase = string(1, *currentReferenceBase);
- } else {
- currentBase = base();
- }
- } else {
- currentBase = base();
- }
- // should be done after setting currentBase to haplotypeLength
- if (isReference()) setQuality();
- basesLeft = bpLeft();
- basesRight = bpRight();
-// quality of subsequence of allele
-const int Allele::subquality(int startpos, int len) const {
- int start = startpos - position;
- int sum = 0;
- for (int i = start; i < len; ++i) {
- sum += baseQualities.at(i);
- }
- return sum;
-// quality of subsequence of allele
-const long double Allele::lnsubquality(int startpos, int len) const {
- return phred2ln(subquality(startpos, len));
-const int Allele::subquality(const Allele &a) const {
- int sum = 0;
- int rp = a.position - position;
- int l = a.length;
- int L = l;
- int spanstart = 0;
- int spanend = 1;
- //int l = min((int) a.length, (int) baseQualities.size() - start);
- if (a.type == ALLELE_INSERTION) {
- L = l + 2;
- if (L > baseQualities.size()) {
- L = baseQualities.size();
- spanstart = 0;
- } else {
- // set lower bound to 0
- if (rp < (L / 2)) {
- spanstart = 0;
- } else {
- spanstart = rp - (L / 2);
- }
- // set upper bound to the string length
- if (spanstart + L > baseQualities.size()) {
- spanstart = baseQualities.size() - L;
- }
- }
- //string qualstr = baseQualities.substr(spanstart, L);
- spanend = spanstart + L;
- } else if (a.type == ALLELE_DELETION) {
- L = l + 2;
- if (L > baseQualities.size()) {
- L = baseQualities.size();
- spanstart = 0;
- } else {
- // set lower bound to 0
- if (rp < 1) {
- spanstart = 0;
- } else {
- spanstart = rp - 1;
- }
- // set upper bound to the string length
- if (spanstart + L > baseQualities.size()) {
- spanstart = baseQualities.size() - L;
- }
- }
- spanend = spanstart + L;
- } else if (a.type == ALLELE_MNP) {
- L = l;
- if (L > baseQualities.size()) {
- L = baseQualities.size();
- spanstart = 0;
- } else {
- if (rp < 1) {
- spanstart = 0;
- } else {
- spanstart = rp;
- }
- // impossible
- if (spanstart + L > baseQualities.size()) {
- spanstart = baseQualities.size() - L;
- }
- }
- spanend = spanstart + L;
- }
- for (int i = spanstart; i < spanend; ++i) {
- sum += baseQualities.at(i);
- }
- return sum * (l / L);
-const long double Allele::lnsubquality(const Allele& a) const {
- return phred2ln(subquality(a));
-void updateAllelesCachedData(vector<Allele*>& alleles) {
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- (*a)->update();
- }
- const int Allele::basesLeft(void) const {
- if (type == ALLELE_REFERENCE) {
- return bpLeft + referenceOffset();
- } else {
- return bpLeft;
- }
- }
- const int Allele::basesRight(void) const {
- if (type == ALLELE_REFERENCE) {
- return bpRight - referenceOffset();
- } else {
- return bpRight;
- }
- }
-// quality at a given reference position
-const short Allele::currentQuality(void) const {
- //cerr << readID << " " << position << "-" << position + length << " " << alternateSequence.size() << " vs " << baseQualities.size() << endl;
- switch (this->type) {
- // should check a different way... this is wrong
- // it will catch it all the time,
- if (currentBase.size() > 1) {
- return averageQuality(baseQualities);
- } else {
- int off = referenceOffset();
- if (off < 0 || off > baseQualities.size()) {
- return 0;
- } else {
- return baseQualities.at(off);
- }
- }
- break;
- case ALLELE_SNP:
- case ALLELE_MNP:
- return quality;
- break;
- }
-const long double Allele::lncurrentQuality(void) const {
- return phred2ln(currentQuality());
-string Allele::typeStr(void) const {
- string t;
- switch (this->type) {
- t = "genotype";
- break;
- t = "reference";
- break;
- case ALLELE_MNP:
- t = "mnp";
- break;
- case ALLELE_SNP:
- t = "snp";
- break;
- t = "insertion";
- break;
- t = "deletion";
- break;
- t = "complex";
- break;
- t = "null";
- break;
- default:
- t = "unknown";
- break;
- }
- return t;
-bool Allele::isReference(void) const {
- return type == ALLELE_REFERENCE;
-bool Allele::isSNP(void) const {
- return type == ALLELE_SNP;
-bool Allele::isInsertion(void) const {
- return type == ALLELE_INSERTION;
-bool Allele::isDeletion(void) const {
- return type == ALLELE_DELETION;
-bool Allele::isMNP(void) const {
- return type == ALLELE_MNP;
-bool Allele::isComplex(void) const {
- return type == ALLELE_COMPLEX;
-bool Allele::isNull(void) const {
- return type == ALLELE_NULL;
-const string Allele::base(void) const { // the base of this allele
- switch (this->type) {
- if (genotypeAllele)
- return alternateSequence;
- else
- return currentBase;
- break;
- return alternateSequence;
- break;
- /*
- return alternateSequence; // todo fix
- break;
- return "R:" + convert(position) + ":" + cigar + ":" + alternateSequence;
- break;
- */
- case ALLELE_SNP:
- return "S:" + convert(position) + ":" + cigar + ":" + alternateSequence;
- break;
- case ALLELE_MNP:
- return "M:" + convert(position) + ":" + cigar + ":" + alternateSequence;
- break;
- return "I:" + convert(position) + ":" + cigar + ":" + alternateSequence;
- break;
- return "D:" + convert(position) + ":" + cigar;
- break;
- return "C:" + convert(position) + ":" + cigar + ":" + alternateSequence;
- break;
- return "N:" + convert(position) + ":" + alternateSequence;
- default:
- break;
- }
-string stringForAllele(const Allele &allele) {
- stringstream out;
- if (!allele.genotypeAllele) {
- out.precision(1);
- out
- << allele.sampleID << ":"
- << allele.readID << ":"
- << allele.typeStr() << ":"
- << allele.cigar << ":"
- << scientific << fixed << allele.position << ":"
- << allele.length << ":"
- << (allele.strand == STRAND_FORWARD ? "+" : "-") << ":"
- << allele.referenceSequence << ":"
- << allele.alternateSequence << ":"
- << allele.quality << ":"
- << allele.basesLeft << ":"
- << allele.basesRight;
- } else {
- out << allele.typeStr() << ":"
- << allele.cigar << ":"
- << scientific << fixed << allele.position << ":"
- << allele.length << ":"
- << allele.alternateSequence;
- }
- return out.str();
-string stringForAlleles(vector<Allele> &alleles) {
- stringstream out;
- for (vector<Allele>::iterator allele = alleles.begin(); allele != alleles.end(); allele++) {
- out << stringForAllele(*allele) << endl;
- }
- return out.str();
-string json(vector<Allele*> &alleles) {
- stringstream out;
- vector<Allele*>::iterator a = alleles.begin();
- out << "[" << (*a)->json(); ++a;
- for (; a != alleles.end(); ++a)
- out << "," << (*a)->json();
- out << "]";
- return out.str();
-string json(Allele*& allele) { return allele->json(); }
-string Allele::json(void) {
- stringstream out;
- if (!genotypeAllele) {
- out << "{\"id\":\"" << readID << "\""
- << ",\"type\":\"" << typeStr() << "\""
- << ",\"length\":" << ((type == ALLELE_REFERENCE) ? 1 : length)
- << ",\"position\":" << position
- << ",\"strand\":\"" << (strand == STRAND_FORWARD ? "+" : "-") << "\"";
- if (type == ALLELE_REFERENCE ) {
- out << ",\"base\":\"" << alternateSequence.at(referenceOffset()) << "\""
- //<< ",\"reference\":\"" << allele.referenceSequence.at(referenceOffset) << "\""
- << ",\"quality\":" << currentQuality();
- } else {
- out << ",\"base\":\"" << alternateSequence << "\""
- //<< ",\"reference\":\"" << allele.referenceSequence << "\""
- << ",\"quality\":" << quality;
- }
- out << "}";
- } else {
- out << "{\"type\":\"" << typeStr() << "\"";
- switch (type) {
- out << "}";
- break;
- default:
- out << "\",\"length\":" << length
- << ",\"alt\":\"" << alternateSequence << "\"}";
- break;
- }
- }
- return out.str();
-ostream &operator<<(ostream &out, vector<Allele*> &alleles) {
- vector<Allele*>::iterator a = alleles.begin();
- out << **a++;
- while (a != alleles.end())
- out << "|" << **a++;
- return out;
-ostream &operator<<(ostream &out, vector<Allele> &alleles) {
- vector<Allele>::iterator a = alleles.begin();
- out << *a++;
- while (a != alleles.end())
- out << "|" << *a++;
- return out;
-ostream &operator<<(ostream &out, list<Allele*> &alleles) {
- list<Allele*>::iterator a = alleles.begin();
- out << **a++;
- while (a != alleles.end())
- out << "|" << **a++;
- return out;
-ostream &operator<<(ostream &out, Allele* &allele) {
- out << *allele;
- return out;
-ostream &operator<<(ostream &out, Allele &allele) {
- if (!allele.genotypeAllele) {
- int prec = out.precision();
- // << &allele << ":"
- out.precision(1);
- out << allele.sampleID
- << ":" << allele.readID
- << ":" << allele.typeStr()
- << ":" << allele.length
- << ":" << allele.referenceLength
- << ":" << scientific << fixed << allele.position
- << ":" << (allele.strand == STRAND_FORWARD ? "+" : "-")
- << ":" << allele.alternateSequence
- //<< ":" << allele.referenceSequence
- << ":" << allele.repeatRightBoundary
- << ":" << allele.cigar
- << ":" << allele.lnmapQuality
- << ":" << allele.lnquality;
- out.precision(prec);
- } else {
- out << allele.typeStr()
- << ":" << allele.cigar
- << ":" << scientific << fixed << allele.position
- << ":" << allele.length
- << ":" << (string) allele.alternateSequence;
- }
- out.precision(5);
- return out;
-bool operator<(const Allele &a, const Allele &b) {
- //cerr << "allele<" << endl;
- return a.currentBase < b.currentBase;
-// TODO fixme??
-// alleles are equal if they represent the same reference-relative variation or
-// sequence, which we encode as a string and compare here
-bool operator==(const Allele &a, const Allele &b) {
- //cerr << "allele==" << endl;
- return a.currentBase == b.currentBase;
-bool operator!=(const Allele& a, const Allele& b) {
- return ! (a == b);
-bool Allele::equivalent(Allele &b) {
- if (type != b.type) {
- return false;
- } else {
- switch (type) {
- // reference alleles are, by definition, always equivalent
- return true;
- break;
- case ALLELE_SNP:
- case ALLELE_MNP:
- if (alternateSequence == b.alternateSequence)
- return true;
- break;
- if (length == b.length)
- return true;
- break;
- if (length == b.length
- && alternateSequence == b.alternateSequence)
- return true;
- break;
- if (length == b.length
- && alternateSequence == b.alternateSequence
- && cigar == b.cigar)
- return true;
- break;
- return alternateSequence == b.alternateSequence;
- default:
- break;
- }
- }
- return false;
-bool areHomozygous(vector<Allele*>& alleles) {
- Allele* prev = alleles.front();
- for (vector<Allele*>::iterator allele = alleles.begin() + 1; allele != alleles.end(); ++allele) {
- if (**allele != *prev) {
- return false;
- }
- }
- return true;
-// counts alleles which satisfy operator==
-map<Allele, int> countAlleles(vector<Allele*>& alleles) {
- map<Allele, int> counts;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& allele = **a;
- map<Allele, int>::iterator f = counts.find(allele);
- if (f == counts.end()) {
- counts[allele] = 1;
- } else {
- counts[allele] += 1;
- }
- }
- return counts;
-// counts alleles which satisfy operator==
-map<string, int> countAllelesString(vector<Allele*>& alleles) {
- map<string, int> counts;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& thisAllele = **a;
- const string& allele = thisAllele.currentBase;
- map<string, int>::iterator f = counts.find(allele);
- if (f == counts.end()) {
- counts[allele] = 1;
- } else {
- counts[allele] += 1;
- }
- }
- return counts;
-map<string, int> countAllelesString(vector<Allele>& alleles) {
- map<string, int> counts;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& thisAllele = *a;
- const string& allele = thisAllele.currentBase;
- map<string, int>::iterator f = counts.find(allele);
- if (f == counts.end()) {
- counts[allele] = 1;
- } else {
- counts[allele] += 1;
- }
- }
- return counts;
-map<Allele, int> countAlleles(vector<Allele>& alleles) {
- map<Allele, int> counts;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& allele = *a;
- map<Allele, int>::iterator f = counts.find(allele);
- if (f == counts.end()) {
- counts[allele] = 1;
- } else {
- counts[allele] += 1;
- }
- }
- return counts;
-map<Allele, int> countAlleles(list<Allele*>& alleles) {
- map<Allele, int> counts;
- for (list<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& allele = **a;
- map<Allele, int>::iterator f = counts.find(allele);
- if (f == counts.end()) {
- counts[allele] = 1;
- } else {
- counts[allele] += 1;
- }
- }
- return counts;
-map<string, vector<Allele*> > groupAllelesBySample(list<Allele*>& alleles) {
- map<string, vector<Allele*> > groups;
- for (list<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele*& allele = *a;
- groups[allele->sampleID].push_back(allele);
- }
- return groups;
-vector<Allele> uniqueAlleles(list<Allele*>& alleles) {
- vector<Allele> uniques;
- map<Allele, int> counts = countAlleles(alleles);
- for (map<Allele, int>::iterator c = counts.begin(); c != counts.end(); ++c) {
- uniques.push_back(c->first);
- }
- return uniques;
-void groupAllelesBySample(list<Allele*>& alleles, map<string, vector<Allele*> >& groups) {
- for (list<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele*& allele = *a;
- groups[allele->sampleID].push_back(allele);
- }
-// in haplotype calling, if alleles have the same alternate sequence, the
-// should have the same cigar and position. this function picks the most
-// common allele observation per alternate sequence and homogenizes the rest to
-// the same if they are not reference alleles
-void homogenizeAlleles(map<string, vector<Allele*> >& alleleGroups, string& refseq, Allele& refallele) {
- map<string, map<string, int> > equivs;
- map<string, Allele*> homogenizeTo;
- // find equivalencies between alleles
- // base equivalency is self
- for (map<string, vector<Allele*> >::iterator g = alleleGroups.begin(); g != alleleGroups.end(); ++g) {
- Allele& allele = *g->second.front();
- if (allele.isReference()) {
- continue;
- }
- equivs[allele.alternateSequence][g->first]++;
- }
- //
- for (map<string, map<string, int> >::iterator e = equivs.begin(); e != equivs.end(); ++e) {
- string altseq = e->first;
- map<string, int>& group = e->second;
- map<int, string> ordered;
- for (map<string, int>::iterator f = group.begin(); f != group.end(); ++f) {
- // pick the best by count
- ordered[f->second] = f->first;
- }
- // choose the most common group
- string& altbase = ordered.rbegin()->second;
- if (altseq == refseq) {
- homogenizeTo[altseq] = &refallele;
- } else {
- homogenizeTo[altseq] = alleleGroups[altbase].front();
- }
- }
- for (map<string, vector<Allele*> >::iterator g = alleleGroups.begin(); g != alleleGroups.end(); ++g) {
- vector<Allele*>& alleles = g->second;
- if (alleles.front()->isReference()) {
- continue;
- }
- string& altseq = alleles.front()->alternateSequence;
- Allele* toallele = homogenizeTo[altseq];
- string& cigar = toallele->cigar;
- AlleleType type = toallele->type;
- long int position = toallele->position;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- (*a)->cigar = cigar;
- (*a)->type = type;
- (*a)->position = position;
- (*a)->update();
- }
- }
-void resetProcessedFlag(map<string, vector<Allele*> >& alleleGroups) {
- for (map<string, vector<Allele*> >::iterator g = alleleGroups.begin(); g != alleleGroups.end(); ++g) {
- for (vector<Allele*>::iterator a = g->second.begin(); a != g->second.end(); ++a) {
- (*a)->processed = false;
- }
- }
-void groupAlleles(map<string, vector<Allele*> >& sampleGroups, map<string, vector<Allele*> >& alleleGroups) {
- for (map<string, vector<Allele*> >::iterator sample = sampleGroups.begin(); sample != sampleGroups.end(); ++sample) {
- for (vector<Allele*>::iterator allele = sample->second.begin(); allele != sample->second.end(); ++allele) {
- alleleGroups[(*allele)->currentBase].push_back(*allele);
- }
- }
-vector<vector<Allele*> > groupAlleles(map<string, vector<Allele*> > &sampleGroups, bool (*fncompare)(Allele &a, Allele &b)) {
- vector<vector<Allele*> > groups;
- for (map<string, vector<Allele*> >::iterator sg = sampleGroups.begin(); sg != sampleGroups.end(); ++sg) {
- vector<Allele*>& alleles = sg->second;
- for (vector<Allele*>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele*> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(**oa, *ag->front())) {
- ag->push_back(*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele*> trueAlleleGroup;
- trueAlleleGroup.push_back(*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- }
- return groups;
-vector<vector<Allele*> > groupAlleles(list<Allele*> &alleles, bool (*fncompare)(Allele* &a, Allele* &b)) {
- vector<vector<Allele*> > groups;
- for (list<Allele*>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele*> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(*oa, ag->front())) {
- ag->push_back(*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele*> trueAlleleGroup;
- trueAlleleGroup.push_back(*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-vector<vector<Allele*> > groupAlleles(list<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b)) {
- vector<vector<Allele*> > groups;
- for (list<Allele>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele*> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(*oa, *ag->front())) {
- ag->push_back(&*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele*> trueAlleleGroup;
- trueAlleleGroup.push_back(&*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-vector<vector<Allele*> > groupAlleles(vector<Allele*> &alleles, bool (*fncompare)(Allele &a, Allele &b)) {
- vector<vector<Allele*> > groups;
- for (vector<Allele*>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele*> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(**oa, *ag->front())) {
- ag->push_back(*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele*> trueAlleleGroup;
- trueAlleleGroup.push_back(*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-vector<vector<Allele*> > groupAlleles(vector<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b)) {
- vector<vector<Allele*> > groups;
- for (vector<Allele>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele*> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(*oa, *ag->front())) {
- ag->push_back(&*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele*> trueAlleleGroup;
- trueAlleleGroup.push_back(&*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-vector<vector<Allele> > groupAlleles_copy(list<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b)) {
- vector<vector<Allele> > groups;
- for (list<Allele>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(*oa, ag->front())) {
- ag->push_back(*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele> trueAlleleGroup;
- trueAlleleGroup.push_back(*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-vector<vector<Allele> > groupAlleles_copy(vector<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b)) {
- vector<vector<Allele> > groups;
- for (vector<Allele>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if ((*fncompare)(*oa, ag->front())) {
- ag->push_back(*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele> trueAlleleGroup;
- trueAlleleGroup.push_back(*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-vector<vector<Allele> > groupAlleles_copy(vector<Allele> &alleles) {
- vector<vector<Allele> > groups;
- for (vector<Allele>::iterator oa = alleles.begin(); oa != alleles.end(); ++oa) {
- bool unique = true;
- for (vector<vector<Allele> >::iterator ag = groups.begin(); ag != groups.end(); ++ag) {
- if (*oa == ag->front()) {
- ag->push_back(*oa);
- unique = false;
- break;
- }
- }
- if (unique) {
- vector<Allele> trueAlleleGroup;
- trueAlleleGroup.push_back(*oa);
- groups.push_back(trueAlleleGroup);
- }
- }
- return groups;
-bool Allele::sameSample(Allele &other) { return this->sampleID == other.sampleID; }
-bool allelesSameType(Allele* &a, Allele* &b) { return a->type == b->type; }
-bool allelesEquivalent(Allele* &a, Allele* &b) { return a->equivalent(*b); }
-bool allelesSameSample(Allele* &a, Allele* &b) { return a->sampleID == b->sampleID; }
-bool allelesSameType(Allele &a, Allele &b) { return a.type == b.type; }
-bool allelesEquivalent(Allele &a, Allele &b) { return a.equivalent(b); }
-bool allelesSameSample(Allele &a, Allele &b) { return a.sampleID == b.sampleID; }
-bool allelesEqual(Allele &a, Allele &b) { return a == b; }
-vector<Allele> genotypeAllelesFromAlleleGroups(vector<vector<Allele*> > &groups) {
- vector<Allele> results;
- for (vector<vector<Allele*> >::iterator g = groups.begin(); g != groups.end(); ++g)
- results.push_back(genotypeAllele(*g->front()));
- return results;
-vector<Allele> genotypeAllelesFromAlleles(vector<Allele*> &alleles) {
- vector<Allele> results;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a)
- results.push_back(genotypeAllele(**a));
- return results;
-vector<Allele> genotypeAllelesFromAlleleGroups(vector<vector<Allele> > &groups) {
- vector<Allele> results;
- for (vector<vector<Allele> >::iterator g = groups.begin(); g != groups.end(); ++g)
- results.push_back(genotypeAllele(g->front()));
- return results;
-vector<Allele> genotypeAllelesFromAlleles(vector<Allele> &alleles) {
- vector<Allele> results;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a)
- results.push_back(genotypeAllele(*a));
- return results;
-Allele genotypeAllele(Allele &a) {
- return Allele(a.type, a.alternateSequence, a.length, a.referenceLength, a.cigar, a.position, a.repeatRightBoundary);
-Allele genotypeAllele(AlleleType type, string alt, unsigned int len, string cigar, unsigned int reflen, long int pos, long int rrbound) {
- return Allele(type, alt, len, reflen, cigar, pos, rrbound);
-int allowedAlleleTypes(vector<AlleleType>& allowedEnumeratedTypes) {
- int allowedTypes = 0;// (numberOfPossibleAlleleTypes, false);
- for (vector<AlleleType>::iterator t = allowedEnumeratedTypes.begin(); t != allowedEnumeratedTypes.end(); ++t) {
- allowedTypes |= *t;
- }
- return allowedTypes;
-void filterAlleles(list<Allele*>& alleles, int allowedTypes) {
- for (list<Allele*>::iterator allele = alleles.begin(); allele != alleles.end(); ++allele) {
- bool allowed = false;
- if (!(allowedTypes & (*allele)->type))
- *allele = NULL;
- }
- alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-int countAlleles(map<string, vector<Allele*> >& sampleGroups) {
- int count = 0;
- for (map<string, vector<Allele*> >::iterator sg = sampleGroups.begin(); sg != sampleGroups.end(); ++sg) {
- count += sg->second.size();
- }
- return count;
-int countAllelesWithBase(vector<Allele*>& alleles, string base) {
- int count = 0;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- if ((*a)->currentBase == base)
- ++count;
- }
- return count;
-int baseCount(vector<Allele*>& alleles, string base, AlleleStrand strand) {
- int count = 0;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- if ((*a)->currentBase == base && (*a)->strand == strand)
- ++count;
- }
- return count;
-pair<pair<int, int>, pair<int, int> >
-baseCount(vector<Allele*>& alleles, string refbase, string altbase) {
- int forwardRef = 0;
- int reverseRef = 0;
- int forwardAlt = 0;
- int reverseAlt = 0;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- string base = (*a)->currentBase;
- AlleleStrand strand = (*a)->strand;
- if (base == refbase) {
- if (strand == STRAND_FORWARD)
- ++forwardRef;
- else if (strand == STRAND_REVERSE)
- ++reverseRef;
- } else if (base == altbase) {
- if (strand == STRAND_FORWARD)
- ++forwardAlt;
- else if (strand == STRAND_REVERSE)
- ++reverseAlt;
- }
- }
- return make_pair(make_pair(forwardRef, forwardAlt), make_pair(reverseRef, reverseAlt));
-string Allele::readSeq(void) {
- string r;
- for (vector<Allele>::iterator a = alignmentAlleles->begin(); a != alignmentAlleles->end(); ++a) {
- r.append(a->alternateSequence);
- }
- return r;
-string Allele::read5p(void) {
- string r;
- vector<Allele>::const_reverse_iterator a = alignmentAlleles->rbegin();
- while (&*a != this) {
- ++a;
- }
- if ((a+1) != alignmentAlleles->rend()) ++a;
- while (a != alignmentAlleles->rend()) {
- r = a->alternateSequence + r;
- ++a;
- }
- r.append(alternateSequence);
- return r;
-string Allele::read3p(void) {
- string r = alternateSequence;
- vector<Allele>::const_iterator a = alignmentAlleles->begin();
- while (&*a != this) {
- ++a;
- }
- if ((a+1) != alignmentAlleles->end()) ++a;
- while (a != alignmentAlleles->end()) {
- r.append(a->alternateSequence);
- ++a;
- }
- return r;
-string Allele::read5pNonNull(void) {
- string r = alternateSequence;
- vector<Allele>::const_reverse_iterator a = alignmentAlleles->rbegin();
- while (&*a != this) {
- ++a;
- }
- while (a != alignmentAlleles->rend() && !a->isNull()) {
- if (&*a != this) {
- r = a->alternateSequence + r;
- }
- ++a;
- }
- return r;
-string Allele::read3pNonNull(void) {
- string r = alternateSequence;
- vector<Allele>::const_iterator a = alignmentAlleles->begin();
- while (&*a != this) {
- ++a;
- }
- while (a != alignmentAlleles->end() && !a->isNull()) {
- if (&*a != this) {
- r.append(a->alternateSequence);
- }
- ++a;
- }
- return r;
-int Allele::read5pNonNullBases(void) {
- int bp = 0;
- vector<Allele>::const_reverse_iterator a = alignmentAlleles->rbegin();
- while (&*a != this) {
- ++a;
- }
- while (a != alignmentAlleles->rend() && !a->isNull()) {
- if (&*a != this) {
- //cerr << "5p bp = " << bp << " adding " << stringForAllele(*a) << " to " << stringForAllele(*this) << endl;
- bp += a->alternateSequence.size();
- }
- ++a;
- }
- return bp;
-int Allele::read3pNonNullBases(void) {
- int bp = 0;
- vector<Allele>::const_iterator a = alignmentAlleles->begin();
- while (&*a != this) {
- ++a;
- }
- while (a != alignmentAlleles->end() && !a->isNull()) {
- if (&*a != this) {
- //cerr << "3p bp = " << bp << " adding " << stringForAllele(*a) << " to " << stringForAllele(*this) << endl;
- bp += a->alternateSequence.size();
- }
- ++a;
- }
- return bp;
-// adjusts the allele to have a new start
-// returns the ref/alt sequence obtained by subtracting length from the left end of the allele
-void Allele::subtract(
- int subtractFromRefStart,
- int subtractFromRefEnd,
- string& substart,
- string& subend,
- vector<pair<int, string> >& cigarStart,
- vector<pair<int, string> >& cigarEnd,
- vector<short>& qsubstart,
- vector<short>& qsubend
- ) {
- substart.clear();
- subend.clear();
- cigarStart.clear();
- cigarEnd.clear();
- qsubstart.clear();
- qsubend.clear();
- // prepare to adjust cigar
- list<pair<int, string> > cigarL = splitCigarList(cigar);
- // walk the cigar string to determine where to make the left cut in the alternate sequence
- int subtractFromAltStart = 0;
- if (subtractFromRefStart) {
- int refbpstart = subtractFromRefStart;
- pair<int, string> c;
- while (!cigarL.empty()) {
- c = cigarL.front();
- cigarL.pop_front();
- char op = c.second[0];
- switch (op) {
- case 'M':
- case 'X':
- case 'N':
- refbpstart -= c.first;
- subtractFromAltStart += c.first;
- break;
- case 'I':
- subtractFromAltStart += c.first;
- break;
- case 'D':
- refbpstart -= c.first;
- break;
- default:
- break;
- }
- cigarStart.push_back(c);
- if (refbpstart < 0) {
- // split/adjust the last cigar element
- cigarL.push_front(c);
- cigarL.front().first = -refbpstart;
- cigarStart.back().first += refbpstart;
- switch (op) {
- case 'M':
- case 'X':
- case 'N':
- case 'I':
- subtractFromAltStart += refbpstart;
- break;
- case 'D':
- default:
- break;
- }
- break; // we're done
- }
- }
- }
- int subtractFromAltEnd = 0;
- // walk the cigar string to determine where to make the right cut in the alternate sequence
- if (subtractFromRefEnd) {
- int refbpend = subtractFromRefEnd;
- pair<int, string> c;
- while (!cigarL.empty() && refbpend > 0) {
- c = cigarL.back();
- cigarL.pop_back();
- char op = c.second[0];
- switch (op) {
- case 'M':
- case 'X':
- case 'N':
- subtractFromAltEnd += c.first;
- refbpend -= c.first;
- break;
- case 'I':
- subtractFromAltEnd += c.first;
- break;
- case 'D':
- refbpend -= c.first;
- break;
- default:
- break;
- }
- cigarEnd.insert(cigarEnd.begin(), c);
- if (refbpend < 0) {
- // split/adjust the last cigar element
- cigarL.push_back(c);
- cigarL.back().first = -refbpend;
- cigarEnd.front().first += refbpend;
- switch (op) {
- case 'M':
- case 'X':
- case 'I':
- case 'N':
- subtractFromAltEnd += refbpend;
- break;
- case 'D':
- default:
- break;
- }
- break; // drop out of loop, we're done
- }
- }
- }
- // adjust the alternateSequence
- substart = alternateSequence.substr(0, subtractFromAltStart);
- subend = alternateSequence.substr(alternateSequence.size() - subtractFromAltEnd, subtractFromAltEnd);
- alternateSequence.erase(0, subtractFromAltStart);
- alternateSequence.erase(alternateSequence.size() - subtractFromAltEnd, subtractFromAltEnd);
- // adjust the quality string
- qsubstart.insert(qsubstart.begin(), baseQualities.begin(), baseQualities.begin() + subtractFromAltStart);
- qsubend.insert(qsubend.begin(), baseQualities.begin() + baseQualities.size() - subtractFromAltEnd, baseQualities.end());
- baseQualities.erase(baseQualities.begin(), baseQualities.begin() + subtractFromAltStart);
- baseQualities.erase(baseQualities.begin() + baseQualities.size() - subtractFromAltEnd, baseQualities.end());
- // reset the cigar
- cigarL.erase(remove_if(cigarL.begin(), cigarL.end(), isEmptyCigarElement), cigarL.end());
- cigar = joinCigarList(cigarL);
- // reset the length
- length = alternateSequence.size();
- // update the type specification
- updateTypeAndLengthFromCigar();
- // adjust the position
- position += subtractFromRefStart; // assumes the first-base of the alleles is reference==, not ins
- //referenceLength -= subtractFromRefStart;
- //referenceLength -= subtractFromRefEnd;
- referenceLength = referenceLengthFromCigar();
-void Allele::subtractFromStart(int bp, string& seq, vector<pair<int, string> >& cig, vector<short>& quals) {
- string emptystr;
- vector<pair<int, string> > emptycigar;
- vector<short> emptyquals;
- subtract(bp, 0, seq, emptystr, cig, emptycigar, quals, emptyquals);
-void Allele::subtractFromEnd(int bp, string& seq, vector<pair<int, string> >& cig, vector<short>& quals) {
- string emptystr;
- vector<pair<int, string> > emptycigar;
- vector<short> emptyquals;
- subtract(0, bp, emptystr, seq, emptycigar, cig, emptyquals, quals);
-void Allele::addToStart(string& seq, vector<pair<int, string> >& cig, vector<short>& quals) {
- string emptystr;
- vector<pair<int, string> > emptycigar;
- vector<short> emptyquals;
- add(seq, emptystr, cig, emptycigar, quals, emptyquals);
-void Allele::addToEnd(string& seq, vector<pair<int, string> >& cig, vector<short>& quals) {
- string emptystr;
- vector<pair<int, string> > emptycigar;
- vector<short> emptyquals;
- add(emptystr, seq, emptycigar, cig, emptyquals, quals);
-void Allele::add(
- string& addToStart,
- string& addToEnd,
- vector<pair<int, string> >& cigarStart,
- vector<pair<int, string> >& cigarEnd,
- vector<short>& qaddToStart,
- vector<short>& qaddToEnd
- ) {
- // adjust the position
- for (vector<pair<int, string> >::iterator c = cigarStart.begin(); c != cigarStart.end(); ++c) {
- switch (c->second[0]) {
- case 'M':
- case 'X':
- case 'D':
- case 'N':
- position -= c->first;
- break;
- case 'I':
- default:
- break;
- }
- }
- // prepare to adjust cigar
- vector<pair<int, string> > cigarV = splitCigar(cigar);
- // adjust the cigar
- if (!cigarStart.empty()) {
- if (cigarStart.back().second == cigarV.front().second) {
- // merge
- cigarV.front().first += cigarStart.back().first;
- cigarStart.pop_back();
- }
- }
- cigarV.insert(cigarV.begin(), cigarStart.begin(), cigarStart.end());
- if (!cigarEnd.empty()) {
- if (cigarEnd.front().second == cigarV.back().second) {
- // merge
- cigarV.back().first += cigarEnd.front().first;
- cigarEnd.pop_back();
- } else {
- cigarV.insert(cigarV.end(), cigarEnd.begin(), cigarEnd.end());
- }
- }
- // adjust the alternateSequence
- alternateSequence.insert(0, addToStart);
- alternateSequence.append(addToEnd);
- // adjust the quality string
- baseQualities.insert(baseQualities.begin(), qaddToStart.begin(), qaddToStart.end());
- baseQualities.insert(baseQualities.end(), qaddToEnd.begin(), qaddToEnd.end());
- // reset the cigar
- cigarV.erase(remove_if(cigarV.begin(), cigarV.end(), isEmptyCigarElement), cigarV.end());
- cigar = joinCigar(cigarV);
- updateTypeAndLengthFromCigar();
- // reset referenceLength
- referenceLength = referenceLengthFromCigar();
-void Allele::updateTypeAndLengthFromCigar(void) {
- vector<pair<int, string> > cigarV = splitCigar(cigar);
- map<char, int> cigarTypes;
- map<char, int> cigarLengths;
- for (vector<pair<int, string> >::iterator c = cigarV.begin(); c != cigarV.end(); ++c) {
- ++cigarTypes[c->second[0]];
- cigarLengths[c->second[0]] += c->first;
- }
- if (cigarTypes.size() == 1) {
- switch (cigarTypes.begin()->first) {
- case 'M':
- break;
- case 'I':
- break;
- case 'D':
- break;
- case 'X':
- if (cigarLengths['X'] > 1) {
- type = ALLELE_MNP;
- } else {
- type = ALLELE_SNP;
- }
- break;
- case 'N':
- type = ALLELE_NULL;
- break;
- default:
- break;
- }
- } else if (cigarTypes.size() == 2) {
- if (cigarTypes['M'] > 0) {
- if (cigarTypes['I'] == 1) {
- } else if (cigarTypes['D'] == 1) {
- } else if (cigarTypes['X'] == 1) {
- if (cigarLengths['X'] > 1) {
- type = ALLELE_MNP;
- } else {
- type = ALLELE_SNP;
- }
- } else {
- }
- } else {
- }
- } else {
- }
- // recalculate allele length and quality, based on type
- switch (type) {
- length = alternateSequence.size();
- break;
- case ALLELE_SNP:
- case ALLELE_MNP:
- length = cigarLengths['X'];
- break;
- length = cigarLengths['I'];
- break;
- length = cigarLengths['D'];
- break;
- length = alternateSequence.size();
- break;
- length = alternateSequence.size();
- break;
- default:
- break;
- }
-int referenceLengthFromCigar(string& cigar) {
- int r = 0;
- vector<pair<int, string> > cigarV = splitCigar(cigar);
- for (vector<pair<int, string> >::iterator c = cigarV.begin(); c != cigarV.end(); ++c) {
- switch (c->second[0]) {
- case 'M':
- case 'X':
- case 'D':
- case 'N':
- r += c->first;
- break;
- case 'I':
- default:
- break;
- }
- }
- return r;
-int Allele::referenceLengthFromCigar(void) {
- int r = 0;
- vector<pair<int, string> > cigarV = splitCigar(cigar);
- for (vector<pair<int, string> >::iterator c = cigarV.begin(); c != cigarV.end(); ++c) {
- switch (c->second[0]) {
- case 'M':
- case 'X':
- case 'D':
- case 'N':
- r += c->first;
- break;
- case 'I':
- default:
- break;
- }
- }
- return r;
-// combines the two alleles into a complex variant, updates important data
-void Allele::mergeAllele(const Allele& newAllele, AlleleType newType) {
- //cout << stringForAllele(*this) << endl << stringForAllele(newAllele) << endl;
- type = newType;
- alternateSequence += newAllele.alternateSequence;
- length += newAllele.length; // hmmm
- basesRight = newAllele.basesRight;
- baseQualities.insert(baseQualities.end(), newAllele.baseQualities.begin(), newAllele.baseQualities.end());
- currentBase = base();
- // XXX note that we don't add Q values for intermingled gaps in combined alleles
- if (newAllele.type != ALLELE_REFERENCE) {
- quality = min(newAllele.quality, quality);
- lnquality = max(newAllele.lnquality, lnquality);
- //quality = minQuality(baseQualities);
- //lnquality = log(quality);
- } else {
- quality = averageQuality(baseQualities);
- lnquality = log(quality);
- basesRight += newAllele.referenceLength;
- }
- if (newAllele.type != ALLELE_REFERENCE) {
- repeatRightBoundary = newAllele.repeatRightBoundary;
- }
- cigar = mergeCigar(cigar, newAllele.cigar);
- referenceLength = referenceLengthFromCigar();
- //cout << stringForAllele(*this) << endl << endl;
-void Allele::squash(void) {
- // will trigger destruction of this allele in the AlleleParser
- length = 0;
- position = 0;
-unsigned int Allele::getLengthOnReference(void) {
- return referenceLengthFromCigar();
-vector<Allele> alleleUnion(vector<Allele>& a1, vector<Allele>& a2) {
- map<string, Allele> alleleSet;
- vector<Allele> results;
- for (vector<Allele>::iterator a = a1.begin(); a != a1.end(); ++a) {
- alleleSet.insert(make_pair(a->base(), *a));
- }
- for (vector<Allele>::iterator a = a2.begin(); a != a2.end(); ++a) {
- alleleSet.insert(make_pair(a->base(), *a));
- }
- for (map<string, Allele>::iterator a = alleleSet.begin(); a != alleleSet.end(); ++a) {
- results.push_back(a->second);
- }
- return results;
-bool isEmptyAllele(const Allele& allele) {
- return allele.length == 0;
-bool isDividedIndel(const Allele& allele) {
- vector<pair<int, string> > cigarV = splitCigar(allele.cigar);
- if (cigarV.front().second == "D" || cigarV.front().second == "I") {
- return true;
- } else {
- return false;
- }
-// returns true if this indel is not properly flanked by reference-matching sequence
-bool isUnflankedIndel(const Allele& allele) {
- if (allele.isReference() || allele.isSNP() || allele.isMNP()) {
- return false;
- } else {
- vector<pair<int, string> > cigarV = splitCigar(allele.cigar);
- if (cigarV.back().second == "D"
- || cigarV.back().second == "I"
- || cigarV.front().second == "D"
- || cigarV.front().second == "I") {
- return true;
- } else {
- return false;
- }
- }
-bool isEmptyAlleleOrIsDividedIndel(const Allele& allele) {
- return isEmptyAllele(allele) || isDividedIndel(allele);
diff --git a/src/Allele.h b/src/Allele.h
deleted file mode 100644
index 6122f96..0000000
--- a/src/Allele.h
+++ /dev/null
@@ -1,393 +0,0 @@
-#ifndef _ALLELE_H
-#define _ALLELE_H
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <list>
-#include <map>
-#include <limits>
-#include <sstream>
-#include <assert.h>
-#include "Utility.h"
-#include "convert.h"
-#include "api/BamAlignment.h"
-using namespace std;
-using namespace BamTools;
-class Allele;
-// Allele recycling allocator
-// without we spend 30% of our runtime deleting Allele instances
-class AlleleFreeList {
- AlleleFreeList()
- : _p(NULL)
- , _size(0)
- , _allocs(0)
- , _min_size(0)
- , _max_size(0)
- , _tick_allocs(1000000) // attempt realloc every million allocs
- { }
- ~AlleleFreeList();
- void Purge();
- void Resize(int new_size);
- void* NewAllele();
- void Recycle(void* mem);
- Allele* _p;
- int _size; // number of alleles on list
- int _allocs; // allocation counter
- int _min_size; // min size within some previous number of calls to new
- int _max_size;
- int _tick_allocs; // GC cycle length
-// a structure describing an allele
-enum AlleleType {
-// used in making allele type filter vectors
-//const int numberOfPossibleAlleleTypes = 7;
-enum AlleleStrand {
-typedef long int Position;
-class Allele {
- friend class AlleleFreeList;
- friend string stringForAllele(const Allele &a);
- friend string stringForAlleles(vector<Allele> &av);
- friend bool operator<(const Allele &a, const Allele &b);
- friend bool operator==(const Allele &a, const Allele &b);
- friend bool operator!=(const Allele &a, const Allele &b);
- friend ostream &operator<<(ostream &out, vector<Allele> &a);
- friend ostream &operator<<(ostream &out, vector<Allele*> &a);
- friend ostream &operator<<(ostream &out, list<Allele*> &a);
- friend ostream &operator<<(ostream &out, Allele &a);
- friend ostream &operator<<(ostream &out, Allele* &a);
- friend string json(vector<Allele*> &alleles, long int &position);
- friend string json(vector<Allele*> &alleles);
- friend string json(Allele &allele, long int &position);
- friend string json(Allele* &allele);
- AlleleType type; // type of the allele, enumerated above
- string referenceName; // reference name, for sanity checking
- string referenceSequence; // reference sequence or "" (in case of insertions)
- string alternateSequence; // alternate sequence or "" (in case of deletions and reference alleles)
- string sequencingTechnology; // the technology used to generate this allele
- long int position; // position 0-based against reference
- long int* currentReferencePosition; // pointer to the current reference position (which may be updated during the life of this allele)
- char* currentReferenceBase; // pointer to current reference base
- unsigned int length; // and event length (deletion implies 0, snp implies 1, insertion >1)
- unsigned int referenceLength; // length of the event relative to the reference
- long int repeatRightBoundary; // if this allele is an indel, and if it is embedded in a tandem repeat
- // TODO cleanup
- int basesLeft; // these are the "updated" versions of the above
- int basesRight;
- AlleleStrand strand; // strand, true = +, false = -
- string sampleID; // representative sample ID
- string readGroupID; // read group membership
- string readID; // id of the read which the allele is drawn from
- vector<short> baseQualities;
- long double quality; // base quality score associated with this allele, updated every position in the case of reference alleles
- long double lnquality; // log version of above
- string currentBase; // current base, meant to be updated every position
- short mapQuality; // map quality for the originating read
- long double lnmapQuality; // map quality for the originating read
- double readMismatchRate; // per-base mismatch rate for the read
- double readIndelRate; // only considering gaps
- double readSNPRate; // only considering snps/mnps
- bool isProperPair; // if the allele is supported by a properly paired read
- bool isPaired; // if the allele is supported by a read that is part of a pair
- bool isMateMapped; // if the mate in the pair is mapped
- bool genotypeAllele; // if this is an abstract 'genotype' allele
- bool processed; // flag to mark if we've presented this allele for analysis
- string cigar; // a cigar representation of the allele
- vector<Allele>* alignmentAlleles;
- long int alignmentStart;
- long int alignmentEnd;
- // default constructor, for converting alignments into allele observations
- Allele(AlleleType t,
- string& refname,
- long int pos,
- long int* crefpos,
- char* crefbase,
- unsigned int len,
- long int rrbound,
- int bleft,
- int bright,
- string alt,
- string& sampleid,
- string& readid,
- string& readgroupid,
- string& sqtech,
- bool strnd,
- long double qual,
- string qstr,
- short mapqual,
- bool ispair,
- bool ismm,
- bool isproppair,
- string cigarstr,
- vector<Allele>* ra,
- long int bas,
- long int bae)
- : type(t)
- , referenceName(refname)
- , position(pos)
- , currentReferencePosition(crefpos)
- , currentReferenceBase(crefbase)
- , length(len)
- , repeatRightBoundary(rrbound)
- , basesLeft(bleft)
- , basesRight(bright)
- , currentBase(alt)
- , alternateSequence(alt)
- , sampleID(sampleid)
- , readID(readid)
- , readGroupID(readgroupid)
- , sequencingTechnology(sqtech)
- , quality((qual == -1) ? averageQuality(qstr) : qual) // passing -1 as quality triggers this calculation
- , lnquality(phred2ln((qual == -1) ? averageQuality(qstr) : qual))
- , mapQuality(mapqual)
- , lnmapQuality(phred2ln(mapqual))
- , isProperPair(isproppair)
- , isPaired(ispair)
- , isMateMapped(ismm)
- , genotypeAllele(false)
- , processed(false)
- , readMismatchRate(0)
- , readIndelRate(0)
- , readSNPRate(0)
- , cigar(cigarstr)
- , alignmentAlleles(ra)
- , alignmentStart(bas)
- , alignmentEnd(bae)
- {
- baseQualities.resize(qstr.size()); // cache qualities
- transform(qstr.begin(), qstr.end(), baseQualities.begin(), qualityChar2ShortInt);
- referenceLength = referenceLengthFromCigar();
- }
- // for constructing genotype alleles
- Allele(AlleleType t,
- string alt,
- unsigned int len,
- unsigned int reflen,
- string cigarStr,
- long int pos=0,
- long int rrbound=0,
- bool gallele=true)
- : type(t)
- , alternateSequence(alt)
- , length(len)
- , referenceLength(reflen)
- , repeatRightBoundary(rrbound)
- , quality(0)
- , lnquality(1)
- , position(pos)
- , genotypeAllele(true)
- , readMismatchRate(0)
- , readIndelRate(0)
- , readSNPRate(0)
- , cigar(cigarStr)
- , alignmentAlleles(NULL)
- , processed(false)
- {
- currentBase = base();
- baseQualities.assign(alternateSequence.size(), 0);
- referenceLength = referenceLengthFromCigar();
- }
- bool equivalent(Allele &a); // heuristic 'equivalency' between two alleles, which depends on their type
- string typeStr(void) const; // return a string representation of the allele type, for output
- bool isReference(void) const; // true if type == ALLELE_REFERENCE
- bool isSNP(void) const; // true if type == ALLELE_SNP
- bool isInsertion(void) const; // true if type == ALLELE_INSERTION
- bool isDeletion(void) const; // true if type == ALLELE_DELETION
- bool isMNP(void) const; // true if type == ALLELE_MNP
- bool isComplex(void) const; // true if type == ALLELE_COMPLEX
- bool isNull(void) const; // true if type == ALLELE_NULL
- int referenceOffset(void) const;
- const short currentQuality(void) const; // for getting the quality of a given position in multi-bp alleles
- const long double lncurrentQuality(void) const;
- const int subquality(int startpos, int len) const;
- const long double lnsubquality(int startpos, int len) const;
- const int subquality(const Allele &a) const;
- const long double lnsubquality(const Allele &a) const;
- //const int basesLeft(void) const; // returns the bases left within the read of the current position within the allele
- //const int basesRight(void) const; // returns the bases right within the read of the current position within the allele
- bool sameSample(Allele &other); // if the other allele has the same sample as this one
- void update(int haplotypeLength = 1); // for reference alleles, updates currentBase and quality
- void setQuality(void); // sets 'current quality' for alleles
- // TODO update this to reflect different insertions (e.g. IATGC instead of I4)
- const string base(void) const; // the 'current' base of the allele or a string describing the allele, e.g. I10 or D2
- // this is used to update cached data in the allele prior to presenting the allele for analysis
- // for the current base, just use allele.currentBase
- string json(void);
- unsigned int getLengthOnReference(void);
- int referenceLengthFromCigar(void);
- string readSeq(void);
- string read5p(void);
- string read3p(void);
- string read5pNonNull(void);
- string read3pNonNull(void);
- // the number of bases from the 5p edge of the allele until the end or the next null allele
- int read5pNonNullBases(void);
- // the number of bases from the 3p edge of the allele until the end or the next null allele
- int read3pNonNullBases(void);
- // wish list...
- //string readRefRelativeSubstr(long int start, long int end);
- //string readRefStartLenSubstr(long int start, int bp);
- vector<Allele*> extend(int pos, int haplotypeLength);
- void squash(void);
- void subtract(int subtractFromRefStart,
- int subtractFromRefEnd,
- string& substart,
- string& subend,
- vector<pair<int, string> >& cigarstart,
- vector<pair<int, string> >& cigarend,
- vector<short>& qsubstart,
- vector<short>& qsubend);
- void add(string& addToStart,
- string& addToEnd,
- vector<pair<int, string> >& cigarStart,
- vector<pair<int, string> >& cigarEnd,
- vector<short>& qaddToStart,
- vector<short>& qaddToEnd);
- void subtractFromStart(int bp, string& seq, vector<pair<int, string> >& cig, vector<short>& quals);
- void subtractFromEnd(int bp, string& seq, vector<pair<int, string> >& cig, vector<short>& quals);
- void addToStart(string& seq, vector<pair<int, string> >& cig, vector<short>& quals);
- void addToEnd(string& seq, vector<pair<int, string> >& cig, vector<short>& quals);
- void mergeAllele(const Allele& allele, AlleleType newType);
- void updateTypeAndLengthFromCigar(void);
- int bpLeft(void); // how many bases are in the read to the left of the allele
- int bpRight(void); // how many bases are in the read to the left of the allele
-// for sorting pairs of alleles and ints
-class AllelePairIntCompare {
- bool operator()(const pair<Allele, int>& a, const pair<Allele, int>& b) {
- return a.second > b.second;
- }
-class AllelePositionCompare {
- bool operator()(const Allele& a, const Allele& b) {
- return a.position < b.position;
- }
-void updateAllelesCachedData(vector<Allele*>& alleles);
-map<string, vector<Allele*> > groupAllelesBySample(list<Allele*>& alleles);
-void groupAllelesBySample(list<Allele*>& alleles, map<string, vector<Allele*> >& groups);
-int allowedAlleleTypes(vector<AlleleType>& allowedEnumeratedTypes);
-void filterAlleles(list<Allele*>& alleles, int allowedTypes);
-int countAlleles(map<string, vector<Allele*> >& sampleGroups);
-int baseCount(vector<Allele*>& alleles, string base, AlleleStrand strand);
-pair<pair<int, int>, pair<int, int> >
-baseCount(vector<Allele*>& alleles, string refbase, string altbase);
-int countAllelesWithBase(vector<Allele*>& alleles, string base);
-bool areHomozygous(vector<Allele*>& alleles);
-map<Allele, int> countAlleles(vector<Allele*>& alleles);
-map<string, int> countAllelesString(vector<Allele*>& alleles);
-map<string, int> countAllelesString(vector<Allele>& alleles);
-map<Allele, int> countAlleles(vector<Allele>& alleles);
-map<Allele, int> countAlleles(list<Allele*>& alleles);
-vector<Allele> uniqueAlleles(list<Allele*>& alleles);
-bool allelesSameType(Allele* &a, Allele* &b);
-bool allelesEquivalent(Allele* &a, Allele* &b);
-bool allelesSameSample(Allele* &a, Allele* &b);
-bool allelesSameType(Allele &a, Allele &b);
-bool allelesEquivalent(Allele &a, Allele &b);
-bool allelesSameSample(Allele &a, Allele &b);
-bool allelesEqual(Allele &a, Allele &b);
-void groupAlleles(map<string, vector<Allele*> >& sampleGroups, map<string, vector<Allele*> >& alleleGroups);
-void homogenizeAlleles(map<string, vector<Allele*> >& alleleGroups, string& refseq, Allele& refallele);
-void resetProcessedFlag(map<string, vector<Allele*> >& alleleGroups);
-vector<Allele> alleleUnion(vector<Allele>& a1, vector<Allele>& a2);
-// XXX cleanup
-// is there a way to template these? difficult as the syntax for pointer-based comparisons is different than non-pointer
-vector<vector<Allele*> > groupAlleles(list<Allele*> &alleles, bool (*fncompare)(Allele* &a, Allele* &b));
-vector<vector<Allele*> > groupAlleles(list<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b));
-vector<vector<Allele*> > groupAlleles(vector<Allele*> &alleles, bool (*fncompare)(Allele &a, Allele &b));
-vector<vector<Allele*> > groupAlleles(vector<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b));
-vector<vector<Allele*> > groupAlleles(map<string, vector<Allele*> > &alleles, bool (*fncompare)(Allele &a, Allele &b));
-vector<vector<Allele> > groupAlleles_copy(vector<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b));
-vector<vector<Allele> > groupAlleles_copy(list<Allele> &alleles, bool (*fncompare)(Allele &a, Allele &b));
-vector<vector<Allele> > groupAlleles_copy(vector<Allele> &alleles);
-vector<Allele> genotypeAllelesFromAlleleGroups(vector<vector<Allele> > &groups);
-vector<Allele> genotypeAllelesFromAlleleGroups(vector<vector<Allele*> > &groups);
-vector<Allele> genotypeAllelesFromAlleles(vector<Allele> &alleles);
-vector<Allele> genotypeAllelesFromAlleles(vector<Allele*> &alleles);
-Allele genotypeAllele(Allele& a);
-Allele genotypeAllele(AlleleType type, string alt = "", unsigned int length = 0, string cigar = "", unsigned int reflen = 0, long int position = 0, long int rrbound = 0);
-bool isEmptyAllele(const Allele& allele);
-bool isDividedIndel(const Allele& allele);
-bool isEmptyAlleleOrIsDividedIndel(const Allele& allele);
-bool isUnflankedIndel(const Allele& allele);
-int referenceLengthFromCigar(string& cigar);
-//AlleleFreeList Allele::_freeList;
diff --git a/src/AlleleParser.cpp b/src/AlleleParser.cpp
deleted file mode 100644
index 444034e..0000000
--- a/src/AlleleParser.cpp
+++ /dev/null
@@ -1,3976 +0,0 @@
-#include "AlleleParser.h"
-#include "multichoose.h" // includes generic functions, so it must be included here
- // otherwise we will get a linker error
- // see: http://stackoverflow.com/questions/36039/templates-spread-across-multiple-files
- // http://www.cplusplus.com/doc/tutorial/templates/ "Templates and Multi-file projects"
-#include "multipermute.h"
-// local helper debugging macros to improve code readability
-#define DEBUG(msg) \
- if (parameters.debug) { cerr << msg << endl; }
-// lower-priority messages
-#define DEBUG2(msg) \
- if (parameters.debug2) { cerr << msg << endl; }
-#define DEBUG2(msg)
-// must-see error messages
-#define ERROR(msg) \
- cerr << "ERROR(freebayes): " << msg << endl;
-// must-see warning messages
-#define WARNING(msg) \
- cerr << "WARNING(freebayes): " << msg << endl;
-using namespace std;
-// open BAM input file
-void AlleleParser::openBams(void) {
- // report differently if we have one or many bam files
- if (parameters.bams.size() == 1) {
- DEBUG("Opening BAM fomat alignment input file: " << parameters.bams.front() << " ...");
- } else if (parameters.bams.size() > 1) {
- DEBUG("Opening " << parameters.bams.size() << " BAM fomat alignment input files");
- for (vector<string>::const_iterator b = parameters.bams.begin();
- b != parameters.bams.end(); ++b) {
- DEBUG2(*b);
- }
- }
- if (parameters.useStdin) {
- if (!bamMultiReader.Open(parameters.bams)) {
- ERROR("Could not read BAM data from stdin");
- cerr << bamMultiReader.GetErrorString() << endl;
- exit(1);
- }
- } else {
- if (!bamMultiReader.Open(parameters.bams)) {
- ERROR("Could not open input BAM files");
- cerr << bamMultiReader.GetErrorString() << endl;
- exit(1);
- } else {
- if (!bamMultiReader.LocateIndexes()) {
- ERROR("Opened BAM reader without index file, jumping is disabled.");
- cerr << bamMultiReader.GetErrorString() << endl;
- if (!targets.empty()) {
- ERROR("Targets specified but no BAM index file provided.");
- ERROR("FreeBayes cannot jump through targets in BAM files without BAM index files, exiting.");
- ERROR("Please generate a BAM index file eithe, e.g.:");
- ERROR(" \% bamtools index -in <bam_file>");
- ERROR(" \% samtools index <bam_file>");
- exit(1);
- }
- }
- }
- if (!bamMultiReader.SetExplicitMergeOrder(bamMultiReader.MergeByCoordinate)) {
- ERROR("could not set sort order to coordinate");
- cerr << bamMultiReader.GetErrorString() << endl;
- exit(1);
- }
- }
- // retrieve header information
- bamHeader = bamMultiReader.GetHeaderText();
- bamHeaderLines = split(bamHeader, '\n');
- DEBUG(" done");
-void AlleleParser::openOutputFile(void) {
- if (parameters.outputFile != "") {
- outputFile.open(parameters.outputFile.c_str(), ios::out);
- DEBUG("Opening output file: " << parameters.outputFile << " ...");
- if (!outputFile) {
- ERROR(" unable to open output file: " << parameters.outputFile);
- exit(1);
- }
- output = &outputFile;
- } else {
- output = &cout;
- }
-void AlleleParser::getSequencingTechnologies(void) {
- map<string, bool> technologies;
- for (vector<string>::const_iterator it = bamHeaderLines.begin(); it != bamHeaderLines.end(); ++it) {
- // get next line from header, skip if empty
- string headerLine = *it;
- if ( headerLine.empty() ) { continue; }
- // lines of the header look like:
- // "@RG ID:- SM:NA11832 CN:BCM PL:454"
- // ^^^^^^^\ is our sample name
- if ( headerLine.find("@RG") == 0 ) {
- vector<string> readGroupParts = split(headerLine, "\t ");
- string tech;
- string readGroupID;
- for (vector<string>::const_iterator r = readGroupParts.begin(); r != readGroupParts.end(); ++r) {
- size_t colpos = r->find(":");
- if (colpos != string::npos) {
- string fieldname = r->substr(0, colpos);
- if (fieldname == "PL") {
- tech = r->substr(colpos+1);
- } else if (fieldname == "ID") {
- readGroupID = r->substr(colpos+1);
- }
- }
- }
- if (tech.empty()) {
- if (!sequencingTechnologies.empty()) {
- cerr << "no sequencing technology specified in @RG tag (no PL: in @RG tag) " << endl << headerLine << endl;
- }
- } else {
- readGroupToTechnology[readGroupID] = tech;
- technologies[tech] = true;
- }
- if (readGroupID.empty()) {
- cerr << "could not find ID: in @RG tag " << endl << headerLine << endl;
- continue;
- }
- //string name = nameParts.back();
- //mergedHeader.append(1, '\n');
- //cerr << "found read group id " << readGroupID << " containing sample " << name << endl;
- }
- }
- for (map<string, bool>::iterator st = technologies.begin(); st != technologies.end(); ++st) {
- sequencingTechnologies.push_back(st->first);
- }
-void AlleleParser::getPopulations(void) {
- map<string, string> allSamplePopulation;
- if (!parameters.populationsFile.empty()) {
- ifstream populationsFile(parameters.populationsFile.c_str(), ios::in);
- if (!populationsFile) {
- cerr << "unable to open population file: " << parameters.populationsFile << endl;
- exit(1);
- }
- string line;
- while (getline(populationsFile, line)) {
- DEBUG2("found sample-population mapping: " << line);
- vector<string> popsample = split(line, "\t ");
- if (popsample.size() == 2) {
- string& sample = popsample.front();
- string& population = popsample.back();
- DEBUG2("sample: " << sample << " population: " << population);
- allSamplePopulation[sample] = population;
- } else {
- cerr << "malformed population/sample pair, " << line << endl;
- exit(1);
- }
- }
- }
- // XXX
- // TODO now, assign a default population to all the rest of the samples...
- // XXX
- for (vector<string>::iterator s = sampleList.begin(); s != sampleList.end(); ++s) {
- if (!allSamplePopulation.count(*s)) {
- samplePopulation[*s] = "DEFAULT";
- } else {
- samplePopulation[*s] = allSamplePopulation[*s];
- }
- }
- // now, only keep the samples we are using for processing
- for (map<string, string>::iterator s = samplePopulation.begin(); s != samplePopulation.end(); ++s) {
- populationSamples[s->second].push_back(s->first);
- }
-// read sample list file or get sample names from bam file header
-void AlleleParser::getSampleNames(void) {
- // If a sample file is given, use it. But otherwise process the bam file
- // header to get the sample names.
- //
- if (!parameters.samples.empty()) {
- ifstream sampleFile(parameters.samples.c_str(), ios::in);
- if (! sampleFile) {
- cerr << "unable to open file: " << parameters.samples << endl;
- exit(1);
- }
- string line;
- while (getline(sampleFile, line)) {
- DEBUG2("found sample " << line);
- sampleList.push_back(line);
- }
- }
- for (vector<string>::const_iterator it = bamHeaderLines.begin(); it != bamHeaderLines.end(); ++it) {
- // get next line from header, skip if empty
- string headerLine = *it;
- if ( headerLine.empty() ) { continue; }
- // lines of the header look like:
- // "@RG ID:- SM:NA11832 CN:BCM PL:454"
- // ^^^^^^^\ is our sample name
- if ( headerLine.find("@RG") == 0 ) {
- vector<string> readGroupParts = split(headerLine, "\t ");
- string name = "";
- string readGroupID = "";
- for (vector<string>::const_iterator r = readGroupParts.begin(); r != readGroupParts.end(); ++r) {
- size_t colpos = r->find(":");
- if (colpos != string::npos) {
- string fieldname = r->substr(0, colpos);
- if (fieldname == "SM") {
- name = r->substr(colpos+1);
- } else if (fieldname == "ID") {
- readGroupID = r->substr(colpos+1);
- }
- }
- }
- if (name == "") {
- ERROR(" could not find SM: in @RG tag " << endl << headerLine);
- exit(1);
- }
- if (readGroupID == "") {
- ERROR(" could not find ID: in @RG tag " << endl << headerLine);
- exit(1);
- }
- //string name = nameParts.back();
- //mergedHeader.append(1, '\n');
- DEBUG2("found read group id " << readGroupID << " containing sample " << name);
- sampleListFromBam.push_back(name);
- map<string, string>::iterator s = readGroupToSampleNames.find(readGroupID);
- if (s != readGroupToSampleNames.end()) {
- if (s->second != name) {
- ERROR("ERROR: multiple samples (SM) map to the same read group (RG)" << endl
- << endl
- << "samples " << name << " and " << s->second << " map to " << readGroupID << endl
- << endl
- << "As freebayes operates on a virtually merged stream of its input files," << endl
- << "it will not be possible to determine what sample an alignment belongs to" << endl
- << "at runtime." << endl
- << endl
- << "To resolve the issue, ensure that RG ids are unique to one sample" << endl
- << "across all the input files to freebayes." << endl
- << endl
- << "See bamaddrg (https://github.com/ekg/bamaddrg) for a method which can" << endl
- << "add RG tags to alignments." << endl);
- exit(1);
- }
- // if it's the same sample name and RG combo, no worries
- }
- readGroupToSampleNames[readGroupID] = name;
- }
- }
- //cout << sampleListFromBam.size() << endl;
- // no samples file given, read from BAM file header for sample names
- if (sampleList.empty()) {
- DEBUG("no sample list file given, reading sample names from bam file");
- for (vector<string>::const_iterator s = sampleListFromBam.begin(); s != sampleListFromBam.end(); ++s) {
- DEBUG2("found sample " << *s);
- if (!stringInVector(*s, sampleList)) {
- sampleList.push_back(*s);
- }
- }
- DEBUG("found " << sampleList.size() << " samples in BAM file");
- } else {
- // verify that the samples in the sample list are present in the bam,
- // and raise an error and exit if not
- for (vector<string>::const_iterator s = sampleList.begin(); s != sampleList.end(); ++s) {
- bool inBam = false;
- bool inReadGroup = false;
- //cout << "checking sample from sample file " << *s << endl;
- for (vector<string>::const_iterator b = sampleListFromBam.begin(); b != sampleListFromBam.end(); ++b) {
- //cout << *s << " against " << *b << endl;
- if (*s == *b) { inBam = true; break; }
- }
- for (map<string, string>::const_iterator p = readGroupToSampleNames.begin(); p != readGroupToSampleNames.end(); ++p) {
- if (*s == p->second) { inReadGroup = true; break; }
- }
- if (!inBam) {
- ERROR("sample " << *s << " listed in sample file "
- << parameters.samples.c_str() << " is not listed in the header of BAM file(s) "
- << parameters.bam);
- exit(1);
- }
- if (!inReadGroup) {
- ERROR("sample " << *s << " listed in sample file "
- << parameters.samples.c_str() << " is not associated with any read group in the header of BAM file(s) "
- << parameters.bam);
- exit(1);
- }
- }
- }
- if (sampleList.empty()) {
- /*
- ERROR(string(80, '-') << endl
- //--------------------------------------------------------------------------------
- << "Warning: No sample file given, and no @RG tags found in BAM header." << endl
- << "All alignments from all input files will be assumed to come from the same" << endl
- << "individual. To group alignments by sample, you must add read groups and sample" << endl
- << "names to your alignments. You can do this using ./scripts/sam_add_rg.pl in the" << endl
- << "freebayes source tree, or by specifying read groups and sample names when you" << endl
- << "prepare your sequencing data for alignment." << endl
- << string(80, '-'));
- */
- sampleList.push_back("unknown");
- readGroupToSampleNames["unknown"] = "unknown";
- oneSampleAnalysis = true;
- }
-string AlleleParser::vcfHeader() {
- stringstream headerss;
- headerss
- << "##fileformat=VCFv4.1" << endl
- << "##fileDate=" << dateStr() << endl
- << "##source=freeBayes " << VERSION_GIT << endl
- << "##reference=" << reference.filename << endl
- << "##phasing=none" << endl
- << "##commandline=\"" << parameters.commandline << "\"" << endl
- << "##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of samples with data\">" << endl
- << "##INFO=<ID=DP,Number=1,Type=Integer,Description=\"Total read depth at the locus\">" << endl
- << "##INFO=<ID=DPB,Number=1,Type=Float,Description=\"Total read depth per bp at the locus; bases in reads overlapping / bases in haplotype\">" << endl
- // allele frequency metrics
- << "##INFO=<ID=AC,Number=A,Type=Integer,Description=\"Total number of alternate alleles in called genotypes\">" << endl
- << "##INFO=<ID=AN,Number=1,Type=Integer,Description=\"Total number of alleles in called genotypes\">" << endl
- << "##INFO=<ID=AF,Number=A,Type=Float,Description=\"Estimated allele frequency in the range (0,1]\">" << endl
- // observation counts
- << "##INFO=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count, with partial observations recorded fractionally\">" << endl
- << "##INFO=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observations, with partial observations recorded fractionally\">" << endl
- << "##INFO=<ID=PRO,Number=1,Type=Float,Description=\"Reference allele observation count, with partial observations recorded fractionally\">" << endl
- << "##INFO=<ID=PAO,Number=A,Type=Float,Description=\"Alternate allele observations, with partial observations recorded fractionally\">" << endl
- // qualities
- << "##INFO=<ID=QR,Number=1,Type=Integer,Description=\"Reference allele quality sum in phred\">" << endl
- << "##INFO=<ID=QA,Number=A,Type=Integer,Description=\"Alternate allele quality sum in phred\">" << endl
- << "##INFO=<ID=PQR,Number=1,Type=Float,Description=\"Reference allele quality sum in phred for partial observations\">" << endl
- << "##INFO=<ID=PQA,Number=A,Type=Float,Description=\"Alternate allele quality sum in phred for partial observations\">" << endl
- // binomial balance metrics
- << "##INFO=<ID=SRF,Number=1,Type=Integer,Description=\"Number of reference observations on the forward strand\">" << endl
- << "##INFO=<ID=SRR,Number=1,Type=Integer,Description=\"Number of reference observations on the reverse strand\">" << endl
- << "##INFO=<ID=SAF,Number=A,Type=Integer,Description=\"Number of alternate observations on the forward strand\">" << endl
- << "##INFO=<ID=SAR,Number=A,Type=Integer,Description=\"Number of alternate observations on the reverse strand\">" << endl
- //<< "##INFO=<ID=SRB,Number=1,Type=Float,Description=\"Strand bias for the reference allele: SRF / ( SRF + SRR )\">" << endl
- //<< "##INFO=<ID=SAB,Number=1,Type=Float,Description=\"Strand bias for the alternate allele: SAF / ( SAF + SAR )\">" << endl
- << "##INFO=<ID=SRP,Number=1,Type=Float,Description=\"Strand balance probability for the reference allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SRF and SRR given E(SRF/SRR) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- << "##INFO=<ID=SAP,Number=A,Type=Float,Description=\"Strand balance probability for the alternate allele: Phred-scaled upper-bounds estimate of the probability of observing the deviation between SAF and SAR given E(SAF/SAR) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- //<< "##INFO=<ID=ABR,Number=1,Type=Integer,Description=\"Reference allele balance count: the number of sequence reads from apparent heterozygotes supporting the reference allele\">" << endl
- //<< "##INFO=<ID=ABA,Number=1,Type=Integer,Description=\"Alternate allele balance count: the number of sequence reads from apparent heterozygotes supporting the alternate allele\">" << endl
- << "##INFO=<ID=AB,Number=A,Type=Float,Description=\"Allele balance at heterozygous sites: a number between 0 and 1 representing the ratio of reads showing the reference allele to all reads, considering only reads from individuals called as heterozygous\">" << endl
- << "##INFO=<ID=ABP,Number=A,Type=Float,Description=\"Allele balance probability at heterozygous sites: Phred-scaled upper-bounds estimate of the probability of observing the deviation between ABR and ABA given E(ABR/ABA) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- << "##INFO=<ID=RUN,Number=A,Type=Integer,Description=\"Run length: the number of consecutive repeats of the alternate allele in the reference genome\">" << endl
- //<< "##INFO=<ID=RL,Number=1,Type=Integer,Description=\"Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele\">" << endl
- //<< "##INFO=<ID=RR,Number=1,Type=Integer,Description=\"Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele\">" << endl
- << "##INFO=<ID=RPP,Number=A,Type=Float,Description=\"Read Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- << "##INFO=<ID=RPPR,Number=1,Type=Float,Description=\"Read Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between RPL and RPR given E(RPL/RPR) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- << "##INFO=<ID=RPL,Number=A,Type=Float,Description=\"Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele\">" << endl
- //<< "##INFO=<ID=RPLR,Number=A,Type=Float,Description=\"Reads Placed Left: number of reads supporting the alternate balanced to the left (5') of the alternate allele\">" << endl
- << "##INFO=<ID=RPR,Number=A,Type=Float,Description=\"Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele\">" << endl
- //<< "##INFO=<ID=RPRR,Number=A,Type=Float,Description=\"Reads Placed Right: number of reads supporting the alternate balanced to the right (3') of the alternate allele\">" << endl
- //<< "##INFO=<ID=EL,Number=1,Type=Integer,Description=\"Allele End Left: number of observations of the alternate where the alternate occurs in the left end of the read\">" << endl
- //<< "##INFO=<ID=ER,Number=1,Type=Integer,Description=\"Allele End Right: number of observations of the alternate where the alternate occurs in the right end of the read\">" << endl
- << "##INFO=<ID=EPP,Number=A,Type=Float,Description=\"End Placement Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- << "##INFO=<ID=EPPR,Number=1,Type=Float,Description=\"End Placement Probability for reference observations: Phred-scaled upper-bounds estimate of the probability of observing the deviation between EL and ER given E(EL/ER) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- //<< "##INFO=<ID=BL,Number=1,Type=Integer,Description=\"Base Pairs Left: number of base pairs in reads supporting the alternate to the left (5') of the alternate allele\">" << endl
- //<< "##INFO=<ID=BR,Number=1,Type=Integer,Description=\"Base Pairs Right: number of base pairs in reads supporting the alternate to the right (3') of the alternate allele\">" << endl
- //<< "##INFO=<ID=LRB,Number=1,Type=Float,Description=\"((max(BR, BL) / (BR + BL)) - 0.5) * 2 : The proportion of base pairs in reads on one side of the alternate allele relative to total bases, scaled from [0.5,1] to [0,1]\">" << endl
- //<< "##INFO=<ID=LRBP,Number=1,Type=Float,Description=\"Left-Right Balance Probability: Phred-scaled upper-bounds estimate of the probability of observing the deviation between BL and BR given E(BR/BL) ~ 0.5, derived using Hoeffding's inequality\">" << endl
- << "##INFO=<ID=DPRA,Number=A,Type=Float,Description=\"Alternate allele depth ratio. Ratio between depth in samples with each called alternate allele and those without.\">" << endl
- // error rates
- /*
- << "##INFO=<ID=XRM,Number=1,Type=Float,Description=\"Reference allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the reference allele.\">" << endl
- << "##INFO=<ID=XRS,Number=1,Type=Float,Description=\"Reference allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the reference allele.\">" << endl
- << "##INFO=<ID=XRI,Number=1,Type=Float,Description=\"Reference allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the reference allele.\">" << endl
- << "##INFO=<ID=XAM,Number=A,Type=Float,Description=\"Alternate allele read mismatch rate: The rate of SNPs + MNPs + INDELs in reads supporting the alternate allele, excluding the called variant.\">" << endl
- << "##INFO=<ID=XAS,Number=A,Type=Float,Description=\"Alternate allele read SNP rate: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the alternate allele, excluding the called variant.\">" << endl
- << "##INFO=<ID=XAI,Number=A,Type=Float,Description=\"Alternate allele read INDEL rate: The rate of INDELs (gaps) in reads supporting the alternate allele, excluding the called variant.\">" << endl
- */
- // error rate ratios
- //<< "##INFO=<ID=ARM,Number=A,Type=Float,Description=\"Alternate allele / reference allele read mismatch ratio: The rate of SNPs + MNPs + INDELs in reads supporting the alternate allele versus reads supporting the reference allele, excluding the called variant.\">" << endl
- //<< "##INFO=<ID=ARS,Number=A,Type=Float,Description=\"Alternate allele / reference allele read SNP ratio: The rate of per-base mismatches (SNPs + MNPs) in reads supporting the alternate allele versus reads supporting the reference allele, excluding the called variant.\">" << endl
- //<< "##INFO=<ID=ARI,Number=A,Type=Float,Description=\"Alternate allele / reference allele read INDEL ratio: The ratio in rate rate of INDELs (gaps) in reads supporting the alternate allele versus reads supporting the reference allele, excluding the called variant.\">" << endl
- // supplementary information about the site
- << "##INFO=<ID=ODDS,Number=1,Type=Float,Description=\"The log odds ratio of the best genotype combination to the second-best.\">" << endl
- << "##INFO=<ID=GTI,Number=1,Type=Integer,Description=\"Number of genotyping iterations required to reach convergence or bailout.\">" << endl
- //<< "##INFO=<ID=TS,Number=0,Type=Flag,Description=\"site has transition SNP\">" << endl
- //<< "##INFO=<ID=TV,Number=0,Type=Flag,Description=\"site has transversion SNP\">" << endl
- //<< "##INFO=<ID=CpG,Number=0,Type=Flag,Description=\"CpG site (either CpG, TpG or CpA)\">" << endl
- << "##INFO=<ID=TYPE,Number=A,Type=String,Description=\"The type of allele, either snp, mnp, ins, del, or complex.\">" << endl
- << "##INFO=<ID=CIGAR,Number=A,Type=String,Description=\"The extended CIGAR representation of each alternate allele, with the exception that '=' is replaced by 'M' to ease VCF parsing. Note that INDEL alleles do not have the first matched base (which is provided by default, per the spec) referred to by the CIGAR.\">" << endl
- //<< "##INFO=<ID=SNP,Number=0,Type=Flag,Description=\"SNP allele at site\">" << endl
- //<< "##INFO=<ID=MNP,Number=0,Type=Flag,Description=\"MNP allele at site\">" << endl
- //<< "##INFO=<ID=INS,Number=0,Type=Flag,Description=\"insertion allele at site\">" << endl
- //<< "##INFO=<ID=DEL,Number=0,Type=Flag,Description=\"deletion allele at site\">" << endl
- //<< "##INFO=<ID=COMPLEX,Number=0,Type=Flag,Description=\"complex allele (insertion/deletion/substitution composite) at site\">" << endl
- << "##INFO=<ID=NUMALT,Number=1,Type=Integer,Description=\"Number of unique non-reference alleles in called genotypes at this position.\">" << endl
- << "##INFO=<ID=MEANALT,Number=A,Type=Float,Description=\"Mean number of unique non-reference allele observations per sample with the corresponding alternate alleles.\">" << endl
- //<< "##INFO=<ID=HWE,Number=1,Type=Float,Description=\"Phred-scaled discrete HWE prior probability of the genotyping across all samples.\">" << endl
- << "##INFO=<ID=LEN,Number=A,Type=Integer,Description=\"allele length\">" << endl
- << "##INFO=<ID=MQM,Number=A,Type=Float,Description=\"Mean mapping quality of observed alternate alleles\">" << endl
- << "##INFO=<ID=MQMR,Number=1,Type=Float,Description=\"Mean mapping quality of observed reference alleles\">" << endl
- << "##INFO=<ID=PAIRED,Number=A,Type=Float,Description=\"Proportion of observed alternate alleles which are supported by properly paired read fragments\">" << endl
- << "##INFO=<ID=PAIREDR,Number=1,Type=Float,Description=\"Proportion of observed reference alleles which are supported by properly paired read fragments\">" << endl
- << "##INFO=<ID=MIN,Number=1,Type=Integer,Description=\"Minimum depth in gVCF output block.\">" << endl
- << "##INFO=<ID=END,Number=1,Type=Integer,Description=\"Last position (inclusive) in gVCF output record.\">" << endl;
- // sequencing technology tags, which vary according to input data
- for (vector<string>::iterator st = sequencingTechnologies.begin(); st != sequencingTechnologies.end(); ++st) {
- string& tech = *st;
- headerss << "##INFO=<ID=technology." << tech << ",Number=A,Type=Float,Description=\"Fraction of observations supporting the alternate observed in reads from " << tech << "\">" << endl;
- }
- if (parameters.showReferenceRepeats) {
- headerss << "##INFO=<ID=REPEAT,Number=1,Type=String,Description=\"Description of the local repeat structures flanking the current position\">" << endl;
- }
- // format fields for genotypes
- headerss << "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">" << endl
- << "##FORMAT=<ID=GQ,Number=1,Type=Float,Description=\"Genotype Quality, the Phred-scaled marginal (or unconditional) probability of the called genotype\">" << endl
- // this can be regenerated with RA, AA, QR, QA
- << "##FORMAT=<ID=GL,Number=G,Type=Float,Description=\"Genotype Likelihood, log10-scaled likelihoods of the data given the called genotype for each possible genotype generated from the reference and alternate alleles given the sample ploidy\">" << endl
- //<< "##FORMAT=<ID=GLE,Number=1,Type=String,Description=\"Genotype Likelihood Explicit, same as GL, but with tags to indicate the specific genotype. For instance, 0^-75.22|1^-223.42|0/0^-323.03|1/0^-99.29|1/1^-802.53 represents both haploid and diploid genotype likilehoods in a biallelic context\">" << endl
- << "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\"Read Depth\">" << endl
- << "##FORMAT=<ID=RO,Number=1,Type=Integer,Description=\"Reference allele observation count\">" << endl
- << "##FORMAT=<ID=QR,Number=1,Type=Integer,Description=\"Sum of quality of the reference observations\">" << endl
- << "##FORMAT=<ID=AO,Number=A,Type=Integer,Description=\"Alternate allele observation count\">" << endl
- << "##FORMAT=<ID=QA,Number=A,Type=Integer,Description=\"Sum of quality of the alternate observations\">" << endl
- << "##FORMAT=<ID=MIN,Number=1,Type=Integer,Description=\"Minimum depth in gVCF output block.\">" << endl
- //<< "##FORMAT=<ID=SRF,Number=1,Type=Integer,Description=\"Number of reference observations on the forward strand\">" << endl
- //<< "##FORMAT=<ID=SRR,Number=1,Type=Integer,Description=\"Number of reference observations on the reverse strand\">" << endl
- //<< "##FORMAT=<ID=SAF,Number=1,Type=Integer,Description=\"Number of alternate observations on the forward strand\">" << endl
- //<< "##FORMAT=<ID=SAR,Number=1,Type=Integer,Description=\"Number of alternate observations on the reverse strand\">" << endl
- //<< "##FORMAT=<ID=LR,Number=1,Type=Integer,Description=\"Number of reference observations placed left of the loci\">" << endl
- //<< "##FORMAT=<ID=LA,Number=1,Type=Integer,Description=\"Number of alternate observations placed left of the loci\">" << endl
- //<< "##FORMAT=<ID=ER,Number=1,Type=Integer,Description=\"Number of reference observations overlapping the loci in their '3 end\">" << endl
- //<< "##FORMAT=<ID=EA,Number=1,Type=Integer,Description=\"Number of alternate observations overlapping the loci in their '3 end\">" << endl
- << join(sampleList, "\t") << endl;
- return headerss.str();
-// Generates the VCF header for this run and opens the VCF output with it.
-void AlleleParser::setupVCFOutput(void) {
-    // The header is kept in a named string (not passed as a temporary) so the
-    // call works regardless of how openForOutput() takes its argument.
-    string headerText(vcfHeader());
-    variantCallFile.openForOutput(headerText);
-}
-// Opens the optional VCF inputs named in the parameters: the variant priors
-// file (input alleles) and the haplotype basis allele file. Each one is only
-// opened, and its "using..." flag only raised, when a file name was given.
-void AlleleParser::setupVCFInput(void) {
-    const bool haveVariantInput = !parameters.variantPriorsFile.empty();
-    const bool haveHaplotypeBasis = !parameters.haplotypeVariantFile.empty();
-
-    // variant input for analysis and targeting
-    if (haveVariantInput) {
-        variantCallInputFile.open(parameters.variantPriorsFile);
-        currentVariant = new vcf::Variant(variantCallInputFile);
-        usingVariantInputAlleles = true;
-
-        // Sample names from the VCF input file are intentionally not added to
-        // sampleList. Enabling the stanza below would change the way that the
-        // VCF output describes alternates, present observations, etc. so that
-        // the samples in the VCF input are also included. The result is
-        // confusing output, but it could be useful in some situations.
-        //
-        // TODO optionally include this (via command-line parameter)
-        //
-        //for (vector<string>::iterator s = variantCallInputFile.sampleNames.begin(); s != variantCallInputFile.sampleNames.end(); ++s) {
-        //    sampleList.push_back(*s);
-        //}
-    }
-
-    // basis alleles used when constructing haplotype alleles
-    if (haveHaplotypeBasis) {
-        haplotypeVariantInputFile.open(parameters.haplotypeVariantFile);
-        usingHaplotypeBasisAlleles = true;
-    }
-}
-// Reads the reference sequence list from the BAM reader and records, for each
-// entry, its name under its ordinal position in that list.
-void AlleleParser::loadBamReferenceSequenceNames(void) {
-    // store the names of all the reference sequences in the BAM file
-    referenceSequences = bamMultiReader.GetReferenceData();
-
-    int refID = 0;
-    RefVector::iterator ref = referenceSequences.begin();
-    while (ref != referenceSequences.end()) {
-        referenceIDToName[refID] = ref->RefName;
-        ++refID;
-        ++ref;
-    }
-
-    DEBUG("Number of ref seqs: " << bamMultiReader.GetReferenceCount());
-}
-// Opens the FASTA reference named by parameters.fasta.
-void AlleleParser::loadFastaReference(void) {
-    DEBUG("loading fasta reference " << parameters.fasta);
-
-    // Opening the reference also reads any index file it can find; when none
-    // exists, an attempt is made to generate one next to the reference. Only
-    // the reference handle is set up here. Sequence data is fetched later via
-    // progressive reference.getSubSequence(..) calls, which keeps memory
-    // requirements low.
-    reference.open(parameters.fasta);
-}
-// True when nextInputVariantPosition() reports a reference ID other than -1,
-// i.e. there is another input variant position to visit.
-bool AlleleParser::hasMoreInputVariants(void) {
-    return nextInputVariantPosition().first != -1;
-}
-// Moves to whichever comes first: the next input variant or the given
-// alignment. When the input variant wins, the result of
-// loadNextPositionWithInputVariant() is returned; otherwise the reference for
-// the alignment is loaded and true is returned.
-bool AlleleParser::loadNextPositionWithAlignmentOrInputVariant(BamAlignment& alignment) {
-    const pair<int, long> upcoming = nextInputVariantPosition();
-    const int varRefID = upcoming.first;
-
-    if (varRefID != -1) {
-        // the variant is used when no alignments remain, or when it sorts
-        // strictly before the alignment by (reference ID, position)
-        const bool variantComesFirst =
-            !hasMoreAlignments
-            || varRefID < alignment.RefID
-            || (varRefID == alignment.RefID && upcoming.second < alignment.Position);
-        if (variantComesFirst) {
-            return loadNextPositionWithInputVariant();
-        }
-    }
-
-    loadReferenceSequence(alignment);
-    return true;
-}
-// Jumps to the next input variant position, loading its reference sequence.
-// Returns false (and changes nothing) when no input variant position remains.
-bool AlleleParser::loadNextPositionWithInputVariant(void) {
-    const pair<int, long> upcoming = nextInputVariantPosition();
-    if (upcoming.first == -1) {
-        return false;
-    }
-
-    loadReferenceSequence(referenceIDToName[upcoming.first]);
-    currentPosition = upcoming.second;
-    rightmostHaplotypeBasisAllelePosition = currentPosition;
-    return true;
-}
-// Alignment-based entry point for loading the first bit of our reference
-// sequence: loads the sequence the alignment maps to, then places the current
-// position at the alignment's start.
-void AlleleParser::loadReferenceSequence(BamAlignment& alignment) {
-    string& alignedSeqName = referenceIDToName[alignment.RefID];
-    loadReferenceSequence(alignedSeqName);
-    currentPosition = alignment.Position;
-}
-// Makes seqname the current sequence: caches its upper-cased bases, resets the
-// cached-sequence start to 0 and looks up its reference ID. Nothing happens if
-// seqname is already the current sequence. Exits the process if any of the
-// first 100 bases is not one of A, T, G, C or N.
-void AlleleParser::loadReferenceSequence(string& seqname) {
-    if (currentSequenceName == seqname) {
-        return;  // already loaded
-    }
-
-    currentSequenceName = seqname;
-    currentSequenceStart = 0;
-    currentRefID = bamMultiReader.GetReferenceID(currentSequenceName);
-    currentSequence = uppercase(reference.getSequence(currentSequenceName));
-
-    // check the first few characters and verify they are not garbage
-    const int checkLength = currentSequence.size() < 100 ? (int) currentSequence.size() : 100;
-    for (int i = 0; i < checkLength; ++i) {
-        const char c = currentSequence[i];
-        switch (c) {
-            case 'A':
-            case 'T':
-            case 'G':
-            case 'C':
-            case 'N':
-                break;
-            default:
-                ERROR("Found non-DNA character " << c << " at position " << i << " in " << seqname << endl
-                      << ". Is your reference compressed or corrupted? "
-                      << "freebayes requires an uncompressed reference sequence.");
-                exit(1);
-        }
-    }
-}
-// Assembles the list of target regions from two sources and validates them:
-//   1. the BED file named by parameters.targets, if one was given;
-//   2. every region string in parameters.regions, written as "seq",
-//      "seq:pos", "seq:start..end", "seq:start-end", or with the end omitted
-//      after the separator (meaning "to the end of the sequence").
-// Region-string targets are appended both to `targets` and to
-// bedReader.targets; finally the BedReader interval tree is built.
-//
-// Exits the process when the target file cannot be opened or is empty, or
-// when any target lies outside its reference sequence or has right < left.
-void AlleleParser::loadTargets(void) {
-    // if target file specified use targets from file
-    if (!parameters.targets.empty()) {
-        DEBUG("Making BedReader object for target file: " << parameters.targets << " ...");
-        bedReader.openFile(parameters.targets);
-        if (!bedReader.is_open()) {
-            ERROR("Unable to open target file: " << parameters.targets << "... terminating.");
-            exit(1);
-        }
-        targets = bedReader.targets;
-        if (targets.empty()) {
-            ERROR("Could not load any targets from " << parameters.targets);
-            exit(1);
-        }
-        bedReader.close();
-        DEBUG("done");
-    }
-
-    // if we have a region specified, use it to generate a target
-    for (vector<string>::iterator r = parameters.regions.begin(); r != parameters.regions.end(); ++r) {
-        // drawn from bamtools_utilities.cpp, modified to suit 1-based context, no end sequence
-        string region = *r;
-        string startSeq;
-        int startPos;
-        int stopPos;  // -1 means "to the end of the sequence"
-
-        size_t foundFirstColon = region.find(":");
-        if (foundFirstColon == string::npos) {
-            // we only have a single string, use the whole sequence as the target
-            startSeq = region;
-            startPos = 0;
-            stopPos = -1;
-        } else {
-            startSeq = region.substr(0, foundFirstColon);
-
-            // accept ".." first, then fall back to "-" as the range separator
-            string sep = "..";
-            size_t foundRangeSep = region.find(sep, foundFirstColon);
-            if (foundRangeSep == string::npos) {
-                sep = "-";
-                foundRangeSep = region.find(sep, foundFirstColon);
-            }
-
-            if (foundRangeSep == string::npos) {
-                startPos = atoi(region.substr(foundFirstColon + 1).c_str());
-                // differ from bamtools in this regard, in that we process only
-                // the specified position if a range isn't given
-                stopPos = startPos + 1;
-            } else {
-                // Take exactly the characters between ':' and the separator.
-                // The separator is always found after the colon, so the
-                // length below cannot underflow.
-                startPos = atoi(region.substr(foundFirstColon + 1,
-                                              foundRangeSep - foundFirstColon - 1).c_str());
-                // if we have range sep specified, but no second number, read to the end of sequence
-                if (foundRangeSep + sep.size() != region.size()) {
-                    stopPos = atoi(region.substr(foundRangeSep + sep.size()).c_str()); // end-exclusive, bed-format
-                } else {
-                    stopPos = -1;
-                }
-            }
-        }
-        //DEBUG("startPos == " << startPos);
-        //DEBUG("stopPos == " << stopPos);
-
-        // REAL BED format is 0 based, half open (end base not included)
-        BedTarget bd(startSeq,
-                     startPos,
-                     ((stopPos == -1) ? reference.sequenceLength(startSeq) : stopPos) - 1); // internally, we use 0-base inclusive end
-        DEBUG("will process reference sequence " << startSeq << ":" << bd.left << ".." << bd.right + 1);
-        targets.push_back(bd);
-        bedReader.targets.push_back(bd);
-    }
-
-    // check validity of targets wrt. reference
-    for (vector<BedTarget>::iterator e = targets.begin(); e != targets.end(); ++e) {
-        BedTarget& bd = *e;
-        // internally, we use 0-base inclusive end
-        if (bd.left < 0 || bd.right + 1 > reference.sequenceLength(bd.seq)) {
-            ERROR("Target region coordinates (" << bd.seq << " "
-                  << bd.left << " " << bd.right + 1
-                  << ") outside of reference sequence bounds ("
-                  << bd.seq << " " << reference.sequenceLength(bd.seq) << ") terminating.");
-            exit(1);
-        }
-        if (bd.right < bd.left) {
-            ERROR("Invalid target region coordinates (" << bd.seq << " " << bd.left << " " << bd.right + 1 << ")"
-                  << " right bound is lower than left bound!");
-            exit(1);
-        }
-    }
-
-    bedReader.buildIntervals(); // set up interval tree in the bedreader
-    DEBUG("Number of target regions: " << targets.size());
-}
-// Used when neither a region string nor a targets file was given: adds one
-// target per reference sequence listed in the BAM header, covering the whole
-// sequence.
-void AlleleParser::loadTargetsFromBams(void) {
-    // otherwise, if we weren't given a region string or targets file, analyze
-    // all reference sequences from BAM file
-    DEBUG2("no targets specified, using all targets from BAM files");
-    RefVector::iterator refIter = referenceSequences.begin();
-    RefVector::iterator refEnd = referenceSequences.end();
-    for ( ; refIter != refEnd; ++refIter) {
-        const RefData& refData = *refIter;
-        const string& refName = refData.RefName;
-        // Targets are 0-based with an INCLUSIVE right end internally, so the
-        // last base of the sequence is RefLength - 1. This matches
-        // loadTargets(), which uses sequenceLength - 1 for whole-sequence
-        // regions and rejects targets with right + 1 > sequence length.
-        BedTarget bd(refName, 0, refData.RefLength - 1);
-        DEBUG2("will process reference sequence " << refName << ":" << bd.left << ".." << bd.right + 1);
-        targets.push_back(bd);
-    }
-}
-// Initialises the per-sample copy-number map: sets the default ploidy, loads
-// the optional CNV file (exiting the process if it cannot be loaded) and,
-// unless a diploid reference was requested, marks the reference sample as
-// haploid over every reference sequence.
-void AlleleParser::loadSampleCNVMap(void) {
-    // set default ploidy
-    sampleCNV.setDefaultPloidy(parameters.ploidy);
-
-    // load CNV map if provided
-    if (!parameters.cnvFile.empty() && !sampleCNV.load(parameters.cnvFile)) {
-        ERROR("could not load sample map " << parameters.cnvFile << " ... exiting!");
-        exit(1);
-    }
-
-    if (parameters.diploidReference) {
-        return;
-    }
-
-    // To assert that the reference is haploid, walk the reference names and
-    // sizes taken from the BAM header and set ploidy 1 for each of them in the
-    // sampleCNV map, under the reference sample's name.
-    RefVector::iterator ref = referenceSequences.begin();
-    while (ref != referenceSequences.end()) {
-        sampleCNV.setPloidy(referenceSampleName, ref->RefName, 0, ref->RefLength, 1);
-        ++ref;
-    }
-}
-// Ploidy of `sample` at the current sequence and position, as reported by the
-// sample CNV map.
-int AlleleParser::currentSamplePloidy(string const& sample) {
-    const int ploidyHere = sampleCNV.ploidy(sample, currentSequenceName, currentPosition);
-    return ploidyHere;
-}
-// Sum of the current ploidies of all samples in `samples`.
-int AlleleParser::copiesOfLocus(Samples& samples) {
-    int total = 0;
-    Samples::iterator sample = samples.begin();
-    while (sample != samples.end()) {
-        total += currentSamplePloidy(sample->first);
-        ++sample;
-    }
-    return total;
-}
-// Returns the distinct ploidies in play at the current position: the current
-// ploidy of every sample in `samples` plus the default parameters.ploidy.
-// The values come out in ascending order, since they are read from map keys.
-vector<int> AlleleParser::currentPloidies(Samples& samples) {
-    // the map is used purely as an ordered set of ploidy values
-    map<int, bool> seenPloidies;
-
-    Samples::iterator sample = samples.begin();
-    while (sample != samples.end()) {
-        seenPloidies[currentSamplePloidy(sample->first)] = true;
-        ++sample;
-    }
-    seenPloidies[parameters.ploidy] = true;
-
-    vector<int> ploidies;
-    for (map<int, bool>::iterator entry = seenPloidies.begin(); entry != seenPloidies.end(); ++entry) {
-        ploidies.push_back(entry->first);
-    }
-    return ploidies;
-}
-// Meant to be used when we are reading from stdin, to check whether the
-// current position falls within the targets. With no targets configured,
-// every position counts as in-target.
-bool AlleleParser::inTarget(void) {
-    if (targets.empty()) {
-        return true;
-    }
-    // targetsOverlap() expects 0-based, fully-closed coordinates; a single
-    // base is being tested, so start == end.
-    return bedReader.targetsOverlap(currentSequenceName, currentPosition, currentPosition);
-}
-// Initialization function: sets up the environment so we can start
-// registering alleles.
-//
-// Parses the command line into `parameters`, resets the parser state, then
-// runs the setup steps in dependency order: output file, FASTA reference, BAM
-// files and their reference names, targets, sample/population/technology
-// metadata, the sample CNV map, VCF output and finally VCF input.
-AlleleParser::AlleleParser(int argc, char** argv) : parameters(Parameters(argc, argv))
-{
-    oneSampleAnalysis = false;
-    currentRefID = 0; // will get set properly via toNextRefID
-    currentPosition = 0;
-    currentTarget = NULL; // to be initialized on first call to getNextAlleles
-    currentReferenceAllele = NULL; // same, NULL is brazenly used as an initialization flag
-    // setupVCFInput() only allocates currentVariant when a variant input file
-    // is given; start from NULL so the pointer never holds garbage.
-    currentVariant = NULL;
-    justSwitchedTargets = false; // flag to trigger cleanup of Allele*'s and objects after jumping targets
-    hasMoreAlignments = true; // flag to track when we run out of alignments in the current target or BAM files
-    currentSequenceStart = 0;
-    lastHaplotypeLength = 0;
-    usingHaplotypeBasisAlleles = false;
-    usingVariantInputAlleles = false;
-    rightmostHaplotypeBasisAllelePosition = 0;
-    rightmostInputAllelePosition = 0;
-    nullSample = new Sample();
-    referenceSampleName = "reference_sample";
-
-    // initialization
-    openOutputFile();
-    loadFastaReference();
-    // when we open the bam files we can use the number of targets to decide if
-    // we should load the indexes
-    openBams();
-    loadBamReferenceSequenceNames();
-    // check how many targets we have specified
-    loadTargets();
-    getSampleNames();
-    getPopulations();
-    getSequencingTechnologies();
-    // sample CNV
-    loadSampleCNVMap();
-    // output
-    setupVCFOutput();
-    // input
-    // (now that the VCF file is set up with the samples which are in the input alignments
-    // add the samples from the input VCF to the mix)
-    setupVCFInput();
-}
-// Releases the heap objects owned by the parser.
-AlleleParser::~AlleleParser(void) {
-    delete nullSample;
-    // close trace file? seems to get closed properly on object deletion...
-
-    // deleting a null pointer is a no-op, so no explicit NULL test is needed
-    delete currentReferenceAllele;
-
-    // currentVariant is only released while the variant input file is open
-    if (variantCallInputFile.is_open()) {
-        delete currentVariant;
-    }
-}
-// Offset of the alignment's start position from the start of the cached
-// current sequence.
-int AlleleParser::currentSequencePosition(const BamAlignment& alignment) {
-    const int offset = alignment.Position - currentSequenceStart;
-    return offset;
-}
-// Offset of the current position from the start of the cached currentSequence.
-int AlleleParser::currentSequencePosition() {
-    const int offset = currentPosition - currentSequenceStart;
-    return offset;
-}
-// Upper-cased reference base at the current position, as a single char.
-char AlleleParser::currentReferenceBaseChar(void) {
-    string::iterator base = currentReferenceBaseIterator();
-    return toupper(*base);
-}
-// Reference base at the current position, as a one-character string taken
-// from the cached current sequence.
-string AlleleParser::currentReferenceBaseString(void) {
-    const string::size_type offset = floor(currentPosition) - currentSequenceStart;
-    return currentSequence.substr(offset, 1);
-}
-// Iterator into the cached current sequence, pointing at the base for the
-// current position.
-string::iterator AlleleParser::currentReferenceBaseIterator(void) {
-    const string::difference_type offset = floor(currentPosition) - currentSequenceStart;
-    return currentSequence.begin() + offset;
-}
-// Reference bases starting at the current position and spanning
-// lastHaplotypeLength bases, taken from the cached current sequence.
-string AlleleParser::currentReferenceHaplotype(void) {
-    const string::size_type offset = floor(currentPosition) - currentSequenceStart;
-    return currentSequence.substr(offset, lastHaplotypeLength);
-}
-// Upper-cased substring of the current reference sequence: `len` bases
-// starting at `pos`, fetched from the FASTA reference rather than the cache.
-string AlleleParser::referenceSubstr(long int pos, unsigned int len) {
-    string bases = reference.getSubSequence(currentSequenceName, floor(pos), len);
-    return uppercase(bases);
-}
-// Reports whether substituting `altbase` at the current position is a
-// transition within a CpG context (CpG <-> TpG, or its opposite-strand form
-// CpG <-> CpA). Returns false when the current position has no neighbour on
-// either side within the cached sequence.
-bool AlleleParser::isCpG(string& altbase) {
-    const double offset = floor(currentPosition) - currentSequenceStart;
-
-    // bounds check: both flanking bases must exist
-    if (offset - 1 < 0 || offset + 1 >= currentSequence.size()) {
-        return false;
-    }
-
-    const string prevb = currentSequence.substr(offset - 1, 1);
-    const string currb = currentSequence.substr(offset, 1);
-    const string nextb = currentSequence.substr(offset + 1, 1);
-
-    // 5'-3' CpG <-> TpG is represented as CpG <-> CpA on the opposite strand
-    const bool forwardStrand =
-        nextb == "G" && ((currb == "C" && altbase == "T") || (currb == "T" && altbase == "C"));
-    const bool oppositeStrand =
-        prevb == "C" && ((currb == "G" && altbase == "A") || (currb == "A" && altbase == "G"));
-
-    return forwardStrand || oppositeStrand;
-}
-// Clamps every base quality in the alignment to at most baseQualityCap,
-// rewriting the alignment's quality string in place.
-void capBaseQuality(BamAlignment& alignment, int baseQualityCap) {
-    const char cappedChar = qualityInt2Char(baseQualityCap);
-    string& quals = alignment.Qualities;
-    for (string::size_type i = 0; i < quals.size(); ++i) {
-        if (qualityChar2ShortInt(quals[i]) > baseQualityCap) {
-            quals[i] = cappedChar;
-        }
-    }
-}
-// Registers one allele observation for this alignment, merging it with, or
-// splitting, the previously registered allele as needed.
-//
-//   newAllele     - the observation to add (taken by value; it may be trimmed
-//                   or extended here before being stored)
-//   mergeComplex  - when true, a non-reference allele that starts exactly
-//                   where the previous one ends is merged into it (MNP or
-//                   complex)
-//   maxComplexGap - longest reference match allowed to stay embedded in a
-//                   merged allele before it is split off again
-//   boundIndels   - affects only the first allele of the read, see below
-//
-// Also ORs newAllele.type into alleleTypes. Aborts via assert() when the
-// allele's sequence and base-quality lengths disagree, and exits the process
-// when an indel's position is inconsistent after borrowing a flanking base.
-void RegisteredAlignment::addAllele(Allele newAllele, bool mergeComplex, int maxComplexGap, bool boundIndels) {
-    // allele combination rules. combine the last allele in the list of allele
-    // observations according to the following rules
-    // 0) reference + SNP, MNP
-    // 1) INDEL + (REF <= maxComplexGap) + MNP, INDEL + (REF <= maxComplexGap) + SNP -> complex
-    // 2) MNP + SNP, SNP + SNP -> MNP
-    // 3) reference + INDEL -> reference.substr(0, reference.size() - 1), reference.at(reference.size()) + INDEL
-    if (newAllele.alternateSequence.size() != newAllele.baseQualities.size()) {
-        cerr << "new allele qualities not == in length to sequence: " << newAllele << endl;
-        assert(false);
-    }
-    //cerr << "adding allele " << newAllele << " to " << alleles.size() << " alleles" << endl;
-    //if (!alleles.empty()) { cerr << "last allele " << alleles.back() << endl; }
-    alleleTypes |= newAllele.type;
-
-    if (alleles.empty()) {
-        // presently, it's unclear how to handle insertions and deletions
-        // reported at the beginning of the read. are these events actually
-        // indicative of longer alleles?
-        //
-        // NOTE(review): as written, with boundIndels set every first allele
-        // that is not null is dropped (the insertion/deletion tests are
-        // subsumed by !isNull()). Confirm that the negation is intended.
-        if (boundIndels && (newAllele.isInsertion() || newAllele.isDeletion() || !newAllele.isNull())) {
-            // ignore the allele
-        } else {
-            alleles.push_back(newAllele);
-        }
-        // the same goes for insertions and deletions at the end of reads,
-        // these must be dealt with elsewhere
-    } else {
-        Allele& lastAllele = alleles.back();
-        if (isEmptyAllele(newAllele) ||
-            (newAllele.isReference() && newAllele.referenceLength == 0)) {
-            // do nothing
-        } else if (newAllele.isReference() && isUnflankedIndel(lastAllele)) {
-            // add flanking base to indel, ensuring haplotype length of 2 for all indels
-            string seq; vector<pair<int, string> > cig; vector<short> quals;
-            //cerr << "subtracting from start " << newAllele << " giving to " << lastAllele << endl;
-            newAllele.subtractFromStart(1, seq, cig, quals);
-            lastAllele.addToEnd(seq, cig, quals);
-            //cerr << "done " << newAllele << " gave to " << lastAllele << " reflen " << lastAllele.referenceLength << endl;
-            // check that the new allele still has sequence
-            if (!isEmptyAllele(newAllele)) {
-                alleles.push_back(newAllele);
-            }
-        } else if (newAllele.isReference()
-                   && (newAllele.referenceLength > maxComplexGap
-                       || newAllele.basesRight == 0)) {
-            // if the last allele is reference too, we need to combine them!
-            if (lastAllele.isReference()) {
-                DEBUG2("addAllele: mergeAllele/1:"
-                       << " lastAllele " << lastAllele.typeStr() << "@" << lastAllele.position << ":" << lastAllele.cigar
-                       << " newAllele " << newAllele.typeStr() << "@" << newAllele.position << ":" << newAllele.cigar);
-                lastAllele.mergeAllele(newAllele, ALLELE_REFERENCE);
-                assert(lastAllele.alternateSequence.size() == lastAllele.baseQualities.size());
-            } else if (lastAllele.isComplex() || lastAllele.isMNP() || lastAllele.isSNP()) {
-                // split apart the last allele if it's 'complex' but followed by another reference allele
-                // that would cause the reference gap to be greater than the maxComplexGap
-                vector<pair<int, string> > cigar = splitCigar(lastAllele.cigar);
-                if (cigar.back().second == "M") {
-                    int matchlen = cigar.back().first;
-                    if (matchlen + newAllele.referenceLength > maxComplexGap) {
-                        // break apart the complex allele
-                        alleles.push_back(lastAllele);
-                        // The push_back above may have invalidated lastAllele,
-                        // so the element is re-fetched as pAllele and only
-                        // pAllele is used from here on in this branch.
-                        Allele& pAllele = alleles.at(alleles.size() - 2);
-                        string seq; vector<pair<int, string> > cig; vector<short> quals;
-                        pAllele.subtractFromEnd(matchlen, seq, cig, quals);
-                        alleles.back().subtractFromStart(pAllele.referenceLength, seq, cig, quals);
-                        DEBUG2("addAllele: mergeAllele/2:"
-                               << " lastAllele " << pAllele.typeStr() << "@" << pAllele.position << ":" << pAllele.cigar
-                               << " .back() " << alleles.back().typeStr() << "@" << alleles.back().position << ":" << alleles.back().cigar
-                               << " newAllele " << newAllele.typeStr() << "@" << newAllele.position << ":" << newAllele.cigar);
-                        alleles.back().mergeAllele(newAllele, ALLELE_REFERENCE);
-                    } else { // expand the complex allele
-                        DEBUG2("addAllele: mergeAllele/3:"
-                               << " lastAllele " << lastAllele.typeStr() << "@" << lastAllele.position << ":" << lastAllele.cigar
-                               << " newAllele " << newAllele.typeStr() << "@" << newAllele.position << ":" << newAllele.cigar);
-                        lastAllele.mergeAllele(newAllele, ALLELE_COMPLEX);
-                    }
-                } else {
-                    alleles.push_back(newAllele);
-                }
-            } else {
-                alleles.push_back(newAllele);
-            }
-        } else if (lastAllele.isReference()) {
-            if (newAllele.isSNP() || newAllele.isMNP() || newAllele.isComplex()) {
-                alleles.push_back(newAllele);
-            } else if (newAllele.isInsertion() || newAllele.isDeletion()) {
-                // move the last reference base onto the front of the indel
-                int p = newAllele.position - 1;
-                string seq; vector<pair<int, string> > cig; vector<short> quals;
-                lastAllele.subtractFromEnd(1, seq, cig, quals);
-                if (lastAllele.length == 0) {
-                    alleles.pop_back(); // remove 0-length alleles
-                }
-                newAllele.addToStart(seq, cig, quals);
-                if (newAllele.position != p) {
-                    cerr << "newAllele.position != p" << endl << newAllele << " != " << p << endl;
-                    exit(1);
-                }
-                alleles.push_back(newAllele);
-                assert(newAllele.alternateSequence.size() == newAllele.baseQualities.size());
-            } else {
-                alleles.push_back(newAllele); // NULL case
-            }
-        } else if (newAllele.isNull()) {
-            if (lastAllele.isComplex()) {
-                // split apart the last allele if it's 'complex' but followed by a null allele
-                vector<pair<int, string> > cigar = splitCigar(lastAllele.cigar);
-                if (cigar.back().second == "M") {
-                    int matchlen = cigar.back().first;
-                    alleles.push_back(lastAllele);
-                    Allele& pAllele = alleles.at(alleles.size() - 2);
-                    string seq; vector<pair<int, string> > cig; vector<short> quals;
-                    pAllele.subtractFromEnd(matchlen, seq, cig, quals);
-                    alleles.back().subtractFromStart(pAllele.referenceLength, seq, cig, quals);
-                }
-            }
-            alleles.push_back(newAllele);
-        } else {
-            // -> complex event or MNP
-            if (mergeComplex && lastAllele.position + lastAllele.referenceLength == newAllele.position
-                && !lastAllele.isNull()) {
-                vector<pair<int, string> > lastCigar = splitCigar(lastAllele.cigar);
-                // If the last allele is complex and ends in a match, we need
-                // to check that after merging the then-embedded match won't be
-                // longer than maxComplexGap. We do this for every new allele,
-                // since we don't want to allow the complex allele to grow
-                // beyond maxComplexGap before splitting.
-                if (lastAllele.isComplex()
-                    && lastCigar.back().second == "M"
-                    && lastCigar.back().first > maxComplexGap)
-                {
-                    // Break apart the complex allele into one complex and one
-                    // reference allele.
-                    //
-                    // FIXME TODO: The allele may not actually be complex
-                    // anymore after splitting, in which case we should demote
-                    // its type to SNP/MNP/INDEL.
-                    // -trs, 20 Nov 2014
-                    alleles.push_back(lastAllele);
-                    // As above: lastAllele may be stale after the push_back,
-                    // so only pAllele is used below.
-                    Allele& pAllele = alleles.at(alleles.size() - 2);
-                    string seq; vector<pair<int, string> > cig; vector<short> quals;
-                    pAllele.subtractFromEnd(lastCigar.back().first, seq, cig, quals);
-                    alleles.back().subtractFromStart(pAllele.referenceLength, seq, cig, quals);
-                    if (newAllele.isReference()) {
-                        DEBUG2("addAllele: mergeAllele/5:"
-                               << " lastAllele " << pAllele.typeStr() << "@" << pAllele.position << ":" << pAllele.cigar
-                               << " .back() " << alleles.back().typeStr() << "@" << alleles.back().position << ":" << alleles.back().cigar
-                               << " newAllele " << newAllele.typeStr() << "@" << newAllele.position << ":" << newAllele.cigar);
-                        alleles.back().mergeAllele(newAllele, ALLELE_REFERENCE);
-                    } else {
-                        alleles.push_back(newAllele);
-                    }
-                } else {
-                    AlleleType atype = ALLELE_COMPLEX;
-                    if (lastAllele.isSNP() || lastAllele.isMNP()) {
-                        // The result is an MNP only when the last allele ends
-                        // in a mismatch AND the new allele is itself a SNP or
-                        // MNP. The inner parentheses are required because &&
-                        // binds tighter than ||.
-                        if (lastCigar.back().second == "X" && (newAllele.isSNP() || newAllele.isMNP())) {
-                            atype = ALLELE_MNP;
-                        }
-                    }
-                    DEBUG2("addAllele: mergeAllele/4:"
-                           << " lastAllele " << lastAllele.typeStr() << "@" << lastAllele.position << ":" << lastAllele.cigar
-                           << " newAllele " << newAllele.typeStr() << "@" << newAllele.position << ":" << newAllele.cigar);
-                    lastAllele.mergeAllele(newAllele, atype);
-                    assert(lastAllele.alternateSequence.size() == lastAllele.baseQualities.size());
-                }
-            } else {
-                alleles.push_back(newAllele);
-            }
-        }
-    }
-}
-// TODO erase alleles which are beyond N bp before the current position on position step
-void AlleleParser::updateHaplotypeBasisAlleles(long int pos, int referenceLength) {
- if (pos + referenceLength > rightmostHaplotypeBasisAllelePosition) {
- stringstream r;
- //r << currentSequenceName << ":" << rightmostHaplotypeBasisAllelePosition << "-" << pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
- //cerr << "getting variants in " << r.str() << endl;
- // tabix expects 1-based, fully closed regions for ti_parse_region()
- // (which is what setRegion() calls eventually)
- if (haplotypeVariantInputFile.setRegion(currentSequenceName,
- rightmostHaplotypeBasisAllelePosition + 1,
- pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1)) {
- //cerr << "the vcf line " << haplotypeVariantInputFile.line << endl;
- // get the variants in the target region
- vcf::Variant var(haplotypeVariantInputFile);
- while (haplotypeVariantInputFile.getNextVariant(var)) {
- //cerr << "input variant: " << var << endl;
- // the following stanza is for parsed
- // alternates. instead use whole haplotype calls, as
- // alternates can be parsed prior to providing the
- // file as input.
- /*
- for (vector<string>::iterator a = var.alt.begin(); a != var.alt.end(); ++a) {
- haplotypeBasisAlleles[var.position].insert(AllelicPrimitive(var.ref.size(), *a));
- }
- */
- map<string, vector<vcf::VariantAllele> > variants = var.parsedAlternates();
- for (map<string, vector<vcf::VariantAllele> >::iterator a = variants.begin(); a != variants.end(); ++a) {
- for (vector<vcf::VariantAllele>::iterator v = a->second.begin(); v != a->second.end(); ++v) {
- //cerr << v->ref << "/" << v->alt << endl;
- if (v->ref != v->alt) {
- //cerr << "basis allele " << v->position << " " << v->ref << "/" << v->alt << endl;
- haplotypeBasisAlleles[v->position].push_back(AllelicPrimitive(v->ref, v->alt));
- //cerr << "number of alleles at position " << haplotypeBasisAlleles[v->position].size() << endl;
- }
- }
- }
- }
- } else {
- // indicates empty region
- //ERROR("Could not set haplotype-basis VCF file to target region");
- //exit(1);
- }
- // set the rightmost haplotype position to trigger the next update
- rightmostHaplotypeBasisAllelePosition = pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
- }
-bool AlleleParser::allowedHaplotypeBasisAllele(long int pos, string& ref, string& alt) {
- // check the haplotypeBasisAllele map for membership of the allele in question in the current sequence
- //cerr << "is allowed: " << pos << " " << ref << "/" << alt << " ?" << endl;
- if (!usingHaplotypeBasisAlleles) {
- return true; // always true if we aren't using the haplotype basis allele system
- } else {
- map<long int, vector<AllelicPrimitive> >::iterator p = haplotypeBasisAlleles.find(pos);
- if (p != haplotypeBasisAlleles.end()) {
- vector<AllelicPrimitive>& alleles = p->second;
- for (vector<AllelicPrimitive>::iterator z = alleles.begin(); z != alleles.end(); ++z) {
- //cerr << "overlapping allele " << z->ref << ":" << z->alt << endl;
- if (z->ref == ref && z->alt == alt) {
- //cerr << "yess" << endl;
- return true;
- }
- }
- }
- return false;
- }
-Allele AlleleParser::makeAllele(RegisteredAlignment& ra,
- AlleleType type,
- long int pos,
- int length,
- int basesLeft,
- int basesRight,
- string& readSequence,
- string& sampleName,
- BamAlignment& alignment,
- string& sequencingTech,
- long double qual,
- string& qualstr
- ) {
- string cigar;
- int reflen = length;
- if (type == ALLELE_REFERENCE) {
- cigar = convert(length) + "M";
- } else if (type == ALLELE_SNP || type == ALLELE_MNP) {
- cigar = convert(length) + "X";
- } else if (type == ALLELE_INSERTION) {
- reflen = 0;
- cigar = convert(length) + "I";
- } else if (type == ALLELE_DELETION) {
- cigar = convert(length) + "D";
- } else if (type == ALLELE_NULL) {
- cigar = convert(length) + "N";
- }
- string refSequence;
- if (type != ALLELE_NULL) { // only used for non null allele, avoid soft clipping edge cases
- refSequence = currentSequence.substr(pos - currentSequenceStart, reflen);
- }
- long int repeatRightBoundary = pos;
- // check if it's allowed
- // if it isn't allowed
- // and referenceLength > 0, make a reference allele with reference quality
- // if referenceLength == 0 (insertion), make a reference allele with 0 length (it will be filtered out in another context)
- // if it is allowed, make a normal allele
- // if not, adjust the allele so that it's a reference allele with preset BQ and length
- // in effect, this means creating a reference allele of the reference length of the allele with 0 BQ
- // NB, if we are using haplotype basis alleles the algorithm forces
- // alleles that aren't in the haplotype basis set into the reference space
- if (type != ALLELE_REFERENCE
- && type != ALLELE_NULL
- && !allowedHaplotypeBasisAllele(pos + 1,
- refSequence,
- readSequence)) {
- length = referenceLengthFromCigar(cigar);
- cigar = convert(length) + "M";
- // by adjusting the cigar, we implicitly adjust
- // allele.referenceLength, which is calculated when the allele is made
- qualstr = string(length, qualityInt2Char(0));
- readSequence = currentSequence.substr(pos - currentSequenceStart, length);
- }
- // cache information about repeat structure in the alleles, to
- // allow haplotype construction to be forced to extend across
- // tandem repeats and homopolymers when indels are present
- if (type == ALLELE_INSERTION || type == ALLELE_DELETION) {
- string alleleseq;
- if (type == ALLELE_INSERTION) {
- alleleseq = readSequence;
- } else if (type == ALLELE_DELETION) {
- alleleseq = refSequence;
- }
- map<long int, map<string, int> >::iterator rc = cachedRepeatCounts.find(pos);
- if (rc == cachedRepeatCounts.end()) {
- cachedRepeatCounts[pos] = repeatCounts(pos - currentSequenceStart, currentSequence, 12);
- rc = cachedRepeatCounts.find(pos);
- }
- map<string, int>& matchedRepeatCounts = rc->second;
- for (map<string, int>::iterator r = matchedRepeatCounts.begin(); r != matchedRepeatCounts.end(); ++r) {
- const string& repeatunit = r->first;
- int rptcount = r->second;
- string repeatstr = repeatunit * rptcount;
- // assumption of left-alignment may be problematic... so this should be updated
- if (repeatstr.size() >= parameters.minRepeatSize && isRepeatUnit(alleleseq, repeatunit)) {
- // determine the boundaries of the repeat
- long int p = pos - currentSequenceStart;
- // adjust to ensure we hit the first of the repeatstr
- size_t startpos = currentSequence.find(repeatstr, max((long int) 0, p - (long int) repeatstr.size() - 1));
- long int leftbound = startpos + currentSequenceStart;
- if (startpos == string::npos) {
- cerr << "could not find repeat sequence?" << endl;
- cerr << "repeat sequence: " << repeatstr << endl;
- cerr << "currentsequence start: " << currentSequenceStart << endl;
- cerr << currentSequence << endl;
- cerr << "matched repeats:" << endl;
- for (map<string, int>::iterator q = matchedRepeatCounts.begin(); q != matchedRepeatCounts.end(); ++q) {
- cerr << q->first << " : " << q->second << endl;
- cerr << "... at position " << pos << endl;
- }
- break; // ignore right-repeat boundary in this case
- }
- repeatRightBoundary = leftbound + repeatstr.size() + 1; // 1 past edge of repeat
- }
- }
- // a dangerous game
- int start = pos - currentSequenceStart;
- double minEntropy = parameters.minRepeatEntropy;
- // check first that' wer'e actually ina repeat... TODO
- //cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, " << endl;
- while (minEntropy > 0 && // ignore if turned off
- repeatRightBoundary - currentSequenceStart < currentSequence.size() && //guard
- entropy(currentSequence.substr(start, repeatRightBoundary - pos)) < minEntropy) {
- //cerr << "entropy of " << entropy(currentSequence.substr(start, repeatRightBoundary - pos)) << " is too low, ";
- //cerr << "increasing rought boundary to ";
- ++repeatRightBoundary;
- //cerr << repeatRightBoundary << endl;
- }
- // now we
- //cachedRepeatCounts[pos] = repeatCounts(pos - currentSequenceStart, currentSequence, 12);
- // edge case, the indel is an insertion and matches the reference to the right
- // this means there is a repeat structure in the read, but not the ref
- if (currentSequence.substr(pos - currentSequenceStart, length) == readSequence) {
- repeatRightBoundary = max(repeatRightBoundary, pos + length + 1);
- }
- }
- return Allele(type,
- currentSequenceName,
- pos,
- &currentPosition,
- &currentReferenceBase,
- length,
- repeatRightBoundary,
- basesLeft,
- basesRight,
- readSequence,
- sampleName,
- alignment.Name,
- ra.readgroup,
- sequencingTech,
- !alignment.IsReverseStrand(),
- max(qual, (long double) 0), // ensure qual is at least 0
- qualstr,
- alignment.MapQuality,
- alignment.IsPaired(),
- alignment.IsMateMapped(),
- alignment.IsProperPair(),
- cigar,
- &ra.alleles,
- alignment.Position,
- alignment.GetEndPosition());
-RegisteredAlignment& AlleleParser::registerAlignment(BamAlignment& alignment, RegisteredAlignment& ra, string& sampleName, string& sequencingTech) {
- string rDna = alignment.QueryBases;
- string rQual = alignment.Qualities;
- int rp = 0; // read position, 0-based relative to read
- int csp = currentSequencePosition(alignment); // current sequence position, 0-based relative to currentSequence
- int sp = alignment.Position; // sequence position
- if (usingHaplotypeBasisAlleles) {
- updateHaplotypeBasisAlleles(sp, alignment.AlignedBases.size());
- }
- if (parameters.debug2) {
- DEBUG2("registering alignment " << rp << " " << csp << " " << sp << endl <<
- "alignment readName " << alignment.Name << endl <<
- "alignment isPaired " << alignment.IsPaired() << endl <<
- "alignment isMateMapped " << alignment.IsMateMapped() << endl <<
- "alignment isProperPair " << alignment.IsProperPair() << endl <<
- "alignment mapQual " << alignment.MapQuality << endl <<
- "alignment sampleID " << sampleName << endl <<
- "alignment position " << alignment.Position << endl <<
- "alignment length " << alignment.Length << endl <<
- "alignment AlignedBases.size() " << alignment.AlignedBases.size() << endl <<
- "alignment GetEndPosition() " << alignment.GetEndPosition() << endl <<
- "alignment end position " << alignment.Position + alignment.AlignedBases.size());
- stringstream cigarss;
- int alignedLength = 0;
- for (vector<CigarOp>::const_iterator c = alignment.CigarData.begin(); c != alignment.CigarData.end(); ++c) {
- cigarss << c->Type << c->Length;
- if (c->Type == 'D')
- alignedLength += c->Length;
- if (c->Type == 'M')
- alignedLength += c->Length;
- }
- DEBUG2("alignment cigar " << cigarss.str());
- DEBUG2("current sequence pointer: " << csp);
- DEBUG2("read: " << rDna);
- DEBUG2("aligned bases: " << alignment.AlignedBases);
- DEBUG2("qualities: " << alignment.Qualities);
- DEBUG2("reference seq: " << currentSequence.substr(csp, alignment.AlignedBases.size()));
- }
- /*
- * The cigar only records matches for sequences that have embedded
- * mismatches.
- *
- * Also, we don't store the entire undelying sequence; just the subsequence
- * that matches our current target region.
- *
- * As we step through a match sequence, we look for mismatches. When we
- * see one we set a positional flag indicating the location, and we emit a
- * 'Reference' allele that stretches from the the base after the last
- * mismatch to the base before the current one.
- *
- * An example follows:
- *
- * reference ^\-snp reference
- *
- */
- vector<bool> indelMask (alignment.AlignedBases.size(), false);
- vector<CigarOp>::const_iterator cigarIter = alignment.CigarData.begin();
- vector<CigarOp>::const_iterator cigarEnd = alignment.CigarData.end();
- for ( ; cigarIter != cigarEnd; ++cigarIter ) {
- int l = cigarIter->Length;
- char t = cigarIter->Type;
- DEBUG2("cigar item: " << t << l);
- if (t == 'M' || t == 'X' || t == '=') { // match or mismatch
- int firstMatch = csp; // track the first match after a mismatch, for recording 'reference' alleles
- int mismatchStart = -1;
- bool inMismatch = false;
- // for each base in the match region
- // increment the csp, sp, and rp
- // if there is a mismatch, record the last matching stretch as a reference allele
- // presently just record one snp per mismatched position, whether or not they are in a series
- for (int i=0; i<l; i++) {
- // extract aligned base
- string b;
- try {
- b = rDna.at(rp);
- } catch (std::out_of_range outOfRange) {
- cerr << "Exception: Cannot read past the end of the alignment's sequence." << endl
- << alignment.Name << endl
- << currentSequenceName << ":" << (long unsigned int) currentPosition + 1 << endl
- << alignment.AlignedBases << endl
- << currentSequence.substr(csp, alignment.AlignedBases.size()) << endl;
- abort();
- }
- // convert base quality value into short int
- long double qual = qualityChar2LongDouble(rQual.at(rp));
- // get reference allele
- string sb;
- try {
- sb = currentSequence.at(csp);
- } catch (std::out_of_range outOfRange) {
- cerr << "Exception: Unable to read reference sequence base past end of current cached sequence." << endl
- << currentSequenceName << ":" << (long unsigned int) currentPosition + 1 << endl
- << alignment.Position << "-" << alignment.GetEndPosition() << endl
- << "alignment: " << alignment.AlignedBases << endl
- << "currentSequence: " << currentSequence << endl
- << "currentSequence matching: " << currentSequence.substr(csp, alignment.AlignedBases.size()) << endl;
- //abort();
- break;
- }
- // record mismatch if we have a mismatch here
- if (b != sb || sb == "N") { // when the reference is N, we should always call a mismatch
- if (firstMatch < csp) {
- int length = csp - firstMatch;
- string readSequence = rDna.substr(rp - length, length);
- string qualstr = rQual.substr(rp - length, length);
- // record 'reference' allele for last matching region
- if (allATGC(readSequence)) {
- ra.addAllele(
- makeAllele(ra,
- sp - length,
- length,
- rp, // bases left (for first base in ref allele)
- alignment.QueryBases.size() - rp, // bases right (for first base in ref allele)
- readSequence,
- sampleName,
- alignment,
- sequencingTech,
- alignment.MapQuality, // reference allele quality == mapquality
- qualstr),
- parameters.allowComplex, parameters.maxComplexGap);
- }
- }
- // register mismatch
- if (qual >= parameters.BQL2) {
- ++ra.mismatches; // increment our mismatch counter if we're over BQL2
- ++ra.snpCount; // always increment snp counter
- }
- // always emit a snp, if we have too many mismatches over
- // BQL2 then we will discard the registered allele in the
- // calling context
- if (!inMismatch) {
- mismatchStart = csp;
- inMismatch = true;
- }
- firstMatch = csp + 1;
- } else if (inMismatch) {
- inMismatch = false;
- int length = csp - mismatchStart;
- string readSequence = rDna.substr(rp - length, length);
- string qualstr = rQual.substr(rp - length, length);
- for (int j = 0; j < length; ++j) {
- long double lqual = qualityChar2LongDouble(qualstr.at(j));
- string qualp = qualstr.substr(j, 1);
- string rs = readSequence.substr(j, 1);
- if (allATGC(rs)) {
- ra.addAllele(
- makeAllele(ra,
- sp - length + j,
- 1,
- rp - length - j, // bases left
- alignment.QueryBases.size() - rp + j, // bases right
- rs,
- sampleName,
- alignment,
- sequencingTech,
- lqual,
- qualp),
- parameters.allowComplex, parameters.maxComplexGap);
- } else {
- ra.addAllele(
- makeAllele(ra,
- sp - length + j,
- 1,
- rp - length - j, // bases left
- alignment.QueryBases.size() - rp + j, // bases right
- rs,
- sampleName,
- alignment,
- sequencingTech,
- lqual,
- qualp),
- parameters.allowComplex, parameters.maxComplexGap);
- }
- }
- }
- // update positions
- ++sp;
- ++csp;
- ++rp;
- }
- // catch mismatches at the end of the match
- if (inMismatch) {
- inMismatch = false;
- int length = csp - mismatchStart;
- string readSequence = rDna.substr(rp - length, length);
- string qualstr = rQual.substr(rp - length, length);
- for (int j = 0; j < length; ++j) {
- long double lqual = qualityChar2LongDouble(qualstr.at(j));
- string qualp = qualstr.substr(j, 1);
- string rs = readSequence.substr(j, 1);
- if (allATGC(rs)) {
- ra.addAllele(
- makeAllele(ra,
- sp - length + j,
- 1,
- rp - length - j, // bases left
- alignment.QueryBases.size() - rp + j, // bases right
- rs,
- sampleName,
- alignment,
- sequencingTech,
- lqual,
- qualp),
- parameters.allowComplex, parameters.maxComplexGap);
- } else {
- ra.addAllele(
- makeAllele(ra,
- sp - length + j,
- 1,
- rp - length - j, // bases left
- alignment.QueryBases.size() - rp + j, // bases right
- rs,
- sampleName,
- alignment,
- sequencingTech,
- lqual,
- qualp),
- parameters.allowComplex, parameters.maxComplexGap);
- }
- }
- // or, if we are not in a mismatch, construct the last reference allele of the match
- } else if (firstMatch < csp) {
- int length = csp - firstMatch;
- //string matchingSequence = currentSequence.substr(csp - length, length);
- string readSequence = rDna.substr(rp - length, length);
- string qualstr = rQual.substr(rp - length, length);
- if (allATGC(readSequence)) {
- ra.addAllele(
- makeAllele(ra,
- sp - length,
- length,
- rp, // bases left (for first base in ref allele)
- alignment.QueryBases.size() - rp, // bases right (for first base in ref allele)
- readSequence,
- sampleName,
- alignment,
- sequencingTech,
- alignment.MapQuality, // ... hmm
- qualstr),
- parameters.allowComplex, parameters.maxComplexGap);
- }
- }
- } else if (t == 'D') { // deletion
- // because deletions have no quality information,
- // use the surrounding sequence quality as a proxy
- // to provide quality scores of equivalent magnitude to insertions,
- // take N bp, right-centered on the position of the deletion
- // this logic prevents overflow of the read
- int spanstart;
- // this is used to calculate the quality string adding 2bp grounds
- // the indel in the surrounding sequence, which it is dependent
- // upon
- int L = l + 2;
- if (L > rQual.size()) {
- L = rQual.size();
- spanstart = 0;
- } else {
- // set lower bound to 0
- if (rp < (L / 2)) {
- spanstart = 0;
- } else {
- spanstart = rp - (L / 2);
- }
- // set upper bound to the string length
- if (spanstart + L > rQual.size()) {
- spanstart = rQual.size() - L;
- }
- }
- string qualstr = rQual.substr(spanstart, L);
- long double qual;
- if (parameters.useMinIndelQuality) {
- qual = minQuality(qualstr);
- //qual = averageQuality(qualstr);
- } else {
- // quality, scaled inversely by the ratio between the quality
- // string length and the length of the event
- qual = sumQuality(qualstr);
- // quality adjustment:
- // scale the quality by the inverse harmonic sum of the length of
- // the quality string X a scaling constant derived from the ratio
- // between the length of the quality string and the length of the
- // allele
- //qual += ln2phred(log((long double) l / (long double) L));
- qual += ln2phred(log((long double) L / (long double) l));
- qual /= harmonicSum(l);
- }
- if (qual >= parameters.BQL2) {
- //ra.mismatches += l;
- for (int i=0; i<l; i++) {
- indelMask[sp - alignment.Position + i] = true;
- }
- }
- string refseq = currentSequence.substr(csp, l);
- // some aligners like to report deletions at the beginnings and ends of reads.
- // without any sequence in the read to support this, it is hard to believe
- // that these deletions are real, so we ignore them here.
- if (cigarIter != alignment.CigarData.begin() // guard against deletion at beginning
- && (cigarIter+1) != alignment.CigarData.end() // and against deletion at end
- && allATGC(refseq)) {
- string nullstr;
- ra.addAllele(
- makeAllele(ra,
- sp,
- l,
- rp, // bases left (for first base in ref allele)
- alignment.QueryBases.size() - rp, // bases right (for first base in ref allele)
- nullstr, // no read sequence for deletions
- sampleName,
- alignment,
- sequencingTech,
- qual,
- nullstr), // no qualstr for deletions
- parameters.allowComplex, parameters.maxComplexGap);
- }
- ++ra.indelCount;
- sp += l; // update sample position
- csp += l;
- } else if (t == 'I') { // insertion
- //string qualstr = rQual.substr(rp, l);
- int spanstart;
- // this is used to calculate the quality string adding 2bp grounds
- // the indel in the surrounding sequence, which it is dependent
- // upon
- int L = l + 2;
- if (L > rQual.size()) {
- L = rQual.size();
- spanstart = 0;
- } else {
- // set lower bound to 0
- if (rp < 1) {
- spanstart = 0;
- } else {
- spanstart = rp - 1;
- }
- // set upper bound to the string length
- if (spanstart + L > rQual.size()) {
- spanstart = rQual.size() - L;
- }
- }
- string qualstr = rQual.substr(spanstart, L);
- long double qual;
- if (parameters.useMinIndelQuality) {
- qual = minQuality(qualstr);
- //qual = averageQuality(qualstr); // does not work as well as the min
- } else {
- // quality, scaled inversely by the ratio between the quality
- // string length and the length of the event
- qual = sumQuality(qualstr);
- // quality adjustment:
- // scale the quality by the inverse harmonic sum of the length of
- // the quality string X a scaling constant derived from the ratio
- // between the length of the quality string and the length of the
- // allele
- //qual += ln2phred(log((long double) l / (long double) L));
- qual += ln2phred(log((long double) L / (long double) l));
- qual /= harmonicSum(l);
- }
- if (qual >= parameters.BQL2) {
- //ra.mismatches += l;
- indelMask[sp - alignment.Position] = true;
- }
- string readseq = rDna.substr(rp, l);
- if (allATGC(readseq)) {
- string qualstr = rQual.substr(rp, l);
- ra.addAllele(
- makeAllele(ra,
- sp,
- l,
- rp - l, // bases left (for first base in ref allele)
- alignment.QueryBases.size() - rp, // bases right (for first base in ref allele)
- readseq,
- sampleName,
- alignment,
- sequencingTech,
- qual,
- qualstr),
- parameters.allowComplex, parameters.maxComplexGap);
- }
- ++ra.indelCount;
- rp += l;
- // handle other cigar element types
- } else if (t == 'S') { // soft clip, clipped sequence present in the read not matching the reference
- if (sp - l < 0) {
- // nothing to do, soft clip is beyond the beginning of the reference
- } else {
- string qualstr = rQual.substr(rp, l);
- string readseq = alignment.QueryBases.substr(rp, l);
- // skip these bases in the read
- ra.addAllele(
- makeAllele(ra,
- sp - l,
- l,
- rp - l, // bases left
- alignment.QueryBases.size() - rp, // bases right
- readseq,
- sampleName,
- alignment,
- sequencingTech,
- alignment.MapQuality,
- qualstr),
- parameters.allowComplex, parameters.maxComplexGap);
- }
- rp += l;// sp += l; csp += l;
- } else if (t == 'H') { // hard clip on the read, clipped sequence is not present in the read
- // the alignment position is the first non-clipped base.
- // thus, hard clipping seems to just be an indicator that we clipped something
- // here we do nothing
- //sp += l; csp += l;
- } else if (t == 'N') { // skipped region in the reference not present in read, aka splice
- sp += l; csp += l;
- }
- // ignore padding
- //} else if (t == 'P') { // padding, silent deletion from the padded reference sequence
- // sp += l; csp += l;
- //}
- } // end cigar iter loop
- if (ra.alleles.empty()) {
- DEBUG2("generated no alleles from read");
- return ra;
- }
- // this deals with the case in which we have embedded Ns in the read
- // often this happens at the start or end of reads, thus affecting our RegisteredAlignment::start and ::end
- ra.start = ra.alleles.front().position;
- ra.end = ra.alleles.back().position + ra.alleles.back().referenceLength;
- double alignedBases = 0;
- double mismatchCount = 0;
- double matchCount = 0;
- double indelCount = 0;
- // tally mismatches in two categories, gaps and mismatched bases
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- Allele& allele = *a;
- switch (allele.type) {
- alignedBases += allele.length;
- matchCount += allele.length;
- break;
- case ALLELE_SNP:
- case ALLELE_MNP:
- alignedBases += allele.length;
- mismatchCount += allele.length;
- break;
- ++indelCount;
- break;
- default:
- break;
- }
- }
- double mismatchRate = ( indelCount + mismatchCount ) / alignedBases;
- double snpRate = mismatchCount / alignedBases;
- double indelRate = indelCount / alignedBases;
- // store mismatch information about the alignment in the alleles
- // for each allele, normalize the mismatch rates by ignoring that allele,
- // this allows us to relate the mismatch rate without reference to called alleles
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- Allele& allele = *a;
- allele.readMismatchRate = mismatchRate;
- allele.readSNPRate = snpRate;
- allele.readIndelRate = indelRate;
- switch (allele.type) {
- allele.readMismatchRate = mismatchRate;
- allele.readSNPRate = snpRate;
- allele.readIndelRate = indelRate;
- break;
- case ALLELE_SNP:
- case ALLELE_MNP:
- allele.readSNPRate = ( mismatchCount - allele.length ) / alignedBases;
- allele.readIndelRate = indelRate;
- allele.readMismatchRate = indelRate + allele.readSNPRate;
- break;
- allele.readSNPRate = snpRate;
- allele.readIndelRate = ( indelCount - 1 ) / alignedBases;
- allele.readMismatchRate = allele.readIndelRate + snpRate;
- break;
- default:
- break;
- }
- }
- // ignore insertions, deletions, and N's which occur at the end of the read with
- // no reference-matching bases before the end of the read
- if (parameters.boundIndels &&
- (ra.alleles.back().isInsertion()
- || ra.alleles.back().isDeletion()
- || ra.alleles.back().isNull())) {
- ra.alleles.pop_back();
- }
- if (parameters.debug2) {
- cerr << "alleles:\n" << join(ra.alleles, "\n");
- cerr << endl;
- }
- /*
- cerr << "ra.alleles.size() = " << ra.alleles.size() << endl;
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- cerr << *a << endl;
- }
- */
- return ra;
-void AlleleParser::updateAlignmentQueue(long int position,
- vector<Allele*>& newAlleles,
- bool gettingPartials) {
- DEBUG2("updating alignment queue");
- DEBUG2("currentPosition = " << position
- << "; currentSequenceStart = " << currentSequenceStart
- << "; currentSequence end = " << currentSequence.size() + currentSequenceStart);
- // make sure we have sequence for the *first* alignment
- //extendReferenceSequence(currentAlignment);
- // push to the front until we get to an alignment that doesn't overlap our
- // current position or we reach the end of available alignments
- // filter input reads; only allow mapped reads with a certain quality
- DEBUG2("currentAlignment.Position == " << currentAlignment.Position
- << ", currentAlignment.AlignedBases.size() == " << currentAlignment.AlignedBases.size()
- << ", currentPosition == " << position
- << ", currentSequenceStart == " << currentSequenceStart
- << " .. + currentSequence.size() == " << currentSequenceStart + currentSequence.size()
- );
- if (hasMoreAlignments
- && currentAlignment.Position <= position
- && currentAlignment.RefID == currentRefID) {
- do {
- DEBUG2("top of alignment parsing loop");
- DEBUG("alignment: " << currentAlignment.Name);
- // get read group, and map back to a sample name
- string readGroup;
- if (!currentAlignment.GetTag("RG", readGroup)) {
- if (!oneSampleAnalysis) {
- ERROR("Couldn't find read group id (@RG tag) for BAM Alignment " <<
- currentAlignment.Name << " at position " << position
- << " in sequence " << currentSequence << " EXITING!");
- exit(1);
- } else {
- readGroup = "unknown";
- }
- } else {
- if (oneSampleAnalysis) {
- ERROR("No read groups specified in BAM header, but alignment " <<
- currentAlignment.Name << " at position " << position
- << " in sequence " << currentSequence << " has a read group.");
- exit(1);
- }
- }
- // skip this alignment if we are not analyzing the sample it is drawn from
- if (readGroupToSampleNames.find(readGroup) == readGroupToSampleNames.end()) {
- ERROR("could not find sample matching read group id " << readGroup);
- continue;
- }
- // skip this alignment if we are not using duplicate reads (we remove them by default)
- if (currentAlignment.IsDuplicate() && !parameters.useDuplicateReads) {
- //DEBUG("skipping alignment " << currentAlignment.Name << " because it is a duplicate read");
- continue;
- }
- // skip unmapped alignments, as they cannot be used in the algorithm
- if (!currentAlignment.IsMapped()) {
- //DEBUG("skipping alignment " << currentAlignment.Name << " because it is not mapped");
- continue;
- }
- // skip alignments which have no aligned bases
- if (currentAlignment.AlignedBases.size() == 0) {
- //DEBUG("skipping alignment " << currentAlignment.Name << " because it has no aligned bases");
- continue;
- }
- // skip alignments which are non-primary
- if (!currentAlignment.IsPrimaryAlignment()) {
- //DEBUG("skipping alignment " << currentAlignment.Name << " because it is not marked primary");
- continue;
- }
- if (!gettingPartials && currentAlignment.GetEndPosition() < position) {
- cerr << currentAlignment.Name << " at " << currentSequenceName << ":" << currentAlignment.Position << " is out of order!"
- << " expected after " << position << endl;
- continue;
- }
- // otherwise, get the sample name and register the alignment to generate a sequence of alleles
- // we have to register the alignment to acquire some information required by filters
- // such as mismatches
- // initially skip reads with low mapping quality (what happens if MapQuality is not in the file)
- if (currentAlignment.MapQuality >= parameters.MQL0) {
- // extend our cached reference sequence to allow processing of this alignment
- //extendReferenceSequence(currentAlignment);
- // left realign indels
- if (parameters.leftAlignIndels) {
- int length = currentAlignment.GetEndPosition() - currentAlignment.Position + 1;
- stablyLeftAlign(currentAlignment,
- currentSequence.substr(currentSequencePosition(currentAlignment), length));
- }
- // get sample name
- string sampleName = readGroupToSampleNames[readGroup];
- string sequencingTech;
- map<string, string>::iterator t = readGroupToTechnology.find(readGroup);
- if (t != readGroupToTechnology.end()) {
- sequencingTech = t->second;
- }
- // limit base quality if cap set
- if (parameters.baseQualityCap != 0) {
- capBaseQuality(currentAlignment, parameters.baseQualityCap);
- }
- // decomposes alignment into a set of alleles
- // here we get the deque of alignments ending at this alignment's end position
- deque<RegisteredAlignment>& rq = registeredAlignments[currentAlignment.GetEndPosition()];
- // and insert the registered alignment into that deque
- rq.push_front(RegisteredAlignment(currentAlignment, parameters));
- RegisteredAlignment& ra = rq.front();
- registerAlignment(currentAlignment, ra, sampleName, sequencingTech);
- // backtracking if we have too many mismatches
- // or if there are no recorded alleles
- if (ra.alleles.empty()
- || ((float) ra.mismatches / (float) currentAlignment.QueryBases.size()) > parameters.readMaxMismatchFraction
- || ra.mismatches > parameters.RMU
- || ra.snpCount > parameters.readSnpLimit
- || ra.indelCount > parameters.readIndelLimit) {
- rq.pop_front(); // backtrack
- } else {
- // push the alleles into our new alleles vector
- for (vector<Allele>::iterator allele = ra.alleles.begin(); allele != ra.alleles.end(); ++allele) {
- newAlleles.push_back(&*allele);
- }
- }
- }
- } while ((hasMoreAlignments = bamMultiReader.GetNextAlignment(currentAlignment))
- && currentAlignment.Position <= position
- && currentAlignment.RefID == currentRefID);
- }
- DEBUG2("... finished pushing new alignments");
-void AlleleParser::addToRegisteredAlleles(vector<Allele*>& alleles) {
- registeredAlleles.insert(registeredAlleles.end(),
- alleles.begin(),
- alleles.end());
-// Drops registered alleles which can no longer overlap the current position.
-// An allele is dropped once the end of its reference span lies more than
-// lastHaplotypeLength bases before currentPosition.
-void AlleleParser::updateRegisteredAlleles(void) {
-    // stale entries are first overwritten with NULL and then compacted in one pass
-    // http://stackoverflow.com/questions/347441/erasing-elements-from-a-vector
-    vector<Allele*>& alleles = registeredAlleles;
-    for (vector<Allele*>::iterator allele = alleles.begin(); allele != alleles.end(); ++allele) {
-        long unsigned int position = (*allele)->position;
-        // lastHaplotypeLength is added on the right-hand side instead of being
-        // subtracted from currentPosition, so the comparison cannot wrap around
-        // when currentPosition is smaller than lastHaplotypeLength
-        if (currentPosition > position + (*allele)->referenceLength + lastHaplotypeLength) {
-            *allele = NULL;
-        }
-    }
-    alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-}
-// Returns (reference ID, position) of the next input variant: the first one
-// strictly after currentPosition on the current reference or, failing that,
-// the first one on the next reference ID which has an entry.
-// Returns (-1, 0) when there is none or when input variant alleles are not in use.
-pair<int, long int> AlleleParser::nextInputVariantPosition(void) {
-    if (!usingVariantInputAlleles) {
-        return make_pair(-1, 0);
-    }
-    map<int, map<long, vector<Allele> > >::iterator here = inputVariantAlleles.find(currentRefID);
-    const bool moreOnThisChrom = here != inputVariantAlleles.end()
-        && here->second.upper_bound(currentPosition) != here->second.end();
-    // are we past the last one in the sequence, with no later chrom to move to?
-    if (!moreOnThisChrom
-        && inputVariantAlleles.upper_bound(currentRefID) == inputVariantAlleles.end()) {
-        return make_pair(-1, 0);
-    }
-    // operator[] creates an empty entry for currentRefID when none exists yet
-    map<long, vector<Allele> >& inChrom = inputVariantAlleles[currentRefID];
-    map<long, vector<Allele> >::iterator ic = inChrom.upper_bound(currentPosition);
-    if (ic != inChrom.end()) {
-        return make_pair(currentRefID, ic->first);
-    }
-    // find next chrom with input alleles
-    map<int, map<long, vector<Allele> > >::iterator nc = inputVariantAlleles.upper_bound(currentRefID);
-    if (nc == inputVariantAlleles.end()) {
-        return make_pair(-1, 0);
-    }
-    return make_pair(nc->first, nc->second.begin()->first);
-}
-// Loads input variants by calling getInputVariantsInRegion() with an empty
-// sequence name, for which that function sets no region on the VCF reader.
-void AlleleParser::getAllInputVariants(void) {
-    string noRegion; // deliberately left empty
-    getInputVariantsInRegion(noRegion);
-}
-// Reads records from the input VCF and stores every non-reference allele in
-// inputVariantAlleles, keyed by BAM reference ID and then by allele position.
-// When seq is non-empty the VCF reader is first restricted to that sequence and
-// the given start/end via setRegion(); an empty seq sets no region.
-// Does nothing unless usingVariantInputAlleles is set.
-void AlleleParser::getInputVariantsInRegion(string& seq, long start, long end) {
-    if (!usingVariantInputAlleles) return;
-    // get the variants in the target region
-    if (!seq.empty()) {
-        variantCallInputFile.setRegion(seq, start, end);
-    }
-    while (variantCallInputFile.getNextVariant(*currentVariant)) {
-        // get alternate alleles, ordered as they are listed in currentVariant->alt
-        map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
-        // TODO this would be a nice option: why does it not work?
-        //map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
-        vector< vector<vcf::VariantAllele> > orderedVariantAlleles;
-        for (vector<string>::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) {
-            orderedVariantAlleles.push_back(variantAlleles[*a]);
-        }
-        for (vector< vector<vcf::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
-            vector<vcf::VariantAllele>& altAllele = *g;
-            for (vector<vcf::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
-                vcf::VariantAllele& variant = *v;
-                long int allelePos = variant.position - 1;
-                // assigned in every branch below, so genotypeAllele() never
-                // receives an indeterminate allele type
-                AlleleType type;
-                string alleleSequence = variant.alt;
-                int len = 0;
-                int reflen = 0;
-                string cigar;
-                // XXX
-                // FAIL
-                // you need to add in the reference bases between the non-reference ones!
-                // to allow for complex events!
-                if (variant.ref == variant.alt) {
-                    // XXX note that for reference alleles, we only use the first base internally
-                    // but this is technically incorrect, so this hack should be noted
-                    type = ALLELE_REFERENCE;
-                    len = variant.ref.size();
-                    reflen = len;
-                    //alleleSequence = alleleSequence.at(0); // take only the first base
-                    cigar = convert(len) + "M";
-                } else if (variant.ref.size() == variant.alt.size()) {
-                    len = variant.ref.size();
-                    reflen = len;
-                    if (variant.ref.size() == 1) {
-                        type = ALLELE_SNP;
-                    } else {
-                        type = ALLELE_MNP;
-                    }
-                    cigar = convert(len) + "X";
-                } else if (variant.ref.size() > variant.alt.size()) {
-                    type = ALLELE_DELETION;
-                    len = variant.ref.size() - variant.alt.size();
-                    // add the reference base on either side of the deleted bases
-                    allelePos -= 1;
-                    reflen = len + 2;
-                    alleleSequence =
-                        uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos, 1))
-                        + alleleSequence
-                        + uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos+1+len, 1));
-                    cigar = "1M" + convert(len) + "D" + "1M";
-                } else {
-                    type = ALLELE_INSERTION;
-                    // we always include the flanking bases for these elsewhere, so here too in order to be consistent and trigger use
-                    // add previous base and post base to match format typically used for calling
-                    allelePos -= 1;
-                    alleleSequence =
-                        uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos, 1))
-                        + alleleSequence
-                        + uppercase(reference.getSubSequence(currentVariant->sequenceName, allelePos+1, 1));
-                    // inserted length is measured against this allele's own ref
-                    len = variant.alt.size() - variant.ref.size();
-                    cigar = "1M" + convert(len) + "I" + "1M";
-                    reflen = 2;
-                }
-                // TODO deal with complex subs
-                Allele allele = genotypeAllele(type, alleleSequence, (unsigned int) len, cigar, (unsigned int) reflen, allelePos);
-                DEBUG("input allele: " << allele.referenceName << " " << allele);
-                // only non-reference alleles are recorded as input variants
-                if (allele.type != ALLELE_REFERENCE) {
-                    inputVariantAlleles[bamMultiReader.GetReferenceID(currentVariant->sequenceName)][allele.position].push_back(allele);
-                }
-            }
-        }
-    }
-}
-// Loads input VCF alleles into inputVariantAlleles when pos + referenceLength
-// reaches past rightmostInputAllelePosition.
-// With referenceLength > 0 the VCF reader is restricted to a window on
-// currentSequenceName which starts at rightmostInputAllelePosition (or at
-// rightmostHaplotypeBasisAllelePosition when that is 0) and ends
-// CACHED_BASIS_HAPLOTYPE_WINDOW bases past pos + referenceLength; otherwise the
-// whole of currentSequenceName is requested.
-// Clears hasMoreVariants once the region has been read to its end.
-// Does nothing unless usingVariantInputAlleles is set.
-void AlleleParser::updateInputVariants(long int pos, int referenceLength) {
-    if (!usingVariantInputAlleles) return;
-    if (pos + referenceLength <= rightmostInputAllelePosition) return;
-    long int start = rightmostInputAllelePosition;
-    if (start == 0) {
-        start = rightmostHaplotypeBasisAllelePosition;
-    }
-    // tabix expects 1-based, fully closed regions for ti_parse_region()
-    // (which is what setRegion() calls eventually)
-    bool gotRegion = false;
-    if (referenceLength > 0) {
-        gotRegion = variantCallInputFile.setRegion(currentSequenceName,
-                                                   start + 1,
-                                                   pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW + 1);
-    } else {
-        // whole chromosome
-        gotRegion = variantCallInputFile.setRegion(currentSequenceName);
-    }
-    if (gotRegion) {
-        // get the variants in the target region
-        while (variantCallInputFile.getNextVariant(*currentVariant)) {
-            DEBUG("getting input alleles from input VCF at position " << currentVariant->sequenceName << ":" << currentVariant->position);
-            // get alternate alleles, ordered as they are listed in currentVariant->alt
-            map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->parsedAlternates();
-            // TODO this would be a nice option: why does it not work?
-            //map<string, vector<vcf::VariantAllele> > variantAlleles = currentVariant->flatAlternates();
-            vector< vector<vcf::VariantAllele> > orderedVariantAlleles;
-            for (vector<string>::iterator a = currentVariant->alt.begin(); a != currentVariant->alt.end(); ++a) {
-                orderedVariantAlleles.push_back(variantAlleles[*a]);
-            }
-            for (vector< vector<vcf::VariantAllele> >::iterator g = orderedVariantAlleles.begin(); g != orderedVariantAlleles.end(); ++g) {
-                vector<vcf::VariantAllele>& altAllele = *g;
-                for (vector<vcf::VariantAllele>::iterator v = altAllele.begin(); v != altAllele.end(); ++v) {
-                    vcf::VariantAllele& variant = *v;
-                    long int allelePos = variant.position - 1;
-                    // assigned in every branch below, so genotypeAllele() never
-                    // receives an indeterminate allele type
-                    AlleleType type;
-                    string alleleSequence = variant.alt;
-                    int len = 0;
-                    int reflen = 0;
-                    string cigar;
-                    // XXX
-                    // FAIL
-                    // you need to add in the reference bases between the non-reference ones!
-                    // to allow for complex events!
-                    if (variant.ref == variant.alt) {
-                        // XXX note that for reference alleles, we only use the first base internally
-                        // but this is technically incorrect, so this hack should be noted
-                        type = ALLELE_REFERENCE;
-                        len = variant.ref.size();
-                        reflen = len;
-                        //alleleSequence = alleleSequence.at(0); // take only the first base
-                        cigar = convert(len) + "M";
-                    } else if (variant.ref.size() == variant.alt.size()) {
-                        len = variant.ref.size();
-                        reflen = len;
-                        if (variant.ref.size() == 1) {
-                            type = ALLELE_SNP;
-                        } else {
-                            type = ALLELE_MNP;
-                        }
-                        cigar = convert(len) + "X";
-                    } else if (variant.ref.size() > variant.alt.size()) {
-                        type = ALLELE_DELETION;
-                        len = variant.ref.size() - variant.alt.size();
-                        // add the reference base on either side of the deleted bases
-                        allelePos -= 1;
-                        reflen = len + 2;
-                        alleleSequence =
-                            uppercase(reference.getSubSequence(currentSequenceName, allelePos, 1))
-                            + alleleSequence
-                            + uppercase(reference.getSubSequence(currentSequenceName, allelePos+1+len, 1));
-                        cigar = "1M" + convert(len) + "D" + "1M";
-                    } else {
-                        type = ALLELE_INSERTION;
-                        // we always include the flanking bases for these elsewhere, so here too in order to be consistent and trigger use
-                        // add previous base and post base to match format typically used for calling
-                        allelePos -= 1;
-                        alleleSequence =
-                            uppercase(reference.getSubSequence(currentSequenceName, allelePos, 1))
-                            + alleleSequence
-                            + uppercase(reference.getSubSequence(currentSequenceName, allelePos+1, 1));
-                        // inserted length is measured against this allele's own ref
-                        len = variant.alt.size() - variant.ref.size();
-                        cigar = "1M" + convert(len) + "I" + "1M";
-                        reflen = 2;
-                    }
-                    // TODO deal with complex subs
-                    Allele allele = genotypeAllele(type, alleleSequence, (unsigned int) len, cigar, (unsigned int) reflen, allelePos);
-                    DEBUG("input allele: " << allele.referenceName << " " << allele);
-                    // only non-reference alleles are recorded as input variants
-                    if (allele.type != ALLELE_REFERENCE) {
-                        inputVariantAlleles[bamMultiReader.GetReferenceID(allele.referenceName)][allele.position].push_back(allele);
-                    }
-                }
-            }
-            // store the allele counts, if they are provided
-            //
-        }
-        // the loop above only ends once getNextVariant() has failed
-        hasMoreVariants = false;
-    }
-    // NOTE(review): the right-edge bookkeeping below is disabled, so the window
-    // start is never advanced by this function; confirm that this is intended
-    //rightmostHaplotypeBasisAllelePosition = pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
-    //rightmostInputAllelePosition = pos + referenceLength + CACHED_BASIS_HAPLOTYPE_WINDOW;
-}
-// For every sample with input genotype likelihoods at currentPosition, builds
-// a sorted list of SampleDataLikelihood entries and appends it to
-// sampleDataLikelihoods. A likelihood is attached to each genotype in
-// genotypesByPloidy whose convert() string equals the likelihood's key.
-// Samples for which nothing matched are not appended.
-void AlleleParser::addCurrentGenotypeLikelihoods(map<int, vector<Genotype> >& genotypesByPloidy,
-        vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods) {
-    // nothing to do unless there are genotype likelihoods at the current position
-    if (inputGenotypeLikelihoods.find(currentPosition) == inputGenotypeLikelihoods.end()) {
-        return;
-    }
-    map<string, map<string, long double> >& perSample = inputGenotypeLikelihoods[currentPosition];
-    // flatten the genotypes of all ploidies into one list of pointers
-    vector<Genotype*> candidates;
-    for (map<int, vector<Genotype> >::iterator byPloidy = genotypesByPloidy.begin(); byPloidy != genotypesByPloidy.end(); ++byPloidy) {
-        vector<Genotype>& sameploidy = byPloidy->second;
-        for (vector<Genotype>::size_type i = 0; i < sameploidy.size(); ++i) {
-            candidates.push_back(&sameploidy[i]);
-        }
-    }
-    for (map<string, map<string, long double> >::iterator s = perSample.begin(); s != perSample.end(); ++s) {
-        const string& sampleName = s->first;
-        map<string, long double>& namedLikelihoods = s->second;
-        // pair each named likelihood with the matching genotype object(s)
-        map<Genotype*, long double> matched;
-        for (map<string, long double>::iterator nl = namedLikelihoods.begin(); nl != namedLikelihoods.end(); ++nl) {
-            for (vector<Genotype*>::iterator c = candidates.begin(); c != candidates.end(); ++c) {
-                if (convert(**c) == nl->first) {
-                    matched[*c] = nl->second;
-                }
-            }
-        }
-        Result sampleData;
-        sampleData.name = sampleName;
-        // TODO add null sample object to sampleData
-        // do you need to????
-        for (map<Genotype*, long double>::iterator m = matched.begin(); m != matched.end(); ++m) {
-            sampleData.push_back(SampleDataLikelihood(sampleName, nullSample, m->first, m->second, 0));
-        }
-        sortSampleDataLikelihoods(sampleData);
-        if (!sampleData.empty()) {
-            sampleDataLikelihoods.push_back(sampleData);
-        }
-    }
-}
-// Copies the input allele counts recorded at currentPosition into inputACs,
-// keyed by each allele's currentBase. Only alleles present in genotypeAlleles
-// are considered; inputACs is left untouched when no counts are recorded at
-// this position.
-void AlleleParser::getInputAlleleCounts(vector<Allele>& genotypeAlleles, map<string, int>& inputACs) {
-    // are there input ACs?
-    if (inputAlleleCounts.find(currentPosition) == inputAlleleCounts.end()) {
-        return;
-    }
-    map<Allele, int>& countsHere = inputAlleleCounts[currentPosition];
-    // XXX NB. We only use ACs for alleles in genotypeAlleles
-    for (vector<Allele>::size_type i = 0; i < genotypeAlleles.size(); ++i) {
-        Allele& candidate = genotypeAlleles[i];
-        map<Allele, int>::iterator known = countsHere.find(candidate);
-        if (known != countsHere.end()) {
-            inputACs[candidate.currentBase] = known->second;
-        }
-    }
-}
-// Removes from `alleles` those haplotype alleles (position == currentPosition
-// and referenceLength == haplotypeLength) whose alternate sequence is shorter
-// than probeLength and whose read has fewer non-null bases on either side than
-// the shortfall. Alleles which are not at the current haplotype are kept.
-void AlleleParser::removeAllelesWithoutReadSpan(vector<Allele*>& alleles, int probeLength, int haplotypeLength) {
-    for (vector<Allele*>::size_type i = 0; i < alleles.size(); ++i) {
-        Allele* candidate = alleles[i];
-        const bool atCurrentHaplotype =
-            candidate->position == currentPosition && candidate->referenceLength == haplotypeLength;
-        if (atCurrentHaplotype) {
-            // require additionally
-            int additionalRequiredBases = probeLength - candidate->alternateSequence.size();
-            // NOTE(review): requiredFlank is computed but never compared against;
-            // the test below uses additionalRequiredBases on each side. Confirm
-            // which of the two is intended.
-            int requiredFlank = ceil((double) additionalRequiredBases / 2);
-            DEBUG2(candidate << " needs at least " << additionalRequiredBases
-                   << " bpleft " << candidate->read5pNonNullBases() << " bpright " << candidate->read3pNonNullBases());
-            if (additionalRequiredBases > 0) {
-                if (candidate->read5pNonNullBases() < additionalRequiredBases
-                    || candidate->read3pNonNullBases() < additionalRequiredBases) {
-                    DEBUG("removing " << candidate << " as it does not have the required probe length");
-                    alleles[i] = NULL;
-                }
-            }
-        }
-    }
-    // compact away the entries marked above
-    alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-}
-// Removes from `alleles` everything that does not fit the haplotype window
-// which starts at currentPosition and spans haplotypeLength bases. The tests
-// are applied in this order:
-//   1. with getAllAllelesInHaplotype set, an allele must start inside
-//      [currentPosition, currentPosition + haplotypeLength);
-//   2. a reference allele must start at or before currentPosition and reach
-//      at least to the end of the window; any other allele must start at
-//      currentPosition and have referenceLength == haplotypeLength;
-//   3. an allele passing 1 and 2 but lying ahead of the position is removed
-//      with its 'processed' flag cleared.
-void AlleleParser::removeNonOverlappingAlleles(vector<Allele*>& alleles, int haplotypeLength, bool getAllAllelesInHaplotype) {
-    for (vector<Allele*>::size_type i = 0; i < alleles.size(); ++i) {
-        Allele* candidate = alleles[i];
-        bool discard = false;
-        if (getAllAllelesInHaplotype
-            && !(currentPosition <= candidate->position && candidate->position < currentPosition + haplotypeLength)) {
-            // is not in haplotype
-            discard = true;
-        } else if (candidate->type == ALLELE_REFERENCE) {
-            // does the reference allele overlap the haplotype
-            if (!(candidate->position <= currentPosition
-                  && candidate->position + candidate->referenceLength >= currentPosition + haplotypeLength)) {
-                // is not fully overlapping the haplotype
-                discard = true;
-            } else if (currentPosition < candidate->position) { // not there yet
-                candidate->processed = false;
-                discard = true;
-            }
-        } else { // snps, insertions, deletions
-            if (!(currentPosition == candidate->position && candidate->referenceLength == haplotypeLength)) {
-                discard = true;
-            } else if (currentPosition + haplotypeLength <= candidate->position) {
-                candidate->processed = false;
-                discard = true;
-            }
-        }
-        if (discard) {
-            alleles[i] = NULL;
-        }
-    }
-    // compact away the entries marked above
-    alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-}
-// Removes alleles which are filtered at the current position: those whose
-// quality is below parameters.BQL0 or whose currentBase is "N". Their
-// 'processed' flag is cleared so that they are evaluated again later.
-void AlleleParser::removeFilteredAlleles(vector<Allele*>& alleles) {
-    for (vector<Allele*>::size_type i = 0; i < alleles.size(); ++i) {
-        Allele* candidate = alleles[i];
-        const bool filtered = candidate->quality < parameters.BQL0 || candidate->currentBase == "N";
-        if (!filtered) {
-            continue;
-        }
-        candidate->processed = false; // force re-processing later
-        alleles[i] = NULL;
-    }
-    // compact away the entries marked above
-    alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-}
-// Removes alleles whose reference span ends at or before currentPosition,
-// marking each of them as processed.
-void AlleleParser::removePreviousAlleles(vector<Allele*>& alleles) {
-    for (vector<Allele*>::size_type i = 0; i < alleles.size(); ++i) {
-        Allele* candidate = alleles[i];
-        const bool stillAhead = !(candidate->position + candidate->referenceLength <= currentPosition);
-        if (stillAhead) {
-            continue;
-        }
-        candidate->processed = true;
-        alleles[i] = NULL;
-    }
-    // compact away the entries marked above
-    alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
-}
-// moves the parser on to the next target, or to the first alignment of the
-// input when running without targets or from stdin
-// always starts by clearing the registered alignments/alleles and resetting
-// lastHaplotypeLength, and on success reloads the reference sequence
-// returns true if there is something left to process (justSwitchedTargets is set)
-// false otherwise
-bool AlleleParser::toNextTarget(void) {
- DEBUG("to next target");
- clearRegisteredAlignments();
- // reset haplotype length; there is no last call in this sequence; it isn't relevant
- lastHaplotypeLength = 0;
- if (targets.empty() && usingVariantInputAlleles) {
- // we are processing everything, so load the entire input variant allele set
- getAllInputVariants();
- }
- // targets given and not reading stdin: walk through the target list
- if (!parameters.useStdin && !targets.empty()) {
- bool ok = false; // true once a target with a mapped alignment has been loaded
- // try to load the first target if we need to
- if (!currentTarget) {
- ok = loadTarget(&targets.front()) && getFirstAlignment();
- }
- // step through targets until we get to one with alignments
- while (!ok && currentTarget != &targets.back()) {
- if (!loadTarget(++currentTarget)) { // advance currentTarget by one element; skip it if it cannot be loaded
- continue;
- }
- if (ok = getFirstAlignment()) { // assignment intended: ok records whether this target has a mapped alignment
- break;
- }
- }
- if (!ok) { // no remaining target has alignments: fall back to the input variants
- return loadNextPositionWithInputVariant();
- }
- // stdin, no targets cases (first call: currentTarget is still unset)
- } else if (!currentTarget && (parameters.useStdin || targets.empty())) {
- // if we have a target for limiting the analysis, use it
- // this happens when you specify stdin + a region string
- if (!targets.empty()) {
- currentTarget = &targets.front();
- loadTarget(currentTarget); // NOTE(review): the return value is ignored here
- }
- if (!getFirstAlignment()) {
- ERROR("Could not get first alignment from target");
- return false;
- }
- loadNextPositionWithAlignmentOrInputVariant(currentAlignment);
- //loadReferenceSequence(currentAlignment); // this seeds us with new reference sequence
- // however, if we have a target list of variants and we should also respect them
- // later calls in stdin / no-targets mode: we've reached the end of file, or stdin
- } else if (parameters.useStdin || targets.empty()) {
- return false;
- }
- if (currentTarget && usingVariantInputAlleles) { // load the input variants which fall inside the new target
- getInputVariantsInRegion(currentTarget->seq, currentTarget->left, currentTarget->right);
- }
- loadReferenceSequence(currentSequenceName);
- justSwitchedTargets = true;
- return true;
-// TODO refactor this to allow reading from stdin or reading the whole file
-// without loading each sequence as a target
-//
-// Makes `target` the current target: loads its reference sequence, moves
-// currentPosition and rightmostHaplotypeBasisAllelePosition to its left edge
-// and points the BAM reader at it. If the variant input file is open, its
-// region is set as well; a failure there only produces a warning.
-// Returns false if the BAM region could not be set, true otherwise.
-bool AlleleParser::loadTarget(BedTarget* target) {
-    currentTarget = target;
-    DEBUG("processing target " << currentTarget->desc << " " <<
-          currentTarget->seq << " " << currentTarget->left << " " <<
-          currentTarget->right + 1);
-    DEBUG2("loading target reference subsequence");
-    loadReferenceSequence(currentTarget->seq);
-    DEBUG2("setting new position " << currentTarget->left);
-    currentPosition = currentTarget->left;
-    rightmostHaplotypeBasisAllelePosition = currentTarget->left;
-    // bamtools expects 0-based, half-open
-    const bool bamRegionSet =
-        bamMultiReader.SetRegion(currentRefID, currentTarget->left, currentRefID, currentTarget->right + 1);
-    if (!bamRegionSet) {
-        ERROR("Could not SetRegion to " << currentTarget->seq << ":" << currentTarget->left << ".." << currentTarget->right + 1);
-        cerr << bamMultiReader.GetErrorString() << endl;
-        return false;
-    }
-    if (variantCallInputFile.is_open()) {
-        // tabix expects 1-based, fully closed regions for ti_parse_region()
-        // (which is what setRegion() calls eventually)
-        stringstream regionSpec;
-        regionSpec << currentTarget->seq << ":" << currentTarget->left + 1 << "-" << currentTarget->right + 1;
-        if (variantCallInputFile.setRegion(regionSpec.str())) {
-            DEBUG("set region of variant input file to " <<
-                  currentTarget->seq << ":" << currentTarget->left << ".." <<
-                  currentTarget->right + 1);
-        } else {
-            // not fatal: the target is still usable without input variants
-            WARNING("Could not set the region of the variants input file to " <<
-                    currentTarget->seq << ":" << currentTarget->left << ".." <<
-                    currentTarget->right + 1);
-        }
-    }
-    // now that we've jumped, reset the hasMoreAlignments counter
-    hasMoreAlignments = true;
-    DEBUG2("set region");
-    return true;
-}
-// Reads alignments from the BAM reader into currentAlignment until a mapped
-// one is found. Returns true if a mapped alignment was found, false if the
-// reader ran out of alignments first.
-bool AlleleParser::getFirstAlignment(void) {
-    bool found = bamMultiReader.GetNextAlignment(currentAlignment);
-    // skip over unmapped alignments
-    while (found && !currentAlignment.IsMapped()) {
-        found = bamMultiReader.GetNextAlignment(currentAlignment);
-    }
-    if (found) {
-        DEBUG2("got first alignment in target region");
-        return true;
-    }
-    if (currentTarget) {
-        DEBUG("Could not find any mapped reads in target region " << currentSequenceName << ":" << currentTarget->left << ".." << currentTarget->right + 1);
-    } else {
-        DEBUG("Could not find any mapped reads in target region " << currentSequenceName);
-    }
-    return false;
-}
-// Reads the next record of the variant input file into currentVariant and
-// records in hasMoreVariants whether one was read.
-// Returns false only if the file is open and no record could be read; when
-// the file is not open, hasMoreVariants is cleared and true is returned.
-bool AlleleParser::getFirstVariant(void) {
-    hasMoreVariants = false;
-    if (!variantCallInputFile.is_open()) {
-        return true;
-    }
-    hasMoreVariants = variantCallInputFile.getNextVariant(*currentVariant) ? true : false;
-    if (!hasMoreVariants) {
-        return false;
-    }
-    DEBUG2("got first variant in target region");
-    return true;
-}
-// Empties both registeredAlignments and registeredAlleles.
-void AlleleParser::clearRegisteredAlignments(void) {
-    DEBUG2("clearing registered alignments and alleles");
-    // alignments first, then the allele pointer list
-    registeredAlignments.clear();
-    registeredAlleles.clear();
-}
-// advances the parser by one position; returns false once nothing is left to process
-// TODO: this should be simplified
-// there are two modes of operation:
-//   with targets (and not reading stdin), and
-//   without targets (or reading stdin)
-// with targets, we keep track of which target we are in,
-// and once we step past its right edge we try to move to the next one,
-// which may jump around the BAM file
-// without targets we just GetNextAlignment until we can't
-// anymore; all positionality of the parser then follows the input alignments.
-// TODO: rewrite things so that we aren't strung out between 8 functions
-// stepping:
-//   if the next position is outside of the target region,
-//   seek to the next target which is in-bounds for its sequence;
-//   if none exists, return false
-bool AlleleParser::toNextPosition(void) {
- // is this our first position? (indicated by empty currentSequenceName)
- // if so, load it up
- bool first_pos = false; // set below but not read again in this function
- if (currentSequenceName.empty()) {
- DEBUG("loading first target");
- if (!toNextTarget()) {
- return false;
- }
- first_pos = true;
- }
- // here we assume we are processing an entire BAM or one contiguous region
- if (parameters.useStdin || targets.empty()) {
- // here we loop over unaligned reads at the beginning of a target
- // we need to get to a mapped read to figure out where we are
- while (hasMoreAlignments && !currentAlignment.IsMapped()) {
- hasMoreAlignments = bamMultiReader.GetNextAlignment(currentAlignment);
- }
- // determine if we have more alignments or not
- if (!hasMoreAlignments) {
- if (hasMoreInputVariants()) {
- // continue as we have more variants
- DEBUG("continuing because we have more input variants");
- loadNextPositionWithInputVariant();
- } else if (registeredAlignments.empty()) {
- DEBUG("no more alignments in input");
- return false;
- } else if (currentPosition >= currentSequence.size() + currentSequenceStart) { // stepped past the end of the cached sequence
- DEBUG("no more alignments in input");
- DEBUG("at end of sequence");
- return false;
- } else {
- ++currentPosition; // registered alignments remain: keep stepping through them
- }
- } else {
- // step the position
- ++currentPosition;
- // if the current position of this alignment is outside of the reference sequence length
- // we need to switch references
- if (currentPosition >= reference.sequenceLength(currentSequenceName)
- || registeredAlignments.empty() && currentRefID != currentAlignment.RefID) { // && binds tighter than ||: (past the sequence end) || (nothing registered && next alignment is on another reference)
- DEBUG("at end of sequence");
- clearRegisteredAlignments();
- loadNextPositionWithAlignmentOrInputVariant(currentAlignment);
- justSwitchedTargets = true;
- }
- }
- } else {
- // targets mode: step to the next position
- ++currentPosition;
- // if we've run off the right edge of a target, jump
- if (currentPosition > currentTarget->right) {
- // time to move to a new target
- DEBUG("next position " << (long int) currentPosition
- << " outside of current target right bound " << currentTarget->right + 1);
- // try to get to the next one, and if this fails, bail out
- if (!toNextTarget()) {
- DEBUG("no more targets, finishing");
- return false;
- }
- justSwitchedTargets = true;
- }
- }
- // refresh the cached reference base for the (possibly changed) current position
- currentReferenceBase = currentReferenceBaseChar();
- // log the position being processed (printed as currentPosition + 1)
- DEBUG("processing position " << (long unsigned int) currentPosition + 1 << " in sequence " << currentSequenceName);
- vector<Allele*> newAlleles; // filled by updateAlignmentQueue(), then appended to registeredAlleles
- updateAlignmentQueue(currentPosition, newAlleles);
- addToRegisteredAlleles(newAlleles);
- DEBUG2("updating variants");
- // done typically at each new read, but this handles the case where there is no data for a while
- //updateInputVariants(currentPosition, 1);
- DEBUG2("updating registered alleles");
- updateRegisteredAlleles(); // drops registered alleles which no longer overlap the current position
- //DEBUG2("updating prior variant alleles");
- //updatePriorAlleles();
- // if we have alignments which ended before the current haplotype window, erase them and their alleles
- // TODO check that this doesn't leak...
- DEBUG2("erasing old registered alignments");
- map<long unsigned int, deque<RegisteredAlignment> >::iterator f = registeredAlignments.begin();
- while (f != registeredAlignments.end()
- && f->first < currentPosition - lastHaplotypeLength) { // NOTE(review): may wrap if currentPosition < lastHaplotypeLength and the arithmetic is unsigned; confirm
- registeredAlignments.erase(f++); // post-increment keeps f valid across the erase
- }
- // remove past registered alleles
- DEBUG2("marking previous alleles as processed and removing from registered alleles");
- removePreviousAlleles(registeredAlleles);
- sort(registeredAlleles.begin(), registeredAlleles.end()); // orders by pointer value so that duplicates become adjacent
- registeredAlleles.erase(unique(registeredAlleles.begin(), registeredAlleles.end()), registeredAlleles.end()); // drop duplicate pointers
- // the same clean-up for the variants from the input VCF is currently disabled
- /*
- DEBUG2("erasing old input variant alleles");
- if (inputVariantAlleles.find(currentSequenceName) != inputVariantAlleles.end()) {
- map<long int, vector<Allele> >::iterator v = inputVariantAlleles[currentSequenceName].begin();
- while (v != inputVariantAlleles[currentSequenceName].end() && v->first < currentPosition) {
- inputVariantAlleles[currentSequenceName].erase(v++);
- }
- }
- */
- DEBUG2("erasing old input haplotype basis alleles");
- map<long int, vector<AllelicPrimitive> >::iterator z = haplotypeBasisAlleles.begin();
- while (z != haplotypeBasisAlleles.end() && z->first < currentPosition) { // entries keyed before the current position
- haplotypeBasisAlleles.erase(z++);
- }
- DEBUG2("erasing old cached repeat counts");
- map<long int, map<string, int> >::iterator rc = cachedRepeatCounts.begin();
- while (rc != cachedRepeatCounts.end() && rc->first < currentPosition) { // entries keyed before the current position
- cachedRepeatCounts.erase(rc++);
- }
- return true;
-// XXX for testing only: moves to the next target and reads through all of
-// its alignments without processing them.
-// Returns false when there is no further target, true otherwise.
-bool AlleleParser::dummyProcessNextTarget(void) {
-    const bool haveTarget = toNextTarget();
-    if (!haveTarget) {
-        DEBUG("no more targets, finishing");
-        return false;
-    }
-    // drain the reader; each alignment is read and discarded
-    for (;;) {
-        if (!bamMultiReader.GetNextAlignment(currentAlignment)) {
-            break;
-        }
-    }
-    return true;
-}
-// Checks whether two allele groups, or one group and the current reference
-// haplotype, share the same alternate sequence (judged by the first allele of
-// each group). If so, the alleles are homogenized against the reference
-// haplotype, fetched again via getAlleles() and regrouped into alleleGroups.
-void AlleleParser::removeDuplicateAlleles(Samples& samples, map<string, vector<Allele*> >& alleleGroups, int allowedAlleleTypes, int haplotypeLength, Allele& refallele) {
-    string refseq = currentReferenceHaplotype();
-    // sequences seen so far, seeded with the reference haplotype
-    set<string> seenSequences;
-    seenSequences.insert(refseq);
-    bool sharedAlternate = false;
-    for (map<string, vector<Allele*> >::iterator group = alleleGroups.begin();
-         group != alleleGroups.end() && !sharedAlternate; ++group) {
-        Allele& representative = *group->second.front();
-        // insert() reports through .second whether the sequence was new
-        if (!seenSequences.insert(representative.alternateSequence).second) {
-            sharedAlternate = true;
-        }
-    }
-    if (!sharedAlternate) {
-        return;
-    }
-    homogenizeAlleles(alleleGroups, refseq, refallele);
-    getAlleles(samples, allowedAlleleTypes, haplotypeLength, false, true);
-    alleleGroups.clear();
-    groupAlleles(samples, alleleGroups); // groups by alternate sequence
-}
-// adjusts the registered alignment and contained alleles so that one allele
-// covers the entire haplotype window
-// returns a vector of pointers to alleles generated in this process
-// alleles which are discarded are not explicitly removed, but 'squashed',
-// which triggers their collection later
-bool RegisteredAlignment::fitHaplotype(int haplotypeStart, int haplotypeLength, Allele*& aptr, bool allowPartials) {
- // if the read overlaps the haplotype window,
- // generate one Allele to describe the read in that region
- // and "squash" the unused ones
- vector<Allele*> newAllelesPtr;
- vector<Allele> newAlleles;
- int haplotypeEnd = haplotypeStart + haplotypeLength;
- //if (containedAlleleTypes == ALLELE_REFERENCE) {
- // return false;
- //}
- /*
- cerr << "start: " << start << " end: " << end << endl;
- cerr << "haplotypestart: " << haplotypeStart << " haplotypeend: " << haplotypeEnd << endl;
- cerr << "registered alignment alleles," << endl << alleles << endl;
- */
- // save and bail out if we can't construct a haplotype allele
- vector<Allele> savedAlleles = alleles;
- if ((allowPartials && (start <= haplotypeEnd || end >= haplotypeStart))
- || (start <= haplotypeStart && end >= haplotypeEnd)) {
- vector<Allele>::iterator a = alleles.begin();
- //cerr << "trying to find overlapping haplotype alleles for the range " << haplotypeStart << " to " << haplotypeEnd << endl;
- while (a + 1 != alleles.end() && a->position + a->referenceLength <= haplotypeStart) {
- ++a;
- }
- vector<Allele>::iterator b = a;
- while (b + 1 != alleles.end() && b->position + b->referenceLength < haplotypeEnd) {
- ++b;
- }
- // do not attempt to build haplotype alleles where there are non-contiguous reads
- for (vector<Allele>::iterator p = alleles.begin(); p != alleles.end(); ++p) {
- if (p != alleles.begin()) {
- if (p->position != (p - 1)->position + (p - 1)->referenceLength) {
- //cerr << "non-contiguous reads, cannot construct haplotype allele" << endl;
- return true;
- }
- }
- }
- // conceptually it will be easier to work on the haplotype obs if the reference alleles match the haplotype specification
- //if (a == b && a->isReference()) {
- // break the reference observation
- //cerr << "we just have a reference allele" << endl;
- //return true;
- //}
- string seq;
- vector<pair<int, string> > cigar;
- vector<short> quals;
- // now "a" should overlap the start of the haplotype block, and "b" the end
- //cerr << "block start overlaps: " << *a << endl;
- //cerr << "block end overlaps: " << *b << endl;
- //cerr << "haplotype start: " << haplotypeStart << endl;
- for (vector<Allele>::iterator p = a; p != (b+1); ++p) {
- if (p->isNull()) return false; // can't assemble across NULL alleles
- }
- // adjust a to match the start of the haplotype block
- if (a->position == haplotypeStart) {
- // nothing to do!
- } else if (a->position < haplotypeStart) {
- // squeeze bases off the front of this allele onto the last allele
- // generating a new allele if there isn't one
- Allele newAllele = *a;
- newAllele.subtractFromEnd(a->position + a->referenceLength - haplotypeStart, seq, cigar, quals);
- a->subtractFromStart(haplotypeStart - a->position, seq, cigar, quals);
- newAlleles.push_back(newAllele);
- }
- if (b->position + b->referenceLength == haplotypeEnd) {
- // nothing to do!!!!
- } else if (b->position + b->referenceLength > haplotypeEnd) {
- Allele newAllele = *b;
- newAllele.subtractFromStart(haplotypeEnd - b->position, seq, cigar, quals);
- if (isUnflankedIndel(newAllele)) {
- if (b + 1 != alleles.end()) {
- ++b;
- }
- } else {
- b->subtractFromEnd(b->position + b->referenceLength - haplotypeEnd, seq, cigar, quals);
- newAlleles.push_back(newAllele);
- }
- }
- // now, for everything between a and b, merge them into one allele
- while (a != b) {
- vector<pair<int, string> > cigarV = splitCigar(a->cigar);
- vector<Allele>::iterator p = a + 1;
- // update the quality of the merged allele in the same way as we do
- // for complex events
- if (!a->isReference() && !a->isNull()) {
- p->quality = min(a->quality, p->quality); // note that phred and log are inverted
- p->lnquality = max(a->lnquality, p->lnquality);
- }
- p->addToStart(a->alternateSequence, cigarV, a->baseQualities);
- a->squash();
- ++a;
- }
- // remove any 0-length alleles, these are useless
- // this operation requires independent removal of references to these alleles (e.g. registeredAlleles.clear())
- alleles.erase(remove_if(alleles.begin(), alleles.end(), isEmptyAllele), alleles.end());
- for (vector<Allele>::iterator p = newAlleles.begin(); p != newAlleles.end(); ++p) {
- alleles.push_back(*p);
- }
- AllelePositionCompare apcomp;
- sort(alleles.begin(), alleles.end(), apcomp);
- // now the pointers have changed, so find the allele we want... again!!!!!!
- //cerr << "registered alignment alleles, after haplotype construction," << endl << alleles << endl;
- bool hasHaplotypeAllele = false;
- bool dividedIndel = false;
- for (vector<Allele>::iterator p = alleles.begin(); p != alleles.end(); ++p) {
- // fix the "base"
- if (!p->isReference()) {
- p->update(haplotypeLength);
- }
- //cerr << *p << endl;
- if (p->position == haplotypeStart && p->position + p->referenceLength == haplotypeEnd) {
- aptr = &*p;
- if (isUnflankedIndel(*p)) {
- hasHaplotypeAllele = false;
- dividedIndel = true;
- } else {
- hasHaplotypeAllele = true;
- }
- break;
- }
- }
- if (hasHaplotypeAllele) {
- //cerr << "registered alignment alleles after (pass)," << endl << alleles << endl;
- return true;
- } else {
- if (!allowPartials) {
- alleles = savedAlleles; // reset alleles
- }
- //cerr << "registered alignment alleles after (fail)," << endl << alleles << endl;
- return false;
- //assert(hasHaplotypeAllele);
- }
- } else {
- cerr << "registered alignment alleles after (pass)," << endl << alleles << endl;
- return true;
- }
-void AlleleParser::buildHaplotypeAlleles(
- vector<Allele>& alleles,
- Samples& samples,
- map<string, vector<Allele*> >& alleleGroups,
- // provides observation group counts, counts of partial observations
- map<string, vector<Allele*> >& partialObservationGroups,
- map<Allele*, set<Allele*> >& partialObservationSupport,
- int allowedAlleleTypes) {
- int haplotypeLength = 1;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& allele = *a;
- if (allele.isReference()) continue;
- // check if there are any complex alleles
- if (allele.referenceLength > haplotypeLength) {
- DEBUG("reference length of " << allele << " is " << allele.referenceLength
- << " so extending haplotype");
- haplotypeLength = allele.referenceLength;
- }
- // check if we are embedded in a repeat structure
- if (allele.repeatRightBoundary > currentPosition + haplotypeLength) {
- DEBUG("right boundary " << allele.repeatRightBoundary << " for " << allele << " is past "
- << currentPosition + haplotypeLength);
- haplotypeLength = allele.repeatRightBoundary - currentPosition;
- }
- }
- // return here if we have no registered alignments
- if (registeredAlignments.empty()) return;
- // always attempt to determine haplotype length in this fashion
- {
- DEBUG("haplotype length is " << haplotypeLength);
- // NB: for indels in tandem repeats, if the indel sequence is
- // derived from the repeat structure, build the haplotype
- // across the entire repeat pattern. This ensures we actually
- // can discriminate between reference and indel/complex
- // alleles in the most common misalignment case. For indels
- // that match the repeat structure, we have cached the right
- // boundary of the repeat. We build the haplotype to the
- // maximal boundary indicated by the present alleles.
- int oldHaplotypeLength = haplotypeLength;
- do {
- oldHaplotypeLength = haplotypeLength;
- // rebuild everything...
- registeredAlleles.clear();
- samples.clear();
- long int maxAlignmentEnd = registeredAlignments.rbegin()->first;
- for (long int i = currentPosition+1; i < maxAlignmentEnd; ++i) {
- deque<RegisteredAlignment>& ras = registeredAlignments[i];
- for (deque<RegisteredAlignment>::iterator r = ras.begin(); r != ras.end(); ++r) {
- RegisteredAlignment& ra = *r;
- if (ra.start > currentPosition && ra.start < currentPosition + haplotypeLength
- || ra.end > currentPosition && ra.end < currentPosition + haplotypeLength) {
- Allele* aptr;
- bool allowPartials = true;
- ra.fitHaplotype(currentPosition, haplotypeLength, aptr, allowPartials);
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- registeredAlleles.push_back(&*a);
- }
- }
- }
- }
- getAlleles(samples, allowedAlleleTypes, haplotypeLength, true, true);
- alleleGroups.clear();
- groupAlleles(samples, alleleGroups);
- alleles = genotypeAlleles(alleleGroups, samples, parameters.onlyUseInputAlleles);
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& allele = *a;
- //cerr << "genotype allele, in haplotype length determination " << allele << endl;
- if (!allele.isReference()) {
- long int alleleend = (allele.position + allele.referenceLength);
- // this adjustment forces reference observations to overlap the ends of the indels
- //if (allele.isInsertion() || allele.isDeletion()) {
- // alleleend += 1;
- //}
- long int hapend = max((long int) alleleend,
- allele.repeatRightBoundary);
- /*
- cerr << currentPosition + haplotypeLength << " vs " << alleleend
- << " end " << hapend << " ? " << allele.position + allele.referenceLengthFromCigar()
- << " hapend for " << allele << endl;
- */
- if (hapend > currentPosition + haplotypeLength) {
- DEBUG("adjusting haplotype length to " << hapend - currentPosition
- << " to overlap allele end " << alleleend
- << " or right repeat boundary " << allele.repeatRightBoundary
- << " " << allele);
- haplotypeLength = hapend - currentPosition;
- }
- }
- }
- } while (haplotypeLength != oldHaplotypeLength); // && haplotypeLength < parameters.maxHaplotypeLength);
- // TODO?
- //haplotypeLength = min(parameters.maxHaplotypeLength, haplotypeLength);
- // TODO adjust haplotypes over indels to include +1 bp on 3' end
- // this will force reference observations across the entire allele
- // for each non-reference allele within the haplotype length of this
- // position, adjust the length and reference sequences of the adjacent
- // alleles
- DEBUG("fitting haplotype block " << currentPosition << " to " << currentPosition + haplotypeLength << ", " << haplotypeLength << "bp");
- lastHaplotypeLength = haplotypeLength;
- registeredAlleles.clear();
- samples.clear();
- vector<Allele*> haplotypeObservations;
- getCompleteObservationsOfHaplotype(samples, haplotypeLength, haplotypeObservations);
- addToRegisteredAlleles(haplotypeObservations);
- DEBUG("added to registered alleles");
- // add partial observations
- // first get all the alleles up to the end of the haplotype window
- vector<Allele*> partialHaplotypeObservations;
- if (parameters.usePartialObservations && haplotypeLength > 1) {
- getPartialObservationsOfHaplotype(samples, haplotypeLength, partialHaplotypeObservations);
- }
- DEBUG("got partial observations of haplotype");
- //addToRegisteredAlleles(partialHaplotypeObservations);
- // now align the sequences of these alleles to the haplotype alleles
- // and put them into the partials bin in each sample
- // correct quality and alternate sequence for reference
- for (vector<Allele*>::iterator h = haplotypeObservations.begin(); h != haplotypeObservations.end(); ++h) {
- if ((*h)->position == currentPosition && (*h)->referenceLength == haplotypeLength) {
- (*h)->currentBase = (*h)->alternateSequence;
- (*h)->setQuality();
- (*h)->update(haplotypeLength);
- if ((*h)->isReference()) { // HACK.. undoes damage of update() call
- (*h)->currentBase = (*h)->alternateSequence;
- }
- }
- }
- for (vector<Allele*>::iterator p = partialHaplotypeObservations.begin(); p != partialHaplotypeObservations.end(); ++p) {
- (*p)->currentBase = (*p)->alternateSequence;
- (*p)->setQuality();
- (*p)->update(haplotypeLength);
- }
- DEBUG("done updating");
- // debugging
- /*
- for (vector<Allele*>::iterator h = haplotypeObservations.begin(); h != haplotypeObservations.end(); ++h) {
- cerr << "haplo_obs\t" << *h << endl;
- }
- */
- if (parameters.debug) {
- cerr << "refr_seq\t" << currentPosition << "\t\t" << reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength) << endl;
- for (vector<Allele*>::iterator h = haplotypeObservations.begin(); h != haplotypeObservations.end(); ++h) {
- if ((*h)->position == currentPosition && (*h)->referenceLength == haplotypeLength) {
- cerr << "haplo_obs\t" << (*h)->position << "\t" << (*h)->lnquality << "\t"
- //<< (*h)->currentBase << "\t"
- << string(max((long int)0,(*h)->position-currentPosition), ' ')
- << (*h)->alternateSequence << "\t" << *h << endl;
- }
- }
- for (vector<Allele*>::iterator p = partialHaplotypeObservations.begin(); p != partialHaplotypeObservations.end(); ++p) {
- if ((*p)->position >= currentPosition && (*p)->position < currentPosition+haplotypeLength) {
- cerr << "part_obs\t" << (*p)->position << "\t" << (*p)->lnquality << "\t"
- //<< (*p)->currentBase << "\t"
- << string(max((long int)0,(*p)->position-currentPosition), ' ')
- << (*p)->alternateSequence << "\t" << *p << endl;
- }
- }
- }
- // now re-get the alleles
- getAlleles(samples, allowedAlleleTypes, haplotypeLength, false, true);
- // re-group the alleles using groupAlleles()
- alleleGroups.clear();
- groupAlleles(samples, alleleGroups);
- /*
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- cerr << s->first << endl;
- for (Sample::iterator t = s->second.begin(); t != s->second.end(); ++t) {
- cerr << t->first << " " << t->second << endl << endl;
- }
- }
- */
- Allele refAllele = genotypeAllele(ALLELE_REFERENCE,
- uppercase(reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength)),
- haplotypeLength,
- convert(haplotypeLength)+"M",
- haplotypeLength,
- currentPosition);
- // are there two alleles with the same alt sequence?
- // if so, homogenize them, and then re-sort the alleles
- // ensure uniqueness of registered alleles
- sort(registeredAlleles.begin(), registeredAlleles.end());
- registeredAlleles.erase(unique(registeredAlleles.begin(), registeredAlleles.end()), registeredAlleles.end());
- removeDuplicateAlleles(samples, alleleGroups, allowedAlleleTypes, haplotypeLength, refAllele);
- alleles = genotypeAlleles(alleleGroups, samples, parameters.onlyUseInputAlleles, haplotypeLength);
- // require all complete observations to effectively cover the same amount of sequence
- // basically, the "probe" length should be the same or we will incur bias when generating likelihoods
- // should these be put into the partial observations bin?
- int maxAlleleLength = haplotypeLength;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- // get max allele length
- if (a->alternateSequence.size() > maxAlleleLength) maxAlleleLength = a->alternateSequence.size();
- }
- // bound this to 50bp so as to not drop out reference obs when we have long insertions directly encoded in the reads
- maxAlleleLength = min(50, maxAlleleLength);
- //cerr << "max allele length is " << maxAlleleLength << " but haplotype length = " << haplotypeLength << endl;
- // XXX make work for deletions as well
- if (maxAlleleLength > haplotypeLength) {
- //cerr << "max allele length = " << maxAlleleLength << endl;
- removeAllelesWithoutReadSpan(registeredAlleles, maxAlleleLength, haplotypeLength);
- samples.clear();
- // require that reference obs are over an equivalent amount of sequence as the max allele length
- getAlleles(samples, allowedAlleleTypes, haplotypeLength, false, true);
- alleleGroups.clear();
- groupAlleles(samples, alleleGroups); // groups by alternate sequence
- // establish alleles again, now that we've filtered observations which don't have the required probe length
- alleles = genotypeAlleles(alleleGroups, samples, parameters.onlyUseInputAlleles, haplotypeLength);
- }
- // force the ref allele into the analysis, if it somehow isn't supported
- // this can happen where we don't have sufficient read span, such as in long deletions
- // or where our samples are homozygous for an alternate
- if (!parameters.useRefAllele) {
- vector<Allele> refAlleleVector;
- refAlleleVector.push_back(refAllele);
- alleles = alleleUnion(alleles, refAlleleVector);
- }
- // this is where we have established our genotype alleles
- /*
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- cerr << "genotype allele " << &*a << " " << *a << endl;
- }
- */
- // pick up observations that are potentially partial (not unambiguous)
- // the way to do this is to test the full observations as if they are partial, and if they
- // end up partially supporting multiple observations, removing them from the "complete" observations
- if (parameters.usePartialObservations && haplotypeLength > 1) {
- // check this out
- // here we are going to pass a set of full haplotype observations
- // and we'll remove now-partial obs from the full set
- samples.assignPartialSupport(alleles,
- haplotypeObservations,
- partialObservationGroups,
- partialObservationSupport,
- currentPosition,
- haplotypeLength);
- vector<Allele*> pureHaplotypeObservations;
- for (vector<Allele*>::iterator h = haplotypeObservations.begin(); h != haplotypeObservations.end(); ++h) {
- //if (partialObservationSupport.find(*h) != partialObservationSupport.end())
- //cerr << "partials for " << **h << " are " << partialObservationSupport[*h].size() << endl;
- if (partialObservationSupport.find(*h) != partialObservationSupport.end()
- && partialObservationSupport[*h].size() > 0) {
- DEBUG("full obs " << **h << " is actually partial and supports "
- << partialObservationSupport[*h].size() << " alleles");
- partialObservationSupport.erase(*h);
- // and remove from partial observation groups?
- } else {
- //cerr << "saving " << *h << endl;
- pureHaplotypeObservations.push_back(*h);
- }
- }
- // now regenerate partial observation groups using updated partial support
- partialObservationGroups.clear();
- for (map<Allele*, set<Allele*> >::iterator p = partialObservationSupport.begin();
- p != partialObservationSupport.end(); ++p) {
- set<Allele*>& supported = p->second;
- for (set<Allele*>::iterator s = supported.begin(); s != supported.end(); ++s) {
- partialObservationGroups[(*s)->currentBase].push_back(p->first);
- }
- }
- // and keep only the pure haplotype observations for further use
- haplotypeObservations = pureHaplotypeObservations;
- addToRegisteredAlleles(haplotypeObservations);
- // clean up potential duplicates
- sort(registeredAlleles.begin(), registeredAlleles.end());
- registeredAlleles.erase(unique(registeredAlleles.begin(), registeredAlleles.end()), registeredAlleles.end());
- samples.clearFullObservations();
- getAlleles(samples, allowedAlleleTypes, haplotypeLength, false, true);
- alleleGroups.clear();
- groupAlleles(samples, alleleGroups);
- // stash partials for later
- addToRegisteredAlleles(partialHaplotypeObservations);
- for (vector<Allele*>::iterator p = partialHaplotypeObservations.begin(); p != partialHaplotypeObservations.end(); ++p) {
- (*p)->currentBase = (*p)->alternateSequence;
- (*p)->setQuality();
- (*p)->update(haplotypeLength);
- }
- // now add in partial observations collected from partially-overlapping reads
- if (!partialHaplotypeObservations.empty()) {
- samples.assignPartialSupport(alleles,
- partialHaplotypeObservations,
- partialObservationGroups,
- partialObservationSupport,
- currentPosition,
- haplotypeLength);
- }
- }
- registeredAlleles.clear();
- // reset registered alleles
- for (map<long unsigned int, deque<RegisteredAlignment> >::iterator ras = registeredAlignments.begin(); ras != registeredAlignments.end(); ++ras) {
- deque<RegisteredAlignment>& rq = ras->second;
- for (deque<RegisteredAlignment>::iterator rai = rq.begin(); rai != rq.end(); ++rai) {
- RegisteredAlignment& ra = *rai;
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- registeredAlleles.push_back(&*a);
- }
- }
- }
- if (!parameters.useRefAllele) {
- vector<Allele> refAlleleVector;
- refAlleleVector.push_back(refAllele);
- alleles = alleleUnion(alleles, refAlleleVector);
- }
- //removeDuplicateAlleles(samples, alleleGroups, allowedAlleleTypes, haplotypeLength);
- //alleles = genotypeAlleles(alleleGroups, samples, parameters.onlyUseInputAlleles, haplotypeLength);
- }
- // hack......... TODO unhack this and set in Sample class
- samples.setSupportedAlleles();
- // processed flag..
- //unsetAllProcessedFlags();
- // redundant?
- // remove alleles which should no longer be considered
- removePreviousAlleles(registeredAlleles);
- lastHaplotypeLength = haplotypeLength;
-bool AlleleParser::getCompleteObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector<Allele*>& haplotypeObservations) {
- for (map<long unsigned int, deque<RegisteredAlignment> >::iterator ras = registeredAlignments.begin(); ras != registeredAlignments.end(); ++ras) {
- deque<RegisteredAlignment>& rq = ras->second;
- for (deque<RegisteredAlignment>::iterator rai = rq.begin(); rai != rq.end(); ++rai) {
- RegisteredAlignment& ra = *rai;
- Allele* aptr;
- // this guard prevents trashing allele pointers when getting partial observations
- if (ra.start <= currentPosition && ra.end >= currentPosition + haplotypeLength) {
- if (ra.fitHaplotype(currentPosition, haplotypeLength, aptr)) {
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- if (a->position == currentPosition && a->referenceLength == haplotypeLength) {
- haplotypeObservations.push_back(&*a);
- }
- }
- } /*else {
- DEBUG("could not fit observation " << ra.name << " with alleles " << ra.alleles);
- // the alleles have (possibly) been changed in fithaplotype, so add them to the registered alleles again
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- registeredAlleles.push_back(&*a);
- }
- }*/
- }
- }
- }
- DEBUG("got complete observations of haplotype");
-void AlleleParser::unsetAllProcessedFlags(void) {
- for (map<long unsigned int, deque<RegisteredAlignment> >::iterator ras = registeredAlignments.begin(); ras != registeredAlignments.end(); ++ras) {
- deque<RegisteredAlignment>& rq = ras->second;
- for (deque<RegisteredAlignment>::iterator rai = rq.begin(); rai != rq.end(); ++rai) {
- RegisteredAlignment& ra = *rai;
- Allele* aptr;
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- a->processed = false; // re-trigger use of all alleles
- }
- }
- }
-// process the next length bp of alignments, so as to get allele observations partially overlapping our calling window
-bool AlleleParser::getPartialObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector<Allele*>& partials) {
- //cerr << "getting partial observations of haplotype from " << currentPosition << " to " << currentPosition + haplotypeLength << endl;
- vector<Allele*> newAlleles;
- bool gettingPartials = true;
- DEBUG("in AlleleParser::getPartialObservationsOfHaplotype, updating alignment queue");
- updateAlignmentQueue(currentPosition + haplotypeLength, newAlleles, gettingPartials);
- DEBUG("in AlleleParser::getPartialObservationsOfHaplotype, done updating alignment queue");
- vector<Allele*> otherObs;
- vector<Allele*> partialObs;
- // now get the partial obs
- // get the max alignment end position, iterate to there
- long int maxAlignmentEnd = registeredAlignments.rbegin()->first;
- for (long int i = currentPosition+1; i < maxAlignmentEnd; ++i) {
- DEBUG("getting partial observations of haplotype @" << i);
- deque<RegisteredAlignment>& ras = registeredAlignments[i];
- for (deque<RegisteredAlignment>::iterator r = ras.begin(); r != ras.end(); ++r) {
- RegisteredAlignment& ra = *r;
- if (ra.start > currentPosition && ra.start < currentPosition + haplotypeLength
- || ra.end > currentPosition && ra.end < currentPosition + haplotypeLength) {
- Allele* aptr;
- bool allowPartials = true;
- ra.fitHaplotype(currentPosition, haplotypeLength, aptr, allowPartials);
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- if (a->position >= currentPosition
- && a->position < currentPosition+haplotypeLength
- && !a->isNull()) {
- //a->processed = false; // re-trigger use of all alleles
- partials.push_back(&*a);
- } else {
- //a->processed = false;
- otherObs.push_back(&*a);
- }
- }
- } else {
- for (vector<Allele>::iterator a = ra.alleles.begin(); a != ra.alleles.end(); ++a) {
- //a->processed = false;
- otherObs.push_back(&*a);
- }
- }
- }
- }
- //addToRegisteredAlleles(partialObs);
- addToRegisteredAlleles(otherObs);
-bool AlleleParser::getNextAlleles(Samples& samples, int allowedAlleleTypes) {
- long int nextPosition = currentPosition + lastHaplotypeLength;
- while (currentPosition < nextPosition) {
- if (!toNextPosition()) {
- return false;
- } else {
- // triggers cleanup
- if (justSwitchedTargets) {
- nextPosition = 0;
- justSwitchedTargets = false;
- }
- getAlleles(samples, allowedAlleleTypes);
- }
- }
- lastHaplotypeLength = 1;
- return true;
-void AlleleParser::getAlleles(Samples& samples, int allowedAlleleTypes,
- int haplotypeLength, bool getAllAllelesInHaplotype,
- bool ignoreProcessedFlag) {
- DEBUG2("getting alleles");
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s)
- s->second.clear();
- // TODO ^^^ this should be optimized for better scanning performance
- // if we have targets and are outside of the current target, don't return anything
- // add the reference allele to the analysis
- if (parameters.useRefAllele) {
- if (currentReferenceAllele) delete currentReferenceAllele; // clean up after last position
- currentReferenceAllele = referenceAllele(parameters.MQR, parameters.BQR);
- samples[referenceSampleName].clear();
- samples[referenceSampleName][currentReferenceAllele->currentBase].push_back(currentReferenceAllele);
- //alleles.push_back(currentReferenceAllele);
- }
- // get the variant alleles *at* the current position
- // and the reference alleles *overlapping* the current position
- for (vector<Allele*>::const_iterator a = registeredAlleles.begin(); a != registeredAlleles.end(); ++a) {
- Allele& allele = **a;
- //cerr << "getting alleles at position " << currentPosition << " with length " << haplotypeLength << " " << allele << endl;
- if (!ignoreProcessedFlag && allele.processed) continue;
- if (allowedAlleleTypes & allele.type
- && ((haplotypeLength > 1 &&
- ((allele.type == ALLELE_REFERENCE
- && allele.position <= currentPosition
- && allele.position + allele.referenceLength >= currentPosition + haplotypeLength)
- ||
- (allele.position == currentPosition
- && allele.referenceLength == haplotypeLength)
- ||
- (getAllAllelesInHaplotype
- && allele.type != ALLELE_REFERENCE
- && allele.position >= currentPosition
- && allele.position < currentPosition + haplotypeLength)))
- ||
- (haplotypeLength == 1 &&
- ((allele.type == ALLELE_REFERENCE
- && allele.position <= currentPosition
- && allele.position + allele.referenceLength > currentPosition)
- ||
- (allele.position == currentPosition)))
- ) ) {
- allele.update(haplotypeLength);
- if (allele.quality >= parameters.BQL0 && allele.currentBase != "N"
- && (allele.isReference() || !allele.alternateSequence.empty())) { // filters haplotype construction chaff
- //cerr << "keeping allele " << allele << endl;
- samples[allele.sampleID][allele.currentBase].push_back(*a);
- // XXX testing
- if (!getAllAllelesInHaplotype) {
- allele.processed = true;
- if (haplotypeLength > 1) {
- if (!allele.isReference() && !(allele.position == currentPosition && allele.referenceLength == haplotypeLength)) {
- cerr << "non-reference allele should not be added to result alleles because it does not match the haplotype!:" << endl;
- cerr << "haplotype is from " << currentPosition << " to " << currentPosition + haplotypeLength << ", " << haplotypeLength << "bp" << endl;
- cerr << allele << endl;
- assert(false);
- }
- }
- }
- }
- }
- }
- vector<string> samplesToErase;
- // now remove empty alleles from our return so as to not confuse processing
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- const string& name = s->first;
- Sample& sample = s->second;
- // move updated reference alleles to the right bin
- // everything else will get axed
- //sample.sortReferenceAlleles();
- bool empty = true;
- vector<string> genotypesToErase;
- // and remove any empty groups which remain
- for (Sample::iterator g = sample.begin(); g != sample.end(); ++g) {
- if (g->second.empty()) {
- //cerr << "sample " << name << " has an empty " << g->first << endl;
- //sample.erase(g);
- genotypesToErase.push_back(g->first);
- } else {
- // accumulate bitmap of unique types
- empty = false;
- }
- }
- for (vector<string>::iterator gt = genotypesToErase.begin(); gt != genotypesToErase.end(); ++gt) {
- sample.erase(*gt);
- }
- // and remove the entire sample if it has no alleles
- if (empty || currentSamplePloidy(name) == 0) {
- samplesToErase.push_back(name);
- }
- }
- for (vector<string>::iterator name = samplesToErase.begin(); name != samplesToErase.end(); ++name) {
- samples.erase(*name);
- }
- DEBUG2("done getting alleles");
-Allele* AlleleParser::referenceAllele(int mapQ, int baseQ) {
- string base = currentReferenceBaseString();
- //string name = reference.filename;
- string name = currentSequenceName; // this behavior matches old bambayes
- string sequencingTech = "reference";
- string baseQstr = "";
- //baseQstr += qualityInt2Char(baseQ);
- Allele* allele = new Allele(ALLELE_REFERENCE,
- currentSequenceName,
- currentPosition,
- &currentPosition,
- &currentReferenceBase,
- 1,
- currentPosition + 1,
- 0,
- 0,
- base,
- name,
- name,
- name,
- sequencingTech,
- true,
- baseQ,
- baseQstr,
- mapQ,
- false,
- false,
- false,
- "1M",
- currentPosition,
- currentPosition+1); // pair information
- allele->genotypeAllele = true;
- allele->baseQualities.push_back(baseQ);
- allele->update();
- return allele;
-vector<Allele> AlleleParser::genotypeAlleles(
- map<string, vector<Allele*> >& alleleGroups, // alleles grouped by equivalence
- Samples& samples, // alleles grouped by sample
- bool useOnlyInputAlleles,
- int haplotypeLength
- ) {
- vector<pair<Allele, int> > unfilteredAlleles;
- DEBUG("getting genotype alleles");
- for (map<string, vector<Allele*> >::iterator group = alleleGroups.begin(); group != alleleGroups.end(); ++group) {
- // for each allele that we're going to evaluate, we have to have at least one supporting read with
- // map quality >= MQL1 and the specific quality of the allele has to be >= BQL1
- DEBUG("allele group " << group->first);
- vector<Allele*>& alleles = group->second;
- DEBUG(alleles);
- if (!allATGC(group->second.front()->alternateSequence)) {
- DEBUG("allele group contains partially-null observations, skipping");
- continue;
- }
- if (alleles.size() < parameters.minAltTotal) {
- DEBUG("allele group lacks sufficient observations in the whole population (min-alternate-total)");
- continue;
- }
- bool passesFilters = false;
- int qSum = 0;
- int mqSum = 0;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- DEBUG2("allele " << **a);
- Allele& allele = **a;
- qSum += allele.quality;
- mqSum += allele.mapQuality;
- }
- if (qSum >= parameters.minSupportingAlleleQualitySum && mqSum >= parameters.minSupportingMappingQualitySum) {
- Allele& allele = *(alleles.front());
- int length = allele.length;
- int reflength = allele.referenceLength;
- string altseq = allele.alternateSequence;
- if (allele.type == ALLELE_REFERENCE) {
- length = haplotypeLength;
- reflength = haplotypeLength;
- if (haplotypeLength == 1) {
- altseq = currentReferenceBase;
- } else {
- altseq = uppercase(reference.getSubSequence(currentSequenceName, currentPosition, haplotypeLength));
- }
- }
- unfilteredAlleles.push_back(make_pair(genotypeAllele(allele.type,
- altseq,
- length,
- allele.cigar,
- reflength,
- allele.position,
- allele.repeatRightBoundary), qSum));
- }
- }
- DEBUG("found genotype alleles");
- map<Allele, int> filteredAlleles;
- DEBUG("filtering genotype alleles which are not supported by at least " << parameters.minAltCount
- << " observations comprising at least " << parameters.minAltFraction << " of the observations in a single individual");
- for (vector<pair<Allele, int> >::iterator p = unfilteredAlleles.begin();
- p != unfilteredAlleles.end(); ++p) {
- Allele& genotypeAllele = p->first;
- int qSum = p->second;
- DEBUG("genotype allele: " << genotypeAllele << " qsum " << qSum);
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- Sample& sample = s->second;
- int alleleCount = 0;
- int qsum = 0;
- Sample::iterator c = sample.find(genotypeAllele.currentBase);
- if (c != sample.end()) {
- vector<Allele*>& obs = c->second;
- alleleCount = obs.size();
- for (vector<Allele*>::iterator a = obs.begin(); a != obs.end(); ++a) {
- Allele& allele = **a;
- qsum += allele.quality;
- }
- }
- int observationCount = sample.observationCount();
- if (qsum >= parameters.minAltQSum
- && alleleCount >= parameters.minAltCount
- && ((float) alleleCount / (float) observationCount) >= parameters.minAltFraction) {
- DEBUG(genotypeAllele << " has support of " << alleleCount
- << " in individual " << s->first << " (" << observationCount << " obs)" << " and fraction "
- << (float) alleleCount / (float) observationCount);
- filteredAlleles[genotypeAllele] = qSum;
- break;
- //out << *genotypeAllele << endl;
- }
- }
- }
- DEBUG("filtered genotype alleles");
- vector<Allele> resultAlleles;
- vector<Allele> resultIndelAndMNPAlleles;
- //string refBase = currentReferenceBaseString();
- string refBase = currentReferenceHaplotype();
- if (parameters.useBestNAlleles == 0) {
- // this means "use everything"
- bool hasRefAllele = false;
- for (map<Allele, int>::iterator p = filteredAlleles.begin();
- p != filteredAlleles.end(); ++p) {
- if (p->first.currentBase == refBase)
- hasRefAllele = true;
- DEBUG("adding allele to result alleles " << p->first.currentBase);
- resultAlleles.push_back(p->first);
- }
- // and add the reference allele if we need it
- if (parameters.forceRefAllele && !hasRefAllele) {
- DEBUG("including reference allele");
- // XXX TODO change to get the haplotype of the reference sequence
- resultAlleles.insert(resultAlleles.begin(), genotypeAllele(ALLELE_REFERENCE, refBase, 1, "1M", 1, currentPosition));
- }
- } else {
- // this means, use the N best
- vector<pair<Allele, int> > sortedAlleles;
- for (map<Allele, int>::iterator p = filteredAlleles.begin();
- p != filteredAlleles.end(); ++p) {
- sortedAlleles.push_back(make_pair(p->first, p->second));
- }
- DEBUG2("sorting alleles to get best alleles");
- AllelePairIntCompare alleleQualityCompare;
- sort(sortedAlleles.begin(), sortedAlleles.end(), alleleQualityCompare);
- DEBUG("getting " << parameters.useBestNAlleles << " best SNP alleles, and all other alleles");
- bool hasRefAllele = false;
- for (vector<pair<Allele, int> >::iterator a = sortedAlleles.begin(); a != sortedAlleles.end(); ++a) {
- Allele& allele = a->first;
- if (allele.currentBase == refBase) {
- hasRefAllele = true;
- }
- DEBUG("adding allele to result alleles " << allele.currentBase);
- resultIndelAndMNPAlleles.push_back(allele);
- } else {
- DEBUG("adding allele to SNP alleles " << allele.currentBase);
- }
- */
- DEBUG("adding allele to result alleles " << allele.currentBase);
- resultAlleles.push_back(allele);
- DEBUG("allele quality sum " << a->second);
- }
- DEBUG("found " << sortedAlleles.size() << " SNP/ref alleles of which we now have " << resultAlleles.size() << endl
- << "and " << resultIndelAndMNPAlleles.size() << " INDEL and MNP alleles");
- // if we have reached the limit of allowable alleles, and still
- // haven't included the reference allele, include it
- if (parameters.forceRefAllele && !hasRefAllele) {
- DEBUG("including reference allele in analysis");
- resultAlleles.insert(resultAlleles.begin(), genotypeAllele(ALLELE_REFERENCE, refBase, 1, "1M", 1, currentPosition));
- }
- // if we now have too many alleles (most likely one too many), get rid of some
- while (resultAlleles.size() > parameters.useBestNAlleles) {
- resultAlleles.pop_back();
- }
- // drop the SNPs back into the set of alleles
- for (vector<Allele>::iterator a = resultIndelAndMNPAlleles.begin(); a != resultIndelAndMNPAlleles.end(); ++a) {
- resultAlleles.push_back(*a);
- }
- }
- // now add in the alleles from the input variant set
- if (useOnlyInputAlleles)
- resultAlleles.clear();
- // this needs to be fixed in a big way
- // the alleles have to be put into the local haplotype structure
- if (inputVariantAlleles.find(currentRefID) != inputVariantAlleles.end()) {
- map<long int, vector<Allele> >::iterator v = inputVariantAlleles[currentRefID].find(currentPosition);
- if (v != inputVariantAlleles[currentRefID].end()) {
- vector<Allele>& inputalleles = v->second;
- for (vector<Allele>::iterator a = inputalleles.begin(); a != inputalleles.end(); ++a) {
- DEBUG("evaluating input allele " << *a);
- Allele& allele = *a;
- // check if the allele is already present
- bool alreadyPresent = false;
- for (vector<Allele>::iterator r = resultAlleles.begin(); r != resultAlleles.end(); ++r) {
- if (r->equivalent(allele)) {
- alreadyPresent = true;
- break;
- }
- }
- if (!alreadyPresent) {
- resultAlleles.push_back(allele);
- }
- }
- }
- }
- // remove non-unique alleles after
- DEBUG2("found " << resultAlleles.size() << " result alleles");
- return resultAlleles;
-// homopolymer run length. number of consecutive nucleotides (prior to this
-// position) in the genome reference sequence matching the alternate allele,
-// after substituting the alternate in place of the reference sequence allele
-int AlleleParser::homopolymerRunLeft(string altbase) {
- int position = currentPosition - 1;
- int sequenceposition = position - currentSequenceStart;
- int runlength = 0;
- while (sequenceposition >= 0 && currentSequence.substr(sequenceposition, 1) == altbase) {
- ++runlength;
- --position;
- sequenceposition = position - currentSequenceStart;
- }
- return runlength;
-int AlleleParser::homopolymerRunRight(string altbase) {
- int position = currentPosition + 1;
- int sequenceposition = position - currentSequenceStart;
- int runlength = 0;
- while (sequenceposition >= 0 && currentSequence.substr(sequenceposition, 1) == altbase) {
- ++runlength;
- ++position;
- sequenceposition = position - currentSequenceStart;
- }
- return runlength;
-map<string, int> AlleleParser::repeatCounts(long int position, const string& sequence, int maxsize) {
- map<string, int> counts;
- for (int i = 1; i <= maxsize; ++i) {
- // subseq here i bases
- string seq = sequence.substr(position, i);
- // go left.
- int j = position - i;
- int leftsteps = 0;
- while (j >= 0 && seq == sequence.substr(j, i)) {
- j -= i;
- ++leftsteps;
- }
- // go right.
- j = position;
- int rightsteps = 0;
- while (j + i <= sequence.size() && seq == sequence.substr(j, i)) {
- j += i;
- ++rightsteps;
- }
- // if we went left and right a non-zero number of times,
- if (leftsteps + rightsteps > 1) {
- counts[seq] = leftsteps + rightsteps;
- }
- }
- // filter out redundant repeat information
- if (counts.size() > 1) {
- map<string, int> filteredcounts;
- map<string, int>::iterator c = counts.begin();
- string prev = c->first;
- filteredcounts[prev] = c->second; // shortest sequence
- ++c;
- for (; c != counts.end(); ++c) {
- int i = 0;
- string seq = c->first;
- while (i + prev.length() <= seq.length() && seq.substr(i, prev.length()) == prev) {
- i += prev.length();
- }
- if (i < seq.length()) {
- filteredcounts[seq] = c->second;
- prev = seq;
- }
- }
- return filteredcounts;
- } else {
- return counts;
- }
-bool AlleleParser::isRepeatUnit(const string& seq, const string& unit) {
- if (seq.size() % unit.size() != 0) {
- return false;
- } else {
- int maxrepeats = seq.size() / unit.size();
- for (int i = 0; i < maxrepeats; ++i) {
- if (seq.substr(i * unit.size(), unit.size()) != unit) {
- return false;
- }
- }
- return true;
- }
-bool AlleleParser::hasInputVariantAllelesAtCurrentPosition(void) {
- if (inputVariantAlleles.find(currentRefID) != inputVariantAlleles.end()) {
- map<long int, vector<Allele> >::iterator v = inputVariantAlleles[currentRefID].find(currentPosition);
- if (v != inputVariantAlleles[currentRefID].end()) {
- return true;
- }
- }
- return false;
-bool operator<(const AllelicPrimitive& a, const AllelicPrimitive& b) {
- return a.ref < b.ref && a.alt < b.alt;
diff --git a/src/AlleleParser.h b/src/AlleleParser.h
deleted file mode 100644
index 0eed6be..0000000
--- a/src/AlleleParser.h
+++ /dev/null
@@ -1,356 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <map>
-#include <deque>
-#include <utility>
-#include <algorithm>
-#include <time.h>
-#include <assert.h>
-#include <ctype.h>
-#include <cmath>
-#include "split.h"
-#include "join.h"
-#include "api/BamReader.h"
-#include "BedReader.h"
-#include "Parameters.h"
-#include "Utility.h"
-#include "Allele.h"
-#include "Sample.h"
-#include "Fasta.h"
-#include "TryCatch.h"
-#include "api/BamMultiReader.h"
-#include "Genotype.h"
-#include "CNV.h"
-#include "Result.h"
-#include "LeftAlign.h"
-#include "Variant.h"
-#include "version_git.h"
-// the size of the window of the reference which is always cached in memory
-// the window of haplotype basis alleles which we ensure we keep
-// increasing this reduces disk access when using haplotype basis alleles, but increases memory usage
-using namespace std;
-using namespace BamTools;
-// a structure holding information about our parameters
-// structure to encapsulate registered reads and alleles
-class RegisteredAlignment {
- friend ostream &operator<<(ostream &out, RegisteredAlignment &a);
- //BamAlignment alignment;
- long unsigned int start;
- long unsigned int end;
- int refid;
- string name;
- string readgroup;
- vector<Allele> alleles;
- int mismatches;
- int snpCount;
- int indelCount;
- int alleleTypes;
- Parameters parameters;
- RegisteredAlignment(BamAlignment& alignment, Parameters parameters)
- //: alignment(alignment)
- : start(alignment.Position)
- , end(alignment.GetEndPosition())
- , refid(alignment.RefID)
- , name(alignment.Name)
- , mismatches(0)
- , snpCount(0)
- , indelCount(0)
- , alleleTypes(0)
- , parameters(parameters)
- {
- alignment.GetTag("RG", readgroup);
- }
- void addAllele(Allele allele, bool mergeComplex = true,
- int maxComplexGap = 0, bool boundIndels = false);
- bool fitHaplotype(int pos, int haplotypeLength, Allele*& aptr, bool allowPartials = false);
-// functor to filter alleles outside of our analysis window
-class AlleleFilter {
- AlleleFilter(long unsigned int s, long unsigned int e) : start(s), end(e) {}
- // true of the allele is outside of our window
- bool operator()(Allele& a) {
- return !(start >= a.position && end < a.position + a.length);
- }
- bool operator()(Allele*& a) {
- return !(start >= a->position && end < a->position + a->length);
- }
- long unsigned int start, end;
-class AllelePtrCmp {
- bool operator()(Allele* &a, Allele* &b) {
- return a->type < b->type;
- }
-class AllelicPrimitive {
- string alt;
- string ref;
- AllelicPrimitive(string& r, string& a)
- : ref(r)
- , alt(a) { }
-bool operator<(const AllelicPrimitive& a, const AllelicPrimitive& b);
-void capBaseQuality(BamAlignment& alignment, int baseQualityCap);
-class AlleleParser {
- Parameters parameters; // holds operational parameters passed at program invocation
- AlleleParser(int argc, char** argv);
- ~AlleleParser(void);
- vector<string> sampleList; // list of sample names, indexed by sample id
- vector<string> sampleListFromBam; // sample names drawn from BAM file
- vector<string> sampleListFromVCF; // sample names drawn from input VCF
- map<string, string> samplePopulation; // population subdivisions of samples
- map<string, vector<string> > populationSamples; // inversion of samplePopulation
- map<string, string> readGroupToSampleNames; // maps read groups to samples
- map<string, string> readGroupToTechnology; // maps read groups to technologies
- vector<string> sequencingTechnologies; // a list of the present technologies
- CNVMap sampleCNV;
- // reference
- FastaReference reference;
- vector<string> referenceSequenceNames;
- map<int, string> referenceIDToName;
- string referenceSampleName;
- // target regions
- vector<BedTarget> targets;
- // returns true if we are within a target
- // useful for controlling output when we are reading from stdin
- bool inTarget(void);
- // bamreader
- BamMultiReader bamMultiReader;
- // bed reader
- BedReader bedReader;
- // VCF
- vcf::VariantCallFile variantCallFile;
- vcf::VariantCallFile variantCallInputFile; // input variant alleles, to target analysis
- vcf::VariantCallFile haplotypeVariantInputFile; // input alleles which will be used to construct haplotype alleles
- // input haplotype alleles
- //
- // as calling progresses, a window of haplotype basis alleles from the flanking sequence
- // map from starting position to length->alle
- map<long int, vector<AllelicPrimitive> > haplotypeBasisAlleles; // this is in the current reference sequence
- bool usingHaplotypeBasisAlleles;
- bool usingVariantInputAlleles;
- long int rightmostHaplotypeBasisAllelePosition;
- long int rightmostInputAllelePosition;
- void updateHaplotypeBasisAlleles(long int pos, int referenceLength);
- bool allowedHaplotypeBasisAllele(long int pos, string& ref, string& alt);
- Allele makeAllele(RegisteredAlignment& ra,
- AlleleType type,
- long int pos,
- int length,
- int basesLeft,
- int basesRight,
- string& readSequence,
- string& sampleName,
- BamAlignment& alignment,
- string& sequencingTech,
- long double qual,
- string& qualstr);
- vector<Allele*> registeredAlleles;
- map<long unsigned int, deque<RegisteredAlignment> > registeredAlignments;
- map<int, map<long int, vector<Allele> > > inputVariantAlleles; // all variants present in the input VCF, as 'genotype' alleles
- pair<int, long int> nextInputVariantPosition(void);
- void getInputVariantsInRegion(string& seq, long start = 0, long end = 0);
- void getAllInputVariants(void);
- // position sample genotype likelihood
- map<string, map<long int, map<string, map<string, long double> > > > inputGenotypeLikelihoods; // drawn from input VCF
- map<string, map<long int, map<Allele, int> > > inputAlleleCounts; // drawn from input VCF
- Sample* nullSample;
- bool loadNextPositionWithAlignmentOrInputVariant(BamAlignment& currentAlignment);
- bool loadNextPositionWithInputVariant(void);
- bool hasMoreInputVariants(void);
- void addCurrentGenotypeLikelihoods(map<int, vector<Genotype> >& genotypesByPloidy,
- vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods);
- void getInputAlleleCounts(vector<Allele>& genotypeAlleles, map<string, int>& inputAFs);
- // reference names indexed by id
- vector<RefData> referenceSequences;
- // ^^ vector of objects containing:
- //RefName; //!< Name of reference sequence
- //RefLength; //!< Length of reference sequence
- //RefHasAlignments; //!< True if BAM file contains alignments mapped to reference sequence
- string bamHeader;
- vector<string> bamHeaderLines;
- void openBams(void);
- void openOutputFile(void);
- void getSampleNames(void);
- void getPopulations(void);
- void getSequencingTechnologies(void);
- void loadSampleCNVMap(void);
- int currentSamplePloidy(string const& sample);
- int copiesOfLocus(Samples& samples);
- vector<int> currentPloidies(Samples& samples);
- void loadBamReferenceSequenceNames(void);
- void loadFastaReference(void);
- void loadReferenceSequence(BamAlignment& alignment);
- void loadReferenceSequence(string& seqname);
- string referenceSubstr(long int position, unsigned int length);
- void loadTargets(void);
- bool getFirstAlignment(void);
- bool getFirstVariant(void);
- void loadTargetsFromBams(void);
- void initializeOutputFiles(void);
- RegisteredAlignment& registerAlignment(BamAlignment& alignment, RegisteredAlignment& ra, string& sampleName, string& sequencingTech);
- void clearRegisteredAlignments(void);
- void updateAlignmentQueue(long int position, vector<Allele*>& newAlleles, bool gettingPartials = false);
- void updateInputVariants(long int pos, int referenceLength);
- void updateHaplotypeBasisAlleles(void);
- void removeAllelesWithoutReadSpan(vector<Allele*>& alleles, int probeLength, int haplotypeLength);
- void removeNonOverlappingAlleles(vector<Allele*>& alleles,
- int haplotypeLength = 1,
- bool getAllAllelesInHaplotype = false);
- void removePreviousAlleles(vector<Allele*>& alleles);
- void removeFilteredAlleles(vector<Allele*>& alleles);
- void removeDuplicateAlleles(Samples& samples, map<string, vector<Allele*> >& alleleGroups,
- int allowedAlleleTypes, int haplotypeLength, Allele& refallele);
- void updateRegisteredAlleles(void);
- void addToRegisteredAlleles(vector<Allele*>& alleles);
- void updatePriorAlleles(void);
- vector<BedTarget>* targetsInCurrentRefSeq(void);
- bool toNextRefID(void);
- bool loadTarget(BedTarget*);
- bool toFirstTargetPosition(void);
- bool toNextPosition(void);
- bool getCompleteObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector<Allele*>& haplotypeObservations);
- bool getPartialObservationsOfHaplotype(Samples& samples, int haplotypeLength, vector<Allele*>& partials);
- bool dummyProcessNextTarget(void);
- bool toNextTarget(void);
- void setPosition(long unsigned int);
- int currentSequencePosition(const BamAlignment& alignment);
- int currentSequencePosition();
- void unsetAllProcessedFlags(void);
- bool getNextAlleles(Samples& allelesBySample, int allowedAlleleTypes);
- // builds up haplotype (longer, e.g. ref+snp+ref) alleles to match the longest allele in genotypeAlleles
- // updates vector<Allele>& alleles with the new alleles
- void buildHaplotypeAlleles(vector<Allele>& alleles,
- Samples& allelesBySample,
- map<string, vector<Allele*> >& alleleGroups,
- // provides observation group counts, counts of partial observations
- map<string, vector<Allele*> >& partialObservationGroups,
- map<Allele*, set<Allele*> >& partialObservationSupport,
- int allowedAlleleTypes);
- void getAlleles(Samples& allelesBySample,
- int allowedAlleleTypes,
- int haplotypeLength = 1,
- bool getAllAllelesInHaplotype = false,
- bool ignoreProcessedAlleles = true);
- Allele* referenceAllele(int mapQ, int baseQ);
- Allele* alternateAllele(int mapQ, int baseQ);
- int homopolymerRunLeft(string altbase);
- int homopolymerRunRight(string altbase);
- map<string, int> repeatCounts(long int position, const string& sequence, int maxsize);
- map<long int, map<string, int> > cachedRepeatCounts; // cached version of previous
- bool isRepeatUnit(const string& seq, const string& unit);
- void setupVCFOutput(void);
- void setupVCFInput(void);
- string vcfHeader(void);
- bool hasInputVariantAllelesAtCurrentPosition(void);
- // gets the genotype alleles we should evaluate among the allele groups and
- // sample groups at the current position, according to our filters
- vector<Allele> genotypeAlleles(map<string, vector<Allele*> >& alleleGroups,
- Samples& samples,
- bool useOnlyInputAlleles,
- int haplotypeLength = 1);
- // pointer to current position in targets
- int fastaReferenceSequenceCount; // number of reference sequences
- bool hasTarget;
- BedTarget* currentTarget;
- long int currentPosition; // 0-based current position
- int lastHaplotypeLength;
- char currentReferenceBase;
- string currentSequence;
- char currentReferenceBaseChar();
- string currentReferenceBaseString();
- string::iterator currentReferenceBaseIterator();
- string currentReferenceHaplotype();
- // output files
- ofstream logFile, outputFile;
- ostream* output;
- // utility
- bool isCpG(string& altbase);
- string currentSequenceName;
- bool justSwitchedTargets; // to trigger clearing of queues, maps and such holding Allele*'s on jump
- Allele* currentReferenceAllele;
- Allele* currentAlternateAllele;
- //BedTarget currentSequenceBounds;
- long int currentSequenceStart;
- bool hasMoreAlignments;
- bool hasMoreVariants;;
- bool oneSampleAnalysis; // if we are analyzing just one sample, and there are no specified read groups
- int basesBeforeCurrentTarget; // number of bases in sequence we're storing before the current target
- int basesAfterCurrentTarget; // ........................................ after ...................
- int currentRefID;
- BamAlignment currentAlignment;
- vcf::Variant* currentVariant;
diff --git a/src/BGZF.cpp b/src/BGZF.cpp
deleted file mode 100644
index 2b74343..0000000
--- a/src/BGZF.cpp
+++ /dev/null
@@ -1,398 +0,0 @@
-// ***************************************************************************
-// BGZF.cpp (c) 2009 Derek Barnett, Michael Strömberg
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 16 August 2010 (DB)
-// ---------------------------------------------------------------------------
-// BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading & writing BGZF files
-// ***************************************************************************
-#include <algorithm>
-#include "BGZF.h"
-using namespace BamTools;
-using std::string;
-using std::min;
- : UncompressedBlockSize(DEFAULT_BLOCK_SIZE)
- , CompressedBlockSize(MAX_BLOCK_SIZE)
- , BlockLength(0)
- , BlockOffset(0)
- , BlockAddress(0)
- , IsOpen(false)
- , IsWriteOnly(false)
- , IsWriteUncompressed(false)
- , Stream(NULL)
- , UncompressedBlock(NULL)
- , CompressedBlock(NULL)
- try {
- CompressedBlock = new char[CompressedBlockSize];
- UncompressedBlock = new char[UncompressedBlockSize];
- } catch( std::bad_alloc& ba ) {
- fprintf(stderr, "BGZF ERROR: unable to allocate memory for our BGZF object.\n");
- exit(1);
- }
-// destructor
-BgzfData::~BgzfData(void) {
- if( CompressedBlock ) delete[] CompressedBlock;
- if( UncompressedBlock ) delete[] UncompressedBlock;
-// closes BGZF file
-void BgzfData::Close(void) {
- // skip if file not open, otherwise set flag
- if ( !IsOpen ) return;
- // if writing to file, flush the current BGZF block,
- // then write an empty block (as EOF marker)
- if ( IsWriteOnly ) {
- FlushBlock();
- int blockLength = DeflateBlock();
- fwrite(CompressedBlock, 1, blockLength, Stream);
- }
- // flush and close
- fflush(Stream);
- fclose(Stream);
- IsWriteUncompressed = false;
- IsOpen = false;
-// compresses the current block
-int BgzfData::DeflateBlock(void) {
- // initialize the gzip header
- char* buffer = CompressedBlock;
- memset(buffer, 0, 18);
- buffer[0] = GZIP_ID1;
- buffer[1] = (char)GZIP_ID2;
- buffer[2] = CM_DEFLATE;
- buffer[3] = FLG_FEXTRA;
- buffer[9] = (char)OS_UNKNOWN;
- buffer[10] = BGZF_XLEN;
- buffer[12] = BGZF_ID1;
- buffer[13] = BGZF_ID2;
- buffer[14] = BGZF_LEN;
- // set compression level
- const int compressionLevel = ( IsWriteUncompressed ? 0 : Z_DEFAULT_COMPRESSION );
- // loop to retry for blocks that do not compress enough
- int inputLength = BlockOffset;
- int compressedLength = 0;
- unsigned int bufferSize = CompressedBlockSize;
- while ( true ) {
- // initialize zstream values
- z_stream zs;
- zs.zalloc = NULL;
- zs.zfree = NULL;
- zs.next_in = (Bytef*)UncompressedBlock;
- zs.avail_in = inputLength;
- zs.next_out = (Bytef*)&buffer[BLOCK_HEADER_LENGTH];
- zs.avail_out = bufferSize - BLOCK_HEADER_LENGTH - BLOCK_FOOTER_LENGTH;
- // initialize the zlib compression algorithm
- if ( deflateInit2(&zs, compressionLevel, Z_DEFLATED, GZIP_WINDOW_BITS, Z_DEFAULT_MEM_LEVEL, Z_DEFAULT_STRATEGY) != Z_OK ) {
- fprintf(stderr, "BGZF ERROR: zlib deflate initialization failed.\n");
- exit(1);
- }
- // compress the data
- int status = deflate(&zs, Z_FINISH);
- if ( status != Z_STREAM_END ) {
- deflateEnd(&zs);
- // reduce the input length and try again
- if ( status == Z_OK ) {
- inputLength -= 1024;
- if( inputLength < 0 ) {
- fprintf(stderr, "BGZF ERROR: input reduction failed.\n");
- exit(1);
- }
- continue;
- }
- fprintf(stderr, "BGZF ERROR: zlib::deflateEnd() failed.\n");
- exit(1);
- }
- // finalize the compression routine
- if ( deflateEnd(&zs) != Z_OK ) {
- fprintf(stderr, "BGZF ERROR: zlib::deflateEnd() failed.\n");
- exit(1);
- }
- compressedLength = zs.total_out;
- if ( compressedLength > MAX_BLOCK_SIZE ) {
- fprintf(stderr, "BGZF ERROR: deflate overflow.\n");
- exit(1);
- }
- break;
- }
- // store the compressed length
- BgzfData::PackUnsignedShort(&buffer[16], (unsigned short)(compressedLength - 1));
- // store the CRC32 checksum
- unsigned int crc = crc32(0, NULL, 0);
- crc = crc32(crc, (Bytef*)UncompressedBlock, inputLength);
- BgzfData::PackUnsignedInt(&buffer[compressedLength - 8], crc);
- BgzfData::PackUnsignedInt(&buffer[compressedLength - 4], inputLength);
- // ensure that we have less than a block of data left
- int remaining = BlockOffset - inputLength;
- if ( remaining > 0 ) {
- if ( remaining > inputLength ) {
- fprintf(stderr, "BGZF ERROR: after deflate, remainder too large.\n");
- exit(1);
- }
- memcpy(UncompressedBlock, UncompressedBlock + inputLength, remaining);
- }
- BlockOffset = remaining;
- return compressedLength;
-// flushes the data in the BGZF block
-void BgzfData::FlushBlock(void) {
- // flush all of the remaining blocks
- while ( BlockOffset > 0 ) {
- // compress the data block
- int blockLength = DeflateBlock();
- // flush the data to our output stream
- int numBytesWritten = fwrite(CompressedBlock, 1, blockLength, Stream);
- if ( numBytesWritten != blockLength ) {
- fprintf(stderr, "BGZF ERROR: expected to write %u bytes during flushing, but wrote %u bytes.\n", blockLength, numBytesWritten);
- exit(1);
- }
- BlockAddress += blockLength;
- }
-// de-compresses the current block
-int BgzfData::InflateBlock(const int& blockLength) {
- // Inflate the block in m_BGZF.CompressedBlock into m_BGZF.UncompressedBlock
- z_stream zs;
- zs.zalloc = NULL;
- zs.zfree = NULL;
- zs.next_in = (Bytef*)CompressedBlock + 18;
- zs.avail_in = blockLength - 16;
- zs.next_out = (Bytef*)UncompressedBlock;
- zs.avail_out = UncompressedBlockSize;
- int status = inflateInit2(&zs, GZIP_WINDOW_BITS);
- if ( status != Z_OK ) {
- fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflateInit() failed\n");
- return -1;
- }
- status = inflate(&zs, Z_FINISH);
- if ( status != Z_STREAM_END ) {
- inflateEnd(&zs);
- fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflate() failed\n");
- return -1;
- }
- status = inflateEnd(&zs);
- if ( status != Z_OK ) {
- fprintf(stderr, "BGZF ERROR: could not decompress block - zlib::inflateEnd() failed\n");
- return -1;
- }
- return zs.total_out;
-// opens the BGZF file for reading (mode is either "rb" for reading, or "wb" for writing)
-bool BgzfData::Open(const string& filename, const char* mode, bool isWriteUncompressed ) {
- // determine open mode
- if ( strcmp(mode, "rb") == 0 )
- IsWriteOnly = false;
- else if ( strcmp(mode, "wb") == 0)
- IsWriteOnly = true;
- else {
- fprintf(stderr, "BGZF ERROR: unknown file mode: %s\n", mode);
- return false;
- }
- // ----------------------------------------------------------------
- // open Stream to read to/write from file, stdin, or stdout
- // stdin/stdout option contributed by Aaron Quinlan (2010-Jan-03)
- // read/write BGZF data to/from a file
- if ( (filename != "stdin") && (filename != "stdout") )
- Stream = fopen(filename.c_str(), mode);
- // read BGZF data from stdin
- else if ( (filename == "stdin") && (strcmp(mode, "rb") == 0 ) )
- Stream = freopen(NULL, mode, stdin);
- // write BGZF data to stdout
- else if ( (filename == "stdout") && (strcmp(mode, "wb") == 0) )
- Stream = freopen(NULL, mode, stdout);
- if ( !Stream ) {
- fprintf(stderr, "BGZF ERROR: unable to open file %s\n", filename.c_str() );
- return false;
- }
- // set flags, return success
- IsOpen = true;
- IsWriteUncompressed = isWriteUncompressed;
- return true;
-// reads BGZF data into a byte buffer
-int BgzfData::Read(char* data, const unsigned int dataLength) {
- if ( !IsOpen || IsWriteOnly || dataLength == 0 ) return 0;
- char* output = data;
- unsigned int numBytesRead = 0;
- while ( numBytesRead < dataLength ) {
- int bytesAvailable = BlockLength - BlockOffset;
- if ( bytesAvailable <= 0 ) {
- if ( !ReadBlock() ) return -1;
- bytesAvailable = BlockLength - BlockOffset;
- if ( bytesAvailable <= 0 ) break;
- }
- char* buffer = UncompressedBlock;
- int copyLength = min( (int)(dataLength-numBytesRead), bytesAvailable );
- memcpy(output, buffer + BlockOffset, copyLength);
- BlockOffset += copyLength;
- output += copyLength;
- numBytesRead += copyLength;
- }
- if ( BlockOffset == BlockLength ) {
- BlockAddress = ftell64(Stream);
- BlockOffset = 0;
- BlockLength = 0;
- }
- return numBytesRead;
-// reads a BGZF block
-bool BgzfData::ReadBlock(void) {
- char header[BLOCK_HEADER_LENGTH];
- int64_t blockAddress = ftell64(Stream);
- int count = fread(header, 1, sizeof(header), Stream);
- if ( count == 0 ) {
- BlockLength = 0;
- return true;
- }
- if ( count != sizeof(header) ) {
- fprintf(stderr, "BGZF ERROR: read block failed - could not read block header\n");
- return false;
- }
- if ( !BgzfData::CheckBlockHeader(header) ) {
- fprintf(stderr, "BGZF ERROR: read block failed - invalid block header\n");
- return false;
- }
- int blockLength = BgzfData::UnpackUnsignedShort(&header[16]) + 1;
- char* compressedBlock = CompressedBlock;
- memcpy(compressedBlock, header, BLOCK_HEADER_LENGTH);
- int remaining = blockLength - BLOCK_HEADER_LENGTH;
- count = fread(&compressedBlock[BLOCK_HEADER_LENGTH], 1, remaining, Stream);
- if ( count != remaining ) {
- fprintf(stderr, "BGZF ERROR: read block failed - could not read data from block\n");
- return false;
- }
- count = InflateBlock(blockLength);
- if ( count < 0 ) {
- fprintf(stderr, "BGZF ERROR: read block failed - could not decompress block data\n");
- return false;
- }
- if ( BlockLength != 0 )
- BlockOffset = 0;
- BlockAddress = blockAddress;
- BlockLength = count;
- return true;
-// seek to position in BGZF file
-bool BgzfData::Seek(int64_t position) {
- if ( !IsOpen ) return false;
- int blockOffset = (position & 0xFFFF);
- int64_t blockAddress = (position >> 16) & 0xFFFFFFFFFFFFLL;
- if ( fseek64(Stream, blockAddress, SEEK_SET) != 0 ) {
- fprintf(stderr, "BGZF ERROR: unable to seek in file\n");
- return false;
- }
- BlockLength = 0;
- BlockAddress = blockAddress;
- BlockOffset = blockOffset;
- return true;
-// get file position in BGZF file
-int64_t BgzfData::Tell(void) {
- if ( !IsOpen )
- return false;
- else
- return ( (BlockAddress << 16) | (BlockOffset & 0xFFFF) );
-// writes the supplied data into the BGZF buffer
-unsigned int BgzfData::Write(const char* data, const unsigned int dataLen) {
- if ( !IsOpen || !IsWriteOnly ) return false;
- // initialize
- unsigned int numBytesWritten = 0;
- const char* input = data;
- unsigned int blockLength = UncompressedBlockSize;
- // copy the data to the buffer
- while ( numBytesWritten < dataLen ) {
- unsigned int copyLength = min(blockLength - BlockOffset, dataLen - numBytesWritten);
- char* buffer = UncompressedBlock;
- memcpy(buffer + BlockOffset, input, copyLength);
- BlockOffset += copyLength;
- input += copyLength;
- numBytesWritten += copyLength;
- if ( BlockOffset == blockLength )
- FlushBlock();
- }
- return numBytesWritten;
diff --git a/src/BGZF.h b/src/BGZF.h
deleted file mode 100644
index c1ff2f8..0000000
--- a/src/BGZF.h
+++ /dev/null
@@ -1,320 +0,0 @@
-// ***************************************************************************
-// BGZF.h (c) 2009 Derek Barnett, Michael Strömberg
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 20 October 2010 (DB)
-// ---------------------------------------------------------------------------
-// BGZF routines were adapted from the bgzf.c code developed at the Broad
-// Institute.
-// ---------------------------------------------------------------------------
-// Provides the basic functionality for reading & writing BGZF files
-// ***************************************************************************
-#ifndef BGZF_H
-#define BGZF_H
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <string>
-#include "zlib.h"
-// Platform-specific large-file support
- #ifdef WIN32
- #define ftell64(a) _ftelli64(a)
- #define fseek64(a,b,c) _fseeki64(a,b,c)
- #else
- #define ftell64(a) ftello(a)
- #define fseek64(a,b,c) fseeko(a,b,c)
- #endif
-#endif // BAMTOOLS_LFS
-// Platform-specific type definitions
- #ifdef _MSC_VER
- typedef char int8_t;
- typedef unsigned char uint8_t;
- typedef short int16_t;
- typedef unsigned short uint16_t;
- typedef int int32_t;
- typedef unsigned int uint32_t;
- typedef long long int64_t;
- typedef unsigned long long uint64_t;
- #else
- #include <stdint.h>
- #endif
-namespace BamTools {
-// zlib constants
-const int GZIP_ID1 = 31;
-const int GZIP_ID2 = 139;
-const int CM_DEFLATE = 8;
-const int FLG_FEXTRA = 4;
-const int OS_UNKNOWN = 255;
-const int BGZF_XLEN = 6;
-const int BGZF_ID1 = 66;
-const int BGZF_ID2 = 67;
-const int BGZF_LEN = 2;
-const int GZIP_WINDOW_BITS = -15;
-const int Z_DEFAULT_MEM_LEVEL = 8;
-// BGZF constants
-const int BLOCK_HEADER_LENGTH = 18;
-const int BLOCK_FOOTER_LENGTH = 8;
-const int MAX_BLOCK_SIZE = 65536;
-const int DEFAULT_BLOCK_SIZE = 65536;
-struct BgzfData {
- // data members
- public:
- unsigned int UncompressedBlockSize;
- unsigned int CompressedBlockSize;
- unsigned int BlockLength;
- unsigned int BlockOffset;
- uint64_t BlockAddress;
- bool IsOpen;
- bool IsWriteOnly;
- bool IsWriteUncompressed;
- FILE* Stream;
- char* UncompressedBlock;
- char* CompressedBlock;
- // constructor & destructor
- public:
- BgzfData(void);
- ~BgzfData(void);
- // main interface methods
- public:
- // closes BGZF file
- void Close(void);
- // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing)
- bool Open(const std::string& filename, const char* mode, bool isWriteUncompressed = false);
- // reads BGZF data into a byte buffer
- int Read(char* data, const unsigned int dataLength);
- // seek to position in BGZF file
- bool Seek(int64_t position);
- // get file position in BGZF file
- int64_t Tell(void);
- // writes the supplied data into the BGZF buffer
- unsigned int Write(const char* data, const unsigned int dataLen);
- // internal methods
- private:
- // compresses the current block
- int DeflateBlock(void);
- // flushes the data in the BGZF block
- void FlushBlock(void);
- // de-compresses the current block
- int InflateBlock(const int& blockLength);
- // reads a BGZF block
- bool ReadBlock(void);
- // static 'utility' methods
- public:
- // checks BGZF block header
- static inline bool CheckBlockHeader(char* header);
- // packs an unsigned integer into the specified buffer
- static inline void PackUnsignedInt(char* buffer, unsigned int value);
- // packs an unsigned short into the specified buffer
- static inline void PackUnsignedShort(char* buffer, unsigned short value);
- // unpacks a buffer into a double
- static inline double UnpackDouble(char* buffer);
- static inline double UnpackDouble(const char* buffer);
- // unpacks a buffer into a float
- static inline float UnpackFloat(char* buffer);
- static inline float UnpackFloat(const char* buffer);
- // unpacks a buffer into a signed int
- static inline signed int UnpackSignedInt(char* buffer);
- static inline signed int UnpackSignedInt(const char* buffer);
- // unpacks a buffer into a signed short
- static inline signed short UnpackSignedShort(char* buffer);
- static inline signed short UnpackSignedShort(const char* buffer);
- // unpacks a buffer into an unsigned int
- static inline unsigned int UnpackUnsignedInt(char* buffer);
- static inline unsigned int UnpackUnsignedInt(const char* buffer);
- // unpacks a buffer into an unsigned short
- static inline unsigned short UnpackUnsignedShort(char* buffer);
- static inline unsigned short UnpackUnsignedShort(const char* buffer);
-// -------------------------------------------------------------
-// static 'utility' method implementations
-// checks BGZF block header
-bool BgzfData::CheckBlockHeader(char* header) {
- return (header[0] == GZIP_ID1 &&
- header[1] == (char)GZIP_ID2 &&
- header[2] == Z_DEFLATED &&
- (header[3] & FLG_FEXTRA) != 0 &&
- BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN &&
- header[12] == BGZF_ID1 &&
- header[13] == BGZF_ID2 &&
- BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN );
-// 'packs' an unsigned integer into the specified buffer
-void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) {
- buffer[0] = (char)value;
- buffer[1] = (char)(value >> 8);
- buffer[2] = (char)(value >> 16);
- buffer[3] = (char)(value >> 24);
-// 'packs' an unsigned short into the specified buffer
-void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) {
- buffer[0] = (char)value;
- buffer[1] = (char)(value >> 8);
-// 'unpacks' a buffer into a double (includes both non-const & const char* flavors)
-double BgzfData::UnpackDouble(char* buffer) {
- union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- un.valueBuffer[4] = buffer[4];
- un.valueBuffer[5] = buffer[5];
- un.valueBuffer[6] = buffer[6];
- un.valueBuffer[7] = buffer[7];
- return un.value;
-double BgzfData::UnpackDouble(const char* buffer) {
- union { double value; unsigned char valueBuffer[sizeof(double)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- un.valueBuffer[4] = buffer[4];
- un.valueBuffer[5] = buffer[5];
- un.valueBuffer[6] = buffer[6];
- un.valueBuffer[7] = buffer[7];
- return un.value;
-// 'unpacks' a buffer into a float (includes both non-const & const char* flavors)
-float BgzfData::UnpackFloat(char* buffer) {
- union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- return un.value;
-float BgzfData::UnpackFloat(const char* buffer) {
- union { float value; unsigned char valueBuffer[sizeof(float)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- return un.value;
-// 'unpacks' a buffer into a signed int (includes both non-const & const char* flavors)
-signed int BgzfData::UnpackSignedInt(char* buffer) {
- union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- return un.value;
-signed int BgzfData::UnpackSignedInt(const char* buffer) {
- union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- return un.value;
-// 'unpacks' a buffer into a signed short (includes both non-const & const char* flavors)
-signed short BgzfData::UnpackSignedShort(char* buffer) {
- union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- return un.value;
-signed short BgzfData::UnpackSignedShort(const char* buffer) {
- union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- return un.value;
-// 'unpacks' a buffer into an unsigned int (includes both non-const & const char* flavors)
-unsigned int BgzfData::UnpackUnsignedInt(char* buffer) {
- union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- return un.value;
-unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) {
- union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- un.valueBuffer[2] = buffer[2];
- un.valueBuffer[3] = buffer[3];
- return un.value;
-// 'unpacks' a buffer into an unsigned short (includes both non-const & const char* flavors)
-unsigned short BgzfData::UnpackUnsignedShort(char* buffer) {
- union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- return un.value;
-unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) {
- union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un;
- un.value = 0;
- un.valueBuffer[0] = buffer[0];
- un.valueBuffer[1] = buffer[1];
- return un.value;
-} // namespace BamTools
-#endif // BGZF_H
diff --git a/src/BedReader.cpp b/src/BedReader.cpp
deleted file mode 100644
index b629b4c..0000000
--- a/src/BedReader.cpp
+++ /dev/null
@@ -1,80 +0,0 @@
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <deque>
-#include <map>
-#include <iterator>
-#include <algorithm>
-#include <cmath>
-#include "split.h"
-#include "Utility.h"
-#include "BedReader.h"
-#include "../intervaltree/IntervalTree.h"
-using namespace std;
-vector<BedTarget> BedReader::entries(void) {
- vector<BedTarget> entries;
- if (!is_open()) {
- cerr << "bed targets file is not open" << endl;
- exit(1);
- }
- string line;
- while (std::getline(*this, line)) {
- // BED is base-numbered, 0-origin, half-open. This parse turns that
- // into base-numbered, 0-origin, fully-closed for internal use. All
- // coordinates used internally should be in the latter, and coordinates
- // from the user in the former should be converted immediately to the
- // internal format.
- vector<string> fields = split(line, " \t");
- BedTarget entry(strip(fields[0]),
- atoi(strip(fields[1]).c_str()),
- atoi(strip(fields[2]).c_str()) - 1, // use inclusive format internally
- (fields.size() >= 4) ? strip(fields[3]) : "");
- entries.push_back(entry);
- }
- return entries;
-bool BedReader::targetsContained(string& seq, long left, long right) {
- vector<Interval<BedTarget*> > results;
- intervals[seq].findContained(left, right, results);
- return !results.empty();
-bool BedReader::targetsOverlap(string& seq, long left, long right) {
- vector<Interval<BedTarget*> > results;
- intervals[seq].findOverlapping(left, right, results);
- return !results.empty();
-vector<BedTarget*> BedReader::targetsContaining(BedTarget& target) {
- vector<Interval<BedTarget*> > results;
- intervals[target.seq].findContained(target.left, target.right, results);
- vector<BedTarget*> contained;
- for (vector<Interval<BedTarget*> >::iterator r = results.begin(); r != results.end(); ++r) {
- contained.push_back(r->value);
- }
- return contained;
-vector<BedTarget*> BedReader::targetsOverlapping(BedTarget& target) {
- vector<Interval<BedTarget*> > results;
- intervals[target.seq].findOverlapping(target.left, target.right, results);
- vector<BedTarget*> overlapping;
- for (vector<Interval<BedTarget*> >::iterator r = results.begin(); r != results.end(); ++r) {
- overlapping.push_back(r->value);
- }
- return overlapping;
diff --git a/src/BedReader.h b/src/BedReader.h
deleted file mode 100644
index 9d7f271..0000000
--- a/src/BedReader.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef BEDREADER_H
-#define BEDREADER_H
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <deque>
-#include <map>
-#include <iterator>
-#include <algorithm>
-#include "../intervaltree/IntervalTree.h"
-#include "split.h"
-using namespace std;
-// stores the positional information of a bed target entry
-class BedTarget {
- string seq; // sequence name
- int left; // left position
- int right; // right position, adjusted to 0-base inclusive
- string desc; // descriptive information, target name typically
- BedTarget(string s, int l, int r, string d = "")
- : seq(s)
- , left(l)
- , right(r)
- , desc(d)
- { }
-class BedReader : public ifstream {
- vector<BedTarget> targets;
- map<string, IntervalTree<BedTarget*> > intervals; // intervals by reference sequence
- vector<BedTarget> entries(void);
- bool targetsContained(string& seq, long left, long right);
- bool targetsOverlap(string& seq, long left, long right);
- vector<BedTarget*> targetsContaining(BedTarget& target);
- vector<BedTarget*> targetsOverlapping(BedTarget& target);
- BedReader(void) { }
- BedReader(string& fname) {
- openFile(fname);
- buildIntervals();
- }
- void openFile(string& fname) {
- open(fname.c_str());
- targets = entries();
- }
- void buildIntervals(void) {
- map<string, vector<Interval<BedTarget*> > > intervalsBySeq;
- for (vector<BedTarget>::iterator t = targets.begin(); t != targets.end(); ++t) {
- intervalsBySeq[t->seq].push_back(Interval<BedTarget*>(t->left, t->right, &*t));
- }
- for (map<string, vector<Interval<BedTarget*> > >::iterator s = intervalsBySeq.begin(); s != intervalsBySeq.end(); ++s) {
- intervals[s->first] = IntervalTree<BedTarget*>(s->second);
- }
- }
diff --git a/src/Bias.cpp b/src/Bias.cpp
deleted file mode 100644
index 8ad380a..0000000
--- a/src/Bias.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-#ifndef BIAS_CPP
-#define BIAS_CPP
-#include "Bias.h"
-#include "convert.h"
-void Bias::open(string& file) {
- ifstream input;
- input.open(file.c_str());
- if (!input.is_open()) {
- cerr << "allele reference bias description " << file << " is not open" << endl;
- exit(1);
- }
- string line;
- bool firstrecord = true;
- int last;
- while (std::getline(input, line)) {
- vector<string> fields = split(line, " \t");
- if (firstrecord) {
- convert(fields[0], minLength);
- last = minLength - 1;
- }
- convert(fields[0], maxLength);
- if (maxLength != last + 1) {
- cerr << "gap or out-of-order bias list in " << file << endl;
- cerr << line << endl;
- exit(1);
- } else {
- last = maxLength;
- }
- long double dbias;
- convert(fields[1], dbias);
- biases.push_back(dbias);
- }
- input.close();
-long double Bias::bias(int length) {
- if (biases.empty()) return 1; // no bias
- if (length < minLength) {
- return biases.front();
- } else if (length > maxLength) {
- return biases.back();
- } else {
- return biases.at(length - minLength);
- }
-bool Bias::empty(void) {
- return biases.empty();
diff --git a/src/Bias.h b/src/Bias.h
deleted file mode 100644
index 3c8eae1..0000000
--- a/src/Bias.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef BIAS_H
-#define BIAS_H
-#include <map>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <cstdlib>
-#include "split.h"
-using namespace std;
-class Bias {
- int minLength;
- int maxLength;
- vector<long double> biases;
- Bias(void) : minLength(0), maxLength(0) { }
- void open(string& file);
- long double bias(int length);
- bool empty(void);
diff --git a/src/CNV.cpp b/src/CNV.cpp
deleted file mode 100644
index 866f93c..0000000
--- a/src/CNV.cpp
+++ /dev/null
@@ -1,57 +0,0 @@
-#include "CNV.h"
-bool CNVMap::load(string const& filename) {
- string line;
- ifstream cnvFile(filename.c_str(), ios::in);
- if (cnvFile.is_open()) {
- while (getline (cnvFile, line)) {
- vector<string> fields = split(line, " \t");
- // note conversion between 1 and 0 based
- setPloidy(fields.at(3), fields.at(0), atol(fields.at(1).c_str()), atol(fields.at(2).c_str()), atoi(fields.at(4).c_str()));
- }
- } else {
- return false;
- }
- return true;
-void CNVMap::setDefaultPloidy(int defploidy) {
- defaultPloidy = defploidy;
-void CNVMap::setPloidy(string const& sample, string const& seq, long int start, long int end, int ploidy) {
- sampleSeqCNV[sample][seq][make_pair(start, end)] = ploidy;
-int CNVMap::ploidy(string const& sample, string const& seq, long int position) {
- if (sampleSeqCNV.empty()) {
- return defaultPloidy;
- }
- SampleSeqCNVMap::iterator scnv = sampleSeqCNV.find(sample);
- if (scnv == sampleSeqCNV.end()) {
- return defaultPloidy;
- } else {
- map<string, map<pair<long int, long int>, int> >::iterator c = scnv->second.find(seq);
- if (c == scnv->second.end()) {
- return defaultPloidy;
- } else {
- map<pair<long int, long int>, int>& cnvs = c->second;
- for (map<pair<long int, long int>, int>::iterator i = cnvs.begin(); i != cnvs.end(); ++i) {
- pair<long int, long int> range = i->first;
- int copyNumber = i->second;
- if (range.first <= position && range.second > position) {
- return copyNumber;
- } else if (position > range.first && position > range.second) {
- // we've passed any potential matches in this sequence, and the map
- // is sorted by pair, so we don't have any matching ranges
- break;
- }
- }
- return defaultPloidy;
- }
- }
diff --git a/src/CNV.h b/src/CNV.h
deleted file mode 100644
index 141c118..0000000
--- a/src/CNV.h
+++ /dev/null
@@ -1,33 +0,0 @@
-#ifndef __CNV_H
-#define __CNV_H
-#include <map>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <utility>
-#include <stdlib.h>
-#include "split.h"
-using namespace std;
-typedef map<string, map<string, map<pair<long int, long int>, int> > > SampleSeqCNVMap;
-class CNVMap {
- CNVMap(void) : defaultPloidy(2) { }
- void setDefaultPloidy(int defploidy);
- bool load(string const& filename);
- int ploidy(string const& sample, string const& seq, long int position);
- void setPloidy(string const& sample, string const& seq, long int start, long int end, int ploidy);
- // note: this map is stored as 0-based, end position exclusive
- SampleSeqCNVMap sampleSeqCNV;
- int defaultPloidy;
diff --git a/src/Contamination.cpp b/src/Contamination.cpp
deleted file mode 100644
index 434f273..0000000
--- a/src/Contamination.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-#include "Contamination.h"
-#include "convert.h"
-void Contamination::open(string& file) {
- ifstream input;
- input.open(file.c_str());
- if (!input.is_open()) {
- cerr << "contamination estimates file " << file << " is not open" << endl;
- exit(1);
- }
- string line;
- int last;
- while (std::getline(input, line)) {
- vector<string> fields = split(line, " \t");
- if (fields.size() != 3) {
- cerr << "could not parse contamination estimate:" << endl
- << line << endl
- << "should be of the form:" << endl
- << "sample p(read=R|genotype=AR) p(read=A|genotype=AA)" << endl;
- exit(1);
- }
- string sample = fields[0];
- ContaminationEstimate c;
- convert(fields[1], c.probRefGivenHet);
- convert(fields[2], c.probRefGivenHomAlt);
- if (sample == "*") { // default
- defaultEstimate = c;
- } else {
- insert(make_pair(sample, c));
- }
- }
- input.close();
-double Contamination::probRefGivenHet(string& sample) {
- Contamination::iterator s = find(sample);
- if (s != end()) {
- return s->second.probRefGivenHet;
- } else {
- return defaultEstimate.probRefGivenHet;
- }
-double Contamination::probRefGivenHomAlt(string& sample) {
- Contamination::iterator s = find(sample);
- if (s != end()) {
- return s->second.probRefGivenHomAlt;
- } else {
- return defaultEstimate.probRefGivenHomAlt;
- }
-double Contamination::refBias(string& sample) {
- Contamination::iterator s = find(sample);
- if (s != end()) {
- return s->second.refBias;
- } else {
- return defaultEstimate.refBias;
- }
-ContaminationEstimate& Contamination::of(string& sample) {
- Contamination::iterator s = find(sample);
- if (s != end()) {
- return s->second;
- } else {
- return defaultEstimate;
- }
diff --git a/src/Contamination.h b/src/Contamination.h
deleted file mode 100644
index 80dfb0b..0000000
--- a/src/Contamination.h
+++ /dev/null
@@ -1,39 +0,0 @@
-#include <map>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <cstdlib>
-#include <cmath>
-#include "split.h"
-using namespace std;
-class ContaminationEstimate {
- double probRefGivenHet;
- double probRefGivenHomAlt;
- double refBias;
-ContaminationEstimate(void) : probRefGivenHet(0.5), probRefGivenHomAlt(0), refBias(0) { }
-ContaminationEstimate(double ra, double aa) : probRefGivenHet(ra), probRefGivenHomAlt(aa)
- {
- refBias = probRefGivenHet * 2 - 1;
- }
-class Contamination : public map<string, ContaminationEstimate> {
- ContaminationEstimate defaultEstimate;
- void open(string& file);
- double probRefGivenHet(string& sample);
- double probRefGivenHomAlt(string& sample);
- double refBias(string& sample);
- ContaminationEstimate& of(string& sample);
-Contamination(void) : defaultEstimate(ContaminationEstimate(0.5, 0)) { }
-Contamination(double ra, double aa) : defaultEstimate(ContaminationEstimate(ra, aa)) { }
diff --git a/src/DataLikelihood.cpp b/src/DataLikelihood.cpp
deleted file mode 100644
index 1779352..0000000
--- a/src/DataLikelihood.cpp
+++ /dev/null
@@ -1,299 +0,0 @@
-#include "DataLikelihood.h"
-#include "multichoose.h"
-#include "multipermute.h"
-long double
- Sample& sample,
- Genotype& genotype,
- double dependenceFactor,
- bool useMapQ,
- Bias& observationBias,
- bool standardGLs,
- vector<Allele>& genotypeAlleles,
- Contamination& contaminations,
- map<string, double>& freqs
- ) {
- //cerr << "P(" << genotype << " given" << endl << sample;
- int observationCount = sample.observationCount();
- vector<long double> alleleProbs = genotype.alleleProbabilities(observationBias);
- vector<int> observationCounts = genotype.alleleObservationCounts(sample);
- int countOut = 0;
- double countIn = 0;
- long double prodQout = 0; // the probability that the reads not in the genotype are all wrong
- long double prodSample = 0;
- if (standardGLs) {
- for (Sample::iterator s = sample.begin(); s != sample.end(); ++s) {
- const string& base = s->first;
- if (!genotype.containsAllele(base)) {
- vector<Allele*>& alleles = s->second;
- if (useMapQ) {
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- // take the lesser of mapping quality and base quality (in log space)
- prodQout += max((*a)->lnquality, (*a)->lnmapQuality);
- }
- } else {
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- prodQout += (*a)->lnquality;
- }
- }
- countOut += alleles.size();
- }
- }
- } else {
- vector<Allele*> emptyA;
- vector<Allele*> emptyB;
- for (set<string>::iterator c = sample.supportedAlleles.begin();
- c != sample.supportedAlleles.end(); ++c) {
- vector<Allele*>* alleles = &emptyA;
- Sample::iterator si = sample.find(*c);
- if (si != sample.end()) alleles = &si->second;
- vector<Allele*>* partials = &emptyB;
- map<string, vector<Allele*> >::iterator pi = sample.partialSupport.find(*c);
- if (pi != sample.partialSupport.end()) partials = &pi->second;
- bool onPartials = false;
- vector<Allele*>::iterator a = alleles->begin();
- bool hasPartials = !partials->empty();
- for ( ; (!hasPartials && a != alleles->end()) || a != partials->end(); ++a) {
- if (a == alleles->end()) {
- if (hasPartials) {
- a = partials->begin();
- onPartials = true;
- } else {
- break;
- }
- }
- Allele& obs = **a;
- //cerr << "observation: " << obs << endl;
- long double probi = 0;
- ContaminationEstimate& contamination = contaminations.of(obs.readGroupID);
- double scale = 1;
- // note that this will underflow if we have mapping quality = 0
- // we guard against this externally, by ignoring such alignments (quality has to be > MQL0)
- long double qual = (1.0 - exp(obs.lnquality)) * (1.0 - exp(obs.lnmapQuality));
- if (onPartials) {
- map<Allele*, set<Allele*> >::iterator r = sample.reversePartials.find(*a);
- if (r != sample.reversePartials.end()) {
- if (sample.reversePartials[*a].empty()) {
- cerr << "partial " << *a << " has empty reverse" << endl;
- exit(1);
- }
- //cerr << "partial " << *a << " supports potentially " << sample.reversePartials[*a].size() << " alleles : " << endl;
- //for (set<Allele*>::iterator m = sample.reversePartials[*a].begin();
- //m != sample.reversePartials[*a].end(); ++m) cerr << **m << " ";
- //cerr << endl;
- scale = (double)1/(double)sample.reversePartials[*a].size();
- qual *= scale;
- }
- }
- // TODO add partial obs, now that we have them recorded
- // how does this work?
- // each partial obs is recorded as supporting, but with observation probability scaled by the number of possible haplotypes it supports
- bool isInGenotype = false;
- long double asampl = genotype.alleleSamplingProb(obs);
- // for each of the unique genotype alleles
- for (vector<Allele>::iterator b = genotypeAlleles.begin(); b != genotypeAlleles.end(); ++b) {
- Allele& allele = *b;
- const string& base = allele.currentBase;
- if (genotype.containsAllele(base)
- && (obs.currentBase == base
- || (onPartials && sample.observationSupports(*a, &*b)))) {
- isInGenotype = true;
- // use the matched allele to estimate the asampl
- asampl = max(asampl, (long double)genotype.alleleSamplingProb(allele));
- }
- }
- if (asampl == 0) {
- // scale by frequency of (this) possibly contaminating allele
- asampl = contamination.probRefGivenHomAlt;
- } else if (asampl == 1) {
- // scale by frequency of (other) possibly contaminating alleles
- asampl = 1 - contamination.probRefGivenHomAlt;
- } else { //if (genotype.ploidy == 2) {
- // to deal with polyploids
- // note that this reduces to 1 for diploid heterozygotes
- // this term captures reference bias
- if (obs.isReference()) {
- asampl *= (contamination.probRefGivenHet / 0.5);
- } else {
- asampl *= ((1 - contamination.probRefGivenHet) / 0.5);
- }
- }
- // distribute observation support across haplotypes
- if (!isInGenotype) {
- prodQout += log(1-qual);
- countOut += scale;
- } else {
- prodSample += log(asampl*scale);
- }
- }
- }
- }
- // read dependence factor, asymptotically downgrade quality values of
- // successive reads to dependenceFactor * quality
- if (standardGLs) {
- if (countOut > 1) {
- prodQout *= (1 + (countOut - 1) * dependenceFactor) / countOut;
- }
- if (sum(observationCounts) == 0) {
- return prodQout;
- } else {
- //cerr << "P(obs|" << genotype << ") = " << prodQout + multinomialSamplingProbLn(alleleProbs, observationCounts) << endl << endl << string(80, '@') << endl << endl;
- return prodQout + multinomialSamplingProbLn(alleleProbs, observationCounts);
- //return prodQout + samplingProbLn(alleleProbs, observationCounts);
- }
- } else {
- if (countOut > 1) {
- prodQout *= (1 + (countOut - 1) * dependenceFactor) / countOut;
- }
- long double probObsGivenGt = prodQout + prodSample;
- return isinf(probObsGivenGt) ? 0 : probObsGivenGt;
- }
-vector<pair<Genotype*, long double> >
- Sample& sample,
- vector<Genotype*>& genotypes,
- double dependenceFactor,
- bool useMapQ,
- Bias& observationBias,
- bool standardGLs,
- vector<Allele>& genotypeAlleles,
- Contamination& contaminations,
- map<string, double>& freqs
- ) {
- vector<pair<Genotype*, long double> > results;
- for (vector<Genotype*>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
- Genotype& genotype = **g;
- results.push_back(
- make_pair(*g,
- probObservedAllelesGivenGenotype(
- sample,
- **g,
- dependenceFactor,
- useMapQ,
- observationBias,
- standardGLs,
- genotypeAlleles,
- contaminations,
- freqs)));
- }
- return results;
- Samples& samples,
- Results& results,
- AlleleParser* parser,
- map<int, vector<Genotype> >& genotypesByPloidy,
- Parameters& parameters,
- bool usingNull,
- Bias& observationBias,
- vector<Allele>& genotypeAlleles,
- Contamination& contaminationEstimates,
- map<string, double>& estimatedAlleleFrequencies,
- map<string, vector<vector<SampleDataLikelihood> > >& sampleDataLikelihoodsByPopulation,
- map<string, vector<vector<SampleDataLikelihood> > >& variantSampleDataLikelihoodsByPopulation,
- map<string, vector<vector<SampleDataLikelihood> > >& invariantSampleDataLikelihoodsByPopulation) {
- for (vector<string>::iterator n = parser->sampleList.begin(); n != parser->sampleList.end(); ++n) {
- //string sampleName = s->first;
- string& sampleName = *n;
- //DEBUG2("sample: " << sampleName);
- //Sample& sample = s->second;
- if (samples.find(sampleName) == samples.end()
- && !(parser->hasInputVariantAllelesAtCurrentPosition()
- || parameters.reportMonomorphic)) {
- continue;
- }
- Sample& sample = samples[sampleName];
- vector<Genotype>& genotypes = genotypesByPloidy[parser->currentSamplePloidy(sampleName)];
- vector<Genotype*> genotypesWithObs;
- for (vector<Genotype>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
- if (parameters.excludePartiallyObservedGenotypes) {
- if (g->sampleHasSupportingObservationsForAllAlleles(sample)) {
- genotypesWithObs.push_back(&*g);
- }
- } else if (parameters.excludeUnobservedGenotypes && usingNull) {
- if (g->sampleHasSupportingObservations(sample)) {
- //cerr << sampleName << " has suppporting obs for " << *g << endl;
- genotypesWithObs.push_back(&*g);
- } else if (g->hasNullAllele() && g->homozygous) {
- // this genotype will never be added if we are running in observed-only mode, but
- // we still need it for consistency
- genotypesWithObs.push_back(&*g);
- }
- } else {
- genotypesWithObs.push_back(&*g);
- }
- }
- // skip this sample if we have no observations supporting any of the genotypes we are going to evaluate
- if (genotypesWithObs.empty()) {
- continue;
- }
- vector<pair<Genotype*, long double> > probs
- = probObservedAllelesGivenGenotypes(sample, genotypesWithObs,
- parameters.RDF, parameters.useMappingQuality,
- observationBias, parameters.standardGLs,
- genotypeAlleles,
- contaminationEstimates,
- estimatedAlleleFrequencies);
- if (parameters.debug2) {
- for (vector<pair<Genotype*, long double> >::iterator p = probs.begin(); p != probs.end(); ++p) {
- cerr << parser->currentSequenceName << "," << (long unsigned int) parser->currentPosition + 1 << ","
- << sampleName << ",likelihood," << *(p->first) << "," << p->second << endl;
- }
- }
- Result& sampleData = results[sampleName];
- sampleData.name = sampleName;
- sampleData.observations = &sample;
- for (vector<pair<Genotype*, long double> >::iterator p = probs.begin(); p != probs.end(); ++p) {
- sampleData.push_back(SampleDataLikelihood(sampleName, &sample, p->first, p->second, 0));
- }
- sortSampleDataLikelihoods(sampleData);
- string& population = parser->samplePopulation[sampleName];
- vector<vector<SampleDataLikelihood> >& sampleDataLikelihoods = sampleDataLikelihoodsByPopulation[population];
- vector<vector<SampleDataLikelihood> >& variantSampleDataLikelihoods = variantSampleDataLikelihoodsByPopulation[population];
- vector<vector<SampleDataLikelihood> >& invariantSampleDataLikelihoods = invariantSampleDataLikelihoodsByPopulation[population];
- if (parameters.genotypeVariantThreshold != 0) {
- if (sampleData.size() > 1
- && abs(sampleData.at(1).prob - sampleData.front().prob)
- < parameters.genotypeVariantThreshold) {
- variantSampleDataLikelihoods.push_back(sampleData);
- } else {
- invariantSampleDataLikelihoods.push_back(sampleData);
- }
- } else {
- variantSampleDataLikelihoods.push_back(sampleData);
- }
- sampleDataLikelihoods.push_back(sampleData);
- }
diff --git a/src/DataLikelihood.h b/src/DataLikelihood.h
deleted file mode 100644
index 07aaaf4..0000000
--- a/src/DataLikelihood.h
+++ /dev/null
@@ -1,65 +0,0 @@
-#include <iostream>
-#include <vector>
-#include <utility> // pair
-#include <algorithm>
-#include <numeric>
-#include <vector>
-#include <iterator>
-#include <cmath>
-#include "Allele.h"
-#include "Sample.h"
-#include "Genotype.h"
-#include "Utility.h"
-#include "Multinomial.h"
-#include "Dirichlet.h"
-#include "Bias.h"
-#include "Contamination.h"
-#include "AlleleParser.h"
-#include "ResultData.h"
-using namespace std;
-long double
- Sample& sample,
- Genotype& genotype,
- double dependenceFactor,
- bool useMapQ,
- Bias& observationBias,
- bool standardGLs,
- vector<Allele>& genotypeAlleles,
- Contamination& contaminations,
- map<string, double>& freqs);
-vector<pair<Genotype*, long double> >
- Sample& sample,
- vector<Genotype*>& genotypes,
- double dependenceFactor,
- bool useMapQ,
- Bias& observationBias,
- bool standardGLs,
- vector<Allele>& genotypeAlleles,
- Contamination& contaminations,
- map<string, double>& freqs);
- Samples& samples,
- Results& results,
- AlleleParser* parser,
- map<int, vector<Genotype> >& genotypesByPloidy,
- Parameters& parameters,
- bool usingNull,
- Bias& observationBias,
- vector<Allele>& genotypeAlleles,
- Contamination& contaminationEstimates,
- map<string, double>& estimatedAlleleFrequencies,
- map<string, vector<vector<SampleDataLikelihood> > >& sampleDataLikelihoodsByPopulation,
- map<string, vector<vector<SampleDataLikelihood> > >& variantSampleDataLikelihoodsByPopulation,
- map<string, vector<vector<SampleDataLikelihood> > >& invariantSampleDataLikelihoodsByPopulation);
diff --git a/src/Dirichlet.cpp b/src/Dirichlet.cpp
deleted file mode 100644
index 08242b2..0000000
--- a/src/Dirichlet.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-#include "Dirichlet.h"
-#include "Sum.h"
-#include "Product.h"
-#include <iostream>
-long double dirichlet(const vector<long double>& probs,
- const vector<int>& obs,
- long double s) {
- vector<long double> alphas;
- for (vector<int>::const_iterator o = obs.begin(); o != obs.end(); ++o)
- alphas.push_back(*o + 1 * s);
- vector<long double> obsProbs;
- vector<long double>::const_iterator a = alphas.begin();
- vector<long double>::const_iterator p = probs.begin();
- for (; p != probs.end() && a != alphas.end(); ++p, ++a) {
- obsProbs.push_back(pow(*p, *a - 1));
- }
- return 1.0 / beta(alphas) * product(obsProbs);
-long double dirichletMaximumLikelihoodRatio(const vector<long double>& probs,
- const vector<int>& obs,
- long double s) {
- long double maximizingObs = obs.size() / sum(obs);
- vector<int> m(obs.size(), maximizingObs);
- return dirichlet(probs, obs, s) / dirichlet(probs, m, s);
-// XXX the logspace versions are broken
-long double dirichletln(const vector<long double>& probs,
- const vector<int>& obs,
- long double s) {
- vector<long double> alphas;
- for (vector<int>::const_iterator o = obs.begin(); o != obs.end(); ++o)
- alphas.push_back(*o + 1 * s);
- vector<long double> obsProbs;
- vector<long double>::const_iterator a = alphas.begin();
- vector<long double>::const_iterator p = probs.begin();
- for (; p != probs.end() && a != alphas.end(); ++p, ++a) {
- obsProbs.push_back(powln(log(*p), *a - 1));
- }
- return log(1.0) - (betaln(alphas) + sum(obsProbs));
-long double dirichletMaximumLikelihoodRatioln(const vector<long double>& probs,
- const vector<int>& obs,
- long double s) {
- long double maximizingObs = (long double) obs.size() / (long double) sum(obs);
- vector<int> m(obs.size(), maximizingObs);
- return dirichletln(probs, obs, s) - dirichletln(probs, m, s);
diff --git a/src/Dirichlet.h b/src/Dirichlet.h
deleted file mode 100644
index 847f0da..0000000
--- a/src/Dirichlet.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#include <vector>
-#include "Utility.h"
-#include "Sum.h"
-long double dirichletMaximumLikelihoodRatio(const vector<long double>& probs, const vector<int>& obs, long double s = (long double) 1.0);
-long double dirichlet(const vector<long double>& probs, const vector<int>& obs, long double s = (long double) 1.0);
-long double dirichletMaximumLikelihoodRatioln(const vector<long double>& probs, const vector<int>& obs, long double s = (long double) 1.0);
-long double dirichletln(const vector<long double>& probs, const vector<int>& obs, long double s = (long double) 1.0);
diff --git a/src/Ewens.cpp b/src/Ewens.cpp
deleted file mode 100644
index 7616bd2..0000000
--- a/src/Ewens.cpp
+++ /dev/null
@@ -1,51 +0,0 @@
-#include "Ewens.h"
-long double alleleFrequencyProbability(const map<int, int>& alleleFrequencyCounts, long double theta) {
- int M = 0;
- long double p = 1;
- for (map<int, int>::const_iterator f = alleleFrequencyCounts.begin(); f != alleleFrequencyCounts.end(); ++f) {
- int frequency = f->first;
- int count = f->second;
- M += frequency * count;
- p *= (double) pow((double) theta, (double) count) / ((double) pow((double) frequency, (double) count) * factorial(count));
- }
- long double thetaH = 1;
- for (int h = 1; h < M; ++h)
- thetaH *= theta + h;
- return factorial(M) / (theta * thetaH) * p;
-AlleleFrequencyProbabilityCache alleleFrequencyProbabilityCache;
-long double alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta) {
- return alleleFrequencyProbabilityCache.alleleFrequencyProbabilityln(alleleFrequencyCounts, theta);
-// Implements Ewens' Sampling Formula, which provides probability of a given
-// partition of alleles in a sample from a population
-long double __alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta) {
- int M = 0; // multiplicity of site
- long double p = 0;
- long double thetaln = log(theta);
- for (map<int, int>::const_iterator f = alleleFrequencyCounts.begin(); f != alleleFrequencyCounts.end(); ++f) {
- int frequency = f->first;
- int count = f->second;
- M += frequency * count;
- p += powln(thetaln, count) - (powln(log(frequency), count) + factorialln(count));
- }
- long double thetaH = 0;
- for (int h = 1; h < M; ++h)
- thetaH += log(theta + h);
- return factorialln(M) - (thetaln + thetaH) + p;
diff --git a/src/Ewens.h b/src/Ewens.h
deleted file mode 100644
index 4c1d9f6..0000000
--- a/src/Ewens.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#include <map>
-#include <cmath>
-#include "Utility.h"
-using namespace std;
-// genotype priors
-long double alleleFrequencyProbability(const map<int, int>& alleleFrequencyCounts, long double theta);
-long double alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta);
-long double __alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta);
-class AlleleFrequencyProbabilityCache : public map<map<int, int>, long double> {
- long double alleleFrequencyProbabilityln(const map<int, int>& counts, long double theta) {
- map<map<int, int>, long double>::iterator p = find(counts);
- if (p == end()) {
- long double pln = __alleleFrequencyProbabilityln(counts, theta);
- insert(make_pair(counts, pln));
- return pln;
- } else {
- return p->second;
- }
- }
diff --git a/src/Fasta.cpp b/src/Fasta.cpp
deleted file mode 100644
index 6488de1..0000000
--- a/src/Fasta.cpp
+++ /dev/null
@@ -1,308 +0,0 @@
-// ***************************************************************************
-// FastaIndex.cpp (c) 2010 Erik Garrison <erik.garrison at bc.edu>
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 9 February 2010 (EG)
-// ---------------------------------------------------------------------------
-#include "Fasta.h"
-FastaIndexEntry::FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len)
- : name(name)
- , length(length)
- , offset(offset)
- , line_blen(line_blen)
- , line_len(line_len)
-FastaIndexEntry::FastaIndexEntry(void) // empty constructor
-{ clear(); }
-void FastaIndexEntry::clear(void)
- name = "";
- length = 0;
- offset = -1; // no real offset will ever be below 0, so this allows us to
- // check if we have already recorded a real offset
- line_blen = 0;
- line_len = 0;
-ostream& operator<<(ostream& output, const FastaIndexEntry& e) {
- // just write the first component of the name, for compliance with other tools
- output << split(e.name, ' ').at(0) << "\t" << e.length << "\t" << e.offset << "\t" <<
- e.line_blen << "\t" << e.line_len;
- return output; // for multiple << operators.
-void FastaIndex::readIndexFile(string fname) {
- string line;
- long long linenum = 0;
- indexFile.open(fname.c_str(), ifstream::in);
- if (indexFile.is_open()) {
- while (getline (indexFile, line)) {
- ++linenum;
- // the fai format defined in samtools is tab-delimited, every line being:
- // fai->name[i], (int)x.len, (long long)x.offset, (int)x.line_blen, (int)x.line_len
- vector<string> fields = split(line, '\t');
- if (fields.size() == 5) { // if we don't get enough fields then there is a problem with the file
- // note that fields[0] is the sequence name
- char* end;
- string name = split(fields[0], " \t").at(0); // key by first token of name
- sequenceNames.push_back(name);
- this->insert(make_pair(name, FastaIndexEntry(fields[0], atoi(fields[1].c_str()),
- strtoll(fields[2].c_str(), &end, 10),
- atoi(fields[3].c_str()),
- atoi(fields[4].c_str()))));
- } else {
- cerr << "Warning: malformed fasta index file " << fname <<
- "does not have enough fields @ line " << linenum << endl;
- cerr << line << endl;
- exit(1);
- }
- }
- } else {
- cerr << "could not open index file " << fname << endl;
- exit(1);
- }
-// for consistency this should be a class method
-bool fastaIndexEntryCompare ( FastaIndexEntry a, FastaIndexEntry b) { return (a.offset<b.offset); }
-ostream& operator<<(ostream& output, FastaIndex& fastaIndex) {
- vector<FastaIndexEntry> sortedIndex;
- for(vector<string>::const_iterator it = fastaIndex.sequenceNames.begin(); it != fastaIndex.sequenceNames.end(); ++it)
- {
- sortedIndex.push_back(fastaIndex[*it]);
- }
- sort(sortedIndex.begin(), sortedIndex.end(), fastaIndexEntryCompare);
- for( vector<FastaIndexEntry>::iterator fit = sortedIndex.begin(); fit != sortedIndex.end(); ++fit) {
- output << *fit << endl;
- }
-void FastaIndex::indexReference(string refname) {
- // overview:
- // for line in the reference fasta file
- // track byte offset from the start of the file
- // if line is a fasta header, take the name and dump the last sequnece to the index
- // if line is a sequence, add it to the current sequence
- //cerr << "indexing fasta reference " << refname << endl;
- string line;
- FastaIndexEntry entry; // an entry buffer used in processing
- entry.clear();
- int line_length = 0;
- long long offset = 0; // byte offset from start of file
- long long line_number = 0; // current line number
- bool mismatchedLineLengths = false; // flag to indicate if our line length changes mid-file
- // this will be used to raise an error
- // if we have a line length change at
- // any line other than the last line in
- // the sequence
- bool emptyLine = false; // flag to catch empty lines, which we allow for
- // index generation only on the last line of the sequence
- ifstream refFile;
- refFile.open(refname.c_str());
- if (refFile.is_open()) {
- while (getline(refFile, line)) {
- ++line_number;
- line_length = line.length();
- if (line[0] == ';') {
- // fasta comment, skip
- } else if (line[0] == '+') {
- // fastq quality header
- getline(refFile, line);
- line_length = line.length();
- offset += line_length + 1;
- // get and don't handle the quality line
- getline(refFile, line);
- line_length = line.length();
- } else if (line[0] == '>' || line[0] == '@') { // fasta /fastq header
- // if we aren't on the first entry, push the last sequence into the index
- if (entry.name != "") {
- mismatchedLineLengths = false; // reset line length error tracker for every new sequence
- emptyLine = false;
- flushEntryToIndex(entry);
- entry.clear();
- }
- entry.name = line.substr(1, line_length - 1);
- } else { // we assume we have found a sequence line
- if (entry.offset == -1) // NB initially the offset is -1
- entry.offset = offset;
- entry.length += line_length;
- if (entry.line_len) {
- //entry.line_len = entry.line_len ? entry.line_len : line_length + 1;
- if (mismatchedLineLengths || emptyLine) {
- if (line_length == 0) {
- emptyLine = true; // flag empty lines, raise error only if this is embedded in the sequence
- } else {
- if (emptyLine) {
- cerr << "ERROR: embedded newline";
- } else {
- cerr << "ERROR: mismatched line lengths";
- }
- cerr << " at line " << line_number << " within sequence " << entry.name <<
- endl << "File not suitable for fasta index generation." << endl;
- exit(1);
- }
- }
- // this flag is set here and checked on the next line
- // because we may have reached the end of the sequence, in
- // which case a mismatched line length is OK
- if (entry.line_len != line_length + 1) {
- mismatchedLineLengths = true;
- if (line_length == 0) {
- emptyLine = true; // flag empty lines, raise error only if this is embedded in the sequence
- }
- }
- } else {
- entry.line_len = line_length + 1; // first line
- }
- entry.line_blen = entry.line_len - 1;
- }
- offset += line_length + 1;
- }
- // we've hit the end of the fasta file!
- // flush the last entry
- flushEntryToIndex(entry);
- } else {
- cerr << "could not open reference file " << refname << " for indexing!" << endl;
- exit(1);
- }
-void FastaIndex::flushEntryToIndex(FastaIndexEntry& entry) {
- string name = split(entry.name, " \t").at(0); // key by first token of name
- sequenceNames.push_back(name);
- this->insert(make_pair(name, FastaIndexEntry(entry.name, entry.length,
- entry.offset, entry.line_blen,
- entry.line_len)));
-void FastaIndex::writeIndexFile(string fname) {
- //cerr << "writing fasta index file " << fname << endl;
- ofstream file;
- file.open(fname.c_str());
- if (file.is_open()) {
- file << *this;
- } else {
- cerr << "could not open index file " << fname << " for writing!" << endl;
- exit(1);
- }
-FastaIndex::~FastaIndex(void) {
- indexFile.close();
-FastaIndexEntry FastaIndex::entry(string name) {
- FastaIndex::iterator e = this->find(name);
- if (e == this->end()) {
- cerr << "unable to find FASTA index entry for '" << name << "'" << endl;
- exit(1);
- } else {
- return e->second;
- }
-string FastaIndex::indexFileExtension() { return ".fai"; }
-FastaReference::FastaReference(string reffilename) {
-void FastaReference::open(string reffilename) {
- filename = reffilename;
- if (!(file = fopen(filename.c_str(), "r"))) {
- cerr << "could not open " << filename << endl;
- exit(1);
- }
- index = new FastaIndex();
- struct stat stFileInfo;
- string indexFileName = filename + index->indexFileExtension();
- // if we can find an index file, use it
- if(stat(indexFileName.c_str(), &stFileInfo) == 0) {
- index->readIndexFile(indexFileName);
- } else { // otherwise, read the reference and generate the index file in the cwd
- cerr << "index file " << indexFileName << " not found, generating..." << endl;
- index->indexReference(filename);
- index->writeIndexFile(indexFileName);
- }
-FastaReference::~FastaReference(void) {
- fclose(file);
- delete index;
-string FastaReference::getSequence(string seqname) {
- FastaIndexEntry entry = index->entry(seqname);
- int newlines_in_sequence = entry.length / entry.line_blen;
- int seqlen = newlines_in_sequence + entry.length;
- char* seq = (char*) calloc (seqlen + 1, sizeof(char));
- fseek64(file, entry.offset, SEEK_SET);
- fread(seq, sizeof(char), seqlen, file);
- seq[seqlen] = '\0';
- char* pbegin = seq;
- char* pend = seq + (seqlen/sizeof(char));
- pend = remove(pbegin, pend, '\n');
- pend = remove(pbegin, pend, '\0');
- string s = seq;
- free(seq);
- s.resize((pend - pbegin)/sizeof(char));
- return s;
-// TODO cleanup; odd function. use a map
-string FastaReference::sequenceNameStartingWith(string seqnameStart) {
- try {
- return (*index)[seqnameStart].name;
- } catch (exception& e) {
- cerr << e.what() << ": unable to find index entry for " << seqnameStart << endl;
- exit(1);
- }
-string FastaReference::getSubSequence(string seqname, int start, int length) {
- FastaIndexEntry entry = index->entry(seqname);
- length = min(length, entry.length - start);
- if (start < 0 || length < 1) {
- return "";
- }
- // we have to handle newlines
- // approach: count newlines before start
- // count newlines by end of read
- // subtracting newlines before start find count of embedded newlines
- int newlines_before = start > 0 ? (start - 1) / entry.line_blen : 0;
- int newlines_by_end = (start + length - 1) / entry.line_blen;
- int newlines_inside = newlines_by_end - newlines_before;
- int seqlen = length + newlines_inside;
- char* seq = (char*) calloc (seqlen + 1, sizeof(char));
- fseek64(file, (off_t) (entry.offset + newlines_before + start), SEEK_SET);
- fread(seq, sizeof(char), (off_t) seqlen, file);
- seq[seqlen] = '\0';
- char* pbegin = seq;
- char* pend = seq + (seqlen/sizeof(char));
- pend = remove(pbegin, pend, '\n');
- pend = remove(pbegin, pend, '\0');
- string s = seq;
- free(seq);
- s.resize((pend - pbegin)/sizeof(char));
- return s;
-long unsigned int FastaReference::sequenceLength(string seqname) {
- FastaIndexEntry entry = index->entry(seqname);
- return entry.length;
diff --git a/src/Fasta.h b/src/Fasta.h
deleted file mode 100644
index e99cb7e..0000000
--- a/src/Fasta.h
+++ /dev/null
@@ -1,73 +0,0 @@
-// ***************************************************************************
-// FastaIndex.h (c) 2010 Erik Garrison <erik.garrison at bc.edu>
-// Marth Lab, Department of Biology, Boston College
-// All rights reserved.
-// ---------------------------------------------------------------------------
-// Last modified: 5 February 2010 (EG)
-// ---------------------------------------------------------------------------
-#ifndef _FASTA_H
-#define _FASTA_H
-#include <map>
-#include <iostream>
-#include <fstream>
-#include <vector>
-#include <stdint.h>
-#include <stdio.h>
-#include <algorithm>
-#include "LargeFileSupport.h"
-#include <sys/stat.h>
-#include "split.h"
-#include <stdlib.h>
-#include <ctype.h>
-#include <unistd.h>
-using namespace std;
-class FastaIndexEntry {
- friend ostream& operator<<(ostream& output, const FastaIndexEntry& e);
- public:
- FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len);
- FastaIndexEntry(void);
- ~FastaIndexEntry(void);
- string name; // sequence name
- int length; // length of sequence
- long long offset; // bytes offset of sequence from start of file
- int line_blen; // line length in bytes, sequence characters
- int line_len; // line length including newline
- void clear(void);
-class FastaIndex : public map<string, FastaIndexEntry> {
- friend ostream& operator<<(ostream& output, FastaIndex& i);
- public:
- FastaIndex(void);
- ~FastaIndex(void);
- vector<string> sequenceNames;
- void indexReference(string refName);
- void readIndexFile(string fname);
- void writeIndexFile(string fname);
- ifstream indexFile;
- FastaIndexEntry entry(string key);
- void flushEntryToIndex(FastaIndexEntry& entry);
- string indexFileExtension(void);
-class FastaReference {
- public:
- void open(string reffilename);
- string filename;
- ~FastaReference(void);
- FILE* file;
- FastaIndex* index;
- vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart);
- string getSequence(string seqname);
- // potentially useful for performance, investigate
- // void getSequence(string seqname, string& sequence);
- string getSubSequence(string seqname, int start, int length);
- string sequenceNameStartingWith(string seqnameStart);
- long unsigned int sequenceLength(string seqname);
diff --git a/src/Genotype.cpp b/src/Genotype.cpp
deleted file mode 100644
index 779a03d..0000000
--- a/src/Genotype.cpp
+++ /dev/null
@@ -1,1894 +0,0 @@
-#include "Genotype.h"
-#include "multichoose.h"
-#include "multipermute.h"
-vector<Allele*> Genotype::uniqueAlleles(void) {
- vector<Allele*> uniques;
- for (Genotype::iterator g = this->begin(); g != this->end(); ++g) {
- uniques.push_back(&g->allele);
- }
- return uniques;
-int Genotype::getPloidy(void) {
- int result = 0;
- for (Genotype::const_iterator i = this->begin(); i != this->end(); ++i) {
- result += i->count;
- }
- return result;
-vector<int> Genotype::counts(void) {
- vector<int> counts;
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- counts.push_back(i->count);
- }
- return counts;
-vector<Allele> Genotype::alternateAlleles(string& base) {
- vector<Allele> alleles;
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- Allele& b = i->allele;
- if (base != b.currentBase)
- alleles.push_back(b);
- }
- return alleles;
-vector<string> Genotype::alternateBases(string& base) {
- vector<string> alleles;
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- Allele& b = i->allele;
- if (base != b.currentBase)
- alleles.push_back(b.currentBase);
- }
- return alleles;
-int Genotype::alleleCount(const string& base) {
- map<string, int>::iterator ge = alleleCounts.find(base);
- if (ge == alleleCounts.end()) {
- return 0;
- } else {
- return ge->second;
- }
-int Genotype::alleleCount(Allele& allele) {
- map<string, int>::iterator ge = alleleCounts.find(allele.currentBase);
- if (ge == alleleCounts.end()) {
- return 0;
- } else {
- return ge->second;
- }
-// returns true when the genotype is composed of a subset of the alleles
-bool Genotype::matchesAlleles(vector<Allele>& alleles) {
- int p = 0;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- p += alleleCount(*a);
- }
- return ploidy == p;
-double Genotype::alleleSamplingProb(const string& base) {
- map<string, int>::iterator ge = alleleCounts.find(base);
- if (ge == alleleCounts.end()) {
- return 0;
- } else {
- return (double) ge->second / (double) ploidy;
- }
-double Genotype::alleleSamplingProb(Allele& allele) {
- map<string, int>::iterator ge = alleleCounts.find(allele.currentBase);
- if (ge == alleleCounts.end()) {
- return 0;
- } else {
- return (double) ge->second / (double) ploidy;
- }
-string Genotype::relativeGenotype(string& refbase, vector<Allele>& alts) {
- vector<string> rg;
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- Allele& b = i->allele;
- string& base = b.currentBase;
- if (base == refbase) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back("0");
- } else {
- int n = 1;
- bool matchingalt = false;
- for (vector<Allele>::iterator a = alts.begin(); a != alts.end(); ++a, ++n) {
- if (base == a->currentBase) {
- matchingalt = true;
- for (int j = 0; j < i->count; ++j)
- rg.push_back(convert(n));
- break;
- }
- }
- if (!matchingalt) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back(".");
- }
- }
- }
- sort(rg.begin(), rg.end()); // enforces the same ordering for all genotypes
- //reverse(rg.begin(), rg.end()); // 1/0 ordering, or 1/1/0 etc.
- string result = join(rg, "/");
- return result; // chop trailing '/'
-void Genotype::relativeGenotype(vector<int>& rg, string& refbase, vector<Allele>& alts) {
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- Allele& b = i->allele;
- string& base = b.currentBase;
- if (base == refbase) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back(0);
- } else {
- int n = 1;
- bool matchingalt = false;
- for (vector<Allele>::iterator a = alts.begin(); a != alts.end(); ++a, ++n) {
- if (base == a->currentBase) {
- matchingalt = true;
- for (int j = 0; j < i->count; ++j)
- rg.push_back(n);
- break;
- }
- }
- if (!matchingalt) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back(-1);
- }
- }
- }
- sort(rg.begin(), rg.end()); // enforces the same ordering for all genotypes
- //reverse(rg.begin(), rg.end()); // 1/0 ordering, or 1/1/0 etc.
-void Genotype::relativeGenotype(vector<int>& rg, vector<Allele>& alleles) {
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- Allele& b = i->allele;
- string& base = b.currentBase;
- int n = 0;
- bool matchingalt = false;
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a, ++n) {
- if (base == a->base()) {
- matchingalt = true;
- for (int j = 0; j < i->count; ++j)
- rg.push_back(n);
- break;
- }
- }
- if (!matchingalt) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back(-1);
- }
- }
- sort(rg.begin(), rg.end()); // enforces the same ordering for all genotypes
- //reverse(rg.begin(), rg.end()); // 1/0 ordering, or 1/1/0 etc.
-string Genotype::relativeGenotype(string& refbase, string& altbase) {
- vector<string> rg;
- for (Genotype::iterator i = this->begin(); i != this->end(); ++i) {
- Allele& b = i->allele;
- if (b.currentBase == altbase && refbase != b.currentBase) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back("1/");
- } else if (b.currentBase != altbase && refbase != b.currentBase) {
- for (int j = 0; j < i->count; ++j)
- rg.push_back("./");
- } else {
- for (int j = 0; j < i->count; ++j)
- rg.push_back("0/");
- }
- }
- sort(rg.begin(), rg.end()); // enforces the same ordering for all genotypes
- //reverse(rg.begin(), rg.end()); // 1/0 ordering, or 1/1/0 etc.
- string result = accumulate(rg.begin(), rg.end(), string(""));
- return result.substr(0, result.size() - 1); // chop trailing '/'
-bool Genotype::containsAllele(const string& base) {
- map<string, int>::iterator ge = alleleCounts.find(base);
- if (ge == alleleCounts.end()) {
- return false;
- } else {
- return true;
- }
-bool Genotype::containsAllele(Allele& allele) {
- map<string, int>::iterator ge = alleleCounts.find(allele.currentBase);
- if (ge == alleleCounts.end()) {
- return false;
- } else {
- return true;
- }
-bool Genotype::isHomozygous(void) {
- return size() == 1;
-// if heterozgyous
-bool Genotype::isHeterozygous(void) {
- return size() > 1;
-// if homozygous alternate
-bool Genotype::isHomozygousAlternate(void) {
- return isHomozygous() && !front().allele.isReference();
-// if homozygous reference
-bool Genotype::isHomozygousReference(void) {
- return isHomozygous() && front().allele.isReference();
-// the probability of drawing each allele out of the genotype, ordered by allele
-vector<long double> Genotype::alleleProbabilities(void) {
- vector<long double> probs;
- for (vector<GenotypeElement>::const_iterator a = this->begin(); a != this->end(); ++a) {
- probs.push_back((long double) a->count / (long double) ploidy);
- }
- return probs;
-// the probability of drawing each allele out of the genotype, ordered by allele, adjusted for reference bias
-vector<long double> Genotype::alleleProbabilities(Bias& observationBias) {
- vector<long double> probs;
- for (vector<GenotypeElement>::const_iterator a = this->begin(); a != this->end(); ++a) {
- long double bias = 1;
- if (!a->allele.isReference()) {
- int alleleLengthDifference = a->allele.alternateSequence.size() - a->allele.referenceLength;
- bias = observationBias.bias(alleleLengthDifference);
- }
- probs.push_back(((long double) a->count / (long double) ploidy) * bias);
- }
- normalizeSumToOne(probs);
- return probs;
-string Genotype::str(void) const {
- string s;
- for (Genotype::const_iterator ge = this->begin(); ge != this->end(); ++ge) {
- for (int i = 0; i < ge->count; ++i)
- s += ((ge == this->begin() && i == 0) ? "" : "/") + ge->allele.currentBase;
- }
- return s;
-string IUPAC(Genotype& genotype) {
- const string g = genotype.str();
- if (g == "AA") return "A";
- if (g == "AC") return "M";
- if (g == "AG") return "R";
- if (g == "AT") return "W";
- if (g == "CA") return "M";
- if (g == "CC") return "C";
- if (g == "CG") return "S";
- if (g == "CT") return "Y";
- if (g == "GA") return "R";
- if (g == "GC") return "S";
- if (g == "GG") return "G";
- if (g == "GT") return "K";
- if (g == "TA") return "W";
- if (g == "TC") return "Y";
- if (g == "TG") return "K";
- if (g == "TT") return "T";
- return g;
-string IUPAC2GenotypeStr(string iupac, int ploidy) {
- if (iupac == "A") return "AA";
- if (iupac == "M") return "AC";
- if (iupac == "R") return "AG";
- if (iupac == "W") return "AT";
- if (iupac == "C") return "CC";
- if (iupac == "S") return "CG";
- if (iupac == "Y") return "CT";
- if (iupac == "G") return "GG";
- if (iupac == "K") return "GT";
- if (iupac == "T") return "TT";
- return iupac;
-ostream& operator<<(ostream& out, const GenotypeElement& rhs) {
- for (int i = 0; i < rhs.count; ++i)
- out << rhs.allele.base() << "/";
- //for (int i = 0; i < rhs.second; ++i)
- // out << rhs.first.currentBase;
- return out;
-ostream& operator<<(ostream& out, const Genotype& g) {
- out << g.str();
- return out;
-ostream& operator<<(ostream& out, list<GenotypeCombo>& g) {
- for (list<GenotypeCombo>::iterator i = g.begin(); i != g.end(); ++i) {
- out << *i << endl;
- }
- return out;
-ostream& operator<<(ostream& out, GenotypeCombo& g) {
- GenotypeCombo::iterator i = g.begin(); ++i;
- out << "combo posterior prob: " << g.posteriorProb << endl;
- out << "{\"" << g.front()->name << "\":[\"" << *(g.front()->genotype) << "\"," << exp(g.front()->prob) << "]";
- for (;i != g.end(); ++i) {
- out << ", \"" << (*i)->name << "\":[\"" << *((*i)->genotype) << "\"," << exp((*i)->prob) << "]";
- }
- out << "}";
- return out;
-bool operator<(Genotype& a, Genotype& b) {
- // genotypes of different ploidy are evaluated according to their relative ploidy
- if (a.ploidy != b.ploidy)
- return a.ploidy < b.ploidy;
- // because our constructor sorts each Genotype.alleles, we assume that we
- // have two equivalently sorted vectors to work with
- Genotype::iterator ai = a.begin();
- Genotype::iterator bi = b.begin();
- // step through each genotype, and if we find a difference between either
- // their allele or count return a<b
- for (; ai != a.end() && bi != b.end(); ++ai, ++bi) {
- if (ai->allele != bi->allele)
- return ai->allele < bi->allele;
- else if (ai->count != bi->count)
- return ai->count < bi->count;
- }
- return false; // if the two are equal, then we return false per C++ convention
-vector<Genotype> allPossibleGenotypes(int ploidy, vector<Allele>& potentialAlleles) {
- vector<Genotype> genotypes;
- vector<vector<Allele> > alleleCombinations = multichoose(ploidy, potentialAlleles);
- for (vector<vector<Allele> >::iterator combo = alleleCombinations.begin(); combo != alleleCombinations.end(); ++combo) {
- genotypes.push_back(Genotype(*combo));
- }
- return genotypes;
-int GenotypeCombo::numberOfAlleles(void) {
- int count = 0;
- for (map<string, AlleleCounter>::iterator f = alleleCounters.begin(); f != alleleCounters.end(); ++f) {
- const AlleleCounter& allele = f->second;
- count += allele.frequency;
- }
- return count;
-// initializes cached counts associated with each GenotypeCombo
-void GenotypeCombo::init(bool useObsExpectations) {
- for (GenotypeCombo::iterator s = begin(); s != end(); ++s) {
- const SampleDataLikelihood& sdl = **s;
- const Sample& sample = *sdl.sample;
- ++genotypeCounts[sdl.genotype];
- permutationsln += sdl.genotype->permutationsln;
- for (Genotype::iterator a = sdl.genotype->begin(); a != sdl.genotype->end(); ++a) {
- const string& alleleBase = a->allele.currentBase;
- // allele frequencies in selected genotypes in combo
- AlleleCounter& alleleCounter = alleleCounters[alleleBase];
- alleleCounter.frequency += a->count;
- if (useObsExpectations) {
- // observational frequencies for binomial priors
- Sample::const_iterator as = sample.find(alleleBase);
- if (as != sample.end()) {
- vector<Allele*> alleles = as->second;
- alleleCounter.observations += alleles.size();
- for (vector<Allele*>::iterator o = alleles.begin(); o != alleles.end(); ++o) {
- const Allele& allele = **o;
- if (allele.basesLeft >= allele.basesRight) {
- ++alleleCounter.placedLeft;
- if (allele.strand == STRAND_FORWARD) {
- ++alleleCounter.placedStart;
- } else {
- ++alleleCounter.placedEnd;
- }
- } else {
- ++alleleCounter.placedRight;
- if (allele.strand == STRAND_FORWARD) {
- ++alleleCounter.placedEnd;
- } else {
- ++alleleCounter.placedStart;
- }
- }
- if (allele.strand == STRAND_FORWARD) {
- ++alleleCounter.forwardStrand;
- } else {
- ++alleleCounter.reverseStrand;
- }
- }
- }
- }
- }
- }
-void GenotypeCombo::addPriorAlleleCounts(map<string, int>& priorACs) {
- for (map<string, int>::iterator p = priorACs.begin(); p != priorACs.end(); ++p) {
- const string& alleleBase = p->first;
- int count = p->second;
- AlleleCounter& alleleCounter = alleleCounters[alleleBase];
- //cerr <<"init "<< alleleCounter.frequency;
- alleleCounter.frequency += count;
- }
-// frequency... should this just be "allele count"?
-int GenotypeCombo::alleleCount(Allele& allele) {
- map<string, AlleleCounter>::iterator f = alleleCounters.find(allele.currentBase);
- if (f == alleleCounters.end()) {
- return 0;
- } else {
- return f->second.frequency;
- }
-int GenotypeCombo::alleleCount(const string& allele) {
- map<string, AlleleCounter>::iterator f = alleleCounters.find(allele);
- if (f == alleleCounters.end()) {
- return 0;
- } else {
- return f->second.frequency;
- }
-long double GenotypeCombo::alleleFrequency(Allele& allele) {
- return alleleCount(allele) / (long double) numberOfAlleles();
-long double GenotypeCombo::alleleFrequency(const string& allele) {
- return alleleCount(allele) / (long double) numberOfAlleles();
-long double GenotypeCombo::genotypeFrequency(Genotype* genotype) {
- map<Genotype*, int>::iterator g = genotypeCounts.find(genotype);
- if (g == genotypeCounts.end()) {
- return 0;
- } else {
- return g->second / size();
- }
-void GenotypeCombo::updateCachedCounts(
- Sample* sample,
- Genotype* oldGenotype,
- Genotype* newGenotype,
- bool useObsExpectations) {
- // update genotype counts
- --genotypeCounts[oldGenotype];
- ++genotypeCounts[newGenotype];
- // update permutations
- permutationsln -= oldGenotype->permutationsln;
- permutationsln += newGenotype->permutationsln;
- // remove allele frequencies which are now 0 or below
- map<Genotype*, int>::iterator gc = genotypeCounts.begin();
- while (gc != genotypeCounts.end()) {
- assert(gc->second >= 0);
- if (gc->second == 0) {
- genotypeCounts.erase(gc++);
- } else {
- ++gc;
- }
- }
- // TODO can we improve efficiency by only adjusting for bases which are actually changed
- // remove allele frequency information for old genotype
- for (Genotype::iterator g = oldGenotype->begin(); g != oldGenotype->end(); ++g) {
- GenotypeElement& ge = *g;
- const string& base = ge.allele.currentBase;
- AlleleCounter& alleleCounter = alleleCounters[base];
- alleleCounter.frequency -= ge.count;
- if (useObsExpectations) {
- Sample::iterator s = sample->find(base);
- if (s != sample->end()) {
- const vector<Allele*>& alleles = s->second;
- alleleCounter.observations -= alleles.size();
- int forward_strand = 0;
- int reverse_strand = 0;
- int placed_left = 0;
- int placed_right = 0;
- int placed_start = 0;
- int placed_end = 0;
- for (vector<Allele*>::const_iterator a = alleles.begin(); a != alleles.end(); ++a) {
- const Allele& allele = **a;
- if (allele.strand == STRAND_FORWARD) {
- ++forward_strand;
- } else {
- ++reverse_strand;
- }
- if (allele.basesLeft >= allele.basesRight) {
- ++placed_left;
- if (allele.strand == STRAND_FORWARD) {
- ++placed_start;
- } else {
- ++placed_end;
- }
- } else {
- ++placed_right;
- if (allele.strand == STRAND_FORWARD) {
- ++placed_end;
- } else {
- ++placed_start;
- }
- }
- }
- alleleCounter.forwardStrand -= forward_strand;
- alleleCounter.reverseStrand -= reverse_strand;
- alleleCounter.placedLeft -= placed_left;
- alleleCounter.placedRight -= placed_right;
- alleleCounter.placedStart -= placed_start;
- alleleCounter.placedEnd -= placed_end;
- }
- }
- }
- // add allele frequency information for new genotype
- for (Genotype::iterator g = newGenotype->begin(); g != newGenotype->end(); ++g) {
- GenotypeElement& ge = *g;
- const string& base = ge.allele.currentBase;
- AlleleCounter& alleleCounter = alleleCounters[base];
- alleleCounter.frequency += ge.count;
- if (useObsExpectations) {
- Sample::iterator s = sample->find(base);
- if (s != sample->end()) {
- const vector<Allele*>& alleles = s->second;
- alleleCounter.observations += alleles.size();
- int forward_strand = 0;
- int reverse_strand = 0;
- int placed_left = 0;
- int placed_right = 0;
- int placed_start = 0;
- int placed_end = 0;
- for (vector<Allele*>::const_iterator a = alleles.begin(); a != alleles.end(); ++a) {
- const Allele& allele = **a;
- if (allele.strand == STRAND_FORWARD) {
- ++forward_strand;
- } else {
- ++reverse_strand;
- }
- if (allele.basesLeft >= allele.basesRight) {
- ++placed_left;
- if (allele.strand == STRAND_FORWARD) {
- ++placed_start;
- } else {
- ++placed_end;
- }
- } else {
- ++placed_right;
- if (allele.strand == STRAND_FORWARD) {
- ++placed_end;
- } else {
- ++placed_start;
- }
- }
- }
- alleleCounter.forwardStrand += forward_strand;
- alleleCounter.reverseStrand += reverse_strand;
- alleleCounter.placedLeft += placed_left;
- alleleCounter.placedRight += placed_right;
- alleleCounter.placedStart += placed_start;
- alleleCounter.placedEnd += placed_end;
- }
- }
- }
- // remove allele frequencies which are now 0 or below
- map<string, AlleleCounter>::iterator af = alleleCounters.begin();
- while (af != alleleCounters.end()) {
- assert(af->second.frequency >= 0);
- if (af->second.frequency == 0) {
- assert(af->second.observations == 0);
- alleleCounters.erase(af++);
- } else {
- ++af;
- }
- }
-map<int, int> GenotypeCombo::countFrequencies(void) {
- map<int, int> frequencyCounts;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- const AlleleCounter& allele = a->second;
- map<int, int>::iterator c = frequencyCounts.find(allele.frequency);
- if (c != frequencyCounts.end()) {
- c->second += 1;
- } else {
- frequencyCounts[allele.frequency] = 1;
- }
- }
- return frequencyCounts;
-vector<int> GenotypeCombo::counts(void) {
- //map<string, int> alleleCounters = countAlleles();
- vector<int> counts;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- const AlleleCounter& allele = a->second;
- counts.push_back(allele.frequency);
- }
- return counts;
-int GenotypeCombo::hetCount(void) {
- int hc = 0;
- for (GenotypeCombo::iterator s = begin(); s != end(); ++s) {
- if (!(*s)->genotype->homozygous) {
- ++hc;
- }
- }
- return hc;
-vector<int> GenotypeCombo::observationCounts(void) {
- vector<int> counts;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- const AlleleCounter& allele = a->second;
- counts.push_back(allele.observations);
- }
- return counts;
-int GenotypeCombo::observationTotal(void) {
- int total = 0;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- const AlleleCounter& allele = a->second;
- total += allele.observations;
- }
- return total;
-// how many copies of the locus are in the whole genotype combination?
-int GenotypeCombo::ploidy(void) {
- int copies = 0;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- const AlleleCounter& allele = a->second;
- copies += allele.frequency;
- }
- return copies;
-vector<long double> GenotypeCombo::alleleProbs(void) {
- vector<long double> probs;
- long double copies = ploidy();
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- const AlleleCounter& allele = a->second;
- probs.push_back(allele.frequency / copies);
- }
- return probs;
-vector<string> GenotypeCombo::alleles(void) {
- vector<string> bases;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- bases.push_back(a->first);
- }
- return bases;
-// returns true if the combination is 100% homozygous
-bool GenotypeCombo::isHomozygous(void) {
- return alleleCounters.size() == 1;
-void sortSampleDataLikelihoods(vector<SampleDataLikelihood>& likelihoods) {
- SampleDataLikelihoodCompare datalikelihoodCompare;
- sort(likelihoods.begin(), likelihoods.end(), datalikelihoodCompare);
- int i = 0;
- for (vector<SampleDataLikelihood>::iterator sdl = likelihoods.begin(); sdl != likelihoods.end(); ++sdl) {
- sdl->rank = i++;
- }
-bool sortSampleDataLikelihoodsByMarginals(vector<SampleDataLikelihood>& likelihoods) {
- SampleMarginalCompare marginalLikelihoodCompare;
- sort(likelihoods.begin(), likelihoods.end(), marginalLikelihoodCompare);
- bool reordered = false;
- int i = 0;
- for (vector<SampleDataLikelihood>::iterator sdl = likelihoods.begin(); sdl != likelihoods.end(); ++sdl) {
- int newrank = i++;
- if (sdl->rank != newrank) {
- reordered = true;
- sdl->rank = newrank;
- }
- }
- return reordered;
-bool sortSampleDataLikelihoodsByMarginals(SampleDataLikelihoods& samplesLikelihoods) {
- bool reordered = false;
- for (SampleDataLikelihoods::iterator s = samplesLikelihoods.begin(); s != samplesLikelihoods.end(); ++s) {
- reordered |= sortSampleDataLikelihoodsByMarginals(*s);
- }
- return reordered;
-bool sortSampleDataLikelihoodsByMarginalsAndObs(vector<SampleDataLikelihood>& likelihoods) {
- SampleMarginalAndObsCompare marginalLikelihoodAndObsCompare;
- sort(likelihoods.begin(), likelihoods.end(), marginalLikelihoodAndObsCompare);
- bool reordered = false;
- int i = 0;
- for (vector<SampleDataLikelihood>::iterator sdl = likelihoods.begin(); sdl != likelihoods.end(); ++sdl) {
- int newrank = i++;
- if (sdl->rank != newrank) {
- reordered = true;
- sdl->rank = newrank;
- }
- }
- return reordered;
-bool sortSampleDataLikelihoodsByMarginalsAndObs(SampleDataLikelihoods& samplesLikelihoods) {
- bool reordered = false;
- for (SampleDataLikelihoods::iterator s = samplesLikelihoods.begin(); s != samplesLikelihoods.end(); ++s) {
- reordered |= sortSampleDataLikelihoodsByMarginalsAndObs(*s);
- }
- return reordered;
-bool sortSampleDataLikelihoodsScaledByMarginals(vector<SampleDataLikelihood>& likelihoods) {
- SampleLikelihoodCompare likelihoodCompare;
- sort(likelihoods.begin(), likelihoods.end(), likelihoodCompare);
- bool reordered = false;
- int i = 0;
- for (vector<SampleDataLikelihood>::iterator sdl = likelihoods.begin(); sdl != likelihoods.end(); ++sdl) {
- int newrank = i++;
- if (sdl->rank != newrank) {
- reordered = true;
- sdl->rank = newrank;
- }
- }
- return reordered;
-bool sortSampleDataLikelihoodsScaledByMarginals(SampleDataLikelihoods& samplesLikelihoods) {
- bool reordered = false;
- for (SampleDataLikelihoods::iterator s = samplesLikelihoods.begin(); s != samplesLikelihoods.end(); ++s) {
- reordered |= sortSampleDataLikelihoodsScaledByMarginals(*s);
- }
- return reordered;
-// assumes that the data likelihoods are sorted
- GenotypeCombo& combo,
- SampleDataLikelihoods& sampleDataLikelihoods,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- for (SampleDataLikelihoods::iterator s = sampleDataLikelihoods.begin();
- s != sampleDataLikelihoods.end(); ++s) {
- SampleDataLikelihood* sdl = &s->at(0);
- combo.push_back(sdl);
- combo.probObsGivenGenotypes += sdl->prob;
- }
- combo.init(binomialObsPriors);
- combo.calculatePosteriorProbability(theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- GenotypeCombo& combo,
- vector<int>& initialPosition, // starting combo in terms of offsets from data likelihood maximum
- SampleDataLikelihoods& variantSampleDataLikelihoods,
- SampleDataLikelihoods& invariantSampleDataLikelihoods,
- map<string, int>& priorACs,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- // generate the best genotype combination according to data
- // likelihoods
- vector<int>::iterator offset = initialPosition.begin();
- for (SampleDataLikelihoods::iterator s = variantSampleDataLikelihoods.begin();
- s != variantSampleDataLikelihoods.end(); ++s) {
- // use the offsets to generate the starting combination
- SampleDataLikelihood* sdl = &s->at(*offset++);
- combo.push_back(sdl);
- combo.probObsGivenGenotypes += sdl->prob;
- }
- // these samples have well-differentiated data likelihoods, and
- // aren't changed during posterior integration
- for (SampleDataLikelihoods::iterator s = invariantSampleDataLikelihoods.begin();
- s != invariantSampleDataLikelihoods.end(); ++s) {
- SampleDataLikelihood* sdl = &s->at(*offset++);
- combo.push_back(sdl);
- combo.probObsGivenGenotypes += sdl->prob;
- }
- combo.init(binomialObsPriors);
- // add the prior ACs into the comob allele counters
- combo.addPriorAlleleCounts(priorACs);
- combo.calculatePosteriorProbability(theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
-// 'local' genotype combinations which step only in one sample away from the
-// data likelihood maxiumum. deal with all genotypes.
- list<GenotypeCombo>& combos,
- GenotypeCombo& comboKing,
- SampleDataLikelihoods& sampleDataLikelihoods,
- Samples& samples,
- map<string, int>& priorACs,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar,
- bool keepCombos) {
- // make the data likelihood maximum if needed
- if (comboKing.empty()) {
- vector<int> initialPosition;
- initialPosition.assign(sampleDataLikelihoods.size(), 0);
- SampleDataLikelihoods nullDataLikelihoods; // dummy variable
- makeComboByDatalLikelihoodRank(comboKing,
- initialPosition,
- sampleDataLikelihoods,
- nullDataLikelihoods,
- priorACs,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- }
- // ensure the comboKing is added
- if (combos.empty()) {
- combos.push_back(comboKing);
- }
- // for each sampledatalikelihood
- // add a combo for each genotype where the combo is one step from the comboKing
- size_t sampleOffset = 0;
- //GenotypeCombo::iterator sampleGenotypeItr = comboKing.begin();
- for (SampleDataLikelihoods::iterator s = sampleDataLikelihoods.begin();
- s != sampleDataLikelihoods.end(); ++s, ++sampleOffset) {
- SampleDataLikelihood& oldsdl = *comboKing.at(sampleOffset);
- vector<SampleDataLikelihood>& sdls = *s;
- for (vector<SampleDataLikelihood>::iterator dl = sdls.begin(); dl != sdls.end(); ++dl) {
- SampleDataLikelihood& newsdl = *dl;
- if (newsdl.genotype == oldsdl.genotype) { // don't duplicate the comboKing
- continue;
- }
- combos.push_back(comboKing);
- GenotypeCombo& combo = combos.back();
- // get the old and new genotypes, which we compare
- // to change the cached counts and probability of
- // the combo
- combo.updateCachedCounts(oldsdl.sample,
- oldsdl.genotype, newsdl.genotype,
- binomialObsPriors);
- // replace genotype with new genotype
- combo.at(sampleOffset) = &*dl;
- // find data likelihood difference from ComboKing
- long double diff = oldsdl.prob - newsdl.prob;
- // adjust combination total data likelihood
- combo.probObsGivenGenotypes -= diff;
- combo.calculatePosteriorProbability(theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- // TODO
- // memory-saving intervention, improve this
- // difficult if we want to calculate marginals...
- if (!keepCombos) {
- // we should only have two combos in the list now...
- if (combos.front().posteriorProb < combos.back().posteriorProb) {
- combos.pop_front();
- } else {
- combos.pop_back();
- }
- }
- }
- }
- GenotypeComboResultSorter gcrSorter;
- combos.sort(gcrSorter);
- combos.unique();
- list<GenotypeCombo>& combos,
- GenotypeCombo& comboKing,
- SampleDataLikelihoods& variantSampleDataLikelihoods,
- SampleDataLikelihoods& invariantSampleDataLikelihoods,
- Samples& samples,
- map<string, int>& priorACs,
- int bandwidth, int banddepth,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar,
- bool keepCombos) {
- // get the number of samples that vary
- int nsamples = variantSampleDataLikelihoods.size();
- // cap bandwidth at the number of variant samples
- bandwidth = (bandwidth > nsamples) ? nsamples : bandwidth;
- // no variant samples
- if (nsamples == 0) {
- combos.push_back(comboKing);
- return true;
- }
- // overview:
- //
- // For each order of indexes in the bandwidth and banddepth, Obtain
- // all multiset permutations of a set of indexes. Then use these
- // indexes to get the nth-best genotype from each individual's set
- // of genotypes for which we have data likelihoods
- // (sampleDataLikelihoods), and turn this set into a genotype
- // combination. Update the combination probability inline here so
- // we don't incur O(N^2) penalty calculating the probability within
- // our genotypeCombinationPriors calculation loop, where we
- // estimate the posterior probability of each genotype combination
- // given its data likelihood and the prior probability of the
- // distribution of alleles it represents.
- //
- // example (bandwidth = 2, banddepth = 2)
- // indexes: 0 0 0 0 1, 0 0 0 1 1
- //
- // permutations: 0 0 0 0 1
- // 0 0 0 1 0
- // 0 0 1 0 0
- // 0 1 0 0 0
- // 1 0 0 0 0
- // 1 1 0 0 0
- // 0 1 1 0 0
- // 1 0 1 0 0
- // 0 1 0 1 0
- // 0 0 1 1 0
- // 1 0 0 1 0
- // 0 1 0 0 1
- // 0 0 1 0 1
- // 0 0 0 1 1
- // 1 0 0 0 1
- //
- // We then convert these permutation to genotype combinations by
- // using the index to pick the nth-best genotype according to
- // sorted individual genotype data likelihoods.
- //
- // In addition to this simple case, We can flexibly extend this to
- // larger search spaces by changing the depth and width of the
- // deviations from the data likelihood maximizer (aka 'king').
- //
- vector<int> depths;
- depths.reserve(banddepth);
- for (int i = 0; i < banddepth; ++i) {
- depths.push_back(i);
- }
- vector<vector<int> > deviations = multichoose(bandwidth, depths);
- // skip the first vector, which will always be the same as the
- // combo king, and has been pushed into our combinations already
- for (vector<vector<int> >::iterator d = deviations.begin(); d != deviations.end(); ++d) {
- vector<int>& indexes = *d;
- indexes.reserve(nsamples);
- for (int h = 0; h < (nsamples - bandwidth); ++h) {
- indexes.push_back(0);
- }
- vector<vector<int> > indexPermutations = multipermute(indexes);
- for (vector<vector<int> >::const_iterator p = indexPermutations.begin(); p != indexPermutations.end(); ++p) {
- combos.push_back(comboKing); // copy the king, and then we'll modify it according to the indicies
- GenotypeCombo& combo = combos.back();
- GenotypeCombo::iterator sampleGenotypeItr = combo.begin();
- vector<int>::const_iterator n = p->begin();
- for (SampleDataLikelihoods::iterator s = variantSampleDataLikelihoods.begin();
- s != variantSampleDataLikelihoods.end(); ++s, ++n, ++sampleGenotypeItr) {
- SampleDataLikelihood& oldsdl = **sampleGenotypeItr;
- SampleDataLikelihood*& oldsdl_ptr = *sampleGenotypeItr;
- vector<SampleDataLikelihood>& sdls = *s;
- int offset = *n + oldsdl.rank;
- if (offset > 0) {
- // shift-back if this combo is beyond the bounds of the individual's set of genotypes
- offset %= s->size();
- SampleDataLikelihood* newsdl = &sdls.at(offset);
- // get the old and new genotypes, which we compare
- // to change the cached counts and probability of
- // the combo
- combo.updateCachedCounts(oldsdl.sample,
- oldsdl.genotype, newsdl->genotype,
- binomialObsPriors);
- // replace genotype with new genotype
- oldsdl_ptr = newsdl;
- // find data likelihood difference from ComboKing
- long double diff = oldsdl.prob - newsdl->prob;
- // adjust combination total data likelihood
- combo.probObsGivenGenotypes -= diff;
- }
- }
- combo.calculatePosteriorProbability(theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- if (!keepCombos && combos.size() > 1) {
- // we should only have two combos in the list now...
- if (combos.front().posteriorProb < combos.back().posteriorProb) {
- combos.pop_front();
- } else {
- combos.pop_back();
- }
- }
- }
- }
- GenotypeComboResultSorter gcrSorter;
- combos.sort(gcrSorter);
- combos.unique();
- return true;
- list<GenotypeCombo>& combos,
- GenotypeCombo& comboKing,
- SampleDataLikelihoods& sampleDataLikelihoods,
- SampleDataLikelihoods& variantSampleDataLikelihoods,
- SampleDataLikelihoods& invariantSampleDataLikelihoods,
- Samples& samples,
- vector<Allele>& genotypeAlleles,
- map<string, int>& priorACs,
- int bandwidth, int banddepth,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar,
- int maxiterations,
- int& totaliterations,
- bool addHomozygousCombos) {
- if (comboKing.empty()) {
- // seed EM with the data likelihood maximum
- vector<int> initialPosition;
- initialPosition.assign(sampleDataLikelihoods.size(), 0);
- makeComboByDatalLikelihoodRank(comboKing,
- initialPosition,
- variantSampleDataLikelihoods,
- invariantSampleDataLikelihoods,
- priorACs,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- }
- // set best position, which is updated during the EM step
- GenotypeCombo bestCombo = comboKing;
- int i = 0;
- for (; i < maxiterations; ++i) {
- combos.clear();
- if (bandwidth == 0 && banddepth == 0) {
- allLocalGenotypeCombinations(
- combos,
- bestCombo,
- sampleDataLikelihoods,
- samples,
- priorACs,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar,
- false); // throw away combos, so as to reduce memory usage
- } else {
- bandedGenotypeCombinations(
- combos,
- bestCombo,
- variantSampleDataLikelihoods,
- invariantSampleDataLikelihoods,
- samples,
- priorACs,
- bandwidth,
- banddepth,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar,
- false); // throw away combos, so as to reduce memory usage
- }
- //cerr << "combos size = " << combos.size() << endl;
- //cerr << "best combo: " << combos.front() << endl;
- // check for convergence
- //
- // either we've converged on the best homozygous combo, which suggests
- // weak support for variation, or we've got the same combo twice in a
- // row as our best
- if (combos.front().isHomozygous() || bestCombo == combos.front()) {
- // we've converged
- if (bandwidth == 0 && banddepth == 0) {
- // XXX temporary hack
- // get the rest of the combos in memory so we can do computation with them...
- allLocalGenotypeCombinations(
- combos,
- combos.front(),
- sampleDataLikelihoods,
- samples,
- priorACs,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar,
- true); // keep combos
- } else {
- bandedGenotypeCombinations(
- combos,
- bestCombo,
- variantSampleDataLikelihoods,
- invariantSampleDataLikelihoods,
- samples,
- priorACs,
- bandwidth,
- banddepth,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar,
- true); // keep combos
- }
- break;
- } else {
- bestCombo = combos.front();
- }
- }
- //cout << i << " iterations" << "\t" << variantSampleDataLikelihoods.size() << " varying samples"
- // << " and " << invariantSampleDataLikelihoods.size() << " invariant samples" << endl;
- totaliterations = i;
- // add the homozygous cases
- if (addHomozygousCombos) {
- addAllHomozygousCombos(combos,
- sampleDataLikelihoods,
- variantSampleDataLikelihoods,
- invariantSampleDataLikelihoods,
- samples,
- genotypeAlleles,
- theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- }
-void addAllHomozygousCombos(
- list<GenotypeCombo>& combos,
- SampleDataLikelihoods& sampleDataLikelihoods,
- SampleDataLikelihoods& variantSampleDataLikelihoods,
- SampleDataLikelihoods& invariantSampleDataLikelihoods,
- Samples& samples,
- vector<Allele>& genotypeAlleles,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- // determine which homozygous combos we already have
- map<Allele, bool> allelesWithHomozygousCombos;
- for (list<GenotypeCombo>::iterator c = combos.begin(); c != combos.end(); ++c) {
- bool allSameAndHomozygous = true;
- GenotypeCombo::iterator gc = c->begin();
- Genotype* genotype;
- if ((*gc)->genotype->homozygous) {
- genotype = (*gc)->genotype;
- } else {
- continue;
- }
- for (; gc != c->end(); ++gc) {
- if (! ((*gc)->genotype == genotype) ) {
- allSameAndHomozygous = false;
- break;
- }
- }
- if (allSameAndHomozygous) {
- allelesWithHomozygousCombos[genotype->front().allele] == true;
- }
- }
- // accumulate the needed homozygous combos
- map<Allele, GenotypeCombo> homozygousCombos;
- for (vector<Allele>::iterator a = genotypeAlleles.begin(); a != genotypeAlleles.end(); ++a) {
- Allele& allele = *a;
- map<Allele, bool>::iterator g = allelesWithHomozygousCombos.find(allele);
- if (g == allelesWithHomozygousCombos.end()) {
- // we need to make a new combo
- // iterate through the sample genotype vector
- GenotypeCombo& combo = homozygousCombos[allele];
- // match the way we make combos in bandedCombos*()
- SampleDataLikelihoods::iterator s = variantSampleDataLikelihoods.begin();
- while (s != invariantSampleDataLikelihoods.end()) {
- // for each sample genotype, if the genotype is the same as our currently needed genotype, push it back onto a new combo
- for (vector<SampleDataLikelihood>::iterator d = s->begin(); d != s->end(); ++d) {
- SampleDataLikelihood& sdl = *d;
- // this check is ploidy-independent
- if (sdl.genotype->homozygous && sdl.genotype->front().allele == allele) {
- combo.push_back(&sdl);
- break;
- }
- }
- ++s;
- if (s == variantSampleDataLikelihoods.end()) {
- s = invariantSampleDataLikelihoods.begin();
- }
- }
- }
- }
- // accumulate homozygous combos and set their combo data probabilities
- for (map<Allele, GenotypeCombo>::iterator c = homozygousCombos.begin(); c != homozygousCombos.end(); ++c) {
- GenotypeCombo& gc = c->second;
- if (gc.empty()) {
- continue;
- }
- gc.probObsGivenGenotypes = 0;
- for (GenotypeCombo::iterator sdl = gc.begin(); sdl != gc.end(); ++sdl) {
- gc.probObsGivenGenotypes += (*sdl)->prob; // set up data likelihood for combo
- }
- gc.init(binomialObsPriors); // cache allele frequency information
- gc.calculatePosteriorProbability(theta,
- pooled,
- ewensPriors,
- permute,
- hwePriors,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar);
- combos.push_back(gc);
- }
- GenotypeComboResultSorter gcrSorter;
- combos.sort(gcrSorter);
- combos.unique();
- /*
- for (list<GenotypeCombo>::iterator g = combos.begin(); g != combos.end(); ++g) {
- GenotypeCombo& gc = *g;
- cerr << gc << endl
- << "," << gc.probObsGivenGenotypes
- << "," << gc.posteriorProb
- << "," << gc.priorProbG_Af
- << "," << gc.priorProbAf
- << "," << gc.priorProbObservations
- << endl;
- map<int, int> acs = gc.countFrequencies();
- for (map<int, int>::iterator a = acs.begin(); a != acs.end(); ++a) {
- cerr << a->first << " " << a->second << endl;
- }
- cerr << "***************************" << endl;
- }
- */
-// conditional probability of the genotype combination given the represented allele frequencies
-long double GenotypeCombo::probabilityGivenAlleleFrequencyln(bool permute) {
- //return -multinomialCoefficientLn(numberOfAlleles(), counts());
- int n = numberOfAlleles();
- long double lnhetscalar = 0;
- if (permute) {
- // scale by the product of permutations of heterozygotes
- lnhetscalar = permutationsln; // cached permutations of this combo
- }
- return lnhetscalar - multinomialCoefficientLn(n, counts());
-long double GenotypeCombo::hweComboProb(void) {
- long double comboHweProb = 0;
- for (map<Genotype*, int>::iterator gc = genotypeCounts.begin(); gc != genotypeCounts.end(); ++gc) {
- Genotype* genotype = gc->first;
- comboHweProb += hweProbGenotypeFrequencyln(genotype);
- }
- return comboHweProb;
-// probability of the combo under HWE
-long double GenotypeCombo::hweExpectedFrequencyln(Genotype* genotype) {
- int ploidy = genotype->ploidy;
- vector<int> genotypeAlleleCounts;
- vector<long double> alleleFrequencies;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- genotypeAlleleCounts.push_back(genotype->alleleCount(a->first));
- alleleFrequencies.push_back((long double) a->second.frequency / (long double) numberOfAlleles());
- }
- long double HWECoefficientln = multinomialCoefficientLn(ploidy, genotypeAlleleCounts);
- vector<int>::iterator c = genotypeAlleleCounts.begin();
- vector<long double>::iterator f = alleleFrequencies.begin();
- for (; c != genotypeAlleleCounts.end(); ++c, ++f) {
- HWECoefficientln += powln(log(*f), *c);
- }
- return HWECoefficientln;
-// probability that the genotype count in the combo is what it is given the
-// counts of the other alleles
-long double GenotypeCombo::hweProbGenotypeFrequencyln(Genotype* genotype) {
- //cout << endl << *genotype << endl;
- int popTotalAlleles = numberOfAlleles();
- //cout << "popTotalAlleles = " << popTotalAlleles << endl;
- vector<int> popAlleleCounts;
- vector<int> thisGenotypeAlleleCounts;
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- //cout << a->first << "\t" << a->second.frequency << "\t" << genotype->alleleCount(a->first) << endl;
- popAlleleCounts.push_back(a->second.frequency);
- thisGenotypeAlleleCounts.push_back(genotype->alleleCount(a->first));
- }
- int popTotalGenotypes = 0;
- vector<int> popGenotypeCounts;
- // for haploid, estimate as if we have all ploidy 1
- if (genotype->ploidy == 1) {
- for (map<string, AlleleCounter>::iterator a = alleleCounters.begin(); a != alleleCounters.end(); ++a) {
- popGenotypeCounts.push_back(a->second.frequency);
- popTotalGenotypes += a->second.frequency;
- }
- } else {
- for (map<Genotype*, int>::iterator g = genotypeCounts.begin(); g != genotypeCounts.end(); ++g) {
- if (g->first->ploidy == genotype->ploidy) {
- //cout << *g->first << "\t" << g->second << endl;
- popGenotypeCounts.push_back(g->second);
- popTotalGenotypes += g->second;
- }
- }
- }
- long double arrangementsOfAllelesInSample = multinomialCoefficientLn(popTotalAlleles, popAlleleCounts);
- //cout << "arrangementsOfAllelesInSample = " << exp(arrangementsOfAllelesInSample) << endl;
- long double arrangementsWithExactlyCountGenotypesGivenAF =
- multinomialCoefficientLn(genotype->ploidy, thisGenotypeAlleleCounts)
- + multinomialCoefficientLn(popTotalGenotypes, popGenotypeCounts);
- /*
- cout << "multinomialCoefficientLn(genotype->ploidy, thisGenotypeAlleleCounts) = "
- << exp(multinomialCoefficientLn(genotype->ploidy, thisGenotypeAlleleCounts)) << endl;
- cout << "multinomialCoefficientLn(popTotalGenotypes, popGenotypeCounts) = "
- << exp(multinomialCoefficientLn(popTotalGenotypes, popGenotypeCounts)) << endl;
- cout << "arrangementsWithExactlyCountGenotypesGivenAF = " << exp(arrangementsWithExactlyCountGenotypesGivenAF) << endl;
- cout << "hwe prob = " << exp(arrangementsWithExactlyCountGenotypesGivenAF - arrangementsOfAllelesInSample) << endl;
- */
- return arrangementsWithExactlyCountGenotypesGivenAF - arrangementsOfAllelesInSample;
-// core calculation of genotype combination likelihoods
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- posteriorProb = 0;
- priorProb = 0;
- priorProbG_Af = 0;
- priorProbAf = 0;
- priorProbObservations = 0;
- priorProbGenotypesGivenHWE = 0;
- // when we are operating on pooled samples, we will not be able to
- // ascertain the number of heterozygotes in the pool,
- // rendering P(Genotype combo | Allele frequency) meaningless
- if (!pooled) {
- priorProbG_Af = probabilityGivenAlleleFrequencyln(permute);
- }
- // XXX XXX hwe
- if (hwePriors) {
- for (map<Genotype*, int>::iterator gc = genotypeCounts.begin(); gc != genotypeCounts.end(); ++gc) {
- Genotype* genotype = gc->first;
- priorProbGenotypesGivenHWE += hweProbGenotypeFrequencyln(genotype);
- }
- }
- if (binomialObsPriors) {
- // for each alternate and the reference allele
- // calculate the binomial probability that we see the given strand balance and read placement prob
- //cerr << *this << endl;
- for (map<string, AlleleCounter>::iterator ac = alleleCounters.begin(); ac != alleleCounters.end(); ++ac) {
- //const string& allele = ac->first;
- const AlleleCounter& alleleCounter = ac->second;
- int obs = alleleCounter.observations;
- /*
- cerr << endl
- << "--------------------------------------------" << endl;
- cerr << " counts: " << alleleCounter.frequency
- << " observations " << alleleCounter.observations
- << " " << alleleCounter.forwardStrand
- << "," << alleleCounter.reverseStrand
- << " " << alleleCounter.placedLeft
- << "," << alleleCounter.placedRight
- << " " << alleleCounter.placedStart
- << "," << alleleCounter.placedEnd
- << endl;
- cerr << "priorProbObservations = " << priorProbObservations << endl;
- cerr << "binprobln strand = " << binomialProbln(alleleCounter.forwardStrand, obs, 0.5) << endl;
- cerr << "binprobln position = " << binomialProbln(alleleCounter.placedLeft, obs, 0.5) << endl;
- cerr << "binprobln start = " << binomialProbln(alleleCounter.placedStart, obs, 0.5) << endl;
- */
- priorProbObservations
- += binomialProbln(alleleCounter.forwardStrand, obs, 0.5)
- + binomialProbln(alleleCounter.placedLeft, obs, 0.5)
- + binomialProbln(alleleCounter.placedStart, obs, 0.5);
- }
- }
- // ok... now do the same move for the observation counts
- // --- this should capture "Allele Balance"
- if (alleleBalancePriors) {
- priorProbObservations += multinomialSamplingProbLn(alleleProbs(), observationCounts());
- }
- // with larger population samples, the effect of
- // P(Genotype combo | Allele frequency) may bias us against reporting
- // true variants which are under selection despite overwhelming evidence
- // for variation. this allows us to scale the effect of this prior
- if (diffusionPriorScalar != 1) {
- priorProbG_Af /= diffusionPriorScalar;
- }
- // Ewens' Sampling Formula
- if (ewensPriors) {
- priorProbAf = alleleFrequencyProbabilityln(countFrequencies(), theta);
- }
- // posterior probability
- /*
- cerr << "priorProbG_Af " << priorProbG_Af << endl
- << "priorProbAf " << priorProbAf << endl
- << "priorProbObservations " << priorProbObservations << endl
- << "priorProbGenotypesGivenHWE " << priorProbGenotypesGivenHWE << endl
- << "probObsGivenGenotypes " << probObsGivenGenotypes << endl;
- */
- priorProb = priorProbG_Af + priorProbAf + priorProbObservations + priorProbGenotypesGivenHWE;
- posteriorProb = priorProb + probObsGivenGenotypes;
- /*
- cerr << "priorProb " << priorProb << endl;
- cerr << "posteriorProb " << posteriorProb << endl;
- cerr << ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>" << endl << endl;
- */
-// Tally the observations whose currentBase equals the alternate base and those
-// whose currentBase equals the reference base.  Returns the pair
-// (alternate count, reference count); observations matching neither are
-// ignored.  The reference comparison is made first, so if refbase == altbase
-// every match is counted as reference.
-pair<int, int> alternateAndReferenceCount(vector<Allele*>& observations, string& refbase, string altbase) {
-    pair<int, int> tally(0, 0);  // first = alternate, second = reference
-    for (size_t i = 0; i < observations.size(); ++i) {
-        const Allele* observed = observations[i];
-        if (observed->currentBase == refbase) {
-            ++tally.second;
-        } else if (observed->currentBase == altbase) {
-            ++tally.first;
-        }
-    }
-    return tally;
-}
-// Index the elements of a combo by their name field: after the call, gcm maps
-// each element's name to the element pointer.  An existing gcm entry with the
-// same name is overwritten; other entries are left untouched.
-void genotypeCombo2Map(GenotypeCombo& gc, GenotypeComboMap& gcm) {
-    for (size_t i = 0; i < gc.size(); ++i) {
-        SampleDataLikelihood* entry = gc[i];
-        gcm[entry->name] = entry;
-    }
-}
-// Fill orderedCombo with the elements of combo, arranged in the order of
-// sampleDataLikelihoods: for each inner likelihood list, the combo element
-// whose name equals the name of the list's first entry is appended.  The
-// result is then initialised and its posterior is computed with the supplied
-// prior settings.
-// NOTE(review): a name with no match in combo appends a null pointer (map
-// operator[] default-inserts), and an empty inner list is not guarded against.
-void orderedGenotypeCombo(
-    GenotypeCombo& combo,
-    GenotypeCombo& orderedCombo,
-    SampleDataLikelihoods& sampleDataLikelihoods,
-    long double theta,
-    bool pooled,
-    bool ewensPriors,
-    bool permute,
-    bool hwePriors,
-    bool binomialObsPriors,
-    bool alleleBalancePriors,
-    long double diffusionPriorScalar) {
-    GenotypeComboMap comboByName;
-    genotypeCombo2Map(combo, comboByName);
-    SampleDataLikelihoods::iterator likelihoodList = sampleDataLikelihoods.begin();
-    while (likelihoodList != sampleDataLikelihoods.end()) {
-        const string& wantedName = likelihoodList->front().name;
-        orderedCombo.push_back(comboByName[wantedName]);
-        ++likelihoodList;
-    }
-    orderedCombo.init(binomialObsPriors);
-    orderedCombo.calculatePosteriorProbability(
-        theta, pooled, ewensPriors, permute,
-        hwePriors, binomialObsPriors, alleleBalancePriors,
-        diffusionPriorScalar);
-}
-// Collects the alternate alleles (relative to referenceBase) reported by every
-// genotype in the combo, together with the number of times each one was
-// reported, and returns them sorted with AllelePairIntCompare (by frequency,
-// per the original description of this function).
-vector<pair<Allele, int> > alternateAlleles(GenotypeCombo& combo, string referenceBase) {
-    // operator[] value-initialises a missing count to 0, so a plain increment
-    // covers both the first and the repeated sighting of an allele
-    map<Allele, int> tally;
-    for (GenotypeCombo::iterator member = combo.begin(); member != combo.end(); ++member) {
-        vector<Allele> memberAlts = (*member)->genotype->alternateAlleles(referenceBase);
-        for (vector<Allele>::iterator alt = memberAlts.begin(); alt != memberAlts.end(); ++alt) {
-            ++tally[*alt];
-        }
-    }
-    // copy the (allele, count) entries out of the map, then order them
-    vector<pair<Allele, int> > ranked(tally.begin(), tally.end());
-    sort(ranked.begin(), ranked.end(), AllelePairIntCompare());
-    return ranked;
-}
-// Bitwise OR of the type field of every allele held by this genotype.
-int Genotype::containedAlleleTypes(void) {
-    int typeMask = 0;
-    for (size_t i = 0; i < size(); ++i) {
-        typeMask |= (*this)[i].allele.type;
-    }
-    return typeMask;
-}
-// For each element of this genotype, in order, asks the sample how many
-// observations it has of that element's allele and returns the list.
-vector<int> Genotype::alleleObservationCounts(Sample& sample) {
-    vector<int> perAllele;
-    Genotype::iterator element = begin();
-    while (element != end()) {
-        perAllele.push_back(sample.observationCount(element->allele));
-        ++element;
-    }
-    return perAllele;
-}
-// Sum, over every element of this genotype, of the sample's observation count
-// for that element's allele.
-int Genotype::alleleObservationCount(Sample& sample) {
-    int total = 0;
-    for (size_t i = 0; i < size(); ++i) {
-        total += sample.observationCount((*this)[i].allele);
-    }
-    return total;
-}
-// True as soon as the sample reports a non-zero observation count for any
-// allele of this genotype; false if every count is zero (or there are no
-// elements).
-bool Genotype::sampleHasSupportingObservations(Sample& sample) {
-    Genotype::iterator element = begin();
-    // advance past alleles without observations; stop at the first supported one
-    while (element != end() && sample.observationCount(element->allele) == 0) {
-        ++element;
-    }
-    return element != end();
-}
-// True when none of the per-allele observation counts for this genotype is
-// zero (vacuously true for a genotype without elements).
-bool Genotype::sampleHasSupportingObservationsForAllAlleles(Sample& sample) {
-    const vector<int> perAllele = alleleObservationCounts(sample);
-    return std::find(perAllele.begin(), perAllele.end(), 0) == perAllele.end();
-}
-// Builds, for every distinct ploidy in ploidies, the list returned by
-// allPossibleGenotypes(ploidy, genotypeAlleles).  Each ploidy is expanded only
-// once, however often it occurs in the input.
-map<int, vector<Genotype> > getGenotypesByPloidy(vector<int>& ploidies, vector<Allele>& genotypeAlleles) {
-    map<int, vector<Genotype> > byPloidy;
-    for (size_t i = 0; i < ploidies.size(); ++i) {
-        const int ploidy = ploidies[i];
-        if (byPloidy.count(ploidy) == 0) {
-            byPloidy[ploidy] = allPossibleGenotypes(ploidy, genotypeAlleles);
-        }
-    }
-    return byPloidy;
-}
-// Returns pointers to the genotypes in gts that have the same ploidy as this
-// genotype and, for every element of this genotype, contain that element's
-// allele exactly count times.  The pointers refer to elements of gts and stay
-// valid only as long as gts is not resized or destroyed.
-// NOTE(review): the original comment speaks of comparing the non-null alleles
-// only, but the loop compares every element of this genotype -- confirm
-// against alleleCount(Allele&) whether null alleles are treated specially.
-vector<Genotype*> Genotype::nullMatchingGenotypes(vector<Genotype>& gts) {
-    vector<Genotype*> results;
-    for (vector<Genotype>::iterator g = gts.begin(); g != gts.end(); ++g) {
-        Genotype& candidate = *g;
-        if (candidate.ploidy != ploidy) {
-            continue;
-        }
-        bool match = true;
-        for (Genotype::iterator gt = begin(); gt != end(); ++gt) {
-            if (candidate.alleleCount(gt->allele) != gt->count) {
-                match = false;
-                break;  // one mismatch settles it; no need to check the rest
-            }
-        }
-        if (match) {
-            results.push_back(&candidate);
-        }
-    }
-    return results;
-}
-// True when alleleCount("N") is non-zero for this genotype.
-bool Genotype::hasNullAllele(void) {
-    const string nullBase("N");
-    const int nullCopies = alleleCount(nullBase);
-    return nullCopies != 0;
-}
-// Appends another combo to this one under the assumption that the two are
-// independent (for example, combos from different populations).
-//   * per-allele counters of other are added to this combo's counters
-//   * the genotypes of other's elements are added to genotypeCounts
-//   * the log-scale permutation, data-likelihood, prior and posterior terms
-//     are summed (independence: probabilities multiply, logs add)
-//   * other's elements are appended to this combo's element list
-// other itself is not modified.  Passing *this as other is not supported (the
-// final insert would read from the vector it is growing).
-void GenotypeCombo::appendIndependentCombo(GenotypeCombo& other) {
-    // merge the cached allele counters
-    for (map<string, AlleleCounter>::iterator c = other.alleleCounters.begin(); c != other.alleleCounters.end(); ++c) {
-        const AlleleCounter& theirs = c->second;
-        AlleleCounter& mine = alleleCounters[c->first];
-        mine.frequency += theirs.frequency;
-        mine.observations += theirs.observations;
-        mine.forwardStrand += theirs.forwardStrand;
-        mine.reverseStrand += theirs.reverseStrand;
-        mine.placedLeft += theirs.placedLeft;
-        mine.placedRight += theirs.placedRight;
-        mine.placedStart += theirs.placedStart;
-        mine.placedEnd += theirs.placedEnd;
-    }
-    // count the genotypes that are being appended.  This must walk other's
-    // elements: walking this combo's own elements (as the code did before)
-    // counts the existing genotypes a second time and never counts the new ones.
-    for (GenotypeCombo::iterator s = other.begin(); s != other.end(); ++s) {
-        ++genotypeCounts[(*s)->genotype];
-    }
-    // permutations
-    permutationsln += other.permutationsln;
-    // combine probabilities assuming conditional independence between these two combinations
-    // data likelihood
-    probObsGivenGenotypes += other.probObsGivenGenotypes;
-    // posterior
-    posteriorProb += other.posteriorProb;
-    // priors
-    priorProb += other.priorProb;
-    priorProbG_Af += other.priorProbG_Af;
-    priorProbAf += other.priorProbAf;
-    priorProbObservations += other.priorProbObservations;
-    priorProbGenotypesGivenHWE += other.priorProbGenotypesGivenHWE;
-    // add the other sample data likelihoods to this combo
-    reserve(size() + other.size());
-    insert(end(), other.begin(), other.end());
-}
-// all combos of each population are combined with the best combos of the other pops
-// combines all like homozygous combos
-//
-// genotypeCombos             output list; results are appended to it (or, in the
-//                            single-population case, it is overwritten)
-// genotypeCombosByPopulation per-population combo lists keyed by population name;
-//                            the first combo of each list is treated as that
-//                            population's best combo
-// NOTE(review): front() is called on every population's list without an
-// emptiness check -- presumably callers never pass an empty list; confirm.
-void combinePopulationCombos(list<GenotypeCombo>& genotypeCombos, map<string, list<GenotypeCombo> >& genotypeCombosByPopulation) {
- if (genotypeCombosByPopulation.size() == 1) {
- // one pop, default case is to just pass forward the current set of combos
- genotypeCombos = genotypeCombosByPopulation.begin()->second;
- } else {
- // for each sub-pop
- for (map<string, list<GenotypeCombo> >::iterator p = genotypeCombosByPopulation.begin(); p != genotypeCombosByPopulation.end(); ++p) {
- const string& population = p->first; // not referenced again in this function
- list<GenotypeCombo>& populationGenotypeCombos = p->second;
- GenotypeCombo otherPopulationsBestCombo;
- // run through all the other combos to generate a best combo for the
- // other populations, and accumulate homozygous combos, keyed by allele
- for (map<string, list<GenotypeCombo> >::iterator o = genotypeCombosByPopulation.begin(); o != genotypeCombosByPopulation.end(); ++o) {
- if (o->first != p->first) { // if the genotype list is for a different population
- GenotypeCombo& bestCombo = o->second.front(); // this is the "best" combo from the other population
- // add the best combo from this population to the best combos from the other populations
- if (otherPopulationsBestCombo.empty()) {
- otherPopulationsBestCombo = bestCombo;
- } else {
- otherPopulationsBestCombo.appendIndependentCombo(bestCombo);
- }
- }
- }
- // append the best "other population" combo to all the combos in this set
- for (list<GenotypeCombo>::iterator g = populationGenotypeCombos.begin(); g != populationGenotypeCombos.end(); ++g) {
- // copy the combo into the output first, then extend the copy in place
- genotypeCombos.push_back(*g);
- genotypeCombos.back().appendIndependentCombo(otherPopulationsBestCombo);
- }
- }
- map<Allele, GenotypeCombo> otherPopulationsHomozygousCombos;
- // generate the homozygous combos for all the populations
- for (map<string, list<GenotypeCombo> >::iterator o = genotypeCombosByPopulation.begin(); o != genotypeCombosByPopulation.end(); ++o) {
- // accumulate all the homozygous combos into the otherPopulationsHomozygousCombos
- for (list<GenotypeCombo>::iterator c = o->second.begin(); c != o->second.end(); ++c) {
- GenotypeCombo& combo = *c;
- if (combo.isHomozygous()) {
- // key the combo by the first allele of its first element's genotype
- Allele& allele = combo.front()->genotype->alleles.front();
- map<Allele, GenotypeCombo>::iterator g = otherPopulationsHomozygousCombos.find(allele);
- if (g == otherPopulationsHomozygousCombos.end()) {
- otherPopulationsHomozygousCombos[allele] = combo;
- } else {
- GenotypeCombo& homozygousCombo = g->second;
- homozygousCombo.appendIndependentCombo(combo);
- }
- }
- }
- }
- // and add them to the result set
- for (map<Allele, GenotypeCombo>::iterator h = otherPopulationsHomozygousCombos.begin(); h!= otherPopulationsHomozygousCombos.end(); ++h) {
- GenotypeCombo& combo = h->second;
- //assert(genotypeCombos.back().size() == combo.size());
- genotypeCombos.push_back(combo);
- }
- // sort the combined combos
- GenotypeComboResultSorter gcrSorter;
- genotypeCombos.sort(gcrSorter);
- // drop adjacent duplicates left next to each other by the sort
- // NOTE(review): equality here is whatever operator== resolves to for GenotypeCombo -- presumably the inherited vector comparison of element pointers; confirm
- genotypeCombos.unique();
- }
diff --git a/src/Genotype.h b/src/Genotype.h
deleted file mode 100644
index 98cdb6b..0000000
--- a/src/Genotype.h
+++ /dev/null
@@ -1,453 +0,0 @@
-#ifndef __GENOTYPE_H
-#define __GENOTYPE_H
-#include <iostream>
-#include <vector>
-#include <utility> // pair
-#include <algorithm>
-#include <numeric>
-#include <vector>
-#include <map>
-#include <iterator>
-#include <cmath>
-#include <numeric>
-#include <assert.h>
-#include "Allele.h"
-#include "Sample.h"
-#include "Utility.h"
-#include "Multinomial.h"
-#include "CNV.h"
-#include "Ewens.h"
-#include "Bias.h"
-#include "join.h"
-#include "convert.h"
-using namespace std;
-// each genotype is a vector of GenotypeElements, each is a count of alleles
-// One (allele, count) entry of a Genotype.
-class GenotypeElement {
- friend ostream& operator<<(ostream& out, GenotypeElement& rhs);
- Allele allele; // the allele this entry stands for
- int count; // number of copies of that allele in the genotype
- // stores a copy of the allele together with its copy number
- GenotypeElement(const Allele& a, int c) : allele(a), count(c) { }
-// A genotype: a vector of GenotypeElement entries, one per distinct allele,
-// plus values cached by the constructor (ploidy, sorted allele list,
-// per-base counts, homozygosity flag, log permutation count).
-class Genotype : public vector<GenotypeElement> {
- friend ostream& operator<<(ostream& out, const pair<Allele, int>& rhs);
- friend ostream& operator<<(ostream& out, const Genotype& g);
- friend bool operator<(Genotype& a, Genotype& b);
- int ploidy; // set from getPloidy() at construction
- vector<Allele> alleles; // sorted copy of the alleles passed to the constructor
- map<string, int> alleleCounts; // currentBase of each allele group -> size of the group
- bool homozygous; // set from isHomozygous() at construction
- long double permutationsln; // aka, multinomialCoefficientLn(ploidy, counts())
- // Builds the genotype from an ungrouped list of alleles: the list is copied
- // and sorted, grouped with groupAlleles_copy, and one element is appended
- // per group.  The caller's vector is not modified.
- Genotype(vector<Allele>& ungroupedAlleles) {
- alleles = ungroupedAlleles;
- sort(alleles.begin(), alleles.end());
- vector<vector<Allele> > groups = groupAlleles_copy(alleles);
- for (vector<vector<Allele> >::const_iterator group = groups.begin(); group != groups.end(); ++group) {
- this->push_back(GenotypeElement(group->front(), group->size()));
- alleleCounts[group->front().currentBase] = group->size();
- }
- ploidy = getPloidy();
- homozygous = isHomozygous();
- // left at 0 for homozygous genotypes; computed only for the others
- permutationsln = 0;
- if (!homozygous) {
- permutationsln = multinomialCoefficientLn(ploidy, counts());
- }
- }
- vector<Allele*> uniqueAlleles(void);
- int getPloidy(void);
- int alleleCount(const string& base);
- int alleleCount(Allele& allele);
- bool containsAllele(Allele& allele);
- bool containsAllele(const string& base);
- // returns true when the genotype is composed of a subset of the alleles
- bool matchesAlleles(vector<Allele>& alleles);
- vector<Allele> alternateAlleles(string& refbase);
- vector<string> alternateBases(string& refbase);
- vector<int> counts(void);
- // the probability of drawing each allele out of the genotype, ordered by allele
- vector<long double> alleleProbabilities(void);
- vector<long double> alleleProbabilities(Bias& observationBias);
- double alleleSamplingProb(const string& base);
- double alleleSamplingProb(Allele& allele);
- string str(void) const;
- string relativeGenotype(string& refbase, vector<Allele>& altbases);
- void relativeGenotype(vector<int>& spec, string& refbase, vector<Allele>& altbases);
- string relativeGenotype(string& refbase, string& altbase);
- void relativeGenotype(vector<int>& rg, vector<Allele>& alleles);
- bool isHeterozygous(void);
- bool isHomozygous(void);
- bool isHomozygousAlternate(void);
- bool isHomozygousReference(void);
- // bitwise OR of the type field of every allele in the genotype
- int containedAlleleTypes(void);
- // per-allele observation counts reported by the sample, in element order
- vector<int> alleleObservationCounts(Sample& sample);
- // sum of the per-allele observation counts reported by the sample
- int alleleObservationCount(Sample& sample);
- // true when at least one allele of the genotype has an observation in the sample
- bool sampleHasSupportingObservations(Sample& sample);
- // true when no allele of the genotype has a zero observation count in the sample
- bool sampleHasSupportingObservationsForAllAlleles(Sample& sample);
- // true when alleleCount("N") is non-zero
- bool hasNullAllele(void);
- // genotypes from gts with the same ploidy whose allele counts agree with this genotype's elements
- vector<Genotype*> nullMatchingGenotypes(vector<Genotype>& gts);
-string IUPAC(Genotype& g);
-string IUPAC2GenotypeStr(string iupac);
-vector<Genotype> allPossibleGenotypes(int ploidy, vector<Allele>& potentialAlleles);
-// The data likelihood of one genotype for one sample, with bookkeeping used
-// when ranking genotypes.  Holds non-owning pointers to the sample and the
-// genotype (nothing in this class frees them).
-class SampleDataLikelihood {
- string name;
- Genotype* genotype;
- long double prob;
- long double marginal; // starts at 0
- Sample* sample;
- bool hasObservations; // starts as true
- int rank; // the rank of this data likelihood relative to others for the sample, 0 is best
- // NOTE(review): the initializer list below is not written in declaration
- // order.  Members are always initialised in declaration order, so this is
- // harmless here (no initializer depends on another member) but triggers
- // -Wreorder warnings.
- SampleDataLikelihood(string n, Sample* s, Genotype* g, long double p, int r)
- : name(n)
- , sample(s)
- , genotype(g)
- , prob(p)
- , rank(r)
- , marginal(0)
- , hasObservations(true)
- { }
- // true when the sample has an observation of at least one allele of the genotype
- bool hasSupportingObservations(void) const {
- return genotype->sampleHasSupportingObservations(*sample);
- }
- // total number of observations in the sample over the alleles of the genotype
- int supportingObservationCount(void) const {
- return genotype->alleleObservationCount(*sample);
- }
-// Running totals kept per allele within a GenotypeCombo.  All counters start
-// at zero.
-class AlleleCounter {
- int frequency;
- int observations;
- int forwardStrand; // supporting reads on the forward strand
- int reverseStrand; // supporting reads on the reverse strand
- int placedLeft; // supporting reads placed to the left of the allele
- int placedRight; // supporting reads placed to the right of the allele
- int placedStart; // supporting reads for which the allele occurs in the first half of the read (5'-3')
- int placedEnd; // supporting reads for which the allele occurs in the second half of the read (5'-3')
- // zero-initialises every counter
- AlleleCounter(void)
- : frequency(0)
- , observations(0)
- , forwardStrand(0)
- , reverseStrand(0)
- , placedLeft(0)
- , placedRight(0)
- , placedStart(0)
- , placedEnd(0)
- { }
-// a combination of genotypes for the population of samples in the analysis
-//
-// The combo is a vector of non-owning SampleDataLikelihood pointers plus
-// cached per-allele and per-genotype counts and the log-scale likelihood,
-// prior and posterior terms computed from them.
-class GenotypeCombo : public vector<SampleDataLikelihood*> {
-public:
-    // GenotypeCombo::prob is equal to the sum of probs in the combo.  We
-    // factor it out so that we can construct the probabilities efficiently as
-    // we generate the genotype combinations
-    long double probObsGivenGenotypes; // aka data likelihood
-    long double permutationsln; // the number of permutations of unphased genotypes in the combo
-    // these *must* be generated at construction time
-    // for efficiency they can be updated as each genotype combo is generated
-    //map<string, int> alleleCounts; // frequencies of each allele in the combo
-    //map<string, pair<int, int> > alleleStrandCounts; // map from allele spec to (forward, reverse) counts
-    //map<string, pair<int, int> > alleleReadPlacementCounts; // map from allele spec to (left, right) counts
-    //map<string, pair<int, int> > alleleReadPositionCounts; // map from allele spec to (left, right) counts
-    map<string, AlleleCounter> alleleCounters;
-    map<Genotype*, int> genotypeCounts;
-    // Every scalar term starts at 0, including priorProbGenotypesGivenHWE:
-    // appendIndependentCombo() accumulates into it with +=, so leaving it
-    // uninitialised would read an indeterminate value.  The initialisers are
-    // listed in member declaration order, which is the order they run in.
-    GenotypeCombo(void)
-        : probObsGivenGenotypes(0)
-        , permutationsln(0)
-        , posteriorProb(0)
-        , priorProb(0)
-        , priorProbG_Af(0)
-        , priorProbAf(0)
-        , priorProbObservations(0)
-        , priorProbGenotypesGivenHWE(0)
-    { }
-    void init(bool useObsExpectations);
-    void addPriorAlleleCounts(map<string, int>& priorACs);
-    // appends the other combo to this one,
-    // updates the counts, and multiplies the probabilities,
-    // assuming independence between the two combos
-    void appendIndependentCombo(GenotypeCombo& other);
-    int numberOfAlleles(void);
-    vector<long double> alleleProbs(void); // scales counts() by the total number of alleles
-    int ploidy(void); // the number of copies of the locus in this combination
-    int alleleCount(Allele& allele);
-    int alleleCount(const string& allele);
-    long double alleleFrequency(Allele& allele);
-    long double alleleFrequency(const string& allele);
-    long double genotypeFrequency(Genotype* genotype);
-    void updateCachedCounts(Sample* sample, Genotype* oldGenotype, Genotype* newGenotype, bool useObsExpectations);
-    map<string, int> countAlleles(void);
-    map<int, int> countFrequencies(void);
-    int hetCount(void);
-    vector<int> counts(void); // the counts of frequencies of the alleles in the genotype combo
-    vector<int> observationCounts(void); // the counts of observations of the alleles (in sorted order)
-    int observationTotal(void);
-    vector<string> alleles(void); // the string representations of alleles in the genotype combo
-    bool isHomozygous(void); // returns true if the combination is 100% homozygous across all individuals
-                             // e.g. if there is no variation
-    // posterior
-    long double posteriorProb; // p(genotype combo) * p(observations | genotype combo)
-    // priors
-    long double priorProb; // p(genotype combo) = p(genotype combo | allele frequency) * p(allele frequency) * p(observations)
-    long double priorProbG_Af; // p(genotype combo | allele frequency)
-    long double priorProbAf; // p(allele frequency)
-    long double priorProbObservations; // p(observations)
-    long double priorProbGenotypesGivenHWE; // summed over genotypeCounts from hweProbGenotypeFrequencyln() when HWE priors are enabled
-    //GenotypeCombo* combo,
-    void calculatePosteriorProbability(
-        long double theta,
-        bool pooled,
-        bool ewensPriors,
-        bool permute,
-        bool hwePriors,
-        bool obsBinomialPriors,
-        bool alleleBalancePriors,
-        long double diffusionPriorScalarln);
-    long double probabilityGivenAlleleFrequencyln(bool permute);
-    long double hweExpectedFrequencyln(Genotype* genotype);
-    long double hweProbGenotypeFrequencyln(Genotype* genotype);
-    long double hweComboProb(void);
-};
-// Ordering for result lists: the combo with the larger posteriorProb comes
-// first; when the posteriors compare equal the combos' own operator> decides
-// (presumably the comparison inherited from vector -- confirm).
-struct GenotypeComboResultSorter {
-    bool operator()(const GenotypeCombo& gc1, const GenotypeCombo& gc2) {
-        if (gc1.posteriorProb != gc2.posteriorProb) {
-            return gc1.posteriorProb > gc2.posteriorProb;
-        }
-        return gc1 > gc2;
-    }
-};
-// for comparing GenotypeCombos which are empty
-// Two combos are treated as equal when their posteriorProb values compare
-// equal; nothing else is looked at.
-struct GenotypeComboResultEqual {
-    bool operator()(const GenotypeCombo& gc1, const GenotypeCombo& gc2) {
-        const long double lhsPosterior = gc1.posteriorProb;
-        const long double rhsPosterior = gc2.posteriorProb;
-        return lhsPosterior == rhsPosterior;
-    }
-};
-// for sorting data likelihoods
-// Descending order of prob: a sorts before b when a's prob is the larger one.
-struct SampleDataLikelihoodCompare {
-    bool operator()(const SampleDataLikelihood& a,
-                    const SampleDataLikelihood& b) {
-        return b.prob < a.prob;
-    }
-};
-// Descending order of marginal: a sorts before b when a's marginal is larger.
-struct SampleMarginalCompare {
-    bool operator()(const SampleDataLikelihood& a,
-                    const SampleDataLikelihood& b) {
-        return b.marginal < a.marginal;
-    }
-};
-// Descending order of (marginal + prob).
-struct SampleLikelihoodCompare {
-    bool operator()(const SampleDataLikelihood& a,
-                    const SampleDataLikelihood& b) {
-        const long double scoreA = a.marginal + a.prob;
-        const long double scoreB = b.marginal + b.prob;
-        return scoreA > scoreB;
-    }
-};
-// Ordering that first separates entries with supporting observations from
-// entries without any: an entry with observations always sorts before one
-// with none.  When both or neither have observations, the order is
-// descending (marginal + prob).
-struct SampleMarginalAndObsCompare {
-    bool operator()(const SampleDataLikelihood& a,
-                    const SampleDataLikelihood& b) {
-        const bool aUnsupported = a.supportingObservationCount() == 0;
-        const bool bUnsupported = b.supportingObservationCount() == 0;
-        if (aUnsupported != bUnsupported) {
-            // exactly one side lacks observations; a wins only if that side is b
-            return bUnsupported;
-        }
-        return (a.marginal + a.prob) > (b.marginal + b.prob);
-    }
-};
-// a set of probabilities for a set of genotypes for a set of samples
-typedef vector<vector<SampleDataLikelihood> > SampleDataLikelihoods;
-void sortSampleDataLikelihoods(vector<SampleDataLikelihood>& likelihoods);
-bool sortSampleDataLikelihoodsByMarginals(vector<SampleDataLikelihood>& likelihoods);
-bool sortSampleDataLikelihoodsByMarginals(SampleDataLikelihoods& samplesLikelihoods);
-bool sortSampleDataLikelihoodsByMarginalsAndObs(SampleDataLikelihoods& samplesLikelihoods);
-bool sortSampleDataLikelihoodsScaledByMarginals(vector<SampleDataLikelihood>& likelihoods);
-bool sortSampleDataLikelihoodsScaledByMarginals(SampleDataLikelihoods& samplesLikelihoods);
-typedef map<string, SampleDataLikelihood*> GenotypeComboMap;
-void genotypeCombo2Map(GenotypeCombo& gc, GenotypeComboMap& gcm);
- GenotypeCombo& combo,
- GenotypeCombo& orderedCombo,
- SampleDataLikelihoods& sampleDataLikelihoods,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar);
- GenotypeCombo& combo,
- vector<int>& initialPosition,
- SampleDataLikelihoods& variantSampleDataLikelihoods,
- SampleDataLikelihoods& invariantSampleDataLikelihoods,
- map<string, int>& priorACs,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar);
- GenotypeCombo& combo,
- SampleDataLikelihoods& sampleDataLikelihoods,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar);
- list<GenotypeCombo>& combos,
- GenotypeCombo& comboKing,
- SampleDataLikelihoods& variantDataLikelihoods,
- SampleDataLikelihoods& invariantDataLikelihoods,
- Samples& samples,
- map<string, int>& priorACs,
- int bandwidth, int banddepth,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar);
- list<GenotypeCombo>& combos,
- GenotypeCombo& comboKing,
- SampleDataLikelihoods& sampleDataLikelihoods,
- Samples& samples,
- map<string, int>& priorACs,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar,
- bool keepCombos);
- list<GenotypeCombo>& combos,
- GenotypeCombo& comboKing,
- SampleDataLikelihoods& sampleDataLikelihoods,
- SampleDataLikelihoods& variantDataLikelihoods,
- SampleDataLikelihoods& invariantDataLikelihoods,
- Samples& samples,
- vector<Allele>& genotypeAlleles,
- map<string, int>& priorACs,
- int bandwidth, int banddepth,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar,
- int maxiterations,
- int& totaliterations,
- bool addHomozygousCombos);
- list<GenotypeCombo>& combos,
- SampleDataLikelihoods& sampleDataLikelihoods,
- SampleDataLikelihoods& variantSampleDataLikelihoods,
- SampleDataLikelihoods& invariantSampleDataLikelihoods,
- Samples& samples,
- vector<Allele>& genotypeAlleles,
- long double theta,
- bool pooled,
- bool ewensPriors,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar);
-vector<pair<Allele, int> > alternateAlleles(GenotypeCombo& combo, string referenceBase);
-pair<int, int> alternateAndReferenceCount(vector<Allele*>& observations, string& refbase, string altbase);
-ostream& operator<<(ostream& out, list<GenotypeCombo>& combo);
-ostream& operator<<(ostream& out, GenotypeCombo& g);
-map<int, vector<Genotype> > getGenotypesByPloidy(vector<int>& ploidies, vector<Allele>& genotypeAlleles);
-void combinePopulationCombos(list<GenotypeCombo>& genotypeCombos,
- map<string, list<GenotypeCombo> >& genotypeCombosByPopulation);
diff --git a/src/GenotypePriors.cpp b/src/GenotypePriors.cpp
deleted file mode 100644
index 7ed74d8..0000000
--- a/src/GenotypePriors.cpp
+++ /dev/null
@@ -1,188 +0,0 @@
-#include "GenotypePriors.h"
-// Linear-scale (non-log) probability of an allele frequency partition.
-// alleleFrequencyCounts maps a frequency to the number of alleles that occur
-// with that frequency; theta is the mutation-rate parameter.
-// NOTE(review): in Ewens' sampling formula as usually stated the per-class
-// factor is theta^count / (frequency^count * count!), i.e. count! is in the
-// denominator; here factorial(count) is multiplied in.  Confirm whether this
-// is intentional before changing it.
-long double alleleFrequencyProbability(const map<int, int>& alleleFrequencyCounts, long double theta) {
- int M = 0; // total number of allele copies: sum of frequency * count
- long double p = 1;
- for (map<int, int>::const_iterator f = alleleFrequencyCounts.begin(); f != alleleFrequencyCounts.end(); ++f) {
- int frequency = f->first;
- int count = f->second;
- M += frequency * count;
- p *= (double) pow((double) theta, (double) count) / (double) pow((double) frequency, (double) count) * factorial(count);
- }
- // thetaH = (theta + 1)(theta + 2)...(theta + M - 1); the leading theta factor is applied in the return expression
- long double thetaH = 1;
- for (int h = 1; h < M; ++h)
- thetaH *= theta + h;
- return factorial(M) / (theta * thetaH) * p;
-AlleleFrequencyProbabilityCache alleleFrequencyProbabilityCache;
-// Log-scale allele frequency probability, answered through the file-level
-// alleleFrequencyProbabilityCache object.
-long double alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta) {
-    AlleleFrequencyProbabilityCache& cache = alleleFrequencyProbabilityCache;
-    return cache.alleleFrequencyProbabilityln(alleleFrequencyCounts, theta);
-}
-// Implements Ewens' Sampling Formula, which provides probability of a given
-// partition of alleles in a sample from a population
-// Log-scale counterpart of alleleFrequencyProbability(): products become
-// sums and powers go through powln().  alleleFrequencyCounts maps a
-// frequency to the number of alleles occurring with that frequency.
-// NOTE(review): factorialln(count) is added here, whereas the formula as
-// usually stated has count! in the denominator (which would subtract it) --
-// confirm before changing.
-long double __alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta) {
- int M = 0; // multiplicity of site
- long double p = 0;
- long double thetaln = log(theta);
- for (map<int, int>::const_iterator f = alleleFrequencyCounts.begin(); f != alleleFrequencyCounts.end(); ++f) {
- int frequency = f->first;
- int count = f->second;
- M += frequency * count;
- p += powln(thetaln, count) - powln(log(frequency), count) + factorialln(count);
- }
- // thetaH = ln((theta + 1)(theta + 2)...(theta + M - 1)); ln(theta) is added in the return expression
- long double thetaH = 0;
- for (int h = 1; h < M; ++h)
- thetaH += log(theta + h);
- return factorialln(M) - (thetaln + thetaH) + p;
-// Log-scale P(genotype combo | allele frequency): the summed log multinomial
-// coefficients of the non-homozygous genotypes in the combo, minus the log
-// multinomial coefficient of the combo's allele counts over its total number
-// of alleles.  The allele argument is not used.
-long double probabilityGenotypeComboGivenAlleleFrequencyln(GenotypeCombo& genotypeCombo, Allele& allele) {
-    const int totalAlleles = genotypeCombo.numberOfAlleles();
-    long double hetPermutationsLn = 0;
-    for (size_t i = 0; i < genotypeCombo.size(); ++i) {
-        Genotype* genotype = genotypeCombo[i]->genotype;
-        if (genotype->homozygous) {
-            continue;  // homozygous genotypes contribute nothing to the sum
-        }
-        hetPermutationsLn += multinomialCoefficientLn(genotype->ploidy, genotype->counts());
-    }
-    return hetPermutationsLn - multinomialCoefficientLn(totalAlleles, genotypeCombo.counts());
-}
-// core calculation of genotype combination likelihoods
- GenotypeCombo* combo,
- Allele& refAllele,
- long double theta,
- bool pooled,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- // when we are operating on pooled samples, we will not be able to
- // ascertain the number of heterozygotes in the pool,
- // rendering P(Genotype combo | Allele frequency) meaningless
- long double priorProbabilityOfGenotypeComboG_Af = 0;
- if (!pooled) {
- priorProbabilityOfGenotypeComboG_Af = probabilityGenotypeComboGivenAlleleFrequencyln(*combo, refAllele);
- }
- long double priorObservationExpectationProb = 0;
- if (binomialObsPriors) {
- // for each alternate and the reference allele
- // calculate the binomial probability that we see the given strand balance and read placement prob
- vector<string> alleles = combo->alleles();
- // cerr << *combo << endl;
- for (vector<string>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- const string& allele = *a;
- map<string, AlleleCounter>::iterator ac = combo->alleleCounters.find(allele);
- if (ac != combo->alleleCounters.end()) {
- const AlleleCounter& alleleCounter = ac->second;
- int obs = alleleCounter.observations;
- /*
- cerr << allele << " counts: " << alleleCounter.frequency
- << " observations " << alleleCounter.observations
- << " " << alleleCounter.forwardStrand
- << "," << alleleCounter.reverseStrand
- << " " << alleleCounter.placedLeft
- << "," << alleleCounter.placedRight
- << " " << alleleCounter.placedStart
- << "," << alleleCounter.placedEnd
- << endl;
- cerr << "priorObservationExpectationProb = " << priorObservationExpectationProb << endl;
- cerr << "binprobln strand = " << binomialProbln(alleleCounter.forwardStrand, obs, 0.5) << endl;
- cerr << "binprobln position = " << binomialProbln(alleleCounter.placedLeft, obs, 0.5) << endl;
- cerr << "binprobln start = " << binomialProbln(alleleCounter.placedStart, obs, 0.5) << endl;
- cerr << "priorObservationExpectationProb = " << priorObservationExpectationProb << endl;
- */
- priorObservationExpectationProb
- += binomialProbln(alleleCounter.forwardStrand, obs, 0.5)
- + binomialProbln(alleleCounter.placedLeft, obs, 0.5)
- + binomialProbln(alleleCounter.placedStart, obs, 0.5);
- }
- }
- // ok... now do the same move for the observation counts
- // --- this should capture "Allele Balance"
- }
- if (alleleBalancePriors) {
- priorObservationExpectationProb += multinomialSamplingProbLn(combo->alleleProbs(), combo->observationCounts());
- }
- // with larger population samples, the effect of
- // P(Genotype combo | Allele frequency) may bias us against reporting
- // true variants which are under selection despite overwhelming evidence
- // for variation. this allows us to scale the effect of this prior
- if (diffusionPriorScalar != 1) {
- priorProbabilityOfGenotypeComboG_Af /= diffusionPriorScalar;
- }
- // Ewens' Sampling Formula
- long double priorProbabilityOfGenotypeComboAf =
- alleleFrequencyProbabilityln(combo->countFrequencies(), theta);
- long double priorProbabilityOfGenotypeCombo =
- priorProbabilityOfGenotypeComboG_Af + priorProbabilityOfGenotypeComboAf;
- long double priorComboProb = priorProbabilityOfGenotypeCombo + combo->prob + priorObservationExpectationProb;
- return GenotypeComboResult(combo,
- priorComboProb,
- combo->prob,
- priorProbabilityOfGenotypeCombo,
- priorProbabilityOfGenotypeComboG_Af,
- priorProbabilityOfGenotypeComboAf,
- priorObservationExpectationProb);
- vector<GenotypeComboResult>& genotypeComboProbs,
- vector<GenotypeCombo>& bandedCombos,
- Allele& refAllele,
- long double theta,
- bool pooled,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- for (vector<GenotypeCombo>::iterator c = bandedCombos.begin(); c != bandedCombos.end(); ++c) {
- GenotypeCombo* combo = &*c;
- genotypeComboProbs.push_back(
- genotypeCombinationPriorProbability(
- combo,
- refAllele,
- theta,
- pooled,
- binomialObsPriors,
- alleleBalancePriors,
- diffusionPriorScalar));
- }
diff --git a/src/GenotypePriors.h b/src/GenotypePriors.h
deleted file mode 100644
index 4126a66..0000000
--- a/src/GenotypePriors.h
+++ /dev/null
@@ -1,54 +0,0 @@
-#include <map>
-#include "Allele.h"
-#include "Genotype.h"
-#include "Multinomial.h"
-#include "CNV.h"
-#include "Utility.h"
-using namespace std;
-map<Allele, int> countAlleles(vector<Genotype*>& genotypeCombo);
-map<int, int> countFrequencies(vector<Genotype*>& genotypeCombo);
-long double alleleFrequencyProbability(const map<int, int>& alleleFrequencyCounts, long double theta);
-long double alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta);
-long double __alleleFrequencyProbabilityln(const map<int, int>& alleleFrequencyCounts, long double theta);
-long double probabilityGenotypeComboGivenAlleleFrequencyln(GenotypeCombo& genotypeCombo, Allele& allele);
-class AlleleFrequencyProbabilityCache : public map<map<int, int>, long double> {
- long double alleleFrequencyProbabilityln(const map<int, int>& counts, long double theta) {
- map<map<int, int>, long double>::iterator p = find(counts);
- if (p == end()) {
- long double pln = __alleleFrequencyProbabilityln(counts, theta);
- insert(make_pair(counts, pln));
- return pln;
- } else {
- return p->second;
- }
- }
- GenotypeCombo* combo,
- Allele& refAllele,
- long double theta,
- bool pooled,
- bool obsBinomialPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalarln);
-void genotypeCombinationsPriorProbability(
- vector<GenotypeComboResult>& genotypeComboProbs,
- vector<GenotypeCombo>& bandedCombos,
- Allele& refAllele,
- long double theta,
- bool pooled,
- bool obsBinomialPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalarln);
diff --git a/src/IndelAllele.cpp b/src/IndelAllele.cpp
deleted file mode 100644
index 20e6888..0000000
--- a/src/IndelAllele.cpp
+++ /dev/null
@@ -1,48 +0,0 @@
-#include "IndelAllele.h"
-using namespace std;
-bool FBIndelAllele::homopolymer(void) {
- string::iterator s = sequence.begin();
- char c = *s++;
- while (s != sequence.end()) {
- if (c != *s++) return false;
- }
- return true;
-bool FBhomopolymer(string sequence) {
- string::iterator s = sequence.begin();
- char c = *s++;
- while (s != sequence.end()) {
- if (c != *s++) return false;
- }
- return true;
-ostream& operator<<(ostream& out, const FBIndelAllele& indel) {
- string t = indel.insertion ? "i" : "d";
- out << t << ":" << indel.position << ":" << indel.readPosition
- << ":" << indel.sequence << ":" << (indel.splice?"splice":"");
- return out;
-bool operator==(const FBIndelAllele& a, const FBIndelAllele& b) {
- return (a.insertion == b.insertion
- && a.length == b.length
- && a.position == b.position
- && a.sequence == b.sequence
- && a.splice == b.splice);
-bool operator!=(const FBIndelAllele& a, const FBIndelAllele& b) {
- return !(a==b);
-bool operator<(const FBIndelAllele& a, const FBIndelAllele& b) {
- ostringstream as, bs;
- as << a;
- bs << b;
- return as.str() < bs.str();
diff --git a/src/IndelAllele.h b/src/IndelAllele.h
deleted file mode 100644
index 55c59e6..0000000
--- a/src/IndelAllele.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef __INDEL_ALLELE_H
-#define __INDEL_ALLELE_H
-#include <string>
-#include <iostream>
-#include <sstream>
-using namespace std;
-class FBIndelAllele {
- friend ostream& operator<<(ostream&, const FBIndelAllele&);
- friend bool operator==(const FBIndelAllele&, const FBIndelAllele&);
- friend bool operator!=(const FBIndelAllele&, const FBIndelAllele&);
- friend bool operator<(const FBIndelAllele&, const FBIndelAllele&);
- bool insertion;
- int length;
- int position;
- int readPosition;
- string sequence;
- bool splice;
- bool homopolymer(void);
- FBIndelAllele(bool i, int l, int p, int rp, string s, bool n)
- : insertion(i), length(l), position(p), readPosition(rp), sequence(s), splice(n)
- { }
-bool FBhomopolymer(string sequence);
-ostream& operator<<(ostream& out, const FBIndelAllele& indel);
-bool operator==(const FBIndelAllele& a, const FBIndelAllele& b);
-bool operator!=(const FBIndelAllele& a, const FBIndelAllele& b);
-bool operator<(const FBIndelAllele& a, const FBIndelAllele& b);
diff --git a/src/LargeFileSupport.h b/src/LargeFileSupport.h
deleted file mode 100644
index 9f0dc4c..0000000
--- a/src/LargeFileSupport.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-#ifdef WIN32
-#define ftell64(a) _ftelli64(a)
-#define fseek64(a,b,c) _fseeki64(a,b,c)
-typedef __int64 off_type;
-#elif defined(__APPLE__)
-#define ftell64(a) ftello(a)
-#define fseek64(a,b,c) fseeko(a,b,c)
-typedef off_t off_type;
-#define ftell64(a) ftello(a)
-#define fseek64(a,b,c) fseeko(a,b,c)
-typedef __off64_t off_type;
diff --git a/src/LeftAlign.cpp b/src/LeftAlign.cpp
deleted file mode 100644
index 78b7046..0000000
--- a/src/LeftAlign.cpp
+++ /dev/null
@@ -1,398 +0,0 @@
-#include "LeftAlign.h"
-//bool debug;
-// Attempts to left-realign all the indels represented by the alignment cigar.
-// This is done by shifting all indels as far left as they can go without
-// mismatch, then merging neighboring indels of the same class. leftAlign
-// updates the alignment cigar with changes, and returns true if realignment
-// changed the alignment cigar.
-// To left-align, we move multi-base indels left by their own length as long as
-// the preceding bases match the inserted or deleted sequence. After this
-// step, we handle multi-base homopolymer indels by shifting them one base to
-// the left until they mismatch the reference.
-// To merge neighboring indels, we iterate through the set of left-stabilized
-// indels. For each indel we add a new cigar element to the new cigar. If a
-// deletion follows a deletion, or an insertion occurs at the same place as
-// another insertion, we merge the events by extending the previous cigar
-// element.
-// In practice, we must call this function until the alignment is stabilized.
-bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug) {
- int arsOffset = 0; // pointer to insertion point in aligned reference sequence
- string alignedReferenceSequence = referenceSequence;
- int aabOffset = 0;
- string alignmentAlignedBases = alignment.QueryBases;
- // store information about the indels
- vector<FBIndelAllele> indels;
- int rp = 0; // read position, 0-based relative to read
- int sp = 0; // sequence position
- string softBegin;
- string softEnd;
- stringstream cigar_before, cigar_after;
- for (vector<CigarOp>::const_iterator c = alignment.CigarData.begin();
- c != alignment.CigarData.end(); ++c) {
- unsigned int l = c->Length;
- char t = c->Type;
- cigar_before << l << t;
- if (t == 'M') { // match or mismatch
- sp += l;
- rp += l;
- } else if (t == 'D') { // deletion
- indels.push_back(FBIndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l), false));
- alignmentAlignedBases.insert(rp + aabOffset, string(l, '-'));
- aabOffset += l;
- sp += l; // update reference sequence position
- } else if (t == 'N') {
- indels.push_back(FBIndelAllele(false, l, sp, rp, referenceSequence.substr(sp, l), true));
- alignmentAlignedBases.insert(rp + aabOffset, string(l, '-'));
- aabOffset += l;
- sp += l; // update reference sequence position
- } else if (t == 'I') { // insertion
- indels.push_back(FBIndelAllele(true, l, sp, rp, alignment.QueryBases.substr(rp, l), false));
- alignedReferenceSequence.insert(sp + softBegin.size() + arsOffset, string(l, '-'));
- arsOffset += l;
- rp += l;
- } else if (t == 'S') { // soft clip, clipped sequence present in the read not matching the reference
- // remove these bases from the refseq and read seq, but don't modify the alignment sequence
- if (rp == 0) {
- alignedReferenceSequence = string(l, '*') + alignedReferenceSequence;
- softBegin = alignmentAlignedBases.substr(0, l);
- } else {
- alignedReferenceSequence = alignedReferenceSequence + string(l, '*');
- softEnd = alignmentAlignedBases.substr(alignmentAlignedBases.size() - l, l);
- }
- rp += l;
- } else if (t == 'H') { // hard clip on the read, clipped sequence is not present in the read
- //} else if (t == 'N') { // skipped region in the reference not present in read, aka splice
- //sp += l;
- }
- }
- int alignedLength = sp;
- LEFTALIGN_DEBUG("| " << cigar_before.str() << endl
- << "| " << alignedReferenceSequence << endl
- << "| " << alignmentAlignedBases << endl);
- // if no indels, return the alignment
- if (indels.empty()) { return false; }
- // for each indel, from left to right
- // while the indel sequence repeated to the left and we're not matched up with the left-previous indel
- // move the indel left
- vector<FBIndelAllele>::iterator previous = indels.begin();
- for (vector<FBIndelAllele>::iterator id = indels.begin(); id != indels.end(); ++id) {
- // left shift by repeats
- //
- // from 1 base to the length of the indel, attempt to shift left
- // if the move would cause no change in alignment optimality (no
- // introduction of mismatches, and by definition no change in gap
- // length), move to the new position.
- // in practice this moves the indel left when we reach the size of
- // the repeat unit.
- //
- int steppos, readsteppos;
- FBIndelAllele& indel = *id;
- int i = 1;
- while (i <= indel.length) {
- int steppos = indel.position - i;
- int readsteppos = indel.readPosition - i;
- if (debug) {
- if (steppos >= 0 && readsteppos >= 0) {
- cerr << referenceSequence.substr(steppos, indel.length) << endl;
- cerr << alignment.QueryBases.substr(readsteppos, indel.length) << endl;
- cerr << indel.sequence << endl;
- }
- }
- while (steppos >= 0 && readsteppos >= 0
- && !indel.splice
- && indel.sequence == referenceSequence.substr(steppos, indel.length)
- && indel.sequence == alignment.QueryBases.substr(readsteppos, indel.length)
- && (id == indels.begin()
- || (previous->insertion && steppos >= previous->position)
- || (!previous->insertion && steppos >= previous->position + previous->length))) {
- LEFTALIGN_DEBUG((indel.insertion ? "insertion " : "deletion ") << indel << " shifting " << i << "bp left" << endl);
- indel.position -= i;
- indel.readPosition -= i;
- steppos = indel.position - i;
- readsteppos = indel.readPosition - i;
- }
- do {
- ++i;
- } while (i <= indel.length && indel.length % i != 0);
- }
- // left shift indels with exchangeable flanking sequence
- //
- // for example:
- //
- // GT-----T ----> G-----TT
- //
- // GTGTG-----T ----> GTG-----TGT
- //
- // GTGTG-----T GTG-----TGT
- //
- //
- steppos = indel.position - 1;
- readsteppos = indel.readPosition - 1;
- while (steppos >= 0 && readsteppos >= 0
- && alignment.QueryBases.at(readsteppos) == referenceSequence.at(steppos)
- && alignment.QueryBases.at(readsteppos) == indel.sequence.at(indel.sequence.size() - 1)
- && (id == indels.begin()
- || (previous->insertion && indel.position - 1 >= previous->position)
- || (!previous->insertion && indel.position - 1 >= previous->position + previous->length))) {
- LEFTALIGN_DEBUG((indel.insertion ? "insertion " : "deletion ") << indel << " exchanging bases " << 1 << "bp left" << endl);
- indel.sequence = indel.sequence.at(indel.sequence.size() - 1) + indel.sequence.substr(0, indel.sequence.size() - 1);
- indel.position -= 1;
- indel.readPosition -= 1;
- steppos = indel.position - 1;
- readsteppos = indel.readPosition - 1;
- }
- // tracks previous indel, so we don't run into it with the next shift
- previous = id;
- }
- // bring together floating indels
- // from left to right
- // check if we could merge with the next indel
- // if so, adjust so that we will merge in the next step
- if (indels.size() > 1) {
- previous = indels.begin();
- for (vector<FBIndelAllele>::iterator id = (indels.begin() + 1); id != indels.end(); ++id) {
- FBIndelAllele& indel = *id;
- // parsimony: could we shift right and merge with the previous indel?
- // if so, do it
- int prev_end_ref = previous->insertion ? previous->position : previous->position + previous->length;
- int prev_end_read = !previous->insertion ? previous->readPosition : previous->readPosition + previous->length;
- if (!previous->splice && !indel.splice &&
- previous->insertion == indel.insertion
- && ((previous->insertion
- && (previous->position < indel.position
- && previous->readPosition + previous->readPosition < indel.readPosition))
- ||
- (!previous->insertion
- && (previous->position + previous->length < indel.position)
- && (previous->readPosition < indel.readPosition)
- ))) {
- if (previous->homopolymer()) {
- string seq = referenceSequence.substr(prev_end_ref, indel.position - prev_end_ref);
- string readseq = alignment.QueryBases.substr(prev_end_read, indel.position - prev_end_ref);
- LEFTALIGN_DEBUG("seq: " << seq << endl << "readseq: " << readseq << endl);
- if (previous->sequence.at(0) == seq.at(0)
- && FBhomopolymer(seq)
- && FBhomopolymer(readseq)) {
- LEFTALIGN_DEBUG("moving " << *previous << " right to "
- << (indel.insertion ? indel.position : indel.position - previous->length) << endl);
- previous->position = indel.insertion ? indel.position : indel.position - previous->length;
- }
- }
- else {
- int pos = previous->position;
- while (pos < (int) referenceSequence.length() &&
- ((previous->insertion && pos + previous->length <= indel.position)
- ||
- (!previous->insertion && pos + previous->length < indel.position))
- && previous->sequence
- == referenceSequence.substr(pos + previous->length, previous->length)) {
- pos += previous->length;
- }
- if (pos < previous->position &&
- ((previous->insertion && pos + previous->length == indel.position)
- ||
- (!previous->insertion && pos == indel.position - previous->length))
- ) {
- LEFTALIGN_DEBUG("right-merging tandem repeat: moving " << *previous << " right to " << pos << endl);
- previous->position = pos;
- }
- }
- }
- previous = id;
- }
- }
- // for each indel
- // if ( we're matched up to the previous insertion (or deletion)
- // and it's also an insertion or deletion )
- // merge the indels
- //
- // and simultaneously reconstruct the cigar
- vector<CigarOp> newCigar;
- if (!softBegin.empty()) {
- newCigar.push_back(CigarOp('S', softBegin.size()));
- }
- vector<FBIndelAllele>::iterator id = indels.begin();
- FBIndelAllele last = *id++;
- if (last.position > 0) {
- newCigar.push_back(CigarOp('M', last.position));
- }
- if (last.insertion) {
- newCigar.push_back(CigarOp('I', last.length));
- } else if (last.splice) {
- newCigar.push_back(CigarOp('N', last.length));
- } else {
- newCigar.push_back(CigarOp('D', last.length));
- }
- int lastend = last.insertion ? last.position : (last.position + last.length);
- LEFTALIGN_DEBUG(last << ",");
- for (; id != indels.end(); ++id) {
- FBIndelAllele& indel = *id;
- LEFTALIGN_DEBUG(indel << ",");
- if (indel.position < lastend) {
- cerr << "impossibility?: indel realigned left of another indel" << endl << alignment.Name
- << " " << alignment.Position << endl << alignment.QueryBases << endl;
- exit(1);
- } else if (indel.position == lastend && indel.insertion == last.insertion) {
- CigarOp& op = newCigar.back();
- op.Length += indel.length;
- } else if (indel.position >= lastend) { // also catches differential indels, but with the same position
- newCigar.push_back(CigarOp('M', indel.position - lastend));
- if (indel.insertion) {
- newCigar.push_back(CigarOp('I', indel.length));
- } else if (indel.splice) {
- newCigar.push_back(CigarOp('N', indel.length));
- } else { // deletion
- newCigar.push_back(CigarOp('D', indel.length));
- }
- }
- last = *id;
- lastend = last.insertion ? last.position : (last.position + last.length);
- }
- if (lastend < alignedLength) {
- newCigar.push_back(CigarOp('M', alignedLength - lastend));
- }
- if (!softEnd.empty()) {
- newCigar.push_back(CigarOp('S', softEnd.size()));
- }
- if (debug) {
- for (vector<CigarOp>::const_iterator c = alignment.CigarData.begin();
- c != alignment.CigarData.end(); ++c) {
- unsigned int l = c->Length;
- char t = c->Type;
- cerr << l << t;
- }
- cerr << endl;
- }
- alignment.CigarData = newCigar;
- for (vector<CigarOp>::const_iterator c = alignment.CigarData.begin();
- c != alignment.CigarData.end(); ++c) {
- unsigned int l = c->Length;
- char t = c->Type;
- cigar_after << l << t;
- }
- LEFTALIGN_DEBUG(cigar_after.str() << endl);
- // check if we're realigned
- if (cigar_after.str() == cigar_before.str()) {
- return false;
- } else {
- return true;
- }
-int countMismatches(BamAlignment& alignment, string referenceSequence) {
- int mismatches = 0;
- int sp = 0;
- int rp = 0;
- for (vector<CigarOp>::const_iterator c = alignment.CigarData.begin();
- c != alignment.CigarData.end(); ++c) {
- unsigned int l = c->Length;
- char t = c->Type;
- if (t == 'M') { // match or mismatch
- for (int i = 0; i < l; ++i) {
- if (alignment.QueryBases.at(rp) != referenceSequence.at(sp))
- ++mismatches;
- ++sp;
- ++rp;
- }
- } else if (t == 'D') { // deletion
- sp += l; // update reference sequence position
- } else if (t == 'I') { // insertion
- rp += l; // update read position
- } else if (t == 'S') { // soft clip, clipped sequence present in the read not matching the reference
- rp += l;
- } else if (t == 'H') { // hard clip on the read, clipped sequence is not present in the read
- } else if (t == 'N') { // skipped region in the reference not present in read, aka splice
- sp += l;
- }
- }
- return mismatches;
-// Iteratively left-aligns the indels in the alignment until we have a stable
-// realignment. Returns true on realignment success or non-realignment.
-// Returns false if we exceed the maximum number of realignment iterations.
-bool stablyLeftAlign(BamAlignment& alignment, string referenceSequence, int maxiterations, bool debug) {
- int mismatchesBefore = countMismatches(alignment, referenceSequence);
- if (!leftAlign(alignment, referenceSequence, debug)) {
- LEFTALIGN_DEBUG("did not realign" << endl);
- return true;
- } else {
- while (leftAlign(alignment, referenceSequence, debug) && --maxiterations > 0) {
- LEFTALIGN_DEBUG("realigning ..." << endl);
- }
- int mismatchesAfter = countMismatches(alignment, referenceSequence);
- if (mismatchesBefore != mismatchesAfter) {
- cerr << alignment.Name << endl;
- cerr << "ERROR: found " << mismatchesBefore << " mismatches before, but " << mismatchesAfter << " after left realignment!" << endl;
- exit(1);
- }
- if (maxiterations <= 0) {
- return false;
- } else {
- return true;
- }
- }
diff --git a/src/LeftAlign.h b/src/LeftAlign.h
deleted file mode 100644
index 634dfef..0000000
--- a/src/LeftAlign.h
+++ /dev/null
@@ -1,37 +0,0 @@
-#ifndef __LEFTALIGN_H
-#define __LEFTALIGN_H
-#include <iostream>
-#include <getopt.h>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <signal.h>
-#include <stdlib.h>
-#include <cmath>
-#include <algorithm>
-#include <map>
-#include <vector>
-#include "Fasta.h"
-#include "api/BamAlignment.h"
-#include "api/BamReader.h"
-#include "api/BamWriter.h"
-#include "IndelAllele.h"
-#define LEFTALIGN_DEBUG(msg) \
- if (debug) { cerr << msg; }
-#define LEFTALIGN_DEBUG(msg)
-using namespace std;
-using namespace BamTools;
-bool leftAlign(BamAlignment& alignment, string& referenceSequence, bool debug = false);
-bool stablyLeftAlign(BamAlignment& alignment, string referenceSequence, int maxiterations = 20, bool debug = false);
-int countMismatches(BamAlignment& alignment, string referenceSequence);
diff --git a/src/Makefile b/src/Makefile
deleted file mode 100644
index 86adbb2..0000000
--- a/src/Makefile
+++ /dev/null
@@ -1,268 +0,0 @@
-# Makefile for FreeBayes
-# Erik Garrison, 2010
-# Boston College
-# Compiler
-# Compiler flags
-#CFLAGS=-O3 -static -D VERBOSE_DEBUG # enables verbose debugging via --debug2
-LIBS = -L./ -L$(VCFLIB_ROOT)/tabixpp/ -L$(BAMTOOLS_ROOT)/lib -ltabix -lz -lm
-all: autoversion ../bin/freebayes ../bin/bamleftalign
- $(MAKE) CFLAGS="$(CFLAGS) -static" all
- $(MAKE) CFLAGS="$(CFLAGS) -D VERBOSE_DEBUG -g -rdynamic" all
- $(MAKE) CFLAGS="$(CFLAGS) -g" all
- $(MAKE) CFLAGS="$(CFLAGS) -pg" all
-.PHONY: all static debug profiling gprof
-# builds bamtools static lib, and copies into root
- cd $(BAMTOOLS_ROOT) && mkdir -p build && cd build && cmake .. && $(MAKE)
-OBJECTS=BedReader.o \
- CNV.o \
- fastlz.o \
- Fasta.o \
- Parameters.o \
- Allele.o \
- Sample.o \
- Result.o \
- AlleleParser.o \
- Utility.o \
- Genotype.o \
- DataLikelihood.o \
- Multinomial.o \
- Ewens.o \
- ResultData.o \
- Dirichlet.o \
- Marginals.o \
- split.o \
- LeftAlign.o \
- IndelAllele.o \
- Bias.o \
- Contamination.o \
- NonCall.o \
- SegfaultHandler.o \
- ../vcflib/tabixpp/tabix.o \
- ../vcflib/tabixpp/bgzf.o \
- ../vcflib/smithwaterman/SmithWatermanGotoh.o \
- ../vcflib/smithwaterman/disorder.c \
- ../vcflib/smithwaterman/LeftAlign.o \
- ../vcflib/smithwaterman/Repeats.o \
- ../vcflib/smithwaterman/IndelAllele.o \
- Variant.o \
- $(BAMTOOLS_ROOT)/lib/libbamtools.a
-HEADERS=multichoose.h version_git.h
-# executables
-freebayes ../bin/freebayes: freebayes.o $(OBJECTS) $(HEADERS)
- $(CXX) $(CFLAGS) $(INCLUDE) freebayes.o $(OBJECTS) -o ../bin/freebayes $(LIBS)
-alleles ../bin/alleles: alleles.o $(OBJECTS) $(HEADERS)
- $(CXX) $(CFLAGS) $(INCLUDE) alleles.o $(OBJECTS) -o ../bin/alleles $(LIBS)
-dummy ../bin/dummy: dummy.o $(OBJECTS) $(HEADERS)
- $(CXX) $(CFLAGS) $(INCLUDE) dummy.o $(OBJECTS) -o ../bin/dummy $(LIBS)
-bamleftalign ../bin/bamleftalign: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o
- $(CXX) $(CFLAGS) $(INCLUDE) bamleftalign.o Fasta.o LeftAlign.o IndelAllele.o split.o $(BAMTOOLS_ROOT)/lib/libbamtools.a -o ../bin/bamleftalign $(LIBS)
-bamfiltertech ../bin/bamfiltertech: $(BAMTOOLS_ROOT)/lib/libbamtools.a bamfiltertech.o $(OBJECTS) $(HEADERS)
- $(CXX) $(CFLAGS) $(INCLUDE) bamfiltertech.o $(OBJECTS) -o ../bin/bamfiltertech $(LIBS)
-# objects
-Fasta.o: Fasta.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c Fasta.cpp
-alleles.o: alleles.cpp AlleleParser.o Allele.o
- $(CXX) $(CFLAGS) $(INCLUDE) -c alleles.cpp
-dummy.o: dummy.cpp AlleleParser.o Allele.o
- $(CXX) $(CFLAGS) $(INCLUDE) -c dummy.cpp
-freebayes.o: freebayes.cpp TryCatch.h $(BAMTOOLS_ROOT)/lib/libbamtools.a
- $(CXX) $(CFLAGS) $(INCLUDE) -c freebayes.cpp
-fastlz.o: fastlz.c fastlz.h
- $(C) $(CFLAGS) $(INCLUDE) -c fastlz.c
-Parameters.o: Parameters.cpp Parameters.h Version.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Parameters.cpp
-Allele.o: Allele.cpp Allele.h multichoose.h Genotype.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Allele.cpp
-Sample.o: Sample.cpp Sample.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Sample.cpp
-Genotype.o: Genotype.cpp Genotype.h Allele.h multipermute.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Genotype.cpp
-Ewens.o: Ewens.cpp Ewens.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Ewens.cpp
-AlleleParser.o: AlleleParser.cpp AlleleParser.h multichoose.h Parameters.h $(BAMTOOLS_ROOT)/lib/libbamtools.a
- $(CXX) $(CFLAGS) $(INCLUDE) -c AlleleParser.cpp
-Utility.o: Utility.cpp Utility.h Sum.h Product.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Utility.cpp
-SegfaultHandler.o: SegfaultHandler.cpp SegfaultHandler.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c SegfaultHandler.cpp
-Dirichlet.o: Dirichlet.h Dirichlet.cpp Sum.h Product.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Dirichlet.cpp
-Multinomial.o: Multinomial.h Multinomial.cpp Sum.h Product.h Utility.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Multinomial.cpp
-DataLikelihood.o: DataLikelihood.cpp DataLikelihood.h Sum.h Product.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c DataLikelihood.cpp
-Marginals.o: Marginals.cpp Marginals.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Marginals.cpp
-ResultData.o: ResultData.cpp ResultData.h Result.h Result.cpp Allele.h Utility.h Genotype.h AlleleParser.h Version.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c ResultData.cpp
-Result.o: Result.cpp Result.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Result.cpp
-NonCall.o: NonCall.cpp NonCall.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c NonCall.cpp
-BedReader.o: BedReader.cpp BedReader.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c BedReader.cpp
-CNV.o: CNV.cpp CNV.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c CNV.cpp
-Bias.o: Bias.cpp Bias.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c Bias.cpp
-split.o: split.h split.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c split.cpp
-bamleftalign.o: bamleftalign.cpp LeftAlign.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c bamleftalign.cpp
-bamfiltertech.o: bamfiltertech.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c bamfiltertech.cpp
-LeftAlign.o: LeftAlign.h LeftAlign.cpp $(BAMTOOLS_ROOT)/lib/libbamtools.a
- $(CXX) $(CFLAGS) $(INCLUDE) -c LeftAlign.cpp
-IndelAllele.o: IndelAllele.cpp IndelAllele.h
- $(CXX) $(CFLAGS) $(INCLUDE) -c IndelAllele.cpp
-Variant.o: $(VCFLIB_ROOT)/src/Variant.h $(VCFLIB_ROOT)/src/Variant.cpp
- $(CXX) $(CFLAGS) $(INCLUDE) -c $(VCFLIB_ROOT)/src/Variant.cpp
-../vcflib/tabixpp/tabix.o: ../vcflib/tabixpp/tabix.hpp ../vcflib/tabixpp/tabix.cpp
-../vcflib/tabixpp/bgzf.o: ../vcflib/tabixpp/bgzf.c ../vcflib/tabixpp/bgzf.h
- cd ../vcflib/tabixpp && $(MAKE)
-../vcflib/smithwaterman/SmithWatermanGotoh.o: ../vcflib/smithwaterman/SmithWatermanGotoh.h ../vcflib/smithwaterman/SmithWatermanGotoh.cpp
- cd ../vcflib/smithwaterman && $(MAKE)
-## versioning system from BedTools
-## For freebayes developers (not users):
-## When you want to release (and tag) a new version, run:
-## $ make setversion VERSION=v2.17.2
-## This will:
-## 1. Update the "/src/utils/version/version_release.txt" file
-## 2. Commit the file
-## 3. Git-Tag the commit with the latest version
-.PHONY: setversion
- ifeq "$(VERSION)" ""
- $(error please set VERSION variable to the new version (e.g "make setversion VERSION=v2.17.2"))
- endif
- @echo "# This file was auto-generated by running \"make setversion VERSION=$(VERSION)\"" > "$(RELEASED_VERSION_FILE)"
- @echo "# on $$(date) ." >> "$(RELEASED_VERSION_FILE)"
- @echo "# Please do not edit or commit this file manually." >> "$(RELEASED_VERSION_FILE)"
- @echo "#" >> "$(RELEASED_VERSION_FILE)"
- @git commit -q -m "Setting Release-Version $(VERSION)"
- @git tag "$(VERSION)"
- @echo "Version updated to $(VERSION)."
- @echo ""
- @echo "Don't forget to push the commits AND the tags:"
- @echo " git push --all --tags"
- @echo ""
-## Automatic version detection
-## What's going on here?
-## 1. If there's a ".git" repository - use the version from the repository.
-## ignore any released-version file. git repository is authorative.
-## 2, If there's no ".git" repository,
-## get the "released" version number from the release-version file.
-## 3. Compare the detected version (from steps 1,2) to the current string
-## in ./src/utils/version/version_git.h .
-## If they differ, update the header file - will cause a recompilation
-## of version.o .
-.PHONY: autoversion
- @( \
- if [ -d "../.git" ] && which git > /dev/null ; then \
- DETECTED_VERSION=$$(git describe --always --tags --dirty) ; \
- else \
- fi ; \
- \
- [ -e "$(VERSION_FILE)" ] && CURRENT_VERSION=$$(grep "define VERSION_GIT " "$(VERSION_FILE)" | cut -f3 -d" " | sed 's/"//g') ; \
- \
- if [ "$${DETECTED_VERSION}" != "$${CURRENT_VERSION}" ] ; then \
- echo "Updating version file." ; \
- echo "#ifndef VERSION_GIT_H" > $(VERSION_FILE) ; \
- echo "#define VERSION_GIT_H" >> $(VERSION_FILE) ; \
- echo "#define VERSION_GIT \"$${DETECTED_VERSION}\"" >> $(VERSION_FILE) ; \
- echo "#endif /* VERSION_GIT_H */" >> $(VERSION_FILE) ; \
- fi )
- rm -rf *.o *.cgh *~ freebayes alleles ../bin/freebayes ../bin/alleles ../vcflib/*.o ../vcflib/tabixpp/*.{o,a}
- cd $(BAMTOOLS_ROOT)/build && make clean
- cd ../vcflib/smithwaterman && make clean
diff --git a/src/Marginals.cpp b/src/Marginals.cpp
deleted file mode 100644
index cebca4d..0000000
--- a/src/Marginals.cpp
+++ /dev/null
@@ -1,198 +0,0 @@
-#include "Marginals.h"
-// calculate marginals from the combos, store in results
-void marginalGenotypeLikelihoods(list<GenotypeCombo>& genotypeCombos, Results& results) {
- map<string, map<Genotype*, vector<long double> > > rawMarginals;
- // push the marginal likelihoods into the rawMarginals vectors in the results
- for (list<GenotypeCombo>::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) {
- for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) {
- const SampleDataLikelihood& sdl = **i;
- rawMarginals[sdl.name][sdl.genotype].push_back(gc->posteriorProb);
- }
- }
- // safely add the raw marginal vectors using logsumexp
- for (Results::iterator r = results.begin(); r != results.end(); ++r) {
- ResultData& sample = r->second;
- map<Genotype*, vector<long double> >& rawmgs = rawMarginals[r->first];
- vector<long double> probs;
- for (map<Genotype*, vector<long double> >::iterator m = rawmgs.begin(); m != rawmgs.end(); ++m) {
- probs.push_back(logsumexp_probs(m->second));
- }
- long double normalizer = logsumexp_probs(probs);
- vector<long double>::iterator p = probs.begin();
- for (map<Genotype*, vector<long double> >::iterator m = rawmgs.begin(); m != rawmgs.end(); ++m, ++p) {
- sample.marginals[m->first] = *p - normalizer;
- }
- }
-// recompute data likelihoods using marginals from the combos
-// assumes that the genotype combos are in the same order as the likelihoods
-// assumes that the genotype combos are the same size as the number of samples in the likelihoods
-// returns the delta from the previous marginals, informative in the case of EM
-long double marginalGenotypeLikelihoods(list<GenotypeCombo>& genotypeCombos, SampleDataLikelihoods& likelihoods) {
- long double delta = 0;
- vector< map<Genotype*, long double> > rawMarginals;
- rawMarginals.resize(likelihoods.size());
- vector< map<Genotype*, long double> >::iterator rawMarginalsItr;
- // push the marginal likelihoods into the rawMarginals maps
- for (list<GenotypeCombo>::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) {
- rawMarginalsItr = rawMarginals.begin();
- for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) {
- const SampleDataLikelihood& sdl = **i;
- map<Genotype*, long double>& rmgs = *rawMarginalsItr++;
- map<Genotype*, long double>::iterator rmgsItr = rmgs.find(sdl.genotype);
- if (rmgsItr == rmgs.end()) {
- rmgs[sdl.genotype] = gc->posteriorProb;
- } else {
- //vector<long double> x;
- //x.push_back(rmgsItr->second); x.push_back(gc->posteriorProb);
- //rmgs[sdl.genotype] = logsumexp_probs(x);
- rmgs[sdl.genotype] = log(safe_exp(rmgsItr->second) + safe_exp(gc->posteriorProb));
- }
- }
- }
- // safely add the raw marginal vectors using logsumexp
- // and use to update the sample data likelihoods
- rawMarginalsItr = rawMarginals.begin();
- long double minAllowedMarginal = -1e-16;
- for (SampleDataLikelihoods::iterator s = likelihoods.begin(); s != likelihoods.end(); ++s) {
- vector<SampleDataLikelihood>& sdls = *s;
- const map<Genotype*, long double>& rawmgs = *rawMarginalsItr++;
- map<Genotype*, long double> marginals;
- vector<long double> rawprobs;
- for (map<Genotype*, long double>::const_iterator m = rawmgs.begin(); m != rawmgs.end(); ++m) {
- long double p = m->second;
- marginals[m->first] = p;
- rawprobs.push_back(p);
- }
- long double normalizer = logsumexp_probs(rawprobs);
- for (vector<SampleDataLikelihood>::iterator sdl = sdls.begin(); sdl != sdls.end(); ++sdl) {
- long double newmarginal = marginals[sdl->genotype] - normalizer;
- delta += newmarginal - sdl->marginal;
- // ensure the marginal is non-0 to guard against underflow
- sdl->marginal = min(minAllowedMarginal, newmarginal);
- }
- }
- return delta;
-void bestMarginalGenotypeCombo(GenotypeCombo& combo,
- Results& results,
- SampleDataLikelihoods& samples,
- long double theta,
- bool pooled,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar) {
- for (SampleDataLikelihoods::iterator s = samples.begin(); s != samples.end(); ++s) {
- vector<SampleDataLikelihood>& sdls = *s;
- const string& name = sdls.front().name;
- const map<Genotype*, long double>& marginals = results[name].marginals;;
- map<Genotype*, long double>::const_iterator m = marginals.begin();
- long double bestMarginalProb = m->second;
- Genotype* bestMarginalGenotype = m->first;
- ++m;
- for (; m != marginals.end(); ++m) {
- if (abs(m->second) < abs(bestMarginalProb)) {
- bestMarginalProb = m->second;
- bestMarginalGenotype = m->first;
- }
- }
- for (vector<SampleDataLikelihood>::iterator d = sdls.begin(); d != sdls.end(); ++d) {
- if (bestMarginalGenotype == d->genotype) {
- combo.push_back(&*d);
- break;
- }
- }
- }
- combo.init(binomialObsPriors);
- combo.calculatePosteriorProbability(theta, pooled, permute, hwePriors, binomialObsPriors,
- alleleBalancePriors, diffusionPriorScalar);
-long double balancedMarginalGenotypeLikelihoods(list<GenotypeCombo>& genotypeCombos, SampleDataLikelihoods& likelihoods) {
- long double delta = 0;
- //map<string, map<Genotype*, vector<long double> > > rawMarginals;
- vector< map<Genotype*, vector<long double> > > rawMarginals;
- rawMarginals.resize(likelihoods.size());
- vector< map<Genotype*, vector<long double> > >::iterator rawMarginalsItr;
- // push the marginal likelihoods into the rawMarginals maps
- for (list<GenotypeCombo>::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) {
- if (gc->isHomozygous()) {
- rawMarginalsItr = rawMarginals.begin();
- for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) {
- const SampleDataLikelihood& sdl = **i;
- map<Genotype*, vector<long double> >& rmgs = *rawMarginalsItr++;
- rmgs[sdl.genotype].push_back(gc->posteriorProb);
- }
- } else {
- bool isComboKing = true;
- rawMarginalsItr = rawMarginals.begin();
- for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) {
- const SampleDataLikelihood& sdl = **i;
- if (sdl.rank != 0) {
- isComboKing = false;
- map<Genotype*, vector<long double> >& rmgs = *rawMarginalsItr;
- rmgs[sdl.genotype].push_back(gc->posteriorProb);
- }
- ++rawMarginalsItr;
- }
- if (isComboKing) {
- rawMarginalsItr = rawMarginals.begin();
- for (GenotypeCombo::const_iterator i = gc->begin(); i != gc->end(); ++i) {
- const SampleDataLikelihood& sdl = **i;
- map<Genotype*, vector<long double> >& rmgs = *rawMarginalsItr++;
- rmgs[sdl.genotype].push_back(gc->posteriorProb);
- }
- }
- }
- }
- // safely add the raw marginal vectors using logsumexp
- rawMarginalsItr = rawMarginals.begin();
- for (SampleDataLikelihoods::iterator s = likelihoods.begin(); s != likelihoods.end(); ++s) {
- vector<SampleDataLikelihood>& sdls = *s;
- const map<Genotype*, vector<long double> >& rawmgs = *rawMarginalsItr++;
- map<Genotype*, long double> marginals;
- vector<long double> rawprobs;
- for (map<Genotype*, vector<long double> >::const_iterator m = rawmgs.begin(); m != rawmgs.end(); ++m) {
- long double p = logsumexp_probs(m->second);
- marginals[m->first] = p;
- rawprobs.push_back(p);
- }
- long double normalizer = logsumexp_probs(rawprobs);
- for (vector<SampleDataLikelihood>::iterator sdl = sdls.begin(); sdl != sdls.end(); ++sdl) {
- long double newmarginal = marginals[sdl->genotype] - normalizer;
- delta += newmarginal - sdl->marginal;
- sdl->marginal = newmarginal;
- }
- }
- return delta;
diff --git a/src/Marginals.h b/src/Marginals.h
deleted file mode 100644
index 6c39bba..0000000
--- a/src/Marginals.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#ifndef __MARGINALS_H
-#define __MARGINALS_H
-#include <vector>
-#include <map>
-#include "Genotype.h"
-#include "ResultData.h"
-#include "Utility.h"
-using namespace std;
-//void marginalGenotypeLikelihoods(list<GenotypeCombo>& genotypeCombos, Results& results);
-long double marginalGenotypeLikelihoods(list<GenotypeCombo>& genotypeCombos, SampleDataLikelihoods& likelihoods);
-void bestMarginalGenotypeCombo(GenotypeCombo& combo,
- Results& results,
- SampleDataLikelihoods& samples,
- long double theta,
- bool pooled,
- bool permute,
- bool hwePriors,
- bool binomialObsPriors,
- bool alleleBalancePriors,
- long double diffusionPriorScalar);
-long double balancedMarginalGenotypeLikelihoods(list<GenotypeCombo>& genotypeCombos, SampleDataLikelihoods& likelihoods);
diff --git a/src/Multinomial.cpp b/src/Multinomial.cpp
deleted file mode 100644
index 7453afb..0000000
--- a/src/Multinomial.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#include "Multinomial.h"
-#include "Sum.h"
-#include "Product.h"
-long double multinomialSamplingProb(const vector<long double>& probs, const vector<int>& obs) {
- vector<long double> factorials;
- vector<long double> probsPowObs;
- factorials.resize(obs.size());
- transform(obs.begin(), obs.end(), factorials.begin(), factorial);
- vector<long double>::const_iterator p = probs.begin();
- vector<int>::const_iterator o = obs.begin();
- for (; p != probs.end() && o != obs.end(); ++p, ++o) {
- probsPowObs.push_back(pow(*p, *o));
- }
- return factorial(sum(obs)) / product(factorials) * product(probsPowObs);
-// TODO rename to reflect the fact that this is the multinomial sampling
-// probability for obs counts given probs probabilities
-long double multinomialSamplingProbLn(const vector<long double>& probs, const vector<int>& obs) {
- vector<long double> factorials;
- vector<long double> probsPowObs;
- factorials.resize(obs.size());
- transform(obs.begin(), obs.end(), factorials.begin(), factorialln);
- vector<long double>::const_iterator p = probs.begin();
- vector<int>::const_iterator o = obs.begin();
- for (; p != probs.end() && o != obs.end(); ++p, ++o) {
- probsPowObs.push_back(powln(log(*p), *o));
- }
- return factorialln(sum(obs)) - sum(factorials) + sum(probsPowObs);
-long double multinomialCoefficientLn(int n, const vector<int>& counts) {
- vector<long double> count_factorials;
- count_factorials.resize(counts.size());
- transform(counts.begin(), counts.end(), count_factorials.begin(), factorialln);
- return factorialln(n) - sum(count_factorials);
-long double samplingProbLn(const vector<long double>& probs, const vector<int>& obs) {
- vector<long double>::const_iterator p = probs.begin();
- vector<int>::const_iterator o = obs.begin();
- long double r = 0;
- for (; p != probs.end() && o != obs.end(); ++p, ++o) {
- r += powln(log(*p), *o);
- }
- return r;
diff --git a/src/Multinomial.h b/src/Multinomial.h
deleted file mode 100644
index e2ab0bd..0000000
--- a/src/Multinomial.h
+++ /dev/null
@@ -1,13 +0,0 @@
-#ifndef __MULTINOMIAL_H
-#define __MULTINOMIAL_H
-#include "Utility.h"
-#include <vector>
-long double multinomialSamplingProb(const vector<long double>& probs, const vector<int>& obs);
-long double multinomialSamplingProbLn(const vector<long double>& probs, const vector<int>& obs);
-long double multinomialCoefficientLn(int n, const vector<int>& counts);
-long double samplingProbLn(const vector<long double>& probs, const vector<int>& obs);
diff --git a/src/NonCall.cpp b/src/NonCall.cpp
deleted file mode 100644
index e9a7fe4..0000000
--- a/src/NonCall.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-#include "NonCall.h"
-NonCall NonCalls::aggregateAll(void) {
- NonCall aggregate;
- bool first = true;
- for (NonCalls::const_iterator nc = this->begin(); nc != this->end(); ++nc) {
- for (map<long, map<string, NonCall> >::const_iterator p = nc->second.begin();
- p != nc->second.end(); ++p) {
- for (map<string, NonCall>::const_iterator s = p->second.begin();
- s != p->second.end(); ++s) {
- const NonCall& nonCall = s->second;
- aggregate.refCount += nonCall.refCount;
- aggregate.altCount += nonCall.altCount;
- aggregate.reflnQ += nonCall.reflnQ;
- aggregate.altlnQ += nonCall.altlnQ;
- if (first) {
- aggregate.minDepth = nonCall.refCount + nonCall.altCount;
- first = false;
- } else {
- aggregate.minDepth = min(aggregate.minDepth, nonCall.refCount + nonCall.altCount);
- }
- }
- }
- }
- return aggregate;
-void NonCalls::aggregatePerSample(map<string, NonCall>& perSample) {
- set<string> seen;
- for (NonCalls::const_iterator nc = this->begin(); nc != this->end(); ++nc) {
- for (map<long, map<string, NonCall> >::const_iterator p = nc->second.begin();
- p != nc->second.end(); ++p) {
- for (map<string, NonCall>::const_iterator s = p->second.begin();
- s != p->second.end(); ++s) {
- const string& name = s->first;
- const NonCall& nonCall = s->second;
- NonCall& aggregate = perSample[name];
- aggregate.refCount += nonCall.refCount;
- aggregate.altCount += nonCall.altCount;
- aggregate.reflnQ += nonCall.reflnQ;
- aggregate.altlnQ += nonCall.altlnQ;
- if (!seen.count(name)) {
- aggregate.minDepth = nonCall.refCount + nonCall.altCount;
- seen.insert(name);
- } else {
- aggregate.minDepth = min(aggregate.minDepth, nonCall.refCount + nonCall.altCount);
- }
- }
- }
- }
-void NonCalls::record(const string& seqName, long pos, const Samples& samples) {
- map<string, NonCall>& site = (*this)[seqName][pos];
- for (Samples::const_iterator s = samples.begin(); s != samples.end(); ++s) {
- // tally ref and non-ref alleles
- const string& name = s->first;
- const Sample& sample = s->second;
- NonCall& noncall = site[name];
- for (Sample::const_iterator a = sample.begin(); a != sample.end(); ++a) {
- const vector<Allele*>& alleles = a->second;
- for (vector<Allele*>::const_iterator o = alleles.begin(); o != alleles.end(); ++o) {
- Allele& allele = **o;
- if (allele.isReference()) {
- ++noncall.refCount;
- noncall.reflnQ += allele.lnquality;
- } else {
- ++noncall.altCount;
- noncall.altlnQ += allele.lnquality;
- }
- }
- }
- }
-pair<string, long> NonCalls::firstPos(void) {
- const string& startChrom = begin()->first;
- long startPos = begin()->second.begin()->first;
- return make_pair(startChrom, startPos);
-pair<string, long> NonCalls::lastPos(void) {
- const string& endChrom = rbegin()->first;
- long endPos = rbegin()->second.rbegin()->first;
- return make_pair(endChrom, endPos);
diff --git a/src/NonCall.h b/src/NonCall.h
deleted file mode 100644
index c1715a1..0000000
--- a/src/NonCall.h
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef __NONCALL_H
-#define __NONCALL_H
-#include <string>
-#include <vector>
-#include <set>
-#include <map>
-#include <utility>
-#include "Utility.h"
-#include "Allele.h"
-#include "Sample.h"
-using namespace std;
-class NonCall {
- NonCall(void)
- : refCount(0)
- , reflnQ(0)
- , altCount(0)
- , altlnQ(0)
- , minDepth(0)
- { }
- NonCall(int rc, long double rq, int ac, long double aq, int mdp)
- : refCount(rc)
- , reflnQ(rq)
- , altCount(ac)
- , altlnQ(aq)
- , minDepth(mdp)
- { }
- int refCount;
- int altCount;
- int minDepth;
- long double reflnQ;
- long double altlnQ;
-class NonCalls : public map<string, map<long, map<string, NonCall> > > {
- void record(const string& seqName, long pos, const Samples& samples);
- NonCall aggregateAll(void);
- void aggregatePerSample(map<string, NonCall>& perSite);
- pair<string, long> firstPos(void);
- pair<string, long> lastPos(void);
diff --git a/src/Parameters.cpp b/src/Parameters.cpp
deleted file mode 100644
index 3dfbf51..0000000
--- a/src/Parameters.cpp
+++ /dev/null
@@ -1,1105 +0,0 @@
-#include "Parameters.h"
-#include "convert.h"
-using namespace std;
-void Parameters::simpleUsage(char ** argv) {
- cout
- << "usage: " << argv[0] << " -f [REFERENCE] [OPTIONS] [BAM FILES] >[OUTPUT]" << endl
- << endl
- << "Bayesian haplotype-based polymorphism discovery." << endl
- << endl
- << "parameters:" << endl
- << endl
- << " -h --help For a complete description of options." << endl
- << endl
- << "citation: Erik Garrison, Gabor Marth" << endl
- << " \"Haplotype-based variant detection from short-read sequencing\"" << endl
- << " arXiv:1207.3907 (http://arxiv.org/abs/1207.3907)" << endl
- << endl
- << "author: Erik Garrison <erik.garrison at bc.edu>, Marth Lab, Boston College, 2010-2014" << endl
- << "version: " << VERSION_GIT << endl;
-void Parameters::usage(char** argv) {
- cout
- << "usage: " << argv[0] << " [OPTION] ... [BAM FILE] ... " << endl
- << endl
- << "Bayesian haplotype-based polymorphism discovery." << endl
- << endl
- << "citation: Erik Garrison, Gabor Marth" << endl
- << " \"Haplotype-based variant detection from short-read sequencing\"" << endl
- << " arXiv:1207.3907 (http://arxiv.org/abs/1207.3907)" << endl
- << endl
- << "overview:" << endl
- << endl
- << " To call variants from aligned short-read sequencing data, supply BAM files and" << endl
- << " a reference. FreeBayes will provide VCF output on standard out describing SNPs," << endl
- << " indels, and complex variants in samples in the input alignments." << endl
- << endl
- << " By default, FreeBayes will consider variants supported by at least 2" << endl
- << " observations in a single sample (-C) and also by at least 20% of the reads from" << endl
- << " a single sample (-F). These settings are suitable to low to high depth" << endl
- << " sequencing in haploid and diploid samples, but users working with polyploid or" << endl
- << " pooled samples may wish to adjust them depending on the characteristics of" << endl
- << " their sequencing data." << endl
- << endl
- << " FreeBayes is capable of calling variant haplotypes shorter than a read length" << endl
- << " where multiple polymorphisms segregate on the same read. The maximum distance" << endl
- << " between polymorphisms phased in this way is determined by the" << endl
- << " --max-complex-gap, which defaults to 3bp. In practice, this can comfortably be" << endl
- << " set to half the read length." << endl
- << endl
- << " Ploidy may be set to any level (-p), but by default all samples are assumed to" << endl
- << " be diploid. FreeBayes can model per-sample and per-region variation in" << endl
- << " copy-number (-A) using a copy-number variation map." << endl
- << endl
- << " FreeBayes can act as a frequency-based pooled caller and describe variants" << endl
- << " and haplotypes in terms of observation frequency rather than called genotypes." << endl
- << " To do so, use --pooled-continuous and set input filters to a suitable level." << endl
- << " Allele observation counts will be described by AO and RO fields in the VCF output." << endl
- << endl
- << endl
- << "examples:" << endl
- << endl
- << " # call variants assuming a diploid sample" << endl
- << " freebayes -f ref.fa aln.bam >var.vcf" << endl
- << endl
- << " # call variants assuming a diploid sample, providing gVCF output" << endl
- << " freebayes -f ref.fa --gvcf aln.bam >var.gvcf" << endl
- << endl
- << " # require at least 5 supporting observations to consider a variant" << endl
- << " freebayes -f ref.fa -C 5 aln.bam >var.vcf" << endl
- << endl
- << " # use a different ploidy" << endl
- << " freebayes -f ref.fa -p 4 aln.bam >var.vcf" << endl
- << endl
- << " # assume a pooled sample with a known number of genome copies" << endl
- << " freebayes -f ref.fa -p 20 --pooled-discrete aln.bam >var.vcf" << endl
- << endl
- << " # generate frequency-based calls for all variants passing input thresholds" << endl
- << " freebayes -f ref.fa -F 0.01 -C 1 --pooled-continuous aln.bam >var.vcf" << endl
- << endl
- << " # use an input VCF (bgzipped + tabix indexed) to force calls at particular alleles" << endl
- << " freebayes -f ref.fa -@ in.vcf.gz aln.bam >var.vcf" << endl
- << endl
- << " # generate long haplotype calls over known variants" << endl
- << " freebayes -f ref.fa --haplotype-basis-alleles in.vcf.gz \\ " << endl
- << " --haplotype-length 50 aln.bam" << endl
- << endl
- << " # naive variant calling: simply annotate observation counts of SNPs and indels" << endl
- << " freebayes -f ref.fa --haplotype-length 0 --min-alternate-count 1 \\ " << endl
- << " --min-alternate-fraction 0 --pooled-continuous --report-monomorphic >var.vcf" << endl
- << endl
- << endl
- << "parameters:" << endl
- << endl
- << " -h --help Prints this help dialog." << endl
- << " --version Prints the release number and the git commit id." << endl
- << endl
- << "input:" << endl
- << endl
- << " -b --bam FILE Add FILE to the set of BAM files to be analyzed." << endl
- << " -L --bam-list FILE" << endl
- << " A file containing a list of BAM files to be analyzed." << endl
- << " -c --stdin Read BAM input on stdin." << endl
- << " -f --fasta-reference FILE" << endl
- << " Use FILE as the reference sequence for analysis." << endl
- << " An index file (FILE.fai) will be created if none exists." << endl
- << " If neither --targets nor --region are specified, FreeBayes" << endl
- << " will analyze every position in this reference." << endl
- << " -t --targets FILE" << endl
- << " Limit analysis to targets listed in the BED-format FILE." << endl
- << " -r --region <chrom>:<start_position>-<end_position>" << endl
- << " Limit analysis to the specified region, 0-base coordinates," << endl
- << " end_position not included (same as BED format)." << endl
- << " Either '-' or '..' maybe used as a separator." << endl
- << " -s --samples FILE" << endl
- << " Limit analysis to samples listed (one per line) in the FILE." << endl
- << " By default FreeBayes will analyze all samples in its input" << endl
- << " BAM files." << endl
- << " --populations FILE" << endl
- << " Each line of FILE should list a sample and a population which" << endl
- << " it is part of. The population-based bayesian inference model" << endl
- << " will then be partitioned on the basis of the populations." << endl
- << " -A --cnv-map FILE" << endl
- << " Read a copy number map from the BED file FILE, which has" << endl
- << " the format:" << endl
- << " reference sequence, start, end, sample name, copy number" << endl
- << " ... for each region in each sample which does not have the" << endl
- << " default copy number as set by --ploidy." << endl
- << endl
- << "output:" << endl
- << endl
- << " -v --vcf FILE Output VCF-format results to FILE. (default: stdout)" << endl
- << " --gvcf" << endl
- << " Write gVCF output, which indicates coverage in uncalled regions." << endl
- << " --gvcf-chunk NUM" << endl
- << " When writing gVCF output emit a record for every NUM bases." << endl
- << " -@ --variant-input VCF" << endl
- << " Use variants reported in VCF file as input to the algorithm." << endl
- << " Variants in this file will included in the output even if" << endl
- << " there is not enough support in the data to pass input filters." << endl
- << " -l --only-use-input-alleles" << endl
- << " Only provide variant calls and genotype likelihoods for sites" << endl
- << " and alleles which are provided in the VCF input, and provide" << endl
- << " output in the VCF for all input alleles, not just those which" << endl
- << " have support in the data." << endl
- << " --haplotype-basis-alleles VCF" << endl
- << " When specified, only variant alleles provided in this input" << endl
- << " VCF will be used for the construction of complex or haplotype" << endl
- << " alleles." << endl
- << " --report-all-haplotype-alleles" << endl
- << " At sites where genotypes are made over haplotype alleles," << endl
- << " provide information about all alleles in output, not only" << endl
- << " those which are called." << endl
- << " --report-monomorphic" << endl
- << " Report even loci which appear to be monomorphic, and report all" << endl
- << " considered alleles, even those which are not in called genotypes." << endl
- << " Loci which do not have any potential alternates have '.' for ALT." << endl
- << " -P --pvar N Report sites if the probability that there is a polymorphism" << endl
- << " at the site is greater than N. default: 0.0. Note that post-" << endl
- << " filtering is generally recommended over the use of this parameter." << endl
- << endl
- << "population model:" << endl
- << endl
- << " -T --theta N The expected mutation rate or pairwise nucleotide diversity" << endl
- << " among the population under analysis. This serves as the" << endl
- << " single parameter to the Ewens Sampling Formula prior model" << endl
- << " default: 0.001" << endl
- << " -p --ploidy N Sets the default ploidy for the analysis to N. default: 2" << endl
- << " -J --pooled-discrete" << endl
- << " Assume that samples result from pooled sequencing." << endl
- << " Model pooled samples using discrete genotypes across pools." << endl
- << " When using this flag, set --ploidy to the number of" << endl
- << " alleles in each sample or use the --cnv-map to define" << endl
- << " per-sample ploidy." << endl
- << " -K --pooled-continuous" << endl
- << " Output all alleles which pass input filters, regardles of" << endl
- << " genotyping outcome or model." << endl
- << endl
- << "reference allele:" << endl
- << endl
- << " -Z --use-reference-allele" << endl
- << " This flag includes the reference allele in the analysis as" << endl
- << " if it is another sample from the same population." << endl
- << " --reference-quality MQ,BQ" << endl
- << " Assign mapping quality of MQ to the reference allele at each" << endl
- << " site and base quality of BQ. default: 100,60" << endl
- << endl
- << "allele scope:" << endl
- << endl
- << " -I --no-snps Ignore SNP alleles." << endl
- << " -i --no-indels Ignore insertion and deletion alleles." << endl
- << " -X --no-mnps Ignore multi-nuceotide polymorphisms, MNPs." << endl
- << " -u --no-complex Ignore complex events (composites of other classes)." << endl
- << " -n --use-best-n-alleles N" << endl
- << " Evaluate only the best N SNP alleles, ranked by sum of" << endl
- << " supporting quality scores. (Set to 0 to use all; default: all)" << endl
- << " -E --max-complex-gap N" << endl
- << " --haplotype-length N" << endl
- << " Allow haplotype calls with contiguous embedded matches of up" << endl
- << " to this length. (default: 3)" << endl
- << " --min-repeat-size N" << endl
- << " When assembling observations across repeats, require the total repeat" << endl
- << " length at least this many bp. (default: 5)" << endl
- << " --min-repeat-entropy N" << endl
- << " To detect interrupted repeats, build across sequence until it has" << endl
- << " entropy > N bits per bp. (default: 0, off)" << endl
- << " --no-partial-observations" << endl
- << " Exclude observations which do not fully span the dynamically-determined" << endl
- << " detection window. (default, use all observations, dividing partial" << endl
- << " support across matching haplotypes when generating haplotypes.)" << endl
- << endl
- << "indel realignment:" << endl
- << endl
- << " -O --dont-left-align-indels" << endl
- << " Turn off left-alignment of indels, which is enabled by default." << endl
- << endl
- << "input filters:" << endl
- << endl
- << " -4 --use-duplicate-reads" << endl
- << " Include duplicate-marked alignments in the analysis." << endl
- << " default: exclude duplicates marked as such in alignments" << endl
- << " -m --min-mapping-quality Q" << endl
- << " Exclude alignments from analysis if they have a mapping" << endl
- << " quality less than Q. default: 1" << endl
- << " -q --min-base-quality Q" << endl
- << " Exclude alleles from analysis if their supporting base" << endl
- << " quality is less than Q. default: 0" << endl
- << " -R --min-supporting-allele-qsum Q" << endl
- << " Consider any allele in which the sum of qualities of supporting" << endl
- << " observations is at least Q. default: 0" << endl
- << " -Y --min-supporting-mapping-qsum Q" << endl
- << " Consider any allele in which and the sum of mapping qualities of" << endl
- << " supporting reads is at least Q. default: 0" << endl
- << " -Q --mismatch-base-quality-threshold Q" << endl
- << " Count mismatches toward --read-mismatch-limit if the base" << endl
- << " quality of the mismatch is >= Q. default: 10" << endl
- << " -U --read-mismatch-limit N" << endl
- << " Exclude reads with more than N mismatches where each mismatch" << endl
- << " has base quality >= mismatch-base-quality-threshold." << endl
- << " default: ~unbounded" << endl
- << " -z --read-max-mismatch-fraction N" << endl
- << " Exclude reads with more than N [0,1] fraction of mismatches where" << endl
- << " each mismatch has base quality >= mismatch-base-quality-threshold" << endl
- << " default: 1.0" << endl
- << " -$ --read-snp-limit N" << endl
- << " Exclude reads with more than N base mismatches, ignoring gaps" << endl
- << " with quality >= mismatch-base-quality-threshold." << endl
- << " default: ~unbounded" << endl
- << " -e --read-indel-limit N" << endl
- << " Exclude reads with more than N separate gaps." << endl
- << " default: ~unbounded" << endl
- << " -0 --standard-filters Use stringent input base and mapping quality filters" << endl
- << " Equivalent to -m 30 -q 20 -R 0 -S 0" << endl
- << " -F --min-alternate-fraction N" << endl
- << " Require at least this fraction of observations supporting" << endl
- << " an alternate allele within a single individual in the" << endl
- << " in order to evaluate the position. default: 0.2" << endl
- << " -C --min-alternate-count N" << endl
- << " Require at least this count of observations supporting" << endl
- << " an alternate allele within a single individual in order" << endl
- << " to evaluate the position. default: 2" << endl
- << " -3 --min-alternate-qsum N" << endl
- << " Require at least this sum of quality of observations supporting" << endl
- << " an alternate allele within a single individual in order" << endl
- << " to evaluate the position. default: 0" << endl
- << " -G --min-alternate-total N" << endl
- << " Require at least this count of observations supporting" << endl
- << " an alternate allele within the total population in order" << endl
- << " to use the allele in analysis. default: 1" << endl
- << " --min-coverage N" << endl
- << " Require at least this coverage to process a site. default: 0" << endl
- << " --max-coverage N" << endl
- << " Do not process sites with greater than this coverage. default: no limit" << endl
- << endl
- << "population priors:" << endl
- << endl
- << " -k --no-population-priors" << endl
- << " Equivalent to --pooled-discrete --hwe-priors-off and removal of" << endl
- << " Ewens Sampling Formula component of priors." << endl
- << endl
- << "mappability priors:" << endl
- << endl
- << " -w --hwe-priors-off" << endl
- << " Disable estimation of the probability of the combination" << endl
- << " arising under HWE given the allele frequency as estimated" << endl
- << " by observation frequency." << endl
- << " -V --binomial-obs-priors-off" << endl
- << " Disable incorporation of prior expectations about observations." << endl
- << " Uses read placement probability, strand balance probability," << endl
- << " and read position (5'-3') probability." << endl
- << " -a --allele-balance-priors-off" << endl
- << " Disable use of aggregate probability of observation balance between alleles" << endl
- << " as a component of the priors." << endl
- << endl
- << "genotype likelihoods:" << endl
- << endl
- << " --observation-bias FILE" << endl
- << " Read length-dependent allele observation biases from FILE." << endl
- << " The format is [length] [alignment efficiency relative to reference]" << endl
- << " where the efficiency is 1 if there is no relative observation bias." << endl
- << " --base-quality-cap Q" << endl
- << " Limit estimated observation quality by capping base quality at Q." << endl
- << " --prob-contamination F" << endl
- << " An estimate of contamination to use for all samples. default: 10e-9" << endl
- << " --legacy-gls Use legacy (polybayes equivalent) genotype likelihood calculations" << endl
- << " --contamination-estimates FILE" << endl
- << " A file containing per-sample estimates of contamination, such as" << endl
- << " those generated by VerifyBamID. The format should be:" << endl
- << " sample p(read=R|genotype=AR) p(read=A|genotype=AA)" << endl
- << " Sample '*' can be used to set default contamination estimates." << endl
- << endl
- << "algorithmic features:" << endl
- << endl
- << " --report-genotype-likelihood-max" << endl
- << " Report genotypes using the maximum-likelihood estimate provided" << endl
- << " from genotype likelihoods." << endl
- << " -B --genotyping-max-iterations N" << endl
- << " Iterate no more than N times during genotyping step. default: 1000." << endl
- << " --genotyping-max-banddepth N" << endl
- << " Integrate no deeper than the Nth best genotype by likelihood when" << endl
- << " genotyping. default: 6." << endl
- << " -W --posterior-integration-limits N,M" << endl
- << " Integrate all genotype combinations in our posterior space" << endl
- << " which include no more than N samples with their Mth best" << endl
- << " data likelihood. default: 1,3." << endl
- << " -N --exclude-unobserved-genotypes" << endl
- << " Skip sample genotypings for which the sample has no supporting reads." << endl
- << " -S --genotype-variant-threshold N" << endl
- << " Limit posterior integration to samples where the second-best" << endl
- << " genotype likelihood is no more than log(N) from the highest" << endl
- << " genotype likelihood for the sample. default: ~unbounded" << endl
- << " -j --use-mapping-quality" << endl
- << " Use mapping quality of alleles when calculating data likelihoods." << endl
- << " -H --harmonic-indel-quality" << endl
- << " Use a weighted sum of base qualities around an indel, scaled by the" << endl
- << " distance from the indel. By default use a minimum BQ in flanking sequence." << endl
- << " -D --read-dependence-factor N" << endl
- << " Incorporate non-independence of reads by scaling successive" << endl
- << " observations by this factor during data likelihood" << endl
- << " calculations. default: 0.9" << endl
- << " -= --genotype-qualities" << endl
- << " Calculate the marginal probability of genotypes and report as GQ in" << endl
- << " each sample field in the VCF output." << endl
- << endl
- << "debugging:" << endl
- << endl
- << " -d --debug Print debugging output." << endl
- << " -dd Print more verbose debugging output (requires \"make DEBUG\")" << endl
- << endl
- << endl
- << "author: Erik Garrison <erik.garrison at bc.edu>, Marth Lab, Boston College, 2010-2014" << endl
- << "version: " << VERSION_GIT << endl;
-Parameters::Parameters(int argc, char** argv) {
- if (argc == 1) {
- simpleUsage(argv);
- exit(1);
- }
- // record command line parameters
- commandline = argv[0];
- for (int i = 1; i < argc; ++i) {
- commandline += " ";
- commandline += argv[i];
- }
- // set defaults
- // i/o parameters:
- useStdin = false; // -c --stdin
- fasta = ""; // -f --fasta-reference
- targets = ""; // -t --targets
- samples = ""; // -s --samples
- populationsFile = "";
- cnvFile = "";
- output = "vcf"; // -v --vcf
- outputFile = "";
- gVCFout = false;
- gVCFchunk = 0;
- alleleObservationBiasFile = "";
- // operation parameters
- useDuplicateReads = false; // -E --use-duplicate-reads
- suppressOutput = false; // -N --suppress-output
- useBestNAlleles = 0; // -n --use-best-n-alleles
- forceRefAllele = false; // -Z --use-reference-allele
- useRefAllele = false; // .....
- diploidReference = false; // -H --diploid-reference
- allowIndels = true; // -i --no-indels
- leftAlignIndels = true; // -O --dont-left-align-indels
- allowMNPs = true; // -X --no-mnps
- allowSNPs = true; // -I --no-snps
- allowComplex = true;
- maxComplexGap = 3;
- //maxHaplotypeLength = 100;
- minRepeatSize = 5;
- minRepeatEntropy = 0;
- usePartialObservations = true;
- pooledDiscrete = false; // -J --pooled
- pooledContinuous = false;
- ewensPriors = true;
- permute = true; // -K --permute
- useMappingQuality = false;
- useMinIndelQuality = true;
- obsBinomialPriors = true;
- hwePriors = true;
- alleleBalancePriors = true;
- excludeUnobservedGenotypes = false;
- excludePartiallyObservedGenotypes = false;
- genotypeVariantThreshold = 0;
- siteSelectionMaxIterations = 5;
- reportGenotypeLikelihoodMax = false;
- genotypingMaxIterations = 1000;
- genotypingMaxBandDepth = 7;
- minPairedAltCount = 0;
- minAltMeanMapQ = 0;
- limitGL = 0;
- reportAllHaplotypeAlleles = false;
- reportMonomorphic = false;
- boundIndels = true; // ignore indels at ends of reads
- onlyUseInputAlleles = false;
- standardGLs = false; // use experimental gls by default // XXX
- MQR = 100; // -M --reference-mapping-quality
- BQR = 60; // -B --reference-base-quality
- ploidy = 2; // -p --ploidy
- MQL0 = 1; // -m --min-mapping-quality
- BQL0 = 0; // -q --min-base-quality
- minSupportingAlleleQualitySum = 0;
- minSupportingMappingQualitySum = 0;
- BQL2 = 10; // -Q --mismatch-base-quality-threshold
- RMU = 10000000; // -U --read-mismatch-limit
- readMaxMismatchFraction = 1.0; // -z --read-max-mismatch-fraction
- readSnpLimit = 10000000; // -$ --read-snp-limit
- readIndelLimit = 10000000; // -e --read-indel-limit
- IDW = -1; // -x --indel-exclusion-window
- TH = 10e-3; // -T --theta
- PVL = 0.0; // -P --pvar
- RDF = 0.9; // -D --read-dependence-factor
- diffusionPriorScalar = 1.0; // -V --diffusion-prior-scalar
- WB = 1; // -W --posterior-integration-limits
- TB = 3;
- posteriorIntegrationDepth = 0;
- calculateMarginals = false;
- minAltFraction = 0.2; // require 20% of reads from sample to be supporting the same alternate to consider
- minAltCount = 2; // require 2 reads in same sample call
- minAltTotal = 1;
- minAltQSum = 0;
- baseQualityCap = 0;
- probContamination = 10e-9;
- //minAltQSumTotal = 0;
- minCoverage = 0;
- maxCoverage = 0;
- debuglevel = 0;
- debug = false;
- debug2 = false;
- showReferenceRepeats = false;
- int c; // counter for getopt
- static struct option long_options[] =
- {
- {"help", no_argument, 0, 'h'},
- {"version", no_argument, 0, '#'},
- {"bam", required_argument, 0, 'b'},
- {"bam-list", required_argument, 0, 'L'},
- {"stdin", no_argument, 0, 'c'},
- {"fasta-reference", required_argument, 0, 'f'},
- {"targets", required_argument, 0, 't'},
- {"region", required_argument, 0, 'r'},
- {"samples", required_argument, 0, 's'},
- {"populations", required_argument, 0, '2'},
- {"cnv-map", required_argument, 0, 'A'},
- {"vcf", required_argument, 0, 'v'},
- {"gvcf", no_argument, 0, '8'},
- {"gvcf-chunk", required_argument, 0, '&'},
- {"use-duplicate-reads", no_argument, 0, '4'},
- {"no-partial-observations", no_argument, 0, '['},
- {"use-best-n-alleles", required_argument, 0, 'n'},
- {"use-reference-allele", no_argument, 0, 'Z'},
- {"harmonic-indel-quality", no_argument, 0, 'H'},
- {"standard-filters", no_argument, 0, '0'},
- {"reference-quality", required_argument, 0, '1'},
- {"ploidy", required_argument, 0, 'p'},
- {"pooled-discrete", no_argument, 0, 'J'},
- {"pooled-continuous", no_argument, 0, 'K'},
- {"no-population-priors", no_argument, 0, 'k'},
- {"use-mapping-quality", no_argument, 0, 'j'},
- {"min-mapping-quality", required_argument, 0, 'm'},
- {"min-base-quality", required_argument, 0, 'q'},
- {"min-supporting-allele-qsum", required_argument, 0, 'R'},
- {"min-supporting-mapping-qsum", required_argument, 0, 'Y'},
- {"mismatch-base-quality-threshold", required_argument, 0, 'Q'},
- {"read-mismatch-limit", required_argument, 0, 'U'},
- {"read-max-mismatch-fraction", required_argument, 0, 'z'},
- {"read-snp-limit", required_argument, 0, '$'},
- {"read-indel-limit", required_argument, 0, 'e'},
- {"no-indels", no_argument, 0, 'i'},
- {"dont-left-align-indels", no_argument, 0, 'O'},
- {"no-mnps", no_argument, 0, 'X'},
- {"no-complex", no_argument, 0, 'u'},
- {"max-complex-gap", required_argument, 0, 'E'},
- {"haplotype-length", required_argument, 0, 'E'},
- {"min-repeat-size", required_argument, 0, 'E'},
- {"min-repeat-entropy", required_argument, 0, 'E'},
- {"no-snps", no_argument, 0, 'I'},
- {"indel-exclusion-window", required_argument, 0, 'x'},
- {"theta", required_argument, 0, 'T'},
- {"pvar", required_argument, 0, 'P'},
- {"read-dependence-factor", required_argument, 0, 'D'},
- {"binomial-obs-priors-off", no_argument, 0, 'V'},
- {"allele-balance-priors-off", no_argument, 0, 'a'},
- {"hwe-priors-off", no_argument, 0, 'w'},
- {"posterior-integration-limits", required_argument, 0, 'W'},
- {"min-alternate-fraction", required_argument, 0, 'F'},
- {"min-alternate-count", required_argument, 0, 'C'},
- //{"min-paired-alternate-count", required_argument, 0, 'Y'},
- {"observation-bias", required_argument, 0, '%'},
- {"min-alternate-total", required_argument, 0, 'G'},
- //{"min-alternate-mean-mapq", required_argument, 0, 'k'},
- {"min-alternate-qsum", required_argument, 0, '3'},
- {"min-coverage", required_argument, 0, '!'},
- {"max-coverage", required_argument, 0, '+'},
- {"genotype-qualities", no_argument, 0, '='},
- {"variant-input", required_argument, 0, '@'},
- {"only-use-input-alleles", no_argument, 0, 'l'},
- //{"show-reference-repeats", no_argument, 0, '_'},
- {"exclude-unobserved-genotypes", no_argument, 0, 'N'},
- {"genotype-variant-threshold", required_argument, 0, 'S'},
- {"site-selection-max-iterations", required_argument, 0, 'M'},
- {"genotyping-max-iterations", required_argument, 0, 'B'},
- {"genotyping-max-banddepth", required_argument, 0, '7'},
- {"haplotype-basis-alleles", required_argument, 0, '9'},
- {"report-genotype-likelihood-max", no_argument, 0, '5'},
- {"report-all-haplotype-alleles", no_argument, 0, '6'},
- {"base-quality-cap", required_argument, 0, '('},
- {"legacy-gls", no_argument, 0, ')'},
- {"prob-contamination", required_argument, 0, '_'},
- {"contamination-estimates", required_argument, 0, ','},
- {"report-monomorphic", no_argument, 0, '6'},
- {"debug", no_argument, 0, 'd'},
- {0, 0, 0, 0}
- };
- while (true) {
- int option_index = 0;
- c = getopt_long(argc, argv, "hcO4ZKjH[0diN5a)Ik=wl6#uVXJY:b:G:M:x:@:A:f:t:r:s:v:n:B:p:m:q:R:Q:U:$:e:T:P:D:^:S:W:F:C:&:L:8z:1:3:E:7:2:9:%:_:,:(:!:+:",
- long_options, &option_index);
- if (c == -1) // end of options
- break;
- switch (c) {
- // i/o parameters:
- // -b --bam
- case 'b':
- bams.push_back(optarg);
- break;
- // -c --stdin
- case 'c':
- useStdin = true;
- bams.push_back("stdin");
- break;
- // -f --fasta-reference
- case 'f':
- fasta = optarg;
- break;
- // -t --targets
- case 't':
- targets = optarg;
- break;
- // -r --region
- case 'r':
- regions.push_back(optarg);
- break;
- // -s --samples
- case 's':
- samples = optarg;
- break;
- // --populations
- case '2':
- populationsFile = optarg;
- break;
- // -A --cnv-file
- case 'A':
- cnvFile = optarg;
- break;
- // -j --use-mapping-quality
- case 'j':
- useMappingQuality = true;
- break;
- // -v --vcf
- case 'v':
- output = "vcf";
- outputFile = optarg;
- break;
- // -O --dont-left-align-indels
- case 'O':
- leftAlignIndels = false;
- break;
- // --bam-list
- case 'L':
- addLinesFromFile(bams, string(optarg));
- break;
- // -8 --gvcf
- case '8':
- gVCFout = true;
- break;
- case '&':
- gVCFchunk = atoi(optarg);
- break;
- // -4 --use-duplicate-reads
- case '4':
- useDuplicateReads = true;
- break;
- // -3 --min-alternate-qsum
- case '3':
- if (!convert(optarg, minAltQSum)) {
- cerr << "could not parse min-alternate-qsum" << endl;
- exit(1);
- }
- break;
- // -G --min-alternate-total
- case 'G':
- if (!convert(optarg, minAltTotal)) {
- cerr << "could not parse min-alternate-total" << endl;
- exit(1);
- }
- break;
- // -! --min-coverage
- case '!':
- if (!convert(optarg, minCoverage)) {
- cerr << "could not parse min-coverage" << endl;
- exit(1);
- }
- break;
- // -+ --max-coverage
- case '+':
- if (!convert(optarg, maxCoverage)) {
- cerr << "could not parse max-coverage" << endl;
- exit(1);
- }
- break;
- // -n --use-best-n-alleles
- case 'n':
- if (!convert(optarg, useBestNAlleles)) {
- cerr << "could not parse use-best-n-alleles" << endl;
- exit(1);
- }
- break;
- // -Z --use-reference-allele
- case 'Z':
- forceRefAllele = true;
- useRefAllele = true;
- break;
- // -H --harmonic-indel-quality
- case 'H':
- useMinIndelQuality = false;
- break;
- // -0 --standard-filters
- case '0':
- MQL0 = 30;
- BQL0 = 20;
- break;
- // -M --expectation-maximization
- case 'M':
- if (!convert(optarg, siteSelectionMaxIterations)) {
- cerr << "could not parse site-selection-max-iterations" << endl;
- exit(1);
- }
- break;
- case 'u':
- allowComplex = false;
- break;
- case 'E':
- {
- string arg(argv[optind - 2]);
- if (arg == "--min-repeat-size") {
- if (!convert(optarg, minRepeatSize)) {
- cerr << "could not parse " << arg << endl;
- exit(1);
- }
- } else if (arg == "--min-repeat-entropy") {
- if (!convert(optarg, minRepeatEntropy)) {
- cerr << "could not parse " << arg << endl;
- exit(1);
- }
- } else {
- if (!convert(optarg, maxComplexGap)) {
- cerr << "could not parse maxComplexGap" << endl;
- exit(1);
- }
- }
- break;
- }
- // -B --genotyping-max-iterations
- case 'B':
- if (!convert(optarg, genotypingMaxIterations)) {
- cerr << "could not parse genotyping-max-iterations" << endl;
- exit(1);
- }
- break;
- // -7 --genotyping-max-banddepth
- case '7':
- if (!convert(optarg, genotypingMaxBandDepth)) {
- cerr << "could not parse genotyping-max-iterations" << endl;
- exit(1);
- }
- break;
- // -1 --reference-quality
- case '1':
- if (!convert(split(optarg, ",").front(), MQR)) {
- cerr << "could not parse reference mapping quality" << endl;
- exit(1);
- }
- if (!convert(split(optarg, ",").back(), BQR)) {
- cerr << "could not parse reference base quality" << endl;
- exit(1);
- }
- break;
- // -p --ploidy
- case 'p':
- if (!convert(optarg, ploidy)) {
- cerr << "could not parse ploidy" << endl;
- exit(1);
- }
- if (ploidy <= 0) {
- cerr << "cannot set ploidy to less than 1" << endl;
- exit(1);
- }
- break;
- case 'J':
- pooledDiscrete = true;
- hwePriors = false; // disable hwe sampling prob when using discrete pooling
- break;
- // -m --min-mapping-quality
- case 'm':
- if (!convert(optarg, MQL0)) {
- cerr << "could not parse min-base-quality" << endl;
- exit(1);
- }
- break;
- // -q --min-base-quality
- case 'q':
- if (!convert(optarg, BQL0)) {
- cerr << "could not parse min-base-quality" << endl;
- exit(1);
- }
- break;
- // -R --min-supporting-allele-qsum
- case 'R':
- if (!convert(optarg, minSupportingAlleleQualitySum)) {
- cerr << "could not parse min-supporting-allele-qsum" << endl;
- exit(1);
- }
- break;
- // -Y --min-supporting-mapping-quality
- case 'Y':
- if (!convert(optarg, minSupportingMappingQualitySum)) {
- cerr << "could not parse min-supporting-mapping-qsum" << endl;
- exit(1);
- }
- break;
- // -N --exclude-unobserved-genotypes
- case 'N':
- excludeUnobservedGenotypes = true;
- break;
- // -S --genotype-variant-threshold
- case 'S':
- if (!convert(optarg, genotypeVariantThreshold)) {
- cerr << "could not parse genotype-variant-threshold" << endl;
- exit(1);
- }
- break;
- case '5':
- reportGenotypeLikelihoodMax = true;
- break;
- // -Q --mismatch-base-quality-threshold
- case 'Q':
- if (!convert(optarg, BQL2)) {
- cerr << "could not parse mismatch-base-quality-threshold" << endl;
- exit(1);
- }
- break;
- // -U --read-mismatch-limit
- case 'U':
- if (!convert(optarg, RMU)) {
- cerr << "could not parse read-mismatch-limit" << endl;
- exit(1);
- }
- break;
- // -z --read-max-mismatch-fraction
- case 'z':
- if (!convert(optarg, readMaxMismatchFraction)) {
- cerr << "could not parse read-mismatch-limit" << endl;
- exit(1);
- }
- break;
- // -$ --read-snp-limit
- case '$':
- if (!convert(optarg, readSnpLimit)) {
- cerr << "could not parse read-snp-limit" << endl;
- exit(1);
- }
- break;
- // -e --read-indel-limit
- case 'e':
- if (!convert(optarg, readIndelLimit)) {
- cerr << "could not parse read-indel-limit" << endl;
- exit(1);
- }
- break;
- // -x --indel-exclusion-window
- case 'x':
- if (!convert(optarg, IDW)) {
- cerr << "could not parse indel-exclusion-window" << endl;
- exit(1);
- }
- break;
- // -i --indels
- case 'i':
- allowIndels = false;
- break;
- // -X --mnps
- case 'X':
- allowMNPs = false;
- break;
- // -I --no-snps
- case 'I':
- allowSNPs = false;
- break;
- // -T --theta
- case 'T':
- if (!convert(optarg, TH)) {
- cerr << "could not parse theta" << endl;
- exit(1);
- }
- break;
- // -P --pvar
- case 'P':
- if (!convert(optarg, PVL)) {
- cerr << "could not parse pvar" << endl;
- exit(1);
- }
- break;
- // -D --read-dependence-factor
- case 'D':
- if (!convert(optarg, RDF)) {
- cerr << "could not parse read-dependence-factor" << endl;
- exit(1);
- }
- break;
- // -% --observation-bias
- case '%':
- alleleObservationBiasFile = optarg;
- break;
- // observation priors
- case 'V':
- obsBinomialPriors = false;
- break;
- // allele balance
- case 'a':
- alleleBalancePriors = false;
- break;
- // hwe expectations
- case 'w':
- hwePriors = false;
- break;
- // -W --posterior-integration-limits
- case 'W':
- if (!convert(split(optarg, ",").front(), WB)) {
- cerr << "could not parse posterior-integration-limits (bandwidth)" << endl;
- exit(1);
- }
- if (!convert(split(optarg, ",").back(), TB)) {
- cerr << "could not parse posterior-integration-limits (banddepth)" << endl;
- exit(1);
- }
- break;
- // -F --min-alternate-fraction
- case 'F':
- if (!convert(optarg, minAltFraction)) {
- cerr << "could not parse min-alternate-fraction" << endl;
- exit(1);
- }
- break;
- // -C --min-alternate-count
- case 'C':
- if (!convert(optarg, minAltCount)) {
- cerr << "could not parse min-alternate-count" << endl;
- exit(1);
- }
- break;
- // -k --no-population-priors
- case 'k':
- pooledDiscrete = true;
- ewensPriors = false;
- hwePriors = false;
- break;
- case 'K':
- pooledContinuous = true;
- break;
- case '=':
- calculateMarginals = true;
- break;
- case '@':
- variantPriorsFile = optarg;
- break;
- case '9':
- haplotypeVariantFile = optarg;
- break;
- case 'l':
- onlyUseInputAlleles = true;
- break;
- case '6':
- {
- string arg(argv[optind - 1]);
- if (arg == "--report-monomorphic") {
- reportMonomorphic = true;
- }
- reportAllHaplotypeAlleles = true;
- }
- break;
- case '[':
- usePartialObservations = false;
- break;
- case '_':
- if (!convert(optarg, probContamination)) {
- cerr << "could not parse prob-contamination" << endl;
- exit(1);
- }
- break;
- case ',':
- contaminationEstimateFile = optarg;
- break;
- case ')':
- standardGLs = true;
- break;
- case '(':
- if (!convert(optarg, baseQualityCap)) {
- cerr << "could not parse base-quality-cap" << endl;
- exit(1);
- }
- break;
- // -d --debug
- case 'd':
- ++debuglevel;
- break;
- case '#':
- // --version
- cout << "version: " << VERSION_GIT << endl;
- exit(0);
- break;
- case 'h':
- usage(argv);
- exit(0);
- break;
- // either catch "long options" or
- case '?': // print a suggestion about the most-likely long option which the argument matches
- {
- string bad_arg(argv[optind - 1]);
- option* opt = &long_options[0];
- option* closest_opt = opt;
- int shortest_distance = levenshteinDistance(opt->name, bad_arg);
- ++opt;
- while (opt->name != 0) {
- int distance = levenshteinDistance(opt->name, bad_arg);
- if (distance < shortest_distance) {
- shortest_distance = distance;
- closest_opt = opt;
- }
- ++opt;
- }
- cerr << "did you mean --" << closest_opt->name << " ?" << endl;
- exit(1);
- }
- break;
- default:
- abort ();
- }
- }
- // any remaining arguments are considered as bam files
- if (optind < argc) {
- if (useStdin) {
- cerr << "--stdin flag specified, but a list of BAM files given. Jumping disabled." << endl;
- }
- while (optind < argc) {
- bams.push_back(argv[optind++]);
- }
- }
- if (debuglevel >= 1) {
- debug = true;
- }
- if (debuglevel >= 2) {
- debug2 = true;
- }
- if (bams.size() == 0) {
- cerr << "Please specify a BAM file or files." << endl;
- exit(1);
- }
- if (fasta == "") {
- cerr << "Please specify a fasta reference file." << endl;
- exit(1);
- }
- // check that there aren't duplicates in the bams list
- for( int i=1; i<bams.size(); ++i ){
- for( int j=0; j<i; ++j ){
- if( bams[i] == bams[j] ){
- cerr << "Error: Duplicate bam file '" << bams[i] << "'" << endl;
- exit(1);
- }
- }
- }
diff --git a/src/Parameters.h b/src/Parameters.h
deleted file mode 100644
index 80d20ba..0000000
--- a/src/Parameters.h
+++ /dev/null
@@ -1,139 +0,0 @@
-#ifndef _PARAMETERS_H
-#define _PARAMETERS_H
-#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <map>
-#include <getopt.h>
-#include <stdlib.h>
-#include "split.h"
-#include "version_git.h"
-#include "Utility.h"
-using namespace std;
-// Encapsulates tracking and parsing of command line program parameters
-class Parameters {
- friend ostream &operator<<(ostream &out, const Parameters &p);
- // i/o parameters:
- string bam; // -b --bam
- vector<string> bams;
- bool useStdin; // -c --stdin
- string fasta; // -f --fasta-reference
- string targets; // -t --targets
- vector<string> regions; // -r --region
- string samples; // -s --samples
- string populationsFile;
- string cnvFile;
- //string log;
- string output; // -v --vcf
- string outputFile;
- bool gVCFout; // -l --gvcf
- int gVCFchunk;
- string variantPriorsFile;
- string haplotypeVariantFile;
- bool reportAllHaplotypeAlleles;
- bool reportMonomorphic;
- bool boundIndels;
- bool onlyUseInputAlleles;
- string alleleObservationBiasFile;
- bool standardGLs;
- int baseQualityCap;
- double probContamination;
- string contaminationEstimateFile;
- // operation parameters
- bool useDuplicateReads; // -E --use-duplicate-reads
- bool suppressOutput; // -S --suppress-output
- int useBestNAlleles; // -n --use-best-n-alleles
- bool forceRefAllele; // -F --force-reference-allele
- bool useRefAllele; // -U --use-reference-allele
- bool diploidReference; // -H --haploid-reference
- bool allowIndels; // -I --allow-indels
- bool leftAlignIndels; // -O --left-align-indels
- bool allowMNPs; // -X --allow-mnps
- bool allowComplex; // -X --allow-complex
- int maxComplexGap;
- //int maxHaplotypeLength;
- int minRepeatSize;
- double minRepeatEntropy;
- bool usePartialObservations;
- bool allowSNPs; // -I --no-snps
- bool pooledDiscrete;
- bool pooledContinuous;
- bool ewensPriors;
- bool permute; // --permute
- bool useMappingQuality; //
- bool useMinIndelQuality;
- bool obsBinomialPriors;
- bool alleleBalancePriors;
- bool hwePriors;
- bool reportGenotypeLikelihoodMax;
- int genotypingMaxIterations;
- int genotypingMaxBandDepth;
- bool excludePartiallyObservedGenotypes;
- bool excludeUnobservedGenotypes;
- float genotypeVariantThreshold;
- int siteSelectionMaxIterations;
- bool allSites; // TODO
- double limitGL; // minimum GL that is output
- int minPairedAltCount;
- double minAltMeanMapQ;
- int minAltQSum;
- int MQR; // -M --reference-mapping-quality
- int BQR; // -B --reference-base-quality
- int ploidy; // -p --ploidy
- int MQL0; // -m --min-mapping-quality
- int BQL0; // -q --min-base-quality
- int minSupportingMappingQualitySum; // -R --min-supporting-mapping-quality
- int minSupportingAlleleQualitySum; // -S --min-supporting-base-quality
- int BQL2; // -Q --mismatch-base-quality-threshold
- int RMU; // -U --read-mismatch-limit
- float readMaxMismatchFraction; // -z --read-max-mismatch-fraction
- int readSnpLimit; // -$ --read-snp-limit
- int readIndelLimit; // -e --read-indel-limit
- int IDW; // -I --indel-exclusion-window
- long double TH; // -T --theta
- long double PVL; // -P --pvar
- // -K --posterior-integration-depth
- int posteriorIntegrationDepth;
- bool calculateMarginals;
- string algorithm;
- double RDF; // -D --read-dependence-factor
- long double diffusionPriorScalar; // -V --diffusion-prior-scalar
- int WB; // -W --posterior-integration-bandwidth
- // XXX adjusting this to anything other than 1 may have bad consequences
- // for large numbers of samples
- int TB; // -Y --posterior-integration-depth
- bool includeMonoB;
- int TR;
- int I;
- long double minAltFraction; // -F --min-alternate-fraction
- int minAltCount; // -C --min-alternate-count
- int minAltTotal; // -G --min-alternate-total
- int minCoverage; // -! --min-coverage
- int maxCoverage; // -+ --max-coverage
- int debuglevel; // -d --debug increments
- bool debug; // set if debuglevel >=1
- bool debug2; // set if debuglevel >=2
- bool showReferenceRepeats;
- // functions
- Parameters(int argc, char** argv);
- void usage(char **argv);
- void simpleUsage(char **argv);
- // reporting
- string commandline;
diff --git a/src/Product.h b/src/Product.h
deleted file mode 100644
index 28f149c..0000000
--- a/src/Product.h
+++ /dev/null
@@ -1,19 +0,0 @@
-#ifndef __PRODUCT_H
-#define __PRODUCT_H
-#include <vector>
-template <class T>
-T product(const std::vector<T>& v) {
- if (v.size() > 0) {
- T result = 1;
- for (typename std::vector<T>::const_iterator i = v.begin(); i != v.end(); ++i) {
- result *= *i;
- }
- return result;
- } else {
- return 0;
- }
diff --git a/src/Result.cpp b/src/Result.cpp
deleted file mode 100644
index 60ef82d..0000000
--- a/src/Result.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-#include "Result.h"
-void Result::sortDataLikelihoods(void) {
- SampleDataLikelihoodCompare datalikelihoodCompare;
- sort(begin(), end(), datalikelihoodCompare);
diff --git a/src/Result.h b/src/Result.h
deleted file mode 100644
index 2fad06e..0000000
--- a/src/Result.h
+++ /dev/null
@@ -1,25 +0,0 @@
-#ifndef __RESULT_H
-#define __RESULT_H
-#include <vector>
-#include <string>
-#include <algorithm>
-#include <utility>
-#include "Genotype.h"
-using namespace std;
-class Result : public vector<SampleDataLikelihood> {
- string name;
- Sample* observations;
- void sortDataLikelihoods(void);
- //pair<Genotype*, long double> bestMarginalGenotype(void);
diff --git a/src/ResultData.cpp b/src/ResultData.cpp
deleted file mode 100644
index b92dfac..0000000
--- a/src/ResultData.cpp
+++ /dev/null
@@ -1,698 +0,0 @@
-#include "ResultData.h"
-#include "TryCatch.h"
-using namespace std;
-vcf::Variant& Results::vcf(
- vcf::Variant& var, // variant to update
- BigFloat pHom,
- long double bestComboOddsRatio,
- //long double alleleSamplingProb,
- Samples& samples,
- string refbase,
- vector<Allele>& altAllelesIncludingNulls,
- map<string, int> repeats,
- int genotypingIterations,
- vector<string>& sampleNames,
- int coverage,
- GenotypeCombo& genotypeCombo,
- map<string, vector<Allele*> >& alleleGroups,
- map<string, vector<Allele*> >& partialObservationGroups,
- map<Allele*, set<Allele*> >& partialObservationSupport,
- map<int, vector<Genotype> >& genotypesByPloidy,
- vector<string>& sequencingTechnologies,
- AlleleParser* parser) {
- Parameters& parameters = parser->parameters;
- GenotypeComboMap comboMap;
- genotypeCombo2Map(genotypeCombo, comboMap);
- // set up the reported reference allele
- long int referencePosition = (long int) parser->currentPosition; // 0-based
- // remove NULL alt alleles
- vector<Allele> altAlleles;
- for (vector<Allele>::iterator aa = altAllelesIncludingNulls.begin(); aa != altAllelesIncludingNulls.end(); ++aa) {
- if (!aa->isNull()) {
- altAlleles.push_back(*aa);
- }
- }
- map<string, string> adjustedCigar;
- vector<Allele>& adjustedAltAlleles = altAlleles; // just an alias
- for (vector<Allele>::iterator aa = altAlleles.begin(); aa != altAlleles.end(); ++aa) {
- adjustedCigar[aa->base()] = aa->cigar;
- var.alt.push_back(aa->alternateSequence);
- }
- var.ref = refbase;
- assert(!var.ref.empty());
- // get the required size of the reference sequence
- // strip identical bases from start and/or end of alleles
- // if bases have been stripped from the beginning,
- // set up VCF record-wide variables
- var.sequenceName = parser->currentSequenceName;
- var.position = referencePosition + 1;
- var.id = ".";
- var.filter = ".";
- // note that we set QUAL to 0 at loci with no data
- var.quality = max((long double) 0, nan2zero(big2phred(pHom)));
- if (coverage == 0) {
- var.quality = 0;
- }
- // set up format string
- var.format.clear();
- var.format.push_back("GT");
- if (parameters.calculateMarginals) var.format.push_back("GQ");
- // XXX
- var.format.push_back("DP");
- var.format.push_back("RO");
- var.format.push_back("QR");
- var.format.push_back("AO");
- var.format.push_back("QA");
- // add GL/GLE later, when we know if we need to use one or the other
- unsigned int refBasesLeft = 0;
- unsigned int refBasesRight = 0;
- unsigned int refReadsLeft = 0;
- unsigned int refReadsRight = 0;
- unsigned int refEndLeft = 0;
- unsigned int refEndRight = 0;
- unsigned int refmqsum = 0;
- unsigned int refProperPairs = 0;
- long double refReadMismatchSum = 0;
- long double refReadSNPSum = 0;
- long double refReadIndelSum = 0;
- long double refReadSoftClipSum = 0;
- unsigned int refObsCount = 0;
- map<string, int> refObsBySequencingTechnology;
- map<string, vector<Allele*> >::iterator f = alleleGroups.find(refbase);
- if (f != alleleGroups.end()) {
- vector<Allele*>& referenceAlleles = alleleGroups.at(refbase);
- refObsCount = referenceAlleles.size();
- for (vector<Allele*>::iterator app = referenceAlleles.begin(); app != referenceAlleles.end(); ++app) {
- Allele& allele = **app;
- refReadMismatchSum += allele.readMismatchRate;
- refReadSNPSum += allele.readSNPRate;
- refReadIndelSum += allele.readIndelRate;
- if (allele.isProperPair) {
- ++refProperPairs;
- }
- if (!allele.sequencingTechnology.empty()) {
- ++refObsBySequencingTechnology[allele.sequencingTechnology];
- }
- refBasesLeft += allele.basesLeft;
- refBasesRight += allele.basesRight;
- if (allele.basesLeft >= allele.basesRight) {
- refReadsLeft += 1;
- if (allele.strand == STRAND_FORWARD) {
- refEndLeft += 1;
- } else {
- refEndRight += 1;
- }
- } else {
- refReadsRight += 1;
- if (allele.strand == STRAND_FORWARD) {
- refEndRight += 1;
- } else {
- refEndLeft += 1;
- }
- }
- refmqsum += allele.mapQuality;
- }
- }
- long double refReadMismatchRate = (refObsCount == 0 ? 0 : refReadMismatchSum / (long double) refObsCount);
- long double refReadSNPRate = (refObsCount == 0 ? 0 : refReadSNPSum / (long double) refObsCount);
- long double refReadIndelRate = (refObsCount == 0 ? 0 : refReadIndelSum / (long double) refObsCount);
- //var.info["XRM"].push_back(convert(refReadMismatchRate));
- //var.info["XRS"].push_back(convert(refReadSNPRate));
- //var.info["XRI"].push_back(convert(refReadIndelRate));
- var.info["MQMR"].push_back(convert((refObsCount == 0) ? 0 : (double) refmqsum / (double) refObsCount));
- var.info["RPPR"].push_back(convert((refObsCount == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(refReadsLeft, refReadsRight + refReadsLeft, 0.5)))));
- var.info["EPPR"].push_back(convert((refBasesLeft + refBasesRight == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(refEndLeft, refEndLeft + refEndRight, 0.5)))));
- var.info["PAIREDR"].push_back(convert((refObsCount == 0) ? 0 : (double) refProperPairs / (double) refObsCount));
- //var.info["HWE"].push_back(convert(nan2zero(ln2phred(genotypeCombo.hweComboProb()))));
- var.info["GTI"].push_back(convert(genotypingIterations));
- // loop over all alternate alleles
- for (vector<Allele>::iterator aa = altAlleles.begin(); aa != altAlleles.end(); ++aa) {
- Allele& altAllele = *aa;
- string altbase = altAllele.base();
- // count alternate alleles in the best genotyping
- unsigned int alternateCount = 0;
- unsigned int alleleCount = 0;
- double alternateQualitySum = 0;
- double partialObservationCount = 0;
- double partialObservationQualitySum;
- // reference / alternate base counts by strand
- //map<string, unsigned int> altCountBySample;
- //map<string, unsigned int> altQualBySample;
- // het counts
- unsigned int hetReferenceObsCount = 0;
- unsigned int hetOtherObsCount = 0;
- unsigned int hetAlternateObsCount = 0;
- unsigned int hetAltSamples = 0;
- unsigned int homAltSamples = 0;
- unsigned int homRefSamples = 0;
- unsigned int refSampleObsCount = 0; // depth in hom-ref samples
- unsigned int altSampleObsCount = 0; // depth in samples with called alternates
- // unique alternate alleles / all alternate alleles in alt-associated samples
- unsigned int uniqueAllelesInAltSamples = 0;
- //unsigned int hetAllObsCount = hetOtherObsCount + hetAlternateObsCount + hetReferenceObsCount;
- unsigned int hetAllObsCount = 0;
- StrandBaseCounts baseCountsTotal;
- map<string, StrandBaseCounts> baseCountsBySample;
- for (vector<string>::iterator sampleName = sampleNames.begin(); sampleName != sampleNames.end(); ++sampleName) {
- GenotypeComboMap::iterator gc = comboMap.find(*sampleName);
- //cerr << "alternate count for " << altbase << " and " << *genotype << " is " << genotype->alleleCount(altbase) << endl;
- if (gc != comboMap.end()) {
- Genotype* genotype = gc->second->genotype;
- Sample& sample = *gc->second->sample;
- // check that we actually have observations for this sample
- unsigned int observationCount = sample.observationCount();
- if (observationCount == 0) {
- continue;
- }
- alternateCount += genotype->alleleCount(altbase);
- alleleCount += genotype->ploidy;
- unsigned int altCount = sample.observationCount(altbase);
- unsigned int refCount = sample.observationCount(refbase);
- if (!genotype->homozygous) {
- // het case
- if (altCount > 0) {
- ++hetAltSamples;
- hetAllObsCount += observationCount;
- hetReferenceObsCount += refCount;
- hetOtherObsCount += observationCount - altCount;
- hetAlternateObsCount += altCount;
- altSampleObsCount += observationCount;
- uniqueAllelesInAltSamples += sample.size();
- if (refCount > 0) {
- --uniqueAllelesInAltSamples; // ignore reference allele
- }
- }
- } else {
- if (altCount > 0) {
- ++homAltSamples;
- altSampleObsCount += observationCount;
- uniqueAllelesInAltSamples += sample.size();
- if (refCount > 0) {
- --uniqueAllelesInAltSamples; // ignore reference allele
- }
- } else {
- ++homRefSamples;
- refSampleObsCount += observationCount;
- }
- }
- //altCountBySample[*sampleName] = altCount;
- //altQualBySample[*sampleName] = sample.qualSum(altbase);
- StrandBaseCounts baseCounts = sample.strandBaseCount(refbase, altbase);
- baseCountsBySample[*sampleName] = baseCounts;
- baseCountsTotal.forwardRef += baseCounts.forwardRef;
- baseCountsTotal.forwardAlt += baseCounts.forwardAlt;
- baseCountsTotal.reverseRef += baseCounts.reverseRef;
- baseCountsTotal.reverseAlt += baseCounts.reverseAlt;
- }
- }
- unsigned int altBasesLeft = 0;
- unsigned int altBasesRight = 0;
- unsigned int altReadsLeft = 0;
- unsigned int altReadsRight = 0;
- unsigned int altEndLeft = 0;
- unsigned int altEndRight = 0;
- unsigned int altmqsum = 0;
- unsigned int altproperPairs = 0;
- long double altReadMismatchSum = 0;
- long double altReadSNPSum = 0;
- long double altReadIndelSum = 0;
- unsigned int altObsCount = 0;
- map<string, int> altObsBySequencingTechnology;
- // TODO we need a partial obs structure to annotate partial obs
- map<string, vector<Allele*> >::iterator f = alleleGroups.find(altbase);
- if (f != alleleGroups.end()) {
- vector<Allele*>& alternateAlleles = alleleGroups.at(altbase);
- // TODO XXX XXX adjust to use partial observations
- altObsCount = alternateAlleles.size();
- for (vector<Allele*>::iterator app = alternateAlleles.begin(); app != alternateAlleles.end(); ++app) {
- Allele& allele = **app;
- altReadMismatchSum += allele.readMismatchRate;
- altReadSNPSum += allele.readSNPRate;
- altReadIndelSum += allele.readIndelRate;
- // TODO: add altReadSoftClipRate (avg)
- if (allele.isProperPair) {
- ++altproperPairs;
- }
- if (!allele.sequencingTechnology.empty()) {
- ++altObsBySequencingTechnology[allele.sequencingTechnology];
- }
- altBasesLeft += allele.basesLeft;
- altBasesRight += allele.basesRight;
- if (allele.basesLeft >= allele.basesRight) {
- altReadsLeft += 1;
- if (allele.strand == STRAND_FORWARD) {
- altEndLeft += 1;
- } else {
- altEndRight += 1;
- }
- } else {
- altReadsRight += 1;
- if (allele.strand == STRAND_FORWARD) {
- altEndRight += 1;
- } else {
- altEndLeft += 1;
- }
- }
- altmqsum += allele.mapQuality;
- }
- }
- long double altReadMismatchRate = (altObsCount == 0 ? 0 : altReadMismatchSum / altObsCount);
- long double altReadSNPRate = (altObsCount == 0 ? 0 : altReadSNPSum / altObsCount);
- long double altReadIndelRate = (altObsCount == 0 ? 0 : altReadIndelSum / altObsCount);
- //var.info["XAM"].push_back(convert(altReadMismatchRate));
- //var.info["XAS"].push_back(convert(altReadSNPRate));
- //var.info["XAI"].push_back(convert(altReadIndelRate));
- // alt/ref ratios
- //var.info["ARM"].push_back(convert(refReadMismatchRate == 0 ? 0 : altReadMismatchRate / refReadMismatchRate));
- //var.info["ARS"].push_back(convert(refReadSNPRate == 0 ? 0 : altReadSNPRate / refReadSNPRate));
- //var.info["ARI"].push_back(convert(refReadIndelRate == 0 ? 0 : altReadIndelRate / refReadIndelRate));
- //string refbase = parser->currentReferenceBase();
- // positional information
- //out.setf(ios::fixed,ios::floatfield);
- //out.precision(5);
- var.info["AC"].push_back(convert(alternateCount));
- var.info["AN"].clear(); var.info["AN"].push_back(convert(alleleCount)); // XXX hack...
- var.info["AF"].push_back(convert((alleleCount == 0) ? 0 : (double) alternateCount / (double) alleleCount));
- var.info["AO"].push_back(convert(altObsCount));
- var.info["PAO"].push_back(convert(samples.partialObservationCount(altbase)));
- var.info["QA"].push_back(convert(samples.qualSum(altbase)));
- var.info["PQA"].push_back(convert(samples.partialQualSum(altbase)));
- if (homRefSamples > 0 && hetAltSamples + homAltSamples > 0) {
- double altSampleAverageDepth = (double) altSampleObsCount
- / ( (double) hetAltSamples + (double) homAltSamples );
- double refSampleAverageDepth = (double) refSampleObsCount / (double) homRefSamples;
- var.info["DPRA"].push_back(convert(altSampleAverageDepth / refSampleAverageDepth));
- } else {
- var.info["DPRA"].push_back(convert(0));
- }
- var.info["SRP"].clear(); // XXX hack
- var.info["SRF"].clear();
- var.info["SRR"].clear();
- var.info["SRF"].push_back(convert(baseCountsTotal.forwardRef));
- var.info["SRR"].push_back(convert(baseCountsTotal.reverseRef));
- var.info["SRP"].push_back(convert((refObsCount == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(baseCountsTotal.forwardRef, refObsCount, 0.5)))));
- var.info["SAF"].push_back(convert(baseCountsTotal.forwardAlt));
- var.info["SAR"].push_back(convert(baseCountsTotal.reverseAlt));
- var.info["SAP"].push_back(convert((altObsCount == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(baseCountsTotal.forwardAlt, altObsCount, 0.5)))));
- var.info["AB"].push_back(convert((hetAllObsCount == 0) ? 0 : nan2zero((double) hetAlternateObsCount / (double) hetAllObsCount )));
- var.info["ABP"].push_back(convert((hetAllObsCount == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(hetAlternateObsCount, hetAllObsCount, 0.5)))));
- var.info["RUN"].push_back(convert(parser->homopolymerRunLeft(altbase) + 1 + parser->homopolymerRunRight(altbase)));
- var.info["MQM"].push_back(convert((altObsCount == 0) ? 0 : nan2zero((double) altmqsum / (double) altObsCount)));
- var.info["RPP"].push_back(convert((altObsCount == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(altReadsLeft, altReadsRight + altReadsLeft, 0.5)))));
- var.info["RPR"].push_back(convert(altReadsRight));
- var.info["RPL"].push_back(convert(altReadsLeft));
- var.info["EPP"].push_back(convert((altBasesLeft + altBasesRight == 0) ? 0 : nan2zero(ln2phred(hoeffdingln(altEndLeft, altEndLeft + altEndRight, 0.5)))));
- var.info["PAIRED"].push_back(convert((altObsCount == 0) ? 0 : nan2zero((double) altproperPairs / (double) altObsCount)));
- var.info["CIGAR"].push_back(adjustedCigar[altAllele.base()]);
- var.info["MEANALT"].push_back(convert((hetAltSamples + homAltSamples == 0) ? 0 : nan2zero((double) uniqueAllelesInAltSamples / (double) (hetAltSamples + homAltSamples))));
- for (vector<string>::iterator st = sequencingTechnologies.begin();
- st != sequencingTechnologies.end(); ++st) { string& tech = *st;
- var.info["technology." + tech].push_back(convert((altObsCount == 0) ? 0
- : nan2zero((double) altObsBySequencingTechnology[tech] / (double) altObsCount )));
- }
- // allele class
- if (altAllele.type == ALLELE_DELETION) {
- var.info["TYPE"].push_back("del");
- // what is the class of deletion
- // microsatellite repeat?
- // "novel"?
- // how large is the repeat, if there is one?
- } else if (altAllele.type == ALLELE_INSERTION) {
- var.info["TYPE"].push_back("ins");
- } else if (altAllele.type == ALLELE_COMPLEX) {
- var.info["TYPE"].push_back("complex");
- } else if (altAllele.type == ALLELE_SNP) {
- var.info["TYPE"].push_back("snp");
- /*
- // CpG
- if (parser->isCpG(altbase)) {
- var.infoFlags["CpG"] = true;
- }
- */
- } else if (altAllele.type == ALLELE_MNP) {
- var.info["TYPE"].push_back("mnp");
- } else {
- /*
- cerr << "What is this?"
- << "type: " << altAllele.type << " "
- << "allele: " << altAllele << endl;
- */
- }
- var.info["LEN"].push_back(convert(altAllele.length));
- }
- // set up site-wide INFO tags, non-multiple
- // info variables
- // site-wide coverage
- int samplesWithData = 0;
- int refAlleleObservations = 0;
- for (vector<string>::iterator sampleName = sampleNames.begin(); sampleName != sampleNames.end(); ++sampleName) {
- GenotypeComboMap::iterator gc = comboMap.find(*sampleName);
- //cerr << "alternate count for " << altbase << " and " << *genotype << " is " << genotype->alleleCount(altbase) << endl;
- if (gc != comboMap.end()) {
- Genotype* genotype = gc->second->genotype;
- Sample& sample = *gc->second->sample;
- //refAlleleObservations += sample.observationCount(refbase);
- refAlleleObservations += sample.observationCount(refbase);
- ++samplesWithData;
- }
- }
- var.info["NS"].push_back(convert(samplesWithData));
- var.info["DP"].push_back(convert(coverage));
- var.info["RO"].push_back(convert(refAlleleObservations));
- var.info["PRO"].push_back(convert(samples.partialObservationCount(refbase)));
- var.info["QR"].push_back(convert(samples.qualSum(refbase)));
- var.info["PQR"].push_back(convert(samples.partialQualSum(refbase)));
- // tally partial observations to get a mean coverage per bp of reference
- int haplotypeLength = refbase.size();
- int basesInObservations = 0;
- for (map<string, vector<Allele*> >::iterator g = alleleGroups.begin(); g != alleleGroups.end(); ++g) {
- for (vector<Allele*>::iterator a = g->second.begin(); a != g->second.end(); ++a) {
- basesInObservations += (*a)->alternateSequence.size();
- }
- }
- for (map<Allele*, set<Allele*> >::iterator p = partialObservationSupport.begin(); p != partialObservationSupport.end(); ++p) {
- basesInObservations += p->first->alternateSequence.size();
- }
- double depthPerBase = (double) basesInObservations / (double) haplotypeLength;
- var.info["DPB"].push_back(convert(depthPerBase));
- // number of alternate alleles
- var.info["NUMALT"].push_back(convert(altAlleles.size()));
- if (parameters.showReferenceRepeats && !repeats.empty()) {
- stringstream repeatsstr;
- for (map<string, int>::iterator c = repeats.begin(); c != repeats.end(); ++c) {
- repeatsstr << c->first << ":" << c->second << "|";
- }
- string repeatstr = repeatsstr.str();
- TRY { repeatstr = repeatstr.substr(0, repeatstr.size() - 1); } CATCH;
- var.info["REPEAT"].clear();
- var.info["REPEAT"].push_back(repeatstr);
- }
- var.info["ODDS"].push_back(convert(bestComboOddsRatio));
- // samples
- bool outputExplicitGenotypeLikelihoods = false;
- bool outputAnyGenotypeLikelihoods = true;
- // for ordering GLs
- // ordering is F(j/k) = (k*(k+1)/2)+j.
- map<int, map<string, int> > vcfGenotypeOrder;
- for (map<int, vector<Genotype> >::iterator gtg = genotypesByPloidy.begin(); gtg != genotypesByPloidy.end(); ++gtg) {
- int groupPloidy = gtg->first;
- vector<Genotype>& genotypes = gtg->second;
- for (vector<Genotype>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
- Genotype* genotypePtr = &*g;
- Genotype& genotype = *g;
- string genotypeStr = genotype.str();
- // only provide output for genotypes for which we have data
- bool fullySpecified = true;
- vector<int> gtspec;
- genotype.relativeGenotype(gtspec, refbase, altAlleles);
- // null allele case handled by the fact that we don't have any null alternate alleles
- for (vector<int>::iterator n = gtspec.begin(); n != gtspec.end(); ++n) {
- if (*n < 0) {
- fullySpecified = false;
- break;
- }
- }
- if (fullySpecified) {
- if (groupPloidy == 2) {
- int j = gtspec.front();
- int k = gtspec.back();
- vcfGenotypeOrder[groupPloidy][genotypeStr] = (k * (k + 1) / 2) + j;
- } else if (groupPloidy == 1) {
- vcfGenotypeOrder[groupPloidy][genotypeStr] = gtspec.front();
- } else {
- outputAnyGenotypeLikelihoods = false; // XXX prevents output of GLs for polyploid data
- outputExplicitGenotypeLikelihoods = true;
- }
- }
- }
- }
- // get the best genotypes from the combos, and set the output GTs and GQs using them
- for (vector<string>::iterator sn = sampleNames.begin(); sn != sampleNames.end(); ++sn) {
- string& sampleName = *sn;
- GenotypeComboMap::iterator gc = comboMap.find(sampleName);
- Results::iterator s = find(sampleName);
- map<string, vector<string> >& sampleOutput = var.samples[sampleName];
- if (gc != comboMap.end() && s != end()) {
- Sample& sample = *gc->second->sample;
- Result& sampleLikelihoods = s->second;
- Genotype* genotype = gc->second->genotype;
- if (sample.observationCount() == 0) {
- continue;
- }
- sampleOutput["GT"].push_back(genotype->relativeGenotype(refbase, altAlleles));
- if (parameters.calculateMarginals) {
- sampleOutput["GQ"].push_back(convert(nan2zero(big2phred((BigFloat)1 - big_exp(sampleLikelihoods.front().marginal)))));
- }
- sampleOutput["DP"].push_back(convert(sample.observationCount()));
- sampleOutput["RO"].push_back(convert(sample.observationCount(refbase)));
- sampleOutput["QR"].push_back(convert(sample.qualSum(refbase)));
- for (vector<Allele>::iterator aa = altAlleles.begin(); aa != altAlleles.end(); ++aa) {
- Allele& altAllele = *aa;
- string altbase = altAllele.base();
- sampleOutput["AO"].push_back(convert(sample.observationCount(altbase)));
- sampleOutput["QA"].push_back(convert(sample.qualSum(altbase)));
- }
- if (outputAnyGenotypeLikelihoods && !parameters.excludeUnobservedGenotypes && !parameters.excludePartiallyObservedGenotypes) {
- // get data likelihoods for present genotypes, none if we have excluded genotypes from data likelihood calculations
- if (outputExplicitGenotypeLikelihoods) {
- if (var.format.back() != "GLE") {
- var.format.push_back("GLE");
- }
- map<string, string> genotypeLikelihoodsExplicit;
- for (Result::iterator g = sampleLikelihoods.begin(); g != sampleLikelihoods.end(); ++g) {
- if (g->genotype->hasNullAllele()) {
- // if the genotype has null (unspecified) alleles, find
- // the fully specified genotypes it can match with.
- vector<Genotype*> nullmatchgts = g->genotype->nullMatchingGenotypes(genotypesByPloidy[g->genotype->ploidy]);
- // the gls for these will be the same, so the gl for
- // this genotype can be used for all of them. these
- // are the genotypes which the sample does not have,
- // but for which one allele or no alleles match
- for (vector<Genotype*>::iterator n = nullmatchgts.begin(); n != nullmatchgts.end(); ++n) {
- genotypeLikelihoodsExplicit[(*n)->relativeGenotype(refbase, altAlleles)] = convert(ln2log10(g->prob));
- }
- } else {
- // otherwise, we are well-specified, and only one
- // genotype should match
- genotypeLikelihoodsExplicit[g->genotype->relativeGenotype(refbase, altAlleles)] = convert(ln2log10(g->prob));
- }
- }
- vector<string> datalikelihoods;
- for (map<string, string>::iterator gle = genotypeLikelihoodsExplicit.begin(); gle != genotypeLikelihoodsExplicit.end(); ++gle) {
- datalikelihoods.push_back(gle->first + "^" + gle->second);
- }
- sampleOutput["GLE"].push_back(join(datalikelihoods, "|"));
- } else {
- if (var.format.back() != "GL") {
- var.format.push_back("GL");
- }
- map<int, double> genotypeLikelihoods;
- map<int, string> genotypeLikelihoodsOutput;
- for (Result::iterator g = sampleLikelihoods.begin(); g != sampleLikelihoods.end(); ++g) {
- if (g->genotype->hasNullAllele()) {
- // if the genotype has null (unspecified) alleles, find
- // the fully specified genotypes it can match with.
- vector<Genotype*> nullmatchgts = g->genotype->nullMatchingGenotypes(genotypesByPloidy[g->genotype->ploidy]);
- // the gls for these will be the same, so the gl for
- // this genotype can be used for all of them. these
- // are the genotypes which the sample does not have,
- // but for which one allele or no alleles match
- for (vector<Genotype*>::iterator n = nullmatchgts.begin(); n != nullmatchgts.end(); ++n) {
- map<string, int>::iterator o = vcfGenotypeOrder[(*n)->ploidy].find((*n)->str());
- if (o != vcfGenotypeOrder[(*n)->ploidy].end()) {
- genotypeLikelihoods[o->second] = ln2log10(g->prob);
- }
- }
- } else {
- // otherwise, we are well-specified, and only one
- // genotype should match
- map<string, int>::iterator o = vcfGenotypeOrder[g->genotype->ploidy].find(g->genotype->str());
- if (o != vcfGenotypeOrder[g->genotype->ploidy].end()) {
- genotypeLikelihoods[o->second] = ln2log10(g->prob);
- }
- }
- }
- // normalize GLs to 0 max using division by max
- long double minGL = 0;
- for (map<int, double>::iterator g = genotypeLikelihoods.begin(); g != genotypeLikelihoods.end(); ++g) {
- if (g->second < minGL) minGL = g->second;
- }
- long double maxGL = minGL;
- for (map<int, double>::iterator g = genotypeLikelihoods.begin(); g != genotypeLikelihoods.end(); ++g) {
- if (g->second > maxGL) maxGL = g->second;
- }
- if (parameters.limitGL == 0) {
- for (map<int, double>::iterator g = genotypeLikelihoods.begin(); g != genotypeLikelihoods.end(); ++g) {
- genotypeLikelihoodsOutput[g->first] = convert(g->second-maxGL);
- }
- } else {
- for (map<int, double>::iterator g = genotypeLikelihoods.begin(); g != genotypeLikelihoods.end(); ++g) {
- genotypeLikelihoodsOutput[g->first] = convert( max((long double) + parameters.limitGL, (g->second-maxGL)) );
- }
- }
- vector<string>& datalikelihoods = sampleOutput["GL"];
- // output is sorted by map
- for (map<int, string>::iterator gl = genotypeLikelihoodsOutput.begin(); gl != genotypeLikelihoodsOutput.end(); ++gl) {
- datalikelihoods.push_back(gl->second);
- }
- }
- }
- }
- }
- return var;
-vcf::Variant& Results::gvcf(
- vcf::Variant& var,
- NonCalls& nonCalls,
- AlleleParser* parser) {
- // what is the first position in the nonCalls?
- pair<string, long> start = nonCalls.firstPos();
- const string& startChrom = start.first;
- long startPos = start.second;
- // startPos and endPos are zero-based, half-open -- [startPos,endPos)
- // what is the current position? nb: can't be on a different chrom
- long endPos;
- if (startChrom != parser->currentSequenceName) {
- endPos = parser->reference.sequenceLength(startChrom);
- } else {
- endPos = parser->currentPosition;
- }
- long numSites = endPos - startPos;
- assert(numSites > 0);
- // set up site call
- var.ref = parser->currentReferenceBaseString();
- var.alt.push_back("<*>");
- var.sequenceName = parser->currentSequenceName;
- var.position = startPos + 1; // output text field is one-based
- var.id = ".";
- var.filter = ".";
- // TODO change to actual quality
- var.quality = 0;
- // set up format string
- var.format.clear();
- var.format.push_back("GQ");
- var.format.push_back("DP");
- var.format.push_back("MIN");
- var.format.push_back("QR");
- var.format.push_back("QA");
- NonCall total = nonCalls.aggregateAll();
- var.info["DP"].push_back(convert((total.refCount+total.altCount) / numSites));
- var.info["MIN"].push_back(convert(total.minDepth));
- // The text END field is one-based, inclusive. We proudly conflate this
- // with our zero-based, exclusive endPos.
- var.info["END"].push_back(convert(endPos));
- // genotype quality is 1- p(polymorphic)
- map<string, NonCall> perSample;
- nonCalls.aggregatePerSample(perSample);
- // iterate through the samples and aggregate information about them
- for (vector<string>::const_iterator s = parser->sampleList.begin();
- s != parser->sampleList.end(); ++s) {
- const string& sampleName = *s;
- const NonCall& nc = perSample[sampleName];
- map<string, vector<string> >& sampleOutput = var.samples[sampleName];
- long double qual = nc.reflnQ - nc.altlnQ;
- sampleOutput["GQ"].push_back(convert(ln2phred(qual)));
- sampleOutput["DP"].push_back(convert((nc.refCount+nc.altCount) / numSites));
- sampleOutput["MIN"].push_back(convert(nc.minDepth));
- sampleOutput["QR"].push_back(convert(ln2phred(nc.reflnQ)));
- sampleOutput["QA"].push_back(convert(ln2phred(nc.altlnQ)));
- }
- return var;
diff --git a/src/ResultData.h b/src/ResultData.h
deleted file mode 100644
index ef02cae..0000000
--- a/src/ResultData.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef __RESULT_DATA_H
-#define __RESULT_DATA_H
-#include <vector>
-#include <ostream>
-#include <iomanip>
-#include "Genotype.h"
-#include "Allele.h"
-#include "Utility.h"
-#include "AlleleParser.h"
-#include "Variant.h"
-#include "version_git.h"
-#include "Result.h"
-#include "NonCall.h"
-using namespace std;
-// for sorting data likelihoods
-class DataLikelihoodCompare {
- bool operator()(const pair<Genotype*, long double>& a,
- const pair<Genotype*, long double>& b) {
- return a.second > b.second;
- }
-// maps sample names to results
-class Results : public map<string, Result> {
- void update(SampleDataLikelihoods& likelihoods) {
- for (SampleDataLikelihoods::iterator s = likelihoods.begin(); s != likelihoods.end(); ++s) {
- vector<SampleDataLikelihood>& sdls = *s;
- string& name = sdls.front().name;
- Result& result = (*this)[name];
- result.clear();
- for (vector<SampleDataLikelihood>::iterator s = sdls.begin(); s != sdls.end(); ++s) {
- result.push_back(*s);
- }
- }
- }
- vcf::Variant& vcf(
- vcf::Variant& var, // variant to update
- BigFloat pHom,
- long double bestComboOddsRatio,
- //long double alleleSamplingProb,
- Samples& samples,
- string refbase,
- vector<Allele>& altAlleles,
- map<string, int> repeats,
- int genotypingIterations,
- vector<string>& sampleNames,
- int coverage,
- GenotypeCombo& genotypeCombo,
- map<string, vector<Allele*> >& alleleGroups,
- map<string, vector<Allele*> >& partialObservationGroups,
- map<Allele*, set<Allele*> >& partialSupport,
- map<int, vector<Genotype> >& genotypesByPloidy,
- vector<string>& sequencingTechnologies,
- AlleleParser* parser);
- vcf::Variant& gvcf(
- vcf::Variant& var,
- NonCalls& noncalls,
- AlleleParser* parser);
-string dateStr(void);
-void vcfHeader(ostream& out, string referenceFileName, vector<string>& samples, Parameters& parameters, vector<string>& sequencingTechnologies);
diff --git a/src/Sample.cpp b/src/Sample.cpp
deleted file mode 100644
index 06207cd..0000000
--- a/src/Sample.cpp
+++ /dev/null
@@ -1,472 +0,0 @@
-#include "Sample.h"
-// sample tracking and allele sorting
-// the number of observations for this allele
-int Sample::observationCount(Allele& allele) {
- return observationCount(allele.currentBase);
-int Sample::observationCountInclPartials(void) {
- return observationCount() + partialObservationCount();
-double Sample::observationCountInclPartials(Allele& allele) {
- return observationCountInclPartials(allele.currentBase);
-double Sample::partialObservationCount(Allele& allele) {
- return partialObservationCount(allele.currentBase);
-// the number of observations for this base
-int Sample::observationCount(const string& base) {
- Sample::iterator g = find(base);
- if (g != end())
- return g->second.size();
- else
- return 0;
-int Sample::partialObservationCount(void) {
- return reversePartials.size();
-double Sample::partialObservationCount(const string& base) {
- double scaledPartialCount = 0;
- map<string, vector<Allele*> >::iterator g = partialSupport.find(base);
- if (g != partialSupport.end()) {
- vector<Allele*>& supportingObs = g->second;
- for (vector<Allele*>::iterator a = supportingObs.begin(); a != supportingObs.end(); ++a) {
- scaledPartialCount += (double) 1 / (double) reversePartials[*a].size();
- }
- }
- return scaledPartialCount;
-double Sample::observationCountInclPartials(const string& base) {
- return observationCount(base) + partialObservationCount(base);
-// the total number of observations
-int Sample::observationCount(void) {
- int count = 0;
- for (Sample::iterator g = begin(); g != end(); ++g) {
- count += g->second.size();
- }
- return count;
-int Sample::qualSum(Allele& allele) {
- return qualSum(allele.currentBase);
-int Sample::qualSum(const string& base) {
- Sample::iterator g = find(base);
- int qsum = 0;
- if (g != end()) {
- vector<Allele*>& alleles = g->second;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- qsum += (*a)->quality;
- }
- }
- return qsum;
-double Sample::partialQualSum(Allele& allele) {
- return partialQualSum(allele.currentBase);
-double Sample::partialQualSum(const string& base) {
- Sample::iterator g = partialSupport.find(base);
- double qsum = 0;
- if (g != partialSupport.end()) {
- vector<Allele*>& alleles = g->second;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- qsum += (double) (*a)->quality / (double) reversePartials[*a].size();
- }
- }
- return qsum;
-// sample tracking and allele sorting
-// the number of observations for this allele
-int Samples::observationCount(Allele& allele) {
- return observationCount(allele.currentBase);
-double Samples::observationCountInclPartials(Allele& allele) {
- return observationCountInclPartials(allele.currentBase);
-double Samples::partialObservationCount(Allele& allele) {
- return partialObservationCount(allele.currentBase);
-// the number of observations for this base
-int Samples::observationCount(const string& base) {
- int c = 0;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- c += s->second.observationCount(base);
- }
- return c;
-double Samples::partialObservationCount(const string& base) {
- double c = 0;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- c += s->second.partialObservationCount(base);
- }
- return c;
-double Samples::observationCountInclPartials(const string& base) {
- return observationCount(base) + partialObservationCount(base);
-// the total number of observations
-int Samples::observationCount(void) {
- int c = 0;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- c += s->second.observationCount();
- }
- return c;
-double Samples::observationCountInclPartials(void) {
- double c = 0;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- c += s->second.observationCountInclPartials();
- }
- return c;
-int Samples::qualSum(Allele& allele) {
- qualSum(allele.currentBase);
-int Samples::qualSum(const string& base) {
- int q = 0;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- q += s->second.qualSum(base);
- }
- return q;
-double Samples::partialQualSum(Allele& allele) {
- partialQualSum(allele.currentBase);
-double Samples::partialQualSum(const string& base) {
- double q = 0;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- q += s->second.partialQualSum(base);
- }
- return q;
-map<string, double> Samples::estimatedAlleleFrequencies(void) {
- map<string, long double> qualsums;
- for (Samples::iterator s = begin(); s != end(); ++s) {
- Sample& sample = s->second;
- for (Sample::iterator o = sample.begin(); o != sample.end(); ++o) {
- const string& base = o->first;
- qualsums[base] += sample.qualSum(base);
- }
- }
- long double total = 0;
- for (map<string, long double>::iterator q = qualsums.begin(); q != qualsums.end(); ++q) {
- total += q->second;
- }
- map<string, double> freqs;
- for (map<string, long double>::iterator q = qualsums.begin(); q != qualsums.end(); ++q) {
- freqs[q->first] = q->second / total;
- //cerr << "estimated frequency " << q->first << " " << freqs[q->first] << endl;
- }
- return freqs;
-// puts alleles into the right bins if they have changed their base (as
-// occurs in the case of reference alleles)
-void Sample::sortReferenceAlleles(void) {
- for (Sample::iterator g = begin(); g != end(); ++g) {
- const string& groupBase = g->first;
- vector<Allele*>& alleles = g->second;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- const string& base = (*a)->currentBase;
- if (base != groupBase) {
- Sample::iterator g = find(base);
- if (g != end()) {
- g->second.push_back(*a);
- } else {
- vector<Allele*> alleles;
- alleles.push_back(*a);
- insert(begin(), make_pair(base, alleles));
- }
- *a = NULL;
- }
- }
- alleles.erase(remove(alleles.begin(), alleles.end(), (Allele*)NULL), alleles.end());
- }
-Sample::strandBaseCount(string refbase, string altbase) {
- int forwardRef = 0;
- int reverseRef = 0;
- int forwardAlt = 0;
- int reverseAlt = 0;
- for (Sample::iterator s = begin(); s != end(); ++s) {
- vector<Allele*>& alleles = s->second;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- string base = (*a)->currentBase;
- AlleleStrand strand = (*a)->strand;
- if (base == refbase) {
- if (strand == STRAND_FORWARD)
- ++forwardRef;
- else if (strand == STRAND_REVERSE)
- ++reverseRef;
- } else if (base == altbase) {
- if (strand == STRAND_FORWARD)
- ++forwardAlt;
- else if (strand == STRAND_REVERSE)
- ++reverseAlt;
- }
- }
- }
- return StrandBaseCounts(forwardRef, forwardAlt, reverseRef, reverseAlt);
-int Sample::baseCount(string base, AlleleStrand strand) {
- int count = 0;
- for (Sample::iterator g = begin(); g != end(); ++g) {
- vector<Allele*>& alleles = g->second;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- if ((*a)->currentBase == base && (*a)->strand == strand)
- ++count;
- }
- }
- return count;
-string Sample::json(void) {
- stringstream out;
- out << "[";
- bool first = true;
- for (map<string, vector<Allele*> >::iterator g = this->begin(); g != this->end(); ++g) {
- vector<Allele*>& alleles = g->second;
- for (vector<Allele*>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- if (!first) { out << ","; } else { first = false; }
- out << (*a)->json();
- }
- }
- out << "]";
- return out.str();
-void groupAlleles(Samples& samples, map<string, vector<Allele*> >& alleleGroups) {
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- Sample& sample = s->second;
- for (Sample::iterator g = sample.begin(); g != sample.end(); ++g) {
- const string& base = g->first;
- const vector<Allele*>& alleles = g->second;
- vector<Allele*>& group = alleleGroups[base];
- group.reserve(group.size() + distance(alleles.begin(), alleles.end()));
- group.insert(group.end(), alleles.begin(), alleles.end());
- }
- }
-bool sufficientAlternateObservations(Samples& samples, int mincount, float minfraction) {
- int totalAlternateCount = 0;
- int totalReferenceCount = 0;
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- //cerr << s->first << endl;
- Sample& sample = s->second;
- int alternateCount = 0;
- int observationCount = 0;
- for (Sample::iterator group = sample.begin(); group != sample.end(); ++group) {
- const string& base = group->first;
- //cerr << base << endl;
- vector<Allele*>& alleles = group->second;
- //cerr << alleles.size() << endl;
- if (alleles.size() == 0)
- continue;
- if (alleles.front()->type != ALLELE_REFERENCE) {
- alternateCount += alleles.size();
- } else {
- totalReferenceCount += alleles.size();
- }
- observationCount += alleles.size();
- }
- //cerr << "alternateCount " << alternateCount << " ratio " << ((float) alternateCount / (float) observationCount) << endl;
- if (alternateCount >= mincount && ((float) alternateCount / (float) observationCount) >= minfraction)
- return true;
- totalAlternateCount += alternateCount;
- }
- // always analyze if we have more alternate observations than reference observations
- // this is meant to catch the case in which the reference is the rare allele
- // it will probably also catch cases in which we have very low coverage
- if (totalReferenceCount < totalAlternateCount) {
- return true;
- }
- return false;
-int countAlleles(Samples& samples) {
- int count = 0;
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- Sample& sample = s->second;
- for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
- count += sg->second.size();
- }
- }
- return count;
-ostream& operator<<(ostream& out, Sample& sample) {
- for (Sample::iterator s = sample.begin(); s != sample.end(); ++s) {
- out << s->first << " #" << s->second.size() << endl << s->second << endl;
- }
- return out;
-void Samples::assignPartialSupport(vector<Allele>& alleles,
- vector<Allele*>& partialObservations,
- map<string, vector<Allele*> >& partialObservationGroups,
- map<Allele*, set<Allele*> >& partialObservationSupport,
- unsigned long haplotypeStart,
- int haplotypeLength) {
- // clean up results of any previous calls to this function
- clearPartialObservations();
- for (vector<Allele>::iterator a = alleles.begin(); a != alleles.end(); ++a) {
- Allele& allele = *a;
- //string& base = allele.currentBase;
- // hacks here
- string& aseq = allele.alternateSequence;
- //cerr << "alternate, seeking partial support " << aseq << endl
- // << "allele: " << allele << endl;
- // construct pseudo-sequence
- for (vector<Allele*>::iterator p = partialObservations.begin(); p != partialObservations.end(); ++p) {
- Allele& partial = **p;
- string pseq = partial.alternateSequence;
- bool same = false;
- // if the partial could support the alternate if we consider "reference-matching"
- // sequence beyond the haplotype window, add it to the comparison
- if (partial.position == haplotypeStart && partial.referenceLength == haplotypeLength) {
- if (pseq.size() + partial.basesLeft <= aseq.size()) {
- pseq = partial.read5p();
- } else if (pseq.size() + partial.basesRight <= aseq.size()) {
- pseq = partial.read3p();
- }
- }
- // otherwise, we should be fine to go with the seqs
- // basically, this is the partial reference coordinate-matching case
- //cerr << partial << " bp l/r " << partial.basesLeft << "/" << partial.basesRight
- // << " szes , " << pseq.size() << " vs " << aseq.size() << endl;
- if (!pseq.empty()
- && aseq.size() >= pseq.size()
- && ((partial.alternateSequence.size() + partial.basesRight <= aseq.size()
- && (aseq.substr(0, pseq.size()) == pseq))
- || (partial.alternateSequence.size() + partial.basesLeft <= aseq.size()
- && (aseq.substr(aseq.size()-pseq.size()) == pseq)))) {
- // dAY's du saem
- partialObservationGroups[allele.currentBase].push_back(*p);
- partialObservationSupport[*p].insert(&*a);
- //cerr << "partial support of " << *a << " by " << *p << endl;
- same = true;
- }
- }
- }
- for (vector<Allele*>::iterator p = partialObservations.begin(); p != partialObservations.end(); ++p) {
- // get the sample
- Allele& partial = **p;
- Samples::iterator siter = find(partial.sampleID);
- if (siter == end()) {
- continue;
- }
- Sample& sample = siter->second;
- map<Allele*, set<Allele*> >::iterator sup = partialObservationSupport.find(*p);
- if (sup != partialObservationSupport.end()) {
- set<Allele*>& supported = sup->second;
- for (set<Allele*>::iterator s = supported.begin(); s != supported.end(); ++s) {
- sample.partialSupport[(*s)->currentBase].push_back(*p);
- sample.supportedAlleles.insert((*s)->currentBase);
- }
- if (!supported.empty()) {
- sample.reversePartials[*p] = supported;
- }
- }
- //sample.partialObservations.push_back(*p);
- }
-bool Sample::observationSupports(Allele* obs, Allele* allele) {
- if (obs->currentBase == allele->currentBase) {
- return true;
- } else {
- map<Allele*, set<Allele*> >::iterator p = reversePartials.find(obs);
- if (p != reversePartials.end()) {
- set<Allele*>& supports = p->second;
- if (supports.find(allele) != supports.end()) {
- return true;
- }
- }
- return false;
- }
-void Samples::clearFullObservations(void) {
- for (Samples::iterator s = begin(); s != end(); ++s) {
- s->second.clear();
- }
-void Samples::clearPartialObservations(void) {
- for (Samples::iterator s = begin(); s != end(); ++s) {
- s->second.clearPartialObservations();
- }
-void Sample::clearPartialObservations(void) {
- supportedAlleles.clear();
- for (Sample::iterator a = begin(); a != end(); ++a)
- supportedAlleles.insert(a->first);
- partialSupport.clear();
- reversePartials.clear();
-void Sample::setSupportedAlleles(void) {
- for (Sample::iterator a = begin(); a != end(); ++a)
- supportedAlleles.insert(a->first);
-void Samples::setSupportedAlleles(void) {
- for (Samples::iterator s = begin(); s != end(); ++s)
- s->second.setSupportedAlleles();
diff --git a/src/Sample.h b/src/Sample.h
deleted file mode 100644
index e74d1eb..0000000
--- a/src/Sample.h
+++ /dev/null
@@ -1,135 +0,0 @@
-#ifndef __SAMPLE_H
-#define __SAMPLE_H
-#include <string>
-#include <vector>
-#include <map>
-#include <utility>
-#include "Utility.h"
-#include "Allele.h"
-using namespace std;
-class StrandBaseCounts {
- int forwardRef;
- int forwardAlt;
- int reverseRef;
- int reverseAlt;
- : forwardRef(0), forwardAlt(0), reverseRef(0), reverseAlt(0)
- { }
-StrandBaseCounts(int fr,
- int fa,
- int rr,
- int ra)
- : forwardRef(fr)
- , forwardAlt(fa)
- , reverseRef(rr)
- , reverseAlt(ra) { }
-// sample tracking and allele sorting
-class Sample : public map<string, vector<Allele*> > {
- friend ostream& operator<<(ostream& out, Sample& sample);
- // includes both fully and partially-supported observations after adding partial obs
- set<string> supportedAlleles;
- void setSupportedAlleles(void);
- // partial support for alleles, such as for observations that partially overlap the calling window
- map<string, vector<Allele*> > partialSupport;
- // for fast scaling of qualities for partial supports
- map<Allele*, set<Allele*> > reversePartials;
- // clear the above
- void clearPartialObservations(void);
- // set of partial observations (keys of the above map) cached for faster GL calculation
- //vector<Allele*> partialObservations;
- // if the observation (partial or otherwise) supports the allele
- bool observationSupports(Allele* obs, Allele* allele);
- // the number of observations for this allele
- int observationCount(Allele& allele);
- double observationCountInclPartials(Allele& allele);
- double partialObservationCount(Allele& allele);
- // the number of observations for this base
- int observationCount(const string& base);
- double observationCountInclPartials(const string& base);
- double partialObservationCount(const string& base);
- int partialObservationCount(void);
- // the total number of observations
- int observationCount(void);
- int observationCountInclPartials(void);
- // sum of quality for the given allele
- // (includes partial support)
- int qualSum(Allele& allele);
- int qualSum(const string& base);
- double partialQualSum(Allele& allele);
- double partialQualSum(const string& base);
- // puts alleles into the right bins if they have changed their base (as
- // occurs in the case of reference alleles)
- void sortReferenceAlleles(void);
- StrandBaseCounts strandBaseCount(string refbase, string altbase);
- int baseCount(string base, AlleleStrand strand);
- string json(void);
-class Samples : public map<string, Sample> {
- map<string, double> estimatedAlleleFrequencies(void);
- void assignPartialSupport(vector<Allele>& alleles,
- vector<Allele*>& partialObservations,
- map<string, vector<Allele*> >& partialObservationGroups,
- map<Allele*, set<Allele*> >& partialObservationSupport,
- unsigned long haplotypeStart,
- int haplotypeLength);
- int observationCount(void);
- double observationCountInclPartials(void);
- int observationCount(Allele& allele);
- double observationCountInclPartials(Allele& allele);
- double partialObservationCount(Allele& allele);
- int observationCount(const string& base);
- double observationCountInclPartials(const string& base);
- double partialObservationCount(const string& base);
- int qualSum(Allele& allele);
- int qualSum(const string& base);
- double partialQualSum(Allele& allele);
- double partialQualSum(const string& base);
- void clearFullObservations(void);
- void clearPartialObservations(void);
- void setSupportedAlleles(void);
-int countAlleles(Samples& samples);
-// using this one...
-void groupAlleles(Samples& samples, map<string, vector<Allele*> >& alleleGroups);
-// filters... maybe move to its own file?
-bool sufficientAlternateObservations(Samples& observations, int mincount, float minfraction);
diff --git a/src/SegfaultHandler.cpp b/src/SegfaultHandler.cpp
deleted file mode 100644
index ac17044..0000000
--- a/src/SegfaultHandler.cpp
+++ /dev/null
@@ -1,17 +0,0 @@
-#include "SegfaultHandler.h"
-// from http://stackoverflow.com/a/77336/238609
-// many thanks to tgamblin
-void segfaultHandler(int sig) {
- void *array[10];
- size_t size;
- // get void*'s for all entries on the stack
- size = backtrace(array, 10);
- // print out all the frames to stderr
- fprintf(stderr, "Error: signal %d:\n", sig);
- backtrace_symbols_fd(array, size, 2);
- exit(1);
diff --git a/src/SegfaultHandler.h b/src/SegfaultHandler.h
deleted file mode 100644
index 4f8d9c7..0000000
--- a/src/SegfaultHandler.h
+++ /dev/null
@@ -1,11 +0,0 @@
-#include <stdio.h>
-#include <execinfo.h>
-#include <signal.h>
-#include <stdlib.h>
-void segfaultHandler(int sig);
diff --git a/src/Sum.h b/src/Sum.h
deleted file mode 100644
index aaa4517..0000000
--- a/src/Sum.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#ifndef __SUM_H
-#define __SUM_H
-#include <vector>
-template <class T>
-T sum(const std::vector<T>& v) {
- T result = 0;
- for (typename std::vector<T>::const_iterator i = v.begin(); i != v.end(); ++i) {
- result += *i;
- }
- return result;
diff --git a/src/TryCatch.h b/src/TryCatch.h
deleted file mode 100644
index 7b15248..0000000
--- a/src/TryCatch.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stdexcept> // out_of_range exception
-#include <stdlib.h> // abort
-// macros which improve our error handling
-#ifndef TRY
-#define TRY try
-#ifndef CATCH
-#define CATCH \
- catch (std::out_of_range outOfRange) { \
- cerr << "exception: " << outOfRange.what() \
- << " at line " << __LINE__ \
- << " in file " << __FILE__ << endl; \
- abort(); \
- }
diff --git a/src/Utility.cpp b/src/Utility.cpp
deleted file mode 100644
index 1f9b654..0000000
--- a/src/Utility.cpp
+++ /dev/null
@@ -1,100758 +0,0 @@
-// utility functions
-#include "Utility.h"
-#include "Sum.h"
-#include "Product.h"
-#define PHRED_MAX 50000.0 // max Phred seems to be about 43015 (?), could be an underflow bug...
-using namespace std;
-short qualityChar2ShortInt(char c) {
- return static_cast<short>(c) - 33;
-long double qualityChar2LongDouble(char c) {
- return static_cast<long double>(c) - 33;
-long double lnqualityChar2ShortInt(char c) {
- return log(static_cast<short>(c) - 33);
-char qualityInt2Char(short i) {
- return static_cast<char>(i + 33);
-long double ln2log10(long double prob) {
- return M_LOG10E * prob;
-long double log102ln(long double prob) {
- return M_LN10 * prob;
-long double phred2ln(int qual) {
- return M_LN10 * qual * -.1;
-long double ln2phred(long double prob) {
- return -10 * M_LOG10E * prob;
-long double phred2float(int qual) {
- return pow(10, qual * -.1);
-long double float2phred(long double prob) {
- if (prob == 1)
- return PHRED_MAX; // guards against "-0"
- long double p = -10 * (long double) log10(prob);
- if (p < 0 || p > PHRED_MAX) // int overflow guard
- return PHRED_MAX;
- else
- return p;
-long double big2phred(const BigFloat& prob) {
- return -10 * (long double) (ttmath::Log(prob, (BigFloat)10)).ToDouble();
-long double nan2zero(long double x) {
- if (x != x) {
- return 0;
- } else {
- return x;
- }
-long double powln(long double m, int n) {
- return m * n;
-// the probability that we have a completely true vector of qualities
-long double jointQuality(const std::vector<short>& quals) {
- std::vector<long double> probs;
- for (int i = 0; i<quals.size(); ++i) {
- probs.push_back(phred2float(quals[i]));
- }
- // product of probability we don't have a true event for each element
- long double prod = 1 - probs.front();
- for (int i = 1; i<probs.size(); ++i) {
- prod *= 1 - probs.at(i);
- }
- // and then invert it again to get probability of an event
- return 1 - prod;
-long double jointQuality(const std::string& qualstr) {
- long double jq = 1;
- // product of probability we don't have a true event for each element
- for (string::const_iterator q = qualstr.begin(); q != qualstr.end(); ++q) {
- jq *= 1 - phred2float(qualityChar2ShortInt(*q));
- }
- // and then invert it again to get probability of an event
- return 1 - jq;
-std::vector<short> qualities(const std::string& qualstr) {
- std::vector<short> quals;
- for (int i=0; i<qualstr.size(); i++)
- quals.push_back(qualityChar2ShortInt(qualstr.at(i)));
- return quals;
-long double sumQuality(const std::string& qualstr) {
- long double qual = 0;
- for (string::const_iterator q = qualstr.begin(); q != qualstr.end(); ++q)
- qual += qualityChar2LongDouble(*q);
- return qual;
-long double minQuality(const std::string& qualstr) {
- long double qual = 0;
- for (string::const_iterator q = qualstr.begin(); q != qualstr.end(); ++q) {
- long double nq = qualityChar2LongDouble(*q);
- if (qual == 0) {
- qual = nq;
- } else if (nq < qual) {
- qual = nq;
- }
- }
- return qual;
-short minQuality(const std::vector<short>& qualities) {
- short m = 0;
- for (vector<short>::const_iterator q = qualities.begin(); q != qualities.end(); ++q) {
- if (*q < m) m = *q;
- }
- return m;
-// crudely averages quality scores in phred space
-long double averageQuality(const std::string& qualstr) {
- long double qual = 0; //(long double) *max_element(quals.begin(), quals.end());
- for (string::const_iterator q = qualstr.begin(); q != qualstr.end(); ++q)
- qual += qualityChar2LongDouble(*q);
- return qual / qualstr.size();
-long double averageQuality(const vector<short>& qualities) {
- long double qual = 0;
- for (vector<short>::const_iterator q = qualities.begin(); q != qualities.end(); ++q) {
- qual += *q;
- }
- return qual / qualities.size();
-bool stringInVector(string item, vector<string> items) {
- for (vector<string>::iterator i = items.begin(); i != items.end(); ++i) {
- if (item == *i) {
- return true;
- }
- }
- return false;
-bool allATGC(string& s) {
- for (string::iterator c = s.begin(); c != s.end(); ++c) {
- char b = *c;
- if (b != 'A' && b != 'T' && b != 'G' && b != 'C') {
- return false;
- }
- }
- return true;
-int upper(int c) {
- return toupper((unsigned char) c);
-string uppercase(string s) {
- transform(s.begin(), s.end(), s.begin(), upper);
- return s;
-string strip(string const& str, char const* separators) {
- string::size_type const first = str.find_first_not_of(separators);
- return (first == string::npos) ? string()
- : str.substr(first, str.find_last_not_of(separators) - first + 1);
-int binomialCoefficient(int n, int k) {
- int i = 1;
- int result = n - k + i++;
- while (i <= k) {
- result *= (n - k + i) / i;
- ++i;
- }
- return result;
-// k successes in n trials with prob of success p
-long double binomialProb(int k, int n, long double p) {
- return factorial(n) / (factorial(k) * factorial(n - k)) * pow(p, k) * pow(1 - p, n - k);
-long double __binomialProbln(int k, int n, long double p) {
- return factorialln(n) - (factorialln(k) + factorialln(n - k)) + powln(log(p), k) + powln(log(1 - p), n - k);
-long double binomialCoefficientLn(int k, int n) {
- return factorialln(n) - (factorialln(k) + factorialln(n - k));
-BinomialCache binomialCache;
-long double binomialProbln(int k, int n, long double p) {
- return binomialCache.binomialProbln(k, n, p);
-long double probability(int k, int n, long double p) {
- int n = n - k;
- int m = k;
- long double q = 1 - p;
- long double temp = lgammal(m + n + 1.0);
- temp -= lgammal(n + 1.0) + lgammal(m + 1.0);
- temp += m*log(p) + n*log(q);
- return temp;
-long double poissonpln(int observed, int expected) {
- return ((log(expected) * observed) - expected) - factorialln(observed);
-long double poissonp(int observed, int expected) {
- return (double) pow((double) expected, (double) observed) * (double) pow(M_E, (double) -expected) / factorial(observed);
-// given the expected number of events is the max of a and b
-// what is the probability that we might observe less than the observed?
-long double poissonPvalLn(int a, int b) {
- int expected, observed;
- if (a > b) {
- expected = a; observed = b;
- } else {
- expected = b; observed = a;
- }
- vector<long double> probs;
- for (int i = 0; i < observed; ++i) {
- probs.push_back(poissonpln(i, expected));
- }
- return logsumexp_probs(probs);
-long double gammaln(
- long double x
- ) {
- long double cofactors[] = { 76.18009173,
- -86.50532033,
- 24.01409822,
- -1.231739516,
- 0.120858003E-2,
- -0.536382E-5 };
- long double x1 = x - 1.0;
- long double tmp = x1 + 5.5;
- tmp -= (x1 + 0.5) * log(tmp);
- long double ser = 1.0;
- for (int j=0; j<=5; j++) {
- x1 += 1.0;
- ser += cofactors[j]/x1;
- }
- long double y = (-1.0 * tmp + log(2.50662827465 * ser));
- return y;
-long double factorial(
- int n
- ) {
- if (n < 0) {
- return (long double)0.0;
- }
- else if (n == 0) {
- return (long double)1.0;
- }
- else {
- return exp(gammaln(n + 1.0));
- }
-FactorialCache factorialCache;
-long double factorialln(int n) {
- return factorialCache.factorialln(n);
-long double __factorialln(
- int n
- ) {
- if (n < 0) {
- return (long double)-1.0;
- }
- else if (n == 0) {
- return (long double)0.0;
- }
- else {
- return gammaln(n + 1.0);
- }
-long double cofactor(
- int n,
- int i
- ) {
- if ((n < 0) || (i < 0) || (n < i)) {
- return (long double)0.0;
- }
- else if (n == i) {
- return (long double)1.0;
- }
- else {
- return exp(gammaln(n + 1.0) - gammaln(i + 1.0) - gammaln(n-i + 1.0));
- }
-long double cofactorln(
- int n,
- int i
- ) {
- if ((n < 0) || (i < 0) || (n < i)) {
- return (long double)-1.0;
- }
- else if (n == i) {
- return (long double)0.0;
- }
- else {
- return gammaln(n + 1.0) - gammaln(i + 1.0) - gammaln(n-i + 1.0);
- }
-// prevent underflows by returning exp(LDBL_MIN_EXP) if exponentiation will produce an underflow
-long double safe_exp(long double ln) {
- if (ln < LDBL_MIN_EXP) { // -16381
- return LDBL_MIN; // 3.3621e-4932
- } else {
- return exp(ln);
- }
-BigFloat big_exp(long double ln) {
- BigFloat x, result;
- x.FromDouble(ln);
- result = ttmath::Exp(x);
- return result;
-// 'safe' log summation for probabilities
-long double logsumexp_probs(const vector<long double>& lnv) {
- vector<long double>::const_iterator i = lnv.begin();
- long double maxN = *i;
- ++i;
- for (; i != lnv.end(); ++i) {
- if (*i > maxN)
- maxN = *i;
- }
- BigFloat sum = 0;
- for (vector<long double>::const_iterator i = lnv.begin(); i != lnv.end(); ++i) {
- sum += big_exp(*i - maxN);
- }
- BigFloat maxNb; maxNb.FromDouble(maxN);
- BigFloat bigResult = maxNb + ttmath::Ln(sum);
- long double result;
- return bigResult.ToDouble();
-// unsafe, kept for potential future use
-long double logsumexp(const vector<long double>& lnv) {
- long double maxAbs, minN, maxN, c;
- vector<long double>::const_iterator i = lnv.begin();
- long double n = *i;
- maxAbs = n; maxN = n; minN = n;
- ++i;
- for (; i != lnv.end(); ++i) {
- n = *i;
- if (n > maxN)
- maxN = n;
- if (fabs(n) > maxAbs)
- maxAbs = fabs(n);
- if (n < minN)
- minN = n;
- }
- if (maxAbs > maxN) {
- c = minN;
- } else {
- c = maxN;
- }
- long double sum = 0;
- for (vector<long double>::const_iterator i = lnv.begin(); i != lnv.end(); ++i) {
- sum += exp(*i - c);
- }
- return c + log(sum);
-long double betaln(const vector<long double>& alphas) {
- vector<long double> gammalnAlphas;
- gammalnAlphas.resize(alphas.size());
- transform(alphas.begin(), alphas.end(), gammalnAlphas.begin(), gammaln);
- return sum(gammalnAlphas) - gammaln(sum(alphas));
-long double beta(const vector<long double>& alphas) {
- return exp(betaln(alphas));
-long double hoeffding(double successes, double trials, double prob) {
- return 0.5 * exp(-2 * pow(trials * prob - successes, 2) / trials);
-long double hoeffdingln(double successes, double trials, double prob) {
- return log(0.5) + (-2 * pow(trials * prob - successes, 2) / trials);
-// the sum of the harmonic series 1, n
-long double harmonicSum(int n) {
- long double r = 0;
- long double i = 1;
- while (i <= n) {
- r += 1 / i;
- ++i;
- }
- return r;
-bool isTransition(string& ref, string& alt) {
- if (((ref == "A" && alt == "G") || (ref == "G" && alt == "A")) ||
- ((ref == "C" && alt == "T") || (ref == "T" && alt == "C"))) {
- return true;
- } else {
- return false;
- }
-// Levenshtein Distance Algorithm: C++ Implementation
-// by Anders Sewerin Johansen
-// http://www.merriampark.com/ldcpp.htm
-int levenshteinDistance(const std::string source, const std::string target) {
- // Step 1
- const int n = source.length();
- const int m = target.length();
- if (n == 0) {
- return m;
- }
- if (m == 0) {
- return n;
- }
- // Good form to declare a TYPEDEF
- typedef std::vector< std::vector<int> > Tmatrix;
- Tmatrix matrix(n+1);
- // Size the vectors in the 2.nd dimension. Unfortunately C++ doesn't
- // allow for allocation on declaration of 2.nd dimension of vec of vec
- for (int i = 0; i <= n; i++) {
- matrix[i].resize(m+1);
- }
- // Step 2
- for (int i = 0; i <= n; i++) {
- matrix[i][0]=i;
- }
- for (int j = 0; j <= m; j++) {
- matrix[0][j]=j;
- }
- // Step 3
- for (int i = 1; i <= n; i++) {
- const char s_i = source[i-1];
- // Step 4
- for (int j = 1; j <= m; j++) {
- const char t_j = target[j-1];
- // Step 5
- int cost;
- if (s_i == t_j) {
- cost = 0;
- }
- else {
- cost = 1;
- }
- // Step 6
- const int above = matrix[i-1][j];
- const int left = matrix[i][j-1];
- const int diag = matrix[i-1][j-1];
- int cell = min( above + 1, min(left + 1, diag + cost));
- // Step 6A: Cover transposition, in addition to deletion,
- // insertion and substitution. This step is taken from:
- // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
- // Enhanced Dynamic Programming ASM Algorithm"
- // (http://www.acm.org/~hlb/publications/asm/asm.html)
- if (i>2 && j>2) {
- int trans=matrix[i-2][j-2]+1;
- if (source[i-2]!=t_j) trans++;
- if (s_i!=target[j-2]) trans++;
- if (cell>trans) cell=trans;
- }
- matrix[i][j]=cell;
- }
- }
- // Step 7
- return matrix[n][m];
-// current date string in YYYYMMDD format
-string dateStr(void) {
- time_t rawtime;
- struct tm* timeinfo;
- char buffer[80];
- time(&rawtime);
- timeinfo = localtime(&rawtime);
- strftime(buffer, 80, "%Y%m%d", timeinfo);
- return string(buffer);
-long double string2float(const string& s) {
- long double r;
- convert(s, r);
- return r;
-long double log10string2ln(const string& s) {
- long double r;
- convert(s, r);
- return log102ln(r);
-long double safedivide(long double a, long double b) {
- if (b == 0) {
- if (a == 0) {
- return 1;
- } else {
- return 0;
- }
- } else {
- return a / b;
- }
-string mergeCigar(const string& c1, const string& c2) {
- vector<pair<int, string> > cigar1 = splitCigar(c1);
- vector<pair<int, string> > cigar2 = splitCigar(c2);
- // check if the middle elements are the same
- if (cigar1.back().second == cigar2.front().second) {
- cigar1.back().first += cigar2.front().first;
- cigar2.erase(cigar2.begin());
- }
- for (vector<pair<int, string> >::iterator c = cigar2.begin(); c != cigar2.end(); ++c) {
- cigar1.push_back(*c);
- }
- return joinCigar(cigar1);
-vector<pair<int, string> > splitCigar(const string& cigarStr) {
- vector<pair<int, string> > cigar;
- string number;
- string type;
- // strings go [Number][Type] ...
- for (string::const_iterator s = cigarStr.begin(); s != cigarStr.end(); ++s) {
- char c = *s;
- if (isdigit(c)) {
- if (type.empty()) {
- number += c;
- } else {
- // signal for next token, push back the last pair, clean up
- cigar.push_back(make_pair(atoi(number.c_str()), type));
- number.clear();
- type.clear();
- number += c;
- }
- } else {
- type += c;
- }
- }
- if (!number.empty() && !type.empty()) {
- cigar.push_back(make_pair(atoi(number.c_str()), type));
- }
- return cigar;
-list<pair<int, string> > splitCigarList(const string& cigarStr) {
- list<pair<int, string> > cigar;
- string number;
- string type;
- // strings go [Number][Type] ...
- for (string::const_iterator s = cigarStr.begin(); s != cigarStr.end(); ++s) {
- char c = *s;
- if (isdigit(c)) {
- if (type.empty()) {
- number += c;
- } else {
- // signal for next token, push back the last pair, clean up
- cigar.push_back(make_pair(atoi(number.c_str()), type));
- number.clear();
- type.clear();
- number += c;
- }
- } else {
- type += c;
- }
- }
- if (!number.empty() && !type.empty()) {
- cigar.push_back(make_pair(atoi(number.c_str()), type));
- }
- return cigar;
-string joinCigar(const vector<pair<int, string> >& cigar) {
- string cigarStr;
- for (vector<pair<int, string> >::const_iterator c = cigar.begin(); c != cigar.end(); ++c) {
- if (c->first) {
- cigarStr += convert(c->first) + c->second;
- }
- }
- return cigarStr;
-string joinCigarList(const list<pair<int, string> >& cigar) {
- string cigarStr;
- for (list<pair<int, string> >::const_iterator c = cigar.begin(); c != cigar.end(); ++c) {
- cigarStr += convert(c->first) + c->second;
- }
- return cigarStr;
-bool isEmptyCigarElement(const pair<int, string>& elem) {
- return elem.first == 0;
-// string * overload
-// from http://stackoverflow.com/a/5145880
-std::string operator*(std::string const &s, size_t n)
- std::string r; // empty string
- r.reserve(n * s.size());
- for (size_t i=0; i<n; i++)
- r += s;
- return r;
-// normalize vector sum to 1
-void normalizeSumToOne(vector<long double>& v) {
- long double sum = 0;
- for (vector<long double>::iterator i = v.begin(); i != v.end(); ++i) {
- sum += *i;
- }
- for (vector<long double>::iterator i = v.begin(); i != v.end(); ++i) {
- *i /= sum;
- }
-// splits the file on '\n', adds the resulting values to v
-void addLinesFromFile(vector<string>& v, const string& f) {
- ifstream ifs;
- ifs.open(f.c_str(), ifstream::in);
- if (!ifs.is_open()) {
- cerr << "could not open " << f << endl;
- exit(1);
- }
- string line;
- while (std::getline(ifs, line)) {
- v.push_back(line);
- }
-double lf[] = {
-double factorialln(int n)
- if (n < 0)
- {
- std::stringstream os;
- os << "Invalid input argument (" << n
- << "); may not be negative";
- throw std::invalid_argument( os.str() );
- }
- else if (n > 100000-1)
- {
- const double PI = 3.141592653589793;
- double x = n + 1;
- return (x - 0.5)*log(x) - x + 0.5*log(2*PI) + 1.0/(12.0*x);
- }
- else
- {
- return lf[n];
- }
-double entropy(const string& st) {
- vector<char> stvec(st.begin(), st.end());
- set<char> alphabet(stvec.begin(), stvec.end());
- vector<double> freqs;
- for (set<char>::iterator c = alphabet.begin(); c != alphabet.end(); ++c) {
- int ctr = 0;
- for (vector<char>::iterator s = stvec.begin(); s != stvec.end(); ++s) {
- if (*s == *c) {
- ++ctr;
- }
- }
- freqs.push_back((double)ctr / (double)stvec.size());
- }
- double ent = 0;
- double ln2 = log(2);
- for (vector<double>::iterator f = freqs.begin(); f != freqs.end(); ++f) {
- ent += *f * log(*f)/ln2;
- }
- ent = -ent;
- return ent;
diff --git a/src/Utility.h b/src/Utility.h
deleted file mode 100644
index 2811afb..0000000
--- a/src/Utility.h
+++ /dev/null
@@ -1,154 +0,0 @@
-// utility functions
-#ifndef UTILITY_H
-#define UTILITY_H
-#include <cmath>
-#include <vector>
-#include <list>
-#include <string>
-#include <algorithm>
-#include <string>
-#include <float.h>
-#include <iostream>
-#include <fstream>
-#include <map>
-#include <time.h>
-#include "convert.h"
-#include "ttmath.h"
-using namespace std;
-typedef ttmath::Big<TTMATH_BITS(256), TTMATH_BITS(64)> BigFloat;
-long double factorial(int);
-short qualityChar2ShortInt(char c);
-long double qualityChar2LongDouble(char c);
-long double lnqualityChar2ShortInt(char c);
-char qualityInt2Char(short i);
-//long double phred2float(int qual);
-long double phred2ln(int qual);
-long double ln2phred(long double prob);
-long double ln2log10(long double prob);
-long double log102ln(long double prob);
-long double phred2float(int qual);
-long double float2phred(long double prob);
-long double big2phred(const BigFloat& prob);
-long double nan2zero(long double x);
-long double powln(long double m, int n);
-// here 'joint' means 'probability that we have a vector entirely composed of true bases'
-long double jointQuality(const std::vector<short>& quals);
-long double jointQuality(const std::string& qualstr);
-std::vector<short> qualities(const std::string& qualstr);
-long double sumQuality(const std::string& qualstr);
-long double minQuality(const std::string& qualstr);
-short minQuality(const std::vector<short>& qualities);
-long double averageQuality(const std::string& qualstr);
-long double averageQuality(const std::vector<short>& qualities);
-//unsigned int factorial(int n);
-bool stringInVector(string item, vector<string> items);
-int upper(int c); // helper to below, wraps toupper
-string uppercase(string s);
-bool allATGC(string& s);
-string strip(string const& str, char const* separators = " \t");
-int binomialCoefficient(int n, int k);
-long double binomialCoefficientLn(int k, int n);
-long double binomialProb(int k, int n, long double p);
-long double __binomialProbln(int k, int n, long double p);
-long double binomialProbln(int k, int n, long double p);
-long double poissonpln(int observed, int expected);
-long double poissonp(int observed, int expected);
-long double poissonPvalLn(int a, int b);
-long double gammaln( long double x);
-long double factorial( int n);
-double factorialln( int n);
-long double __factorialln( int n);
-class FactorialCache : public map<int, long double> {
- long double factorialln(int n) {
- map<int, long double>::iterator f = find(n);
- if (f == end()) {
- if (size() > MAX_FACTORIAL_CACHE_SIZE) {
- clear();
- }
- long double fln = __factorialln(n);
- insert(make_pair(n, fln));
- return fln;
- } else {
- return f->second;
- }
- }
-class BinomialCache : public map<long double, map<pair<int, int>, long double> > {
- long double binomialProbln(int k, int n, long double p) {
- map<pair<int, int>, long double>& t = (*this)[p];
- pair<int, int> kn = make_pair(k, n);
- map<pair<int, int>, long double>::iterator f = t.find(kn);
- if (f == t.end()) {
- if (t.size() > MAX_BINOMIAL_CACHE_SIZE) {
- t.clear();
- }
- long double bln = __binomialProbln(k, n, p);
- t.insert(make_pair(kn, bln));
- return bln;
- } else {
- return f->second;
- }
- }
-long double cofactor( int n, int i);
-long double cofactorln( int n, int i);
-long double harmonicSum(int n);
-long double safedivide(long double a, long double b);
-long double safe_exp(long double ln);
-BigFloat big_exp(long double ln);
-long double logsumexp_probs(const vector<long double>& lnv);
-long double logsumexp(const vector<long double>& lnv);
-long double betaln(const vector<long double>& alphas);
-long double beta(const vector<long double>& alphas);
-long double hoeffding(double successes, double trials, double prob);
-long double hoeffdingln(double successes, double trials, double prob);
-int levenshteinDistance(const std::string source, const std::string target);
-bool isTransition(string& ref, string& alt);
-string dateStr(void);
-long double string2float(const string& s);
-long double log10string2ln(const string& s);
-string mergeCigar(const string& c1, const string& c2);
-vector<pair<int, string> > splitCigar(const string& cigarStr);
-list<pair<int, string> > splitCigarList(const string& cigarStr);
-string joinCigar(const vector<pair<int, string> >& cigar);
-string joinCigarList(const list<pair<int, string> >& cigar);
-bool isEmptyCigarElement(const pair<int, string>& elem);
-std::string operator*(std::string const &s, size_t n);
-void normalizeSumToOne(vector<long double>&);
-void addLinesFromFile(vector<string>& v, const string& f);
-double entropy(const string& st);
diff --git a/src/Version.h b/src/Version.h
deleted file mode 100644
index 770bf7b..0000000
--- a/src/Version.h
+++ /dev/null
@@ -1,2 +0,0 @@
-#define FREEBAYES_VERSION "0.9.9"
-#define FREEBAYES_COMPILE_DATE "2013-01-28"
diff --git a/src/alleles.cpp b/src/alleles.cpp
deleted file mode 100644
index e09a666..0000000
--- a/src/alleles.cpp
+++ /dev/null
@@ -1,92 +0,0 @@
-// alleles.cpp
-// outputs a json-formatted stream of alleles over target regions
-// Erik Garrison <erik.garrison at bc.edu>
-// Marth Lab, Boston College
-// July 14, 2010
-// standard includes
-//#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <map>
-#include <iterator>
-#include <algorithm>
-#include <cmath>
-#include <time.h>
-// private libraries
-#include "BamReader.h"
-#include "Fasta.h"
-#include "TryCatch.h"
-#include "Parameters.h"
-#include "Allele.h"
-#include "AlleleParser.h"
-#include "split.h"
-#include "multichoose.h"
-#include "multipermute.h"
-using namespace std;
-// Allele object recycling:
-// We use the Allele freelist for performance reasons. When an Allele object
-// is destroyed, it is pushed onto this freelist. When a new Allele object is
-// created, new first checks if we have a free Allele object on the freelist.
-// Because we are dynamically linked, we have to declare the freelist here,
-// although it exists as a static member of the Allele class.
-AlleleFreeList Allele::_freeList;
-int main (int argc, char *argv[]) {
- AlleleParser* parser = new AlleleParser(argc, argv);
- list<Allele*> alleles;
- map<string, vector<Allele*> > sampleGroups;
- while (parser->getNextAlleles(sampleGroups, allowedAlleleTypes)) {
- int coverage = countAlleles(sampleGroups);
- // skips 0-coverage regions
- if (coverage == 0)
- continue;
- // report in json-formatted stream
- //
- cout << "{\"sequence\":\"" << parser->currentTarget->seq << "\","
- << "\"total coverage\":" << coverage << ","
- << "\"position\":" << parser->currentPosition + 1 << "," /// XXX basing somehow is 1-off...
- << "\"reference base\":\"" << parser->currentReferenceBase << "\","
- //<< "\"raDepth\":" << parser->registeredAlleles.size() << ","
- << "\"samples\":{"; // TODO ... quality (~pSnp)
- bool suppressComma = true; // output flag
- for (map<string, vector<Allele*> >::iterator
- sample = sampleGroups.begin();
- sample != sampleGroups.end(); ++sample) {
- if (!suppressComma) { cout << ","; } else { suppressComma = false; }
- cout << "\"" << sample->first << "\":{"
- << "\"coverage\":" << sample->second.size()
- << ",\"alleles\":" << json(sample->second)
- << "}";
- }
- cout << "}}" << endl;
- }
- delete parser;
- return 0;
diff --git a/src/bamfiltertech.cpp b/src/bamfiltertech.cpp
deleted file mode 100644
index d42660c..0000000
--- a/src/bamfiltertech.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-#include <iostream>
-#include <getopt.h>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <signal.h>
-#include <stdlib.h>
-#include <cmath>
-#include <algorithm>
-#include <map>
-#include "BamAlignment.h"
-#include "BamReader.h"
-#include "BamWriter.h"
-#include "split.h"
-using namespace std;
-using namespace BamTools;
-int main(int argc, char** argv) {
- if (argc == 1) {
- cerr << "usage: " << argv[0] << " [technology name] [ [technology name] ... ]" << endl;
- cerr << "filters BAM file piped on stdin for reads generated by a sequencing technology listed on the command line" << endl;
- return 1;
- }
- map<string, bool> technologies;
- for (int i = 1; i < argc; ++i) {
- technologies[argv[i]] = true;
- }
- BamReader reader;
- if (!reader.Open("stdin")) {
- cerr << "Could not open stdin for reading" << endl;
- return 1;
- }
- // retrieve header information
- map<string, string> readGroupToTechnology;
- string bamHeader = reader.GetHeaderText();
- vector<string> headerLines = split(bamHeader, '\n');
- for (vector<string>::const_iterator it = headerLines.begin(); it != headerLines.end(); ++it) {
- // get next line from header, skip if empty
- string headerLine = *it;
- if ( headerLine.empty() ) { continue; }
- // lines of the header look like:
- // "@RG ID:- SM:NA11832 CN:BCM PL:454"
- // ^^^^^^^\ is our sample name
- if ( headerLine.find("@RG") == 0 ) {
- vector<string> readGroupParts = split(headerLine, "\t ");
- string tech;
- string readGroupID;
- for (vector<string>::const_iterator r = readGroupParts.begin(); r != readGroupParts.end(); ++r) {
- vector<string> nameParts = split(*r, ":");
- if (nameParts.at(0) == "PL") {
- tech = nameParts.at(1);
- } else if (nameParts.at(0) == "ID") {
- readGroupID = nameParts.at(1);
- }
- }
- if (tech.empty()) {
- cerr << " could not find PL: in @RG tag " << endl << headerLine << endl;
- return 1;
- }
- if (readGroupID.empty()) {
- cerr << " could not find ID: in @RG tag " << endl << headerLine << endl;
- return 1;
- }
- //string name = nameParts.back();
- //mergedHeader.append(1, '\n');
- //cerr << "found read group id " << readGroupID << " containing sample " << name << endl;
- readGroupToTechnology[readGroupID] = tech;
- }
- }
- // open writer, uncompressed BAM
- BamWriter writer;
- bool writeUncompressed = true;
- if ( !writer.Open("stdout", bamHeader, reader.GetReferenceData(), writeUncompressed) ) {
- cerr << "Could not open stdout for writing." << endl;
- return 1;
- }
- BamAlignment alignment;
- while (reader.GetNextAlignment(alignment)) {
- string name;
- if (alignment.GetTag("RG", name)) {
- if (technologies.find(readGroupToTechnology[name]) != technologies.end()) {
- //cout << name << " " << readGroupToTechnology[name] << endl;
- writer.SaveAlignment(alignment);
- }
- }
- }
- reader.Close();
- writer.Close();
- return 0;
diff --git a/src/bamleftalign.cpp b/src/bamleftalign.cpp
deleted file mode 100644
index be8b927..0000000
--- a/src/bamleftalign.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-#include <iostream>
-#include <getopt.h>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <signal.h>
-#include <stdlib.h>
-#include <cmath>
-#include <algorithm>
-#include <map>
-#include <vector>
-#include "Fasta.h"
-#include "api/BamAlignment.h"
-#include "api/BamReader.h"
-#include "api/BamWriter.h"
-//#include "IndelAllele.h"
-#include "LeftAlign.h"
-#define DEBUG(msg) \
- if (debug) { cerr << msg; }
-#define DEBUG(msg)
-using namespace std;
-using namespace BamTools;
-void printUsage(char** argv) {
- cerr << "usage: [BAM data stream] | " << argv[0] << " [options]" << endl
- << endl
- << "Left-aligns and merges the insertions and deletions in all alignments in stdin." << endl
- << "Iterates until each alignment is stable through a left-realignment step." << endl
- << endl
- << "arguments:" << endl
- << " -f --fasta-reference FILE FASTA reference file to use for realignment (required)" << endl
- << " -d --debug Print debugging information about realignment process" << endl
- << " -s --suppress-output Don't write BAM output stream (for debugging)" << endl
- << " -m --max-iterations N Iterate the left-realignment no more than this many times" << endl
- << " -c --compressed Write compressed BAM on stdout, default is uncompressed" << endl;
-int main(int argc, char** argv) {
- int c;
- FastaReference reference;
- bool has_ref = false;
- bool suppress_output = false;
- bool debug = false;
- bool isuncompressed = true;
- int maxiterations = 50;
- if (argc < 2) {
- printUsage(argv);
- exit(1);
- }
- while (true) {
- static struct option long_options[] =
- {
- {"help", no_argument, 0, 'h'},
- {"debug", no_argument, 0, 'd'},
- {"fasta-reference", required_argument, 0, 'f'},
- {"max-iterations", required_argument, 0, 'm'},
- {"suppress-output", no_argument, 0, 's'},
- {"compressed", no_argument, 0, 'c'},
- {0, 0, 0, 0}
- };
- int option_index = 0;
- c = getopt_long (argc, argv, "hdcsf:m:",
- long_options, &option_index);
- /* Detect the end of the options. */
- if (c == -1)
- break;
- switch (c) {
- case 'f':
- reference.open(optarg); // will exit on open failure
- has_ref = true;
- break;
- case 'm':
- maxiterations = atoi(optarg);
- break;
- case 'd':
- debug = true;
- break;
- case 's':
- suppress_output = true;
- break;
- case 'c':
- isuncompressed = false;
- break;
- case 'h':
- printUsage(argv);
- exit(0);
- break;
- case '?':
- printUsage(argv);
- exit(1);
- break;
- default:
- abort();
- break;
- }
- }
- if (!has_ref) {
- cerr << "no FASTA reference provided, cannot realign" << endl;
- exit(1);
- }
- BamReader reader;
- if (!reader.Open("stdin")) {
- cerr << "could not open stdin for reading" << endl;
- exit(1);
- }
- BamWriter writer;
- if (isuncompressed) {
- writer.SetCompressionMode(BamWriter::Uncompressed);
- }
- if (!suppress_output && !writer.Open("stdout", reader.GetHeaderText(), reader.GetReferenceData())) {
- cerr << "could not open stdout for writing" << endl;
- exit(1);
- }
- // store the names of all the reference sequences in the BAM file
- map<int, string> referenceIDToName;
- vector<RefData> referenceSequences = reader.GetReferenceData();
- int i = 0;
- for (RefVector::iterator r = referenceSequences.begin(); r != referenceSequences.end(); ++r) {
- referenceIDToName[i] = r->RefName;
- ++i;
- }
- BamAlignment alignment;
- while (reader.GetNextAlignment(alignment)) {
- DEBUG("--------------------------- read --------------------------" << endl);
- DEBUG("| " << referenceIDToName[alignment.RefID] << ":" << alignment.Position << endl);
- DEBUG("| " << alignment.Name << ":" << alignment.GetEndPosition() << endl);
- DEBUG("| " << alignment.Name << ":" << (alignment.IsMapped() ? " mapped" : " unmapped") << endl);
- DEBUG("| " << alignment.Name << ":" << " cigar data size: " << alignment.CigarData.size() << endl);
- DEBUG("--------------------------- realigned --------------------------" << endl);
- // skip unmapped alignments, as they cannot be left-realigned without CIGAR data
- if (alignment.IsMapped()) {
- int endpos = alignment.GetEndPosition();
- int length = endpos - alignment.Position + 1;
- if (alignment.Position >= 0 && length > 0) {
- if (!stablyLeftAlign(alignment,
- reference.getSubSequence(
- referenceIDToName[alignment.RefID],
- alignment.Position,
- length),
- maxiterations, debug)) {
- cerr << "unstable realignment of " << alignment.Name
- << " at " << referenceIDToName[alignment.RefID] << ":" << alignment.Position << endl
- << alignment.AlignedBases << endl;
- }
- }
- }
- DEBUG("----------------------------------------------------------------" << endl);
- DEBUG(endl);
- if (!suppress_output)
- writer.SaveAlignment(alignment);
- }
- reader.Close();
- if (!suppress_output)
- writer.Close();
- return 0;
diff --git a/src/convert.h b/src/convert.h
deleted file mode 100644
index 399bcea..0000000
--- a/src/convert.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#ifndef __CONVERT_H
-#define __CONVERT_H
-#include <sstream>
-// converts the string into the specified type, setting r to the converted
-// value and returning true/false on success or failure
-template<typename T>
-bool convert(const std::string& s, T& r) {
- std::istringstream iss(s);
- iss >> r;
- return iss.eof() ? true : false;
-template<typename T>
-std::string convert(const T& r) {
- std::ostringstream iss;
- iss << r;
- return iss.str();
diff --git a/src/dummy.cpp b/src/dummy.cpp
deleted file mode 100644
index 9afcab6..0000000
--- a/src/dummy.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-// alleles.cpp
-// outputs a json-formatted stream of alleles over target regions
-// Erik Garrison <erik.garrison at bc.edu>
-// Marth Lab, Boston College
-// July 14, 2010
-// standard includes
-//#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <map>
-#include <iterator>
-#include <algorithm>
-#include <cmath>
-#include <time.h>
-// private libraries
-#include "BamReader.h"
-#include "Fasta.h"
-#include "TryCatch.h"
-#include "Parameters.h"
-#include "Allele.h"
-#include "AlleleParser.h"
-#include "multichoose.h"
-#include "multipermute.h"
-using namespace std;
-AlleleFreeList Allele::_freeList;
-int main (int argc, char *argv[]) {
- AlleleParser* parser = new AlleleParser(argc, argv);
- while (parser->dummyProcessNextTarget()) {
- }
- delete parser;
- return 0;
diff --git a/src/fastlz.c b/src/fastlz.c
deleted file mode 100644
index 47da271..0000000
--- a/src/fastlz.c
+++ /dev/null
@@ -1,559 +0,0 @@
- FastLZ - lightning-fast lossless compression library
- Copyright (C) 2007 Ariya Hidayat (ariya at kde.org)
- Copyright (C) 2006 Ariya Hidayat (ariya at kde.org)
- Copyright (C) 2005 Ariya Hidayat (ariya at kde.org)
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
- * Always check for bound when decompressing.
- * Generally it is best to leave it defined.
- */
-#define FASTLZ_SAFE
- * Give hints to the compiler for branch prediction optimization.
- */
-#if defined(__GNUC__) && (__GNUC__ > 2)
-#define FASTLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1))
-#define FASTLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0))
- * Use inlined functions for supported systems.
- */
-#if defined(__GNUC__) || defined(__DMC__) || defined(__POCC__) || defined(__WATCOMC__) || defined(__SUNPRO_C)
-#define FASTLZ_INLINE inline
-#elif defined(__BORLANDC__) || defined(_MSC_VER) || defined(__LCC__)
-#define FASTLZ_INLINE __inline
- * Prevent accessing more than 8-bit at once, except on x86 architectures.
- */
-#if !defined(FASTLZ_STRICT_ALIGN)
-#if defined(__i386__) || defined(__386) /* GNU C, Sun Studio */
-#elif defined(__i486__) || defined(__i586__) || defined(__i686__) /* GNU C */
-#elif defined(_M_IX86) /* Intel, MSVC */
-#elif defined(__386)
-#elif defined(_X86_) /* MinGW */
-#elif defined(__I86__) /* Digital Mars */
- * FIXME: use preprocessor magic to set this on different platforms!
- */
-typedef unsigned char flzuint8;
-typedef unsigned short flzuint16;
-typedef unsigned int flzuint32;
-/* prototypes */
-int fastlz_compress(const void* input, int length, void* output);
-int fastlz_compress_level(int level, const void* input, int length, void* output);
-int fastlz_decompress(const void* input, int length, void* output, int maxout);
-#define MAX_COPY 32
-#define MAX_LEN 264 /* 256 + 8 */
-#define MAX_DISTANCE 8192
-#if !defined(FASTLZ_STRICT_ALIGN)
-#define FASTLZ_READU16(p) *((const flzuint16*)(p))
-#define FASTLZ_READU16(p) ((p)[0] | (p)[1]<<8)
-#define HASH_LOG 13
-#define HASH_SIZE (1<< HASH_LOG)
-#define HASH_MASK (HASH_SIZE-1)
-#define HASH_FUNCTION(v,p) { v = FASTLZ_READU16(p); v ^= FASTLZ_READU16(p+1)^(v>>(16-HASH_LOG));v &= HASH_MASK; }
-#define FASTLZ_LEVEL 1
-#define FASTLZ_COMPRESSOR fastlz1_compress
-#define FASTLZ_DECOMPRESSOR fastlz1_decompress
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
-#include "fastlz.c"
-#define FASTLZ_LEVEL 2
-#define MAX_DISTANCE 8191
-#define FASTLZ_COMPRESSOR fastlz2_compress
-#define FASTLZ_DECOMPRESSOR fastlz2_decompress
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output);
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout);
-#include "fastlz.c"
-int fastlz_compress(const void* input, int length, void* output)
- /* for short block, choose fastlz1 */
- if(length < 65536)
- return fastlz1_compress(input, length, output);
- /* else... */
- return fastlz2_compress(input, length, output);
-int fastlz_decompress(const void* input, int length, void* output, int maxout)
- /* magic identifier for compression level */
- int level = ((*(const flzuint8*)input) >> 5) + 1;
- //printf("level: %u\n", level);
- if(level == 1)
- return fastlz1_decompress(input, length, output, maxout);
- if(level == 2)
- return fastlz2_decompress(input, length, output, maxout);
- /* unknown level, trigger error */
- return 0;
-int fastlz_compress_level(int level, const void* input, int length, void* output)
- if(level == 1)
- return fastlz1_compress(input, length, output);
- if(level == 2)
- return fastlz2_compress(input, length, output);
- return 0;
-#else /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
-static FASTLZ_INLINE int FASTLZ_COMPRESSOR(const void* input, int length, void* output)
- const flzuint8* ip = (const flzuint8*) input;
- const flzuint8* ip_bound = ip + length - 2;
- const flzuint8* ip_limit = ip + length - 12;
- flzuint8* op = (flzuint8*) output;
- const flzuint8* htab[HASH_SIZE];
- const flzuint8** hslot;
- flzuint32 hval;
- flzuint32 copy;
- /* sanity check */
- {
- if(length)
- {
- /* create literal copy only */
- *op++ = length-1;
- ip_bound++;
- while(ip <= ip_bound)
- *op++ = *ip++;
- return length+1;
- }
- else
- return 0;
- }
- /* initializes hash table */
- for (hslot = htab; hslot < htab + HASH_SIZE; hslot++)
- *hslot = ip;
- /* we start with literal copy */
- copy = 2;
- *op++ = MAX_COPY-1;
- *op++ = *ip++;
- *op++ = *ip++;
- /* main loop */
- while(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
- {
- const flzuint8* ref;
- flzuint32 distance;
- /* minimum match length */
- flzuint32 len = 3;
- /* comparison starting-point */
- const flzuint8* anchor = ip;
- /* check for a run */
- if(ip[0] == ip[-1] && FASTLZ_READU16(ip-1)==FASTLZ_READU16(ip+1))
- {
- distance = 1;
- ip += 3;
- ref = anchor - 1 + 3;
- goto match;
- }
- /* find potential match */
- HASH_FUNCTION(hval,ip);
- hslot = htab + hval;
- ref = htab[hval];
- /* calculate distance to the match */
- distance = anchor - ref;
- /* update hash table */
- *hslot = anchor;
- /* is this a match? check the first 3 bytes */
- if(distance==0 ||
- (distance >= MAX_DISTANCE) ||
- (distance >= MAX_FARDISTANCE) ||
- *ref++ != *ip++ || *ref++!=*ip++ || *ref++!=*ip++)
- goto literal;
- /* far, needs at least 5-byte match */
- if(distance >= MAX_DISTANCE)
- {
- if(*ip++ != *ref++ || *ip++!= *ref++)
- goto literal;
- len += 2;
- }
- match:
- /* last matched byte */
- ip = anchor + len;
- /* distance is biased */
- distance--;
- if(!distance)
- {
- /* zero distance means a run */
- flzuint8 x = ip[-1];
- while(ip < ip_bound)
- if(*ref++ != x) break; else ip++;
- }
- else
- for(;;)
- {
- /* safe because the outer check against ip limit */
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- if(*ref++ != *ip++) break;
- while(ip < ip_bound)
- if(*ref++ != *ip++) break;
- break;
- }
- /* if we have copied something, adjust the copy count */
- if(copy)
- /* copy is biased, '0' means 1 byte copy */
- *(op-copy-1) = copy-1;
- else
- /* back, to overwrite the copy count */
- op--;
- /* reset literal counter */
- copy = 0;
- /* length is biased, '1' means a match of 3 bytes */
- ip -= 3;
- len = ip - anchor;
- /* encode the match */
- if(distance < MAX_DISTANCE)
- {
- if(len < 7)
- {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- }
- else
- {
- *op++ = (7 << 5) + (distance >> 8);
- for(len-=7; len >= 255; len-= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = (distance & 255);
- }
- }
- else
- {
- /* far away, but not yet in the another galaxy... */
- if(len < 7)
- {
- distance -= MAX_DISTANCE;
- *op++ = (len << 5) + 31;
- *op++ = 255;
- *op++ = distance >> 8;
- *op++ = distance & 255;
- }
- else
- {
- distance -= MAX_DISTANCE;
- *op++ = (7 << 5) + 31;
- for(len-=7; len >= 255; len-= 255)
- *op++ = 255;
- *op++ = len;
- *op++ = 255;
- *op++ = distance >> 8;
- *op++ = distance & 255;
- }
- }
- while(len > MAX_LEN-2)
- {
- *op++ = (7 << 5) + (distance >> 8);
- *op++ = MAX_LEN - 2 - 7 -2;
- *op++ = (distance & 255);
- len -= MAX_LEN-2;
- }
- if(len < 7)
- {
- *op++ = (len << 5) + (distance >> 8);
- *op++ = (distance & 255);
- }
- else
- {
- *op++ = (7 << 5) + (distance >> 8);
- *op++ = len - 7;
- *op++ = (distance & 255);
- }
- /* update the hash at match boundary */
- HASH_FUNCTION(hval,ip);
- htab[hval] = ip++;
- HASH_FUNCTION(hval,ip);
- htab[hval] = ip++;
- /* assuming literal copy */
- *op++ = MAX_COPY-1;
- continue;
- literal:
- *op++ = *anchor++;
- ip = anchor;
- copy++;
- {
- copy = 0;
- *op++ = MAX_COPY-1;
- }
- }
- /* left-over as literal copy */
- ip_bound++;
- while(ip <= ip_bound)
- {
- *op++ = *ip++;
- copy++;
- if(copy == MAX_COPY)
- {
- copy = 0;
- *op++ = MAX_COPY-1;
- }
- }
- /* if we have copied something, adjust the copy length */
- if(copy)
- *(op-copy-1) = copy-1;
- else
- op--;
- /* marker for fastlz2 */
- *(flzuint8*)output |= (1 << 5);
- return op - (flzuint8*)output;
-static FASTLZ_INLINE int FASTLZ_DECOMPRESSOR(const void* input, int length, void* output, int maxout)
- const flzuint8* ip = (const flzuint8*) input;
- const flzuint8* ip_limit = ip + length;
- flzuint8* op = (flzuint8*) output;
- flzuint8* op_limit = op + maxout;
- flzuint32 ctrl = (*ip++) & 31;
- int loop = 1;
- do
- {
- const flzuint8* ref = op;
- flzuint32 len = ctrl >> 5;
- flzuint32 ofs = (ctrl & 31) << 8;
- if(ctrl >= 32)
- {
- flzuint8 code;
- len--;
- ref -= ofs;
- if (len == 7-1)
- len += *ip++;
- ref -= *ip++;
- do
- {
- code = *ip++;
- len += code;
- } while (code==255);
- code = *ip++;
- ref -= code;
- /* match from 16-bit distance */
- if(FASTLZ_EXPECT_CONDITIONAL(ofs==(31 << 8)))
- {
- ofs = (*ip++) << 8;
- ofs += *ip++;
- ref = op - ofs - MAX_DISTANCE;
- }
- //printf("OSCAR 1\n");
- if (FASTLZ_UNEXPECT_CONDITIONAL(op + len + 3 > op_limit))
- return 0;
- //printf("OSCAR 2\n");
- if (FASTLZ_UNEXPECT_CONDITIONAL(ref-1 < (flzuint8 *)output))
- return 0;
- if(FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit))
- ctrl = *ip++;
- else
- loop = 0;
- if(ref == op)
- {
- /* optimize copy for a run */
- flzuint8 b = ref[-1];
- *op++ = b;
- *op++ = b;
- *op++ = b;
- for(; len; --len)
- *op++ = b;
- }
- else
- {
-#if !defined(FASTLZ_STRICT_ALIGN)
- const flzuint16* p;
- flzuint16* q;
- /* copy from reference */
- ref--;
- *op++ = *ref++;
- *op++ = *ref++;
- *op++ = *ref++;
-#if !defined(FASTLZ_STRICT_ALIGN)
- /* copy a byte, so that now it's word aligned */
- if(len & 1)
- {
- *op++ = *ref++;
- len--;
- }
- /* copy 16-bit at once */
- q = (flzuint16*) op;
- op += len;
- p = (const flzuint16*) ref;
- for(len>>=1; len > 4; len-=4)
- {
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- *q++ = *p++;
- }
- for(; len; --len)
- *q++ = *p++;
- for(; len; --len)
- *op++ = *ref++;
- }
- }
- else
- {
- ctrl++;
- //printf("OSCAR 3\n");
- if (FASTLZ_UNEXPECT_CONDITIONAL(op + ctrl > op_limit))
- return 0;
- //printf("OSCAR 4: ip: %u ctrl: %u, ip_limit: %u\n", ip, ctrl, ip_limit);
- if (FASTLZ_UNEXPECT_CONDITIONAL(ip + ctrl > ip_limit))
- return 0;
- *op++ = *ip++;
- for(--ctrl; ctrl; ctrl--)
- *op++ = *ip++;
- loop = FASTLZ_EXPECT_CONDITIONAL(ip < ip_limit);
- if(loop)
- ctrl = *ip++;
- }
- }
- //printf("OSCAR 5\n");
- return op - (flzuint8*)output;
-#endif /* !defined(FASTLZ_COMPRESSOR) && !defined(FASTLZ_DECOMPRESSOR) */
diff --git a/src/fastlz.h b/src/fastlz.h
deleted file mode 100644
index f87bc7b..0000000
--- a/src/fastlz.h
+++ /dev/null
@@ -1,100 +0,0 @@
- FastLZ - lightning-fast lossless compression library
- Copyright (C) 2007 Ariya Hidayat (ariya at kde.org)
- Copyright (C) 2006 Ariya Hidayat (ariya at kde.org)
- Copyright (C) 2005 Ariya Hidayat (ariya at kde.org)
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
- The above copyright notice and this permission notice shall be included in
- all copies or substantial portions of the Software.
-#ifndef FASTLZ_H
-#define FASTLZ_H
-#define FASTLZ_VERSION 0x000100
-#define FASTLZ_VERSION_STRING "0.1.0"
-#if defined (__cplusplus)
-extern "C" {
- Compress a block of data in the input buffer and returns the size of
- compressed block. The size of input buffer is specified by length. The
- minimum input buffer size is 16.
- The output buffer must be at least 5% larger than the input buffer
- and can not be smaller than 66 bytes.
- If the input is not compressible, the return value might be larger than
- length (input buffer size).
- The input buffer and the output buffer can not overlap.
-int fastlz_compress(const void* input, int length, void* output);
- Decompress a block of compressed data and returns the size of the
- decompressed block. If error occurs, e.g. the compressed data is
- corrupted or the output buffer is not large enough, then 0 (zero)
- will be returned instead.
- The input buffer and the output buffer can not overlap.
- Decompression is memory safe and guaranteed not to write the output buffer
- more than what is specified in maxout.
- */
-int fastlz_decompress(const void* input, int length, void* output, int maxout);
- Compress a block of data in the input buffer and returns the size of
- compressed block. The size of input buffer is specified by length. The
- minimum input buffer size is 16.
- The output buffer must be at least 5% larger than the input buffer
- and can not be smaller than 66 bytes.
- If the input is not compressible, the return value might be larger than
- length (input buffer size).
- The input buffer and the output buffer can not overlap.
- Compression level can be specified in parameter level. At the moment,
- only level 1 and level 2 are supported.
- Level 1 is the fastest compression and generally useful for short data.
- Level 2 is slightly slower but it gives better compression ratio.
- Note that the compressed data, regardless of the level, can always be
- decompressed using the function fastlz_decompress above.
-int fastlz_compress_level(int level, const void* input, int length, void* output);
-#if defined (__cplusplus)
-#endif /* FASTLZ_H */
diff --git a/src/freebayes.cpp b/src/freebayes.cpp
deleted file mode 100644
index be412da..0000000
--- a/src/freebayes.cpp
+++ /dev/null
@@ -1,712 +0,0 @@
-// freebayes
-// A bayesian genetic variant detector.
-// standard includes
-//#include <cstdio>
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-#include <map>
-#include <iterator>
-#include <algorithm>
-#include <cmath>
-#include <time.h>
-#include <float.h>
-#include <stdlib.h>
-// private libraries
-#include "api/BamReader.h"
-#include "Fasta.h"
-#include "TryCatch.h"
-#include "Parameters.h"
-#include "Allele.h"
-#include "Sample.h"
-#include "AlleleParser.h"
-#include "Utility.h"
-#include "SegfaultHandler.h"
-#include "multichoose.h"
-#include "multipermute.h"
-#include "Genotype.h"
-#include "DataLikelihood.h"
-#include "Marginals.h"
-#include "ResultData.h"
-#include "Bias.h"
-#include "Contamination.h"
-#include "NonCall.h"
-// local helper debugging macros to improve code readability
-#define DEBUG(msg) \
- if (parameters.debug) { cerr << msg << endl; }
-// lower-priority messages
-#define DEBUG2(msg) \
- if (parameters.debug2) { cerr << msg << endl; }
-#define DEBUG2(msg)
-// must-see error messages
-#define ERROR(msg) \
- cerr << msg << endl;
-using namespace std;
-// todo
-// generalize the main function to take the parameters as input
-// so that we can invoke the entire algorithm on different regions
-// when requested to run in parallel
-// take the targets (or whole genome) and make small jobs
-// run the main function for each region in an omp parallel for loop
-// only do this if the --parallel flag is set > 1
-// freebayes main
-int main (int argc, char *argv[]) {
- // install segfault handler
- signal(SIGSEGV, segfaultHandler);
- AlleleParser* parser = new AlleleParser(argc, argv);
- Parameters& parameters = parser->parameters;
- list<Allele*> alleles;
- Samples samples;
- NonCalls nonCalls;
- ostream& out = *(parser->output);
- Bias observationBias;
- if (!parameters.alleleObservationBiasFile.empty()) {
- observationBias.open(parameters.alleleObservationBiasFile);
- }
- Contamination contaminationEstimates(0.5+parameters.probContamination, parameters.probContamination);
- if (!parameters.contaminationEstimateFile.empty()) {
- contaminationEstimates.open(parameters.contaminationEstimateFile);
- }
- // this can be uncommented to force operation on a specific set of genotypes
- vector<Allele> allGenotypeAlleles;
- allGenotypeAlleles.push_back(genotypeAllele(ALLELE_GENOTYPE, "A", 1));
- allGenotypeAlleles.push_back(genotypeAllele(ALLELE_GENOTYPE, "T", 1));
- allGenotypeAlleles.push_back(genotypeAllele(ALLELE_GENOTYPE, "G", 1));
- allGenotypeAlleles.push_back(genotypeAllele(ALLELE_GENOTYPE, "C", 1));
- int allowedAlleleTypes = ALLELE_REFERENCE;
- if (parameters.allowSNPs) {
- allowedAlleleTypes |= ALLELE_SNP;
- }
- if (parameters.allowIndels) {
- allowedAlleleTypes |= ALLELE_INSERTION;
- allowedAlleleTypes |= ALLELE_DELETION;
- }
- if (parameters.allowMNPs) {
- allowedAlleleTypes |= ALLELE_MNP;
- }
- if (parameters.allowComplex) {
- allowedAlleleTypes |= ALLELE_COMPLEX;
- }
- // output VCF header
- if (parameters.output == "vcf") {
- out << parser->variantCallFile.header << endl;
- }
- if (0 < parameters.maxCoverage) {
- srand(13);
- }
- Allele nullAllele = genotypeAllele(ALLELE_NULL, "N", 1, "1N");
- unsigned long total_sites = 0;
- unsigned long processed_sites = 0;
- while (parser->getNextAlleles(samples, allowedAlleleTypes)) {
- ++total_sites;
- DEBUG2("at start of main loop");
- // did we switch chromosomes or exceed our gVCF chunk size?
- // if so, we may need to output a gVCF record
- Results results;
- if (parameters.gVCFout && !nonCalls.empty() &&
- ( nonCalls.begin()->first != parser->currentSequenceName
- || (parameters.gVCFchunk &&
- nonCalls.lastPos().second - nonCalls.firstPos().second
- > parameters.gVCFchunk))) {
- vcf::Variant var(parser->variantCallFile);
- out << results.gvcf(var, nonCalls, parser) << endl;
- nonCalls.clear();
- }
- // don't process non-ATGC's in the reference
- string cb = parser->currentReferenceBaseString();
- if (cb != "A" && cb != "T" && cb != "C" && cb != "G") {
- DEBUG2("current reference base is N");
- continue;
- }
- int coverage = countAlleles(samples);
- DEBUG("position: " << parser->currentSequenceName << ":" << (long unsigned int) parser->currentPosition + 1 << " coverage: " << coverage);
- bool skip = false;
- if (!parser->hasInputVariantAllelesAtCurrentPosition()) {
- // skips 0-coverage regions
- if (coverage == 0) {
- DEBUG("no alleles left at this site after filtering");
- skip = true;
- } else if (coverage < parameters.minCoverage) {
- DEBUG("post-filtering coverage of " << coverage << " is less than --min-coverage of " << parameters.minCoverage);
- skip = true;
- } else if (parameters.onlyUseInputAlleles) {
- DEBUG("no input alleles, but using only input alleles for analysis, skipping position");
- skip = true;
- } else if (0 < parameters.maxCoverage) {
- // go through each sample
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- string sampleName = s->first;
- Sample& sample = s->second;
- // get the coverage for this sample
- int sampleCoverage = 0;
- for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
- sampleCoverage += sg->second.size();
- }
- if (sampleCoverage <= parameters.maxCoverage) {
- skip = true;
- continue;
- }
- DEBUG("coverage " << sampleCoverage << " for sample " << sampleName << " was > " << parameters.maxCoverage << ", so we will remove " << (sampleCoverage - parameters.maxCoverage) << " genotypes");
- vector<string> genotypesToErase;
- do {
- double probRemove = (sampleCoverage - parameters.maxCoverage) / (double)sampleCoverage;
- vector<string> genotypesToErase;
- // iterate through the genotypes
- for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
- vector<Allele*> allelesToKeep;
- // iterate through each allele
- for (int alleleIndex = 0; alleleIndex < sg->second.size(); alleleIndex++) {
- // only if we have more alleles to remove
- if (parameters.maxCoverage < sampleCoverage) {
- double r = rand() / (double)RAND_MAX;
- if (r < probRemove) { // skip over this allele
- sampleCoverage--;
- continue;
- }
- }
- // keep it
- allelesToKeep.push_back(sg->second[alleleIndex]);
- }
- // re-assign the alleles to this genotype
- if (allelesToKeep.size() < sg->second.size()) {
- sg->second.assign(allelesToKeep.begin(), allelesToKeep.end());
- }
- // if no more alleles for this genotype, remove it later
- if (sg->second.empty()) {
- genotypesToErase.push_back(sg->first);
- }
- }
- // remove empty genotypes
- for (vector<string>::iterator gt = genotypesToErase.begin(); gt != genotypesToErase.end(); ++gt) {
- sample.erase(*gt);
- }
- } while (parameters.maxCoverage < sampleCoverage);
- sampleCoverage = 0;
- for (Sample::iterator sg = sample.begin(); sg != sample.end(); ++sg) {
- sampleCoverage += sg->second.size();
- }
- DEBUG("coverage for sample " << sampleName << " is now " << sampleCoverage);
- }
- // update coverage
- coverage = countAlleles(samples);
- }
- DEBUG2("coverage " << parser->currentSequenceName << ":" << parser->currentPosition << " == " << coverage);
- // establish a set of possible alternate alleles to evaluate at this location
- if (!parameters.reportMonomorphic
- && !sufficientAlternateObservations(samples, parameters.minAltCount, parameters.minAltFraction)) {
- DEBUG("insufficient alternate observations");
- skip = true;
- }
- if (parameters.reportMonomorphic) {
- DEBUG("calling at site even though there are no alternate observations");
- }
- } else {
- /*
- cerr << "has input variants at " << parser->currentSequenceName << ":" << parser->currentPosition << endl;
- vector<Allele>& inputs = parser->inputVariantAlleles[parser->currentSequenceName][parser->currentPosition];
- for (vector<Allele>::iterator a = inputs.begin(); a != inputs.end(); ++a) {
- cerr << *a << endl;
- }
- */
- }
- if (skip) {
- // record data for gVCF
- if (parameters.gVCFout) {
- nonCalls.record(parser->currentSequenceName, parser->currentPosition, samples);
- }
- // and step ahead
- continue;
- }
- // to ensure proper ordering of output stream
- vector<string> sampleListPlusRef;
- for (vector<string>::iterator s = parser->sampleList.begin(); s != parser->sampleList.end(); ++s) {
- sampleListPlusRef.push_back(*s);
- }
- if (parameters.useRefAllele) {
- sampleListPlusRef.push_back(parser->currentSequenceName);
- }
- // establish genotype alleles using input filters
- map<string, vector<Allele*> > alleleGroups;
- groupAlleles(samples, alleleGroups);
- DEBUG2("grouped alleles by equivalence");
- vector<Allele> genotypeAlleles = parser->genotypeAlleles(alleleGroups, samples, parameters.onlyUseInputAlleles);
- // always include the reference allele as a possible genotype, even when we don't include it by default
- if (!parameters.useRefAllele) {
- vector<Allele> refAlleleVector;
- refAlleleVector.push_back(genotypeAllele(ALLELE_REFERENCE, string(1, parser->currentReferenceBase), 1, "1M"));
- genotypeAlleles = alleleUnion(genotypeAlleles, refAlleleVector);
- }
- map<string, vector<Allele*> > partialObservationGroups;
- map<Allele*, set<Allele*> > partialObservationSupport;
- // build haplotype alleles matching the current longest allele (often will do nothing)
- // this will adjust genotypeAlleles if changes are made
- DEBUG("building haplotype alleles, currently there are " << genotypeAlleles.size() << " genotype alleles");
- DEBUG(genotypeAlleles);
- parser->buildHaplotypeAlleles(genotypeAlleles,
- samples,
- alleleGroups,
- partialObservationGroups,
- partialObservationSupport,
- allowedAlleleTypes);
- DEBUG("built haplotype alleles, now there are " << genotypeAlleles.size() << " genotype alleles");
- DEBUG(genotypeAlleles);
- string referenceBase = parser->currentReferenceHaplotype();
- /* for debugging
- for (Samples::iterator s = samples.begin(); s != samples.end(); ++s) {
- string sampleName = s->first;
- Sample& sample = s->second;
- cerr << sampleName << ": " << sample << endl;
- }
- */
- // re-calculate coverage, as this could change now that we've built haplotype alleles
- coverage = countAlleles(samples);
- // estimate theta using the haplotype length
- long double theta = parameters.TH * parser->lastHaplotypeLength;
- // if we have only one viable allele, we don't have evidence for variation at this site
- if (!parser->hasInputVariantAllelesAtCurrentPosition() && !parameters.reportMonomorphic && genotypeAlleles.size() <= 1 && genotypeAlleles.front().isReference()) {
- DEBUG("no alternate genotype alleles passed filters at " << parser->currentSequenceName << ":" << parser->currentPosition);
- continue;
- }
- DEBUG("genotype alleles: " << genotypeAlleles);
- // add the null genotype
- bool usingNull = false;
- if (parameters.excludeUnobservedGenotypes && genotypeAlleles.size() > 2) {
- genotypeAlleles.push_back(nullAllele);
- usingNull = true;
- }
- ++processed_sites;
- // generate possible genotypes
- // for each possible ploidy in the dataset, generate all possible genotypes
- vector<int> ploidies = parser->currentPloidies(samples);
- map<int, vector<Genotype> > genotypesByPloidy = getGenotypesByPloidy(ploidies, genotypeAlleles);
- int numCopiesOfLocus = parser->copiesOfLocus(samples);
- DEBUG2("generated all possible genotypes:");
- if (parameters.debug2) {
- for (map<int, vector<Genotype> >::iterator s = genotypesByPloidy.begin(); s != genotypesByPloidy.end(); ++s) {
- vector<Genotype>& genotypes = s->second;
- for (vector<Genotype>::iterator g = genotypes.begin(); g != genotypes.end(); ++g) {
- DEBUG2(*g);
- }
- }
- }
- // get estimated allele frequencies using sum of estimated qualities
- map<string, double> estimatedAlleleFrequencies = samples.estimatedAlleleFrequencies();
- double estimatedMaxAlleleFrequency = 0;
- double estimatedMaxAlleleCount = 0;
- double estimatedMajorFrequency = estimatedAlleleFrequencies[referenceBase];
- if (estimatedMajorFrequency < 0.5) estimatedMajorFrequency = 1-estimatedMajorFrequency;
- double estimatedMinorFrequency = 1-estimatedMajorFrequency;
- //cerr << "num copies of locus " << numCopiesOfLocus << endl;
- int estimatedMinorAllelesAtLocus = max(1, (int) ceil((double) numCopiesOfLocus * estimatedMinorFrequency));
- //cerr << "estimated minor frequency " << estimatedMinorFrequency << endl;
- //cerr << "estimated minor count " << estimatedMinorAllelesAtLocus << endl;
- map<string, vector<vector<SampleDataLikelihood> > > sampleDataLikelihoodsByPopulation;
- map<string, vector<vector<SampleDataLikelihood> > > variantSampleDataLikelihoodsByPopulation;
- map<string, vector<vector<SampleDataLikelihood> > > invariantSampleDataLikelihoodsByPopulation;
- map<string, int> inputAlleleCounts;
- int inputLikelihoodCount = 0;
- DEBUG2("calculating data likelihoods");
- calculateSampleDataLikelihoods(
- samples,
- results,
- parser,
- genotypesByPloidy,
- parameters,
- usingNull,
- observationBias,
- genotypeAlleles,
- contaminationEstimates,
- estimatedAlleleFrequencies,
- sampleDataLikelihoodsByPopulation,
- variantSampleDataLikelihoodsByPopulation,
- invariantSampleDataLikelihoodsByPopulation);
- DEBUG2("finished calculating data likelihoods");
- // if somehow we get here without any possible sample genotype likelihoods, bail out
- bool hasSampleLikelihoods = false;
- for (map<string, vector<vector<SampleDataLikelihood> > >::iterator s = sampleDataLikelihoodsByPopulation.begin();
- s != sampleDataLikelihoodsByPopulation.end(); ++s) {
- if (!s->second.empty()) {
- hasSampleLikelihoods = true;
- break;
- }
- }
- if (!hasSampleLikelihoods) {
- continue;
- }
- DEBUG2("calulating combo posteriors over " << parser->populationSamples.size() << " populations");
- // XXX
- // TODO skip these steps in the case that there is only one population?
- // we provide p(var|data), or the probability that the location has
- // variation between individuals relative to the probability that it
- // has no variation
- //
- // in other words:
- // p(var|d) = 1 - p(AA|d) - p(TT|d) - P(GG|d) - P(CC|d)
- //
- // the approach is go through all the homozygous combos
- // and then subtract this from 1... resolving p(var|d)
- BigFloat pVar = 1.0;
- BigFloat pHom = 0.0;
- long double bestComboOddsRatio = 0;
- bool bestOverallComboIsHet = false;
- GenotypeCombo bestCombo; // = NULL;
- GenotypeCombo bestGenotypeComboByMarginals;
- vector<vector<SampleDataLikelihood> > allSampleDataLikelihoods;
- DEBUG("searching genotype space");
- // resample the posterior, this time without bounds on the
- // samples we vary, ensuring that we can generate marginals for
- // all sample/genotype combinations
- //SampleDataLikelihoods marginalLikelihoods = sampleDataLikelihoods; // heavyweight copy...
- map<string, list<GenotypeCombo> > genotypeCombosByPopulation;
- int genotypingTotalIterations = 0; // tally total iterations required to reach convergence
- map<string, list<GenotypeCombo> > glMaxCombos;
- for (map<string, SampleDataLikelihoods>::iterator p = sampleDataLikelihoodsByPopulation.begin(); p != sampleDataLikelihoodsByPopulation.end(); ++p) {
- const string& population = p->first;
- SampleDataLikelihoods& sampleDataLikelihoods = p->second;
- list<GenotypeCombo>& populationGenotypeCombos = genotypeCombosByPopulation[population];
- DEBUG2("genqerating banded genotype combinations from " << sampleDataLikelihoods.size() << " sample genotypes in population " << population);
- // cap the number of iterations at 2 x the number of alternate alleles
- // max it at parameters.genotypingMaxIterations iterations, min at 10
- int itermax = min(max(10, 2 * estimatedMinorAllelesAtLocus), parameters.genotypingMaxIterations);
- //int itermax = parameters.genotypingMaxIterations;
- // passing 0 for bandwidth and banddepth means "exhaustive local search"
- // this produces properly normalized GQ's at polyallelic sites
- int adjustedBandwidth = 0;
- int adjustedBanddepth = 0;
- // however, this can lead to huge performance problems at complex sites,
- // so we implement this hack...
- if (parameters.genotypingMaxBandDepth > 0 &&
- genotypeAlleles.size() > parameters.genotypingMaxBandDepth) {
- adjustedBandwidth = 1;
- adjustedBanddepth = parameters.genotypingMaxBandDepth;
- }
- GenotypeCombo nullCombo;
- SampleDataLikelihoods nullSampleDataLikelihoods;
- // this is the genotype-likelihood maximum
- if (parameters.reportGenotypeLikelihoodMax) {
- GenotypeCombo comboKing;
- vector<int> initialPosition;
- initialPosition.assign(sampleDataLikelihoods.size(), 0);
- SampleDataLikelihoods nullDataLikelihoods; // dummy variable
- makeComboByDatalLikelihoodRank(comboKing,
- initialPosition,
- sampleDataLikelihoods,
- nullDataLikelihoods,
- inputAlleleCounts,
- theta,
- parameters.pooledDiscrete,
- parameters.ewensPriors,
- parameters.permute,
- parameters.hwePriors,
- parameters.obsBinomialPriors,
- parameters.alleleBalancePriors,
- parameters.diffusionPriorScalar);
- glMaxCombos[population].push_back(comboKing);
- }
- // search much longer for convergence
- convergentGenotypeComboSearch(
- populationGenotypeCombos,
- nullCombo,
- sampleDataLikelihoods, // vary everything
- sampleDataLikelihoods,
- nullSampleDataLikelihoods,
- samples,
- genotypeAlleles,
- inputAlleleCounts,
- adjustedBandwidth,
- adjustedBanddepth,
- theta,
- parameters.pooledDiscrete,
- parameters.ewensPriors,
- parameters.permute,
- parameters.hwePriors,
- parameters.obsBinomialPriors,
- parameters.alleleBalancePriors,
- parameters.diffusionPriorScalar,
- itermax,
- genotypingTotalIterations,
- true); // add homozygous combos
- // ^^ combo results are sorted by default
- }
- // generate the GL max combo
- GenotypeCombo glMax;
- if (parameters.reportGenotypeLikelihoodMax) {
- list<GenotypeCombo> glMaxGenotypeCombos;
- combinePopulationCombos(glMaxGenotypeCombos, glMaxCombos);
- glMax = glMaxGenotypeCombos.front();
- }
- // accumulate combos from independently-calculated populations into the list of combos
- list<GenotypeCombo> genotypeCombos; // build new combos into this list
- combinePopulationCombos(genotypeCombos, genotypeCombosByPopulation);
- // TODO factor out the following blocks as they are repeated from above
- // re-get posterior normalizer
- vector<long double> comboProbs;
- for (list<GenotypeCombo>::iterator gc = genotypeCombos.begin(); gc != genotypeCombos.end(); ++gc) {
- comboProbs.push_back(gc->posteriorProb);
- }
- long double posteriorNormalizer = logsumexp_probs(comboProbs);
- // recalculate posterior normalizer
- pVar = 1.0;
- pHom = 0.0;
- // calculates pvar and gets the best het combo
- list<GenotypeCombo>::iterator gc = genotypeCombos.begin();
- bestCombo = *gc;
- for ( ; gc != genotypeCombos.end(); ++gc) {
- if (gc->isHomozygous() && gc->alleles().front() == referenceBase) {
- pVar -= big_exp(gc->posteriorProb - posteriorNormalizer);
- pHom += big_exp(gc->posteriorProb - posteriorNormalizer);
- } else if (gc == genotypeCombos.begin()) {
- bestOverallComboIsHet = true;
- }
- }
- // odds ratio between the first and second-best combinations
- if (genotypeCombos.size() > 1) {
- bestComboOddsRatio = genotypeCombos.front().posteriorProb - (++genotypeCombos.begin())->posteriorProb;
- }
- if (parameters.calculateMarginals) {
- // make a combined, all-populations sample data likelihoods vector to accumulate marginals
- SampleDataLikelihoods allSampleDataLikelihoods;
- for (map<string, SampleDataLikelihoods>::iterator p = sampleDataLikelihoodsByPopulation.begin(); p != sampleDataLikelihoodsByPopulation.end(); ++p) {
- SampleDataLikelihoods& sdls = p->second;
- allSampleDataLikelihoods.reserve(allSampleDataLikelihoods.size() + distance(sdls.begin(), sdls.end()));
- allSampleDataLikelihoods.insert(allSampleDataLikelihoods.end(), sdls.begin(), sdls.end());
- }
- // calculate the marginal likelihoods for this population
- marginalGenotypeLikelihoods(genotypeCombos, allSampleDataLikelihoods);
- // store the marginal data likelihoods in the results, for easy parsing
- // like a vector -> map conversion...
- results.update(allSampleDataLikelihoods);
- }
- map<string, int> repeats;
- if (parameters.showReferenceRepeats) {
- repeats = parser->repeatCounts(parser->currentSequencePosition(), parser->currentSequence, 12);
- }
- vector<Allele> alts;
- if (parameters.onlyUseInputAlleles
- || parameters.reportAllHaplotypeAlleles
- || parameters.pooledContinuous) {
- //alts = genotypeAlleles;
- for (vector<Allele>::iterator a = genotypeAlleles.begin(); a != genotypeAlleles.end(); ++a) {
- if (!a->isReference()) {
- alts.push_back(*a);
- }
- }
- } else {
- // get the unique alternate alleles in this combo, sorted by frequency in the combo
- vector<pair<Allele, int> > alternates = alternateAlleles(bestCombo, referenceBase);
- for (vector<pair<Allele, int> >::iterator a = alternates.begin(); a != alternates.end(); ++a) {
- Allele& alt = a->first;
- if (!alt.isNull() && !alt.isReference())
- alts.push_back(alt);
- }
- // if there are no alternate alleles in the best combo, use the genotype alleles
- // XXX ...
- if (alts.empty()) {
- for (vector<Allele>::iterator a = genotypeAlleles.begin(); a != genotypeAlleles.end(); ++a) {
- if (!a->isReference()) {
- alts.push_back(*a);
- }
- }
- }
- }
- // reporting the GL maximum *over all alleles*
- if (parameters.reportGenotypeLikelihoodMax) {
- bestCombo = glMax;
- } else {
- // the default behavior is to report the GL maximum genotyping over the alleles in the best posterior genotyping
- // select the maximum-likelihood GL given the alternates we have
- // this is not the same thing as the GL max over all alleles!
- // it is the GL max over the selected alleles at this point
- vector<Allele> alleles = alts;
- for (vector<Allele>::iterator a = genotypeAlleles.begin(); a != genotypeAlleles.end(); ++a) {
- if (a->isReference()) {
- alleles.push_back(*a);
- }
- }
- map<string, list<GenotypeCombo> > glMaxComboBasedOnAltsByPop;
- for (map<string, SampleDataLikelihoods>::iterator p = sampleDataLikelihoodsByPopulation.begin(); p != sampleDataLikelihoodsByPopulation.end(); ++p) {
- const string& population = p->first;
- SampleDataLikelihoods& sampleDataLikelihoods = p->second;
- GenotypeCombo glMaxBasedOnAlts;
- for (SampleDataLikelihoods::iterator v = sampleDataLikelihoods.begin(); v != sampleDataLikelihoods.end(); ++v) {
- SampleDataLikelihood* m = NULL;
- for (vector<SampleDataLikelihood>::iterator d = v->begin(); d != v->end(); ++d) {
- if (d->genotype->matchesAlleles(alleles)) {
- m = &*d;
- break;
- }
- }
- assert(m != NULL);
- glMaxBasedOnAlts.push_back(m);
- }
- glMaxComboBasedOnAltsByPop[population].push_back(glMaxBasedOnAlts);
- }
- list<GenotypeCombo> glMaxBasedOnAltsGenotypeCombos; // build new combos into this list
- combinePopulationCombos(glMaxBasedOnAltsGenotypeCombos, glMaxComboBasedOnAltsByPop);
- bestCombo = glMaxBasedOnAltsGenotypeCombos.front();
- }
- DEBUG("best combo: " << bestCombo);
- // output
- if (!alts.empty() && (1 - pHom.ToDouble()) >= parameters.PVL || parameters.PVL == 0) {
- // write the last gVCF record(s)
- if (parameters.gVCFout && !nonCalls.empty()) {
- vcf::Variant var(parser->variantCallFile);
- out << results.gvcf(var, nonCalls, parser) << endl;
- nonCalls.clear();
- }
- vcf::Variant var(parser->variantCallFile);
- out << results.vcf(
- var,
- pHom,
- bestComboOddsRatio,
- samples,
- referenceBase,
- alts,
- repeats,
- genotypingTotalIterations,
- parser->sampleList,
- coverage,
- bestCombo,
- alleleGroups,
- partialObservationGroups,
- partialObservationSupport,
- genotypesByPloidy,
- parser->sequencingTechnologies,
- parser)
- << endl;
- } else if (parameters.gVCFout) {
- // record statistics for gVCF output
- nonCalls.record(parser->currentSequenceName, parser->currentPosition, samples);
- }
- DEBUG2("finished position");
- }
- // write the last gVCF record
- if (parameters.gVCFout && !nonCalls.empty()) {
- Results results;
- vcf::Variant var(parser->variantCallFile);
- out << results.gvcf(var, nonCalls, parser) << endl;
- nonCalls.clear();
- }
- DEBUG("total sites: " << total_sites << endl
- << "processed sites: " << processed_sites << endl
- << "ratio: " << (float) processed_sites / (float) total_sites);
- delete parser;
- return 0;
diff --git a/src/join.h b/src/join.h
deleted file mode 100644
index b02a947..0000000
--- a/src/join.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#ifndef __JOIN_H
-#define __JOIN_H
-// functions to join the elements of a vector into a single delimited string
-#include <string>
-#include <vector>
-#include <sstream>
-#include <string.h>
-// join a vector of elements by a delimiter object. ostream<< must be defined
-// for both class S and T and an ostream, as it is e.g. in the case of strings
-// and character arrays
-template<class S, class T>
-std::string join(std::vector<T>& elems, S& delim) {
-    std::stringstream ss;
-    typename std::vector<T>::iterator e = elems.begin();
-    // An empty vector joins to the empty string; without this guard the
-    // unconditional dereference below would be undefined behavior.
-    if (e == elems.end()) {
-        return ss.str();
-    }
-    // first element goes out bare, every later one is preceded by delim
-    ss << *e++;
-    for (; e != elems.end(); ++e) {
-        ss << delim << *e;
-    }
-    return ss.str();
-}
diff --git a/src/levenshtein.cpp b/src/levenshtein.cpp
deleted file mode 100644
index 2469e70..0000000
--- a/src/levenshtein.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-// Levenshtein Distance Algorithm: C++ Implementation
-// by Anders Sewerin Johansen
-// http://www.merriampark.com/ldcpp.htm
-#include <string>
-#include <vector>
-// Edit distance between source and target: unit-cost insertion, deletion and
-// substitution, plus the transposition step ("Step 6A") cited below.
-// When either string is empty the length of the other one is returned.
-int levenshteinDistance(const std::string source, const std::string target) {
-    // Step 1
-    const int n = source.length();
-    const int m = target.length();
-    if (n == 0) {
-        return m;
-    }
-    if (m == 0) {
-        return n;
-    }
-    // Good form to declare a TYPEDEF
-    typedef std::vector< std::vector<int> > Tmatrix;
-    // (n+1) x (m+1) table; matrix[i][j] is the distance between the first i
-    // characters of source and the first j characters of target
-    Tmatrix matrix(n+1, std::vector<int>(m+1));
-    // Step 2: distance from/to the empty prefix is the prefix length
-    for (int i = 0; i <= n; i++) {
-        matrix[i][0]=i;
-    }
-    for (int j = 0; j <= m; j++) {
-        matrix[0][j]=j;
-    }
-    // Step 3
-    for (int i = 1; i <= n; i++) {
-        const char s_i = source[i-1];
-        // Step 4
-        for (int j = 1; j <= m; j++) {
-            const char t_j = target[j-1];
-            // Step 5
-            const int cost = (s_i == t_j) ? 0 : 1;
-            // Step 6
-            const int above = matrix[i-1][j];
-            const int left = matrix[i][j-1];
-            const int diag = matrix[i-1][j-1];
-            // cheapest of deletion, insertion and substitution; kept mutable
-            // because Step 6A may lower it (it was declared const and then
-            // assigned, which does not compile)
-            int cell = above + 1;
-            if (left + 1 < cell) cell = left + 1;
-            if (diag + cost < cell) cell = diag + cost;
-            // Step 6A: Cover transposition, in addition to deletion,
-            // insertion and substitution. This step is taken from:
-            // Berghel, Hal ; Roach, David : "An Extension of Ukkonen's
-            // Enhanced Dynamic Programming ASM Algorithm"
-            // (http://www.acm.org/~hlb/publications/asm/asm.html)
-            // NOTE(review): the i>2 && j>2 bound means a swap of the very
-            // first two characters is not treated as a transposition --
-            // confirm against the cited paper before changing it.
-            if (i>2 && j>2) {
-                int trans=matrix[i-2][j-2]+1;
-                if (source[i-2]!=t_j) trans++;
-                if (s_i!=target[j-2]) trans++;
-                if (cell>trans) cell=trans;
-            }
-            matrix[i][j]=cell;
-        }
-    }
-    // Step 7
-    return matrix[n][m];
-}
diff --git a/src/multichoose.h b/src/multichoose.h
deleted file mode 100644
index 275610b..0000000
--- a/src/multichoose.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#ifndef __MULTICHOOSE_H
-#define __MULTICHOOSE_H
-multichoose.h -- n multichoose k for generic vectors
-author: Erik Garrison <erik.garrison at bc.edu>
-last revised: 2010-04-16
-Copyright (c) 2010 by Erik Garrison
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-// provides multiset combinations out of the std::vector of objects
-template <class T>
-std::vector< std::vector<T> > multichoose(int k, std::vector<T>& objects) {
-    std::vector< std::vector<T> > choices;
-    // Nothing can be chosen from an empty set. Return before forming
-    // &objects[0] / &objects[size-1], which would be undefined behavior.
-    if (k > 0 && objects.empty()) {
-        return choices;
-    }
-    int j,j_1,q,r;
-    r = objects.size() - 1;
-    // combination indexes: a[i] points at the object currently chosen for
-    // slot i, b[i] at the last object, i.e. the largest value a[i] may take
-    std::vector<T*> a, b;
-    for (int i=0;i<k;i++) {
-        a.push_back(&objects[0]); b.push_back(&objects[r]);
-    }
-    while(1){
-        // emit the multiset described by the current pointers
-        std::vector<T> multiset;
-        for(int i=0;i<k;i++)
-            multiset.push_back(*a[i]);
-        choices.push_back(multiset);
-        // find the rightmost slot that has not reached the last object
-        j=k;
-        do { j--; } while(j>=0 && a[j]==b[j]);
-        if (j<0) break;
-        // advance that slot and reset every slot to its right to the same
-        // object, which keeps each emitted multiset in non-decreasing order
-        j_1=j;
-        while(j_1<=k-1){
-            a[j_1]=a[j_1]+1;
-            q=j_1;
-            while(q<k-1) {
-                a[q+1]=a[q];
-                q++;
-            }
-            q++;
-            j_1=q;
-        }
-    }
-    return choices;
-}
diff --git a/src/multipermute.h b/src/multipermute.h
deleted file mode 100644
index b0eeec3..0000000
--- a/src/multipermute.h
+++ /dev/null
@@ -1,185 +0,0 @@
-multipermute.h -- multiset permutations for generic vectors
-Follows 'Algorithm 1' from "Loopless Generation of Multiset Permutations using
-a Constant Number of Variables by Prefix Shifts." Aaron Williams, 2009
-author: Erik Garrison <erik.garrison at bc.edu>
-last revised: 2010-04-16
-Copyright (c) 2010 by Erik Garrison
-Permission is hereby granted, free of charge, to any person
-obtaining a copy of this software and associated documentation
-files (the "Software"), to deal in the Software without
-restriction, including without limitation the rights to use,
-copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the
-Software is furnished to do so, subject to the following
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-#include <vector>
-#include <algorithm>
-// Node of a singly linked list. A node owns the remainder of the list:
-// deleting it deletes every node reachable through next.
-template <class T>
-class ListElement {
-public:
-    T value;
-    ListElement<T>* next;
-    // next must start out NULL: the destructor deletes it unconditionally,
-    // so an uninitialized pointer here would be freed as garbage
-    ListElement() : value(), next(NULL) { }
-    ListElement(T val, ListElement<T>* n) : value(val), next(n) { }
-    // Walk n links forward from this node; stops early at the last node, so
-    // the result is never NULL. n <= 0 returns this node itself.
-    ListElement<T>* nth(int n) {
-        ListElement<T>* o = this;
-        int i = 0;
-        while (i < n && o->next != NULL) {
-            o = o->next;
-            ++i;
-        }
-        return o;
-    }
-    // frees the tail recursively (deleting NULL is a no-op)
-    ~ListElement() {
-        delete next;
-    }
-};
-// Build a linked list from multiset and return its head, or NULL when
-// multiset is empty. multiset is sorted in place as a side effect.
-template <class T>
-ListElement<T>* list_init(std::vector<T>& multiset) {
-    // no first element to dereference in an empty vector
-    if (multiset.empty()) {
-        return NULL;
-    }
-    // sort ascending; each element is then prepended, so the finished list
-    // runs in non-increasing order from the head
-    std::sort(multiset.begin(), multiset.end());
-    typename std::vector<T>::const_iterator item = multiset.begin();
-    ListElement<T>* h = new ListElement<T>(*item, NULL);
-    ++item;
-    while (item != multiset.end()) {
-        h = new ListElement<T>(*item, h);
-        ++item;
-    }
-    return h;
-}
-// Copy the values of the list starting at h into a vector, head first.
-// A NULL head gives an empty vector.
-template <class T>
-std::vector<T> linked_list_to_vector(ListElement<T>* h) {
-    std::vector<T> l;
-    for (ListElement<T>* o = h; o != NULL; o = o->next) {
-        l.push_back(o->value);
-    }
-    return l;
-}
-// provides multiset permutations out of the std::vector multiset
-// All permutations are collected and returned at once. multiset is handed to
-// list_init by reference. An empty multiset returns no permutations.
-template <class T>
-std::vector< std::vector<T> > multipermute(std::vector<T>& multiset) {
-    std::vector< std::vector<T> > results;
-    // list_init and nth() below need at least one element
-    if (multiset.empty()) {
-        return results;
-    }
-    ListElement<T>* h = list_init(multiset);
-    // signed length, so that n - 2 is -1 (not a wrapped unsigned value) for a
-    // single-element multiset; nth() treats a negative count as "this node"
-    const int n = static_cast<int>(multiset.size());
-    ListElement<T>* i = h->nth(n - 2);
-    ListElement<T>* j = h->nth(n - 1);
-    ListElement<T>* s;
-    ListElement<T>* t;
-    results.push_back(linked_list_to_vector(h));
-    // each pass unlinks the node after s and moves it to the head (a prefix
-    // shift), yielding the next permutation
-    while (j->next != NULL || j->value < h->value) {
-        if (j->next != NULL && i->value >= j->next->value) {
-            s = j;
-        } else {
-            s = i;
-        }
-        t = s->next;
-        s->next = t->next;
-        t->next = h;
-        if (t->value < h->value) {
-            i = t;
-        }
-        j = i->next;
-        h = t;
-        results.push_back(linked_list_to_vector(h));
-    }
-    // h owns the whole list; deleting it frees every node
-    delete h;
-    return results;
-}
-// Incremental form of multipermute(): each call to next() returns one further
-// permutation, and an empty vector once there are none left.
-// The object owns its linked list and frees it in the destructor; it defines
-// no copy constructor or assignment, so copying an instance would free the
-// list twice.
-// NOTE(review): the access specifiers were not visible in this copy of the
-// file; the data members are left at the class default here -- confirm.
-template <class T>
-class MultisetPermutations {
-    std::vector<T> multiset;
-    ListElement<T> *h, *i, *j, *s, *t;
-    bool firstPermutation;
-public:
-    MultisetPermutations(std::vector<T>& m_multiset)
-        : multiset(m_multiset)
-        , h(NULL), i(NULL), j(NULL), s(NULL), t(NULL)
-        , firstPermutation(true)
-    {
-        // an empty multiset has no list to build; h stays NULL and next()
-        // reports exhaustion straight away
-        if (!multiset.empty()) {
-            h = list_init(multiset);
-            // signed length keeps n - 2 at -1 for a single element
-            const int n = static_cast<int>(multiset.size());
-            i = h->nth(n - 2);
-            j = h->nth(n - 1);
-        }
-    }
-    std::vector<T> next(void) {
-        if (firstPermutation) {
-            firstPermutation = false;
-            return linked_list_to_vector(h);
-        }
-        // one prefix shift per call (this was a while loop that always
-        // returned on its first pass)
-        if (h != NULL && (j->next != NULL || j->value < h->value)) {
-            if (j->next != NULL && i->value >= j->next->value) {
-                s = j;
-            } else {
-                s = i;
-            }
-            t = s->next;
-            s->next = t->next;
-            t->next = h;
-            if (t->value < h->value) {
-                i = t;
-            }
-            j = i->next;
-            h = t;
-            return linked_list_to_vector(h);
-        }
-        // exhausted
-        std::vector<T> empty;
-        return empty;
-    }
-    ~MultisetPermutations(void) {
-        delete h;
-    }
-};
diff --git a/src/split.cpp b/src/split.cpp
deleted file mode 100644
index 5f1dc4e..0000000
--- a/src/split.cpp
+++ /dev/null
@@ -1,33 +0,0 @@
-#include "split.h"
-// Split s at every occurrence of the character delim and append the pieces to
-// elems, which is also returned. Adjacent delimiters produce empty pieces.
-std::vector<std::string> &split(const std::string &s, char delim, std::vector<std::string> &elems) {
-    std::stringstream ss(s);
-    std::string item;
-    while(std::getline(ss, item, delim)) {
-        elems.push_back(item);
-    }
-    return elems;
-}
-// Convenience form of the above that returns a fresh vector.
-std::vector<std::string> split(const std::string &s, char delim) {
-    std::vector<std::string> elems;
-    return split(s, delim, elems);
-}
-// Split s at any character contained in delims and append the non-empty
-// pieces to elems, which is also returned. Runs of delimiters are skipped, so
-// no empty pieces are produced (the behavior of the strtok() loop this
-// replaces). Implemented with std::string searches instead of strtok() on a
-// variable-length stack array: that array is not standard C++, and strtok()
-// keeps hidden global state.
-std::vector<std::string> &split(const std::string &s, const std::string& delims, std::vector<std::string> &elems) {
-    std::string::size_type start = s.find_first_not_of(delims);
-    while (start != std::string::npos) {
-        const std::string::size_type stop = s.find_first_of(delims, start);
-        if (stop == std::string::npos) {
-            // last piece runs to the end of the string
-            elems.push_back(s.substr(start));
-            break;
-        }
-        elems.push_back(s.substr(start, stop - start));
-        start = s.find_first_not_of(delims, stop);
-    }
-    return elems;
-}
-// Convenience form of the above that returns a fresh vector.
-std::vector<std::string> split(const std::string &s, const std::string& delims) {
-    std::vector<std::string> elems;
-    return split(s, delims, elems);
-}
diff --git a/src/split.h b/src/split.h
deleted file mode 100644
index bd5525d..0000000
--- a/src/split.h
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef __SPLIT_H
-#define __SPLIT_H
-// functions to split a string by a specific delimiter
-#include <string>
-#include <vector>
-#include <sstream>
-#include <string.h>
-// thanks to Evan Teran, http://stackoverflow.com/questions/236129/how-to-split-a-string/236803#236803
-// split a string on a single delimiter character (delim)
-std::vector<std::string>& split(const std::string &s, char delim, std::vector<std::string> &elems);
-std::vector<std::string> split(const std::string &s, char delim);
-// split a string on any character found in the string of delimiters (delims)
-std::vector<std::string>& split(const std::string &s, const std::string& delims, std::vector<std::string> &elems);
-std::vector<std::string> split(const std::string &s, const std::string& delims);
diff --git a/src/version_release.txt b/src/version_release.txt
deleted file mode 100644
index 875d109..0000000
--- a/src/version_release.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file was auto-generated by running "make setversion VERSION=v1.0.0"
-# on Sat Nov 14 16:39:28 CET 2015 .
-# Please do not edit or commit this file manually.
diff --git a/test/Makefile b/test/Makefile
deleted file mode 100644
index 6285c24..0000000
--- a/test/Makefile
+++ /dev/null
@@ -1,15 +0,0 @@
-.PHONY: all clean
-all: test
-test: $(freebayes) $(vcfuniq)
- prove -v t
- cd .. && $(MAKE)
- cd ../vcflib && $(MAKE)
diff --git a/test/region-and-target-handling.t b/test/region-and-target-handling.t
deleted file mode 100644
index b99b6ee..0000000
--- a/test/region-and-target-handling.t
+++ /dev/null
@@ -1,104 +0,0 @@
-# vi: set ft=sh :
-test=$(dirname $0)
-root=$(dirname $0)/..
-source $test/test-simple-bash/lib/test-simple.bash \
- tests 8
-ref=$test/`basename $0`.ref
-alt=$test/`basename $0`.alt
-bam=$test/`basename $0`.bam
-bed=$test/`basename $0`.bed
-vcf=$test/`basename $0`.vcf
-trap 'rm -f $ref* $alt $bam* $bed $vcf' EXIT
-# 01234567 start
-# 12345678 end
-cat >$ref <<REF
-samtools faidx $ref
-cat >$alt <<ALT
-samtools view -b - >$bam <<SAM
- at HD VN:1.5 SO:coordinate
- at SQ SN:ref LN:8
-alt 0 ref 1 30 1X1=2X1=1X1=1X * 0 0 GTTAGGTT *
-samtools index $bam
-cat >$bed <<BED
-ref 0 1 first_base
-ref 2 4 third_and_fourth_base
-ref 5 6 sixth_base
-ref 7 8 eigth_base
-cat >$vcf <<VCF
-##INFO=<ID=NAME,Number=0,Type=String,Description="Test name">
-ref 1 . A G 1234 PASS NAME=first base
-ref 2 . T . 1234 PASS NAME=second base
-ref 3 . C T 1234 PASS NAME=third base
-ref 4 . G A 1234 PASS NAME=fourth base
-ref 5 . G . 1234 PASS NAME=fifth base
-ref 6 . C G 1234 PASS NAME=sixth base
-ref 7 . T . 1234 PASS NAME=seventh base
-ref 8 . A T 1234 PASS NAME=eigth base
-PS4='\n+ '
-function run_freebayes() {
- ($root/bin/freebayes "$@" \
- --haplotype-length 0 --min-alternate-count 1 \
- --min-alternate-fraction 0 --pooled-continuous --report-monomorphic \
- --ploidy 1 \
- -f $ref $bam \
- | grep -vE '^#' | cut -f1-5)
-if [[ -n $TEST_DEBUG ]]; then
- cat $ref >&2
- cat $bed >&2
- cat $vcf >&2
- vcfannotate --bed $bed --key MATCH $vcf >&2
- vcfintersect --bed $bed $vcf >&2
- bedtools intersect -a $vcf -b $bed >&2
-[[ -z $(run_freebayes --region ref:4-5 --region ref:6-7) ]]; ok $? 'ref:4-5 ref:6-7 are empty'
-[[ -z $(run_freebayes --region ref:4 --region ref:6) ]]; ok $? 'ref:4 ref:6 are empty'
-expected=`cat <<END
-ref 6 . C G
-ref 8 . A T
-[[ $(run_freebayes --region ref:5-6 --region ref:7-8) == $expected ]]; ok $? 'ref:5-6 ref:7-8'
-[[ $(run_freebayes --region ref:5 --region ref:7) == $expected ]]; ok $? 'ref:5 ref:7'
-[[ $(run_freebayes --region ref:5-) == $expected ]]; ok $? 'ref:5-'
-expected=`cat <<END
-ref 1 . A G
-ref 3 . CG TA
-ref 6 . C G
-ref 8 . A T
-[[ $(run_freebayes --targets $bed) == $expected ]]; ok $? "--targets $bed"
-[[ $(run_freebayes --region ref) == $expected ]]; ok $? "--region ref"
-[[ $(run_freebayes --region ref:1-20 2>&1) =~ "Target region coordinates (ref 1 19) outside of reference sequence bounds (ref 8)" ]]
- ok $? 'region outside of bounds error'
diff --git a/test/splice/1:883884-887618.bam b/test/splice/1:883884-887618.bam
deleted file mode 100644
index 61bdcb9..0000000
Binary files a/test/splice/1:883884-887618.bam and /dev/null differ
diff --git a/test/splice/1:883884-887618.bam.bai b/test/splice/1:883884-887618.bam.bai
deleted file mode 100644
index 09649e4..0000000
Binary files a/test/splice/1:883884-887618.bam.bai and /dev/null differ
diff --git a/test/splice/1:883884-887618.fa b/test/splice/1:883884-887618.fa
deleted file mode 100644
index 6f31855..0000000
--- a/test/splice/1:883884-887618.fa
+++ /dev/null
@@ -1,64 +0,0 @@
diff --git a/test/splice/1:883884-887618.fa.fai b/test/splice/1:883884-887618.fa.fai
deleted file mode 100644
index dc79067..0000000
--- a/test/splice/1:883884-887618.fa.fai
+++ /dev/null
@@ -1 +0,0 @@
-1 3735 3 60 61
diff --git a/test/t/01_call_variants.t b/test/t/01_call_variants.t
deleted file mode 100644
index abc25f1..0000000
--- a/test/t/01_call_variants.t
+++ /dev/null
@@ -1,110 +0,0 @@
-#!/usr/bin/env bash
-. ./bash-tap/bash-tap-bootstrap
-PATH=../bin:$PATH # for freebayes
-PATH=../scripts:$PATH # for freebayes-parallel
-PATH=../vcflib/bin:$PATH # for vcf binaries used by freebayes-parallel
-plan tests 19
-is $(echo "$(comm -12 <(cat tiny/NA12878.chr22.tiny.giab.vcf | grep -v "^#" | cut -f 2 | sort) <(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f 2 | sort) | wc -l) >= 13" | bc) 1 "variant calling recovers most of the GiAB variants in a test region"
-# count the calls made when each region is run separately
-# ("$( (" with a space keeps the shell from reading "$((" as arithmetic expansion)
-by_region=$( (for region in \
-    q:180-191 \
-    q:1002-1013 \
-    q:1811-1825 \
-    q:1911-1922 \
-    q:2344-2355 \
-    q:3257-3268 \
-    q:4443-4454 \
-    q:5003-5014 \
-    q:5074-5085 \
-    q:5089-5100 \
-    q:5632-5646 \
-    q:6412-6423 \
-    q:8840-8851 \
-    q:9245-9265 \
-    q:9785-9796 \
-    q:10526-10537 \
-    q:11255-11266 \
-    q:11530-11541 \
-    q:12119-12130;
-do
-    freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -r $region | grep -v "^#"
-done) |wc -l)
-at_once=$(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l)
-is $by_region $at_once "freebayes produces the same number of calls if targeted per site or called without targets"
-# the same regions as the by-region run above, written out as a BED file
-cat >targets.bed <<EOF
-q 180 191
-q 1002 1013
-q 1811 1825
-q 1911 1922
-q 2344 2355
-q 3257 3268
-q 4443 4454
-q 5003 5014
-q 5074 5085
-q 5089 5100
-q 5632 5646
-q 6412 6423
-q 8840 8851
-q 9245 9265
-q 9785 9796
-q 10526 10537
-q 11255 11266
-q 11530 11541
-q 12119 12130
-EOF
-is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -t targets.bed | grep -v "^#" | wc -l) $by_region "a targets bed file can be used with the same effect as running by region"
-#rm targets.bed
-is $(samtools view -u tiny/NA12878.chr22.tiny.bam | freebayes -f tiny/q.fa --stdin | grep -v "^#" | wc -l) \
- $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "reading from stdin or not makes no difference"
-is $(samtools view tiny/NA12878.chr22.tiny.bam | wc -l) $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam -d 2>&1 | grep ^alignment: | wc -l) "freebayes processes all alignments in input"
-# ensure targeting works even when there are no reads
-is $(freebayes -f tiny/q.fa -@ tiny/q.vcf.gz tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 19 "freebayes correctly handles variant input"
-# ensure that positions at which no variants exist get put in the out vcf
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes puts required variants in output"
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" | wc -l) 3 "freebayes limits calls to input variants correctly"
-is $(freebayes -f tiny/q.fa -@ tiny/q.vcf.gz -l tiny/1read.bam | grep -v "^#" | wc -l) 20 "freebayes reports all input variants even when there is no input data"
-# check variant input with region specified
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 2 "freebayes handles region and variant input"
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 tiny/NA12878.chr22.tiny.bam -l | grep -v "^#" | wc -l) 2 "freebayes limits to variant input correctly when region is given"
-# check variant input when reading from stdin
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | cut -f1,2 | grep -P "(\t500$|\t11000$|\t1000$)" | wc -l) 3 "freebayes handles variant input and reading from stdin"
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 3 "freebayes limits to variant input when reading from stdin"
-is $(freebayes -f tiny/q.fa -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l --stdin < tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) 2 "freebayes handles region, stdin, and variant input"
-gzip -c tiny/q.fa >tiny/q.fa.gz
-cp tiny/q.fa.fai tiny/q.fa.gz.fai
-freebayes -f tiny/q.fa.gz -@ tiny/q_spiked.vcf.gz -r q:1-10000 -l - < tiny/NA12878.chr22.tiny.bam >/dev/null 2>/dev/null
-is $? 1 "freebayes bails out when given a gzipped or corrupted reference"
-rm tiny/q.fa.gz.*
-is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) $(freebayes-parallel tiny/q.regions 2 -f tiny/q.fa tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "running in parallel makes no difference"
-#is $(freebayes -f 'tiny/q with spaces.fa' tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) $(freebayes-parallel 'tiny/q with spaces.regions' 2 -f 'tiny/q with spaces.fa' tiny/NA12878.chr22.tiny.bam | grep -v "^#" | wc -l) "freebayes handles spaces in file names"
-is $(freebayes -f splice/1:883884-887618.fa splice/1:883884-887618.bam | grep ^1 | wc -l) 1 "freebayes can handle spliced reads"
-is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam --gvcf | grep '<\*>' | wc -l) 20 "freebayes produces the expected number of lines of gVCF output"
-is $(freebayes -f tiny/q.fa tiny/NA12878.chr22.tiny.bam --gvcf --gvcf-chunk 50 | grep '<\*>' | wc -l) 245 "freebayes produces the expected number of lines of gVCF output"
diff --git a/test/tiny/1read.bam b/test/tiny/1read.bam
deleted file mode 100644
index f052290..0000000
Binary files a/test/tiny/1read.bam and /dev/null differ
diff --git a/test/tiny/1read.bam.bai b/test/tiny/1read.bam.bai
deleted file mode 100644
index 38bec45..0000000
Binary files a/test/tiny/1read.bam.bai and /dev/null differ
diff --git a/test/tiny/NA12878.chr22.tiny.bam b/test/tiny/NA12878.chr22.tiny.bam
deleted file mode 100644
index 1c7f605..0000000
Binary files a/test/tiny/NA12878.chr22.tiny.bam and /dev/null differ
diff --git a/test/tiny/NA12878.chr22.tiny.bam.bai b/test/tiny/NA12878.chr22.tiny.bam.bai
deleted file mode 100644
index 066e9bd..0000000
Binary files a/test/tiny/NA12878.chr22.tiny.bam.bai and /dev/null differ
diff --git a/test/tiny/NA12878.chr22.tiny.giab.vcf b/test/tiny/NA12878.chr22.tiny.giab.vcf
deleted file mode 100644
index 1b27478..0000000
--- a/test/tiny/NA12878.chr22.tiny.giab.vcf
+++ /dev/null
@@ -1,89 +0,0 @@
-##FILTER=<ID=Uncertain,Description="Uncertain genotype due to reason in filter INFO field">
-##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Total read depth summed across all datasets, excluding MQ0 reads">
-##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Net Genotype quality across all datasets, defined as difference between most likely and next most likely genotype likelihoods">
-##FORMAT=<ID=GT,Number=1,Type=String,Description="Net Genotype across all datasets">
-##FORMAT=<ID=PL,Number=G,Type=Integer,Description="Normalized, Phred-scaled likelihoods summed across all unfiltered datasets for genotypes as defined in the VCF specification">
-##INFO=<ID=allalts,Number=1,Type=Integer,Description="All ALT alleles originally considered at this position">
-##INFO=<ID=datasetcalls,Number=1,Type=Integer,Description="Number of datasets with any genotype call at this position">
-##INFO=<ID=DPSum,Number=1,Type=Integer,Description="Total read depth summed across all datasets, excluding MQ0 reads">
-##INFO=<ID=Entropy,Number=1,Type=Float,Description="Shannon entropy of variant flanking regions, 12bp on both sides">
-##INFO=<ID=filter,Number=1,Type=String,Description="Reason for filtering this genotype as uncertain">
-##INFO=<ID=geno,Number=1,Type=Integer,Description="Most probable genotype, corresponding to the minimum entry in the PL field (e.g., 1=0/0,2=0/1,3=1/1,4=0/2,etc)">
-##INFO=<ID=genoMapGood,Number=1,Type=Integer,Description="Number of datasets calling this genotype with VQSR mapping tranche <= 95">
-##INFO=<ID=HapNoVar,Number=1,Type=Integer,Description="Number of datasets for which HaplotypeCaller called a variant within 35bp and did not call a variant at this location">
-##INFO=<ID=HRun,Number=1,Type=Integer,Description="Largest Contiguous Homopolymer Run of Variant Allele In Either Direction">
-##INFO=<ID=NoCG,Number=0,Type=Flag,Description="Present if no consensus reached, so looked at all datasets except Complete Genomics since it may have a different representation of complex variants">
-##INFO=<ID=NoPLTot,Number=1,Type=Integer,Description="Number of datasets with likelihood ratio > 20 for a genotype different from the called genotype">
-##INFO=<ID=platforms,Number=1,Type=Integer,Description="Number of different platforms that called this genotype">
-##INFO=<ID=platformbias,Number=.,Type=String,Description="Names of platforms that have at more than twice as many incorrect than correct genotypes at this location, indicating platform-specific bias (ill=Illumina,sol=SOLiD,454=454,ion=Ion Torrent,cg=Complete Genomics)">
-##INFO=<ID=platformnames,Number=.,Type=String,Description="Names of platforms that called this genotype (ill=Illumina,sol=SOLiD,454=454,ion=Ion Torrent,cg=Complete Genomics)">
-##INFO=<ID=PL454WG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~16x 454 whole genome sequencing from 1000 Genomes Project, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLCG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~73x Complete Genomics whole genome sequencing, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLHSWEx,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~66x 2x100bp Illumina exome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLHSWG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~68x 2x100bp Illumina whole genome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLILL250,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~50x 2x250bp Illumina PCR-free whole genome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLILLCLIA,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~80x 2x100bp Illumina whole genome sequencing from Illumina CLIA lab, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLIllPCRFree,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~56x 2x100bp Illumina PCR-free whole genome sequencing from Illumina Platinum Genomes Project, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLILLWEx,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~30x 2x54bp Illumina exome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLILLWG,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~39x 2x44bp Illumina whole genome sequencing from Broad Institute, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLIonEx,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~80x mean 237bp Ion Torrent exome sequencing from Life Technologies, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLPlatGen,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~190x 2x100bp Illumina PCR-free whole genome sequencing from Illumina Platinum Genomes Project, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLXIll,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~37x 2x100bp Illumina whole genome sequencing from X Prize, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLXPSolWGLS,Number=.,Type=String,Description="Genotype likelihoods (PL) for ~24x 50bpx35bp SOLiD whole genome sequencing from X Prize, preceded by filtering info if this dataset was not used due to evidence of bias">
-##INFO=<ID=PLminsum,Number=1,Type=Integer,Description="Net Genotype quality across all datasets, defined as difference between most likely and next most likely genotype likelihoods">
-##INFO=<ID=PLminsumOverDP,Number=1,Type=Float,Description="Net Genotype quality across all datasets, defined as difference between most likely and next most likely genotype likelihoods, divided by the depth of coverage">
-##INFO=<ID=RU,Number=1,Type=String,Description="Tandem repeat unit (bases)">
-##INFO=<ID=RPA,Number=.,Type=Integer,Description="Number of times tandem repeat unit is repeated, for each allele (including reference)">
-##INFO=<ID=TrancheABQDmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with abnormal allele balance (AB and QD)">
-##INFO=<ID=TrancheAlignmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with local alignment errors (distance from the end of the read and clipping)">
-##INFO=<ID=TrancheMapmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with mapping errors (mapping quality and depth of coverage)">
-##INFO=<ID=TrancheSSEmin2,Number=1,Type=Float,Description="2nd lowest VQSR tranche for the called genotype for annotations associated with systematic sequencing errors (strand bias and neighboring base quality)">
-##INFO=<ID=varType,Number=1,Type=String,Description="Type of variant">
-##INFO=<ID=YesPLtot,Number=1,Type=Integer,Description="Number of datasets with likelihood ratio > 20 for the called genotype">
-##INFO=<ID=TYPE,Number=A,Type=String,Description="The type of allele, either snp, mnp, ins, del, or complex.">
-##INFO=<ID=LEN,Number=A,Type=Integer,Description="allele length">
-q 186 . T C 11488 PASS DPSum=824;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=353,0,389;PLCG=698,0,750;PLHSWG=1303,0,1514;PLILL250=694,0,744;PLILLCLIA=1984,0,1727;PLILLWG=652,0,578;PLIllPCRFree=749,0,1617;PLNCIIonWG=67,0,116;PLPlatGen=4041,0,4291;PLXIll=515,0,763;PLXPSolWGLS=432,0,608;PLminsum=11488;PLminsumOverDP=13.94;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg,i [...]
-q 1008 . C T 6462 PASS DPSum=569;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=112,0,152;PLCG=471,0,330;PLHSWG=579,0,778;PLILL250=335,0,178;PLILLCLIA=1013,0,1657;PLILLWG=173,0,294;PLIllPCRFree=781,0,698;PLPlatGen=2598,0,2675;PLXIll=303,0,376;PLXPSolWGLS=97,0,57;PLminsum=6462;PLminsumOverDP=11.36;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=10;allalts=T;datasetcalls=10;geno=2;genoMapGood=10;platformbias=none;platformnames=ill,454,sol,cg;platforms=4;varType=SNP GT: [...]
-q 1817 . G A 3061 PASS DPSum=361;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=98,0,24;PLCG=125,0,138;PLHSWG=164,0,80;PLILL250=325,0,792;PLILLCLIA=316,0,212;PLILLWG=65,0,27;PLIllPCRFree=546,0,489;PLPlatGen=1422,0,2430;PLminsum=3061;PLminsumOverDP=8.48;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=8;allalts=A;datasetcalls=8;geno=2;genoMapGood=8;platformbias=none;platformnames=ill,454,cg;platforms=3;varType=SNP GT:DP:GQ:PL 0/1:361:3061:3061,0,4192
-q 1820 . C T 3594 PASS DPSum=358;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=97,0,20;PLCG=140,0,160;PLHSWG=165,0,106;PLILL250=332,0,821;PLILLCLIA=317,0,191;PLILLWG=63,0,62;PLIllPCRFree=695,0,582;PLPlatGen=1785,0,2767;PLminsum=3594;PLminsumOverDP=10.04;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=7;allalts=T;datasetcalls=8;geno=2;genoMapGood=7;platformbias=none;platformnames=ill,cg;platforms=2;varType=SNP GT:DP:GQ:PL 0/1:358:3594:3594,0,4709
-q 1917 . A G 5718 PASS DPSum=430;HRun=0;HapNoVar=0;NoPLTot=0;PLCG=175,0,339;PLHSWG=231,0,186;PLILL250=450,0,787;PLILLCLIA=697,0,510;PLILLWG=76,0,179;PLIllPCRFree=776,0,547;PLNCIIonWG=44,0,22;PLPlatGen=3171,0,2815;PLXIll=98,0,94;PLminsum=5479;PLminsumOverDP=12.74;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=9;allalts=G;datasetcalls=9;geno=2;genoMapGood=9;platformbias=none;platformnames=ill,cg,ion;platforms=3;varType=SNP GT:DP:GQ:PL 0/1:430:5479:5718,0,5479
-q 4449 . G A 11413 PASS DPSum=725;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=242,0,405;PLCG=685,0,483;PLHSWG=1176,0,998;PLILL250=967,0,487;PLILLCLIA=956,0,1221;PLILLWG=615,0,542;PLIllPCRFree=1346,0,1143;PLNCIIonWG=1,0,192;PLPlatGen=4726,0,3765;PLXIll=699,0,761;PLminsum=9997;PLminsumOverDP=13.79;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=9;allalts=A;datasetcalls=10;geno=2;genoMapGood=9;platformbias=none;platformnames=ill,454,cg;platforms=3;varType=SNP GT:DP:G [...]
-q 5009 . C T 8517 PASS DPSum=639;HRun=0;HapNoVar=0;NoPLTot=0;PL454WG=205,0,204;PLCG=744,0,686;PLHSWG=932,0,971;PLILL250=315,0,370;PLILLCLIA=774,0,943;PLILLWG=273,0,298;PLIllPCRFree=878,0,1020;PLNCIIonWG=28,0,82;PLPlatGen=3611,0,4445;PLXIll=567,0,492;PLXPSolWGLS=190,0,302;PLminsum=8517;PLminsumOverDP=13.33;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=T;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg,ion;pla [...]
-q 6418 . G A 9700 PASS DPSum=777;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=117,0,141;PLCG=519,0,688;PLHSWG=1193,0,1365;PLILL250=760,0,399;PLILLCLIA=1473,0,1473;PLILLWG=477,0,680;PLIllPCRFree=743,0,1372;PLPlatGen=3923,0,3639;PLXIll=382,0,826;PLXPSolWGLS=113,0,198;PLminsum=9700;PLminsumOverDP=12.48;RPA=16,17;RU=A;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=10;allalts=A;datasetcalls=10;geno=2;genoMapGood=10;platformbias=none;platformnames=ill,454,sol,cg;platfor [...]
-q 8846 . T C 9797 PASS DPSum=727;HRun=1;HapNoVar=0;NoPLTot=0;PLCG=470,0,525;PLHSWG=1228,0,1675;PLILL250=516,0,649;PLILLCLIA=1506,0,1304;PLILLWG=541,0,453;PLIllPCRFree=1022,0,801;PLNCIIonWG=36,0,132;PLPlatGen=3724,0,4719;PLXIll=623,0,669;PLXPSolWGLS=131,0,408;PLminsum=9797;PLminsumOverDP=13.48;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=10;allalts=C;datasetcalls=10;geno=2;genoMapGood=10;platformbias=none;platformnames=ill,sol,cg,ion;platforms=4;varType= [...]
-q 9791 . A C 10272 PASS DPSum=777;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=239,0,213;PLCG=839,0,825;PLHSWG=1051,0,1286;PLILL250=337,0,490;PLILLCLIA=1591,0,1298;PLILLWG=458,0,844;PLIllPCRFree=716,0,1064;PLNCIIonWG=254,0,57;PLPlatGen=3778,0,4302;PLXIll=513,0,812;PLXPSolWGLS=496,0,558;PLminsum=10272;PLminsumOverDP=13.22;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg, [...]
-q 10532 . C A 8913 PASS DPSum=705;HRun=2;HapNoVar=0;NoPLTot=0;PLCG=653,0,743;PLHSWG=1626,0,1610;PLILL250=501,0,624;PLILLCLIA=896,0,1250;PLILLWG=320,0,420;PLIllPCRFree=908,0,1146;PLPlatGen=3625,0,4125;PLXIll=384,0,581;PLminsum=8913;PLminsumOverDP=12.64;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=8;allalts=A;datasetcalls=8;geno=2;genoMapGood=8;platformbias=none;platformnames=ill,cg;platforms=2;varType=SNP GT:DP:GQ:PL 0/1:705:8913:8913,0,10499
-q 11261 . T C 9848 PASS DPSum=709;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=106,0,86;PLCG=471,0,482;PLHSWG=1029,0,914;PLILL250=632,0,656;PLILLCLIA=1189,0,1776;PLILLWG=315,0,760;PLIllPCRFree=987,0,701;PLNCIIonWG=91,0,94;PLPlatGen=4150,0,4008;PLXIll=829,0,904;PLXPSolWGLS=49,0,135;PLminsum=9848;PLminsumOverDP=13.89;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,cg,ion;pl [...]
-q 11536 . T C 11666 PASS DPSum=841;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=172,0,410;PLCG=652,0,960;PLHSWEx=31,0,32;PLHSWG=1156,0,1240;PLILL250=818,0,685;PLILLCLIA=1490,0,1870;PLILLWG=807,0,626;PLIllPCRFree=1042,0,1136;PLNCIIonWG=303,0,116;PLPlatGen=4105,0,3971;PLXIll=795,0,850;PLXPSolWGLS=295,0,414;PLminsum=11666;PLminsumOverDP=13.87;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=12;allalts=C;datasetcalls=12;geno=2;genoMapGood=12;platformbias=none;platformna [...]
-q 12125 . T C 11323 PASS DPSum=806;HRun=1;HapNoVar=0;NoPLTot=0;PL454WG=254,0,312;PLCG=512,0,595;PLHSWG=1073,0,1401;PLILL250=603,0,978;PLILLCLIA=1833,0,1747;PLILLWG=597,0,727;PLIllPCRFree=1219,0,658;PLNCIIonWG=256,0,148;PLPlatGen=4169,0,3563;PLXIll=641,0,783;PLXPSolWGLS=166,0,106;PLminsum=11018;PLminsumOverDP=13.67;TrancheABQDmin2=0;TrancheAlignmin2=0;TrancheMapmin2=0;TrancheSSEmin2=0;YesPLtot=11;allalts=C;datasetcalls=11;geno=2;genoMapGood=11;platformbias=none;platformnames=ill,454,sol,c [...]
diff --git a/test/tiny/q with spaces.fa b/test/tiny/q with spaces.fa
deleted file mode 100644
index 085a809..0000000
--- a/test/tiny/q with spaces.fa
+++ /dev/null
@@ -1,207 +0,0 @@
diff --git a/test/tiny/q with spaces.fa.fai b/test/tiny/q with spaces.fa.fai
deleted file mode 100644
index dd7673b..0000000
--- a/test/tiny/q with spaces.fa.fai
+++ /dev/null
@@ -1 +0,0 @@
-q 12356 3 60 61
diff --git a/test/tiny/q with spaces.regions b/test/tiny/q with spaces.regions
deleted file mode 100644
index d129bcc..0000000
--- a/test/tiny/q with spaces.regions
+++ /dev/null
@@ -1,3 +0,0 @@
diff --git a/test/tiny/q.fa b/test/tiny/q.fa
deleted file mode 100644
index 085a809..0000000
--- a/test/tiny/q.fa
+++ /dev/null
@@ -1,207 +0,0 @@
diff --git a/test/tiny/q.fa.fai b/test/tiny/q.fa.fai
deleted file mode 100644
index dd7673b..0000000
--- a/test/tiny/q.fa.fai
+++ /dev/null
@@ -1 +0,0 @@
-q 12356 3 60 61
diff --git a/test/tiny/q.regions b/test/tiny/q.regions
deleted file mode 100644
index d129bcc..0000000
--- a/test/tiny/q.regions
+++ /dev/null
@@ -1,3 +0,0 @@
diff --git a/test/tiny/q.vcf.gz b/test/tiny/q.vcf.gz
deleted file mode 100644
index 87d557e..0000000
Binary files a/test/tiny/q.vcf.gz and /dev/null differ
diff --git a/test/tiny/q.vcf.gz.tbi b/test/tiny/q.vcf.gz.tbi
deleted file mode 100644
index 0781014..0000000
Binary files a/test/tiny/q.vcf.gz.tbi and /dev/null differ
diff --git a/test/tiny/q_spiked.vcf.gz b/test/tiny/q_spiked.vcf.gz
deleted file mode 100644
index 16f3ac4..0000000
Binary files a/test/tiny/q_spiked.vcf.gz and /dev/null differ
diff --git a/test/tiny/q_spiked.vcf.gz.tbi b/test/tiny/q_spiked.vcf.gz.tbi
deleted file mode 100644
index 0f0f0da..0000000
Binary files a/test/tiny/q_spiked.vcf.gz.tbi and /dev/null differ
diff --git a/ttmath/COPYRIGHT b/ttmath/COPYRIGHT
deleted file mode 100644
index 15bec4e..0000000
--- a/ttmath/COPYRIGHT
+++ /dev/null
@@ -1,28 +0,0 @@
-Copyright (c) 2006-2012, Tomasz Sowa
-All rights reserved.
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are met:
- * Redistributions of source code must retain the above copyright notice,
- this list of conditions and the following disclaimer.
- * Redistributions in binary form must reproduce the above copyright
- notice, this list of conditions and the following disclaimer in the
- documentation and/or other materials provided with the distribution.
- * Neither the name Tomasz Sowa nor the names of contributors to this
- project may be used to endorse or promote products derived
- from this software without specific prior written permission.
diff --git a/ttmath/ttmath.h b/ttmath/ttmath.h
deleted file mode 100644
index ee40e6e..0000000
--- a/ttmath/ttmath.h
+++ /dev/null
@@ -1,2853 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2012, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathmathtt
-#define headerfilettmathmathtt
- \file ttmath.h
- \brief Mathematics functions.
-#ifdef _MSC_VER
-//warning C4127: conditional expression is constant
-#pragma warning( disable: 4127 )
-//warning C4702: unreachable code
-#pragma warning( disable: 4702 )
-//warning C4800: forcing value to bool 'true' or 'false' (performance warning)
-#pragma warning( disable: 4800 )
-#include "ttmathbig.h"
-#include "ttmathobjects.h"
-namespace ttmath
- /*
- *
- * functions defined here are used only with Big<> types
- *
- *
- */
- /*
- *
- * functions for rounding
- *
- *
- */
- /*!
- this function skips the fraction from x
- e.g 2.2 = 2
- 2.7 = 2
- -2.2 = -2
- -2.7 = -2
- */
- template<class ValueType>
- ValueType SkipFraction(const ValueType & x)
- {
- ValueType result( x );
- result.SkipFraction();
- return result;
- }
- /*!
- this function rounds to the nearest integer value
- e.g 2.2 = 2
- 2.7 = 3
- -2.2 = -2
- -2.7 = -3
- */
- template<class ValueType>
- ValueType Round(const ValueType & x, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x; // NaN
- }
- ValueType result( x );
- uint c = result.Round();
- if( err )
- *err = c ? err_overflow : err_ok;
- return result;
- }
- /*!
- this function returns a value representing the smallest integer
- that is greater than or equal to x
- Ceil(-3.7) = -3
- Ceil(-3.1) = -3
- Ceil(-3.0) = -3
- Ceil(4.0) = 4
- Ceil(4.2) = 5
- Ceil(4.8) = 5
- */
- template<class ValueType>
- ValueType Ceil(const ValueType & x, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x; // NaN
- }
- ValueType result(x);
- uint c = 0;
- result.SkipFraction();
- if( result != x )
- {
- // x is with fraction
- // if x is negative we don't have to do anything
- if( !x.IsSign() )
- {
- ValueType one;
- one.SetOne();
- c += result.Add(one);
- }
- }
- if( err )
- *err = c ? err_overflow : err_ok;
- return result;
- }
- /*!
- this function returns a value representing the largest integer
- that is less than or equal to x
- Floor(-3.6) = -4
- Floor(-3.1) = -4
- Floor(-3) = -3
- Floor(2) = 2
- Floor(2.3) = 2
- Floor(2.8) = 2
- */
- template<class ValueType>
- ValueType Floor(const ValueType & x, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x; // NaN
- }
- ValueType result(x);
- uint c = 0;
- result.SkipFraction();
- if( result != x )
- {
- // x is with fraction
- // if x is positive we don't have to do anything
- if( x.IsSign() )
- {
- ValueType one;
- one.SetOne();
- c += result.Sub(one);
- }
- }
- if( err )
- *err = c ? err_overflow : err_ok;
- return result;
- }
- /*
- *
- * logarithms and the exponent
- *
- *
- */
- /*!
- this function calculates the natural logarithm (logarithm with the base 'e')
- */
- template<class ValueType>
- ValueType Ln(const ValueType & x, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x; // NaN
- }
- ValueType result;
- uint state = result.Ln(x);
- if( err )
- {
- switch( state )
- {
- case 0:
- *err = err_ok;
- break;
- case 1:
- *err = err_overflow;
- break;
- case 2:
- *err = err_improper_argument;
- break;
- default:
- *err = err_internal_error;
- break;
- }
- }
- return result;
- }
- /*!
- this function calculates the logarithm
- */
- template<class ValueType>
- ValueType Log(const ValueType & x, const ValueType & base, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err ) *err = err_improper_argument;
- return x;
- }
- if( base.IsNan() )
- {
- if( err ) *err = err_improper_argument;
- return base;
- }
- ValueType result;
- uint state = result.Log(x, base);
- if( err )
- {
- switch( state )
- {
- case 0:
- *err = err_ok;
- break;
- case 1:
- *err = err_overflow;
- break;
- case 2:
- case 3:
- *err = err_improper_argument;
- break;
- default:
- *err = err_internal_error;
- break;
- }
- }
- return result;
- }
- /*!
- this function calculates the expression e^x
- */
- template<class ValueType>
- ValueType Exp(const ValueType & x, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x; // NaN
- }
- ValueType result;
- uint c = result.Exp(x);
- if( err )
- *err = c ? err_overflow : err_ok;
- return result;
- }
- /*!
- *
- * trigonometric functions
- *
- */
- /*
- this namespace consists of auxiliary functions
- (something like 'private' in a class)
- */
- namespace auxiliaryfunctions
- {
- /*!
- an auxiliary function for calculating the Sine
- (you don't have to call this function)
- */
- template<class ValueType>
- uint PrepareSin(ValueType & x, bool & change_sign)
- {
- ValueType temp;
- change_sign = false;
- if( x.IsSign() )
- {
- // we're using the formula 'sin(-x) = -sin(x)'
- change_sign = !change_sign;
- x.ChangeSign();
- }
- // we're reducing the period 2*PI
- // (for big values there'll always be zero)
- temp.Set2Pi();
- if( x.Mod(temp) )
- return 1;
- // we're setting 'x' as being in the range of <0, 0.5PI>
- temp.SetPi();
- if( x > temp )
- {
- // x is in (pi, 2*pi>
- x.Sub( temp );
- change_sign = !change_sign;
- }
- temp.Set05Pi();
- if( x > temp )
- {
- // x is in (0.5pi, pi>
- x.Sub( temp );
- x = temp - x;
- }
- return 0;
- }
- /*!
- an auxiliary function for calculating the Sine
- (you don't have to call this function)
- it returns Sin(x) where 'x' is from <0, PI/2>
- we're calculating the Sin with using Taylor series in zero or PI/2
- (depending on which point of these two points is nearer to the 'x')
- Taylor series:
- sin(x) = sin(a) + cos(a)*(x-a)/(1!)
- - sin(a)*((x-a)^2)/(2!) - cos(a)*((x-a)^3)/(3!)
- + sin(a)*((x-a)^4)/(4!) + ...
- when a=0 it'll be:
- sin(x) = (x)/(1!) - (x^3)/(3!) + (x^5)/(5!) - (x^7)/(7!) + (x^9)/(9!) ...
- and when a=PI/2:
- sin(x) = 1 - ((x-PI/2)^2)/(2!) + ((x-PI/2)^4)/(4!) - ((x-PI/2)^6)/(6!) ...
- */
- template<class ValueType>
- ValueType Sin0pi05(const ValueType & x)
- {
- ValueType result;
- ValueType numerator, denominator;
- ValueType d_numerator, d_denominator;
- ValueType one, temp, old_result;
- // temp = pi/4
- temp.Set05Pi();
- temp.exponent.SubOne();
- one.SetOne();
- if( x < temp )
- {
- // we're using the Taylor series with a=0
- result = x;
- numerator = x;
- denominator = one;
- // d_numerator = x^2
- d_numerator = x;
- d_numerator.Mul(x);
- d_denominator = 2;
- }
- else
- {
- // we're using the Taylor series with a=PI/2
- result = one;
- numerator = one;
- denominator = one;
- // d_numerator = (x-pi/2)^2
- ValueType pi05;
- pi05.Set05Pi();
- temp = x;
- temp.Sub( pi05 );
- d_numerator = temp;
- d_numerator.Mul( temp );
- d_denominator = one;
- }
- uint c = 0;
- bool addition = false;
- old_result = result;
- for(uint i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
- {
- // we're starting from a second part of the formula
- c += numerator. Mul( d_numerator );
- c += denominator. Mul( d_denominator );
- c += d_denominator.Add( one );
- c += denominator. Mul( d_denominator );
- c += d_denominator.Add( one );
- temp = numerator;
- c += temp.Div(denominator);
- if( c )
- // Sin is from <-1,1> and cannot make an overflow
- // but the carry can be from the Taylor series
- // (then we only break our calculations)
- break;
- if( addition )
- result.Add( temp );
- else
- result.Sub( temp );
- addition = !addition;
- // we're testing whether the result has changed after adding
- // the next part of the Taylor formula, if not we end the loop
- // (it means 'x' is zero or 'x' is PI/2 or this part of the formula
- // is too small)
- if( result == old_result )
- break;
- old_result = result;
- }
- return result;
- }
- } // namespace auxiliaryfunctions
- /*!
- this function calculates the Sine
- */
- template<class ValueType>
- ValueType Sin(ValueType x, ErrorCode * err = 0)
- {
- using namespace auxiliaryfunctions;
- ValueType one, result;
- bool change_sign;
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x;
- }
- if( err )
- *err = err_ok;
- if( PrepareSin( x, change_sign ) )
- {
- // x is too big, we cannot reduce the 2*PI period
- // prior to version 0.8.5 the result was zero
- // result has NaN flag set by default
- if( err )
- *err = err_overflow; // maybe another error code? err_improper_argument?
- return result; // NaN is set by default
- }
- result = Sin0pi05( x );
- one.SetOne();
- // after calculations there can be small distortions in the result
- if( result > one )
- result = one;
- else
- if( result.IsSign() )
- // we've calculated the sin from <0, pi/2> and the result
- // should be positive
- result.SetZero();
- if( change_sign )
- result.ChangeSign();
- return result;
- }
- /*!
- this function calculates the Cosine
- we're using the formula cos(x) = sin(x + PI/2)
- */
- template<class ValueType>
- ValueType Cos(ValueType x, ErrorCode * err = 0)
- {
- if( x.IsNan() )
- {
- if( err )
- *err = err_improper_argument;
- return x; // NaN
- }
- ValueType pi05;
- pi05.Set05Pi();
- uint c = x.Add( pi05 );
- if( c )
- {
- if( err )
- *err = err_overflow;
- return ValueType(); // result is undefined (NaN is set by default)
- }
- return Sin(x, err);
- }
- /*!
- this function calculates the Tangent
- we're using the formula tan(x) = sin(x) / cos(x)
- it takes more time than calculating the Tan directly
- from, for example, a Taylor series but should be a bit more precise
- because Tan takes its values from -infinity to +infinity
- and when we calculate it from any series then we can make
- a greater error than calculating 'sin/cos'
- */
- template<class ValueType>
- ValueType Tan(const ValueType & x, ErrorCode * err = 0)
- {
- ValueType result = Cos(x, err);
- if( err && *err != err_ok )
- return result;
- if( result.IsZero() )
- {
- if( err )
- *err = err_improper_argument;
- result.SetNan();
- return result;
- }
- return Sin(x, err) / result;
- }
- /*!
- this function calculates the Tangent
- look at the description of Tan(...)
- (the abbreviation of Tangent can be 'tg' as well)
- */
- template<class ValueType>
- ValueType Tg(const ValueType & x, ErrorCode * err = 0)
- {
- return Tan(x, err);
- }
- /*!
- this function calculates the Cotangent
- we're using the formula cot(x) = cos(x) / sin(x)
- (why do we make it in this way?
- look at information in Tan() function)
- */
- template<class ValueType>
- ValueType Cot(const ValueType & x, ErrorCode * err = 0)
- {
- ValueType result = Sin(x, err);
- if( err && *err != err_ok )
- return result;
- if( result.IsZero() )
- {
- if( err )
- *err = err_improper_argument;
- result.SetNan();
- return result;
- }
- return Cos(x, err) / result;
- }
- /*!
- this function calculates the Cotangent
- look at the description of Cot(...)
- (the abbreviation of Cotangent can be 'ctg' as well)
- */
- template<class ValueType>
- ValueType Ctg(const ValueType & x, ErrorCode * err = 0)
- {
- return Cot(x, err);
- }
- /*
- *
- * inverse trigonometric functions
- *
- *
- */
- namespace auxiliaryfunctions
- {
- /*!
- an auxiliary function for calculating the Arc Sine
- we're calculating asin from the following formula:
- asin(x) = x + (1*x^3)/(2*3) + (1*3*x^5)/(2*4*5) + (1*3*5*x^7)/(2*4*6*7) + ...
- where abs(x) <= 1
- we're using this formula when x is from <0, 1/2>
- */
- template<class ValueType>
- ValueType ASin_0(const ValueType & x)
- {
- ValueType nominator, denominator, nominator_add, nominator_x, denominator_add, denominator_x;
- ValueType two, result(x), x2(x);
- ValueType nominator_temp, denominator_temp, old_result = result;
- uint c = 0;
- x2.Mul(x);
- two = 2;
- nominator.SetOne();
- denominator = two;
- nominator_add = nominator;
- denominator_add = denominator;
- nominator_x = x;
- denominator_x = 3;
- for(uint i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
- {
- c += nominator_x.Mul(x2);
- nominator_temp = nominator_x;
- c += nominator_temp.Mul(nominator);
- denominator_temp = denominator;
- c += denominator_temp.Mul(denominator_x);
- c += nominator_temp.Div(denominator_temp);
- // if there is a carry somewhere we only break the calculating
- // the result should be ok -- it's from <-pi/2, pi/2>
- if( c )
- break;
- result.Add(nominator_temp);
- if( result == old_result )
- // there's no sense to calculate more
- break;
- old_result = result;
- c += nominator_add.Add(two);
- c += denominator_add.Add(two);
- c += nominator.Mul(nominator_add);
- c += denominator.Mul(denominator_add);
- c += denominator_x.Add(two);
- }
- return result;
- }
- /*!
- an auxiliary function for calculating the Arc Sine
- we're calculating asin from the following formula:
- asin(x) = pi/2 - sqrt(2)*sqrt(1-x) * asin_temp
- asin_temp = 1 + (1*(1-x))/((2*3)*(2)) + (1*3*(1-x)^2)/((2*4*5)*(4)) + (1*3*5*(1-x)^3)/((2*4*6*7)*(8)) + ...
- where abs(x) <= 1
- we're using this formula when x is from (1/2, 1>
- the loop accumulates asin_temp in 'result'; it stops when a carry is reported,
- when adding a term no longer changes 'result', or after TTMATH_ARITHMETIC_MAX_LOOP terms
- */
- template<class ValueType>
- ValueType ASin_1(const ValueType & x)
- {
- ValueType nominator, denominator, nominator_add, nominator_x, nominator_x_add, denominator_add, denominator_x;
- ValueType denominator2;
- ValueType one, two, result;
- ValueType nominator_temp, denominator_temp, old_result;
- uint c = 0;
- two = 2;
- one.SetOne();
- // nominator holds the running product of odd numbers 1*3*5*...
- nominator = one;
- // result starts with the leading '1' of asin_temp
- result = one;
- old_result = result;
- // denominator holds the running product of even numbers 2*4*6*...
- denominator = two;
- nominator_add = nominator;
- denominator_add = denominator;
- // nominator_x holds the current power of (1-x), nominator_x_add keeps (1-x) itself
- nominator_x = one;
- nominator_x.Sub(x);
- nominator_x_add = nominator_x;
- // denominator_x is the odd factor 3,5,7,... and denominator2 the power of two 2,4,8,...
- denominator_x = 3;
- denominator2 = two;
- for(uint i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
- {
- // the current term = (nominator * (1-x)^i) / (denominator * denominator_x * denominator2)
- nominator_temp = nominator_x;
- c += nominator_temp.Mul(nominator);
- denominator_temp = denominator;
- c += denominator_temp.Mul(denominator_x);
- c += denominator_temp.Mul(denominator2);
- c += nominator_temp.Div(denominator_temp);
- // if there is a carry somewhere we only break the calculating
- // the result should be ok -- it's from <-pi/2, pi/2>
- if( c )
- break;
- result.Add(nominator_temp);
- if( result == old_result )
- // there's no sense to calculate more
- break;
- old_result = result;
- // prepare all the factors for the next term
- c += nominator_x.Mul(nominator_x_add);
- c += nominator_add.Add(two);
- c += denominator_add.Add(two);
- c += nominator.Mul(nominator_add);
- c += denominator.Mul(denominator_add);
- c += denominator_x.Add(two);
- c += denominator2.Mul(two);
- }
- // nominator_x_add = 2*(1-x), then its square root = sqrt(2)*sqrt(1-x)
- nominator_x_add.exponent.AddOne(); // *2
- one.exponent.SubOne(); // =0.5
- nominator_x_add.Pow(one); // =sqrt(nominator_x_add)
- result.Mul(nominator_x_add);
- // 'one' is reused to hold pi/2, the final value is pi/2 - sqrt(2)*sqrt(1-x)*asin_temp
- one.Set05Pi();
- one.Sub(result);
- return one;
- }
- } // namespace auxiliaryfunctions
- /*!
- this function calculates the Arc Sine, x must be from <-1,1>
- a NaN argument is returned unchanged and abs(x) > 1 gives NaN,
- in both cases *err (if given) is set to err_improper_argument,
- otherwise *err (if given) is set to err_ok
- */
- template<class ValueType>
- ValueType ASin(ValueType x, ErrorCode * err = 0)
- {
-     using namespace auxiliaryfunctions;
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType limit, answer;
-     limit.SetOne();
-     if( x.GreaterWithoutSignThan(limit) )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return answer; // a default value is NaN
-     }
-     // asin(-x) = -asin(x): work on abs(x) and restore the sign at the end
-     const bool negative = x.IsSign();
-     if( negative )
-         x.Abs();
-     limit.exponent.SubOne(); // limit = 0.5
-     // above 0.5 the series of ASin_1 is used, otherwise the one of ASin_0
-     answer = x.GreaterWithoutSignThan(limit) ? ASin_1(x) : ASin_0(x);
-     if( negative )
-         answer.ChangeSign();
-     if( err )
-         *err = err_ok;
-     return answer;
- }
- /*!
- this function calculates the Arc Cosine
- by using the identity: acos(x) = pi/2 - asin(x)
- 'err' is passed straight to ASin()
- */
- template<class ValueType>
- ValueType ACos(const ValueType & x, ErrorCode * err = 0)
- {
-     ValueType half_pi;
-     half_pi.Set05Pi();
-     const ValueType asin_x = ASin(x, err);
-     half_pi.Sub(asin_x);
-     return half_pi;
- }
- namespace auxiliaryfunctions
- {
- /*!
- an auxiliary function for calculating the Arc Tangent
- it sums the Taylor series expanded in zero:
- atan(x) = x - (x^3)/3 + (x^5)/5 - (x^7)/7 + ...
- meant for x in <0; 0.5) (x can be in (-0.5 ; 0.5) too)
- the summing stops when a carry is reported, when a new term does not change
- the sum any more, or after TTMATH_ARITHMETIC_MAX_LOOP terms
- */
- template<class ValueType>
- ValueType ATan0(const ValueType & x)
- {
-     ValueType sum(x), previous_sum(x), power(x), x_squared(x);
-     ValueType divisor, divisor_step, term;
-     uint carry = 0;
-     bool subtract = true; // the term following x is -(x^3)/3
-     x_squared.Mul(x);
-     divisor.SetOne();
-     divisor_step = 2;
-     for(uint k=1 ; k<=TTMATH_ARITHMETIC_MAX_LOOP ; ++k)
-     {
-         carry += power.Mul(x_squared);      // x^3, x^5, x^7, ...
-         carry += divisor.Add(divisor_step); // 3, 5, 7, ...
-         term = power;
-         carry += term.Div(divisor);
-         if( carry )
-             break; // the result should be ok
-         if( subtract )
-             sum.Sub(term);
-         else
-             sum.Add(term);
-         if( sum == previous_sum )
-             break; // further terms would not change the sum
-         previous_sum = sum;
-         subtract = !subtract;
-     }
-     return sum;
- }
- /*!
- an auxiliary function for calculating the Arc Tangent
- where x is in <0 ; 1>
- */
- template<class ValueType>
- ValueType ATan01(const ValueType & x)
- {
-     /*
-     below 0.5 the Taylor series from ATan0() is used directly
-     it would be better to split at about sqrt(2)-1=0.41... instead of 0.5
-     because for x = sqrt(2)-1 we have abs(x) = abs( (x-1)/(1+x) )
-     and both branches would converge equally fast around the split point
-     for example ATan0(0.4999) needs about 141 iterations whereas for x=0.5
-     ATan0( (x-1)/(1+x) ) needs only about 89 iterations (both for Big<3,9>)
-     in the future this 0.5 can be changed
-     */
-     ValueType threshold;
-     threshold.Set05();
-     if( x.SmallerWithoutSignThan(threshold) )
-         return ATan0(x);
-     /*
-     x>=0.5 and x<=1 (x can be even smaller than 0.5)
-     y = atan(x)
-     x = tan(y)
-     tan(y-b) = (tan(y)-tan(b)) / (1+tan(y)*tan(b))
-     y-b = atan( (tan(y)-tan(b)) / (1+tan(y)*tan(b)) )
-     y = b + atan( (x-tan(b)) / (1+x*tan(b)) )
-     let b = pi/4, then tan(b) = 1 and
-     atan(x) = pi/4 + atan( (x-1)/(1+x) )
-     when x->1 then (x-1)/(1+x) -> 0 so ATan0() can be used for it
-     */
-     ValueType one;
-     one.SetOne();
-     ValueType ratio(x);
-     ratio.Sub(one);      // x-1
-     ValueType x_plus_one(x);
-     x_plus_one.Add(one); // x+1
-     ratio.Div(x_plus_one);
-     ValueType answer = ATan0(ratio);
-     ValueType quarter_pi;
-     quarter_pi.Set05Pi();
-     quarter_pi.exponent.SubOne(); // pi/2 halved = pi/4
-     answer.Add(quarter_pi);
-     return answer;
- }
- /*!
- an auxiliary function for calculating the Arc Tangent
- where x > 1
- the identity used here is:
- atan(x) = pi/2 - atan(1/x) for x>0
- */
- template<class ValueType>
- ValueType ATanGreaterThanPlusOne(const ValueType & x)
- {
-     ValueType reciprocal, atan_reciprocal;
-     reciprocal.SetOne();
-     const bool carry = reciprocal.Div(x) != 0;
-     if( carry )
-     {
-         // a carry here means x is very big
-         // so atan(1/x) is taken as zero
-         atan_reciprocal.SetZero();
-     }
-     else
-     {
-         atan_reciprocal = ATan01(reciprocal);
-     }
-     ValueType half_pi;
-     half_pi.Set05Pi();
-     half_pi.Sub(atan_reciprocal);
-     return half_pi;
- }
- } // namespace auxiliaryfunctions
- /*!
- this function calculates the Arc Tangent
- a NaN argument is returned unchanged
- */
- template<class ValueType>
- ValueType ATan(ValueType x)
- {
-     using namespace auxiliaryfunctions;
-     if( x.IsNan() )
-         return x;
-     // for a negative x the identity atan(-x) = -atan(x) is used
-     const bool negative = x.IsSign();
-     if( negative )
-         x.Abs();
-     ValueType unit;
-     unit.SetOne();
-     ValueType answer = x.GreaterWithoutSignThan(unit) ? ATanGreaterThanPlusOne(x) : ATan01(x);
-     if( negative )
-         answer.ChangeSign();
-     return answer;
- }
- /*!
- an alias of ATan(...), look at its description
- ('atg' is another abbreviation of the Arc Tangent)
- */
- template<class ValueType>
- ValueType ATg(const ValueType & x)
- {
-     ValueType answer = ATan(x);
-     return answer;
- }
- /*!
- this function calculates the Arc Cotangent
- by using the identity: acot(x) = pi/2 - atan(x)
- */
- template<class ValueType>
- ValueType ACot(const ValueType & x)
- {
-     ValueType half_pi;
-     half_pi.Set05Pi();
-     const ValueType atan_x = ATan(x);
-     half_pi.Sub(atan_x);
-     return half_pi;
- }
- /*!
- an alias of ACot(...), look at its description
- ('actg' is another abbreviation of the Arc Cotangent)
- */
- template<class ValueType>
- ValueType ACtg(const ValueType & x)
- {
-     ValueType answer = ACot(x);
-     return answer;
- }
- /*
- *
- * hyperbolic functions
- *
- *
- */
- /*!
- this function calculates the Hyperbolic Sine
- from the definition sinh(x)= ( e^x - e^(-x) ) / 2
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType Sinh(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType grow, decay;
-     uint carry = grow.Exp(x);            // e^x
-     carry += decay.Exp(-x);              // e^(-x)
-     carry += grow.Sub(decay);
-     carry += grow.exponent.SubOne();     // division by two
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return grow;
- }
- /*!
- this function calculates the Hyperbolic Cosine
- from the definition cosh(x)= ( e^x + e^(-x) ) / 2
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType Cosh(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType grow, decay;
-     uint carry = grow.Exp(x);            // e^x
-     carry += decay.Exp(-x);              // e^(-x)
-     carry += grow.Add(decay);
-     carry += grow.exponent.SubOne();     // division by two
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return grow;
- }
- /*!
- this function calculates the Hyperbolic Tangent
- from the definition tanh(x)= ( e^x - e^(-x) ) / ( e^x + e^(-x) )
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType Tanh(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType grow, decay;
-     uint carry = grow.Exp(x);   // e^x
-     carry += decay.Exp(-x);     // e^(-x)
-     ValueType difference(grow);
-     carry += difference.Sub(decay);
-     ValueType total(grow);
-     carry += total.Add(decay);
-     carry += difference.Div(total);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return difference;
- }
- /*!
- an alias of Tanh(...), look at its description
- ('tgh' is another abbreviation of the Hyperbolic Tangent)
- */
- template<class ValueType>
- ValueType Tgh(const ValueType & x, ErrorCode * err = 0)
- {
-     ValueType answer = Tanh(x, err);
-     return answer;
- }
- /*!
- this function calculates the Hyperbolic Cotangent
- from the definition coth(x)= ( e^x + e^(-x) ) / ( e^x - e^(-x) )
- for a NaN argument x is returned and for x equal zero NaN is returned,
- in both cases *err (if given) is err_improper_argument,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType Coth(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     if( x.IsZero() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return ValueType(); // NaN is set by default
-     }
-     ValueType grow, decay;
-     uint carry = grow.Exp(x);   // e^x
-     carry += decay.Exp(-x);     // e^(-x)
-     ValueType total(grow);
-     carry += total.Add(decay);
-     ValueType difference(grow);
-     carry += difference.Sub(decay);
-     carry += total.Div(difference);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return total;
- }
- /*!
- an alias of Coth(...), look at its description
- ('ctgh' is another abbreviation of the Hyperbolic Cotangent)
- */
- template<class ValueType>
- ValueType Ctgh(const ValueType & x, ErrorCode * err = 0)
- {
-     ValueType answer = Coth(x, err);
-     return answer;
- }
- /*
- *
- * inverse hyperbolic functions
- *
- *
- */
- /*!
- inverse hyperbolic sine
- asinh(x) = ln( x + sqrt(x^2 + 1) )
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType ASinh(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType one, half, inner(x), answer;
-     one.SetOne();
-     half.SetOne();
-     half.exponent.SubOne();          // half = 0.5
-     uint carry = inner.Mul(x);       // x^2
-     carry += inner.Add(one);         // x^2 + 1, it is >= 1
-     carry += inner.PowFrac(half);    // sqrt(x^2 + 1)
-     carry += inner.Add(x);
-     carry += answer.Ln(inner);       // the argument is > 0
-     // only a carry can happen here
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return answer;
- }
- /*!
- inverse hyperbolic cosine
- acosh(x) = ln( x + sqrt(x^2 - 1) ) x in <1, infinity)
- for a NaN argument x is returned and for x < 1 NaN is returned,
- in both cases *err (if given) is err_improper_argument,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType ACosh(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType one;
-     one.SetOne();
-     if( x < one )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return ValueType(); // NaN is set by default
-     }
-     ValueType inner(x), answer;
-     uint carry = inner.Mul(x);   // x^2
-     carry += inner.Sub(one);     // x^2 - 1, it is >= 0
-     // PowFrac cannot be called for zero, sqrt(0) is simply 0
-     if( !inner.IsZero() )
-     {
-         ValueType half;
-         half.SetOne();
-         half.exponent.SubOne();          // half = 0.5
-         carry += inner.PowFrac(half);    // sqrt(x^2 - 1)
-     }
-     carry += inner.Add(x);
-     carry += answer.Ln(inner);   // the argument is >= 1
-     // only a carry can happen here
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return answer;
- }
- /*!
- inverse hyperbolic tangent
- atanh(x) = 0.5 * ln( (1+x) / (1-x) ) x in (-1, 1)
- for a NaN argument x is returned and for abs(x) >= 1 NaN is returned,
- in both cases *err (if given) is err_improper_argument,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType ATanh(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType one;
-     one.SetOne();
-     if( !x.SmallerWithoutSignThan(one) )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return ValueType(); // NaN is set by default
-     }
-     ValueType upper(x), lower(one), answer;
-     uint carry = upper.Add(one);         // 1+x
-     carry += lower.Sub(x);               // 1-x
-     carry += upper.Div(lower);
-     carry += answer.Ln(upper);
-     carry += answer.exponent.SubOne();   // multiplication by 0.5
-     // only a carry can happen here
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return answer;
- }
- /*!
- inverse hyperbolic tangent, an alias of ATanh(...)
- */
- template<class ValueType>
- ValueType ATgh(const ValueType & x, ErrorCode * err = 0)
- {
-     ValueType answer = ATanh(x, err);
-     return answer;
- }
- /*!
- inverse hyperbolic cotangent
- acoth(x) = 0.5 * ln( (x+1) / (x-1) ) x in (-infinity, -1) or (1, infinity)
- for a NaN argument x is returned and for abs(x) <= 1 NaN is returned,
- in both cases *err (if given) is err_improper_argument,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType ACoth(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x; // NaN
-     }
-     ValueType one;
-     one.SetOne();
-     if( !x.GreaterWithoutSignThan(one) )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return ValueType(); // NaN is set by default
-     }
-     ValueType upper(x), lower(x), answer;
-     uint carry = upper.Add(one);         // x+1
-     carry += lower.Sub(one);             // x-1
-     carry += upper.Div(lower);
-     carry += answer.Ln(upper);
-     carry += answer.exponent.SubOne();   // multiplication by 0.5
-     // only a carry can happen here
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return answer;
- }
- /*!
- inverse hyperbolic cotangent, an alias of ACoth(...)
- */
- template<class ValueType>
- ValueType ACtgh(const ValueType & x, ErrorCode * err = 0)
- {
-     ValueType answer = ACoth(x, err);
-     return answer;
- }
- /*
- *
- * functions for converting between degrees, radians and gradians
- *
- *
- */
- /*!
- this function converts degrees to radians
- it returns: x * pi / 180
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType DegToRad(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType radians(x), half_turn, pi;
-     half_turn = 180;
-     pi.SetPi();
-     // the division goes first and the multiplication second,
-     // the result is more accurate especially when x is: 90,180,270 or 360
-     uint carry = radians.Div(half_turn);
-     carry += radians.Mul(pi);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return radians;
- }
- /*!
- this function converts radians to degrees
- it returns: x * 180 / pi
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType RadToDeg(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType degrees, pi;
-     degrees = 180;
-     pi.SetPi();
-     uint carry = degrees.Mul(x);
-     carry += degrees.Div(pi);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return degrees;
- }
- /*!
- this function converts degrees in the long format into one value
- long format: (degrees, minutes, seconds)
- minutes and seconds must be greater than or equal zero
- result:
- if d>=0 : result= d + ((s/60)+m)/60
- if d<0 : result= d - ((s/60)+m)/60
- ((s/60)+m)/60 = (s+60*m)/3600 (the second version is faster because
- there's only one division)
- for example:
- DegToDeg(10, 30, 0) = 10.5
- DegToDeg(10, 24, 35.6)=10.4098(8)
- when any argument is NaN or when m or s is negative the result is NaN
- and *err (if given) is err_improper_argument
- */
- template<class ValueType>
- ValueType DegToDeg( const ValueType & d, const ValueType & m, const ValueType & s,
- ErrorCode * err = 0)
- {
-     const bool bad_input = d.IsNan() || m.IsNan() || s.IsNan() || m.IsSign() || s.IsSign();
-     if( bad_input )
-     {
-         if( err )
-             *err = err_improper_argument;
-         ValueType invalid;
-         invalid.SetZeroNan(); // not needed, only to get rid of GCC warning about an uninitialized variable
-         return invalid;
-     }
-     ValueType fraction, seconds_per_degree;
-     fraction = 60;
-     seconds_per_degree = 3600;
-     uint carry = fraction.Mul(m);                // 60*m
-     carry += fraction.Add(s);                    // s + 60*m
-     carry += fraction.Div(seconds_per_degree);   // (s + 60*m) / 3600
-     // the fraction moves the value away from zero, so it follows the sign of d
-     if( d.IsSign() )
-         fraction.ChangeSign();
-     carry += fraction.Add(d);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return fraction;
- }
- /*!
- this function converts degrees in the long format (degrees, minutes, seconds) to radians
- the arguments are first joined by DegToDeg(), when it reports an error through 'err'
- its result is returned without the further conversion
- */
- template<class ValueType>
- ValueType DegToRad( const ValueType & d, const ValueType & m, const ValueType & s,
- ErrorCode * err = 0)
- {
-     const ValueType degrees = DegToDeg(d, m, s, err);
-     const bool failed = err && (*err != err_ok);
-     if( failed )
-         return degrees;
-     return DegToRad(degrees, err);
- }
- /*!
- this function converts gradians to radians
- it returns: x * pi / 200
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType GradToRad(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType radians(x), half_turn, pi;
-     half_turn = 200;
-     pi.SetPi();
-     // the division goes first and the multiplication second,
-     // the result is more accurate especially when x is: 100,200,300 or 400
-     uint carry = radians.Div(half_turn);
-     carry += radians.Mul(pi);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return radians;
- }
- /*!
- this function converts radians to gradians
- it returns: x * 200 / pi
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType RadToGrad(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType gradians, pi;
-     gradians = 200;
-     pi.SetPi();
-     uint carry = gradians.Mul(x);
-     carry += gradians.Div(pi);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return gradians;
- }
- /*!
- this function converts degrees to gradians
- it returns: x * 200 / 180
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType DegToGrad(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType gradians(x), grad_half_turn, deg_half_turn;
-     grad_half_turn = 200;
-     deg_half_turn = 180;
-     uint carry = gradians.Mul(grad_half_turn);
-     carry += gradians.Div(deg_half_turn);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return gradians;
- }
- /*!
- this function converts degrees in the long format (degrees, minutes, seconds) to gradians
- the arguments are first joined by DegToDeg(), when it reports an error through 'err'
- its result is returned without the further conversion
- */
- template<class ValueType>
- ValueType DegToGrad( const ValueType & d, const ValueType & m, const ValueType & s,
- ErrorCode * err = 0)
- {
-     const ValueType degrees = DegToDeg(d, m, s, err);
-     const bool failed = err && (*err != err_ok);
-     if( failed )
-         return degrees;
-     return DegToGrad(degrees, err);
- }
- /*!
- this function converts gradians to degrees
- it returns: x * 180 / 200
- for a NaN argument *err (if given) is err_improper_argument and x is returned,
- otherwise *err is err_overflow when any step reported a carry or err_ok
- */
- template<class ValueType>
- ValueType GradToDeg(const ValueType & x, ErrorCode * err = 0)
- {
-     if( x.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return x;
-     }
-     ValueType degrees(x), deg_half_turn, grad_half_turn;
-     deg_half_turn = 180;
-     grad_half_turn = 200;
-     uint carry = degrees.Mul(deg_half_turn);
-     carry += degrees.Div(grad_half_turn);
-     if( err )
-     {
-         if( carry )
-             *err = err_overflow;
-         else
-             *err = err_ok;
-     }
-     return degrees;
- }
- /*
- *
- * other functions
- *
- *
- */
- /*!
- this function calculates the square root
- e.g. Sqrt(9) = 3
- for a NaN or negative argument the result is NaN and *err (if given) is err_improper_argument,
- otherwise *err is err_overflow when x.Sqrt() reported a carry or err_ok
- */
- template<class ValueType>
- ValueType Sqrt(ValueType x, ErrorCode * err = 0)
- {
-     const bool bad_input = x.IsNan() || x.IsSign();
-     if( bad_input )
-     {
-         if( err )
-             *err = err_improper_argument;
-         x.SetNan();
-     }
-     else
-     {
-         const uint carry = x.Sqrt();
-         if( err )
-             *err = carry ? err_overflow : err_ok;
-     }
-     return x;
- }
- namespace auxiliaryfunctions
- {
- /*!
- an auxiliary function for Root()
- a negative index is not allowed: x is set to NaN, *err (if given) to err_improper_argument
- and true is returned, for any other index the function returns false and changes nothing
- */
- template<class ValueType>
- bool RootCheckIndexSign(ValueType & x, const ValueType & index, ErrorCode * err)
- {
-     if( !index.IsSign() )
-         return false;
-     if( err )
-         *err = err_improper_argument;
-     x.SetNan();
-     return true;
- }
- /*!
- an auxiliary function for Root()
- it handles index equal zero (returning true then):
- root(0;0) is assumed to be not defined: x is set to NaN and *err to err_improper_argument
- root(x;0) is 1 for x!=0: x is set to one and *err to err_ok
- for a non-zero index the function returns false and changes nothing
- */
- template<class ValueType>
- bool RootCheckIndexZero(ValueType & x, const ValueType & index, ErrorCode * err)
- {
-     if( !index.IsZero() )
-         return false;
-     if( x.IsZero() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         x.SetNan();
-     }
-     else
-     {
-         x.SetOne();
-         if( err )
-             *err = err_ok;
-     }
-     return true;
- }
- /*!
- an auxiliary function for Root()
- root(x;1) is x, so for index equal one *err (if given) is set to err_ok and true is returned
- (the caller then returns x untouched; calculating it by PowFrac would lose the precision)
- for any other index the function returns false
- */
- template<class ValueType>
- bool RootCheckIndexOne(const ValueType & index, ErrorCode * err)
- {
-     ValueType unit;
-     unit.SetOne();
-     const bool index_is_one = (index == unit);
-     if( index_is_one && err )
-         *err = err_ok;
-     return index_is_one;
- }
- /*!
- an auxiliary function for Root()
- for index equal two x is replaced by Sqrt(x, err) and true is returned,
- for any other index the function returns false and changes nothing
- */
- template<class ValueType>
- bool RootCheckIndexTwo(ValueType & x, const ValueType & index, ErrorCode * err)
- {
-     if( !(index == 2) )
-         return false;
-     x = Sqrt(x, err);
-     return true;
- }
- /*!
- an auxiliary function for Root()
- the index must be an integer: for any other index x is set to NaN,
- *err (if given) to err_improper_argument and true is returned,
- for an integer index the function returns false and changes nothing
- */
- template<class ValueType>
- bool RootCheckIndexFrac(ValueType & x, const ValueType & index, ErrorCode * err)
- {
-     if( index.IsInteger() )
-         return false;
-     if( err )
-         *err = err_improper_argument;
-     x.SetNan();
-     return true;
- }
- /*!
- an auxiliary function for Root()
- root(0;index) is zero (if index!=0): for x equal zero x is set to zero,
- *err (if given) to err_ok and true is returned, otherwise the function returns false
- RootCheckIndexZero() must be called beforehand
- */
- template<class ValueType>
- bool RootCheckXZero(ValueType & x, ErrorCode * err)
- {
-     if( !x.IsZero() )
-         return false;
-     x.SetZero();
-     if( err )
-         *err = err_ok;
-     return true;
- }
- /*!
- an auxiliary function for Root()
- it deals with a negative x depending on the parity of the index:
- odd index (1,3,5...): x is replaced by abs(x), *change_sign is set to true, false is returned
- even index: x cannot be negative, x is set to NaN, *err (if given) to err_improper_argument
- and true is returned
- for a non-negative x the function returns false with *change_sign equal false
- */
- template<class ValueType>
- bool RootCheckIndex(ValueType & x, const ValueType & index, ErrorCode * err, bool * change_sign)
- {
-     *change_sign = false;
-     const bool index_is_odd = index.Mod2();
-     if( !x.IsSign() )
-         return false;
-     if( index_is_odd )
-     {
-         *change_sign = true;
-         x.Abs();
-         return false;
-     }
-     if( err )
-         *err = err_improper_argument;
-     x.SetNan();
-     return true;
- }
- /*!
- an auxiliary function for Root()
- when old_x (the value before taking the root) is an integer, the calculated root x is not,
- and the index is relatively small (index.exponent is negative, which matters because
- the special powering algorithm Big::PowUInt() is used), the function rounds x and raises
- the rounded value to the index-th power; if that gives old_x back, x is replaced by the rounded value
- it returns 1 if Round() or PowUInt() reported a carry and 0 otherwise
- */
- template<class ValueType>
- uint RootCorrectInteger(ValueType & old_x, ValueType & x, const ValueType & index)
- {
-     const bool worth_trying = old_x.IsInteger() && !x.IsInteger() && index.exponent.IsSign();
-     if( !worth_trying )
-         return 0;
-     ValueType power(x);
-     uint carry = power.Round();
-     const ValueType rounded(power);
-     carry += power.PowUInt(index);
-     if( power == old_x )
-         x = rounded;
-     return carry ? 1 : 0;
- }
- } // namespace auxiliaryfunctions
- /*!
- indexth Root of x
- index must be integer and not negative <0;1;2;3....)
- if index==0 the result is one
- if x==0 the result is zero and we assume root(0;0) is not defined
- if index is even (2;4;6...) the result is x^(1/index) and x>0
- if index is odd (1;3;5;...) the result is either
- -(abs(x)^(1/index)) if x<0 or
- x^(1/index)) if x>0
- (for index==1 the result is equal x)
- for improper arguments the result is NaN and *err (if given) is err_improper_argument
- */
- template<class ValueType>
- ValueType Root(ValueType x, const ValueType & index, ErrorCode * err = 0)
- {
-     using namespace auxiliaryfunctions;
-     if( x.IsNan() || index.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         x.SetNan();
-         return x;
-     }
-     // the special cases, each helper sets x and *err itself when it returns true
-     // (the order of the checks matters)
-     const bool handled =
-         RootCheckIndexSign(x, index, err) ||
-         RootCheckIndexZero(x, index, err) ||
-         RootCheckIndexOne ( index, err) ||
-         RootCheckIndexTwo (x, index, err) ||
-         RootCheckIndexFrac(x, index, err) ||
-         RootCheckXZero (x, err);
-     if( handled )
-         return x;
-     // here: index is an integer different from zero and x!=0
-     const ValueType original_x(x);
-     ValueType old_x(original_x);
-     bool negate;
-     if( RootCheckIndex(x, index, err, &negate) )
-         return x;
-     // the formula used: root(x ; n) = exp( ln(x) / n )
-     ValueType exponent;
-     uint carry = exponent.Ln(x);
-     carry += exponent.Div(index);
-     carry += x.Exp(exponent);
-     if( negate )
-     {
-         // x is different from zero
-         x.SetSign();
-     }
-     carry += RootCorrectInteger(old_x, x, index);
-     if( err )
-         *err = carry ? err_overflow : err_ok;
-     return x;
- }
- /*!
- absolute value of x
- e.g. abs(-2) = 2
- abs(2) = 2
- */
- template<class ValueType>
- ValueType Abs(const ValueType & x)
- {
-     ValueType magnitude = x;
-     magnitude.Abs();
-     return magnitude;
- }
- /*!
- it returns the sign of the value (the result of the Sgn() method called on a copy of x)
- e.g. sgn(-2) = -1
- sgn(0) = 0
- sgn(10) = 1
- */
- template<class ValueType>
- ValueType Sgn(ValueType x)
- {
-     ValueType & sign = x;
-     sign.Sgn();
-     return x;
- }
- /*!
- the remainder from a division
- e.g.
- mod( 12.6 ; 3) = 0.6 because 12.6 = 3*4 + 0.6
- mod(-12.6 ; 3) = -0.6 because -12.6 = 3*(-4) + (-0.6)
- mod( 12.6 ; -3) = 0.6
- mod(-12.6 ; -3) = -0.6
- if a or b is NaN the result is NaN and *err (if given) is err_improper_argument,
- otherwise *err is err_overflow when a.Mod(b) reported a carry or err_ok
- */
- template<class ValueType>
- ValueType Mod(ValueType a, const ValueType & b, ErrorCode * err = 0)
- {
-     const bool bad_input = a.IsNan() || b.IsNan();
-     if( bad_input )
-     {
-         if( err )
-             *err = err_improper_argument;
-         a.SetNan();
-     }
-     else
-     {
-         const uint carry = a.Mod(b);
-         if( err )
-             *err = carry ? err_overflow : err_ok;
-     }
-     return a;
- }
- namespace auxiliaryfunctions
- {
- /*!
- this function appends consecutive factorials to a given container
- (the item with index i holds i!)
- 'more' means how many values should be added at the end (zero is treated as one)
- e.g.
- std::vector<ValueType> fact;
- SetFactorialSequence(fact, 3);
- // now the container has three values: 1 1 2
- SetFactorialSequence(fact, 2);
- // now the container has five values: 1 1 2 6 24
- */
- template<class ValueType>
- void SetFactorialSequence(std::vector<ValueType> & fact, uint more = 20)
- {
-     const uint extra = (more == 0) ? 1 : more;
-     uint next = static_cast<uint>(fact.size());
-     fact.resize(fact.size() + extra);
-     if( next == 0 )
-     {
-         // the sequence starts with 0! = 1
-         fact[0] = 1;
-         next = 1;
-     }
-     while( next < fact.size() )
-     {
-         // next! = (next-1)! * next
-         fact[next] = fact[next-1];
-         fact[next].MulInt(next);
-         ++next;
-     }
- }
- /*!
- an auxiliary function used to calculate Bernoulli numbers
- this function returns a sum:
- sum(m) = sum_{k=0}^{m-1} {2^k * (m k) * B(k)} k in [0, m-1] (m k) means binomial coefficient = (m! / (k! * (m-k)!))
- you should have sufficient factorials in cgamma.fact
- (the code reads cgamma.fact[m], so cgamma.fact needs at least m+1 items)
- and the Bernoulli numbers B(0)...B(m-1) in cgamma.bern
- n_ should be equal 2
- if 'stop' is given it is polled on every 16th k and NaN is returned after a stop signal
- the summing is also abandoned when the sum becomes NaN
- */
- template<class ValueType>
- ValueType SetBernoulliNumbersSum(CGamma<ValueType> & cgamma, const ValueType & n_, uint m,
- const volatile StopCalculating * stop = 0)
- {
- ValueType k_, temp, temp2, temp3, sum;
- sum.SetZero();
- for(uint k=0 ; k<m ; ++k) // k<m means k<=m-1
- {
- if( stop && (k & 15)==0 ) // means: k % 16 == 0
- if( stop->WasStopSignal() )
- return ValueType(); // NaN
- if( k>1 && (k & 1) == 1 ) // for that k the Bernoulli number is zero
- continue;
- k_ = k;
- temp = n_; // n_ is equal 2
- temp.Pow(k_);
- // temp = 2^k
- temp2 = cgamma.fact[m];
- temp3 = cgamma.fact[k];
- temp3.Mul(cgamma.fact[m-k]);
- temp2.Div(temp3);
- // temp2 = (m k) = m! / ( k! * (m-k)! )
- temp.Mul(temp2);
- temp.Mul(cgamma.bern[k]);
- sum.Add(temp);
- // sum += 2^k * (m k) * B(k)
- if( sum.IsNan() )
- break;
- }
- return sum;
- }
- /*!
- an auxiliary function used to calculate Bernoulli numbers
- it fills cgamma.bern[start] ... cgamma.bern[size-1], start is >= 2
- the items with an odd index are set to zero, for the even ones
- we use the recurrence formula:
- B(m) = 1 / (2*(1 - 2^m)) * sum(m)
- where sum(m) is calculated by SetBernoulliNumbersSum()
- it returns false if there was a stop signal (cgamma.bern is then shrunk
- to the items calculated so far) and true otherwise
- */
- template<class ValueType>
- bool SetBernoulliNumbersMore(CGamma<ValueType> & cgamma, uint start, const volatile StopCalculating * stop = 0)
- {
- ValueType denominator, temp, temp2, temp3, m_, sum, sum2, n_, k_;
- const uint n = 2;
- n_ = n;
- // start is >= 2
- for(uint m=start ; m<cgamma.bern.size() ; ++m)
- {
- if( (m & 1) == 1 )
- {
- // an odd index
- cgamma.bern[m].SetZero();
- }
- else
- {
- m_ = m;
- temp = n_; // n_ = 2
- temp.Pow(m_);
- // temp = 2^m
- denominator.SetOne();
- denominator.Sub(temp);
- if( denominator.exponent.AddOne() ) // it means: denominator.MulInt(2)
- denominator.SetNan();
- // denominator = 2 * (1 - 2^m)
- cgamma.bern[m] = SetBernoulliNumbersSum(cgamma, n_, m, stop);
- if( stop && stop->WasStopSignal() )
- {
- cgamma.bern.resize(m); // valid numbers are in [0, m-1]
- return false;
- }
- cgamma.bern[m].Div(denominator);
- }
- }
- return true;
- }
- /*!
- this function is used to calculate Bernoulli numbers,
- returns false if there was a stop signal,
- 'more' means how many values should be added at the end (zero is treated as one)
- e.g.
- typedef Big<1,2> MyBig;
- CGamma<MyBig> cgamma;
- SetBernoulliNumbers(cgamma, 3);
- // now we have three first Bernoulli numbers: 1 -0.5 0.16667
- SetBernoulliNumbers(cgamma, 4);
- // now we have 7 Bernoulli numbers: 1 -0.5 0.16667 0 -0.0333 0 0.0238
- */
- template<class ValueType>
- bool SetBernoulliNumbers(CGamma<ValueType> & cgamma, uint more = 20, const volatile StopCalculating * stop = 0)
- {
-     const uint extra = (more == 0) ? 1 : more;
-     uint first_new = static_cast<uint>(cgamma.bern.size());
-     cgamma.bern.resize(cgamma.bern.size() + extra);
-     if( first_new == 0 )
-     {
-         // B(0) = 1
-         cgamma.bern[0].SetOne();
-         first_new = 1;
-     }
-     if( cgamma.bern.size() == 1 )
-         return true;
-     if( first_new == 1 )
-     {
-         // B(1) = -0.5
-         cgamma.bern[1].Set05();
-         cgamma.bern[1].ChangeSign();
-         first_new = 2;
-     }
-     // the recurrence needs sufficient factorials in cgamma.fact
-     if( cgamma.fact.size() < cgamma.bern.size() )
-     {
-         const uint missing = static_cast<uint>(cgamma.bern.size() - cgamma.fact.size());
-         SetFactorialSequence(cgamma.fact, missing);
-     }
-     return SetBernoulliNumbersMore(cgamma, first_new, stop);
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- we calculate a sum:
- sum(n) = sum_{m=2} { B(m) / ( (m^2 - m) * n^(m-1) ) } = 1/(12*n) - 1/(360*n^3) + 1/(1260*n^5) + ...
- B(m) means a mth Bernoulli number
- the sum starts from m=2, we calculate as long as the value will not change after adding a next part
- (only even m are visited, at most up to TTMATH_ARITHMETIC_MAX_LOOP)
- missing Bernoulli numbers are calculated on demand and stored in cgamma.bern
- after a stop signal 'err' is set to err_interrupt and NaN is returned
- */
- template<class ValueType>
- ValueType GammaFactorialHighSum(const ValueType & n, CGamma<ValueType> & cgamma, ErrorCode & err,
- const volatile StopCalculating * stop)
- {
- ValueType temp, temp2, denominator, sum, oldsum;
- sum.SetZero();
- for(uint m=2 ; m<TTMATH_ARITHMETIC_MAX_LOOP ; m+=2)
- {
- if( stop && (m & 3)==0 ) // (m & 3)==0 means: (m % 4)==0
- if( stop->WasStopSignal() )
- {
- err = err_interrupt;
- return ValueType(); // NaN
- }
- temp = (m-1);
- denominator = n;
- denominator.Pow(temp);
- // denominator = n ^ (m-1)
- temp = m;
- temp2 = temp;
- temp.Mul(temp2);
- temp.Sub(temp2);
- // temp = m^2 - m
- denominator.Mul(temp);
- // denominator = (m^2 - m) * n ^ (m-1)
- // make sure that B(m) is available
- if( m >= cgamma.bern.size() )
- {
- if( !SetBernoulliNumbers(cgamma, m - cgamma.bern.size() + 1 + 3, stop) ) // 3 more than needed
- {
- // there was the stop signal
- err = err_interrupt;
- return ValueType(); // NaN
- }
- }
- temp = cgamma.bern[m];
- temp.Div(denominator);
- // temp = B(m) / ( (m^2 - m) * n^(m-1) )
- oldsum = sum;
- sum.Add(temp);
- // stop when the sum is NaN or when the new part did not change it
- if( sum.IsNan() || oldsum==sum )
- break;
- }
- return sum;
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- it calculates n! by using Stirling's series:
- n! = (n/e)^n * sqrt(2*pi*n) * exp( sum(n) )
- where n is a real number (not only an integer) and is sufficient large (greater than TTMATH_GAMMA_BOUNDARY)
- and sum(n) is calculated by GammaFactorialHighSum()
- 'err' and 'stop' are passed to GammaFactorialHighSum()
- */
- template<class ValueType>
- ValueType GammaFactorialHigh(const ValueType & n, CGamma<ValueType> & cgamma, ErrorCode & err,
- const volatile StopCalculating * stop)
- {
-     ValueType root;
-     root.Set2Pi();
-     root.Mul(n);
-     root = Sqrt(root);
-     // root = sqrt(2*pi*n)
-     ValueType e;
-     e.SetE();
-     ValueType power(n);
-     power.Div(e);
-     power.Pow(n);
-     // power = (n/e)^n
-     const ValueType series = GammaFactorialHighSum(n, cgamma, err, stop);
-     ValueType correction;
-     correction.Exp(series);
-     // correction = exp(sum(n))
-     power.Mul(root);
-     power.Mul(correction);
-     return power;
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- it uses: Gamma(n) = GammaFactorialHigh(n-1)
- */
- template<class ValueType>
- ValueType GammaPlusHigh(ValueType n, CGamma<ValueType> & cgamma, ErrorCode & err, const volatile StopCalculating * stop)
- {
-     ValueType unit;
-     unit.SetOne();
-     n.Sub(unit); // n-1
-     ValueType answer = GammaFactorialHigh(n, cgamma, err, stop);
-     return answer;
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- we use this function when n is integer and a small value (from 0 to TTMATH_GAMMA_BOUNDARY]
- the formula is:
- gamma(n) = (n-1)! = 1 * 2 * 3 * ... * (n-1)
- a factorial already stored in cgamma.fact is returned directly, otherwise the multiplying
- starts from the greatest stored factorial (cgamma.fact itself is not extended here)
- */
- template<class ValueType>
- ValueType GammaPlusLowIntegerInt(uint n, CGamma<ValueType> & cgamma)
- {
-     TTMATH_ASSERT( n > 0 )
-     const uint wanted = n - 1;
-     if( wanted < static_cast<uint>(cgamma.fact.size()) )
-         return cgamma.fact[wanted];
-     ValueType product;
-     uint factor;
-     if( cgamma.fact.size() < 2 )
-     {
-         // nothing useful is stored, start from 1
-         factor = 2;
-         product.SetOne();
-     }
-     else
-     {
-         // continue from the last stored factorial: (size-1)!
-         factor = static_cast<uint>(cgamma.fact.size());
-         product = cgamma.fact[factor-1];
-     }
-     while( factor < n )
-     {
-         product.MulInt(factor);
-         ++factor;
-     }
-     return product;
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- we use this function when n is integer and a small value (from 0 to TTMATH_GAMMA_BOUNDARY]
- n is converted by ToInt() and the work is done by GammaPlusLowIntegerInt()
- */
- template<class ValueType>
- ValueType GammaPlusLowInteger(const ValueType & n, CGamma<ValueType> & cgamma)
- {
-     sint as_int;
-     n.ToInt(as_int);
-     ValueType answer = GammaPlusLowIntegerInt(as_int, cgamma);
-     return answer;
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- we use this function when n is a small value (from 0 to TTMATH_GAMMA_BOUNDARY]
- an integer n is handled exactly by GammaPlusLowInteger()
- for other values we use a recurrence formula:
- gamma(z+1) = z * gamma(z)
- then: gamma(z) = gamma(z+1) / z
- e.g.
- gamma(3.89) = gamma(2001.89) / ( 3.89 * 4.89 * 5.89 * ... * 1999.89 * 2000.89 )
- 'err' and 'stop' are passed to GammaPlusHigh()
- */
- template<class ValueType>
- ValueType GammaPlusLow(ValueType n, CGamma<ValueType> & cgamma, ErrorCode & err, const volatile StopCalculating * stop)
- {
-     ValueType one, denominator, temp, boundary;
-     if( n.IsInteger() )
-         return GammaPlusLowInteger(n, cgamma);
-     one.SetOne();
-     denominator = n;
-     // the boundary has to be set explicitly, a default value is NaN
-     // and the loop below would compare n with NaN
-     boundary = TTMATH_GAMMA_BOUNDARY;
-     // denominator = n * (n+1) * (n+2) * ... up to the first value not smaller than the boundary
-     while( n < boundary )
-     {
-         n.Add(one);
-         denominator.Mul(n);
-     }
-     n.Add(one);
-     // now n is sufficient big
-     temp = GammaPlusHigh(n, cgamma, err, stop);
-     temp.Div(denominator);
-     return temp;
- }
- /*!
- an auxiliary function used to calculate the Gamma() function for a positive n
- a value greater than TTMATH_GAMMA_BOUNDARY goes to GammaPlusHigh() (Stirling's series),
- a smaller one goes to GammaPlusLow() (exact factorials for integers, the recurrence
- formula for other values) because Stirling's series requires a sufficient large argument
- */
- template<class ValueType>
- ValueType GammaPlus(const ValueType & n, CGamma<ValueType> & cgamma, ErrorCode & err, const volatile StopCalculating * stop)
- {
-     ValueType boundary;
-     boundary = TTMATH_GAMMA_BOUNDARY;
-     // previously GammaPlusHigh() was returned unconditionally
-     // and the call of GammaPlusLow() was unreachable
-     if( boundary < n )
-         return GammaPlusHigh(n, cgamma, err, stop);
-     return GammaPlusLow(n, cgamma, err, stop);
- }
- /*!
- an auxiliary function used to calculate the Gamma() function
- this function is used when n is negative
- the reflection formula is used:
- gamma(1-z) * gamma(z) = pi / sin(pi*z)
- then: gamma(z) = pi / (sin(pi*z) * gamma(1-z))
- for a (negative) integer n the gamma function is not defined:
- 'err' is set to err_improper_argument and NaN is returned
- */
- template<class ValueType>
- ValueType GammaMinus(const ValueType & n, CGamma<ValueType> & cgamma, ErrorCode & err, const volatile StopCalculating * stop)
- {
-     if( n.IsInteger() )
-     {
-         err = err_improper_argument;
-         return ValueType(); // NaN is set by default
-     }
-     ValueType pi;
-     pi.SetPi();
-     ValueType angle(pi);
-     angle.Mul(n);
-     const ValueType sine = Sin(angle);
-     // sine = sin(pi * n)
-     ValueType reflected;
-     reflected.SetOne();
-     reflected.Sub(n);
-     ValueType product = GammaPlus(reflected, cgamma, err, stop);
-     // product = gamma(1 - n)
-     product.Mul(sine);
-     pi.Div(product);
-     return pi;
- }
- } // namespace auxiliaryfunctions
- /*!
-     this function calculates the Gamma function
-     it's multithread safe, you should create a CGamma<> object and use it whenever you call the Gamma()
-     e.g.
-         typedef Big<1,2> MyBig;
-         MyBig x=234, y=345.53;
-         CGamma<MyBig> cgamma;
-         std::cout << Gamma(x, cgamma) << std::endl;
-         std::cout << Gamma(y, cgamma) << std::endl;
-     in the CGamma<> object the function stores some coefficients (factorials, Bernoulli numbers),
-     and they will be reused in next calls to the function
-     each thread should have its own CGamma<> object, and you can use these objects with Factorial() function too
-     error reporting (through 'err' when it is not null):
-         NaN argument                  -> err_improper_argument, the argument is returned
-         zero argument                 -> err_improper_argument, NaN is returned
-         NaN result without an error   -> err_overflow
-     a value found in cgamma.history is returned together with its stored error code
- */
- template<class ValueType>
- ValueType Gamma(const ValueType & n, CGamma<ValueType> & cgamma, ErrorCode * err = 0,
-                 const volatile StopCalculating * stop = 0)
- {
-     using namespace auxiliaryfunctions;
-     ValueType gamma_value;
-     ErrorCode status;
-     if( n.IsNan() )
-     {
-         if( err )
-             *err = err_improper_argument;
-         return n;
-     }
-     // has the value been calculated before?
-     if( cgamma.history.Get(n, gamma_value, status) )
-     {
-         if( err )
-             *err = status;
-         return gamma_value;
-     }
-     status = err_ok;
-     if( n.IsSign() )
-     {
-         gamma_value = GammaMinus(n, cgamma, status, stop);
-     }
-     else if( n.IsZero() )
-     {
-         status = err_improper_argument;
-         gamma_value.SetNan();
-     }
-     else
-     {
-         gamma_value = GammaPlus(n, cgamma, status, stop);
-     }
-     // a NaN which nobody explained is reported as an overflow
-     if( gamma_value.IsNan() && status == err_ok )
-         status = err_overflow;
-     if( err )
-         *err = status;
-     // the result is remembered only when a 'stop' object was given
-     // and it has not signalled a break
-     if( stop && !stop->WasStopSignal() )
-         cgamma.history.Add(n, gamma_value, status);
-     return gamma_value;
- }
- /*!
-     this function calculates the Gamma function
-     it forwards to Gamma(n, cgamma, err) with one function-local static CGamma<> object
-     note: this function should be used only in a single-thread environment
- */
- template<class ValueType>
- ValueType Gamma(const ValueType & n, ErrorCode * err = 0)
- {
-     // warning: this static object is shared by all callers and is not thread safe
-     static CGamma<ValueType> shared_cgamma;
-     return Gamma(n, shared_cgamma, err);
- }
- namespace auxiliaryfunctions
- {
- /*!
-     an auxiliary function for calculating the factorial function
-     we use the formula:
-         x! = gamma(x+1)
-     x must be a non-negative integer, otherwise 'err' (if given) is set to
-     err_improper_argument and NaN is returned
-     when 'cgamma' is null the single-thread version of Gamma() is used
- */
- template<class ValueType>
- ValueType Factorial2(ValueType x,
-                      CGamma<ValueType> * cgamma = 0,
-                      ErrorCode * err = 0,
-                      const volatile StopCalculating * stop = 0)
- {
-     ValueType result, one;
-     const bool improper = x.IsNan() || x.IsSign() || !x.IsInteger();
-     if( improper )
-     {
-         if( err )
-             *err = err_improper_argument;
-         x.SetNan();
-         return x;
-     }
-     // x = x + 1
-     one.SetOne();
-     x.Add(one);
-     if( !cgamma )
-         return Gamma(x, err);
-     return Gamma(x, *cgamma, err, stop);
- }
- } // namespace auxiliaryfunctions
- /*!
-     the factorial from given 'x'
-     e.g.
-         Factorial(4) = 4! = 1*2*3*4
-     it's multithread safe, you should create a CGamma<> object and use it whenever you call the Factorial()
-     e.g.
-         typedef Big<1,2> MyBig;
-         MyBig x=234, y=54345;
-         CGamma<MyBig> cgamma;
-         std::cout << Factorial(x, cgamma) << std::endl;
-         std::cout << Factorial(y, cgamma) << std::endl;
-     in the CGamma<> object the function stores some coefficients (factorials, Bernoulli numbers),
-     and they will be reused in next calls to the function
-     each thread should have its own CGamma<> object, and you can use these objects with Gamma() function too
- */
- template<class ValueType>
- ValueType Factorial(const ValueType & x, CGamma<ValueType> & cgamma, ErrorCode * err = 0,
-                     const volatile StopCalculating * stop = 0)
- {
-     CGamma<ValueType> * cgamma_ptr = &cgamma;
-     return auxiliaryfunctions::Factorial2(x, cgamma_ptr, err, stop);
- }
- /*!
-     the factorial from given 'x'
-     e.g.
-         Factorial(4) = 4! = 1*2*3*4
-     no CGamma<> object and no 'stop' object are passed to Factorial2()
-     note: this function should be used only in a single-thread environment
- */
- template<class ValueType>
- ValueType Factorial(const ValueType & x, ErrorCode * err = 0)
- {
-     CGamma<ValueType> * no_cgamma = 0;
-     return auxiliaryfunctions::Factorial2(x, no_cgamma, err, 0);
- }
- /*!
-     this method prepares some coefficients: factorials and Bernoulli numbers
-     stored in 'fact' and 'bern' objects
-     the method is defined here because it uses the Gamma() function which
-     is not available in ttmathobjects.h
-     read the doc info in ttmathobjects.h file where CGamma<> struct is declared
- */
- template<class ValueType>
- void CGamma<ValueType>::InitAll()
- {
-     ValueType init_arg = TTMATH_GAMMA_BOUNDARY + 1;
-     // one call to history.Remove() removes only one object,
-     // so repeat it until no object with the key 'init_arg' is left
-     bool removed;
-     do
-     {
-         removed = history.Remove(init_arg);
-     }
-     while( removed );
-     // the simplest way to initialize is to call the Gamma function with (TTMATH_GAMMA_BOUNDARY + 1)
-     // the larger the argument the fewer coefficients are needed
-     Gamma(init_arg, *this);
- }
-} // namespace
- this is for convenience for the user
- he can only use '#include <ttmath/ttmath.h>'
-#include "ttmathparser.h"
-// Dec is not finished yet
-//#include "ttmathdec.h"
-#ifdef _MSC_VER
-//warning C4127: conditional expression is constant
-#pragma warning( default: 4127 )
-//warning C4702: unreachable code
-#pragma warning( default: 4702 )
-//warning C4800: forcing value to bool 'true' or 'false' (performance warning)
-#pragma warning( default: 4800 )
diff --git a/ttmath/ttmathbig.h b/ttmath/ttmathbig.h
deleted file mode 100644
index 45793b4..0000000
--- a/ttmath/ttmathbig.h
+++ /dev/null
@@ -1,6045 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2012, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathbig
-#define headerfilettmathbig
- \file ttmathbig.h
- \brief A Class for representing floating point numbers
-#include "ttmathint.h"
-#include "ttmaththreads.h"
-#include <iostream>
-#include <signal.h>
-namespace ttmath
- \brief Big implements the floating point numbers
-template <uint exp, uint man>
-class Big
- value = mantissa * 2^exponent
- exponent - an integer value with a sign
- mantissa - an integer value without a sign
- mantissa must be pushed into the left side that is the highest bit from
- mantissa must be one (of course if there's another value than zero) -- this job
- (pushing bits into the left side) making Standardizing() method
- for example:
- if we want to store value one (1) into our Big object we must:
- set mantissa to 1
- set exponent to 0
- set info to 0
- and call method Standardizing()
-Int<exp> exponent;
-UInt<man> mantissa;
-unsigned char info;
- Sign
- the mask of a bit from 'info' which means that there is a sign
- (when the bit is set)
-#define TTMATH_BIG_SIGN 128
- Not a number
- if this bit is set then the object does not hold a valid number
-#define TTMATH_BIG_NAN 64
- Zero
- if this bit is set then the value is zero
- mantissa should be zero and exponent should be zero too
- (the Standardizing() method does this)
-#define TTMATH_BIG_ZERO 32
- /*!
-     this method turns a carry into NaN
-     for any c different from zero SetNan() is called and 1 is returned,
-     for c equal zero nothing is changed and 0 is returned
- */
- uint CheckCarry(uint c)
- {
-     if( c == 0 )
-         return 0;
-     SetNan();
-     return 1;
- }
- /*!
-     returning the string which represents the current type of the library
-     (the value is taken from UInt<man>::LibTypeStr())
-     we have following types:
-         asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
-         asm_gcc_32 - with asm code designed for GCC (32 bits)
-         asm_vc_64 - with asm for VC (64 bit)
-         asm_gcc_64 - with asm for GCC (64 bit)
-         no_asm_32 - pure C++ version (32 bit) - without any asm code
-         no_asm_64 - pure C++ version (64 bit) - without any asm code
- */
- static const char * LibTypeStr()
- {
-     const char * type_name = UInt<man>::LibTypeStr();
-     return type_name;
- }
- /*!
-     returning the current type of the library
-     (the value is taken from UInt<man>::LibType())
- */
- static LibTypeCode LibType()
- {
-     const LibTypeCode type_code = UInt<man>::LibType();
-     return type_code;
- }
- /*!
-     this method moves all bits from mantissa into its left side
-     (suitably changes the exponent) or if the mantissa is zero
-     it sets the exponent to zero as well
-     (the zero case is handled by CorrectZero())
-     it can return a carry
-     the carry will be when we don't have enough space in the exponent
-     you don't have to use this method if you don't change the mantissa
-     and exponent directly
- */
- uint Standardizing()
- {
-     if( !mantissa.IsTheHighestBitSet() )
-     {
-         if( CorrectZero() )
-             return 0;
-         // the exponent is decreased by the value returned by CompensationToLeft()
-         const uint shifted_bits = mantissa.CompensationToLeft();
-         return exponent.Sub( shifted_bits );
-     }
-     // the mantissa is already pushed to the left, so the value is not zero
-     ClearInfoBit(TTMATH_BIG_ZERO);
-     return 0;
- }
- /*!
-     if the mantissa is equal zero this method sets exponent to zero,
-     sets the zero bit in 'info' and clears the sign bit
-     if the mantissa is different from zero only the zero bit is cleared
-     it returns true if there was the correction (the value is zero)
- */
- bool CorrectZero()
- {
-     if( mantissa.IsZero() )
-     {
-         // IsZero() tests only this flag, so it has to be set here
-         SetInfoBit(TTMATH_BIG_ZERO);
-         ClearInfoBit(TTMATH_BIG_SIGN);
-         exponent.SetZero();
-         return true;
-     }
-     else
-     {
-         ClearInfoBit(TTMATH_BIG_ZERO);
-     }
-     return false;
- }
- /*!
-     this method clears the given bit (or bits) of the 'info' variable
-     bit is one of: TTMATH_BIG_SIGN, TTMATH_BIG_NAN etc.
- */
- void ClearInfoBit(unsigned char bit)
- {
-     info &= ~bit;
- }
- /*!
-     this method sets the given bit (or bits) of the 'info' variable
-     bit is one of: TTMATH_BIG_SIGN, TTMATH_BIG_NAN etc.
- */
- void SetInfoBit(unsigned char bit)
- {
-     info |= bit;
- }
- /*!
-     this method tests the 'info' variable against the given mask
-     it returns true when at least one bit of 'bit' is set in 'info'
-     bit is one of: TTMATH_BIG_SIGN, TTMATH_BIG_NAN etc.
- */
- bool IsInfoBit(unsigned char bit) const
- {
-     if( info & bit )
-         return true;
-     return false;
- }
- /*!
-     this method sets zero
-     'info' becomes exactly TTMATH_BIG_ZERO (the sign and NaN bits are cleared),
-     exponent and mantissa are set to zero
- */
- void SetZero()
- {
-     // IsZero() reads only the flag from 'info', clearing the mantissa is not enough
-     info = TTMATH_BIG_ZERO;
-     exponent.SetZero();
-     mantissa.SetZero();
-     /*
-         we don't have to compensate zero
-     */
- }
- /*!
-     this method sets one
-     only the highest bit of the mantissa is set and the exponent
-     is -(man * TTMATH_BITS_PER_UINT - 1), all flags in 'info' are cleared
- */
- void SetOne()
- {
-     mantissa.SetZero();
-     mantissa.table[man-1] = TTMATH_UINT_HIGHEST_BIT;
-     exponent = -sint(man * TTMATH_BITS_PER_UINT - 1);
-     info = 0;
-     // Standardizing() is not needed - the highest bit of the mantissa is already set
- }
- /*!
-     this method sets value 0.5
-     (the value one with the exponent decreased by one)
- */
- void Set05()
- {
-     this->SetOne();
-     this->exponent.SubOne();
- }
- /*!
-     this method sets NaN flag (Not a Number)
-     when this flag is set that means there is no valid number
-     mantissa, exponent and the other flags are left untouched
- */
- void SetNan()
- {
-     // IsNan() tests exactly this bit
-     SetInfoBit(TTMATH_BIG_NAN);
- }
- /*!
-     this method sets NaN flag (Not a Number)
-     before that the value is reset with SetZero(), so the mantissa and
-     exponent are cleared as well (similarly as it would be a zero value)
- */
- void SetZeroNan()
- {
-     this->SetZero();
-     this->SetNan();
- }
- /*!
-     this method exchanges the content of this object with the argument
-     (mantissa, exponent and the 'info' flags)
- */
- void Swap(Big<exp, man> & ss2)
- {
-     mantissa.Swap(ss2.mantissa);
-     exponent.Swap(ss2.exponent);
-     const unsigned char other_info = ss2.info;
-     ss2.info = info;
-     info = other_info;
- }
- /*!
- this method sets the mantissa of the value of pi
- (only 'mantissa' is written here, 'info' and 'exponent' are set by
- the callers: SetPi(), Set05Pi() and Set2Pi())
- */
- void SetMantissaPi()
- {
- // this is a static table which represents the value of Pi (mantissa of it)
- // (first is the highest word)
- // we must define this table as 'unsigned int' because
- // both on 32bit and 64bit platforms this table is 32bit
- static const unsigned int temp_table[] = {
- 0xc90fdaa2, 0x2168c234, 0xc4c6628b, 0x80dc1cd1, 0x29024e08, 0x8a67cc74, 0x020bbea6, 0x3b139b22,
- 0x514a0879, 0x8e3404dd, 0xef9519b3, 0xcd3a431b, 0x302b0a6d, 0xf25f1437, 0x4fe1356d, 0x6d51c245,
- 0xe485b576, 0x625e7ec6, 0xf44c42e9, 0xa637ed6b, 0x0bff5cb6, 0xf406b7ed, 0xee386bfb, 0x5a899fa5,
- 0xae9f2411, 0x7c4b1fe6, 0x49286651, 0xece45b3d, 0xc2007cb8, 0xa163bf05, 0x98da4836, 0x1c55d39a,
- 0x69163fa8, 0xfd24cf5f, 0x83655d23, 0xdca3ad96, 0x1c62f356, 0x208552bb, 0x9ed52907, 0x7096966d,
- 0x670c354e, 0x4abc9804, 0xf1746c08, 0xca18217c, 0x32905e46, 0x2e36ce3b, 0xe39e772c, 0x180e8603,
- 0x9b2783a2, 0xec07a28f, 0xb5c55df0, 0x6f4c52c9, 0xde2bcbf6, 0x95581718, 0x3995497c, 0xea956ae5,
- 0x15d22618, 0x98fa0510, 0x15728e5a, 0x8aaac42d, 0xad33170d, 0x04507a33, 0xa85521ab, 0xdf1cba64,
- 0xecfb8504, 0x58dbef0a, 0x8aea7157, 0x5d060c7d, 0xb3970f85, 0xa6e1e4c7, 0xabf5ae8c, 0xdb0933d7,
- 0x1e8c94e0, 0x4a25619d, 0xcee3d226, 0x1ad2ee6b, 0xf12ffa06, 0xd98a0864, 0xd8760273, 0x3ec86a64,
- 0x521f2b18, 0x177b200c, 0xbbe11757, 0x7a615d6c, 0x770988c0, 0xbad946e2, 0x08e24fa0, 0x74e5ab31,
- 0x43db5bfc, 0xe0fd108e, 0x4b82d120, 0xa9210801, 0x1a723c12, 0xa787e6d7, 0x88719a10, 0xbdba5b26,
- 0x99c32718, 0x6af4e23c, 0x1a946834, 0xb6150bda, 0x2583e9ca, 0x2ad44ce8, 0xdbbbc2db, 0x04de8ef9,
- 0x2e8efc14, 0x1fbecaa6, 0x287c5947, 0x4e6bc05d, 0x99b2964f, 0xa090c3a2, 0x233ba186, 0x515be7ed,
- 0x1f612970, 0xcee2d7af, 0xb81bdd76, 0x2170481c, 0xd0069127, 0xd5b05aa9, 0x93b4ea98, 0x8d8fddc1,
- 0x86ffb7dc, 0x90a6c08f, 0x4df435c9, 0x34028492, 0x36c3fab4, 0xd27c7026, 0xc1d4dcb2, 0x602646de,
- 0xc9751e76, 0x3dba37bd, 0xf8ff9406, 0xad9e530e, 0xe5db382f, 0x413001ae, 0xb06a53ed, 0x9027d831,
- 0x179727b0, 0x865a8918, 0xda3edbeb, 0xcf9b14ed, 0x44ce6cba, 0xced4bb1b, 0xdb7f1447, 0xe6cc254b,
- 0x33205151, 0x2bd7af42, 0x6fb8f401, 0x378cd2bf, 0x5983ca01, 0xc64b92ec, 0xf032ea15, 0xd1721d03,
- 0xf482d7ce, 0x6e74fef6, 0xd55e702f, 0x46980c82, 0xb5a84031, 0x900b1c9e, 0x59e7c97f, 0xbec7e8f3,
- 0x23a97a7e, 0x36cc88be, 0x0f1d45b7, 0xff585ac5, 0x4bd407b2, 0x2b4154aa, 0xcc8f6d7e, 0xbf48e1d8,
- 0x14cc5ed2, 0x0f8037e0, 0xa79715ee, 0xf29be328, 0x06a1d58b, 0xb7c5da76, 0xf550aa3d, 0x8a1fbff0,
- 0xeb19ccb1, 0xa313d55c, 0xda56c9ec, 0x2ef29632, 0x387fe8d7, 0x6e3c0468, 0x043e8f66, 0x3f4860ee,
- 0x12bf2d5b, 0x0b7474d6, 0xe694f91e, 0x6dbe1159, 0x74a3926f, 0x12fee5e4, 0x38777cb6, 0xa932df8c,
- 0xd8bec4d0, 0x73b931ba, 0x3bc832b6, 0x8d9dd300, 0x741fa7bf, 0x8afc47ed, 0x2576f693, 0x6ba42466,
- 0x3aab639c, 0x5ae4f568, 0x3423b474, 0x2bf1c978, 0x238f16cb, 0xe39d652d, 0xe3fdb8be, 0xfc848ad9,
- 0x22222e04, 0xa4037c07, 0x13eb57a8, 0x1a23f0c7, 0x3473fc64, 0x6cea306b, 0x4bcbc886, 0x2f8385dd,
- 0xfa9d4b7f, 0xa2c087e8, 0x79683303, 0xed5bdd3a, 0x062b3cf5, 0xb3a278a6, 0x6d2a13f8, 0x3f44f82d,
- 0xdf310ee0, 0x74ab6a36, 0x4597e899, 0xa0255dc1, 0x64f31cc5, 0x0846851d, 0xf9ab4819, 0x5ded7ea1,
- 0xb1d510bd, 0x7ee74d73, 0xfaf36bc3, 0x1ecfa268, 0x359046f4, 0xeb879f92, 0x4009438b, 0x481c6cd7,
- 0x889a002e, 0xd5ee382b, 0xc9190da6, 0xfc026e47, 0x9558e447, 0x5677e9aa, 0x9e3050e2, 0x765694df,
- 0xc81f56e8, 0x80b96e71, 0x60c980dd, 0x98a573ea, 0x4472065a, 0x139cd290, 0x6cd1cb72, 0x9ec52a53 // last one was: 0x9ec52a52
- //0x86d44014, ...
- // (the last word 0x9ec52a52 was rounded up because the next one is 0x86d44014 -- first bit is one 0x8..)
- // 256 32bit words for the mantissa -- about 2464 valid decimal digits
- };
- // the value of PI comes from the website http://zenwerx.com/pi.php
- // 3101 digits were taken from this website
- // (later the digits were compared with:
- // http://www.eveandersson.com/pi/digits/1000000 and http://www.geom.uiuc.edu/~huberty/math5337/groupe/digits.html )
- // and they were set into Big<1,400> type (using operator=(const char*) on a 32bit platform)
- // and then the first 256 words were taken into this table
- // (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256,
- // and on 64bit platform value 128 (256/2=128))
- // the second argument is the number of 32bit words stored in the table
- mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(int));
- }
- /*!
-     this method sets the value of pi
-     the mantissa comes from SetMantissaPi(), all flags in 'info' are cleared
- */
- void SetPi()
- {
-     info = 0;
-     exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT) + 2;
-     SetMantissaPi();
- }
- /*!
-     this method sets the value of 0.5 * pi
-     (the same mantissa as in SetPi(), the exponent is smaller by one)
- */
- void Set05Pi()
- {
-     info = 0;
-     exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT) + 1;
-     SetMantissaPi();
- }
- /*!
-     this method sets the value of 2 * pi
-     (the same mantissa as in SetPi(), the exponent is greater by one)
- */
- void Set2Pi()
- {
-     info = 0;
-     exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT) + 3;
-     SetMantissaPi();
- }
- /*!
- this method sets the value of e
- (the base of the natural logarithm)
- the mantissa is loaded from the table below with SetFromTable(),
- then the exponent is set and all flags in 'info' are cleared
- */
- void SetE()
- {
- // the table is 'unsigned int' - it holds 32bit words, the first one is the highest word
- static const unsigned int temp_table[] = {
- 0xadf85458, 0xa2bb4a9a, 0xafdc5620, 0x273d3cf1, 0xd8b9c583, 0xce2d3695, 0xa9e13641, 0x146433fb,
- 0xcc939dce, 0x249b3ef9, 0x7d2fe363, 0x630c75d8, 0xf681b202, 0xaec4617a, 0xd3df1ed5, 0xd5fd6561,
- 0x2433f51f, 0x5f066ed0, 0x85636555, 0x3ded1af3, 0xb557135e, 0x7f57c935, 0x984f0c70, 0xe0e68b77,
- 0xe2a689da, 0xf3efe872, 0x1df158a1, 0x36ade735, 0x30acca4f, 0x483a797a, 0xbc0ab182, 0xb324fb61,
- 0xd108a94b, 0xb2c8e3fb, 0xb96adab7, 0x60d7f468, 0x1d4f42a3, 0xde394df4, 0xae56ede7, 0x6372bb19,
- 0x0b07a7c8, 0xee0a6d70, 0x9e02fce1, 0xcdf7e2ec, 0xc03404cd, 0x28342f61, 0x9172fe9c, 0xe98583ff,
- 0x8e4f1232, 0xeef28183, 0xc3fe3b1b, 0x4c6fad73, 0x3bb5fcbc, 0x2ec22005, 0xc58ef183, 0x7d1683b2,
- 0xc6f34a26, 0xc1b2effa, 0x886b4238, 0x611fcfdc, 0xde355b3b, 0x6519035b, 0xbc34f4de, 0xf99c0238,
- 0x61b46fc9, 0xd6e6c907, 0x7ad91d26, 0x91f7f7ee, 0x598cb0fa, 0xc186d91c, 0xaefe1309, 0x85139270,
- 0xb4130c93, 0xbc437944, 0xf4fd4452, 0xe2d74dd3, 0x64f2e21e, 0x71f54bff, 0x5cae82ab, 0x9c9df69e,
- 0xe86d2bc5, 0x22363a0d, 0xabc52197, 0x9b0deada, 0x1dbf9a42, 0xd5c4484e, 0x0abcd06b, 0xfa53ddef,
- 0x3c1b20ee, 0x3fd59d7c, 0x25e41d2b, 0x669e1ef1, 0x6e6f52c3, 0x164df4fb, 0x7930e9e4, 0xe58857b6,
- 0xac7d5f42, 0xd69f6d18, 0x7763cf1d, 0x55034004, 0x87f55ba5, 0x7e31cc7a, 0x7135c886, 0xefb4318a,
- 0xed6a1e01, 0x2d9e6832, 0xa907600a, 0x918130c4, 0x6dc778f9, 0x71ad0038, 0x092999a3, 0x33cb8b7a,
- 0x1a1db93d, 0x7140003c, 0x2a4ecea9, 0xf98d0acc, 0x0a8291cd, 0xcec97dcf, 0x8ec9b55a, 0x7f88a46b,
- 0x4db5a851, 0xf44182e1, 0xc68a007e, 0x5e0dd902, 0x0bfd64b6, 0x45036c7a, 0x4e677d2c, 0x38532a3a,
- 0x23ba4442, 0xcaf53ea6, 0x3bb45432, 0x9b7624c8, 0x917bdd64, 0xb1c0fd4c, 0xb38e8c33, 0x4c701c3a,
- 0xcdad0657, 0xfccfec71, 0x9b1f5c3e, 0x4e46041f, 0x388147fb, 0x4cfdb477, 0xa52471f7, 0xa9a96910,
- 0xb855322e, 0xdb6340d8, 0xa00ef092, 0x350511e3, 0x0abec1ff, 0xf9e3a26e, 0x7fb29f8c, 0x183023c3,
- 0x587e38da, 0x0077d9b4, 0x763e4e4b, 0x94b2bbc1, 0x94c6651e, 0x77caf992, 0xeeaac023, 0x2a281bf6,
- 0xb3a739c1, 0x22611682, 0x0ae8db58, 0x47a67cbe, 0xf9c9091b, 0x462d538c, 0xd72b0374, 0x6ae77f5e,
- 0x62292c31, 0x1562a846, 0x505dc82d, 0xb854338a, 0xe49f5235, 0xc95b9117, 0x8ccf2dd5, 0xcacef403,
- 0xec9d1810, 0xc6272b04, 0x5b3b71f9, 0xdc6b80d6, 0x3fdd4a8e, 0x9adb1e69, 0x62a69526, 0xd43161c1,
- 0xa41d570d, 0x7938dad4, 0xa40e329c, 0xcff46aaa, 0x36ad004c, 0xf600c838, 0x1e425a31, 0xd951ae64,
- 0xfdb23fce, 0xc9509d43, 0x687feb69, 0xedd1cc5e, 0x0b8cc3bd, 0xf64b10ef, 0x86b63142, 0xa3ab8829,
- 0x555b2f74, 0x7c932665, 0xcb2c0f1c, 0xc01bd702, 0x29388839, 0xd2af05e4, 0x54504ac7, 0x8b758282,
- 0x2846c0ba, 0x35c35f5c, 0x59160cc0, 0x46fd8251, 0x541fc68c, 0x9c86b022, 0xbb709987, 0x6a460e74,
- 0x51a8a931, 0x09703fee, 0x1c217e6c, 0x3826e52c, 0x51aa691e, 0x0e423cfc, 0x99e9e316, 0x50c1217b,
- 0x624816cd, 0xad9a95f9, 0xd5b80194, 0x88d9c0a0, 0xa1fe3075, 0xa577e231, 0x83f81d4a, 0x3f2fa457,
- 0x1efc8ce0, 0xba8a4fe8, 0xb6855dfe, 0x72b0a66e, 0xded2fbab, 0xfbe58a30, 0xfafabe1c, 0x5d71a87e,
- 0x2f741ef8, 0xc1fe86fe, 0xa6bbfde5, 0x30677f0d, 0x97d11d49, 0xf7a8443d, 0x0822e506, 0xa9f4614e,
- 0x011e2a94, 0x838ff88c, 0xd68c8bb7, 0xc51eef6d, 0x49ea8ab4, 0xf2c3df5b, 0xb4e0735a, 0xb0d68749
- // 0x2fe26dd4, ...
- // 256 32bit words for the mantissa -- about 2464 valid decimal digits
- };
- // the above value was calculated using Big<1,400> type on a 32bit platform
- // and then the first 256 words were taken,
- // the calculation was made with the ExpSurrounding0(1) method
- // which took 1420 iterations
- // (the result was compared with e taken from http://antwrp.gsfc.nasa.gov/htmltest/gifcity/e.2mil)
- // (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256,
- // and on 64bit platform value 128 (256/2=128))
- // the second argument is the number of 32bit words stored in the table
- mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(int));
- exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT) + 2;
- info = 0;
- }
- /*!
- this method sets the value of ln(2)
- the natural logarithm from 2
- the mantissa is loaded from the table below with SetFromTable(),
- then the exponent is set and all flags in 'info' are cleared
- */
- void SetLn2()
- {
- // the table is 'unsigned int' - it holds 32bit words, the first one is the highest word
- static const unsigned int temp_table[] = {
- 0xb17217f7, 0xd1cf79ab, 0xc9e3b398, 0x03f2f6af, 0x40f34326, 0x7298b62d, 0x8a0d175b, 0x8baafa2b,
- 0xe7b87620, 0x6debac98, 0x559552fb, 0x4afa1b10, 0xed2eae35, 0xc1382144, 0x27573b29, 0x1169b825,
- 0x3e96ca16, 0x224ae8c5, 0x1acbda11, 0x317c387e, 0xb9ea9bc3, 0xb136603b, 0x256fa0ec, 0x7657f74b,
- 0x72ce87b1, 0x9d6548ca, 0xf5dfa6bd, 0x38303248, 0x655fa187, 0x2f20e3a2, 0xda2d97c5, 0x0f3fd5c6,
- 0x07f4ca11, 0xfb5bfb90, 0x610d30f8, 0x8fe551a2, 0xee569d6d, 0xfc1efa15, 0x7d2e23de, 0x1400b396,
- 0x17460775, 0xdb8990e5, 0xc943e732, 0xb479cd33, 0xcccc4e65, 0x9393514c, 0x4c1a1e0b, 0xd1d6095d,
- 0x25669b33, 0x3564a337, 0x6a9c7f8a, 0x5e148e82, 0x074db601, 0x5cfe7aa3, 0x0c480a54, 0x17350d2c,
- 0x955d5179, 0xb1e17b9d, 0xae313cdb, 0x6c606cb1, 0x078f735d, 0x1b2db31b, 0x5f50b518, 0x5064c18b,
- 0x4d162db3, 0xb365853d, 0x7598a195, 0x1ae273ee, 0x5570b6c6, 0x8f969834, 0x96d4e6d3, 0x30af889b,
- 0x44a02554, 0x731cdc8e, 0xa17293d1, 0x228a4ef9, 0x8d6f5177, 0xfbcf0755, 0x268a5c1f, 0x9538b982,
- 0x61affd44, 0x6b1ca3cf, 0x5e9222b8, 0x8c66d3c5, 0x422183ed, 0xc9942109, 0x0bbb16fa, 0xf3d949f2,
- 0x36e02b20, 0xcee886b9, 0x05c128d5, 0x3d0bd2f9, 0x62136319, 0x6af50302, 0x0060e499, 0x08391a0c,
- 0x57339ba2, 0xbeba7d05, 0x2ac5b61c, 0xc4e9207c, 0xef2f0ce2, 0xd7373958, 0xd7622658, 0x901e646a,
- 0x95184460, 0xdc4e7487, 0x156e0c29, 0x2413d5e3, 0x61c1696d, 0xd24aaebd, 0x473826fd, 0xa0c238b9,
- 0x0ab111bb, 0xbd67c724, 0x972cd18b, 0xfbbd9d42, 0x6c472096, 0xe76115c0, 0x5f6f7ceb, 0xac9f45ae,
- 0xcecb72f1, 0x9c38339d, 0x8f682625, 0x0dea891e, 0xf07afff3, 0xa892374e, 0x175eb4af, 0xc8daadd8,
- 0x85db6ab0, 0x3a49bd0d, 0xc0b1b31d, 0x8a0e23fa, 0xc5e5767d, 0xf95884e0, 0x6425a415, 0x26fac51c,
- 0x3ea8449f, 0xe8f70edd, 0x062b1a63, 0xa6c4c60c, 0x52ab3316, 0x1e238438, 0x897a39ce, 0x78b63c9f,
- 0x364f5b8a, 0xef22ec2f, 0xee6e0850, 0xeca42d06, 0xfb0c75df, 0x5497e00c, 0x554b03d7, 0xd2874a00,
- 0x0ca8f58d, 0x94f0341c, 0xbe2ec921, 0x56c9f949, 0xdb4a9316, 0xf281501e, 0x53daec3f, 0x64f1b783,
- 0x154c6032, 0x0e2ff793, 0x33ce3573, 0xfacc5fdc, 0xf1178590, 0x3155bbd9, 0x0f023b22, 0x0224fcd8,
- 0x471bf4f4, 0x45f0a88a, 0x14f0cd97, 0x6ea354bb, 0x20cdb5cc, 0xb3db2392, 0x88d58655, 0x4e2a0e8a,
- 0x6fe51a8c, 0xfaa72ef2, 0xad8a43dc, 0x4212b210, 0xb779dfe4, 0x9d7307cc, 0x846532e4, 0xb9694eda,
- 0xd162af05, 0x3b1751f3, 0xa3d091f6, 0x56658154, 0x12b5e8c2, 0x02461069, 0xac14b958, 0x784934b8,
- 0xd6cce1da, 0xa5053701, 0x1aa4fb42, 0xb9a3def4, 0x1bda1f85, 0xef6fdbf2, 0xf2d89d2a, 0x4b183527,
- 0x8fd94057, 0x89f45681, 0x2b552879, 0xa6168695, 0xc12963b0, 0xff01eaab, 0x73e5b5c1, 0x585318e7,
- 0x624f14a5, 0x1a4a026b, 0x68082920, 0x57fd99b6, 0x6dc085a9, 0x8ac8d8ca, 0xf9eeeea9, 0x8a2400ca,
- 0xc95f260f, 0xd10036f9, 0xf91096ac, 0x3195220a, 0x1a356b2a, 0x73b7eaad, 0xaf6d6058, 0x71ef7afb,
- 0x80bc4234, 0x33562e94, 0xb12dfab4, 0x14451579, 0xdf59eae0, 0x51707062, 0x4012a829, 0x62c59cab,
- 0x347f8304, 0xd889659e, 0x5a9139db, 0x14efcc30, 0x852be3e8, 0xfc99f14d, 0x1d822dd6, 0xe2f76797,
- 0xe30219c8, 0xaa9ce884, 0x8a886eb3, 0xc87b7295, 0x988012e8, 0x314186ed, 0xbaf86856, 0xccd3c3b6,
- 0xee94e62f, 0x110a6783, 0xd2aae89c, 0xcc3b76fc, 0x435a0ce1, 0x34c2838f, 0xd571ec6c, 0x1366a993 // last one was: 0x1366a992
- //0xcbb9ac40, ...
- // (the last word 0x1366a992 was rounded up because the next one is 0xcbb9ac40 -- first bit is one 0xc..)
- // 256 32bit words for the mantissa -- about 2464 valid decimal digits
- };
- // the above value was calculated using Big<1,400> type on a 32bit platform
- // and then the first 256 words were taken,
- // the calculation was made with the LnSurrounding1(2) method
- // which took 4035 iterations
- // (the result was compared with ln(2) taken from http://ja0hxv.calico.jp/pai/estart.html)
- // (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256,
- // and on 64bit platform value 128 (256/2=128))
- // the second argument is the number of 32bit words stored in the table
- mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(int));
- exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT);
- info = 0;
- }
- /*!
- this method sets the value of ln(10)
- the natural logarithm from 10
- I introduced this constant especially to make the conversion ToString()
- faster. In fact the method ToString() keeps the values of the logarithms
- it has calculated but it must calculate the logarithm at least once.
- If a program, which uses this library, is running for a long time this
- would be ok, but for programs which are running shorter, for example for
- CGI applications which print values only once, this would be a big
- inconvenience. Then if we're printing with base (radix) 10 and the mantissa
- of our value is smaller than or equal to TTMATH_BUILTIN_VARIABLES_SIZE
- we don't calculate the logarithm but take it from this constant.
- */
- void SetLn10()
- {
- // the table is 'unsigned int' - it holds 32bit words, the first one is the highest word
- static const unsigned int temp_table[] = {
- 0x935d8ddd, 0xaaa8ac16, 0xea56d62b, 0x82d30a28, 0xe28fecf9, 0xda5df90e, 0x83c61e82, 0x01f02d72,
- 0x962f02d7, 0xb1a8105c, 0xcc70cbc0, 0x2c5f0d68, 0x2c622418, 0x410be2da, 0xfb8f7884, 0x02e516d6,
- 0x782cf8a2, 0x8a8c911e, 0x765aa6c3, 0xb0d831fb, 0xef66ceb0, 0x4ab3c6fa, 0x5161bb49, 0xd219c7bb,
- 0xca67b35b, 0x23605085, 0x8e93368d, 0x44789c4f, 0x5b08b057, 0xd5ede20f, 0x469ea58e, 0x9305e981,
- 0xe2478fca, 0xad3aee98, 0x9cd5b42e, 0x6a271619, 0xa47ecb26, 0x978c5d4f, 0xdb1d28ea, 0x57d4fdc0,
- 0xe40bf3cc, 0x1e14126a, 0x45765cde, 0x268339db, 0xf47fa96d, 0xeb271060, 0xaf88486e, 0xa9b7401e,
- 0x3dfd3c51, 0x748e6d6e, 0x3848c8d2, 0x5faf1bca, 0xe88047f1, 0x7b0d9b50, 0xa949eaaa, 0xdf69e8a5,
- 0xf77e3760, 0x4e943960, 0xe38a5700, 0xffde2db1, 0xad6bfbff, 0xd821ba0a, 0x4cb0466d, 0x61ba648e,
- 0xef99c8e5, 0xf6974f36, 0x3982a78c, 0xa45ddfc8, 0x09426178, 0x19127a6e, 0x3b70fcda, 0x2d732d47,
- 0xb5e4b1c8, 0xc0e5a10a, 0xaa6604a5, 0x324ec3dc, 0xbc64ea80, 0x6e198566, 0x1f1d366c, 0x20663834,
- 0x4d5e843f, 0x20642b97, 0x0a62d18e, 0x478f7bd5, 0x8fcd0832, 0x4a7b32a6, 0xdef85a05, 0xeb56323a,
- 0x421ef5e0, 0xb00410a0, 0xa0d9c260, 0x794a976f, 0xf6ff363d, 0xb00b6b33, 0xf42c58de, 0xf8a3c52d,
- 0xed69b13d, 0xc1a03730, 0xb6524dc1, 0x8c167e86, 0x99d6d20e, 0xa2defd2b, 0xd006f8b4, 0xbe145a2a,
- 0xdf3ccbb3, 0x189da49d, 0xbc1261c8, 0xb3e4daad, 0x6a36cecc, 0xb2d5ae5b, 0x89bf752f, 0xb5dfb353,
- 0xff3065c4, 0x0cfceec8, 0x1be5a9a9, 0x67fddc57, 0xc4b83301, 0x006bf062, 0x4b40ed7a, 0x56c6cdcd,
- 0xa2d6fe91, 0x388e9e3e, 0x48a93f5f, 0x5e3b6eb4, 0xb81c4a5b, 0x53d49ea6, 0x8e668aea, 0xba83c7f8,
- 0xfb5f06c3, 0x58ac8f70, 0xfa9d8c59, 0x8c574502, 0xbaf54c96, 0xc84911f0, 0x0482d095, 0x1a0af022,
- 0xabbab080, 0xec97efd3, 0x671e4e0e, 0x52f166b6, 0xcd5cd226, 0x0dc67795, 0x2e1e34a3, 0xf799677f,
- 0x2c1d48f1, 0x2944b6c5, 0x2ba1307e, 0x704d67f9, 0x1c1035e4, 0x4e927c63, 0x03cf12bf, 0xe2cd2e31,
- 0xf8ee4843, 0x344d51b0, 0xf37da42b, 0x9f0b0fd9, 0x134fb2d9, 0xf815e490, 0xd966283f, 0x23962766,
- 0xeceab1e4, 0xf3b5fc86, 0x468127e2, 0xb606d10d, 0x3a45f4b6, 0xb776102d, 0x2fdbb420, 0x80c8fa84,
- 0xd0ff9f45, 0xc58aef38, 0xdb2410fd, 0x1f1cebad, 0x733b2281, 0x52ca5f36, 0xddf29daa, 0x544334b8,
- 0xdeeaf659, 0x4e462713, 0x1ed485b4, 0x6a0822e1, 0x28db471c, 0xa53938a8, 0x44c3bef7, 0xf35215c8,
- 0xb382bc4e, 0x3e4c6f15, 0x6285f54c, 0x17ab408e, 0xccbf7f5e, 0xd16ab3f6, 0xced2846d, 0xf457e14f,
- 0xbb45d9c5, 0x646ad497, 0xac697494, 0x145de32e, 0x93907128, 0xd263d521, 0x79efb424, 0xd64651d6,
- 0xebc0c9f0, 0xbb583a44, 0xc6412c84, 0x85bb29a6, 0x4d31a2cd, 0x92954469, 0xa32b1abd, 0xf7f5202c,
- 0xa4aa6c93, 0x2e9b53cf, 0x385ab136, 0x2741f356, 0x5de9c065, 0x6009901c, 0x88abbdd8, 0x74efcf73,
- 0x3f761ad4, 0x35f3c083, 0xfd6b8ee0, 0x0bef11c7, 0xc552a89d, 0x58ce4a21, 0xd71e54f2, 0x4157f6c7,
- 0xd4622316, 0xe98956d7, 0x450027de, 0xcbd398d8, 0x4b98b36a, 0x0724c25c, 0xdb237760, 0xe9324b68,
- 0x7523e506, 0x8edad933, 0x92197f00, 0xb853a326, 0xb330c444, 0x65129296, 0x34bc0670, 0xe177806d,
- 0xe338dac4, 0x5537492a, 0xe19add83, 0xcf45000f, 0x5b423bce, 0x6497d209, 0xe30e18a1, 0x3cbf0687,
- 0x67973103, 0xd9485366, 0x81506bba, 0x2e93a9a4, 0x7dd59d3f, 0xf17cd746, 0x8c2075be, 0x552a4348 // last one was: 0x552a4347
- // 0xb4a638ef, ...
- //(the last word 0x552a4347 was rounded up because the next one is 0xb4a638ef -- first bit is one 0xb..)
- // 256 32bit words for the mantissa -- about 2464 valid digits (decimal)
- };
- // the above value was calculated using Big<1,400> type on a 32bit platform
- // and then the first 256 32bit words were taken,
- // the calculation was made with the LnSurrounding1(10) method
- // which took 22080 iterations
- // (the result was compared with ln(10) taken from http://ja0hxv.calico.jp/pai/estart.html)
- // (the formula used in LnSurrounding1(x) converges badly when
- // the x is greater than one but in fact we can use it, only the
- // number of iterations will be greater)
- // (TTMATH_BUILTIN_VARIABLES_SIZE on 32bit platform should have the value 256,
- // and on 64bit platform value 128 (256/2=128))
- // the second argument is the number of 32bit words stored in the table
- mantissa.SetFromTable(temp_table, sizeof(temp_table) / sizeof(int));
- exponent = -sint(man)*sint(TTMATH_BITS_PER_UINT) + 2;
- info = 0;
- }
- /*!
-     this method sets the maximum value which can be held in this type
-     (the maximal mantissa with the maximal exponent, no flags in 'info')
- */
- void SetMax()
- {
-     exponent.SetMax();
-     mantissa.SetMax();
-     info = 0;
-     // 'Standardizing()' is not needed because the highest bit of
-     // the mantissa is set
- }
- /*!
-     this method sets the minimum value which can be held in this type
-     (the same mantissa and exponent as in SetMax(), then SetSign() is called)
- */
- void SetMin()
- {
-     exponent.SetMax();
-     mantissa.SetMax();
-     info = 0;
-     SetSign();
-     // 'Standardizing()' is not needed because the highest bit of
-     // the mantissa is set
- }
- /*!
-     testing whether there is a value zero or not
-     (only the TTMATH_BIG_ZERO flag from 'info' is examined)
- */
- bool IsZero() const
- {
-     return (info & TTMATH_BIG_ZERO) != 0;
- }
- /*!
-     this method returns true when the sign flag (TTMATH_BIG_SIGN) is set
-     the NaN flag is not checked here
- */
- bool IsSign() const
- {
-     return (info & TTMATH_BIG_SIGN) != 0;
- }
- /*!
-     this method returns true when there is not a valid number
-     (the TTMATH_BIG_NAN flag is set in 'info')
- */
- bool IsNan() const
- {
-     return (info & TTMATH_BIG_NAN) != 0;
- }
- /*!
-     this method clears the sign flag
-     (the object holds the absolute value afterwards)
-     e.g.
-         -1 -> 1
-          2 -> 2
- */
- void Abs()
- {
-     info &= ~TTMATH_BIG_SIGN;
- }
- /*!
-     this method leaves only the 'sign' of the value
-     e.g. -2 -> -1
-           0 ->  0
-          10 ->  1
-     a NaN value is not changed
- */
- void Sgn()
- {
-     // the NaN flag has to be tested first, SetOne() below would clear it
-     if( IsNan() )
-         return;
-     const bool was_negative = IsSign();
-     if( !was_negative && IsZero() )
-     {
-         SetZero(); // !! is it needed here?
-         return;
-     }
-     SetOne();
-     if( was_negative )
-         SetSign();
- }
- /*!
-     this method sets the sign
-     e.g.
-         -1 -> -1
-          2 -> -2
-     we do not check whether there is a zero or not, if you're using this method
-     you must be sure that the value is (or will be afterwards) different from zero
- */
- void SetSign()
- {
-     // IsSign() tests exactly this bit
-     SetInfoBit(TTMATH_BIG_SIGN);
- }
- /*!
-     this method changes the sign
-     when there is a value of zero then the sign is not changed
-     e.g.
-         -1 -> 1
-          2 -> -2
- */
- void ChangeSign()
- {
-     // we don't have to check the NaN flag here
-     if( IsZero() )
-         return;
-     if( IsSign() )
-         ClearInfoBit(TTMATH_BIG_SIGN);
-     else
-         SetInfoBit(TTMATH_BIG_SIGN); // a positive value becomes negative
- }
- /*!
-     this method does the half-to-even rounding (banker's rounding)
-     if is_half is:
-         true - that means the rest was equal the half (0.5 decimal)
-         false - that means the rest was greater than a half (greater than 0.5 decimal)
-     if the rest was less than a half then don't call this method
-     (the rounding should do nothing then)
-     rounding_up selects whether one is added to the mantissa (true)
-     or subtracted from it (false)
-     the return value is the carry from increasing the exponent (zero in the other cases)
- */
- uint RoundHalfToEven(bool is_half, bool rounding_up = true)
- {
-     uint c = 0;
-     if( !is_half || mantissa.IsTheLowestBitSet() )
-     {
-         if( rounding_up )
-         {
-             if( mantissa.AddOne() )
-             {
-                 // the mantissa overflowed: put the carry back as the highest bit
-                 // and compensate it in the exponent
-                 mantissa.Rcr(1, 1);
-                 c = exponent.AddOne();
-             }
-         }
-         else
-         {
-             uint c_from_zero = mantissa.SubOne();
-             // keeps the variable used in case TTMATH_ASSERT expands to nothing
-             (void)c_from_zero;
-             // we're using rounding_up=false in Add() when the mantissas have different signs
-             // mantissa can be zero only when previous mantissa was equal to ss2.mantissa
-             // but in such a case 'last_bit_set' will not be set and consequently 'do_rounding' will be false
-             TTMATH_ASSERT( c_from_zero == 0 )
-         }
-     }
-     return c;
- }
- /*!
- *
- * basic mathematic functions
- *
- */
- /*!
- this method adds one to the existing value
- */
- uint AddOne()
- {
- Big<exp, man> one;
- one.SetOne();
- return Add(one);
- }
- /*!
- this method subtracts one from the existing value
- */
- uint SubOne()
- {
- Big<exp, man> one;
- one.SetOne();
- return Sub(one);
- }
- /*!
- an auxiliary method for adding
- */
- void AddCheckExponents( Big<exp, man> & ss2,
- Int<exp> & exp_offset,
- bool & last_bit_set,
- bool & rest_zero,
- bool & do_adding,
- bool & do_rounding)
- {
- Int<exp> mantissa_size_in_bits( man * TTMATH_BITS_PER_UINT );
- if( exp_offset == mantissa_size_in_bits )
- {
- last_bit_set = ss2.mantissa.IsTheHighestBitSet();
- rest_zero = ss2.mantissa.AreFirstBitsZero(man*TTMATH_BITS_PER_UINT - 1);
- do_rounding = true; // we'are only rounding
- }
- else
- if( exp_offset < mantissa_size_in_bits )
- {
- uint moved = exp_offset.ToInt(); // how many times we must move ss2.mantissa
- rest_zero = true;
- if( moved > 0 )
- {
- last_bit_set = static_cast<bool>( ss2.mantissa.GetBit(moved-1) );
- if( moved > 1 )
- rest_zero = ss2.mantissa.AreFirstBitsZero(moved - 1);
- // (2) moving 'exp_offset' times
- ss2.mantissa.Rcr(moved, 0);
- }
- do_adding = true;
- do_rounding = true;
- }
- // if exp_offset is greater than mantissa_size_in_bits then we do nothing
- // ss2 is too small for taking into consideration in the sum
- }
- /*!
- an auxiliary method for adding
- */
- uint AddMantissas( Big<exp, man> & ss2,
- bool & last_bit_set,
- bool & rest_zero)
- {
- uint c = 0;
- if( IsSign() == ss2.IsSign() )
- {
- // values have the same signs
- if( mantissa.Add(ss2.mantissa) )
- {
- // we have one bit more from addition (carry)
- // now rest_zero means the old rest_zero with the old last_bit_set
- rest_zero = (!last_bit_set && rest_zero);
- last_bit_set = mantissa.Rcr(1,1);
- c += exponent.AddOne();
- }
- }
- else
- {
- // values have different signs
- // there shouldn't be a carry here because
- // (1) (2) guarantee that the mantissa of this
- // is greater than or equal to the mantissa of the ss2
- uint c_temp =
- #endif
- mantissa.Sub(ss2.mantissa);
- TTMATH_ASSERT( c_temp == 0 )
- }
- return c;
- }
- /*!
- Addition this = this + ss2
- it returns carry if the sum is too big
- */
- uint Add(Big<exp, man> ss2, bool round = true, bool adding = true)
- {
- bool last_bit_set, rest_zero, do_adding, do_rounding, rounding_up;
- Int<exp> exp_offset( exponent );
- uint c = 0;
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( !adding )
- ss2.ChangeSign(); // subtracting
- exp_offset.Sub( ss2.exponent );
- exp_offset.Abs();
- // (1) abs(this) will be >= abs(ss2)
- if( SmallerWithoutSignThan(ss2) )
- Swap(ss2);
- if( ss2.IsZero() )
- return 0;
- last_bit_set = rest_zero = do_adding = do_rounding = false;
- rounding_up = (IsSign() == ss2.IsSign());
- AddCheckExponents(ss2, exp_offset, last_bit_set, rest_zero, do_adding, do_rounding);
- if( do_adding )
- c += AddMantissas(ss2, last_bit_set, rest_zero);
- if( !round || !last_bit_set )
- do_rounding = false;
- if( do_rounding )
- c += RoundHalfToEven(rest_zero, rounding_up);
- if( do_adding || do_rounding )
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- Subtraction this = this - ss2
- it returns carry if the result is too big
- */
- uint Sub(const Big<exp, man> & ss2, bool round = true)
- {
- return Add(ss2, round, false);
- }
- /*!
- bitwise AND
- this and ss2 must be >= 0
- return values:
- 0 - ok
- 1 - carry
- 2 - this or ss2 was negative
- */
- uint BitAnd(Big<exp, man> ss2)
- {
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( IsSign() || ss2.IsSign() )
- {
- SetNan();
- return 2;
- }
- if( IsZero() )
- return 0;
- if( ss2.IsZero() )
- {
- SetZero();
- return 0;
- }
- Int<exp> exp_offset( exponent );
- Int<exp> mantissa_size_in_bits( man * TTMATH_BITS_PER_UINT );
- uint c = 0;
- exp_offset.Sub( ss2.exponent );
- exp_offset.Abs();
- // abs(this) will be >= abs(ss2)
- if( SmallerWithoutSignThan(ss2) )
- Swap(ss2);
- if( exp_offset >= mantissa_size_in_bits )
- {
- // the second value is too small
- SetZero();
- return 0;
- }
- // exp_offset < mantissa_size_in_bits, moving 'exp_offset' times
- ss2.mantissa.Rcr( exp_offset.ToInt(), 0 );
- mantissa.BitAnd(ss2.mantissa);
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- bitwise OR
- this and ss2 must be >= 0
- return values:
- 0 - ok
- 1 - carry
- 2 - this or ss2 was negative
- */
- uint BitOr(Big<exp, man> ss2)
- {
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( IsSign() || ss2.IsSign() )
- {
- SetNan();
- return 2;
- }
- if( IsZero() )
- {
- *this = ss2;
- return 0;
- }
- if( ss2.IsZero() )
- return 0;
- Int<exp> exp_offset( exponent );
- Int<exp> mantissa_size_in_bits( man * TTMATH_BITS_PER_UINT );
- uint c = 0;
- exp_offset.Sub( ss2.exponent );
- exp_offset.Abs();
- // abs(this) will be >= abs(ss2)
- if( SmallerWithoutSignThan(ss2) )
- Swap(ss2);
- if( exp_offset >= mantissa_size_in_bits )
- // the second value is too small
- return 0;
- // exp_offset < mantissa_size_in_bits, moving 'exp_offset' times
- ss2.mantissa.Rcr( exp_offset.ToInt(), 0 );
- mantissa.BitOr(ss2.mantissa);
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- bitwise XOR
- this and ss2 must be >= 0
- return values:
- 0 - ok
- 1 - carry
- 2 - this or ss2 was negative
- */
- uint BitXor(Big<exp, man> ss2)
- {
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( IsSign() || ss2.IsSign() )
- {
- SetNan();
- return 2;
- }
- if( ss2.IsZero() )
- return 0;
- if( IsZero() )
- {
- *this = ss2;
- return 0;
- }
- Int<exp> exp_offset( exponent );
- Int<exp> mantissa_size_in_bits( man * TTMATH_BITS_PER_UINT );
- uint c = 0;
- exp_offset.Sub( ss2.exponent );
- exp_offset.Abs();
- // abs(this) will be >= abs(ss2)
- if( SmallerWithoutSignThan(ss2) )
- Swap(ss2);
- if( exp_offset >= mantissa_size_in_bits )
- // the second value is too small
- return 0;
- // exp_offset < mantissa_size_in_bits, moving 'exp_offset' times
- ss2.mantissa.Rcr( exp_offset.ToInt(), 0 );
- mantissa.BitXor(ss2.mantissa);
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- Multiplication this = this * ss2 (ss2 is uint)
- ss2 without a sign
- */
- uint MulUInt(uint ss2)
- {
- UInt<man+1> man_result;
- uint i,c = 0;
- if( IsNan() )
- return 1;
- if( IsZero() )
- return 0;
- if( ss2 == 0 )
- {
- SetZero();
- return 0;
- }
- // man_result = mantissa * ss2.mantissa
- mantissa.MulInt(ss2, man_result);
- sint bit = UInt<man>::FindLeadingBitInWord(man_result.table[man]); // man - last word
- if( bit!=-1 && uint(bit) > (TTMATH_BITS_PER_UINT/2) )
- {
- // 'i' will be from 0 to TTMATH_BITS_PER_UINT
- i = man_result.CompensationToLeft();
- c = exponent.Add( TTMATH_BITS_PER_UINT - i );
- for(i=0 ; i<man ; ++i)
- mantissa.table[i] = man_result.table[i+1];
- }
- else
- {
- if( bit != -1 )
- {
- man_result.Rcr(bit+1, 0);
- c += exponent.Add(bit+1);
- }
- for(i=0 ; i<man ; ++i)
- mantissa.table[i] = man_result.table[i];
- }
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- Multiplication this = this * ss2 (ss2 is sint)
- ss2 with a sign
- */
- uint MulInt(sint ss2)
- {
- if( IsNan() )
- return 1;
- if( ss2 == 0 )
- {
- SetZero();
- return 0;
- }
- if( IsZero() )
- return 0;
- if( IsSign() == (ss2<0) )
- {
- // the signs are the same (both are either - or +), the result is positive
- Abs();
- }
- else
- {
- // the signs are different, the result is negative
- SetSign();
- }
- if( ss2<0 )
- ss2 = -ss2;
- return MulUInt( uint(ss2) );
- }
- /*!
- this method checks whether a table pointed by 'tab' and 'len'
- has the value 0.5 decimal
- (it is treated as the comma operator would be before the highest bit)
- call this method only if the highest bit is set - you have to test it beforehand
- return:
- true - tab was equal the half (0.5 decimal)
- false - tab was greater than a half (greater than 0.5 decimal)
- */
- bool CheckGreaterOrEqualHalf(uint * tab, uint len)
- {
- uint i;
- TTMATH_ASSERT( len>0 && (tab[len-1] & TTMATH_UINT_HIGHEST_BIT)!=0 )
- for(i=0 ; i<len-1 ; ++i)
- if( tab[i] != 0 )
- return false;
- if( tab[i] != TTMATH_UINT_HIGHEST_BIT )
- return false;
- return true;
- }
- /*!
- multiplication this = this * ss2
- this method returns a carry
- */
- uint MulRef(const Big<exp, man> & ss2, bool round = true)
- {
- UInt<man*2> man_result;
- uint c = 0;
- uint i;
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( IsZero() )
- return 0;
- if( ss2.IsZero() )
- {
- SetZero();
- return 0;
- }
- // man_result = mantissa * ss2.mantissa
- mantissa.MulBig(ss2.mantissa, man_result);
- // 'i' will be from 0 to man*TTMATH_BITS_PER_UINT
- // because mantissa and ss2.mantissa are standardized
- // (the highest bit in man_result is set to 1 or
- // if there is a zero value in man_result the method CompensationToLeft()
- // returns 0 but we'll correct this at the end in Standardizing() method)
- i = man_result.CompensationToLeft();
- uint exp_add = man * TTMATH_BITS_PER_UINT - i;
- if( exp_add )
- c += exponent.Add( exp_add );
- c += exponent.Add( ss2.exponent );
- for(i=0 ; i<man ; ++i)
- mantissa.table[i] = man_result.table[i+man];
- if( round && (man_result.table[man-1] & TTMATH_UINT_HIGHEST_BIT) != 0 )
- {
- bool is_half = CheckGreaterOrEqualHalf(man_result.table, man);
- c += RoundHalfToEven(is_half);
- }
- if( IsSign() == ss2.IsSign() )
- {
- // the signs are the same, the result is positive
- Abs();
- }
- else
- {
- // the signs are different, the result is negative
- // if the value is zero it will be corrected later in Standardizing method
- SetSign();
- }
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- multiplication this = this * ss2
- this method returns a carry
- */
- uint Mul(const Big<exp, man> & ss2, bool round = true)
- {
- if( this == &ss2 )
- {
- Big<exp, man> copy_ss2(ss2);
- return MulRef(copy_ss2, round);
- }
- else
- {
- return MulRef(ss2, round);
- }
- }
- /*!
- division this = this / ss2
- return value:
- 0 - ok
- 1 - carry (in a division carry can be as well)
- 2 - improper argument (ss2 is zero)
- */
- uint DivRef(const Big<exp, man> & ss2, bool round = true)
- {
- UInt<man*2> man1;
- UInt<man*2> man2;
- uint i,c = 0;
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( ss2.IsZero() )
- {
- SetNan();
- return 2;
- }
- if( IsZero() )
- return 0;
- // !! this two loops can be joined together
- for(i=0 ; i<man ; ++i)
- {
- man1.table[i+man] = mantissa.table[i];
- man2.table[i] = ss2.mantissa.table[i];
- }
- for(i=0 ; i<man ; ++i)
- {
- man1.table[i] = 0;
- man2.table[i+man] = 0;
- }
- man1.Div(man2);
- i = man1.CompensationToLeft();
- if( i )
- c += exponent.Sub(i);
- c += exponent.Sub(ss2.exponent);
- for(i=0 ; i<man ; ++i)
- mantissa.table[i] = man1.table[i+man];
- if( round && (man1.table[man-1] & TTMATH_UINT_HIGHEST_BIT) != 0 )
- {
- bool is_half = CheckGreaterOrEqualHalf(man1.table, man);
- c += RoundHalfToEven(is_half);
- }
- if( IsSign() == ss2.IsSign() )
- Abs();
- else
- SetSign(); // if there is a zero it will be corrected in Standardizing()
- c += Standardizing();
- return CheckCarry(c);
- }
- /*!
- division this = this / ss2
- return value:
- 0 - ok
- 1 - carry (in a division carry can be as well)
- 2 - improper argument (ss2 is zero)
- */
- uint Div(const Big<exp, man> & ss2, bool round = true)
- {
- if( this == &ss2 )
- {
- Big<exp, man> copy_ss2(ss2);
- return DivRef(copy_ss2, round);
- }
- else
- {
- return DivRef(ss2, round);
- }
- }
- /*!
- the remainder from a division
- */
- uint ModRef(const Big<exp, man> & ss2)
- {
- uint c = 0;
- if( IsNan() || ss2.IsNan() )
- return CheckCarry(1);
- if( ss2.IsZero() )
- {
- SetNan();
- return 2;
- }
- if( !SmallerWithoutSignThan(ss2) )
- {
- Big<exp, man> temp(*this);
- c = temp.Div(ss2);
- temp.SkipFraction();
- c += temp.Mul(ss2);
- c += Sub(temp);
- if( !SmallerWithoutSignThan( ss2 ) )
- c += 1;
- }
- return CheckCarry(c);
- }
- /*!
- the remainder from a division
- e.g.
- 12.6 mod 3 = 0.6 because 12.6 = 3*4 + 0.6
- -12.6 mod 3 = -0.6 bacause -12.6 = 3*(-4) + (-0.6)
- 12.6 mod -3 = 0.6
- -12.6 mod -3 = -0.6
- it means:
- in other words: this(old) = ss2 * q + this(new)
- return value:
- 0 - ok
- 1 - carry
- 2 - improper argument (ss2 is zero)
- */
- uint Mod(const Big<exp, man> & ss2)
- {
- if( this == &ss2 )
- {
- Big<exp, man> copy_ss2(ss2);
- return ModRef(copy_ss2);
- }
- else
- {
- return ModRef(ss2);
- }
- }
- /*!
- this method returns: 'this' mod 2
- (either zero or one)
- this method is much faster than using Mod( object_with_value_two )
- */
- uint Mod2() const
- {
- if( exponent>sint(0) || exponent<=-sint(man*TTMATH_BITS_PER_UINT) )
- return 0;
- sint exp_int = exponent.ToInt();
- // 'exp_int' is negative (or zero), we set it as positive
- exp_int = -exp_int;
- return mantissa.GetBit(exp_int);
- }
- /*!
- power this = this ^ pow
- (pow without a sign)
- binary algorithm (r-to-l)
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect arguments (0^0)
- */
- template<uint pow_size>
- uint Pow(UInt<pow_size> pow)
- {
- if( IsNan() )
- return 1;
- if( IsZero() )
- {
- if( pow.IsZero() )
- {
- // we don't define zero^zero
- SetNan();
- return 2;
- }
- // 0^(+something) is zero
- return 0;
- }
- Big<exp, man> start(*this);
- Big<exp, man> result;
- result.SetOne();
- uint c = 0;
- while( !c )
- {
- if( pow.table[0] & 1 )
- c += result.Mul(start);
- pow.Rcr(1);
- if( pow.IsZero() )
- break;
- c += start.Mul(start);
- }
- *this = result;
- return CheckCarry(c);
- }
- /*!
- power this = this ^ pow
- p can be negative
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect arguments 0^0 or 0^(-something)
- */
- template<uint pow_size>
- uint Pow(Int<pow_size> pow)
- {
- if( IsNan() )
- return 1;
- if( !pow.IsSign() )
- return Pow( UInt<pow_size>(pow) );
- if( IsZero() )
- {
- // if 'p' is negative then
- // 'this' must be different from zero
- SetNan();
- return 2;
- }
- uint c = pow.ChangeSign();
- Big<exp, man> t(*this);
- c += t.Pow( UInt<pow_size>(pow) ); // here can only be a carry (return:1)
- SetOne();
- c += Div(t);
- return CheckCarry(c);
- }
- /*!
- power this = this ^ abs([pow])
- pow is treated as a value without a sign and without a fraction
- if pow has a sign then the method pow.Abs() is used
- if pow has a fraction the fraction is skipped (not used in calculation)
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect arguments (0^0)
- */
- uint PowUInt(Big<exp, man> pow)
- {
- if( IsNan() || pow.IsNan() )
- return CheckCarry(1);
- if( IsZero() )
- {
- if( pow.IsZero() )
- {
- SetNan();
- return 2;
- }
- // 0^(+something) is zero
- return 0;
- }
- if( pow.IsSign() )
- pow.Abs();
- Big<exp, man> start(*this);
- Big<exp, man> result;
- Big<exp, man> one;
- uint c = 0;
- one.SetOne();
- result = one;
- while( !c )
- {
- if( pow.Mod2() )
- c += result.Mul(start);
- c += pow.exponent.SubOne();
- if( pow < one )
- break;
- c += start.Mul(start);
- }
- *this = result;
- return CheckCarry(c);
- }
- /*!
- power this = this ^ [pow]
- pow is treated as a value without a fraction
- pow can be negative
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect arguments 0^0 or 0^(-something)
- */
- uint PowInt(const Big<exp, man> & pow)
- {
- if( IsNan() || pow.IsNan() )
- return CheckCarry(1);
- if( !pow.IsSign() )
- return PowUInt(pow);
- if( IsZero() )
- {
- // if 'pow' is negative then
- // 'this' must be different from zero
- SetNan();
- return 2;
- }
- Big<exp, man> temp(*this);
- uint c = temp.PowUInt(pow); // here can only be a carry (result:1)
- SetOne();
- c += Div(temp);
- return CheckCarry(c);
- }
- /*!
- power this = this ^ pow
- this must be greater than zero (this > 0)
- pow can be negative and with fraction
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect argument ('this' <= 0)
- */
- uint PowFrac(const Big<exp, man> & pow)
- {
- if( IsNan() || pow.IsNan() )
- return CheckCarry(1);
- Big<exp, man> temp;
- uint c = temp.Ln(*this);
- if( c != 0 ) // can be 2 from Ln()
- {
- SetNan();
- return c;
- }
- c += temp.Mul(pow);
- c += Exp(temp);
- return CheckCarry(c);
- }
- /*!
- power this = this ^ pow
- pow can be negative and with fraction
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect argument ('this' or 'pow')
- */
- uint Pow(const Big<exp, man> & pow)
- {
- if( IsNan() || pow.IsNan() )
- return CheckCarry(1);
- if( IsZero() )
- {
- // 0^pow will be 0 only for pow>0
- if( pow.IsSign() || pow.IsZero() )
- {
- SetNan();
- return 2;
- }
- SetZero();
- return 0;
- }
- if( pow.exponent>-sint(man*TTMATH_BITS_PER_UINT) && pow.exponent<=0 )
- {
- if( pow.IsInteger() )
- return PowInt( pow );
- }
- return PowFrac(pow);
- }
- /*!
- this function calculates the square root
- e.g. let this=9 then this.Sqrt() gives 3
- return: 0 - ok
- 1 - carry
- 2 - improper argument (this<0 or NaN)
- */
- uint Sqrt()
- {
- if( IsNan() || IsSign() )
- {
- SetNan();
- return 2;
- }
- if( IsZero() )
- return 0;
- Big<exp, man> old(*this);
- Big<exp, man> ln;
- uint c = 0;
- // we're using the formula: sqrt(x) = e ^ (ln(x) / 2)
- c += ln.Ln(*this);
- c += ln.exponent.SubOne(); // ln = ln / 2
- c += Exp(ln);
- // above formula doesn't give accurate results for some integers
- // e.g. Sqrt(81) would not be 9 but a value very closed to 9
- // we're rounding the result, calculating result*result and comparing
- // with the old value, if they are equal then the result is an integer too
- if( !c && old.IsInteger() && !IsInteger() )
- {
- Big<exp, man> temp(*this);
- c += temp.Round();
- Big<exp, man> temp2(temp);
- c += temp.Mul(temp2);
- if( temp == old )
- *this = temp2;
- }
- return CheckCarry(c);
- }
- /*!
- Exponent this = exp(x) = e^x where x is in (-1,1)
- we're using the formula exp(x) = 1 + (x)/(1!) + (x^2)/(2!) + (x^3)/(3!) + ...
- */
- void ExpSurrounding0(const Big<exp,man> & x, uint * steps = 0)
- {
- Big<exp,man> denominator, denominator_i;
- Big<exp,man> one, old_value, next_part;
- Big<exp,man> numerator = x;
- SetOne();
- one.SetOne();
- denominator.SetOne();
- denominator_i.SetOne();
- uint i;
- old_value = *this;
- // we begin from 1 in order to not test at the beginning
- for(i=1 ; true ; ++i)
- #else
- for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
- #endif
- {
- bool testing = ((i & 3) == 0); // it means '(i % 4) == 0'
- next_part = numerator;
- if( next_part.Div( denominator ) )
- // if there is a carry here we only break the loop
- // however the result we return as good
- // it means there are too many parts of the formula
- break;
- // there shouldn't be a carry here
- Add( next_part );
- if( testing )
- {
- if( old_value == *this )
- // we've added next few parts of the formula but the result
- // is still the same then we break the loop
- break;
- else
- old_value = *this;
- }
- // we set the denominator and the numerator for a next part of the formula
- if( denominator_i.Add(one) )
- // if there is a carry here the result we return as good
- break;
- if( denominator.Mul(denominator_i) )
- break;
- if( numerator.Mul(x) )
- break;
- }
- if( steps )
- *steps = i;
- }
- /*!
- Exponent this = exp(x) = e^x
- we're using the fact that our value is stored in form of:
- x = mantissa * 2^exponent
- then
- e^x = e^(mantissa* 2^exponent) or
- e^x = (e^mantissa)^(2^exponent)
- 'Exp' returns a carry if we can't count the result ('x' is too big)
- */
- uint Exp(const Big<exp,man> & x)
- {
- uint c = 0;
- if( x.IsNan() )
- return CheckCarry(1);
- if( x.IsZero() )
- {
- SetOne();
- return 0;
- }
- // m will be the value of the mantissa in range (-1,1)
- Big<exp,man> m(x);
- m.exponent = -sint(man*TTMATH_BITS_PER_UINT);
- // 'e_' will be the value of '2^exponent'
- // e_.mantissa.table[man-1] = TTMATH_UINT_HIGHEST_BIT; and
- // e_.exponent.Add(1) mean:
- // e_.mantissa.table[0] = 1;
- // e_.Standardizing();
- // e_.exponent.Add(man*TTMATH_BITS_PER_UINT)
- // (we must add 'man*TTMATH_BITS_PER_UINT' because we've taken it from the mantissa)
- Big<exp,man> e_(x);
- e_.mantissa.SetZero();
- e_.mantissa.table[man-1] = TTMATH_UINT_HIGHEST_BIT;
- c += e_.exponent.Add(1);
- e_.Abs();
- /*
- now we've got:
- m - the value of the mantissa in range (-1,1)
- e_ - 2^exponent
- e_ can be as:
- ...2^-2, 2^-1, 2^0, 2^1, 2^2 ...
- ...1/4 , 1/2 , 1 , 2 , 4 ...
- above one e_ is integer
- if e_ is greater than 1 we calculate the exponent as:
- e^(m * e_) = ExpSurrounding0(m) ^ e_
- and if e_ is smaller or equal one we calculate the exponent in this way:
- e^(m * e_) = ExpSurrounding0(m* e_)
- because if e_ is smaller or equal 1 then the product of m*e_ is smaller or equal m
- */
- if( e_ <= 1 )
- {
- m.Mul(e_);
- ExpSurrounding0(m);
- }
- else
- {
- ExpSurrounding0(m);
- c += PowUInt(e_);
- }
- return CheckCarry(c);
- }
- /*!
- Natural logarithm this = ln(x) where x in range <1,2)
- we're using the formula:
- ln x = 2 * [ (x-1)/(x+1) + (1/3)((x-1)/(x+1))^3 + (1/5)((x-1)/(x+1))^5 + ... ]
- */
- void LnSurrounding1(const Big<exp,man> & x, uint * steps = 0)
- {
- Big<exp,man> old_value, next_part, denominator, one, two, x1(x), x2(x);
- one.SetOne();
- if( x == one )
- {
- // LnSurrounding1(1) is 0
- SetZero();
- return;
- }
- two = 2;
- x1.Sub(one);
- x2.Add(one);
- x1.Div(x2);
- x2 = x1;
- x2.Mul(x1);
- denominator.SetOne();
- SetZero();
- old_value = *this;
- uint i;
- for(i=1 ; true ; ++i)
- #else
- // we begin from 1 in order to not test at the beginning
- for(i=1 ; i<=TTMATH_ARITHMETIC_MAX_LOOP ; ++i)
- #endif
- {
- bool testing = ((i & 3) == 0); // it means '(i % 4) == 0'
- next_part = x1;
- if( next_part.Div(denominator) )
- // if there is a carry here we only break the loop
- // however the result we return as good
- // it means there are too many parts of the formula
- break;
- // there shouldn't be a carry here
- Add(next_part);
- if( testing )
- {
- if( old_value == *this )
- // we've added next (step_test) parts of the formula but the result
- // is still the same then we break the loop
- break;
- else
- old_value = *this;
- }
- if( x1.Mul(x2) )
- // if there is a carry here the result we return as good
- break;
- if( denominator.Add(two) )
- break;
- }
- // this = this * 2
- // ( there can't be a carry here because we calculate the logarithm between <1,2) )
- exponent.AddOne();
- if( steps )
- *steps = i;
- }
- /*!
- Natural logarithm this = ln(x)
- (a logarithm with the base equal 'e')
- we're using the fact that our value is stored in form of:
- x = mantissa * 2^exponent
- then
- ln(x) = ln (mantissa * 2^exponent) = ln (mantissa) + (exponent * ln (2))
- the mantissa we'll show as a value from range <1,2) because the logarithm
- is decreasing too fast when 'x' is going to 0
- return values:
- 0 - ok
- 1 - overflow (carry)
- 2 - incorrect argument (x<=0)
- */
- uint Ln(const Big<exp,man> & x)
- {
- if( x.IsNan() )
- return CheckCarry(1);
- if( x.IsSign() || x.IsZero() )
- {
- SetNan();
- return 2;
- }
- Big<exp,man> exponent_temp;
- exponent_temp.FromInt( x.exponent );
- // m will be the value of the mantissa in range <1,2)
- Big<exp,man> m(x);
- m.exponent = -sint(man*TTMATH_BITS_PER_UINT - 1);
- // we must add 'man*TTMATH_BITS_PER_UINT-1' because we've taken it from the mantissa
- uint c = exponent_temp.Add(man*TTMATH_BITS_PER_UINT-1);
- LnSurrounding1(m);
- Big<exp,man> ln2;
- ln2.SetLn2();
- c += exponent_temp.Mul(ln2);
- c += Add(exponent_temp);
- return CheckCarry(c);
- }
- /*!
- Logarithm from 'x' with a 'base'
- we're using the formula:
- Log(x) with 'base' = ln(x) / ln(base)
- return values:
- 0 - ok
- 1 - overflow
- 2 - incorrect argument (x<=0)
- 3 - incorrect base (a<=0 lub a=1)
- */
- uint Log(const Big<exp,man> & x, const Big<exp,man> & base)
- {
- if( x.IsNan() || base.IsNan() )
- return CheckCarry(1);
- if( x.IsSign() || x.IsZero() )
- {
- SetNan();
- return 2;
- }
- Big<exp,man> denominator;;
- denominator.SetOne();
- if( base.IsSign() || base.IsZero() || base==denominator )
- {
- SetNan();
- return 3;
- }
- if( x == denominator ) // (this is: if x == 1)
- {
- // log(1) is 0
- SetZero();
- return 0;
- }
- // another error values we've tested at the beginning
- // there can only be a carry
- uint c = Ln(x);
- c += denominator.Ln(base);
- c += Div(denominator);
- return CheckCarry(c);
- }
- /*!
- *
- * converting methods
- *
- */
- /*!
- converting from another type of a Big object
- */
- template<uint another_exp, uint another_man>
- uint FromBig(const Big<another_exp, another_man> & another)
- {
- info = another.info;
- if( IsNan() )
- return 1;
- if( exponent.FromInt(another.exponent) )
- {
- SetNan();
- return 1;
- }
- uint man_len_min = (man < another_man)? man : another_man;
- uint i;
- uint c = 0;
- for( i = 0 ; i<man_len_min ; ++i )
- mantissa.table[man-1-i] = another.mantissa.table[another_man-1-i];
- for( ; i<man ; ++i )
- mantissa.table[man-1-i] = 0;
- // MS Visual Express 2005 reports a warning (in the lines with 'uint man_diff = ...'):
- // warning C4307: '*' : integral constant overflow
- // but we're using 'if( man > another_man )' and 'if( man < another_man )' and there'll be no such situation here
- #ifdef _MSC_VER
- #pragma warning( disable: 4307 )
- #endif
- if( man > another_man )
- {
- uint man_diff = (man - another_man) * TTMATH_BITS_PER_UINT;
- c += exponent.SubInt(man_diff, 0);
- }
- else
- if( man < another_man )
- {
- uint man_diff = (another_man - man) * TTMATH_BITS_PER_UINT;
- c += exponent.AddInt(man_diff, 0);
- }
- #ifdef _MSC_VER
- #pragma warning( default: 4307 )
- #endif
- // mantissa doesn't have to be standardized (either the highest bit is set or all bits are equal zero)
- CorrectZero();
- return CheckCarry(c);
- }
- /*!
- an auxiliary method for converting 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- uint ToUIntOrInt(uint & result) const
- {
- result = 0;
- if( IsZero() )
- return 0;
- sint maxbit = -sint(man*TTMATH_BITS_PER_UINT);
- if( exponent > maxbit + sint(TTMATH_BITS_PER_UINT) )
- // if exponent > (maxbit + sint(TTMATH_BITS_PER_UINT)) the value can't be passed
- // into the 'sint' type (it's too big)
- return 1;
- if( exponent <= maxbit )
- // our value is from the range of (-1,1) and we return zero
- return 0;
- // exponent is from a range of (maxbit, maxbit + sint(TTMATH_BITS_PER_UINT) >
- // and [maxbit + sint(TTMATH_BITS_PER_UINT] <= 0
- sint how_many_bits = exponent.ToInt();
- // how_many_bits is negative, we'll make it positive
- how_many_bits = -how_many_bits;
- result = (mantissa.table[man-1] >> (how_many_bits % TTMATH_BITS_PER_UINT));
- return 0;
- }
- /*!
- this method converts 'this' into uint
- */
- uint ToUInt() const
- {
- uint result;
- ToUInt(result);
- return result;
- }
- /*!
- this method converts 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- uint ToUInt(uint & result) const
- {
- if( ToUIntOrInt(result) )
- return 1;
- if( IsSign() )
- return 1;
- return 0;
- }
- /*!
- this method converts 'this' into sint
- */
- sint ToInt() const
- {
- sint result;
- ToInt(result);
- return result;
- }
- /*!
- this method converts 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- uint ToInt(uint & result) const
- {
- return ToUInt(result);
- }
- /*!
- this method converts 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- uint ToInt(sint & result) const
- {
- uint result_uint;
- uint c = ToUIntOrInt(result_uint);
- result = sint(result_uint);
- if( c )
- return 1;
- uint mask = 0;
- if( IsSign() )
- {
- result = -result;
- }
- return ((result & TTMATH_UINT_HIGHEST_BIT) == (mask & TTMATH_UINT_HIGHEST_BIT)) ? 0 : 1;
- }
- /*!
- an auxiliary method for converting 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- template<uint int_size>
- uint ToUIntOrInt(UInt<int_size> & result) const
- {
- result.SetZero();
- if( IsZero() )
- return 0;
- sint maxbit = -sint(man*TTMATH_BITS_PER_UINT);
- if( exponent > maxbit + sint(int_size*TTMATH_BITS_PER_UINT) )
- // if exponent > (maxbit + sint(int_size*TTMATH_BITS_PER_UINT)) the value can't be passed
- // into the 'UInt<int_size>' type (it's too big)
- return 1;
- if( exponent <= maxbit )
- // our value is from range (-1,1) and we return zero
- return 0;
- sint how_many_bits = exponent.ToInt();
- if( how_many_bits < 0 )
- {
- how_many_bits = -how_many_bits;
- uint index = how_many_bits / TTMATH_BITS_PER_UINT;
- UInt<man> mantissa_temp(mantissa);
- mantissa_temp.Rcr( how_many_bits % TTMATH_BITS_PER_UINT, 0 );
- for(uint i=index, a=0 ; i<man ; ++i,++a)
- result.table[a] = mantissa_temp.table[i];
- }
- else
- {
- uint index = how_many_bits / TTMATH_BITS_PER_UINT;
- if( index + (man-1) < int_size )
- {
- // above 'if' is always true
- // this is only to get rid of a warning "warning: array subscript is above array bounds"
- // (from gcc)
- // we checked the condition there: "if( exponent > maxbit + sint(int_size*TTMATH_BITS_PER_UINT) )"
- // but gcc doesn't understand our types - exponent is Int<>
- for(uint i=0 ; i<man ; ++i)
- result.table[index+i] = mantissa.table[i];
- }
- result.Rcl( how_many_bits % TTMATH_BITS_PER_UINT, 0 );
- }
- return 0;
- }
- /*!
- this method converts 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- template<uint int_size>
- uint ToUInt(UInt<int_size> & result) const
- {
- uint c = ToUIntOrInt(result);
- if( c )
- return 1;
- if( IsSign() )
- return 1;
- return 0;
- }
- /*!
- this method converts 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- template<uint int_size>
- uint ToInt(UInt<int_size> & result) const
- {
- return ToUInt(result);
- }
- /*!
- this method converts 'this' into 'result'
- if the value is too big this method returns a carry (1)
- */
- template<uint int_size>
- uint ToInt(Int<int_size> & result) const
- {
- uint c = ToUIntOrInt(result);
- if( c )
- return 1;
- uint mask = 0;
- if( IsSign() )
- {
- result.ChangeSign();
- }
- return ((result.table[int_size-1] & TTMATH_UINT_HIGHEST_BIT) == (mask & TTMATH_UINT_HIGHEST_BIT))? 0 : 1;
- }
- /*!
- a method for converting 'uint' to this class
- */
- uint FromUInt(uint value)
- {
- if( value == 0 )
- {
- SetZero();
- return 0;
- }
- info = 0;
- for(uint i=0 ; i<man-1 ; ++i)
- mantissa.table[i] = 0;
- mantissa.table[man-1] = value;
- exponent = -sint(man-1) * sint(TTMATH_BITS_PER_UINT);
- // there shouldn't be a carry because 'value' has the 'uint' type
- Standardizing();
- return 0;
- }
- /*!
- a method for converting 'uint' to this class
- */
- uint FromInt(uint value)
- {
- return FromUInt(value);
- }
- /*!
- a method for converting 'sint' to this class
- */
- uint FromInt(sint value)
- {
- bool is_sign = false;
- if( value < 0 )
- {
- value = -value;
- is_sign = true;
- }
- FromUInt(uint(value));
- if( is_sign )
- SetSign();
- return 0;
- }
- /*!
- this method converts from standard double into this class
- standard double means IEEE-754 floating point value with 64 bits
- it is as follows (from http://www.psc.edu/general/software/packages/ieee/ieee.html):
- The IEEE double precision floating point standard representation requires
- a 64 bit word, which may be represented as numbered from 0 to 63, left to
- right. The first bit is the sign bit, S, the next eleven bits are the
- exponent bits, 'E', and the final 52 bits are the fraction 'F':
- 0 1 11 12 63
- The value V represented by the word may be determined as follows:
- * If E=2047 and F is nonzero, then V=NaN ("Not a number")
- * If E=2047 and F is zero and S is 1, then V=-Infinity
- * If E=2047 and F is zero and S is 0, then V=Infinity
- * If 0<E<2047 then V=(-1)**S * 2 ** (E-1023) * (1.F) where "1.F" is intended
- to represent the binary number created by prefixing F with an implicit
- leading 1 and a binary point.
- * If E=0 and F is nonzero, then V=(-1)**S * 2 ** (-1022) * (0.F) These are
- "unnormalized" values.
- * If E=0 and F is zero and S is 1, then V=-0
- * If E=0 and F is zero and S is 0, then V=0
- */
- uint FromDouble(double value)
- {
- // I am not sure what will be on a platform which has
- // a different endianness... but we use this library only
- // on x86 and amd (intel) 64 bits (as there's a lot of assembler code)
- union
- {
- double d;
- uint u[2]; // two 32bit words
- } temp;
- temp.d = value;
- sint e = ( temp.u[1] & 0x7FF00000u) >> 20;
- uint m1 = ((temp.u[1] & 0xFFFFFu) << 11) | (temp.u[0] >> 21);
- uint m2 = temp.u[0] << 11;
- if( e == 2047 )
- {
- // If E=2047 and F is nonzero, then V=NaN ("Not a number")
- // If E=2047 and F is zero and S is 1, then V=-Infinity
- // If E=2047 and F is zero and S is 0, then V=Infinity
- // we do not support -Infinity and +Infinity
- // we assume that there is always NaN
- SetNan();
- }
- else
- if( e > 0 )
- {
- // If 0<E<2047 then
- // V=(-1)**S * 2 ** (E-1023) * (1.F)
- // where "1.F" is intended to represent the binary number
- // created by prefixing F with an implicit leading 1 and a binary point.
- FromDouble_SetExpAndMan((temp.u[1] & 0x80000000u) != 0,
- e - 1023 - man*TTMATH_BITS_PER_UINT + 1, 0x80000000u,
- m1, m2);
- // we do not have to call Standardizing() here
- // because the mantissa will have the highest bit set
- }
- else
- {
- // e == 0
- if( m1 != 0 || m2 != 0 )
- {
- // If E=0 and F is nonzero,
- // then V=(-1)**S * 2 ** (-1022) * (0.F)
- // These are "unnormalized" values.
- UInt<2> m;
- m.table[1] = m1;
- m.table[0] = m2;
- uint moved = m.CompensationToLeft();
- FromDouble_SetExpAndMan((temp.u[1] & 0x80000000u) != 0,
- e - 1022 - man*TTMATH_BITS_PER_UINT + 1 - moved, 0,
- m.table[1], m.table[0]);
- }
- else
- {
- // If E=0 and F is zero and S is 1, then V=-0
- // If E=0 and F is zero and S is 0, then V=0
- // we do not support -0 or 0, only is one 0
- SetZero();
- }
- }
- return 0; // never be a carry
- }
- void FromDouble_SetExpAndMan(bool is_sign, int e, uint mhighest, uint m1, uint m2)
- {
-     // helper of the two-word FromDouble(): stores the exponent and places
-     // the two mantissa words in the most significant slots
-     exponent = e;
-     // for man == 1 the index man-1 is 0, so this covers both layouts
-     mantissa.table[man-1] = m1 | mhighest;
-     if( man > 1 )
-     {
-         // although man>1 we're using casting into sint
-         // to get rid from a warning which generates Microsoft Visual:
-         // warning C4307: '*' : integral constant overflow
-         mantissa.table[sint(man-2)] = m2;
-         uint word = 0;
-         while( word < man-2 )
-             mantissa.table[word++] = 0;
-     }
-     info = 0;
-     // the value should be different from zero
-     TTMATH_ASSERT( mantissa.IsZero() == false )
-     if( is_sign )
-         SetSign();
- }
- // 64bit platforms
- uint FromDouble(double value)
- {
- // I am not sure what will be on a plaltform which has
- // a different endianness... but we use this library only
- // on x86 and amd (intel) 64 bits (as there's a lot of assembler code)
- union
- {
- double d;
- uint u; // one 64bit word
- } temp;
- temp.d = value;
- sint e = (temp.u & 0x7FF0000000000000ul) >> 52;
- uint m = (temp.u & 0xFFFFFFFFFFFFFul) << 11;
- if( e == 2047 )
- {
- // If E=2047 and F is nonzero, then V=NaN ("Not a number")
- // If E=2047 and F is zero and S is 1, then V=-Infinity
- // If E=2047 and F is zero and S is 0, then V=Infinity
- // we do not support -Infinity and +Infinity
- // we assume that there is always NaN
- SetNan();
- }
- else
- if( e > 0 )
- {
- // If 0<E<2047 then
- // V=(-1)**S * 2 ** (E-1023) * (1.F)
- // where "1.F" is intended to represent the binary number
- // created by prefixing F with an implicit leading 1 and a binary point.
- FromDouble_SetExpAndMan((temp.u & 0x8000000000000000ul) != 0,
- e - 1023 - man*TTMATH_BITS_PER_UINT + 1,
- 0x8000000000000000ul, m);
- // we do not have to call Standardizing() here
- // because the mantissa will have the highest bit set
- }
- else
- {
- // e == 0
- if( m != 0 )
- {
- // If E=0 and F is nonzero,
- // then V=(-1)**S * 2 ** (-1022) * (0.F)
- // These are "unnormalized" values.
- FromDouble_SetExpAndMan(bool(temp.u & 0x8000000000000000ul),
- e - 1022 - man*TTMATH_BITS_PER_UINT + 1, 0, m);
- Standardizing();
- }
- else
- {
- // If E=0 and F is zero and S is 1, then V=-0
- // If E=0 and F is zero and S is 0, then V=0
- // we do not support -0 or 0, only is one 0
- SetZero();
- }
- }
- return 0; // never be a carry
- }
- void FromDouble_SetExpAndMan(bool is_sign, sint e, uint mhighest, uint m)
- {
-     // helper of the one-word FromDouble(): stores the exponent and places
-     // the mantissa word in the most significant slot
-     exponent = e;
-     uint word = 0;
-     while( word < man-1 )
-         mantissa.table[word++] = 0;
-     mantissa.table[man-1] = m | mhighest;
-     info = 0;
-     // the value should be different from zero
-     TTMATH_ASSERT( mantissa.IsZero() == false )
-     if( is_sign )
-         SetSign();
- }
- /*!
- this method converts from float to this class
- */
- uint FromFloat(float value)
- {
-     // the float is widened to a double and converted by FromDouble()
-     const double widened = double(value);
-     return FromDouble(widened);
- }
- /*!
- this method converts from this class into the 'double'
- if the value is too big:
- 'result' will be +/-infinity (depending on the sign)
- if the value is too small:
- 'result' will be 0
- */
- double ToDouble() const
- {
-     // convenience overload: the carry reported by ToDouble(double &) is dropped
-     double converted;
-     ToDouble(converted);
-     return converted;
- }
- /*!
- an auxiliary method to check if the float value is +/-infinity
- we provide this method because isinf(float) is only available in the C99 language
- description taken from: http://www.psc.edu/general/software/packages/ieee/ieee.php
- The IEEE single precision floating point standard representation requires a 32 bit word,
- which may be represented as numbered from 0 to 31, left to right.
- The first bit is the sign bit, S, the next eight bits are the exponent bits, 'E',
- and the final 23 bits are the fraction 'F':
- 0 1 8 9 31
- The value V represented by the word may be determined as follows:
- * If E=255 and F is nonzero, then V=NaN ("Not a number")
- * If E=255 and F is zero and S is 1, then V=-Infinity
- * If E=255 and F is zero and S is 0, then V=Infinity
- * If 0<E<255 then V=(-1)**S * 2 ** (E-127) * (1.F) where "1.F" is intended to represent
- the binary number created by prefixing F with an implicit leading 1 and a binary point.
- * If E=0 and F is nonzero, then V=(-1)**S * 2 ** (-126) * (0.F) These are "unnormalized" values.
- * If E=0 and F is zero and S is 1, then V=-0
- * If E=0 and F is zero and S is 0, then V=0
- */
- bool IsInf(float value) const
- {
-     // need testing on a 64 bit machine
-     union
-     {
-         float d;
-         uint u;
-     } bits;
-     bits.d = value;
-     // infinity: all eight exponent bits set and all 23 fraction bits clear
-     const bool exponent_all_ones = ( ((bits.u >> 23) & 0xff) == 0xff );
-     const bool fraction_is_zero = ( (bits.u & 0x7FFFFF) == 0 );
-     return exponent_all_ones && fraction_is_zero;
- }
- /*!
- this method converts from this class into the 'float'
- if the value is too big:
- 'result' will be +/-infinity (depending on the sign)
- if the value is too small:
- 'result' will be 0
- */
- float ToFloat() const
- {
-     // convenience overload: the carry reported by ToFloat(float &) is dropped
-     float converted;
-     ToFloat(converted);
-     return converted;
- }
- /*!
- this method converts from this class into the 'float'
- if the value is too big:
- 'result' will be +/-infinity (depending on the sign)
- and the method returns 1
- if the value is too small:
- 'result' will be 0
- and the method returns 1
- */
- uint ToFloat(float & result) const
- {
-     // the conversion goes through double and is then narrowed to float
-     double as_double;
-     const uint carry = ToDouble(as_double);
-     result = float(as_double);
-     // negative zero is replaced with plain zero
-     if( result == -0.0f )
-         result = 0.0f;
-     // although as_double can hold a correct value,
-     // narrowing it to float may still produce infinity ...
-     const bool became_infinite = IsInf(result);
-     // ... or flush a value that was too small for float down to zero
-     const bool became_zero = ( result == 0.0f && as_double != 0.0 );
-     return ( carry || became_infinite || became_zero ) ? 1 : 0;
- }
- /*!
- this method converts from this class into the 'double'
- if the value is too big:
- 'result' will be +/-infinity (depending on the sign)
- and the method returns 1
- if the value is too small:
- 'result' will be 0
- and the method returns 1
- */
- uint ToDouble(double & result) const
- {
-     if( IsZero() )
-     {
-         result = 0.0;
-         return 0;
-     }
-     if( IsNan() )
-     {
-         // exponent field 2047 with the 'nan' flag set
-         result = ToDouble_SetDouble( false, 2047, 0, false, true);
-         return 0;
-     }
-     // added to 'exponent' below to obtain the exponent that is biased by 1023
-     sint e_correction = sint(man*TTMATH_BITS_PER_UINT) - 1;
-     if( exponent >= 1024 - e_correction )
-     {
-         // too big: +/- infinity and a carry
-         result = ToDouble_SetDouble( IsSign(), 2047, 0, true);
-         return 1;
-     }
-     if( exponent <= -1023 - 52 - e_correction )
-     {
-         // too small value - we assume that there'll be a zero
-         // and return a carry
-         result = 0;
-         return 1;
-     }
-     sint e = exponent.ToInt() + e_correction;
-     // -1023-52 < e <= -1023 : unnormalized value, exponent field is 0
-     // -1023    < e <  1024  : normalized value
-     result = ( e <= -1023 ) ? ToDouble_SetDouble( IsSign(), 0, -(e + 1023))
-                             : ToDouble_SetDouble( IsSign(), e + 1023, -1);
-     return 0;
- }
- // 32bit platforms
- double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false, bool nan = false) const
- {
- union
- {
- double d;
- uint u[2]; // two 32bit words
- } temp;
- temp.u[0] = temp.u[1] = 0;
- if( is_sign )
- temp.u[1] |= 0x80000000u;
- temp.u[1] |= (e << 20) & 0x7FF00000u;
- if( nan )
- {
- temp.u[0] |= 1;
- return temp.d;
- }
- if( infinity )
- return temp.d;
- UInt<2> m;
- m.table[1] = mantissa.table[man-1];
- m.table[0] = ( man > 1 ) ? mantissa.table[sint(man-2)] : 0;
- // although man>1 we're using casting into sint
- // to get rid from a warning which generates Microsoft Visual:
- // warning C4307: '*' : integral constant overflow
- m.Rcr( 12 + move );
- m.table[1] &= 0xFFFFFu; // cutting the 20 bit (when 'move' was -1)
- temp.u[1] |= m.table[1];
- temp.u[0] |= m.table[0];
- return temp.d;
- }
- // 64bit platforms
- double ToDouble_SetDouble(bool is_sign, uint e, sint move, bool infinity = false, bool nan = false) const
- {
- union
- {
- double d;
- uint u; // 64bit word
- } temp;
- temp.u = 0;
- if( is_sign )
- temp.u |= 0x8000000000000000ul;
- temp.u |= (e << 52) & 0x7FF0000000000000ul;
- if( nan )
- {
- temp.u |= 1;
- return temp.d;
- }
- if( infinity )
- return temp.d;
- uint m = mantissa.table[man-1];
- m >>= ( 12 + move );
- m &= 0xFFFFFFFFFFFFFul; // cutting the 20 bit (when 'move' was -1)
- temp.u |= m;
- return temp.d;
- }
- /*!
- an operator= for converting 'sint' to this class
- */
- Big<exp, man> & operator=(sint value)
- {
-     // the value returned by FromInt() is not used
-     this->FromInt(value);
-     return *this;
- }
- /*!
- an operator= for converting 'uint' to this class
- */
- Big<exp, man> & operator=(uint value)
- {
-     // the value returned by FromUInt() is not used
-     this->FromUInt(value);
-     return *this;
- }
- /*!
- an operator= for converting 'float' to this class
- */
- Big<exp, man> & operator=(float value)
- {
-     // the value returned by FromFloat() is not used
-     this->FromFloat(value);
-     return *this;
- }
- /*!
- an operator= for converting 'double' to this class
- */
- Big<exp, man> & operator=(double value)
- {
-     // the value returned by FromDouble() is not used
-     this->FromDouble(value);
-     return *this;
- }
- /*!
- a constructor for converting 'sint' to this class
- */
- Big(sint value)
- {
-     // all members are set by the conversion routine
-     this->FromInt(value);
- }
- /*!
- a constructor for converting 'uint' to this class
- */
- Big(uint value)
- {
-     // all members are set by the conversion routine
-     this->FromUInt(value);
- }
- /*!
- a constructor for converting 'double' to this class
- */
- Big(double value)
- {
-     // all members are set by the conversion routine
-     this->FromDouble(value);
- }
- /*!
- a constructor for converting 'float' to this class
- */
- Big(float value)
- {
-     // all members are set by the conversion routine
-     this->FromFloat(value);
- }
- /*!
- this method converts 'this' into 'result' (64 bit unsigned integer)
- if the value is too big this method returns a carry (1)
- */
- uint ToUInt(ulint & result) const
- {
-     // convert into a two-word container first, then copy it into the 64 bit integer
-     UInt<2> wide; // 64 bits container
-     const uint carry = ToUInt(wide);
-     wide.ToUInt(result);
-     return carry;
- }
- /*!
- this method converts 'this' into 'result' (64 bit unsigned integer)
- if the value is too big this method returns a carry (1)
- */
- uint ToInt(ulint & result) const
- {
-     // alias of ToUInt(ulint &) for an unsigned destination
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
- this method converts 'this' into 'result' (64 bit signed integer)
- if the value is too big this method returns a carry (1)
- */
- uint ToInt(slint & result) const
- {
-     // convert into a two-word signed container first, then copy it into the 64 bit integer
-     Int<2> wide; // 64 bits container
-     const uint carry = ToInt(wide);
-     wide.ToInt(result);
-     return carry;
- }
- /*!
- a method for converting 'ulint' (64bit unsigned integer) to this class
- */
- uint FromUInt(ulint value)
- {
-     if( value == 0 )
-     {
-         SetZero();
-         return 0;
-     }
-     info = 0;
-     if( man != 1 )
-     {
-         #ifdef _MSC_VER
-         //warning C4307: '*' : integral constant overflow
-         #pragma warning( disable: 4307 )
-         #endif
-         // man >= 2: both halves of the value fit into the two highest words
-         mantissa.table[man-1] = uint(value >> TTMATH_BITS_PER_UINT);
-         mantissa.table[man-2] = uint(value);
-         #ifdef _MSC_VER
-         //warning C4307: '*' : integral constant overflow
-         #pragma warning( default: 4307 )
-         #endif
-         exponent = -sint(man-2) * sint(TTMATH_BITS_PER_UINT);
-         uint word = 0;
-         while( word < man-2 )
-             mantissa.table[word++] = 0;
-     }
-     else
-     {
-         // only one mantissa word: if the high half of the value is used,
-         // shift the value right and record the shift in the exponent
-         sint bit = mantissa.FindLeadingBitInWord(uint(value >> TTMATH_BITS_PER_UINT));
-         if( bit == -1 )
-         {
-             exponent.SetZero();
-         }
-         else
-         {
-             // the highest word from value is different from zero
-             bit += 1;
-             value >>= bit;
-             exponent = bit;
-         }
-         mantissa.table[0] = uint(value);
-     }
-     // there shouldn't be a carry because 'value' has the 'ulint' type
-     // (we have sufficient exponent)
-     Standardizing();
-     return 0;
- }
- /*!
- a method for converting 'ulint' (64bit unsigned integer) to this class
- */
- uint FromInt(ulint value)
- {
-     // an unsigned value needs no sign handling: forward to FromUInt()
-     const uint carry = FromUInt(value);
-     return carry;
- }
- /*!
- a method for converting 'slint' (64bit signed integer) to this class
- */
- uint FromInt(slint value)
- {
-     // Converts a 64bit signed integer into this class; always returns 0 (no carry).
-     const bool is_sign = ( value < 0 );
-     // the magnitude is computed in the unsigned domain: '-value' on the signed
-     // type overflows (undefined behaviour) for the most negative slint, whereas
-     // unsigned subtraction wraps around and yields the correct magnitude
-     ulint magnitude = ulint(value);
-     if( is_sign )
-         magnitude = ulint(0) - magnitude;
-     FromUInt(magnitude);
-     if( is_sign )
-         SetSign();
-     return 0;
- }
- /*!
- a constructor for converting 'ulint' (64bit unsigned integer) to this class
- */
- Big(ulint value)
- {
-     // all members are set by the conversion routine
-     this->FromUInt(value);
- }
- /*!
- an operator for converting 'ulint' (64bit unsigned integer) to this class
- */
- Big<exp, man> & operator=(ulint value)
- {
-     // the value returned by FromUInt() is not used
-     this->FromUInt(value);
-     return *this;
- }
- /*!
- a constructor for converting 'slint' (64bit signed integer) to this class
- */
- Big(slint value)
- {
-     // all members are set by the conversion routine
-     this->FromInt(value);
- }
- /*!
- an operator for converting 'slint' (64bit signed integer) to this class
- */
- Big<exp, man> & operator=(slint value)
- {
-     // the value returned by FromInt() is not used
-     this->FromInt(value);
-     return *this;
- }
- /*!
- this method converts 'this' into 'result' (32 bit unsigned integer)
- ***this method is created only on a 64bit platform***
- if the value is too big this method returns a carry (1)
- */
- uint ToUInt(unsigned int & result) const
- {
-     // convert into a full word first, then narrow it
-     uint wide;
-     const uint carry = ToUInt(wide);
-     result = (unsigned int)wide;
-     // a carry is also reported when narrowing changed the value
-     return ( carry || wide != uint(result) ) ? 1 : 0;
- }
- /*!
- this method converts 'this' into 'result' (32 bit unsigned integer)
- ***this method is created only on a 64bit platform***
- if the value is too big this method returns a carry (1)
- */
- uint ToInt(unsigned int & result) const
- {
-     // alias of ToUInt(unsigned int &) for an unsigned destination
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
- this method converts 'this' into 'result' (32 bit signed integer)
- ***this method is created only on a 64bit platform***
- if the value is too big this method returns a carry (1)
- */
- uint ToInt(signed int & result) const
- {
-     // convert into a full signed word first, then narrow it
-     sint wide;
-     const uint carry = ToInt(wide);
-     result = (signed int)wide;
-     // a carry is also reported when narrowing changed the value
-     return ( carry || wide != sint(result) ) ? 1 : 0;
- }
- /*
- this method converts 32 bit unsigned int to this class
- ***this method is created only on a 64bit platform***
- */
- uint FromUInt(unsigned int value)
- {
-     // widen to a full word and reuse the word conversion
-     const uint widened = uint(value);
-     return FromUInt(widened);
- }
- /*
- this method converts 32 bit unsigned int to this class
- ***this method is created only on a 64bit platform***
- */
- uint FromInt(unsigned int value)
- {
-     // widen to a full word and reuse the unsigned word conversion
-     const uint widened = uint(value);
-     return FromUInt(widened);
- }
- /*
- this method converts 32 bit signed int to this class
- ***this method is created only on a 64bit platform***
- */
- uint FromInt(signed int value)
- {
-     // widen to a full signed word and reuse the signed word conversion
-     const sint widened = sint(value);
-     return FromInt(widened);
- }
- /*!
- an operator= for converting 32 bit unsigned int to this class
- ***this operator is created only on a 64bit platform***
- */
- Big<exp, man> & operator=(unsigned int value)
- {
-     // the value returned by FromUInt() is not used
-     this->FromUInt(value);
-     return *this;
- }
- /*!
- a constructor for converting 32 bit unsigned int to this class
- ***this constructor is created only on a 64bit platform***
- */
- Big(unsigned int value)
- {
-     // all members are set by the conversion routine
-     this->FromUInt(value);
- }
- /*!
- an operator for converting 32 bit signed int to this class
- ***this operator is created only on a 64bit platform***
- */
- Big<exp, man> & operator=(signed int value)
- {
-     // the value returned by FromInt() is not used
-     this->FromInt(value);
-     return *this;
- }
- /*!
- a constructor for converting 32 bit signed int to this class
- ***this constructor is created only on a 64bit platform***
- */
- Big(signed int value)
- {
-     // all members are set by the conversion routine
-     this->FromInt(value);
- }
- /*!
- an auxiliary method for converting from UInt and Int
- we assume that there'll never be a carry here
- (we have an exponent and the value in Big can be bigger than
- that one from the UInt)
- */
- template<uint int_size>
- uint FromUIntOrInt(const UInt<int_size> & value, sint compensation)
- {
-     // number of words that can actually be copied from 'value'
-     const uint copied_words = (int_size < man)? int_size : man;
-     exponent = (sint(int_size)-sint(man)) * sint(TTMATH_BITS_PER_UINT) - compensation;
-     // walking from the highest word downwards: the first 'copied_words'
-     // words come from 'value', whatever is left of the mantissa is zeroed
-     for(uint i=1 ; i<=man ; ++i)
-         mantissa.table[man-i] = ( i <= copied_words ) ? value.table[int_size-i] : 0;
-     // the highest bit is either one or zero (when the whole mantissa is zero)
-     // we can only call CorrectZero()
-     CorrectZero();
-     return 0;
- }
- /*!
- a method for converting from 'UInt<int_size>' to this class
- */
- template<uint int_size>
- uint FromUInt(UInt<int_size> value)
- {
-     info = 0;
-     // 'value' is a copy, so it can be modified here; the number returned by
-     // CompensationToLeft() is handed on to FromUIntOrInt() to adjust the exponent
-     const sint shifted = (sint)value.CompensationToLeft();
-     return FromUIntOrInt(value, shifted);
- }
- /*!
- a method for converting from 'UInt<int_size>' to this class
- */
- template<uint int_size>
- uint FromInt(const UInt<int_size> & value)
- {
-     // an unsigned container needs no sign handling: forward to FromUInt()
-     const uint carry = FromUInt(value);
-     return carry;
- }
- /*!
- a method for converting from 'Int<int_size>' to this class
- */
- template<uint int_size>
- uint FromInt(Int<int_size> value)
- {
-     info = 0;
-     // remember the sign, then continue with the magnitude
-     // ('value' is a copy, so it can be modified here)
-     const bool is_sign = value.IsSign();
-     if( is_sign )
-         value.ChangeSign();
-     const sint shifted = (sint)value.CompensationToLeft();
-     FromUIntOrInt(value, shifted);
-     if( is_sign )
-         SetSign();
-     return 0;
- }
- /*!
- an operator= for converting from 'Int<int_size>' to this class
- */
- template<uint int_size>
- Big<exp,man> & operator=(const Int<int_size> & value)
- {
-     // the value returned by FromInt() is not used
-     this->FromInt(value);
-     return *this;
- }
- /*!
- a constructor for converting from 'Int<int_size>' to this class
- */
- template<uint int_size>
- Big(const Int<int_size> & value)
- {
-     // all members are set by the conversion routine
-     this->FromInt(value);
- }
- /*!
- an operator= for converting from 'UInt<int_size>' to this class
- */
- template<uint int_size>
- Big<exp,man> & operator=(const UInt<int_size> & value)
- {
-     // the value returned by FromUInt() is not used
-     this->FromUInt(value);
-     return *this;
- }
- /*!
- a constructor for converting from 'UInt<int_size>' to this class
- */
- template<uint int_size>
- Big(const UInt<int_size> & value)
- {
-     // all members are set by the conversion routine
-     this->FromUInt(value);
- }
- /*!
- an operator= for converting from 'Big<another_exp, another_man>' to this class
- */
- template<uint another_exp, uint another_man>
- Big<exp,man> & operator=(const Big<another_exp, another_man> & value)
- {
-     // the value returned by FromBig() is not used
-     this->FromBig(value);
-     return *this;
- }
- /*!
- a constructor for converting from 'Big<another_exp, another_man>' to this class
- */
- template<uint another_exp, uint another_man>
- Big(const Big<another_exp, another_man> & value)
- {
-     // conversion from a Big with other template parameters is done by FromBig()
-     this->FromBig(value);
- }
- /*!
- a default constructor
- by default we don't set any of the members to zero
- only NaN flag is set
- if you want the mantissa and exponent to be set to zero
- define the TTMATH_BIG_DEFAULT_CLEAR macro
- (useful for debug purposes)
- */
- Big()
- {
- #ifdef TTMATH_BIG_DEFAULT_CLEAR
-     // the '#else' / '#endif' pair below had no opening directive;
-     // this conditional selects between the two initialisations
-     SetZeroNan();
- #else
-     info = TTMATH_BIG_NAN;
-     // we're directly setting 'info' (instead of calling SetNan())
-     // in order to get rid of a warning saying that 'info' is uninitialized
- #endif
- }
- /*!
- a destructor
- */
- ~Big()
- { // intentionally empty: the body performs no action
- }
- /*!
- the default assignment operator
- */
- Big<exp,man> & operator=(const Big<exp,man> & value)
- {
-     // plain member-wise copy of the three data members
-     mantissa = value.mantissa;
-     exponent = value.exponent;
-     info = value.info;
-     return *this;
- }
- /*!
- a constructor for copying from another object of this class
- */
- Big(const Big<exp,man> & value)
- {
-     // copy construction reuses the copy assignment operator
-     *this = value;
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- output:
- return value:
- 0 - ok and 'result' will be an object of type std::string (or std::wstring) which holds the value
- 1 - if there is a carry (it shouldn't happen in a normal situation - if it does, that means there
- is an error somewhere in the library)
- */
- uint ToString( std::string & result,
-                uint base = 10,
-                bool scient = false,
-                sint scient_from = 15,
-                sint round = -1,
-                bool trim_zeroes = true,
-                char comma = '.' ) const
- {
-     // pack the individual arguments into a Conv structure
-     // and let the generic converter do the work
-     Conv settings;
-     settings.comma = static_cast<uint>(comma);
-     settings.trim_zeroes = trim_zeroes;
-     settings.round = round;
-     settings.scient_from = scient_from;
-     settings.scient = scient;
-     settings.base = base;
-     return ToStringBase<std::string, char>(result, settings);
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- uint ToString(std::string & result, const Conv & conv) const
- {
-     // narrow-character instantiation of the generic converter
-     const uint carry = ToStringBase<std::string, char>(result, conv);
-     return carry;
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- std::string ToString(const Conv & conv) const
- {
-     // the carry returned by ToStringBase() is dropped, only the text is returned
-     std::string text;
-     ToStringBase<std::string, char>(text, conv);
-     return text;
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- std::string ToString(uint base = 10) const
- {
-     // default conversion settings with only the base overridden
-     Conv settings;
-     settings.base = base;
-     // same steps as ToString(const Conv &): the carry is dropped
-     std::string text;
-     ToStringBase<std::string, char>(text, settings);
-     return text;
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- uint ToString( std::wstring & result,
-                uint base = 10,
-                bool scient = false,
-                sint scient_from = 15,
-                sint round = -1,
-                bool trim_zeroes = true,
-                wchar_t comma = '.' ) const
- {
-     // pack the individual arguments into a Conv structure
-     // and let the generic converter do the work
-     Conv settings;
-     settings.comma = static_cast<uint>(comma);
-     settings.trim_zeroes = trim_zeroes;
-     settings.round = round;
-     settings.scient_from = scient_from;
-     settings.scient = scient;
-     settings.base = base;
-     return ToStringBase<std::wstring, wchar_t>(result, settings);
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- uint ToString(std::wstring & result, const Conv & conv) const
- {
-     // wide-character instantiation of the generic converter
-     const uint carry = ToStringBase<std::wstring, wchar_t>(result, conv);
-     return carry;
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- std::wstring ToWString(const Conv & conv) const
- {
-     // the carry returned by ToStringBase() is dropped, only the text is returned
-     std::wstring text;
-     ToStringBase<std::wstring, wchar_t>(text, conv);
-     return text;
- }
- /*!
- a method for converting into a string
- struct Conv is defined in ttmathtypes.h, look there for more information about parameters
- */
- std::wstring ToWString(uint base = 10) const
- {
-     // default conversion settings with only the base overridden
-     Conv settings;
-     settings.base = base;
-     // same steps as ToWString(const Conv &): the carry is dropped
-     std::wstring text;
-     ToStringBase<std::wstring, wchar_t>(text, settings);
-     return text;
- }
- /*!
- an auxiliary method for converting into the string
- */
- template<class string_type, class char_type>
- uint ToStringBase(string_type & result, const Conv & conv) const
- {
-     static char error_overflow_msg[] = "overflow";
-     static char error_nan_msg[] = "NaN";
-     result.erase();
-     // NaN is reported as text but is not treated as an error
-     if( IsNan() )
-     {
-         Misc::AssignString(result, error_nan_msg);
-         return 0;
-     }
-     // only bases 2..16 are supported
-     if( conv.base<2 || conv.base>16 )
-     {
-         Misc::AssignString(result, error_overflow_msg);
-         return 1;
-     }
-     if( IsZero() )
-     {
-         result = '0';
-         return 0;
-     }
-     /*
-     since 'base' is greater or equal 2 that 'new_exp' of type 'Int<exp>' should
-     hold the new value of exponent but we're using 'Int<exp+1>' because
-     if the value for example would be 'max()' then we couldn't show it
-     max() -> 11111111 * 2 ^ 11111111111 (bin)(the mantissa and exponent have all bits set)
-     if we were using 'Int<exp>' we couldn't show it in this format:
-     1,1111111 * 2 ^ 11111111111 (bin)
-     because we have to add something to the mantissa and because
-     mantissa is full we can't do it and it'll be a carry
-     (look at ToString_SetCommaAndExponent(...))
-     when the base would be greater than two (for example 10)
-     we could use 'Int<exp>' here
-     */
-     Int<exp+1> new_exp;
-     // the second step runs only when the first one reported no carry
-     if( ToString_CreateNewMantissaAndExponent<string_type, char_type>(result, conv, new_exp) ||
-         ToString_SetCommaAndExponent<string_type, char_type>(result, conv, new_exp) )
-     {
-         Misc::AssignString(result, error_overflow_msg);
-         return 1;
-     }
-     if( IsSign() )
-         result.insert(result.begin(), '-');
-     // converted successfully
-     return 0;
- }
- /*!
- in the method 'ToString_CreateNewMantissaAndExponent()' we're using
- type 'Big<exp+1,man>' and we should have the ability to use some
- necessary methods from that class (methods which are private here)
- */
- friend class Big<exp-1,man>;
- /*!
- an auxiliary method for converting into the string
- input:
- base - the base in range <2,16>
- output:
- return values:
- 0 - ok
- 1 - if there was a carry
- new_man - the new mantissa for 'base'
- new_exp - the new exponent for 'base'
- mathematic part:
- the value is stored as:
- value = mantissa * 2^exponent
- we want to show 'value' as:
- value = new_man * base^new_exp
- then 'new_man' we'll print using the standard method from UInt<> type for printing
- and 'new_exp' is the offset of the comma operator in a system of a base 'base'
- value = mantissa * 2^exponent
- value = mantissa * 2^exponent * (base^new_exp / base^new_exp)
- value = mantissa * (2^exponent / base^new_exp) * base^new_exp
- look at the part (2^exponent / base^new_exp), there'll be good if we take
- a 'new_exp' equal that value when the (2^exponent / base^new_exp) will be equal one
- on account of the 'base' is not as power of 2 (can be from 2 to 16),
- this formula will not be true for integer 'new_exp' then in our case we take
- 'base^new_exp' _greater_ than '2^exponent'
- if 'base^new_exp' were smaller than '2^exponent' the new mantissa could be
- greater than the max value of the container UInt<man>
- value = mantissa * (2^exponent / base^new_exp) * base^new_exp
- let M = mantissa * (2^exponent / base^new_exp) then
- value = M * base^new_exp
- in our calculation we treat M as floating value showing it as:
- M = mm * 2^ee where ee will be <= 0
- next we'll move all bits of mm into the right when ee is equal zero
- abs(ee) must not be too big that only few bits from mm we can leave
- then we'll have:
- M = mmm * 2^0
- 'mmm' is the new_man which we're looking for
- new_exp we calculate in this way:
- 2^exponent <= base^new_exp
- new_exp >= log base (2^exponent) <- logarithm with the base 'base' from (2^exponent)
- but we need new_exp as integer then we test:
- if new_exp is greater than zero and with fraction we add one to new_exp
- new_exp = new_exp + 1 (if new_exp>0 and with fraction)
- and at the end we take the integer part:
- new_exp = int(new_exp)
- */
- template<class string_type, class char_type>
- uint ToString_CreateNewMantissaAndExponent( string_type & new_man, const Conv & conv,
-                                             Int<exp+1> & new_exp) const
- {
-     if( conv.base<2 || conv.base>16 )
-         return 1;
-     // bases which are a power of two have their own specialised routines
-     switch( conv.base )
-     {
-     case 2:
-         return ToString_CreateNewMantissaAndExponent_Base2(new_man, new_exp);
-     case 4:
-         return ToString_CreateNewMantissaAndExponent_BasePow2(new_man, new_exp, 2);
-     case 8:
-         return ToString_CreateNewMantissaAndExponent_BasePow2(new_man, new_exp, 3);
-     case 16:
-         return ToString_CreateNewMantissaAndExponent_BasePow2(new_man, new_exp, 4);
-     default:
-         break;
-     }
-     uint c = 0;
-     // this = mantissa * 2^exponent
-     // temp = +1 * 2^exponent
-     // we're using a bigger type than 'big<exp,man>' (look below)
-     Big<exp+1,man> temp;
-     temp.info = 0;
-     temp.exponent = exponent;
-     temp.mantissa.SetOne();
-     c += temp.Standardizing();
-     // new_exp_ = log base (2^exponent)
-     // if new_exp_ is positive and with fraction then we add one
-     Big<exp+1,man> new_exp_;
-     c += new_exp_.ToString_Log(temp, conv.base); // this logarithm isn't very complicated
-     // rounding up to the nearest integer
-     if( !new_exp_.IsInteger() )
-     {
-         if( !new_exp_.IsSign() )
-             c += new_exp_.AddOne(); // new_exp_ > 0 and with fraction
-         new_exp_.SkipFraction();
-     }
-     const uint first_try = ToString_CreateNewMantissaTryExponent<string_type, char_type>(new_man, conv, new_exp_, new_exp);
-     if( first_try )
-     {
-         // in very rare cases there can be an overflow from ToString_CreateNewMantissaTryExponent
-         // it means that new_exp_ was too small (the problem comes from floating point numbers precision)
-         // so we increase new_exp_ and try again
-         new_exp_.AddOne();
-         c += ToString_CreateNewMantissaTryExponent<string_type, char_type>(new_man, conv, new_exp_, new_exp);
-     }
-     return ( c != 0 ) ? 1 : 0;
- }
- /*!
- an auxiliary method for converting into the string
- trying to calculate new_man for given exponent (new_exp_)
- if there is a carry it can mean that new_exp_ is too small
- */
- template<class string_type, class char_type>
- uint ToString_CreateNewMantissaTryExponent( string_type & new_man, const Conv & conv,
-                                             const Big<exp+1,man> & new_exp_, Int<exp+1> & new_exp) const
- {
-     // because 'base^new_exp' is >= '2^exponent' then
-     // because base is >= 2 then we've got:
-     // 'new_exp_' must be smaller or equal 'new_exp'
-     // and we can pass it into the Int<exp> type
-     // (in fact we're using a greater type then it'll be ok)
-     uint c = new_exp_.ToInt(new_exp);
-     // divisor = base ^ new_exp_
-     // (new_exp_ is passed so the Pow(Big<> &) version will be used)
-     // if we hadn't used a bigger type than 'Big<exp,man>' then the result
-     // of this formula 'Pow(...)' would have been with an overflow
-     Big<exp+1,man> divisor(conv.base);
-     c += divisor.Pow( new_exp_ );
-     // quotient = mantissa * 2^exponent / base^new_exp_
-     Big<exp+1,man> quotient;
-     quotient.info = 0;
-     quotient.mantissa = mantissa;
-     quotient.exponent = exponent;
-     c += quotient.Div(divisor);
-     // moving all bits of the mantissa into the right
-     // (how many times to move depend on the exponent)
-     c += quotient.ToString_MoveMantissaIntoRight();
-     // because we took 'new_exp' as small as it was
-     // possible ([log base (2^exponent)] + 1) that after the division
-     // the value of exponent should be equal zero or
-     // minimum smaller than zero then we've got the mantissa which has
-     // maximum valid bits
-     quotient.mantissa.ToString(new_man, conv.base);
-     if( !IsInteger() )
-     {
-         if( conv.base_round )
-             c += ToString_BaseRound<string_type, char_type>(new_man, conv, new_exp);
-     }
-     else
-     {
-         // making sure the new mantissa will be without fraction (integer)
-         ToString_CheckMantissaInteger<string_type, char_type>(new_man, new_exp);
-     }
-     return ( c != 0 ) ? 1 : 0;
- }
- /*!
- this method calculates the logarithm
- it is used by ToString_CreateNewMantissaAndExponent() method
- it's not too complicated
- because x=+1*2^exponent (mantissa is one) then during the calculation
- the Ln(x) will not be making the long formula from LnSurrounding1()
- and only we have to calculate 'Ln(base)' but it'll be calculated
- only once, the next time we will get it from the 'history'
- x is greater than 0
- base is in <2,16> range
- */
- uint ToString_Log(const Big<exp,man> & x, uint base)
- {
-     TTMATH_ASSERT( base>=2 && base<=16 )
-     // 'divisor' first serves as the constant one, later it holds Ln(base)
-     Big<exp,man> divisor;
-     divisor.SetOne();
-     if( x == divisor )
-     {
-         // log(1) is 0
-         SetZero();
-         return 0;
-     }
-     // there can be only a carry
-     // because 'x' is in the '+1 * 2^exponent' form
-     // the long formula from LnSurrounding1() will not be calculated
-     // (LnSurrounding1() will return one immediately)
-     uint c = Ln(x);
-     // for the base equal 10 we're using SetLn10() instead of calculating it
-     // (only if we have the constant sufficient big)
-     const bool use_builtin_ln10 = ( base==10 && man<=TTMATH_BUILTIN_VARIABLES_SIZE );
-     if( use_builtin_ln10 )
-         divisor.SetLn10();
-     else
-         c += ToString_LogBase(base, divisor);
-     // log_base(x) = Ln(x) / Ln(base)
-     c += Div( divisor );
-     return ( c != 0 ) ? 1 : 0;
- }
- /*!
-     this method puts Ln(base) into 'result' ('base' must be in the <2,16> range)
-     each logarithm is calculated only once and then kept in a static table
-     there is no locking here so this version is for a single thread environment
-     it returns 0 on success or 1 if Ln() reported a carry when filling the table
- */
- uint ToString_LogBase(uint base, Big<exp,man> & result)
- {
-     TTMATH_ASSERT( base>=2 && base<=16 )
-     // one slot for each base from 2 to 16, zero means 'not calculated yet'
-     // (static POD types are initialized before the program runs)
-     static int ready[15] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-     static Big<exp,man> cache[15];
-     const uint slot = base - 2;
-     uint carry = 0;
-     if( ready[slot] == 0 )
-     {
-         Big<exp,man> base_value(base);
-         carry += cache[slot].Ln(base_value);
-         ready[slot] = 1;
-     }
-     result = cache[slot];
-     return (carry != 0)? 1 : 0;
- }
- /*!
- this method calculates the logarithm of 'base'
- it's used in multi-thread environment
- 'base' must be in the <2,16> range (see the assertion below)
- the logarithm is calculated under a ThreadLock and then kept in a static
- table, 'guardians[base-2]' is set to 1 when the corresponding entry is ready
- if the lock cannot be taken the logarithm is calculated directly into
- 'result' and nothing is stored in the table
- it returns 0 on success or 1 if Ln() reported a carry
- */
- uint ToString_LogBase(uint base, Big<exp,man> & result)
- {
- TTMATH_ASSERT( base>=2 && base<=16 )
- // these guardians are initialized before the program runs (static POD types)
- volatile static sig_atomic_t guardians[15] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
- static Big<exp,man> * plog_history;
- uint index = base - 2;
- uint c = 0;
- // double-checked locking -- NOTE(review): 'volatile sig_atomic_t' gives no inter-thread ordering guarantee in standard C++, confirm this is sufficient on the supported platforms
- if( guardians[index] == 0 )
- {
- ThreadLock thread_lock;
- // locking
- if( thread_lock.Lock() )
- {
- static Big<exp,man> log_history[15];
- if( guardians[index] == 0 )
- {
- plog_history = log_history;
- Big<exp,man> base_(base);
- c += log_history[index].Ln(base_);
- guardians[index] = 1;
- }
- }
- else
- {
- // there was a problem with locking, we store the result directly in 'result' object
- Big<exp,man> base_(base);
- c += result.Ln(base_);
- return (c==0)? 0 : 1;
- }
- // automatically unlocking (presumably done by the ThreadLock destructor -- TODO confirm)
- }
- result = plog_history[index];
- return (c==0)? 0 : 1;
- }
- /*!
-     an auxiliary method for converting into the string (private)
-     it moves the mantissa into the right side by '-exponent' bits
-     (the exponent is expected to be less than or equal to zero)
-     it returns 0 if the exponent is zero or the mantissa has been moved,
-     and 1 if the exponent is positive or so small that no valid bit would remain
- */
- uint ToString_MoveMantissaIntoRight()
- {
-     if( exponent.IsZero() )
-         return 0; // nothing to move
-     // a positive exponent would mean cutting the highest bits of the mantissa,
-     // an exponent <= -sint(man*TTMATH_BITS_PER_UINT) would mean cutting the whole mantissa
-     if( !exponent.IsSign() || exponent <= -sint(man*TTMATH_BITS_PER_UINT) )
-         return 1;
-     // here the exponent is in the (-man*TTMATH_BITS_PER_UINT, 0) range
-     const sint shift = -( exponent.ToInt() );
-     mantissa.Rcr(shift, 0);
-     return 0;
- }
- /*!
-     a special variant of 'ToString_CreateNewMantissaAndExponent' for the base equal 2
-     no calculations are needed: every bit of the mantissa is appended to 'new_man'
-     as the '1' or '0' character (starting from the highest bit) and the exponent
-     is copied to 'new_exp', so the output comes directly from the source
-     (there will not be any small distortions)
-     it always returns 0
- */
- template<class string_type>
- uint ToString_CreateNewMantissaAndExponent_Base2( string_type & new_man,
-                                                   Int<exp+1> & new_exp ) const
- {
-     // the words are visited from the most significant one
-     for( sint word=man-1 ; word>=0 ; --word )
-     {
-         const uint value = mantissa.table[word];
-         // the mask walks from the highest bit down to the lowest one
-         for( uint mask=TTMATH_UINT_HIGHEST_BIT ; mask!=0 ; mask>>=1 )
-             new_man += ( (value & mask) != 0 ) ? '1' : '0';
-     }
-     new_exp = exponent;
-     return 0;
- }
- /*!
-     a special method used to calculate the new mantissa and exponent
-     when the 'base' is equal 4, 8 or 16
-     'bits' is the number of bits of one digit: 2 for base 4, 3 for base 8, 4 for base 16
-     (and the algorithm can be used with a base greater than 16)
-     the exponent is divided by 'bits' and the mantissa is moved left by the rest,
-     then the digits are produced by the Base8 or Base4or16 helper
- */
- template<class string_type>
- uint ToString_CreateNewMantissaAndExponent_BasePow2( string_type & new_man,
-                                                      Int<exp+1> & new_exp,
-                                                      uint bits) const
- {
-     UInt<man+1> shifted(mantissa); // man+1 words: one more word for moving
-     sint rest;                     // how many times the mantissa has to be moved
-     new_exp = exponent;
-     new_exp.DivInt((sint)bits, rest);
-     if( rest != 0 )
-     {
-         if( rest < 0 )
-         {
-             // when the rest is negative then new_exp is negative too
-             rest += sint(bits);
-             new_exp.SubOne();
-         }
-         // moving into the left-hand side
-         shifted.Rcl(rest);
-     }
-     if( bits != 3 )
-     {
-         // base 4 or 16
-         return ToString_CreateNewMantissaAndExponent_Base4or16(new_man, shifted, bits);
-     }
-     // base 8, 'rest' is greater than or equal to zero here
-     const uint len = man*TTMATH_BITS_PER_UINT + rest;
-     return ToString_CreateNewMantissaAndExponent_Base8(new_man, shifted, len, bits);
- }
- /*!
-     a special method used to calculate the new mantissa when the 'base' is equal 8
-     ('bits' is always 3 then)
-     the lowest 'bits' bits of 'man_temp' are taken as a digit which is put at
-     the beginning of 'new_man', then 'man_temp' is moved right by 'bits' bits,
-     this is repeated until 'len' bits have been consumed
-     the algorithm would work for the base 4 or 16 too but there is a faster
-     method for them: ToString_CreateNewMantissaAndExponent_Base4or16()
-     it always returns 0
- */
- template<class string_type>
- uint ToString_CreateNewMantissaAndExponent_Base8( string_type & new_man,
-                                                   UInt<man+1> & man_temp,
-                                                   uint len,
-                                                   uint bits) const
- {
-     // a mask for the lowest 'bits' bits of a word
-     const uint low_mask = TTMATH_UINT_MAX_VALUE >> (TTMATH_BITS_PER_UINT - bits);
-     uint consumed = 0;
-     while( consumed < len )
-     {
-         const uint digit = man_temp.table[0] & low_mask;
-         new_man.insert(new_man.begin(), static_cast<char>(Misc::DigitToChar(digit)));
-         man_temp.Rcr(bits);
-         consumed += bits;
-     }
-     TTMATH_ASSERT( man_temp.IsZero() )
-     return 0;
- }
- /*!
-     a special method used to calculate the new mantissa
-     when the 'base' is equal 4 or 16 (the 'bits' is 2 or 4 then)
-     because TTMATH_BITS_PER_UINT (32 or 64) is divisible by 2 (or 4)
-     a digit never crosses a word boundary, so the digits can be taken
-     word by word, starting from the highest bits of the highest word
-     it always returns 0
- */
- template<class string_type>
- uint ToString_CreateNewMantissaAndExponent_Base4or16( string_type & new_man,
-                                                       UInt<man+1> & man_temp,
-                                                       uint bits) const
- {
-     const uint top_shift = TTMATH_BITS_PER_UINT - bits;
-     const uint top_mask  = TTMATH_UINT_MAX_VALUE << top_shift;
-     // table[man] - the last word - is different from zero only if man_temp was moved
-     const uint extra = man_temp.table[man];
-     if( extra != 0 )
-         new_man += static_cast<char>(Misc::DigitToChar(extra));
-     for( int word=man-1 ; word>=0 ; --word )
-     {
-         uint cur_shift = top_shift;
-         // the mask goes from the highest digit of the word down to the lowest one
-         for( uint cur_mask=top_mask ; cur_mask!=0 ; cur_mask>>=bits, cur_shift-=bits )
-         {
-             const uint digit = (man_temp.table[word] & cur_mask) >> cur_shift;
-             new_man += static_cast<char>(Misc::DigitToChar(digit));
-         }
-     }
-     return 0;
- }
- /*!
- an auxiliary method for converting into the string
- it tells whether the value would be an integer after the last digit of
- 'new_man' has been rounded off (it is called from ToString_BaseRound)
- it returns true if new_exp is not negative or is equal -1, and also if
- new_man has less than two characters or is too long to be analysed
- otherwise the digits from the first digit after the comma up to the last
- but one digit are tested: if the last digit is >= base/2 all of them have to be
- equal (base-1), if it is smaller all of them have to be zero -- only then
- true is returned
- */
- template<class string_type, class char_type>
- bool ToString_RoundMantissaWouldBeInteger(string_type & new_man, const Conv & conv, Int<exp+1> & new_exp) const
- {
- // if new_exp is greater or equal to zero then we have an integer value,
- // if new_exp is equal -1 then we have only one digit after the comma
- // and after rounding it would be an integer value
- if( !new_exp.IsSign() || new_exp == -1 )
- return true;
- if( new_man.size() >= TTMATH_UINT_HIGHEST_BIT || new_man.size() < 2 )
- return true; // oops, the mantissa is too large for calculating (or too small) - we are not doing the base rounding
- uint i = 0;
- char_type digit;
- if( new_exp >= -sint(new_man.size()) )
- {
- uint new_exp_abs = -new_exp.ToInt();
- i = new_man.size() - new_exp_abs; // start from the first digit after the comma
- }
- if( Misc::CharToDigit(new_man[new_man.size()-1]) >= conv.base/2 )
- {
- if( new_exp < -sint(new_man.size()) )
- {
- // there are some zeroes after the comma
- // (between the comma and the first digit from the mantissa)
- // and the result value will never be an integer
- return false;
- }
- digit = static_cast<char_type>( Misc::DigitToChar(conv.base-1) );
- }
- else
- {
- digit = '0';
- }
- for( ; i < new_man.size()-1 ; ++i)
- if( new_man[i] != digit )
- return false; // it will not be an integer
- return true; // it will be integer after rounding
- }
- /*!
-     an auxiliary method for converting into the string
-     (used when the value is an integer)
-     after the floating point calculations the new mantissa can consist of
-     some fraction (often this is the last digit different from zero),
-     so all digits which lie after the decimal point are overwritten with '0'
-     ToString_BaseRound would not get rid of them because it makes a test against
-     an integer value (ToString_RoundMantissaWouldBeInteger) and returns immediately
- */
- template<class string_type, class char_type>
- void ToString_CheckMantissaInteger(string_type & new_man, const Int<exp+1> & new_exp) const
- {
-     if( !new_exp.IsSign() )
-         return; // new_exp >= 0, there are no digits after the decimal point
-     const uint length = new_man.size();
-     if( length >= TTMATH_UINT_HIGHEST_BIT )
-         return; // the mantissa is too long
-     // if new_exp is smaller than -length the whole mantissa is cleared
-     uint first = 0;
-     if( new_exp >= -sint(length) )
-     {
-         // 'tail' means how many last digits from the mantissa should be equal zero
-         const sint tail = -( new_exp.ToInt() );
-         first = length - uint(tail);
-     }
-     for( uint k=first ; k<length ; ++k )
-         new_man[k] = '0';
- }
- /*!
-     an auxiliary method for converting into the string
-     this method is used for base!=2, base!=4, base!=8 and base!=16
-     the rounding is made when the value has a fraction (is not an integer):
-     the last digit of 'new_man' is removed, 'new_exp' is increased by one
-     and if the removed digit was >= base/2 one is added into the mantissa
-     it returns the carry from increasing 'new_exp' (or 0 if nothing was done)
- */
- template<class string_type, class char_type>
- uint ToString_BaseRound(string_type & new_man, const Conv & conv, Int<exp+1> & new_exp) const
- {
-     // minimum two characters are needed
-     // and there must not be an integer after the rounding
-     if( new_man.size() < 2 ||
-         ToString_RoundMantissaWouldBeInteger<string_type, char_type>(new_man, conv, new_exp) )
-         return 0;
-     const typename string_type::size_type last = new_man.length() - 1;
-     const uint removed = Misc::CharToDigit( new_man[last] );
-     // the last character is erased so the exponent grows by one
-     new_man.erase(last, 1);
-     const uint carry = new_exp.AddOne();
-     if( removed >= conv.base / 2 )
-     {
-         ToString_RoundMantissa_AddOneIntoMantissa<string_type, char_type>(new_man, conv);
-     }
-     return carry;
- }
- /*!
-     an auxiliary method for converting into the string
-     this method adds one into the new mantissa (a string of digits in conv.base)
-     the carry is propagated from the last character towards the first one,
-     if it leaves the first character a '1' is put at the beginning
-     the comma character can be present in the string, it is ignored
-     (the method is used in ToString_CorrectDigitsAfterComma_Round() as well)
- */
- template<class string_type, class char_type>
- void ToString_RoundMantissa_AddOneIntoMantissa(string_type & new_man, const Conv & conv) const
- {
-     if( new_man.empty() )
-         return;
-     sint pos = sint( new_man.length() ) - 1;
-     bool carry = true;
-     while( carry && pos >= 0 )
-     {
-         // the comma is only skipped
-         if( new_man[pos] != static_cast<char_type>(conv.comma) )
-         {
-             uint digit = Misc::CharToDigit( new_man[pos] ) + 1;
-             if( digit == conv.base )
-                 digit = 0;      // the carry goes to the next position
-             else
-                 carry = false;
-             new_man[pos] = static_cast<char_type>( Misc::DigitToChar(digit) );
-         }
-         --pos;
-     }
-     // the loop finishes with the carry set only when all characters were visited
-     if( carry )
-         new_man.insert( new_man.begin() , '1' );
- }
- /*!
-     an auxiliary method for converting into the string
-     this method sets the comma and/or puts the exponent into the string
-     the scientific format is used when conv.scient is set or when the
-     scientific exponent is outside the <-conv.scient_from, conv.scient_from> range
-     it returns 1 if there was a carry when calculating the scientific exponent
- */
- template<class string_type, class char_type>
- uint ToString_SetCommaAndExponent(string_type & new_man, const Conv & conv, Int<exp+1> & new_exp) const
- {
-     if( new_man.empty() )
-         return 0;
-     // 'new_exp' is connected with the comma at the end of 'new_man' e.g. 32342343234
-     // 'scientific_exp' is connected with the comma after the first digit: 3.2342343234
-     Int<exp+1> scientific_exp( new_exp );
-     const sint offset = sint( new_man.length() ) - 1;
-     // there shouldn't be a carry because a greater type is used
-     // -- 'Int<exp+1>' instead of 'Int<exp>'
-     const uint carry = scientific_exp.Add( offset );
-     const bool print_scientific = conv.scient ||
-                                   scientific_exp > conv.scient_from ||
-                                   scientific_exp < -sint(conv.scient_from);
-     if( print_scientific )
-     {
-         // the 'scientific_exp' is passed here instead of 'new_exp'
-         ToString_SetCommaAndExponent_Scientific<string_type, char_type>(new_man, conv, scientific_exp);
-     }
-     else
-     {
-         ToString_SetCommaAndExponent_Normal<string_type, char_type>(new_man, conv, new_exp);
-     }
-     return (carry != 0)? 1 : 0;
- }
- /*!
-     an auxiliary method for converting into the string
-     (the normal -- not scientific -- format)
-     for a negative 'new_exp' the comma is put into the mantissa,
-     in the other case zero characters are added at the end,
-     at the end the digits are grouped
- */
- template<class string_type, class char_type>
- void ToString_SetCommaAndExponent_Normal(string_type & new_man, const Conv & conv, Int<exp+1> & new_exp ) const
- {
-     if( new_exp.IsSign() )
-     {
-         // new_exp < 0
-         ToString_SetCommaAndExponent_Normal_SetCommaInside<string_type, char_type>(new_man, conv, new_exp);
-     }
-     else
-     {
-         // new_exp >= 0
-         ToString_SetCommaAndExponent_Normal_AddingZero<string_type, char_type>(new_man, new_exp);
-     }
-     ToString_Group_man<string_type, char_type>(new_man, conv);
- }
- /*!
-     an auxiliary method for converting into the string
-     it adds 'new_exp' zero characters at the end of 'new_man'
-     ('new_exp' is expected to be greater than or equal to zero)
- */
- template<class string_type, class char_type>
- void ToString_SetCommaAndExponent_Normal_AddingZero(string_type & new_man,
-                                                     Int<exp+1> & new_exp) const
- {
-     // how many zero characters have to be added at the end
-     uint zeroes = new_exp.ToInt();
-     const typename string_type::size_type wanted = new_man.length() + zeroes;
-     if( wanted > new_man.capacity() )
-     {
-         // about 6 characters more (they will be needed for the comma or something)
-         new_man.reserve( wanted + 6 );
-     }
-     while( zeroes-- > 0 )
-         new_man += '0';
- }
- /*!
-     an auxiliary method for converting into the string
-     it is used when 'new_exp' is negative: the comma is either put inside
-     the mantissa or the mantissa is preceded by "0", the comma and
-     the required number of zero characters
-     at the end the digits after the comma are corrected
- */
- template<class string_type, class char_type>
- void ToString_SetCommaAndExponent_Normal_SetCommaInside(
-                                             string_type & new_man,
-                                             const Conv & conv,
-                                             Int<exp+1> & new_exp ) const
- {
-     // new_exp is < 0
-     const sint digits      = sint(new_man.length()); // the length with a sign
-     const sint after_comma = -( new_exp.ToInt() );   // it is positive
-     if( new_exp > -digits )
-     {
-         // the comma is put within the mantissa
-         new_man.insert( new_man.begin() + (digits - after_comma), static_cast<char_type>(conv.comma) );
-     }
-     else
-     {
-         // zero characters are needed before the mantissa
-         const uint how_many = after_comma - digits;
-         string_type prefix(how_many+1, '0');
-         prefix.insert( prefix.begin()+1, static_cast<char_type>(conv.comma) );
-         new_man.insert(0, prefix);
-     }
-     ToString_CorrectDigitsAfterComma<string_type, char_type>(new_man, conv);
- }
- /*!
-     an auxiliary method for converting into the string
-     (the scientific format)
-     the comma is put after the first digit, the digits are corrected and grouped
-     and the exponent is added at the end: as 'e' with a sign for the base 10
-     or as "*10^" for the other bases
- */
- template<class string_type, class char_type>
- void ToString_SetCommaAndExponent_Scientific( string_type & new_man,
-                                               const Conv & conv,
-                                               Int<exp+1> & scientific_exp ) const
- {
-     if( new_man.empty() )
-         return;
-     if( new_man.size() > 1 )
-     {
-         // the comma goes directly after the first digit
-         new_man.insert( new_man.begin()+1, static_cast<char_type>(conv.comma) );
-         ToString_CorrectDigitsAfterComma<string_type, char_type>(new_man, conv);
-     }
-     ToString_Group_man<string_type, char_type>(new_man, conv);
-     if( conv.base != 10 )
-     {
-         // the 10 here is meant as the base 'base'
-         // (no matter which 'base' is used there will always be 10 here)
-         Misc::AddString(new_man, "*10^");
-     }
-     else
-     {
-         new_man += 'e';
-         if( !scientific_exp.IsSign() )
-             new_man += '+';
-     }
-     string_type exp_text;
-     scientific_exp.ToString( exp_text, conv.base );
-     new_man += exp_text;
- }
- /*!
-     an auxiliary method for converting into the string
-     it inserts the grouping character (conv.group) into the digits before
-     and after the comma, nothing is done when conv.group is equal zero
- */
- template<class string_type, class char_type>
- void ToString_Group_man(string_type & new_man, const Conv & conv) const
- {
-     if( conv.group == 0 )
-         return; // the grouping is not used
-     typedef typename string_type::size_type StrSize;
-     // the position of the comma or the end of the string if there is no comma
-     StrSize comma_pos = new_man.find(static_cast<char_type>(conv.comma), 0);
-     if( comma_pos == string_type::npos )
-         comma_pos = new_man.size();
-     // 'comma_pos' is passed by a reference and is moved by the number of inserted characters
-     ToString_Group_man_before_comma<string_type, char_type>(new_man, conv, comma_pos);
-     ToString_Group_man_after_comma<string_type, char_type>(new_man, conv, comma_pos+1);
- }
- /*!
-     an auxiliary method for converting into the string
-     it inserts the grouping character after each conv.group_digits digits,
-     counting from the position 'index' (the comma) towards the beginning
-     'index' is increased by the number of inserted characters
- */
- template<class string_type, class char_type>
- void ToString_Group_man_before_comma( string_type & new_man, const Conv & conv,
-                                       typename string_type::size_type & index) const
- {
-     uint group_digits = conv.group_digits;
-     if( group_digits < 1 )
-         group_digits = 1;
-     const char_type separator = static_cast<char_type>(conv.group);
-     uint in_group = 0;
-     typename string_type::size_type pos = index;
-     // pos>0 because no grouping character is put on the first position
-     while( pos > 0 )
-     {
-         if( in_group >= group_digits )
-         {
-             in_group = 0;
-             new_man.insert(pos, 1, separator);
-             ++index;
-         }
-         --pos;
-         ++in_group;
-     }
- }
- /*!
-     an auxiliary method for converting into the string
-     it inserts the grouping character after each conv.group_digits digits,
-     counting from the position 'index' (the first digit after the comma)
-     towards the end of the string
- */
- template<class string_type, class char_type>
- void ToString_Group_man_after_comma(string_type & new_man, const Conv & conv,
-                                     typename string_type::size_type index) const
- {
-     uint group_digits = conv.group_digits;
-     if( group_digits < 1 )
-         group_digits = 1;
-     const char_type separator = static_cast<char_type>(conv.group);
-     uint in_group = 0;
-     while( index < new_man.size() )
-     {
-         if( in_group >= group_digits )
-         {
-             in_group = 0;
-             new_man.insert(index, 1, separator);
-             ++index; // stepping over the inserted character
-         }
-         ++index;
-         ++in_group;
-     }
- }
- /*!
-     an auxiliary method for converting into the string
-     it rounds the digits after the comma (when conv.round is not negative)
-     and then cuts off the trailing zeroes (when conv.trim_zeroes is set)
- */
- template<class string_type, class char_type>
- void ToString_CorrectDigitsAfterComma( string_type & new_man,
-                                        const Conv & conv ) const
- {
-     if( conv.round >= 0 )
-     {
-         ToString_CorrectDigitsAfterComma_Round<string_type, char_type>(new_man, conv);
-     }
-     if( conv.trim_zeroes )
-     {
-         ToString_CorrectDigitsAfterComma_CutOffZeroCharacters<string_type, char_type>(new_man, conv);
-     }
- }
- /*!
-     an auxiliary method for converting into the string
-     it removes the zero characters from the end of 'new_man'
-     (and the comma as well if it would be the last character)
-     nothing is removed when there is no comma before the zeroes
- */
- template<class string_type, class char_type>
- void ToString_CorrectDigitsAfterComma_CutOffZeroCharacters(
-                                             string_type & new_man,
-                                             const Conv & conv) const
- {
-     // minimum two characters
-     if( new_man.length() < 2 )
-         return;
-     // looking for the index of the last character which is not zero
-     uint last = uint( new_man.length() ) - 1;
-     while( last>0 && new_man[last]=='0' )
-         --last;
-     // there is another character than zero at the end -- nothing to do
-     if( last == new_man.length() - 1 )
-         return;
-     // a comma is required
-     // (it can be removed by ToString_CorrectDigitsAfterComma_Round which is called before)
-     if( new_man.find_last_of(static_cast<char_type>(conv.comma), last) == string_type::npos )
-         return;
-     // if the comma is directly before the first zero it is cut as well
-     if( last>0 && new_man[last]==static_cast<char_type>(conv.comma) )
-         --last;
-     new_man.erase(last+1, new_man.length()-last-1);
- }
- /*!
-     an auxiliary method for converting into the string
-     it leaves at most conv.round digits after the comma: the rest of the string
-     is cut (with the comma if conv.round is zero) and if the first of the removed
-     digits was >= base/2 one is added into the mantissa
- */
- template<class string_type, class char_type>
- void ToString_CorrectDigitsAfterComma_Round(
-                                     string_type & new_man,
-                                     const Conv & conv ) const
- {
-     typedef typename string_type::size_type StrSize;
-     const StrSize comma_pos = new_man.find(static_cast<char_type>(conv.comma), 0);
-     if( comma_pos == string_type::npos )
-         return; // no comma (actually there can't be this situation)
-     // how many digits there are after the comma
-     // (greater than zero because there is at least one digit at the end)
-     const StrSize after_comma = new_man.length() - comma_pos - 1;
-     // there is nothing for cutting
-     if( static_cast<StrSize>(conv.round) >= after_comma )
-         return;
-     // the first digit which is going to be removed decides about the rounding
-     const StrSize cut_pos = comma_pos + conv.round + 1;
-     const uint first_cut = Misc::CharToDigit( new_man[cut_pos], conv.base );
-     new_man.erase(cut_pos, after_comma - conv.round);
-     if( conv.round == 0 )
-     {
-         // the comma is not needed now (the whole rest after it has been cut)
-         new_man.erase(comma_pos, 1);
-     }
-     if( first_cut >= conv.base / 2 )
-     {
-         ToString_RoundMantissa_AddOneIntoMantissa<string_type, char_type>(new_man, conv);
-     }
- }
- /*!
-     a method for converting a string into its value
-     it returns 1 if the value is too big -- it cannot be passed into the range
-     of the class Big<exp,man> (or if the base is incorrect)
-     only digits before the comma can make the value too big,
-     all digits after the comma can be ignored
-     'source' - a pointer to the string for parsing
-     'after_source' - if it is set then when the method finishes the pointer
-     is set to the first character after the parsed value
-     'value_read' - if it is provided it is set to true only when a value has
-     actually been read (there was at least one digit in the string),
-     e.g. for such a string as '-' or '+' 'after_source' will be different
-     from 'source' but no value has been read
- */
- uint FromString(const char * source, uint base = 10, const char ** after_source = 0, bool * value_read = 0)
- {
-     Conv settings;
-     settings.base = base;
-     return FromStringBase(source, settings, after_source, value_read);
- }
- /*!
-     a method for converting a string into its value
-     (the conversion options are taken from 'conv')
-     the work is done by FromStringBase()
- */
- uint FromString(const char * source, const Conv & conv, const char ** after_source = 0, bool * value_read = 0)
- {
-     const uint carry = FromStringBase(source, conv, after_source, value_read);
-     return carry;
- }
- /*!
-     a method for converting a string into its value
-     (a std::string version, it calls the 'const char *' one)
- */
- uint FromString(const std::string & string, uint base = 10, const char ** after_source = 0, bool * value_read = 0)
- {
-     const char * raw = string.c_str();
-     return FromString(raw, base, after_source, value_read);
- }
- /*!
-     a method for converting a string into its value
-     (a std::string version with the conversion options, it calls the 'const char *' one)
- */
- uint FromString(const std::string & string, const Conv & conv, const char ** after_source = 0, bool * value_read = 0)
- {
-     const char * raw = string.c_str();
-     return FromString(raw, conv, after_source, value_read);
- }
- /*!
-     a method for converting a wide string into its value
-     default conversion options are used, only the base is set to 'base'
- */
- uint FromString(const wchar_t * source, uint base = 10, const wchar_t ** after_source = 0, bool * value_read = 0)
- {
-     Conv settings;
-     settings.base = base;
-     return FromStringBase(source, settings, after_source, value_read);
- }
- /*!
-     a method for converting a wide string into its value
-     (the conversion options are taken from 'conv')
- */
- uint FromString(const wchar_t * source, const Conv & conv, const wchar_t ** after_source = 0, bool * value_read = 0)
- {
-     const uint carry = FromStringBase(source, conv, after_source, value_read);
-     return carry;
- }
- /*!
-     a method for converting a wide string into its value
-     (a std::wstring version, it calls the 'const wchar_t *' one)
- */
- uint FromString(const std::wstring & string, uint base = 10, const wchar_t ** after_source = 0, bool * value_read = 0)
- {
-     const wchar_t * raw = string.c_str();
-     return FromString(raw, base, after_source, value_read);
- }
- /*!
-     a method for converting a wide string into its value
-     (a std::wstring version with the conversion options, it calls the 'const wchar_t *' one)
- */
- uint FromString(const std::wstring & string, const Conv & conv, const wchar_t ** after_source = 0, bool * value_read = 0)
- {
-     const wchar_t * raw = string.c_str();
-     return FromString(raw, conv, after_source, value_read);
- }
- /*!
-     an auxiliary method for converting from a string
-     the order of parsing: the sign, the digits before the comma, the comma and
-     the digits after it, the scientific part (only for the base 10 and only
-     if some digits have been read)
-     for a base outside the <2,16> range the value is set to NaN and 1 is returned
-     'after_source' and 'value_read' are set if they are provided
- */
- template<class char_type>
- uint FromStringBase(const char_type * source, const Conv & conv, const char_type ** after_source = 0, bool * value_read = 0)
- {
-     bool digits_found = false;
-     if( conv.base<2 || conv.base>16 )
-     {
-         // an incorrect base, nothing is parsed
-         SetNan();
-         if( after_source )
-             *after_source = source;
-         if( value_read )
-             *value_read = digits_found;
-         return 1;
-     }
-     bool negative;
-     SetZero();
-     FromString_TestSign( source, negative );
-     uint carry = FromString_ReadPartBeforeComma( source, conv, digits_found );
-     if( FromString_TestCommaOperator(source, conv) )
-         carry += FromString_ReadPartAfterComma( source, conv, digits_found );
-     if( digits_found && conv.base == 10 )
-         carry += FromString_ReadScientificIfExists( source );
-     // the sign is not set for zero
-     if( negative && !IsZero() )
-         ChangeSign();
-     if( after_source )
-         *after_source = source;
-     if( value_read )
-         *value_read = digits_found;
-     return CheckCarry(carry);
- }
- /*!
-     it tests whether the value is with a sign
-     white characters are skipped first, then one '-' or '+' character
-     is consumed (if present), 'is_sign' is true only for '-'
-     (this method is used from 'FromString_ReadPartScientific' too)
- */
- template<class char_type>
- void FromString_TestSign( const char_type * & source, bool & is_sign )
- {
-     Misc::SkipWhiteCharacters(source);
-     is_sign = ( *source == '-' );
-     // both sign characters are consumed
-     if( is_sign || *source == '+' )
-         ++source;
- }
- /*!
-     it tests whether there is a comma at the current position
-     both conv.comma and conv.comma2 are accepted (the latter only if it is not zero)
-     if the comma is found it is consumed and true is returned
- */
- template<class char_type>
- bool FromString_TestCommaOperator(const char_type * & source, const Conv & conv)
- {
-     const bool is_comma  = ( *source == static_cast<char_type>(conv.comma) );
-     const bool is_comma2 = ( *source == static_cast<char_type>(conv.comma2) && conv.comma2 != 0 );
-     if( !is_comma && !is_comma2 )
-         return false;
-     ++source;
-     return true;
- }
- /*!
-     this method reads the first part of a string
-     (the digits before the comma)
-     white characters at the beginning and the grouping characters (conv.group,
-     if it is not zero) are skipped, each digit is added into 'this' as
-     this = this*base + digit, the reading stops on the first character which
-     is not a digit in conv.base
-     'value_read' is set to true if at least one digit has been read
-     it returns 1 if there was a carry (the value is too big) or 0 otherwise
- */
- template<class char_type>
- uint FromString_ReadPartBeforeComma( const char_type * & source, const Conv & conv, bool & value_read )
- {
-     sint character;
-     Big<exp, man> temp;
-     Big<exp, man> base_( conv.base );
-     Misc::SkipWhiteCharacters( source );
-     for( ; true ; ++source )
-     {
-         // the grouping character is compared as 'char_type' (in the same way as
-         // the comma in FromString_TestCommaOperator), a cast to 'char' would cut
-         // a character which doesn't fit in 'char' when a wide string is parsed
-         if( conv.group!=0 && *source==static_cast<char_type>(conv.group) )
-             continue;
-         character = Misc::CharToDigit(*source, conv.base);
-         if( character == -1 )
-             break;
-         value_read = true;
-         temp = character;
-         if( Mul(base_) )
-             return 1;
-         if( Add(temp) )
-             return 1;
-     }
-     return 0;
- }
- /*!
-     this method reads the second part of a string
-     (the digits after the comma)
-     the digits are collected in 'sum' as an integer as long as its exponent
-     is not greater than zero, 'power' counts them, the rest of the digits
-     is only skipped; at the end sum / base^power is added into 'this'
-     the grouping characters (conv.group, if it is not zero) are skipped
-     'value_read' is set to true if at least one digit has been read
-     it returns 1 if there was a carry or 0 otherwise
- */
- template<class char_type>
- uint FromString_ReadPartAfterComma( const char_type * & source, const Conv & conv, bool & value_read )
- {
-     sint character;
-     uint c = 0, power = 0;
-     UInt<1> power_;
-     Big<exp, man> sum, base_(conv.base);
-     // we don't remove any white characters here
-     sum.SetZero();
-     for( ; sum.exponent.IsSign() || sum.exponent.IsZero() ; ++source )
-     {
-         // the grouping character is compared as 'char_type' (in the same way as
-         // the comma in FromString_TestCommaOperator), a cast to 'char' would cut
-         // a character which doesn't fit in 'char' when a wide string is parsed
-         if( conv.group!=0 && *source==static_cast<char_type>(conv.group) )
-             continue;
-         character = Misc::CharToDigit(*source, conv.base);
-         if( character == -1 )
-             break;
-         value_read = true;
-         // there actually shouldn't be a carry here
-         c += sum.Mul(base_);
-         c += sum.Add(character);
-         power += 1;
-         // 'power' has wrapped around to zero
-         if( power == 0 )
-             c += 1;
-     }
-     // we could break the parsing somewhere in the middle of the string,
-     // but the result (value) still can be good
-     // we should set a correct value of 'source' now
-     for( ; Misc::CharToDigit(*source, conv.base) != -1 ; ++source );
-     power_ = power;
-     c += base_.Pow(power_);
-     c += sum.Div(base_);
-     c += Add(sum);
-     return (c==0)? 0 : 1;
- }
- /*!
-     this method checks whether there is a scientific part: [e|E][-|+]value
-     it is called when the base is 10 and some digits were read before
-     if no exponent has been read 'source' is restored to its previous value
-     it returns 1 if there was a carry or 0 otherwise
- */
- template<class char_type>
- uint FromString_ReadScientificIfExists(const char_type * & source)
- {
-     const char_type * const start = source;
-     bool exponent_read = false;
-     uint carry = 0;
-     if( FromString_TestScientific(source) )
-         carry = FromString_ReadPartScientific( source, exponent_read );
-     // nothing usable after 'e' (or no 'e' at all)
-     if( !exponent_read )
-         source = start;
-     return (carry != 0)? 1 : 0;
- }
- /*!
-     it tests whether there is the character 'e' (or 'E')
-     white characters are skipped first, the character is consumed if found
-     this character is only allowed when the base is equal 10
- */
- template<class char_type>
- bool FromString_TestScientific(const char_type * & source)
- {
-     Misc::SkipWhiteCharacters(source);
-     const bool found = ( *source=='e' || *source=='E' );
-     if( found )
-         ++source;
-     return found;
- }
- /*!
-     this method reads the exponent (after the 'e' character) when there is
-     a scientific format of the value and only when the base is equal 10
-     if the exponent has been read ('scientific_read' is true)
-     'this' is multiplied by 10^exponent
-     it returns 1 if there was a carry or 0 otherwise
- */
- template<class char_type>
- uint FromString_ReadPartScientific( const char_type * & source, bool & scientific_read )
- {
-     bool negative = false;
-     Big<exp, man> sci_exp;
-     FromString_TestSign( source, negative );
-     uint carry = FromString_ReadPartScientific_ReadExponent( source, sci_exp, scientific_read );
-     if( scientific_read )
-     {
-         if( negative )
-             sci_exp.ChangeSign();
-         Big<exp, man> factor;
-         factor = 10;
-         carry += factor.Pow( sci_exp );
-         carry += Mul(factor);
-     }
-     return (carry != 0)? 1 : 0;
- }
- /*!
-     this method reads the value of the extra exponent when the scientific
-     format is used (only when base == 10)
-     the decimal digits are put into 'new_exponent',
-     'scientific_read' is set to true if at least one digit has been read
-     (it is not changed in the other case)
-     it returns 1 if there was a carry or 0 otherwise
- */
- template<class char_type>
- uint FromString_ReadPartScientific_ReadExponent( const char_type * & source, Big<exp, man> & new_exponent, bool & scientific_read )
- {
-     Big<exp, man> ten, digit;
-     sint character;
-     Misc::SkipWhiteCharacters(source);
-     new_exponent.SetZero();
-     ten = 10;
-     while( (character=Misc::CharToDigit(*source, 10)) != -1 )
-     {
-         scientific_read = true;
-         digit = character;
-         // new_exponent = new_exponent*10 + digit
-         if( new_exponent.Mul(ten) || new_exponent.Add(digit) )
-             return 1;
-         ++source;
-     }
-     return 0;
- }
- /*!
-     a constructor for converting a string into this class
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big(const char * string)
- {
-     this->FromString( string );
- }
- /*!
-     a constructor for converting a std::string into this class
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big(const std::string & string)
- {
-     this->FromString( string.c_str() );
- }
- /*!
-     an operator= for converting a string into its value
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big<exp, man> & operator=(const char * string)
- {
-     this->FromString( string );
-     return *this;
- }
- /*!
-     an operator= for converting a std::string into its value
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big<exp, man> & operator=(const std::string & string)
- {
-     this->FromString( string.c_str() );
-     return *this;
- }
- /*!
-     a constructor for converting a wide string into this class
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big(const wchar_t * string)
- {
-     this->FromString( string );
- }
- /*!
-     a constructor for converting a std::wstring into this class
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big(const std::wstring & string)
- {
-     this->FromString( string.c_str() );
- }
- /*!
-     an operator= for converting a wide string into its value
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big<exp, man> & operator=(const wchar_t * string)
- {
-     this->FromString( string );
-     return *this;
- }
- /*!
-     an operator= for converting a std::wstring into its value
-     (the string is parsed by FromString() with its default base,
-     the returned carry is ignored)
- */
- Big<exp, man> & operator=(const std::wstring & string)
- {
-     this->FromString( string.c_str() );
-     return *this;
- }
- /*!
- *
- * methods for comparing
- *
- */
- /*!
-     this method performs the formula 'abs(this) < abs(ss2)'
-     and returns the result
-     (in other words 'this' and 'ss2' are treated as values without a sign)
-     the NaN flag is not checked
- */
- bool SmallerWithoutSignThan(const Big<exp,man> & ss2) const
- {
-     // zero is smaller than everything except another zero
-     if( IsZero() )
-         return !ss2.IsZero();
-     // this!=0 and ss2==0
-     if( ss2.IsZero() )
-         return false;
-     // all bits in the mantissa are pushed into the left side -- Standardizing()
-     // so the exponents decide first and the mantissas only when they are equal
-     if( exponent == ss2.exponent )
-         return mantissa < ss2.mantissa;
-     return exponent < ss2.exponent;
- }
- /*!
-     this method performs the formula 'abs(this) > abs(ss2)'
-     and returns the result
-     (in other words 'this' and 'ss2' are treated as values without a sign)
-     the NaN flag is not checked
- */
- bool GreaterWithoutSignThan(const Big<exp,man> & ss2) const
- {
-     // zero is never greater (no matter whether ss2 is zero or not)
-     if( IsZero() )
-         return false;
-     // this!=0 and ss2==0
-     if( ss2.IsZero() )
-         return true;
-     // all bits in the mantissa are pushed into the left side -- Standardizing()
-     // so the exponents decide first and the mantissas only when they are equal
-     if( exponent == ss2.exponent )
-         return mantissa > ss2.mantissa;
-     return exponent > ss2.exponent;
- }
- /*!
-     this method performs the formula 'abs(this) == abs(ss2)'
-     and returns the result
-     (in other words 'this' and 'ss2' are treated as values without a sign)
-     the NaN flag is not checked
- */
- bool EqualWithoutSign(const Big<exp,man> & ss2) const
- {
-     // zero is equal only to another zero
-     if( IsZero() )
-         return ss2.IsZero();
-     // this!=0 and ss2==0
-     if( ss2.IsZero() )
-         return false;
-     return exponent==ss2.exponent && mantissa==ss2.mantissa;
- }
- /*!
-     it returns true if 'this' is smaller than 'ss2'
-     (the signs are compared first, then the absolute values)
- */
- bool operator<(const Big<exp,man> & ss2) const
- {
-     const bool negative       = IsSign();
-     const bool other_negative = ss2.IsSign();
-     // different signs: 'this' is smaller only if it is the negative one
-     if( negative != other_negative )
-         return negative;
-     // both signs are the same,
-     // for negative values the comparison of the absolute values is reversed
-     return negative ? ss2.SmallerWithoutSignThan( *this )
-                     : SmallerWithoutSignThan( ss2 );
- }
- /*!
-     it returns true if 'this' and 'ss2' have the same sign
-     and the same absolute value
- */
- bool operator==(const Big<exp,man> & ss2) const
- {
-     return IsSign() == ss2.IsSign() && EqualWithoutSign( ss2 );
- }
- /*!
-     it returns true if 'this' is greater than 'ss2'
-     (the signs are compared first, then the absolute values)
- */
- bool operator>(const Big<exp,man> & ss2) const
- {
-     const bool negative       = IsSign();
-     const bool other_negative = ss2.IsSign();
-     // different signs: 'this' is greater only if 'ss2' is the negative one
-     if( negative != other_negative )
-         return other_negative;
-     // both signs are the same,
-     // for negative values the comparison of the absolute values is reversed
-     return negative ? ss2.GreaterWithoutSignThan( *this )
-                     : GreaterWithoutSignThan( ss2 );
- }
- bool operator>=(const Big<exp,man> & ss2) const
- {
-     // defined as the negation of 'less than'
-     return !( *this < ss2 );
- }
- bool operator<=(const Big<exp,man> & ss2) const
- {
-     // defined as the negation of 'greater than'
-     return !( *this > ss2 );
- }
- bool operator!=(const Big<exp,man> & ss2) const
- {
-     // defined as the negation of operator==
-     return !( *this == ss2 );
- }
- /*!
- *
- * standard mathematical operators
- *
- */
- /*!
- an operator for changing the sign
- this method is not changing 'this' but the changed value is returned
- */
- Big<exp,man> operator-() const
- {
-     // negate a copy, 'this' itself stays unchanged
-     Big<exp,man> negated(*this);
-     negated.ChangeSign();
-     return negated;
- }
- Big<exp,man> operator-(const Big<exp,man> & ss2) const
- {
-     // subtract on a copy so that neither operand is modified
-     Big<exp,man> difference(*this);
-     difference.Sub(ss2);
-     return difference;
- }
- Big<exp,man> & operator-=(const Big<exp,man> & ss2)
- {
-     // in-place subtraction; the status returned by Sub() is not reported
-     this->Sub(ss2);
-     return *this;
- }
- Big<exp,man> operator+(const Big<exp,man> & ss2) const
- {
-     // add on a copy so that neither operand is modified
-     Big<exp,man> sum(*this);
-     sum.Add(ss2);
-     return sum;
- }
- Big<exp,man> & operator+=(const Big<exp,man> & ss2)
- {
-     // in-place addition; the status returned by Add() is not reported
-     this->Add(ss2);
-     return *this;
- }
- Big<exp,man> operator*(const Big<exp,man> & ss2) const
- {
-     // multiply on a copy so that neither operand is modified
-     Big<exp,man> product(*this);
-     product.Mul(ss2);
-     return product;
- }
- Big<exp,man> & operator*=(const Big<exp,man> & ss2)
- {
-     // in-place multiplication; the status returned by Mul() is not reported
-     this->Mul(ss2);
-     return *this;
- }
- Big<exp,man> operator/(const Big<exp,man> & ss2) const
- {
-     // divide on a copy so that neither operand is modified
-     Big<exp,man> quotient(*this);
-     quotient.Div(ss2);
-     return quotient;
- }
- Big<exp,man> & operator/=(const Big<exp,man> & ss2)
- {
-     // in-place division; the status returned by Div() is not reported
-     this->Div(ss2);
-     return *this;
- }
- /*!
- Prefix operator e.g ++variable
- */
- Big<exp,man> & operator++()
- {
-     // pre-increment: bump the value first, then hand back 'this'
-     this->AddOne();
-     return *this;
- }
- /*!
- Postfix operator e.g variable++
- */
- Big<exp,man> operator++(int)
- {
-     // post-increment: the caller receives the value from before the increment
-     Big<exp,man> previous( *this );
-     this->AddOne();
-     return previous;
- }
- Big<exp,man> & operator--()
- {
-     // pre-decrement: lower the value first, then hand back 'this'
-     this->SubOne();
-     return *this;
- }
- Big<exp,man> operator--(int)
- {
-     // post-decrement: the caller receives the value from before the decrement
-     Big<exp,man> previous( *this );
-     this->SubOne();
-     return previous;
- }
- /*!
- *
- * bitwise operators
- * (we do not define bitwise not)
- */
- Big<exp,man> operator&(const Big<exp,man> & p2) const
- {
-     // bitwise AND computed on a copy
-     Big<exp,man> result( *this );
-     result.BitAnd(p2);
-     return result;
- }
- Big<exp,man> & operator&=(const Big<exp,man> & p2)
- {
-     // bitwise AND applied in place
-     this->BitAnd(p2);
-     return *this;
- }
- Big<exp,man> operator|(const Big<exp,man> & p2) const
- {
-     // bitwise OR computed on a copy
-     Big<exp,man> result( *this );
-     result.BitOr(p2);
-     return result;
- }
- Big<exp,man> & operator|=(const Big<exp,man> & p2)
- {
-     // bitwise OR applied in place
-     this->BitOr(p2);
-     return *this;
- }
- Big<exp,man> operator^(const Big<exp,man> & p2) const
- {
-     // bitwise XOR computed on a copy
-     Big<exp,man> result( *this );
-     result.BitXor(p2);
-     return result;
- }
- Big<exp,man> & operator^=(const Big<exp,man> & p2)
- {
-     // bitwise XOR applied in place
-     this->BitXor(p2);
-     return *this;
- }
- /*!
- this method makes an integer value by skipping any fractions
- for example:
- 10.7 will be 10
- 12.1 -- 12
- -20.2 -- -20
- -20.9 -- -20
- -0.7 -- 0
- 0.8 -- 0
- */
- void SkipFraction()
- {
-     // NaN and zero are left alone; with exponent >= 0 there are no fraction bits at all
-     if( IsNan() || IsZero() || !exponent.IsSign() )
-         return;
-     if( exponent <= -sint(man*TTMATH_BITS_PER_UINT) )
-     {
-         // the whole mantissa lies behind the point: the value is from (-1,1)
-         SetZero();
-     }
-     else
-     {
-         // exponent is in range (-man*TTMATH_BITS_PER_UINT, 0):
-         // its negation is the number of mantissa bits holding the fraction
-         sint exponent_value = exponent.ToInt();
-         mantissa.ClearFirstBits( -exponent_value );
-         // no Standardizing() needed: the highest mantissa bit was not touched
-     }
- }
- /*!
- this method keeps only the fractional part of the value
- for example:
- 30.56 will be 0.56
- -12.67 -- -0.67
- */
- void RemainFraction()
- {
-     if( IsNan() || IsZero() )
-         return;
-     if( !exponent.IsSign() )
-     {
-         // exponent >= 0: there is no fraction, the result is zero
-         SetZero();
-         return;
-     }
-     const sint mantissa_bits = sint(man*TTMATH_BITS_PER_UINT);
-     // the value is from (-1,1): it already is a pure fraction
-     if( exponent <= -mantissa_bits )
-         return;
-     // the exponent is negative here, so this is in fact a subtraction
-     sint exponent_value = exponent.ToInt();
-     sint integer_bits = mantissa_bits + exponent_value;
-     // shift the integer part out of the mantissa and correct the exponent
-     mantissa.Rcl( integer_bits, 0 );
-     exponent.Sub( integer_bits );
-     // the highest bit may be clear now, so the value must be standardized
-     Standardizing();
- }
- /*!
- this method returns true if the value is integer
- (it has no fractional part)
- (we don't check nan)
- */
- bool IsInteger() const
- {
-     // zero, or a value with exponent >= 0, has no fraction bits
-     if( IsZero() || !exponent.IsSign() )
-         return true;
-     // the value is from (-1,1) and not zero
-     if( exponent <= -sint(man*TTMATH_BITS_PER_UINT) )
-         return false;
-     // exponent is in range (-man*TTMATH_BITS_PER_UINT, 0):
-     // its negation is the number of low mantissa bits which must all be zero
-     sint fraction_bits = exponent.ToInt();
-     fraction_bits = -fraction_bits;
-     const uint full_words = fraction_bits / TTMATH_BITS_PER_UINT;
-     const uint tail_bits  = fraction_bits % TTMATH_BITS_PER_UINT;
-     uint word = 0;
-     while( word < full_words )
-     {
-         if( mantissa.table[word] != 0 )
-             return false;
-         ++word;
-     }
-     if( tail_bits == 0 )
-         return true;
-     // check the remaining bits in the first partially used word
-     const uint tail_mask = TTMATH_UINT_MAX_VALUE >> (TTMATH_BITS_PER_UINT - tail_bits);
-     return (mantissa.table[word] & tail_mask) == 0;
- }
- /*!
- this method rounds to the nearest integer value
- (it returns a carry if it was)
- for example:
- 2.3 = 2
- 2.8 = 3
- -2.3 = -2
- -2.8 = -3
- */
- uint Round()
- {
-     // NaN cannot be rounded (reported as a carry), zero is already an integer
-     if( IsNan() )
-         return 1;
-     if( IsZero() )
-         return 0;
-     Big<exp,man> half;
-     half.Set05();
-     // move the value by 0.5 away from zero, then cut the fraction off
-     uint c = IsSign() ? Sub( half ) : Add( half );
-     SkipFraction();
-     return CheckCarry(c);
- }
- /*!
- *
- * input/output operators for standard streams
- *
- */
- /*!
- an auxiliary method for outputting to standard streams
- */
- template<class ostream_type, class string_type>
- static ostream_type & OutputToStream(ostream_type & s, const Big<exp,man> & l)
- {
-     // convert the value to a string of the stream's character type, then print it
-     string_type text;
-     l.ToString(text);
-     s << text;
-     return s;
- }
- /*!
- output to standard streams
- */
- friend std::ostream & operator<<(std::ostream & s, const Big<exp,man> & l)
- {
-     // narrow-character streams are served through std::string
-     typedef std::string text_type;
-     return OutputToStream<std::ostream, text_type>(s, l);
- }
- /*!
- output to standard streams
- */
- friend std::wostream & operator<<(std::wostream & s, const Big<exp,man> & l)
- {
-     // wide-character streams are served through std::wstring
-     typedef std::wstring text_type;
-     return OutputToStream<std::wostream, text_type>(s, l);
- }
- /*!
- an auxiliary method for converting from a string
- */
- template<class istream_type, class string_type, class char_type>
- static istream_type & InputFromStream(istream_type & s, Big<exp,man> & l)
- {
-     // Reads a base-10 value from 's' into 'l'.
-     // Accepted characters: an optional leading sign, digits, at most one
-     // '.' or ',' (not after the exponent mark), at most one 'e'/'E' and a
-     // sign directly after that 'e'/'E'. The collected text is passed to
-     // l.FromString(); the first character that does not belong to the value
-     // is put back into the stream.
-     string_type ss;
-     // char or wchar_t for operator>>
-     // both are initialised: when the first extraction fails (e.g. an empty
-     // stream) 'z' is not assigned and must not be compared as garbage
-     char_type z = 0, old_z = 0;
-     bool was_comma = false;
-     bool was_e = false;
-     // operator>> omits white characters if they're set for omitting
-     s >> z;
-     if( z=='-' || z=='+' )
-     {
-         ss += z;
-         s >> z; // we're reading a next character (white characters can be omitted)
-     }
-     // we're reading only digits (base=10) and only one comma operator
-     for( ; s.good() ; z=static_cast<char_type>(s.get()) )
-     {
-         if( z=='.' || z==',' )
-         {
-             if( was_comma || was_e )
-                 // second comma operator or comma operator after 'e' character
-                 break;
-             was_comma = true;
-         }
-         else if( z == 'e' || z == 'E' )
-         {
-             if( was_e )
-                 // second 'e' character
-                 break;
-             was_e = true;
-         }
-         else if( z == '+' || z == '-' )
-         {
-             if( old_z != 'e' && old_z != 'E' )
-                 // '+' or '-' is allowed only after 'e' character
-                 break;
-         }
-         else if( Misc::CharToDigit(z, 10) < 0 )
-         {
-             break;
-         }
-         ss += z;
-         old_z = z;
-     }
-     // we're leaving the last read character
-     // (it's not belonging to the value)
-     s.unget();
-     l.FromString( ss );
-     return s;
- }
- /*!
- input from standard streams
- */
- friend std::istream & operator>>(std::istream & s, Big<exp,man> & l)
- {
-     // narrow-character streams: std::string buffer, char elements
-     typedef std::string text_type;
-     return InputFromStream<std::istream, text_type, char>(s, l);
- }
- /*!
- input from standard streams
- */
- friend std::wistream & operator>>(std::wistream & s, Big<exp,man> & l)
- {
-     // wide-character streams: std::wstring buffer, wchar_t elements
-     typedef std::wstring text_type;
-     return InputFromStream<std::wistream, text_type, wchar_t>(s, l);
- }
-} // namespace
diff --git a/ttmath/ttmathdec.h b/ttmath/ttmathdec.h
deleted file mode 100644
index 92d3e39..0000000
--- a/ttmath/ttmathdec.h
+++ /dev/null
@@ -1,419 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2012, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathdec
-#define headerfilettmathdec
-#include "ttmathtypes.h"
-#include "ttmaththreads.h"
-#include "ttmathuint.h"
-namespace ttmath
-template<uint value_size, uint dec_digits>
-class Dec
- UInt<value_size> value;
- unsigned char info;
- /*!
- Sign
- the mask of a bit from 'info' which means that there is a sign
- (when the bit is set)
- */
- #define TTMATH_DEC_SIGN 128
- /*!
- Not a number
- if this bit is set then the object does not hold a valid number
- */
- #define TTMATH_DEC_NAN 64
- Dec() : info(TTMATH_DEC_NAN)
- {
-     // a freshly created object is marked as 'not a number'
- }
- Dec(const char * s) : info(TTMATH_DEC_NAN)
- {
-     // starts as 'not a number', then takes the value parsed from the string
-     FromString(s);
- }
- Dec<value_size, dec_digits> & operator=(const char * s)
- {
-     // assignment from text; the status returned by FromString() is not reported
-     this->FromString(s);
-     return *this;
- }
- uint FromString(const char * s, const char ** after_source = 0, bool * value_read = 0)
- {
-     // narrow-character front end of the generic parser
-     return FromStringBase<char>(s, after_source, value_read);
- }
- void ToString(std::string & result) const
- {
-     // narrow-character front end of the generic formatter
-     ToStringBase<std::string>(result);
- }
- /*!
- this method clears a specific bit in the 'info' variable
- bit is one of:
- */
- void ClearInfoBit(unsigned char bit)
- {
-     // keep every bit of 'info' except the ones given in 'bit'
-     info &= ~bit;
- }
- /*!
- this method sets a specific bit in the 'info' variable
- bit is one of:
- */
- void SetInfoBit(unsigned char bit)
- {
-     // switch the given bits on, the other bits of 'info' are left as they are
-     info |= bit;
- }
- /*!
- this method returns true if a specific bit in the 'info' variable is set
- bit is one of:
- */
- bool IsInfoBit(unsigned char bit) const
- {
-     // true when at least one of the bits given in 'bit' is set in 'info'
-     const unsigned char masked = info & bit;
-     return masked != 0;
- }
- bool IsNan() const
- {
-     // the 'not a number' flag is one bit of 'info'
-     return (info & TTMATH_DEC_NAN) != 0;
- }
- bool IsSign() const
- {
-     // the sign flag is one bit of 'info'
-     return (info & TTMATH_DEC_SIGN) != 0;
- }
- /*!
- this method sets the sign
- e.g.
- -1 -> -1
- 2 -> -2
- we do not check whether there is a zero or not, if you're using this method
- you must be sure that the value is (or will be afterwards) different from zero
- */
- void SetSign()
- {
-     // mark the value as negative by setting the sign bit in 'info'
-     // (the counterpart of Abs(), which clears the same bit);
-     // as documented, no check for a zero value is made here
-     SetInfoBit(TTMATH_DEC_SIGN);
- }
- void SetNaN()
- {
-     // mark the object as 'not a number' -- this is the bit tested by IsNan()
-     SetInfoBit(TTMATH_DEC_NAN);
- }
- void Abs()
- {
-     // the absolute value: only the sign bit in 'info' is dropped
-     info = info & (~static_cast<unsigned char>(TTMATH_DEC_SIGN));
- }
- uint Add(const Dec<value_size, dec_digits> & arg)
- {
-     // this = this + arg
-     // returns 1 (and marks the object through SetNaN()) when the addition
-     // of the magnitudes produced a carry, otherwise 0
-     uint c = 0;
-     if( IsSign() == arg.IsSign() )
-     {
-         // the same sign: the magnitudes are added, the sign stays as it is
-         c += value.Add(arg.value);
-     }
-     else
-     {
-         // different signs: the smaller magnitude is subtracted from the
-         // greater one and the result takes the sign of the greater one
-         bool is_sign;
-         if( value > arg.value )
-         {
-             is_sign = IsSign();
-             value.Sub(arg.value);
-         }
-         else
-         {
-             is_sign = arg.IsSign();
-             UInt<value_size> temp(this->value);
-             value = arg.value;
-             value.Sub(temp);
-         }
-         // equal magnitudes cancel out to zero and zero must not carry
-         // a sign (otherwise e.g. 5 + (-5) would be a negative zero)
-         if( is_sign && !value.IsZero() )
-             SetSign();
-         else
-             Abs();
-     }
-     if( c )
-         SetNaN();
-     return (c==0)? 0 : 1;
- }
- uint Sub(const Dec<value_size, dec_digits> & arg)
- {
-     // this = this - arg, calculated as this + (-arg)
-     // the return value is the one from Add(): 0 - ok, 1 - carry
-     //
-     // the sign is flipped on a copy, so 'arg' is not modified
-     // and x.Sub(x) works as well
-     Dec<value_size, dec_digits> negated(arg);
-     if( negated.IsSign() )
-         negated.ClearInfoBit(TTMATH_DEC_SIGN);
-     else
-         negated.SetInfoBit(TTMATH_DEC_SIGN);
-     return Add(negated);
- }
- /*!
- */
- void SetMultipler(UInt<value_size> & result)
- {
- // this guardian is initialized before the program runs (static POD type)
- static int guardian = 0;
- static UInt<value_size> multipler;
- if( guardian == 0 )
- {
- multipler = 10;
- multipler.Pow(dec_digits);
- guardian = 1;
- }
- result = multipler;
- }
- /*!
- */
- void SetMultipler(UInt<value_size> & result)
- {
- // this guardian is initialized before the program runs (static POD type)
- volatile static sig_atomic_t guardian = 0;
- static UInt<value_size> * pmultipler;
- // double-checked locking
- if( guardian == 0 )
- {
- ThreadLock thread_lock;
- // locking
- if( thread_lock.Lock() )
- {
- static UInt<value_size> multipler;
- if( guardian == 0 )
- {
- pmultipler = &multipler;
- multipler = 10;
- multipler.Pow(dec_digits);
- guardian = 1;
- }
- }
- else
- {
- // there was a problem with locking, we store the result directly in 'result' object
- result = 10;
- result.Pow(dec_digits);
- return;
- }
- // automatically unlocking
- }
- result = *pmultipler;
- }
- /*!
- an auxiliary method for converting from a string
- */
- template<class char_type>
- uint FromStringBase(const char_type * s, const char_type ** after_source = 0, bool * value_read = 0)
- {
-     // Parses "[white characters][+|-]digits[.digits]" (base 10) into this object.
-     //   after_source - if not null, receives the position just after the parsed text
-     //   value_read   - passed on to the parser of the part before the '.'
-     // Returns 0 on success and 1 when any step reported a carry;
-     // in the latter case the object is also marked as 'not a number'.
-     UInt<value_size> multipler;
-     const char_type * after;
-     uint c = 0;
-     // the flags (NaN and sign) are cleared before parsing
-     info = 0;
-     Misc::SkipWhiteCharacters(s);
-     if( *s == '-' )
-     {
-         s += 1;
-         SetInfoBit(TTMATH_DEC_SIGN);
-     }
-     else
-     if( *s == '+' )
-     {
-         s += 1;
-     }
-     c += value.FromString(s, 10, &after, value_read);
-     if( after_source )
-         *after_source = after;
-     // the part before the '.' is scaled by the multipler from SetMultipler()
-     SetMultipler(multipler);
-     c += value.Mul(multipler);
-     if( *after == '.' )
-         c += FromStringBaseAfterComma(after+1, after_source);
-     // the status must be returned on every path, not only when there was a carry
-     if( c )
-         SetInfoBit(TTMATH_DEC_NAN);
-     return (c==0)? 0 : 1;
- }
- template<class char_type>
- uint FromStringBaseAfterComma(const char_type * s, const char_type ** after_source = 0, bool * value_read = 0)
- {
-     // reads the digits standing after the '.', at most dec_digits of them
-     // are added to 'value', each with a weight ten times smaller than the previous one
-     UInt<value_size> term;
-     UInt<value_size> weight;
-     sint digit;
-     uint c = 0;
-     size_t digits_left = dec_digits;
-     SetMultipler(weight);
-     while( digits_left > 0 && (digit = Misc::CharToDigit(*s, 10)) != -1 )
-     {
-         weight.DivInt(10);
-         if( value_read )
-             *value_read = true;
-         // after the first carry the digits are only consumed
-         if( c == 0 )
-         {
-             term.SetZero();
-             term.table[0] = digit;
-             c += term.Mul(weight);
-             c += value.Add(term);
-         }
-         --digits_left;
-         ++s;
-     }
-     // all dec_digits places are used: the next digit (if any) decides about rounding up
-     if( digits_left == 0 && (digit = Misc::CharToDigit(*s, 10)) != -1 && digit >= 5 )
-         c += value.AddOne();
-     if( after_source )
-     {
-         // the digits which did not fit are skipped
-         while( (digit = Misc::CharToDigit(*s, 10)) != -1 )
-             s += 1;
-         *after_source = s;
-     }
-     return c;
- }
- template<class string_type>
- void ToStringBase(string_type & result) const
- {
-     if( IsNan() )
-     {
-         result = "NaN";
-         return;
-     }
-     // 'value' is printed as one integer (with the sign when it is set)
-     const bool negative = IsSign();
-     value.ToStringBase(result, 10, negative);
-     if( dec_digits == 0 )
-         return;
-     // the number of digits, the sign character is not counted
-     size_t digits = result.size();
-     if( negative && digits > 0 )
-         digits -= 1;
-     if( dec_digits >= digits )
-     {
-         // pad with zeroes (behind the sign) so that one digit stays before the '.'
-         const size_t missing = dec_digits - digits + 1;
-         const size_t first_digit = negative ? 1 : 0;
-         result.insert(first_digit, missing, '0');
-     }
-     // the last dec_digits characters are the fraction
-     result.insert(result.end() - dec_digits, '.');
- }
-} // namespace
diff --git a/ttmath/ttmathint.h b/ttmath/ttmathint.h
deleted file mode 100644
index ad306f0..0000000
--- a/ttmath/ttmathint.h
+++ /dev/null
@@ -1,1922 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2011, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathint
-#define headerfilettmathint
- \file ttmathint.h
- \brief template class Int<uint>
-#include "ttmathuint.h"
-namespace ttmath
- \brief Int implements a big integer value with a sign
- value_size - how many bytes specify our value
- on 32bit platforms: value_size=1 -> 4 bytes -> 32 bits
- on 64bit platforms: value_size=1 -> 8 bytes -> 64 bits
- value_size = 1,2,3,4,5,6....
-template<uint value_size>
-class Int : public UInt<value_size>
- /*!
- this method sets the max value which this class can hold
- (all bits will be one besides the last one)
- */
- void SetMax()
- {
-     typedef UInt<value_size> base_type;
-     // all bits set, then the highest word loses its top (sign) bit
-     base_type::SetMax();
-     this->table[value_size-1] = ~ TTMATH_UINT_HIGHEST_BIT;
- }
- /*!
- this method sets the min value which this class can hold
- (all bits will be zero besides the last one which is one)
- */
- void SetMin()
- {
-     typedef UInt<value_size> base_type;
-     // all bits clear, then only the top (sign) bit of the highest word is set
-     base_type::SetZero();
-     this->table[value_size-1] = TTMATH_UINT_HIGHEST_BIT;
- }
- /*!
- this method sets -1 as the value
- (-1 is equal the max value in an unsigned type)
- */
- void SetSignOne()
- {
-     typedef UInt<value_size> base_type;
-     // -1 has the same bit pattern as the maximal unsigned value
-     base_type::SetMax();
- }
- /*!
- we change the sign of the value
- if it isn't possible to change the sign this method returns 1
- else return 0 and changing the sign
- */
- uint ChangeSign()
- {
-     typedef UInt<value_size> base_type;
-     // the value returned from SetMin() (only the highest bit set) has no
-     // positive counterpart, e.g. with value_size = 1 on a 32bit platform
-     // -2147483648 cannot become 2147483648 because the max is 2147483647;
-     // the value is left unchanged and the carry is reported
-     // (read as an unsigned number it is the correct absolute value
-     // and some methods rely on this)
-     if( base_type::IsOnlyTheHighestBitSet() )
-         return 1;
-     // the negation: this = 0 - this
-     const base_type old_value(*this);
-     base_type::SetZero();
-     base_type::Sub(old_value);
-     return 0;
- }
- /*!
- this method sets the sign
- e.g. 1 -> -1
- -2 -> -2
- from a positive value we make a negative value,
- if the value is negative we do nothing
- */
- void SetSign()
- {
-     // only a value without the sign has to be negated
-     if( !IsSign() )
-         ChangeSign();
- }
- /*!
- this method returns true if there's the sign
- (the highest bit will be converted to the bool)
- */
- bool IsSign() const
- {
-     typedef UInt<value_size> base_type;
-     // the highest bit of the value is the sign
-     return base_type::IsTheHighestBitSet();
- }
- /*!
- it sets an absolute value
- it can return carry (1) (look on ChangeSign() for details)
- */
- uint Abs()
- {
-     // a value without the sign is already absolute, otherwise it is negated
-     // (the carry from ChangeSign() is passed on)
-     return IsSign() ? ChangeSign() : 0;
- }
- /*!
- *
- * basic mathematic functions
- *
- */
- uint CorrectCarryAfterAdding(bool p1_is_sign, bool p2_is_sign)
- {
-     // adding operands with different signs never gives a carry
-     if( p1_is_sign != p2_is_sign )
-         return 0;
-     // the same signs: there is a carry when the sign of the result
-     // is different from the sign of the operands
-     const bool result_is_sign = UInt<value_size>::IsTheHighestBitSet();
-     return ( result_is_sign != p1_is_sign ) ? 1 : 0;
- }
- /*!
- this method adds two value with a sign and returns a carry
- we're using methods from the base class because values are stored with U2
- we must only make the carry correction
- this = p1(=this) + p2
- when p1>=0 and p2>=0 carry is set when the highest bit of value is set
- when p1<0 and p2<0 carry is set when the highest bit of value is clear
- when p1>=0 and p2<0 carry will never be set
- when p1<0 and p2>=0 carry will never be set
- */
- uint Add(const Int<value_size> & ss2)
- {
-     typedef UInt<value_size> base_type;
-     // the signs must be remembered before the value is overwritten
-     const bool lhs_negative = IsSign();
-     const bool rhs_negative = ss2.IsSign();
-     base_type::Add(ss2);
-     return CorrectCarryAfterAdding(lhs_negative, rhs_negative);
- }
- /*!
- this method adds one *unsigned* word (at a specific position)
- and returns a carry (if it was)
- look at a description in UInt<>::AddInt(...)
- */
- uint AddInt(uint value, uint index = 0)
- {
-     typedef UInt<value_size> base_type;
-     const bool was_negative = IsSign();
-     base_type::AddInt(value, index);
-     // the added word is unsigned, so it counts as a value without the sign
-     return CorrectCarryAfterAdding(was_negative, false);
- }
- /*!
- this method adds two *unsigned* words to the existing value
- and these words begin on the 'index' position
- index should be equal or smaller than value_size-2 (index <= value_size-2)
- x1 - lower word, x2 - higher word
- look at a description in UInt<>::AddTwoInts(...)
- */
- uint AddTwoInts(uint x2, uint x1, uint index)
- {
-     typedef UInt<value_size> base_type;
-     const bool was_negative = IsSign();
-     base_type::AddTwoInts(x2, x1, index);
-     // the added words are unsigned, so they count as a value without the sign
-     return CorrectCarryAfterAdding(was_negative, false);
- }
- uint CorrectCarryAfterSubtracting(bool p1_is_sign, bool p2_is_sign)
- {
-     // subtracting operands with the same sign never gives a carry
-     if( p1_is_sign == p2_is_sign )
-         return 0;
-     // different signs: there is a carry when the sign of the result
-     // is different from the sign of the first operand
-     const bool result_is_sign = UInt<value_size>::IsTheHighestBitSet();
-     return ( result_is_sign != p1_is_sign ) ? 1 : 0;
- }
- /*!
- this method subtracts two values with a sign
- we don't use the previous Add because the method ChangeSign can
- sometimes return carry
- this = p1(=this) - p2
- when p1>=0 and p2>=0 carry will never be set
- when p1<0 and p2<0 carry will never be set
- when p1>=0 and p2<0 carry is set when the highest bit of value is set
- when p1<0 and p2>=0 carry is set when the highest bit of value is clear
- */
- uint Sub(const Int<value_size> & ss2)
- {
-     typedef UInt<value_size> base_type;
-     // the signs must be remembered before the value is overwritten
-     const bool lhs_negative = IsSign();
-     const bool rhs_negative = ss2.IsSign();
-     base_type::Sub(ss2);
-     return CorrectCarryAfterSubtracting(lhs_negative, rhs_negative);
- }
- /*!
- this method subtracts one *unsigned* word (at a specific position)
- and returns a carry (if it was)
- */
- uint SubInt(uint value, uint index = 0)
- {
-     typedef UInt<value_size> base_type;
-     const bool was_negative = IsSign();
-     base_type::SubInt(value, index);
-     // the subtracted word is unsigned, so it counts as a value without the sign
-     return CorrectCarryAfterSubtracting(was_negative, false);
- }
- /*!
- this method adds one to the value and returns carry
- */
- uint AddOne()
- {
-     typedef UInt<value_size> base_type;
-     const bool was_negative = IsSign();
-     base_type::AddOne();
-     // the added one is a value without the sign
-     return CorrectCarryAfterAdding(was_negative, false);
- }
- /*!
- this method subtracts one from the value and returns carry
- */
- uint SubOne()
- {
-     typedef UInt<value_size> base_type;
-     const bool was_negative = IsSign();
-     base_type::SubOne();
-     // the subtracted one is a value without the sign
-     return CorrectCarryAfterSubtracting(was_negative, false);
- }
- uint CheckMinCarry(bool ss1_is_sign, bool ss2_is_sign)
- {
-     // called after the absolute values have been multiplied:
-     // a result without the highest bit set always fits
-     if( !IsSign() )
-         return 0;
-     // the highest bit is set and the signs of the operands were the same:
-     // the result must be without the sign, so it is too big
-     if( ss1_is_sign == ss2_is_sign )
-         return 1;
-     // the signs were different: the only result with the highest bit set
-     // which is still correct is the one equal to the value from SetMin()
-     // (the following SetSign() will have no effect then because the value
-     // already is negative -- look at the description in ChangeSign())
-     return UInt<value_size>::IsOnlyTheHighestBitSet() ? 0 : 1;
- }
- /*!
- multiplication: this = this * ss2
- it can return a carry
- */
- uint MulInt(sint ss2)
- {
-     // this = this * ss2, returns a non-zero value when the result is too big
-     const bool ss1_is_sign = IsSign();
-     const bool ss2_is_sign = ( ss2 < 0 );
-     // the absolute value of ss2 is calculated on the unsigned type:
-     // '-ss2' on the signed type overflows (undefined behaviour)
-     // when ss2 is the minimal sint value
-     const uint ss2_abs = ss2_is_sign ? (uint(0) - uint(ss2)) : uint(ss2);
-     /*
-         we don't have to check the carry from Abs (values will be correct
-         because next we're using the method MulInt from the base class UInt
-         which is without a sign)
-     */
-     Abs();
-     uint c = UInt<value_size>::MulInt(ss2_abs);
-     c += CheckMinCarry(ss1_is_sign, ss2_is_sign);
-     // operands with different signs give a negative result
-     if( ss1_is_sign != ss2_is_sign )
-         SetSign();
-     return c;
- }
- /*!
- multiplication this = this * ss2
- it returns carry if the result is too big
- (we're using the method from the base class but we have to make
- one correction in account of signs)
- */
- uint Mul(Int<value_size> ss2)
- {
-     typedef UInt<value_size> base_type;
-     const bool lhs_negative = IsSign();
-     const bool rhs_negative = ss2.IsSign();
-     // the magnitudes are multiplied by the unsigned method from the base class;
-     // the carries from Abs() don't have to be checked (the values will be
-     // correct when they are read without a sign)
-     Abs();
-     ss2.Abs();
-     uint c = base_type::Mul(ss2);
-     c += CheckMinCarry(lhs_negative, rhs_negative);
-     // operands with different signs give a negative result
-     if( lhs_negative != rhs_negative )
-         SetSign();
-     return c;
- }
- /*!
- division this = this / ss2
- returned values:
- 0 - ok
- 1 - division by zero
- for example: (result means 'this')
- 20 / 3 --> result: 6 remainder: 2
- -20 / 3 --> result: -6 remainder: -2
- 20 / -3 --> result: -6 remainder: 2
- -20 / -3 --> result: 6 remainder: -2
- in other words: this(old) = ss2 * this(new)(result) + remainder
- */
- uint Div(Int<value_size> ss2, Int<value_size> * remainder = 0)
- {
-     typedef UInt<value_size> base_type;
-     const bool dividend_negative = IsSign();
-     const bool divisor_negative  = ss2.IsSign();
-     // the magnitudes are divided by the unsigned method from the base class;
-     // the carries from Abs() don't have to be tested (the same as in Mul)
-     Abs();
-     ss2.Abs();
-     const uint c = base_type::Div(ss2, remainder);
-     // the quotient is negative when the signs of the operands differ
-     if( dividend_negative != divisor_negative )
-         SetSign();
-     // the remainder takes the sign of the dividend
-     if( dividend_negative && remainder )
-         remainder->SetSign();
-     return c;
- }
- uint Div(const Int<value_size> & ss2, Int<value_size> & remainder)
- {
-     // the variant with the remainder given by a reference
-     Int<value_size> * const remainder_ptr = &remainder;
-     return Div(ss2, remainder_ptr);
- }
- /*!
- division this = this / ss2 (ss2 is int)
- returned values:
- 0 - ok
- 1 - division by zero
- for example: (result means 'this')
- 20 / 3 --> result: 6 remainder: 2
- -20 / 3 --> result: -6 remainder: -2
- 20 / -3 --> result: -6 remainder: 2
- -20 / -3 --> result: 6 remainder: -2
- in other words: this(old) = ss2 * this(new)(result) + remainder
- */
- uint DivInt(sint ss2, sint * remainder = 0)
- {
-     // this = this / ss2, the result passed on from the base class is returned
-     // (0 - ok, 1 - division by zero); when 'remainder' is not null it
-     // receives the remainder which takes the sign of the dividend
-     const bool ss1_is_sign = IsSign();
-     const bool ss2_is_sign = ( ss2 < 0 );
-     // the absolute value of ss2 is calculated on the unsigned type:
-     // '-ss2' on the signed type overflows (undefined behaviour)
-     // when ss2 is the minimal sint value
-     const uint ss2_abs = ss2_is_sign ? (uint(0) - uint(ss2)) : uint(ss2);
-     /*
-         we don't have to test the carry from Abs as well as in Mul
-     */
-     Abs();
-     uint rem;
-     uint c = UInt<value_size>::DivInt(ss2_abs, &rem);
-     // the quotient is negative when the signs of the operands differ
-     if( ss1_is_sign != ss2_is_sign )
-         SetSign();
-     if( remainder )
-     {
-         if( ss1_is_sign )
-             *remainder = -sint(rem);
-         else
-             *remainder = sint(rem);
-     }
-     return c;
- }
- uint DivInt(sint ss2, sint & remainder)
- {
-     // the variant with the remainder given by a reference
-     sint * const remainder_ptr = &remainder;
-     return DivInt(ss2, remainder_ptr);
- }
- /*!
- power this = this ^ pow
- this can be negative
- pow is >= 0
- */
- uint Pow2(const Int<value_size> & pow)
- {
-     typedef UInt<value_size> base_type;
-     // the power is calculated on the absolute value
-     const bool base_negative = IsSign();
-     uint c = base_negative ? Abs() : 0;
-     const uint pow_status = base_type::Pow(pow);
-     if( pow_status > 0 )
-         return pow_status; // it can be: 1 or 2
-     // negative value to the power of odd number is negative
-     const bool pow_is_odd = (pow.table[0] & 1) == 1;
-     if( base_negative && pow_is_odd )
-         c += ChangeSign();
-     return (c==0)? 0 : 1;
- }
- /*!
- power this = this ^ pow
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect arguments 0^0 or 0^(-something)
- */
- uint Pow(Int<value_size> pow)
- {
-     typedef UInt<value_size> base_type;
-     // a power without the sign is calculated directly
-     if( !pow.IsSign() )
-         return Pow2(pow);
-     // 'pow' is negative: 'this' must be different from zero
-     if( base_type::IsZero() )
-         return 2;
-     if( pow.ChangeSign() )
-         return 1;
-     // this ^ (-n) is calculated as 1 / (this ^ n)
-     Int<value_size> denominator(*this);
-     const uint pow_status = denominator.Pow2(pow);
-     if( pow_status > 0 )
-         return pow_status;
-     base_type::SetOne();
-     return Div(denominator) ? 1 : 0;
- }
- /*!
- *
- * conversion methods
- *
- */
- /*!
- an auxiliary method for converting both from UInt and Int
- */
- template<uint argument_size>
- uint FromUIntOrInt(const UInt<argument_size> & p, bool UInt_type)
- {
-     // Copies 'p' into this object.
-     //   UInt_type - true when 'p' is to be read as an unsigned value,
-     //               false when it is to be read as a signed one
-     // Returns 1 (a carry) when the value does not fit, otherwise 0.
-     uint min_size = (value_size < argument_size)? value_size : argument_size;
-     uint i;
-     for(i=0 ; i<min_size ; ++i)
-         UInt<value_size>::table[i] = p.table[i];
-     if( value_size > argument_size )
-     {
-         // 'this' is longer than 'p': the rest is filled with zeroes
-         // (unsigned source) or with copies of the sign bit of 'p' (signed source)
-         uint fill;
-         if( UInt_type )
-             fill = 0;
-         else
-             fill = (p.table[argument_size-1] & TTMATH_UINT_HIGHEST_BIT)?
-                         TTMATH_UINT_MAX_VALUE : 0;
-         for( ; i<value_size ; ++i)
-             UInt<value_size>::table[i] = fill;
-     }
-     else
-     {
-         // 'this' is not longer than 'p': every word of 'p' which was not
-         // copied must be a plain extension of the sign bit of the result
-         uint test = (UInt<value_size>::table[value_size-1] & TTMATH_UINT_HIGHEST_BIT)?
-                         TTMATH_UINT_MAX_VALUE : 0;
-         // an unsigned source must not end up with the sign bit set
-         if( UInt_type && test!=0 )
-             return 1;
-         for( ; i<argument_size ; ++i)
-             if( p.table[i] != test )
-                 return 1;
-     }
-     return 0;
- }
- /*!
- this method converts an Int<another_size> type into this class
- this operation has mainly sense if the value from p
- can be held in this type
- it returns a carry if the value 'p' is too big
- */
- template<uint argument_size>
- uint FromInt(const Int<argument_size> & p)
- {
-     // the source is signed
-     const bool source_is_unsigned = false;
-     return FromUIntOrInt(p, source_is_unsigned);
- }
- /*!
- this method converts the sint type into this class
- */
- uint FromInt(sint value)
- {
-     // the lowest word takes the value itself,
-     // all higher words are filled according to the sign
-     const uint sign_fill = ( value<0 ) ? TTMATH_UINT_MAX_VALUE : 0;
-     this->table[0] = uint(value);
-     for(uint word=1 ; word<value_size ; ++word)
-         this->table[word] = sign_fill;
-     // there'll never be a carry here
-     return 0;
- }
- /*!
- this method converts UInt<another_size> into this class
- */
- template<uint argument_size>
- uint FromUInt(const UInt<argument_size> & p)
- {
-     // the source is unsigned
-     const bool source_is_unsigned = true;
-     return FromUIntOrInt(p, source_is_unsigned);
- }
- /*!
- this method converts UInt<another_size> into this class
- */
- template<uint argument_size>
- uint FromInt(const UInt<argument_size> & p)
- {
-     // an UInt argument is unsigned even though the method is named FromInt
-     const bool source_is_unsigned = true;
-     return FromUIntOrInt(p, source_is_unsigned);
- }
- /*!
- this method converts the uint type into this class
- */
- uint FromUInt(uint value)
- {
-     // the lowest word takes the value, all higher words are cleared
-     this->table[0] = value;
-     for(uint word=1 ; word<value_size ; ++word)
-         this->table[word] = 0;
-     // there can be a carry here when the size of this value is equal one word
-     // and the 'value' has the highest bit set
-     const bool sign_bit_taken = ( value_size==1 && (value & TTMATH_UINT_HIGHEST_BIT)!=0 );
-     return sign_bit_taken ? 1 : 0;
- }
- /*!
- this method converts the uint type into this class
- */
- uint FromInt(uint value)
- {
-     // an unsigned argument is handled by FromUInt
-     const uint carry = FromUInt(value);
-     return carry;
- }
- /*!
- the default assignment operator
- */
- Int<value_size> & operator=(const Int<value_size> & p)
- {
-     // the same size on both sides
-     this->FromInt(p);
-     return *this;
- }
- /*!
- this operator converts an Int<another_size> type to this class
- it doesn't return a carry
- */
- template<uint argument_size>
- Int<value_size> & operator=(const Int<argument_size> & p)
- {
-     // a carry from the conversion (the value doesn't fit) is not reported
-     this->FromInt(p);
-     return *this;
- }
- /*!
- this method converts the sint type to this class
- */
- Int<value_size> & operator=(sint i)
- {
-     // assignment from one signed word
-     this->FromInt(i);
-     return *this;
- }
- /*!
- a constructor for converting the sint to this class
- */
- Int(sint i)
- {
-     // construction from one signed word
-     this->FromInt(i);
- }
- /*!
- a copy constructor
- */
- Int(const Int<value_size> & u)
- {
-     // the copy is made by the same conversion which other sizes use
-     this->FromInt(u);
- }
- /*!
- a constructor for copying from another types
- */
- template<uint argument_size>
- Int(const Int<argument_size> & u)
- {
-     // the new object keeps its own size ('value_size'), not the size of 'u'
-     this->FromInt(u);
- }
- /*!
- this operator converts an UInt<another_size> type to this class
- it doesn't return a carry
- */
- template<uint argument_size>
- Int<value_size> & operator=(const UInt<argument_size> & p)
- {
-     // a carry from the conversion (the value doesn't fit) is not reported
-     this->FromUInt(p);
-     return *this;
- }
- /*!
- this method converts the Uint type to this class
- */
- Int<value_size> & operator=(uint i)
- {
-     // assignment from one unsigned word; a carry is not reported
-     this->FromUInt(i);
-     return *this;
- }
- /*!
- a constructor for converting the uint to this class
- */
- Int(uint i)
- {
-     // construction from one unsigned word; a carry is not reported
-     this->FromUInt(i);
- }
- /*!
- a constructor for copying from another types
- */
- template<uint argument_size>
- Int(const UInt<argument_size> & u)
- {
-     // the new object keeps its own size ('value_size'), not the size of 'u'
-     this->FromUInt(u);
- }
- /*!
- this method converts unsigned 64 bit int type to this class
- ***this method is created only on a 32bit platform***
- */
- uint FromUInt(ulint n)
- {
-     typedef UInt<value_size> base_type;
-     // a carry from the unsigned conversion is passed on
-     if( base_type::FromUInt(n) )
-         return 1;
-     // only with one or two words the highest word has to be tested
-     if( value_size != 1 && value_size != 2 )
-         return 0;
-     // there is a carry when the highest bit (the sign) has been set
-     const uint top_word = this->table[value_size-1];
-     return ((top_word & TTMATH_UINT_HIGHEST_BIT) == 0) ? 0 : 1;
- }
- /*!
- this method converts unsigned 64 bit int type to this class
- ***this method is created only on a 32bit platform***
- */
- uint FromInt(ulint n)
- {
-     // an unsigned argument is handled by FromUInt
-     const uint carry = FromUInt(n);
-     return carry;
- }
- /*!
- this method converts signed 64 bit int type to this class
- ***this method is created only on a 32bit platform***
- */
- uint FromInt(slint n)
- {
-     // the argument is split into two words: bits 0-31 and the bits above them
-     const ulint bits = ulint(n);
-     const uint low_word  = (uint)bits;
-     const uint high_word = (uint)(bits >> 32);
-     // all words above the value are filled according to the sign
-     const uint mask = (n < 0) ? TTMATH_UINT_MAX_VALUE : 0;
-     this->table[0] = low_word;
-     if( value_size == 1 )
-     {
-         // one word only: the higher part must be a plain sign extension
-         // and the sign bit of the stored word must agree with the sign of 'n'
-         if( high_word != mask )
-             return 1;
-         return ((this->table[0] & TTMATH_UINT_HIGHEST_BIT) == (mask & TTMATH_UINT_HIGHEST_BIT)) ? 0 : 1;
-     }
-     this->table[1] = high_word;
-     for(uint word=2 ; word<value_size ; ++word)
-         this->table[word] = mask;
-     return 0;
- }
- /*!
-     assignment from an unsigned 64 bit integer (the carry from FromUInt is discarded)
-     ***this operator is created only on a 32bit platform***
- */
- Int<value_size> & operator=(ulint n)
- {
-     this->FromUInt(n);
-     return *this;
- }
- /*!
-     constructs the object from an unsigned 64 bit integer
-     ***this constructor is created only on a 32bit platform***
- */
- Int(ulint n)
- {
-     this->FromUInt(n);
- }
- /*!
-     assignment from a signed 64 bit integer (the carry from FromInt is discarded)
-     ***this operator is created only on a 32bit platform***
- */
- Int<value_size> & operator=(slint n)
- {
-     this->FromInt(n);
-     return *this;
- }
- /*!
-     constructs the object from a signed 64 bit integer
-     ***this constructor is created only on a 32bit platform***
- */
- Int(slint n)
- {
-     this->FromInt(n);
- }
- /*!
-     converts a 32 bit unsigned int to this class by widening it to uint
-     ***this method is created only on a 64bit platform***
- */
- uint FromUInt(unsigned int i)
- {
-     const uint widened = uint(i);
-     return FromUInt(widened);
- }
- /*!
-     converts a 32 bit unsigned int to this class (same as FromUInt(unsigned int))
-     ***this method is created only on a 64bit platform***
- */
- uint FromInt(unsigned int i)
- {
-     const uint carry = FromUInt(i);
-     return carry;
- }
- /*!
-     converts a 32 bit signed int to this class by widening it to sint
-     ***this method is created only on a 64bit platform***
- */
- uint FromInt(signed int i)
- {
-     const sint widened = sint(i);
-     return FromInt(widened);
- }
- /*!
-     assignment from a 32 bit unsigned int (the carry from FromUInt is discarded)
-     ***this operator is created only on a 64bit platform***
- */
- Int<value_size> & operator=(unsigned int i)
- {
-     this->FromUInt(i);
-     return *this;
- }
- /*!
-     constructs the object from a 32 bit unsigned int
-     ***this constructor is created only on a 64bit platform***
- */
- Int(unsigned int i)
- {
-     this->FromUInt(i);
- }
- /*!
-     assignment from a 32 bit signed int (the carry from FromInt is discarded)
-     ***this operator is created only on a 64bit platform***
- */
- Int<value_size> & operator=(signed int i)
- {
-     this->FromInt(i);
-     return *this;
- }
- /*!
-     constructs the object from a 32 bit signed int
-     ***this constructor is created only on a 64bit platform***
- */
- Int(signed int i)
- {
-     this->FromInt(i);
- }
- /*!
-     constructs the object from a C string (FromString with its default base, 10)
-     the carry returned by FromString() is not checked
- */
- Int(const char * s)
- {
-     this->FromString(s);
- }
- /*!
-     constructs the object from a std::string (FromString with its default base, 10)
-     the carry returned by FromString() is not checked
- */
- Int(const std::string & s)
- {
-     const char * text = s.c_str();
-     FromString(text);
- }
- /*!
-     constructs the object from a wide C string (FromString with its default base, 10)
-     the carry returned by FromString() is not checked
- */
- Int(const wchar_t * s)
- {
-     this->FromString(s);
- }
- /*!
-     constructs the object from a std::wstring (FromString with its default base, 10)
-     the carry returned by FromString() is not checked
- */
- Int(const std::wstring & s)
- {
-     const wchar_t * text = s.c_str();
-     FromString(text);
- }
- /*!
-     the default constructor
-     it intentionally does nothing: the table is not cleared
- */
- Int()
- {
- }
- /*!
-     the destructor (empty body)
- */
- ~Int()
- {
- }
- /*!
-     returns the lowest word of the table reinterpreted as a signed value
-     no range check is made: the caller must be sure the value fits in sint
-     (all the remaining words must be zero or -1)
- */
- sint ToInt() const
- {
-     const uint lowest = UInt<value_size>::table[0];
-     return sint(lowest);
- }
- /*!
-     converts the value to uint
-     returns a carry when the value cannot be stored in uint:
-     for value_size == 1 that is when the highest bit of the result is set,
-     otherwise whatever UInt<value_size>::ToUInt() reports
- */
- uint ToUInt(uint & result) const
- {
-     const uint base_carry = UInt<value_size>::ToUInt(result);
-     if( value_size != 1 )
-         return base_carry;
-     return ((result & TTMATH_UINT_HIGHEST_BIT) != 0) ? 1 : 0;
- }
- /*!
-     converts the value to uint (same as ToUInt(uint &))
-     returns a carry when the value cannot be stored in uint
- */
- uint ToInt(uint & result) const
- {
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
-     this method converts the value to sint type
-     result always receives the lowest word of the table
-     returns 1 (carry) if the value is too long to store it in sint type,
-     that is when the sign bit of the lowest word differs from the sign of the
-     whole value or when any higher word is not a pure sign extension;
-     returns 0 otherwise
- */
- uint ToInt(sint & result) const
- {
-     result = sint( UInt<value_size>::table[0] );
-     uint mask = IsSign() ? TTMATH_UINT_MAX_VALUE : 0;
-     // the lowest word must carry the same sign bit as the whole value
-     // (previously an unconditional 'return 1' made every conversion report a carry)
-     if( (UInt<value_size>::table[0] & TTMATH_UINT_HIGHEST_BIT) != (mask & TTMATH_UINT_HIGHEST_BIT) )
-         return 1;
-     // every remaining word must be all zeros (positive) or all ones (negative)
-     for(uint i=1 ; i<value_size ; ++i)
-         if( UInt<value_size>::table[i] != mask )
-             return 1;
-     return 0;
- }
- /*!
-     converts the value to ulint (64 bit unsigned integer)
-     returns a carry when the value cannot be stored in ulint:
-     for value_size 1 or 2 that is when the highest bit of the top word is set,
-     otherwise whatever UInt<value_size>::ToUInt() reports
-     *** this method is created only on a 32 bit platform ***
- */
- uint ToUInt(ulint & result) const
- {
-     const uint base_carry = UInt<value_size>::ToUInt(result);
-     if( value_size == 1 || value_size == 2 )
-     {
-         // table[0] when value_size is 1, table[1] when value_size is 2
-         const uint top_word = UInt<value_size>::table[value_size - 1];
-         return ((top_word & TTMATH_UINT_HIGHEST_BIT) != 0) ? 1 : 0;
-     }
-     return base_carry;
- }
- /*!
-     converts the value to ulint (same as ToUInt(ulint &))
-     returns a carry when the value cannot be stored in ulint
-     *** this method is created only on a 32 bit platform ***
- */
- uint ToInt(ulint & result) const
- {
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
-     this method converts the value to slint type (64 bit signed integer)
-     returns 1 (carry) if the value is too long to store it in slint type, 0 otherwise
-     with value_size == 1 the single word is sign-extended and always fits
-     *** this method is created only on a 32 bit platform ***
- */
- uint ToInt(slint & result) const
- {
-     if( value_size == 1 )
-     {
-         result = slint(sint(UInt<value_size>::table[0]));
-     }
-     else
-     {
-         uint low = UInt<value_size>::table[0];
-         uint high = UInt<value_size>::table[1];
-         result = low;
-         result |= (ulint(high) << TTMATH_BITS_PER_UINT);
-         uint mask = IsSign() ? TTMATH_UINT_MAX_VALUE : 0;
-         // the high word must carry the same sign bit as the whole value
-         // (previously an unconditional 'return 1' made every conversion report a carry)
-         if( (high & TTMATH_UINT_HIGHEST_BIT) != (mask & TTMATH_UINT_HIGHEST_BIT) )
-             return 1;
-         // every remaining word must be all zeros (positive) or all ones (negative)
-         for(uint i=2 ; i<value_size ; ++i)
-             if( UInt<value_size>::table[i] != mask )
-                 return 1;
-     }
-     return 0;
- }
- /*!
-     converts the value to a 32 bit unsigned integer
-     returns 1 when the base-class conversion reports a carry or the value is negative,
-     0 otherwise
-     *** this method is created only on a 64 bit platform ***
- */
- uint ToUInt(unsigned int & result) const
- {
-     const uint base_carry = UInt<value_size>::ToUInt(result);
-     return ( base_carry || IsSign() ) ? 1 : 0;
- }
- /*!
-     converts the value to a 32 bit unsigned integer (same as ToUInt(unsigned int &))
-     returns a carry when the value cannot be stored in this type
-     *** this method is created only on a 64 bit platform ***
- */
- uint ToInt(unsigned int & result) const
- {
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
-     converts the value to a 32 bit signed integer
-     result always receives the lowest word truncated to int
-     returns 1 (carry) when the value cannot be stored in this type, 0 otherwise
-     *** this method is created only on a 64 bit platform ***
- */
- uint ToInt(int & result) const
- {
-     const uint lowest = UInt<value_size>::table[0];
-     const uint fill   = IsSign() ? TTMATH_UINT_MAX_VALUE : 0;
-     result = int(lowest);
-     // bits 31 and above of the lowest word must all equal the sign
-     const bool low_word_fits = (lowest >> 31) == (fill >> 31);
-     if( !low_word_fits )
-         return 1;
-     // every higher word must be a pure sign extension
-     uint word = 1;
-     while( word < value_size )
-     {
-         if( UInt<value_size>::table[word] != fill )
-             return 1;
-         ++word;
-     }
-     return 0;
- }
- /*!
-     an auxiliary method for converting to a string with the base 'b'
-     a negative value is converted through its absolute value and the base-class
-     method is told (third argument) that the number is negative
- */
- template<class string_type>
- void ToStringBase(string_type & result, uint b = 10) const
- {
-     if( !IsSign() )
-     {
-         UInt<value_size>::ToStringBase(result, b, false);
-         return;
-     }
-     // work on a copy so that 'this' is left untouched
-     Int<value_size> magnitude(*this);
-     magnitude.Abs();
-     magnitude.UInt<value_size>::ToStringBase(result, b, true);
- }
- /*!
-     writes the value into 'result' as a string in the base 'b'
- */
- void ToString(std::string & result, uint b = 10) const
- {
-     ToStringBase(result, b);
- }
- /*!
-     returns the value as a std::string in the base 'b'
- */
- std::string ToString(uint b = 10) const
- {
-     std::string text;
-     ToStringBase(text, b);
-     return text;
- }
- /*!
-     writes the value into 'result' as a wide string in the base 'b'
- */
- void ToString(std::wstring & result, uint b = 10) const
- {
-     ToStringBase(result, b);
- }
- /*!
-     returns the value as a std::wstring in the base 'b'
- */
- std::wstring ToWString(uint b = 10) const
- {
-     std::wstring text;
-     ToStringBase(text, b);
-     return text;
- }
- /*!
-     an auxiliary method for converting from a string
-     white characters are skipped before an optional sign ('-' or '+') and
-     again between the sign and the digits
-     returns 1 when UInt<value_size>::FromString() reports a carry or when the
-     parsed magnitude exceeds the limit for its sign, 0 otherwise
- */
- template<class char_type>
- uint FromStringBase(const char_type * s, uint b = 10, const char_type ** after_source = 0, bool * value_read = 0)
- {
-     Misc::SkipWhiteCharacters(s);
-     const bool negative = ( *s == '-' );
-     if( negative || *s == '+' )
-         Misc::SkipWhiteCharacters(++s);
-     if( UInt<value_size>::FromString(s,b,after_source,value_read) )
-         return 1;
-     /*
-         the limit is compared as an unsigned value (the reference to Int is
-         automatically converted to a reference to UInt);
-         for a negative number the magnitude can be equal to the minimum
-         -- look at a description in ChangeSign()
-     */
-     Int<value_size> limit;
-     if( negative )
-         limit.SetMin();
-     else
-         limit.SetMax();
-     if( UInt<value_size>::operator>( limit ) )
-         return 1;
-     /*
-         if the value is equal to the minimum, ChangeSign() does nothing
-         (it only returns 1 but we ignore it)
-     */
-     if( negative )
-         ChangeSign();
-     return 0;
- }
- /*!
-     converts a string into its value
-     returns carry=1 if the value is too big or an incorrect base 'b' is given
-     parsing ends at the first non-digit character, for example:
-       "-12"     is translated to -12
-       "- 12foo" is translated to -12 too
-     leading white characters are omitted
-     (white characters between '-' and the first digit are allowed too)
-     after_source (if given) is set to point at the end of the parsed string
-     value_read (if given) tells whether at least one digit has been read
- */
- uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0)
- {
-     const uint carry = FromStringBase(s, b, after_source, value_read);
-     return carry;
- }
- /*!
-     converts a wide string into its value (forwards to FromStringBase)
- */
- uint FromString(const wchar_t * s, uint b = 10, const wchar_t ** after_source = 0, bool * value_read = 0)
- {
-     const uint carry = FromStringBase(s, b, after_source, value_read);
-     return carry;
- }
- /*!
-     converts a std::string into its value
-     returns carry=1 if the value is too big or an incorrect base 'b' is given
- */
- uint FromString(const std::string & s, uint b = 10)
- {
-     const char * text = s.c_str();
-     return FromString(text, b);
- }
- /*!
-     assignment from a C string (FromString with its default base, 10)
-     the carry returned by FromString() is discarded
- */
- Int<value_size> & operator=(const char * s)
- {
-     this->FromString(s);
-     return *this;
- }
- /*!
-     converts a std::wstring into its value
-     returns carry=1 if the value is too big or an incorrect base 'b' is given
- */
- uint FromString(const std::wstring & s, uint b = 10)
- {
-     const wchar_t * text = s.c_str();
-     return FromString(text, b);
- }
- /*!
-     assignment from a wide C string (FromString with its default base, 10)
-     the carry returned by FromString() is discarded
- */
- Int<value_size> & operator=(const wchar_t * s)
- {
-     this->FromString(s);
-     return *this;
- }
- /*!
-     assignment from a std::wstring (FromString with its default base, 10)
-     the carry returned by FromString() is discarded
- */
- Int<value_size> & operator=(const std::wstring & s)
- {
-     const wchar_t * text = s.c_str();
-     FromString(text);
-     return *this;
- }
- /*!
-     assignment from a std::string (FromString with its default base, 10)
-     the carry returned by FromString() is discarded
- */
- Int<value_size> & operator=(const std::string & s)
- {
-     const char * text = s.c_str();
-     FromString(text);
-     return *this;
- }
- /*!
- *
- * methods for comparing
- *
- *
- */
- // equality is delegated to the base-class comparison
- bool operator==(const Int<value_size> & l) const
- {
-     const bool same = UInt<value_size>::operator==(l);
-     return same;
- }
- // inequality is delegated to the base-class comparison
- bool operator!=(const Int<value_size> & l) const
- {
-     const bool differ = UInt<value_size>::operator!=(l);
-     return differ;
- }
- // signed "less than": the top words are compared as signed values,
- // the remaining words (from high to low) as unsigned values
- bool operator<(const Int<value_size> & l) const
- {
-     const sint top = value_size-1;
-     const sint lhs_top = sint(UInt<value_size>::table[top]);
-     const sint rhs_top = sint(l.table[top]);
-     if( lhs_top < rhs_top )
-         return true;
-     if( lhs_top > rhs_top )
-         return false;
-     for(sint idx = top-1 ; idx >= 0 ; --idx)
-     {
-         const uint lhs = UInt<value_size>::table[idx];
-         const uint rhs = l.table[idx];
-         if( lhs != rhs )
-             return lhs < rhs;
-     }
-     // they're equal
-     return false;
- }
- // signed "greater than": the top words are compared as signed values,
- // the remaining words (from high to low) as unsigned values
- bool operator>(const Int<value_size> & l) const
- {
-     const sint top = value_size-1;
-     const sint lhs_top = sint(UInt<value_size>::table[top]);
-     const sint rhs_top = sint(l.table[top]);
-     if( lhs_top > rhs_top )
-         return true;
-     if( lhs_top < rhs_top )
-         return false;
-     for(sint idx = top-1 ; idx >= 0 ; --idx)
-     {
-         const uint lhs = UInt<value_size>::table[idx];
-         const uint rhs = l.table[idx];
-         if( lhs != rhs )
-             return lhs > rhs;
-     }
-     // they're equal
-     return false;
- }
- // signed "less than or equal": the top words are compared as signed values,
- // the remaining words (from high to low) as unsigned values
- bool operator<=(const Int<value_size> & l) const
- {
-     const sint top = value_size-1;
-     const sint lhs_top = sint(UInt<value_size>::table[top]);
-     const sint rhs_top = sint(l.table[top]);
-     if( lhs_top < rhs_top )
-         return true;
-     if( lhs_top > rhs_top )
-         return false;
-     for(sint idx = top-1 ; idx >= 0 ; --idx)
-     {
-         const uint lhs = UInt<value_size>::table[idx];
-         const uint rhs = l.table[idx];
-         if( lhs != rhs )
-             return lhs < rhs;
-     }
-     // they're equal
-     return true;
- }
- // signed "greater than or equal": the top words are compared as signed values,
- // the remaining words (from high to low) as unsigned values
- bool operator>=(const Int<value_size> & l) const
- {
-     const sint top = value_size-1;
-     const sint lhs_top = sint(UInt<value_size>::table[top]);
-     const sint rhs_top = sint(l.table[top]);
-     if( lhs_top > rhs_top )
-         return true;
-     if( lhs_top < rhs_top )
-         return false;
-     for(sint idx = top-1 ; idx >= 0 ; --idx)
-     {
-         const uint lhs = UInt<value_size>::table[idx];
-         const uint rhs = l.table[idx];
-         if( lhs != rhs )
-             return lhs > rhs;
-     }
-     // they're equal
-     return true;
- }
- /*!
- *
- * standard mathematical operators
- *
- */
- /*!
-     unary minus
-     'this' is not changed: a copy with the sign changed is returned
-     (the value returned by ChangeSign() is not inspected)
- */
- Int<value_size> operator-() const
- {
-     Int<value_size> negated(*this);
-     negated.ChangeSign();
-     return negated;
- }
- // returns *this - p2 computed on a copy (the result of Sub() is not inspected)
- Int<value_size> operator-(const Int<value_size> & p2) const
- {
-     Int<value_size> difference(*this);
-     difference.Sub(p2);
-     return difference;
- }
- // subtracts p2 in place (the result of Sub() is not inspected)
- Int<value_size> & operator-=(const Int<value_size> & p2)
- {
-     this->Sub(p2);
-     return *this;
- }
- // returns *this + p2 computed on a copy (the result of Add() is not inspected)
- Int<value_size> operator+(const Int<value_size> & p2) const
- {
-     Int<value_size> sum(*this);
-     sum.Add(p2);
-     return sum;
- }
- // adds p2 in place (the result of Add() is not inspected)
- Int<value_size> & operator+=(const Int<value_size> & p2)
- {
-     this->Add(p2);
-     return *this;
- }
- // returns *this * p2 computed on a copy (the result of Mul() is not inspected)
- Int<value_size> operator*(const Int<value_size> & p2) const
- {
-     Int<value_size> product(*this);
-     product.Mul(p2);
-     return product;
- }
- // multiplies by p2 in place (the result of Mul() is not inspected)
- Int<value_size> & operator*=(const Int<value_size> & p2)
- {
-     this->Mul(p2);
-     return *this;
- }
- // returns *this / p2 computed on a copy (the result of Div() is not inspected)
- Int<value_size> operator/(const Int<value_size> & p2) const
- {
-     Int<value_size> quotient(*this);
-     quotient.Div(p2);
-     return quotient;
- }
- // divides by p2 in place (the result of Div() is not inspected)
- Int<value_size> & operator/=(const Int<value_size> & p2)
- {
-     this->Div(p2);
-     return *this;
- }
- // returns the remainder that Div() produces for *this / p2; *this is not changed
- Int<value_size> operator%(const Int<value_size> & p2) const
- {
-     Int<value_size> rest;
-     Int<value_size> dividend(*this);
-     dividend.Div(p2, rest);
-     return rest;
- }
- // replaces *this with the remainder that Div() produces for *this / p2
- Int<value_size> & operator%=(const Int<value_size> & p2)
- {
-     Int<value_size> rest;
-     Div(p2, rest);
-     *this = rest;
-     return *this;
- }
- /*!
-     prefix increment, e.g. ++variable
-     calls AddOne() and returns the object as a reference to UInt<value_size>
- */
- UInt<value_size> & operator++()
- {
-     AddOne();
-     UInt<value_size> & self = *this;
-     return self;
- }
- /*!
-     postfix increment, e.g. variable++
-     returns the value from before the increment, as a UInt<value_size> copy
- */
- UInt<value_size> operator++(int)
- {
-     UInt<value_size> previous( *this );
-     AddOne();
-     return previous;
- }
- // prefix decrement: calls SubOne() and returns the object as a reference to UInt<value_size>
- UInt<value_size> & operator--()
- {
-     SubOne();
-     UInt<value_size> & self = *this;
-     return self;
- }
- // postfix decrement: returns the value from before the decrement, as a UInt<value_size> copy
- UInt<value_size> operator--(int)
- {
-     UInt<value_size> previous( *this );
-     SubOne();
-     return previous;
- }
- /*!
- *
- * input/output operators for standard streams
- *
- */
- /*!
-     an auxiliary method for outputting to standard streams
-     the value is converted with ToString() (default base) and written to 's'
- */
- template<class ostream_type, class string_type>
- static ostream_type & OutputToStream(ostream_type & s, const Int<value_size> & l)
- {
-     string_type text;
-     l.ToString(text);
-     s << text;
-     return s;
- }
- /*!
-     output to a narrow standard stream
- */
- friend std::ostream & operator<<(std::ostream & s, const Int<value_size> & l)
- {
-     std::ostream & out = OutputToStream<std::ostream, std::string>(s, l);
-     return out;
- }
- /*!
-     output to a wide standard stream
- */
- friend std::wostream & operator<<(std::wostream & s, const Int<value_size> & l)
- {
-     std::wostream & out = OutputToStream<std::wostream, std::wstring>(s, l);
-     return out;
- }
- /*!
-     an auxiliary method for reading a value from a standard stream
-     an optional sign and then decimal digits are collected into a string
-     which is passed to l.FromString() (its carry is not inspected)
-     returns the stream 's'
- */
- template<class istream_type, class string_type, class char_type>
- static istream_type & InputFromStream(istream_type & s, Int<value_size> & l)
- {
-     string_type ss;
-     // char or wchar_t for operator>>
-     // value-initialized: when the extraction below fails (e.g. an empty stream)
-     // 'z' is left untouched and must not be read as an indeterminate value
-     char_type z = char_type();
-     // operator>> omits white characters if they're set for omitting
-     s >> z;
-     if( z=='-' || z=='+' )
-     {
-         ss += z;
-         s >> z; // we're reading a next character (white characters can be omitted)
-     }
-     // we're reading only digits (base=10)
-     while( s.good() && Misc::CharToDigit(z, 10)>=0 )
-     {
-         ss += z;
-         z = static_cast<char_type>(s.get());
-     }
-     // we're leaving the last read character
-     // (it's not belonging to the value)
-     s.unget();
-     l.FromString(ss);
-     return s;
- }
- /*!
-     input from a narrow standard stream
- */
- friend std::istream & operator>>(std::istream & s, Int<value_size> & l)
- {
-     std::istream & in = InputFromStream<std::istream, std::string, char>(s, l);
-     return in;
- }
- /*!
-     input from a wide standard stream
- */
- friend std::wistream & operator>>(std::wistream & s, Int<value_size> & l)
- {
-     std::wistream & in = InputFromStream<std::wistream, std::wstring, wchar_t>(s, l);
-     return in;
- }
-} // namespace
diff --git a/ttmath/ttmathmisc.h b/ttmath/ttmathmisc.h
deleted file mode 100644
index 330a43a..0000000
--- a/ttmath/ttmathmisc.h
+++ /dev/null
@@ -1,250 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2010, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathmisc
-#define headerfilettmathmisc
- \file ttmathmisc.h
- \brief some helpful functions
-#include <string>
-namespace ttmath
- some helpful functions
-class Misc
- *
- * AssignString(result, str)
- * result = str
- *
- */
- result = str
-static void AssignString(std::string & result, const char * str)
- result = str;
- result = str
-static void AssignString(std::wstring & result, const char * str)
- result.clear();
- for( ; *str ; ++str )
- result += *str;
- result = str
-static void AssignString(std::wstring & result, const std::string & str)
- return AssignString(result, str.c_str());
- result = str
-static void AssignString(std::string & result, const wchar_t * str)
- result.clear();
- for( ; *str ; ++str )
- result += static_cast<char>(*str);
- result = str
-static void AssignString(std::string & result, const std::wstring & str)
- return AssignString(result, str.c_str());
- *
- * AddString(result, str)
- * result += str
- *
- */
- result += str
-static void AddString(std::string & result, const char * str)
- result += str;
- result += str
-static void AddString(std::wstring & result, const char * str)
- for( ; *str ; ++str )
- result += *str;
- this method omits any white characters from the string
- char_type is char or wchar_t
-template<class char_type>
-static void SkipWhiteCharacters(const char_type * & c)
- // 13 is at the end in a DOS text file (\r\n)
- while( (*c==' ' ) || (*c=='\t') || (*c==13 ) || (*c=='\n') )
- ++c;
- this static method converts one character into its value
- for example:
- 1 -> 1
- 8 -> 8
- A -> 10
- f -> 15
- this method don't check whether c is correct or not
-static uint CharToDigit(uint c)
- if(c>='0' && c<='9')
- return c-'0';
- if(c>='a' && c<='z')
- return c-'a'+10;
-return c-'A'+10;
- this method changes a character 'c' into its value
- (if there can't be a correct value it returns -1)
- for example:
- c=2, base=10 -> function returns 2
- c=A, base=10 -> function returns -1
- c=A, base=16 -> function returns 10
-static sint CharToDigit(uint c, uint base)
- if( c>='0' && c<='9' )
- c=c-'0';
- else
- if( c>='a' && c<='z' )
- c=c-'a'+10;
- else
- if( c>='A' && c<='Z' )
- c=c-'A'+10;
- else
- return -1;
- if( c >= base )
- return -1;
-return sint(c);
- this method converts a digit into a char
- digit should be from <0,F>
- (we don't have to get a base)
- for example:
- 1 -> 1
- 8 -> 8
- 10 -> A
- 15 -> F
-static uint DigitToChar(uint digit)
- if( digit < 10 )
- return digit + '0';
-return digit - 10 + 'A';
-}; // struct Misc
diff --git a/ttmath/ttmathobjects.h b/ttmath/ttmathobjects.h
deleted file mode 100644
index c35026b..0000000
--- a/ttmath/ttmathobjects.h
+++ /dev/null
@@ -1,809 +0,0 @@
- * This file is a part of TTMath Mathematical Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2010, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathobject
-#define headerfilettmathobject
- \file ttmathobjects.h
- \brief Mathematic functions.
-#include <string>
-#include <vector>
-#include <list>
-#include <map>
-#include "ttmathtypes.h"
-#include "ttmathmisc.h"
-namespace ttmath
- objects of this class are used with the mathematical parser
- they hold variables or functions defined by a user
- each object has its own table in which we're keeping variables or functions
-class Objects
- /*!
-     one item (variable or function)
-     'items' will be on the table
- */
- struct Item
- {
-     // the value of a variable or of a function
-     // internally we store variables and functions as std::string (not std::wstring even when wide characters are used)
-     std::string value;
-     // number of parameters required by the function
-     // (if there's a variable this 'param' is ignored)
-     int param;
-     // 'param' is zeroed so that a default-constructed item never holds an indeterminate value
-     Item() : param(0) {}
-     Item(const std::string & v, int p) : value(v), param(p) {}
- };
- // 'Table' is the type of our table
- typedef std::map<std::string, Item> Table;
- typedef Table::iterator Iterator;
- typedef Table::const_iterator CIterator;
- /*!
-     returns true if the character 'c' can be a part of a name
-     letters (a-z, A-Z) are always accepted;
-     digits and '_' are accepted only when 'can_be_digit' is true
- */
- static bool CorrectCharacter(int c, bool can_be_digit)
- {
-     const bool is_letter = (c>='a' && c<='z') || (c>='A' && c<='Z');
-     const bool is_digit_or_underscore = (c>='0' && c<='9') || c=='_';
-     return is_letter || (can_be_digit && is_digit_or_underscore);
- }
- /*!
-     returns true if 'name' can be used as a name of an object:
-     it must not be empty, its first character must be a letter and every
-     following character must be a letter, a digit or '_'
- */
- template<class string_type>
- static bool IsNameCorrect(const string_type & name)
- {
-     if( name.empty() )
-         return false;
-     bool is_first = true;
-     for(typename string_type::const_iterator it = name.begin() ; it != name.end() ; ++it)
-     {
-         // only the first character is not allowed to be a digit or '_'
-         if( !CorrectCharacter(*it, !is_first) )
-             return false;
-         is_first = false;
-     }
-     return true;
- }
- /*!
-     returns true if an object with this name exists in the table
- */
- bool IsDefined(const std::string & name)
- {
-     const bool found = ( table.find(name) != table.end() );
-     return found;
- }
- /*!
-     returns true if an object with this name (given in wide characters) exists
- */
- bool IsDefined(const std::wstring & name)
- {
-     // the wide name is validated before it is narrowed by AssignString();
-     // an incorrect name is reported as not defined
-     if( IsNameCorrect(name) )
-     {
-         Misc::AssignString(str_tmp1, name);
-         return IsDefined(str_tmp1);
-     }
-     return false;
- }
- /*!
-     adds one object (a variable or a function) to the table
-     returns err_incorrect_name for an invalid name, err_object_exists when the
-     name is already in the table, err_ok otherwise
- */
- ErrorCode Add(const std::string & name, const std::string & value, int param = 0)
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     const bool already_present = ( table.find(name) != table.end() );
-     if( already_present )
-         return err_object_exists;
-     table.insert( std::make_pair(name, Item(value, param)) );
-     return err_ok;
- }
- /*!
-     adds one object (a variable or a function) to the table -- wide string version
-     both strings are narrowed with AssignString() and passed to the std::string version
- */
- ErrorCode Add(const std::wstring & name, const std::wstring & value, int param = 0)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     Misc::AssignString(str_tmp2, value);
-     return Add(str_tmp1, str_tmp2, param);
- }
- /*!
-     returns 'true' if the table holds no objects
- */
- bool Empty() const
- {
-     const bool no_items = table.empty();
-     return no_items;
- }
- /*!
-     removes every object from the table
- */
- void Clear()
- {
-     table.clear();
- }
- /*!
-     returns a 'const_iterator' to the first item of the table
- */
- CIterator Begin() const
- {
-     CIterator first = table.begin();
-     return first;
- }
- /*!
-     returns a 'const_iterator' pointing at the space after the last item
-     (that is table.end())
- */
- CIterator End() const
- {
-     CIterator past_last = table.end();
-     return past_last;
- }
- /*!
-     changes the value and the number of parameters of an existing object
-     returns err_incorrect_name for an invalid name, err_unknown_object when
-     there is no such object, err_ok otherwise
- */
- ErrorCode EditValue(const std::string & name, const std::string & value, int param = 0)
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     Iterator pos = table.find(name);
-     if( pos == table.end() )
-         return err_unknown_object;
-     Item & item = pos->second;
-     item.value = value;
-     item.param = param;
-     return err_ok;
- }
- /*!
-     changes the value and the number of parameters of an existing object
-     -- wide string version; both strings are narrowed with AssignString()
- */
- ErrorCode EditValue(const std::wstring & name, const std::wstring & value, int param = 0)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     Misc::AssignString(str_tmp2, value);
-     return EditValue(str_tmp1, str_tmp2, param);
- }
- /*!
-     renames an existing object
-     returns err_incorrect_name when either name is invalid, err_unknown_object
-     when 'old_name' is not in the table, the error from Add() when the new
-     name cannot be added, err_ok otherwise
- */
- ErrorCode EditName(const std::string & old_name, const std::string & new_name)
- {
-     if( !(IsNameCorrect(old_name) && IsNameCorrect(new_name)) )
-         return err_incorrect_name;
-     Iterator old_i = table.find(old_name);
-     if( old_i == table.end() )
-         return err_unknown_object;
-     // renaming an object to its own name is treated as a normal situation
-     if( old_name == new_name )
-         return err_ok;
-     // the object is first added under the new name ...
-     const ErrorCode status = Add(new_name, old_i->second.value, old_i->second.param);
-     if( status != err_ok )
-         return status;
-     // ... and then the old entry is looked up again and removed
-     old_i = table.find(old_name);
-     TTMATH_ASSERT( old_i != table.end() )
-     table.erase(old_i);
-     return status;
- }
- /*!
-     renames an existing object -- wide string version
-     both names are narrowed with AssignString() and passed to the std::string version
- */
- ErrorCode EditName(const std::wstring & old_name, const std::wstring & new_name)
- {
-     // the wide names must be validated before they are narrowed by AssignString()
-     if( !(IsNameCorrect(old_name) && IsNameCorrect(new_name)) )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, old_name);
-     Misc::AssignString(str_tmp2, new_name);
-     return EditName(str_tmp1, str_tmp2);
- }
- /*!
-     deletes an object
-     returns err_incorrect_name for an invalid name, err_unknown_object when
-     there is no such object, err_ok otherwise
- */
- ErrorCode Delete(const std::string & name)
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     Iterator pos = table.find(name);
-     if( pos != table.end() )
-     {
-         table.erase( pos );
-         return err_ok;
-     }
-     return err_unknown_object;
- }
- /*!
-     deletes an object -- wide string version
- */
- ErrorCode Delete(const std::wstring & name)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     return Delete(str_tmp1);
- }
- /*!
-     gets the value of a specific object
-     returns err_incorrect_name for an invalid name ('value' is left untouched),
-     err_unknown_object when there is no such object ('value' is cleared),
-     err_ok otherwise
- */
- ErrorCode GetValue(const std::string & name, std::string & value) const
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     CIterator pos = table.find(name);
-     if( pos != table.end() )
-     {
-         value = pos->second.value;
-         return err_ok;
-     }
-     value.clear();
-     return err_unknown_object;
- }
- /*!
-     gets the value of a specific object -- wide string version
-     the narrow result is widened into 'value' whatever error code is returned
-     by the std::string version
- */
- ErrorCode GetValue(const std::wstring & name, std::wstring & value)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     const ErrorCode status = GetValue(str_tmp1, str_tmp2);
-     Misc::AssignString(value, str_tmp2);
-     return status;
- }
- /*!
-     gets the value of a specific object without copying the whole string:
-     '*value' is set to the c_str() of the stored std::string
-     when there is no such object '*value' is set to 0
- */
- ErrorCode GetValue(const std::string & name, const char ** value) const
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     CIterator pos = table.find(name);
-     if( pos != table.end() )
-     {
-         *value = pos->second.value.c_str();
-         return err_ok;
-     }
-     *value = 0;
-     return err_unknown_object;
- }
- /*!
-     gets the value of a specific object without copying the value string
-     -- wide string version (only the name is narrowed with AssignString())
- */
- ErrorCode GetValue(const std::wstring & name, const char ** value)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     return GetValue(str_tmp1, value);
- }
- /*!
-     this method gets the value and the number of parameters
-     of a specific object
-     returns err_incorrect_name for an invalid name (outputs are left untouched),
-     err_unknown_object when there is no such object ('value' is cleared and
-     '*param' is set to 0), err_ok otherwise
- */
- ErrorCode GetValueAndParam(const std::string & name, std::string & value, int * param) const
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     CIterator i = table.find(name);
-     if( i == table.end() )
-     {
-         // clear() (not empty(), which only queries the string) so that no stale
-         // text is left in 'value' -- the same as GetValue() does
-         value.clear();
-         *param = 0;
-         return err_unknown_object;
-     }
-     value = i->second.value;
-     *param = i->second.param;
-     return err_ok;
- }
- /*!
-     gets the value and the number of parameters of a specific object
-     -- wide string version
-     the narrow result is widened into 'value' whatever error code is returned
-     by the std::string version
- */
- ErrorCode GetValueAndParam(const std::wstring & name, std::wstring & value, int * param)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     const ErrorCode status = GetValueAndParam(str_tmp1, str_tmp2, param);
-     Misc::AssignString(value, str_tmp2);
-     return status;
- }
- /*!
-     gets the value and the number of parameters of a specific object
-     without copying the whole string: '*value' is set to the c_str() of the
-     stored std::string
-     when there is no such object both '*value' and '*param' are set to 0
- */
- ErrorCode GetValueAndParam(const std::string & name, const char ** value, int * param) const
- {
-     if( !IsNameCorrect(name) )
-         return err_incorrect_name;
-     CIterator pos = table.find(name);
-     if( pos != table.end() )
-     {
-         *value = pos->second.value.c_str();
-         *param = pos->second.param;
-         return err_ok;
-     }
-     *value = 0;
-     *param = 0;
-     return err_unknown_object;
- }
- /*!
-     gets the value and the number of parameters of a specific object
-     -- wide string version
-     (the value string is not copied, but the name is copied once
-     during AssignString())
- */
- ErrorCode GetValueAndParam(const std::wstring & name, const char ** value, int * param)
- {
-     // the wide name must be validated before it is narrowed by AssignString()
-     const bool name_ok = IsNameCorrect(name);
-     if( !name_ok )
-         return err_incorrect_name;
-     Misc::AssignString(str_tmp1, name);
-     return GetValueAndParam(str_tmp1, value, param);
- }
- /*!
-     returns a pointer to the internal table
- */
- Table * GetTable()
- {
-     Table * const internal = &table;
-     return internal;
- }
- Table table;
- std::string str_tmp1, str_tmp2;
-}; // end of class Objects
- objects of the class History are used to keep values in functions
- which take a lot of time during calculating, for instance in the
- function Factorial(x)
- it means that when we're calculating e.g. Factorial(1000) and the
- Factorial finds that we have calculated it before, the value (result)
- is taken from the history
-template<class ValueType>
-class History
- /*!
- one item in the History's object holds a key, a value for the key
- and a corresponding error code
- */
- struct Item
- {
- ValueType key, value;
- ErrorCode err;
- };
- /*!
- we use std::list for simply deleting the first item
- but because we're searching through the whole container
- (in the method Get) the container should not be too big
- (linear time of searching)
- */
- typedef std::list<Item> buffer_type;
- buffer_type buffer;
- typename buffer_type::size_type buffer_max_size;
- /*!
- default constructor
- default max size of the History's container is 15 items
- */
- History()
- {
- buffer_max_size = 15;
- }
- /*!
- a constructor which takes another value of the max size
- of the History's container
- */
- History(typename buffer_type::size_type new_size)
- {
- buffer_max_size = new_size;
- }
- /*!
- this method adds one item into the History
- if the size of the container is greater than buffer_max_size
- the first item will be removed
- */
- void Add(const ValueType & key, const ValueType & value, ErrorCode err)
- {
- Item item;
- item.key = key;
- item.value = value;
- item.err = err;
- buffer.insert( buffer.end(), item );
- if( buffer.size() > buffer_max_size )
- buffer.erase(buffer.begin());
- }
- /*!
- this method checks whether we have an item which has the key equal 'key'
- if there's such item the method sets the 'value' and the 'err'
- and returns true otherwise it returns false and 'value' and 'err'
- remain unchanged
- */
- bool Get(const ValueType & key, ValueType & value, ErrorCode & err)
- {
- typename buffer_type::iterator i = buffer.begin();
- for( ; i != buffer.end() ; ++i )
- {
- if( i->key == key )
- {
- value = i->value;
- err = i->err;
- return true;
- }
- }
- return false;
- }
- /*!
- this method deletes an item
- we assume that there is only one item with the 'key'
- (this method removes the first one)
- */
- bool Remove(const ValueType & key)
- {
- typename buffer_type::iterator i = buffer.begin();
- for( ; i != buffer.end() ; ++i )
- {
- if( i->key == key )
- {
- buffer.erase(i);
- return true;
- }
- }
- return false;
- }
-}; // end of class History
- this is an auxiliary class used when calculating Gamma() or Factorial()
- in multithreaded environment you can provide an object of this class to
- the Gamma() or Factorial() function, e.g;
- typedef Big<1, 3> MyBig;
- MyBig x = 123456;
- CGamma<MyBig> cgamma;
- std::cout << Gamma(x, cgamma);
- each thread should have its own CGamma<> object
- in a single-thread environment a CGamma<> object is a static variable
- in a second version of Gamma() and you don't have to explicitly use it, e.g.
- typedef Big<1, 3> MyBig;
- MyBig x = 123456;
- std::cout << Gamma(x);
-template<class ValueType>
-struct CGamma
- /*!
- this table holds factorials
- 1
- 1
- 2
- 6
- 24
- 120
- 720
- .......
- */
- std::vector<ValueType> fact;
- /*!
- this table holds Bernoulli numbers
- 1
- -0.5
- 0.166666666666666666666666667
- 0
- -0.0333333333333333333333333333
- 0
- 0.0238095238095238095238095238
- 0
- -0.0333333333333333333333333333
- 0
- 0.075757575757575757575757576
- .....
- */
- std::vector<ValueType> bern;
- /*!
- here we store some calculated values
- (this is for speeding up: if the next argument of Gamma() or Factorial()
- is already in the 'history', the result is not recalculated but simply
- returned from the 'history' object)
- */
- History<ValueType> history;
- /*!
- this method prepares some coefficients: factorials and Bernoulli numbers
- stored in 'fact' and 'bern' objects
- how many values are needed depends on the size of the mantissa - if
- the mantissa is larger then we must calculate more values
- for a mantissa which consists of 256 bits (8 words on a 32bit platform)
- we have to calculate about 30 values (the size of fact and bern will be 30),
- and for a 2048 bits mantissa we have to calculate 306 coefficients
- you don't have to call this method, these coefficients will be automatically calculated
- when they are needed
- you must note that calculating these coefficients is a little time-consuming operation,
- (especially when the mantissa is large) and first call to Gamma() or Factorial()
- can take more time than next calls, and in the end this is the point when InitAll()
- comes in handy: you can call this method somewhere at the beginning of your program
- */
- void InitAll();
- // definition is in ttmath.h
-} // namespace
diff --git a/ttmath/ttmathparser.h b/ttmath/ttmathparser.h
deleted file mode 100644
index ce07120..0000000
--- a/ttmath/ttmathparser.h
+++ /dev/null
@@ -1,2777 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2010, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathparser
-#define headerfilettmathparser
- \file ttmathparser.h
- \brief A mathematical parser
-#include <cstdio>
-#include <vector>
-#include <map>
-#include <set>
-#include "ttmath.h"
-#include "ttmathobjects.h"
-#include "ttmathmisc.h"
-namespace ttmath
- \brief Mathematical parser
- let x be an input string containing an expression to be converted:
- x = [+|-]Value[operator[+|-]Value][operator[+|-]Value]...
- where:
- an operator can be:
- ^ (pow) (the highest priority)
- * (mul) (or multiplication without an operator -- short mul)
- / (div) (* and / have the same priority)
- + (add)
- - (sub) (+ and - have the same priority)
- < (lower than)
- > (greater than)
- <= (lower or equal than)
- >= (greater or equal than)
- == (equal)
- != (not equal) (all above logical operators have the same priority)
- && (logical and)
- || (logical or) (the lowest priority)
- short mul:
- if the second Value (Var below) is either a variable or function there might not be
- an operator between them, e.g.
- "[+|-]Value Var" is treated as "[+|-]Value * Var" and the multiplication
- has the same priority as a normal multiplication:
- 4x = 4 * x
- 2^3m = (2^3)* m
- 6h^3 = 6 * (h^3)
- 2sin(pi) = 2 * sin(pi)
- etc.
- Value can be:
- constant e.g. 100, can be preceded by operators for changing the base (radix): [#|&]
- # - hex
- & - bin
- sample: #10 = 16
- &10 = 2
- variable e.g. pi
- another expression between brackets e.g (x)
- function e.g. sin(x)
- for example a correct input string can be:
- "1"
- "2.1234"
- "2,1234" (they are the same, by default we can either use a comma or a dot)
- "1 + 2"
- "(1 + 2) * 3"
- "pi"
- "sin(pi)"
- "(1+2)*(2+3)"
- "log(2;1234)" there's a semicolon here (not a comma), we use it in functions
- for separating parameters
- "1 < 2" (the result will be: 1)
- "4 < 3" (the result will be: 0)
- "2+x" (of course if the variable 'x' is defined)
- "4x+10"
- "#20+10" = 32 + 10 = 42
- "10 ^ -&101" = 10 ^ -5 = 0.00001
- "8 * -&10" = 8 * -2 = -16
- etc.
- we can also use a semicolon for separating any 'x' input strings
- for example:
- "1+2;4+5"
- the result will be on the stack as follows:
- stack[0].value=3
- stack[1].value=9
-template<class ValueType>
-class Parser
- there are 5 mathematical operators as follows (with their standard priorities):
- add (+)
- sub (-)
- mul (*)
- div (/)
- pow (^)
- and 'shortmul' used when there is no operator between
- a first parameter and a variable or function
- (the 'shortmul' has the same priority as the normal multiplication )
- class MatOperator
- {
- public:
- enum Type
- {
- none,add,sub,mul,div,pow,lt,gt,let,get,eq,neq,lor,land,shortmul
- };
- enum Assoc
- {
- right, // right-associative
- non_right // associative or left-associative
- };
- Type GetType() const { return type; }
- int GetPriority() const { return priority; }
- Assoc GetAssoc() const { return assoc; }
- void SetType(Type t)
- {
- type = t;
- assoc = non_right;
- switch( type )
- {
- case lor:
- priority = 4;
- break;
- case land:
- priority = 5;
- break;
- case eq:
- case neq:
- case lt:
- case gt:
- case let:
- case get:
- priority = 7;
- break;
- case add:
- case sub:
- priority = 10;
- break;
- case mul:
- case shortmul:
- case div:
- priority = 12;
- break;
- case pow:
- priority = 14;
- assoc = right;
- break;
- default:
- Error( err_internal_error );
- break;
- }
- }
- MatOperator(): type(none), priority(0), assoc(non_right)
- {
- }
- private:
- Type type;
- int priority;
- Assoc assoc;
- }; // end of MatOperator class
- /*!
- Objects of type 'Item' we are keeping on our stack
- */
- struct Item
- {
- enum Type
- {
- none, numerical_value, mat_operator, first_bracket,
- last_bracket, variable, semicolon
- };
- // The kind of type which we're keeping
- Type type;
- // if type == numerical_value
- ValueType value;
- // if type == mat_operator
- MatOperator moperator;
- /*
- if type == first_bracket
- if 'function' is set to true it means that the first recognized bracket
- was the bracket from function in other words we must call a function when
- we'll find the 'last' bracket
- */
- bool function;
- // if function is true
- std::string function_name;
- /*
- the sign of value
- it can be for type==numerical_value or type==first_bracket
- when it's true it means e.g. that value is equal -value
- */
- bool sign;
- Item(): type(none), function(false), sign(false)
- {
- }
- }; // end of Item struct
- stack on which we're keeping the Items
- at the end of parsing we'll have the result here
- the result doesn't have to be one value, it can be
- more than one if we have used a semicolon in the global space
- e.g. such input string "1+2;3+4" will generate a result:
- stack[0].value=3
- stack[1].value=7
- you should check if the stack is not empty, because if there was
- a syntax error in the input string then we do not have any results
- on the stack
-std::vector<Item> stack;
- size of the stack when we're starting parsing of the string
- if it's too small while parsing, the stack will be automatically resized
-const int default_stack_size;
- index of an object in our stack
- it's pointing on the place behind the last element
- for example at the beginning of parsing its value is zero
-unsigned int stack_index;
- code of the last error
-ErrorCode error;
- pointer to the currently reading char
- when an error has occurred it may be used to count the index of the wrong character
-const char * pstring;
- the base (radix) of the mathematic system (for example it may be '10')
-int base;
- the unit of angles used in: sin,cos,tan,cot,asin,acos,atan,acot
- 0 - deg
- 1 - rad (default)
- 2 - grad
-int deg_rad_grad;
- a pointer to an object which tell us whether we should stop calculating or not
-const volatile StopCalculating * pstop_calculating;
- a pointer to the user-defined variables' table
-const Objects * puser_variables;
- a pointer to the user-defined functions' table
-const Objects * puser_functions;
-typedef std::map<std::string, ValueType> FunctionLocalVariables;
- a pointer to the local variables of a function
-const FunctionLocalVariables * pfunction_local_variables;
- a temporary set used while parsing user-defined variables
-std::set<std::string> visited_variables;
- a temporary set used while parsing user-defined functions
-std::set<std::string> visited_functions;
- pfunction is the type of pointer to a mathematic function
- these mathematic functions are private members of this class,
- they are the wrappers for standard mathematics function
- 'pstack' is the pointer to the first argument on our stack
- 'amount_of_arg' tells us how many arguments there are on our stack
- 'result' is the reference for result of function
-typedef void (Parser<ValueType>::*pfunction)(int pstack, int amount_of_arg, ValueType & result);
- pfunction_var is the type of a pointer to a ValueType method which sets the value of a variable
-typedef void (ValueType::*pfunction_var)();
- table of mathematic functions
- this map consists of:
- std::string - function's name
- pfunction - pointer to specific function
-typedef std::map<std::string, pfunction> FunctionsTable;
-FunctionsTable functions_table;
- table of mathematic operators
- this map consists of:
- std::string - operators's name
- MatOperator::Type - type of the operator
-typedef std::map<std::string, typename MatOperator::Type> OperatorsTable;
-OperatorsTable operators_table;
- table of mathematic variables
- this map consists of:
- std::string - variable's name
- pfunction_var - pointer to specific function which returns value of variable
-typedef std::map<std::string, pfunction_var> VariablesTable;
-VariablesTable variables_table;
- some coefficients used when calculating the gamma (or factorial) function
-CGamma<ValueType> cgamma;
- temporary object for a whole string when Parse(std::wstring) is used
-std::string wide_to_ansi;
- group character (used when parsing)
- default zero (not used)
-int group;
- characters used as a comma
- default: '.' and ','
- comma2 can be zero (it means it is not used)
-int comma, comma2;
- an additional character used as a separator between function parameters
- (semicolon is used always)
-int param_sep;
- true if something was calculated (at least one mathematical operator was used or a function or a variable)
-bool calculated;
- we're using this method for reporting an error
-static void Error(ErrorCode code)
- throw code;
- this method skips the white characters in the string
- it's moving the 'pstring' to the first non-white character
-void SkipWhiteCharacters()
- while( (*pstring==' ' ) || (*pstring=='\t') )
- ++pstring;
- an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
-void RecurrenceParsingVariablesOrFunction_CheckStopCondition(bool variable, const std::string & name)
- if( variable )
- {
- if( visited_variables.find(name) != visited_variables.end() )
- Error( err_variable_loop );
- }
- else
- {
- if( visited_functions.find(name) != visited_functions.end() )
- Error( err_functions_loop );
- }
- an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
-void RecurrenceParsingVariablesOrFunction_AddName(bool variable, const std::string & name)
- if( variable )
- visited_variables.insert( name );
- else
- visited_functions.insert( name );
- an auxiliary method for RecurrenceParsingVariablesOrFunction(...)
-void RecurrenceParsingVariablesOrFunction_DeleteName(bool variable, const std::string & name)
- if( variable )
- visited_variables.erase( name );
- else
- visited_functions.erase( name );
- this method returns the value of a variable or function
- by creating a new instance of the mathematical parser
- and making the standard parsing algorithm on the given string
- this method is used only during parsing user defined variables or functions
- (there can be a recurrence here therefore we're using 'visited_variables'
- and 'visited_functions' sets to make a stop condition)
-ValueType RecurrenceParsingVariablesOrFunction(bool variable, const std::string & name, const char * new_string,
- FunctionLocalVariables * local_variables = 0)
- RecurrenceParsingVariablesOrFunction_CheckStopCondition(variable, name);
- RecurrenceParsingVariablesOrFunction_AddName(variable, name);
- Parser<ValueType> NewParser(*this);
- ErrorCode err;
- NewParser.pfunction_local_variables = local_variables;
- try
- {
- err = NewParser.Parse(new_string);
- }
- catch(...)
- {
- RecurrenceParsingVariablesOrFunction_DeleteName(variable, name);
- throw;
- }
- RecurrenceParsingVariablesOrFunction_DeleteName(variable, name);
- if( err != err_ok )
- Error( err );
- if( NewParser.stack.size() != 1 )
- Error( err_must_be_only_one_value );
- if( NewParser.stack[0].type != Item::numerical_value )
- // I think there shouldn't be this error here
- Error( err_incorrect_value );
-return NewParser.stack[0].value;
- this method returns the user-defined value of a variable
-bool GetValueOfUserDefinedVariable(const std::string & variable_name,ValueType & result)
- if( !puser_variables )
- return false;
- const char * string_value;
- if( puser_variables->GetValue(variable_name, &string_value) != err_ok )
- return false;
- result = RecurrenceParsingVariablesOrFunction(true, variable_name, string_value);
- calculated = true;
-return true;
- this method returns the value of a local variable of a function
-bool GetValueOfFunctionLocalVariable(const std::string & variable_name, ValueType & result)
- if( !pfunction_local_variables )
- return false;
- typename FunctionLocalVariables::const_iterator i = pfunction_local_variables->find(variable_name);
- if( i == pfunction_local_variables->end() )
- return false;
- result = i->second;
-return true;
- this method returns the value of a variable from variables' table
- we make an object of type ValueType then call a method which
- sets the correct value in it and finally we'll return the object
-ValueType GetValueOfVariable(const std::string & variable_name)
-ValueType result;
- if( GetValueOfFunctionLocalVariable(variable_name, result) )
- return result;
- if( GetValueOfUserDefinedVariable(variable_name, result) )
- return result;
- typename std::map<std::string, pfunction_var>::iterator i =
- variables_table.find(variable_name);
- if( i == variables_table.end() )
- Error( err_unknown_variable );
- (result.*(i->second))();
- calculated = true;
-return result;
- wrappers for mathematic functions
- 'sindex' is pointing on the first argument on our stack
- (the second argument has 'sindex+2'
- because 'sindex+1' is guaranteed for the 'semicolon' operator)
- the third argument has of course 'sindex+4' etc.
- 'result' will be the result of the function
- (we're using exceptions here for example when function gets an improper argument)
- used by: sin,cos,tan,cot
-ValueType ConvertAngleToRad(const ValueType & input)
- if( deg_rad_grad == 1 ) // rad
- return input;
- ValueType result;
- ErrorCode err;
- if( deg_rad_grad == 0 ) // deg
- result = ttmath::DegToRad(input, &err);
- else // grad
- result = ttmath::GradToRad(input, &err);
- if( err != err_ok )
- Error( err );
-return result;
- used by: asin,acos,atan,acot
-ValueType ConvertRadToAngle(const ValueType & input)
- if( deg_rad_grad == 1 ) // rad
- return input;
- ValueType result;
- ErrorCode err;
- if( deg_rad_grad == 0 ) // deg
- result = ttmath::RadToDeg(input, &err);
- else // grad
- result = ttmath::RadToGrad(input, &err);
- if( err != err_ok )
- Error( err );
-return result;
-void Gamma(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Gamma(stack[sindex].value, cgamma, &err, pstop_calculating);
- if(err != err_ok)
- Error( err );
- factorial
- result = 1 * 2 * 3 * 4 * .... * x
-void Factorial(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Factorial(stack[sindex].value, cgamma, &err, pstop_calculating);
- if(err != err_ok)
- Error( err );
-void Abs(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::Abs(stack[sindex].value);
-void Sin(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Sin( ConvertAngleToRad(stack[sindex].value), &err );
- if(err != err_ok)
- Error( err );
-void Cos(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Cos( ConvertAngleToRad(stack[sindex].value), &err );
- if(err != err_ok)
- Error( err );
-void Tan(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Tan(ConvertAngleToRad(stack[sindex].value), &err);
- if(err != err_ok)
- Error( err );
-void Cot(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Cot(ConvertAngleToRad(stack[sindex].value), &err);
- if(err != err_ok)
- Error( err );
-void Int(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::SkipFraction(stack[sindex].value);
-void Round(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = stack[sindex].value;
- if( result.Round() )
- Error( err_overflow );
-void Ln(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Ln(stack[sindex].value, &err);
- if(err != err_ok)
- Error( err );
-void Log(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 2 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Log(stack[sindex].value, stack[sindex+2].value, &err);
- if(err != err_ok)
- Error( err );
-void Exp(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Exp(stack[sindex].value, &err);
- if(err != err_ok)
- Error( err );
-void Max(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args == 0 )
- {
- result.SetMax();
- return;
- }
- result = stack[sindex].value;
- for(int i=1 ; i<amount_of_args ; ++i)
- {
- if( result < stack[sindex + i*2].value )
- result = stack[sindex + i*2].value;
- }
-void Min(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args == 0 )
- {
- result.SetMin();
- return;
- }
- result = stack[sindex].value;
- for(int i=1 ; i<amount_of_args ; ++i)
- {
- if( result > stack[sindex + i*2].value )
- result = stack[sindex + i*2].value;
- }
-void ASin(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- ValueType temp = ttmath::ASin(stack[sindex].value, &err);
- if(err != err_ok)
- Error( err );
- result = ConvertRadToAngle(temp);
-void ACos(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- ValueType temp = ttmath::ACos(stack[sindex].value, &err);
- if(err != err_ok)
- Error( err );
- result = ConvertRadToAngle(temp);
-void ATan(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ConvertRadToAngle(ttmath::ATan(stack[sindex].value));
-void ACot(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ConvertRadToAngle(ttmath::ACot(stack[sindex].value));
-void Sgn(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::Sgn(stack[sindex].value);
-void Mod(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 2 )
- Error( err_improper_amount_of_arguments );
- if( stack[sindex+2].value.IsZero() )
- Error( err_improper_argument );
- result = stack[sindex].value;
- uint c = result.Mod(stack[sindex+2].value);
- if( c )
- Error( err_overflow );
-void If(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 3 )
- Error( err_improper_amount_of_arguments );
- if( !stack[sindex].value.IsZero() )
- result = stack[sindex+2].value;
- else
- result = stack[sindex+4].value;
-void Or(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args < 2 )
- Error( err_improper_amount_of_arguments );
- for(int i=0 ; i<amount_of_args ; ++i)
- {
- if( !stack[sindex+i*2].value.IsZero() )
- {
- result.SetOne();
- return;
- }
- }
- result.SetZero();
-void And(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args < 2 )
- Error( err_improper_amount_of_arguments );
- for(int i=0 ; i<amount_of_args ; ++i)
- {
- if( stack[sindex+i*2].value.IsZero() )
- {
- result.SetZero();
- return;
- }
- }
- result.SetOne();
-void Not(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- if( stack[sindex].value.IsZero() )
- result.SetOne();
- else
- result.SetZero();
-void DegToRad(int sindex, int amount_of_args, ValueType & result)
- ErrorCode err = err_ok;
- if( amount_of_args == 1 )
- {
- result = ttmath::DegToRad(stack[sindex].value, &err);
- }
- else
- if( amount_of_args == 3 )
- {
- result = ttmath::DegToRad( stack[sindex].value, stack[sindex+2].value,
- stack[sindex+4].value, &err);
- }
- else
- Error( err_improper_amount_of_arguments );
- if( err != err_ok )
- Error( err );
-void RadToDeg(int sindex, int amount_of_args, ValueType & result)
- ErrorCode err;
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::RadToDeg(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void DegToDeg(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 3 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::DegToDeg( stack[sindex].value, stack[sindex+2].value,
- stack[sindex+4].value, &err);
- if( err != err_ok )
- Error( err );
-void GradToRad(int sindex, int amount_of_args, ValueType & result)
- ErrorCode err;
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::GradToRad(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void RadToGrad(int sindex, int amount_of_args, ValueType & result)
- ErrorCode err;
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::RadToGrad(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void DegToGrad(int sindex, int amount_of_args, ValueType & result)
- ErrorCode err = err_ok;
- if( amount_of_args == 1 )
- {
- result = ttmath::DegToGrad(stack[sindex].value, &err);
- }
- else
- if( amount_of_args == 3 )
- {
- result = ttmath::DegToGrad( stack[sindex].value, stack[sindex+2].value,
- stack[sindex+4].value, &err);
- }
- else
- Error( err_improper_amount_of_arguments );
- if( err != err_ok )
- Error( err );
-void GradToDeg(int sindex, int amount_of_args, ValueType & result)
- ErrorCode err;
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = ttmath::GradToDeg(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Ceil(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Ceil(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Floor(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Floor(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Sqrt(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Sqrt(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Sinh(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Sinh(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Cosh(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Cosh(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Tanh(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Tanh(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Coth(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Coth(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void Root(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 2 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::Root(stack[sindex].value, stack[sindex+2].value, &err);
- if( err != err_ok )
- Error( err );
-void ASinh(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::ASinh(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void ACosh(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::ACosh(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void ATanh(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::ATanh(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void ACoth(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- ErrorCode err;
- result = ttmath::ACoth(stack[sindex].value, &err);
- if( err != err_ok )
- Error( err );
-void BitAnd(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 2 )
- Error( err_improper_amount_of_arguments );
- uint err;
- result = stack[sindex].value;
- err = result.BitAnd(stack[sindex+2].value);
- switch(err)
- {
- case 1:
- Error( err_overflow );
- break;
- case 2:
- Error( err_improper_argument );
- break;
- }
-void BitOr(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 2 )
- Error( err_improper_amount_of_arguments );
- uint err;
- result = stack[sindex].value;
- err = result.BitOr(stack[sindex+2].value);
- switch(err)
- {
- case 1:
- Error( err_overflow );
- break;
- case 2:
- Error( err_improper_argument );
- break;
- }
-void BitXor(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 2 )
- Error( err_improper_amount_of_arguments );
- uint err;
- result = stack[sindex].value;
- err = result.BitXor(stack[sindex+2].value);
- switch(err)
- {
- case 1:
- Error( err_overflow );
- break;
- case 2:
- Error( err_improper_argument );
- break;
- }
-void Sum(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args == 0 )
- Error( err_improper_amount_of_arguments );
- result = stack[sindex].value;
- for(int i=1 ; i<amount_of_args ; ++i )
- if( result.Add( stack[ sindex + i*2 ].value ) )
- Error( err_overflow );
-void Avg(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args == 0 )
- Error( err_improper_amount_of_arguments );
- result = stack[sindex].value;
- for(int i=1 ; i<amount_of_args ; ++i )
- if( result.Add( stack[ sindex + i*2 ].value ) )
- Error( err_overflow );
- if( result.Div( amount_of_args ) )
- Error( err_overflow );
-void Frac(int sindex, int amount_of_args, ValueType & result)
- if( amount_of_args != 1 )
- Error( err_improper_amount_of_arguments );
- result = stack[sindex].value;
- result.RemainFraction();
- we use such a method because 'wvsprintf' is not everywhere defined
-void Sprintf(char * buffer, int par)
-char buf[30]; // char, not wchar_t
-int i;
- #ifdef _MSC_VER
- #pragma warning( disable: 4996 )
- //warning C4996: 'sprintf': This function or variable may be unsafe.
- #endif
- sprintf(buf, "%d", par);
- for(i=0 ; buf[i] != 0 ; ++i)
- buffer[i] = buf[i];
- buffer[i] = 0;
- #ifdef _MSC_VER
- #pragma warning( default: 4996 )
- #endif
- this method returns the value from a user-defined function
- (look at the description in 'CallFunction(...)')
-bool GetValueOfUserDefinedFunction(const std::string & function_name, int amount_of_args, int sindex)
- if( !puser_functions )
- return false;
- const char * string_value;
- int param;
- if( puser_functions->GetValueAndParam(function_name, &string_value, &param) != err_ok )
- return false;
- if( param != amount_of_args )
- Error( err_improper_amount_of_arguments );
- FunctionLocalVariables local_variables;
- if( amount_of_args > 0 )
- {
- char buffer[30];
- // x = x1
- buffer[0] = 'x';
- buffer[1] = 0;
- local_variables.insert( std::make_pair(buffer, stack[sindex].value) );
- for(int i=0 ; i<amount_of_args ; ++i)
- {
- buffer[0] = 'x';
- Sprintf(buffer+1, i+1);
- local_variables.insert( std::make_pair(buffer, stack[sindex + i*2].value) );
- }
- }
- stack[sindex-1].value = RecurrenceParsingVariablesOrFunction(false, function_name, string_value, &local_variables);
- calculated = true;
-return true;
- we're calling a specific function
- function_name - name of the function
- amount_of_args - how many arguments there are on our stack
- (function must check whether this is a correct value or not)
- sindex - index of the first argument on the stack (sindex is greater than zero)
- if there aren't any arguments on the stack 'sindex' pointing on
- a non existend element (after the first bracket)
- result will be stored in 'stack[sindex-1].value'
- (we don't have to set the correct type of this element, it'll be set later)
-void CallFunction(const std::string & function_name, int amount_of_args, int sindex)
- if( GetValueOfUserDefinedFunction(function_name, amount_of_args, sindex) )
- return;
- typename FunctionsTable::iterator i = functions_table.find( function_name );
- if( i == functions_table.end() )
- Error( err_unknown_function );
- /*
- calling the specify function
- */
- (this->*(i->second))(sindex, amount_of_args, stack[sindex-1].value);
- calculated = true;
- inserting a function to the functions' table
- function_name - name of the function
- pf - pointer to the function (to the wrapper)
-void InsertFunctionToTable(const char * function_name, pfunction pf)
- std::string str;
- Misc::AssignString(str, function_name);
- functions_table.insert( std::make_pair(str, pf) );
- inserting a function to the variables' table
- (this function returns value of variable)
- variable_name - name of the function
- pf - pointer to the function
-void InsertVariableToTable(const char * variable_name, pfunction_var pf)
- std::string str;
- Misc::AssignString(str, variable_name);
- variables_table.insert( std::make_pair(str, pf) );
- this method creates the table of functions
-void CreateFunctionsTable()
- InsertFunctionToTable("gamma", &Parser<ValueType>::Gamma);
- InsertFunctionToTable("factorial", &Parser<ValueType>::Factorial);
- InsertFunctionToTable("abs", &Parser<ValueType>::Abs);
- InsertFunctionToTable("sin", &Parser<ValueType>::Sin);
- InsertFunctionToTable("cos", &Parser<ValueType>::Cos);
- InsertFunctionToTable("tan", &Parser<ValueType>::Tan);
- InsertFunctionToTable("tg", &Parser<ValueType>::Tan);
- InsertFunctionToTable("cot", &Parser<ValueType>::Cot);
- InsertFunctionToTable("ctg", &Parser<ValueType>::Cot);
- InsertFunctionToTable("int", &Parser<ValueType>::Int);
- InsertFunctionToTable("round", &Parser<ValueType>::Round);
- InsertFunctionToTable("ln", &Parser<ValueType>::Ln);
- InsertFunctionToTable("log", &Parser<ValueType>::Log);
- InsertFunctionToTable("exp", &Parser<ValueType>::Exp);
- InsertFunctionToTable("max", &Parser<ValueType>::Max);
- InsertFunctionToTable("min", &Parser<ValueType>::Min);
- InsertFunctionToTable("asin", &Parser<ValueType>::ASin);
- InsertFunctionToTable("acos", &Parser<ValueType>::ACos);
- InsertFunctionToTable("atan", &Parser<ValueType>::ATan);
- InsertFunctionToTable("atg", &Parser<ValueType>::ATan);
- InsertFunctionToTable("acot", &Parser<ValueType>::ACot);
- InsertFunctionToTable("actg", &Parser<ValueType>::ACot);
- InsertFunctionToTable("sgn", &Parser<ValueType>::Sgn);
- InsertFunctionToTable("mod", &Parser<ValueType>::Mod);
- InsertFunctionToTable("if", &Parser<ValueType>::If);
- InsertFunctionToTable("or", &Parser<ValueType>::Or);
- InsertFunctionToTable("and", &Parser<ValueType>::And);
- InsertFunctionToTable("not", &Parser<ValueType>::Not);
- InsertFunctionToTable("degtorad", &Parser<ValueType>::DegToRad);
- InsertFunctionToTable("radtodeg", &Parser<ValueType>::RadToDeg);
- InsertFunctionToTable("degtodeg", &Parser<ValueType>::DegToDeg);
- InsertFunctionToTable("gradtorad", &Parser<ValueType>::GradToRad);
- InsertFunctionToTable("radtograd", &Parser<ValueType>::RadToGrad);
- InsertFunctionToTable("degtograd", &Parser<ValueType>::DegToGrad);
- InsertFunctionToTable("gradtodeg", &Parser<ValueType>::GradToDeg);
- InsertFunctionToTable("ceil", &Parser<ValueType>::Ceil);
- InsertFunctionToTable("floor", &Parser<ValueType>::Floor);
- InsertFunctionToTable("sqrt", &Parser<ValueType>::Sqrt);
- InsertFunctionToTable("sinh", &Parser<ValueType>::Sinh);
- InsertFunctionToTable("cosh", &Parser<ValueType>::Cosh);
- InsertFunctionToTable("tanh", &Parser<ValueType>::Tanh);
- InsertFunctionToTable("tgh", &Parser<ValueType>::Tanh);
- InsertFunctionToTable("coth", &Parser<ValueType>::Coth);
- InsertFunctionToTable("ctgh", &Parser<ValueType>::Coth);
- InsertFunctionToTable("root", &Parser<ValueType>::Root);
- InsertFunctionToTable("asinh", &Parser<ValueType>::ASinh);
- InsertFunctionToTable("acosh", &Parser<ValueType>::ACosh);
- InsertFunctionToTable("atanh", &Parser<ValueType>::ATanh);
- InsertFunctionToTable("atgh", &Parser<ValueType>::ATanh);
- InsertFunctionToTable("acoth", &Parser<ValueType>::ACoth);
- InsertFunctionToTable("actgh", &Parser<ValueType>::ACoth);
- InsertFunctionToTable("bitand", &Parser<ValueType>::BitAnd);
- InsertFunctionToTable("bitor", &Parser<ValueType>::BitOr);
- InsertFunctionToTable("bitxor", &Parser<ValueType>::BitXor);
- InsertFunctionToTable("band", &Parser<ValueType>::BitAnd);
- InsertFunctionToTable("bor", &Parser<ValueType>::BitOr);
- InsertFunctionToTable("bxor", &Parser<ValueType>::BitXor);
- InsertFunctionToTable("sum", &Parser<ValueType>::Sum);
- InsertFunctionToTable("avg", &Parser<ValueType>::Avg);
- InsertFunctionToTable("frac", &Parser<ValueType>::Frac);
- this method creates the table of variables
-void CreateVariablesTable()
- InsertVariableToTable("pi", &ValueType::SetPi);
- InsertVariableToTable("e", &ValueType::SetE);
- converting from a big letter to a small one
-int ToLowerCase(int c)
- if( c>='A' && c<='Z' )
- return c - 'A' + 'a';
-return c;
- this method read the name of a variable or a function
- 'result' will be the name of a variable or a function
- function return 'false' if this name is the name of a variable
- or function return 'true' if this name is the name of a function
- what should be returned is tested just by a '(' character that means if there's
- a '(' character after a name that function returns 'true'
-bool ReadName(std::string & result)
-int character;
- result.erase();
- character = *pstring;
- /*
- the first letter must be from range 'a' - 'z' or 'A' - 'Z'
- */
- if( ! (( character>='a' && character<='z' ) || ( character>='A' && character<='Z' )) )
- Error( err_unknown_character );
- do
- {
- result += static_cast<char>( character );
- character = * ++pstring;
- }
- while( (character>='a' && character<='z') ||
- (character>='A' && character<='Z') ||
- (character>='0' && character<='9') ||
- character=='_' );
- SkipWhiteCharacters();
- /*
- if there's a character '(' that means this name is a name of a function
- */
- if( *pstring == '(' )
- {
- ++pstring;
- return true;
- }
-return false;
- we're checking whether the first character is '-' or '+'
- if it is we'll return 'true' and if it is equally '-' we'll set the 'sign' member of 'result'
-bool TestSign(Item & result)
- SkipWhiteCharacters();
- result.sign = false;
- if( *pstring == '-' || *pstring == '+' )
- {
- if( *pstring == '-' )
- result.sign = true;
- ++pstring;
- return true;
- }
-return false;
- we're reading the name of a variable or a function
- if is there a function we'll return 'true'
-bool ReadVariableOrFunction(Item & result)
-std::string name;
-bool is_it_name_of_function = ReadName(name);
- if( is_it_name_of_function )
- {
- /*
- we've read the name of a function
- */
- result.function_name = name;
- result.type = Item::first_bracket;
- result.function = true;
- }
- else
- {
- /*
- we've read the name of a variable and we're getting its value now
- */
- result.value = GetValueOfVariable( name );
- }
-return is_it_name_of_function;
- we're reading a numerical value directly from the string
-void ReadValue(Item & result, int reading_base)
-const char * new_stack_pointer;
-bool value_read;
-Conv conv;
- conv.base = reading_base;
- conv.comma = comma;
- conv.comma2 = comma2;
- conv.group = group;
- uint carry = result.value.FromString(pstring, conv, &new_stack_pointer, &value_read);
- pstring = new_stack_pointer;
- if( carry )
- Error( err_overflow );
- if( !value_read )
- Error( err_unknown_character );
- this method returns true if 'character' is a proper first digit for the value (or a comma -- can be first too)
-bool ValueStarts(int character, int base)
- if( character == comma )
- return true;
- if( comma2!=0 && character==comma2 )
- return true;
- if( Misc::CharToDigit(character, base) != -1 )
- return true;
-return false;
- we're reading the item
- return values:
- 0 - all ok, the item is successfully read
- 1 - the end of the string (the item is not read)
- 2 - the final bracket ')'
-int ReadValueVariableOrFunction(Item & result)
-bool it_was_sign = false;
-int character;
- if( TestSign(result) )
- // 'result.sign' was set as well
- it_was_sign = true;
- SkipWhiteCharacters();
- character = ToLowerCase( *pstring );
- if( character == 0 )
- {
- if( it_was_sign )
- // at the end of the string a character like '-' or '+' has left
- Error( err_unexpected_end );
- // there's the end of the string here
- return 1;
- }
- else
- if( character == '(' )
- {
- // we've got a normal bracket (not a function)
- result.type = Item::first_bracket;
- result.function = false;
- ++pstring;
- return 0;
- }
- else
- if( character == ')' )
- {
- // we've got a final bracket
- // (in this place we can find a final bracket only when there are empty brackets
- // without any values inside or with a sign '-' or '+' inside)
- if( it_was_sign )
- Error( err_unexpected_final_bracket );
- result.type = Item::last_bracket;
- // we don't increment 'pstring', this final bracket will be read next by the
- // 'ReadOperatorAndCheckFinalBracket(...)' method
- return 2;
- }
- else
- if( character == '#' )
- {
- ++pstring;
- SkipWhiteCharacters();
- // after '#' character we do not allow '-' or '+' (can be white characters)
- if( ValueStarts(*pstring, 16) )
- ReadValue( result, 16 );
- else
- Error( err_unknown_character );
- }
- else
- if( character == '&' )
- {
- ++pstring;
- SkipWhiteCharacters();
- // after '&' character we do not allow '-' or '+' (can be white characters)
- if( ValueStarts(*pstring, 2) )
- ReadValue( result, 2 );
- else
- Error( err_unknown_character );
- }
- else
- if( ValueStarts(character, base) )
- {
- ReadValue( result, base );
- }
- else
- if( character>='a' && character<='z' )
- {
- if( ReadVariableOrFunction(result) )
- // we've read the name of a function
- return 0;
- }
- else
- Error( err_unknown_character );
- /*
- we've got a value in the 'result'
- this value is from a variable or directly from the string
- */
- result.type = Item::numerical_value;
- if( result.sign )
- {
- result.value.ChangeSign();
- result.sign = false;
- }
-return 0;
-void InsertOperatorToTable(const char * name, typename MatOperator::Type type)
- operators_table.insert( std::make_pair(std::string(name), type) );
- this method creates the table of operators
-void CreateMathematicalOperatorsTable()
- InsertOperatorToTable("||", MatOperator::lor);
- InsertOperatorToTable("&&", MatOperator::land);
- InsertOperatorToTable("!=", MatOperator::neq);
- InsertOperatorToTable("==", MatOperator::eq);
- InsertOperatorToTable(">=", MatOperator::get);
- InsertOperatorToTable("<=", MatOperator::let);
- InsertOperatorToTable(">", MatOperator::gt);
- InsertOperatorToTable("<", MatOperator::lt);
- InsertOperatorToTable("-", MatOperator::sub);
- InsertOperatorToTable("+", MatOperator::add);
- InsertOperatorToTable("/", MatOperator::div);
- InsertOperatorToTable("*", MatOperator::mul);
- InsertOperatorToTable("^", MatOperator::pow);
- returns true if 'str2' is the substring of str1
- e.g.
- true when str1="test" and str2="te"
-bool IsSubstring(const std::string & str1, const std::string & str2)
- if( str2.length() > str1.length() )
- return false;
- for(typename std::string::size_type i=0 ; i<str2.length() ; ++i)
- if( str1[i] != str2[i] )
- return false;
-return true;
- this method reads a mathematical (or logical) operator
-void ReadMathematicalOperator(Item & result)
-std::string oper;
-typename OperatorsTable::iterator iter_old, iter_new;
- iter_old = operators_table.end();
- for( ; true ; ++pstring )
- {
- oper += *pstring;
- iter_new = operators_table.lower_bound(oper);
- if( iter_new == operators_table.end() || !IsSubstring(iter_new->first, oper) )
- {
- oper.erase( --oper.end() ); // we've got mininum one element
- if( iter_old != operators_table.end() && iter_old->first == oper )
- {
- result.type = Item::mat_operator;
- result.moperator.SetType( iter_old->second );
- break;
- }
- Error( err_unknown_operator );
- }
- iter_old = iter_new;
- }
- this method makes a calculation for the percentage operator
- e.g.
- 1000-50% = 1000-(1000*0,5) = 500
-void OperatorPercentage()
- if( stack_index < 3 ||
- stack[stack_index-1].type != Item::numerical_value ||
- stack[stack_index-2].type != Item::mat_operator ||
- stack[stack_index-3].type != Item::numerical_value )
- Error(err_percent_from);
- ++pstring;
- SkipWhiteCharacters();
- uint c = 0;
- c += stack[stack_index-1].value.Div(100);
- c += stack[stack_index-1].value.Mul(stack[stack_index-3].value);
- if( c )
- Error(err_overflow);
- this method reads a mathematic operators
- or the final bracket or the semicolon operator
- return values:
- 0 - ok
- 1 - the string is finished
-int ReadOperator(Item & result)
- SkipWhiteCharacters();
- if( *pstring == '%' )
- OperatorPercentage();
- if( *pstring == 0 )
- return 1;
- else
- if( *pstring == ')' )
- {
- result.type = Item::last_bracket;
- ++pstring;
- }
- else
- if( *pstring == ';' || (param_sep!=0 && *pstring==param_sep) )
- {
- result.type = Item::semicolon;
- ++pstring;
- }
- else
- if( (*pstring>='a' && *pstring<='z') || (*pstring>='A' && *pstring<='Z') )
- {
- // short mul (without any operators)
- result.type = Item::mat_operator;
- result.moperator.SetType( MatOperator::shortmul );
- }
- else
- ReadMathematicalOperator(result);
-return 0;
- this method is making the standard mathematic operation like '-' '+' '*' '/' and '^'
- the operation is made between 'value1' and 'value2'
- the result of this operation is stored in the 'value1'
-void MakeStandardMathematicOperation(ValueType & value1, typename MatOperator::Type mat_operator,
- const ValueType & value2)
-uint res;
- calculated = true;
- switch( mat_operator )
- {
- case MatOperator::land:
- (!value1.IsZero() && !value2.IsZero()) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::lor:
- (!value1.IsZero() || !value2.IsZero()) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::eq:
- (value1 == value2) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::neq:
- (value1 != value2) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::lt:
- (value1 < value2) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::gt:
- (value1 > value2) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::let:
- (value1 <= value2) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::get:
- (value1 >= value2) ? value1.SetOne() : value1.SetZero();
- break;
- case MatOperator::sub:
- if( value1.Sub(value2) ) Error( err_overflow );
- break;
- case MatOperator::add:
- if( value1.Add(value2) ) Error( err_overflow );
- break;
- case MatOperator::mul:
- case MatOperator::shortmul:
- if( value1.Mul(value2) ) Error( err_overflow );
- break;
- case MatOperator::div:
- if( value2.IsZero() ) Error( err_division_by_zero );
- if( value1.Div(value2) ) Error( err_overflow );
- break;
- case MatOperator::pow:
- res = value1.Pow( value2 );
- if( res == 1 ) Error( err_overflow );
- else
- if( res == 2 ) Error( err_improper_argument );
- break;
- default:
- /*
- on the stack left an unknown operator but we had to recognize its before
- that means there's an error in our algorithm
- */
- Error( err_internal_error );
- }
- this method is trying to roll the stack up with the operator's priority
- for example if there are:
- "1 - 2 +"
- we can subtract "1-2" and the result store on the place where is '1' and copy the last
- operator '+', that means there'll be '-1+' on our stack
- but if there are:
- "1 - 2 *"
- we can't roll the stack up because the operator '*' has greater priority than '-'
-void TryRollingUpStackWithOperatorPriority()
- while( stack_index>=4 &&
- stack[stack_index-4].type == Item::numerical_value &&
- stack[stack_index-3].type == Item::mat_operator &&
- stack[stack_index-2].type == Item::numerical_value &&
- stack[stack_index-1].type == Item::mat_operator &&
- (
- (
- // the first operator has greater priority
- stack[stack_index-3].moperator.GetPriority() > stack[stack_index-1].moperator.GetPriority()
- ) ||
- (
- // or both operators have the same priority and the first operator is not right associative
- stack[stack_index-3].moperator.GetPriority() == stack[stack_index-1].moperator.GetPriority() &&
- stack[stack_index-3].moperator.GetAssoc() == MatOperator::non_right
- )
- )
- )
- {
- MakeStandardMathematicOperation(stack[stack_index-4].value,
- stack[stack_index-3].moperator.GetType(),
- stack[stack_index-2].value);
- /*
- copying the last operator and setting the stack pointer to the correct value
- */
- stack[stack_index-3] = stack[stack_index-1];
- stack_index -= 2;
- }
- this method is trying to roll the stack up without testing any operators
- for example if there are:
- "1 - 2"
- there'll be "-1" on our stack
-void TryRollingUpStack()
- while( stack_index >= 3 &&
- stack[stack_index-3].type == Item::numerical_value &&
- stack[stack_index-2].type == Item::mat_operator &&
- stack[stack_index-1].type == Item::numerical_value )
- {
- MakeStandardMathematicOperation( stack[stack_index-3].value,
- stack[stack_index-2].moperator.GetType(),
- stack[stack_index-1].value );
- stack_index -= 2;
- }
- this method is reading a value or a variable or a function
- (the normal first bracket as well) and push it into the stack
-int ReadValueVariableOrFunctionAndPushItIntoStack(Item & temp)
-int code = ReadValueVariableOrFunction( temp );
- if( code == 0 )
- {
- if( stack_index < stack.size() )
- stack[stack_index] = temp;
- else
- stack.push_back( temp );
- ++stack_index;
- }
- if( code == 2 )
- // there was a final bracket, we didn't push it into the stack
- // (it'll be read by the 'ReadOperatorAndCheckFinalBracket' method next)
- code = 0;
-return code;
- this method calculate how many parameters there are on the stack
- and the index of the first parameter
- if there aren't any parameters on the stack this method returns
- 'size' equals zero and 'index' pointing after the first bracket
- (on non-existend element)
-void HowManyParameters(int & size, int & index)
- size = 0;
- index = stack_index;
- if( index == 0 )
- // we haven't put a first bracket on the stack
- Error( err_unexpected_final_bracket );
- if( stack[index-1].type == Item::first_bracket )
- // empty brackets
- return;
- for( --index ; index>=1 ; index-=2 )
- {
- if( stack[index].type != Item::numerical_value )
- {
- /*
- this element must be 'numerical_value', if not that means
- there's an error in our algorithm
- */
- Error( err_internal_error );
- }
- ++size;
- if( stack[index-1].type != Item::semicolon )
- break;
- }
- if( index<1 || stack[index-1].type != Item::first_bracket )
- {
- /*
- we haven't put a first bracket on the stack
- */
- Error( err_unexpected_final_bracket );
- }
- this method is being called when the final bracket ')' is being found
- this method's rolling the stack up, counting how many parameters there are
- on the stack and if there was a function it's calling the function
-void RollingUpFinalBracket()
-int amount_of_parameters;
-int index;
- if( stack_index<1 ||
- (stack[stack_index-1].type != Item::numerical_value &&
- stack[stack_index-1].type != Item::first_bracket)
- )
- Error( err_unexpected_final_bracket );
- TryRollingUpStack();
- HowManyParameters(amount_of_parameters, index);
- // 'index' will be greater than zero
- // 'amount_of_parameters' can be zero
- if( amount_of_parameters==0 && !stack[index-1].function )
- Error( err_unexpected_final_bracket );
- bool was_sign = stack[index-1].sign;
- if( stack[index-1].function )
- {
- // the result of a function will be on 'stack[index-1]'
- // and then at the end we'll set the correct type (numerical value) of this element
- CallFunction(stack[index-1].function_name, amount_of_parameters, index);
- }
- else
- {
- /*
- there was a normal bracket (not a funcion)
- */
- if( amount_of_parameters != 1 )
- Error( err_unexpected_semicolon_operator );
- /*
- in the place where is the bracket we put the result
- */
- stack[index-1] = stack[index];
- }
- /*
- if there was a '-' character before the first bracket
- we change the sign of the expression
- */
- stack[index-1].sign = false;
- if( was_sign )
- stack[index-1].value.ChangeSign();
- stack[index-1].type = Item::numerical_value;
- /*
- the pointer of the stack will be pointing on the next (non-existing now) element
- */
- stack_index = index;
- this method is putting the operator on the stack
-void PushOperatorIntoStack(Item & temp)
- if( stack_index < stack.size() )
- stack[stack_index] = temp;
- else
- stack.push_back( temp );
- ++stack_index;
- this method is reading a operator and if it's a final bracket
- it's calling RollingUpFinalBracket() and reading a operator again
-int ReadOperatorAndCheckFinalBracket(Item & temp)
- do
- {
- if( ReadOperator(temp) == 1 )
- {
- /*
- the string is finished
- */
- return 1;
- }
- if( temp.type == Item::last_bracket )
- RollingUpFinalBracket();
- }
- while( temp.type == Item::last_bracket );
-return 0;
- we check wheter there are only numerical value's or 'semicolon' operators on the stack
-void CheckIntegrityOfStack()
- for(unsigned int i=0 ; i<stack_index; ++i)
- {
- if( stack[i].type != Item::numerical_value &&
- stack[i].type != Item::semicolon)
- {
- /*
- on the stack we must only have 'numerical_value' or 'semicolon' operator
- if there is something another that means
- we probably didn't close any of the 'first' bracket
- */
- Error( err_stack_not_clear );
- }
- }
- the main loop of parsing
-void Parse()
-Item item;
-int result_code;
- while( true )
- {
- if( pstop_calculating && pstop_calculating->WasStopSignal() )
- Error( err_interrupt );
- result_code = ReadValueVariableOrFunctionAndPushItIntoStack( item );
- if( result_code == 0 )
- {
- if( item.type == Item::first_bracket )
- continue;
- result_code = ReadOperatorAndCheckFinalBracket( item );
- }
- if( result_code==1 || item.type==Item::semicolon )
- {
- /*
- the string is finished or the 'semicolon' operator has appeared
- */
- if( stack_index == 0 )
- Error( err_nothing_has_read );
- TryRollingUpStack();
- if( result_code == 1 )
- {
- CheckIntegrityOfStack();
- return;
- }
- }
- PushOperatorIntoStack( item );
- TryRollingUpStackWithOperatorPriority();
- }
- this method is called at the end of the parsing process
- on our stack we can have another value than 'numerical_values' for example
- when someone use the operator ';' in the global scope or there was an error during
- parsing and the parser hasn't finished its job
- if there was an error the stack is cleaned up now
- otherwise we resize stack and leave on it only 'numerical_value' items
-void NormalizeStack()
- if( error!=err_ok || stack_index==0 )
- {
- stack.clear();
- return;
- }
- /*
- 'stack_index' tell us how many elements there are on the stack,
- we must resize the stack now because 'stack_index' is using only for parsing
- and stack has more (or equal) elements than value of 'stack_index'
- */
- stack.resize( stack_index );
- for(uint i=stack_index-1 ; i!=uint(-1) ; --i)
- {
- if( stack[i].type != Item::numerical_value )
- stack.erase( stack.begin() + i );
- }
- the default constructor
-Parser(): default_stack_size(100)
- pstop_calculating = 0;
- puser_variables = 0;
- puser_functions = 0;
- pfunction_local_variables = 0;
- base = 10;
- deg_rad_grad = 1;
- error = err_ok;
- group = 0;
- comma = '.';
- comma2 = ',';
- param_sep = 0;
- CreateFunctionsTable();
- CreateVariablesTable();
- CreateMathematicalOperatorsTable();
- the assignment operator
-Parser<ValueType> & operator=(const Parser<ValueType> & p)
- pstop_calculating = p.pstop_calculating;
- puser_variables = p.puser_variables;
- puser_functions = p.puser_functions;
- pfunction_local_variables = 0;
- base = p.base;
- deg_rad_grad = p.deg_rad_grad;
- error = p.error;
- group = p.group;
- comma = p.comma;
- comma2 = p.comma2;
- param_sep = p.param_sep;
- /*
- we don't have to call 'CreateFunctionsTable()' etc.
- we can only copy these tables
- */
- functions_table = p.functions_table;
- variables_table = p.variables_table;
- operators_table = p.operators_table;
- visited_variables = p.visited_variables;
- visited_functions = p.visited_functions;
-return *this;
- the copying constructor
-Parser(const Parser<ValueType> & p): default_stack_size(p.default_stack_size)
- operator=(p);
- the new base of mathematic system
- default is 10
-void SetBase(int b)
- if( b>=2 && b<=16 )
- base = b;
- the unit of angles used in: sin,cos,tan,cot,asin,acos,atan,acot
- 0 - deg
- 1 - rad (default)
- 2 - grad
-void SetDegRadGrad(int angle)
- if( angle >= 0 || angle <= 2 )
- deg_rad_grad = angle;
- this method sets a pointer to the object which tell us whether we should stop
- calculations
-void SetStopObject(const volatile StopCalculating * ps)
- pstop_calculating = ps;
- this method sets the new table of user-defined variables
- if you don't want any other variables just put zero value into the 'puser_variables' variable
- (you can have only one table at the same time)
-void SetVariables(const Objects * pv)
- puser_variables = pv;
- this method sets the new table of user-defined functions
- if you don't want any other functions just put zero value into the 'puser_functions' variable
- (you can have only one table at the same time)
-void SetFunctions(const Objects * pf)
- puser_functions = pf;
- setting the group character
- default zero (not used)
-void SetGroup(int g)
- group = g;
- setting the main comma operator and the additional comma operator
- the additional operator can be zero (which means it is not used)
- default are: '.' and ','
-void SetComma(int c, int c2 = 0)
- comma = c;
- comma2 = c2;
- setting an additional character which is used as a parameters separator
- the main parameters separator is a semicolon (is used always)
- this character is used also as a global separator
-void SetParamSep(int s)
- param_sep = s;
- the main method using for parsing string
-ErrorCode Parse(const char * str)
- stack_index = 0;
- pstring = str;
- error = err_ok;
- calculated = false;
- stack.resize( default_stack_size );
- try
- {
- Parse();
- }
- catch(ErrorCode c)
- {
- error = c;
- calculated = false;
- }
- NormalizeStack();
-return error;
- the main method using for parsing string
-ErrorCode Parse(const std::string & str)
- return Parse(str.c_str());
- the main method using for parsing string
-ErrorCode Parse(const wchar_t * str)
- Misc::AssignString(wide_to_ansi, str);
-return Parse(wide_to_ansi.c_str());
- // !! wide_to_ansi clearing can be added here
- the main method using for parsing string
-ErrorCode Parse(const std::wstring & str)
- return Parse(str.c_str());
- this method returns true is something was calculated
- (at least one mathematical operator was used or a function or variable)
- e.g. true if the string to Parse() looked like this:
- "1+1"
- "2*3"
- "sin(5)"
- if the string was e.g. "678" the result is false
-bool Calculated()
- return calculated;
- initializing coefficients used when calculating the gamma (or factorial) function
- this speed up the next calculations
- you don't have to call this method explicitly
- these coefficients will be calculated when needed
-void InitCGamma()
- cgamma.InitAll();
-} // namespace
diff --git a/ttmath/ttmaththreads.h b/ttmath/ttmaththreads.h
deleted file mode 100644
index 586227f..0000000
--- a/ttmath/ttmaththreads.h
+++ /dev/null
@@ -1,250 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2009, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmaththreads
-#define headerfilettmaththreads
-#include "ttmathtypes.h"
-#include <windows.h>
-#include <cstdio>
-#include <pthread.h>
- \file ttmaththreads.h
- \brief Some objects used in multithreads environment
- this is a simple skeleton of a program in multithreads environment:
- #include<ttmath/ttmath.h>
- int main()
- {
- [...]
- }
- make sure that macro TTMATH_MULTITHREADS is defined and (somewhere in *.cpp file)
- use TTMATH_MULTITHREADS_HELPER macro (outside of any classes/functions/namespaces scope)
-namespace ttmath
- /*
- we use win32 threads
- */
- /*!
- in multithreads environment you should use TTMATH_MULTITHREADS_HELPER macro
- somewhere in *.cpp file
- (at the moment in win32 this macro does nothing)
- */
- /*!
- objects of this class are used to synchronize
- */
- class ThreadLock
- {
- HANDLE mutex_handle;
- void CreateName(char * buffer) const
- {
- #ifdef _MSC_VER
- #pragma warning (disable : 4996)
- // warning C4996: 'sprintf': This function or variable may be unsafe. Consider using sprintf_s instead.
- #endif
- sprintf(buffer, "TTMATH_LOCK_%ul", (unsigned long)GetCurrentProcessId());
- #ifdef _MSC_VER
- #pragma warning (default : 4996)
- #endif
- }
- public:
- bool Lock()
- {
- char buffer[50];
- CreateName(buffer);
- mutex_handle = CreateMutexA(0, false, buffer);
- if( mutex_handle == 0 )
- return false;
- WaitForSingleObject(mutex_handle, INFINITE);
- return true;
- }
- ThreadLock()
- {
- mutex_handle = 0;
- }
- ~ThreadLock()
- {
- if( mutex_handle != 0 )
- {
- ReleaseMutex(mutex_handle);
- CloseHandle(mutex_handle);
- }
- }
- };
-#endif // #ifdef TTMATH_WIN32_THREADS
- /*
- we use posix threads
- */
- /*!
- in multithreads environment you should use TTMATH_MULTITHREADS_HELPER macro
- somewhere in *.cpp file
- (this macro defines a pthread_mutex_t object used by TTMath library)
- */
- namespace ttmath \
- { \
- pthread_mutex_t ttmath_mutex = PTHREAD_MUTEX_INITIALIZER; \
- }
- /*!
- ttmath_mutex will be defined by TTMATH_MULTITHREADS_HELPER macro
- */
- extern pthread_mutex_t ttmath_mutex;
- /*!
- objects of this class are used to synchronize
- */
- class ThreadLock
- {
- public:
- bool Lock()
- {
- if( pthread_mutex_lock(&ttmath_mutex) != 0 )
- return false;
- return true;
- }
- ~ThreadLock()
- {
- pthread_mutex_unlock(&ttmath_mutex);
- }
- };
-#endif // #ifdef TTMATH_POSIX_THREADS
-#if !defined(TTMATH_POSIX_THREADS) && !defined(TTMATH_WIN32_THREADS)
- /*!
- we don't use win32 and pthreads
- */
- /*!
- */
- /*!
- objects of this class are used to synchronize
- actually we don't synchronize, the method Lock() returns always 'false'
- */
- class ThreadLock
- {
- public:
- bool Lock()
- {
- return false;
- }
- };
-#endif // #if !defined(TTMATH_POSIX_THREADS) && !defined(TTMATH_WIN32_THREADS)
-} // namespace
diff --git a/ttmath/ttmathtypes.h b/ttmath/ttmathtypes.h
deleted file mode 100644
index 3d9ddbe..0000000
--- a/ttmath/ttmathtypes.h
+++ /dev/null
@@ -1,676 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2012, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathtypes
-#define headerfilettmathtypes
- \file ttmathtypes.h
- \brief constants used in the library
- As our library is written in header files (templates) we cannot use
- constants like 'const int' etc. because we would need some source files
- (*.cpp) to define these variables. The only constants we can have are
- those defined by #define preprocessor macros.
- All macros are preceded by the TTMATH_ prefix
-#include <stdexcept>
-#include <sstream>
-#include <vector>
-#ifndef _MSC_VER
-#include <stdint.h>
-// for uint64_t and int64_t on a 32 bit platform
- the version of the library
- TTMATH_PRERELEASE_VER is either zero or one
- zero means that this is the release version of the library
- (one means something like beta)
- you can define a platform explicitly by defining either
-#if !defined TTMATH_PLATFORM32 && !defined TTMATH_PLATFORM64
- #if !defined _M_X64 && !defined __x86_64__
- /*
- other platforms than x86 and amd64 are not recognized at the moment
- so you should set TTMATH_PLATFORMxx manually
- */
- // we're using a 32bit platform
- #else
- // we're using a 64bit platform
- #endif
- asm version of the library is available by default only for:
- x86 and amd64 platforms and for Microsoft Visual and GCC compilers
- but you can force using asm version (the same asm as for Microsoft Visual)
- by defining TTMATH_FORCEASM macro
- you have to be sure that your compiler accept such an asm format
- #if !defined __i386__ && !defined _X86_ && !defined _M_IX86 && !defined __x86_64__ && !defined _M_X64
- /*!
- x86 architecture:
- __i386__ defined by GNU C
- _X86_ defined by MinGW32
- _M_IX86 defined by Visual Studio, Intel C/C++, Digital Mars and Watcom C/C++
- amd64 architecture:
- __x86_64__ defined by GNU C, CLANG (LLVM) and Sun Studio
- _M_X64 defined by Visual Studio
- asm version is available only for x86 or amd64 platforms
- */
- #define TTMATH_NOASM
- #endif
- #if !defined _MSC_VER && !defined __GNUC__
- /*!
- compilers other than MS VC, GCC or CLANG (LLVM) use the no-asm version by default
- (CLANG defines __GNUC__ too)
- */
- #define TTMATH_NOASM
- #endif
-namespace ttmath
- /*!
- on 32bit platforms one word (uint, sint) will be equal 32bits
- */
- typedef unsigned int uint;
- typedef signed int sint;
- /*!
- on 32 bit platform ulint and slint will be equal 64 bits
- */
- #ifdef _MSC_VER
- // long long on MS Windows (Visual and GCC mingw compilers) have 64 bits
- // stdint.h is not available on Visual Studio prior to VS 2010 version
- typedef unsigned long long int ulint;
- typedef signed long long int slint;
- #else
- // we do not use 'long' here because there is a difference in unix and windows
- // environments: in unix 'long' has 64 bits but in windows it has only 32 bits
- typedef uint64_t ulint;
- typedef int64_t slint;
- #endif
- /*!
- how many bits there are in the uint type
- */
- #define TTMATH_BITS_PER_UINT 32u
- /*!
- the mask for the highest bit in the unsigned 32bit word (2^31)
- */
- #define TTMATH_UINT_HIGHEST_BIT 2147483648u
- /*!
- the max value of the unsigned 32bit word (2^32 - 1)
- (all bits equal one)
- */
- #define TTMATH_UINT_MAX_VALUE 4294967295u
- /*!
- the number of words (32bit words on 32bit platform)
- which are kept in built-in variables for a Big<> type
- (these variables are defined in ttmathbig.h)
- */
- /*!
- this macro returns the number of machine words
- capable to hold min_bits bits
- e.g. TTMATH_BITS(128) returns 4
- */
- #define TTMATH_BITS(min_bits) ((min_bits-1)/32 + 1)
- /*!
- on 64bit platforms one word (uint, sint) will be equal 64bits
- */
- #ifdef _MSC_VER
- /* in VC 'long' type has 32 bits, __int64 is VC extension */
- typedef unsigned __int64 uint;
- typedef signed __int64 sint;
- #else
- typedef unsigned long uint;
- typedef signed long sint;
- #endif
- /*!
- on 64bit platforms we do not define ulint and slint
- */
- /*!
- how many bits there are in the uint type
- */
- #define TTMATH_BITS_PER_UINT 64ul
- /*!
- the mask for the highest bit in the unsigned 64bit word (2^63)
- */
- #define TTMATH_UINT_HIGHEST_BIT 9223372036854775808ul
- /*!
- the max value of the unsigned 64bit word (2^64 - 1)
- (all bits equal one)
- */
- #define TTMATH_UINT_MAX_VALUE 18446744073709551615ul
- /*!
- the number of words (64bit words on 64bit platforms)
- which are kept in built-in variables for a Big<> type
- (these variables are defined in ttmathbig.h)
- */
- /*!
- this macro returns the number of machine words
- capable to hold min_bits bits
- e.g. TTMATH_BITS(128) returns 2
- */
- #define TTMATH_BITS(min_bits) ((min_bits-1)/64 + 1)
- #if !defined(TTMATH_POSIX_THREADS) && !defined(TTMATH_WIN32_THREADS)
- #if defined(_WIN32)
- #elif defined(unix) || defined(__unix__) || defined(__unix)
- #endif
- #endif
- this variable defines how many iterations are performed
- during some kind of calculating when we're making any long formulas
- (for example Taylor series)
- it's used in ExpSurrounding0(...), LnSurrounding1(...), Sin0pi05(...), etc.
- note! there'll not be so many iterations, iterations are stopped when
- there is no sense to continue calculating (for example when the result
- still remains unchanged after adding next series and we know that the next
- series are smaller than previous ones)
- this is a limit when calculating Karatsuba multiplication
- if the size of a vector is smaller than TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE
- the Karatsuba algorithm will use standard schoolbook multiplication
- // if TTMATH_DEBUG_LOG is defined then we should use the same size regardless of the compiler
- #ifdef __GNUC__
- #else
- #endif
- this is a special value used when calculating the Gamma(x) function
- if x is greater than this value then the Gamma(x) will be calculated using
- some kind of series
- don't use smaller values than about 100
-namespace ttmath
- /*!
-     codes of the library type:
-     asm_vc_32  - asm code designed for Microsoft Visual C++ (32 bits)
-     asm_gcc_32 - asm code designed for GCC (32 bits)
-     asm_vc_64  - asm code for VC (64 bits)
-     asm_gcc_64 - asm code for GCC (64 bits)
-     no_asm_32  - pure C++ version (32 bits), without any asm code
-     no_asm_64  - pure C++ version (64 bits), without any asm code
- */
- enum LibTypeCode
- {
-     asm_vc_32  = 0,
-     asm_gcc_32 = 1,
-     asm_vc_64  = 2,
-     asm_gcc_64 = 3,
-     no_asm_32  = 4,
-     no_asm_64  = 5
- };
- /*!
- error codes
- err_ok is zero, the remaining codes get consecutive values
- in the order in which they are listed below
- */
- enum ErrorCode
- {
- err_ok = 0,
- err_nothing_has_read,
- err_unknown_character,
- err_unexpected_final_bracket,
- err_stack_not_clear,
- err_unknown_variable,
- err_division_by_zero,
- err_interrupt,
- err_overflow,
- err_unknown_function,
- err_unknown_operator,
- err_unexpected_semicolon_operator,
- err_improper_amount_of_arguments,
- err_improper_argument,
- err_unexpected_end,
- err_internal_error,
- err_incorrect_name,
- err_incorrect_value,
- err_variable_exists,
- err_variable_loop,
- err_functions_loop,
- err_must_be_only_one_value,
- err_object_exists,
- err_unknown_object,
- err_still_calculating,
- err_in_short_form_used_function,
- err_percent_from
- };
- /*!
-     a set of options used when converting to/from a string
-     /temporarily only in Big::ToString() and Big::FromString()/
-     the constructor sets every field to its default value
- */
- struct Conv
- {
-     /*!
-         base (radix) in which the value will be shown (or read)
-         default: 10
-     */
-     uint base;
-     /*!
-         used only in Big::ToString()
-         if true the value is always shown in the scientific mode, e.g: 123e+30
-         default: false
-     */
-     bool scient;
-     /*!
-         used only in Big::ToString()
-         if 'scient' is false the value is printed in the scientific mode
-         only if the exponent is greater than 'scient_from'
-         default: 15
-     */
-     sint scient_from;
-     /*!
-         if 'base_round' is true and 'base' is different from 2, 4, 8, or 16
-         and the result value is not an integer then an additional rounding
-         is made (after converting the last digit of the result is skipped)
-         default: true
-         e.g.
-         Conv c;
-         c.base_round = false;
-         Big<1, 1> a = "0.1"; // decimal input
-         std::cout << a.ToString(c) << std::endl; // the result is: 0.099999999
-     */
-     bool base_round;
-     /*!
-         used only in Big::ToString()
-         tells how many digits after the comma are possible
-         default: -1 which means that all digits are printed
-         set it to zero if you want an integer value only
-         for example when the value is:
-         12.345678 and 'round' is 4
-         then the result will be
-         12.3457 (the last digit was rounded)
-     */
-     sint round;
-     /*!
-         if true then insignificant digits in the mantissa are cut off
-         (zero characters at the end -- after the comma operator)
-         e.g. 1234,78000 will be: 1234,78
-         default: true
-     */
-     bool trim_zeroes;
-     /*!
-         the main comma operator (used when reading and writing)
-         default is a dot '.'
-     */
-     uint comma;
-     /*!
-         an additional comma operator (used only when reading)
-         if you don't want it just set it to zero
-         default is a comma ','
-         this allows you to convert from a value:
-         123.45 as well as from 123,45
-     */
-     uint comma2;
-     /*!
-         the character which is used for grouping
-         if group=' ' then: 1234,56789 will be printed as: 1 234,567 89
-         if you don't want grouping just set it to zero (which is default)
-     */
-     uint group;
-     /*!
-         how many digits should be grouped (it is used if 'group' is non zero)
-         default: 3
-     */
-     uint group_digits;
-     /*!
-         not implemented yet
-         default: 0
-     */
-     uint group_exp;
-     // default values
-     Conv() :
-         base(10),
-         scient(false),
-         scient_from(15),
-         base_round(true),
-         round(-1),
-         trim_zeroes(true),
-         comma('.'),
-         comma2(','),
-         group(0),
-         group_digits(3),
-         group_exp(0)
-     {
-     }
- };
- /*!
-     a simple class which can be used in a multithreading model
-     (you can write your own class derived from this one)
-     some functions, like Factorial() /at the moment only Factorial/,
-     take a pointer to such a 'stop object'; when the method
-     WasStopSignal() of the object returns true the calculating
-     should be broken and the function should return
-     this base implementation never asks for stopping
- */
- class StopCalculating
- {
- public:
-     virtual bool WasStopSignal() const volatile
-     {
-         return false;
-     }
-     virtual ~StopCalculating()
-     {
-     }
- };
- /*!
-     a small class which is useful when compiling with gcc
-     an object of this type holds the name and the line of a file
-     in which the macro TTMATH_ASSERT or TTMATH_REFERENCE_ASSERT was used
- */
- class ExceptionInfo
- {
-     const char * file;
-     int line;
- public:
-     ExceptionInfo() : file(0), line(0) {}
-     ExceptionInfo(const char * f, int l) : file(f), line(l) {}
-     /*!
-         returns "file:line" or "unknown" when no file name was given
-     */
-     std::string Where() const
-     {
-         if( file )
-         {
-             std::ostringstream location;
-             location << file << ":" << line;
-             return location.str();
-         }
-         return "unknown";
-     }
- };
- /*!
-     a small class used for reporting 'reference' errors
-     the macro TTMATH_REFERENCE_ASSERT used in the library
-     can throw an exception of this type
-     ** from version 0.9.2 this macro is removed from all methods
-     in public interface so you don't have to worry about it **
-     if you compile with gcc you can get a small benefit
-     from using the method Where() (it returns std::string with
-     the name and the line of a file where the macro
-     TTMATH_REFERENCE_ASSERT was used)
- */
- class ReferenceError : public std::logic_error, public ExceptionInfo
- {
- public:
-     ReferenceError() : std::logic_error("reference error") {}
-     ReferenceError(const char * f, int l) : std::logic_error("reference error"), ExceptionInfo(f,l) {}
-     // the place of the failed assertion, taken from ExceptionInfo
-     std::string Where() const
-     {
-         return ExceptionInfo::Where();
-     }
- };
- /*!
-     a small class used for reporting errors
-     the macro TTMATH_ASSERT used in the library throws an exception
-     of this type if the condition in it is false
-     if you compile with gcc you can get a small benefit
-     from using the method Where() (it returns std::string with
-     the name and the line of a file where the macro TTMATH_ASSERT
-     was used)
- */
- class RuntimeError : public std::runtime_error, public ExceptionInfo
- {
- public:
-     RuntimeError() : std::runtime_error("internal error") {}
-     RuntimeError(const char * f, int l) : std::runtime_error("internal error"), ExceptionInfo(f,l) {}
-     // the place of the failed assertion, taken from ExceptionInfo
-     std::string Where() const
-     {
-         return ExceptionInfo::Where();
-     }
- };
- /*!
- this macro enables further testing during writing your code
- you don't have to define it in a release mode
- if this macro is set then macros TTMATH_ASSERT and TTMATH_REFERENCE_ASSERT
- are set as well and these macros can throw an exception if a condition in it
- is not fulfilled (look at the definition of TTMATH_ASSERT and TTMATH_REFERENCE_ASSERT)
- TTMATH_DEBUG is set automatically if DEBUG or _DEBUG are defined
- */
- #if defined DEBUG || defined _DEBUG
- #define TTMATH_DEBUG
- #endif
- /*
-     TTMATH_REFERENCE_ASSERT and TTMATH_ASSERT can throw only when TTMATH_DEBUG
-     is defined, otherwise they expand to nothing
-     both macros already end with a semicolon, they are used in the library
-     as whole statements without a trailing ';'
- */
- #ifdef TTMATH_DEBUG
-     #if defined(__FILE__) && defined(__LINE__)
-         #define TTMATH_REFERENCE_ASSERT(expression) \
-             if( &(expression) == this ) throw ttmath::ReferenceError(__FILE__, __LINE__);
-         #define TTMATH_ASSERT(expression) \
-             if( !(expression) ) throw ttmath::RuntimeError(__FILE__, __LINE__);
-     #else
-         #define TTMATH_REFERENCE_ASSERT(expression) \
-             if( &(expression) == this ) throw ReferenceError();
-         #define TTMATH_ASSERT(expression) \
-             if( !(expression) ) throw RuntimeError();
-     #endif
- #else
-     #define TTMATH_REFERENCE_ASSERT(expression)
-     #define TTMATH_ASSERT(expression)
- #endif
- /*
-     the logging macros print to std::cout through PrintLog() and PrintVectorLog()
-     only when TTMATH_DEBUG_LOG is defined, otherwise they expand to nothing
- */
- #ifdef TTMATH_DEBUG_LOG
-     #define TTMATH_LOG(msg) PrintLog(msg, std::cout);
-     #define TTMATH_LOGC(msg, carry) PrintLog(msg, carry, std::cout);
-     #define TTMATH_VECTOR_LOG(msg, vector, len) PrintVectorLog(msg, std::cout, vector, len);
-     #define TTMATH_VECTOR_LOGC(msg, carry, vector, len) PrintVectorLog(msg, carry, std::cout, vector, len);
- #else
-     #define TTMATH_LOG(msg)
-     #define TTMATH_LOGC(msg, carry)
-     #define TTMATH_VECTOR_LOG(msg, vector, len)
-     #define TTMATH_VECTOR_LOGC(msg, carry, vector, len)
- #endif
-} // namespace
diff --git a/ttmath/ttmathuint.h b/ttmath/ttmathuint.h
deleted file mode 100644
index d00a0d7..0000000
--- a/ttmath/ttmathuint.h
+++ /dev/null
@@ -1,4165 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2011, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathuint
-#define headerfilettmathuint
- \file ttmathuint.h
- \brief template class UInt<uint>
-#include <iostream>
-#include <iomanip>
-#include "ttmathtypes.h"
-#include "ttmathmisc.h"
- \brief a namespace for the TTMath library
-namespace ttmath
- \brief UInt implements a big integer value without a sign
- value_size - how many bytes specify our value
- on 32bit platforms: value_size=1 -> 4 bytes -> 32 bits
- on 64bit platforms: value_size=1 -> 8 bytes -> 64 bits
- value_size = 1,2,3,4,5,6....
-template<uint value_size>
-class UInt
- /*!
- buffer for the integer value
- table[0] - the lowest word of the value
- */
- uint table[value_size];
- /*!
- some methods used for debugging purposes
- */
- /*!
-     this method is only for debugging purposes or when we want to make
-     a table of a variable (constant) in ttmathbig.h
-     it prints the words from the highest one to the lowest one as
-     zero-padded hexadecimal numbers separated by commas, 8 in a row
-     (8 hex digits per word on a 32bit platform, 16 otherwise)
- */
- template<class ostream_type>
- void PrintTable(ostream_type & output) const
- {
-     // how many columns there'll be
-     const int columns = 8;
-     int c = 1;
-     for(int i=value_size-1 ; i>=0 ; --i)
-     {
-         output << "0x" << std::setfill('0');
-         #ifdef TTMATH_PLATFORM32
-             output << std::setw(8);
-         #else
-             output << std::setw(16);
-         #endif
-         output << std::hex << table[i];
-         if( i>0 )
-         {
-             output << ", ";
-             // a new row is started when the current one is full
-             if( ++c > columns )
-             {
-                 output << std::endl;
-                 c = 1;
-             }
-         }
-     }
-     // the stream is switched back to decimal output
-     output << std::dec << std::endl;
- }
- /*!
-     this method is used when macro TTMATH_DEBUG_LOG is defined
-     it prints 'msg' and then each of the 'vector_len' words of 'vector'
-     in a separate line
- */
- template<class char_type, class ostream_type>
- static void PrintVectorLog(const char_type * msg, ostream_type & output, const uint * vector, uint vector_len)
- {
-     output << msg << std::endl;
-     uint word_index = 0;
-     while( word_index < vector_len )
-     {
-         output << " table[" << word_index << "]: " << vector[word_index] << std::endl;
-         ++word_index;
-     }
- }
- /*!
-     this method is used when macro TTMATH_DEBUG_LOG is defined
-     it prints the same as the version without 'carry'
-     and adds one more line with the carry
- */
- template<class char_type, class ostream_type>
- static void PrintVectorLog(const char_type * msg, uint carry, ostream_type & output, const uint * vector, uint vector_len)
- {
-     // first the message and the words
-     PrintVectorLog(msg, output, vector, vector_len);
-     // and the carry at the end
-     output << " carry: " << carry << std::endl;
- }
- /*!
-     this method is used when macro TTMATH_DEBUG_LOG is defined
-     it prints 'msg' and all words of this object
- */
- template<class char_type, class ostream_type>
- void PrintLog(const char_type * msg, ostream_type & output) const
- {
-     const uint * words = table;
-     PrintVectorLog(msg, output, words, value_size);
- }
- /*!
-     this method is used when macro TTMATH_DEBUG_LOG is defined
-     it prints 'msg', all words of this object and the carry
- */
- template<class char_type, class ostream_type>
- void PrintLog(const char_type * msg, uint carry, ostream_type & output) const
- {
-     // the version of PrintVectorLog with a carry prints the words
-     // and then the line with the carry
-     PrintVectorLog(msg, carry, output, table, value_size);
- }
- /*!
-     this method returns the number of words in the table (value_size)
- */
- uint Size() const
- {
-     const uint words = value_size;
-     return words;
- }
- /*!
-     this method sets all words of the value to zero
- */
- void SetZero()
- {
-     // in the future here can be 'memset'
-     uint remaining = value_size;
-     while( remaining != 0 )
-         table[--remaining] = 0;
-     TTMATH_LOG("UInt::SetZero")
- }
- /*!
-     this method sets the value to one
- */
- void SetOne()
- {
-     // all words are cleared and then only the lowest word is set
-     SetZero();
-     table[0] = 1;
-     TTMATH_LOG("UInt::SetOne")
- }
- /*!
-     this method sets the max value which this class can hold
-     (all bits will be one)
- */
- void SetMax()
- {
-     // every word gets all its bits set
-     for(uint i=0 ; i<value_size ; ++i)
-         table[i] = TTMATH_UINT_MAX_VALUE;
-     TTMATH_LOG("UInt::SetMax")
- }
- /*!
-     this method sets the min value which this class can hold
-     (zero is the smallest value of an unsigned integer)
- */
- void SetMin()
- {
-     // the smallest value is simply zero
-     SetZero();
-     TTMATH_LOG("UInt::SetMin")
- }
- /*!
-     this method swaps the value of this object with the value of the argument
-     (word by word)
- */
- void Swap(UInt<value_size> & ss2)
- {
-     for(uint word = value_size ; word-- > 0 ; )
-     {
-         const uint saved = ss2.table[word];
-         ss2.table[word] = table[word];
-         table[word] = saved;
-     }
- }
- /*!
- this method copies the value stored in another table
- (warning: the first values in temp_table are the highest words -- it's different
- from our table)
- we copy as many words as possible
- if temp_table_len is bigger than value_size we try to round
- the lowest word of the table depending on the highest bit of the first
- unused word in temp_table
- (this rounding isn't a perfect rounding -- look at the description below)
- and if temp_table_len is smaller than value_size we clear the remaining
- (lowest) words in the table
- */
- void SetFromTable(const uint * temp_table, uint temp_table_len)
- {
- uint temp_table_index = 0;
- sint i; // 'i' with a sign
- // table is filled from the highest word down, temp_table is read from its beginning
- for(i=value_size-1 ; i>=0 && temp_table_index<temp_table_len; --i, ++temp_table_index)
- table[i] = temp_table[ temp_table_index ];
- // rounding mantissa
- // (only when there are words in temp_table which have not been copied)
- if( temp_table_index < temp_table_len )
- {
- if( (temp_table[temp_table_index] & TTMATH_UINT_HIGHEST_BIT) != 0 )
- {
- /*
- very simple rounding
- if the highest bit of the first unused word from temp_table is set to one
- we're rounding the lowest word in the table
- in fact there should be a normal addition but
- we don't use Add() or AddTwoInts() because these methods
- can set a carry and then there'll be a small problem
- for optimization
- (so the lowest word is not incremented when it already has the max value)
- */
- if( table[0] != TTMATH_UINT_MAX_VALUE )
- ++table[0];
- }
- }
- // cleaning the rest of the mantissa
- for( ; i>=0 ; --i)
- table[i] = 0;
- TTMATH_LOG("UInt::SetFromTable")
- }
- /*!
-     this method copies the value stored in another table
-     (warning: the first values in temp_table are the highest words -- it's different
-     from our table)
-     ***this method is created only on a 64bit platform***
-     we copy as many words as possible, two 32bit words from temp_table
-     make one word of our table (the first of them is the higher half)
-     if temp_table has more words than we can hold we try to round
-     the lowest word of the table depending on the highest bit of the first
-     unused word in temp_table
-     (this rounding isn't a perfect rounding -- look at the description below)
-     and if temp_table has fewer words we clear the remaining (lowest) words
-     in the table
-     warning: we're using 'temp_table' as a pointer at 32bit words
- */
- void SetFromTable(const unsigned int * temp_table, uint temp_table_len)
- {
-     // the highest bit of one 32bit word from temp_table
-     // (TTMATH_UINT_HIGHEST_BIT cannot be used here: it is the highest bit
-     // of our 64bit word and a 32bit value never has it set)
-     const unsigned int temp_word_highest_bit = 0x80000000u;
-     uint temp_table_index = 0;
-     sint i; // 'i' with a sign
-     for(i=value_size-1 ; i>=0 && temp_table_index<temp_table_len; --i, ++temp_table_index)
-     {
-         table[i] = uint(temp_table[ temp_table_index ]) << 32;
-         ++temp_table_index;
-         // the lower half is taken only when temp_table still has a word
-         if( temp_table_index<temp_table_len )
-             table[i] |= temp_table[ temp_table_index ];
-     }
-     // rounding mantissa
-     if( temp_table_index < temp_table_len )
-     {
-         if( (temp_table[temp_table_index] & temp_word_highest_bit) != 0 )
-         {
-             /*
-                 very simple rounding
-                 if the highest bit of the first unused word from temp_table is set to one
-                 we're rounding the lowest word in the table
-                 in fact there should be a normal addition but
-                 we don't use Add() or AddTwoInts() because these methods
-                 can set a carry and then there'll be a small problem
-                 for optimization
-             */
-             if( table[0] != TTMATH_UINT_MAX_VALUE )
-                 ++table[0];
-         }
-     }
-     // cleaning the rest of the mantissa
-     for( ; i >= 0 ; --i)
-         table[i] = 0;
-     TTMATH_LOG("UInt::SetFromTable")
- }
- /*!
- *
- * basic mathematic functions
- *
- */
- /*!
-     this method adds one to the existing value
-     the result of AddInt(1) is returned unchanged
-     (presumably it is the carry -- AddInt is defined elsewhere)
- */
- uint AddOne()
- {
-     const uint result = AddInt(1);
-     return result;
- }
- /*!
-     this method subtracts one from the existing value
-     the result of SubInt(1) is returned unchanged
-     (presumably it is the carry -- SubInt is defined elsewhere)
- */
- uint SubOne()
- {
-     const uint result = SubInt(1);
-     return result;
- }
- /*!
- an auxiliary method for moving bits into the left hand side
- this method moves only whole words
- rest_bits - (output) how many bits (less than one word) are still to be moved
- last_c - (output) the last bit moved out, it is written only when whole
- words are really moved (the caller should set it to zero before)
- bits - how many bits should be moved
- c - if it's not zero the released words are filled with ones, otherwise with zeroes
- */
- void RclMoveAllWords(uint & rest_bits, uint & last_c, uint bits, uint c)
- {
- rest_bits = bits % TTMATH_BITS_PER_UINT;
- uint all_words = bits / TTMATH_BITS_PER_UINT;
- uint mask = ( c ) ? TTMATH_UINT_MAX_VALUE : 0;
- if( all_words >= value_size )
- {
- // the whole value is moved out
- // only when exactly all bits are moved the last moved bit is the lowest bit of the value
- if( all_words == value_size && rest_bits == 0 )
- last_c = table[0] & 1;
- // else: last_c is default set to 0
- // clearing
- for(uint i = 0 ; i<value_size ; ++i)
- table[i] = mask;
- // there is nothing more to move
- rest_bits = 0;
- }
- else
- if( all_words > 0 )
- {
- // 0 < all_words < value_size
- sint first, second;
- // the lowest bit of the last word which falls out
- last_c = table[value_size - all_words] & 1; // all_words is greater than 0
- // copying the first part of the value
- for(first = value_size-1, second=first-all_words ; second>=0 ; --first, --second)
- table[first] = table[second];
- // setting the rest to 'c'
- for( ; first>=0 ; --first )
- table[first] = mask;
- }
- TTMATH_LOG("UInt::RclMoveAllWords")
- }
- /*!
-     moving all bits into the left side 'bits' times
-     return value <- this <- C
-     'bits' is usually from the range <0, value_size * TTMATH_BITS_PER_UINT>
-     but it can be even bigger, then all bits will be set to 'c'
-     the value c is put into the lowest bits
-     the method returns the state of the last moved bit
-     (zero when 'bits' is zero)
- */
- uint Rcl(uint bits, uint c=0)
- {
-     if( bits == 0 )
-         return 0;
-     uint last_c = 0;
-     uint rest_bits = bits;
-     // whole words are moved first
-     if( bits >= TTMATH_BITS_PER_UINT )
-         RclMoveAllWords(rest_bits, last_c, bits, c);
-     if( rest_bits == 0 )
-     {
-         TTMATH_LOG("UInt::Rcl")
-         return last_c;
-     }
-     // rest_bits is from 1 to TTMATH_BITS_PER_UINT-1 now
-     switch( rest_bits )
-     {
-     case 1:
-         last_c = Rcl2_one(c);
-         break;
-     case 2:
-         // performance tests showed that for rest_bits==2 it's better to use Rcl2_one twice instead of Rcl2(2,c)
-         Rcl2_one(c);
-         last_c = Rcl2_one(c);
-         break;
-     default:
-         last_c = Rcl2(rest_bits, c);
-         break;
-     }
-     TTMATH_LOGC("UInt::Rcl", last_c)
-     return last_c;
- }
- /*!
- an auxiliary method for moving bits into the right hand side
- this method moves only whole words
- rest_bits - (output) how many bits (less than one word) are still to be moved
- last_c - (output) the last bit moved out, it is written only when whole
- words are really moved (the caller should set it to zero before)
- bits - how many bits should be moved
- c - if it's not zero the released words are filled with ones, otherwise with zeroes
- */
- void RcrMoveAllWords(uint & rest_bits, uint & last_c, uint bits, uint c)
- {
- rest_bits = bits % TTMATH_BITS_PER_UINT;
- uint all_words = bits / TTMATH_BITS_PER_UINT;
- uint mask = ( c ) ? TTMATH_UINT_MAX_VALUE : 0;
- if( all_words >= value_size )
- {
- // the whole value is moved out
- // only when exactly all bits are moved the last moved bit is the highest bit of the value
- if( all_words == value_size && rest_bits == 0 )
- last_c = (table[value_size-1] & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0;
- // else: last_c is default set to 0
- // clearing
- for(uint i = 0 ; i<value_size ; ++i)
- table[i] = mask;
- // there is nothing more to move
- rest_bits = 0;
- }
- else if( all_words > 0 )
- {
- // 0 < all_words < value_size
- uint first, second;
- // the highest bit of the last word which falls out
- last_c = (table[all_words - 1] & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0; // all_words is > 0
- // copying the first part of the value
- for(first=0, second=all_words ; second<value_size ; ++first, ++second)
- table[first] = table[second];
- // setting the rest to 'c'
- for( ; first<value_size ; ++first )
- table[first] = mask;
- }
- TTMATH_LOG("UInt::RcrMoveAllWords")
- }
- /*!
-     moving all bits into the right side 'bits' times
-     c -> this -> return value
-     'bits' is usually from the range <0, value_size * TTMATH_BITS_PER_UINT>
-     but it can be even bigger, then all bits will be set to 'c'
-     the value c is put into the highest bits
-     the method returns the state of the last moved bit
-     (zero when 'bits' is zero)
- */
- uint Rcr(uint bits, uint c=0)
- {
-     if( bits == 0 )
-         return 0;
-     uint last_c = 0;
-     uint rest_bits = bits;
-     // whole words are moved first
-     if( bits >= TTMATH_BITS_PER_UINT )
-         RcrMoveAllWords(rest_bits, last_c, bits, c);
-     if( rest_bits == 0 )
-     {
-         TTMATH_LOG("UInt::Rcr")
-         return last_c;
-     }
-     // rest_bits is from 1 to TTMATH_BITS_PER_UINT-1 now
-     switch( rest_bits )
-     {
-     case 1:
-         last_c = Rcr2_one(c);
-         break;
-     case 2:
-         // performance tests showed that for rest_bits==2 it's better to use Rcr2_one twice instead of Rcr2(2,c)
-         Rcr2_one(c);
-         last_c = Rcr2_one(c);
-         break;
-     default:
-         last_c = Rcr2(rest_bits, c);
-         break;
-     }
-     TTMATH_LOGC("UInt::Rcr", last_c)
-     return last_c;
- }
- /*!
- this method moves all bits into the left side: first by whole words
- (until the highest word is different from zero) and then by single bits with Rcl()
- it returns how many bits have been moved
- (zero is returned when the whole value is zero)
- */
- uint CompensationToLeft()
- {
- uint moving = 0;
- // a - the index of the highest word which is different from zero
- sint a;
- for(a=value_size-1 ; a>=0 && table[a]==0 ; --a);
- if( a < 0 )
- return moving; // all words in table have zero
- if( a != value_size-1 )
- {
- // the highest words are zero, the value is moved by whole words
- moving += ( value_size-1 - a ) * TTMATH_BITS_PER_UINT;
- // moving all words
- sint i;
- for(i=value_size-1 ; a>=0 ; --i, --a)
- table[i] = table[a];
- // setting the remaining (lowest) words to zero
- for(; i>=0 ; --i)
- table[i] = 0;
- }
- uint moving2 = FindLeadingBitInWord( table[value_size-1] );
- // moving2 is different from -1 because the value table[value_size-1]
- // is not zero
- // now moving2 becomes the number of bits by which the highest word has to be moved
- moving2 = TTMATH_BITS_PER_UINT - moving2 - 1;
- Rcl(moving2);
- TTMATH_LOG("UInt::CompensationToLeft")
- return moving + moving2;
- }
- /*!
-     this method looks for the highest set bit
-     result:
-     if 'this' is not zero:
-         return value - true
-         'table_id' - the index of a word <0..value_size-1>
-         'index' - the index of this set bit in the word <0..TTMATH_BITS_PER_UINT)
-     if 'this' is zero:
-         return value - false
-         both 'table_id' and 'index' are zero
- */
- bool FindLeadingBit(uint & table_id, uint & index) const
- {
-     // going down from the highest word until a non-zero word or the word 0
-     table_id = value_size-1;
-     while( table_id != 0 && table[table_id] == 0 )
-         --table_id;
-     if( table[table_id] != 0 )
-     {
-         index = FindLeadingBitInWord( table[table_id] );
-         return true;
-     }
-     // is zero (table_id is zero here)
-     index = 0;
-     return false;
- }
- /*!
-     this method looks for the lowest set bit
-     result:
-     if 'this' is not zero:
-         return value - true
-         'table_id' - the index of a word <0..value_size-1>
-         'index' - the index of this set bit in the word <0..TTMATH_BITS_PER_UINT)
-     if 'this' is zero:
-         return value - false
-         both 'table_id' and 'index' are zero
- */
- bool FindLowestBit(uint & table_id, uint & index) const
- {
-     // going up from the lowest word until a non-zero word
-     table_id = 0;
-     while( table_id < value_size && table[table_id] == 0 )
-         ++table_id;
-     if( table_id < value_size )
-     {
-         // table[table_id] is different from 0
-         index = FindLowestBitInWord( table[table_id] );
-         return true;
-     }
-     // is zero
-     index = 0;
-     table_id = 0;
-     return false;
- }
- /*!
-     getting the 'bit_index' bit
-     bit_index must be smaller than value_size * TTMATH_BITS_PER_UINT
-     the value of this object is not changed
- */
- uint GetBit(uint bit_index) const
- {
-     TTMATH_ASSERT( bit_index < value_size * TTMATH_BITS_PER_UINT )
-     uint bit = bit_index % TTMATH_BITS_PER_UINT;
-     // SetBitInWord() works on a copy of the word, so the table stays untouched
-     uint word_copy = table[bit_index / TTMATH_BITS_PER_UINT];
-     return SetBitInWord(word_copy, bit);
- }
- /*!
-     setting the 'bit_index' bit
-     and returning the last state of the bit
-     bit_index must be smaller than value_size * TTMATH_BITS_PER_UINT
- */
- uint SetBit(uint bit_index)
- {
-     TTMATH_ASSERT( bit_index < value_size * TTMATH_BITS_PER_UINT )
-     uint bit = bit_index % TTMATH_BITS_PER_UINT;
-     // the word which holds the bit is changed in place
-     uint & word = table[bit_index / TTMATH_BITS_PER_UINT];
-     uint previous_state = SetBitInWord(word, bit);
-     TTMATH_LOG("UInt::SetBit")
-     return previous_state;
- }
- /*!
-     this method performs a bitwise operation AND: this = this & ss2
- */
- void BitAnd(const UInt<value_size> & ss2)
- {
-     uint word = 0;
-     while( word != value_size )
-     {
-         table[word] &= ss2.table[word];
-         ++word;
-     }
-     TTMATH_LOG("UInt::BitAnd")
- }
- /*!
-     this method performs a bitwise operation OR: this = this | ss2
- */
- void BitOr(const UInt<value_size> & ss2)
- {
-     uint word = 0;
-     while( word != value_size )
-     {
-         table[word] |= ss2.table[word];
-         ++word;
-     }
-     TTMATH_LOG("UInt::BitOr")
- }
- /*!
-     this method performs a bitwise operation XOR: this = this ^ ss2
- */
- void BitXor(const UInt<value_size> & ss2)
- {
-     uint word = 0;
-     while( word != value_size )
-     {
-         table[word] ^= ss2.table[word];
-         ++word;
-     }
-     TTMATH_LOG("UInt::BitXor")
- }
- /*!
-     this method performs a bitwise operation NOT on all words
- */
- void BitNot()
- {
-     uint word = 0;
-     while( word != value_size )
-     {
-         table[word] = ~table[word];
-         ++word;
-     }
-     TTMATH_LOG("UInt::BitNot")
- }
- /*!
-     this method performs a bitwise operation NOT but only
-     on the range of <0, leading_bit>
-     for example:
-     BitNot2(8) = BitNot2( 1000(bin) ) = 111(bin) = 7
-     when the value is zero the result is one
- */
- void BitNot2()
- {
-     uint table_id, index;
-     if( !FindLeadingBit(table_id, index) )
-     {
-         // the value is zero
-         table[0] = 1;
-     }
-     else
-     {
-         // the words below the leading one are negated as a whole
-         for(uint word=0 ; word<table_id ; ++word)
-             table[word] = ~table[word];
-         // in the leading word only the bits <0, index> are negated
-         uint mask = TTMATH_UINT_MAX_VALUE;
-         uint shift = TTMATH_BITS_PER_UINT - index - 1;
-         if( shift != 0 )
-             mask >>= shift;
-         table[table_id] ^= mask;
-     }
-     TTMATH_LOG("UInt::BitNot2")
- }
- /*!
- *
- * Multiplication
- *
- *
- */
- /*!
- multiplication: this = this * ss2
- it returns 1 if there was a carry (the result doesn't fit in this object)
- and 0 otherwise
- */
- uint MulInt(uint ss2)
- {
- uint r1, r2, x1;
- uint c = 0;
- // the original value is kept in 'u', the result is gathered in 'this'
- UInt<value_size> u(*this);
- SetZero();
- if( ss2 == 0 )
- {
- TTMATH_LOGC("UInt::MulInt(uint)", 0)
- return 0;
- }
- // each word (except the last one) is multiplied by ss2 and the two-word
- // result (r2, r1) is added at the position x1
- for(x1=0 ; x1<value_size-1 ; ++x1)
- {
- MulTwoWords(u.table[x1], ss2, &r2, &r1);
- c += AddTwoInts(r2,r1,x1);
- }
- // x1 = value_size-1 (last word)
- // there is no room for r2 here: if it's not zero we have a carry
- MulTwoWords(u.table[x1], ss2, &r2, &r1);
- c += (r2!=0) ? 1 : 0;
- c += AddInt(r1, x1);
- TTMATH_LOGC("UInt::MulInt(uint)", c)
- // 'c' only counts the carries, the method reports just zero or one
- return (c==0)? 0 : 1;
- }
- /*!
- multiplication: result = this * ss2
- we're using this method only when result_size is greater than value_size
- if so there will not be a carry
- ('this' is not changed, 'result' is overwritten)
- */
- template<uint result_size>
- void MulInt(uint ss2, UInt<result_size> & result) const
- {
- TTMATH_ASSERT( result_size > value_size )
- uint r2,r1;
- // the words from the range <x1start, x1size) are multiplied
- uint x1size=value_size;
- uint x1start=0;
- result.SetZero();
- if( ss2 == 0 )
- {
- TTMATH_VECTOR_LOG("UInt::MulInt(uint, UInt<>)", result.table, result_size)
- return;
- }
- if( value_size > 2 )
- {
- // if the value_size is smaller than or equal to 2
- // there is no sense to set x1size and x1start to another values
- // skipping the zero words at the top of the value
- for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size);
- if( x1size == 0 )
- {
- // 'this' is zero, so the result stays zero
- TTMATH_VECTOR_LOG("UInt::MulInt(uint, UInt<>)", result.table, result_size)
- return;
- }
- // skipping the zero words at the bottom of the value
- for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start);
- }
- // each word is multiplied by ss2 and the two-word result (r2, r1)
- // is added to 'result' at the position x1
- for(uint x1=x1start ; x1<x1size ; ++x1)
- {
- MulTwoWords(table[x1], ss2, &r2, &r1 );
- result.AddTwoInts(r2,r1,x1);
- }
- TTMATH_VECTOR_LOG("UInt::MulInt(uint, UInt<>)", result.table, result_size)
- return;
- }
- /*!
- the multiplication 'this' = 'this' * ss2
- algorithm: 100 - means automatically choose the fastest algorithm
- */
- uint Mul(const UInt<value_size> & ss2, uint algorithm = 100)
- {
- // dispatch on the requested algorithm number
- if( algorithm == 1 )
- return Mul1(ss2);
- if( algorithm == 2 )
- return Mul2(ss2);
- if( algorithm == 3 )
- return Mul3(ss2);
- // 100 and every other value: let the library pick the algorithm
- return MulFastest(ss2);
- }
- /*!
- the multiplication 'result' = 'this' * ss2
- since the 'result' is twice bigger than 'this' and 'ss2'
- this method never returns a carry
- algorithm: 100 - means automatically choose the fastest algorithm
- */
- void MulBig(const UInt<value_size> & ss2,
- UInt<value_size*2> & result,
- uint algorithm = 100)
- {
- // dispatch on the requested algorithm number
- if( algorithm == 1 )
- Mul1Big(ss2, result);
- else
- if( algorithm == 2 )
- Mul2Big(ss2, result);
- else
- if( algorithm == 3 )
- Mul3Big(ss2, result);
- else
- // 100 and every other value: let the library pick the algorithm
- MulFastestBig(ss2, result);
- }
- /*!
- the first version of the multiplication algorithm
- */
- /*!
- multiplication: this = this * ss2
- it returns a carry if one occurred
- */
- uint Mul1Ref(const UInt<value_size> & ss2)
- {
- // Bit-by-bit multiplication: this = this * ss2.
- // ss2 must not be the same object as 'this' (Mul1() makes a copy in that case),
- // because 'this' is cleared and used as the accumulator.
- // Returns 1 as soon as an addition reports a carry, otherwise 0.
- UInt<value_size> ss1( *this );
- SetZero();
- // one iteration per bit of the value
- for(uint i=0; i < value_size*TTMATH_BITS_PER_UINT ; ++i)
- {
- // double the accumulator (this = this + this)
- if( Add(*this) )
- {
- TTMATH_LOGC("UInt::Mul1", 1)
- return 1;
- }
- // rotate ss1 left by one; a non-zero result means ss2 must be added
- // (presumably Rcl returns the bit moved out of the top -- confirm in Rcl)
- if( ss1.Rcl(1) )
- if( Add(ss2) )
- {
- TTMATH_LOGC("UInt::Mul1", 1)
- return 1;
- }
- }
- TTMATH_LOGC("UInt::Mul1", 0)
- return 0;
- }
- /*!
- multiplication: this = this * ss2
- can return carry
- */
- uint Mul1(const UInt<value_size> & ss2)
- {
- if( this != &ss2 )
- return Mul1Ref(ss2);
- // squaring: Mul1Ref() overwrites 'this', so it needs a separate copy of ss2
- UInt<value_size> copy_ss2(ss2);
- return Mul1Ref(copy_ss2);
- }
- /*!
- multiplication: result = this * ss2
- result is twice bigger than 'this' and 'ss2'
- this method never returns carry
- */
- void Mul1Big(const UInt<value_size> & ss2_, UInt<value_size*2> & result)
- {
- UInt<value_size*2> ss2;
- // low halves: result takes the words of *this, ss2 takes the words of ss2_
- for(uint lo=0 ; lo<value_size ; ++lo)
- {
- result.table[lo] = table[lo];
- ss2.table[lo] = ss2_.table[lo];
- }
- // high halves of both double-size operands are zero
- for(uint hi=value_size ; hi<value_size*2 ; ++hi)
- {
- result.table[hi] = 0;
- ss2.table[hi] = 0;
- }
- // the double-size product of two half-filled operands cannot produce a carry
- result.Mul1( ss2 );
- TTMATH_LOG("UInt::Mul1Big")
- }
- /*!
- the second version of the multiplication algorithm
- this algorithm is similar to the 'schoolbook method' which is done by hand
- */
- /*!
- multiplication: this = this * ss2
- it returns a carry if one occurred
- */
- uint Mul2(const UInt<value_size> & ss2)
- {
- UInt<value_size*2> wide;
- Mul2Big(ss2, wide);
- // the low half of the product becomes the new value
- for(uint lo=0 ; lo<value_size ; ++lo)
- table[lo] = wide.table[lo];
- // any non-zero word in the high half means the product did not fit
- uint c = 0;
- for(uint hi=value_size ; hi<value_size*2 && c==0 ; ++hi)
- if( wide.table[hi] != 0 )
- c = 1;
- TTMATH_LOGC("UInt::Mul2", c)
- return c;
- }
- /*!
- multiplication: result = this * ss2
- result is twice bigger than this and ss2
- this method never returns carry
- */
- void Mul2Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
- {
- // thin wrapper: passes the raw word tables of both operands to Mul2Big2()
- Mul2Big2<value_size>(table, ss2.table, result);
- TTMATH_LOG("UInt::Mul2Big")
- }
- /*!
- an auxiliary method for calculating the multiplication
- arguments we're taking as pointers (this is to improve the Mul3Big2()- avoiding
- unnecessary copying objects), the result should be taken as a pointer too,
- but at the moment there is no method AddTwoInts() which can operate on pointers
- */
- template<uint ss_size>
- void Mul2Big2(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result)
- {
- // Finds the range of non-zero words in each operand and hands the ranges
- // to Mul2Big3(), so that zero words are not multiplied.
- // ss1 and ss2 are read as arrays of ss_size words.
- uint x1size = ss_size, x2size = ss_size;
- uint x1start = 0, x2start = 0;
- if( ss_size > 2 )
- {
- // if the ss_size is smaller than or equal to 2
- // there is no sense to set x1size (and others) to another values
- // x?size: one past the highest non-zero word (0 when the operand is zero)
- for(x1size=ss_size ; x1size>0 && ss1[x1size-1]==0 ; --x1size);
- for(x2size=ss_size ; x2size>0 && ss2[x2size-1]==0 ; --x2size);
- // x?start: index of the lowest non-zero word
- for(x1start=0 ; x1start<x1size && ss1[x1start]==0 ; ++x1start);
- for(x2start=0 ; x2start<x2size && ss2[x2start]==0 ; ++x2start);
- }
- Mul2Big3<ss_size>(ss1, ss2, result, x1start, x1size, x2start, x2size);
- }
- /*!
- an auxiliary method for calculating the multiplication
- */
- template<uint ss_size>
- void Mul2Big3(const uint * ss1, const uint * ss2, UInt<ss_size*2> & result, uint x1start, uint x1size, uint x2start, uint x2size)
- {
- // Schoolbook multiplication restricted to the word ranges
- // [x1start, x1size) of ss1 and [x2start, x2size) of ss2.
- // 'result' is cleared first, so it is zero when either range is empty.
- uint r2, r1;
- result.SetZero();
- if( x1size==0 || x2size==0 )
- return;
- for(uint x1=x1start ; x1<x1size ; ++x1)
- {
- for(uint x2=x2start ; x2<x2size ; ++x2)
- {
- // r2:r1 = ss1[x1] * ss2[x2] (r2 is the high word), added at word x1+x2
- MulTwoWords(ss1[x1], ss2[x2], &r2, &r1);
- result.AddTwoInts(r2, r1, x2+x1);
- // here will never be a carry
- }
- }
- }
- /*!
- multiplication: this = this * ss2
- This is Karatsuba Multiplication algorithm, we're using it when value_size is greater than
- or equal to TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE macro (defined in ttmathuint.h).
- If value_size is smaller then we're using Mul2Big() instead.
- Karatsuba multiplication:
- Assume we have:
- this = x = x1*B^m + x0
- ss2 = y = y1*B^m + y0
- where x0 and y0 are less than B^m
- the product from multiplication we can show as:
- x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
- where
- z2 = x1*y1
- z1 = x1*y0 + x0*y1
- z0 = x0*y0
- this is standard schoolbook algorithm with O(n^2), Karatsuba observed that z1 can be given in other form:
- z1 = (x1 + x0)*(y1 + y0) - z2 - z0 / z1 = (x1*y1 + x1*y0 + x0*y1 + x0*y0) - x1*y1 - x0*y0 = x1*y0 + x0*y1 /
- and to calculate the multiplication we need only three multiplications (with some additions and subtractions)
- Our objects 'this' and 'ss2' we divide into two parts and by using recurrence we calculate the multiplication.
- Karatsuba multiplication has O( n^(ln(3)/ln(2)) )
- */
- uint Mul3(const UInt<value_size> & ss2)
- {
- UInt<value_size*2> wide;
- Mul3Big(ss2, wide);
- // the low half of the product becomes the new value
- for(uint lo=0 ; lo<value_size ; ++lo)
- table[lo] = wide.table[lo];
- // any non-zero word in the high half means the product did not fit
- uint c = 0;
- for(uint hi=value_size ; hi<value_size*2 && c==0 ; ++hi)
- if( wide.table[hi] != 0 )
- c = 1;
- TTMATH_LOGC("UInt::Mul3", c)
- return c;
- }
- /*!
- multiplication: result = this * ss2
- result is twice bigger than this and ss2,
- this method never returns carry,
- (Karatsuba multiplication)
- */
- void Mul3Big(const UInt<value_size> & ss2, UInt<value_size*2> & result)
- {
- // thin wrapper: passes the raw word tables (operands and result) to Mul3Big2()
- Mul3Big2<value_size>(table, ss2.table, result.table);
- TTMATH_LOG("UInt::Mul3Big")
- }
- /*!
- an auxiliary method for calculating the Karatsuba multiplication
- result_size is equal ss_size*2
- */
- template<uint ss_size>
- void Mul3Big2(const uint * ss1, const uint * ss2, uint * result)
- {
- // Recursive step of the Karatsuba multiplication.
- // ss1, ss2 - operands, ss_size words each
- // result - receives the product, ss_size*2 words
- // The recursion stops here: small operands use the schoolbook algorithm
- // and a single word is multiplied directly.
- const uint * x1, * x0, * y1, * y0;
- if( ss_size>1 && ss_size<TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
- {
- // below the Karatsuba threshold: schoolbook multiplication into a
- // temporary object, then copy the words out to the raw result buffer
- UInt<ss_size*2> res;
- Mul2Big2<ss_size>(ss1, ss2, res);
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare"
-#endif
- for(uint i=0 ; i<ss_size*2 ; ++i)
- result[i] = res.table[i];
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
- return;
- }
- else
- if( ss_size == 1 )
- {
- // one word times one word: high word to result[1], low word to result[0]
- return MulTwoWords(*ss1, *ss2, &result[1], &result[0]);
- }
- if( (ss_size & 1) == 1 )
- {
- // ss_size is odd
- x0 = ss1;
- y0 = ss2;
- x1 = ss1 + ss_size / 2 + 1;
- y1 = ss2 + ss_size / 2 + 1;
- // the upper parts (x1 and y1) are one word shorter than the lower parts (x0 and y0)
- Mul3Big3<ss_size/2 + 1, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
- }
- else
- {
- // ss_size is even
- x0 = ss1;
- y0 = ss2;
- x1 = ss1 + ss_size / 2;
- y1 = ss2 + ss_size / 2;
- // all four vectors (x0 x1 y0 y1) are equal in size
- Mul3Big3<ss_size/2, ss_size/2, ss_size*2>(x1, x0, y1, y0, result);
- }
- }
-#ifdef _MSC_VER
-#pragma warning (disable : 4717)
-//warning C4717: recursive on all control paths, function will cause runtime stack overflow
-//we have the stop point in Mul3Big2() method
-#endif
- /*!
- an auxiliary method for calculating the Karatsuba multiplication
- x = x1*B^m + x0
- y = y1*B^m + y0
- first_size - is the size of vectors: x0 and y0
- second_size - is the size of vectors: x1 and y1 (either equal to first_size or smaller than first_size by one)
- x*y = (x1*B^m + x0)(y1*B^m + y0) = z2*B^(2m) + z1*B^m + z0
- where
- z0 = x0*y0
- z2 = x1*y1
- z1 = (x1 + x0)*(y1 + y0) - z2 - z0
- result - receives the product; Mul3Big2() passes ss_size*2 as result_size
- */
- template<uint first_size, uint second_size, uint result_size>
- void Mul3Big3(const uint * x1, const uint * x0, const uint * y1, const uint * y0, uint * result)
- {
- uint i, c, xc, yc;
- UInt<first_size> temp, temp2;
- UInt<first_size*3> z1;
- // z0 and z2 we store directly in the result (we don't use any temporary variables)
- Mul3Big2<first_size>(x0, y0, result); // z0
- Mul3Big2<second_size>(x1, y1, result+first_size*2); // z2
- // now we calculate z1
- // temp = (x0 + x1)
- // temp2 = (y0 + y1)
- // we're using temp and temp2 with UInt<first_size>, although there can be a carry but
- // we simply remember it in xc and yc (xc and yc can be either 0 or 1),
- // and (x0 + x1)*(y0 + y1) we calculate in this way (schoolbook algorithm):
- //
- // xc | temp
- // yc | temp2
- // --------------------
- // (temp * temp2)
- // xc*temp2 |
- // yc*temp |
- // xc*yc |
- // ---------- z1 --------
- //
- // and the result is never larger in size than 3*first_size
- xc = AddVector(x0, x1, first_size, second_size, temp.table);
- yc = AddVector(y0, y1, first_size, second_size, temp2.table);
- Mul3Big2<first_size>(temp.table, temp2.table, z1.table);
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare"
-#endif
- // clearing the rest of z1
- for(i=first_size*2 ; i<first_size*3 ; ++i)
- z1.table[i] = 0;
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
- if( xc )
- {
- // xc*temp2, shifted by first_size words
- c = AddVector(z1.table+first_size, temp2.table, first_size*3-first_size, first_size, z1.table+first_size);
- }
- if( yc )
- {
- // yc*temp, shifted by first_size words
- c = AddVector(z1.table+first_size, temp.table, first_size*3-first_size, first_size, z1.table+first_size);
- }
- if( xc && yc )
- {
- // xc*yc: add one at word first_size*2 and propagate the carry upwards
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare"
-#endif
- for( i=first_size*2 ; i<first_size*3 ; ++i )
- if( ++z1.table[i] != 0 )
- break; // break if there was no carry
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
- }
- // z1 = z1 - z2
- c = SubVector(z1.table, result+first_size*2, first_size*3, second_size*2, z1.table);
- // z1 = z1 - z0
- c = SubVector(z1.table, result, first_size*3, first_size*2, z1.table);
- // here we've calculated the z1
- // now we're adding it to the result
- if( first_size > second_size )
- {
- // odd split: only the lowest z1_size words of z1 fit above result+first_size,
- // the remaining words of z1 are expected to be zero (asserted below)
- uint z1_size = result_size - first_size;
- TTMATH_ASSERT( z1_size <= first_size*3 )
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare"
-#endif
- for(i=z1_size ; i<first_size*3 ; ++i)
- {
- TTMATH_ASSERT( z1.table[i] == 0 )
- }
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
- c = AddVector(result+first_size, z1.table, result_size-first_size, z1_size, result+first_size);
- }
- else
- {
- c = AddVector(result+first_size, z1.table, result_size-first_size, first_size*3, result+first_size);
- }
- }
-#ifdef _MSC_VER
-#pragma warning (default : 4717)
-#endif
- /*!
- multiplication this = this * ss2
- */
- uint MulFastest(const UInt<value_size> & ss2)
- {
- UInt<value_size*2> wide;
- MulFastestBig(ss2, wide);
- // the low half of the product becomes the new value
- for(uint lo=0 ; lo<value_size ; ++lo)
- table[lo] = wide.table[lo];
- // any non-zero word in the high half means the product did not fit
- uint c = 0;
- for(uint hi=value_size ; hi<value_size*2 && c==0 ; ++hi)
- if( wide.table[hi] != 0 )
- c = 1;
- TTMATH_LOGC("UInt::MulFastest", c)
- return c;
- }
- /*!
- multiplication result = this * ss2
- this method is trying to select the fastest algorithm
- (in the future this method can be improved)
- */
- void MulFastestBig(const UInt<value_size> & ss2, UInt<value_size*2> & result)
- {
- // Stores this * ss2 in 'result', choosing between the schoolbook
- // algorithm (Mul2Big / Mul2Big3) and the Karatsuba algorithm (Mul3Big).
- // Objects smaller than the Karatsuba threshold always use the schoolbook
- // algorithm; without this guard the code below would be unreachable.
- if( value_size < TTMATH_USE_KARATSUBA_MULTIPLICATION_FROM_SIZE )
- return Mul2Big(ss2, result);
- uint x1size = value_size, x2size = value_size;
- uint x1start = 0, x2start = 0;
- // x?size: one past the highest non-zero word of each operand
- for(x1size=value_size ; x1size>0 && table[x1size-1]==0 ; --x1size);
- for(x2size=value_size ; x2size>0 && ss2.table[x2size-1]==0 ; --x2size);
- if( x1size==0 || x2size==0 )
- {
- // either 'this' or 'ss2' is equal zero - the result is zero too
- result.SetZero();
- return;
- }
- // x?start: index of the lowest non-zero word of each operand
- for(x1start=0 ; x1start<x1size && table[x1start]==0 ; ++x1start);
- for(x2start=0 ; x2start<x2size && ss2.table[x2start]==0 ; ++x2start);
- uint distancex1 = x1size - x1start;
- uint distancex2 = x2size - x2start;
- if( distancex1 < 3 || distancex2 < 3 )
- // either 'this' or 'ss2' have only 2 (or 1) items different from zero (side by side)
- // (this condition in the future can be improved)
- return Mul2Big3<value_size>(table, ss2.table, result, x1start, x1size, x2start, x2size);
- // Karatsuba multiplication
- Mul3Big(ss2, result);
- TTMATH_LOG("UInt::MulFastestBig")
- }
- /*!
- *
- * Division
- *
- *
- */
- /*!
- division by one unsigned word
- returns 1 when divisor is zero
- */
- uint DivInt(uint divisor, uint * remainder = 0)
- {
- // Divides this value in place by the single word 'divisor'.
- // remainder - optional; when not null it receives the remainder
- // Returns 1 when divisor is zero ('this' is left unchanged), otherwise 0.
- if( divisor == 0 )
- {
- if( remainder )
- *remainder = 0; // this is for convenience, without it the compiler can report that 'remainder' is uninitialized
- TTMATH_LOG("UInt::DivInt")
- return 1;
- }
- if( divisor == 1 )
- {
- // dividing by one: the value stays as it is and the remainder is zero
- if( remainder )
- *remainder = 0;
- TTMATH_LOG("UInt::DivInt")
- return 0;
- }
- // work on a copy: 'this' is cleared and receives the quotient words
- UInt<value_size> dividend(*this);
- SetZero();
- sint i; // i must be with a sign
- uint r = 0;
- // we're looking for the last word in ss1
- // (the highest non-zero word of the dividend; higher quotient words stay zero)
- for(i=value_size-1 ; i>0 && dividend.table[i]==0 ; --i);
- // long division from the highest word down; r carries the running remainder
- for( ; i>=0 ; --i)
- DivTwoWords(r, dividend.table[i], divisor, &table[i], &r);
- if( remainder )
- *remainder = r;
- TTMATH_LOG("UInt::DivInt")
- return 0;
- }
- uint DivInt(uint divisor, uint & remainder)
- {
- // reference overload: forwards to the pointer version
- return DivInt(divisor, &remainder);
- }
- /*!
- division this = this / ss2
- return values:
- 0 - ok
- 1 - division by zero
- 'this' will be the quotient
- 'remainder' - remainder
- */
- uint Div( const UInt<value_size> & divisor,
- UInt<value_size> * remainder = 0,
- uint algorithm = 3)
- {
- // dispatch on the requested algorithm number
- if( algorithm == 1 )
- return Div1(divisor, remainder);
- if( algorithm == 2 )
- return Div2(divisor, remainder);
- // 3 and every other value: the third division algorithm
- return Div3(divisor, remainder);
- }
- uint Div(const UInt<value_size> & divisor, UInt<value_size> & remainder, uint algorithm = 3)
- {
- // reference overload: forwards to the pointer version
- return Div(divisor, &remainder, algorithm);
- }
- /*!
- return values:
- 0 - nothing more has to be done (the result is already calculated)
- 1 - division by zero
- 2 - division should be made
- */
- uint Div_StandardTest( const UInt<value_size> & v,
- uint & m, uint & n,
- UInt<value_size> * remainder = 0)
- {
- // Handles the trivial division cases reported by Div_CalculatingSize().
- // When it returns 0, the quotient is already in 'this' and the remainder
- // (if requested) in *remainder. When it returns 2, 'm' and 'n' hold the
- // indexes of the highest non-zero words of 'this' and 'v'.
- switch( Div_CalculatingSize(v, m, n) )
- {
- case 4: // 'this' is equal v
- // quotient 1, remainder 0
- if( remainder )
- remainder->SetZero();
- SetOne();
- TTMATH_LOG("UInt::Div_StandardTest")
- return 0;
- case 3: // 'this' is smaller than v
- // quotient 0, remainder is the old value of 'this'
- if( remainder )
- *remainder = *this;
- SetZero();
- TTMATH_LOG("UInt::Div_StandardTest")
- return 0;
- case 2: // 'this' is zero
- // quotient 0, remainder 0
- if( remainder )
- remainder->SetZero();
- SetZero();
- TTMATH_LOG("UInt::Div_StandardTest")
- return 0;
- case 1: // v is zero
- // division by zero: nothing is modified
- TTMATH_LOG("UInt::Div_StandardTest")
- return 1;
- }
- // Div_CalculatingSize() returned 0: a real division is required
- TTMATH_LOG("UInt::Div_StandardTest")
- return 2;
- }
- /*!
- return values:
- 0 - ok
- 'm' - is the index (from 0) of last non-zero word in table ('this')
- 'n' - is the index (from 0) of last non-zero word in v.table
- 1 - v is zero
- 2 - 'this' is zero
- 3 - 'this' is smaller than v
- 4 - 'this' is equal v
- if the return value is different than zero the 'm' and 'n' are undefined
- */
- uint Div_CalculatingSize(const UInt<value_size> & v, uint & m, uint & n)
- {
- // Classifies the division this / v (see the return codes above) and, when
- // it returns 0, leaves the indexes of the highest non-zero words in m and n.
- m = n = value_size-1;
- // n: index of the highest non-zero word of v
- for( ; n!=0 && v.table[n]==0 ; --n);
- if( n==0 && v.table[n]==0 )
- return 1;
- // m: index of the highest non-zero word of 'this'
- for( ; m!=0 && table[m]==0 ; --m);
- if( m==0 && table[m]==0 )
- return 2;
- if( m < n )
- return 3;
- else
- if( m == n )
- {
- // same number of words: compare from the highest word down,
- // stopping at the first word that differs (or at word 0)
- uint i;
- for(i = n ; i!=0 && table[i]==v.table[i] ; --i);
- if( table[i] < v.table[i] )
- return 3;
- else
- if (table[i] == v.table[i] )
- return 4;
- }
- return 0;
- }
- /*!
- the first division algorithm
- radix 2
- */
- uint Div1(const UInt<value_size> & divisor, UInt<value_size> * remainder = 0)
- {
- uint m,n;
- // trivial cases (zero operands, this <= divisor) are resolved by the standard test
- uint test = Div_StandardTest(divisor, m, n, remainder);
- if( test < 2 )
- return test;
- if( remainder )
- return Div1_Calculate(divisor, *remainder);
- // the caller does not want the remainder: use a throw-away object
- UInt<value_size> rem;
- return Div1_Calculate(divisor, rem);
- }
- /*!
- the first division algorithm
- radix 2
- */
- uint Div1(const UInt<value_size> & divisor, UInt<value_size> & remainder)
- {
- // reference overload: forwards to the pointer version
- return Div1(divisor, &remainder);
- }
- uint Div1_Calculate(const UInt<value_size> & divisor, UInt<value_size> & rest)
- {
- if( this != &divisor )
- return Div1_CalculateRef(divisor, rest);
- // dividing a value by itself: the calculation modifies 'this',
- // so it needs a separate copy of the divisor
- UInt<value_size> divisor_copy(divisor);
- return Div1_CalculateRef(divisor_copy, rest);
- }
- uint Div1_CalculateRef(const UInt<value_size> & divisor, UInt<value_size> & rest)
- {
- // Radix-2 division: one quotient bit per pass, value_size*TTMATH_BITS_PER_UINT
- // passes in total. 'this' is shifted left through Rcl(1, c), which feeds the
- // quotient bit c in at the bottom while the dividend bits leave at the top
- // and enter 'rest'. Always returns 0.
- // NOTE(review): the div_a/div_b and div_c/div_d pairs look like the two
- // states of a non-restoring division (subtract while 'rest' is non-negative,
- // add back while it is negative) -- confirm against the Add/Sub carry semantics.
- sint loop;
- sint c;
- rest.SetZero();
- loop = value_size * TTMATH_BITS_PER_UINT;
- c = 0;
- div_a:
- // state 1: shift the next bit into rest and subtract the divisor
- c = Rcl(1, c);
- c = rest.Add(rest,c);
- c = rest.Sub(divisor,c);
- c = !c;
- if(!c)
- goto div_d;
- div_b:
- --loop;
- if(loop)
- goto div_a;
- // all bits processed: shift the last quotient bit in
- c = Rcl(1, c);
- TTMATH_LOG("UInt::Div1_Calculate")
- return 0;
- div_c:
- // state 2: shift the next bit into rest and add the divisor back
- c = Rcl(1, c);
- c = rest.Add(rest,c);
- c = rest.Add(divisor);
- if(c)
- goto div_b;
- div_d:
- --loop;
- if(loop)
- goto div_c;
- // all bits processed while in state 2: shift the last quotient bit in
- // and add the divisor to rest one final time
- c = Rcl(1, c);
- c = rest.Add(divisor);
- TTMATH_LOG("UInt::Div1_Calculate")
- return 0;
- }
- /*!
- the second division algorithm
- return values:
- 0 - ok
- 1 - division by zero
- */
- uint Div2(const UInt<value_size> & divisor, UInt<value_size> * remainder = 0)
- {
- if( this != &divisor )
- return Div2Ref(divisor, remainder);
- // dividing a value by itself: the calculation modifies 'this',
- // so it needs a separate copy of the divisor
- UInt<value_size> divisor_copy(divisor);
- return Div2Ref(divisor_copy, remainder);
- }
- /*!
- the second division algorithm
- return values:
- 0 - ok
- 1 - division by zero
- */
- uint Div2(const UInt<value_size> & divisor, UInt<value_size> & remainder)
- {
- // reference overload: forwards to the pointer version
- return Div2(divisor, &remainder);
- }
- /*!
- the second division algorithm
- return values:
- 0 - ok
- 1 - division by zero
- */
- uint Div2Ref(const UInt<value_size> & divisor, UInt<value_size> * remainder = 0)
- {
- // One step of the second division algorithm.
- // Div2_Calculate() either finishes the division (status 0 or 1) or subtracts
- // the divisor shifted left by bits_diff from 'this' and returns 2; the quotient
- // is then the quotient of what is left, with bit number bits_diff set.
- uint bits_diff;
- uint status = Div2_Calculate(divisor, remainder, bits_diff);
- if( status < 2 )
- return status;
- if( CmpBiggerEqual(divisor) )
- {
- // what is left is still not smaller than the divisor: divide it (recursively)
- Div2(divisor, remainder);
- SetBit(bits_diff);
- }
- else
- {
- // what is left is smaller than the divisor: it is the remainder
- if( remainder )
- *remainder = *this;
- SetZero();
- SetBit(bits_diff);
- }
- TTMATH_LOG("UInt::Div2")
- return 0;
- }
- /*!
- return values:
- 0 - we've calculated the division
- 1 - division by zero
- 2 - we have to still calculate
- */
- uint Div2_Calculate(const UInt<value_size> & divisor, UInt<value_size> * remainder,
- uint & bits_diff)
- {
- // Resolves the trivial cases through Div2_FindLeadingBitsAndCheck(); otherwise
- // subtracts from 'this' the divisor shifted left as far as it still fits,
- // stores the shift in bits_diff and returns 2.
- uint table_id, index;
- uint divisor_table_id, divisor_index;
- uint status = Div2_FindLeadingBitsAndCheck( divisor, remainder,
- table_id, index,
- divisor_table_id, divisor_index);
- if( status < 2 )
- {
- TTMATH_LOG("UInt::Div2_Calculate")
- return status;
- }
- // here we know that 'this' is greater than divisor
- // then 'index' is greater or equal 'divisor_index'
- // (both are absolute bit positions of the leading bits)
- bits_diff = index - divisor_index;
- // align the leading bit of the divisor with the leading bit of 'this'
- UInt<value_size> divisor_copy(divisor);
- divisor_copy.Rcl(bits_diff, 0);
- if( CmpSmaller(divisor_copy, table_id) )
- {
- // the aligned divisor is greater than 'this': use one bit less of shift
- divisor_copy.Rcr(1);
- --bits_diff;
- }
- Sub(divisor_copy, 0);
- TTMATH_LOG("UInt::Div2_Calculate")
- return 2;
- }
- /*!
- return values:
- 0 - we've calculated the division
- 1 - division by zero
- 2 - we have to still calculate
- */
- uint Div2_FindLeadingBitsAndCheck( const UInt<value_size> & divisor,
- UInt<value_size> * remainder,
- uint & table_id, uint & index,
- uint & divisor_table_id, uint & divisor_index)
- {
- // Finds the leading bits of 'this' and of the divisor and handles the cases
- // that need no further calculation. On return 'index' and 'divisor_index'
- // are absolute bit positions (word index * bits per word + bit in word),
- // unless the method returned early for a zero operand.
- if( !divisor.FindLeadingBit(divisor_table_id, divisor_index) )
- {
- // division by zero
- TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
- return 1;
- }
- if( !FindLeadingBit(table_id, index) )
- {
- // zero is divided by something
- SetZero();
- if( remainder )
- remainder->SetZero();
- TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
- return 0;
- }
- // convert the bit-in-word indexes into absolute bit positions
- divisor_index += divisor_table_id * TTMATH_BITS_PER_UINT;
- index += table_id * TTMATH_BITS_PER_UINT;
- if( divisor_table_id == 0 )
- {
- // the divisor fits in a single word: use the word division
- uint r;
- DivInt(divisor.table[0], &r);
- if( remainder )
- {
- remainder->SetZero();
- remainder->table[0] = r;
- }
- TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
- return 0;
- }
- // divisor >= 'this' is resolved directly (quotient 0 or 1)
- if( Div2_DivisorGreaterOrEqual( divisor, remainder,
- table_id, index,
- divisor_index) )
- {
- TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
- return 0;
- }
- TTMATH_LOG("UInt::Div2_FindLeadingBitsAndCheck")
- return 2;
- }
- /*!
- return values:
- true if divisor is equal or greater than 'this'
- */
- bool Div2_DivisorGreaterOrEqual( const UInt<value_size> & divisor,
- UInt<value_size> * remainder,
- uint table_id, uint index,
- uint divisor_index )
- {
- // When the divisor is greater than or equal to 'this', stores the quotient
- // (0 or 1) in 'this' and the remainder in *remainder (if given) and returns
- // true. Otherwise nothing is modified and false is returned.
- // index / divisor_index are the absolute positions of the leading bits.
- if( divisor_index > index )
- {
- // divisor is greater than this
- if( remainder )
- *remainder = *this;
- SetZero();
- TTMATH_LOG("UInt::Div2_DivisorGreaterOrEqual")
- return true;
- }
- if( divisor_index == index )
- {
- // table_id == divisor_table_id as well
- // compare from the leading word down to the first word that differs
- uint i;
- for(i = table_id ; i!=0 && table[i]==divisor.table[i] ; --i);
- if( table[i] < divisor.table[i] )
- {
- // divisor is greater than 'this'
- if( remainder )
- *remainder = *this;
- SetZero();
- TTMATH_LOG("UInt::Div2_DivisorGreaterOrEqual")
- return true;
- }
- else
- if( table[i] == divisor.table[i] )
- {
- // divisor is equal 'this'
- if( remainder )
- remainder->SetZero();
- SetOne();
- TTMATH_LOG("UInt::Div2_DivisorGreaterOrEqual")
- return true;
- }
- }
- TTMATH_LOG("UInt::Div2_DivisorGreaterOrEqual")
- return false;
- }
- /*!
- the third division algorithm
- */
- uint Div3(const UInt<value_size> & ss2, UInt<value_size> * remainder = 0)
- {
- if( this != &ss2 )
- return Div3Ref(ss2, remainder);
- // dividing a value by itself: the calculation modifies 'this',
- // so it needs a separate copy of the divisor
- UInt<value_size> copy_ss2(ss2);
- return Div3Ref(copy_ss2, remainder);
- }
- /*!
- the third division algorithm
- */
- uint Div3(const UInt<value_size> & ss2, UInt<value_size> & remainder)
- {
- // reference overload: forwards to the pointer version
- return Div3(ss2, &remainder);
- }
- /*!
- the third division algorithm
- this algorithm is described in the following book:
- "The art of computer programming 2" (4.3.1 page 272)
- Donald E. Knuth
- !! give the description here (from the book)
- */
- uint Div3Ref(const UInt<value_size> & v, UInt<value_size> * remainder = 0)
- {
- // Entry point of the third division algorithm: this = this / v.
- // Returns the code from Div_StandardTest() for the trivial cases
- // (1 - division by zero, 0 - done), otherwise 0 after the division.
- uint m,n, test;
- test = Div_StandardTest(v, m, n, remainder);
- if( test < 2 )
- return test;
- if( n == 0 )
- {
- // the divisor has only one non-zero word: use the word division
- uint r;
- DivInt( v.table[0], &r );
- if( remainder )
- {
- remainder->SetZero();
- remainder->table[0] = r;
- }
- TTMATH_LOG("UInt::Div3")
- return 0;
- }
- // we can only use the third division algorithm when
- // the divisor is greater or equal 2^32 (has more than one 32-bit word)
- // turn the word indexes into word counts; m then becomes the difference
- // between the word counts of the dividend and of the divisor
- ++m;
- ++n;
- m = m - n;
- Div3_Division(v, remainder, m, n);
- TTMATH_LOG("UInt::Div3")
- return 0;
- }
- void Div3_Division(UInt<value_size> v, UInt<value_size> * remainder, uint m, uint n)
- {
- // Main loop of the third division algorithm.
- // v - the divisor, taken by value because it is normalized (shifted) here
- // n - number of significant words in v
- // m - number of significant words in 'this' minus n
- // One quotient word is produced per iteration, from q.table[m] down to q.table[0].
- UInt<value_size+1> uu, vv;
- UInt<value_size> q;
- uint d, u_value_size, u0, u1, u2, v1, v0, j=m;
- // shift v and 'this' left; u_value_size receives the bits shifted out of 'this'
- u_value_size = Div3_Normalize(v, n, d);
- // u2 is the word above the current n-word window of the dividend
- if( j+n == value_size )
- u2 = u_value_size;
- else
- u2 = table[j+n];
- Div3_MakeBiggerV(v, vv);
- // quotient words above index j are zero
- for(uint i = j+1 ; i<value_size ; ++i)
- q.table[i] = 0;
- while( true )
- {
- // the three top words of the current dividend window and
- // the two top words of the divisor are used to estimate the quotient word
- u1 = table[j+n-1];
- u0 = table[j+n-2];
- v1 = v.table[n-1];
- v0 = v.table[n-2];
- uint qp = Div3_Calculate(u2,u1,u0, v1,v0);
- // uu = window of 'this' starting at word j, uu -= vv * qp (qp may be corrected),
- // then the window is written back
- Div3_MakeNewU(uu, j, n, u2);
- Div3_MultiplySubtract(uu, vv, qp);
- Div3_CopyNewU(uu, j, n);
- q.table[j] = qp;
- // the next loop
- if( j-- == 0 )
- break;
- u2 = table[j+n];
- }
- // what is left in 'this' is the normalized remainder
- if( remainder )
- Div3_Unnormalize(remainder, n, d);
- *this = q;
- TTMATH_LOG("UInt::Div3_Division")
- }
- void Div3_MakeNewU(UInt<value_size+1> & uu, uint j, uint n, uint u_max)
- {
- // Builds uu from n words of 'this' starting at word j, followed by u_max
- // as the next higher word; all remaining words of uu are zero.
- uint i;
- for(i=0 ; i<n ; ++i, ++j)
- uu.table[i] = table[j];
- // 'n' is from <1..value_size> so and 'i' is from <0..value_size>
- // then table[i] is always correct (look at the declaration of 'uu')
- uu.table[i] = u_max;
- for( ++i ; i<value_size+1 ; ++i)
- uu.table[i] = 0;
- TTMATH_LOG("UInt::Div3_MakeNewU")
- }
- void Div3_CopyNewU(const UInt<value_size+1> & uu, uint j, uint n)
- {
- // Writes the n low words of uu back into 'this' starting at word j.
- uint i;
- for(i=0 ; i<n ; ++i)
- table[i+j] = uu.table[i];
- // the word above the window is copied too, but only when it exists in 'table'
- if( i+j < value_size )
- table[i+j] = uu.table[i];
- TTMATH_LOG("UInt::Div3_CopyNewU")
- }
- /*!
- we're making the new 'vv'
- the value is actually the same but the 'table' is bigger (value_size+1)
- */
- void Div3_MakeBiggerV(const UInt<value_size> & v, UInt<value_size+1> & vv)
- {
- // the additional top word is zero, the rest is a plain copy of v
- vv.table[value_size] = 0;
- uint word = 0;
- while( word < value_size )
- {
- vv.table[word] = v.table[word];
- ++word;
- }
- TTMATH_LOG("UInt::Div3_MakeBiggerV")
- }
- /*!
- we're moving all bits from 'v' into the left side of the n-1 word
- (the highest bit at v.table[n-1] will be equal one,
- the bits from 'this' we're moving the same times as 'v')
- return values:
- d - how many times we've moved
- return - the next-left value from 'this' (that after table[value_size-1])
- */
- uint Div3_Normalize(UInt<value_size> & v, uint n, uint & d)
- {
- // Shifts v left until the highest bit of v.table[n-1] is set and shifts
- // 'this' by the same amount.
- // d - receives the number of bit positions shifted
- // Returns the bits that were shifted out of the top word of 'this'
- // (zero when no shift was needed).
- // v.table[n-1] is != 0
- uint bit = (uint)FindLeadingBitInWord(v.table[n-1]);
- uint move = (TTMATH_BITS_PER_UINT - bit - 1);
- // remember the top word before 'this' is shifted
- uint res = table[value_size-1];
- d = move;
- if( move > 0 )
- {
- v.Rcl(move, 0);
- Rcl(move, 0);
- // keep only the 'move' highest bits of the old top word
- res = res >> (bit + 1);
- }
- else
- {
- res = 0;
- }
- TTMATH_LOG("UInt::Div3_Normalize")
- return res;
- }
- void Div3_Unnormalize(UInt<value_size> * remainder, uint n, uint d)
- {
- // Turns the normalized remainder left in 'this' into the real remainder:
- // words from index n upwards are cleared, the value is shifted right by d
- // bits (undoing Div3_Normalize) and copied to *remainder.
- // remainder must not be null (the caller checks it).
- for(uint i=n ; i<value_size ; ++i)
- table[i] = 0;
- Rcr(d,0);
- *remainder = *this;
- TTMATH_LOG("UInt::Div3_Unnormalize")
- }
- uint Div3_Calculate(uint u2, uint u1, uint u0, uint v1, uint v0)
- {
- // Estimates one quotient word from the three top words of the dividend
- // window (u2, u1, u0) and the two top words of the divisor (v1, v0).
- // The first guess is (u2:u1) / v1 with remainder rp; it is decreased while
- // it does not fit in one word or while guess*v0 > (rp:u0).
- UInt<2> u_temp;
- uint rp;
- bool next_test;
- TTMATH_ASSERT( v1 != 0 )
- u_temp.table[1] = u2;
- u_temp.table[0] = u1;
- u_temp.DivInt(v1, &rp);
- TTMATH_ASSERT( u_temp.table[1]==0 || u_temp.table[1]==1 )
- do
- {
- bool decrease = false;
- if( u_temp.table[1] == 1 )
- // the guess does not fit in a single word
- decrease = true;
- else
- {
- // compare guess*v0 (temp1) with the two-word value rp:u0 (temp2)
- UInt<2> temp1, temp2;
- UInt<2>::MulTwoWords(u_temp.table[0], v0, temp1.table+1, temp1.table);
- temp2.table[1] = rp;
- temp2.table[0] = u0;
- if( temp1 > temp2 )
- decrease = true;
- }
- next_test = false;
- if( decrease )
- {
- // lower the guess by one; the remainder grows by v1
- u_temp.SubOne();
- rp += v1;
- if( rp >= v1 ) // it means that there wasn't a carry (r<b from the book)
- next_test = true;
- }
- }
- while( next_test );
- TTMATH_LOG("UInt::Div3_Calculate")
- return u_temp.table[0];
- }
- void Div3_MultiplySubtract( UInt<value_size+1> & uu,
- const UInt<value_size+1> & vv, uint & qp)
- {
- // Computes uu = uu - vv*qp. If the subtraction borrows, the quotient word
- // qp was one too large: qp is decreased and vv is added back to uu.
- // D4 (in the book)
- UInt<value_size+1> vv_temp(vv);
- vv_temp.MulInt(qp);
- if( uu.Sub(vv_temp) )
- {
- // there was a carry
- //
- // !!! this part of code was not tested
- //
- --qp;
- uu.Add(vv);
- // can be a carry from this additions but it should be ignored
- // because it cancels with the borrow from uu.Sub(vv_temp)
- }
- TTMATH_LOG("UInt::Div3_MultiplySubtract")
- }
- /*!
- power this = this ^ pow
- binary algorithm (r-to-l)
- return values:
- 0 - ok
- 1 - carry
- 2 - incorrect argument (0^0)
- */
- uint Pow(UInt<value_size> pow)
- {
- // 'pow' is taken by value because it is consumed (shifted right) below
- if(pow.IsZero() && IsZero())
- // we don't define zero^zero
- return 2;
- // start - the current power of the base (squared on every pass)
- // result - product of the powers selected by the set bits of pow
- UInt<value_size> start(*this);
- UInt<value_size> result;
- result.SetOne();
- uint c = 0;
- // the loop stops on the first carry or when no bits of pow are left
- while( !c )
- {
- if( pow.table[0] & 1 )
- c += result.Mul(start);
- // move on to the next bit of the exponent
- pow.Rcr2_one(0);
- if( pow.IsZero() )
- break;
- c += start.Mul(start);
- }
- // the result is stored even when a carry was reported
- *this = result;
- TTMATH_LOGC("UInt::Pow(UInt<>)", c)
- return (c==0)? 0 : 1;
- }
- /*!
- square root
- e.g. Sqrt(9) = 3
- ('digit-by-digit' algorithm)
- */
- void Sqrt()
- {
- // Replaces this value with its square root, computed bit by bit.
- // value - what is left of the original number
- // bit - the current power of four being tested
- // 'this' - the root built so far
- UInt<value_size> bit, temp;
- if( IsZero() )
- return;
- UInt<value_size> value(*this);
- SetZero();
- // start with the second highest bit of the whole value
- bit.SetZero();
- bit.table[value_size-1] = (TTMATH_UINT_HIGHEST_BIT >> 1);
- // lower it, two bit positions at a time, until it is not greater than the value
- while( bit > value )
- bit.Rcr(2);
- while( !bit.IsZero() )
- {
- temp = *this;
- temp.Add(bit);
- if( value >= temp )
- {
- // the tested bit belongs to the root
- value.Sub(temp);
- Rcr(1);
- Add(bit);
- }
- else
- {
- Rcr(1);
- }
- bit.Rcr(2);
- }
- TTMATH_LOG("UInt::Sqrt")
- }
- /*!
- this method sets n first bits to value zero
- For example:
- let n=2 then if there's a value 111 (bin) there'll be '100' (bin)
- */
- void ClearFirstBits(uint n)
- {
- // Clears the n lowest bits of the value.
- // When n covers the whole value (or more), the value becomes zero.
- if( n >= value_size*TTMATH_BITS_PER_UINT )
- {
- SetZero();
- TTMATH_LOG("UInt::ClearFirstBits")
- return;
- }
- uint * p = table;
- // first we're clearing the whole words
- while( n >= TTMATH_BITS_PER_UINT )
- {
- *p++ = 0;
- // one whole word has been handled; without this the loop never ends
- // and the pointer runs past the end of 'table'
- n -= TTMATH_BITS_PER_UINT;
- }
- if( n == 0 )
- {
- TTMATH_LOG("UInt::ClearFirstBits")
- return;
- }
- // and then we're clearing the lowest n bits of the one word which is left
- // mask -- all bits are set to one
- uint mask = TTMATH_UINT_MAX_VALUE;
- mask = mask << n;
- (*p) &= mask;
- TTMATH_LOG("UInt::ClearFirstBits")
- }
- /*!
- this method returns true if the highest bit of the value is set
- */
- bool IsTheHighestBitSet() const
- {
- // only the most significant word can hold the highest bit
- const uint top_word = table[value_size-1];
- return (top_word & TTMATH_UINT_HIGHEST_BIT) != 0;
- }
- /*!
- this method returns true if the lowest bit of the value is set
- */
- bool IsTheLowestBitSet() const
- {
- // the lowest bit lives in the least significant word
- const uint low_word = table[0];
- return (low_word & 1) != 0;
- }
- /*!
- returning true if only the highest bit is set
- */
- bool IsOnlyTheHighestBitSet() const
- {
- // true only when every word below the top one is zero and the top word
- // consists of the highest bit alone
- // (the comparison below is always false for value_size == 1, which is
- // presumably why clang's tautological-compare warning is silenced here)
-#ifdef __clang__
-#pragma clang diagnostic push
-#pragma clang diagnostic ignored "-Wtautological-compare"
-#endif
- for(uint i=0 ; i<value_size-1 ; ++i)
- if( table[i] != 0 )
- return false;
-#ifdef __clang__
-#pragma clang diagnostic pop
-#endif
- if( table[value_size-1] != TTMATH_UINT_HIGHEST_BIT )
- return false;
- return true;
- }
- /*!
- returning true if only the lowest bit is set
- */
- bool IsOnlyTheLowestBitSet() const
- {
- // the lowest word must be exactly one and all the other words zero
- bool only_lowest = ( table[0] == 1 );
- for(uint word=1 ; only_lowest && word<value_size ; ++word)
- only_lowest = ( table[word] == 0 );
- return only_lowest;
- }
- /*!
- this method returns true if the value is equal zero
- */
- bool IsZero() const
- {
- // look for the first non-zero word; reaching the end means the value is zero
- uint word = 0;
- while( word < value_size && table[word] == 0 )
- ++word;
- return word == value_size;
- }
- /*!
- returning true if first 'bits' bits are equal zero
- */
- bool AreFirstBitsZero(uint bits) const
- {
- // Returns true when the 'bits' lowest bits of the value are all zero.
- // bits must not exceed the number of bits in the value (asserted below).
- TTMATH_ASSERT( bits <= value_size * TTMATH_BITS_PER_UINT )
- uint index = bits / TTMATH_BITS_PER_UINT;
- uint rest = bits % TTMATH_BITS_PER_UINT;
- uint i;
- // whole words covered by the range
- for(i=0 ; i<index ; ++i)
- if(table[i] != 0 )
- return false;
- if( rest == 0 )
- return true;
- // the 'rest' lowest bits of the next word
- // (rest is in <1, TTMATH_BITS_PER_UINT-1> here, so the shift count is valid)
- uint mask = TTMATH_UINT_MAX_VALUE >> (TTMATH_BITS_PER_UINT - rest);
- return (table[i] & mask) == 0;
- }
- /*!
- *
- * conversion methods
- *
- */
- /*!
- this method converts an UInt<another_size> type to this class
- this operation has mainly sense if the value from p is
- equal or smaller than that one which is returned from UInt<value_size>::SetMax()
- it returns a carry if the value 'p' is too big
- */
- template<uint argument_size>
- uint FromUInt(const UInt<argument_size> & p)
- {
- // Copies p into this object.
- // Returns 1 when p has a non-zero word that does not fit here, otherwise 0;
- // the low words are copied in both cases.
- uint min_size = (value_size < argument_size)? value_size : argument_size;
- uint i;
- // the words both objects have in common
- for(i=0 ; i<min_size ; ++i)
- table[i] = p.table[i];
- if( value_size > argument_size )
- {
- // 'this' is longer than 'p'
- // the additional high words are cleared
- for( ; i<value_size ; ++i)
- table[i] = 0;
- }
- else
- {
- // 'p' is longer than (or as long as) 'this':
- // a non-zero word that was not copied means the value was truncated
- for( ; i<argument_size ; ++i)
- if( p.table[i] != 0 )
- {
- TTMATH_LOGC("UInt::FromUInt(UInt<>)", 1)
- return 1;
- }
- }
- TTMATH_LOGC("UInt::FromUInt(UInt<>)", 0)
- return 0;
- }
- /*!
- this method converts an UInt<another_size> type to this class
- this operation has mainly sense if the value from p is
- equal or smaller than that one which is returned from UInt<value_size>::SetMax()
- it returns a carry if the value 'p' is too big
- */
- template<uint argument_size>
- uint FromInt(const UInt<argument_size> & p)
- {
- // same conversion under another name: forwards to FromUInt()
- return FromUInt(p);
- }
- /*!
- this method converts the uint type to this class
- */
- uint FromUInt(uint value)
- {
-     // the value lands in the lowest word, all the higher words are cleared
-     table[0] = value;
-     uint word = 1;
-     while( word < value_size )
-         table[word++] = 0;
-     TTMATH_LOG("UInt::FromUInt(uint)")
-     // a single word always fits, a carry is never reported
-     return 0;
- }
- /*!
- this method converts the uint type to this class
- */
- uint FromInt(uint value)
- {
-     // just another name for FromUInt(uint)
-     const uint carry = FromUInt(value);
-     return carry;
- }
- /*!
- this method converts the sint type to this class
- */
- uint FromInt(sint value)
- {
-     // the bit pattern is stored as it is
-     const uint carry = FromUInt(uint(value));
-     // a negative value is reported as a carry
-     return ( carry != 0 || value < 0 ) ? 1 : 0;
- }
- /*!
- this operator converts an UInt<another_size> type to this class
- it doesn't return a carry
- */
- template<uint argument_size>
- UInt<value_size> & operator=(const UInt<argument_size> & p)
- {
-     // the carry returned by FromUInt is dropped on purpose
-     (void)FromUInt(p);
-     return *this;
- }
- /*!
- the assignment operator
- */
- UInt<value_size> & operator=(const UInt<value_size> & p)
- {
-     // plain word by word copy
-     uint word = 0;
-     while( word < value_size )
-     {
-         table[word] = p.table[word];
-         ++word;
-     }
-     TTMATH_LOG("UInt::operator=(UInt<>)")
-     return *this;
- }
- /*!
- this method converts the uint type to this class
- */
- UInt<value_size> & operator=(uint i)
- {
-     // the result of FromUInt (always zero for one word) is not needed
-     (void)FromUInt(i);
-     return *this;
- }
- /*!
- a constructor for converting the uint to this class
- */
- UInt(uint i)
- {
-     // the whole initialization is done by FromUInt(uint)
-     this->FromUInt(i);
- }
- /*!
- this method converts the sint type to this class
- */
- UInt<value_size> & operator=(sint i)
- {
-     // the carry from FromInt (set for a negative 'i') is not passed on
-     (void)FromInt(i);
-     return *this;
- }
- /*!
- a constructor for converting the sint to this class
- look at the description of UInt::operator=(sint)
- */
- UInt(sint i)
- {
-     // the carry from FromInt (set for a negative 'i') is not passed on
-     this->FromInt(i);
- }
- /*!
- this method converts unsigned 64 bit int type to this class
- ***this method is created only on a 32bit platform***
- */
- uint FromUInt(ulint n)
- {
-     // the part of 'n' which doesn't fit into the lowest word
-     const ulint upper = n >> TTMATH_BITS_PER_UINT;
-     table[0] = (uint)n;
-     if( value_size == 1 )
-     {
-         // no second word: a non-empty upper part is reported as a carry
-         uint c = ( upper != 0 ) ? 1 : 0;
-         TTMATH_LOGC("UInt::FromUInt(ulint)", c)
-         return c;
-     }
-     table[1] = (uint)upper;
-     uint word = 2;
-     while( word < value_size )
-         table[word++] = 0;
-     TTMATH_LOG("UInt::FromUInt(ulint)")
-     return 0;
- }
- /*!
- this method converts unsigned 64 bit int type to this class
- ***this method is created only on a 32bit platform***
- */
- uint FromInt(ulint n)
- {
-     // just another name for FromUInt(ulint)
-     const uint carry = FromUInt(n);
-     return carry;
- }
- /*!
- this method converts signed 64 bit int type to this class
- ***this method is created only on a 32bit platform***
- */
- uint FromInt(slint n)
- {
-     // the bit pattern is stored as it is
-     const uint carry = FromUInt(ulint(n));
-     // a negative value is reported as a carry
-     return ( carry != 0 || n < 0 ) ? 1 : 0;
- }
- /*!
- this operator converts unsigned 64 bit int type to this class
- ***this operator is created only on a 32bit platform***
- */
- UInt<value_size> & operator=(ulint n)
- {
-     // the carry returned by FromUInt is not passed on
-     (void)FromUInt(n);
-     return *this;
- }
- /*!
- a constructor for converting unsigned 64 bit int to this class
- ***this constructor is created only on a 32bit platform***
- */
- UInt(ulint n)
- {
-     // the carry returned by FromUInt is not passed on
-     this->FromUInt(n);
- }
- /*!
- this operator converts signed 64 bit int type to this class
- ***this operator is created only on a 32bit platform***
- */
- UInt<value_size> & operator=(slint n)
- {
-     // the carry returned by FromInt is not passed on
-     (void)FromInt(n);
-     return *this;
- }
- /*!
- a constructor for converting signed 64 bit int to this class
- ***this constructor is created only on a 32bit platform***
- */
- UInt(slint n)
- {
-     // the carry returned by FromInt is not passed on
-     this->FromInt(n);
- }
- /*!
- this method converts 32 bit unsigned int type to this class
- ***this operator is created only on a 64bit platform***
- */
- uint FromUInt(unsigned int i)
- {
-     // convert to the word type first, then the uint version is used
-     const uint widened = uint(i);
-     return FromUInt(widened);
- }
- /*!
- this method converts 32 bit unsigned int type to this class
- ***this operator is created only on a 64bit platform***
- */
- uint FromInt(unsigned int i)
- {
-     // convert to the word type first, then the uint version is used
-     const uint widened = uint(i);
-     return FromUInt(widened);
- }
- /*!
- this method converts 32 bit signed int type to this class
- ***this operator is created only on a 64bit platform***
- */
- uint FromInt(signed int i)
- {
-     // convert to the signed word type first, then the sint version is used
-     const sint widened = sint(i);
-     return FromInt(widened);
- }
- /*!
- this operator converts 32 bit unsigned int type to this class
- ***this operator is created only on a 64bit platform***
- */
- UInt<value_size> & operator=(unsigned int i)
- {
-     // the carry returned by FromUInt is not passed on
-     (void)FromUInt(i);
-     return *this;
- }
- /*!
- a constructor for converting 32 bit unsigned int to this class
- ***this constructor is created only on a 64bit platform***
- */
- UInt(unsigned int i)
- {
-     // the carry returned by FromUInt is not passed on
-     this->FromUInt(i);
- }
- /*!
- an operator for converting 32 bit signed int to this class
- ***this constructor is created only on a 64bit platform***
- */
- UInt<value_size> & operator=(signed int i)
- {
-     // the carry returned by FromInt is not passed on
-     (void)FromInt(i);
-     return *this;
- }
- /*!
- a constructor for converting 32 bit signed int to this class
- ***this constructor is created only on a 64bit platform***
- */
- UInt(signed int i)
- {
-     // the carry returned by FromInt is not passed on
-     this->FromInt(i);
- }
- /*!
- a constructor for converting a string to this class (with the base=10)
- */
- UInt(const char * s)
- {
-     // parsed with the default base; the carry from FromString is not passed on
-     this->FromString(s);
- }
- /*!
- a constructor for converting a string to this class (with the base=10)
- */
- UInt(const std::string & s)
- {
-     // parsed with the default base; the carry from FromString is not passed on
-     const char * text = s.c_str();
-     FromString( text );
- }
- /*!
- a constructor for converting a string to this class (with the base=10)
- */
- UInt(const wchar_t * s)
- {
-     // parsed with the default base; the carry from FromString is not passed on
-     this->FromString(s);
- }
- /*!
- a constructor for converting a string to this class (with the base=10)
- */
- UInt(const std::wstring & s)
- {
-     // parsed with the default base; the carry from FromString is not passed on
-     const wchar_t * text = s.c_str();
-     FromString( text );
- }
- /*!
- a default constructor
- we don't clear the table
- */
- UInt()
- {
-     // when macro TTMATH_DEBUG_LOG is defined
-     // we set special values to the table
-     // in order to be everywhere the same value of the UInt object
-     // without this it would be difficult to analyse the log file
-     // (without the macro the table is left uninitialized)
-     #ifdef TTMATH_DEBUG_LOG
-     #ifdef TTMATH_PLATFORM32
-     // fill pattern for 32 bit words
-     for(uint i=0 ; i<value_size ; ++i)
-         table[i] = 0xc1c1c1c1;
-     #else
-     // fill pattern for 64 bit words
-     for(uint i=0 ; i<value_size ; ++i)
-         table[i] = 0xc1c1c1c1c1c1c1c1;
-     #endif
-     #endif
- }
- /*!
- a copy constructor
- */
- UInt(const UInt<value_size> & u)
- {
-     // plain word by word copy
-     uint word = 0;
-     while( word < value_size )
-     {
-         table[word] = u.table[word];
-         ++word;
-     }
-     TTMATH_LOG("UInt::UInt(UInt<>)")
- }
- /*!
- a template for producting constructors for copying from another types
- */
- template<uint argument_size>
- UInt(const UInt<argument_size> & u)
- {
-     // the size of 'this' stays 'value_size', only the value is taken from 'u'
-     // (the carry returned by FromUInt is not passed on)
-     this->FromUInt(u);
- }
- /*!
- a destructor
- */
- ~UInt()
- {
- // intentionally empty
- }
- /*!
- this method returns the lowest value from table
- we must be sure when we using this method whether the value
- will hold in an uint type or not (the rest value from the table must be zero)
- */
- uint ToUInt() const
- {
-     // only the lowest word is returned, the higher words are not checked
-     const uint lowest = table[0];
-     return lowest;
- }
- /*!
- this method converts the value to uint type
- can return a carry if the value is too long to store it in uint type
- */
- uint ToUInt(uint & result) const
- {
-     result = table[0];
-     // any non-empty higher word means that the value didn't fit
-     uint carry = 0;
-     for(uint word = 1 ; word < value_size && carry == 0 ; ++word)
-         carry = ( table[word] != 0 ) ? 1 : 0;
-     return carry;
- }
- /*!
- this method converts the value to uint type
- can return a carry if the value is too long to store it in uint type
- */
- uint ToInt(uint & result) const
- {
-     // just another name for ToUInt(uint &)
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
- this method converts the value to sint type (signed integer)
- can return a carry if the value is too long to store it in sint type
- */
- uint ToInt(sint & result) const
- {
-     result = sint(table[0]);
-     // the highest bit of the lowest word has to be clear ...
-     bool fits = ( (result & TTMATH_UINT_HIGHEST_BIT) == 0 );
-     // ... and all the higher words have to be empty
-     for(uint word = 1 ; fits && word < value_size ; ++word)
-         fits = ( table[word] == 0 );
-     return fits ? 0 : 1;
- }
- /*!
- this method converts the value to ulint type (64 bit unsigned integer)
- can return a carry if the value is too long to store it in ulint type
- *** this method is created only on a 32 bit platform ***
- */
- uint ToUInt(ulint & result) const
- {
-     // an object with only one word always fits
-     if( value_size == 1 )
-     {
-         result = table[0];
-         return 0;
-     }
-     // the two lowest words are glued together
-     const uint low_word = table[0];
-     const uint high_word = table[1];
-     result = (ulint(high_word) << TTMATH_BITS_PER_UINT) | low_word;
-     // any non-empty word above them means that the value didn't fit
-     uint carry = 0;
-     for(uint word = 2 ; word < value_size && carry == 0 ; ++word)
-         carry = ( table[word] != 0 ) ? 1 : 0;
-     return carry;
- }
- /*!
- this method converts the value to ulint type (64 bit unsigned integer)
- can return a carry if the value is too long to store it in ulint type
- *** this method is created only on a 32 bit platform ***
- */
- uint ToInt(ulint & result) const
- {
-     // just another name for ToUInt(ulint &)
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
- this method converts the value to slint type (64 bit signed integer)
- can return a carry if the value is too long to store it in slint type
- *** this method is created only on a 32 bit platform ***
- */
- uint ToInt(slint & result) const
- {
-     ulint wide;
-     const uint carry = ToUInt(wide);
-     result = slint(wide);
-     // a negative result means that the highest bit was set
-     return ( carry != 0 || result < 0 ) ? 1 : 0;
- }
- /*!
- this method converts the value to a 32 unsigned integer
- can return a carry if the value is too long to store it in this type
- *** this method is created only on a 64 bit platform ***
- */
- uint ToUInt(unsigned int & result) const
- {
-     const uint lowest = table[0];
-     result = static_cast<unsigned int>(lowest);
-     // nothing may be set above the low 32 bits of the first word ...
-     bool fits = ( (lowest >> 32) == 0 );
-     // ... and all the higher words have to be empty
-     for(uint word = 1 ; fits && word < value_size ; ++word)
-         fits = ( table[word] == 0 );
-     return fits ? 0 : 1;
- }
- /*!
- this method converts the value to a 32 unsigned integer
- can return a carry if the value is too long to store it in this type
- *** this method is created only on a 64 bit platform ***
- */
- uint ToInt(unsigned int & result) const
- {
-     // just another name for ToUInt(unsigned int &)
-     const uint carry = ToUInt(result);
-     return carry;
- }
- /*!
- this method converts the value to a 32 signed integer
- can return a carry if the value is too long to store it in this type
- *** this method is created only on a 64 bit platform ***
- */
- uint ToInt(int & result) const
- {
-     unsigned int narrow;
-     const uint carry = ToUInt(narrow);
-     result = int(narrow);
-     // a negative result means that the highest bit was set
-     return ( carry != 0 || result < 0 ) ? 1 : 0;
- }
- /*!
- an auxiliary method for converting into the string
- it returns 1/log2(x) (the logarithm of 2 with the base x), 0 is returned when x is out of range
- where x is in <2;16>
- */
- double ToStringLog2(uint x) const
- {
-     // log_tab[x-2] holds 1/log2(x) for x = 2, 3, ..., 16
-     // (e.g. 1.0 for x=2, 0.5 for x=4, 0.25 for x=16)
-     static double log_tab[] = {
-         1.000000000000000000,
-         0.630929753571457437,
-         0.500000000000000000,
-         0.430676558073393050,
-         0.386852807234541586,
-         0.356207187108022176,
-         0.333333333333333333,
-         0.315464876785728718,
-         0.301029995663981195,
-         0.289064826317887859,
-         0.278942945651129843,
-         0.270238154427319741,
-         0.262649535037193547,
-         0.255958024809815489,
-         0.250000000000000000
-     };
-     // a base outside of the table gives zero
-     const bool in_range = ( x >= 2 && x <= 16 );
-     return in_range ? log_tab[x-2] : 0;
- }
- /*!
- an auxiliary method for converting to a string
- it's used from Int::ToString() too (negative is set true then)
- */
- template<class string_type>
- void ToStringBase(string_type & result, uint b = 10, bool negative = false) const
- {
-     // the digits are taken from a copy, 'this' is not changed
-     UInt<value_size> work(*this);
-     result.clear();
-     // only the bases from 2 to 16 are supported
-     if( b<2 || b>16 )
-         return;
-     uint lead_word, lead_bit;
-     // no bit is set at all: the value is zero
-     if( !FindLeadingBit(lead_word, lead_bit) )
-     {
-         result = '0';
-         return;
-     }
-     if( negative )
-         result = '-';
-     // estimate the length: (count of used bits) * ToStringLog2(b)
-     // a double is used for not making an overflow in uint type
-     double estimate = lead_word;
-     estimate *= TTMATH_BITS_PER_UINT;
-     estimate += lead_bit + 1;
-     estimate *= ToStringLog2(b);
-     const uint wanted = static_cast<uint>(estimate) + 3; // plus some epsilon
-     if( result.capacity() < wanted )
-         result.reserve(wanted);
-     // the digits are produced from the least significant one
-     uint digit;
-     do
-     {
-         work.DivInt(b, &digit);
-         result.insert(result.end(), static_cast<char>(Misc::DigitToChar(digit)));
-     }
-     while( !work.IsZero() );
-     // now the digits are reversed in place
-     // (the hyphen, when negative is true, stays at the first position)
-     size_t left = negative ? 1 : 0;
-     size_t right = result.size() - 1;
-     while( left < right )
-     {
-         char keep = static_cast<char>(result[left]);
-         result[left] = result[right];
-         result[right] = keep;
-         ++left;
-         --right;
-     }
- }
- /*!
- this method converts the value to a string with a base equal 'b'
- */
- void ToString(std::string & result, uint b = 10) const
- {
-     // 'result' is filled by ToStringBase (without the sign)
-     ToStringBase(result, b);
- }
- std::string ToString(uint b = 10) const
- {
-     // the same as ToString(std::string &, uint) but the text is returned
-     std::string text;
-     ToStringBase(text, b);
-     return text;
- }
- void ToString(std::wstring & result, uint b = 10) const
- {
-     // 'result' is filled by ToStringBase (without the sign)
-     ToStringBase(result, b);
- }
- std::wstring ToWString(uint b = 10) const
- {
-     // the wide characters version of ToString(uint)
-     std::wstring text;
-     ToStringBase(text, b);
-     return text;
- }
- /*!
- an auxiliary method for converting from a string
- */
- template<class char_type>
- uint FromStringBase(const char_type * s, uint b = 10, const char_type ** after_source = 0, bool * value_read = 0)
- {
-     UInt<value_size> base( b );
-     UInt<value_size> digit_value;
-     sint digit;
-     uint carry = 0;
-     SetZero();
-     digit_value.SetZero();
-     Misc::SkipWhiteCharacters(s);
-     // the outputs are set before the base is checked,
-     // so they are valid when we return with an error too
-     if( after_source )
-         *after_source = s;
-     if( value_read )
-         *value_read = false;
-     if( b<2 || b>16 )
-         return 1;
-     // CharToDigit gives -1 for the first character which is not a digit
-     while( (digit = Misc::CharToDigit(*s, b)) != -1 )
-     {
-         if( value_read )
-             *value_read = true;
-         // after an overflow the rest digits are only skipped
-         if( carry == 0 )
-         {
-             digit_value.table[0] = digit;
-             carry += Mul(base); // !! IMPROVE ME: there can be used MulInt here
-             carry += Add(digit_value);
-         }
-         ++s;
-     }
-     if( after_source )
-         *after_source = s;
-     TTMATH_LOGC("UInt::FromString", carry)
-     return (carry==0)? 0 : 1;
- }
- /*!
- this method converts a string into its value
- it returns carry=1 if the value will be too big or an incorrect base 'b' is given
- string is ended with a non-digit value, for example:
- "12" will be translated to 12
- as well as:
- "12foo" will be translated to 12 too
- leading white characters (if any) will be omitted
- if the value from s is too large the rest digits will be skipped
- after_source (if exists) is pointing at the end of the parsed string
- value_read (if exists) tells whether something has actually been read (at least one digit)
- */
- uint FromString(const char * s, uint b = 10, const char ** after_source = 0, bool * value_read = 0)
- {
-     // the whole parsing is done by the FromStringBase template
-     const uint carry = FromStringBase(s, b, after_source, value_read);
-     return carry;
- }
- /*!
- this method converts a string into its value
- (it returns carry=1 if the value will be too big or an incorrect base 'b' is given)
- */
- uint FromString(const std::string & s, uint b = 10)
- {
-     // the C string version is used
-     const char * text = s.c_str();
-     return FromString( text, b );
- }
- /*!
- this operator converts a string into its value (with base = 10)
- */
- UInt<value_size> & operator=(const char * s)
- {
-     // the carry returned by FromString is not passed on
-     (void)FromString(s);
-     return *this;
- }
- /*!
- this operator converts a string into its value (with base = 10)
- */
- UInt<value_size> & operator=(const std::string & s)
- {
-     // the carry returned by FromString is not passed on
-     const char * text = s.c_str();
-     FromString( text );
-     return *this;
- }
- /*!
- this method converts a string into its value
- */
- uint FromString(const wchar_t * s, uint b = 10, const wchar_t ** after_source = 0, bool * value_read = 0)
- {
-     // the whole parsing is done by the FromStringBase template
-     const uint carry = FromStringBase(s, b, after_source, value_read);
-     return carry;
- }
- /*!
- this method converts a string into its value
- (it returns carry=1 if the value will be too big or an incorrect base 'b' is given)
- */
- uint FromString(const std::wstring & s, uint b = 10)
- {
-     // the wide C string version is used
-     const wchar_t * text = s.c_str();
-     return FromString( text, b );
- }
- /*!
- this operator converts a string into its value (with base = 10)
- */
- UInt<value_size> & operator=(const wchar_t * s)
- {
-     // the carry returned by FromString is not passed on
-     (void)FromString(s);
-     return *this;
- }
- /*!
- this operator converts a string into its value (with base = 10)
- */
- UInt<value_size> & operator=(const std::wstring & s)
- {
-     // the carry returned by FromString is not passed on
-     const wchar_t * text = s.c_str();
-     FromString( text );
-     return *this;
- }
- /*!
- *
- * methods for comparing
- *
- */
- /*!
- this method returns true if 'this' is smaller than 'l'
- 'index' is an index of the first word from will be the comparison performed
- (note: we start the comparison from back - from the last word, when index is -1 /default/
- it is automatically set into the last word)
- I introduced it for some kind of optimization made in the second division algorithm (Div2)
- */
- bool CmpSmaller(const UInt<value_size> & l, sint index = -1) const
- {
-     // -1 or an index beyond the table means: start from the last word
-     sint word = ( index==-1 || index>=sint(value_size) ) ? sint(value_size - 1) : index;
-     // the most significant word which differs decides
-     while( word >= 0 )
-     {
-         if( table[word] < l.table[word] )
-             return true;
-         if( table[word] > l.table[word] )
-             return false;
-         --word;
-     }
-     // all the compared words are equal
-     return false;
- }
- /*!
- this method returns true if 'this' is bigger than 'l'
- 'index' is an index of the first word from will be the comparison performed
- (note: we start the comparison from back - from the last word, when index is -1 /default/
- it is automatically set into the last word)
- I introduced it for some kind of optimization made in the second division algorithm (Div2)
- */
- bool CmpBigger(const UInt<value_size> & l, sint index = -1) const
- {
-     // -1 or an index beyond the table means: start from the last word
-     sint word = ( index==-1 || index>=sint(value_size) ) ? sint(value_size - 1) : index;
-     // the most significant word which differs decides
-     while( word >= 0 )
-     {
-         if( table[word] > l.table[word] )
-             return true;
-         if( table[word] < l.table[word] )
-             return false;
-         --word;
-     }
-     // all the compared words are equal
-     return false;
- }
- /*!
- this method returns true if 'this' is equal 'l'
- 'index' is an index of the first word from will be the comparison performed
- (note: we start the comparison from back - from the last word, when index is -1 /default/
- it is automatically set into the last word)
- */
- bool CmpEqual(const UInt<value_size> & l, sint index = -1) const
- {
-     // -1 or an index beyond the table means: start from the last word
-     sint word = ( index==-1 || index>=sint(value_size) ) ? sint(value_size - 1) : index;
-     // walk down as long as the words are the same
-     while( word >= 0 && table[word] == l.table[word] )
-         --word;
-     // we got below the first word only when nothing differed
-     return word < 0;
- }
- /*!
- this method returns true if 'this' is smaller than or equal 'l'
- 'index' is an index of the first word from will be the comparison performed
- (note: we start the comparison from back - from the last word, when index is -1 /default/
- it is automatically set into the last word)
- */
- bool CmpSmallerEqual(const UInt<value_size> & l, sint index=-1) const
- {
-     // -1 or an index beyond the table means: start from the last word
-     sint word = ( index==-1 || index>=sint(value_size) ) ? sint(value_size - 1) : index;
-     // the most significant word which differs decides
-     while( word >= 0 )
-     {
-         if( table[word] < l.table[word] )
-             return true;
-         if( table[word] > l.table[word] )
-             return false;
-         --word;
-     }
-     // all the compared words are equal
-     return true;
- }
- /*!
- this method returns true if 'this' is bigger than or equal 'l'
- 'index' is an index of the first word from will be the comparison performed
- (note: we start the comparison from back - from the last word, when index is -1 /default/
- it is automatically set into the last word)
- */
- bool CmpBiggerEqual(const UInt<value_size> & l, sint index=-1) const
- {
-     // -1 or an index beyond the table means: start from the last word
-     sint word = ( index==-1 || index>=sint(value_size) ) ? sint(value_size - 1) : index;
-     // the most significant word which differs decides
-     while( word >= 0 )
-     {
-         if( table[word] > l.table[word] )
-             return true;
-         if( table[word] < l.table[word] )
-             return false;
-         --word;
-     }
-     // all the compared words are equal
-     return true;
- }
- /*
- comparison operators
- */
- bool operator<(const UInt<value_size> & l) const
- {
-     // all the words are compared (the default index)
-     const bool smaller = CmpSmaller(l);
-     return smaller;
- }
- bool operator>(const UInt<value_size> & l) const
- {
-     // all the words are compared (the default index)
-     const bool bigger = CmpBigger(l);
-     return bigger;
- }
- bool operator==(const UInt<value_size> & l) const
- {
-     // all the words are compared (the default index)
-     const bool equal = CmpEqual(l);
-     return equal;
- }
- bool operator!=(const UInt<value_size> & l) const
- {
-     // simply the negation of operator==
-     const bool equal = operator==(l);
-     return !equal;
- }
- bool operator<=(const UInt<value_size> & l) const
- {
-     // all the words are compared (the default index)
-     const bool smaller_or_equal = CmpSmallerEqual(l);
-     return smaller_or_equal;
- }
- bool operator>=(const UInt<value_size> & l) const
- {
-     // all the words are compared (the default index)
-     const bool bigger_or_equal = CmpBiggerEqual(l);
-     return bigger_or_equal;
- }
- /*!
- *
- * standard mathematical operators
- *
- */
- UInt<value_size> operator-(const UInt<value_size> & p2) const
- {
-     // 'this' stays untouched, the carry (borrow) from Sub is dropped
-     UInt<value_size> difference(*this);
-     difference.Sub(p2);
-     return difference;
- }
- UInt<value_size> & operator-=(const UInt<value_size> & p2)
- {
-     // the carry (borrow) returned by Sub is dropped
-     (void)Sub(p2);
-     return *this;
- }
- UInt<value_size> operator+(const UInt<value_size> & p2) const
- {
-     // 'this' stays untouched, the carry from Add is dropped
-     UInt<value_size> sum(*this);
-     sum.Add(p2);
-     return sum;
- }
- UInt<value_size> & operator+=(const UInt<value_size> & p2)
- {
-     // the carry returned by Add is dropped
-     (void)Add(p2);
-     return *this;
- }
- UInt<value_size> operator*(const UInt<value_size> & p2) const
- {
-     // 'this' stays untouched, the result of Mul is not checked
-     UInt<value_size> product(*this);
-     product.Mul(p2);
-     return product;
- }
- UInt<value_size> & operator*=(const UInt<value_size> & p2)
- {
-     // the result of Mul is not checked
-     (void)Mul(p2);
-     return *this;
- }
- UInt<value_size> operator/(const UInt<value_size> & p2) const
- {
-     // 'this' stays untouched, the result of Div is not checked
-     UInt<value_size> quotient(*this);
-     quotient.Div(p2);
-     return quotient;
- }
- UInt<value_size> & operator/=(const UInt<value_size> & p2)
- {
-     // the result of Div is not checked
-     (void)Div(p2);
-     return *this;
- }
- UInt<value_size> operator%(const UInt<value_size> & p2) const
- {
-     // the division is made on a copy, only the remainder is interesting
-     UInt<value_size> quotient(*this);
-     UInt<value_size> rest;
-     quotient.Div( p2, rest );
-     return rest;
- }
- UInt<value_size> & operator%=(const UInt<value_size> & p2)
- {
-     // Div changes 'this' and gives the remainder in 'rest',
-     // then 'this' is overwritten with the remainder
-     UInt<value_size> rest;
-     Div( p2, rest );
-     *this = rest;
-     return *this;
- }
- /*!
- Prefix operator e.g ++variable
- */
- UInt<value_size> & operator++()
- {
-     // the result of AddOne is not checked
-     (void)AddOne();
-     return *this;
- }
- /*!
- Postfix operator e.g variable++
- */
- UInt<value_size> operator++(int)
- {
-     // the value from before the incrementation is returned
-     UInt<value_size> before( *this );
-     AddOne();
-     return before;
- }
- UInt<value_size> & operator--()
- {
-     // prefix operator e.g. --variable, the result of SubOne is not checked
-     (void)SubOne();
-     return *this;
- }
- UInt<value_size> operator--(int)
- {
-     // postfix operator e.g. variable--
-     // the value from before the decrementation is returned
-     UInt<value_size> before( *this );
-     SubOne();
-     return before;
- }
- /*!
- *
- * bitwise operators
- *
- */
- UInt<value_size> operator~() const
- {
-     // BitNot is made on a copy, 'this' stays untouched
-     UInt<value_size> inverted( *this );
-     inverted.BitNot();
-     return inverted;
- }
- UInt<value_size> operator&(const UInt<value_size> & p2) const
- {
-     // BitAnd is made on a copy, 'this' stays untouched
-     UInt<value_size> masked( *this );
-     masked.BitAnd(p2);
-     return masked;
- }
- UInt<value_size> & operator&=(const UInt<value_size> & p2)
- {
-     // BitAnd works directly on 'this'
-     this->BitAnd(p2);
-     return *this;
- }
- UInt<value_size> operator|(const UInt<value_size> & p2) const
- {
-     // BitOr is made on a copy, 'this' stays untouched
-     UInt<value_size> merged( *this );
-     merged.BitOr(p2);
-     return merged;
- }
- UInt<value_size> & operator|=(const UInt<value_size> & p2)
- {
-     // BitOr works directly on 'this'
-     this->BitOr(p2);
-     return *this;
- }
- UInt<value_size> operator^(const UInt<value_size> & p2) const
- {
-     // BitXor is made on a copy, 'this' stays untouched
-     UInt<value_size> toggled( *this );
-     toggled.BitXor(p2);
-     return toggled;
- }
- UInt<value_size> & operator^=(const UInt<value_size> & p2)
- {
-     // BitXor works directly on 'this'
-     this->BitXor(p2);
-     return *this;
- }
- UInt<value_size> operator>>(int move) const
- {
-     // Rcr is made on a copy, 'this' stays untouched
-     UInt<value_size> shifted( *this );
-     shifted.Rcr(move);
-     return shifted;
- }
- UInt<value_size> & operator>>=(int move)
- {
-     // Rcr works directly on 'this', its result is not checked
-     (void)Rcr(move);
-     return *this;
- }
- UInt<value_size> operator<<(int move) const
- {
-     // Rcl is made on a copy, 'this' stays untouched
-     UInt<value_size> shifted( *this );
-     shifted.Rcl(move);
-     return shifted;
- }
- UInt<value_size> & operator<<=(int move)
- {
-     // Rcl works directly on 'this', its result is not checked
-     (void)Rcl(move);
-     return *this;
- }
- /*!
- *
- * input/output operators for standard streams
- *
- * (they are very simple, in the future they should be changed)
- *
- */
- /*!
- an auxiliary method for outputing to standard streams
- */
- template<class ostream_type, class string_type>
- static ostream_type & OutputToStream(ostream_type & s, const UInt<value_size> & l)
- {
-     // the value is converted to a text first (ToString with its default base)
-     string_type text;
-     l.ToString(text);
-     // and then the text is sent to the stream
-     s << text;
-     return s;
- }
- /*!
- output to standard streams
- */
- friend std::ostream & operator<<(std::ostream & s, const UInt<value_size> & l)
- {
-     // the narrow characters version of OutputToStream
-     std::ostream & stream = OutputToStream<std::ostream, std::string>(s, l);
-     return stream;
- }
- /*!
- output to standard streams
- */
- friend std::wostream & operator<<(std::wostream & s, const UInt<value_size> & l)
- {
-     // the wide characters version of OutputToStream
-     std::wostream & stream = OutputToStream<std::wostream, std::wstring>(s, l);
-     return stream;
- }
- /*!
- an auxiliary method for reading from standard streams
- */
- template<class istream_type, class string_type, class char_type>
- static istream_type & InputFromStream(istream_type & s, UInt<value_size> & l)
- {
-     string_type digits;
-     // char or wchar_t for operator>>
-     char_type ch;
-     // operator>> omits white characters if they're set for omitting
-     s >> ch;
-     // only digits are collected (base=10)
-     for( ; s.good() && Misc::CharToDigit(ch, 10)>=0 ; ch = static_cast<char_type>(s.get()) )
-         digits += ch;
-     // the last read character is given back
-     // (it's not belonging to the value)
-     s.unget();
-     l.FromString(digits);
-     return s;
- }
- /*!
- input from standard streams
- */
- friend std::istream & operator>>(std::istream & s, UInt<value_size> & l)
- {
-     // the narrow characters version of InputFromStream
-     std::istream & stream = InputFromStream<std::istream, std::string, char>(s, l);
-     return stream;
- }
- /*!
- input from standard streams
- */
- friend std::wistream & operator>>(std::wistream & s, UInt<value_size> & l)
- {
-     // the wide characters version of InputFromStream
-     std::wistream & stream = InputFromStream<std::wistream, std::wstring, wchar_t>(s, l);
-     return stream;
- }
- /*
- Following methods are defined in:
- ttmathuint_x86.h
- ttmathuint_x86_64.h
- ttmathuint_noasm.h
- */
- static uint AddTwoWords(uint a, uint b, uint carry, uint * result);
- static uint SubTwoWords(uint a, uint b, uint carry, uint * result);
- // one word (u) which can also be accessed as two unsigned int members (u_.low, u_.high)
- // it's used as a parameter type by the DivTwoWords* helper methods declared below
- // NOTE(review): which half of 'u' the member 'low' overlays depends on the
- // byte order of the platform - confirm against the code which uses u_
- union uint_
- {
- struct
- {
- unsigned int low; // 32 bit
- unsigned int high; // 32 bit
- } u_;
- uint u; // 64 bit
- };
- static void DivTwoWords2(uint a,uint b, uint c, uint * r, uint * rest);
- static uint DivTwoWordsNormalize(uint_ & a_, uint_ & b_, uint_ & c_);
- static uint DivTwoWordsUnnormalize(uint u, uint d);
- static unsigned int DivTwoWordsCalculate(uint_ u_, unsigned int u3, uint_ v_);
- static void MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_);
-#endif // TTMATH_PLATFORM64
-#endif // TTMATH_NOASM
- uint Rcl2_one(uint c);
- uint Rcr2_one(uint c);
- uint Rcl2(uint bits, uint c);
- uint Rcr2(uint bits, uint c);
- static const char * LibTypeStr();
- static LibTypeCode LibType();
- uint Add(const UInt<value_size> & ss2, uint c=0);
- uint AddInt(uint value, uint index = 0);
- uint AddTwoInts(uint x2, uint x1, uint index);
- static uint AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
- uint Sub(const UInt<value_size> & ss2, uint c=0);
- uint SubInt(uint value, uint index = 0);
- static uint SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
- static sint FindLeadingBitInWord(uint x);
- static sint FindLowestBitInWord(uint x);
- static uint SetBitInWord(uint & value, uint bit);
- static void MulTwoWords(uint a, uint b, uint * result_high, uint * result_low);
- static void DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest);
-/*!
-    this specialization is needed in order not to confuse the compiler
-    ("error: ISO C++ forbids zero-size array") when compiling Mul3Big2() method
-
-    it holds one dummy word and its methods only assert,
-    they are not expected to be called
-*/
-template<>
-class UInt<0>
-{
-public:
-    uint table[1];
-    void Mul2Big(const UInt<0> &, UInt<0> &) { TTMATH_ASSERT(false) }
-    void SetZero() { TTMATH_ASSERT(false) }
-    uint AddTwoInts(uint, uint, uint) { TTMATH_ASSERT(false) return 0; }
-};
-} //namespace
-#include "ttmathuint_x86.h"
-#include "ttmathuint_x86_64.h"
-#include "ttmathuint_noasm.h"
diff --git a/ttmath/ttmathuint_noasm.h b/ttmath/ttmathuint_noasm.h
deleted file mode 100644
index 07c73fc..0000000
--- a/ttmath/ttmathuint_noasm.h
+++ /dev/null
@@ -1,1017 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2010, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathuint_noasm
-#define headerfilettmathuint_noasm
- \file ttmathuint_noasm.h
- \brief template class UInt<uint> with methods without any assembler code
- this file is included at the end of ttmathuint.h
-namespace ttmath
- /*!
- returns a string which represents the current type of the library
- we have following types:
- asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
- asm_gcc_32 - with asm code designed for GCC (32 bits)
- asm_vc_64 - with asm for VC (64 bit)
- asm_gcc_64 - with asm for GCC (64 bit)
- no_asm_32 - pure C++ version (32 bit) - without any asm code
- no_asm_64 - pure C++ version (64 bit) - without any asm code
- */
- template<uint value_size>
- const char * UInt<value_size>::LibTypeStr()
- {
-     // this is the file without any asm code, so only the word size differs;
-     // 'info' is defined once, by the guard which matches the platform
-     #ifdef TTMATH_PLATFORM32
-     static const char info[] = "no_asm_32";
-     #endif
-     #ifdef TTMATH_PLATFORM64
-     static const char info[] = "no_asm_64";
-     #endif
-     return info;
- }
- /*!
- returns the current type of the library (as a LibTypeCode value)
- */
- template<uint value_size>
- LibTypeCode UInt<value_size>::LibType()
- {
-     // this is the file without any asm code, so only the word size differs;
-     // 'info' is defined once, by the guard which matches the platform
-     #ifdef TTMATH_PLATFORM32
-     LibTypeCode info = no_asm_32;
-     #endif
-     #ifdef TTMATH_PLATFORM64
-     LibTypeCode info = no_asm_64;
-     #endif
-     return info;
- }
- /*!
- this method adds two words together
- returns carry
- this method is created only when TTMATH_NOASM macro is defined
- */
- template<uint value_size>
- uint UInt<value_size>::AddTwoWords(uint a, uint b, uint carry, uint * result)
- {
-     uint sum;
-     if( carry != 0 )
-     {
-         // any non-zero incoming carry counts as one
-         sum = a + b + 1;
-         // with the carry added the sum wrapped around unless it is bigger than 'a'
-         carry = ( sum > a ) ? 0 : 1;
-     }
-     else
-     {
-         sum = a + b;
-         // without a carry the sum wrapped around when it is smaller than 'a'
-         carry = ( sum < a ) ? 1 : 0;
-     }
-     *result = sum;
-     return carry;
- }
- /*!
- this method adding ss2 to the this and adding carry if it's defined
- (this = this + ss2 + c)
- c should be zero or one (in this version any non-zero value is treated as one by AddTwoWords)
- function returns carry (1) (if it was)
- */
- template<uint value_size>
- uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
- {
-     // the words are added from the lowest one,
-     // the carry from each word goes to the next one
-     uint word = 0;
-     while( word < value_size )
-     {
-         c = AddTwoWords(table[word], ss2.table[word], c, &table[word]);
-         ++word;
-     }
-     TTMATH_LOGC("UInt::Add", c)
-     return c;
- }
- /*!
- this method adds one word (at a specific position)
- and returns a carry (if it was)
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- AddInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 + 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::AddInt(uint value, uint index)
- {
-     TTMATH_ASSERT( index < value_size )
-     // 'value' is added to the selected word ...
-     uint carry = AddTwoWords(table[index], value, 0, &table[index]);
-     // ... and the carry goes up only as long as there is one
-     uint word = index + 1;
-     while( word < value_size && carry )
-     {
-         carry = AddTwoWords(table[word], 0, carry, &table[word]);
-         ++word;
-     }
-     TTMATH_LOGC("UInt::AddInt", carry)
-     return carry;
- }
- /*!
- this method adds only two unsigned words to the existing value
- and these words begin on the 'index' position
- (it's used in the multiplication algorithm 2)
- index should be equal or smaller than value_size-2 (index <= value_size-2)
- x1 - lower word, x2 - higher word
- for example if we've got value_size equal 4 and:
- table[0] = 3
- table[1] = 4
- table[2] = 5
- table[3] = 6
- then let
- x1 = 10
- x2 = 20
- and
- index = 1
- the result of this method will be:
- table[0] = 3
- table[1] = 4 + x1 = 14
- table[2] = 5 + x2 = 25
- table[3] = 6
- and no carry at the end of table[3]
- (of course if there was a carry in table[2](5+20) then
- this carry would be passed to the table[3] etc.)
- */
- template<uint value_size>
- uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
- {
-     TTMATH_ASSERT( index < value_size - 1 )
-     // x1 (the lower word) goes to table[index], x2 (the higher one) to table[index+1]
-     uint carry = AddTwoWords(table[index], x1, 0, &table[index]);
-     carry = AddTwoWords(table[index+1], x2, carry, &table[index+1]);
-     // the carry goes up only as long as there is one
-     uint word = index + 2;
-     while( word < value_size && carry )
-     {
-         carry = AddTwoWords(table[word], 0, carry, &table[word]);
-         ++word;
-     }
-     TTMATH_LOGC("UInt::AddTwoInts", carry)
-     return carry;
- }
- /*!
- this static method adds one vector to the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5+1
- 4 3 4+3
- 2 7 2+7
- 6 6
- 9 9
- of course the carry is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
- {
-     TTMATH_ASSERT( ss1_size >= ss2_size )
-     uint carry = 0;
-     uint pos = 0;
-     // the part where both vectors have words
-     while( pos < ss2_size )
-     {
-         carry = AddTwoWords(ss1[pos], ss2[pos], carry, &result[pos]);
-         ++pos;
-     }
-     // the rest of ss1: the words are copied with the carry added
-     while( pos < ss1_size )
-     {
-         carry = AddTwoWords(ss1[pos], 0, carry, &result[pos]);
-         ++pos;
-     }
-     TTMATH_VECTOR_LOGC("UInt::AddVector", carry, result, ss1_size)
-     return carry;
- }
- /*!
- this method subtracts one word from the other
- returns carry
- this method is created only when TTMATH_NOASM macro is defined
- */
- template<uint value_size>
- uint UInt<value_size>::SubTwoWords(uint a, uint b, uint carry, uint * result)
- {
-     if( carry != 0 )
-     {
-         // any non-zero incoming carry (borrow) counts as one
-         *result = a - b - 1;
-         // with the borrow taken there is no new one only when 'a' is bigger than 'b'
-         carry = ( a > b ) ? 0 : 1;
-     }
-     else
-     {
-         *result = a - b;
-         // without a borrow a new one appears when 'a' is smaller than 'b'
-         carry = ( a < b ) ? 1 : 0;
-     }
-     return carry;
- }
- /*!
- this method's subtracting ss2 from the 'this' and subtracting
- carry if it has been defined
- (this = this - ss2 - c)
- c should be zero or one (in this version any non-zero value is treated as one by SubTwoWords)
- function returns carry (1) (if it was)
- */
- template<uint value_size>
- uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
- {
-     // this = this - ss2 - c, word by word from the lowest word up;
-     // the borrow left after the highest word is returned
-     uint pos = 0;
-     while( pos < value_size )
-     {
-         c = SubTwoWords(table[pos], ss2.table[pos], c, &table[pos]);
-         ++pos;
-     }
-     TTMATH_LOGC("UInt::Sub", c)
-     return c;
- }
- /*!
- this method subtracts one word (at a specific position)
- and returns a carry (if it was)
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- SubInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 - 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::SubInt(uint value, uint index)
- {
-     // subtracts 'value' from table[index] and ripples the borrow upwards
-     // only for as long as there is one; returns the final borrow
-     uint pos, c;
-     TTMATH_ASSERT( index < value_size )
-     c = SubTwoWords(table[index], value, 0, &table[index]);
-     pos = index + 1;
-     while( c && pos < value_size )
-     {
-         c = SubTwoWords(table[pos], 0, c, &table[pos]);
-         ++pos;
-     }
-     TTMATH_LOGC("UInt::SubInt", c)
-     return c;
- }
- /*!
- this static method subtractes one vector from the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5-1
- 4 3 4-3
- 2 7 2-7
- 6 6-1 (the borrow from previous item)
- 9 9
- return (carry): 0
- of course the carry (borrow) is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
- {
-     // result[0..ss1_size) = ss1 - ss2, where ss2 is treated as zero-extended
-     // to ss1_size words; the borrow out of the last word is returned
-     uint pos = 0;
-     uint c = 0;
-     TTMATH_ASSERT( ss1_size >= ss2_size )
-     // words present in both operands
-     while( pos < ss2_size )
-     {
-         c = SubTwoWords(ss1[pos], ss2[pos], c, &result[pos]);
-         ++pos;
-     }
-     // remaining words of ss1: only the borrow is still being subtracted
-     while( pos < ss1_size )
-     {
-         c = SubTwoWords(ss1[pos], 0, c, &result[pos]);
-         ++pos;
-     }
-     TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
-     return c;
- }
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bit* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2_one(uint c)
- {
-     // shifts the whole value left by one bit; 'c' (normalised to 0/1)
-     // enters at bit 0 and the bit pushed out of the top word is returned
-     c = ( c != 0 ) ? 1 : 0;
-     for(uint pos = 0 ; pos < value_size ; ++pos)
-     {
-         // remember the bit leaving this word before overwriting it
-         const uint pushed_out = (table[pos] & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0;
-         table[pos] = (table[pos] << 1) | c;
-         c = pushed_out;
-     }
-     TTMATH_LOGC("UInt::Rcl2_one", c)
-     return c;
- }
- /*!
- this method moves all bits into the right hand side
- c -> this -> return value
- the highest *bit* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2_one(uint c)
- {
-     // shifts the whole value right by one bit (c -> this -> return value):
-     // a non-zero 'c' enters at the highest bit of the top word and the bit
-     // pushed out of the lowest word is returned as 0 or 1
-     sint i; // signed i: the loop counts down and must be able to go below zero
-     uint new_c;
-     // the incoming carry has to be positioned at the highest bit before the
-     // loop; the loop itself must run regardless of the carry value
-     if( c != 0 )
-         c = TTMATH_UINT_HIGHEST_BIT;
-     for(i=sint(value_size)-1 ; i>=0 ; --i)
-     {
-         // the lowest bit of this word becomes the highest bit of the word below
-         new_c = (table[i] & 1) ? TTMATH_UINT_HIGHEST_BIT : 0;
-         table[i] = (table[i] >> 1) | c;
-         c = new_c;
-     }
-     // normalise the bit that left the lowest word to 0/1
-     c = (c != 0)? 1 : 0;
-     TTMATH_LOGC("UInt::Rcr2_one", c)
-     return c;
- }
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bits* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2(uint bits, uint c)
- {
-     // shifts the whole value left by 'bits' bits; when 'c' is non-zero the
-     // vacated low bits are filled with ones; returns the last bit pushed out
-     //
-     // both 'bits' and 'TTMATH_BITS_PER_UINT - bits' are used as shift counts
-     // below, so 'bits' must lie strictly between 0 and the word width:
-     // shifting a word by its full width is undefined behaviour in C++
-     TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-     uint move = TTMATH_BITS_PER_UINT - bits;
-     uint i, new_c;
-     // a set carry becomes 'bits' one-bits in the lowest positions
-     if( c != 0 )
-         c = TTMATH_UINT_MAX_VALUE >> move;
-     for(i=0 ; i<value_size ; ++i)
-     {
-         // the top 'bits' bits of this word move into the bottom of the next word
-         new_c = table[i] >> move;
-         table[i] = (table[i] << bits) | c;
-         c = new_c;
-     }
-     TTMATH_LOGC("UInt::Rcl2", (c & 1))
-     return (c & 1);
- }
- /*!
- this method moves all bits into the right hand side
- C -> this -> return value
- the highest *bits* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2(uint bits, uint c)
- {
-     // shifts the whole value right by 'bits' bits; when 'c' is non-zero the
-     // vacated high bits are filled with ones; returns the last bit pushed out
-     //
-     // both 'bits' and 'TTMATH_BITS_PER_UINT - bits' are used as shift counts
-     // below, so 'bits' must lie strictly between 0 and the word width:
-     // shifting a word by its full width is undefined behaviour in C++
-     TTMATH_ASSERT( bits>0 && bits<TTMATH_BITS_PER_UINT )
-     uint move = TTMATH_BITS_PER_UINT - bits;
-     sint i; // signed: the loop counts down and must be able to go below zero
-     uint new_c;
-     // a set carry becomes 'bits' one-bits in the highest positions
-     if( c != 0 )
-         c = TTMATH_UINT_MAX_VALUE << move;
-     for(i=sint(value_size)-1 ; i>=0 ; --i)
-     {
-         // the low 'bits' bits of this word move into the top of the word below
-         new_c = table[i] << move;
-         table[i] = (table[i] >> bits) | c;
-         c = new_c;
-     }
-     // the last bit shifted out sits in the highest position of 'c'
-     c = (c & TTMATH_UINT_HIGHEST_BIT) ? 1 : 0;
-     TTMATH_LOGC("UInt::Rcr2", c)
-     return c;
- }
- /*!
- this method returns the number of the highest set bit in x
- if the 'x' is zero this method returns '-1'
- */
- template<uint value_size>
- sint UInt<value_size>::FindLeadingBitInWord(uint x)
- {
-     // index of the most significant set bit of x, or -1 when x is zero
-     if( x == 0 )
-         return -1;
-     // slide x left until its top bit is set, counting the index down
-     uint position = TTMATH_BITS_PER_UINT - 1;
-     for( ; (x & TTMATH_UINT_HIGHEST_BIT) == 0 ; x = x << 1 )
-         --position;
-     return position;
- }
- /*!
- this method returns the number of the lowest set bit in x
- if the 'x' is zero this method returns '-1'
- */
- template<uint value_size>
- sint UInt<value_size>::FindLowestBitInWord(uint x)
- {
-     // index of the least significant set bit of x, or -1 when x is zero
-     if( x == 0 )
-         return -1;
-     // slide x right until its bottom bit is set, counting the index up
-     uint position = 0;
-     for( ; (x & 1) == 0 ; x = x >> 1 )
-         ++position;
-     return position;
- }
- /*!
- this method sets a special bit in the 'value'
- and returns the last state of the bit (zero or one)
- bit is from <0,TTMATH_BITS_PER_UINT-1>
- e.g.
- uint x = 100;
- uint bit = SetBitInWord(x, 3);
- now: x = 108 and bit = 0
- */
- template<uint value_size>
- uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
- {
-     // sets bit number 'bit' in 'value' and reports whether it was set before (1) or not (0)
-     const uint mask = uint(1) << bit;
-     const bool was_set = (value & mask) != 0;
-     value |= mask;
-     return was_set ? 1 : 0;
- }
- /*!
- *
- * Multiplication
- *
- *
- */
- /*!
- multiplication: result_high:result_low = a * b
- result_high - higher word of the result
- result_low - lower word of the result
- this method never returns a carry
- this method is used in the second version of the multiplication algorithms
- */
- template<uint value_size>
- void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
- {
-     // result_high:result_low = a * b (a full double-word product, so there is no carry)
-     // the two implementations below are selected by the platform macro; the
-     // '#else' / '#endif' further down belong to this '#ifdef'
- #ifdef TTMATH_PLATFORM32
-     /*
-     on 32bit platforms we have defined 'unsigned long long int' type known as 'ulint' in ttmath namespace
-     this type has 64 bits, then we're using only one multiplication: 32bit * 32bit = 64bit
-     */
-     union uint_
-     {
-         struct
-         {
-             uint low; // 32 bits
-             uint high; // 32 bits
-         } u_;
-         ulint u; // 64 bits
-     } res;
-     res.u = ulint(a) * ulint(b); // multiply two 32bit words, the result has 64 bits
-     *result_high = res.u_.high;
-     *result_low = res.u_.low;
- #else
-     /*
-     64 bits platforms
-     we don't have a native type which has 128 bits
-     then we're splitting 'a' and 'b' to 4 parts (high and low halves)
-     and using 4 multiplications (with additions and carry correctness)
-     */
-     uint_ a_;
-     uint_ b_;
-     uint_ res_high1, res_high2;
-     uint_ res_low1, res_low2;
-     a_.u = a;
-     b_.u = b;
-     /*
-     the multiplication is as follows (schoolbook algorithm with O(n^2) ):
-     32 bits 32 bits
-     +--------------------------------+
-     | a_.u_.high | a_.u_.low |
-     +--------------------------------+
-     | b_.u_.high | b_.u_.low |
-     +--------------------------------+--------------------------------+
-     | res_high1.u | res_low1.u |
-     +--------------------------------+--------------------------------+
-     | res_high2.u | res_low2.u |
-     +--------------------------------+--------------------------------+
-     64 bits 64 bits
-     */
-     uint_ temp;
-     // first row: b.low * (a.high:a.low) -> res_high1:res_low1
-     res_low1.u = uint(b_.u_.low) * uint(a_.u_.low);
-     temp.u = uint(res_low1.u_.high) + uint(b_.u_.low) * uint(a_.u_.high);
-     res_low1.u_.high = temp.u_.low;
-     res_high1.u_.low = temp.u_.high;
-     res_high1.u_.high = 0;
-     // second row: b.high * (a.high:a.low), shifted left by one half-word -> res_high2:res_low2
-     res_low2.u_.low = 0;
-     temp.u = uint(b_.u_.high) * uint(a_.u_.low);
-     res_low2.u_.high = temp.u_.low;
-     res_high2.u = uint(b_.u_.high) * uint(a_.u_.high) + uint(temp.u_.high);
-     // sum of both rows, carrying from the low word into the high word
-     uint c = AddTwoWords(res_low1.u, res_low2.u, 0, &res_low2.u);
-     AddTwoWords(res_high1.u, res_high2.u, c, &res_high2.u); // there is no carry from here
-     *result_high = res_high2.u;
-     *result_low = res_low2.u;
- #endif
- }
- /*!
- *
- * Division
- *
- *
- */
- /*!
- this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
- r = a:b / c and rest - remainder
- *
- * c has to be large enough for the result to fit in one word,
- * if c is equal zero there'll be a hardware interruption (0)
- * and probably the end of your program
- *
- */
- template<uint value_size>
- void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
- {
-     // *r = (a:b) / c and *rest = (a:b) % c, where a is the higher and b the lower word
-     // (a < c ) for the result to be one word
-     TTMATH_ASSERT( c != 0 && a < c )
-     // the two implementations below are selected by the platform macro; the
-     // '#else' / '#endif' further down belong to this '#ifdef'
- #ifdef TTMATH_PLATFORM32
-     // 32bit platforms: a:b fits in the 64bit 'ulint' type, so one native division is enough
-     union
-     {
-         struct
-         {
-             uint low; // 32 bits
-             uint high; // 32 bits
-         } u_;
-         ulint u; // 64 bits
-     } ab;
-     ab.u_.high = a;
-     ab.u_.low = b;
-     *r = uint(ab.u / c);
-     *rest = uint(ab.u % c);
- #else
-     uint_ c_;
-     c_.u = c;
-     if( a == 0 )
-     {
-         // the dividend is a single word
-         *r = b / c;
-         *rest = b % c;
-     }
-     else
-     if( c_.u_.high == 0 )
-     {
-         // higher half of 'c' is zero
-         // then higher half of 'a' is zero too (look at the asserts at the beginning - 'a' is smaller than 'c')
-         uint_ a_, b_, res_, temp1, temp2;
-         a_.u = a;
-         b_.u = b;
-         // first step: (a.low : b.high) / c gives the higher half of the result
-         temp1.u_.high = a_.u_.low;
-         temp1.u_.low = b_.u_.high;
-         res_.u_.high = (unsigned int)(temp1.u / c);
-         // second step: (remainder : b.low) / c gives the lower half of the result
-         temp2.u_.high = (unsigned int)(temp1.u % c);
-         temp2.u_.low = b_.u_.low;
-         res_.u_.low = (unsigned int)(temp2.u / c);
-         *rest = temp2.u % c;
-         *r = res_.u;
-     }
-     else
-     {
-         // general case: both halves of 'c' are in use
-         return DivTwoWords2(a, b, c, r, rest);
-     }
- #endif
- }
- /*!
- this method is available only on 64bit platforms
- the same algorithm like the third division algorithm in ttmathuint.h
- but now with the radix=2^32
- */
- template<uint value_size>
- void UInt<value_size>::DivTwoWords2(uint a, uint b, uint c, uint * r, uint * rest)
- {
- // a is not zero
- // c_.u_.high is not zero
- uint_ a_, b_, c_, u_, q_;
- unsigned int u3; // 32 bit
- a_.u = a;
- b_.u = b;
- c_.u = c;
- // normalizing
- uint d = DivTwoWordsNormalize(a_, b_, c_);
- // loop from j=1 to j=0
- // the first step (for j=2) is skipped because our result is only in one word,
- // (first 'q' were 0 and nothing would be changed)
- u_.u_.high = a_.u_.high;
- u_.u_.low = a_.u_.low;
- u3 = b_.u_.high;
- q_.u_.high = DivTwoWordsCalculate(u_, u3, c_);
- MultiplySubtract(u_, u3, q_.u_.high, c_);
- u_.u_.high = u_.u_.low;
- u_.u_.low = u3;
- u3 = b_.u_.low;
- q_.u_.low = DivTwoWordsCalculate(u_, u3, c_);
- MultiplySubtract(u_, u3, q_.u_.low, c_);
- *r = q_.u;
- // unnormalizing for the remainder
- u_.u_.high = u_.u_.low;
- u_.u_.low = u3;
- *rest = DivTwoWordsUnnormalize(u_.u, d);
- }
- template<uint value_size>
- uint UInt<value_size>::DivTwoWordsNormalize(uint_ & a_, uint_ & b_, uint_ & c_)
- {
-     // shifts the divisor c_ left until its highest bit is set and shifts the
-     // two-word dividend a_:b_ by the same amount; returns the shift count
-     uint shift = 0;
-     while( (c_.u & TTMATH_UINT_HIGHEST_BIT) == 0 )
-     {
-         c_.u <<= 1;
-         // the bit leaving the top of 'b' enters the bottom of 'a'
-         const uint carry_from_b = b_.u & TTMATH_UINT_HIGHEST_BIT;
-         b_.u <<= 1;
-         a_.u <<= 1; // carry bits from 'a' are simply skipped
-         if( carry_from_b )
-             a_.u |= 1;
-         ++shift;
-     }
-     return shift;
- }
- template<uint value_size>
- uint UInt<value_size>::DivTwoWordsUnnormalize(uint u, uint d)
- {
-     // undoes a normalization shift of 'd' bits on the remainder 'u'
-     return ( d == 0 ) ? u : (u >> d);
- }
- template<uint value_size>
- unsigned int UInt<value_size>::DivTwoWordsCalculate(uint_ u_, unsigned int u3, uint_ v_) // estimates one quotient digit from the dividend part u_ (followed by u3) and the divisor v_
- {
- bool next_test;
- uint_ qp_, rp_, temp_;
- qp_.u = u_.u / uint(v_.u_.high); // trial quotient: u_ divided by the high half of the divisor only
- rp_.u = u_.u % uint(v_.u_.high); // the matching trial remainder
- TTMATH_ASSERT( qp_.u_.high==0 || qp_.u_.high==1 )
- do
- {
- bool decrease = false;
- if( qp_.u_.high == 1 ) // the trial quotient does not fit in its low half
- decrease = true;
- else
- {
- temp_.u_.high = rp_.u_.low;
- temp_.u_.low = u3; // temp_ = rp_.low : u3
- if( qp_.u * uint(v_.u_.low) > temp_.u ) // qp * v.low is larger than rp:u3, so qp is too large
- decrease = true;
- }
- next_test = false;
- if( decrease )
- {
- --qp_.u;
- rp_.u += v_.u_.high;
- if( rp_.u_.high == 0 ) // the remainder still fits in its low half: test the lowered qp once more
- next_test = true;
- }
- }
- while( next_test );
- return qp_.u_.low; // only the low half of the estimate is returned
- }
- template<uint value_size>
- void UInt<value_size>::MultiplySubtract(uint_ & u_, unsigned int & u3, unsigned int & q, uint_ v_) // subtracts v_*q from the value u_.high:u_.low:u3 in place; lowers q by one if the subtraction borrows
- {
- uint_ temp_;
- uint res_high;
- uint res_low;
- MulTwoWords(v_.u, q, &res_high, &res_low); // res_high:res_low = v * q
- uint_ sub_res_high_;
- uint_ sub_res_low_;
- temp_.u_.high = u_.u_.low;
- temp_.u_.low = u3; // temp_ = u_.low : u3 (the lower word of the minuend)
- uint c = SubTwoWords(temp_.u, res_low, 0, &sub_res_low_.u);
- temp_.u_.high = 0;
- temp_.u_.low = u_.u_.high; // temp_ = 0 : u_.high (the upper word of the minuend)
- c = SubTwoWords(temp_.u, res_high, c, &sub_res_high_.u);
- if( c ) // a borrow is left over: q was one too large, so add the divisor back once
- {
- --q;
- c = AddTwoWords(sub_res_low_.u, v_.u, 0, &sub_res_low_.u);
- AddTwoWords(sub_res_high_.u, 0, c, &sub_res_high_.u);
- }
- u_.u_.high = sub_res_high_.u_.low; // write the difference back into u_.high : u_.low : u3
- u_.u_.low = sub_res_low_.u_.high;
- u3 = sub_res_low_.u_.low;
- }
-#endif // #ifdef TTMATH_PLATFORM64
-} //namespace
-#endif //ifdef TTMATH_NOASM
diff --git a/ttmath/ttmathuint_x86.h b/ttmath/ttmathuint_x86.h
deleted file mode 100644
index 1dd087f..0000000
--- a/ttmath/ttmathuint_x86.h
+++ /dev/null
@@ -1,1602 +0,0 @@
-/*
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
-/*
- * Copyright (c) 2006-2009, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathuint_x86
-#define headerfilettmathuint_x86
-/*!
- \file ttmathuint_x86.h
- \brief template class UInt<uint> with assembler code for 32bit x86 processors
- this file is included at the end of ttmathuint.h
-*/
-/*!
- \brief a namespace for the TTMath library
-*/
-namespace ttmath
- /*!
- returning the string represents the currect type of the library
- we have following types:
- asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
- asm_gcc_32 - with asm code designed for GCC (32 bits)
- asm_vc_64 - with asm for VC (64 bit)
- asm_gcc_64 - with asm for GCC (64 bit)
- no_asm_32 - pure C++ version (32 bit) - without any asm code
- no_asm_64 - pure C++ version (64 bit) - without any asm code
- */
- template<uint value_size>
- const char * UInt<value_size>::LibTypeStr()
- {
-     // name of the inline-assembler flavour this file was compiled with
- #ifdef __GNUC__
-     static const char info[] = "asm_gcc_32";
- #else
-     static const char info[] = "asm_vc_32";
- #endif
-     return info;
- }
- /*!
- returning the currect type of the library
- */
- template<uint value_size>
- LibTypeCode UInt<value_size>::LibType()
- {
-     // code of the inline-assembler flavour this file was compiled with
- #ifdef __GNUC__
-     LibTypeCode info = asm_gcc_32;
- #else
-     LibTypeCode info = asm_vc_32;
- #endif
-     return info;
- }
- /*!
- *
- * basic mathematic functions
- *
- */
- /*!
- adding ss2 to the this and adding carry if it's defined
- (this = this + ss2 + c)
- c must be zero or one (might be a bigger value than 1)
- function returns carry (1) (if it has been)
- */
- template<uint value_size>
- uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c) // this = this + ss2 + (c!=0); returns the carry out of the last word
- {
- uint b = value_size;
- uint * p1 = table;
- uint * p2 = const_cast<uint*>(ss2.table);
- // we don't have to use TTMATH_REFERENCE_ASSERT here
- // this algorithm doesn't require it
- #ifndef __GNUC__
- // this part might be compiled with for example visual c
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- mov ecx,[b] // ecx = number of words still to add
- mov ebx,[p1] // ebx = this->table
- mov esi,[p2] // esi = ss2.table
- xor edx,edx // edx=0 (word index)
- mov eax,[c]
- neg eax // CF=1 if eax!=0 , CF=0 if eax==0
- ttmath_loop:
- mov eax,[esi+edx*4]
- adc [ebx+edx*4],eax // table[edx] += ss2.table[edx] + CF
- inc edx // inc/dec leave CF alone, so the carry survives into the next adc
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx is zero here, so ecx = final CF
- mov [c], ecx
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- // this part should be compiled with gcc
- __asm__ __volatile__(
- "xorl %%edx, %%edx \n" // edx = word index
- "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
- "1: \n"
- "movl (%%esi,%%edx,4), %%eax \n"
- "adcl %%eax, (%%ebx,%%edx,4) \n" // table[edx] += ss2.table[edx] + CF
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n" // ecx is zero here, so ecx = final CF
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1), "S" (p2) // in: ecx=b, eax=c, ebx=p1, esi=p2
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Add", c)
- return c;
- }
- /*!
- adding one word (at a specific position)
- and returning a carry (if it has been)
- e.g.
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- AddInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 + 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::AddInt(uint value, uint index) // adds 'value' to table[index], ripples the carry upwards and returns the final carry
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- mov ecx, [b]
- sub ecx, [index] // ecx = number of words from 'index' up to the end of the table
- mov edx, [index] // edx = current word index
- mov ebx, [p1]
- mov eax, [value]
- ttmath_loop:
- add [ebx+edx*4], eax
- jnc ttmath_end // no carry left: the upper words stay untouched
- mov eax, 1 // from now on only the carry is added (mov/inc/dec do not change CF)
- inc edx
- dec ecx
- jnz ttmath_loop
- ttmath_end:
- setc al // al = carry of the last add
- movzx edx, al
- mov [c], edx
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "subl %%edx, %%ecx \n" // ecx = b - index = words left to process
- "1: \n"
- "addl %%eax, (%%ebx,%%edx,4) \n"
- "jnc 2f \n" // no carry left: done
- "movl $1, %%eax \n" // from now on only the carry is added
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n" // al = carry of the last add
- "movzx %%al, %%edx \n"
- : "=d" (c), "=a" (dummy), "=c" (dummy2)
- : "0" (index), "1" (value), "2" (b), "b" (p1) // in: edx=index, eax=value, ecx=b, ebx=p1
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::AddInt", c)
- return c;
- }
- /*!
- adding only two unsigned words to the existing value
- and these words begin on the 'index' position
- (it's used in the multiplication algorithm 2)
- index should be equal or smaller than value_size-2 (index <= value_size-2)
- x1 - lower word, x2 - higher word
- for example if we've got value_size equal 4 and:
- table[0] = 3
- table[1] = 4
- table[2] = 5
- table[3] = 6
- then let
- x1 = 10
- x2 = 20
- and
- index = 1
- the result of this method will be:
- table[0] = 3
- table[1] = 4 + x1 = 14
- table[2] = 5 + x2 = 25
- table[3] = 6
- and no carry at the end of table[3]
- (of course if there was a carry in table[2](5+20) then
- this carry would be passed to the table[3] etc.)
- */
- template<uint value_size>
- uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index) // adds x1 to table[index] and x2 to table[index+1], ripples the carry upwards and returns the final carry
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size - 1 )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- mov ecx, [b]
- sub ecx, [index] // ecx = number of words from 'index' up to the end of the table
- mov ebx, [p1]
- mov edx, [index]
- mov eax, [x1]
- add [ebx+edx*4], eax // table[index] += x1
- inc edx // inc/dec/mov leave CF alone, so the carry reaches the adc below
- dec ecx
- mov eax, [x2]
- ttmath_loop:
- adc [ebx+edx*4], eax // first pass adds x2 + CF, later passes add 0 + CF
- jnc ttmath_end
- mov eax, 0 // mov (not xor) so that CF is preserved
- inc edx
- dec ecx
- jnz ttmath_loop
- ttmath_end:
- setc al // al = carry of the last adc
- movzx edx, al
- mov [c], edx
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "subl %%edx, %%ecx \n" // ecx = b - index = words left to process
- "addl %%esi, (%%ebx,%%edx,4) \n" // table[index] += x1
- "incl %%edx \n"
- "decl %%ecx \n"
- "1: \n"
- "adcl %%eax, (%%ebx,%%edx,4) \n" // first pass adds x2 + CF, later passes add 0 + CF
- "jnc 2f \n"
- "mov $0, %%eax \n" // mov (not xor) so that CF is preserved
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n" // al = carry of the last adc
- "movzx %%al, %%eax \n"
- : "=a" (c), "=c" (dummy), "=d" (dummy2)
- : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1) // in: eax=x2, ecx=b, edx=index, ebx=p1, esi=x1
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::AddTwoInts", c)
- return c;
- }
- /*!
- this static method addes one vector to the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5+1
- 4 3 4+3
- 2 7 2+7
- 6 6
- 9 9
- of course the carry is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) // result = ss1 + ss2 over ss1_size words; returns the final carry
- {
- TTMATH_ASSERT( ss1_size >= ss2_size )
- uint rest = ss1_size - ss2_size; // number of words that exist only in ss1
- uint c;
- #ifndef __GNUC__
- // this part might be compiled with for example visual c
- __asm
- {
- pushad
- mov ecx, [ss2_size] // NOTE(review): the loop body runs before the counter is tested, so ss2_size == 0 is not handled here
- xor edx, edx // edx = 0, cf = 0
- mov esi, [ss1]
- mov ebx, [ss2]
- mov edi, [result]
- ttmath_loop:
- mov eax, [esi+edx*4]
- adc eax, [ebx+edx*4]
- mov [edi+edx*4], eax // result[edx] = ss1[edx] + ss2[edx] + CF
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx has the cf state
- mov ebx, [rest]
- or ebx, ebx
- jz ttmath_end // both vectors have the same size: ecx already holds the carry
- xor ebx, ebx // ebx = 0
- neg ecx // setting cf from ecx
- mov ecx, [rest] // ecx is != 0
- ttmath_loop2:
- mov eax, [esi+edx*4]
- adc eax, ebx
- mov [edi+edx*4], eax // result[edx] = ss1[edx] + 0 + CF
- inc edx
- dec ecx
- jnz ttmath_loop2
- adc ecx, ecx // ecx is zero here, so ecx = final CF
- ttmath_end:
- mov [c], ecx
- popad
- }
- #endif
- #ifdef __GNUC__
- // this part should be compiled with gcc
- uint dummy1, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%edx \n" // edx holds 'rest' on entry; saved because edx becomes the index
- "xor %%edx, %%edx \n" // edx = 0, cf = 0
- "1: \n"
- "mov (%%esi,%%edx,4), %%eax \n"
- "adc (%%ebx,%%edx,4), %%eax \n"
- "mov %%eax, (%%edi,%%edx,4) \n" // result[edx] = ss1[edx] + ss2[edx] + CF
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n" // ecx has the cf state
- "pop %%eax \n" // eax = rest
- "or %%eax, %%eax \n"
- "jz 3f \n" // both vectors have the same size: ecx already holds the carry
- "xor %%ebx, %%ebx \n" // ebx = 0
- "neg %%ecx \n" // setting cf from ecx
- "mov %%eax, %%ecx \n" // ecx=rest and is != 0
- "2: \n"
- "mov (%%esi, %%edx, 4), %%eax \n"
- "adc %%ebx, %%eax \n"
- "mov %%eax, (%%edi, %%edx, 4) \n" // result[edx] = ss1[edx] + 0 + CF
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 2b \n"
- "adc %%ecx, %%ecx \n" // ecx is zero here, so ecx = final CF
- "3: \n"
- : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
- : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // in: ebx=ss2, ecx=ss2_size, edx=rest, esi=ss1, edi=result
- : "cc", "memory" );
- #endif
- TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
- return c;
- }
- /*!
- subtracting ss2 from the 'this' and subtracting
- carry if it has been defined
- (this = this - ss2 - c)
- c must be zero or one (might be a bigger value than 1)
- function returns carry (1) (if it has been)
- */
- template<uint value_size>
- uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c) // this = this - ss2 - (c!=0); returns the borrow out of the last word
- {
- uint b = value_size;
- uint * p1 = table;
- uint * p2 = const_cast<uint*>(ss2.table);
- // we don't have to use TTMATH_REFERENCE_ASSERT here
- // this algorithm doesn't require it
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- mov ecx,[b] // ecx = number of words still to subtract
- mov ebx,[p1] // ebx = this->table
- mov esi,[p2] // esi = ss2.table
- xor edx,edx // edx=0 (word index)
- mov eax,[c]
- neg eax // CF=1 if eax!=0 , CF=0 if eax==0
- ttmath_loop:
- mov eax,[esi+edx*4]
- sbb [ebx+edx*4],eax // table[edx] -= ss2.table[edx] + CF
- inc edx // inc/dec leave CF alone, so the borrow survives into the next sbb
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx is zero here, so ecx = final CF (the borrow)
- mov [c], ecx
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "xorl %%edx, %%edx \n" // edx = word index
- "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
- "1: \n"
- "movl (%%esi,%%edx,4), %%eax \n"
- "sbbl %%eax, (%%ebx,%%edx,4) \n" // table[edx] -= ss2.table[edx] + CF
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n" // ecx is zero here, so ecx = final CF (the borrow)
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1), "S" (p2) // in: ecx=b, eax=c, ebx=p1, esi=p2
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Sub", c)
- return c;
- }
- /*!
- this method subtracts one word (at a specific position)
- and returns a carry (if it was)
- e.g.
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- SubInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 - 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::SubInt(uint value, uint index) // subtracts 'value' from table[index], ripples the borrow upwards and returns the final borrow
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- mov ecx, [b]
- sub ecx, [index] // ecx = number of words from 'index' up to the end of the table
- mov edx, [index] // edx = current word index
- mov ebx, [p1]
- mov eax, [value]
- ttmath_loop:
- sub [ebx+edx*4], eax
- jnc ttmath_end // no borrow left: the upper words stay untouched
- mov eax, 1 // from now on only the borrow is subtracted (mov/inc/dec do not change CF)
- inc edx
- dec ecx
- jnz ttmath_loop
- ttmath_end:
- setc al // al = borrow of the last sub
- movzx edx, al
- mov [c], edx
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "subl %%edx, %%ecx \n" // ecx = b - index = words left to process
- "1: \n"
- "subl %%eax, (%%ebx,%%edx,4) \n"
- "jnc 2f \n" // no borrow left: done
- "movl $1, %%eax \n" // from now on only the borrow is subtracted
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n" // al = borrow of the last sub
- "movzx %%al, %%edx \n"
- : "=d" (c), "=a" (dummy), "=c" (dummy2)
- : "0" (index), "1" (value), "2" (b), "b" (p1) // in: edx=index, eax=value, ecx=b, ebx=p1
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::SubInt", c)
- return c;
- }
- /*!
- this static method subtractes one vector from the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5-1
- 4 3 4-3
- 2 7 2-7
- 6 6-1 (the borrow from previous item)
- 9 9
- return (carry): 0
- of course the carry (borrow) is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result) // result = ss1 - ss2 over ss1_size words; returns the final borrow
- {
- TTMATH_ASSERT( ss1_size >= ss2_size )
- uint rest = ss1_size - ss2_size; // number of words that exist only in ss1
- uint c;
- #ifndef __GNUC__
- // this part might be compiled with for example visual c
- /*
- the asm code is nearly the same as in AddVector
- only two instructions 'adc' are changed to 'sbb'
- */
- __asm
- {
- pushad
- mov ecx, [ss2_size] // NOTE(review): the loop body runs before the counter is tested, so ss2_size == 0 is not handled here
- xor edx, edx // edx = 0, cf = 0
- mov esi, [ss1]
- mov ebx, [ss2]
- mov edi, [result]
- ttmath_loop:
- mov eax, [esi+edx*4]
- sbb eax, [ebx+edx*4]
- mov [edi+edx*4], eax // result[edx] = ss1[edx] - ss2[edx] - CF
- inc edx
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx has the cf state
- mov ebx, [rest]
- or ebx, ebx
- jz ttmath_end // both vectors have the same size: ecx already holds the borrow
- xor ebx, ebx // ebx = 0
- neg ecx // setting cf from ecx
- mov ecx, [rest] // ecx is != 0
- ttmath_loop2:
- mov eax, [esi+edx*4]
- sbb eax, ebx
- mov [edi+edx*4], eax // result[edx] = ss1[edx] - 0 - CF
- inc edx
- dec ecx
- jnz ttmath_loop2
- adc ecx, ecx // ecx is zero here, so ecx = final CF (the borrow)
- ttmath_end:
- mov [c], ecx
- popad
- }
- #endif
- #ifdef __GNUC__
- // this part should be compiled with gcc
- uint dummy1, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%edx \n" // edx holds 'rest' on entry; saved because edx becomes the index
- "xor %%edx, %%edx \n" // edx = 0, cf = 0
- "1: \n"
- "mov (%%esi,%%edx,4), %%eax \n"
- "sbb (%%ebx,%%edx,4), %%eax \n"
- "mov %%eax, (%%edi,%%edx,4) \n" // result[edx] = ss1[edx] - ss2[edx] - CF
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 1b \n"
- "adc %%ecx, %%ecx \n" // ecx has the cf state
- "pop %%eax \n" // eax = rest
- "or %%eax, %%eax \n"
- "jz 3f \n" // both vectors have the same size: ecx already holds the borrow
- "xor %%ebx, %%ebx \n" // ebx = 0
- "neg %%ecx \n" // setting cf from ecx
- "mov %%eax, %%ecx \n" // ecx=rest and is != 0
- "2: \n"
- "mov (%%esi, %%edx, 4), %%eax \n"
- "sbb %%ebx, %%eax \n"
- "mov %%eax, (%%edi, %%edx, 4) \n" // result[edx] = ss1[edx] - 0 - CF
- "inc %%edx \n"
- "dec %%ecx \n"
- "jnz 2b \n"
- "adc %%ecx, %%ecx \n" // ecx is zero here, so ecx = final CF (the borrow)
- "3: \n"
- : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
- : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result) // in: ebx=ss2, ecx=ss2_size, edx=rest, esi=ss1, edi=result
- : "cc", "memory" );
- #endif
- TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
- return c;
- }
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bit* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2_one(uint c) // rotates the whole table left by one bit through the carry; returns the bit pushed out of the last word
- {
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push ebx
- push ecx
- push edx
- mov ebx, [p1]
- xor edx, edx // edx = word index, starting at the lowest word
- mov ecx, [c]
- neg ecx // CF=1 if c!=0 , CF=0 if c==0
- mov ecx, [b] // mov does not change CF; ecx = number of words
- ttmath_loop:
- rcl dword ptr [ebx+edx*4], 1 // CF enters at bit 0, the old top bit becomes the new CF
- inc edx // inc/dec leave CF alone, so the bit reaches the next word
- dec ecx
- jnz ttmath_loop
- adc ecx, ecx // ecx is zero here, so ecx = final CF
- mov [c], ecx
- pop edx
- pop ecx
- pop ebx
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "xorl %%edx, %%edx \n" // edx=0
- "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
- "1: \n"
- "rcll $1, (%%ebx, %%edx, 4) \n" // CF enters at bit 0, the old top bit becomes the new CF
- "incl %%edx \n"
- "decl %%ecx \n"
- "jnz 1b \n"
- "adcl %%ecx, %%ecx \n" // ecx is zero here, so ecx = final CF
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1) // in: ecx=b, eax=c, ebx=p1
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcl2_one", c)
- return c;
- }
- /*!
- this method moves all bits into the right hand side
- c -> this -> return value
- the highest *bit* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2_one(uint c) // rotates the whole table right by one bit through the carry; returns the bit pushed out of the lowest word
- {
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push ebx
- push ecx
- mov ebx, [p1]
- mov ecx, [c]
- neg ecx // CF=1 if c!=0 , CF=0 if c==0
- mov ecx, [b] // mov does not change CF; ecx = number of words, also used as the index
- ttmath_loop:
- rcr dword ptr [ebx+ecx*4-4], 1 // word ecx-1: CF enters at the top bit, the old bit 0 becomes the new CF
- dec ecx // dec leaves CF alone, so the bit reaches the next lower word
- jnz ttmath_loop
- adc ecx, ecx // ecx is zero here, so ecx = final CF
- mov [c], ecx
- pop ecx
- pop ebx
- }
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ __volatile__(
- "negl %%eax \n" // CF=1 if eax!=0 , CF=0 if eax==0
- "1: \n"
- "rcrl $1, -4(%%ebx, %%ecx, 4) \n" // word ecx-1: CF enters at the top bit, the old bit 0 becomes the new CF
- "decl %%ecx \n"
- "jnz 1b \n"
- "adcl %%ecx, %%ecx \n" // ecx is zero here, so ecx = final CF
- : "=c" (c), "=a" (dummy)
- : "0" (b), "1" (c), "b" (p1) // in: ecx=b, eax=c, ebx=p1
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcr2_one", c)
- return c;
- }
-#ifdef _MSC_VER
-#pragma warning (disable : 4731)
-//warning C4731: frame pointer register 'ebp' modified by inline assembly code
-//(the guard covers only the pragma: the functions below must be compiled by every compiler)
-#endif
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bits* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2(uint bits, uint c) // shifts the whole table left by 'bits' bits, filling the low bits with ones when c!=0; returns the last bit pushed out
- {
- uint b = value_size; // NOTE(review): nothing here checks 'bits'; the mask computation below looks written for 0 < bits < 32 - confirm against callers
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- push edi
- push ebp
- mov edi, [b] // edi = number of words left
- mov ecx, 32
- sub ecx, [bits] // ecx = 32 - bits
- mov edx, -1
- shr edx, cl // edx = mask of the lowest 'bits' bits
- mov ecx, [bits] // cl = rotate count for the loop
- mov ebx, [p1]
- mov eax, [c]
- mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
- xor edx, edx // edx = 0
- mov esi, edx
- or eax, eax
- cmovnz esi, ebp // if(c) esi=mask else esi=0
- ttmath_loop:
- rol dword ptr [ebx+edx*4], cl // the top 'bits' bits wrap around into the lowest positions
- mov eax, [ebx+edx*4]
- and eax, ebp // eax = the bits that left this word
- xor [ebx+edx*4], eax // clearing bits
- or [ebx+edx*4], esi // saving old value
- mov esi, eax // they are inserted into the next word
- inc edx
- dec edi
- jnz ttmath_loop
- pop ebp // restoring ebp
- and eax, 1 // lowest of the bits that left the last word = returned carry
- mov [c], eax
- pop edi
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%ebp \n" // ebp is used as a scratch register below
- "movl %%ecx, %%esi \n" // esi = bits (kept while ecx is reused)
- "movl $32, %%ecx \n"
- "subl %%esi, %%ecx \n" // ecx = 32 - bits
- "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
- "shrl %%cl, %%edx \n" // shifting (0 -> edx -> cf) (cl times)
- "movl %%edx, %%ebp \n" // ebp = edx = mask
- "movl %%esi, %%ecx \n" // cl = bits = rotate count for the loop
- "xorl %%edx, %%edx \n" // edx = word index
- "movl %%edx, %%esi \n"
- "orl %%eax, %%eax \n"
- "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
- "1: \n"
- "roll %%cl, (%%ebx,%%edx,4) \n" // the top 'bits' bits wrap around into the lowest positions
- "movl (%%ebx,%%edx,4), %%eax \n"
- "andl %%ebp, %%eax \n" // eax = the bits that left this word
- "xorl %%eax, (%%ebx,%%edx,4) \n" // clear them in the word
- "orl %%esi, (%%ebx,%%edx,4) \n" // insert the bits carried over from the previous word
- "movl %%eax, %%esi \n"
- "incl %%edx \n"
- "decl %%edi \n"
- "jnz 1b \n"
- "and $1, %%eax \n" // lowest of the bits that left the last word = returned carry
- "pop %%ebp \n"
- : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
- : "0" (c), "1" (b), "b" (p1), "c" (bits) // in: eax=c, edi=b, ebx=p1, ecx=bits
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcl2", c)
- return c;
- }
- /*!
- this method moves all bits into the right hand side
- C -> this -> return value
- the highest *bits* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2(uint bits, uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push ebx
- push ecx
- push edx
- push esi
- push edi
- push ebp
- mov edi, [b]
- mov ecx, 32
- sub ecx, [bits]
- mov edx, -1
- shl edx, cl
- mov ecx, [bits]
- mov ebx, [p1]
- mov eax, [c]
- mov ebp, edx // ebp = mask (modified ebp - don't read/write to variables)
- xor edx, edx // edx = 0
- mov esi, edx
- add edx, edi
- dec edx // edx is pointing at the end of the table (on last word)
- or eax, eax
- cmovnz esi, ebp // if(c) esi=mask else esi=0
- ttmath_loop:
- ror dword ptr [ebx+edx*4], cl
- mov eax, [ebx+edx*4]
- and eax, ebp
- xor [ebx+edx*4], eax // clearing bits
- or [ebx+edx*4], esi // saving old value
- mov esi, eax
- dec edx
- dec edi
- jnz ttmath_loop
- pop ebp // restoring ebp
- rol eax, 1 // 31bit will be first
- and eax, 1
- mov [c], eax
- pop edi
- pop esi
- pop edx
- pop ecx
- pop ebx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2, dummy3;
- __asm__ __volatile__(
- "push %%ebp \n"
- "movl %%ecx, %%esi \n"
- "movl $32, %%ecx \n"
- "subl %%esi, %%ecx \n" // ecx = 32 - bits
- "movl $-1, %%edx \n" // edx = -1 (all bits set to one)
- "shll %%cl, %%edx \n" // shifting (cf <- edx <- 0) (cl times)
- "movl %%edx, %%ebp \n" // ebp = edx = mask
- "movl %%esi, %%ecx \n"
- "xorl %%edx, %%edx \n"
- "movl %%edx, %%esi \n"
- "addl %%edi, %%edx \n"
- "decl %%edx \n" // edx is pointing at the end of the table (on last word)
- "orl %%eax, %%eax \n"
- "cmovnz %%ebp, %%esi \n" // if(c) esi=mask else esi=0
- "1: \n"
- "rorl %%cl, (%%ebx,%%edx,4) \n"
- "movl (%%ebx,%%edx,4), %%eax \n"
- "andl %%ebp, %%eax \n"
- "xorl %%eax, (%%ebx,%%edx,4) \n"
- "orl %%esi, (%%ebx,%%edx,4) \n"
- "movl %%eax, %%esi \n"
- "decl %%edx \n"
- "decl %%edi \n"
- "jnz 1b \n"
- "roll $1, %%eax \n"
- "andl $1, %%eax \n"
- "pop %%ebp \n"
- : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
- : "0" (c), "1" (b), "b" (p1), "c" (bits)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcr2", c)
- return c;
- }
-#ifdef _MSC_VER
-#pragma warning (default : 4731)
- /*
- this method returns the number of the highest set bit in one 32-bit word
- if the 'x' is zero this method returns '-1'
- */
- template<uint value_size>
- sint UInt<value_size>::FindLeadingBitInWord(uint x)
- {
- sint result;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov edx,-1
- bsr eax,[x]
- cmovz eax,edx
- mov [result], eax
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ (
- "movl $-1, %1 \n"
- "bsrl %2, %0 \n"
- "cmovz %1, %0 \n"
- : "=r" (result), "=&r" (dummy)
- : "r" (x)
- : "cc" );
- #endif
- return result;
- }
- /*
- this method returns the number of the smallest set bit in one 32-bit word
- if the 'x' is zero this method returns '-1'
- */
- template<uint value_size>
- sint UInt<value_size>::FindLowestBitInWord(uint x)
- {
- sint result;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov edx,-1
- bsf eax,[x]
- cmovz eax,edx
- mov [result], eax
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ (
- "movl $-1, %1 \n"
- "bsfl %2, %0 \n"
- "cmovz %1, %0 \n"
- : "=r" (result), "=&r" (dummy)
- : "r" (x)
- : "cc" );
- #endif
- return result;
- }
- /*!
- this method sets a special bit in the 'value'
- and returns the last state of the bit (zero or one)
- bit is from <0,31>
- e.g.
- uint x = 100;
- uint bit = SetBitInWord(x, 3);
- now: x = 108 and bit = 0
- */
- template<uint value_size>
- uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
- {
- uint old_bit;
- uint v = value;
- #ifndef __GNUC__
- __asm
- {
- push ebx
- push eax
- mov eax, [v]
- mov ebx, [bit]
- bts eax, ebx
- mov [v], eax
- setc bl
- movzx ebx, bl
- mov [old_bit], ebx
- pop eax
- pop ebx
- }
- #endif
- #ifdef __GNUC__
- __asm__ (
- "btsl %%ebx, %%eax \n"
- "setc %%bl \n"
- "movzx %%bl, %%ebx \n"
- : "=a" (v), "=b" (old_bit)
- : "0" (v), "1" (bit)
- : "cc" );
- #endif
- value = v;
- return old_bit;
- }
- /*!
- multiplication: result_high:result_low = a * b
- result_high - higher word of the result
- result_low - lower word of the result
- this methos never returns a carry
- this method is used in the second version of the multiplication algorithms
- */
- template<uint value_size>
- void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
- {
- /*
- we must use these temporary variables in order to inform the compilator
- that value pointed with result1 and result2 has changed
- this has no effect in visual studio but it's useful when
- using gcc and options like -Ox
- */
- uint result1_;
- uint result2_;
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov eax, [a]
- mul dword ptr [b]
- mov [result2_], edx
- mov [result1_], eax
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- __asm__ (
- "mull %%edx \n"
- : "=a" (result1_), "=d" (result2_)
- : "0" (a), "1" (b)
- : "cc" );
- #endif
- *result_low = result1_;
- *result_high = result2_;
- }
- /*!
- *
- * Division
- *
- *
- */
- /*!
- this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
- r = a:b / c and rest - remainder
- *
- * if r (one word) is too small for the result or c is equal zero
- * there'll be a hardware interruption (0)
- * and probably the end of your program
- *
- */
- template<uint value_size>
- void UInt<value_size>::DivTwoWords(uint a, uint b, uint c, uint * r, uint * rest)
- {
- uint r_;
- uint rest_;
- /*
- these variables have similar meaning like those in
- the multiplication algorithm MulTwoWords
- */
- TTMATH_ASSERT( c != 0 )
- #ifndef __GNUC__
- __asm
- {
- push eax
- push edx
- mov edx, [a]
- mov eax, [b]
- div dword ptr [c]
- mov [r_], eax
- mov [rest_], edx
- pop edx
- pop eax
- }
- #endif
- #ifdef __GNUC__
- __asm__ (
- "divl %%ecx \n"
- : "=a" (r_), "=d" (rest_)
- : "0" (b), "1" (a), "c" (c)
- : "cc" );
- #endif
- *r = r_;
- *rest = rest_;
- }
-} //namespace
-#endif //ifdef TTMATH_PLATFORM32
-#endif //ifndef TTMATH_NOASM
diff --git a/ttmath/ttmathuint_x86_64.h b/ttmath/ttmathuint_x86_64.h
deleted file mode 100644
index 188fc5e..0000000
--- a/ttmath/ttmathuint_x86_64.h
+++ /dev/null
@@ -1,1146 +0,0 @@
- * This file is a part of TTMath Bignum Library
- * and is distributed under the (new) BSD licence.
- * Author: Tomasz Sowa <t.sowa at ttmath.org>
- */
- * Copyright (c) 2006-2010, Tomasz Sowa
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimer.
- *
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the name Tomasz Sowa nor the names of contributors to this
- * project may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- */
-#ifndef headerfilettmathuint_x86_64
-#define headerfilettmathuint_x86_64
- \file ttmathuint_x86_64.h
- \brief template class UInt<uint> with assembler code for 64bit x86_64 processors
- this file is included at the end of ttmathuint.h
-#ifndef __GNUC__
-#include <intrin.h>
-namespace ttmath
- #ifndef __GNUC__
- extern "C"
- {
- uint __fastcall ttmath_adc_x64(uint* p1, const uint* p2, uint nSize, uint c);
- uint __fastcall ttmath_addindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
- uint __fastcall ttmath_addindexed2_x64(uint* p1, uint nSize, uint nPos, uint nValue1, uint nValue2);
- uint __fastcall ttmath_addvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
- uint __fastcall ttmath_sbb_x64(uint* p1, const uint* p2, uint nSize, uint c);
- uint __fastcall ttmath_subindexed_x64(uint* p1, uint nSize, uint nPos, uint nValue);
- uint __fastcall ttmath_subvector_x64(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result);
- uint __fastcall ttmath_rcl_x64(uint* p1, uint nSize, uint nLowestBit);
- uint __fastcall ttmath_rcr_x64(uint* p1, uint nSize, uint nLowestBit);
- uint __fastcall ttmath_div_x64(uint* pnValHi, uint* pnValLo, uint nDiv);
- uint __fastcall ttmath_rcl2_x64(uint* p1, uint nSize, uint nBits, uint c);
- uint __fastcall ttmath_rcr2_x64(uint* p1, uint nSize, uint nBits, uint c);
- };
- #endif
- /*!
- returning the string represents the currect type of the library
- we have following types:
- asm_vc_32 - with asm code designed for Microsoft Visual C++ (32 bits)
- asm_gcc_32 - with asm code designed for GCC (32 bits)
- asm_vc_64 - with asm for VC (64 bit)
- asm_gcc_64 - with asm for GCC (64 bit)
- no_asm_32 - pure C++ version (32 bit) - without any asm code
- no_asm_64 - pure C++ version (64 bit) - without any asm code
- */
- template<uint value_size>
- const char * UInt<value_size>::LibTypeStr()
- {
- #ifndef __GNUC__
- static const char info[] = "asm_vc_64";
- #endif
- #ifdef __GNUC__
- static const char info[] = "asm_gcc_64";
- #endif
- return info;
- }
- /*!
- returning the currect type of the library
- */
- template<uint value_size>
- LibTypeCode UInt<value_size>::LibType()
- {
- #ifndef __GNUC__
- LibTypeCode info = asm_vc_64;
- #endif
- #ifdef __GNUC__
- LibTypeCode info = asm_gcc_64;
- #endif
- return info;
- }
- /*!
- *
- * basic mathematic functions
- *
- */
- /*!
- this method adding ss2 to the this and adding carry if it's defined
- (this = this + ss2 + c)
- ***this method is created only on a 64bit platform***
- c must be zero or one (might be a bigger value than 1)
- function returns carry (1) (if it was)
- */
- template<uint value_size>
- uint UInt<value_size>::Add(const UInt<value_size> & ss2, uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- const uint * p2 = ss2.table;
- // we don't have to use TTMATH_REFERENCE_ASSERT here
- // this algorithm doesn't require it
- #ifndef __GNUC__
- c = ttmath_adc_x64(p1,p2,b,c);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- /*
- this part should be compiled with gcc
- */
- __asm__ __volatile__(
- "xorq %%rdx, %%rdx \n"
- "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
- "1: \n"
- "movq (%%rsi,%%rdx,8), %%rax \n"
- "adcq %%rax, (%%rbx,%%rdx,8) \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "adcq %%rcx, %%rcx \n"
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1), "S" (p2)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Add", c)
- return c;
- }
- /*!
- this method adds one word (at a specific position)
- and returns a carry (if it was)
- ***this method is created only on a 64bit platform***
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- AddInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 + 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::AddInt(uint value, uint index)
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size )
- #ifndef __GNUC__
- c = ttmath_addindexed_x64(p1,b,index,value);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "subq %%rdx, %%rcx \n"
- "1: \n"
- "addq %%rax, (%%rbx,%%rdx,8) \n"
- "jnc 2f \n"
- "movq $1, %%rax \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n"
- "movzx %%al, %%rdx \n"
- : "=d" (c), "=a" (dummy), "=c" (dummy2)
- : "0" (index), "1" (value), "2" (b), "b" (p1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::AddInt", c)
- return c;
- }
- /*!
- this method adds only two unsigned words to the existing value
- and these words begin on the 'index' position
- (it's used in the multiplication algorithm 2)
- ***this method is created only on a 64bit platform***
- index should be equal or smaller than value_size-2 (index <= value_size-2)
- x1 - lower word, x2 - higher word
- for example if we've got value_size equal 4 and:
- table[0] = 3
- table[1] = 4
- table[2] = 5
- table[3] = 6
- then let
- x1 = 10
- x2 = 20
- and
- index = 1
- the result of this method will be:
- table[0] = 3
- table[1] = 4 + x1 = 14
- table[2] = 5 + x2 = 25
- table[3] = 6
- and no carry at the end of table[3]
- (of course if there was a carry in table[2](5+20) then
- this carry would be passed to the table[3] etc.)
- */
- template<uint value_size>
- uint UInt<value_size>::AddTwoInts(uint x2, uint x1, uint index)
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size - 1 )
- #ifndef __GNUC__
- c = ttmath_addindexed2_x64(p1,b,index,x1,x2);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "subq %%rdx, %%rcx \n"
- "addq %%rsi, (%%rbx,%%rdx,8) \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "1: \n"
- "adcq %%rax, (%%rbx,%%rdx,8) \n"
- "jnc 2f \n"
- "mov $0, %%rax \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n"
- "movzx %%al, %%rax \n"
- : "=a" (c), "=c" (dummy), "=d" (dummy2)
- : "0" (x2), "1" (b), "2" (index), "b" (p1), "S" (x1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::AddTwoInts", c)
- return c;
- }
- /*!
- this static method addes one vector to the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5+1
- 4 3 4+3
- 2 7 2+7
- 6 6
- 9 9
- of course the carry is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::AddVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
- {
- TTMATH_ASSERT( ss1_size >= ss2_size )
- uint c;
- #ifndef __GNUC__
- c = ttmath_addvector_x64(ss1, ss2, ss1_size, ss2_size, result);
- #endif
- #ifdef __GNUC__
- uint dummy1, dummy2, dummy3;
- uint rest = ss1_size - ss2_size;
- // this part should be compiled with gcc
- __asm__ __volatile__(
- "mov %%rdx, %%r8 \n"
- "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
- "1: \n"
- "mov (%%rsi,%%rdx,8), %%rax \n"
- "adc (%%rbx,%%rdx,8), %%rax \n"
- "mov %%rax, (%%rdi,%%rdx,8) \n"
- "inc %%rdx \n"
- "dec %%rcx \n"
- "jnz 1b \n"
- "adc %%rcx, %%rcx \n" // rcx has the cf state
- "or %%r8, %%r8 \n"
- "jz 3f \n"
- "xor %%rbx, %%rbx \n" // ebx = 0
- "neg %%rcx \n" // setting cf from rcx
- "mov %%r8, %%rcx \n" // rcx=rest and is != 0
- "2: \n"
- "mov (%%rsi, %%rdx, 8), %%rax \n"
- "adc %%rbx, %%rax \n"
- "mov %%rax, (%%rdi, %%rdx, 8) \n"
- "inc %%rdx \n"
- "dec %%rcx \n"
- "jnz 2b \n"
- "adc %%rcx, %%rcx \n"
- "3: \n"
- : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
- : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
- : "%r8", "cc", "memory" );
- #endif
- TTMATH_VECTOR_LOGC("UInt::AddVector", c, result, ss1_size)
- return c;
- }
- /*!
- this method's subtracting ss2 from the 'this' and subtracting
- carry if it has been defined
- (this = this - ss2 - c)
- ***this method is created only on a 64bit platform***
- c must be zero or one (might be a bigger value than 1)
- function returns carry (1) (if it was)
- */
- template<uint value_size>
- uint UInt<value_size>::Sub(const UInt<value_size> & ss2, uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- const uint * p2 = ss2.table;
- // we don't have to use TTMATH_REFERENCE_ASSERT here
- // this algorithm doesn't require it
- #ifndef __GNUC__
- c = ttmath_sbb_x64(p1,p2,b,c);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "xorq %%rdx, %%rdx \n"
- "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
- "1: \n"
- "movq (%%rsi,%%rdx,8), %%rax \n"
- "sbbq %%rax, (%%rbx,%%rdx,8) \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "adcq %%rcx, %%rcx \n"
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1), "S" (p2)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Sub", c)
- return c;
- }
- /*!
- this method subtracts one word (at a specific position)
- and returns a carry (if it was)
- ***this method is created only on a 64bit platform***
- if we've got (value_size=3):
- table[0] = 10;
- table[1] = 30;
- table[2] = 5;
- and we call:
- SubInt(2,1)
- then it'll be:
- table[0] = 10;
- table[1] = 30 - 2;
- table[2] = 5;
- of course if there was a carry from table[2] it would be returned
- */
- template<uint value_size>
- uint UInt<value_size>::SubInt(uint value, uint index)
- {
- uint b = value_size;
- uint * p1 = table;
- uint c;
- TTMATH_ASSERT( index < value_size )
- #ifndef __GNUC__
- c = ttmath_subindexed_x64(p1,b,index,value);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "subq %%rdx, %%rcx \n"
- "1: \n"
- "subq %%rax, (%%rbx,%%rdx,8) \n"
- "jnc 2f \n"
- "movq $1, %%rax \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "2: \n"
- "setc %%al \n"
- "movzx %%al, %%rdx \n"
- : "=d" (c), "=a" (dummy), "=c" (dummy2)
- : "0" (index), "1" (value), "2" (b), "b" (p1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::SubInt", c)
- return c;
- }
- /*!
- this static method subtractes one vector from the other
- 'ss1' is larger in size or equal to 'ss2'
- ss1 points to the first (larger) vector
- ss2 points to the second vector
- ss1_size - size of the ss1 (and size of the result too)
- ss2_size - size of the ss2
- result - is the result vector (which has size the same as ss1: ss1_size)
- Example: ss1_size is 5, ss2_size is 3
- ss1: ss2: result (output):
- 5 1 5-1
- 4 3 4-3
- 2 7 2-7
- 6 6-1 (the borrow from previous item)
- 9 9
- return (carry): 0
- of course the carry (borrow) is propagated and will be returned from the last item
- (this method is used by the Karatsuba multiplication algorithm)
- */
- template<uint value_size>
- uint UInt<value_size>::SubVector(const uint * ss1, const uint * ss2, uint ss1_size, uint ss2_size, uint * result)
- {
- TTMATH_ASSERT( ss1_size >= ss2_size )
- uint c;
- #ifndef __GNUC__
- c = ttmath_subvector_x64(ss1, ss2, ss1_size, ss2_size, result);
- #endif
- #ifdef __GNUC__
- // the asm code is nearly the same as in AddVector
- // only two instructions 'adc' are changed to 'sbb'
- uint dummy1, dummy2, dummy3;
- uint rest = ss1_size - ss2_size;
- __asm__ __volatile__(
- "mov %%rdx, %%r8 \n"
- "xor %%rdx, %%rdx \n" // rdx = 0, cf = 0
- "1: \n"
- "mov (%%rsi,%%rdx,8), %%rax \n"
- "sbb (%%rbx,%%rdx,8), %%rax \n"
- "mov %%rax, (%%rdi,%%rdx,8) \n"
- "inc %%rdx \n"
- "dec %%rcx \n"
- "jnz 1b \n"
- "adc %%rcx, %%rcx \n" // rcx has the cf state
- "or %%r8, %%r8 \n"
- "jz 3f \n"
- "xor %%rbx, %%rbx \n" // ebx = 0
- "neg %%rcx \n" // setting cf from rcx
- "mov %%r8, %%rcx \n" // rcx=rest and is != 0
- "2: \n"
- "mov (%%rsi, %%rdx, 8), %%rax \n"
- "sbb %%rbx, %%rax \n"
- "mov %%rax, (%%rdi, %%rdx, 8) \n"
- "inc %%rdx \n"
- "dec %%rcx \n"
- "jnz 2b \n"
- "adc %%rcx, %%rcx \n"
- "3: \n"
- : "=a" (dummy1), "=b" (dummy2), "=c" (c), "=d" (dummy3)
- : "1" (ss2), "2" (ss2_size), "3" (rest), "S" (ss1), "D" (result)
- : "%r8", "cc", "memory" );
- #endif
- TTMATH_VECTOR_LOGC("UInt::SubVector", c, result, ss1_size)
- return c;
- }
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bit* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2_one(1) there'll be 010100001 and Rcl2_one returns 0
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2_one(uint c)
- {
- sint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- c = ttmath_rcl_x64(p1,b,c);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2;
- __asm__ __volatile__(
- "xorq %%rdx, %%rdx \n" // rdx=0
- "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
- "1: \n"
- "rclq $1, (%%rbx, %%rdx, 8) \n"
- "incq %%rdx \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "adcq %%rcx, %%rcx \n"
- : "=c" (c), "=a" (dummy), "=d" (dummy2)
- : "0" (b), "1" (c), "b" (p1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcl2_one", c)
- return c;
- }
- /*!
- this method moves all bits into the right hand side
- c -> this -> return value
- the highest *bit* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2_one(1) there'll be 100000001 and Rcr2_one returns 0
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2_one(uint c)
- {
- sint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- c = ttmath_rcr_x64(p1,b,c);
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ __volatile__(
- "negq %%rax \n" // CF=1 if rax!=0 , CF=0 if rax==0
- "1: \n"
- "rcrq $1, -8(%%rbx, %%rcx, 8) \n"
- "decq %%rcx \n"
- "jnz 1b \n"
- "adcq %%rcx, %%rcx \n"
- : "=c" (c), "=a" (dummy)
- : "0" (b), "1" (c), "b" (p1)
- : "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcr2_one", c)
- return c;
- }
- /*!
- this method moves all bits into the left hand side
- return value <- this <- c
- the lowest *bits* will be held the 'c' and
- the state of one additional bit (on the left hand side)
- will be returned
- for example:
- let this is 001010000
- after Rcl2(3, 1) there'll be 010000111 and Rcl2 returns 1
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- uint UInt<value_size>::Rcl2(uint bits, uint c)
- {
- uint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- c = ttmath_rcl2_x64(p1,b,bits,c);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2, dummy3;
- __asm__ __volatile__(
- "movq %%rcx, %%rsi \n"
- "movq $64, %%rcx \n"
- "subq %%rsi, %%rcx \n"
- "movq $-1, %%rdx \n"
- "shrq %%cl, %%rdx \n"
- "movq %%rdx, %%r8 \n"
- "movq %%rsi, %%rcx \n"
- "xorq %%rdx, %%rdx \n"
- "movq %%rdx, %%rsi \n"
- "orq %%rax, %%rax \n"
- "cmovnz %%r8, %%rsi \n"
- "1: \n"
- "rolq %%cl, (%%rbx,%%rdx,8) \n"
- "movq (%%rbx,%%rdx,8), %%rax \n"
- "andq %%r8, %%rax \n"
- "xorq %%rax, (%%rbx,%%rdx,8) \n"
- "orq %%rsi, (%%rbx,%%rdx,8) \n"
- "movq %%rax, %%rsi \n"
- "incq %%rdx \n"
- "decq %%rdi \n"
- "jnz 1b \n"
- "and $1, %%rax \n"
- : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
- : "0" (c), "1" (b), "b" (p1), "c" (bits)
- : "%r8", "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcl2", c)
- return c;
- }
- /*!
- this method moves all bits into the right hand side
- C -> this -> return value
- the highest *bits* will be held the 'c' and
- the state of one additional bit (on the right hand side)
- will be returned
- for example:
- let this is 000000010
- after Rcr2(2, 1) there'll be 110000000 and Rcr2 returns 1
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- uint UInt<value_size>::Rcr2(uint bits, uint c)
- {
- sint b = value_size;
- uint * p1 = table;
- #ifndef __GNUC__
- c = ttmath_rcr2_x64(p1,b,bits,c);
- #endif
- #ifdef __GNUC__
- uint dummy, dummy2, dummy3;
- __asm__ __volatile__(
- "movq %%rcx, %%rsi \n"
- "movq $64, %%rcx \n"
- "subq %%rsi, %%rcx \n"
- "movq $-1, %%rdx \n"
- "shlq %%cl, %%rdx \n"
- "movq %%rdx, %%R8 \n"
- "movq %%rsi, %%rcx \n"
- "xorq %%rdx, %%rdx \n"
- "movq %%rdx, %%rsi \n"
- "addq %%rdi, %%rdx \n"
- "decq %%rdx \n"
- "orq %%rax, %%rax \n"
- "cmovnz %%R8, %%rsi \n"
- "1: \n"
- "rorq %%cl, (%%rbx,%%rdx,8) \n"
- "movq (%%rbx,%%rdx,8), %%rax \n"
- "andq %%R8, %%rax \n"
- "xorq %%rax, (%%rbx,%%rdx,8) \n"
- "orq %%rsi, (%%rbx,%%rdx,8) \n"
- "movq %%rax, %%rsi \n"
- "decq %%rdx \n"
- "decq %%rdi \n"
- "jnz 1b \n"
- "rolq $1, %%rax \n"
- "andq $1, %%rax \n"
- : "=a" (c), "=D" (dummy), "=S" (dummy2), "=d" (dummy3)
- : "0" (c), "1" (b), "b" (p1), "c" (bits)
- : "%r8", "cc", "memory" );
- #endif
- TTMATH_LOGC("UInt::Rcr2", c)
- return c;
- }
- /*
- this method returns the number of the highest set bit in one 64-bit word
- if the 'x' is zero this method returns '-1'
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- sint UInt<value_size>::FindLeadingBitInWord(uint x)
- {
- sint result;
- #ifndef __GNUC__
- unsigned long nIndex = 0;
- if( _BitScanReverse64(&nIndex,x) == 0 )
- result = -1;
- else
- result = nIndex;
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ (
- "movq $-1, %1 \n"
- "bsrq %2, %0 \n"
- "cmovz %1, %0 \n"
- : "=r" (result), "=&r" (dummy)
- : "r" (x)
- : "cc" );
- #endif
- return result;
- }
- /*
- this method returns the number of the highest set bit in one 64-bit word
- if the 'x' is zero this method returns '-1'
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- sint UInt<value_size>::FindLowestBitInWord(uint x)
- {
- sint result;
- #ifndef __GNUC__
- unsigned long nIndex = 0;
- if( _BitScanForward64(&nIndex,x) == 0 )
- result = -1;
- else
- result = nIndex;
- #endif
- #ifdef __GNUC__
- uint dummy;
- __asm__ (
- "movq $-1, %1 \n"
- "bsfq %2, %0 \n"
- "cmovz %1, %0 \n"
- : "=r" (result), "=&r" (dummy)
- : "r" (x)
- : "cc" );
- #endif
- return result;
- }
- /*!
- this method sets a special bit in the 'value'
- and returns the last state of the bit (zero or one)
- ***this method is created only on a 64bit platform***
- bit is from <0,63>
- e.g.
- uint x = 100;
- uint bit = SetBitInWord(x, 3);
- now: x = 108 and bit = 0
- */
- template<uint value_size>
- uint UInt<value_size>::SetBitInWord(uint & value, uint bit)
- {
- uint old_bit;
- uint v = value;
- #ifndef __GNUC__
- old_bit = _bittestandset64((__int64*)&value,bit) != 0;
- #endif
- #ifdef __GNUC__
- __asm__ (
- "btsq %%rbx, %%rax \n"
- "setc %%bl \n"
- "movzx %%bl, %%rbx \n"
- : "=a" (v), "=b" (old_bit)
- : "0" (v), "1" (bit)
- : "cc" );
- #endif
- value = v;
- return old_bit;
- }
- /*!
- *
- * Multiplication
- *
- *
- */
- /*!
- multiplication: result_high:result_low = a * b
- result_high - higher word of the result
- result_low - lower word of the result
- this methos never returns a carry
- this method is used in the second version of the multiplication algorithms
- ***this method is created only on a 64bit platform***
- */
- template<uint value_size>
- void UInt<value_size>::MulTwoWords(uint a, uint b, uint * result_high, uint * result_low)
- {
- /*
- we must use these temporary variables in order to inform the compilator
- that value pointed with result1 and result2 has changed
- this has no effect in visual studio but it's usefull when
- using gcc and options like -O
- */
- uint result1_;
- uint result2_;
- #ifndef __GNUC__
- result1_ = _umul128(a,b,&result2_);
- #endif
- #ifdef __GNUC__
- __asm__ (
- "mulq %%rdx \n"
- : "=a" (result1_), "=d" (result2_)
- : "0" (a), "1" (b)
- : "cc" );
- #endif
- *result_low = result1_;
- *result_high = result2_;
- }
- /*!
- *
- * Division
- *
- *
- */
- /*!
- this method calculates 64bits word a:b / 32bits c (a higher, b lower word)
- r = a:b / c and rest - remainder
- ***this method is created only on a 64bit platform***
- *
- * if r (one word) is too small for the result or c is equal zero
- * there'll be a hardware interruption (0)
- * and probably the end of your program
- *
- */
- template<uint value_size>
- void UInt<value_size>::DivTwoWords(uint a,uint b, uint c, uint * r, uint * rest)
- {
- uint r_;
- uint rest_;
- /*
- these variables have similar meaning like those in
- the multiplication algorithm MulTwoWords
- */
- TTMATH_ASSERT( c != 0 )
- #ifndef __GNUC__
- ttmath_div_x64(&a,&b,c);
- r_ = a;
- rest_ = b;
- #endif
- #ifdef __GNUC__
- __asm__ (
- "divq %%rcx \n"
- : "=a" (r_), "=d" (rest_)
- : "d" (a), "a" (b), "c" (c)
- : "cc" );
- #endif
- *r = r_;
- *rest = rest_;
- }
-} //namespace
-#endif //ifdef TTMATH_PLATFORM64
-#endif //ifndef TTMATH_NOASM
diff --git a/ttmath/ttmathuint_x86_64_msvc.asm b/ttmath/ttmathuint_x86_64_msvc.asm
deleted file mode 100644
index b7c85c2..0000000
--- a/ttmath/ttmathuint_x86_64_msvc.asm
+++ /dev/null
@@ -1,548 +0,0 @@
-; This file is a part of TTMath Bignum Library
-; and is distributed under the (new) BSD licence.
-; Author: Christian Kaiser <chk at online.de>
-; Copyright (c) 2009, Christian Kaiser
-; All rights reserved.
-; Redistribution and use in source and binary forms, with or without
-; modification, are permitted provided that the following conditions are met:
-; * Redistributions of source code must retain the above copyright notice,
-; this list of conditions and the following disclaimer.
-; * Redistributions in binary form must reproduce the above copyright
-; notice, this list of conditions and the following disclaimer in the
-; documentation and/or other materials provided with the distribution.
-; * Neither the name Christian Kaiser nor the names of contributors to this
-; project may be used to endorse or promote products derived
-; from this software without specific prior written permission.
-; compile with debug info: ml64.exe /c /Zd /Zi ttmathuint_x86_64_msvc.asm
-; compile without debug info: ml64.exe /c ttmathuint_x86_64_msvc.asm
-; this creates ttmathuint_x86_64_msvc.obj file which can be linked with your program
-PUBLIC ttmath_adc_x64
-PUBLIC ttmath_addindexed_x64
-PUBLIC ttmath_addindexed2_x64
-PUBLIC ttmath_addvector_x64
-PUBLIC ttmath_sbb_x64
-PUBLIC ttmath_subindexed_x64
-PUBLIC ttmath_subvector_x64
-PUBLIC ttmath_rcl_x64
-PUBLIC ttmath_rcr_x64
-PUBLIC ttmath_rcl2_x64
-PUBLIC ttmath_rcr2_x64
-PUBLIC ttmath_div_x64
-; Microsoft x86_64 convention: http://msdn.microsoft.com/en-us/library/9b372w95.aspx
-; "rax, rcx, rdx, r8-r11 are volatile."
-; "rbx, rbp, rdi, rsi, r12-r15 are nonvolatile."
-ttmath_adc_x64 PROC
- ; Multi-word addition with carry: p1[i] += p2[i] + carry for i = 0 .. nSize-1
- ; in: rcx = p1 (updated in place)
- ; rdx = p2
- ; r8 = nSize (the loop body runs once before the count is tested)
- ; r9 = nCarry (any non-zero value is a carry-in)
- ; out: rax = 1 if the last word produced a carry, otherwise 0
- xor r11, r11 ; r11 = word index (must be cleared before CF is seeded)
- neg r9 ; CF = 1 exactly when nCarry != 0
- ALIGN 16
- next_word:
- mov rax, qword ptr [rdx + r11 * 8]
- adc qword ptr [rcx + r11 * 8], rax
- lea r11, [r11+1] ; lea advances the index without touching CF
- dec r8 ; dec leaves CF untouched as well
- jnz next_word
- mov eax, 0 ; mov keeps CF; writing eax clears all of rax
- setc al ; rax = carry out
- ret
-ttmath_adc_x64 ENDP
-ttmath_addindexed_x64 PROC
- ; Adds nValue to the word p1[nPos] and ripples a resulting carry
- ; through the following words.
- ; rcx = p1
- ; rdx = nSize
- ; r8 = nPos
- ; r9 = nValue
- ; returns rax = 1 if the carry ran past the last word of the table, otherwise 0
- xor rax, rax ; rax = result
- sub rdx, r8 ; rdx = remaining count of uints
- add qword ptr [rcx + r8 * 8], r9
- jc next1
- ret ; the first addition did not carry
- next1:
- mov r9, 1 ; from here on only a carry of 1 is propagated
- ALIGN 16
- loop1:
- dec rdx
- jz done_with_cy ; no words left: the carry leaves the table
- lea r8, [r8+1]
- add qword ptr [rcx + r8 * 8], r9
- jc loop1
- ret ; the carry was absorbed
- done_with_cy:
- lea rax, [rax+1] ; rax = 1
- ret
-ttmath_addindexed_x64 ENDP
-ttmath_addindexed2_x64 PROC
- ; Adds nValue1 to p1[nPos] and nValue2 (plus the carry) to p1[nPos+1],
- ; then ripples a resulting carry through the following words.
- ; rcx = p1 (pointer)
- ; rdx = b (value size)
- ; r8 = nPos
- ; r9 = nValue1
- ; [rsp+0x28] = nValue2 (fifth argument: above the return address and the 32 byte shadow space)
- ; returns rax = 1 if the carry ran past the last word of the table, otherwise 0
- xor rax, rax ; return value
- mov r11, rcx ; table
- sub rdx, r8 ; rdx = remaining count of uints
- mov r10, [rsp+028h] ; r10 = nValue2 (rsp, not esp: the stack address must not be truncated to 32 bits)
- add qword ptr [r11 + r8 * 8], r9
- lea r8, [r8+1] ; lea keeps the carry of the first addition
- lea rdx, [rdx-1]
- adc qword ptr [r11 + r8 * 8], r10
- jc next
- ret
- ALIGN 16
- loop1:
- lea r8, [r8+1]
- add qword ptr [r11 + r8 * 8], 1
- jc next
- ret
- next:
- dec rdx ; does not modify CY too...
- jnz loop1
- lea rax, [rax+1] ; no words left: report the carry
- ret
-ttmath_addindexed2_x64 ENDP
-ttmath_addvector_x64 PROC
- ; result[i] = ss1[i] + ss2[i] (with carry) for the first ss2_size words,
- ; then the remaining ss1_size - ss2_size words of ss1 are copied to result
- ; with the carry added in.
- ; rcx = ss1
- ; rdx = ss2
- ; r8 = ss1_size
- ; r9 = ss2_size
- ; [rsp+0x28] = result (fifth argument: above the return address and the 32 byte shadow space)
- ; returns rax = carry out of the last word
- ; NOTE(review): the code appears to rely on ss1_size >= ss2_size >= 1 - confirm against the callers
- mov r10, [rsp+028h] ; rsp, not esp: the stack address must not be truncated to 32 bits
- sub r8, r9 ; r8 = count of words present only in ss1
- xor r11, r11 ; r11=0, cf=0
- ALIGN 16
- loop1:
- mov rax, qword ptr [rcx + r11 * 8]
- adc rax, qword ptr [rdx + r11 * 8]
- mov qword ptr [r10 + r11 * 8], rax
- inc r11 ; inc/dec leave cf untouched
- dec r9
- jnz loop1
- adc r9, r9 ; r9 has the cf state
- or r8, r8
- jz done ; both vectors have the same size
- neg r9 ; setting cf from r9
- mov r9, 0 ; don't use xor here (cf is used)
- loop2:
- mov rax, qword ptr [rcx + r11 * 8]
- adc rax, r9
- mov qword ptr [r10 + r11 * 8], rax
- inc r11
- dec r8
- jnz loop2
- adc r8, r8 ; r8 = final cf
- mov rax, r8
- ret
- done:
- mov rax, r9
- ret
-ttmath_addvector_x64 ENDP
-ttmath_sbb_x64 PROC
- ; Multi-word subtraction with borrow: p1[i] -= p2[i] + borrow for i = 0 .. nCount-1
- ; in: rcx = p1 (updated in place)
- ; rdx = p2
- ; r8 = nCount (the loop body runs once before the count is tested)
- ; r9 = nCarry (any non-zero value is a borrow-in)
- ; out: rax = 1 if the last word produced a borrow, otherwise 0
- xor r11, r11 ; r11 = word index (must be cleared before CF is seeded)
- neg r9 ; CF = 1 exactly when nCarry != 0
- ALIGN 16
- next_word:
- mov rax, qword ptr [rdx + r11 * 8]
- sbb qword ptr [rcx + r11 * 8], rax
- lea r11, [r11+1] ; lea advances the index without touching CF
- dec r8 ; dec leaves CF untouched as well
- jnz next_word
- mov eax, 0 ; mov keeps CF; writing eax clears all of rax
- setc al ; rax = borrow out
- ret
-ttmath_sbb_x64 ENDP
-ttmath_subindexed_x64 PROC
- ; Subtracts nValue from the word p1[nPos] and ripples a resulting borrow
- ; through the following words.
- ; rcx = p1
- ; rdx = nSize
- ; r8 = nPos
- ; r9 = nValue
- ; returns rax = 1 if the borrow ran past the last word of the table, otherwise 0
- sub rdx, r8 ; rdx = remaining count of uints
- ALIGN 16
- loop1:
- sub qword ptr [rcx + r8 * 8], r9
- jnc done ; no borrow: finished
- lea r8, [r8+1]
- mov r9, 1 ; from here on only a borrow of 1 is propagated
- dec rdx
- jnz loop1
- mov rax, 1 ; no words left: report the borrow
- ret
- done:
- xor rax, rax
- ret
-ttmath_subindexed_x64 ENDP
-; the same asm code as in addvector_x64 only two instructions 'adc' changed to 'sbb'
-ttmath_subvector_x64 PROC
- ; result[i] = ss1[i] - ss2[i] (with borrow) for the first ss2_size words,
- ; then the remaining ss1_size - ss2_size words of ss1 are copied to result
- ; with the borrow subtracted.
- ; rcx = ss1
- ; rdx = ss2
- ; r8 = ss1_size
- ; r9 = ss2_size
- ; [rsp+0x28] = result (fifth argument: above the return address and the 32 byte shadow space)
- ; returns rax = borrow out of the last word
- ; NOTE(review): the code appears to rely on ss1_size >= ss2_size >= 1 - confirm against the callers
- mov r10, [rsp+028h] ; rsp, not esp: the stack address must not be truncated to 32 bits
- sub r8, r9 ; r8 = count of words present only in ss1
- xor r11, r11 ; r11=0, cf=0
- ALIGN 16
- loop1:
- mov rax, qword ptr [rcx + r11 * 8]
- sbb rax, qword ptr [rdx + r11 * 8]
- mov qword ptr [r10 + r11 * 8], rax
- inc r11 ; inc/dec leave cf untouched
- dec r9
- jnz loop1
- adc r9, r9 ; r9 has the cf state
- or r8, r8
- jz done ; both vectors have the same size
- neg r9 ; setting cf from r9
- mov r9, 0 ; don't use xor here (cf is used)
- loop2:
- mov rax, qword ptr [rcx + r11 * 8]
- sbb rax, r9
- mov qword ptr [r10 + r11 * 8], rax
- inc r11
- dec r8
- jnz loop2
- adc r8, r8 ; r8 = final cf
- mov rax, r8
- ret
- done:
- mov rax, r9
- ret
-ttmath_subvector_x64 ENDP
-ttmath_rcl_x64 PROC
- ; Rotates the whole table left by one bit through the carry flag,
- ; starting at word 0 and moving towards the highest word.
- ; rcx = p1
- ; rdx = b (count of words)
- ; r8 = nLowestBit (any non-zero value is shifted into bit 0 of word 0)
- ; returns rax = the bit shifted out of the highest word
- mov r11, rcx ; table
- xor r10, r10 ; r10 = word index
- neg r8 ; CY set if r8 <> 0
- ALIGN 16
- loop1:
- rcl qword ptr [r11 + r10 * 8], 1
- lea r10, [r10+1] ; lea and dec keep CY for the next word
- dec rdx
- jnz loop1
- setc al
- movzx rax, al
- ret
-ttmath_rcl_x64 ENDP
-ttmath_rcr_x64 PROC
- ; Rotates the whole table right by one bit through the carry flag,
- ; starting at the highest word and moving towards word 0.
- ; rcx = p1
- ; rdx = nSize
- ; r8 = nLowestBit (any non-zero value is shifted into the top bit of the highest word)
- ; returns rax = the bit shifted out of word 0
- neg r8 ; CY set if r8 <> 0
- ALIGN 16
- loop1:
- rcr qword ptr -8[rcx + rdx * 8], 1 ; word with index rdx-1
- dec rdx ; dec keeps CY for the next word
- jnz loop1
- setc al
- movzx rax, al
- ret
-ttmath_rcr_x64 ENDP
-ttmath_div_x64 PROC
- ; Divides the double word Hi:Lo by nDiv, in place.
- ; in: rcx = &Hi
- ; rdx = &Lo
- ; r8 = nDiv
- ; out: *Hi = quotient, *Lo = remainder
- ; div faults if nDiv is zero or the quotient does not fit in one word
- mov r10, rdx ; keep &Lo, div needs rdx for the high word
- mov rax, qword ptr [r10] ; rax = low word
- mov rdx, qword ptr [rcx] ; rdx = high word
- div r8 ; rax = quotient, rdx = remainder
- mov qword ptr [r10], rdx ; remainder
- mov qword ptr [rcx], rax ; value
- ret
-ttmath_div_x64 ENDP
-ttmath_rcl2_x64 PROC
- ; Shifts the whole table left by 'bits' bits, starting at word 0.
- ; The bits pushed out of one word enter the next word at the bottom;
- ; the vacated low bits of word 0 are filled with ones if c is non-zero,
- ; with zeros otherwise.
- ; rcx = p1
- ; rdx = nSize
- ; r8 = bits
- ; r9 = c
- ; returns rax = the last bit moved out of the highest word
- ; NOTE(review): the mask computation looks valid only for 0 < bits < 64 - confirm against the callers
- push rbx ; rbx is nonvolatile in the Microsoft x64 convention
- mov r10, rcx ; r10 = p1
- xor rax, rax
- mov rcx, 64
- sub rcx, r8
- mov r11, -1
- shr r11, cl ; r11 = mask
- mov rcx, r8 ; rcx = count of bits
- mov rbx, rax ; rbx = old value = 0
- or r9, r9
- cmovnz rbx, r11 ; if (c) then old value = mask
- mov r9, rax ; r9 = index (0..nSize-1)
- ALIGN 16
- loop1:
- rol qword ptr [r10+r9*8], cl ; the outgoing bits land in the low part of the word
- mov rax, qword ptr [r10+r9*8]
- and rax, r11 ; rax = bits moved out of this word
- xor qword ptr [r10+r9*8], rax ; clear them in the word
- or qword ptr [r10+r9*8], rbx ; insert the bits from the previous word
- mov rbx, rax
- lea r9, [r9+1]
- dec rdx
- jnz loop1
- and rax, 1
- pop rbx
- ret
-ttmath_rcl2_x64 ENDP
-ttmath_rcr2_x64 PROC
- ; Shifts the whole table right by 'bits' bits, starting at the highest word.
- ; The bits pushed out of one word enter the next lower word at the top;
- ; the vacated high bits of the highest word are filled with ones if c is
- ; non-zero, with zeros otherwise.
- ; rcx = p1
- ; rdx = nSize
- ; r8 = bits
- ; r9 = c
- ; returns rax = the last bit moved out of word 0
- ; NOTE(review): the mask computation looks valid only for 0 < bits < 64 - confirm against the callers
- push rbx ; rbx is nonvolatile in the Microsoft x64 convention
- mov r10, rcx ; r10 = p1
- xor rax, rax
- mov rcx, 64
- sub rcx, r8
- mov r11, -1
- shl r11, cl ; r11 = mask
- mov rcx, r8 ; rcx = count of bits
- mov rbx, rax ; rbx = old value = 0
- or r9, r9
- cmovnz rbx, r11 ; if (c) then old value = mask
- mov r9, rdx ; r9 = index (0..nSize-1)
- lea r9, [r9-1]
- ALIGN 16
- loop1:
- ror qword ptr [r10+r9*8], cl ; the outgoing bits land in the high part of the word
- mov rax, qword ptr [r10+r9*8]
- and rax, r11 ; rax = bits moved out of this word
- xor qword ptr [r10+r9*8], rax ; clear them in the word
- or qword ptr [r10+r9*8], rbx ; insert the bits from the previous word
- mov rbx, rax
- lea r9, [r9-1]
- dec rdx
- jnz loop1
- rol rax, 1 ; bring the top bit of the outgoing bits down to bit 0
- and rax, 1
- pop rbx
- ret
-ttmath_rcr2_x64 ENDP
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/freebayes.git
More information about the debian-med-commit
mailing list