[med-svn] [r-cran-biasedurn] 05/12: New upstream version 1.07
Andreas Tille
tille at debian.org
Wed Nov 29 13:46:49 UTC 2017
This is an automated email from the git hooks/post-receive script.
tille pushed a commit to branch master
in repository r-cran-biasedurn.
commit 3e7cdbee4892618a85c4ab00687eb414d3b6fab3
Author: Andreas Tille <tille at debian.org>
Date: Wed Nov 29 14:38:43 2017 +0100
New upstream version 1.07
---
DESCRIPTION | 19 +
MD5 | 29 +
NAMESPACE | 41 +
R/urn1.R | 386 +++++++
R/urn2.R | 319 ++++++
build/vignette.rds | Bin 0 -> 204 bytes
debian/changelog | 14 -
debian/compat | 1 -
debian/control | 27 -
debian/copyright | 29 -
debian/rules | 4 -
debian/source/format | 1 -
debian/watch | 2 -
demo/00Index | 5 +
demo/ApproxHypergeo.R | 26 +
demo/CompareHypergeo.R | 25 +
demo/OddsPrecision.R | 21 +
demo/SampleWallenius.R | 36 +
demo/UrnTheory.R | 4 +
inst/doc/UrnTheory.Rtex | 492 +++++++++
inst/doc/UrnTheory.pdf | Bin 0 -> 207777 bytes
man/BiasedUrn-1-Package.Rd | 105 ++
man/BiasedUrn-2-Univariate.Rd | 185 ++++
man/BiasedUrn-3-Multivariate.Rd | 189 ++++
src/Makevars | 3 +
src/erfres.h | 203 ++++
src/fnchyppr.cpp | 738 ++++++++++++++
src/randomc.h | 209 ++++
src/stoc1.cpp | 825 +++++++++++++++
src/stoc3.cpp | 1164 ++++++++++++++++++++++
src/stocR.cpp | 25 +
src/stocR.h | 96 ++
src/stocc.h | 554 +++++++++++
src/urn1.cpp | 1668 +++++++++++++++++++++++++++++++
src/urn2.cpp | 1375 +++++++++++++++++++++++++
src/wnchyppr.cpp | 2100 +++++++++++++++++++++++++++++++++++++++
vignettes/UrnTheory.Rtex | 492 +++++++++
37 files changed, 11334 insertions(+), 78 deletions(-)
diff --git a/DESCRIPTION b/DESCRIPTION
new file mode 100644
index 0000000..e233559
--- /dev/null
+++ b/DESCRIPTION
@@ -0,0 +1,19 @@
+Package: BiasedUrn
+Type: Package
+Title: Biased Urn Model Distributions
+Version: 1.07
+Date: 2015-12-28
+Author: Agner Fog
+Maintainer: Agner Fog <agner at agner.org>
+Description: Statistical models of biased sampling in the form of
+ univariate and multivariate noncentral hypergeometric distributions,
+ including Wallenius' noncentral hypergeometric distribution and
+ Fisher's noncentral hypergeometric distribution
+ (also called extended hypergeometric distribution).
+ See vignette("UrnTheory") for explanation of these distributions.
+License: GPL-3
+URL: http://www.agner.org/random/ http://www.r-project.org
+Packaged: 2015-12-28 07:10:49 UTC; A
+NeedsCompilation: yes
+Repository: CRAN
+Date/Publication: 2015-12-28 09:01:09
diff --git a/MD5 b/MD5
new file mode 100644
index 0000000..b0242ba
--- /dev/null
+++ b/MD5
@@ -0,0 +1,29 @@
+85f839c1984347af7ac5e7627045d6fe *DESCRIPTION
+9fa4a3e4d963627db9d413967e9d9035 *NAMESPACE
+ec4aeb239b877144b70e3f3704367577 *R/urn1.R
+e8c596d16048677b6ffaa18704123b1b *R/urn2.R
+4c54999418c75264f6cde4c74453107b *build/vignette.rds
+e92f1eef3885a16d9434e377add583ff *demo/00Index
+c6b954448030b0e8c07a378b8ce58824 *demo/ApproxHypergeo.R
+0923f2e69cc488f8184b4973dd7cdd82 *demo/CompareHypergeo.R
+adab2da4435745974b8b256e97a50f68 *demo/OddsPrecision.R
+ca257d52b737cf05ab2fc234337f4b43 *demo/SampleWallenius.R
+b3569fe26b9aa87e6704636440db6bc2 *demo/UrnTheory.R
+ce2ab2b8db398976a55e06637399e0de *inst/doc/UrnTheory.Rtex
+69913004cac8403c6cc2f6f6c5cccd89 *inst/doc/UrnTheory.pdf
+a95229ba9b90590be861c30ff444a80d *man/BiasedUrn-1-Package.Rd
+97403bdfab6aaf656a4679416e22a877 *man/BiasedUrn-2-Univariate.Rd
+37777167d425c4814aa40743a600faa0 *man/BiasedUrn-3-Multivariate.Rd
+42ccfaba5f55a5dc2977e86c8f517bd3 *src/Makevars
+1c81532e7a97515f3d4cea2f1b7f4a40 *src/erfres.h
+b66f879a752c7f9b2c015d80de9c5423 *src/fnchyppr.cpp
+d2819bb817f16c170d749808fc8ef0d9 *src/randomc.h
+4bdf94a3ed6df4755cf8b1671f931329 *src/stoc1.cpp
+8cd472f044c6c8f00d39d43769a9add2 *src/stoc3.cpp
+07c99ea27028a3a74080d0ef829bd3e5 *src/stocR.cpp
+b2fb76cd6f7ec73231499db763d364bf *src/stocR.h
+196790e709f48dc5809fe12148d95be5 *src/stocc.h
+645795fc46d1d9c51c8ad2c045544ed3 *src/urn1.cpp
+2408d260c33a7f94bafd55ea7c2bed27 *src/urn2.cpp
+c7ad7f6edc61c320261fcac84d20cb45 *src/wnchyppr.cpp
+ce2ab2b8db398976a55e06637399e0de *vignettes/UrnTheory.Rtex
diff --git a/NAMESPACE b/NAMESPACE
new file mode 100644
index 0000000..ffc1c04
--- /dev/null
+++ b/NAMESPACE
@@ -0,0 +1,41 @@
+useDynLib(BiasedUrn)
+
+# Functions in urn1.R
+export(dFNCHypergeo)
+export(dWNCHypergeo)
+export(pFNCHypergeo)
+export(pWNCHypergeo)
+export(qFNCHypergeo)
+export(qWNCHypergeo)
+export(rFNCHypergeo)
+export(rWNCHypergeo)
+export(meanFNCHypergeo)
+export(meanWNCHypergeo)
+export(varFNCHypergeo)
+export(varWNCHypergeo)
+export(modeFNCHypergeo)
+export(modeWNCHypergeo)
+export(oddsFNCHypergeo)
+export(oddsWNCHypergeo)
+export(numFNCHypergeo)
+export(numWNCHypergeo)
+export(minHypergeo)
+export(maxHypergeo)
+
+# Functions in urn2.R
+export(dMFNCHypergeo)
+export(dMWNCHypergeo)
+export(rMFNCHypergeo)
+export(rMWNCHypergeo)
+export(momentsMFNCHypergeo)
+export(momentsMWNCHypergeo)
+export(meanMFNCHypergeo)
+export(meanMWNCHypergeo)
+export(varMFNCHypergeo)
+export(varMWNCHypergeo)
+export(oddsMFNCHypergeo)
+export(oddsMWNCHypergeo)
+export(numMFNCHypergeo)
+export(numMWNCHypergeo)
+export(minMHypergeo)
+export(maxMHypergeo)
diff --git a/R/urn1.R b/R/urn1.R
new file mode 100644
index 0000000..6a7d0ca
--- /dev/null
+++ b/R/urn1.R
@@ -0,0 +1,386 @@
+# Package BiasedUrn, file urn1.R
+# R interface to univariate noncentral hypergeometric distributions
+
+# *****************************************************************************
+# dFNCHypergeo
+# Mass function, Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+dFNCHypergeo <-
+function(x, m1, m2, n, odds, precision=1E-7) {
+ stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2),
+ is.numeric(n), is.numeric(odds), is.numeric(precision));
+ .Call("dFNCHypergeo",
+ as.integer(x), # Number of red balls drawn, scalar or vector
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# dWNCHypergeo
+# Mass function, Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+dWNCHypergeo <-
+function(x, m1, m2, n, odds, precision=1E-7 ) {
+ stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2),
+ is.numeric(n), is.numeric(odds), is.numeric(precision));
+ .Call("dWNCHypergeo",
+ as.integer(x), # Number of red balls drawn, scalar or vector
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# pFNCHypergeo
+# Cumulative distribution function for
+# Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+pFNCHypergeo <-
+function(x, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE) {
+ stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision), is.vector(lower.tail));
+ .Call("pFNCHypergeo",
+ as.integer(x), # Number of red balls drawn, scalar or vector
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ as.logical(lower.tail), # TRUE: P(X <= x), FALSE: P(X > x)
+ PACKAGE = "BiasedUrn");
+}
+
+# *****************************************************************************
+# pWNCHypergeo
+# Cumulative distribution function for
+# Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+pWNCHypergeo <-
+function(x, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE) {
+ stopifnot(is.numeric(x), is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision), is.vector(lower.tail));
+ .Call("pWNCHypergeo",
+ as.integer(x), # Number of red balls drawn, scalar or vector
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ as.logical(lower.tail), # TRUE: P(X <= x), FALSE: P(X > x)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# qFNCHypergeo
+# Quantile function for
+# Fisher's NonCentral Hypergeometric distribution.
+# Returns the lowest x for which P(X<=x) >= p when lower.tail = TRUE
+# Returns the lowest x for which P(X >x) <= p when lower.tail = FALSE
+# *****************************************************************************
+# Note: qWNCHypergeo if more accurate than qFNCHypergeo when odds = 1
+qFNCHypergeo <-
+function(p, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE) {
+ stopifnot(is.numeric(p), is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision), is.vector(lower.tail));
+ .Call("qFNCHypergeo",
+ as.double(p), # Cumulative probability
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ as.logical(lower.tail), # TRUE: P(X <= x), FALSE: P(X > x)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# qWNCHypergeo
+# Quantile function for
+# Wallenius' NonCentral Hypergeometric distribution.
+# Returns the lowest x for which P(X<=x) >= p when lower.tail = TRUE
+# Returns the lowest x for which P(X >x) <= p when lower.tail = FALSE
+# *****************************************************************************
+qWNCHypergeo <-
+function(p, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE) {
+ stopifnot(is.numeric(p), is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision), is.vector(lower.tail));
+ .Call("qWNCHypergeo",
+ as.double(p), # Cumulative probability
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ as.logical(lower.tail), # TRUE: P(X <= x), FALSE: P(X > x)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# rFNCHypergeo
+# Random variate generation function for
+# Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+rFNCHypergeo <-
+function(nran, m1, m2, n, odds, precision=1E-7) {
+ stopifnot(is.numeric(nran), is.numeric(m1), is.numeric(m2),
+ is.numeric(n), is.numeric(odds), is.numeric(precision));
+ .Call("rFNCHypergeo",
+ as.integer(nran), # Number of random variates desired
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# rWNCHypergeo
+# Random variate generation function for
+# Wallenius' NonCentral Hypergeometric distribution.
+# *****************************************************************************
+rWNCHypergeo <-
+function(nran, m1, m2, n, odds, precision=1E-7) {
+ stopifnot(is.numeric(nran), is.numeric(m1), is.numeric(m2),
+ is.numeric(n), is.numeric(odds), is.numeric(precision));
+ .Call("rWNCHypergeo",
+ as.integer(nran), # Number of random variates desired
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# meanFNCHypergeo
+# Calculates the mean of
+# Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+meanFNCHypergeo <- function(
+ m1, # Number of red balls in urn
+ m2, # Number of white balls in urn
+ n, # Number of balls drawn from urn
+ odds, # Odds of getting a red ball among one red and one white
+ precision=1E-7) { # Precision of calculation
+ stopifnot(is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ .Call("momentsFNCHypergeo", as.integer(m1), as.integer(m2),
+ as.integer(n), as.double(odds), as.double(precision),
+ as.integer(1), # 1 for mean, 2 for variance
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# meanWNCHypergeo
+# Calculates the mean of
+# Wallenius' NonCentral Hypergeometric distribution.
+# *****************************************************************************
+meanWNCHypergeo <- function(
+ m1, # Number of red balls in urn
+ m2, # Number of white balls in urn
+ n, # Number of balls drawn from urn
+ odds, # Odds of getting a red ball among one red and one white
+ precision=1E-7) { # Precision of calculation
+ stopifnot(is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ .Call("momentsWNCHypergeo", as.integer(m1), as.integer(m2),
+ as.integer(n), as.double(odds), as.double(precision),
+ as.integer(1), # 1 for mean, 2 for variance
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# varFNCHypergeo
+# Calculates the variance of
+# Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+varFNCHypergeo <- function(
+ m1, # Number of red balls in urn
+ m2, # Number of white balls in urn
+ n, # Number of balls drawn from urn
+ odds, # Odds of getting a red ball among one red and one white
+ precision=1E-7) { # Precision of calculation
+ stopifnot(is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ .Call("momentsFNCHypergeo", as.integer(m1), as.integer(m2),
+ as.integer(n), as.double(odds), as.double(precision),
+ as.integer(2), # 1 for mean, 2 for variance
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# varWNCHypergeo
+# Calculates the variance of
+# Wallenius' NonCentral Hypergeometric distribution.
+# *****************************************************************************
+varWNCHypergeo <- function(
+ m1, # Number of red balls in urn
+ m2, # Number of white balls in urn
+ n, # Number of balls drawn from urn
+ odds, # Odds of getting a red ball among one red and one white
+ precision=1E-7) { # Precision of calculation
+ stopifnot(is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ .Call("momentsWNCHypergeo", as.integer(m1), as.integer(m2),
+ as.integer(n), as.double(odds), as.double(precision),
+ as.integer(2), # 1 for mean, 2 for variance
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# modeFNCHypergeo
+# Calculates the mode of
+# Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+# Note: The result is exact regardless of the precision parameter.
+# The precision parameter is included only for analogy with modeWNCHypergeo.
+modeFNCHypergeo <- function(
+ m1, # Number of red balls in urn
+ m2, # Number of white balls in urn
+ n, # Number of balls drawn from urn
+ odds, # Odds of getting a red ball among one red and one white
+ precision=0) { # Precision of calculation
+ stopifnot(is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds));
+ .Call("modeFNCHypergeo", as.integer(m1), as.integer(m2),
+ as.integer(n), as.double(odds),
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# modeWNCHypergeo
+# Calculates the mode of
+# Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+modeWNCHypergeo <- function(
+ m1, # Number of red balls in urn
+ m2, # Number of white balls in urn
+ n, # Number of balls drawn from urn
+ odds, # Odds of getting a red ball among one red and one white
+ precision=1E-7) { # Precision of calculation
+ stopifnot(is.numeric(m1), is.numeric(m2), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ .Call("modeWNCHypergeo", as.integer(m1), as.integer(m2),
+ as.integer(n), as.double(odds), as.double(precision),
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# oddsFNCHypergeo
+# Estimate odds ratio from mean for
+# Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses Cornfield's approximation. Specified precision is ignored.
+oddsFNCHypergeo <-
+function(mu, m1, m2, n, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(m1), is.numeric(m2),
+ is.numeric(n), is.numeric(precision));
+ .Call("oddsFNCHypergeo",
+ as.double(mu), # Observed mean of x1
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(precision), # Precision of calculation
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# oddsWNCHypergeo
+# Estimate odds ratio from mean for
+# Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+oddsWNCHypergeo <-
+function(mu, m1, m2, n, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(m1), is.numeric(m2),
+ is.numeric(n), is.numeric(precision));
+ .Call("oddsWNCHypergeo",
+ as.double(mu), # Observed mean of x1
+ as.integer(m1), # Number of red balls in urn
+ as.integer(m2), # Number of white balls in urn
+ as.integer(n), # Number of balls drawn from urn
+ as.double(precision), # Precision of calculation
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# numFNCHypergeo
+# Estimate number of balls of each color from experimental mean for
+# Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses Cornfield's approximation. Specified precision is ignored.
+numFNCHypergeo <-
+function(mu, n, N, odds, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(n), is.numeric(N),
+ is.numeric(odds), is.numeric(precision));
+ .Call("numFNCHypergeo",
+ as.double(mu), # Observed mean of x1
+ as.integer(n), # Number of balls sampled
+ as.integer(N), # Number of balls in urn before sampling
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation (ignored)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# numWNCHypergeo
+# Estimate number of balls of each color from experimental mean for
+# Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses approximation. Specified precision is ignored.
+numWNCHypergeo <-
+function(mu, n, N, odds, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(n), is.numeric(N),
+ is.numeric(odds), is.numeric(precision));
+ .Call("numWNCHypergeo",
+ as.double(mu), # Observed mean of x1
+ as.integer(n), # Number of balls sampled
+ as.integer(N), # Number of balls in urn before sampling
+ as.double(odds), # Odds of getting a red ball among one red and one white
+ as.double(precision), # Precision of calculation (ignored)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# minHypergeo
+# Minimum of x for central and noncentral Hypergeometric distributions
+# *****************************************************************************
+minHypergeo <- function(m1, m2, n) {
+ stopifnot(m1>=0, m2>=0, n>=0, n<=m1+m2);
+ max(n-m2, 0);
+}
+
+
+# *****************************************************************************
+# maxHypergeo
+# Maximum of x for central and noncentral Hypergeometric distributions
+# *****************************************************************************
+maxHypergeo <- function(m1, m2, n) {
+ stopifnot(m1>=0, m2>=0, n>=0, n<=m1+m2);
+ min(m1, n);
+}
diff --git a/R/urn2.R b/R/urn2.R
new file mode 100644
index 0000000..b043673
--- /dev/null
+++ b/R/urn2.R
@@ -0,0 +1,319 @@
+# Package BiasedUrn, file urn2.R
+# R interface to multivariate noncentral hypergeometric distributions
+
+# *****************************************************************************
+# dMFNCHypergeo
+# Mass function for
+# Multivariate Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+dMFNCHypergeo <-
+function(
+ x, # Number of balls drawn of each color, vector or matrix
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision=1E-7) { # Precision of calculation, scalar
+ stopifnot(is.numeric(x), is.numeric(m), is.numeric(n), is.numeric(odds), is.numeric(precision));
+
+ # Convert x to integer vector or matrix without loosing dimensions:
+ if (is.matrix(x)) {
+ xx <- matrix(as.integer(x), nrow=dim(x)[1], ncol=dim(x)[2]);
+ }
+ else {
+ xx <- as.integer(x);
+ }
+ .Call("dMFNCHypergeo", xx, as.integer(m), as.integer(n),
+ as.double(odds), as.double(precision), PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# dMWNCHypergeo
+# Mass function for
+# Multivariate Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+dMWNCHypergeo <-
+function(
+ x, # Number of balls drawn of each color, vector or matrix
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision=1E-7) { # Precision of calculation, scalar
+ stopifnot(is.numeric(x), is.numeric(m), is.numeric(n), is.numeric(odds), is.numeric(precision));
+
+ # Convert x to integer vector or matrix without loosing dimensions:
+ if (is.matrix(x)) {
+ xx <- matrix(as.integer(x), nrow=dim(x)[1], ncol=dim(x)[2]);
+ }
+ else {
+ xx <- as.integer(x);
+ }
+ .Call("dMWNCHypergeo", xx, as.integer(m), as.integer(n),
+ as.double(odds), as.double(precision), PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# rMFNCHypergeo
+# Random variate generation function for
+# Multivariate Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+rMFNCHypergeo <-
+function(nran, m, n, odds, precision=1E-7) {
+ stopifnot(is.numeric(nran), is.numeric(m),
+ is.numeric(n), is.numeric(odds), is.numeric(precision));
+ .Call("rMFNCHypergeo",
+ as.integer(nran), # Number of random variates desired, scalar
+ as.integer(m), # Number of balls of each color in urn, vector
+ as.integer(n), # Number of balls drawn from urn, scalar
+ as.double(odds), # Odds for each color, vector
+ as.double(precision), # Precision of calculation, scalar
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# rMWNCHypergeo
+# Random variate generation function for
+# Multivariate Wallenius' NonCentral Hypergeometric distribution.
+# *****************************************************************************
+rMWNCHypergeo <-
+function(nran, m, n, odds, precision=1E-7) {
+ stopifnot(is.numeric(nran), is.numeric(m),
+ is.numeric(n), is.numeric(odds), is.numeric(precision));
+ .Call("rMWNCHypergeo",
+ as.integer(nran), # Number of random variates desired, scalar
+ as.integer(m), # Number of balls of each color in urn, vector
+ as.integer(n), # Number of balls drawn from urn, scalar
+ as.double(odds), # Odds for each color, vector
+ as.double(precision), # Precision of calculation, scalar
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# momentsMFNCHypergeo
+# Calculates the mean and variance of the
+# Multivariate Fisher's NonCentral Hypergeometric distribution.
+# Results are returned as a data frame.
+# *****************************************************************************
+momentsMFNCHypergeo <- function(
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision = 0.1) { # Precision of calculation, scalar
+ stopifnot(is.numeric(m), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ res <- .Call("momentsMFNCHypergeo", as.integer(m),
+ as.integer(n), as.double(odds), as.double(precision),
+ PACKAGE = "BiasedUrn");
+ # Convert result to data frame
+ colnames(res) <- list("xMean","xVariance")
+ as.data.frame(res);
+}
+
+
+# *****************************************************************************
+# momentsMWNCHypergeo
+# Calculates the mean and variance of the
+# Multivariate Wallenius' NonCentral Hypergeometric distribution.
+# Results are returned as a data frame.
+# *****************************************************************************
+momentsMWNCHypergeo <- function(
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision = 0.1) { # Precision of calculation, scalar
+ stopifnot(is.numeric(m), is.numeric(n),
+ is.numeric(odds), is.numeric(precision));
+ res <- .Call("momentsMWNCHypergeo", as.integer(m),
+ as.integer(n), as.double(odds), as.double(precision),
+ PACKAGE = "BiasedUrn");
+ # Convert result to data frame
+ colnames(res) <- list("xMean","xVariance")
+ as.data.frame(res);
+}
+
+
+# *****************************************************************************
+# meanMFNCHypergeo
+# Calculates the mean of the
+# Multivariate Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+meanMFNCHypergeo <- function(
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision = 0.1) { # Precision of calculation, scalar
+ momentsMFNCHypergeo(m, n, odds, precision)$xMean
+}
+
+
+# *****************************************************************************
+# meanMWNCHypergeo
+# Calculates the mean of the
+# Multivariate Wallenius' NonCentral Hypergeometric distribution.
+# *****************************************************************************
+meanMWNCHypergeo <- function(
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision = 0.1) { # Precision of calculation, scalar
+ momentsMWNCHypergeo(m, n, odds, precision)$xMean
+}
+
+
+# *****************************************************************************
+# varMFNCHypergeo
+# Calculates the variance of the
+# Multivariate Fisher's NonCentral Hypergeometric distribution.
+# *****************************************************************************
+varMFNCHypergeo <- function(
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision = 0.1) { # Precision of calculation, scalar
+ momentsMFNCHypergeo(m, n, odds, precision)$xVariance
+}
+
+
+# *****************************************************************************
+# varMWNCHypergeo
+# Calculates the variance of the
+# Multivariate Wallenius' NonCentral Hypergeometric distribution.
+# *****************************************************************************
+varMWNCHypergeo <- function(
+ m, # Number of balls of each color in urn, vector
+ n, # Number of balls drawn from urn, scalar
+ odds, # Odds for each color, vector
+ precision = 0.1) { # Precision of calculation, scalar
+ momentsMWNCHypergeo(m, n, odds, precision)$xVariance
+}
+
+
+# *****************************************************************************
+# oddsMFNCHypergeo
+# Estimate odds ratio from mean for the
+# Multivariate Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses Cornfield's approximation. Specified precision is ignored.
+oddsMFNCHypergeo <-
+function(mu, m, n, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(m), is.numeric(n), is.numeric(precision));
+ # Convert mu to double vector or matrix without loosing dimensions:
+ if (is.matrix(mu)) {
+ mux <- matrix(as.double(mu), nrow=dim(mu)[1], ncol=dim(mu)[2]);
+ }
+ else {
+ mux <- as.double(mu);
+ }
+ .Call("oddsMFNCHypergeo",
+ mux, # Observed mean of each x, vector
+ as.integer(m), # Number of balls of each color in urn, vector
+ as.integer(n), # Number of balls drawn from urn, scalar
+ as.double(precision), # Precision of calculation, scalar
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# oddsMWNCHypergeo
+# Estimate odds ratio from mean for the
+# Multivariate Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses approximation. Specified precision is ignored.
+oddsMWNCHypergeo <-
+function(mu, m, n, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(m), is.numeric(n), is.numeric(precision));
+ # Convert mu to double vector or matrix without loosing dimensions:
+ if (is.matrix(mu)) {
+ mux <- matrix(as.double(mu), nrow=dim(mu)[1], ncol=dim(mu)[2]);
+ }
+ else {
+ mux <- as.double(mu);
+ }
+ .Call("oddsMWNCHypergeo",
+ mux, # Observed mean of each x, vector
+ as.integer(m), # Number of balls of each color in urn, vector
+ as.integer(n), # Number of balls drawn from urn, scalar
+ as.double(precision), # Precision of calculation, scalar
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# numMFNCHypergeo
+# Estimate number of balls of each color from experimental mean for
+# Multivariate Fisher's NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses Cornfield's approximation. Specified precision is ignored.
+numMFNCHypergeo <-
+function(mu, n, N, odds, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(n), is.numeric(N), is.numeric(odds), is.numeric(precision));
+ # Convert mu to double vector or matrix without loosing dimensions:
+ if (is.matrix(mu)) {
+ mux <- matrix(as.double(mu), nrow=dim(mu)[1], ncol=dim(mu)[2]);
+ }
+ else {
+ mux <- as.double(mu);
+ }
+ .Call("numMFNCHypergeo",
+ mux, # Observed mean of each x, vector
+ as.integer(n), # Number of balls drawn from urn, scalar
+ as.integer(N), # Number of balls in urn before sampling, scalar
+ as.double(odds), # Odds for each color, vector
+ as.double(precision), # Precision of calculation, scalar (ignored)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# numMWNCHypergeo
+# Estimate number of balls of each color from experimental mean for
+# Multivariate Wallenius' NonCentral Hypergeometric distribution
+# *****************************************************************************
+# Uses approximation. Specified precision is ignored.
+numMWNCHypergeo <-
+function(mu, n, N, odds, precision=0.1) {
+ stopifnot(is.numeric(mu), is.numeric(n), is.numeric(N), is.numeric(odds), is.numeric(precision));
+ # Convert mu to double vector or matrix without loosing dimensions:
+ if (is.matrix(mu)) {
+ mux <- matrix(as.double(mu), nrow=dim(mu)[1], ncol=dim(mu)[2]);
+ }
+ else {
+ mux <- as.double(mu);
+ }
+ .Call("numMWNCHypergeo",
+ mux, # Observed mean of each x, vector
+ as.integer(n), # Number of balls drawn from urn, scalar
+ as.integer(N), # Number of balls in urn before sampling, scalar
+ as.double(odds), # Odds for each color, vector
+ as.double(precision), # Precision of calculation, scalar (ignored)
+ PACKAGE = "BiasedUrn");
+}
+
+
+# *****************************************************************************
+# minMHypergeo
+# Minimum of x for central and noncentral
+# Multivariate Hypergeometric distributions
+# *****************************************************************************
+# m = Number of balls of each color in urn, vector
+# n = Number of balls drawn from urn, scalar
+minMHypergeo <- function(m, n) {
+ stopifnot(m>=0, n>=0, n<=sum(m));
+ pmax(n - sum(m) + m, 0);
+}
+
+
+# *****************************************************************************
+# maxMHypergeo
+# Maximum of x for central and noncentral
+# Multivariate Hypergeometric distributions
+# *****************************************************************************
+# m = Number of balls of each color in urn, vector
+# n = Number of balls drawn from urn, scalar
+maxMHypergeo <- function(m, n) {
+ stopifnot(m>=0, n>=0, n<=sum(m));
+ pmin(m, n);
+}
diff --git a/build/vignette.rds b/build/vignette.rds
new file mode 100644
index 0000000..a031dc4
Binary files /dev/null and b/build/vignette.rds differ
diff --git a/debian/changelog b/debian/changelog
deleted file mode 100644
index 2aec403..0000000
--- a/debian/changelog
+++ /dev/null
@@ -1,14 +0,0 @@
-r-cran-biasedurn (1.07-1) unstable; urgency=medium
-
- * New upstream version
- * Convert to dh-r
- * Canonical homepage for CRAN
- * d/watch: version=4
-
- -- Andreas Tille <tille at debian.org> Sun, 13 Nov 2016 18:35:29 +0100
-
-r-cran-biasedurn (1.06.1-1) unstable; urgency=low
-
- * Initial release (closes: #804216)
-
- -- Andreas Tille <tille at debian.org> Fri, 06 Nov 2015 11:33:58 +0100
diff --git a/debian/compat b/debian/compat
deleted file mode 100644
index ec63514..0000000
--- a/debian/compat
+++ /dev/null
@@ -1 +0,0 @@
-9
diff --git a/debian/control b/debian/control
deleted file mode 100644
index cc3eba9..0000000
--- a/debian/control
+++ /dev/null
@@ -1,27 +0,0 @@
-Source: r-cran-biasedurn
-Maintainer: Debian Med Packaging Team <debian-med-packaging at lists.alioth.debian.org>
-Uploaders: Andreas Tille <tille at debian.org>
-Section: gnu-r
-Priority: optional
-Build-Depends: debhelper (>= 9),
- dh-r,
- r-base-dev
-Standards-Version: 3.9.8
-Vcs-Browser: https://anonscm.debian.org/viewvc/debian-med/trunk/packages/R/r-cran-biasedurn/trunk
-Vcs-Svn: svn://anonscm.debian.org/debian-med/trunk/packages/R/r-cran-biasedurn/trunk/
-Homepage: https://cran.r-project.org/package=BiasedUrn
-
-Package: r-cran-biasedurn
-Architecture: any
-Depends: ${shlibs:Depends},
- ${misc:Depends},
- ${R:Depends}
-Recommends: ${R:Recommends}
-Suggests: ${R:Suggests}
-Description: GNU R Biased Urn model distributions
- Statistical models of biased sampling in the form of univariate and
- multivariate noncentral hypergeometric distributions, including
- Wallenius' noncentral hypergeometric distribution and Fisher's
- noncentral hypergeometric distribution (also called extended
- hypergeometric distribution). See vignette("UrnTheory") for explanation
- of these distributions.
diff --git a/debian/copyright b/debian/copyright
deleted file mode 100644
index b8effbc..0000000
--- a/debian/copyright
+++ /dev/null
@@ -1,29 +0,0 @@
-Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
-Upstream-Name: BiasedUrn
-Upstream-Contact: Agner Fog <agner at agner.org>
-Source: https://cran.r-project.org/package=BiasedUrn
-
-Files: *
-Copyright: 2012-2016 Agner Fog <agner at agner.org>
-License: GPL-3
-
-Files: debian/*
-Copyright: 2015-2016 Andreas Tille <tille at debian.org>
-License: GPL-3
-
-License: GPL-3
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, version 3 of the License.
- .
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- .
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- .
- On Debian systems, the complete text of the GNU General Public
- License version 3 can be found in `/usr/share/common-licenses/GPL-3'.
-
diff --git a/debian/rules b/debian/rules
deleted file mode 100755
index 68d9a36..0000000
--- a/debian/rules
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/make -f
-
-%:
- dh $@ --buildsystem R
diff --git a/debian/source/format b/debian/source/format
deleted file mode 100644
index 163aaf8..0000000
--- a/debian/source/format
+++ /dev/null
@@ -1 +0,0 @@
-3.0 (quilt)
diff --git a/debian/watch b/debian/watch
deleted file mode 100644
index 467d58c..0000000
--- a/debian/watch
+++ /dev/null
@@ -1,2 +0,0 @@
-version=4
-http://cran.r-project.org/src/contrib/BiasedUrn_([-\d.]+)\.tar\.gz
diff --git a/demo/00Index b/demo/00Index
new file mode 100644
index 0000000..1b65e5e
--- /dev/null
+++ b/demo/00Index
@@ -0,0 +1,5 @@
+UrnTheory Vignette explaining the distributions of biased sampling
+CompareHypergeo Compares different noncentral hypergeometric distributions
+ApproxHypergeo Compares different noncentral hypergeometric distributions with same mean rather than same odds
+OddsPrecision Measures precision of odds function
+SampleWallenius Makes random variates from Wallenius noncentral hypergeometric distribution
diff --git a/demo/ApproxHypergeo.R b/demo/ApproxHypergeo.R
new file mode 100644
index 0000000..8cb99c0
--- /dev/null
+++ b/demo/ApproxHypergeo.R
@@ -0,0 +1,26 @@
+# ApproxHypergeo.R
+# This demo compares a Wallenius' and a Fisher's noncentral hypergeometric
+# distribution with the same mean rather than the same odds in order to
+# make them approximate each other better.
+
+require(BiasedUrn)
+require(stats)
+
+ApproxHypPlot <- function(m1, m2, n, w.odds) {
+ xmin <- minHypergeo(m1, m2, n)
+ xmax <- maxHypergeo(m1, m2, n)
+ x <- xmin : xmax
+ w.mean <- meanWNCHypergeo(m1, m2, n, w.odds)
+ f.odds <- oddsFNCHypergeo(w.mean, m1, m2, n)
+ wnc <- dWNCHypergeo(x, m1, m2, n, w.odds)
+ fnc <- dFNCHypergeo(x, m1, m2, n, f.odds)
+ fnc0 <- dFNCHypergeo(x, m1, m2, n, w.odds)
+ plot (x, fnc, type="l", col="red",
+ main = "Hypergeometric distributions",
+ sub = "Blue = Wallenius, Red = Fisher w. same mean,\n Green = Fisher w. same odds",
+ xlab = "", ylab = "Probability")
+ points (x, wnc, type="l", col="blue")
+ points (x, fnc0, type="l", col="green", lty="dashed")
+}
+
+ApproxHypPlot(80, 60, 100, 0.5)
diff --git a/demo/CompareHypergeo.R b/demo/CompareHypergeo.R
new file mode 100644
index 0000000..3623bba
--- /dev/null
+++ b/demo/CompareHypergeo.R
@@ -0,0 +1,25 @@
+# CompareHypergeo.R
+# This demo shows the difference between the three distributions:
+# 1. Wallenius' noncentral hypergeometric distribution
+# 2. Fisher's noncentral hypergeometric distribution
+# 3. The (central) hypergeometric distribution
+
+require(BiasedUrn)
+require(stats)
+
+ComparePlot <- function(m1, m2, n, odds) {
+ xmin <- minHypergeo(m1, m2, n)
+ xmax <- maxHypergeo(m1, m2, n)
+ x <- xmin : xmax
+ wnc <- dWNCHypergeo(x, m1, m2, n, odds)
+ fnc <- dFNCHypergeo(x, m1, m2, n, odds)
+ hyp <- dhyper(x, m1, m2, n)
+ plot (x, wnc, type="l", col="blue",
+ main = "Hypergeometric distributions",
+ sub = "Blue = Wallenius, Red = Fisher, Green = Central",
+ xlab = "x", ylab = "Probability")
+ points (x, fnc, type="l", col="red")
+ points (x, hyp, type="l", col="green")
+}
+
+ComparePlot(80, 60, 100, 0.5)
diff --git a/demo/OddsPrecision.R b/demo/OddsPrecision.R
new file mode 100644
index 0000000..0a4098a
--- /dev/null
+++ b/demo/OddsPrecision.R
@@ -0,0 +1,21 @@
+# OddsPrecision.R
+# This demo tests the precision of the odds functions for
+# Wallenius' and a Fisher's noncentral hypergeometric distributions
+# by calculating the mean of distributions with known odds and then
+# estimating the odds from the means.
+
+require(BiasedUrn)
+require(stats)
+
+OddsEst <- function(m1, m2, n, odds) {
+ meanW <- meanWNCHypergeo(m1, m2, n, odds, 1E-9)
+ oddsEstW <- oddsWNCHypergeo(meanW, m1, m2, n)
+ meanF <- meanFNCHypergeo(m1, m2, n, odds, 1E-9)
+ oddsEstF <- oddsFNCHypergeo(meanF, m1, m2, n)
+ list(Odds=odds, Wallenius.mean = meanW, Fisher.mean = meanF,
+ Wallenius.estimated.odds = oddsEstW, Fisher.estimated.odds = oddsEstF,
+ Wallenius.rel.error = (oddsEstW-odds)/odds,
+ Fisher.rel.error = (oddsEstF-odds)/odds)
+}
+
+OddsEst(10, 12, 15, 0.6)
diff --git a/demo/SampleWallenius.R b/demo/SampleWallenius.R
new file mode 100644
index 0000000..28116a1
--- /dev/null
+++ b/demo/SampleWallenius.R
@@ -0,0 +1,36 @@
+# SampleWallenius.R
+# This demo makes random samples from Wallenius' noncentral hypergeometric
+# distribution and compares measured and expected frequencies
+
+require(BiasedUrn)
+require(stats)
+
+MakeSamples <- function(m1, m2, n, odds) {
+ nsamp <- 100000 # Desired number of samples from distribution
+ xmin <- minHypergeo(m1, m2, n) # Lower limit for x
+ xmax <- maxHypergeo(m1, m2, n) # Upper limit for x
+
+ # Make nsamp samples from Wallenius' distribution
+ X <- rWNCHypergeo(nsamp, m1, m2, n, odds)
+
+ # Get table of frequencies
+ XTab <- as.data.frame(table(X))
+
+ # Relative frequencies
+ XTab$Freq <- XTab$Freq / nsamp
+
+ # Get expected frequencies
+ XTab$Expected <- dWNCHypergeo(as.integer(levels(XTab$X)), m1, m2, n, odds)
+
+ print("X frequencies in Wallenius' noncentral hypergeometric distribution")
+
+ # List measured vs. expected frequencies
+ # (How do I get rid of the row names?)
+ print(XTab, digits=5)
+
+ # Draw histogram
+ # (Why does my histogram show densities bigger than 1?)
+ hist(X, freq=FALSE)
+}
+
+MakeSamples(6, 8, 5, 1.5)
\ No newline at end of file
diff --git a/demo/UrnTheory.R b/demo/UrnTheory.R
new file mode 100644
index 0000000..3de977a
--- /dev/null
+++ b/demo/UrnTheory.R
@@ -0,0 +1,4 @@
+# UrnTheory.R
+# This opens the file UrnTheory.pdf to explain the biased urn models.
+
+vignette("UrnTheory", package="BiasedUrn")
diff --git a/inst/doc/UrnTheory.Rtex b/inst/doc/UrnTheory.Rtex
new file mode 100644
index 0000000..2e8b52b
--- /dev/null
+++ b/inst/doc/UrnTheory.Rtex
@@ -0,0 +1,492 @@
+\documentclass[a4paper]{article}
+
+% Note: Remember to edit the .Snw file, not the .tex file!
+
+%\VignetteIndexEntry{Biased Urn Theory}
+%\VignettePackage{BiasedUrn}
+
+\usepackage{amsmath}
+\usepackage{amssymb}
+%
+% \usepackage{c:/R/share/texmf/Sweave}
+\usepackage{Sweave}
+\begin{document}
+
+\title{Biased Urn Theory}
+\author{Agner Fog}
+
+\maketitle
+
+\section{Introduction}
+%
+Two different probability distributions are both known in the
+literature as ``the'' noncentral hypergeometric distribution. These
+two distributions will be called Fisher's and Wallenius' noncentral
+hypergeometric distribution, respectively.
+
+Both distributions can be associated with the classical experiment
+of taking colored balls at random from an urn without replacement.
+If the experiment is unbiased then the result will follow the well-known
+hypergeometric distribution. If the balls have different size or weight
+or whatever so that balls of one color have a higher probability of being
+taken than balls of another color then the result will be a
+noncentral hypergeometric distribution.
+
+The distribution depends on how the balls are taken from the urn.
+Wallenius' noncentral hypergeometric distribution is obtained if $n$
+balls are taken one by one. Fisher's noncentral hypergeometric
+distribution is obtained if balls are taken independently of each other.
+
+Wallenius' distribution is used in models of natural selection and biased
+sampling. Fisher's distribution is used mainly for statistical tests in
+contingency tables. Both distributions are supported in the {\tt BiasedUrn}
+package.
+
+The difference between the two noncentral hypergeometric distributions
+is difficult to understand. I am therefore providing a detailed
+explanation in the following sections.
+
+
+\section{Definition of Wallenius' noncentral hypergeometric distribution}
+%
+Assume that an urn contains $N$ balls of $c$ different colors and let
+$m_i$ be the number of balls of color $i$. Balls of color $i$ have the
+weight $\omega_i$. $n$ balls are drawn from the urn, one by one, in
+such a way that the probability of taking a particular ball at a
+particular draw is equal to this ball's fraction of the total weight of
+all balls that lie in the urn at this moment.
+
+The colors of the $n$ balls that are taken in this way will follow Wallenius'
+noncentral hypergeometric distribution. This distribution has the
+probability mass function:
+%
+$$
+\operatorname{dMWNCHypergeo}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})
+\:=\:
+\left( \prod_{i=1}^c \binom{m_i}{x_i} \right)
+\: \int_0^1 \prod_{i=1}^c
+(1-t^{{\omega_i}/{d}})^{x_i} \, \mathrm{d}t \;,
+$$
+%
+$$
+\text{where } \: d \:=\:
+\sum_{i=1}^c \omega_i(m_i-x_i) \,.
+$$
+%
+$\boldsymbol{x}=(x_1,x_2,\ldots,x_c)$
+is the number of balls drawn of each color.\\
+$\boldsymbol{m}=(m_1,m_2,\ldots,m_c)$
+is the initial number of balls of each color in the urn.\\
+$\boldsymbol{\omega}=(\omega_1,\omega_2,\ldots,\omega_c)$
+is the weight or odds of balls of each color.\\
+$n = \sum_{i=1}^c x_i$ is the total number of balls drawn.\\
+$c$ is the number of colors. The unexpected integral in this
+formula arises as the solution to a difference equation.
+(The above formula is invalid in the trivial case $n = N$.)
+
+
+\section{Definition of Fisher's noncentral hypergeometric distribution}
+%
+If the colored balls are taken from the urn in such a way that
+the probability of taking a particular ball of color $i$ is
+proportional to its weight $\omega_i$ and the probability for
+each particular ball is independent of what happens to the
+other balls, then the number of balls taken will follow a
+binomial distribution for each color.
+
+The total number of balls taken $n = \sum_{i=1}^c x_i$ is
+necessarily random and unknown prior to the experiment.
+After the experiment, we can determine $n$ and calculate the
+distribution of colors for the given value of $n$.
+This is Fisher's noncentral hypergeometric distribution, which
+is defined as the distribution of independent binomial variates
+conditional upon their sum $n$.
+
+The probability mass function of Fisher's noncentral hypergeometric
+distribution is given by
+%
+$$
+\operatorname{dMFNCHypergeo}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})
+\:=\:
+\frac{\textrm{g}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})}
+{\sum\limits_{\boldsymbol{y}\in \: \Xi}
+\textrm{g}(\boldsymbol{y};\boldsymbol{m},n,\boldsymbol{\omega})}\:,
+$$
+%
+$$
+\text{where } \: \textrm{g}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})
+\:=\: \prod_{i=1}^c
+\binom{m_i}{x_i}\omega_i^{\,x_i}\:,
+$$
+%
+$$
+\text{and the domain }\: \Xi \:=\: \left\{\boldsymbol{x}\in\mathbb{Z}^c \,\middle|\,
+\sum_{i=1}^c x_i = n \: \wedge \:
+\forall\, i \in [1,c] \: : \: 0 \leq x_i \leq m_i \right\}\:.
+$$
+
+
+\section{Univariate distributions}
+%
+The univariate distributions are used when the number of colors
+$c$ is $2$. The multivariate distributions are used when the number
+of colors is more than $2$.
+
+The above formulas apply to any number of colors $c$. The univariate
+distributions can be expressed by setting
+$c=2$, $\:x_1=x$, $\:x_2=n-x$, $\:m_1=m$, $\:m_2=N-m$,
+$\:\omega_1=\omega$, $\:\omega_2=1$
+in the above formulas.
+
+
+\section{Name confusion}
+Wallenius' and Fisher's distribution are both known in the literature
+as ``the'' noncentral hypergeometric distribution. Fisher's distribution
+was first given the name extended hypergeometric distribution, but
+some scientists are strongly opposed to using this name.
+
+There is a widespread confusion in the literature because these two
+distributions have been given the same name and because it is not obvious that
+they are different. Several publications have used the wrong distribution
+or erroneously assumed that the two distributions were identical.
+
+I am therefore recommending to use the prefixes Wallenius' and Fisher's
+to distinguish the two noncentral hypergeometric distributions. While this
+makes the names rather long, it has the advantage of emphasizing that there
+is more than one noncentral hypergeometric distribution, whereby the
+risk of confusion is minimized.
+Wallenius and Fisher are the names of the scientists who first described each of
+these two distributions.
+
+The following section explains why the two distributions are different and
+how to decide which distribution to use in a specific situation.
+
+
+\section{The difference between the two distributions}
+%
+Both distributions degenerate into the well-known hypergeometric distribution
+when all balls have the same weight. In other words: It doesn't matter how
+the balls are sampled if the balls are unbiased. Only if the urn experiment
+is biased can we get different distributions depending on how the balls are
+sampled.
+
+It is important to understand how this dependence on the sampling procedure
+arises. In the Wallenius model, there is competition between the balls.
+The probability that a particular ball is taken is lower when the other
+balls in the urn are heavier. The probability of taking a particular ball
+at a particular draw is equal to its fraction of the total weight of the
+balls that remain in the urn at that moment. This total weight
+depends on the weight of
+the balls that have been removed in previous draws. Therefore, each draw
+except the first one has a probability distribution that depends on the
+results of the previous draws. The fact that each draw depends on the
+previous draws is what makes Wallenius' distribution unique and makes the
+calculation of it complicated. What happens to each ball depends on what
+has happened to other balls in the preceding draws.
+
+In the Fisher model, there is no such dependence between draws. We may
+as well take all $n$ balls at the same time. Each ball
+has no ``knowledge'' of what happens to the other balls. For the same
+reason, it is impossible to know the value of $n$ before the experiment.
+If we tried to fix the value of $n$ then we would have no way of
+preventing ball number $n+1$ from being taken without violating the principle
+of independence between balls. $n$ is therefore a random variable and
+the Fisher distribution is a conditional distribution which can only
+be determined after the experiment when $n$ is known. The unconditional
+distribution is $c$ independent binomials.
+
+The difference between Wallenius' and Fisher's distributions is low
+when odds ratios are
+near 1, and $n$ is low compared to $N$. The difference between the two
+distributions becomes higher when odds ratios are high and $n$ is near $N$.
+
+Consider the extreme example where an urn contains one red ball with the
+weight 1000, and a thousand white balls each with the weight 1.
+We want to calculate the probability that the red ball is not taken
+when balls are taken one by one.
+The probability that the red ball is not taken in the first draw is
+$\frac{1000}{2000} = \frac 12$. The probability that the red ball is
+not taken in the second draw, under the condition that it was not taken
+in the first draw, is $\frac{999}{1999} \approx \frac 12$.
+The probability that the red ball is
+not taken in the third draw, under the condition that it was not taken
+in the first two draws, is $\frac{998}{1998} \approx \frac 12$.
+Continuing in this way, we can calculate that the probability of not
+taking the red ball in $n$ draws is approximately $2^{-n}$ for moderate
+values of $n$.
+In other words, the probability of not taking a very heavy ball in $n$
+draws falls almost exponentially with $n$ in Wallenius' model.
+The exponential function arises because the probabilities for each draw
+are all multiplied together.
+
+This is not the case in Fisher's model where balls may be taken
+simultaneously. Here the draws are independent
+and the probabilities are therefore not multiplied together. The
+probability of not taking the heavy red ball in Fisher's model is approximately
+$\frac{1}{n+1}$. The two distributions are therefore very different
+in this extreme case.
+\vskip 5mm
+
+The following conditions must be fulfilled for Wallenius' distribution
+to be applicable:
+%
+\begin{itemize}
+%
+\item Items are taken randomly from a finite source containing different
+kinds of items without replacement.
+%
+\item Items are drawn one by one.
+%
+\item The probability of taking a particular item at a particular draw is equal
+to its fraction of the total weight of all items that have not yet been taken at that
+moment. The weight of an item depends only on its kind (color) $i$.
+(It is convenient to use the word ``weight'' for $\omega_i$ even if the
+physical property that determines the odds is something else than weight).
+%
+\item The total number $n$ of items to take is fixed and independent of
+which items happen to be taken.
+%
+\end{itemize}
+\vskip 5mm
+
+The following conditions must be fulfilled for Fisher's distribution
+to be applicable:
+%
+\begin{itemize}
+%
+\item Items are taken randomly from a finite source containing different
+kinds of items without replacement.
+%
+\item Items are taken independently of each other. Whether one item is taken
+is independent of whether another item is taken. Whether one item is taken
+before, after, or simultaneously with another item is irrelevant.
+%
+\item The probability of taking a particular item is proportional to its weight.
+The weight of an item depends only on its kind (color) $i$.
+%
+\item The total number $n$ of items that will be taken is not known
+before the experiment.
+%
+\item $n$ is determined after the experiment and the conditional distribution
+for $n$ known is desired.
+%
+\end{itemize}
+
+
+\section{Examples}
+%
+The following examples will further clarify which distribution to use in different
+situations.
+
+\subsection{Example 1}
+You are catching fish in a small lake that contains a limited number of fish.
+There are different kinds of fish with different weights. The probability of
+catching a particular fish is proportional to its weight when you only catch
+one fish.
+
+You are catching the fish one by one with a fishing rod. You have been ordered
+to catch $n$ fish. You are determined to catch exactly $n$ fish regardless of
+how long time it may take. You are stopping after you have caught $n$ fish
+even if you can see more fish that are tempting you.
+
+This scenario will give a distribution of the types of fish caught that is equal to
+Wallenius' noncentral hypergeometric distribution.
+
+\subsection{Example 2}
+You are catching fish as in example 1, but you are using a big net.
+You are setting up the net one day and coming back the next day to
+remove the net. You count how many fish you have caught and then you go
+home regardless of how many fish you have caught.
+
+Each fish has a probability of getting into the net that is proportional
+to its weight but independent of what happens to the other fish.
+
+This scenario gives Fisher's noncentral hypergeometric distribution after
+$n$ is known.
+
+\subsection{Example 3}
+You are catching fish with a small net. It is possible that more than one
+fish can go into the net at the same time. You are using the net multiple
+times until you have at least $n$ fish.
+
+This scenario gives a distribution that lies between Wallenius' and Fisher's
+distributions. The total number of fish caught can vary if you are getting too
+many fish in the last catch. You may put the excess fish back into the lake,
+but this still doesn't give Wallenius' distribution. This is because you
+are catching multiple fish at the same time. The condition that each catch
+depends on all previous catches does not hold for fish that are caught
+simultaneously or in the same operation.
+
+The resulting distribution will be close to Wallenius' distribution if
+there are only few fish in the net in each catch and you are catching
+many times.
+
+The resulting distribution will be close to Fisher's distribution if
+there are many fish in the net in each catch and you are catching
+few times.
+
+\subsection{Example 4}
+You are catching fish with a big net. Fish are swimming into the net
+randomly in a situation that resembles a Poisson process. You are
+watching the net all the time and take up the net as soon as you have
+caught exactly $n$ fish.
+
+The resulting distribution will be close to Fisher's distribution
+because the fish swim into the net independently of each other.
+But the fates of the fish are not totally independent because
+a particular fish can be saved from getting caught if $n$ other
+fish happen to get into the net before the time that this particular
+fish would have been caught. This is more likely to happen if the other
+fish are heavy than if they are light.
+
+\subsection{Example 5}
+You are catching fish one by one with a fishing rod as in example 1.
+You need a particular amount of fish in order to feed your family.
+You are stopping when the total weight of the fish you have caught
+exceeds a predetermined limit.
+
+The resulting distribution will be close to Wallenius' distribution,
+but not exactly because the decision to stop depends on the weight of
+the fish you have caught so far. $n$ is therefore not known exactly
+before the fishing trip.
+
+\subsection{Conclusion}
+These examples show that the distribution of the types of
+fish you catch depends on the way they are caught. Many situations
+will give a distribution that lies somewhere between Wallenius'
+and Fisher's noncentral hypergeometric distributions.
+
+An interesting consequence of the difference between these two
+distributions is that you will get more of the heavy fish, on average,
+if you catch $n$ fish one by one than if you catch all $n$
+at the same time.
+
+These conclusions can of course be applied to biased sampling of
+other items than fish.
+
+
+\section{Applications}
+%
+The biased urn models can be applied to many different situations
+where items are sampled with bias and without replacement.
+
+\subsection{\tt Calculating probabilities etc.}
+Probabilities, mean and variance can be calculated with the appropriate
+functions. More complicated systems, such as the natural selection
+of animals, can be treated with Monte Carlo simulation, using the
+random variate generating functions.
+
+\subsection{\tt Measuring odds ratios}
+The odds of a sampling process can be measured by an experiment or
+a series of experiments where the number of items sampled of
+each kind (color) is counted.
+
+It is recommended to use sampling with replacement if possible.
+Sampling with replacement makes it possible to use the binomial
+distribution, whereby the calculation of the odds becomes simpler
+and more accurate. If sampling with replacement is not possible,
+then the procedure of sampling without replacement must be
+carefully controlled in order to get a pure Wallenius' distribution
+or a pure Fisher's distribution rather than a mixture of the two,
+as explained in the examples above.
+Use the {\tt odds} functions to calculate the odds ratios from
+experimental values of the mean.
+
+\subsection{\tt Estimating the number of items of a particular kind
+from experimental sampling}
+It is possible to estimate the number of items of a particular kind,
+for example defective items in a production, from biased sampling.
+The traditional procedure is to use unbiased sampling.
+But a model of biased sampling may be used if bias is unavoidable
+or if bias is desired in order to increase the probability of
+detecting e.g. defective items.
+
+It is recommended to use sampling with replacement if possible.
+Sampling with replacement makes it possible to use the binomial
+distribution, whereby the calculation of the number of items
+becomes simpler and more accurate. If sampling with replacement
+is not possible, then the procedure of sampling without
+replacement must be carefully controlled in order to get a pure
+Wallenius' distribution or a pure Fisher's distribution rather
+than a mixture of the two, as explained in the examples above.
+The value of the bias (odds ratio) must be determined before
+the numbers can be calculated.
+
+Use the functions with names beginning with ``{\tt num}'' to
+calculate the number of items of each kind from the result
+of a sampling experiment with known odds ratios.
+
+
+\section{Demos}
+%
+The following demos are included in the {\tt BiasedUrn} package:
+
+\subsection{\tt CompareHypergeo}
+%
+This demo shows the difference between the hypergeometric distribution
+and the two noncentral hypergeometric distributions by plotting
+the probability mass functions.
+
+\subsection{\tt ApproxHypergeo}
+%
+This demo shows shows that the two noncentral hypergeometric
+distributions are approximately equal when the parameters are
+adjusted so that they have the same mean rather than the same odds.
+
+\subsection{\tt OddsPrecision}
+%
+Calculates the precision of the {\tt oddsWNCHypergeo} and {\tt oddsFNCHypergeo}
+functions that are used for estimating the odds from a measured mean.
+
+\subsection{\tt SampleWallenius}
+%
+Makes 100,000 random samples from Wallenius noncentral hypergeometric
+distribution and compares the measured mean with the theoretical mean.
+
+\subsection{\tt UrnTheory}
+%
+Displays this document.
+
+
+\section{Calculation methods}
+%
+The {\tt BiasedUrn} package can calculate the univariate
+and multivariate
+Wallenius' and Fisher's noncentral hypergeometric distributions.
+Several different calculation methods are used, depending on the
+parameters.
+
+The calculation methods and sampling methods are documented at \\
+{\tt http://www.agner.org/random/theory/}.
+
+\section{References}
+
+\noindent Fog, A. (2008a). Calculation Methods for Wallenius'
+Noncentral Hypergeometric Distribution.
+{\it Communications in Statistics, Simulation and Computation}.
+Vol. 37, no. 2, pp 258-273.
+
+\vskip 3mm
+%
+\noindent Fog, A. (2008b). Sampling Methods for Wallenius'
+and Fisher's Noncentral Hypergeometric Distributions.
+{\it Communications in Statistics, Simulation and Computation}.
+Vol. 37, no. 2, pp 241-257.
+
+\vskip 3mm
+%
+\noindent Johnson, N. L., Kemp, A. W. Kotz, S. (2005). {\it
+Univariate Discrete Distributions}. Hoboken, New Jersey: Wiley and
+Sons.
+
+\vskip 3mm
+%
+\noindent McCullagh, P., Nelder, J. A. (1983). {\it Generalized
+Linear Models}. London: Chapman \& Hall.
+
+\vskip 3mm
+%
+\noindent {\tt http://www.agner.org/random/theory/}.
+
+
+\end{document}
diff --git a/inst/doc/UrnTheory.pdf b/inst/doc/UrnTheory.pdf
new file mode 100644
index 0000000..090d5d1
Binary files /dev/null and b/inst/doc/UrnTheory.pdf differ
diff --git a/man/BiasedUrn-1-Package.Rd b/man/BiasedUrn-1-Package.Rd
new file mode 100644
index 0000000..24e75b3
--- /dev/null
+++ b/man/BiasedUrn-1-Package.Rd
@@ -0,0 +1,105 @@
+\name{BiasedUrn-package}
+\alias{BiasedUrn}
+\concept{noncentral hypergeometric distribution}
+\concept{Wallenius' noncentral hypergeometric distribution}
+\concept{Fisher's noncentral hypergeometric distribution}
+\concept{extended hypergeometric distribution}
+\concept{multivariate hypergeometric distribution}
+\concept{biased urn model}
+\concept{biased sampling}
+\concept{evolution by natural selection}
+\docType{package}
+\title{Biased Urn Model Distributions}
+\description{
+Statistical models of biased sampling in the form of univariate and
+multivariate noncentral hypergeometric distributions,
+including Wallenius' noncentral hypergeometric distribution and
+Fisher's noncentral hypergeometric distribution
+(also called extended hypergeometric distribution).
+
+These are distributions that you can get when taking colored balls
+from an urn without replacement, with bias.
+The univariate distributions are used when there are two colors of balls.
+The multivariate distributions are used when there are more
+than two colors of balls.
+
+The (central) univariate and multivariate hypergeometric distribution
+can be obtained by setting \code{odds} = 1.
+
+Please see \code{vignette("UrnTheory")}
+for a definition of these distributions and how
+to decide which distribution to use in a specific case.
+}
+\details{
+\tabular{ll}{
+Package: \tab BiasedUrn\cr
+Type: \tab Package\cr
+Version: \tab 1.06\cr
+Date: \tab 2013-11-06\cr
+License: \tab GPL\cr
+}
+
+\bold{Univariate functions in this package}
+\tabular{lcc}{
+ \tab Wallenius' noncentral hypergeometric \tab Fisher's noncentral hypergeometric \cr
+Probability mass function \tab dWNCHypergeo \tab dFNCHypergeo \cr
+Cumulative distribution function \tab pWNCHypergeo \tab pFNCHypergeo \cr
+Quantile function \tab qWNCHypergeo \tab qFNCHypergeo \cr
+Random variate generation function \tab rWNCHypergeo \tab rFNCHypergeo \cr
+Calculate mean \tab meanWNCHypergeo \tab meanFNCHypergeo \cr
+Calculate variance \tab varWNCHypergeo \tab varFNCHypergeo \cr
+Calculate mode \tab modeWNCHypergeo \tab modeFNCHypergeo \cr
+Estimate odds from mean \tab oddsWNCHypergeo \tab oddsFNCHypergeo \cr
+Estimate number from mean and odds \tab numWNCHypergeo \tab numFNCHypergeo \cr
+Minimum x \tab minHypergeo \tab minHypergeo \cr
+Maximum x \tab maxHypergeo \tab maxHypergeo
+}
+
+\bold{Multivariate functions in this package}
+\tabular{lcc}{
+ \tab Wallenius' noncentral hypergeometric \tab Fisher's noncentral hypergeometric \cr
+Probability mass function \tab dMWNCHypergeo \tab dMFNCHypergeo \cr
+Random variate generation function \tab rMWNCHypergeo \tab rMFNCHypergeo \cr
+Calculate mean \tab meanMWNCHypergeo \tab meanMFNCHypergeo \cr
+Calculate variance \tab varMWNCHypergeo \tab varMFNCHypergeo \cr
+Calculate mean and variance \tab momentsMWNCHypergeo \tab momentsMFNCHypergeo \cr
+Estimate odds from mean \tab oddsMWNCHypergeo \tab oddsMFNCHypergeo \cr
+Estimage number from mean and odds \tab numMWNCHypergeo \tab numMFNCHypergeo \cr
+Minimum x \tab minMHypergeo \tab minMHypergeo \cr
+Maximum x \tab maxMHypergeo \tab maxMHypergeo
+}
+
+}
+\note{The implementation cannot run safely in multiple threads simultaneously
+}
+
+\author{
+Agner Fog
+
+Maintainer: Agner Fog <agner at agner.org>
+}
+
+\references{
+\url{http://www.agner.org/random/}
+}
+
+\keyword{ package }
+\keyword{ distribution }
+\keyword{ univar }
+\keyword{ multivariate }
+
+\seealso{
+\code{vignette("UrnTheory")}
+\cr
+\code{\link{BiasedUrn-Univariate}}.
+\cr
+\code{\link{BiasedUrn-Multivariate}}.
+\cr
+\code{\link{dhyper}}
+\cr
+\code{\link{fisher.test}}
+}
+
+\examples{
+dWNCHypergeo(12, 25, 32, 20, 2.5)
+}
diff --git a/man/BiasedUrn-2-Univariate.Rd b/man/BiasedUrn-2-Univariate.Rd
new file mode 100644
index 0000000..68c6892
--- /dev/null
+++ b/man/BiasedUrn-2-Univariate.Rd
@@ -0,0 +1,185 @@
+\name{BiasedUrn-Univariate}
+\alias{BiasedUrn-Univariate}
+\alias{dWNCHypergeo}
+\alias{dFNCHypergeo}
+\alias{pWNCHypergeo}
+\alias{pFNCHypergeo}
+\alias{qWNCHypergeo}
+\alias{qFNCHypergeo}
+\alias{rWNCHypergeo}
+\alias{rFNCHypergeo}
+\alias{meanWNCHypergeo}
+\alias{meanFNCHypergeo}
+\alias{varWNCHypergeo}
+\alias{varFNCHypergeo}
+\alias{modeWNCHypergeo}
+\alias{modeFNCHypergeo}
+\alias{oddsWNCHypergeo}
+\alias{oddsFNCHypergeo}
+\alias{numWNCHypergeo}
+\alias{numFNCHypergeo}
+\alias{minHypergeo}
+\alias{maxHypergeo}
+
+\title{Biased urn models: Univariate distributions}
+
+\description{
+Statistical models of biased sampling in the form of noncentral
+hypergeometric distributions,
+including Wallenius' noncentral hypergeometric distribution and
+Fisher's noncentral hypergeometric distribution
+(also called extended hypergeometric distribution).
+
+These are distributions that you can get when taking colored balls
+from an urn without replacement, with bias.
+The univariate distributions are used when there are two colors of balls.
+The multivariate distributions are used when there are more
+than two colors of balls.
+
+Please see \code{vignette("UrnTheory")}
+for a definition of these distributions and how
+to decide which distribution to use in a specific case.
+}
+
+\usage{
+dWNCHypergeo(x, m1, m2, n, odds, precision=1E-7)
+dFNCHypergeo(x, m1, m2, n, odds, precision=1E-7)
+pWNCHypergeo(x, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE)
+pFNCHypergeo(x, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE)
+qWNCHypergeo(p, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE)
+qFNCHypergeo(p, m1, m2, n, odds, precision=1E-7, lower.tail=TRUE)
+rWNCHypergeo(nran, m1, m2, n, odds, precision=1E-7)
+rFNCHypergeo(nran, m1, m2, n, odds, precision=1E-7)
+meanWNCHypergeo(m1, m2, n, odds, precision=1E-7)
+meanFNCHypergeo(m1, m2, n, odds, precision=1E-7)
+varWNCHypergeo(m1, m2, n, odds, precision=1E-7)
+varFNCHypergeo(m1, m2, n, odds, precision=1E-7)
+modeWNCHypergeo(m1, m2, n, odds, precision=1E-7)
+modeFNCHypergeo(m1, m2, n, odds, precision=0)
+oddsWNCHypergeo(mu, m1, m2, n, precision=0.1)
+oddsFNCHypergeo(mu, m1, m2, n, precision=0.1)
+numWNCHypergeo(mu, n, N, odds, precision=0.1)
+numFNCHypergeo(mu, n, N, odds, precision=0.1)
+minHypergeo(m1, m2, n)
+maxHypergeo(m1, m2, n)
+}
+
+\arguments{
+\item{x}{Number of red balls sampled.}
+\item{m1}{Initial number of red balls in the urn.}
+\item{m2}{Initial number of white balls in the urn.}
+\item{n}{Total number of balls sampled.}
+\item{N}{Total number of balls in urn before sampling.}
+\item{odds}{Probability ratio of red over white balls.}
+\item{p}{Cumulative probability.}
+\item{nran}{Number of random variates to generate.}
+\item{mu}{Mean x.}
+\item{precision}{Desired precision of calculation.}
+\item{lower.tail}{if TRUE (default), probabilities are
+ \eqn{P(X \le x)}{P(X <= x)}, otherwise, \eqn{P(X > x)}{P(X > x)}.}
+ }
+
+\details{
+\bold{Allowed parameter values} \cr
+All parameters must be non-negative. \code{n} cannot exceed \code{N = m1 + m2}.
+The code has been tested with odds in the range
+\eqn{10^{-9} \ldots 10^9}{1E-9 to 1E9} and zero. The code may work with odds
+outside this range, but errors or NAN can occur for extreme values of odds.
+A ball with odds = 0 is equivalent to no ball.
+\code{mu} must be within the possible range of \code{x}.
+
+\bold{Calculation time} \cr
+The calculation time depends on the specified precision.
+}
+
+\value{
+\code{dWNCHypergeo} and \code{dFNCHypergeo} return the probability mass function for
+Wallenius' and Fisher's noncentral hypergeometric distribution, respectively.
+A single value is returned if \code{x} is a scalar.
+Multiple values are returned if \code{x} is a vector.
+\cr
+
+\code{pWNCHypergeo} and \code{pFNCHypergeo} return the
+cumulative probability function for
+Wallenius' and Fisher's noncentral hypergeometric distribution, respectively.
+A single value is returned if \code{x} is a scalar.
+Multiple values are returned if \code{x} is a vector.
+\cr
+
+\code{qWNCHypergeo} and \code{qFNCHypergeo} return the quantile function for
+Wallenius' and Fisher's noncentral hypergeometric distribution, respectively.
+A single value is returned if \code{p} is a scalar.
+Multiple values are returned if \code{p} is a vector.
+\cr
+
+\code{rWNCHypergeo} and \code{rFNCHypergeo} return
+random variates with Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively.
+\cr
+
+\code{meanWNCHypergeo} and \code{meanFNCHypergeo} calculate the mean
+of Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively. A simple and fast approximation is used when
+\eqn{precision \geq 0.1}{precision >= 0.1}.
+\cr
+
+\code{varWNCHypergeo} and \code{varFNCHypergeo} calculate the variance
+of Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively. A simple and fast approximation is used when
+\eqn{precision \geq 0.1}{precision >= 0.1}.
+\cr
+
+\code{modeWNCHypergeo} and \code{modeFNCHypergeo} calculate the mode
+of Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively.
+\cr
+
+\code{oddsWNCHypergeo} and \code{oddsFNCHypergeo} estimate the odds
+of Wallenius' and Fisher's noncentral hypergeometric
+distribution from a measured mean.
+A single value is returned if \code{mu} is a scalar.
+Multiple values are returned if \code{mu} is a vector.
+A simple and fast approximation is used regardless of the specified precision.
+Exact calculation is not supported.
+See \code{demo(OddsPrecision)}.
+\cr
+
+\code{numWNCHypergeo} and \code{numFNCHypergeo} estimate the
+number of balls of each color in the urn before sampling from
+an experimental mean and a known odds ratio for
+Wallenius' and Fisher's noncentral hypergeometric distributions.
+The returned numbers \code{m1} and \code{m2} are not integers.
+A vector of \code{m1} and \code{m2} is returned if \code{mu} is a scalar.
+A matrix is returned if \code{mu} is a vector.
+A simple approximation is used regardless of the specified precision.
+Exact calculation is not supported.
+The precision of calculation is indicated by \code{demo(OddsPrecision)}.
+\cr
+
+\code{minHypergeo} and \code{maxHypergeo} calculate the
+minimum and maximum value of \code{x}. The value is valid for
+Wallenius' and Fisher's noncentral hypergeometric distribution
+as well as for the (central) hypergeometric distribution.
+}
+
+\seealso{
+\code{vignette("UrnTheory")}
+\cr
+\code{\link{BiasedUrn-Multivariate}}.
+\cr
+\code{\link{BiasedUrn}}.
+\cr
+\code{\link{fisher.test}}
+}
+
+\examples{
+# get probability
+dWNCHypergeo(12, 25, 32, 20, 2.5)
+}
+
+\references{
+\url{http://www.agner.org/random/}
+}
+
+\keyword{ distribution }
+\keyword{ univar }
diff --git a/man/BiasedUrn-3-Multivariate.Rd b/man/BiasedUrn-3-Multivariate.Rd
new file mode 100644
index 0000000..ed8fdad
--- /dev/null
+++ b/man/BiasedUrn-3-Multivariate.Rd
@@ -0,0 +1,189 @@
+\name{BiasedUrn-Multivariate}
+\alias{BiasedUrn-Multivariate}
+\alias{dMWNCHypergeo}
+\alias{dMFNCHypergeo}
+\alias{rMWNCHypergeo}
+\alias{rMFNCHypergeo}
+\alias{meanMWNCHypergeo}
+\alias{meanMFNCHypergeo}
+\alias{varMWNCHypergeo}
+\alias{varMFNCHypergeo}
+\alias{momentsMWNCHypergeo}
+\alias{momentsMFNCHypergeo}
+\alias{oddsMWNCHypergeo}
+\alias{oddsMFNCHypergeo}
+\alias{numMWNCHypergeo}
+\alias{numMFNCHypergeo}
+\alias{minMHypergeo}
+\alias{maxMHypergeo}
+
+\title{Biased urn models: Multivariate distributions}
+
+\description{
+Statistical models of biased sampling in the form of
+multivariate noncentral hypergeometric distributions,
+including Wallenius' noncentral hypergeometric distribution and
+Fisher's noncentral hypergeometric distribution
+(also called extended hypergeometric distribution).
+
+These are distributions that you can get when taking colored balls
+from an urn without replacement, with bias.
+The univariate distributions are used when there are two colors of balls.
+The multivariate distributions are used when there are more
+than two colors of balls.
+
+Please see \code{vignette("UrnTheory")}
+for a definition of these distributions and how
+to decide which distribution to use in a specific case.
+}
+
+\usage{
+dMWNCHypergeo(x, m, n, odds, precision = 1E-7)
+dMFNCHypergeo(x, m, n, odds, precision = 1E-7)
+rMWNCHypergeo(nran, m, n, odds, precision = 1E-7)
+rMFNCHypergeo(nran, m, n, odds, precision = 1E-7)
+meanMWNCHypergeo(m, n, odds, precision = 0.1)
+meanMFNCHypergeo(m, n, odds, precision = 0.1)
+varMWNCHypergeo(m, n, odds, precision = 0.1)
+varMFNCHypergeo(m, n, odds, precision = 0.1)
+momentsMWNCHypergeo(m, n, odds, precision = 0.1)
+momentsMFNCHypergeo(m, n, odds, precision = 0.1)
+oddsMWNCHypergeo(mu, m, n, precision = 0.1)
+oddsMFNCHypergeo(mu, m, n, precision = 0.1)
+numMWNCHypergeo(mu, n, N, odds, precision = 0.1)
+numMFNCHypergeo(mu, n, N, odds, precision = 0.1)
+minMHypergeo(m, n)
+maxMHypergeo(m, n)
+}
+
+\arguments{
+\item{x}{Number of balls of each color sampled.
+Vector with length = number of colors, or matrix with nrows = number of colors.}
+\item{m}{Initial number of balls of each color in the urn.
+Length of vector = number of colors.}
+\item{n}{Total number of balls sampled. Scalar.}
+\item{N}{Total number of balls in urn before sampling. Scalar.}
+\item{odds}{Odds or weight for each color, arbitrarily scaled. Length of vector = number of colors.
+Gives the (central) multivariate hypergeometric distribution if all odds are equal.}
+\item{nran}{Number of random variates to generate. Scalar.}
+\item{mu}{Mean x for each color. Length of vector = number of colors.}
+\item{precision}{Desired precision of calculation. Scalar.}
+}
+
+\details{
+\bold{Allowed parameter values} \cr
+\code{x}, \code{m}, \code{odds} and \code{mu} are all vectors with one
+element for each color. These vectors must have the same length.
+\code{x} can also be a matrix with one column for each observation.
+The number of rows in this matrix must be equal to the number of colors.
+The maximum number of colors is currently set to 32.
+
+All parameters must be non-negative.
+\code{n} cannot exceed \code{N = sum(m)}.
+The odds may be arbitrarily scaled.
+The code has been tested with odds ratios in the range
+\eqn{10^{-9} \ldots 10^9}{1E-9 to 1E9} and zero.
+The code may work with odds ratios
+outside this range, but errors or NAN can occur for extreme values of odds.
+A ball with odds = 0 is equivalent to no ball.
+\code{mu} must be within the possible range of \code{x}.
+
+\bold{Calculation time} \cr
+The calculation time depends on the specified precision and the number of colors.
+The calculation time can be high for rMWNCHypergeo and rMFNCHypergeo when nran
+is high.
+The calculation time can be extremely high for dMFNCHypergeo when n is high and
+the number of colors is high.
+The calculation time can be extremely high for the mean... var... and moments...
+functions when \code{precision} < 0.1 and n is high and the
+number of colors is high.
+}
+
+\value{
+\code{dMWNCHypergeo} and \code{dMFNCHypergeo} return the probability mass
+function for the multivariate Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively.
+A single value is returned if \code{x} is a vector with length = number of colors.
+Multiple values are returned if \code{x} is a matrix with one column for each
+observation. The number of rows must be equal to the number of colors.
+\cr
+
+\code{rMWNCHypergeo} and \code{rMFNCHypergeo} return random vectors with
+the multivariate Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively.
+A vector is returned when \code{nran = 1}. A matrix with one column for each
+observation is returned when \code{nran > 1}.
+\cr
+
+\code{meanMWNCHypergeo} and \code{meanMFNCHypergeo} return the mean
+of the multivariate Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively. A simple and fast approximation is used when
+\code{precision} >= 0.1. A full calculation of all
+possible x combinations is used when \code{precision} < 0.1.
+This can take extremely long time when the number of colors is high.
+\cr
+
+\code{varMWNCHypergeo} and \code{varMFNCHypergeo} return the variance
+of the multivariate Wallenius' and Fisher's noncentral hypergeometric
+distribution, respectively. A simple and fast approximation is used when
+\code{precision} >= 0.1. A full calculation of all
+possible x combinations is used when \code{precision} < 0.1.
+This can take extremely long time when the number of colors is high.
+\cr
+
+\code{momentsMWNCHypergeo} and \code{momentsMFNCHypergeo} return a data
+frame with the mean and variance of the multivariate Wallenius' and
+Fisher's noncentral hypergeometric distribution, respectively.
+Calculating the mean and variance in the same operation saves time when
+\code{precision} < 0.1.
+\cr
+
+\code{oddsMWNCHypergeo} and \code{oddsMFNCHypergeo} estimate the odds
+from an observed mean for the multivariate Wallenius' and
+Fisher's noncentral hypergeometric distribution, respectively.
+A vector of odds is returned if \code{mu} is a vector.
+A matrix is returned if \code{mu} is a matrix with one row for each color.
+A simple and fast approximation is used regardless of the specified precision.
+Exact calculation is not supported.
+See \code{demo(OddsPrecision)}.
+\cr
+
+\code{numMWNCHypergeo} and \code{numMFNCHypergeo} estimate the
+number of balls of each color in the urn before sampling from
+experimental mean and known odds ratios for
+Wallenius' and Fisher's noncentral hypergeometric distributions.
+The returned \code{m} values are not integers.
+A vector of \code{m} is returned if \code{mu} is a vector.
+A matrix of \code{m} is returned if \code{mu} is a matrix with one row for each color.
+A simple and fast approximation is used regardless of the specified precision.
+Exact calculation is not supported.
+The precision of calculation is indicated by \code{demo(OddsPrecision)}.
+\cr
+
+\code{minMHypergeo} and \code{maxMHypergeo} calculate the
+minimum and maximum value of \code{x} for the multivariate distributions.
+The values are valid for the multivariate Wallenius' and Fisher's noncentral
+hypergeometric distributions as well as for the multivariate (central)
+hypergeometric distribution.
+}
+
+\seealso{
+\code{vignette("UrnTheory")}
+\cr
+\code{\link{BiasedUrn-Univariate}}.
+\cr
+\code{\link{BiasedUrn}}.
+}
+
+\examples{
+# get probability
+dMWNCHypergeo(c(8,10,6), c(20,30,20), 24, c(1.,2.5,1.8))
+}
+
+\references{
+\url{http://www.agner.org/random/}
+}
+
+\keyword{ distribution }
+\keyword{ univar }
+\keyword{ multivariate }
diff --git a/src/Makevars b/src/Makevars
new file mode 100644
index 0000000..b4615ab
--- /dev/null
+++ b/src/Makevars
@@ -0,0 +1,3 @@
+# Makevars for BiasedUrn
+# The value of MAXCOLORS may be modified
+PKG_CPPFLAGS= -DR_BUILD=1 -DMAXCOLORS=32
diff --git a/src/erfres.h b/src/erfres.h
new file mode 100644
index 0000000..29fb740
--- /dev/null
+++ b/src/erfres.h
@@ -0,0 +1,203 @@
+/***************************** ERFRES.H **************************************
+* Author: Agner Fog
+* Date created: 2004-07-10
+* Last modified: 2008-12-12
+* Project: stocc.zip
+* Source URL: www.agner.org/random
+*
+* Description:
+Table of residues of a certain expansion of the error function.
+These tables are used in the Laplace method for calculating Wallenius noncentral
+hypergeometric distribution. Used in CWalleniusNCHypergeometric::laplace() and
+CMultiWalleniusNCHypergeometric::laplace().
+
+This file is generated by ERFRESMK.CPP. Please see the file ERFRESMK.CPP for a
+detailed description. You must re-run ERFRESMK.CPP if the constants in STOCC.H
+are changed.
+
+The following constants have been used for making the tables below:
+ERFRES_B = 16 (-log2 of lowest precision)
+ERFRES_E = 40 (-log2 of highest precision)
+ERFRES_S = 2 (step size from begin to end)
+ERFRES_N = 13 (number of tables)
+ERFRES_L = 48 (length of each table)
+
+* Copyright 2004-2008 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+//number of standard deviations to integrate
+double NumSDev[ERFRES_N] = {
+ 4.324919041, 4.621231001, 4.900964208, 5.16657812, 5.419983175, 5.662697617, 5.895951217, 6.120756286, 6.337957755, 6.548269368, 6.752300431, 6.950575948, 7.143552034};
+
+//tables of error function residues
+double ErfRes[ERFRES_N][ERFRES_L] = {
+ // 0: precision 1.53E-05
+ {1.77242680540608204400E+00, 4.42974050453076994800E-01, 5.52683719287987914000E-02, 4.57346771067359261300E-03,
+ 2.80459064155823224600E-04, 1.34636065677244878500E-05, 5.21352785817798300800E-07, 1.65832271688171705300E-08,
+ 4.38865717471213472100E-10, 9.76518286165874680600E-12, 1.84433013221606645200E-13, 2.98319658966723379900E-15,
+ 4.16751049288581722800E-17, 5.06844293411881381200E-19, 5.40629927341885830200E-21, 5.09268600245963099700E-23,
+ 4.26365286677037947600E-25, 3.19120961809492396300E-27, 2.14691825888024309100E-29, 1.30473994083903636000E-31,
+ 7.19567933922698314600E-34, 3.61655672748362805300E-36, 1.66299275803871018000E-38, 7.02143932105206679000E-41,
+ 2.73122271211734530800E-43, 9.81824938600123102500E-46, 3.27125155121613401700E-48, 1.01290491600297417870E-50,
+ 2.92208589554240568800E-53, 7.87247562929246970200E-56, 1.98510836143160618600E-58, 4.69476368999432417500E-61,
+ 1.04339442450396263710E-63, 2.18317315734482557700E-66, 4.30811606197931495800E-69, 8.03081062303437395000E-72,
+ 1.41637813978528824300E-74, 2.36693694351427741600E-77, 3.75309000199992425400E-80, 5.65409397708564003600E-83,
+ 8.10322084538751956300E-86, 1.10610328893385430400E-88, 1.43971150303803736000E-91, 1.78884532267880002700E-94,
+ 2.12393968173898899400E-97, 2.41222807417272408400E-100, 2.62311608532487946600E-103, 2.73362126618952541200E-106},
+ // 1: precision 3.81E-06
+ {1.77244708953065753100E+00, 4.43074113723358004800E-01, 5.53507546366094128100E-02, 4.60063583541917741200E-03,
+ 2.85265530531727983900E-04, 1.39934570721569428400E-05, 5.61234181715130108200E-07, 1.87635216633109792000E-08,
+ 5.29386567604284238200E-10, 1.27170893476994027400E-11, 2.62062404027629145800E-13, 4.66479837413316034000E-15,
+ 7.22069968938298529400E-17, 9.78297384753513147400E-19, 1.16744590415498861200E-20, 1.23448081765041655900E-22,
+ 1.16327347874717650400E-24, 9.82084801488552519700E-27, 7.46543820883360082800E-29, 5.13361419796185362400E-31,
+ 3.20726459674397306300E-33, 1.82784782995019591600E-35, 9.53819678596992509200E-38, 4.57327699736894183000E-40,
+ 2.02131302843758583500E-42, 8.26035836048709995200E-45, 3.13004443753993537100E-47, 1.10264466279388735400E-49,
+ 3.62016356599029098800E-52, 1.11028768672354227000E-54, 3.18789098809699663200E-57, 8.58660896411902915800E-60,
+ 2.17384332055877431800E-62, 5.18219413865915035000E-65, 1.16526530012222654600E-67, 2.47552943408735877700E-70,
+ 4.97637013794934320200E-73, 9.47966949394160838200E-76, 1.71361124212171341900E-78, 2.94335699587741039100E-81,
+ 4.80983789654609513600E-84, 7.48676877660738410200E-87, 1.11129798477201315100E-89, 1.57475145101473103400E-92,
+ 2.13251069867015016100E-95, 2.76249093386952224300E-98, 3.42653604413897348900E-101, 4.07334940102519697800E-104},
+ // 2: precision 9.54E-07
+ {1.77245216056180140300E+00, 4.43102496776356791100E-01, 5.53772601883593673800E-02, 4.61054749828262358400E-03,
+ 2.87253302758514987700E-04, 1.42417784632842086400E-05, 5.82408831964509309600E-07, 2.00745450404117050700E-08,
+ 5.91011604093749423400E-10, 1.49916022838813094600E-11, 3.29741365965300606900E-13, 6.32307780683001018100E-15,
+ 1.06252674842175897800E-16, 1.57257431560311360800E-18, 2.06034642322747725700E-20, 2.40159615347654528000E-22,
+ 2.50271435589313449400E-24, 2.34271631492982176000E-26, 1.97869636045309031700E-28, 1.51440731538936707000E-30,
+ 1.05452976534458622500E-32, 6.70612854853490875900E-35, 3.90863249061728208500E-37, 2.09490406980039604000E-39,
+ 1.03572639732910843160E-41, 4.73737271771599553200E-44, 2.01016799853191990700E-46, 7.93316727009805559200E-49,
+ 2.91896910080597410900E-51, 1.00361556207253403120E-53, 3.23138481735358914000E-56, 9.76266225260763484100E-59,
+ 2.77288342251948021500E-61, 7.41751660051554639600E-64, 1.87191699537047863600E-66, 4.46389809367038823800E-69,
+ 1.00740435367143552990E-71, 2.15468537440631290200E-74, 4.37372804933525238000E-77, 8.43676369508201162800E-80,
+ 1.54845094802349484100E-82, 2.70727577941653793200E-85, 4.51412388960109772800E-88, 7.18605932463221426200E-91,
+ 1.09328719452457957600E-93, 1.59123500193816486400E-96, 2.21770259794482485600E-99, 2.96235081914900644200E-102},
+ // 3: precision 2.38E-07
+ {1.77245342831958737100E+00, 4.43110438095780200600E-01, 5.53855581791170228000E-02, 4.61401880234106439000E-03,
+ 2.88031928895194049600E-04, 1.43505456256023050800E-05, 5.92777558091362167400E-07, 2.07920891418090254000E-08,
+ 6.28701715960960909000E-10, 1.65457546101845217200E-11, 3.81394501062348919800E-13, 7.73640169798996619200E-15,
+ 1.38648618664047143200E-16, 2.20377376795474051600E-18, 3.11871105901085320300E-20, 3.94509797765438339700E-22,
+ 4.47871054279593642800E-24, 4.58134444141001287500E-26, 4.23915369932833545200E-28, 3.56174643985755223000E-30,
+ 2.72729562179570597400E-32, 1.90986605998546816600E-34, 1.22720072734085613700E-36, 7.25829034260272865500E-39,
+ 3.96321699645874596800E-41, 2.00342049456074966200E-43, 9.40055798441764717800E-46, 4.10462275003981738400E-48,
+ 1.67166813346582579800E-50, 6.36422340874443565900E-53, 2.26969100679582421400E-55, 7.59750937838053600600E-58,
+ 2.39149482673471882600E-60, 7.09134153544718378800E-63, 1.98415128824311335000E-65, 5.24683837588056156800E-68,
+ 1.31326161465641387500E-70, 3.11571024962460536800E-73, 7.01627137211411880000E-76, 1.50162731270605666400E-78,
+ 3.05816530510335364700E-81, 5.93355048535012188600E-84, 1.09802441010335521600E-86, 1.94008240128183308800E-89,
+ 3.27631821921541675800E-92, 5.29343480369738200400E-95, 8.19001419434114020600E-98, 1.21456436757992622700E-100},
+ // 4: precision 5.96E-08
+ {1.77245374525903386300E+00, 4.43112635580628681700E-01, 5.53880993417431935600E-02, 4.61519508177347361400E-03,
+ 2.88323830371235781500E-04, 1.43956506488931199600E-05, 5.97533121516696046900E-07, 2.11560073234896927000E-08,
+ 6.49836113541376862800E-10, 1.75091216044688314800E-11, 4.16782737060155846600E-13, 8.80643257335436424800E-15,
+ 1.65748420791207225100E-16, 2.78707349086274968000E-18, 4.19899868515935354900E-20, 5.68498078698629510200E-22,
+ 6.93816222596422139400E-24, 7.65747618996655475200E-26, 7.66779861336649418200E-28, 6.98905143723583695400E-30,
+ 5.81737537190421990800E-32, 4.43568540037466870600E-34, 3.10768227888207447300E-36, 2.00640852664381818400E-38,
+ 1.19706367104711013300E-40, 6.61729939738396217600E-43, 3.39784063694262711800E-45, 1.62450416252839296200E-47,
+ 7.24798161653719932800E-50, 3.02428684730111423300E-52, 1.18255348374176440700E-54, 4.34156802253088795200E-57,
+ 1.49931575039307549400E-59, 4.87879082698754128200E-62, 1.49836511723882777600E-64, 4.34998243416684050900E-67,
+ 1.19554618884894856000E-69, 3.11506828608539767000E-72, 7.70504604851319512900E-75, 1.81153231245726529100E-77,
+ 4.05332288179748454100E-80, 8.64127160751002389800E-83, 1.75723563299790750600E-85, 3.41217779987510142000E-88,
+ 6.33324341504830543600E-91, 1.12470466360665277900E-93, 1.91282818505057981800E-96, 3.11838272111119088500E-99},
+ // 5: precision 1.49E-08
+ {1.77245382449389548700E+00, 4.43113238150016054000E-01, 5.53888635367372804600E-02, 4.61558298326459057200E-03,
+ 2.88429374592283566800E-04, 1.44135302457832808700E-05, 5.99599530816354110000E-07, 2.13293263207088596800E-08,
+ 6.60866899904610148200E-10, 1.80600922150303605400E-11, 4.38957621672449876700E-13, 9.54096365498724593600E-15,
+ 1.86125270560486321400E-16, 3.26743200260750243300E-18, 5.17322947745786073000E-20, 7.40303709577309752000E-22,
+ 9.59703297362487960100E-24, 1.12979041959758568400E-25, 1.21090586780714120800E-27, 1.18477600671972569200E-29,
+ 1.06110784945102789800E-31, 8.72301430014194580800E-34, 6.59978694597213862400E-36, 4.60782503988683505400E-38,
+ 2.97629996764696360400E-40, 1.78296967476668997800E-42, 9.92947813649120231300E-45, 5.15238281451496107200E-47,
+ 2.49648080941516617600E-49, 1.13183145876711695200E-51, 4.81083885812771760200E-54, 1.92068525483444959800E-56,
+ 7.21538203720691761200E-59, 2.55484244329461795400E-61, 8.54021947322263940200E-64, 2.69922457940407460300E-66,
+ 8.07806757099831088400E-69, 2.29233505413233278200E-71, 6.17627451352383776600E-74, 1.58198519435517862400E-76,
+ 3.85682833066898009900E-79, 8.96007783937447061800E-82, 1.98575880907873828900E-84, 4.20275001914011054200E-87,
+ 8.50301055680340658200E-90, 1.64613519849643900900E-92, 3.05222294684008316300E-95, 5.42516704506242119200E-98},
+ // 6: precision 3.73E-09
+ {1.77245384430261089200E+00, 4.43113402125597019200E-01, 5.53890898808651020700E-02, 4.61570802060252211600E-03,
+ 2.88466397094702578100E-04, 1.44203545983349722400E-05, 6.00457657669759309400E-07, 2.14076280553580130200E-08,
+ 6.66287908992827087900E-10, 1.83546080772263722600E-11, 4.51849203153760888400E-13, 1.00053478654150626250E-14,
+ 2.00133542358651377800E-16, 3.62647881190865840300E-18, 5.96489800325831839200E-20, 8.92069144951359438200E-22,
+ 1.21499978844978062400E-23, 1.50969159775091919100E-25, 1.71458470816131592700E-27, 1.78354149193378771000E-29,
+ 1.70298947555869630200E-31, 1.49600537831395400600E-33, 1.21186208172570666700E-35, 9.07362642179266008600E-38,
+ 6.29382543478586469600E-40, 4.05352760000606626000E-42, 2.42933889358226154400E-44, 1.35768914148821438100E-46,
+ 7.09017160688256911600E-49, 3.46664168532600651800E-51, 1.58991153690202909500E-53, 6.85218984466549798200E-56,
+ 2.77986852228382907500E-58, 1.06333492956411188200E-60, 3.84102521375678317000E-63, 1.31221496031384552800E-65,
+ 4.24584095965170648000E-68, 1.30291378525223696900E-70, 3.79687911940099574200E-73, 1.05205378465263412500E-75,
+ 2.77502269989758744900E-78, 6.97601832816401403200E-81, 1.67315109709482392200E-83, 3.83268665565667928900E-86,
+ 8.39358376033290752000E-89, 1.75907817494562062400E-91, 3.53115954806899335200E-94, 6.79562013989671425000E-97},
+ // 7: precision 9.31E-10
+ {1.77245384925478974400E+00, 4.43113446460012284000E-01, 5.53891560601252504200E-02, 4.61574755288994634700E-03,
+ 2.88479053368568788400E-04, 1.44228769021976818600E-05, 6.00800544645992949800E-07, 2.14414502554089331400E-08,
+ 6.68819005926294320800E-10, 1.85032367193584636900E-11, 4.58880445172944815400E-13, 1.02790650461108873560E-14,
+ 2.09055796622121955200E-16, 3.87357904265687446300E-18, 6.55355746022352119400E-20, 1.01398465283490267200E-21,
+ 1.43654532753298842400E-23, 1.86580454392148962200E-25, 2.22454554378132065200E-27, 2.43828788210971585600E-29,
+ 2.46099438567553070000E-31, 2.29136593939231572900E-33, 1.97178483051357608300E-35, 1.57129911859150760300E-37,
+ 1.16187715309016251400E-39, 7.98791034830625946600E-42, 5.11610271388176540200E-44, 3.05861085454619325800E-46,
+ 1.71006575230074253400E-48, 8.95787473757552059200E-51, 4.40426750636187741200E-53, 2.03593329808165663200E-55,
+ 8.86319619094250260800E-58, 3.63949556302483252000E-60, 1.41180525527432472100E-62, 5.18110448656726197600E-65,
+ 1.80130976146235507900E-67, 5.94089489436009998000E-70, 1.86108901096460881000E-72, 5.54453617603266634800E-75,
+ 1.57273231131712670500E-77, 4.25229555550383344000E-80, 1.09708064410784368000E-82, 2.70363777400980301400E-85,
+ 6.37064773173804957600E-88, 1.43666982549400138800E-90, 3.10359876850474266200E-93, 6.42822304267944541900E-96},
+ // 8: precision 2.33E-10
+ {1.77245385049283445600E+00, 4.43113458380306853400E-01, 5.53891751960330686200E-02, 4.61575984524613369300E-03,
+ 2.88483285115404915700E-04, 1.44237837119469849000E-05, 6.00933085215778545800E-07, 2.14555059613473259000E-08,
+ 6.69949807134525424700E-10, 1.85746173246056176400E-11, 4.62510251141501895600E-13, 1.04309449728125451550E-14,
+ 2.14376794695367282400E-16, 4.03195345507914206800E-18, 6.95901230873262760600E-20, 1.10422005968960415700E-21,
+ 1.61274044622451622200E-23, 2.17010646570190394600E-25, 2.69272585719737993500E-27, 3.08406442023150341400E-29,
+ 3.26412756902204044100E-31, 3.19659762892894327800E-33, 2.90079234489442113000E-35, 2.44307440922101839900E-37,
+ 1.91280099578638699700E-39, 1.39463784147443818800E-41, 9.48568383329895892700E-44, 6.02906080392955580400E-46,
+ 3.58720420688290561300E-48, 2.00136767763554841800E-50, 1.04877885428425423540E-52, 5.17045929753308956200E-55,
+ 2.40183088534749939500E-57, 1.05288434613857573000E-59, 4.36191374659545444200E-62, 1.71017740178796946700E-64,
+ 6.35417287308090154000E-67, 2.24023617204667066100E-69, 7.50388817892399787300E-72, 2.39087016939309798700E-74,
+ 7.25439736654156264700E-77, 2.09846227207024494800E-79, 5.79315651373498761100E-82, 1.52786617607871741100E-84,
+ 3.85332605389629328300E-87, 9.30196261538477647000E-90, 2.15126632809118648300E-92, 4.77058936290696223500E-95},
+ // 9: precision 5.82E-11
+ {1.77245385080234563500E+00, 4.43113461569894215700E-01, 5.53891806760746538300E-02, 4.61576361260268991600E-03,
+ 2.88484673044866409200E-04, 1.44241019771415521500E-05, 6.00982861902849871600E-07, 2.14611541966231908200E-08,
+ 6.70435999307504633400E-10, 1.86074527008731886600E-11, 4.64296589104966284700E-13, 1.05109058078120195880E-14,
+ 2.17373506425627932200E-16, 4.12736258800510237200E-18, 7.22027572389545573000E-20, 1.16641031427122158000E-21,
+ 1.74261574594878846800E-23, 2.40999131874158664000E-25, 3.08741471404781296800E-27, 3.66622899027160893300E-29,
+ 4.03832398444680182100E-31, 4.12964092806000764200E-33, 3.92459969957984993300E-35, 3.47023698321199047400E-37,
+ 2.85870037656881575800E-39, 2.19701222983622897200E-41, 1.57757442199878062800E-43, 1.05998290283581317870E-45,
+ 6.67461794578944750100E-48, 3.94493775265477963400E-50, 2.19180590286711897200E-52, 1.14647284342367091100E-54,
+ 5.65409064942635909000E-57, 2.63281413190197920300E-59, 1.15914855705146421000E-61, 4.83173813806023163900E-64,
+ 1.90931412007029721900E-66, 7.16152712238209948300E-69, 2.55277823724126351900E-71, 8.65775632882397637500E-74,
+ 2.79685049229469435800E-76, 8.61535752145576873700E-79, 2.53319381071928112300E-81, 7.11686161831786026200E-84,
+ 1.91227899461300469000E-86, 4.91879425560043181900E-89, 1.21226578717106016000E-91, 2.86511260628508142200E-94},
+ // 10: precision 1.46E-11
+ {1.77245385087972342800E+00, 4.43113462419744630200E-01, 5.53891822321947835700E-02, 4.61576475266972634100E-03,
+ 2.88485120632836570100E-04, 1.44242113476668549100E-05, 6.01001089101483108200E-07, 2.14633579957941871400E-08,
+ 6.70638121912630560800E-10, 1.86219965341716152100E-11, 4.65139560168398521100E-13, 1.05511053035457485150E-14,
+ 2.18978467579008781700E-16, 4.18179627467181890600E-18, 7.37905600609363562400E-20, 1.20666925770415139000E-21,
+ 1.83216676939141016100E-23, 2.58616160243870388400E-25, 3.39612594393133643000E-27, 4.15117456105401982300E-29,
+ 4.72512355800254106200E-31, 5.01108411105699264300E-33, 4.95452692086540934200E-35, 4.57052259669118191500E-37,
+ 3.93757613394119041600E-39, 3.17143225730425447800E-41, 2.39087136989889684400E-43, 1.68918677399352864600E-45,
+ 1.11992962513487784300E-47, 6.97720003652956407000E-50, 4.09017183052803247800E-52, 2.25925194899934230000E-54,
+ 1.17743902383784437300E-56, 5.79751618317805258800E-59, 2.70049127204827368400E-61, 1.19150157862632851000E-63,
+ 4.98581510751975724600E-66, 1.98102566456273457700E-68, 7.48277410614888503600E-71, 2.68994458637406843000E-73,
+ 9.21308680313745922900E-76, 3.00957175301701607000E-78, 9.38604174484261857600E-81, 2.79745691952436047200E-83,
+ 7.97548757616816228000E-86, 2.17700350714256603000E-88, 5.69442820814374326200E-91, 1.42855756885812751800E-93},
+ // 11: precision 3.64E-12
+ {1.77245385089906787700E+00, 4.43113462645337308000E-01, 5.53891826707801996000E-02, 4.61576509382801447000E-03,
+ 2.88485262834342722100E-04, 1.44242482379506758200E-05, 6.01007615943023924400E-07, 2.14641957411498484200E-08,
+ 6.70719685646245707700E-10, 1.86282265411023575000E-11, 4.65522856702499667400E-13, 1.05705070352080171380E-14,
+ 2.19800647930093079100E-16, 4.21139261151871749000E-18, 7.47068213693802656400E-20, 1.23132525686457329000E-21,
+ 1.89037080673535316000E-23, 2.70767450402634975900E-25, 3.62208731605653583200E-27, 4.52783644780645903400E-29,
+ 5.29116794891083221600E-31, 5.78191926529856774600E-33, 5.91019131357709915300E-35, 5.65375339320520942200E-37,
+ 5.06448494950527399600E-39, 4.25125004489814020300E-41, 3.34702040997479327500E-43, 2.47392597585772167100E-45,
+ 1.71856809642179370600E-47, 1.12329116466680264100E-49, 6.91635006957699099400E-52, 4.01648185933072044700E-54,
+ 2.20256743728563483200E-56, 1.14197705850825122000E-58, 5.60474946818590333800E-61, 2.60701847612354797700E-63,
+ 1.15061401831998511400E-65, 4.82402847794291118400E-68, 1.92339714685666953300E-70, 7.30092195189691915600E-73,
+ 2.64114863236683700200E-75, 9.11500639536260716600E-78, 3.00399043312000082200E-80, 9.46306767642663343000E-83,
+ 2.85205432245625504600E-85, 8.23120145271503093200E-88, 2.27678649791096140000E-90, 6.04082678746563674000E-93},
+ // 12: precision 9.09E-13
+ {1.77245385090390399000E+00, 4.43113462705021723200E-01, 5.53891827935733966800E-02, 4.61576519490408572200E-03,
+ 2.88485307416075940900E-04, 1.44242604760223605000E-05, 6.01009907022372119900E-07, 2.14645068933581115800E-08,
+ 6.70751738699247757000E-10, 1.86308168994678478700E-11, 4.65691470353760117700E-13, 1.05795367138350319200E-14,
+ 2.20205466324054638500E-16, 4.22680889851439179400E-18, 7.52117118137557251000E-20, 1.24569747014608843200E-21,
+ 1.92626007811754286900E-23, 2.78693040917777943300E-25, 3.77798094465194860200E-27, 4.80270052176922369800E-29,
+ 5.72806202403284098500E-31, 6.41118455649104110000E-33, 6.73530071235990996000E-35, 6.64287180769401900600E-37,
+ 6.15272463485746774200E-39, 5.35401292372264035500E-41, 4.37964050507321407500E-43, 3.37013878900376065400E-45,
+ 2.44151902553507999600E-47, 1.66674472552984171500E-49, 1.07324838386391679300E-51, 6.52532932562465070600E-54,
+ 3.75007759408864456600E-56, 2.03933010598440151000E-58, 1.05056269424470639500E-60, 5.13240427502016103000E-63,
+ 2.38044205354512290600E-65, 1.04929890842558070320E-67, 4.40052237815903136000E-70, 1.75760526644875492000E-72,
+ 6.69249991110777975200E-75, 2.43182093294000139800E-77, 8.44044451319186471300E-80, 2.80086205952805676200E-82,
+ 8.89407469263960473600E-85, 2.70501913533005623200E-87, 7.88617413146613817400E-90, 2.20568290007963387700E-92}};
diff --git a/src/fnchyppr.cpp b/src/fnchyppr.cpp
new file mode 100644
index 0000000..f856f8b
--- /dev/null
+++ b/src/fnchyppr.cpp
@@ -0,0 +1,738 @@
+/*************************** fnchyppr.cpp **********************************
+* Author: Agner Fog
+* Date created: 2002-10-20
+* Last modified: 2015-12-27
+* Project: stocc.zip
+* Source URL: www.agner.org/random
+*
+* Description:
+* Calculation of univariate and multivariate Fisher's noncentral hypergeometric
+* probability distribution.
+*
+* This file contains source code for the class CFishersNCHypergeometric
+* and CMultiFishersNCHypergeometric defined in stocc.h.
+*
+* Documentation:
+* ==============
+* The file stocc.h contains class definitions.
+* Further documentation on www.agner.org/random
+*
+* Copyright 2002-2015 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include <string.h> // memmove function
+#include "stocc.h" // class definition
+
+
+/***********************************************************************
+Methods for class CFishersNCHypergeometric
+***********************************************************************/
+
+CFishersNCHypergeometric::CFishersNCHypergeometric(int32 n, int32 m, int32 N, double odds, double accuracy) {
+ // constructor
+ // set parameters
+ this->n = n; this->m = m; this->N = N;
+ this->odds = odds; this->accuracy = accuracy;
+
+ // check validity of parameters
+ if (n < 0 || m < 0 || N < 0 || odds < 0. || n > N || m > N) {
+ FatalError("Parameter out of range in class CFishersNCHypergeometric");
+ }
+ if (accuracy < 0) accuracy = 0;
+ if (accuracy > 1) accuracy = 1;
+ // initialize
+ logodds = log(odds); scale = rsum = 0.;
+ ParametersChanged = 1;
+ // calculate xmin and xmax
+ xmin = m + n - N; if (xmin < 0) xmin = 0;
+ xmax = n; if (xmax > m) xmax = m;
+}
+
+
+int32 CFishersNCHypergeometric::mode(void) {
+ // Find mode (exact)
+ // Uses the method of Liao and Rosen, The American Statistician, vol 55,
+ // no 4, 2001, p. 366-369.
+ // Note that there is an error in Liao and Rosen's formula.
+ // Replace sgn(b) with -1 in Liao and Rosen's formula.
+
+ double A, B, C, D; // coefficients for quadratic equation
+ double x; // mode
+ int32 L = m + n - N;
+ int32 m1 = m+1, n1 = n+1;
+
+ if (odds == 1.) {
+ // simple hypergeometric
+ x = (m + 1.) * (n + 1.) / (N + 2.);
+ }
+ else {
+ // calculate analogously to Cornfield mean
+ A = 1. - odds;
+ B = (m1+n1)*odds - L;
+ C = -(double)m1*n1*odds;
+ D = B*B -4*A*C;
+ D = D > 0. ? sqrt(D) : 0.;
+ x = (D - B)/(A+A);
+ }
+ return int32(x);
+}
+
+
+double CFishersNCHypergeometric::mean(void) {
+ // Find approximate mean
+ // Calculation analogous with mode
+ double a, b; // temporaries in calculation
+ double mean; // mean
+
+ if (odds == 1.) { // simple hypergeometric
+ return double(m)*n/N;
+ }
+ // calculate Cornfield mean
+ a = (m+n)*odds + (N-m-n);
+ b = a*a - 4.*odds*(odds-1.)*m*n;
+ b = b > 0. ? sqrt(b) : 0.;
+ mean = (a-b)/(2.*(odds-1.));
+ return mean;
+}
+
+double CFishersNCHypergeometric::variance(void) {
+ // find approximate variance (poor approximation)
+ double my = mean(); // approximate mean
+ // find approximate variance from Fisher's noncentral hypergeometric approximation
+ double r1 = my * (m-my); double r2 = (n-my)*(my+N-n-m);
+ if (r1 <= 0. || r2 <= 0.) return 0.;
+ double var = N*r1*r2/((N-1)*(m*r2+(N-m)*r1));
+ if (var < 0.) var = 0.;
+ return var;
+}
+
+
+double CFishersNCHypergeometric::moments(double * mean_, double * var_) {
+ // calculate exact mean and variance
+ // return value = sum of f(x), expected = 1.
+ double y, sy=0, sxy=0, sxxy=0, me1;
+ int32 x, xm, x1;
+ const double accur = 0.1 * accuracy; // accuracy of calculation
+ xm = (int32)mean(); // approximation to mean
+ for (x = xm; x <= xmax; x++) {
+ y = probability(x);
+ x1 = x - xm; // subtract approximate mean to avoid loss of precision in sums
+ sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
+ if (y < accur && x != xm) break;
+ }
+ for (x = xm-1; x >= xmin; x--) {
+ y = probability(x);
+ x1 = x - xm; // subtract approximate mean to avoid loss of precision in sums
+ sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
+ if (y < accur) break;
+ }
+ me1 = sxy / sy;
+ *mean_ = me1 + xm;
+ y = sxxy / sy - me1 * me1;
+ if (y < 0) y=0;
+ *var_ = y;
+ return sy;
+}
+
+
+double CFishersNCHypergeometric::probability(int32 x) {
+ // calculate probability function
+ const double accur = accuracy * 0.1;// accuracy of calculation
+
+ if (x < xmin || x > xmax) return 0;
+ if (n == 0) return 1.;
+
+ if (odds == 1.) {
+ // central hypergeometric
+ return exp(
+ LnFac(m) - LnFac(x) - LnFac(m-x) +
+ LnFac(N-m) - LnFac(n-x) - LnFac((N-m)-(n-x)) -
+ (LnFac(N) - LnFac(n) - LnFac(N-n)));
+ }
+
+ if (odds == 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in CFishersNCHypergeometric::probability");
+ return x == 0;
+ }
+
+ if (!rsum) {
+ // first time. calculate rsum = reciprocal of sum of proportional
+ // function over all probable x values
+ int32 x1, x2; // x loop
+ double y; // value of proportional function
+ x1 = (int32)mean(); // start at mean
+ if (x1 < xmin) x1 = xmin;
+ x2 = x1 + 1;
+ scale = 0.; scale = lng(x1); // calculate scale to avoid overflow
+ rsum = 1.; // = exp(lng(x1)) with this scale
+ for (x1--; x1 >= xmin; x1--) {
+ rsum += y = exp(lng(x1)); // sum from x1 and down
+ if (y < accur) break; // until value becomes negligible
+ }
+ for (; x2 <= xmax; x2++) { // sum from x2 and up
+ rsum += y = exp(lng(x2));
+ if (y < accur) break; // until value becomes negligible
+ }
+ rsum = 1. / rsum; // save reciprocal sum
+ }
+ return exp(lng(x)) * rsum; // function value
+}
+
+
+double CFishersNCHypergeometric::probabilityRatio(int32 x, int32 x0) {
+ // Calculate probability ratio f(x)/f(x0)
+ // This is much faster than calculating a single probability because
+ // rsum is not needed
+ double a1, a2, a3, a4, f1, f2, f3, f4;
+ int32 y, dx = x - x0;
+ int invert = 0;
+
+ if (x < xmin || x > xmax) return 0.;
+ if (x0 < xmin || x0 > xmax) {
+ FatalError("Infinity in CFishersNCHypergeometric::probabilityRatio");
+ }
+ if (dx == 0.) return 1.;
+ if (dx < 0.) {
+ invert = 1;
+ dx = -dx;
+ y = x; x = x0; x0 = y;
+ }
+ a1 = m - x0; a2 = n - x0; a3 = x; a4 = N - m - n + x;
+ if (dx <= 28 && x <= 100000) { // avoid overflow
+ // direct calculation
+ f1 = f2 = 1.;
+ // compute ratio of binomials
+ for (y = 0; y < dx; y++) {
+ f1 *= a1-- * a2--;
+ f2 *= a3-- * a4--;
+ }
+ // compute odds^dx
+ f3 = 1.; f4 = odds; y = dx;
+ while (y) {
+ if (f4 < 1.E-100) {
+ f3 = 0.; break; // avoid underflow
+ }
+ if (y & 1) f3 *= f4;
+ f4 *= f4;
+ y = (unsigned long)(y) >> 1;
+ }
+ f1 = f3 * f1 / f2;
+ if (invert) f1 = 1. / f1;
+ }
+ else {
+ // use logarithms
+ f1 = FallingFactorial(a1,dx) + FallingFactorial(a2,dx) -
+ FallingFactorial(a3,dx) - FallingFactorial(a4,dx) +
+ dx * log(odds);
+ if (invert) f1 = -f1;
+ f1 = exp(f1);
+ }
+ return f1;
+}
+
+
+double CFishersNCHypergeometric::MakeTable(double * table, int32 MaxLength, int32 * xfirst, int32 * xlast, double cutoff) {
+ // Makes a table of Fisher's noncentral hypergeometric probabilities.
+ // Results are returned in the array table of size MaxLength.
+ // The values are scaled so that the highest value is 1. The return value
+ // is the sum, s, of all the values in the table. The normalized
+ // probabilities are obtained by multiplying all values in the table by
+ // 1/s.
+ // The tails are cut off where the values are < cutoff, so that
+ // *xfirst may be > xmin and *xlast may be < xmax.
+ // The value of cutoff will be 0.01 * accuracy if not specified.
+ // The first and last x value represented in the table are returned in
+ // *xfirst and *xlast. The resulting probability values are returned in the
+ // first (*xlast - *xfirst + 1) positions of table. If this would require
+ // more than MaxLength values then the table is filled with as many
+ // correct values as possible.
+ //
+ // The function will return the desired length of table when MaxLength = 0.
+
+ double f; // probability function value
+ double sum; // sum of table values
+ double a1, a2, b1, b2; // factors in recursive calculation of f(x)
+ int32 x; // x value
+ int32 x1, x2; // lowest and highest x
+ int32 i, i0, i1, i2; // table index
+ int32 mode = this->mode(); // mode
+ int32 L = n + m - N; // parameter
+ int32 DesiredLength; // desired length of table
+
+ // limits for x
+ x1 = (L > 0) ? L : 0; // xmin
+ x2 = (n < m) ? n : m; // xmax
+
+ // special cases
+ if (x1 == x2) goto DETERMINISTIC;
+ if (odds <= 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in CWalleniusNCHypergeometric::MakeTable");
+ x1 = 0;
+ DETERMINISTIC:
+ if (MaxLength == 0) {
+ if (xfirst) *xfirst = 1;
+ return 1;
+ }
+ *xfirst = *xlast = x1;
+ *table = 1.;
+ return 1;
+ }
+
+ if (MaxLength <= 0) {
+ // Return UseTable and LengthNeeded
+ DesiredLength = x2 - x1 + 1; // max length of table
+ if (DesiredLength > 200) {
+ double sd = sqrt(variance()); // calculate approximate standard deviation
+ // estimate number of standard deviations to include from normal distribution
+ i = (int32)(NumSD(accuracy) * sd + 0.5);
+ if (DesiredLength > i) DesiredLength = i;
+ }
+ if (xfirst) *xfirst = 1; // for analogy with CWalleniusNCHypergeometric::MakeTable
+ return DesiredLength;
+ }
+
+ // place mode in the table
+ if (mode - x1 <= MaxLength/2) {
+ // There is enough space for left tail
+ i0 = mode - x1;
+ }
+ else if (x2 - mode <= MaxLength/2) {
+ // There is enough space for right tail
+ i0 = MaxLength - x2 + mode - 1;
+ if (i0 < 0) i0 = 0;
+ }
+ else {
+ // There is not enough space for any of the tails. Place mode in middle of table
+ i0 = MaxLength/2;
+ }
+ // Table start index
+ i1 = i0 - mode + x1; if (i1 < 0) i1 = 0;
+
+ // Table end index
+ i2 = i0 + x2 - mode; if (i2 > MaxLength-1) i2 = MaxLength-1;
+
+ // make center
+ table[i0] = sum = f = 1.;
+
+ // make left tail
+ x = mode;
+ a1 = m + 1 - x; a2 = n + 1 - x;
+ b1 = x; b2 = x - L;
+ for (i = i0 - 1; i >= i1; i--) {
+ f *= b1 * b2 / (a1 * a2 * odds); // recursive formula
+ a1++; a2++; b1--; b2--;
+ sum += table[i] = f;
+ if (f < cutoff) {
+ i1 = i; break; // cut off tail if < accuracy
+ }
+ }
+ if (i1 > 0) {
+ // move table down for cut-off left tail
+ memmove(table, table+i1, (i0-i1+1)*sizeof(*table));
+ // adjust indices
+ i0 -= i1; i2 -= i1; i1 = 0;
+ }
+ // make right tail
+ x = mode + 1;
+ a1 = m + 1 - x; a2 = n + 1 - x;
+ b1 = x; b2 = x - L;
+ f = 1.;
+ for (i = i0 + 1; i <= i2; i++) {
+ f *= a1 * a2 * odds / (b1 * b2); // recursive formula
+ a1--; a2--; b1++; b2++;
+ sum += table[i] = f;
+ if (f < cutoff) {
+ i2 = i; break; // cut off tail if < accuracy
+ }
+ }
+ // x limits
+ *xfirst = mode - (i0 - i1);
+ *xlast = mode + (i2 - i0);
+
+ return sum;
+}
+
+
+double CFishersNCHypergeometric::lng(int32 x) {
+ // natural log of proportional function
+ // returns lambda = log(m!*x!/(m-x)!*m2!*x2!/(m2-x2)!*odds^x)
+ int32 x2 = n - x, m2 = N - m;
+ if (ParametersChanged) {
+ mFac = LnFac(m) + LnFac(m2);
+ xLast = -99; ParametersChanged = 0;
+ }
+ if (m < FAK_LEN && m2 < FAK_LEN) goto DEFLT;
+ switch (x - xLast) {
+ case 0: // x unchanged
+ break;
+ case 1: // x incremented. calculate from previous value
+ xFac += log (double(x) * (m2-x2) / (double(x2+1)*(m-x+1)));
+ break;
+ case -1: // x decremented. calculate from previous value
+ xFac += log (double(x2) * (m-x) / (double(x+1)*(m2-x2+1)));
+ break;
+ default: DEFLT: // calculate all
+ xFac = LnFac(x) + LnFac(x2) + LnFac(m-x) + LnFac(m2-x2);
+ }
+ xLast = x;
+ return mFac - xFac + x * logodds - scale;
+}
+
+
+/***********************************************************************
+calculation methods in class CMultiFishersNCHypergeometric
+***********************************************************************/
+
+CMultiFishersNCHypergeometric::CMultiFishersNCHypergeometric(int32 n_, int32 * m_, double * odds_, int colors_, double accuracy_) {
+ // constructor
+ int i; // loop counter
+
+ // copy parameters
+ n = n_; Colors = colors_; accuracy = accuracy_;
+
+ // check if parameters are valid
+ reduced = 2; N = Nu = 0; usedcolors = 0;
+ for (i = 0; i < Colors; i++) {
+ nonzero[i] = 1; // remember if color i has m > 0 and odds > 0
+ m[usedcolors] = m_[i]; // copy m
+ N += m_[i]; // sum of m
+ if (m_[i] <= 0) {
+ nonzero[i] = 0; // color i unused
+ reduced |= 1;
+ if (m_[i] < 0) FatalError("Parameter m negative in constructor for CMultiFishersNCHypergeometric");
+ }
+ odds[usedcolors] = odds_[i]; // copy odds
+ if (odds_[i] <= 0) {
+ nonzero[i] = 0; // color i unused
+ reduced |= 1;
+ if (odds_[i] < 0) FatalError("Parameter odds negative in constructor for CMultiFishersNCHypergeometric");
+ }
+ if (usedcolors > 0 && nonzero[i] && odds[usedcolors] != odds[usedcolors-1]) {
+ reduced &= ~2; // odds are not all equal
+ }
+ if (nonzero[i]) {
+ Nu += m[usedcolors]; // sum of m for used colors
+ usedcolors++; // skip color i if zero
+ }
+ }
+ if (N < n) FatalError("Taking more items than there are in constructor for CMultiFishersNCHypergeometric");
+ if (Nu< n) FatalError("Not enough items with nonzero weight in constructor for CMultiFishersNCHypergeometric");
+
+ // calculate mFac and logodds
+ for (i=0, mFac=0.; i < usedcolors; i++) {
+ mFac += LnFac(m[i]);
+ logodds[i] = log(odds[i]);
+ }
+ // initialize
+ sn = 0;
+}
+
+
+void CMultiFishersNCHypergeometric::mean(double * mu) {
+ // calculates approximate mean of multivariate Fisher's noncentral
+ // hypergeometric distribution. Result is returned in mu[0..colors-1].
+ // The calculation is reasonably fast.
+ int i, j; // color index
+ double mur[MAXCOLORS]; // mean for used colors
+
+ // get mean of used colors
+ mean1(mur);
+
+ // resolve unused colors
+ for (i = j = 0; i < Colors; i++) {
+ if (nonzero[i]) {
+ mu[i] = mur[j++];
+ }
+ else {
+ mu[i] = 0.;
+ }
+ }
+}
+
+
+void CMultiFishersNCHypergeometric::mean1(double * mu) {
+ // calculates approximate mean of multivariate Fisher's noncentral
+ // hypergeometric distribution, except for unused colors
+ double r, r1; // iteration variable
+ double q; // mean of color i
+ double W; // total weight
+ int i; // color index
+ int iter = 0; // iteration counter
+
+ if (usedcolors < 3) {
+ // simple cases
+ if (usedcolors == 1) mu[0] = n;
+ if (usedcolors == 2) {
+ mu[0] = CFishersNCHypergeometric(n,m[0],Nu,odds[0]/odds[1]).mean();
+ mu[1] = n - mu[0];
+ }
+ }
+ else if (n == Nu) {
+ // Taking all balls
+ for (i = 0; i < usedcolors; i++) mu[i] = m[i];
+ }
+ else {
+ // not a special case
+
+ // initial guess for r
+ for (i=0, W=0.; i < usedcolors; i++) W += m[i] * odds[i];
+ r = (double)n * Nu / ((Nu-n)*W);
+
+ if (r > 0.) {
+ // iteration loop to find r
+ do {
+ r1 = r;
+ for (i=0, q=0.; i < usedcolors; i++) {
+ q += m[i] * r * odds[i] / (r * odds[i] + 1.);
+ }
+ r *= n * (Nu-q) / (q * (Nu-n));
+ if (++iter > 100) FatalError("convergence problem in function CMultiFishersNCHypergeometric::mean");
+ }
+ while (fabs(r-r1) > 1E-5);
+ }
+
+ // get result
+ for (i=0; i < usedcolors; i++) {
+ mu[i] = m[i] * r * odds[i] / (r * odds[i] + 1.);
+ }
+ }
+}
+
+
+void CMultiFishersNCHypergeometric::variance(double * var, double * mean_) {
+ // calculates approximate variance of multivariate Fisher's noncentral
+ // hypergeometric distribution (accuracy is not too good).
+ // Variance is returned in variance[0..colors-1].
+ // Mean is returned in mean_[0..colors-1] if not NULL.
+ // The calculation is reasonably fast.
+ double r1, r2;
+ double mu[MAXCOLORS];
+ int i, j;
+
+ mean1(mu); // Mean of used colors
+
+ for (i = j = 0; i < Colors; i++) {
+ if (nonzero[i]) {
+ r1 = mu[j] * (m[j]-mu[j]);
+ r2 = (n-mu[j])*(mu[j]+Nu-n-m[j]);
+ if (r1 <= 0. || r2 <= 0.) {
+ var[i] = 0.;
+ }
+ else {
+ var[i] = Nu*r1*r2/((Nu-1)*(m[j]*r2+(Nu-m[j])*r1));
+ }
+ j++;
+ }
+ else { // unused color
+ var[i] = 0.;
+ }
+ }
+
+ // Store mean if mean_ is not NULL
+ if (mean_) {
+ // resolve unused colors
+ for (i = j = 0; i < Colors; i++) {
+ if (nonzero[i]) {
+ mean_[i] = mu[j++];
+ }
+ else {
+ mean_[i] = 0.;
+ }
+ }
+ }
+}
+
+
+double CMultiFishersNCHypergeometric::probability(int32 * x) {
+ // Calculate probability function.
+ // Note: The first-time call takes very long time because it requires
+ // a calculation of all possible x combinations with probability >
+ // accuracy, which may be extreme.
+ // The calculation uses logarithms to avoid overflow.
+ // (Recursive calculation may be faster, but this has not been implemented)
+ int i, j; // color index
+ int32 xsum = 0; // sum of x
+ int32 Xu[MAXCOLORS]; // x for used colors
+
+ // resolve unused colors
+ for (i = j = 0; i < Colors; i++) {
+ if (nonzero[i]) {
+ Xu[j++] = x[i]; // copy x to array of used colors
+ xsum += x[i]; // sum of x
+ }
+ else {
+ if (x[i]) return 0.; // taking balls with zero weight
+ }
+ }
+
+ if (xsum != n) {
+ FatalError("sum of x values not equal to n in function CMultiFishersNCHypergeometric::probability");
+ }
+
+ for (i = 0; i < usedcolors; i++) {
+ if (Xu[i] > m[i] || Xu[i] < 0 || Xu[i] < n - Nu + m[i]) return 0.; // Outside bounds for x
+ }
+
+ if (n == 0 || n == Nu) return 1.; // deterministic cases
+
+ if (usedcolors < 3) { // cases with < 3 colors
+ if (usedcolors < 2) return 1.;
+ // Univariate probability
+ return CFishersNCHypergeometric(n, m[0], Nu, odds[0]/odds[1], accuracy).probability(Xu[0]);
+ }
+
+ if (reduced & 2) {
+ // All odds are equal. This is multivariate central hypergeometric distribution
+ int32 sx = n, sm = N;
+ double p = 1.;
+ for (i = 0; i < usedcolors - 1; i++) {
+ // Use univariate hypergeometric (usedcolors-1) times
+ p *= CFishersNCHypergeometric(sx, m[i], sm, 1.).probability(x[i]);
+ sx -= x[i]; sm -= m[i];
+ }
+ return p;
+ }
+
+ // all special cases eliminated. Calculate sum of all function values
+ if (sn == 0) SumOfAll(); // first time initialize
+
+ return exp(lng(Xu)) * rsum; // function value
+}
+
+
+double CMultiFishersNCHypergeometric::moments(double * mean, double * variance, int32 * combinations) {
+ // calculates mean and variance of the Fisher's noncentral hypergeometric
+ // distribution by calculating all combinations of x-values with
+ // probability > accuracy.
+ // Return value = 1.
+ // Returns the mean in mean[0...colors-1]
+ // Returns the variance in variance[0...colors-1]
+
+ int i, j; // color index
+ if (sn == 0) {
+ // first time initialization includes calculation of mean and variance
+ SumOfAll();
+ }
+ // copy results and resolve unused colors
+ for (i = j = 0; i < Colors; i++) {
+ if (nonzero[i]) {
+ mean[i] = sx[j];
+ variance[i] = sxx[j];
+ j++;
+ }
+ else {
+ mean[i] = variance[i] = 0.;
+ }
+ }
+ if (combinations) *combinations = sn;
+ return 1.;
+}
+
+
+void CMultiFishersNCHypergeometric::SumOfAll() {
+ // this function does the very time consuming job of calculating the sum
+ // of the proportional function g(x) over all possible combinations of
+ // the x[i] values with probability > accuracy. These combinations are
+ // generated by the recursive function loop().
+ // The mean and variance are generated as by-products.
+
+ int i; // color index
+ int32 msum; // sum of m[i]
+
+ // get approximate mean
+ mean1(sx);
+
+ // round mean to integers
+ for (i=0, msum=0; i < usedcolors; i++) {
+ msum += xm[i] = (int32)(sx[i]+0.4999999);}
+ // adjust truncated x values to make the sum = n
+ msum -= n;
+ for (i = 0; msum < 0; i++) {
+ if (xm[i] < m[i]) {
+ xm[i]++; msum++;
+ }
+ }
+ for (i = 0; msum > 0; i++) {
+ if (xm[i] > 0) {
+ xm[i]--; msum--;
+ }
+ }
+
+ // adjust scale factor to g(mean) to avoid overflow
+ scale = 0.; scale = lng(xm);
+
+ // initialize for recursive loops
+ sn = 0;
+ for (i = usedcolors-1, msum = 0; i >= 0; i--) {
+ remaining[i] = msum; msum += m[i];
+ }
+ for (i = 0; i < usedcolors; i++) {
+ sx[i] = 0; sxx[i] = 0;
+ }
+
+ // recursive loops to calculate sums of g(x) over all x combinations
+ rsum = 1. / loop(n, 0);
+
+ // calculate mean and variance
+ for (i = 0; i < usedcolors; i++) {
+ sxx[i] = sxx[i]*rsum - sx[i]*sx[i]*rsum*rsum;
+ sx[i] = sx[i]*rsum;
+ }
+}
+
+
+double CMultiFishersNCHypergeometric::loop(int32 n, int c) {
+ // recursive function to loop through all combinations of x-values.
+ // used by SumOfAll
+ int32 x, x0; // x of color c
+ int32 xmin, xmax; // min and max of x[c]
+ double s1, s2, sum = 0.; // sum of g(x) values
+ int i; // loop counter
+
+ if (c < usedcolors-1) {
+ // not the last color
+ // calculate min and max of x[c] for given x[0]..x[c-1]
+ xmin = n - remaining[c]; if (xmin < 0) xmin = 0;
+ xmax = m[c]; if (xmax > n) xmax = n;
+ x0 = xm[c]; if (x0 < xmin) x0 = xmin; if (x0 > xmax) x0 = xmax;
+ // loop for all x[c] from mean and up
+ for (x = x0, s2 = 0.; x <= xmax; x++) {
+ xi[c] = x;
+ sum += s1 = loop(n-x, c+1); // recursive loop for remaining colors
+ if (s1 < accuracy && s1 < s2) break; // stop when values become negligible
+ s2 = s1;
+ }
+ // loop for all x[c] from mean and down
+ for (x = x0-1; x >= xmin; x--) {
+ xi[c] = x;
+ sum += s1 = loop(n-x, c+1); // recursive loop for remaining colors
+ if (s1 < accuracy && s1 < s2) break; // stop when values become negligible
+ s2 = s1;
+ }
+ }
+ else {
+ // last color
+ xi[c] = n;
+ // sums and squaresums
+ s1 = exp(lng(xi)); // proportional function g(x)
+ for (i = 0; i < usedcolors; i++) { // update sums
+ sx[i] += s1 * xi[i];
+ sxx[i] += s1 * xi[i] * xi[i];
+ }
+ sn++;
+ sum += s1;
+ }
+ return sum;
+}
+
+
+double CMultiFishersNCHypergeometric::lng(int32 * x) {
+ // natural log of proportional function g(x)
+ double y = 0.;
+ int i;
+ for (i = 0; i < usedcolors; i++) {
+ y += x[i]*logodds[i] - LnFac(x[i]) - LnFac(m[i]-x[i]);
+ }
+ return mFac + y - scale;
+}
diff --git a/src/randomc.h b/src/randomc.h
new file mode 100644
index 0000000..eec8bf9
--- /dev/null
+++ b/src/randomc.h
@@ -0,0 +1,209 @@
+/***************************** randomc.h **********************************
+* Author: Agner Fog
+* Date created: 1997
+* Last modified: 2011-08-05
+* Project: randomc.h
+* Source URL: www.agner.org/random
+*
+* Description:
+* This header file contains class declarations and other definitions for the
+* randomc class library of uniform random number generators in C++ language.
+*
+* Overview of classes:
+* ====================
+*
+* class TRandomMersenne:
+* Random number generator of type Mersenne twister.
+* Source file mersenne.cpp
+*
+* class TRandomMotherOfAll:
+* Random number generator of type Mother-of-All (Multiply with carry).
+* Source file mother.cpp
+*
+* class TRanrotBGenerator:
+* Random number generator of type RANROT-B.
+* Source file ranrotb.cpp
+*
+* class TRanrotWGenerator:
+* Random number generator of type RANROT-W.
+* Source file ranrotw.cpp
+*
+* class TRandomMotRot:
+* Combination of Mother-of-All and RANROT-W generators.
+* Source file ranmoro.cpp and motrot.asm.
+* Coded in assembly language for improved speed.
+* Must link in RANDOMAO.LIB or RANDOMAM.LIB.
+*
+*
+* Member functions (methods):
+* ===========================
+*
+* All these classes have identical member functions:
+*
+* Constructor(uint32 seed):
+* The seed can be any integer. Usually the time is used as seed.
+* Executing a program twice with the same seed will give the same sequence of
+* random numbers. A different seed will give a different sequence.
+*
+* void RandomInit(uint32 seed);
+* Re-initializes the random number generator with a new seed.
+*
+* void RandomInitByArray(uint32 seeds[], int length);
+* In TRandomMersenne only: Use this function if you want to initialize with
+* a seed with more than 32 bits. All bits in the seeds[] array will influence
+* the sequence of random numbers generated. length is the number of entries
+* in the seeds[] array.
+*
+* double Random();
+* Gives a floating point random number in the interval 0 <= x < 1.
+* The resolution is 32 bits in TRanrotBGenerator, TRandomMotherOfAll and
+* TRandomMersenne. 52 or 63 bits in TRanrotWGenerator. 63 bits in
+* TRandomMotRot.
+*
+* int IRandom(int min, int max);
+* Gives an integer random number in the interval min <= x <= max.
+* (max-min < MAXINT).
+* The resolution is the same as for Random().
+*
+* uint32 BRandom();
+* Gives 32 random bits.
+* Only available in the classes TRanrotWGenerator and TRandomMersenne.
+*
+*
+* Example:
+* ========
+* The file EX-RAN.CPP contains an example of how to generate random numbers.
+*
+*
+* Further documentation:
+* ======================
+* The file randomc.htm contains further documentation on these random number
+* generators.
+*
+* � 1997 - 2011 Agner Fog.
+* GNU General Public License www.gnu.org/copyleft/gpl.html
+*******************************************************************************/
+
+#ifndef RANDOMC_H
+#define RANDOMC_H
+
+#include <stdio.h>
+#ifdef __INTEL_COMPILER
+ #include <mathimf.h> // Intel math function library
+#else
+ #include <math.h> // default math function linrary
+#endif
+
+// Define 32 bit signed and unsigned integers.
+// Change these definitions, if necessary, on 64 bit computers
+#if defined(_WIN16) || defined(__MSDOS__) || defined(_MSDOS) // 16 bit system
+ typedef long int int32; // 32 bit signed integer
+ typedef unsigned long int uint32; // 32 bit unsigned integer
+#else
+ typedef int int32; // 32 bit signed integer
+ typedef unsigned int uint32; // 32 bit unsigned integer
+#endif
+
+/***********************************************************************
+ System-specific user interface functions
+***********************************************************************/
+
+void EndOfProgram(void); // system-specific exit code (userintf.cpp)
+
+void FatalError(const char * ErrorText); // system-specific error reporting (userintf.cpp)
+
+
+/***********************************************************************
+ Different random number generator classes
+***********************************************************************/
+
+class TRandomMersenne { // encapsulate random number generator
+ #if 0
+ // define constants for MT11213A:
+ // (32 bit constants cannot be defined as enum in 16-bit compilers)
+ #define MERS_N 351
+ #define MERS_M 175
+ #define MERS_R 19
+ #define MERS_U 11
+ #define MERS_S 7
+ #define MERS_T 15
+ #define MERS_L 17
+ #define MERS_A 0xE4BD75F5
+ #define MERS_B 0x655E5280
+ #define MERS_C 0xFFD58000
+ #else
+ // or constants for MT19937:
+ #define MERS_N 624
+ #define MERS_M 397
+ #define MERS_R 31
+ #define MERS_U 11
+ #define MERS_S 7
+ #define MERS_T 15
+ #define MERS_L 18
+ #define MERS_A 0x9908B0DF
+ #define MERS_B 0x9D2C5680
+ #define MERS_C 0xEFC60000
+ #endif
+ public:
+ TRandomMersenne(uint32 seed) { // constructor
+ RandomInit(seed);}
+ void RandomInit(uint32 seed); // re-seed
+ void RandomInitByArray(uint32 seeds[], int length); // seed by more than 32 bits
+ int IRandom(int min, int max); // output random integer
+ double Random(); // output random float
+ uint32 BRandom(); // output random bits
+ private:
+ uint32 mt[MERS_N]; // state vector
+ int mti; // index into mt
+ enum TArch {LITTLE_ENDIAN1, BIG_ENDIAN1, NONIEEE};
+ TArch Architecture; // conversion to float depends on computer architecture
+ };
+
+class TRanrotBGenerator { // encapsulate random number generator
+ enum constants { // define parameters
+ KK = 17, JJ = 10, R1 = 13, R2 = 9};
+ public:
+ void RandomInit(uint32 seed); // initialization
+ int IRandom(int min, int max); // get integer random number in desired interval
+ double Random(); // get floating point random number
+ TRanrotBGenerator(uint32 seed); // constructor
+ protected:
+ int p1, p2; // indexes into buffer
+ uint32 randbuffer[KK]; // history buffer
+ uint32 randbufcopy[KK*2]; // used for self-test
+ enum TArch {LITTLE_ENDIAN1, BIG_ENDIAN1, NONIEEE};
+ TArch Architecture; // conversion to float depends on computer architecture
+};
+
+
+class TRanrotWGenerator { // encapsulate random number generator
+ enum constants { // define parameters
+ KK = 17, JJ = 10, R1 = 19, R2 = 27};
+ public:
+ void RandomInit(uint32 seed); // initialization
+ int IRandom(int min, int max); // get integer random number in desired interval
+ long double Random(); // get floating point random number
+ uint32 BRandom(); // output random bits
+ TRanrotWGenerator(uint32 seed); // constructor
+ protected:
+ int p1, p2; // indexes into buffer
+ union { // used for conversion to float
+ long double randp1;
+ uint32 randbits[3];};
+ uint32 randbuffer[KK][2]; // history buffer
+ uint32 randbufcopy[KK*2][2]; // used for self-test
+ enum TArch {LITTLE_ENDIAN1, BIG_ENDIAN1, NONIEEE, EXTENDEDPRECISIONLITTLEENDIAN};
+ TArch Architecture; // conversion to float depends on computer architecture
+};
+
+class TRandomMotherOfAll { // encapsulate random number generator
+ public:
+ void RandomInit(uint32 seed); // initialization
+ int IRandom(int min, int max); // get integer random number in desired interval
+ double Random(); // get floating point random number
+ TRandomMotherOfAll(uint32 seed); // constructor
+ protected:
+ double x[5]; // history buffer
+ };
+
+#endif
diff --git a/src/stoc1.cpp b/src/stoc1.cpp
new file mode 100644
index 0000000..afded7b
--- /dev/null
+++ b/src/stoc1.cpp
@@ -0,0 +1,825 @@
+/*************************** stoc1.cpp **********************************
+* Author: Agner Fog
+* Date created: 2002-01-04
+* Last modified: 2008-11-30
+* Project: stocc.zip
+* Source URL: www.agner.org/random
+*
+* Description:
+* Non-uniform random number generator functions.
+*
+* This file contains source code for the class StochasticLib1 defined in stocc.h.
+*
+* Documentation:
+* ==============
+* The file stocc.h contains class definitions.
+* Further documentation at www.agner.org/random
+*
+* Copyright 2002-2008 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include "stocc.h" // class definition
+
+
+/***********************************************************************
+constants
+***********************************************************************/
+const double SHAT1 = 2.943035529371538573; // 8/e
+const double SHAT2 = 0.8989161620588987408; // 3-sqrt(12/e)
+
+
+/***********************************************************************
+Log factorial function
+***********************************************************************/
+double LnFac(int32 n) {
+ // log factorial function. gives natural logarithm of n!
+
+ // define constants
+ static const double // coefficients in Stirling approximation
+ C0 = 0.918938533204672722, // ln(sqrt(2*pi))
+ C1 = 1./12.,
+ C3 = -1./360.;
+ // C5 = 1./1260., // use r^5 term if FAK_LEN < 50
+ // C7 = -1./1680.; // use r^7 term if FAK_LEN < 20
+ // static variables
+ static double fac_table[FAK_LEN]; // table of ln(n!):
+ static int initialized = 0; // remember if fac_table has been initialized
+
+ if (n < FAK_LEN) {
+ if (n <= 1) {
+ if (n < 0) FatalError("Parameter negative in LnFac function");
+ return 0;
+ }
+ if (!initialized) { // first time. Must initialize table
+ // make table of ln(n!)
+ double sum = fac_table[0] = 0.;
+ for (int i=1; i<FAK_LEN; i++) {
+ sum += log(double(i));
+ fac_table[i] = sum;
+ }
+ initialized = 1;
+ }
+ return fac_table[n];
+ }
+ // not found in table. use Stirling approximation
+ double n1, r;
+ n1 = n; r = 1. / n1;
+ return (n1 + 0.5)*log(n1) - n1 + C0 + r*(C1 + r*r*C3);
+}
+
+
+/***********************************************************************
+Constructor
+***********************************************************************/
+StochasticLib1::StochasticLib1 (int seed)
+: STOC_BASE(seed) {
+ normal_x2_valid = 0;
+}
+
+
+/***********************************************************************
+Hypergeometric distribution
+***********************************************************************/
+int32 StochasticLib1::Hypergeometric (int32 n, int32 m, int32 N) {
+ /*
+ This function generates a random variate with the hypergeometric
+ distribution. This is the distribution you get when drawing balls without
+ replacement from an urn with two colors. n is the number of balls you take,
+ m is the number of red balls in the urn, N is the total number of balls in
+ the urn, and the return value is the number of red balls you get.
+
+ This function uses inversion by chop-down search from the mode when
+ parameters are small, and the ratio-of-uniforms method when the former
+ method would be too slow or would give overflow.
+ */
+
+ int32 fak, addd; // used for undoing transformations
+ int32 x; // result
+
+ // check if parameters are valid
+ if (n > N || m > N || n < 0 || m < 0) {
+ FatalError("Parameter out of range in hypergeometric function");}
+
+ // symmetry transformations
+ fak = 1; addd = 0;
+ if (m > N/2) {
+ // invert m
+ m = N - m;
+ fak = -1; addd = n;
+ }
+ if (n > N/2) {
+ // invert n
+ n = N - n;
+ addd += fak * m; fak = - fak;
+ }
+ if (n > m) {
+ // swap n and m
+ x = n; n = m; m = x;
+ }
+ // cases with only one possible result end here
+ if (n == 0) return addd;
+
+ //------------------------------------------------------------------
+ // choose method
+ //------------------------------------------------------------------
+ if (N > 680 || n > 70) {
+ // use ratio-of-uniforms method
+ x = HypRatioOfUnifoms (n, m, N);
+ }
+ else {
+ // inversion method, using chop-down search from mode
+ x = HypInversionMod (n, m, N);
+ }
+ // undo symmetry transformations
+ return x * fak + addd;
+}
+
+
+/***********************************************************************
+Subfunctions used by hypergeometric
+***********************************************************************/
+
+int32 StochasticLib1::HypInversionMod (int32 n, int32 m, int32 N) {
+ /*
+ Subfunction for Hypergeometric distribution. Assumes 0 <= n <= m <= N/2.
+ Overflow protection is needed when N > 680 or n > 75.
+
+ Hypergeometric distribution by inversion method, using down-up
+ search starting at the mode using the chop-down technique.
+
+ This method is faster than the rejection method when the variance is low.
+ */
+
+ // Setup
+ static int32 h_n_last = -1, h_m_last = -1, h_N_last = -1; // Last values
+ static int32 h_mode, h_mp; // Mode, mode+1
+ static int32 h_bound; // Safety bound
+ static double h_fm; // Value at mode
+ // Sampling
+ int32 I; // Loop counter
+ int32 L = N - m - n; // Parameter
+ double modef; // mode, float
+ double Mp, np; // m + 1, n + 1
+ double p; // temporary
+ double U; // uniform random
+ double c, d; // factors in iteration
+ double divisor; // divisor, eliminated by scaling
+ double k1, k2; // float version of loop counter
+ double L1 = L; // float version of L
+
+ Mp = (double)(m + 1);
+ np = (double)(n + 1);
+
+ if (N != h_N_last || m != h_m_last || n != h_n_last) {
+ // set-up when parameters have changed
+ h_N_last = N; h_m_last = m; h_n_last = n;
+
+ p = Mp / (N + 2.);
+ modef = np * p; // mode, real
+ h_mode = (int32)modef; // mode, integer
+ if (h_mode == modef && p == 0.5) {
+ h_mp = h_mode--;
+ }
+ else {
+ h_mp = h_mode + 1;
+ }
+ // mode probability, using log factorial function
+ // (may read directly from fac_table if N < FAK_LEN)
+ h_fm = exp(LnFac(N-m) - LnFac(L+h_mode) - LnFac(n-h_mode)
+ + LnFac(m) - LnFac(m-h_mode) - LnFac(h_mode)
+ - LnFac(N) + LnFac(N-n) + LnFac(n) );
+
+ // safety bound - guarantees at least 17 significant decimal digits
+ // bound = min(n, (int32)(modef + k*c'))
+ h_bound = (int32)(modef + 11. * sqrt(modef * (1.-p) * (1.-n/(double)N)+1.));
+ if (h_bound > n) h_bound = n;
+ }
+
+ // loop until accepted
+ while(1) {
+ U = Random(); // uniform random number to be converted
+
+ // start chop-down search at mode
+ if ((U -= h_fm) <= 0.) return(h_mode);
+ c = d = h_fm;
+
+ // alternating down- and upward search from the mode
+ k1 = h_mp - 1; k2 = h_mode + 1;
+ for (I = 1; I <= h_mode; I++, k1--, k2++) {
+ // Downward search from k1 = h_mp - 1
+ divisor = (np - k1)*(Mp - k1);
+ // Instead of dividing c with divisor, we multiply U and d because
+ // multiplication is faster. This will give overflow if N > 800
+ U *= divisor; d *= divisor;
+ c *= k1 * (L1 + k1);
+ if ((U -= c) <= 0.) return(h_mp - I - 1); // = k1 - 1
+
+ // Upward search from k2 = h_mode + 1
+ divisor = k2 * (L1 + k2);
+ // re-scale parameters to avoid time-consuming division
+ U *= divisor; c *= divisor;
+ d *= (np - k2) * (Mp - k2);
+ if ((U -= d) <= 0.) return(h_mode + I); // = k2
+ // Values of n > 75 or N > 680 may give overflow if you leave out this..
+ // overflow protection
+ // if (U > 1.E100) {U *= 1.E-100; c *= 1.E-100; d *= 1.E-100;}
+ }
+
+ // Upward search from k2 = 2*mode + 1 to bound
+ for (k2 = I = h_mp + h_mode; I <= h_bound; I++, k2++) {
+ divisor = k2 * (L1 + k2);
+ U *= divisor;
+ d *= (np - k2) * (Mp - k2);
+ if ((U -= d) <= 0.) return(I);
+ // more overflow protection
+ // if (U > 1.E100) {U *= 1.E-100; d *= 1.E-100;}
+ }
+ }
+}
+
+
+int32 StochasticLib1::HypRatioOfUnifoms (int32 n, int32 m, int32 N) {
+ /*
+ Subfunction for Hypergeometric distribution using the ratio-of-uniforms
+ rejection method.
+
+ This code is valid for 0 < n <= m <= N/2.
+
+ The computation time hardly depends on the parameters, except that it matters
+ a lot whether parameters are within the range where the LnFac function is
+ tabulated.
+
+ Reference: E. Stadlober: "The ratio of uniforms approach for generating
+ discrete random variates". Journal of Computational and Applied Mathematics,
+ vol. 31, no. 1, 1990, pp. 181-189.
+ */
+ static int32 h_N_last = -1; // previous parameter
+ static int32 h_m_last = -1; // previous parameter
+ static int32 h_n_last = -1; // previous parameter
+ static int32 h_bound; // upper bound
+ static double h_a; // hat center
+ static double h_h; // hat width
+ static double h_g; // value at mode
+ int32 L; // N-m-n
+ int32 mode; // mode
+ int32 k; // integer sample
+ double x; // real sample
+ double rNN; // 1/(N*(N+2))
+ double my; // mean
+ double var; // variance
+ double u; // uniform random
+ double lf; // ln(f(x))
+
+ L = N - m - n;
+ if (h_N_last != N || h_m_last != m || h_n_last != n) {
+ h_N_last = N; h_m_last = m; h_n_last = n; // Set-up
+ rNN = 1. / ((double)N*(N+2)); // make two divisions in one
+ my = (double)n * m * rNN * (N+2); // mean = n*m/N
+ mode = (int32)(double(n+1) * double(m+1) * rNN * N); // mode = floor((n+1)*(m+1)/(N+2))
+ var = (double)n * m * (N-m) * (N-n) / ((double)N*N*(N-1)); // variance
+ h_h = sqrt(SHAT1 * (var+0.5)) + SHAT2; // hat width
+ h_a = my + 0.5; // hat center
+ h_g = fc_lnpk(mode, L, m, n); // maximum
+ h_bound = (int32)(h_a + 4.0 * h_h); // safety-bound
+ if (h_bound > n) h_bound = n;
+ }
+ while(1) {
+ u = Random(); // uniform random number
+ if (u == 0) continue; // avoid division by 0
+ x = h_a + h_h * (Random()-0.5) / u; // generate hat distribution
+ if (x < 0. || x > 2E9) continue; // reject, avoid overflow
+ k = (int32)x;
+ if (k > h_bound) continue; // reject if outside range
+ lf = h_g - fc_lnpk(k,L,m,n); // ln(f(k))
+ if (u * (4.0 - u) - 3.0 <= lf) break; // lower squeeze accept
+ if (u * (u-lf) > 1.0) continue; // upper squeeze reject
+ if (2.0 * log(u) <= lf) break; // final acceptance
+ }
+ return k;
+}
+
+
+double StochasticLib1::fc_lnpk(int32 k, int32 L, int32 m, int32 n) {
+ // subfunction used by hypergeometric and Fisher's noncentral hypergeometric distribution
+ return(LnFac(k) + LnFac(m - k) + LnFac(n - k) + LnFac(L + k));
+}
+
+
+#ifndef R_BUILD // Not needed if making R interface
+
+/***********************************************************************
+Multivariate hypergeometric distribution
+***********************************************************************/
+void StochasticLib1::MultiHypergeometric (int32 * destination, int32 * source, int32 n, int colors) {
+ /*
+ This function generates a vector of random variates, each with the
+ hypergeometric distribution.
+
+ The multivariate hypergeometric distribution is the distribution you
+ get when drawing balls from an urn with more than two colors, without
+ replacement.
+
+ Parameters:
+ destination: An output array to receive the number of balls of each
+ color. Must have space for at least 'colors' elements.
+ source: An input array containing the number of balls of each
+ color in the urn. Must have 'colors' elements.
+ All elements must be non-negative.
+ n: The number of balls drawn from the urn.
+ Can't exceed the total number of balls in the urn.
+ colors: The number of possible colors.
+ */
+ int32 sum, x, y;
+ int i;
+ if (n < 0 || colors < 0) FatalError("Parameter negative in multihypergeo function");
+ if (colors == 0) return;
+
+ // compute total number of balls
+ for (i = 0, sum = 0; i < colors; i++) {
+ y = source[i];
+ if (y < 0) FatalError("Parameter negative in multihypergeo function");
+ sum += y;
+ }
+ if (n > sum) FatalError("n > sum in multihypergeo function");
+
+ for (i = 0; i < colors-1; i++) {
+ // generate output by calling hypergeometric colors-1 times
+ y = source[i];
+ x = Hypergeometric(n, y, sum);
+ n -= x; sum -= y;
+ destination[i] = x;
+ }
+ // get the last one
+ destination[i] = n;
+}
+
+
+/***********************************************************************
+Poisson distribution
+***********************************************************************/
+int32 StochasticLib1::Poisson (double L) {
+ /*
+ This function generates a random variate with the poisson distribution.
+
+ Uses inversion by chop-down method for L < 17, and ratio-of-uniforms
+ method for L >= 17.
+
+ For L < 1.E-6 numerical inaccuracy is avoided by direct calculation.
+ */
+
+ //------------------------------------------------------------------
+ // choose method
+ //------------------------------------------------------------------
+ if (L < 17) {
+ if (L < 1.E-6) {
+ if (L == 0) return 0;
+ if (L < 0) FatalError("Parameter negative in poisson function");
+
+ //--------------------------------------------------------------
+ // calculate probabilities
+ //--------------------------------------------------------------
+ // For extremely small L we calculate the probabilities of x = 1
+ // and x = 2 (ignoring higher x). The reason for using this
+ // method is to prevent numerical inaccuracies in other methods.
+ //--------------------------------------------------------------
+ return PoissonLow(L);
+ }
+ else {
+ //--------------------------------------------------------------
+ // inversion method
+ //--------------------------------------------------------------
+ // The computation time for this method grows with L.
+ // Gives overflow for L > 80
+ //--------------------------------------------------------------
+ return PoissonInver(L);
+ }
+ }
+ else {
+ if (L > 2.E9) FatalError("Parameter too big in poisson function");
+
+ //----------------------------------------------------------------
+ // ratio-of-uniforms method
+ //----------------------------------------------------------------
+ // The computation time for this method does not depend on L.
+ // Use where other methods would be slower.
+ //----------------------------------------------------------------
+ return PoissonRatioUniforms(L);
+ }
+}
+
+
+/***********************************************************************
+Subfunctions used by poisson
+***********************************************************************/
+int32 StochasticLib1::PoissonLow(double L) {
+ /*
+ This subfunction generates a random variate with the poisson
+ distribution for extremely low values of L.
+
+ The method is a simple calculation of the probabilities of x = 1
+ and x = 2. Higher values are ignored.
+
+ The reason for using this method is to avoid the numerical inaccuracies
+ in other methods.
+ */
+ double d, r;
+ d = sqrt(L);
+ if (Random() >= d) return 0;
+ r = Random() * d;
+ if (r > L * (1.-L)) return 0;
+ if (r > 0.5 * L*L * (1.-L)) return 1;
+ return 2;
+}
+
+
+int32 StochasticLib1::PoissonInver(double L) {
+ /*
+ This subfunction generates a random variate with the poisson
+ distribution using inversion by the chop down method (PIN).
+
+ Execution time grows with L. Gives overflow for L > 80.
+
+ The value of bound must be adjusted to the maximal value of L.
+ */
+ const int bound = 130; // safety bound. Must be > L + 8*sqrt(L).
+ static double p_L_last = -1.; // previous value of L
+ static double p_f0; // value at x=0
+ double r; // uniform random number
+ double f; // function value
+ int32 x; // return value
+
+ if (L != p_L_last) { // set up
+ p_L_last = L;
+ p_f0 = exp(-L); // f(0) = probability of x=0
+ }
+ while (1) {
+ r = Random(); x = 0; f = p_f0;
+ do { // recursive calculation: f(x) = f(x-1) * L / x
+ r -= f;
+ if (r <= 0) return x;
+ x++;
+ f *= L;
+ r *= x; // instead of f /= x
+ }
+ while (x <= bound);
+ }
+}
+
+
+int32 StochasticLib1::PoissonRatioUniforms(double L) {
+ /*
+ This subfunction generates a random variate with the poisson
+ distribution using the ratio-of-uniforms rejection method (PRUAt).
+
+ Execution time does not depend on L, except that it matters whether L
+ is within the range where ln(n!) is tabulated.
+
+ Reference: E. Stadlober: "The ratio of uniforms approach for generating
+ discrete random variates". Journal of Computational and Applied Mathematics,
+ vol. 31, no. 1, 1990, pp. 181-189.
+ */
+ static double p_L_last = -1.0; // previous L
+ static double p_a; // hat center
+ static double p_h; // hat width
+ static double p_g; // ln(L)
+ static double p_q; // value at mode
+ static int32 p_bound; // upper bound
+ int32 mode; // mode
+ double u; // uniform random
+ double lf; // ln(f(x))
+ double x; // real sample
+ int32 k; // integer sample
+
+ if (p_L_last != L) {
+ p_L_last = L; // Set-up
+ p_a = L + 0.5; // hat center
+ mode = (int32)L; // mode
+ p_g = log(L);
+ p_q = mode * p_g - LnFac(mode); // value at mode
+ p_h = sqrt(SHAT1 * (L+0.5)) + SHAT2; // hat width
+ p_bound = (int32)(p_a + 6.0 * p_h); // safety-bound
+ }
+ while(1) {
+ u = Random();
+ if (u == 0) continue; // avoid division by 0
+ x = p_a + p_h * (Random() - 0.5) / u;
+ if (x < 0 || x >= p_bound) continue; // reject if outside valid range
+ k = (int32)(x);
+ lf = k * p_g - LnFac(k) - p_q;
+ if (lf >= u * (4.0 - u) - 3.0) break; // quick acceptance
+ if (u * (u - lf) > 1.0) continue; // quick rejection
+ if (2.0 * log(u) <= lf) break; // final acceptance
+ }
+ return(k);
+}
+
+
+/***********************************************************************
+Binomial distribution
+***********************************************************************/
+int32 StochasticLib1::Binomial (int32 n, double p) {
+ /*
+ This function generates a random variate with the binomial distribution.
+
+ Uses inversion by chop-down method for n*p < 35, and ratio-of-uniforms
+ method for n*p >= 35.
+
+ For n*p < 1.E-6 numerical inaccuracy is avoided by poisson approximation.
+ */
+ int inv = 0; // invert
+ int32 x; // result
+ double np = n * p;
+
+ if (p > 0.5) { // faster calculation by inversion
+ p = 1. - p; inv = 1;
+ }
+ if (n <= 0 || p <= 0) {
+ if (n == 0 || p == 0) return inv * n; // only one possible result
+ FatalError("Parameter out of range in binomial function"); // error exit
+ }
+
+ //------------------------------------------------------------------
+ // choose method
+ //------------------------------------------------------------------
+ if (np < 35.) {
+ if (np < 1.E-6) {
+ // Poisson approximation for extremely low np
+ x = PoissonLow(np);
+ }
+ else {
+ // inversion method, using chop-down search from 0
+ x = BinomialInver(n, p);
+ }
+ }
+ else {
+ // ratio of uniforms method
+ x = BinomialRatioOfUniforms(n, p);
+ }
+ if (inv) {
+ x = n - x; // undo inversion
+ }
+ return x;
+}
+
+
+/***********************************************************************
+Subfunctions used by binomial
+***********************************************************************/
+
+int32 StochasticLib1::BinomialInver (int32 n, double p) {
+ /*
+ Subfunction for Binomial distribution. Assumes p < 0.5.
+
+ Uses inversion method by search starting at 0.
+
+ Gives overflow for n*p > 60.
+
+ This method is fast when n*p is low.
+ */
+ double f0, f, q;
+ int32 bound;
+ double pn, r, rc;
+ int32 x, n1, i;
+
+ // f(0) = probability of x=0 is (1-p)^n
+ // fast calculation of (1-p)^n
+ f0 = 1.; pn = 1.-p; n1 = n;
+ while (n1) {
+ if (n1 & 1) f0 *= pn;
+ pn *= pn; n1 >>= 1;
+ }
+ // calculate safety bound
+ rc = (n + 1) * p;
+ bound = (int32)(rc + 11.0*(sqrt(rc) + 1.0));
+ if (bound > n) bound = n;
+ q = p / (1. - p);
+
+ while (1) {
+ r = Random();
+ // recursive calculation: f(x) = f(x-1) * (n-x+1)/x*p/(1-p)
+ f = f0; x = 0; i = n;
+ do {
+ r -= f;
+ if (r <= 0) return x;
+ x++;
+ f *= q * i;
+ r *= x; // it is faster to multiply r by x than dividing f by x
+ i--;
+ }
+ while (x <= bound);
+ }
+}
+
+
+int32 StochasticLib1::BinomialRatioOfUniforms (int32 n, double p) {
+ /*
+ Subfunction for Binomial distribution. Assumes p < 0.5.
+
+ Uses the Ratio-of-Uniforms rejection method.
+
+ The computation time hardly depends on the parameters, except that it matters
+ a lot whether parameters are within the range where the LnFac function is
+ tabulated.
+
+ Reference: E. Stadlober: "The ratio of uniforms approach for generating
+ discrete random variates". Journal of Computational and Applied Mathematics,
+ vol. 31, no. 1, 1990, pp. 181-189.
+ */
+ static int32 b_n_last = -1; // last n
+ static double b_p_last = -1.; // last p
+ static int32 b_mode; // mode
+ static int32 b_bound; // upper bound
+ static double b_a; // hat center
+ static double b_h; // hat width
+ static double b_g; // value at mode
+ static double b_r1; // ln(p/(1-p))
+ double u; // uniform random
+ double q1; // 1-p
+ double np; // n*p
+ double var; // variance
+ double lf; // ln(f(x))
+ double x; // real sample
+ int32 k; // integer sample
+
+ if(b_n_last != n || b_p_last != p) { // Set_up
+ b_n_last = n;
+ b_p_last = p;
+ q1 = 1.0 - p;
+ np = n * p;
+ b_mode = (int32)(np + p); // mode
+ b_a = np + 0.5; // hat center
+ b_r1 = log(p / q1);
+ b_g = LnFac(b_mode) + LnFac(n-b_mode);
+ var = np * q1; // variance
+ b_h = sqrt(SHAT1 * (var+0.5)) + SHAT2; // hat width
+ b_bound = (int32)(b_a + 6.0 * b_h); // safety-bound
+ if (b_bound > n) b_bound = n; // safety-bound
+ }
+
+ while (1) { // rejection loop
+ u = Random();
+ if (u == 0) continue; // avoid division by 0
+ x = b_a + b_h * (Random() - 0.5) / u;
+ if (x < 0. || x > b_bound) continue; // reject, avoid overflow
+ k = (int32)x; // truncate
+ lf = (k-b_mode)*b_r1+b_g-LnFac(k)-LnFac(n-k);// ln(f(k))
+ if (u * (4.0 - u) - 3.0 <= lf) break; // lower squeeze accept
+ if (u * (u - lf) > 1.0) continue; // upper squeeze reject
+ if (2.0 * log(u) <= lf) break; // final acceptance
+ }
+ return k;
+}
+
+
+/***********************************************************************
+Multinomial distribution
+***********************************************************************/
+void StochasticLib1::Multinomial (int32 * destination, double * source, int32 n, int colors) {
+ /*
+ This function generates a vector of random variates, each with the
+ binomial distribution.
+
+ The multinomial distribution is the distribution you get when drawing
+ balls from an urn with more than two colors, with replacement.
+
+ Parameters:
+ destination: An output array to receive the number of balls of each
+ color. Must have space for at least 'colors' elements.
+ source: An input array containing the probability or fraction
+ of each color in the urn. Must have 'colors' elements.
+ All elements must be non-negative. The sum doesn't have
+ to be 1, but the sum must be positive.
+ n: The number of balls drawn from the urn.
+ colors: The number of possible colors.
+ */
+ double s, sum;
+ int32 x;
+ int i;
+ if (n < 0 || colors < 0) FatalError("Parameter negative in multinomial function");
+ if (colors == 0) return;
+
+ // compute sum of probabilities
+ for (i=0, sum=0; i<colors; i++) {
+ s = source[i];
+ if (s < 0) FatalError("Parameter negative in multinomial function");
+ sum += s;
+ }
+ if (sum == 0 && n > 0) FatalError("Zero sum in multinomial function");
+
+ for (i=0; i<colors-1; i++) {
+ // generate output by calling binomial (colors-1) times
+ s = source[i];
+ if (sum <= s) {
+ // this fixes two problems:
+ // 1. prevent division by 0 when sum = 0
+ // 2. prevent s/sum getting bigger than 1 in case of rounding errors
+ x = n;
+ }
+ else {
+ x = Binomial(n, s/sum);
+ }
+ n -= x; sum -= s;
+ destination[i] = x;
+ }
+ // get the last one
+ destination[i] = n;
+}
+
+
+void StochasticLib1::Multinomial (int32 * destination, int32 * source, int32 n, int colors) {
+ // same as above, with integer source
+ int32 x, p, sum;
+ int i;
+ if (n < 0 || colors < 0) FatalError("Parameter negative in multinomial function");
+ if (colors == 0) return;
+
+ // compute sum of probabilities
+ for (i=0, sum=0; i<colors; i++) {
+ p = source[i];
+ if (p < 0) FatalError("Parameter negative in multinomial function");
+ sum += p;
+ }
+ if (sum == 0 && n > 0) FatalError("Zero sum in multinomial function");
+
+ for (i=0; i<colors-1; i++) {
+ // generate output by calling binomial (colors-1) times
+ if (sum == 0) {
+ destination[i] = 0; continue;
+ }
+ p = source[i];
+ x = Binomial(n, (double)p/sum);
+ n -= x; sum -= p;
+ destination[i] = x;
+ }
+ // get the last one
+ destination[i] = n;
+}
+
+
+/***********************************************************************
+Normal distribution
+***********************************************************************/
+
+double StochasticLib1::Normal(double m, double s) {
+ // normal distribution with mean m and standard deviation s
+ double normal_x1; // first random coordinate (normal_x2 is member of class)
+ double w; // radius
+ if (normal_x2_valid) { // we have a valid result from last call
+ normal_x2_valid = 0;
+ return normal_x2 * s + m;
+ }
+ // make two normally distributed variates by Box-Muller transformation
+ do {
+ normal_x1 = 2. * Random() - 1.;
+ normal_x2 = 2. * Random() - 1.;
+ w = normal_x1*normal_x1 + normal_x2*normal_x2;
+ }
+ while (w >= 1. || w < 1E-30);
+ w = sqrt(log(w)*(-2./w));
+ normal_x1 *= w; normal_x2 *= w; // normal_x1 and normal_x2 are independent normally distributed variates
+ normal_x2_valid = 1; // save normal_x2 for next call
+ return normal_x1 * s + m;
+}
+
+
+/***********************************************************************
+Bernoulli distribution
+***********************************************************************/
+int StochasticLib1::Bernoulli(double p) {
+ // Bernoulli distribution with parameter p. This function returns
+ // 0 or 1 with probability (1-p) and p, respectively.
+ if (p < 0 || p > 1) FatalError("Parameter out of range in Bernoulli function");
+ return Random() < p;
+}
+
+
+/***********************************************************************
+Shuffle function
+***********************************************************************/
+void StochasticLib1::Shuffle(int * list, int min, int n) {
+ /*
+ This function makes a list of the n numbers from min to min+n-1
+ in random order.
+
+ The parameter 'list' must be an array with at least n elements.
+ The array index goes from 0 to n-1.
+
+ If you want to shuffle something else than integers then use the
+ integers in list as an index into a table of the items you want to shuffle.
+ */
+
+ int i, j, swap;
+ // put numbers from min to min+n-1 into list
+ for (i=0, j=min; i<n; i++, j++) list[i] = j;
+ // shuffle list
+ for (i=0; i<n-1; i++) {
+ // item number i has n-i numbers to choose between
+ j = IRandom(i,n-1);
+ // swap items i and j
+ swap = list[j]; list[j] = list[i]; list[i] = swap;
+ }
+}
+
+
+#endif // ifndef R_BUILD
diff --git a/src/stoc3.cpp b/src/stoc3.cpp
new file mode 100644
index 0000000..af7af4a
--- /dev/null
+++ b/src/stoc3.cpp
@@ -0,0 +1,1164 @@
+/*************************** stoc3.cpp **********************************
+* Author: Agner Fog
+* Date created: 2002-10-02
+* Last modified: 2008-11-21
+* Project: stocc.zip
+* Source URL: www.agner.org/random
+*
+* Description:
+* Non-uniform random number generator functions.
+*
+* This file contains source code for the class StochasticLib3 derived
+* from StochasticLib1 or StochasticLib2, defined in stocc.h.
+*
+* This class implements methods for sampling from the noncentral and extended
+* hypergeometric distributions, as well as the multivariate versions of these.
+*
+* Documentation:
+* ==============
+* The file stocc.h contains class definitions.
+* Further documentation at www.agner.org/random
+*
+* Copyright 2002-2008 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include <string.h> // memcpy function
+#include "stocc.h" // class definitions
+//#include "wnchyppr.cpp" // calculate Wallenius noncentral hypergeometric probability
+//#include "fnchyppr.cpp" // calculate Fisher's noncentral hypergeometric probability
+
+
+/******************************************************************************
+ Methods for class StochasticLib3
+******************************************************************************/
+
+
+/***********************************************************************
+ Constructor
+***********************************************************************/
+StochasticLib3::StochasticLib3(int seed) : StochasticLib1(seed) {
+ SetAccuracy(1.E-8); // set default accuracy
+}
+
+
+/***********************************************************************
+ SetAccuracy
+***********************************************************************/
+void StochasticLib3::SetAccuracy(double accur) {
+ // define accuracy of calculations for
+ // WalleniusNCHyp and MultiWalleniusNCHyp
+ if (accur < 0.) accur = 0.;
+ if (accur > 0.01) accur = 0.01;
+ accuracy = accur;
+}
+
+
+/***********************************************************************
+ Wallenius Non-central Hypergeometric distribution
+***********************************************************************/
+
+int32 StochasticLib3::WalleniusNCHyp (int32 n, int32 m, int32 N, double odds) {
+/*
+ This function generates a random variate with Wallenius noncentral
+ hypergeometric distribution.
+
+ Wallenius noncentral hypergeometric distribution is the distribution you
+ get when drawing balls without replacement from an urn containing red and
+ white balls, with bias.
+
+ We define the weight of the balls so that the probability of taking a
+ particular ball is proportional to its weight. The value of odds is the
+ normalized odds ratio: odds = weight(red) / weight(white).
+ If all balls have the same weight, i.e. odds = 1, then we get the
+ hypergeometric distribution.
+
+ n is the number of balls you take,
+ m is the number of red balls in the urn,
+ N is the total number of balls in the urn,
+ odds is the odds ratio,
+ and the return value is the number of red balls you get.
+
+ Four different calculation methods are implemented. This function decides
+ which method to use, based on the parameters.
+*/
+
+ // check parameters
+ if (n >= N || m >= N || n <= 0 || m <= 0 || odds <= 0.) {
+ // trivial cases
+ if (n == 0 || m == 0) return 0;
+ if (m == N) return n;
+ if (n == N) return m;
+ if (odds == 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in function WalleniusNCHyp");
+ return 0;}
+ // illegal parameter
+ FatalError("Parameter out of range in function WalleniusNCHyp");}
+
+ if (odds == 1.) {
+ // use hypergeometric function if odds == 1
+ return Hypergeometric(n, m, N);}
+
+ if (n < 30) {
+ return WalleniusNCHypUrn(n, m, N, odds);}
+
+ if (double(n)*N < 10000) {
+ return WalleniusNCHypTable(n, m, N, odds);}
+
+ return WalleniusNCHypRatioOfUnifoms(n, m, N, odds);
+ // the decision to use NoncentralHypergeometricInversion is
+ // taken inside WalleniusNCHypRatioOfUnifoms based
+ // on the calculated variance.
+ }
+
+
+/***********************************************************************
+ Subfunctions for WalleniusNCHyp
+***********************************************************************/
+
+int32 StochasticLib3::WalleniusNCHypUrn (int32 n, int32 m, int32 N, double odds) {
+ // sampling from Wallenius noncentral hypergeometric distribution
+ // by simulating urn model
+ int32 x; // sample
+ int32 m2; // items of color 2 in urn
+ double mw1, mw2; // total weight of balls of color 1 or 2
+ x = 0; m2 = N - m;
+ mw1 = m * odds; mw2 = m2;
+ do {
+ if (Random() * (mw1 + mw2) < mw1) {
+ x++; m--;
+ if (m == 0) break;
+ mw1 = m * odds;}
+ else {
+ m2--;
+ if (m2 == 0) {
+ x += n-1; break;}
+ mw2 = m2;}}
+ while (--n);
+ return x;}
+
+
+int32 StochasticLib3::WalleniusNCHypTable (int32 n, int32 m, int32 N, double odds) {
+ // Sampling from Wallenius noncentral hypergeometric distribution
+ // using chop-down search from a table created by recursive calculation.
+ // This method is fast when n is low or when called repeatedly with
+ // the same parameters.
+ static int32 wnc_n_last = -1, wnc_m_last = -1, wnc_N_last = -1; // previous parameters
+ static double wnc_o_last = -1;
+
+ const int TABLELENGTH = 512; // max length of table
+ static double ytable[TABLELENGTH]; // table of probability values
+ static int32 len; // length of table
+ static int32 x1; // lower x limit for table
+ int32 x2; // upper x limit for table
+ int32 x; // sample
+ double u; // uniform random number
+ int success; // table long enough
+
+ if (n != wnc_n_last || m != wnc_m_last || N != wnc_N_last || odds != wnc_o_last) {
+ // set-up: This is done only when parameters have changed
+ wnc_n_last = n; wnc_m_last = m; wnc_N_last = N; wnc_o_last = odds;
+
+ CWalleniusNCHypergeometric wnch(n,m,N,odds); // make object for calculation
+ success = wnch.MakeTable(ytable, TABLELENGTH, &x1, &x2); // make table of probability values
+ if (success) {
+ len = x2 - x1 + 1;} // table long enough. remember length
+ else {
+ len = 0;}} // remember failure
+
+ if (len == 0) {
+ // table not long enough. Use another method
+ return WalleniusNCHypRatioOfUnifoms(n,m,N,odds);}
+
+ while (1) { // repeat in the rare case of failure
+ u = Random(); // uniform variate to convert
+ for (x=0; x<len; x++) { // chop-down search
+ u -= ytable[x];
+ if (u < 0.) return x + x1;}}} // value found
+
+
+int32 StochasticLib3::WalleniusNCHypRatioOfUnifoms (int32 n, int32 m, int32 N, double odds) {
+ // sampling from Wallenius noncentral hypergeometric distribution
+ // using ratio-of-uniforms rejection method.
+ static int32 wnc_n_last = -1, wnc_m_last = -1, wnc_N_last = -1; // previous parameters
+ static double wnc_o_last = -1;
+ static int32 wnc_bound1, wnc_bound2; // lower and upper bound
+ static int32 wnc_mode; // mode
+ static double wnc_a; // hat center
+ static double wnc_h; // hat width
+ static double wnc_k; // probability value at mode
+ static int UseChopDown; // use chop down inversion instead
+ int32 xmin, xmax; // x limits
+ double mean; // mean
+ double variance; // variance
+ double x; // real sample
+ int32 xi; // integer sample
+ int32 x2; // limit when searching for mode
+ double u; // uniform random
+ double f, f2; // probability function value
+ double s123; // components 1,2,3 of hat width
+ double s4; // component 4 of hat width
+ double r1, r2; // temporaries
+ static const double rsqrt2pi = 0.3989422804014326857; // 1/sqrt(2*pi)
+
+ // Make object for calculating mean and probability.
+ CWalleniusNCHypergeometric wnch(n, m, N, odds, accuracy);
+
+ xmin = m+n-N; if (xmin < 0) xmin = 0; // calculate limits
+ xmax = n; if (xmax > m) xmax = m;
+
+ if (n != wnc_n_last || m != wnc_m_last || N != wnc_N_last || odds != wnc_o_last) {
+ // set-up: This is done only when parameters have changed
+ wnc_n_last = n; wnc_m_last = m; wnc_N_last = N; wnc_o_last = odds;
+
+ // find approximate mean
+ mean = wnch.mean();
+
+ // find approximate variance from Fisher's noncentral hypergeometric approximation
+ r1 = mean * (m-mean); r2 = (n-mean)*(mean+N-n-m);
+ variance = N*r1*r2/((N-1)*(m*r2+(N-m)*r1));
+ UseChopDown = variance < 4.; // use chop-down method if variance is low
+
+ if (!UseChopDown) {
+ // find mode (same code in CWalleniusNCHypergeometric::mode)
+ wnc_mode = (int32)(mean); f2 = 0.;
+ if (odds < 1.) {
+ if (wnc_mode < xmax) wnc_mode++;
+ x2 = xmin;
+ if (odds > 0.294 && N <= 10000000) {
+ x2 = wnc_mode - 1;} // search for mode can be limited
+ for (xi = wnc_mode; xi >= x2; xi--) {
+ f = wnch.probability(xi);
+ if (f <= f2) break;
+ wnc_mode = xi; f2 = f;}}
+ else {
+ if (wnc_mode < xmin) wnc_mode++;
+ x2 = xmax;
+ if (odds < 3.4 && N <= 10000000) {
+ x2 = wnc_mode + 1;} // search for mode can be limited
+ for (xi = wnc_mode; xi <= x2; xi++) {
+ f = wnch.probability(xi);
+ if (f <= f2) break;
+ wnc_mode = xi; f2 = f;}}
+ wnc_k = f2; // value at mode
+
+ // find approximate variance from normal distribution approximation
+ variance = rsqrt2pi / wnc_k; variance *= variance;
+
+ // find center and width of hat function
+ wnc_a = mean + 0.5;
+ s123 = 0.40 + 0.8579*sqrt(variance+0.5) + 0.4*fabs(mean-wnc_mode);
+ s4 = 0.;
+ r1 = xmax - mean - s123; r2 = mean - s123 - xmin;
+ if (r1 > r2) r1 = r2;
+ if ((odds>5. || odds<0.2) && r1>=-0.5 && r1<=8.) {
+ // s4 correction needed
+ if (r1 < 1.) r1 = 1.;
+ s4 = 0.029 * pow(double(N),0.23) / (r1*r1);}
+ wnc_h = 2. * (s123 + s4);
+
+ // find safety bounds
+ wnc_bound1 = (int32)(mean - 4. * wnc_h);
+ if (wnc_bound1 < xmin) wnc_bound1 = xmin;
+ wnc_bound2 = (int32)(mean + 4. * wnc_h);
+ if (wnc_bound2 > xmax) wnc_bound2 = xmax;}}
+
+ if (UseChopDown) { // for small variance, use chop down inversion
+ return WalleniusNCHypInversion(n,m,N,odds);}
+
+ // use ratio-of-uniforms rejection method
+ while(1) { // rejection loop
+ u = Random();
+ if (u == 0.) continue; // avoid division by 0
+ x = wnc_a + wnc_h * (Random()-0.5)/u;
+ if (x < 0. || x > 2E9) continue; // reject, avoid overflow
+ xi = (int32)(x); // truncate
+ if (xi < wnc_bound1 || xi > wnc_bound2) {
+ continue;} // reject if outside safety bounds
+ #if 0 // use rejection in x-domain
+ if (xi == wnc_mode) break; // accept
+ f = wnch.probability(xi); // function value
+ if (f > wnc_k * u * u) {
+ break;} // acceptance
+ #else // use rejection in t-domain (this is faster)
+ double hx, s2, xma2; // compute h(x)
+ s2 = wnc_h * 0.5; s2 *= s2;
+ xma2 = xi - (wnc_a-0.5);
+ xma2 *= xma2;
+ hx = (s2 >= xma2) ? 1. : s2 / xma2;
+ // rejection in t-domain implemented in CWalleniusNCHypergeometric::BernouilliH
+ if (wnch.BernouilliH(xi, hx * wnc_k * 1.01, u * u * wnc_k * 1.01, this)) {
+ break;} // acceptance
+ #endif
+ } // rejection
+ return xi;}
+
+
+int32 StochasticLib3::WalleniusNCHypInversion (int32 n, int32 m, int32 N, double odds) {
+ // sampling from Wallenius noncentral hypergeometric distribution
+ // using down-up search starting at the mean using the chop-down technique.
+ // This method is faster than the rejection method when the variance is low.
+ int32 x1, x2; // search values
+ int32 xmin, xmax; // x limits
+ double u; // uniform random number to be converted
+ double f; // probability function value
+ double accura; // absolute accuracy
+ int updown; // 1 = search down, 2 = search up, 3 = both
+
+ // Make objects for calculating mean and probability.
+ // It is more efficient to have two identical objects, one for down search
+ // and one for up search, because they are obtimized for consecutive x values.
+ CWalleniusNCHypergeometric wnch1(n, m, N, odds, accuracy);
+ CWalleniusNCHypergeometric wnch2(n, m, N, odds, accuracy);
+
+ accura = accuracy * 0.01;
+ if (accura > 1E-7) accura = 1E-7; // absolute accuracy
+
+ x1 = (int32)(wnch1.mean()); // start at floor and ceiling of mean
+ x2 = x1 + 1;
+ xmin = m+n-N; if (xmin<0) xmin = 0; // calculate limits
+ xmax = n; if (xmax>m) xmax = m;
+ updown = 3; // start searching both up and down
+
+ while(1) { // loop until accepted (normally executes only once)
+ u = Random(); // uniform random number to be converted
+ while (updown) { // search loop
+ if (updown & 1) { // search down
+ if (x1 < xmin) {
+ updown &= ~1;} // stop searching down
+ else {
+ f = wnch1.probability(x1);
+ u -= f; // subtract probability until 0
+ if (u <= 0.) return x1;
+ x1--;
+ if (f < accura) updown &= ~1;}} // stop searching down
+ if (updown & 2) { // search up
+ if (x2 > xmax) {
+ updown &= ~2;} // stop searching up
+ else {
+ f = wnch2.probability(x2);
+ u -= f; // subtract probability until 0
+ if (u <= 0.) return x2;
+ x2++;
+ if (f < accura) updown &= ~2;}}}}} // stop searching down
+
+
+/***********************************************************************
+ Multivariate Wallenius noncentral hypergeometric distribution
+***********************************************************************/
+
+void StochasticLib3::MultiWalleniusNCHyp (int32 * destination,
+int32 * source, double * weights, int32 n, int colors) {
+/*
+ This function generates a vector of random variables with the
+ multivariate Wallenius noncentral hypergeometric distribution.
+
+ The multivariate Wallenius noncentral hypergeometric distribution is
+ the distribution you get when drawing colored balls from an urn
+ with any number of colors, without replacement, and with bias.
+
+ The weights are defined so that the probability of taking a particular
+ ball is proportional to its weight.
+
+ Parameters:
+ destination: An output array to receive the number of balls of each
+ color. Must have space for at least 'colors' elements.
+ source: An input array containing the number of balls of each
+ color in the urn. Must have 'colors' elements.
+ All elements must be non-negative.
+ weights: The odds of each color. Must have 'colors' elements.
+ All elements must be non-negative.
+ n: The number of balls to draw from the urn.
+ Cannot exceed the total number of balls with nonzero weight
+ in source.
+ colors: The number of possible colors.
+
+ MAXCOLORS (defined in stocc.h): You may adjust MAXCOLORS to the maximum
+ number of colors you need.
+
+ The function will reduce the number of colors, if possible, by eliminating
+ colors with zero weight or zero number and pooling together colors with the
+ same weight. The problem thus reduced is handled in the arrays osource,
+ urn, oweights and osample of size colors2.
+
+ The sampling proceeds by either of two methods: simulating urn experiment,
+ or conditional method followed by Metropolis-Hastings sampling.
+
+ Simulating the urn experiment is simply taking one ball at a time, requiring
+ n uniform random variates. The problem is reduced whenever a color has been
+ exhausted.
+
+ The conditional method divides the colors into groups where the number of
+ balls in each group is determined by sampling from the marginal distribution
+ which is approximated by the univariate Wallenius distribution. Each group
+ is then subdivided by sampling one color at a time until all colors have
+ been sampled.
+
+ The sample from the conditional method does not have the exact distribution,
+ but it is used as a starting point for the Metropolis-Hastings sampling,
+ which proceeds as follows: colors c1 and c2 are re-sampled using the
+ univariate Wallenius distribution, keeping the samples of all other colors
+ constant. The new sample is accepted or the old sample retained, according
+ to the Metropolis formula which corrects for the slight error introduced
+ by not using the true conditional distribution. c1 and c2 are rotated in
+ an order determined by the variance of each color. This rotation (scan) is
+ repeated nHastings times.
+*/
+
+ // variables
+ int order1[MAXCOLORS]; // sort order, index into source and destination
+ int order2[MAXCOLORS]; // corresponding index into arrays when equal weights pooled together
+ int order3[MAXCOLORS]; // secondary index for sorting by variance
+ int32 osource[MAXCOLORS]; // contents of source, sorted by weight with equal weights pooled together
+ int32 urn[MAXCOLORS]; // balls from osource not taken yet
+ int32 osample[MAXCOLORS]; // balls sampled
+ double oweights[MAXCOLORS]; // sorted list of weights
+ double wcum[MAXCOLORS]; // list of accumulated probabilities
+ double var[MAXCOLORS]; // sorted list of variance
+ double w = 0.; // weight of balls of one color
+ double w1, w2; // odds within group; mean weight in group
+ double wsum; // total weight of all balls of several or all colors
+ double p; // probability
+ double f0, f1; // multivariate probability function
+ double g0, g1; // conditional probability function
+ double r1, r2; // temporaries in calculation of variance
+ int32 nn; // number of balls left to sample
+ int32 m; // number of balls of one color
+ int32 msum; // total number of balls of several or all colors
+ int32 N; // total number of balls with nonzero weight
+ int32 x0, x = 0; // sample of one color
+ int32 n1, n2, ng; // size of weight group sample or partial sample
+ int32 m1, m2; // size of weight group
+ int i, j, k; // loop counters
+ int c, c1, c2; // color index
+ int colors2; // reduced number of colors
+ int a, b; // color index delimiting weight group
+ int nHastings; // number of scans in Metropolis-Hastings sampling
+
+ // check validity of parameters
+ if (n < 0 || colors < 0 || colors > MAXCOLORS) FatalError("Parameter out of range in function MultiWalleniusNCHyp");
+ if (colors == 0) return;
+ if (n == 0) {
+ for (i=0; i<colors; i++) destination[i] = 0; return;}
+
+ // check validity of array parameters
+ for (i=0, msum=0; i < colors; i++) {
+ m = source[i]; w = weights[i];
+ if (m < 0 || w < 0) FatalError("Parameter negative in function MultiWalleniusNCHyp");
+ if (w) msum += m;}
+ N = msum;
+
+ // sort colors by weight, heaviest first
+ for (i=0; i < colors; i++) order1[i] = order3[i] = i;
+ for (i=0; i < colors-1; i++) {
+ c = order1[i]; k = i;
+ w = weights[c];
+ if (source[c]==0) w = 0; // zero number treated as zero weight
+ for (j=i+1; j < colors; j++) {
+ c2 = order1[j];
+ if (weights[c2] > w && source[c2]) {
+ w = weights[c2]; k = j;}}
+ order1[i] = order1[k]; order1[k] = c;}
+
+ // skip any colors with zero weight or zero number.
+ // this solves all problems with zero weights
+ while (colors && (weights[c=order1[colors-1]]==0 || source[c]==0)) {
+ colors--; destination[c] = 0;}
+
+ // check if there are more than n balls with nonzero weight
+ if (n >= N) {
+ if (n > N) FatalError("Taking more items than there are in function MultiWalleniusNCHyp");
+ for (i = 0; i < colors; i++) {c = order1[i]; destination[c] = source[c];}
+ return;}
+
+ // copy source and weights into ordered lists
+ // and pool together colors with same weight
+ for (i=0, c2=-1; i < colors; i++) {
+ c = order1[i];
+ if (i==0 || weights[c] != w) {
+ c2++;
+ x = source[c];
+ oweights[c2] = w = weights[c];}
+ else {
+ x += source[c];} // join colors with same weight
+ urn[c2] = osource[c2] = x;
+ order2[i] = c2;
+ osample[c2] = 0;}
+ colors2 = c2 + 1;
+
+ // check number of colors left
+ if (colors2 < 3) {
+ // simple cases
+ if (colors2 == 1) osample[0] = n;
+ if (colors2 == 2) {
+ x = WalleniusNCHyp(n, osource[0], N, oweights[0]/oweights[1]);
+ osample[0] = x; osample[1] = n - x;}}
+ else {
+
+ // more than 2 colors
+ nn = n;
+
+ // decide which method to use
+ if (nn < 5000 * colors2) {
+
+ // Simulate urn experiment
+
+ // Make list of accumulated probabilities of each color
+ for (i=0, wsum=0; i < colors2; i++) {
+ wsum += urn[i] * oweights[i];
+ wcum[i] = wsum;}
+
+ // take one item nn times
+ j = colors2-1;
+ do {
+
+ // get random color according to probability distribution wcum
+ p = Random() * wcum[colors2-1];
+ // get color from search in probability distribution wcum
+ for (i=0; i < j; i++) {
+ if (p < wcum[i]) break;}
+
+ // sample one ball of color i
+ osample[i]++; urn[i]--; nn--;
+
+ // check if this color has been exhausted
+ if (urn[i] == 0) {
+ if (i != j) {
+ // put exhausted color at the end of lists so that colors2 can be reduced
+ m = osource[i]; osource[i] = osource[j]; osource[j] = m;
+ m = urn[i]; urn[i] = urn[j]; urn[j] = m;
+ m = osample[i]; osample[i] = osample[j]; osample[j] = m;
+ w = oweights[i]; oweights[i] = oweights[j]; oweights[j] = w;
+ // update order2 list (no longer sorted by weight)
+ for (k=0; k<colors; k++) {
+ if (order2[k] == i) order2[k] = j; else
+ if (order2[k] == j) order2[k] = i;}}
+ colors2--; j = colors2-1; // decrement number of colors left in urn
+
+ if (colors2 == 2 && nn > 50) {
+ // two colors left. use univariate distribution for the rest
+ x = WalleniusNCHyp(nn, urn[0], urn[0]+urn[1], oweights[0]/oweights[1]);
+ osample[0] += x;
+ osample[1] += nn - x;
+ break;}
+
+ if (colors2 == 1) {
+ // only one color left. The rest is deterministic
+ osample[0] += nn;
+ break;}
+
+ // make sure wcum is re-calculated from beginning
+ i = 0;}
+
+ // update list of accumulated probabilities
+ wsum = i > 0 ? wcum[i-1] : 0.;
+ for (k=i; k<colors2; k++) {
+ wsum += urn[k] * oweights[k];
+ wcum[k] = wsum;}}
+
+ while (nn);}
+
+ else {
+ // use conditional method to make starting point for
+ // Metropolis-Hastings sampling
+
+ // divide weights into two groups, heavy and light
+ a = 0; b = colors2-1;
+ w = sqrt(oweights[0] * oweights[colors2-1]);
+ do {
+ c = (a + b) / 2;
+ if (oweights[c] > w) a = c; else b = c;}
+ while (b > a + 1);
+ // heavy group goes from 0 to b-1, light group goes from b to colors2-1
+
+ // calculate mean weight for heavy color group
+ for (i=0, m1=0, wsum=0; i < b; i++) {
+ m1 += urn[i]; wsum += oweights[i] * urn[i];}
+ w1 = wsum / m1;
+
+ // calculate mean weight for light color group
+ for (i=b, m2=0, wsum=0; i < colors2; i++) {
+ m2 += urn[i]; wsum += oweights[i] * urn[i];}
+ w2 = wsum / m2;
+
+ // split partial sample n into heavy (n1) and light (n2)
+ n1 = WalleniusNCHyp(n, m1, m1+m2, w1/w2);
+ n2 = n - n1;
+
+ // set parameters for first group (heavy)
+ a = 0; ng = n1;
+
+ // loop twice, for the two groops
+ for (k=0; k < 2; k++) {
+
+ // split group into single colors by calling univariate distribution b-a-1 times
+ for (i = a; i < b-1; i++) {
+ m = urn[i]; w = oweights[i];
+
+ // calculate mean weight of remaining colors
+ for (j=i+1, msum=0, wsum=0; j < b; j++) {
+ m1 = urn[j]; w1 = oweights[j];
+ msum += m1; wsum += m1 * w1;}
+
+ // sample color i in group
+ x = wsum ? WalleniusNCHyp(ng, m, msum + m, w * msum / wsum) : ng;
+
+ osample[i] = x;
+ ng -= x;}
+
+ // get the last one in the group
+ osample[i] = ng;
+
+ // set parameters for second group (light)
+ a = b; b = colors2; ng = n2;}
+
+ // finished with conditional method.
+ // osample contains starting point for Metropolis-Hastings sampling
+
+ // make object for calculating probabilities and mean
+ CMultiWalleniusNCHypergeometric wmnc(n, osource, oweights, colors2);
+
+ wmnc.mean(var); // calculate mean
+ // calculate approximate variance from mean
+ for (i=0; i<colors; i++) {
+ r1 = var[i] * (osource[i]-var[i]);
+ r2 = (n-var[i])*(var[i]+N-n-osource[i]);
+ if (r1 <= 0. || r2 <= 0.) {
+ var[i] = 0.;}
+ else {
+ var[i] = N*r1*r2/((N-1)*(osource[i]*r2+(N-osource[i])*r1));}}
+
+ // sort again, this time by variance
+ for (i=0; i < colors2-1; i++) {
+ c = order3[i]; k = i;
+ w = var[c];
+ for (j=i+1; j < colors2; j++) {
+ c2 = order3[j];
+ if (var[c2] > w) {
+ w = var[c2]; k = j;}}
+ order3[i] = order3[k]; order3[k] = c;}
+
+ // number of scans (this value of nHastings has not been fine-tuned)
+ nHastings = 4;
+ if (accuracy < 1E-6) nHastings = 6;
+ if (colors2 > 5) nHastings++;
+
+ // Metropolis-Hastings sampler
+ f0 = -1.;
+ for (k = 0; k < nHastings; k++) {
+ for (i = 0; i < colors2; i++) {
+ j = i+1;
+ if (j >= colors2) j = 0;
+ c1 = order3[i]; c2 = order3[j];
+ w = oweights[c1] / oweights[c2];
+ n1 = osample[c1] + osample[c2];
+ x0 = osample[c1];
+ x = WalleniusNCHyp(n1, osource[c1], osource[c1]+osource[c2], w);
+ if (x == x0) continue; // accepted
+ if (f0 < 0.) f0 = wmnc.probability(osample);
+ CWalleniusNCHypergeometric nc(n1, osource[c1], osource[c1]+osource[c2], w, accuracy);
+ g0 = nc.probability(x0);
+ g1 = nc.probability(x);
+ osample[c1] = x;
+ osample[c2] = n1 - x;
+ f1 = wmnc.probability(osample);
+ g0 = f1 * g0; g1 = f0 * g1;
+ if (g0 >= g1 || g0 > g1 * Random()) {
+ // new state accepted
+ f0 = -1.;}
+ else {
+ // rejected. restore old sample
+ osample[c1] = x0;
+ osample[c2] = n1 - x0;}}}}}
+
+ // finished sampling by either method
+ // un-sort sample into destination and untangle re-orderings
+ for (i=0; i < colors; i++) {
+ c1 = order1[i]; c2 = order2[i];
+ if (source[c1] == osource[c2]) {
+ destination[c1] = osample[c2];}
+ else {
+ // split colors with same weight that have been treated as one
+ x = Hypergeometric(osample[c2], source[c1], osource[c2]);
+ destination[c1] = x;
+ osample[c2] -= x;
+ osource[c2] -= source[c1];}}}
+
+
+/******************************************************************************
+ Multivariate complementary Wallenius noncentral hypergeometric distribution
+******************************************************************************/
+
+void StochasticLib3::MultiComplWalleniusNCHyp (
+int32 * destination, int32 * source, double * weights, int32 n, int colors) {
+ // This function generates a vector of random variables with the multivariate
+ // complementary Wallenius noncentral hypergeometric distribution.
+ // See MultiWalleniusNCHyp for details.
+ double rweights[MAXCOLORS]; // reciprocal weights
+ int32 sample[MAXCOLORS]; // balls sampled
+ double w; // weight
+ int32 N; // total number of balls
+ int i; // color index
+
+ // make reciprocal weights and calculate N
+ for (i=0, N=0; i<colors; i++) {
+ w = weights[i];
+ if (w == 0) FatalError("Zero weight in function MultiComplWalleniusNCHyp");
+ rweights[i] = 1. / w;
+ N += source[i];}
+
+ // use multivariate Wallenius noncentral hypergeometric distribution
+ MultiWalleniusNCHyp(sample, source, rweights, N - n, colors);
+
+ // complementary distribution = balls not taken
+ for (i=0; i<colors; i++) {
+ destination[i] = source[i] - sample[i];}}
+
+
+/******************************************************************************
+ Fisher's noncentral hypergeometric distribution
+******************************************************************************/
+int32 StochasticLib3::FishersNCHyp (int32 n, int32 m, int32 N, double odds) {
+/*
+ This function generates a random variate with Fisher's noncentral
+ hypergeometric distribution.
+
+ This distribution resembles Wallenius noncentral hypergeometric distribution
+ and the two distributions are sometimes confused. A more detailed
+ explanation of this distribution is given below under the multivariate
+ Fisher's noncentral hypergeometric distribution (MultiFishersNCHyp).
+ For further documentation see nchyp.pdf, awailable from www.agner.org/random
+
+ This function uses inversion by chop-down search from zero when parameters
+ are small, and the ratio-of-uniforms rejection method when the former
+ method would be too slow or would give overflow.
+*/
+ int32 fak, addd; // used for undoing transformations
+ int32 x; // result
+
+ // check if parameters are valid
+ if (n > N || m > N || n < 0 || m < 0 || odds <= 0.) {
+ if (odds == 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in function FishersNCHyp");
+ return 0;}
+ FatalError("Parameter out of range in function FishersNCHyp");}
+
+ if (odds == 1.) {
+ // use hypergeometric function if odds == 1
+ return Hypergeometric(n, m, N);}
+
+ // symmetry transformations
+ fak = 1; addd = 0;
+ if (m > N/2) {
+ // invert m
+ m = N - m;
+ fak = -1; addd = n;}
+
+ if (n > N/2) {
+ // invert n
+ n = N - n;
+ addd += fak * m; fak = - fak;}
+
+ if (n > m) {
+ // swap n and m
+ x = n; n = m; m = x;}
+
+ // cases with only one possible result end here
+ if (n == 0 || odds == 0.) return addd;
+
+ if (fak == -1) {
+ // reciprocal odds if inverting
+ odds = 1. / odds;}
+
+ // choose method
+ if (n < 30 && N < 1024 && odds > 1.E-5 && odds < 1.E5) {
+ // use inversion by chop down method
+ x = FishersNCHypInversion (n, m, N, odds);}
+
+ else {
+ // use ratio-of-uniforms method
+ x = FishersNCHypRatioOfUnifoms (n, m, N, odds);}
+
+ // undo symmetry transformations
+ return x * fak + addd;}
+
+
+/***********************************************************************
+ Subfunctions used by FishersNCHyp
+***********************************************************************/
+
+int32 StochasticLib3::FishersNCHypInversion
+(int32 n, int32 m, int32 N, double odds) {
+/*
+ Subfunction for FishersNCHyp distribution.
+ Implements Fisher's noncentral hypergeometric distribution by inversion
+ method, using chop-down search starting at zero.
+
+ Valid only for 0 <= n <= m <= N/2.
+ Without overflow check the parameters must be limited to n < 30, N < 1024,
+ and 1.E-5 < odds < 1.E5. This limitation is acceptable because this method
+ is slow for higher n.
+
+ The execution time of this function grows with n.
+
+ See the file nchyp.pdf for theoretical explanation.
+*/
+ static int32 fnc_n_last = -1, fnc_m_last = -1, fnc_N_last = -1;
+ static double fnc_o_last = -1, fnc_f0, fnc_scale;
+
+ int32 x; // x value
+ int32 L; // derived parameter
+ double f; // scaled function value
+ double sum; // scaled sum of function values
+ double a1, a2, b1, b2, f1, f2; // factors in recursive calculation
+ double u; // uniform random variate
+
+ L = N-m-n;
+
+ if (n != fnc_n_last || m != fnc_m_last || N != fnc_N_last || odds != fnc_o_last) {
+ // parameters have changed. set-up
+ fnc_n_last = n; fnc_m_last = m; fnc_N_last = N; fnc_o_last = odds;
+
+ // f(0) is set to an arbitrary value because it cancels out.
+ // A low value is chosen to avoid overflow.
+ fnc_f0 = 1.E-100;
+
+ // calculate summation of e(x), using the formula:
+ // f(x) = f(x-1) * (m-x+1)*(n-x+1)*odds / (x*(L+x))
+ // All divisions are avoided by scaling the parameters
+ sum = f = fnc_f0; fnc_scale = 1.;
+ a1 = m; a2 = n; b1 = 1; b2 = L + 1;
+ for (x = 1; x <= n; x++) {
+ f1 = a1 * a2 * odds;
+ f2 = b1 * b2;
+ a1--; a2--; b1++; b2++;
+ f *= f1;
+ sum *= f2;
+ fnc_scale *= f2;
+ sum += f;
+ // overflow check. not needed if parameters are limited:
+ // if (sum > 1E100) {sum *= 1E-100; f *= 1E-100; fnc_scale *= 1E-100;}
+ }
+ fnc_f0 *= fnc_scale;
+ fnc_scale = sum;
+ // now f(0) = fnc_f0 / fnc_scale.
+ // We are still avoiding all divisions by saving the scale factor
+ }
+
+ // uniform random
+ u = Random() * fnc_scale;
+
+ // recursive calculation:
+ // f(x) = f(x-1) * (m-x+1)*(n-x+1)*odds / (x*(L+x))
+ f = fnc_f0; x = 0; a1 = m; a2 = n; b1 = 0; b2 = L;
+ do {
+ u -= f;
+ if (u <= 0) break;
+ x++; b1++; b2++;
+ f *= a1 * a2 * odds;
+ u *= b1 * b2;
+ // overflow check. not needed if parameters are limited:
+ // if (u > 1.E100) {u *= 1E-100; f *= 1E-100;}
+ a1--; a2--;}
+ while (x < n);
+ return x;}
+
+
+int32 StochasticLib3::FishersNCHypRatioOfUnifoms
+(int32 n, int32 m, int32 N, double odds) {
+/*
+ Subfunction for FishersNCHyp distribution.
+ Valid for 0 <= n <= m <= N/2, odds != 1
+
+ Fisher's noncentral hypergeometric distribution by ratio-of-uniforms
+ rejection method.
+
+ The execution time of this function is almost independent of the parameters.
+*/
+ static int32 fnc_n_last = -1, fnc_m_last = -1, fnc_N_last = -1; // previous parameters
+ static double fnc_o_last = -1;
+ static int32 fnc_bound; // upper bound
+ static double fnc_a; // hat center
+ static double fnc_h; // hat width
+ static double fnc_lfm; // ln(f(mode))
+ static double fnc_logb; // ln(odds)
+ int32 L; // N-m-n
+ int32 mode; // mode
+ double mean; // mean
+ double variance; // variance
+ double x; // real sample
+ int32 k; // integer sample
+ double u; // uniform random
+ double lf; // ln(f(x))
+ double AA, BB, g1, g2; // temporary
+
+ L = N-m-n;
+
+ if (n != fnc_n_last || m != fnc_m_last || N != fnc_N_last || odds != fnc_o_last) {
+ // parameters have changed. set-up
+ fnc_n_last = n; fnc_m_last = m; fnc_N_last = N; fnc_o_last = odds;
+
+ // find approximate mean
+ AA = (m+n)*odds+L; BB = sqrt(AA*AA - 4*odds*(odds-1)*m*n);
+ mean = (AA-BB)/(2*(odds-1));
+
+ // find approximate variance
+ AA = mean * (m-mean); BB = (n-mean)*(mean+L);
+ variance = N*AA*BB/((N-1)*(m*BB+(n+L)*AA));
+
+ // compute log(odds)
+ fnc_logb = log(odds);
+
+ // find center and width of hat function
+ fnc_a = mean + 0.5;
+ fnc_h = 1.028 + 1.717*sqrt(variance+0.5) + 0.032*fabs(fnc_logb);
+
+ // find safety bound
+ fnc_bound = (int32)(mean + 4.0 * fnc_h);
+ if (fnc_bound > n) fnc_bound = n;
+
+ // find mode
+ mode = (int32)(mean);
+ g1 =(double)(m-mode)*(n-mode)*odds;
+ g2 =(double)(mode+1)*(L+mode+1);
+ if (g1 > g2 && mode < n) mode++;
+
+ // value at mode to scale with:
+ fnc_lfm = mode * fnc_logb - fc_lnpk(mode, L, m, n);}
+
+ while(1) {
+ u = Random();
+ if (u == 0) continue; // avoid divide by 0
+ x = fnc_a + fnc_h * (Random()-0.5)/u;
+ if (x < 0. || x > 2E9) continue; // reject, avoid overflow
+ k = (int32)(x); // truncate
+ if (k > fnc_bound) continue; // reject if outside safety bound
+ lf = k*fnc_logb - fc_lnpk(k,L,m,n) - fnc_lfm; // compute function value
+ if (u * (4.0 - u) - 3.0 <= lf) break; // lower squeeze accept
+ if (u * (u-lf) > 1.0) continue; // upper squeeze reject
+ if (2.0 * log(u) <= lf) break;} // final acceptance
+
+ return k;}
+
+
+/***********************************************************************
+ Multivariate Fisher's noncentral hypergeometric distribution
+***********************************************************************/
+void StochasticLib3::MultiFishersNCHyp (int32 * destination,
+int32 * source, double * weights, int32 n, int colors) {
+/*
+ This function generates a vector of random variates with the
+ multivariate Fisher's noncentral hypergeometric distribution.
+
+ This distribution is defined as the conditional distribution of 'colors'
+ independent binomial variates
+ x[i] = binomial(source[i], p[i])
+ on the condition that the sum of all x[i] is n.
+ p[i] = r * weights[i] / (1 + r * weights[i]),
+ r is an arbitrary scale factor.
+
+ Parameters:
+ destination: An output array to receive the number of balls of each
+ color. Must have space for at least 'colors' elements.
+ source: An input array containing the number of balls of each
+ color in the urn. Must have 'colors' elements.
+ All elements must be non-negative.
+ weights: The odds of each color. Must have 'colors' elements.
+ All elements must be non-negative.
+ n: The number of balls drawn from the urn.
+ Can't exceed the total number of balls with nonzero weight
+ in the urn.
+ colors: The number of possible colors.
+
+ Method: The conditional method is used for generating a sample with the
+ approximate distribution. This sample is used as a starting point for
+ a Gibbs sampler. The accuracy depends on the number of scans with the
+ Gibbs sampler.
+
+ The function will reduce the number of colors, if possible, by eliminating
+ colors with zero weight or zero number and pooling together colors with the
+ same weight. A symmetry transformation is used if more than half the balls
+ are taken. The problem thus reduced is handled in the arrays osource,
+ oweights and osample of dimension colors2.
+*/
+ int order1[MAXCOLORS]; // sort order, index into source and destination
+ int order2[MAXCOLORS]; // corresponding index into osource when equal weights pooled together
+ int order3[MAXCOLORS]; // secondary index for sorting by variance
+ int32 osource[MAXCOLORS]; // contents of source, sorted by weight with equal weights pooled together
+ int32 osample[MAXCOLORS]; // balls sampled, sorted by weight
+ double oweights[MAXCOLORS]; // sorted list of weights
+ double var[MAXCOLORS]; // sorted list of variance
+ int32 x = 0; // univariate sample
+ int32 m; // number of items of one color
+ int32 m1, m2; // number of items in each weight group
+ int32 msum; // total number of items of several or all colors
+ int32 n0; // remaining balls to sample
+ int32 n1, n2; // sample size for each weight group
+ double w = 0.; // weight or variance of items of one color
+ double w1, w2; // mean weight of each weight group
+ double wsum; // total weight of all items of several or all colors
+ double odds; // weight ratio
+ int i, j, k; // loop counters
+ int a, b; // limits for weight group
+ int c, c1, c2; // color index
+ int colors2; // reduced number of colors, number of entries in osource
+ int ngibbs; // number of scans in Gibbs sampler
+ int invert = 0; // 1 if symmetry transformation used
+
+ // check validity of parameters
+ if (n < 0 || colors < 0 || colors > MAXCOLORS) FatalError("Parameter out of range in function MultiFishersNCHyp");
+ if (colors == 0) return;
+ if (n == 0) {for (i=0; i<colors; i++) destination[i] = 0; return;}
+
+ // check validity of array parameters
+ for (i=0, msum=0; i < colors; i++) {
+ m = source[i]; w = weights[i];
+ if (m < 0 || w < 0) FatalError("Parameter negative in function MultiFishersNCHyp");
+ if (w) msum += m;}
+
+ // sort by weight, heaviest first
+ for (i=0; i < colors; i++) order1[i] = order3[i] = i;
+ for (i=0; i < colors-1; i++) {
+ c = order1[i]; k = i;
+ w = weights[c]; if (source[c]==0) w = 0;
+ for (j=i+1; j < colors; j++) {
+ c2 = order1[j];
+ if (weights[c2] > w && source[c2]) {
+ w = weights[c2]; k = j;}}
+ order1[i] = order1[k]; order1[k] = c;}
+
+ // Skip any items with zero weight
+ // this solves all problems with zero weights
+ while (colors && (weights[c=order1[colors-1]]==0 || source[c]==0)) {
+ colors--; destination[c] = 0;}
+
+ // check if we are taking all, or too many, balls
+ if (n >= msum) {
+ if (n > msum) FatalError("Taking more items than there are in function MultiFishersNCHyp");
+ for (i = 0; i < colors; i++) {c = order1[i]; destination[c] = source[c];}
+ return;}
+
+ if (n > msum / 2) {
+ // improve accuracy by symmetry transformation
+ for (i=0, j=colors-1; i < j; i++, j--) { // reverse order list
+ c = order1[i]; order1[i] = order1[j]; order1[j] = c;}
+ n = msum - n; invert = 1;}
+
+ // copy source and weights into ordered lists and pool together colors with same weight
+ for (i=0, c2=-1; i < colors; i++) {
+ c = order1[i];
+ if (i==0 || weights[c] != w) {
+ c2++;
+ x = source[c];
+ oweights[c2] = w = invert ? 1./weights[c] : weights[c];}
+ else {
+ x += source[c];}
+ osource[c2] = x;
+ order2[i] = c2;
+ osample[c2] = 0;}
+ colors2 = c2 + 1;
+
+ // simple cases
+ if (colors2 == 1) osample[0] = n;
+ if (colors2 == 2) {
+ x = FishersNCHyp(n, osource[0], msum, oweights[0]/oweights[1]);
+ osample[0] = x; osample[1] = n - x;}
+
+ if (colors2 > 2) {
+
+ // divide weights into two groups, heavy and light
+ a = 0; b = colors2-1;
+ w = sqrt(oweights[0] * oweights[colors2-1]);
+ do {
+ c = (a + b) / 2;
+ if (oweights[c] > w) a = c; else b = c;}
+ while (b > a + 1);
+ a = 0; // heavy group goes from a to b-1, light group goes from b to colors2-1
+
+ // calculate mean weight for heavy group
+ for (i=a, m1=0, wsum=0; i < b; i++) {
+ m1 += osource[i]; wsum += oweights[i] * osource[i];}
+ w1 = wsum / m1;
+
+ // calculate mean weight for light group
+ for (i=b, m2=0, wsum=0; i < colors2; i++) {
+ m2 += osource[i]; wsum += oweights[i] * osource[i];}
+ w2 = wsum / m2;
+
+ // split sample n into heavy (n1) and light (n2) groups
+ n1 = FishersNCHyp(n, m1, m1+m2, w1/w2);
+ n2 = n - n1;
+ n0 = n1;
+
+ // loop twice, for the two groops
+ for (k=0; k < 2; k++) {
+
+ // split group into single colors by calling FishersNCHyp b-a-1 times
+ for (i = a; i < b-1; i++) {
+ m = osource[i]; w = oweights[i];
+
+ // calculate mean weight of remaining colors
+ for (j=i+1, msum=0, wsum=0; j < b; j++) {
+ m1 = osource[j]; w1 = oweights[j];
+ msum += m1; wsum += m1 * w1;}
+
+ // split out color i
+ if (w == w1) {
+ x = Hypergeometric(n0, m, msum + m);}
+ else {
+ if (wsum == 0) {
+ x = n0;}
+ else {
+ odds = w * msum / wsum;
+ x = FishersNCHyp(n0, m, msum + m, odds);}}
+ osample[i] += x;
+ n0 -= x;}
+
+ // get the last color in the group
+ osample[i] += n0;
+
+ // set parameters for second group
+ a = b; b = colors2; n0 = n2;}
+
+ // calculate variance
+ CMultiFishersNCHypergeometric(n, osource, oweights, colors2).variance(var);
+
+ // sort again, this time by variance
+ for (i=0; i < colors2-1; i++) {
+ c = order3[i]; k = i;
+ w = var[c];
+ for (j=i+1; j < colors2; j++) {
+ c2 = order3[j];
+ if (var[c2] > w) {
+ w = var[c2]; k = j;}}
+ order3[i] = order3[k]; order3[k] = c;}
+
+ // determine number of scans (not fine-tuned):
+ ngibbs = 4; if (accuracy < 1E-6) ngibbs = 6; if (colors2 > 5) ngibbs++;
+
+ // Gibbs sampler
+ for (k = 0; k < ngibbs; k++) {
+ for (i = 0; i < colors2; i++) {
+ c1 = order3[i];
+ j = i + 1; if (j == colors2) j = 0;
+ c2 = order3[j];
+ n1 = osample[c1] + osample[c2];
+ x = FishersNCHyp(n1, osource[c1], osource[c1]+osource[c2], oweights[c1]/oweights[c2]);
+ osample[c1] = x;
+ osample[c2] = n1 - x;}}}
+
+ if (invert) {
+ // reverse symmetry transformation on result
+ for (i=0; i < colors2; i++) {
+ osample[i] = osource[i] - osample[i];}}
+
+ // un-sort sample into destination
+ for (i=0; i < colors; i++) {
+ c1 = order1[i]; c2 = order2[i];
+ if (source[c1] == osource[c2]) {
+ destination[c1] = osample[c2];}
+ else {
+ x = Hypergeometric(osample[c2], source[c1], osource[c2]);
+ destination[c1] = x;
+ osample[c2] -= x;
+ osource[c2] -= source[c1];}}}
diff --git a/src/stocR.cpp b/src/stocR.cpp
new file mode 100644
index 0000000..39a2530
--- /dev/null
+++ b/src/stocR.cpp
@@ -0,0 +1,25 @@
+/*************************** stocR.cpp **********************************
+* Author: Agner Fog
+* Date created: 2006
+* Last modified: 2011-08-05
+* Project: BiasedUrn
+* Source URL: www.agner.org/random
+*
+* Description:
+* Interface of non-uniform random number generators to R-language implementation.
+* This file contains source code for the class StocRBase defined in stocR.h.
+*
+* Copyright 2006-2011 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include "stocc.h" // class definition
+
+/***********************************************************************
+Fatal error exit (Replaces userintf.cpp)
+***********************************************************************/
+
+void FatalError(const char * ErrorText) {
+ // This function outputs an error message and aborts the program.
+ error("%s", ErrorText); // Error exit in R.DLL
+}
diff --git a/src/stocR.h b/src/stocR.h
new file mode 100644
index 0000000..75c6f56
--- /dev/null
+++ b/src/stocR.h
@@ -0,0 +1,96 @@
+/**************************** STOCR.H ************************ 2006-10-21 AF *
+*
+* This file defines additions to the C++ library of non-uniform random number
+* generators for the R-language interface.
+*
+*
+* class StocRBase
+* ===============
+* This class replaces the base classes for class StochasticLib3 when used for
+* the R-language interface.
+* Member functions:
+*
+* double Normal(double m, double s);
+* Normal distribution with mean m and standard deviation s.
+*
+* int32 Hypergeometric (int32 n, int32 m, int32 N);
+* Hypergeometric distribution. Taking n items out N, m of which are colored.
+*
+*
+*
+* source code:
+* ============
+* The code for EndOfProgram and FatalError is found in the file userintf.cpp.
+* The code for the functions in StochasticLib1 is found in the file stoc1.cpp.
+* The code for the functions in StochasticLib2 is found in the file stoc2.cpp.
+* The code for the functions in StochasticLib3 is found in the file stoc3.cpp.
+* The code for the functions in CWalleniusNCHypergeometric,
+* CMultiWalleniusNCHypergeometric and CMultiWalleniusNCHypergeometricMoments
+* is found in the file wnchyppr.cpp.
+* The code for the functions in CFishersNCHypergeometric and
+* CMultiFishersNCHypergeometric is found in the file fnchyppr.cpp
+* LnFac is found in stoc1.cpp.
+* Erf is found in wnchyppr.cpp.
+*
+*
+* Examples:
+* =========
+*
+* Documentation:
+* ==============
+* The file stocc.htm contains further instructions.
+*
+* The file distrib.pdf contains definitions of the standard statistic distributions:
+* Bernoulli, Normal, Poisson, Binomial, Hypergeometric, Multinomial, MultiHypergeometric.
+*
+* The file sampmet.pdf contains theoretical descriptions of the methods used
+* for sampling from these distributions.
+*
+* The file nchyp.pdf, available from www.agner.org/random/, contains
+* definitions of the univariate and multivariate Wallenius and Fisher's
+* noncentral hypergeometric distributions and theoretical explanations of
+* the methods for calculating and sampling from these.
+*
+* � 2006 Agner Fog. GNU General Public License www.gnu.org/copyleft/gpl.html
+*******************************************************************************/
+
+#ifndef STOC_R_H
+#define STOC_R_H
+
+#include <R.h>
+#include <Rinternals.h>
+
+// Declaration specification for exported functions
+#if defined(_WIN32) || defined(__WINDOWS__)
+ #define REXPORTS extern "C" __declspec(dllexport)
+#else
+ #define REXPORTS extern "C"
+#endif
+
+
+/***********************************************************************
+ Class StochasticLib1
+***********************************************************************/
+
+class StocRBase {
+ // This class is used as base class for the random variate generating
+ // classes when used for the R-language interface
+ // Encapsulates the random number generator in R.DLL.
+public:
+ StocRBase(int32 seed) {} // Constructor
+ static void InitRan() { // Call this before first random number
+ GetRNGstate();} // From R.DLL
+ static void EndRan() { // Call this after last random number
+ PutRNGstate();} // From R.DLL
+ double Random() { // output random float number in the interval 0 <= x < 1
+ return unif_rand();} // From R.DLL
+ double Normal(double m, double s) { // normal distribution
+ return norm_rand()*s + m;} // From R.DLL
+ int32 Hypergeometric(int32 n, int32 m, int32 N); // hypergeometric distribution (stocR.cpp)
+protected:
+ int32 HypInversionMod (int32 n, int32 M, int32 N); // hypergeometric by inversion searching from mode
+ int32 HypRatioOfUnifoms (int32 n, int32 M, int32 N);// hypergeometric by ratio of uniforms method
+ static double fc_lnpk(int32 k, int32 N_Mn, int32 M, int32 n); // used by Hypergeometric
+};
+
+#endif
diff --git a/src/stocc.h b/src/stocc.h
new file mode 100644
index 0000000..7b190b1
--- /dev/null
+++ b/src/stocc.h
@@ -0,0 +1,554 @@
+/***************************** stocc.h **********************************
+* Author: Agner Fog
+* Date created: 2004-01-08
+* Last modified: 2011-08-05
+* Project: randomc.h
+* Source URL: www.agner.org/random
+*
+* Description:
+* This file contains function prototypes and class declarations for the C++
+* library of non-uniform random number generators. Most functions are fast and
+* accurate, even for extreme values of the parameters.
+*
+*
+* functions without classes:
+* ==========================
+*
+* void EndOfProgram(void);
+* System-specific exit code. You may modify this to make it fit your
+* user interface.
+*
+* void FatalError(const char * ErrorText);
+* Used for outputting error messages from the other functions and classes.
+* You may have to modify this function to make it fit your user interface.
+*
+* double Erf (double x);
+* Calculates the error function, which is the integral of the normal distribution.
+*
+* double LnFac(int32 n);
+* Calculates the natural logarithm of the factorial of n.
+*
+*
+* class StochasticLib1:
+* ====================
+* This class can be derived from any of the uniform random number generators
+* defined in randomc.h. StochasticLib1 provides the following non-uniform random
+* variate generators:
+*
+* int Bernoulli(double p);
+* Bernoulli distribution. Gives 0 or 1 with probability 1-p and p.
+*
+* double Normal(double m, double s);
+* Normal distribution with mean m and standard deviation s.
+*
+* int32 Poisson (double L);
+* Poisson distribution with mean L.
+*
+* int32 Binomial (int32 n, double p);
+* Binomial distribution. n trials with probability p.
+*
+* int32 Hypergeometric (int32 n, int32 m, int32 N);
+* Hypergeometric distribution. Taking n items out N, m of which are colored.
+*
+* void Multinomial (int32 * destination, double * source, int32 n, int colors);
+* void Multinomial (int32 * destination, int32 * source, int32 n, int colors);
+* Multivariate binomial distribution.
+*
+* void MultiHypergeometric (int32 * destination, int32 * source, int32 n, int colors);
+* Multivariate hypergeometric distribution.
+*
+* void Shuffle(int * list, int min, int n);
+* Shuffle a list of integers.
+*
+*
+* class StochasticLib2:
+* =====================
+* This class is derived from class StochasticLib1. It redefines the functions
+* Poisson, Binomial and HyperGeometric.
+* In StochasticLib1, these functions are optimized for being called with
+* parameters that vary. In StochasticLib2, the same functions are optimized
+* for being called repeatedly with the same parameters. If your parameters
+* seldom vary, then StochasticLib2 is faster. The two classes use different
+* calculation methods, both of which are accurate.
+*
+*
+* class StochasticLib3:
+* =====================
+* This class can be derived from either StochasticLib1 or StochasticLib2,
+* whichever is preferred. It contains functions for generating variates with
+* the univariate and multivariate Wallenius' and Fisher's noncentral
+* hypergeometric distributions.
+*
+* int32 WalleniusNCHyp (int32 n, int32 m, int32 N, double odds);
+* Sampling from Wallenius' noncentral hypergeometric distribution, which is
+* what you get when taking n items out N, m of which are colored, without
+* replacement, with bias.
+*
+* int32 FishersNCHyp (int32 n, int32 m, int32 N, double odds);
+* Sampling from Fisher's noncentral hypergeometric distribution which is the
+* conditional distribution of independent binomial variates given their sum n.
+*
+* void MultiWalleniusNCHyp (int32 * destination, int32 * source, double * weights, int32 n, int colors);
+* Sampling from multivariate Wallenius' noncentral hypergeometric distribution.
+*
+* void MultiFishersNCHyp (int32 * destination, int32 * source, double * weights, int32 n, int colors);
+* Sampling from multivariate Fisher's noncentral hypergeometric distribution.
+*
+*
+* Uniform random number generators (integer and float) are also available, as
+* these are inherited from the random number generator class that is the base
+* class of StochasticLib1.
+*
+*
+* class CWalleniusNCHypergeometric
+* ================================
+* This class implements various methods for calculating the probability
+* function and the mean and variance of the univariate Wallenius' noncentral
+* hypergeometric distribution. It is used by StochasticLib3 and can also be
+* used independently.
+*
+*
+* class CMultiWalleniusNCHypergeometric
+* =====================================
+* This class implements various methods for calculating the probability func-
+* tion and the mean of the multivariate Wallenius' noncentral hypergeometric
+* distribution. It is used by StochasticLib3 and can also be used independently.
+*
+*
+* class CMultiWalleniusNCHypergeometricMoments
+* ============================================
+* This class calculates the exact mean and variance of the multivariate
+* Wallenius' noncentral hypergeometric probability distribution.
+*
+*
+* class CFishersNCHypergeometric
+* ==============================
+* This class calculates the probability function and the mean and variance
+* of Fisher's noncentral hypergeometric distribution.
+*
+*
+* class CMultiFishersNCHypergeometric
+* ===================================
+* This class calculates the probability function and the mean and variance
+* of the multivariate Fisher's noncentral hypergeometric distribution.
+*
+*
+* source code:
+* ============
+* The code for EndOfProgram and FatalError is found in the file userintf.cpp.
+* The code for the functions in StochasticLib1 is found in the file stoc1.cpp.
+* The code for the functions in StochasticLib2 is found in the file stoc2.cpp.
+* The code for the functions in StochasticLib3 is found in the file stoc3.cpp.
+* The code for the functions in CWalleniusNCHypergeometric,
+* CMultiWalleniusNCHypergeometric and CMultiWalleniusNCHypergeometricMoments
+* is found in the file wnchyppr.cpp.
+* The code for the functions in CFishersNCHypergeometric and
+* CMultiFishersNCHypergeometric is found in the file fnchyppr.cpp
+* LnFac is found in stoc1.cpp.
+* Erf is found in wnchyppr.cpp.
+*
+*
+* Examples:
+* =========
+* The file ex-stoc.cpp contains an example of how to use this class library.
+*
+* The file ex-cards.cpp contains an example of how to shuffle a list of items.
+*
+* The file ex-lotto.cpp contains an example of how to generate a sequence of
+* random integers where no number can occur more than once.
+*
+* The file testbino.cpp contains an example of sampling from the binomial distribution.
+*
+* The file testhype.cpp contains an example of sampling from the hypergeometric distribution.
+*
+* The file testpois.cpp contains an example of sampling from the poisson distribution.
+*
+* The file testwnch.cpp contains an example of sampling from Wallenius noncentral hypergeometric distribution.
+*
+* The file testfnch.cpp contains an example of sampling from Fisher's noncentral hypergeometric distribution.
+*
+* The file testmwnc.cpp contains an example of sampling from the multivariate Wallenius noncentral hypergeometric distribution.
+*
+* The file testmfnc.cpp contains an example of sampling from the multivariate Fisher's noncentral hypergeometric distribution.
+*
+* The file evolc.zip contains examples of how to simulate biological evolution using this class library.
+*
+*
+* Documentation:
+* ==============
+* The file stocc.htm contains further instructions.
+*
+* The file distrib.pdf contains definitions of the standard statistic distributions:
+* Bernoulli, Normal, Poisson, Binomial, Hypergeometric, Multinomial, MultiHypergeometric.
+*
+* The file sampmet.pdf contains theoretical descriptions of the methods used
+* for sampling from these distributions.
+*
+* The file nchyp.pdf, available from www.agner.org/random/, contains
+* definitions of the univariate and multivariate Wallenius and Fisher's
+* noncentral hypergeometric distributions and theoretical explanations of
+* the methods for calculating and sampling from these.
+*
+* � 2002-2011 Agner Fog. GNU General Public License www.gnu.org/copyleft/gpl.html
+*******************************************************************************/
+
+#ifndef STOCC_H
+#define STOCC_H
+
+#include "randomc.h"
+
+#ifdef R_BUILD
+ #include "stocR.h" // Include this when building R-language interface
+#endif
+
+
+/***********************************************************************
+ Choose which uniform random number generator to base these classes on
+***********************************************************************/
+
+// STOC_BASE defines which base class to use for the non-uniform
+// random number generator classes StochasticLib1, 2, and 3.
+
+#ifndef STOC_BASE
+ #ifdef R_BUILD
+ // Inherit from StocRBase when building for R-language interface
+ #define STOC_BASE StocRBase
+ #else
+ #define STOC_BASE TRandomMersenne
+ // Or choose any other random number generator base class:
+ //#define STOC_BASE TRanrotWGenerator
+ //#define STOC_BASE TRandomMotherOfAll
+ #endif
+#endif
+
+/***********************************************************************
+ Other simple functions
+***********************************************************************/
+
+double LnFac(int32 n); // log factorial (stoc1.cpp)
+double LnFacr(double x); // log factorial of non-integer (wnchyppr.cpp)
+double FallingFactorial(double a, double b); // Falling factorial (wnchyppr.cpp)
+double Erf (double x); // error function (wnchyppr.cpp)
+int32 FloorLog2(float x); // floor(log2(x)) for x > 0 (wnchyppr.cpp)
+int NumSD (double accuracy); // used internally for determining summation interval
+
+
+/***********************************************************************
+ Constants and tables
+***********************************************************************/
+
+// Maximum number of colors in the multivariate distributions
+#ifndef MAXCOLORS
+ #define MAXCOLORS 32 // You may change this value
+#endif
+
+// constant for LnFac function:
+static const int FAK_LEN = 1024; // length of factorial table
+
+// The following tables are tables of residues of a certain expansion
+// of the error function. These tables are used in the Laplace method
+// for calculating Wallenius' noncentral hypergeometric distribution.
+// There are ERFRES_N tables covering desired precisions from
+// 2^(-ERFRES_B) to 2^(-ERFRES_E). Only the table that matches the
+// desired precision is used. The tables are defined in erfres.h which
+// is included in wnchyppr.cpp.
+
+// constants for ErfRes tables:
+static const int ERFRES_B = 16; // begin: -log2 of lowest precision
+static const int ERFRES_E = 40; // end: -log2 of highest precision
+static const int ERFRES_S = 2; // step size from begin to end
+static const int ERFRES_N = (ERFRES_E-ERFRES_B)/ERFRES_S+1; // number of tables
+static const int ERFRES_L = 48; // length of each table
+
+// tables of error function residues:
+extern "C" double ErfRes [ERFRES_N][ERFRES_L];
+
+// number of std. deviations to include in integral to obtain desired precision:
+extern "C" double NumSDev[ERFRES_N];
+
+
+/***********************************************************************
+ Class StochasticLib1
+***********************************************************************/
+
+class StochasticLib1 : public STOC_BASE {
+ // This class encapsulates the random variate generating functions.
+ // May be derived from any of the random number generators.
+public:
+ StochasticLib1 (int seed); // constructor
+ int Bernoulli(double p); // bernoulli distribution
+ #ifndef R_BUILD
+ double Normal(double m, double s); // normal distribution
+ #endif
+ int32 Poisson (double L); // poisson distribution
+ int32 Binomial (int32 n, double p); // binomial distribution
+ int32 Hypergeometric (int32 n, int32 m, int32 N); // hypergeometric distribution
+ void Multinomial (int32 * destination, double * source, int32 n, int colors); // multinomial distribution
+ void Multinomial (int32 * destination, int32 * source, int32 n, int colors); // multinomial distribution
+ void MultiHypergeometric (int32 * destination, int32 * source, int32 n, int colors); // multivariate hypergeometric distribution
+ void Shuffle(int * list, int min, int n); // shuffle integers
+
+ // functions used internally
+protected:
+ static double fc_lnpk(int32 k, int32 N_Mn, int32 M, int32 n); // used by Hypergeometric
+
+ // subfunctions for each approximation method
+ int32 PoissonInver(double L); // poisson by inversion
+ int32 PoissonRatioUniforms(double L); // poisson by ratio of uniforms
+ int32 PoissonLow(double L); // poisson for extremely low L
+ int32 BinomialInver (int32 n, double p); // binomial by inversion
+ int32 BinomialRatioOfUniforms (int32 n, double p); // binomial by ratio of uniforms
+ int32 HypInversionMod (int32 n, int32 M, int32 N); // hypergeometric by inversion searching from mode
+ int32 HypRatioOfUnifoms (int32 n, int32 M, int32 N);// hypergeometric by ratio of uniforms method
+
+ // variables used by Normal distribution
+ double normal_x2; int normal_x2_valid;
+};
+
+
+/***********************************************************************
+Class StochasticLib2
+***********************************************************************/
+
+class StochasticLib2 : public StochasticLib1 {
+ // derived class, redefining some functions
+public:
+ int32 Poisson (double L); // poisson distribution
+ int32 Binomial (int32 n, double p); // binomial distribution
+ int32 Hypergeometric (int32 n, int32 M, int32 N); // hypergeometric distribution
+ StochasticLib2(int seed):StochasticLib1(seed){}; // constructor
+
+ // subfunctions for each approximation method:
+protected:
+ int32 PoissonModeSearch(double L); // poisson by search from mode
+ int32 PoissonPatchwork(double L); // poisson by patchwork rejection
+ static double PoissonF(int32 k, double l_nu, double c_pm); // used by PoissonPatchwork
+ int32 BinomialModeSearch(int32 n, double p); // binomial by search from mode
+ int32 BinomialPatchwork(int32 n, double p); // binomial by patchwork rejection
+ double BinomialF(int32 k, int32 n, double l_pq, double c_pm); // used by BinomialPatchwork
+ int32 HypPatchwork (int32 n, int32 M, int32 N); // hypergeometric by patchwork rejection
+};
+
+
+/***********************************************************************
+Class StochasticLib3
+***********************************************************************/
+
+class StochasticLib3 : public StochasticLib1 {
+ // This class can be derived from either StochasticLib1 or StochasticLib2.
+ // Adds more probability distributions
+public:
+ StochasticLib3(int seed); // constructor
+ void SetAccuracy(double accur); // define accuracy of calculations
+ int32 WalleniusNCHyp (int32 n, int32 m, int32 N, double odds); // Wallenius noncentral hypergeometric distribution
+ int32 FishersNCHyp (int32 n, int32 m, int32 N, double odds); // Fisher's noncentral hypergeometric distribution
+ void MultiWalleniusNCHyp (int32 * destination, int32 * source, double * weights, int32 n, int colors); // multivariate Wallenius noncentral hypergeometric distribution
+ void MultiComplWalleniusNCHyp (int32 * destination, int32 * source, double * weights, int32 n, int colors); // multivariate complementary Wallenius noncentral hypergeometric distribution
+ void MultiFishersNCHyp (int32 * destination, int32 * source, double * weights, int32 n, int colors); // multivariate Fisher's noncentral hypergeometric distribution
+ // subfunctions for each approximation method
+protected:
+ int32 WalleniusNCHypUrn (int32 n, int32 m, int32 N, double odds); // WalleniusNCHyp by urn model
+ int32 WalleniusNCHypInversion (int32 n, int32 m, int32 N, double odds); // WalleniusNCHyp by inversion method
+ int32 WalleniusNCHypTable (int32 n, int32 m, int32 N, double odds); // WalleniusNCHyp by table method
+ int32 WalleniusNCHypRatioOfUnifoms (int32 n, int32 m, int32 N, double odds); // WalleniusNCHyp by ratio-of-uniforms
+ int32 FishersNCHypInversion (int32 n, int32 m, int32 N, double odds); // FishersNCHyp by inversion
+ int32 FishersNCHypRatioOfUnifoms (int32 n, int32 m, int32 N, double odds); // FishersNCHyp by ratio-of-uniforms
+ // variables
+ double accuracy; // desired accuracy of calculations
+};
+
+
+/***********************************************************************
+Class CWalleniusNCHypergeometric
+***********************************************************************/
+
+class CWalleniusNCHypergeometric {
+ // This class contains methods for calculating the univariate
+ // Wallenius' noncentral hypergeometric probability function
+public:
+ CWalleniusNCHypergeometric(int32 n, int32 m, int32 N, double odds, double accuracy=1.E-8); // constructor
+ void SetParameters(int32 n, int32 m, int32 N, double odds); // change parameters
+ double probability(int32 x); // calculate probability function
+ int32 MakeTable(double * table, int32 MaxLength, int32 * xfirst, int32 * xlast, double cutoff = 0.); // make table of probabilities
+ double mean(void); // approximate mean
+ double variance(void); // approximate variance (poor approximation)
+ int32 mode(void); // calculate mode
+ double moments(double * mean, double * var); // calculate exact mean and variance
+ int BernouilliH(int32 x, double h, double rh, StochasticLib1 *sto); // used by rejection method
+
+ // implementations of different calculation methods
+protected:
+ double recursive(void); // recursive calculation
+ double binoexpand(void); // binomial expansion of integrand
+ double laplace(void); // Laplace's method with narrow integration interval
+ double integrate(void); // numerical integration
+
+ // other subfunctions
+ double lnbico(void); // natural log of binomial coefficients
+ void findpars(void); // calculate r, w, E
+ double integrate_step(double a, double b); // used by integrate()
+ double search_inflect(double t_from, double t_to); // used by integrate()
+
+ // parameters
+ double omega; // Odds
+ int32 n, m, N, x; // Parameters
+ int32 xmin, xmax; // Minimum and maximum x
+ double accuracy; // Desired precision
+ // parameters used by lnbico
+ int32 xLastBico;
+ double bico, mFac, xFac;
+ // parameters generated by findpars and used by probability, laplace, integrate:
+ double r, rd, w, wr, E, phi2d;
+ int32 xLastFindpars;
+};
+
+
+/***********************************************************************
+Class CMultiWalleniusNCHypergeometric
+***********************************************************************/
+
+class CMultiWalleniusNCHypergeometric {
+ // This class encapsulates the different methods for calculating the
+ // multivariate Wallenius noncentral hypergeometric probability function
+public:
+ CMultiWalleniusNCHypergeometric(int32 n, int32 * m, double * odds, int colors, double accuracy=1.E-8); // constructor
+ void SetParameters(int32 n, int32 * m, double * odds, int colors); // change parameters
+ double probability(int32 * x); // calculate probability function
+ void mean(double * mu); // calculate approximate mean
+ void variance(double * var, double * mean = 0); // calculate approximate variance and mean
+
+ // implementations of different calculation methods
+protected:
+ double binoexpand(void); // binomial expansion of integrand
+ double laplace(void); // Laplace's method with narrow integration interval
+ double integrate(void); // numerical integration
+
+ // other subfunctions
+ double lnbico(void); // natural log of binomial coefficients
+ void findpars(void); // calculate r, w, E
+ double integrate_step(double a, double b); // used by integrate()
+ double search_inflect(double t_from, double t_to); // used by integrate()
+
+ // parameters
+ double * omega;
+ double accuracy;
+ int32 n, N;
+ int32 * m, * x;
+ int colors;
+ int Dummy_align;
+ // parameters generated by findpars and used by probability, laplace, integrate:
+ double r, rd, w, wr, E, phi2d;
+ // generated by lnbico
+ double bico;
+};
+
+
+/***********************************************************************
+Class CMultiWalleniusNCHypergeometricMoments
+***********************************************************************/
+
+class CMultiWalleniusNCHypergeometricMoments: public CMultiWalleniusNCHypergeometric {
+ // This class calculates the exact mean and variance of the multivariate
+ // Wallenius noncentral hypergeometric distribution by calculating all the
+ // possible x-combinations with probability < accuracy
+public:
+ CMultiWalleniusNCHypergeometricMoments(int32 n, int32 * m, double * odds, int colors, double accuracy=1.E-8)
+ : CMultiWalleniusNCHypergeometric(n, m, odds, colors, accuracy) {};
+ double moments(double * mean, double * var, int32 * combinations = 0);
+
+protected:
+ // functions used internally
+ double loop(int32 n, int c); // recursive loops
+ // data
+ int32 xi[MAXCOLORS]; // x vector to calculate probability of
+ int32 xm[MAXCOLORS]; // rounded approximate mean of x[i]
+ int32 remaining[MAXCOLORS]; // number of balls of color > c in urn
+ double sx[MAXCOLORS]; // sum of x*f(x)
+ double sxx[MAXCOLORS]; // sum of x^2*f(x)
+ int32 sn; // number of combinations
+};
+
+
+/***********************************************************************
+Class CFishersNCHypergeometric
+***********************************************************************/
+
+class CFishersNCHypergeometric {
+ // This class contains methods for calculating the univariate Fisher's
+ // noncentral hypergeometric probability function
+public:
+ CFishersNCHypergeometric(int32 n, int32 m, int32 N, double odds, double accuracy = 1E-8); // constructor
+ double probability(int32 x); // calculate probability function
+ double probabilityRatio(int32 x, int32 x0); // calculate probability f(x)/f(x0)
+ double MakeTable(double * table, int32 MaxLength, int32 * xfirst, int32 * xlast, double cutoff = 0.); // make table of probabilities
+ double mean(void); // calculate approximate mean
+ double variance(void); // approximate variance
+ int32 mode(void); // calculate mode (exact)
+ double moments(double * mean, double * var); // calculate exact mean and variance
+
+protected:
+ double lng(int32 x); // natural log of proportional function
+
+ // parameters
+ double odds; // odds ratio
+ double logodds; // ln odds ratio
+ double accuracy; // accuracy
+ int32 n, m, N; // Parameters
+ int32 xmin, xmax; // minimum and maximum of x
+
+ // parameters used by subfunctions
+ int32 xLast;
+ double mFac, xFac; // log factorials
+ double scale; // scale to apply to lng function
+ double rsum; // reciprocal sum of proportional function
+ int ParametersChanged;
+};
+
+
+/***********************************************************************
+Class CMultiFishersNCHypergeometric
+***********************************************************************/
+
+class CMultiFishersNCHypergeometric {
+ // This class contains functions for calculating the multivariate
+ // Fisher's noncentral hypergeometric probability function and its mean and
+ // variance. Warning: the time consumption for first call to
+ // probability or moments is proportional to the total number of
+ // possible x combinations, which may be extreme!
+public:
+ CMultiFishersNCHypergeometric(int32 n, int32 * m, double * odds, int colors, double accuracy = 1E-9); // constructor
+ double probability(int32 * x); // calculate probability function
+ void mean(double * mu); // calculate approximate mean
+ void variance(double * var, double * mean = 0); // calculate approximate variance and mean
+ double moments(double * mean, double * var, int32 * combinations = 0); // calculate exact mean and variance
+protected:
+ void mean1(double * mu); // calculate approximate mean except for unused colors
+ double lng(int32 * x); // natural log of proportional function
+ void SumOfAll(void); // calculates sum of proportional function for all x combinations
+ double loop(int32 n, int c); // recursive loops used by SumOfAll
+ double odds[MAXCOLORS]; // copy of all nonzero odds
+ double logodds[MAXCOLORS]; // log odds
+ int32 m[MAXCOLORS]; // copy of all nonzero m
+ int nonzero[MAXCOLORS]; // colors for which m and odds are not zero
+ int32 n; // number of balls to take
+ int32 N; // number of balls in urn
+ int32 Nu; // number of balls in urn with nonzero weight
+ int Colors; // number of colors
+ int reduced; // bit 0: some colors have m=0 or odds=0.
+ // bit 1: all nonzero odds are equal
+ int usedcolors; // number of colors with m > 0 and odds > 0
+ double mFac; // sum of log m[i]!
+ double scale; // scale to apply to lng function
+ double rsum; // reciprocal sum of proportional function
+ double accuracy; // accuracy of calculation
+
+ // data used by used by SumOfAll
+ int32 xi[MAXCOLORS]; // x vector to calculate probability of
+ int32 xm[MAXCOLORS]; // rounded approximate mean of x[i]
+ int32 remaining[MAXCOLORS]; // number of balls of color > c in urn
+ double sx[MAXCOLORS]; // sum of x*f(x) or mean
+ double sxx[MAXCOLORS]; // sum of x^2*f(x) or variance
+ int32 sn; // number of possible combinations of x
+};
+
+#endif
diff --git a/src/urn1.cpp b/src/urn1.cpp
new file mode 100644
index 0000000..038fff9
--- /dev/null
+++ b/src/urn1.cpp
@@ -0,0 +1,1668 @@
+/*************************** urn1.cpp **********************************
+* Author: Agner Fog
+* Date created: 2006
+* Last modified: 2011-08-05
+* Project: BiasedUrn
+* Source URL: www.agner.org/random
+*
+* Description:
+* R interface to univariate noncentral hypergeometric distributions
+*
+* Copyright 2006-2011 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include <R.h>
+#include <Rinternals.h>
+#include "stocc.h"
+
+
+/******************************************************************************
+ dFNCHypergeo
+ Mass function, Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP dFNCHypergeo(
+SEXP rx, // Number of red balls drawn, scalar or vector
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+// ,SEXP rlog // Will return log(p) if TRUE
+) {
+ // Check for vectors
+ if (LENGTH(rx) < 0
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ // || LENGTH(rlog) > 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int *px = INTEGER(rx);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ //int ilog = *LOGICAL(rlog);
+ int nres = LENGTH(rx); // Number of probability values to return
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double factor; // Scale factor
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ int xmin, xmax; // Absolute limits for x
+ int i; // Loop counter
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Make object for calculating probabilities
+ CFishersNCHypergeometric fnc(n, m1, N, odds, prec);
+
+ // Check if it is advantageous to use MakeTable:
+ if (nres > 1 &&
+ (BufferLength = (int)fnc.MakeTable(buffer, 0, &x1, &x2),
+ (uint32)nres > (uint32)BufferLength / 32)) {
+ // Use MakeTable
+ xmin = m1 + n - N; if (xmin < 0) xmin = 0; // Minimum x
+ xmax = n; if (xmax > m1) xmax = m1; // Maximum x
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ factor = 1. / fnc.MakeTable(buffer, BufferLength, &x1, &x2, prec*0.001);
+ // Get probabilities from table
+ for (i = 0; i < nres; i++) {
+ x = px[i];
+ if (x >= x1 && x <= x2) {
+ // x within table
+ presult[i] = buffer[x - x1] * factor; // Get result from table
+ }
+ else if (x >= xmin && x <= xmax) {
+ // Outside table. Result is very small but not 0
+ presult[i] = fnc.probability(x); // Calculate result
+ }
+ else {
+ // Impossible value of x
+ presult[i] = 0.; // Result is 0
+ }
+ // if (ilog) presult[i] = log(presult[i]); // Log desired
+ }
+ }
+ else {
+ // Calculate probabilities one by one
+ for (i = 0; i < nres; i++) {
+ presult[i] = fnc.probability(px[i]); // Probability
+ //if (ilog) presult[i] = log(presult[i]); // Log desired
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ dWNCHypergeo
+ Mass function, Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP dWNCHypergeo(
+SEXP rx, // Number of red balls drawn, scalar or vector
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+// ,SEXP rlog // Will return log(p) if TRUE
+) {
+ // Check for vectors
+ if (LENGTH(rx) < 0
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ // || LENGTH(rlog) > 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int * px = INTEGER(rx);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ //int ilog = *LOGICAL(rlog);
+ int nres = LENGTH(rx); // Number of probability values to return
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ int xmin, xmax; // Absolute limits for x
+ int i; // Loop counter
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Make object for calculating probabilities
+ CWalleniusNCHypergeometric wnc(n, m1, N, odds, prec);
+
+ // Check if it is advantageous to use MakeTable:
+ if (nres > 1 &&
+ (BufferLength = wnc.MakeTable(buffer, 0, &x1, &x2),
+ x1)) {
+ // Use MakeTable
+ xmin = m1 + n - N; if (xmin < 0) xmin = 0; // Minimum x
+ xmax = n; if (xmax > m1) xmax = m1; // Maximum x
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+ // Make table of probabilities
+ wnc.MakeTable(buffer, BufferLength, &x1, &x2, prec*0.001);
+ // Get probabilities from table
+ for (i = 0; i < nres; i++) {
+ x = px[i];
+ if (x >= x1 && x <= x2) {
+ // x within table
+ presult[i] = buffer[x - x1]; // Get result from table
+ }
+ else if (x >= xmin && x <= xmax) {
+ // Outside table. Result is very small but not 0
+ presult[i] = wnc.probability(x); // Calculate result
+ }
+ else {
+ // Impossible value of x
+ presult[i] = 0.; // Result is 0
+ }
+ // if (ilog) presult[i] = log(presult[i]); // Log desired
+ }
+ }
+ else {
+ // Calculate probabilities one by one
+ for (i = 0; i < nres; i++) {
+ presult[i] = wnc.probability(px[i]);
+ //if (ilog) presult[i] = log(presult[i]);
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ pFNCHypergeo
+ Cumulative distribution function for
+ Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP pFNCHypergeo(
+SEXP rx, // Number of red balls drawn, scalar or vector
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision, // Precision of calculation
+SEXP rlower_tail // TRUE: P(X <= x), FALSE: P(X > x)
+) {
+ // Check for vectors
+ if (LENGTH(rx) < 0
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ || LENGTH(rlower_tail) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int * px = INTEGER(rx);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int lower_tail = *LOGICAL(rlower_tail);
+ int nres = LENGTH(rx); // Number of probability values to return
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double factor; // Scale factor
+ double sum; // Used for summation
+ double p; // Probability
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ int xmin, xmax; // Absolute limits for x
+ int xmean; // Approximate mean of x
+ int i; // Loop counter
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // min and max
+ xmin = m1 + n - N; if (xmin < 0) xmin = 0; // Minimum x
+ xmax = n; if (xmax > m1) xmax = m1; // Maximum x
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Make object for calculating probabilities
+ CFishersNCHypergeometric fnc(n, m1, N, odds, prec);
+
+ // Get necessary buffer length
+ BufferLength = (int)fnc.MakeTable(buffer, 0, &x1, &x2, prec * 0.001);
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ factor = 1. / fnc.MakeTable(buffer, BufferLength, &x1, &x2, prec * 0.001);
+
+ // Get mean
+ xmean = (int)(fnc.mean() + 0.5); // Round mean
+
+ // Check for consistency
+ if (xmean < x1 || xmean > x2) {
+ error("Inconsistency. mean = %i, lower limit = %i, upper limit = %i",
+ xmean, x1, x2);
+ }
+
+ // Make left tail of table cumulative:
+ for (x = x1, sum = 0; x <= xmean; x++) sum = buffer[x-x1] += sum;
+
+ // Probabilities for x > xmean are calculated by summation from the
+ // right in order to avoid loss of precision.
+ // Make right tail of table cumulative from the right:
+ for (x = x2, sum = 0; x > xmean; x--) sum = buffer[x-x1] += sum;
+
+ // Loop through x vector
+ for (i = 0; i < nres; i++) {
+ x = px[i]; // Input x value
+ if (x <= xmean) {
+ // Left tail
+ if (x < x1) {
+ p = 0.; // Outside table
+ }
+ else {
+ p = buffer[x-x1] * factor; // Probability from table
+ }
+ if (!lower_tail) p = 1. - p; // Invert if right tail
+ presult[i] = p; // Store result
+ }
+ else {
+ // Right tail
+ if (x >= x2) {
+ p = 0.; // Outside table
+ }
+ else {
+ p = buffer[x-x1+1] * factor; // Probability from table
+ }
+ if (lower_tail) p = 1. - p; // Invert if left tail
+ presult[i] = p; // Store result
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ pWNCHypergeo
+ Cumulative distribution function for
+ Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP pWNCHypergeo(
+SEXP rx, // Number of red balls drawn, scalar or vector
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision, // Precision of calculation
+SEXP rlower_tail // TRUE: P(X <= x), FALSE: P(X > x)
+) {
+ // Check for vectors
+ if (LENGTH(rx) < 0
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ || LENGTH(rlower_tail) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int * px = INTEGER(rx);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int lower_tail = *LOGICAL(rlower_tail);
+ int nres = LENGTH(rx); // Number of probability values to return
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double sum; // Used for summation
+ double p; // Probability
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ int xmin, xmax; // Absolute limits for x
+ int xmean; // Approximate mean of x
+ int i; // Loop counter
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // min and max
+ xmin = m1 + n - N; if (xmin < 0) xmin = 0; // Minimum x
+ xmax = n; if (xmax > m1) xmax = m1; // Maximum x
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Make object for calculating probabilities
+ CWalleniusNCHypergeometric wnc(n, m1, N, odds, prec);
+
+ // Get necessary buffer length
+ BufferLength = wnc.MakeTable(buffer, 0, &x1, &x2, prec * 0.001);
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ wnc.MakeTable(buffer, BufferLength, &x1, &x2, prec * 0.001);
+
+ // Get mean
+ xmean = (int)(wnc.mean() + 0.5); // Round mean
+
+ // Check for consistency
+ if (xmean < x1 || xmean > x2) {
+ error("Inconsistency. mean = %i, lower limit = %i, upper limit = %i",
+ xmean, x1, x2);
+ }
+
+ // Make left tail of table cumulative:
+ for (x = x1, sum = 0; x <= xmean; x++) sum = buffer[x-x1] += sum;
+
+ // Probabilities for x > xmean are calculated by summation from the
+ // right in order to avoid loss of precision.
+ // Make right tail of table cumulative from the right:
+ for (x = x2, sum = 0; x > xmean; x--) sum = buffer[x-x1] += sum;
+
+ // Loop through x vector
+ for (i = 0; i < nres; i++) {
+ x = px[i]; // Input x value
+ if (x <= xmean) {
+ // Left tail
+ if (x < x1) {
+ p = 0.; // Outside table
+ }
+ else {
+ p = buffer[x-x1]; // Probability from table
+ }
+ if (!lower_tail) p = 1. - p; // Invert if right tail
+ presult[i] = p; // Store result
+ }
+ else {
+ // Right tail
+ if (x >= x2) {
+ p = 0.; // Outside table
+ }
+ else {
+ p = buffer[x-x1+1]; // Probability from table
+ }
+ if (lower_tail) p = 1. - p; // Invert if left tail
+ presult[i] = p; // Store result
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ qFNCHypergeo
+ Quantile function for
+ Fisher's NonCentral Hypergeometric distribution.
+ Returns the lowest x for which P(X<=x) >= p when lower.tail = TRUE
+ Returns the lowest x for which P(X >x) <= p when lower.tail = FALSE
+******************************************************************************/
+REXPORTS SEXP qFNCHypergeo(
+SEXP rp, // Cumulative probability
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision, // Precision of calculation
+SEXP rlower_tail // TRUE: P(X <= x), FALSE: P(X > x)
+) {
+ // Check for vectors
+ if (LENGTH(rp) < 0
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ || LENGTH(rlower_tail) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ double* pp = REAL(rp);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int lower_tail = *LOGICAL(rlower_tail);
+ int nres = LENGTH(rp); // Number of probability values to return
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double factor; // Scale factor
+ double sum; // Used for summation
+ double p; // Probability
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ int i; // Loop counter
+ unsigned int a, b, c; // Used in binary search
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ PROTECT(result = allocVector(INTSXP, nres));
+ presult = INTEGER(result);
+
+ // Make object for calculating probabilities
+ CFishersNCHypergeometric fnc(n, m1, N, odds, prec);
+
+ // Get necessary buffer length
+ BufferLength = (int)fnc.MakeTable(buffer, 0, &x1, &x2, prec * 0.001);
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ factor = fnc.MakeTable(buffer, BufferLength, &x1, &x2, prec * 0.001);
+
+ // Make table cumulative:
+ for (x = x1, sum = 0; x <= x2; x++) sum = buffer[x-x1] += sum;
+
+ // Loop through p vector
+ for (i = 0; i < nres; i++) {
+ p = pp[i]; // Input p value
+ if (!R_FINITE(p) || p < 0. || p > 1.) {
+ presult[i] = NA_INTEGER; // Invalid input. Return NA
+ }
+ else {
+ if (!lower_tail) p = 1. - p; // Invert if right tail
+ p *= factor; // Table is scaled by factor
+
+ // Binary search in table
+ a = 0; b = x2 - x1 + 1;
+ while (a < b) {
+ c = (a + b) / 2;
+ if (p <= buffer[c]) {
+ b = c;
+ }
+ else {
+ a = c + 1;
+ }
+ }
+ x = x1 + a;
+ if (x > x2) x = x2; // Prevent values > xmax that occur because of small imprecisions
+ presult[i] = x;
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ qWNCHypergeo
+ Quantile function for
+ Wallenius' NonCentral Hypergeometric distribution.
+ Returns the lowest x for which P(X<=x) >= p when lower.tail = TRUE
+ Returns the lowest x for which P(X >x) <= p when lower.tail = FALSE
+******************************************************************************/
+REXPORTS SEXP qWNCHypergeo(
+SEXP rp, // Cumulative probability
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision, // Precision of calculation
+SEXP rlower_tail // TRUE: P(X <= x), FALSE: P(X > x)
+) {
+ // Check for vectors
+ if (LENGTH(rp) < 0
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ || LENGTH(rlower_tail) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ double* pp = REAL(rp);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int lower_tail = *LOGICAL(rlower_tail);
+ int nres = LENGTH(rp); // Number of probability values to return
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double sum; // Used for summation
+ double p; // Probability
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ int i; // Loop counter
+ unsigned int a, b, c; // Used in binary search
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ PROTECT(result = allocVector(INTSXP, nres));
+ presult = INTEGER(result);
+
+ // Make object for calculating probabilities
+ CWalleniusNCHypergeometric wnc(n, m1, N, odds, prec);
+
+ // Get necessary buffer length
+ BufferLength = wnc.MakeTable(buffer, 0, &x1, &x2, prec * 0.001);
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ wnc.MakeTable(buffer, BufferLength, &x1, &x2, prec * 0.001);
+
+ // Make table cumulative:
+ for (x = x1, sum = 0; x <= x2; x++) sum = buffer[x-x1] += sum;
+
+ // Loop through p vector
+ for (i = 0; i < nres; i++) {
+ p = pp[i]; // Input p value
+ if (!R_FINITE(p) || p < 0. || p > 1.) {
+ presult[i] = NA_INTEGER; // Invalid input. Return NA
+ }
+ else {
+ if (!lower_tail) p = 1. - p; // Invert if right tail
+
+ // Binary search in table
+ a = 0; b = x2 - x1 + 1;
+ while (a < b) {
+ c = (a + b) / 2;
+ if (p <= buffer[c]) {
+ b = c;
+ }
+ else {
+ a = c + 1;
+ }
+ }
+ x = x1 + a;
+ if (x > x2) x = x2; // Prevent values > xmax that occur because of small imprecisions
+ presult[i] = x;
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ rFNCHypergeo
+ Random variate generation function for
+ Fisher's NonCentral Hypergeometric distribution.
+******************************************************************************/
+REXPORTS SEXP rFNCHypergeo(
+SEXP rnran, // Number of random variates desired
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rnran) != 1
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int nran = *INTEGER(rnran); if (LENGTH(rnran) > 1) nran = LENGTH(rnran);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double sum; // Used for summation
+ double u; // Uniform random number
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ unsigned int a, b, c; // Used in binary search
+ int i; // Loop counter
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if (nran <= 0) error("Parameter nran must be positive");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ PROTECT(result = allocVector(INTSXP, nran));
+ presult = INTEGER(result);
+
+ // Make object for generating variates
+ StochasticLib3 sto(0); // Seed is not used
+ sto.SetAccuracy(prec); // Set precision
+ sto.InitRan(); // Initialize RNG in R.dll
+
+ if (nran > 4) {
+ // Check necessary table length
+ CFishersNCHypergeometric fnc(n, m1, N, odds, prec);
+ BufferLength = (int)fnc.MakeTable(buffer, 0, &x1, &x2, prec * 0.001);
+
+ if (BufferLength / 2 < nran) {
+ // It is advantageous to make a table
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ fnc.MakeTable(buffer, BufferLength, &x1, &x2, prec * 0.001);
+
+ // Make table cumulative:
+ for (x = x1, sum = 0; x <= x2; x++) sum = buffer[x-x1] += sum;
+
+ // Loop for each variate
+ for (i = 0; i < nran; i++) {
+
+ // Make uniform random
+ u = sto.Random() * sum;
+
+ // Binary search in table
+ a = 0; b = x2 - x1 + 1;
+ while (a < b) {
+ c = (a + b) / 2;
+ if (u < buffer[c]) {
+ b = c;
+ }
+ else {
+ a = c + 1;
+ }
+ }
+ x = x1 + a;
+ if (x > x2) x = x2; // Prevent values > xmax that occur because of small imprecisions
+ presult[i] = x;
+ }
+ goto FINISHED_R;
+ }
+ }
+
+ // Not using table.
+ // Generate variates one by one
+ for (i = 0; i < nran; i++) {
+ presult[i] = sto.FishersNCHyp(n, m1, N, odds);
+ }
+
+ FINISHED_R:
+ sto.EndRan(); // Return RNG state to R.dll
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ rWNCHypergeo
+ Random variate generation function for
+ Wallenius' NonCentral Hypergeometric distribution.
+******************************************************************************/
+REXPORTS SEXP rWNCHypergeo(
+SEXP rnran, // Number of random variates desired
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rnran) != 1
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int nran = *INTEGER(rnran); if (LENGTH(rnran) > 1) nran = LENGTH(rnran);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int N = m1 + m2; // Total number of balls
+ double* buffer = 0; // Table of probabilities
+ int BufferLength; // Length of table
+ double sum; // Used for summation
+ double u; // Uniform random number
+ int x; // Temporary x
+ int32 x1, x2; // Table limits
+ unsigned int a, b, c; // Used in binary search
+ int i; // Loop counter
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if (nran <= 0) error("Parameter nran must be positive");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ PROTECT(result = allocVector(INTSXP, nran));
+ presult = INTEGER(result);
+
+ // Make object for generating variates
+ StochasticLib3 sto(0); // Seed is not used
+ sto.SetAccuracy(prec); // Set precision
+ sto.InitRan(); // Initialize RNG in R.dll
+
+ if (nran > 4) {
+ // Check necessary table length
+ CWalleniusNCHypergeometric wnc(n, m1, N, odds, prec);
+ BufferLength = (int)wnc.MakeTable(buffer, 0, &x1, &x2, prec * 0.001);
+
+ if (BufferLength / 2 < nran) {
+ // It is advantageous to make a table
+
+ // Allocate buffer
+ buffer = (double*)R_alloc(BufferLength, sizeof(double));
+
+ // Make table of probabilities
+ wnc.MakeTable(buffer, BufferLength, &x1, &x2, prec * 0.001);
+
+ // Make table cumulative:
+ for (x = x1, sum = 0; x <= x2; x++) sum = buffer[x-x1] += sum;
+
+ // Loop for each variate
+ for (i = 0; i < nran; i++) {
+
+ // Make uniform random
+ u = sto.Random() * sum; // sum should be 1.0 but might be slightly less if tails are cut off in table
+
+ // Binary search in table
+ a = 0; b = x2 - x1 + 1;
+ while (a < b) {
+ c = (a + b) / 2;
+ if (u < buffer[c]) {
+ b = c;
+ }
+ else {
+ a = c + 1;
+ }
+ }
+ x = x1 + a;
+ if (x > x2) x = x2; // Prevent values > xmax that occur because of small imprecisions
+ presult[i] = x;
+ }
+ goto FINISHED_R;
+ }
+ }
+
+ // Not using table.
+ // Generate variates one by one
+ for (i = 0; i < nran; i++) {
+ presult[i] = sto.WalleniusNCHyp(n, m1, N, odds);
+ }
+
+ FINISHED_R:
+ sto.EndRan(); // Return RNG state to R.dll
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ momentsFNCHypergeo
+ Calculates the mean or variance of
+ Fisher's NonCentral Hypergeometric distribution.
+******************************************************************************/
+// Uses simple approximations when precision >= 0.1.
+// Uses calculation by enumeration of all non-negligible x values when
+// precision < 0.1.
+// Note that several other approximations have been proposed in the literature.
+// See e.g.:
+// Levin, B. Biometrika, vol. 71, no. 3, 1984, pp. 630-632.
+// Liao, J. Biometrics, vol. 48, no. 3, 1992, pp. 889-892.
+// McCullagh, P. & Nelder, J.A.: Generalized Linear Models, 2'nd ed., 1989.
+
+REXPORTS SEXP momentsFNCHypergeo(
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision, // Precision of calculation
+SEXP rmoment // 1 = mean, 2 = variance
+) {
+ // Check for vectors
+ if (LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int imoment = *INTEGER(rmoment);
+ int N = m1 + m2; // Total number of balls
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (imoment != 1 && imoment != 2) error ("Only moments 1 and 2 supported");
+ if (!R_FINITE(prec) || prec < 0) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, 1));
+ presult = REAL(result);
+
+ // Make object for calculating mean and variance
+ CFishersNCHypergeometric fnc(n, m1, N, odds, prec);
+
+ // Check precision
+ if (prec >= 0.1) {
+ // Simple approximation allowed
+ if (imoment == 1) {
+ *presult = fnc.mean();
+ }
+ else {
+ *presult = fnc.variance();
+ }
+ }
+ else {
+ // Exact calculation required
+ // Values saved from last calculation:
+ static int old_m1 = 0;
+ static int old_m2 = 0;
+ static int old_n = 0;
+ static double old_odds = 0;
+ static double old_prec = 0;
+ static double old_mean = 0;
+ static double old_var = 0;
+
+ if (m1 != old_m1 || m2 != old_m2 || n != old_n
+ || odds != old_odds || prec < old_prec) {
+ // Parameters have changed. Cannot reuse results.
+ // Calculate mean and variance.
+
+ // We are calculating both mean and variance in the same
+ // process. The values are stored for the next call in case
+ // both mean and variance are requested
+ fnc.moments(&old_mean, &old_var);
+
+ // Store parameters for possible reuse in next call
+ old_m1 = m1; old_m2 = m2; old_n = n;
+ old_odds = odds; old_prec = prec;
+ }
+ if (imoment == 1) {
+ // Return mean
+ *presult = old_mean;
+ }
+ else {
+ // Return variance
+ *presult = old_var;
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ momentsWNCHypergeo
+ Calculates the mean or variance of
+ Wallenius' NonCentral Hypergeometric distribution.
+******************************************************************************/
+// Uses simple approximations when precision >= 0.1.
+// Uses calculation by enumeration of all non-negligible x values when
+// precision < 0.1.
+REXPORTS SEXP momentsWNCHypergeo(
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision, // Precision of calculation
+SEXP rmoment // 1 = mean, 2 = variance
+) {
+ // Check for vectors
+ if (LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int imoment = *INTEGER(rmoment);
+ int N = m1 + m2; // Total number of balls
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (imoment != 1 && imoment != 2) error ("Only moments 1 and 2 supported");
+ if (!R_FINITE(prec) || prec < 0) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, 1));
+ presult = REAL(result);
+
+ // Make object for calculating mean and variance
+ CWalleniusNCHypergeometric wnc(n, m1, N, odds, prec);
+
+ // Check precision
+ if (prec >= 0.1) {
+ // Simple approximation allowed
+ if (imoment == 1) {
+ *presult = wnc.mean();
+ }
+ else {
+ *presult = wnc.variance();
+ }
+ }
+ else {
+ // Exact calculation required
+ // Values saved from last calculation:
+ static int old_m1 = 0;
+ static int old_m2 = 0;
+ static int old_n = 0;
+ static double old_odds = 0;
+ static double old_prec = 0;
+ static double old_mean = 0;
+ static double old_var = 0;
+
+ if (m1 != old_m1 || m2 != old_m2 || n != old_n
+ || odds != old_odds || prec < old_prec) {
+ // Parameters have changed. Cannot reuse results.
+ // Calculate mean and variance.
+
+ // We are calculating both mean and variance in the same
+ // process. The values are stored for the next call in case
+ // both mean and variance are requested
+ wnc.moments(&old_mean, &old_var);
+
+ // Store parameters for possible reuse in next call
+ old_m1 = m1; old_m2 = m2; old_n = n;
+ old_odds = odds; old_prec = prec;
+ }
+ if (imoment == 1) {
+ // Return mean
+ *presult = old_mean;
+ }
+ else {
+ // Return variance
+ *presult = old_var;
+ }
+ }
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ modeFNCHypergeo
+ Calculates the mode of
+ Fisher's NonCentral Hypergeometric distribution.
+******************************************************************************/
+REXPORTS SEXP modeFNCHypergeo(
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds // Odds of getting a red ball among one red and one white
+) {
+ // Check for vectors
+ if (LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ int N = m1 + m2; // Total number of balls
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ PROTECT(result = allocVector(INTSXP, 1));
+ presult = INTEGER(result);
+
+ // Calculate mode
+ *presult = CFishersNCHypergeometric(n, m1, N, odds).mode();
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ modeWNCHypergeo
+ Calculates the mode of
+ Wallenius' NonCentral Hypergeometric distribution.
+******************************************************************************/
+REXPORTS SEXP modeWNCHypergeo(
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int N = m1 + m2; // Total number of balls
+
+ // Check validity of parameters
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (n > m2 && odds == 0) error ("Not enough items with nonzero weight");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ PROTECT(result = allocVector(INTSXP, 1));
+ presult = INTEGER(result);
+
+ // Calculate mode
+ *presult = CWalleniusNCHypergeometric(n, m1, N, odds, prec).mode();
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ oddsFNCHypergeo
+ Estimate odds ratio from mean for
+ Fisher's NonCentral Hypergeometric distribution.
+******************************************************************************/
+// Uses Cornfield's approximation. precision is ignored.
+REXPORTS SEXP oddsFNCHypergeo(
+SEXP rmu, // Observed mean of x1
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rmu) < 1
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double prec = *REAL(rprecision);
+ int nres = LENGTH(rmu);
+ int N = m1 + m2; // Total number of balls
+ int i; // Loop counter
+ int err = 0; // Remember any error
+
+ // Check validity of parameters
+ if (nres < 0) error("mu has wrong length");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.05) warning ("Cannot obtain high precision");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Get xmin and xmax
+ int xmin = m1 + n - N; if (xmin < 0) xmin = 0; // Minimum x
+ int xmax = n; if (xmax > m1) xmax = m1; // Maximum x
+
+ // Loop for all mu inputs
+ for (i = 0; i < nres; i++) {
+ double mu = pmu[i];
+
+ // Check limits
+ if (xmin == xmax) {
+ presult[i] = R_NaN; err |= 1; // Indetermined
+ continue;
+ }
+ if (mu <= double(xmin)) {
+ if (mu == double(xmin)) {
+ presult[i] = 0.; err |= 2; // Zero
+ continue;
+ }
+ presult[i] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+ if (mu >= double(xmax)) {
+ if (mu == double(xmax)) {
+ presult[i] = R_PosInf; err |= 4; // Infinite
+ continue;
+ }
+ presult[i] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+
+ // Calculate odds ratio
+ presult[i] = mu * (m2 - n + mu) / ((m1 - mu)*(n - mu));
+ }
+ // Check for errors
+ if (err & 8) error("mu out of range");
+ else if (err & 1) warning("odds is indetermined");
+ else {
+ if (err & 4) warning("odds is infinite");
+ if (err & 2) warning("odds is zero with no precision");
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ oddsWNCHypergeo
+ Estimate odds ratio from mean for
+ Wallenius' NonCentral Hypergeometric distribution.
+******************************************************************************/
+// Uses Manly's approximation. precision is ignored.
+REXPORTS SEXP oddsWNCHypergeo(
+SEXP rmu, // Observed mean of x1
+SEXP rm1, // Number of red balls in urn
+SEXP rm2, // Number of white balls in urn
+SEXP rn, // Number of balls drawn from urn
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rmu) < 1
+ || LENGTH(rm1) != 1
+ || LENGTH(rm2) != 1
+ || LENGTH(rn) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int m1 = *INTEGER(rm1);
+ int m2 = *INTEGER(rm2);
+ int n = *INTEGER(rn);
+ double prec = *REAL(rprecision);
+ int nres = LENGTH(rmu);
+ int N = m1 + m2; // Total number of balls
+ int i; // Loop counter
+ int err = 0; // Remember any error
+
+ // Check validity of parameters
+ if (nres < 0) error("mu has wrong length");
+ if (m1 < 0 || m2 < 0 || n < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > m1 + m2: Taking more items than there are");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.02) warning ("Cannot obtain high precision");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Get xmin and xmax
+ int xmin = m1 + n - N; if (xmin < 0) xmin = 0; // Minimum x
+ int xmax = n; if (xmax > m1) xmax = m1; // Maximum x
+
+ // Loop for all mu inputs
+ for (i = 0; i < nres; i++) {
+ double mu = pmu[i];
+
+ // Check limits
+ if (xmin == xmax) {
+ presult[i] = R_NaN; err |= 1; // Indetermined
+ continue;
+ }
+ if (mu <= double(xmin)) {
+ if (mu == double(xmin)) {
+ presult[i] = 0.; err |= 2; // Zero
+ continue;
+ }
+ presult[i] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+ if (mu >= double(xmax)) {
+ if (mu == double(xmax)) {
+ presult[i] = R_PosInf; err |= 4; // Infinite
+ continue;
+ }
+ presult[i] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+
+ // Calculate odds ratio
+ presult[i] = log(1. - mu / m1) / log(1. - (n-mu)/m2);
+ }
+ // Check for errors
+ if (err & 8) error("mu out of range");
+ else if (err & 1) warning("odds is indetermined");
+ else {
+ if (err & 4) warning("odds is infinite");
+ if (err & 2) warning("odds is zero with no precision");
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ numWNCHypergeo
+ Estimate number of balls of each color from experimental mean for
+ Wallenius' NonCentral Hypergeometric distribution.
+******************************************************************************/
+// Uses Manly's approximation. Precision is ignored.
+/* Calculation method:
+ Manly's approximate equation for the mean is transformed to:
+ log(1-mu1/m1) = omega*(log(1-mu2/(N-m1))
+ This equation is solved by Newton-Raphson iteration
+*/
+REXPORTS SEXP numWNCHypergeo(
+SEXP rmu, // Observed mean of x1
+SEXP rn, // Number of balls drawn from urn
+SEXP rN, // Number of balls in urn before sampling
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rmu) < 1
+ || LENGTH(rn) != 1
+ || LENGTH(rN) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int n = *INTEGER(rn);
+ int N = *INTEGER(rN);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int nres = LENGTH(rmu);
+ int i; // Loop counter
+ int err = 0; // Remember any error
+
+ // Check validity of parameters
+ if (nres < 0) error("mu has wrong length");
+ if (n < 0 || N < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > N: Taking more items than there are");
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.02) warning ("Cannot obtain high precision");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ if (nres == 1) {
+ PROTECT(result = allocVector(REALSXP, 2));
+ }
+ else {
+ PROTECT(result = allocMatrix(REALSXP, 2, nres));
+ }
+ presult = REAL(result);
+
+ // Loop for all mu inputs
+ for (i = 0; i < nres; i++, presult += 2) {
+ double mu = pmu[i];
+
+ // Check limits
+ if (n == 0) {
+ presult[0] = presult[1] = R_NaN;
+ err |= 1; // Indetermined
+ continue;
+ }
+ if (odds == 0.) {
+ presult[0] = presult[1] = R_NaN;
+ if (mu == 0.) err |= 1; // Indetermined
+ else err |= 0x10; // Out of range
+ continue;
+ }
+ if (n == N) { // Known exactly
+ presult[0] = mu;
+ presult[1] = N - mu;
+ continue;
+ }
+ if (mu <= 0.) {
+ if (mu == 0.) {
+ presult[0] = 0; presult[1] = N;
+ err |= 2; // Zero
+ continue;
+ }
+ presult[0] = presult[1] = R_NaN;
+ err |= 8; // Out of range
+ continue;
+ }
+ if (mu >= double(n)) {
+ if (mu == double(n)) {
+ presult[0] = N; presult[1] = 0;
+ err |= 4; // Infinite
+ continue;
+ }
+ presult[0] = presult[1] = R_NaN;
+ err |= 8; // Out of range
+ continue;
+ }
+
+ // Calculate m1
+ double z, zd, m1, m2, lastm1, mu2 = n - mu;
+
+ // Initial guess
+ m1 = N * mu / n;
+ m2 = N - m1;
+ int niter = 0;
+
+ // Newton Raphson iteration
+ do {
+ lastm1 = m1;
+ z = log(1. - mu/m1) - odds*log(1. - mu2/m2);
+ zd = mu/(m1*(m1-mu)) + odds*mu2/(m2*(m2-mu2));
+ m1 -= z / zd;
+ if (m1 <= mu) { // out of range
+ m1 = (lastm1 + mu) * 0.5;
+ }
+ m2 = N - m1;
+ if (m2 <= mu2) { // out of range
+ m2 = (N - lastm1 + mu2) * 0.5;
+ m1 = N - m2;
+ }
+ if (++niter > 200) error ("Convergence problem");
+
+ } while (fabs(m1-lastm1) > N * 1E-10);
+
+ presult[0] = m1; presult[1] = N - m1;
+ }
+
+ // Check for errors
+ if (err & 0x08) error("mu out of range");
+ else {
+ if (err & 0x10) warning("Zero odds conflicts with nonzero mean");
+ if (err & 1) warning("odds is indetermined");
+ }
+ //else if (err & 6) warning("result is independent of odds");
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ numFNCHypergeo
+ Estimate number of balls of each color from experimental mean for
+ Fisher's NonCentral Hypergeometric distribution.
+******************************************************************************/
+// Uses Cornfield's approximation. Precision is ignored.
+REXPORTS SEXP numFNCHypergeo(
+SEXP rmu, // Observed mean of x1
+SEXP rn, // Number of balls drawn from urn
+SEXP rN, // Number of balls in urn before sampling
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ // Check for vectors
+ if (LENGTH(rmu) < 1
+ || LENGTH(rn) != 1
+ || LENGTH(rN) != 1
+ || LENGTH(rodds) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int n = *INTEGER(rn);
+ int N = *INTEGER(rN);
+ double odds = *REAL(rodds);
+ double prec = *REAL(rprecision);
+ int nres = LENGTH(rmu);
+ int i; // Loop counter
+ int err = 0; // Remember any error
+
+ // Check validity of parameters
+ if (nres < 0) error("mu has wrong length");
+ if (n < 0 || N < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > N: Taking more items than there are");
+ if (!R_FINITE(odds) || odds < 0) error("Invalid value for odds");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.02) warning ("Cannot obtain high precision");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ if (nres == 1) {
+ PROTECT(result = allocVector(REALSXP, 2));
+ }
+ else {
+ PROTECT(result = allocMatrix(REALSXP, 2, nres));
+ }
+ presult = REAL(result);
+
+ // Loop for all mu inputs
+ for (i = 0; i < nres; i++, presult += 2) {
+ double mu = pmu[i];
+
+ // Check limits
+ if (n == 0) {
+ presult[0] = presult[1] = R_NaN;
+ err |= 1; // Indetermined
+ continue;
+ }
+ if (odds == 0.) {
+ presult[0] = presult[1] = R_NaN;
+ if (mu == 0.) err |= 1; // Indetermined
+ else err |= 0x10; // Out of range
+ continue;
+ }
+ if (n == N) { // Known exactly
+ presult[0] = mu;
+ presult[1] = N - mu;
+ continue;
+ }
+ if (mu <= 0.) {
+ if (mu == 0.) {
+ presult[0] = 0; presult[1] = N;
+ err |= 2; // Zero
+ continue;
+ }
+ presult[0] = presult[1] = R_NaN;
+ err |= 8; // Out of range
+ continue;
+ }
+ if (mu >= double(n)) {
+ if (mu == double(n)) {
+ presult[0] = N; presult[1] = 0;
+ err |= 4; // Infinite
+ continue;
+ }
+ presult[0] = presult[1] = R_NaN;
+ err |= 8; // Out of range
+ continue;
+ }
+
+ // Calculate m1
+ double mu2 = n - mu, mu_o = mu / odds;;
+ double m1 = (mu_o*(N-mu2) + mu*mu2) / (mu_o + mu2);
+ presult[0] = m1; presult[1] = N - m1;
+ }
+
+ // Check for errors
+ if (err & 0x08) error("mu out of range");
+ else {
+ if (err & 0x10) warning("Zero odds conflicts with nonzero mean");
+ if (err & 1) warning("odds is indetermined");
+ }
+ //else if (err & 6) warning("result is independent of odds");
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/***********************************************************************
+ DllMain
+***********************************************************************/
+// Define entry point DllMain if Windows and not Gnu compiler
+#if defined (_WIN32) && ! defined (__GNUC__)
+ extern "C" __declspec(dllexport)
+ int __stdcall DllMain(int, int, void*) {
+ return 1;
+ }
+#endif
diff --git a/src/urn2.cpp b/src/urn2.cpp
new file mode 100644
index 0000000..0283bc5
--- /dev/null
+++ b/src/urn2.cpp
@@ -0,0 +1,1375 @@
+/*************************** urn2.cpp **********************************
+* Author: Agner Fog
+* Date created: 2006
+* Last modified: 2011-08-05
+* Project: BiasedUrn
+* Source URL: www.agner.org/random
+*
+* Description:
+* R interface to multivariate noncentral hypergeometric distributions
+*
+* Copyright 2006-2011 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include <R.h>
+#include <Rinternals.h>
+#include "stocc.h"
+
+
+/******************************************************************************
+ dMFNCHypergeo
+ Mass function for
+ Multivariate Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP dMFNCHypergeo(
+SEXP rx, // Number of balls drawn of each color, vector or matrix
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rodds, // Odds for each color, vector
+SEXP rprecision // Precision of calculation, scalar
+) {
+
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ if (LENGTH(rn) != 1 || LENGTH(rprecision) != 1) error("Parameter n has wrong length");
+ int nres; // Number of results
+ if (isMatrix(rx)) {
+ nres = ncols(rx);
+ if (nrows(rx) != colors) error("matrix x must have one row for each color and one column for each sample");
+ }
+ else {
+ nres = 1;
+ if (LENGTH(rx) != colors) error("Length of vectors x, m, and odds must be the same");
+ }
+
+ // Get parameter values
+ int32 * px = INTEGER(rx);
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+ int N; // Total number of balls
+ int Nu; // Total number of balls with nonzero odds
+ int i, j; // Loop counter
+ int xsum; // Column sum of x = n
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) error("Length of odds vector must match length of m vector");
+ }
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = Nu = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if (podds[i]) Nu += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ }
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (n > Nu) error ("Not enough items with nonzero odds");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Make object for calculating probabilities
+ CMultiFishersNCHypergeometric mfnc(n, pm, podds, colors, prec);
+
+ // Loop over x inputs
+ for (i = 0; i < nres; i++) {
+ // Calculate x sum and check each x
+ for (xsum = j = 0; j < colors; j++) {
+ xsum += px[j];
+ /* Include this if you want error messages for x < 0 and x > m
+ if (px[j] > pm[j]) {
+ // Error
+ if (nres == 1) error("x[%i] = %i is bigger than m[%i] = %i", j+1, px[j], j+1, pm[j]);
+ else error("x[%i,%i] = %i is bigger than m[%i] = %i", j+1, i+1, px[j], j+1, pm[j]);
+ }
+ else if (px[j] < 0) {
+ if (nres == 1) error("x[%i] = %i is negative", j+1, px[j]);
+ else error("x[%i,%i] = %i is negative", j+1, i+1, px[j]);
+ }
+ */
+ }
+ // Check x sum
+ if (xsum != n) {
+ // Error
+ if (nres == 1) error("sum(x) = %i must be equal to n = %i", xsum, n);
+ else error("sum(x[,%i]) = %i must be equal to n = %i", i+1, xsum, n);
+ }
+
+ // Calculate probability
+ presult[i] = mfnc.probability(px); // Probability
+
+ // Get next column
+ px += colors;
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ dMWNCHypergeo
+ Mass function for
+ Multivariate Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP dMWNCHypergeo(
+SEXP rx, // Number of balls drawn of each color, vector or matrix
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rodds, // Odds for each color, vector
+SEXP rprecision // Precision of calculation, scalar
+) {
+
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ if (LENGTH(rn) != 1 || LENGTH(rprecision) != 1) error("Parameter n has wrong length");
+ int nres; // Number of results
+ if (isMatrix(rx)) {
+ nres = ncols(rx);
+ if (nrows(rx) != colors) error("matrix x must have one row for each color and one column for each sample");
+ }
+ else {
+ nres = 1;
+ if (LENGTH(rx) != colors) error("Length of vectors x, m, and odds must be the same");
+ }
+
+ // Get parameter values
+ int32 * px = INTEGER(rx);
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+ int N; // Total number of balls
+ int Nu; // Total number of balls with nonzero odds
+ int i, j; // Loop counter
+ int xsum; // Column sum of x = n
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) error("Length of odds vector must match length of m vector");
+ }
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = Nu = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if (podds[i]) Nu += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ }
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (n > Nu) error ("Not enough items with nonzero odds");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocVector(REALSXP, nres));
+ presult = REAL(result);
+
+ // Make object for calculating probabilities
+ CMultiWalleniusNCHypergeometric mwnc(n, pm, podds, colors, prec);
+
+ // Loop over x inputs
+ for (i = 0; i < nres; i++) {
+ // Calculate x sum and check each x
+ for (xsum = j = 0; j < colors; j++) {
+ xsum += px[j];
+ /* Include this if you want error messages for x > m and x < 0
+ if (px[j] > pm[j]) {
+ // Error
+ if (nres == 1) error("x[%i] = %i is bigger than m[%i] = %i", j+1, px[j], j+1, pm[j]);
+ else error("x[%i,%i] = %i is bigger than m[%i] = %i", j+1, i+1, px[j], j+1, pm[j]);
+ }
+ else if (px[j] < 0) {
+ if (nres == 1) error("x[%i] = %i is negative", j+1, px[j]);
+ else error("x[%i,%i] = %i is negative", j+1, i+1, px[j]);
+ }
+ */
+ }
+ // Check x sum
+ if (xsum != n) {
+ // Error
+ if (nres == 1) error("sum(x) = %i must be equal to n = %i", xsum, n);
+ else error("sum(x[,%i]) = %i must be equal to n = %i", i+1, xsum, n);
+ }
+
+ // Calculate probability
+ presult[i] = mwnc.probability(px); // Probability
+
+ // Get next column
+ px += colors;
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ rMFNCHypergeo
+ Random variate generation function for
+ Multivariate Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP rMFNCHypergeo(
+SEXP rnran, // Number of random variates desired, scalar
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rodds, // Odds for each color, vector
+SEXP rprecision // Precision of calculation, scalar
+) {
+
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ if (LENGTH(rn) != 1) error("Parameter n has wrong length");
+ if (LENGTH(rprecision) != 1) error("Parameter precision has wrong length");
+
+ // Get parameter values
+ int nran = *INTEGER(rnran); if (LENGTH(rnran) > 1) nran = LENGTH(rnran);
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+ int i; // Loop counter
+ int N; // Total number of balls
+ int Nu; // Total number of balls with nonzero odds
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (nran <= 0) error("Parameter nran must be positive");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) error("Length of odds vector must match length of m vector");
+ }
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = Nu = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if (podds[i]) Nu += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ }
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (n > Nu) error ("Not enough items with nonzero odds");
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ if (nran <= 1) { // One result. Make vector
+ PROTECT(result = allocVector(INTSXP, colors));
+ }
+ else { // Multiple results. Make matrix
+ PROTECT(result = allocMatrix(INTSXP, colors, nran));
+ }
+
+ presult = INTEGER(result);
+
+ // Make object for generating variates
+ StochasticLib3 sto(0); // Seed is not used
+ sto.SetAccuracy(prec); // Set precision
+ sto.InitRan(); // Initialize RNG in R.dll
+
+ // Generate variates one by one
+ for (i = 0; i < nran; i++) {
+ sto.MultiFishersNCHyp(presult, pm, podds, n, colors); // Generate variate
+ presult += colors; // Point to next column in matrix
+ }
+
+ sto.EndRan(); // Return RNG state to R.dll
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ rMWNCHypergeo
+ Random variate generation function for
+ Multivariate Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP rMWNCHypergeo(
+SEXP rnran, // Number of random variates desired, scalar
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rodds, // Odds for each color, vector
+SEXP rprecision // Precision of calculation, scalar
+) {
+
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ if (LENGTH(rn) != 1) error("Parameter n has wrong length");
+ if (LENGTH(rprecision) != 1) error("Parameter precision has wrong length");
+
+ // Get parameter values
+ int nran = *INTEGER(rnran); if (LENGTH(rnran) > 1) nran = LENGTH(rnran);
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+ int i; // Loop counter
+ int N; // Total number of balls
+ int Nu; // Total number of balls with nonzero odds
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (nran <= 0) error("Parameter nran must be positive");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 1E-7;
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) error("Length of odds vector must match length of m vector");
+ }
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = Nu = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if (podds[i]) Nu += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ }
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (n > Nu) error ("Not enough items with nonzero odds");
+
+ // Allocate result vector
+ SEXP result; int * presult;
+ if (nran <= 1) { // One result. Make vector
+ PROTECT(result = allocVector(INTSXP, colors));
+ }
+ else { // Multiple results. Make matrix
+ PROTECT(result = allocMatrix(INTSXP, colors, nran));
+ }
+
+ presult = INTEGER(result);
+
+ // Make object for generating variates
+ StochasticLib3 sto(0); // Seed is not used
+ sto.SetAccuracy(prec); // Set precision
+ sto.InitRan(); // Initialize RNG in R.dll
+
+ // Generate variates one by one
+ for (i = 0; i < nran; i++) {
+ sto.MultiWalleniusNCHyp(presult, pm, podds, n, colors); // Generate variate
+ presult += colors; // Point to next column in matrix
+ }
+
+ sto.EndRan(); // Return RNG state to R.dll
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ momentsMFNCHypergeo
+ Calculates the mean and variance of the
+ Multivariate Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP momentsMFNCHypergeo(
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rodds, // Odds for each color, vector
+SEXP rprecision // Precision of calculation, scalar
+) {
+
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ if (LENGTH(rn) != 1) error("Parameter n has wrong length");
+ if (LENGTH(rprecision) != 1) error("Parameter precision has wrong length");
+
+ // Get parameter values
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+ int i; // Loop counter
+ int N; // Total number of balls
+ int Nu; // Total number of balls with nonzero odds
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (!R_FINITE(prec) || prec < 0) prec = 1;
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) error("Length of odds vector must match length of m vector");
+ }
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = Nu = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if (podds[i]) Nu += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ }
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (n > Nu) error ("Not enough items with nonzero odds");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocMatrix(REALSXP, colors, 2));
+
+ presult = REAL(result);
+
+ // Make object for calculating mean and variance
+ CMultiFishersNCHypergeometric mfnc(n, pm, podds, colors, prec);
+
+ if (prec >= 0.1) {
+ // use approximate calculation methods
+ mfnc.variance(presult + colors, presult);
+ }
+ else {
+ // use exact calculation
+ mfnc.moments(presult, presult + colors);
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ momentsMWNCHypergeo
+ Calculates the mean and variance of the
+ Multivariate Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+REXPORTS SEXP momentsMWNCHypergeo(
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rodds, // Odds for each color, vector
+SEXP rprecision // Precision of calculation, scalar
+) {
+
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ if (LENGTH(rn) != 1) error("Parameter n has wrong length");
+ if (LENGTH(rprecision) != 1) error("Parameter precision has wrong length");
+
+ // Get parameter values
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+ int i; // Loop counter
+ int N; // Total number of balls
+ int Nu; // Total number of balls with nonzero odds
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (!R_FINITE(prec) || prec < 0) prec = 1;
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) error("Length of odds vector must match length of m vector");
+ }
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = Nu = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if (podds[i]) Nu += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ }
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (n > Nu) error ("Not enough items with nonzero odds");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ PROTECT(result = allocMatrix(REALSXP, colors, 2));
+
+ presult = REAL(result);
+
+ // Make object for calculating mean and variance
+ CMultiWalleniusNCHypergeometricMoments mwnc(n, pm, podds, colors, prec);
+
+ if (prec >= 0.1) {
+ // use approximate calculation methods
+ mwnc.variance(presult + colors, presult);
+ }
+ else {
+ // use exact calculation
+ mwnc.moments(presult, presult + colors);
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ oddsMFNCHypergeo
+ Estimate odds ratio from mean for the
+ Multivariate Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+// Uses the multivariate extension of Cornfield's approximation.
+// Precision is ignored
+REXPORTS SEXP oddsMFNCHypergeo(
+SEXP rmu, // Number of balls drawn of each color, vector or matrix
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rprecision // Precision of calculation, scalar
+) {
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ int nres; // Number of results
+ if (isMatrix(rmu)) {
+ nres = ncols(rmu);
+ if (nrows(rmu) != colors) error("matrix mu must have one row for each color and one column for each sample");
+ }
+ else {
+ nres = 1;
+ if (LENGTH(rmu) != colors) error("Length of vectors mu and m must be the same");
+ }
+
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double prec = *REAL(rprecision);
+ int N; // Total number of balls
+ int i, j; // Loop counter
+ int x1, x2; // x limits
+ int c0; // Reference color
+ double xd0, xd1, xd2; // Used for searching for reference color
+ double mu; // Mean
+ double sum_mu = 0.; // Sum of means
+ int err = 0; // Warning and error messages
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ sum_mu += pmu[i];
+ }
+ if (n > 0 && fabs(sum_mu-n)/n > 0.1) {
+ err |= 0x100; // sum of means should be equal to n
+ }
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.05) warning ("Cannot obtain high precision");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ if (nres == 1) {
+ PROTECT(result = allocVector(REALSXP, colors));
+ }
+ else {
+ PROTECT(result = allocMatrix(REALSXP, colors, nres));
+ }
+ presult = REAL(result);
+
+ // Loop over x inputs
+ for (i = 0; i < nres; i++) {
+
+ // Find the color with the highest variance to use as reference
+ for (xd0 = 0., j = c0 = 0; j < colors; j++) {
+ // Get limits for x[j]
+ x1 = pm[j] + n - N; if (x1 < 0) x1 = 0;
+ x2 = n; if (x2 > pm[j]) x2 = pm[j];
+ // Find max distance of mu from limits
+ xd1 = pmu[j] - x1; xd2 = x2 - pmu[j];
+ if (xd1 > xd2) xd1 = xd2;
+ if (xd1 > xd0) {xd0 = xd1; c0 = j;}
+ }
+ if (xd0 == 0.) {
+ // All odds are indetermined
+ err |= 0x10;
+ for (j = 0; j < colors; j++) presult[j] = R_NaN;
+ }
+ else {
+ // Use color c0 as reference
+ presult[c0] = 1.;
+
+ // Get odds for all colors except c0
+ for (j = 0; j < colors; j++) {
+ if (j != c0) {
+
+ // Get limits for x[j]
+ x1 = pm[j] + n - N; if (x1 < 0) x1 = 0;
+ x2 = n; if (x2 > pm[j]) x2 = pm[j];
+ mu = pmu[j];
+
+ // Check limits
+ if (x1 == x2) {
+ presult[j] = R_NaN; err |= 1; // Indetermined
+ continue;
+ }
+ if (mu <= double(x1)) {
+ if (mu == double(x1)) {
+ presult[j] = 0.; err |= 2; // Zero
+ continue;
+ }
+ presult[j] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+ if (mu >= double(x2)) {
+ if (mu == double(x2)) {
+ presult[j] = R_PosInf; err |= 4; // Infinite
+ continue;
+ }
+ presult[j] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+
+ // Calculate odds relative to c0
+ presult[j] = pmu[j] * (pm[c0] - pmu[c0]) / (pmu[c0] * (pm[j] - pmu[j]));
+ }
+ }
+ }
+ presult += colors; pmu += colors;
+ }
+
+ // Check for errors
+ if (err & 0x10) warning("All odds are indetermined");
+ else if (err & 8) error("mu out of range");
+ else if (err & 1) warning("odds is indetermined");
+ else {
+ if (err & 4) warning("odds is infinite");
+ if (err & 2) warning("odds is zero with no precision");
+ }
+ if (err & 0x100) warning("Sum of means should be equal to n");
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ oddsMWNCHypergeo
+ Estimate odds ratio from mean for the
+ Multivariate Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+// Uses Manly's approximation. Precision is ignored
+REXPORTS SEXP oddsMWNCHypergeo(
+SEXP rmu, // Number of balls drawn of each color, vector or matrix
+SEXP rm, // Number of balls of each color in urn, vector
+SEXP rn, // Number of balls drawn from urn, scalar
+SEXP rprecision // Precision of calculation, scalar
+) {
+ // Check number of colors
+ int colors = LENGTH(rm);
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+ int nres; // Number of results
+ if (isMatrix(rmu)) {
+ nres = ncols(rmu);
+ if (nrows(rmu) != colors) error("matrix mu must have one row for each color and one column for each sample");
+ }
+ else {
+ nres = 1;
+ if (LENGTH(rmu) != colors) error("Length of vectors mu and m must be the same");
+ }
+
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int32 * pm = INTEGER(rm);
+ int n = *INTEGER(rn);
+ double prec = *REAL(rprecision);
+ int N; // Total number of balls
+ int i, j; // Loop counter
+ int x1, x2; // x limits
+ int c0; // Reference color
+ double xd0, xd1, xd2; // Used for searching for reference color
+ double mu; // Mean
+ double sum_mu = 0.; // Sum of means
+ int err = 0; // Warning and error messages
+
+ // Get N = sum(m) and check validity of m and odds
+ for (N = i = 0; i < colors; i++) {
+ int32 m = pm[i];
+ if (m < 0) error("m[%i] < 0", i+1);
+ N += m;
+ if ((unsigned int)N > 2000000000) error ("Integer overflow");
+ sum_mu += pmu[i];
+ }
+ if (n > 0 && fabs(sum_mu-n)/n > 0.1) {
+ err |= 0x100; // sum of means should be equal to n
+ }
+
+ // Check validity of scalar parameters
+ if (n < 0) error("Negative parameter n");
+ if (n > N) error ("n > sum(m): Taking more items than there are");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.02) warning ("Cannot obtain high precision");
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ if (nres == 1) {
+ PROTECT(result = allocVector(REALSXP, colors));
+ }
+ else {
+ PROTECT(result = allocMatrix(REALSXP, colors, nres));
+ }
+ presult = REAL(result);
+
+ // Loop over x inputs
+ for (i = 0; i < nres; i++) {
+
+ // Find the color with the highest variance to use as reference
+ for (xd0 = 0., j = c0 = 0; j < colors; j++) {
+ // Get limits for x[j]
+ x1 = pm[j] + n - N; if (x1 < 0) x1 = 0;
+ x2 = n; if (x2 > pm[j]) x2 = pm[j];
+ // Find max distance of mu from limits
+ xd1 = pmu[j] - x1; xd2 = x2 - pmu[j];
+ if (xd1 > xd2) xd1 = xd2;
+ if (xd1 > xd0) {xd0 = xd1; c0 = j;}
+ }
+ if (xd0 == 0.) {
+ // All odds are indetermined
+ err |= 0x10;
+ for (j = 0; j < colors; j++) presult[j] = R_NaN;
+ }
+ else {
+ // Use color c0 as reference
+ presult[c0] = 1.;
+
+ // Get odds for all colors except c0
+ for (j = 0; j < colors; j++) {
+ if (j != c0) {
+
+ // Get limits for x[j]
+ x1 = pm[j] + n - N; if (x1 < 0) x1 = 0;
+ x2 = n; if (x2 > pm[j]) x2 = pm[j];
+ mu = pmu[j];
+
+ // Check limits
+ if (x1 == x2) {
+ presult[j] = R_NaN; err |= 1; // Indetermined
+ continue;
+ }
+ if (mu <= double(x1)) {
+ if (mu == double(x1)) {
+ presult[j] = 0.; err |= 2; // Zero
+ continue;
+ }
+ presult[j] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+ if (mu >= double(x2)) {
+ if (mu == double(x2)) {
+ presult[j] = R_PosInf; err |= 4; // Infinite
+ continue;
+ }
+ presult[j] = R_NaN; err |= 8; // Out of range
+ continue;
+ }
+
+ // Calculate odds relative to c0
+ presult[j] = log(1. - pmu[j] / pm[j]) / log(1. - pmu[c0] / pm[c0]);
+ }
+ }
+ }
+ presult += colors; pmu += colors;
+ }
+
+ // Check for errors
+ if (err & 0x10) warning("All odds are indetermined");
+ else if (err & 8) error("mu out of range");
+ else if (err & 1) warning("odds is indetermined");
+ else {
+ if (err & 4) warning("odds is infinite");
+ if (err & 2) warning("odds is zero with no precision");
+ }
+ if (err & 0x100) warning("Sum of means should be equal to n");
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ numMFNCHypergeo
+ Estimate number of balls of each color from experimental mean for
+ Multivariate Fisher's NonCentral Hypergeometric distribution
+******************************************************************************/
+// Uses Cornfield's approximation. Precision is ignored.
+// Calculation method: Solves the multivariate Cornfield's equation by
+// Newton Raphson iteration with r as independent parameter.
+REXPORTS SEXP numMFNCHypergeo(
+SEXP rmu, // Observed mean of x1
+SEXP rn, // Number of balls drawn from urn
+SEXP rN, // Number of balls in urn before sampling
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ int nres; // Number of results
+ int colors; // Number of colors
+
+ // Check for vectors
+ if (LENGTH(rn) != 1
+ || LENGTH(rN) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+
+ // Check mu matrix size
+ if (isMatrix(rmu)) {
+ nres = ncols(rmu);
+ colors = nrows(rmu);
+ }
+ else {
+ nres = 1;
+ colors = LENGTH(rmu);
+ }
+
+ // Check number of colors
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int n = *INTEGER(rn);
+ int N = *INTEGER(rN);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+
+ int i, j; // Loop counter
+ int err, err1 = 0; // Remember any error
+ int cu = 0; // Number of colors with nonzero odds
+ double smu; // Sum of means, reciprocal.
+ double mu[MAXCOLORS]; // Normalized means
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) {
+ // Size mismatch
+ if (isMatrix(rmu)) {
+ error("matrix mu must have one row for each color and one column for each sample");
+ }
+ else {
+ error("Length of vectors mu and odds must be the same");
+ }
+ }
+ }
+
+ // Check validity of parameters
+ if (n < 0 || N < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > N: Taking more items than there are");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.05) warning ("Cannot obtain high precision");
+
+ // Check validity of odds
+ for (i = cu = 0; i < colors; i++) {
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ if (podds[i] > 0) cu++;
+ }
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ if (nres == 1) {
+ PROTECT(result = allocVector(REALSXP, colors));
+ }
+ else {
+ PROTECT(result = allocMatrix(REALSXP, colors, nres));
+ }
+ presult = REAL(result);
+
+ // Loop for all mu inputs
+ for (j = 0; j < nres; j++, presult += colors, pmu += colors) {
+ err = 0;
+
+ // Make results NAN in case of error exits below
+ for (i = 0; i < colors; i++) presult[i] = R_NaN;
+
+ // Check limits
+ if (n == 0) {
+ err1 |= 1; // Indetermined
+ continue;
+ }
+
+ // Check sum of mu must equal n
+ for (i = 0, smu = 0.; i < colors; i++) smu += pmu[i];
+ if (smu <= 0.) {
+ err1 |= 0x800; // Sum of means must be positive
+ break;
+ }
+ if (fabs(smu - n) > 0.02 * n) {
+ err |= 0x100; // Warning: sum not approx. equal to n
+ }
+ smu = n / smu;
+ for (i = 0; i < colors; i++) {
+ mu[i] = pmu[i] * smu; // Normalize mu
+ }
+
+ // More parameter checks
+ if (n == N) { // Results known exactly
+ for (i = 0; i < colors; i++) {
+ if (podds[i] == 0 && mu[i] != 0) {
+ err1 |= 0x10; // Out of range
+ }
+ else {
+ presult[i] = mu[i];
+ }
+ }
+ continue;
+ }
+
+ // Check odds
+ if (cu < colors || colors < 2) {
+ for (i = 0; i < colors; i++) {
+ if (podds[i] == 0) {
+ if (mu[i] != 0) err1 |= 0x10; // Out of range
+ else err1 |= 1; // Indetermined
+ }
+ else {
+ if (cu == 1) presult[i] = N; // Known exactly
+ }
+ }
+ continue;
+ }
+
+ // check mu within bounds
+ for (i = 0; i < colors; i++) {
+ if (mu[i] <= 0.) {
+ if (mu[i] == 0.) {
+ presult[i] = 0;
+ err |= 2; // Zero
+ }
+ else {
+ err |= 8; // Out of range
+ }
+ }
+ if (mu[i] >= double(n)) {
+ if (mu[i] == double(n)) {
+ presult[i] = N;
+ err |= 4;
+ }
+ else {
+ err |= 8; // Out of range
+ }
+ }
+ }
+
+ if (err & 0x18) {
+ // Results invalid
+ err1 |= err;
+ break;
+ }
+
+ // Calculate m[]
+ double z; // Newton Raphson function value
+ double zd; // Newton Raphson derivative of z
+ double r, lastr; // Independent parameter in Newton Raphson iteration
+ int niter = 0; // Number of iterations
+
+ // Initial guess
+ r = 1.;
+
+ // Newton Raphson iteration
+ do {
+ lastr = r;
+ // Calculate z and zd
+ z = zd = 0.;
+ for (i = 0; i < colors; i++) {
+ z += mu[i] * (1. + 1./(r*podds[i]));
+ zd -= mu[i] / (podds[i]*r*r);
+ }
+ r -= (z - N) / zd;
+ if (r <= 0.) {
+ // r must be positive. Get r within range
+ if (r < -lastr) {
+ r = lastr * 0.125;
+ }
+ else {
+ r = lastr * 0.5;
+ }
+ }
+ if (++niter > 200) error ("Convergence problem");
+
+ } while (fabs(r-lastr) > r * 1E-8);
+
+ // Get results from r
+ for (i = 0; i < colors; i++) {
+ presult[i] = mu[i] * (r*podds[i] + 1.) / (r*podds[i]);
+ }
+ err1 |= err;
+ }
+
+ // Check for errors
+ if (err1 & 0x808) error("Mean is out of range");
+ else {
+ if (err1 & 0x010) warning("Zero odds conflicts with nonzero mean");
+ if (err1 & 1) warning("Number of items is indetermined");
+ if (err1 & 0x100) warning("Sum of means is not equal to n");
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
+
+
+/******************************************************************************
+ numMWNCHypergeo
+ Estimate number of balls of each color from experimental mean for
+ Multivariate Wallenius' NonCentral Hypergeometric distribution
+******************************************************************************/
+// Uses Manly's approximation. Precision is ignored.
+// Calculation method: Solves Manly's equation by
+// Newton Raphson iteration with theta as independent parameter.
+REXPORTS SEXP numMWNCHypergeo(
+SEXP rmu, // Observed mean of x1
+SEXP rn, // Number of balls drawn from urn
+SEXP rN, // Number of balls in urn before sampling
+SEXP rodds, // Odds of getting a red ball among one red and one white
+SEXP rprecision // Precision of calculation
+) {
+ int nres; // Number of results
+ int colors; // Number of colors
+
+ // Check for vectors
+ if (LENGTH(rn) != 1
+ || LENGTH(rN) != 1
+ || LENGTH(rprecision) != 1
+ ) {
+ error("Parameter has wrong length");
+ }
+
+ // Check mu matrix size
+ if (isMatrix(rmu)) {
+ nres = ncols(rmu);
+ colors = nrows(rmu);
+ }
+ else {
+ nres = 1;
+ colors = LENGTH(rmu);
+ }
+
+ // Check number of colors
+ if (colors < 1) error ("Number of colors too small");
+ if (colors > MAXCOLORS) {
+ error ("Number of colors (%i) exceeds maximum (%i).\n"
+ "You may recompile the BiasedUrn package with a bigger value of MAXCOLORS in the file Makevars.",
+ colors, MAXCOLORS);
+ }
+
+ // Get parameter values
+ double *pmu = REAL(rmu);
+ int n = *INTEGER(rn);
+ int N = *INTEGER(rN);
+ double *podds = REAL(rodds);
+ double prec = *REAL(rprecision);
+
+ int i, j; // Loop counter
+ int err, err1 = 0; // Remember any error
+ int cu = 0; // Number of colors with nonzero odds
+ double smu; // Sum of means, reciprocal.
+ double mu[MAXCOLORS]; // Normalized means
+
+ // Check if odds = 1
+ double OddsOne[MAXCOLORS]; // Used if odds = 1
+ if (LENGTH(rodds) == 1 && *podds == 1.) {
+ // Odds = scalar 1. Set to vector of all 1's
+ for (i = 0; i < colors; i++) OddsOne[i] = 1.;
+ podds = OddsOne;
+ }
+ else {
+ if (LENGTH(rodds) != colors) {
+ // Size mismatch
+ if (isMatrix(rmu)) {
+ error("matrix mu must have one row for each color and one column for each sample");
+ }
+ else {
+ error("Length of vectors mu and odds must be the same");
+ }
+ }
+ }
+
+ // Check validity of parameters
+ if (n < 0 || N < 0) error("Negative parameter");
+ if ((unsigned int)N > 2000000000) error("Overflow");
+ if (n > N) error ("n > N: Taking more items than there are");
+ if (!R_FINITE(prec) || prec < 0 || prec > 1) prec = 0.1;
+ if (prec < 0.02) warning ("Cannot obtain high precision");
+
+ // Check validity of odds
+ for (i = cu = 0; i < colors; i++) {
+ if (!R_FINITE(podds[i]) || podds[i] < 0) error("Invalid value for odds[%i]", i+1);
+ if (podds[i] > 0) cu++;
+ }
+
+ // Allocate result vector
+ SEXP result; double * presult;
+ if (nres == 1) {
+ PROTECT(result = allocVector(REALSXP, colors));
+ }
+ else {
+ PROTECT(result = allocMatrix(REALSXP, colors, nres));
+ }
+ presult = REAL(result);
+
+ // Loop for all mu inputs
+ for (j = 0; j < nres; j++, presult += colors, pmu += colors) {
+ err = 0;
+
+ // Make results NAN in case of error exits below
+ for (i = 0; i < colors; i++) presult[i] = R_NaN;
+
+ // Check limits
+ if (n == 0) {
+ err1 |= 1; // Indetermined
+ continue;
+ }
+
+ // Check sum of mu must equal n
+ for (i = 0, smu = 0.; i < colors; i++) smu += pmu[i];
+ if (smu <= 0.) {
+ err1 |= 0x800; // Sum of means must be positive
+ break;
+ }
+ if (fabs(smu - n) > 0.02 * n) {
+ err |= 0x100; // Warning: sum not approx. equal to n
+ }
+ smu = n / smu;
+ for (i = 0; i < colors; i++) {
+ mu[i] = pmu[i] * smu; // Normalize mu
+ }
+
+ // More parameter checks
+ if (n == N) { // Results known exactly
+ for (i = 0; i < colors; i++) {
+ if (podds[i] == 0 && mu[i] != 0) {
+ err1 |= 0x10; // Out of range
+ }
+ else {
+ presult[i] = mu[i];
+ }
+ }
+ continue;
+ }
+
+ // Check odds
+ if (cu < colors || colors < 2) {
+ for (i = 0; i < colors; i++) {
+ if (podds[i] == 0) {
+ if (mu[i] != 0) err1 |= 0x10; // Out of range
+ else err1 |= 1; // Indetermined
+ }
+ else {
+ if (cu == 1) presult[i] = N; // Known exactly
+ }
+ }
+ continue;
+ }
+
+ // check mu within bounds
+ for (i = 0; i < colors; i++) {
+ if (mu[i] <= 0.) {
+ if (mu[i] == 0.) {
+ presult[i] = 0;
+ err |= 2; // Zero
+ }
+ else {
+ err |= 8; // Out of range
+ }
+ }
+ if (mu[i] >= double(n)) {
+ if (mu[i] == double(n)) {
+ presult[i] = N;
+ err |= 4;
+ }
+ else {
+ err |= 8; // Out of range
+ }
+ }
+ }
+
+ if (err & 0x18) {
+ // Results invalid
+ err1 |= err;
+ break;
+ }
+
+ // Calculate m[]
+ double z; // Newton Raphson function value
+ double zd; // Newton Raphson derivative of z
+ double t, lastt; // Independent parameter in Newton Raphson iteration
+ double eot; // exp(odds[i]*t)
+ double eot1 = 1.; // 1 - exp(odds[i]*t)
+ int niter = 0; // Number of iterations
+
+ // Initial guess
+ t = lastt = -1.;
+
+ // Newton Raphson iteration
+ do {
+ // Calculate z and zd
+ AGAIN:
+ z = zd = 0.;
+ for (i = 0; i < colors; i++) {
+ eot = exp(podds[i]*t);
+ eot1 = 1. - eot;
+ if (eot1 <= 0. || eot <= 0.) {
+ // Out of range
+ lastt = t;
+ t = 0.125 * lastt;
+ goto AGAIN;
+ }
+ z += mu[i] / eot1;
+ zd += mu[i] * podds[i] * eot / (eot1*eot1);
+ }
+ lastt = t;
+ t -= (z - N) / zd;
+ if (t >= 0.) {
+ // t must be negative. Get t within range
+ if (t > -lastt) {
+ t = lastt * 0.125;
+ }
+ else {
+ t = lastt * 0.5;
+ }
+ }
+ if (++niter > 200) error ("Convergence problem");
+
+ } while (fabs(t-lastt) > -t * 1E-8);
+
+ // Get results from t
+ for (i = 0; i < colors; i++) {
+ presult[i] = mu[i] / (1. - exp(podds[i]*t));
+ }
+ err1 |= err;
+ }
+
+ // Check for errors
+ if (err1 & 0x808) error("Mean is out of range");
+ else {
+ if (err1 & 0x010) warning("Zero odds conflicts with nonzero mean");
+ if (err1 & 1) warning("Number of items is indetermined");
+ if (err1 & 0x100) warning("Sum of means is not equal to n");
+ }
+
+ // Return result
+ UNPROTECT(1);
+ return(result);
+}
diff --git a/src/wnchyppr.cpp b/src/wnchyppr.cpp
new file mode 100644
index 0000000..dd2c272
--- /dev/null
+++ b/src/wnchyppr.cpp
@@ -0,0 +1,2100 @@
+/*************************** wnchyppr.cpp **********************************
+* Author: Agner Fog
+* Date created: 2002-10-20
+* Last modified: 2013-11-06
+* Project: stocc.zip
+* Source URL: www.agner.org/random
+*
+* Description:
+* Calculation of univariate and multivariate Wallenius noncentral
+* hypergeometric probability distribution.
+*
+* This file contains source code for the class CWalleniusNCHypergeometric
+* and CMultiWalleniusNCHypergeometricMoments defined in stocc.h.
+*
+* Documentation:
+* ==============
+* The file stocc.h contains class definitions.
+* The file nchyp.pdf, available from www.agner.org/random/theory
+* describes the theory of the calculation methods.
+* The file ran-instructions.pdf contains further documentation and
+* instructions.
+*
+* Copyright 2002-2013 by Agner Fog.
+* GNU General Public License http://www.gnu.org/licenses/gpl.html
+*****************************************************************************/
+
+#include <string.h> // memcpy function
+#include "stocc.h" // class definition
+#include "erfres.h" // table of error function residues (Don't precompile this header)
+
+/***********************************************************************
+constants
+***********************************************************************/
+static const double LN2 = 0.693147180559945309417; // log(2)
+
+
+/***********************************************************************
+Log and Exp functions with special care for small x
+***********************************************************************/
+// These are functions that involve expressions of the types log(1+x)
+// and exp(x)-1. These functions need special care when x is small to
+// avoid loss of precision. There are three versions of these functions:
+// (1) Assembly version in library randomaXX.lib
+// (2) Use library functions log1p and expm1 if available
+// (3) Use Taylor expansion if none of the above are available
+
+#ifdef RANDOMA_H
+// (1)
+// Assembly library randomaXX.lib is used.
+// Nothing to include here.
+
+#elif defined(__GNUC__) || defined (__INTEL_COMPILER) || defined(HAVE_EXPM1)
+// (2)
+// Functions log1p(x) = log(1+x) and expm1(x) = exp(x)-1 are available
+// in the math libraries of Gnu and Intel compilers
+// and in R.DLL (www.r-project.org).
+
+double pow2_1(double q, double * y0 = 0) {
+ // calculate 2^q and (1-2^q) without loss of precision.
+ // return value is (1-2^q). 2^q is returned in *y0
+ double y, y1;
+ q *= LN2;
+ if (fabs(q) > 0.1) {
+ y = exp(q); // 2^q
+ y1 = 1. - y; // 1-2^q
+ }
+ else { // Use expm1
+ y1 = expm1(q); // 2^q-1
+ y = y1 + 1; // 2^q
+ y1 = -y1; // 1-2^q
+ }
+ if (y0) *y0 = y; // Return y if not void pointer
+ return y1; // Return y1
+}
+
+double log1mx(double x, double x1) {
+ // Calculate log(1-x) without loss of precision when x is small.
+ // Parameter x1 must be = 1-x.
+ if (fabs(x) > 0.03) {
+ return log(x1);
+ }
+ else { // use log1p(x) = log(1+x)
+ return log1p(-x);
+ }
+}
+
+double log1pow(double q, double x) {
+ // calculate log((1-e^q)^x) without loss of precision.
+ // Combines the methods of the above two functions.
+ double y, y1;
+
+ if (fabs(q) > 0.1) {
+ y = exp(q); // e^q
+ y1 = 1. - y; // 1-e^q
+ }
+ else { // Use expm1
+ y1 = expm1(q); // e^q-1
+ y = y1 + 1; // e^q
+ y1 = -y1; // 1-e^q
+ }
+
+ if (y > 0.1) { // (1-y)^x calculated without problem
+ return x * log(y1);
+ }
+ else { // Use log1p
+ return x * log1p(-y);
+ }
+}
+
+#else
+// (3)
+// Functions log1p and expm1 are not available in MS and Borland compiler
+// libraries. Use explicit Taylor expansion when needed.
+
+double pow2_1(double q, double * y0 = 0) {
+ // calculate 2^q and (1-2^q) without loss of precision.
+ // return value is (1-2^q). 2^q is returned in *y0
+ double y, y1, y2, qn, i, ifac;
+ q *= LN2;
+ if (fabs(q) > 0.1) {
+ y = exp(q);
+ y1 = 1. - y;
+ }
+ else { // expand 1-e^q = -summa(q^n/n!) to avoid loss of precision
+ y1 = 0; qn = i = ifac = 1;
+ do {
+ y2 = y1;
+ qn *= q; ifac *= i++;
+ y1 -= qn / ifac;
+ }
+ while (y1 != y2);
+ y = 1.-y1;
+ }
+ if (y0) *y0 = y;
+ return y1;
+}
+
+double log1mx(double x, double x1) {
+ // Calculate log(1-x) without loss of precision when x is small.
+ // Parameter x1 must be = 1-x.
+ if (fabs(x) > 0.03) {
+ return log(x1);
+ }
+ else { // expand ln(1-x) = -summa(x^n/n)
+ double y, z1, z2, i;
+ y = i = 1.; z1 = 0;
+ do {
+ z2 = z1;
+ y *= x;
+ z1 -= y / i++;
+ }
+ while (z1 != z2);
+ return z1;
+ }
+}
+
+double log1pow(double q, double x) {
+ // calculate log((1-e^q)^x) without loss of precision
+ // Uses various Taylor expansions to avoid loss of precision
+ double y, y1, y2, z1, z2, qn, i, ifac;
+
+ if (fabs(q) > 0.1) {
+ y = exp(q); y1 = 1. - y;
+ }
+ else { // expand 1-e^q = -summa(q^n/n!) to avoid loss of precision
+ y1 = 0; qn = i = ifac = 1;
+ do {
+ y2 = y1;
+ qn *= q; ifac *= i++;
+ y1 -= qn / ifac;
+ }
+ while (y1 != y2);
+ y = 1. - y1;
+ }
+ if (y > 0.1) { // (1-y)^x calculated without problem
+ return x * log(y1);
+ }
+ else { // expand ln(1-y) = -summa(y^n/n)
+ y1 = i = 1.; z1 = 0;
+ do {
+ z2 = z1;
+ y1 *= y;
+ z1 -= y1 / i++;
+ }
+ while (z1 != z2);
+ return x * z1;
+ }
+}
+
+#endif
+
+/***********************************************************************
+Other shared functions
+***********************************************************************/
+
+double LnFacr(double x) {
+ // log factorial of non-integer x
+ int32 ix = (int32)(x);
+ if (x == ix) return LnFac(ix); // x is integer
+ double r, r2, D = 1., f;
+ static const double
+ C0 = 0.918938533204672722, // ln(sqrt(2*pi))
+ C1 = 1./12.,
+ C3 = -1./360.,
+ C5 = 1./1260.,
+ C7 = -1./1680.;
+ if (x < 6.) {
+ if (x == 0 || x == 1) return 0;
+ while (x < 6) D *= ++x;
+ }
+ r = 1. / x; r2 = r*r;
+ f = (x + 0.5)*log(x) - x + C0 + r*(C1 + r2*(C3 + r2*(C5 + r2*C7)));
+ if (D != 1.) f -= log(D);
+ return f;
+}
+
+
+double FallingFactorial(double a, double b) {
+ // calculates ln(a*(a-1)*(a-2)* ... * (a-b+1))
+
+ if (b < 30 && int(b) == b && a < 1E10) {
+ // direct calculation
+ double f = 1.;
+ for (int i = 0; i < b; i++) f *= a--;
+ return log(f);
+ }
+
+ if (a > 100.*b && b > 1.) {
+ // combine Stirling formulas for a and (a-b) to avoid loss of precision
+ double ar = 1./a;
+ double cr = 1./(a-b);
+ // calculate -log(1-b/a) by Taylor expansion
+ double s = 0., lasts, n = 1., ba = b*ar, f = ba;
+ do {
+ lasts = s;
+ s += f/n;
+ f *= ba;
+ n++;
+ }
+ while (s != lasts);
+ return (a+0.5)*s + b*log(a-b) - b + (1./12.)*(ar-cr)
+ //- (1./360.)*(ar*ar*ar-cr*cr*cr)
+ ;
+ }
+ // use LnFacr function
+ return LnFacr(a)-LnFacr(a-b);
+}
+
+double Erf (double x) {
+ // Calculates the error function erf(x) as a series expansion or
+ // continued fraction expansion.
+ // This function may be available in math libraries as erf(x)
+ static const double rsqrtpi = 0.564189583547756286948; // 1/sqrt(pi)
+ static const double rsqrtpi2 = 1.12837916709551257390; // 2/sqrt(pi)
+ if (x < 0.) return -Erf(-x);
+ if (x > 6.) return 1.;
+ if (x < 2.4) {
+ // use series expansion
+ double term; // term in summation
+ double j21; // 2j+1
+ double sum = 0; // summation
+ double xx2 = x*x*2.; // 2x^2
+ int j;
+ term = x; j21 = 1.;
+ for (j=0; j<=50; j++) { // summation loop
+ sum += term;
+ if (term <= 1.E-13) break;
+ j21 += 2.;
+ term *= xx2 / j21;
+ }
+ return exp(-x*x) * sum * rsqrtpi2;
+ }
+ else {
+ // use continued fraction expansion
+ double a, f;
+ int n = int(2.25f*x*x - 23.4f*x + 60.84f); // predict expansion degree
+ if (n < 1) n = 1;
+ a = 0.5 * n; f = x;
+ for (; n > 0; n--) { // continued fraction loop
+ f = x + a / f;
+ a -= 0.5;
+ }
+ return 1. - exp(-x*x) * rsqrtpi / f;
+ }
+}
+
+
+int32 FloorLog2(float x) {
+ // This function calculates floor(log2(x)) for positive x.
+ // The return value is <= -127 for x <= 0.
+
+ union UfloatInt { // Union for extracting bits from a float
+ float f;
+ int32 i;
+ UfloatInt(float ff) {f = ff;} // constructor
+ };
+
+#if defined(_M_IX86) || defined(__INTEL__) || defined(_M_X64) || defined(__IA64__) || defined(__POWERPC__)
+ // Running on a platform known to use IEEE-754 floating point format
+ //int32 n = *(int32*)&x;
+ int32 n = UfloatInt(x).i;
+ return (n >> 23) - 0x7F;
+#else
+ // Check if floating point format is IEEE-754
+ static const UfloatInt check(1.0f);
+ if (check.i == 0x3F800000) {
+ // We have the standard IEEE floating point format
+ int32 n = UfloatInt(x).i;
+ return (n >> 23) - 0x7F;
+ }
+ else {
+ // Unknown floating point format
+ if (x <= 0.f) return -127;
+ return (int32)floor(log(x)*(1./LN2));
+ }
+#endif
+}
+
+
+int NumSD (double accuracy) {
+ // Gives the length of the integration interval necessary to achieve
+ // the desired accuracy when integrating/summating a probability
+ // function, relative to the standard deviation
+ // Returns an integer approximation to 2*NormalDistrFractile(accuracy/2)
+ static const double fract[] = {
+ 2.699796e-03, 4.652582e-04, 6.334248e-05, 6.795346e-06, 5.733031e-07,
+ 3.797912e-08, 1.973175e-09, 8.032001e-11, 2.559625e-12, 6.381783e-14};
+ int i;
+ for (i = 0; i < (int)(sizeof(fract)/sizeof(*fract)); i++) {
+ if (accuracy >= fract[i]) break;
+ }
+ return i + 6;
+}
+
+
+/***********************************************************************
+Methods for class CWalleniusNCHypergeometric
+***********************************************************************/
+
+CWalleniusNCHypergeometric::CWalleniusNCHypergeometric(int32 n_, int32 m_, int32 N_, double odds_, double accuracy_) {
+ // constructor
+ accuracy = accuracy_;
+ SetParameters(n_, m_, N_, odds_);}
+
+
+void CWalleniusNCHypergeometric::SetParameters(int32 n_, int32 m_, int32 N_, double odds) {
+ // change parameters
+ if (n_ < 0 || n_ > N_ || m_ < 0 || m_ > N_ || odds < 0) FatalError("Parameter out of range in CWalleniusNCHypergeometric");
+ n = n_; m = m_; N = N_; omega = odds; // set parameters
+ xmin = m + n - N; if (xmin < 0) xmin = 0; // calculate xmin
+ xmax = n; if (xmax > m) xmax = m; // calculate xmax
+ xLastBico = xLastFindpars = -99; // indicate last x is invalid
+ r = 1.; // initialize
+}
+
+
+double CWalleniusNCHypergeometric::mean(void) {
+ // find approximate mean
+ int iter; // number of iterations
+ double a, b; // temporaries in calculation of first guess
+ double mean, mean1; // iteration value of mean
+ double m1r, m2r; // 1/m, 1/m2
+ double e1, e2; // temporaries
+ double g; // function to find root of
+ double gd; // derivative of g
+ double omegar; // 1/omega
+
+ if (omega == 1.) { // simple hypergeometric
+ return double(m)*n/N;
+ }
+
+ if (omega == 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in CWalleniusNCHypergeometric::mean");
+ return 0.;
+ }
+
+ if (xmin == xmax) return xmin;
+
+ // calculate Cornfield mean of Fisher noncentral hypergeometric distribution as first guess
+ a = (m+n)*omega + (N-m-n);
+ b = a*a - 4.*omega*(omega-1.)*m*n;
+ b = b > 0. ? sqrt(b) : 0.;
+ mean = (a-b)/(2.*(omega-1.));
+ if (mean < xmin) mean = xmin;
+ if (mean > xmax) mean = xmax;
+
+ m1r = 1./m; m2r = 1./(N-m);
+ iter = 0;
+
+ if (omega > 1.) {
+ do { // Newton Raphson iteration
+ mean1 = mean;
+ e1 = 1.-(n-mean)*m2r;
+ if (e1 < 1E-14) {
+ e2 = 0.; // avoid underflow
+ }
+ else {
+ e2 = pow(e1,omega-1.);
+ }
+ g = e2*e1 + (mean-m)*m1r;
+ gd = e2*omega*m2r + m1r;
+ mean -= g / gd;
+ if (mean < xmin) mean = xmin;
+ if (mean > xmax) mean = xmax;
+ if (++iter > 40) {
+ FatalError("Search for mean failed in function CWalleniusNCHypergeometric::mean");
+ }
+ }
+ while (fabs(mean1 - mean) > 2E-6);
+ }
+ else { // omega < 1
+ omegar = 1./omega;
+ do { // Newton Raphson iteration
+ mean1 = mean;
+ e1 = 1.-mean*m1r;
+ if (e1 < 1E-14) {
+ e2 = 0.; // avoid underflow
+ }
+ else {
+ e2 = pow(e1,omegar-1.);
+ }
+ g = 1.-(n-mean)*m2r-e2*e1;
+ gd = e2*omegar*m1r + m2r;
+ mean -= g / gd;
+ if (mean < xmin) mean = xmin;
+ if (mean > xmax) mean = xmax;
+ if (++iter > 40) {
+ FatalError("Search for mean failed in function CWalleniusNCHypergeometric::mean");
+ }
+ }
+ while (fabs(mean1 - mean) > 2E-6);
+ }
+ return mean;
+}
+
+
+double CWalleniusNCHypergeometric::variance(void) {
+ // find approximate variance (poor approximation)
+ double my = mean(); // approximate mean
+ // find approximate variance from Fisher's noncentral hypergeometric approximation
+ double r1 = my * (m-my); double r2 = (n-my)*(my+N-n-m);
+ if (r1 <= 0. || r2 <= 0.) return 0.;
+ double var = N*r1*r2/((N-1)*(m*r2+(N-m)*r1));
+ if (var < 0.) var = 0.;
+ return var;
+}
+
+
+double CWalleniusNCHypergeometric::moments(double * mean_, double * var_) {
+ // calculate exact mean and variance
+ // return value = sum of f(x), expected = 1.
+ double y, sy=0, sxy=0, sxxy=0, me1;
+ int32 x, xm, x1;
+ const double accur = 0.1 * accuracy; // accuracy of calculation
+ xm = (int32)mean(); // approximation to mean
+ for (x = xm; x <= xmax; x++) {
+ y = probability(x);
+ x1 = x - xm; // subtract approximate mean to avoid loss of precision in sums
+ sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
+ if (y < accur && x != xm) break;
+ }
+ for (x = xm-1; x >= xmin; x--) {
+ y = probability(x);
+ x1 = x - xm; // subtract approximate mean to avoid loss of precision in sums
+ sy += y; sxy += x1 * y; sxxy += x1 * x1 * y;
+ if (y < accur) break;
+ }
+ me1 = sxy / sy;
+ *mean_ = me1 + xm;
+ y = sxxy / sy - me1 * me1;
+ if (y < 0) y=0;
+ *var_ = y;
+ return sy;
+}
+
+
+int32 CWalleniusNCHypergeometric::mode(void) {
+ // find mode
+ int32 Mode; // mode
+
+ if (omega == 1.) {
+ // simple hypergeometric
+ int32 L = m + n - N;
+ int32 m1 = m + 1, n1 = n + 1;
+ Mode = int32((double)m1*n1*omega/((m1+n1)*omega-L));
+ }
+ else {
+ // find mode
+ double f, f2 = -1.; // f2 = 0.;
+ int32 xi, x2;
+ int32 xmin = m + n - N; if (xmin < 0) xmin = 0; // calculate xmin
+ int32 xmax = n; if (xmax > m) xmax = m; // calculate xmax
+
+ Mode = (int32)mean(); // floor(mean)
+ if (omega < 1.) {
+ if (Mode < xmax) Mode++; // ceil(mean)
+ x2 = xmin; // lower limit
+ if (omega > 0.294 && N <= 10000000) {
+ x2 = Mode - 1;} // search for mode can be limited
+ for (xi = Mode; xi >= x2; xi--) {
+ f = probability(xi);
+ if (f <= f2) break;
+ Mode = xi; f2 = f;
+ }
+ }
+ else {
+ if (Mode < xmin) Mode++;
+ x2 = xmax; // upper limit
+ if (omega < 3.4 && N <= 10000000) {
+ x2 = Mode + 1;} // search for mode can be limited
+ for (xi = Mode; xi <= x2; xi++) {
+ f = probability(xi);
+ if (f <= f2) break;
+ Mode = xi; f2 = f;
+ }
+ }
+ }
+ return Mode;
+}
+
+
+double CWalleniusNCHypergeometric::lnbico() {
+ // natural log of binomial coefficients.
+ // returns lambda = log(m!*x!/(m-x)!*m2!*x2!/(m2-x2)!)
+ int32 x2 = n-x, m2 = N-m;
+ if (xLastBico < 0) { // m, n, N have changed
+ mFac = LnFac(m) + LnFac(m2);
+ }
+ if (m < FAK_LEN && m2 < FAK_LEN) goto DEFLT;
+ switch (x - xLastBico) {
+ case 0: // x unchanged
+ break;
+ case 1: // x incremented. calculate from previous value
+ xFac += log (double(x) * (m2-x2) / (double(x2+1)*(m-x+1)));
+ break;
+ case -1: // x decremented. calculate from previous value
+ xFac += log (double(x2) * (m-x) / (double(x+1)*(m2-x2+1)));
+ break;
+ default: DEFLT: // calculate all
+ xFac = LnFac(x) + LnFac(x2) + LnFac(m-x) + LnFac(m2-x2);
+ }
+ xLastBico = x;
+ return bico = mFac - xFac;
+}
+
+
+void CWalleniusNCHypergeometric::findpars() {
+ // calculate d, E, r, w
+ if (x == xLastFindpars) {
+ return; // all values are unchanged since last call
+ }
+
+ // find r to center peak of integrand at 0.5
+ double dd, d1, z, zd, rr, lastr, rrc, rt, r2, r21, a, b, dummy;
+ double oo[2];
+ double xx[2] = {double(x), double(n-x)};
+ int i, j = 0;
+ if (omega > 1.) { // make both omegas <= 1 to avoid overflow
+ oo[0] = 1.; oo[1] = 1./omega;
+ }
+ else {
+ oo[0] = omega; oo[1] = 1.;
+ }
+ dd = oo[0]*(m-x) + oo[1]*(N-m-xx[1]);
+ d1 = 1./dd;
+ E = (oo[0]*m + oo[1]*(N-m)) * d1;
+ rr = r;
+ if (rr <= d1) rr = 1.2*d1; // initial guess
+ // Newton-Raphson iteration to find r
+ do {
+ lastr = rr;
+ rrc = 1. / rr;
+ z = dd - rrc;
+ zd = rrc * rrc;
+ for (i=0; i<2; i++) {
+ rt = rr * oo[i];
+ if (rt < 100.) { // avoid overflow if rt big
+ r21 = pow2_1(rt, &r2); // r2=2^r, r21=1.-2^r
+ a = oo[i] / r21; // omegai/(1.-2^r)
+ b = xx[i] * a; // x*omegai/(1.-2^r)
+ z += b;
+ zd += b * a * LN2 * r2;
+ }
+ }
+ if (zd == 0) FatalError("can't find r in function CWalleniusNCHypergeometric::findpars");
+ rr -= z / zd;
+ if (rr <= d1) rr = lastr * 0.125 + d1*0.875;
+ if (++j == 70) FatalError("convergence problem searching for r in function CWalleniusNCHypergeometric::findpars");
+ }
+ while (fabs(rr-lastr) > rr * 1.E-6);
+ if (omega > 1) {
+ dd *= omega; rr *= oo[1];
+ }
+ r = rr; rd = rr * dd;
+
+ // find peak width
+ double ro, k1, k2;
+ ro = r * omega;
+ if (ro < 300) { // avoid overflow
+ k1 = pow2_1(ro, &dummy);
+ k1 = -1. / k1;
+ k1 = omega*omega*(k1+k1*k1);
+ }
+ else k1 = 0.;
+ if (r < 300) { // avoid overflow
+ k2 = pow2_1(r, &dummy);
+ k2 = -1. / k2;
+ k2 = (k2+k2*k2);
+ }
+ else k2 = 0.;
+ phi2d = -4.*r*r*(x*k1 + (n-x)*k2);
+ if (phi2d >= 0.) {
+ FatalError("peak width undefined in function CWalleniusNCHypergeometric::findpars");
+ /* wr = r = 0.; */
+ }
+ else {
+ wr = sqrt(-phi2d); w = 1./wr;
+ }
+ xLastFindpars = x;
+}
+
+
+int CWalleniusNCHypergeometric::BernouilliH(int32 x_, double h, double rh, StochasticLib1 *sto) {
+ // This function generates a Bernouilli variate with probability proportional
+ // to the univariate Wallenius' noncentral hypergeometric distribution.
+ // The return value will be 1 with probability f(x_)/h and 0 with probability
+ // 1-f(x_)/h.
+ // This is equivalent to calling sto->Bernouilli(probability(x_)/h),
+ // but this method is faster. The method used here avoids calculating the
+ // Wallenius probability by sampling in the t-domain.
+ // rh is a uniform random number in the interval 0 <= rh < h. The function
+ // uses additional random numbers generated from sto.
+ // This function is intended for use in rejection methods for sampling from
+ // the Wallenius distribution. It is called from
+ // StochasticLib3::WalleniusNCHypRatioOfUnifoms in the file stoc3.cpp
+ double f0; // Lambda*Phi(�)
+ double phideri0; // phi(�)/rd
+ double qi; // 2^(-r*omega[i])
+ double qi1; // 1-qi
+ double omegai[2] = {omega,1.}; // weights for each color
+ double romegi; // r*omega[i]
+ double xi[2] = {double(x_), double(n-x_)}; // number of each color sampled
+ double k; // adjusted width for majorizing function Ypsilon(t)
+ double erfk; // erf correction
+ double rdm1; // rd - 1
+ double G_integral; // integral of majorizing function Ypsilon(t)
+ double ts; // t sampled from Ypsilon(t) distribution
+ double logts; // log(ts)
+ double rlogts; // r*log(ts)
+ double fts; // Phi(ts)/rd
+ double rgts; // 1/(Ypsilon(ts)/rd)
+ double t2; // temporary in calculation of Ypsilon(ts)
+ int i, j; // loop counters
+ static const double rsqrt8 = 0.3535533905932737622; // 1/sqrt(8)
+ static const double sqrt2pi = 2.506628274631000454; // sqrt(2*pi)
+
+ x = x_; // save x in class object
+ lnbico(); // calculate bico = log(Lambda)
+ findpars(); // calculate r, d, rd, w, E
+ if (E > 0.) {
+ k = log(E); // correction for majorizing function
+ k = 1. + 0.0271 * (k * sqrt(k));
+ }
+ else k = 1.;
+ k *= w; // w * k
+ rdm1 = rd - 1.;
+
+ // calculate phi(�)/rd
+ phideri0 = -LN2 * rdm1;
+ for (i=0; i<2; i++) {
+ romegi = r * omegai[i];
+ if (romegi > 40.) {
+ qi=0.; qi1 = 1.; // avoid underflow
+ }
+ else {
+ qi1 = pow2_1(-romegi, &qi);
+ }
+ phideri0 += xi[i] * log1mx(qi, qi1);
+ }
+
+ erfk = Erf(rsqrt8 / k);
+ f0 = rd * exp(phideri0 + bico);
+ G_integral = f0 * sqrt2pi * k * erfk;
+
+ if (G_integral <= h) { // G fits under h-hat
+ do {
+ ts = sto->Normal(0,k); // sample ts from normal distribution
+ }
+ while (fabs(ts) >= 0.5); // reject values outside interval, and avoid ts = 0
+ ts += 0.5; // ts = normal distributed in interval (0,1)
+
+ for (fts=0., j=0; j<2; j++) { // calculate (Phi(ts)+Phi(1-ts))/2
+ logts = log(ts); rlogts = r * logts; // (ts = 0 avoided above)
+ fts += exp(log1pow(rlogts*omega,xi[0]) + log1pow(rlogts,xi[1]) + rdm1*logts + bico);
+ ts = 1. - ts;
+ }
+ fts *= 0.5;
+
+ t2 = (ts-0.5) / k; // calculate 1/Ypsilon(ts)
+ rgts = exp(-(phideri0 + bico - 0.5 * t2*t2));
+ return rh < G_integral * fts * rgts; // Bernouilli variate
+ }
+
+ else { // G > h: can't use sampling in t-domain
+ return rh < probability(x);
+ }
+}
+
+
+/***********************************************************************
+methods for calculating probability in class CWalleniusNCHypergeometric
+***********************************************************************/
+
+double CWalleniusNCHypergeometric::recursive() {
+ // recursive calculation
+ // Wallenius noncentral hypergeometric distribution by recursion formula
+ // Approximate by ignoring probabilities < accuracy and minimize storage requirement
+ const int BUFSIZE = 512; // buffer size
+ double p[BUFSIZE+2]; // probabilities
+ double * p1, * p2; // offset into p
+ double mxo; // (m-x)*omega
+ double Nmnx; // N-m-nu+x
+ double y, y1; // save old p[x] before it is overwritten
+ double d1, d2, dcom; // divisors in probability formula
+ double accuracya; // absolute accuracy
+ int32 xi, nu; // xi, nu = recursion values of x, n
+ int32 x1, x2; // xi_min, xi_max
+
+ accuracya = 0.005f * accuracy; // absolute accuracy
+ p1 = p2 = p + 1; // make space for p1[-1]
+ p1[-1] = 0.; p1[0] = 1.; // initialize for recursion
+ x1 = x2 = 0;
+ for (nu=1; nu<=n; nu++) {
+ if (n - nu < x - x1 || p1[x1] < accuracya) {
+ x1++; // increase lower limit when breakpoint passed or probability negligible
+ p2--; // compensate buffer offset in order to reduce storage space
+ }
+ if (x2 < x && p1[x2] >= accuracya) {
+ x2++; y1 = 0.; // increase upper limit until x has been reached
+ }
+ else {
+ y1 = p1[x2];
+ }
+ if (x1 > x2) return 0.;
+ if (p2+x2-p > BUFSIZE) FatalError("buffer overrun in function CWalleniusNCHypergeometric::recursive");
+
+ mxo = (m-x2)*omega;
+ Nmnx = N-m-nu+x2+1;
+ for (xi = x2; xi >= x1; xi--) { // backwards loop
+ d2 = mxo + Nmnx;
+ mxo += omega; Nmnx--;
+ d1 = mxo + Nmnx;
+ dcom = 1. / (d1 * d2); // save a division by making common divisor
+ y = p1[xi-1]*mxo*d2*dcom + y1*(Nmnx+1)*d1*dcom;
+ y1 = p1[xi-1]; // (warning: pointer alias, can't swap instruction order)
+ p2[xi] = y;
+ }
+ p1 = p2;
+ }
+
+ if (x < x1 || x > x2) return 0.;
+ return p1[x];
+}
+
+
+double CWalleniusNCHypergeometric::binoexpand() {
+ // calculate by binomial expansion of integrand
+ // only for x < 2 or n-x < 2 (not implemented for higher x because of loss of precision)
+ int32 x1, m1, m2;
+ double o;
+ if (x > n/2) { // invert
+ x1 = n-x; m1 = N-m; m2 = m; o = 1./omega;
+ }
+ else {
+ x1 = x; m1 = m; m2 = N-m; o = omega;
+ }
+ if (x1 == 0) {
+ return exp(FallingFactorial(m2,n) - FallingFactorial(m2+o*m1,n));
+ }
+ if (x1 == 1) {
+ double d, e, q, q0, q1;
+ q = FallingFactorial(m2,n-1);
+ e = o*m1+m2;
+ q1 = q - FallingFactorial(e,n);
+ e -= o;
+ q0 = q - FallingFactorial(e,n);
+ d = e - (n-1);
+ return m1*d*(exp(q0) - exp(q1));
+ }
+
+ FatalError("x > 1 not supported by function CWalleniusNCHypergeometric::binoexpand");
+ return 0;
+}
+
+
+double CWalleniusNCHypergeometric::laplace() {
+ // Laplace's method with narrow integration interval,
+ // using error function residues table, defined in erfres.cpp
+ // Note that this function can only be used when the integrand peak is narrow.
+ // findpars() must be called before this function.
+
+ const int COLORS = 2; // number of colors
+ const int MAXDEG = 40; // arraysize, maximum expansion degree
+ int degree; // max expansion degree
+ double accur; // stop expansion when terms below this threshold
+ double omegai[COLORS] = {omega, 1.}; // weights for each color
+ double xi[COLORS] = {double(x), double(n-x)}; // number of each color sampled
+ double f0; // factor outside integral
+ double rho[COLORS]; // r*omegai
+ double qi; // 2^(-rho)
+ double qi1; // 1-qi
+ double qq[COLORS]; // qi / qi1
+ double eta[COLORS+1][MAXDEG+1]; // eta coefficients
+ double phideri[MAXDEG+1]; // derivatives of phi
+ double PSIderi[MAXDEG+1]; // derivatives of PSI
+ double * erfresp; // pointer to table of error function residues
+
+ // variables in asymptotic summation
+ static const double sqrt8 = 2.828427124746190098; // sqrt(8)
+ double qqpow; // qq^j
+ double pow2k; // 2^k
+ double bino; // binomial coefficient
+ double vr; // 1/v, v = integration interval
+ double v2m2; // (2*v)^(-2)
+ double v2mk1; // (2*v)^(-k-1)
+ double s; // summation term
+ double sum; // Taylor sum
+
+ int i; // loop counter for color
+ int j; // loop counter for derivative
+ int k; // loop counter for expansion degree
+ int ll; // k/2
+ int converg = 0; // number of consequtive terms below accuracy
+ int PrecisionIndex; // index into ErfRes table according to desired precision
+
+ // initialize
+ for (k = 0; k <= 2; k++) phideri[k] = PSIderi[k] = 0;
+
+ // find rho[i], qq[i], first eta coefficients, and zero'th derivative of phi
+ for (i = 0; i < COLORS; i++) {
+ rho[i] = r * omegai[i];
+ if (rho[i] > 40.) {
+ qi=0.; qi1 = 1.;} // avoid underflow
+ else {
+ qi1 = pow2_1(-rho[i], &qi);} // qi=2^(-rho), qi1=1.-2^(-rho)
+ qq[i] = qi / qi1; // 2^(-r*omegai)/(1.-2^(-r*omegai))
+ // peak = zero'th derivative
+ phideri[0] += xi[i] * log1mx(qi, qi1);
+ // eta coefficients
+ eta[i][0] = 0.;
+ eta[i][1] = eta[i][2] = rho[i]*rho[i];
+ }
+
+ // r, rd, and w must be calculated by findpars()
+ // zero'th derivative
+ phideri[0] -= (rd - 1.) * LN2;
+ // scaled factor outside integral
+ f0 = rd * exp(phideri[0] + lnbico());
+
+ vr = sqrt8 * w;
+ phideri[2] = phi2d;
+
+ // get table according to desired precision
+ PrecisionIndex = (-FloorLog2((float)accuracy) - ERFRES_B + ERFRES_S - 1) / ERFRES_S;
+ if (PrecisionIndex < 0) PrecisionIndex = 0;
+ if (PrecisionIndex > ERFRES_N-1) PrecisionIndex = ERFRES_N-1;
+ while (w * NumSDev[PrecisionIndex] > 0.3) {
+ // check if integration interval is too wide
+ if (PrecisionIndex == 0) {
+ FatalError("Laplace method failed. Peak width too high in function CWalleniusNCHypergeometric::laplace");
+ break;}
+ PrecisionIndex--; // reduce precision to keep integration interval narrow
+ }
+ erfresp = ErfRes[PrecisionIndex]; // choose desired table
+
+ degree = MAXDEG; // max expansion degree
+ if (degree >= ERFRES_L*2) degree = ERFRES_L*2-2;
+
+ // set up for starting loop at k=3
+ v2m2 = 0.25 * vr * vr; // (2*v)^(-2)
+ PSIderi[0] = 1.;
+ pow2k = 8.;
+ sum = 0.5 * vr * erfresp[0];
+ v2mk1 = 0.5 * vr * v2m2 * v2m2;
+ accur = accuracy * sum;
+
+ // summation loop
+ for (k = 3; k <= degree; k++) {
+ phideri[k] = 0.;
+
+ // loop for all (2) colors
+ for (i = 0; i < COLORS; i++) {
+ eta[i][k] = 0.;
+ // backward loop for all powers
+ for (j = k; j > 0; j--) {
+ // find coefficients recursively from previous coefficients
+ eta[i][j] = eta[i][j]*(j*rho[i]-(k-2)) + eta[i][j-1]*rho[i]*(j-1);
+ }
+ qqpow = 1.;
+ // forward loop for all powers
+ for (j=1; j<=k; j++) {
+ qqpow *= qq[i]; // qq^j
+ // contribution to derivative
+ phideri[k] += xi[i] * eta[i][j] * qqpow;
+ }
+ }
+
+ // finish calculation of derivatives
+ phideri[k] = -pow2k*phideri[k] + 2*(1-k)*phideri[k-1];
+
+ pow2k *= 2.; // 2^k
+
+ // loop to calculate derivatives of PSI from derivatives of psi.
+ // terms # 0, 1, 2, k-2, and k-1 are zero and not included in loop.
+ // The j'th derivatives of psi are identical to the derivatives of phi for j>2, and
+ // zero for j=1,2. Hence we are using phideri[j] for j>2 here.
+ PSIderi[k] = phideri[k]; // this is term # k
+ bino = 0.5 * (k-1) * (k-2); // binomial coefficient for term # 3
+ for (j = 3; j < k-2; j++) { // loop for remaining nonzero terms (if k>5)
+ PSIderi[k] += PSIderi[k-j] * phideri[j] * bino;
+ bino *= double(k-j)/double(j);
+ }
+ if ((k & 1) == 0) { // only for even k
+ ll = k/2;
+ s = PSIderi[k] * v2mk1 * erfresp[ll];
+ sum += s;
+
+ // check for convergence of Taylor expansion
+ if (fabs(s) < accur) converg++; else converg = 0;
+ if (converg > 1) break;
+
+ // update recursive expressions
+ v2mk1 *= v2m2;
+ }
+ }
+ // multiply by terms outside integral
+ return f0 * sum;
+}
+
+
+double CWalleniusNCHypergeometric::integrate() {
+ // Wallenius non-central hypergeometric distribution function
+ // calculation by numerical integration with variable-length steps
+ // NOTE: findpars() must be called before this function.
+ double s; // result of integration step
+ double sum; // integral
+ double ta, tb; // subinterval for integration step
+
+ lnbico(); // compute log of binomial coefficients
+
+ // choose method:
+ if (w < 0.02 || (w < 0.1 && (x==m || n-x==N-m) && accuracy > 1E-6)) {
+ // normal method. Step length determined by peak width w
+ double delta, s1;
+ s1 = accuracy < 1E-9 ? 0.5 : 1.;
+ delta = s1 * w; // integration steplength
+ ta = 0.5 + 0.5 * delta;
+ sum = integrate_step(1.-ta, ta); // first integration step around center peak
+ do {
+ tb = ta + delta;
+ if (tb > 1.) tb = 1.;
+ s = integrate_step(ta, tb); // integration step to the right of peak
+ s += integrate_step(1.-tb,1.-ta);// integration step to the left of peak
+ sum += s;
+ if (s < accuracy * sum) break; // stop before interval finished if accuracy reached
+ ta = tb;
+ if (tb > 0.5 + w) delta *= 2.; // increase step length far from peak
+ }
+ while (tb < 1.);
+ }
+ else {
+ // difficult situation. Step length determined by inflection points
+ double t1, t2, tinf, delta, delta1;
+ sum = 0.;
+ // do left and right half of integration interval separately:
+ for (t1=0., t2=0.5; t1 < 1.; t1+=0.5, t2+=0.5) {
+ // integrate from 0 to 0.5 or from 0.5 to 1
+ tinf = search_inflect(t1, t2); // find inflection point
+ delta = tinf - t1; if (delta > t2 - tinf) delta = t2 - tinf; // distance to nearest endpoint
+ delta *= 1./7.; // 1/7 will give 3 steps to nearest endpoint
+ if (delta < 1E-4) delta = 1E-4;
+ delta1 = delta;
+ // integrate from tinf forwards to t2
+ ta = tinf;
+ do {
+ tb = ta + delta1;
+ if (tb > t2 - 0.25*delta1) tb = t2; // last step of this subinterval
+ s = integrate_step(ta, tb); // integration step
+ sum += s;
+ delta1 *= 2; // double steplength
+ if (s < sum * 1E-4) delta1 *= 8.; // large step when s small
+ ta = tb;
+ }
+ while (tb < t2);
+ if (tinf) {
+ // integrate from tinf backwards to t1
+ tb = tinf;
+ do {
+ ta = tb - delta;
+ if (ta < t1 + 0.25*delta) ta = t1; // last step of this subinterval
+ s = integrate_step(ta, tb); // integration step
+ sum += s;
+ delta *= 2; // double steplength
+ if (s < sum * 1E-4) delta *= 8.; // large step when s small
+ tb = ta;}
+ while (ta > t1);
+ }
+ }
+ }
+ return sum * rd;
+}
+
+
+double CWalleniusNCHypergeometric::integrate_step(double ta, double tb) {
+ // integration subprocedure used by integrate()
+ // makes one integration step from ta to tb using Gauss-Legendre method.
+ // result is scaled by multiplication with exp(bico)
+ double ab, delta, tau, ltau, y, sum, taur, rdm1;
+ int i;
+
+ // define constants for Gauss-Legendre integration with IPOINTS points
+#define IPOINTS 8 // number of points in each integration step
+
+#if IPOINTS == 3
+ static const double xval[3] = {-.774596669241,0,0.774596668241};
+ static const double weights[3] = {.5555555555555555,.88888888888888888,.55555555555555};
+#elif IPOINTS == 4
+ static const double xval[4] = {-0.861136311594,-0.339981043585,0.339981043585,0.861136311594},
+ static const double weights[4] = {0.347854845137,0.652145154863,0.652145154863,0.347854845137};
+#elif IPOINTS == 5
+ static const double xval[5] = {-0.906179845939,-0.538469310106,0,0.538469310106,0.906179845939};
+ static const double weights[5] = {0.236926885056,0.478628670499,0.568888888889,0.478628670499,0.236926885056};
+#elif IPOINTS == 6
+ static const double xval[6] = {-0.932469514203,-0.661209386466,-0.238619186083,0.238619186083,0.661209386466,0.932469514203};
+ static const double weights[6] = {0.171324492379,0.360761573048,0.467913934573,0.467913934573,0.360761573048,0.171324492379};
+#elif IPOINTS == 8
+ static const double xval[8] = {-0.960289856498,-0.796666477414,-0.525532409916,-0.183434642496,0.183434642496,0.525532409916,0.796666477414,0.960289856498};
+ static const double weights[8] = {0.10122853629,0.222381034453,0.313706645878,0.362683783378,0.362683783378,0.313706645878,0.222381034453,0.10122853629};
+#elif IPOINTS == 12
+ static const double xval[12] = {-0.981560634247,-0.90411725637,-0.769902674194,-0.587317954287,-0.367831498998,-0.125233408511,0.125233408511,0.367831498998,0.587317954287,0.769902674194,0.90411725637,0.981560634247};
+ static const double weights[12]= {0.0471753363866,0.106939325995,0.160078328543,0.203167426723,0.233492536538,0.249147045813,0.249147045813,0.233492536538,0.203167426723,0.160078328543,0.106939325995,0.0471753363866};
+#elif IPOINTS == 16
+ static const double xval[16] = {-0.989400934992,-0.944575023073,-0.865631202388,-0.755404408355,-0.617876244403,-0.458016777657,-0.281603550779,-0.0950125098376,0.0950125098376,0.281603550779,0.458016777657,0.617876244403,0.755404408355,0.865631202388,0.944575023073,0.989400934992};
+ static const double weights[16]= {0.027152459411,0.0622535239372,0.0951585116838,0.124628971256,0.149595988817,0.169156519395,0.182603415045,0.189450610455,0.189450610455,0.182603415045,0.169156519395,0.149595988817,0.124628971256,0.0951585116838,0.0622535239372,0.027152459411};
+#else
+#error // IPOINTS must be a value for which the tables are defined
+#endif
+
+ delta = 0.5 * (tb - ta);
+ ab = 0.5 * (ta + tb);
+ rdm1 = rd - 1.;
+ sum = 0;
+
+ for (i = 0; i < IPOINTS; i++) {
+ tau = ab + delta * xval[i];
+ ltau = log(tau);
+ taur = r * ltau;
+ // possible loss of precision due to subtraction here:
+ y = log1pow(taur*omega,x) + log1pow(taur,n-x) + rdm1*ltau + bico;
+ if (y > -50.) sum += weights[i] * exp(y);
+ }
+ return delta * sum;
+}
+
+
+double CWalleniusNCHypergeometric::search_inflect(double t_from, double t_to) {
+ // search for an inflection point of the integrand PHI(t) in the interval
+ // t_from < t < t_to
+ const int COLORS = 2; // number of colors
+ double t, t1; // independent variable
+ double rho[COLORS]; // r*omega[i]
+ double q; // t^rho[i] / (1-t^rho[i])
+ double q1; // 1-t^rho[i]
+ double xx[COLORS]; // x[i]
+ double zeta[COLORS][4][4]; // zeta[i,j,k] coefficients
+ double phi[4]; // derivatives of phi(t) = log PHI(t)
+ double Z2; // PHI''(t)/PHI(t)
+ double Zd; // derivative in Newton Raphson iteration
+ double rdm1; // r * d - 1
+ double tr; // 1/t
+ double log2t; // log2(t)
+ double method; // 0 for z2'(t) method, 1 for z3(t) method
+ int i; // color
+ int iter; // count iterations
+
+ rdm1 = rd - 1.;
+ if (t_from == 0 && rdm1 <= 1.) return 0.; //no inflection point
+ rho[0] = r*omega; rho[1] = r;
+ xx[0] = x; xx[1] = n - x;
+ t = 0.5 * (t_from + t_to);
+ for (i = 0; i < COLORS; i++) { // calculate zeta coefficients
+ zeta[i][1][1] = rho[i];
+ zeta[i][1][2] = rho[i] * (rho[i] - 1.);
+ zeta[i][2][2] = rho[i] * rho[i];
+ zeta[i][1][3] = zeta[i][1][2] * (rho[i] - 2.);
+ zeta[i][2][3] = zeta[i][1][2] * rho[i] * 3.;
+ zeta[i][3][3] = zeta[i][2][2] * rho[i] * 2.;
+ }
+ iter = 0;
+
+ do {
+ t1 = t;
+ tr = 1. / t;
+ log2t = log(t)*(1./LN2);
+ phi[1] = phi[2] = phi[3] = 0.;
+ for (i=0; i<COLORS; i++) { // calculate first 3 derivatives of phi(t)
+ q1 = pow2_1(rho[i]*log2t,&q);
+ q /= q1;
+ phi[1] -= xx[i] * zeta[i][1][1] * q;
+ phi[2] -= xx[i] * q * (zeta[i][1][2] + q * zeta[i][2][2]);
+ phi[3] -= xx[i] * q * (zeta[i][1][3] + q * (zeta[i][2][3] + q * zeta[i][3][3]));
+ }
+ phi[1] += rdm1;
+ phi[2] -= rdm1;
+ phi[3] += 2. * rdm1;
+ phi[1] *= tr;
+ phi[2] *= tr * tr;
+ phi[3] *= tr * tr * tr;
+ method = (iter & 2) >> 1; // alternate between the two methods
+ Z2 = phi[1]*phi[1] + phi[2];
+ Zd = method*phi[1]*phi[1]*phi[1] + (2.+method)*phi[1]*phi[2] + phi[3];
+
+ if (t < 0.5) {
+ if (Z2 > 0) {
+ t_from = t;
+ }
+ else {
+ t_to = t;
+ }
+ if (Zd >= 0) {
+ // use binary search if Newton-Raphson iteration makes problems
+ t = (t_from ? 0.5 : 0.2) * (t_from + t_to);
+ }
+ else {
+ // Newton-Raphson iteration
+ t -= Z2 / Zd;
+ }
+ }
+ else {
+ if (Z2 < 0) {
+ t_from = t;
+ }
+ else {
+ t_to = t;
+ }
+ if (Zd <= 0) {
+ // use binary search if Newton-Raphson iteration makes problems
+ t = 0.5 * (t_from + t_to);
+ }
+ else {
+ // Newton-Raphson iteration
+ t -= Z2 / Zd;
+ }
+ }
+ if (t >= t_to) t = (t1 + t_to) * 0.5;
+ if (t <= t_from) t = (t1 + t_from) * 0.5;
+ if (++iter > 20) FatalError("Search for inflection point failed in function CWalleniusNCHypergeometric::search_inflect");
+ }
+ while (fabs(t - t1) > 1E-5);
+ return t;
+}
+
+
+double CWalleniusNCHypergeometric::probability(int32 x_) {
+ // calculate probability function. choosing best method
+ x = x_;
+ if (x < xmin || x > xmax) return 0.;
+ if (xmin == xmax) return 1.;
+
+ if (omega == 1.) { // hypergeometric
+ return exp(lnbico() + LnFac(n) + LnFac(N-n) - LnFac(N));
+ }
+ if (omega == 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in CWalleniusNCHypergeometric::probability");
+ return x == 0;
+ }
+
+ int32 x2 = n - x;
+ int32 x0 = x < x2 ? x : x2;
+ int em = (x == m || x2 == N-m);
+
+ if (x0 == 0 && n > 500) {
+ return binoexpand();
+ }
+
+ if (double(n)*x0 < 1000 || (double(n)*x0 < 10000 && (N > 1000.*n || em))) {
+ return recursive();
+ }
+
+ if (x0 <= 1 && N-n <= 1) {
+ return binoexpand();
+ }
+
+ findpars();
+
+ if (w < 0.04 && E < 10 && (!em || w > 0.004)) {
+ return laplace();
+ }
+
+ return integrate();
+}
+
+
+int32 CWalleniusNCHypergeometric::MakeTable(double * table, int32 MaxLength, int32 * xfirst, int32 * xlast, double cutoff) {
+ // Makes a table of Wallenius noncentral hypergeometric probabilities
+ // table must point to an array of length MaxLength.
+ // The function returns 1 if table is long enough. Otherwise it fills
+ // the table with as many correct values as possible and returns 0.
+ // The tails are cut off where the values are < cutoff, so that
+ // *xfirst may be > xmin and *xlast may be < xmax.
+ // The value of cutoff will be 0.01 * accuracy if not specified.
+ // The first and last x value represented in the table are returned in
+ // *xfirst and *xlast. The resulting probability values are returned in
+ // the first (*xfirst - *xlast + 1) positions of table. Any unused part
+ // of table may be overwritten with garbage.
+ //
+ // The function will return the following information when MaxLength = 0:
+ // The return value is the desired length of table.
+ // *xfirst is 1 if it will be more efficient to call MakeTable than to call
+ // probability repeatedly, even if only some of the table values are needed.
+ // *xfirst is 0 if it is more efficient to call probability repeatedly.
+
+ double * p1, * p2; // offset into p
+ double mxo; // (m-x)*omega
+ double Nmnx; // N-m-nu+x
+ double y, y1; // probability. Save old p[x] before it is overwritten
+ double d1, d2, dcom; // divisors in probability formula
+ double area; // estimate of area needed for recursion method
+ int32 xi, nu; // xi, nu = recursion values of x, n
+ int32 x1, x2; // lowest and highest x or xi
+ int32 i1, i2; // index into table
+ int32 UseTable; // 1 if table method used
+ int32 LengthNeeded; // Necessary table length
+
+ // special cases
+ if (n == 0 || m == 0) {x1 = 0; goto DETERMINISTIC;}
+ if (n == N) {x1 = m; goto DETERMINISTIC;}
+ if (m == N) {x1 = n; goto DETERMINISTIC;}
+ if (omega <= 0.) {
+ if (n > N-m) FatalError("Not enough items with nonzero weight in CWalleniusNCHypergeometric::MakeTable");
+ x1 = 0;
+ DETERMINISTIC:
+ if (MaxLength == 0) {
+ if (xfirst) *xfirst = 1;
+ return 1;
+ }
+ *xfirst = *xlast = x1;
+ *table = 1.;
+ return 1;
+ }
+
+ if (cutoff <= 0. || cutoff > 0.1) cutoff = 0.01 * accuracy;
+
+ LengthNeeded = N - m; // m2
+ if (m < LengthNeeded) LengthNeeded = m;
+ if (n < LengthNeeded) LengthNeeded = n; // LengthNeeded = min(m1,m2,n)
+ area = double(n)*LengthNeeded; // Estimate calculation time for table method
+ UseTable = area < 5000. || (area < 10000. && N > 1000. * n);
+
+ if (MaxLength <= 0) {
+ // Return UseTable and LengthNeeded
+ if (xfirst) *xfirst = UseTable;
+ i1 = LengthNeeded + 2; // Necessary table length
+ if (!UseTable && i1 > 200) {
+ // Calculate necessary table length from standard deviation
+ double sd = sqrt(variance()); // calculate approximate standard deviation
+ // estimate number of standard deviations to include from normal distribution
+ i2 = (int32)(NumSD(accuracy) * sd + 0.5);
+ if (i1 > i2) i1 = i2;
+ }
+ return i1;
+ }
+
+ if (UseTable && MaxLength > LengthNeeded) {
+ // use recursion table method
+ p1 = p2 = table + 1; // make space for p1[-1]
+ p1[-1] = 0.; p1[0] = 1.; // initialize for recursion
+ x1 = x2 = 0;
+ for (nu = 1; nu <= n; nu++) {
+ if (n - nu < xmin - x1 || p1[x1] < cutoff) {
+ x1++; // increase lower limit when breakpoint passed or probability negligible
+ p2--; // compensate buffer offset in order to reduce storage space
+ }
+ if (x2 < xmax && p1[x2] >= cutoff) {
+ x2++; y1 = 0.; // increase upper limit until x has been reached
+ }
+ else {
+ y1 = p1[x2];
+ }
+ if (p2 - table + x2 >= MaxLength || x1 > x2) {
+ goto ONE_BY_ONE; // Error: table length exceeded. Use other method
+ }
+
+ mxo = (m-x2)*omega;
+ Nmnx = N-m-nu+x2+1;
+ for (xi = x2; xi >= x1; xi--) { // backwards loop
+ d2 = mxo + Nmnx;
+ mxo += omega; Nmnx--;
+ d1 = mxo + Nmnx;
+ dcom = 1. / (d1 * d2); // save a division by making common divisor
+ y = p1[xi-1]*mxo*d2*dcom + y1*(Nmnx+1)*d1*dcom;
+ y1 = p1[xi-1]; // (warning: pointer alias, can't swap instruction order)
+ p2[xi] = y;
+ }
+ p1 = p2;
+ }
+
+ // return results
+ i1 = i2 = x2 - x1 + 1; // desired table length
+ if (i2 > MaxLength) i2 = MaxLength; // limit table length
+ *xfirst = x1; *xlast = x1 + i2 - 1;
+ if (i2 > 0) memmove(table, table+1, i2*sizeof(table[0]));// copy to start of table
+ return i1 == i2; // true if table size not reduced
+ }
+
+ else {
+ // Recursion method would take too much time
+ // Calculate values one by one
+ ONE_BY_ONE:
+
+ // Start to fill table from the end and down. start with x = floor(mean)
+ x2 = (int32)mean();
+ x1 = x2 + 1; i1 = MaxLength;
+ while (x1 > xmin) { // loop for left tail
+ x1--; i1--;
+ y = probability(x1);
+ table[i1] = y;
+ if (y < cutoff) break;
+ if (i1 == 0) break;
+ }
+ *xfirst = x1;
+ i2 = x2 - x1 + 1;
+ if (i1 > 0 && i2 > 0) { // move numbers down to beginning of table
+ memmove(table, table+i1, i2*sizeof(table[0]));
+ }
+ // Fill rest of table from mean and up
+ i2--;
+ while (x2 < xmax) { // loop for right tail
+ if (i2 == MaxLength-1) {
+ *xlast = x2; return 0; // table full
+ }
+ x2++; i2++;
+ y = probability(x2);
+ table[i2] = y;
+ if (y < cutoff) break;
+ }
+ *xlast = x2;
+ return 1;
+ }
+}
+
+
+/***********************************************************************
+calculation methods in class CMultiWalleniusNCHypergeometric
+***********************************************************************/
+
+CMultiWalleniusNCHypergeometric::CMultiWalleniusNCHypergeometric(int32 n_, int32 * m_, double * odds_, int colors_, double accuracy_) {
+ // constructor
+ accuracy = accuracy_;
+ SetParameters(n_, m_, odds_, colors_);
+}
+
+
+void CMultiWalleniusNCHypergeometric::SetParameters(int32 n_, int32 * m_, double * odds_, int colors_) {
+ // change parameters
+ int32 N1;
+ int i;
+ n = n_; m = m_; omega = odds_; colors = colors_;
+ r = 1.;
+ for (N = N1 = 0, i = 0; i < colors; i++) {
+ if (m[i] < 0 || omega[i] < 0) FatalError("Parameter negative in constructor for CMultiWalleniusNCHypergeometric");
+ N += m[i];
+ if (omega[i]) N1 += m[i];
+ }
+ if (N < n) FatalError("Taking more items than there are in CMultiWalleniusNCHypergeometric");
+ if (N1< n) FatalError("Not enough items with nonzero weight in CMultiWalleniusNCHypergeometric");
+}
+
+
+void CMultiWalleniusNCHypergeometric::mean(double * mu) {
+ // calculate approximate mean of multivariate Wallenius noncentral hypergeometric
+ // distribution. Result is returned in mu[0..colors-1]
+ double omeg[MAXCOLORS]; // scaled weights
+ double omr; // reciprocal mean weight
+ double t, t1; // independent variable in iteration
+ double To, To1; // exp(t*omega[i]), 1-exp(t*omega[i])
+ double H; // function to find root of
+ double HD; // derivative of H
+ double dummy; // unused return
+ int i; // color index
+ int iter; // number of iterations
+
+ if (n == 0) {
+ // needs special case
+ for (i = 0; i < colors; i++) {
+ mu[i] = 0.;
+ }
+ return;
+ }
+
+ // calculate mean weight
+ for (omr=0., i=0; i < colors; i++) omr += omega[i] * m[i];
+ omr = N / omr;
+
+ // scale weights to make mean = 1
+ for (i = 0; i < colors; i++) omeg[i] = omega[i] * omr;
+
+ // Newton Raphson iteration
+ iter = 0; t = -1.; // first guess
+ do {
+ t1 = t;
+ H = HD = 0.;
+ // calculate H and HD
+ for (i = 0; i < colors; i++) {
+ if (omeg[i] != 0.) {
+ To1 = pow2_1(t * (1./LN2) * omeg[i], &To);
+ H += m[i] * To1;
+ HD -= m[i] * omeg[i] * To;
+ }
+ }
+ t -= (H-n) / HD;
+ if (t >= 0) {
+ t = 0.5 * t1;
+ }
+ if (++iter > 20) {
+ FatalError("Search for mean failed in function CMultiWalleniusNCHypergeometric::mean");
+ }
+ }
+ while (fabs(H - n) > 1E-5);
+
+ // finished iteration. Get all mu[i]
+ for (i = 0; i < colors; i++) {
+ if (omeg[i] != 0.) {
+ To1 = pow2_1(t * (1./LN2) * omeg[i], &dummy);
+ mu[i] = m[i] * To1;
+ }
+ else {
+ mu[i] = 0.;
+ }
+ }
+}
+
+
+void CMultiWalleniusNCHypergeometric::variance(double * var, double * mean_) {
+ // calculates approximate variance and mean of multivariate
+ // Wallenius' noncentral hypergeometric distribution
+ // (accuracy is not too good).
+ // Variance is returned in variance[0..colors-1].
+ // Mean is returned in mean_[0..colors-1] if not NULL.
+ // The calculation is reasonably fast.
+ double r1, r2;
+ double mu[MAXCOLORS];
+ int i;
+
+ // Store mean in array mu if mean_ is NULL
+ if (mean_ == 0) mean_ = mu;
+
+ // Calculate mean
+ mean(mean_);
+
+ // Calculate variance
+ for (i = 0; i < colors; i++) {
+ r1 = mean_[i] * (m[i]-mean_[i]);
+ r2 = (n-mean_[i])*(mean_[i]+N-n-m[i]);
+ if (r1 <= 0. || r2 <= 0.) {
+ var[i] = 0.;
+ }
+ else {
+ var[i] = N*r1*r2/((N-1)*(m[i]*r2+(N-m[i])*r1));
+ }
+ }
+}
+
+
+// implementations of different calculation methods
+double CMultiWalleniusNCHypergeometric::binoexpand(void) {
+ // binomial expansion of integrand
+ // only implemented for x[i] = 0 for all but one i
+ int i, j, k;
+ double W = 0.; // total weight
+ for (i=j=k=0; i<colors; i++) {
+ W += omega[i] * m[i];
+ if (x[i]) {
+ j=i; k++; // find the nonzero x[i]
+ }
+ }
+ if (k > 1) FatalError("More than one x[i] nonzero in CMultiWalleniusNCHypergeometric::binoexpand");
+ return exp(FallingFactorial(m[j],n) - FallingFactorial(W/omega[j],n));
+}
+
+
+double CMultiWalleniusNCHypergeometric::lnbico(void) {
+ // natural log of binomial coefficients
+ bico = 0.;
+ int i;
+ for (i=0; i<colors; i++) {
+ if (x[i] < m[i] && omega[i]) {
+ bico += LnFac(m[i]) - LnFac(x[i]) - LnFac(m[i]-x[i]);
+ }
+ }
+ return bico;
+}
+
+
+void CMultiWalleniusNCHypergeometric::findpars(void) {
+ // calculate r, w, E
+ // calculate d, E, r, w
+
+ // find r to center peak of integrand at 0.5
+ double dd; // scaled d
+ double dr; // 1/d
+
+ double z, zd, rr, lastr, rrc, rt, r2, r21, a, b, ro, k1, dummy;
+ double omax; // highest omega
+ double omaxr; // 1/omax
+ double omeg[MAXCOLORS]; // scaled weights
+ int i, j = 0;
+
+ // find highest omega
+ for (omax=0., i=0; i < colors; i++) {
+ if (omega[i] > omax) omax = omega[i];
+ }
+ omaxr = 1. / omax;
+ dd = E = 0.;
+ for (i = 0; i < colors; i++) {
+ // scale weights to make max = 1
+ omeg[i] = omega[i] * omaxr;
+ // calculate d and E
+ dd += omeg[i] * (m[i]-x[i]);
+ E += omeg[i] * m[i];
+ }
+ dr = 1. / dd;
+ E *= dr;
+ rr = r * omax;
+ if (rr <= dr) rr = 1.2 * dr; // initial guess
+ // Newton-Raphson iteration to find r
+ do {
+ lastr = rr;
+ rrc = 1. / rr;
+ z = dd - rrc; // z(r)
+ zd = rrc * rrc; // z'(r)
+ for (i=0; i<colors; i++) {
+ rt = rr * omeg[i];
+ if (rt < 100. && rt > 0.) { // avoid overflow and division by 0
+ r21 = pow2_1(rt, &r2); // r2=2^r, r21=1.-2^r
+ a = omeg[i] / r21; // omegai/(1.-2^r)
+ b = x[i] * a; // x*omegai/(1.-2^r)
+ z += b;
+ zd += b * a * r2 * LN2;
+ }
+ }
+ if (zd == 0) FatalError("can't find r in function CMultiWalleniusNCHypergeometric::findpars");
+ rr -= z / zd; // next r
+ if (rr <= dr) rr = lastr * 0.125 + dr * 0.875;
+ if (++j == 70) FatalError("convergence problem searching for r in function CMultiWalleniusNCHypergeometric::findpars");
+ }
+ while (fabs(rr-lastr) > rr * 1.E-5);
+ rd = rr * dd;
+ r = rr * omaxr;
+
+ // find peak width
+ phi2d = 0.;
+ for (i=0; i<colors; i++) {
+ ro = rr * omeg[i];
+ if (ro < 300 && ro > 0.) { // avoid overflow and division by 0
+ k1 = pow2_1(ro, &dummy);
+ k1 = -1. / k1;
+ k1 = omeg[i] * omeg[i] * (k1 + k1*k1);
+ }
+ else k1 = 0.;
+ phi2d += x[i] * k1;
+ }
+ phi2d *= -4. * rr * rr;
+ if (phi2d > 0.) FatalError("peak width undefined in function CMultiWalleniusNCHypergeometric::findpars");
+ wr = sqrt(-phi2d); w = 1. / wr;
+}
+
+
+double CMultiWalleniusNCHypergeometric::laplace(void) {
+ // Laplace's method with narrow integration interval,
+ // using error function residues table, defined in erfres.cpp
+ // Note that this function can only be used when the integrand peak is narrow.
+ // findpars() must be called before this function.
+
+ const int MAXDEG = 40; // arraysize
+ int degree; // max expansion degree
+ double accur; // stop expansion when terms below this threshold
+ double f0; // factor outside integral
+ double rho[MAXCOLORS]; // r*omegai
+ double qi; // 2^(-rho)
+ double qi1; // 1-qi
+ double qq[MAXCOLORS]; // qi / qi1
+ double eta[MAXCOLORS+1][MAXDEG+1]; // eta coefficients
+ double phideri[MAXDEG+1]; // derivatives of phi
+ double PSIderi[MAXDEG+1]; // derivatives of PSI
+ double * erfresp; // pointer to table of error function residues
+
+ // variables in asymptotic summation
+ static const double sqrt8 = 2.828427124746190098; // sqrt(8)
+ double qqpow; // qq^j
+ double pow2k; // 2^k
+ double bino; // binomial coefficient
+ double vr; // 1/v, v = integration interval
+ double v2m2; // (2*v)^(-2)
+ double v2mk1; // (2*v)^(-k-1)
+ double s; // summation term
+ double sum; // Taylor sum
+
+ int i; // loop counter for color
+ int j; // loop counter for derivative
+ int k; // loop counter for expansion degree
+ int ll; // k/2
+ int converg = 0; // number of consequtive terms below accuracy
+ int PrecisionIndex; // index into ErfRes table according to desired precision
+
+ // initialize
+ for (k = 0; k <= 2; k++) phideri[k] = PSIderi[k] = 0;
+
+ // find rho[i], qq[i], first eta coefficients, and zero'th derivative of phi
+ for (i = 0; i < colors; i++) {
+ rho[i] = r * omega[i];
+ if (rho[i] == 0.) continue;
+ if (rho[i] > 40.) {
+ qi=0.; qi1 = 1.; // avoid underflow
+ }
+ else {
+ qi1 = pow2_1(-rho[i], &qi); // qi=2^(-rho), qi1=1.-2^(-rho)
+ }
+ qq[i] = qi / qi1; // 2^(-r*omegai)/(1.-2^(-r*omegai))
+ // peak = zero'th derivative
+ phideri[0] += x[i] * log1mx(qi, qi1);
+ // eta coefficients
+ eta[i][0] = 0.;
+ eta[i][1] = eta[i][2] = rho[i]*rho[i];
+ }
+
+ // d, r, and w must be calculated by findpars()
+ // zero'th derivative
+ phideri[0] -= (rd - 1.) * LN2;
+ // scaled factor outside integral
+ f0 = rd * exp(phideri[0] + lnbico());
+ // calculate narrowed integration interval
+ vr = sqrt8 * w;
+ phideri[2] = phi2d;
+
+ // get table according to desired precision
+ PrecisionIndex = (-FloorLog2((float)accuracy) - ERFRES_B + ERFRES_S - 1) / ERFRES_S;
+ if (PrecisionIndex < 0) PrecisionIndex = 0;
+ if (PrecisionIndex > ERFRES_N-1) PrecisionIndex = ERFRES_N-1;
+ while (w * NumSDev[PrecisionIndex] > 0.3) {
+ // check if integration interval is too wide
+ if (PrecisionIndex == 0) {
+ FatalError("Laplace method failed. Peak width too high in function CWalleniusNCHypergeometric::laplace");
+ break;
+ }
+ PrecisionIndex--; // reduce precision to keep integration interval narrow
+ }
+ erfresp = ErfRes[PrecisionIndex]; // choose desired table
+
+ degree = MAXDEG; // max expansion degree
+ if (degree >= ERFRES_L*2) degree = ERFRES_L*2-2;
+
+ // set up for starting loop at k=3
+ v2m2 = 0.25 * vr * vr; // (2*v)^(-2)
+ PSIderi[0] = 1.;
+ pow2k = 8.;
+ sum = 0.5 * vr * erfresp[0];
+ v2mk1 = 0.5 * vr * v2m2 * v2m2;
+ accur = accuracy * sum;
+
+ // summation loop
+ for (k = 3; k <= degree; k++) {
+ phideri[k] = 0.;
+
+ // loop for all colors
+ for (i = 0; i < colors; i++) {
+ if (rho[i] == 0.) continue;
+ eta[i][k] = 0.;
+ // backward loop for all powers
+ for (j = k; j > 0; j--) {
+ // find coefficients recursively from previous coefficients
+ eta[i][j] = eta[i][j]*(j*rho[i]-(k-2)) + eta[i][j-1]*rho[i]*(j-1);
+ }
+ qqpow = 1.;
+ // forward loop for all powers
+ for (j = 1; j <= k; j++) {
+ qqpow *= qq[i]; // qq^j
+ // contribution to derivative
+ phideri[k] += x[i] * eta[i][j] * qqpow;
+ }
+ }
+
+ // finish calculation of derivatives
+ phideri[k] = -pow2k * phideri[k] + 2*(1-k)*phideri[k-1];
+
+ pow2k *= 2.; // 2^k
+
+ // loop to calculate derivatives of PSI from derivatives of psi.
+ // terms # 0, 1, 2, k-2, and k-1 are zero and not included in loop.
+ // The j'th derivatives of psi are identical to the derivatives of phi for j>2, and
+ // zero for j=1,2. Hence we are using phideri[j] for j>2 here.
+ PSIderi[k] = phideri[k]; // this is term # k
+ bino = 0.5 * (k-1) * (k-2); // binomial coefficient for term # 3
+ for (j=3; j < k-2; j++) { // loop for remaining nonzero terms (if k>5)
+ PSIderi[k] += PSIderi[k-j] * phideri[j] * bino;
+ bino *= double(k-j)/double(j);
+ }
+
+ if ((k & 1) == 0) { // only for even k
+ ll = k/2;
+ s = PSIderi[k] * v2mk1 * erfresp[ll];
+ sum += s;
+
+ // check for convergence of Taylor expansion
+ if (fabs(s) < accur) converg++; else converg = 0;
+ if (converg > 1) break;
+
+ // update recursive expressions
+ v2mk1 *= v2m2;
+ }
+ }
+
+ // multiply by terms outside integral
+ return f0 * sum;
+}
+
+
+double CMultiWalleniusNCHypergeometric::integrate(void) {
+ // Wallenius non-central hypergeometric distribution function
+ // calculation by numerical integration with variable-length steps
+ // NOTE: findpars() must be called before this function.
+ double s; // result of integration step
+ double sum; // integral
+ double ta, tb; // subinterval for integration step
+
+ lnbico(); // compute log of binomial coefficients
+
+ // choose method:
+ if (w < 0.02) {
+ // normal method. Step length determined by peak width w
+ double delta, s1;
+ s1 = accuracy < 1E-9 ? 0.5 : 1.;
+ delta = s1 * w; // integration steplength
+ ta = 0.5 + 0.5 * delta;
+ sum = integrate_step(1.-ta, ta); // first integration step around center peak
+ do {
+ tb = ta + delta;
+ if (tb > 1.) tb = 1.;
+ s = integrate_step(ta, tb); // integration step to the right of peak
+ s += integrate_step(1.-tb,1.-ta);// integration step to the left of peak
+ sum += s;
+ if (s < accuracy * sum) break; // stop before interval finished if accuracy reached
+ ta = tb;
+ if (tb > 0.5 + w) delta *= 2.; // increase step length far from peak
+ }
+ while (tb < 1.);
+ }
+
+ else {
+ // difficult situation. Step length determined by inflection points
+ double t1, t2, tinf, delta, delta1;
+ sum = 0.;
+ // do left and right half of integration interval separately:
+ for (t1=0., t2=0.5; t1 < 1.; t1+=0.5, t2+=0.5) {
+ // integrate from 0 to 0.5 or from 0.5 to 1
+ tinf = search_inflect(t1, t2); // find inflection point
+ delta = tinf - t1; if (delta > t2 - tinf) delta = t2 - tinf; // distance to nearest endpoint
+ delta *= 1./7.; // 1/7 will give 3 steps to nearest endpoint
+ if (delta < 1E-4) delta = 1E-4;
+ delta1 = delta;
+ // integrate from tinf forwards to t2
+ ta = tinf;
+ do {
+ tb = ta + delta1;
+ if (tb > t2 - 0.25*delta1) tb = t2; // last step of this subinterval
+ s = integrate_step(ta, tb); // integration step
+ sum += s;
+ delta1 *= 2; // double steplength
+ if (s < sum * 1E-4) delta1 *= 8.; // large step when s small
+ ta = tb;
+ }
+ while (tb < t2);
+ if (tinf) {
+ // integrate from tinf backwards to t1
+ tb = tinf;
+ do {
+ ta = tb - delta;
+ if (ta < t1 + 0.25*delta) ta = t1; // last step of this subinterval
+ s = integrate_step(ta, tb); // integration step
+ sum += s;
+ delta *= 2; // double steplength
+ if (s < sum * 1E-4) delta *= 8.; // large step when s small
+ tb = ta;
+ }
+ while (ta > t1);
+ }
+ }
+ }
+ return sum * rd;
+}
+
+
+double CMultiWalleniusNCHypergeometric::integrate_step(double ta, double tb) {
+ // integration subprocedure used by integrate()
+ // makes one integration step from ta to tb using Gauss-Legendre method.
+ // result is scaled by multiplication with exp(bico)
+ double ab, delta, tau, ltau, y, sum, taur, rdm1;
+ int i, j;
+
+ // define constants for Gauss-Legendre integration with IPOINTS points
+#define IPOINTS 8 // number of points in each integration step
+
+#if IPOINTS == 3
+ static const double xval[3] = {-.774596669241,0,0.774596668241};
+ static const double weights[3] = {.5555555555555555,.88888888888888888,.55555555555555};
+#elif IPOINTS == 4
+ static const double xval[4] = {-0.861136311594,-0.339981043585,0.339981043585,0.861136311594},
+ static const double weights[4] = {0.347854845137,0.652145154863,0.652145154863,0.347854845137};
+#elif IPOINTS == 5
+ static const double xval[5] = {-0.906179845939,-0.538469310106,0,0.538469310106,0.906179845939};
+ static const double weights[5] = {0.236926885056,0.478628670499,0.568888888889,0.478628670499,0.236926885056};
+#elif IPOINTS == 6
+ static const double xval[6] = {-0.932469514203,-0.661209386466,-0.238619186083,0.238619186083,0.661209386466,0.932469514203};
+ static const double weights[6] = {0.171324492379,0.360761573048,0.467913934573,0.467913934573,0.360761573048,0.171324492379};
+#elif IPOINTS == 8
+ static const double xval[8] = {-0.960289856498,-0.796666477414,-0.525532409916,-0.183434642496,0.183434642496,0.525532409916,0.796666477414,0.960289856498};
+ static const double weights[8] = {0.10122853629,0.222381034453,0.313706645878,0.362683783378,0.362683783378,0.313706645878,0.222381034453,0.10122853629};
+#elif IPOINTS == 12
+ static const double xval[12] = {-0.981560634247,-0.90411725637,-0.769902674194,-0.587317954287,-0.367831498998,-0.125233408511,0.125233408511,0.367831498998,0.587317954287,0.769902674194,0.90411725637,0.981560634247};
+ static const double weights[12]= {0.0471753363866,0.106939325995,0.160078328543,0.203167426723,0.233492536538,0.249147045813,0.249147045813,0.233492536538,0.203167426723,0.160078328543,0.106939325995,0.0471753363866};
+#elif IPOINTS == 16
+ static const double xval[16] = {-0.989400934992,-0.944575023073,-0.865631202388,-0.755404408355,-0.617876244403,-0.458016777657,-0.281603550779,-0.0950125098376,0.0950125098376,0.281603550779,0.458016777657,0.617876244403,0.755404408355,0.865631202388,0.944575023073,0.989400934992};
+ static const double weights[16]= {0.027152459411,0.0622535239372,0.0951585116838,0.124628971256,0.149595988817,0.169156519395,0.182603415045,0.189450610455,0.189450610455,0.182603415045,0.169156519395,0.149595988817,0.124628971256,0.0951585116838,0.0622535239372,0.027152459411};
+#else
+#error // IPOINTS must be a value for which the tables are defined
+#endif
+
+ delta = 0.5 * (tb - ta);
+ ab = 0.5 * (ta + tb);
+ rdm1 = rd - 1.;
+ sum = 0;
+
+ for (j = 0; j < IPOINTS; j++) {
+ tau = ab + delta * xval[j];
+ ltau = log(tau);
+ taur = r * ltau;
+ y = 0.;
+ for (i = 0; i < colors; i++) {
+ // possible loss of precision due to subtraction here:
+ if (omega[i]) {
+ y += log1pow(taur*omega[i],x[i]); // ln((1-e^taur*omegai)^xi)
+ }
+ }
+ y += rdm1*ltau + bico;
+ if (y > -50.) sum += weights[j] * exp(y);
+ }
+ return delta * sum;
+}
+
+
+double CMultiWalleniusNCHypergeometric::search_inflect(double t_from, double t_to) {
+ // search for an inflection point of the integrand PHI(t) in the interval
+ // t_from < t < t_to
+ double t, t1; // independent variable
+ double rho[MAXCOLORS]; // r*omega[i]
+ double q; // t^rho[i] / (1-t^rho[i])
+ double q1; // 1-t^rho[i]
+ double zeta[MAXCOLORS][4][4]; // zeta[i,j,k] coefficients
+ double phi[4]; // derivatives of phi(t) = log PHI(t)
+ double Z2; // PHI''(t)/PHI(t)
+ double Zd; // derivative in Newton Raphson iteration
+ double rdm1; // r * d - 1
+ double tr; // 1/t
+ double log2t; // log2(t)
+ double method; // 0 for z2'(t) method, 1 for z3(t) method
+ int i; // color
+ int iter; // count iterations
+
+ rdm1 = rd - 1.;
+ if (t_from == 0 && rdm1 <= 1.) return 0.; //no inflection point
+ t = 0.5 * (t_from + t_to);
+ for (i = 0; i < colors; i++) { // calculate zeta coefficients
+ rho[i] = r * omega[i];
+ zeta[i][1][1] = rho[i];
+ zeta[i][1][2] = rho[i] * (rho[i] - 1.);
+ zeta[i][2][2] = rho[i] * rho[i];
+ zeta[i][1][3] = zeta[i][1][2] * (rho[i] - 2.);
+ zeta[i][2][3] = zeta[i][1][2] * rho[i] * 3.;
+ zeta[i][3][3] = zeta[i][2][2] * rho[i] * 2.;
+ }
+ iter = 0;
+
+ do {
+ t1 = t;
+ tr = 1. / t;
+ log2t = log(t)*(1./LN2);
+ phi[1] = phi[2] = phi[3] = 0.;
+ for (i=0; i<colors; i++) { // calculate first 3 derivatives of phi(t)
+ if (rho[i] == 0.) continue;
+ q1 = pow2_1(rho[i]*log2t,&q);
+ q /= q1;
+ phi[1] -= x[i] * zeta[i][1][1] * q;
+ phi[2] -= x[i] * q * (zeta[i][1][2] + q * zeta[i][2][2]);
+ phi[3] -= x[i] * q * (zeta[i][1][3] + q * (zeta[i][2][3] + q * zeta[i][3][3]));
+ }
+ phi[1] += rdm1;
+ phi[2] -= rdm1;
+ phi[3] += 2. * rdm1;
+ phi[1] *= tr;
+ phi[2] *= tr * tr;
+ phi[3] *= tr * tr * tr;
+ method = (iter & 2) >> 1; // alternate between the two methods
+ Z2 = phi[1]*phi[1] + phi[2];
+ Zd = method*phi[1]*phi[1]*phi[1] + (2.+method)*phi[1]*phi[2] + phi[3];
+
+ if (t < 0.5) {
+ if (Z2 > 0) {
+ t_from = t;
+ }
+ else {
+ t_to = t;
+ }
+ if (Zd >= 0) {
+ // use binary search if Newton-Raphson iteration makes problems
+ t = (t_from ? 0.5 : 0.2) * (t_from + t_to);
+ }
+ else {
+ // Newton-Raphson iteration
+ t -= Z2 / Zd;
+ }
+ }
+ else {
+ if (Z2 < 0) {
+ t_from = t;
+ }
+ else {
+ t_to = t;
+ }
+ if (Zd <= 0) {
+ // use binary search if Newton-Raphson iteration makes problems
+ t = 0.5 * (t_from + t_to);
+ }
+ else {
+ // Newton-Raphson iteration
+ t -= Z2 / Zd;
+ }
+ }
+ if (t >= t_to) t = (t1 + t_to) * 0.5;
+ if (t <= t_from) t = (t1 + t_from) * 0.5;
+ if (++iter > 20) FatalError("Search for inflection point failed in function CMultiWalleniusNCHypergeometric::search_inflect");
+ }
+ while (fabs(t - t1) > 1E-5);
+ return t;
+}
+
+
+double CMultiWalleniusNCHypergeometric::probability(int32 * x_) {
+ // calculate probability function. choosing best method
+ int i, j, em;
+ int central;
+ int32 xsum;
+ x = x_;
+
+ for (xsum = i = 0; i < colors; i++) xsum += x[i];
+ if (xsum != n) {
+ FatalError("sum of x values not equal to n in function CMultiWalleniusNCHypergeometric::probability");
+ }
+
+ if (colors < 3) {
+ if (colors <= 0) return 1.;
+ if (colors == 1) return x[0] == m[0];
+ // colors = 2
+ if (omega[1] == 0.) return x[0] == m[0];
+ return CWalleniusNCHypergeometric(n,m[0],N,omega[0]/omega[1],accuracy).probability(x[0]);
+ }
+
+ central = 1;
+ for (i = j = em = 0; i < colors; i++) {
+ if (x[i] > m[i] || x[i] < 0 || x[i] < n - N + m[i]) return 0.;
+ if (x[i] > 0) j++;
+ if (omega[i] == 0. && x[i]) return 0.;
+ if (x[i] == m[i] || omega[i] == 0.) em++;
+ if (i > 0 && omega[i] != omega[i-1]) central = 0;
+ }
+
+ if (n == 0 || em == colors) return 1.;
+
+ if (central) {
+ // All omega's are equal.
+ // This is multivariate central hypergeometric distribution
+ int32 sx = n, sm = N;
+ double p = 1.;
+ for (i = 0; i < colors - 1; i++) {
+ // Use univariate hypergeometric (usedcolors-1) times
+ p *= CWalleniusNCHypergeometric(sx, m[i], sm, 1.).probability(x[i]);
+ sx -= x[i]; sm -= m[i];
+ }
+ return p;
+ }
+
+
+ if (j == 1) {
+ return binoexpand();
+ }
+
+ findpars();
+ if (w < 0.04 && E < 10 && (!em || w > 0.004)) {
+ return laplace();
+ }
+
+ return integrate();
+}
+
+
+/***********************************************************************
+Methods for CMultiWalleniusNCHypergeometricMoments
+***********************************************************************/
+
+double CMultiWalleniusNCHypergeometricMoments::moments(double * mu, double * variance, int32 * combinations) {
+ // calculates mean and variance of multivariate Wallenius noncentral
+ // hypergeometric distribution by calculating all combinations of x-values.
+ // Return value = sum of all probabilities. The deviation of this value
+ // from 1 is a measure of the accuracy.
+ // Returns the mean to mean[0...colors-1]
+ // Returns the variance to variance[0...colors-1]
+ double sumf; // sum of all f(x) values
+ int32 msum; // temporary sum
+ int i; // loop counter
+
+ // get approximate mean
+ mean(sx);
+ // round mean to integers
+ for (i=0; i < colors; i++) {
+ xm[i] = (int32)(sx[i]+0.4999999);
+ }
+
+ // set up for recursive loops
+ for (i=colors-1, msum=0; i >= 0; i--) {
+ remaining[i] = msum; msum += m[i];
+ }
+ for (i=0; i<colors; i++) sx[i] = sxx[i] = 0.;
+ sn = 0;
+
+ // recursive loops to calculate sums
+ sumf = loop(n, 0);
+
+ // calculate mean and variance
+ for (i = 0; i < colors; i++) {
+ mu[i] = sx[i]/sumf;
+ variance[i] = sxx[i]/sumf - sx[i]*sx[i]/(sumf*sumf);
+ }
+
+ // return combinations and sum
+ if (combinations) *combinations = sn;
+ return sumf;
+}
+
+
+double CMultiWalleniusNCHypergeometricMoments::loop(int32 n, int c) {
+ // recursive function to loop through all combinations of x-values.
+ // used by moments()
+ int32 x, x0; // x of color c
+ int32 xmin, xmax; // min and max of x[c]
+ double s1, s2, sum = 0.; // sum of f(x) values
+ int i; // loop counter
+
+ if (c < colors-1) {
+ // not the last color
+ // calculate min and max of x[c] for given x[0]..x[c-1]
+ xmin = n - remaining[c]; if (xmin < 0) xmin = 0;
+ xmax = m[c]; if (xmax > n) xmax = n;
+ x0 = xm[c]; if (x0 < xmin) x0 = xmin; if (x0 > xmax) x0 = xmax;
+ // loop for all x[c] from mean and up
+ for (x = x0, s2 = 0.; x <= xmax; x++) {
+ xi[c] = x;
+ sum += s1 = loop(n-x, c+1); // recursive loop for remaining colors
+ if (s1 < accuracy && s1 < s2) break; // stop when values become negligible
+ s2 = s1;
+ }
+ // loop for all x[c] from mean and down
+ for (x = x0-1; x >= xmin; x--) {
+ xi[c] = x;
+ sum += s1 = loop(n-x, c+1); // recursive loop for remaining colors
+ if (s1 < accuracy && s1 < s2) break; // stop when values become negligible
+ s2 = s1;
+ }
+ }
+ else {
+ // last color
+ xi[c] = n;
+ s1 = probability(xi);
+ for (i=0; i < colors; i++) {
+ sx[i] += s1 * xi[i];
+ sxx[i] += s1 * xi[i] * xi[i];
+ }
+ sn++;
+ sum = s1;
+ }
+ return sum;
+}
diff --git a/vignettes/UrnTheory.Rtex b/vignettes/UrnTheory.Rtex
new file mode 100644
index 0000000..2e8b52b
--- /dev/null
+++ b/vignettes/UrnTheory.Rtex
@@ -0,0 +1,492 @@
+\documentclass[a4paper]{article}
+
+% Note: Remember to edit the .Snw file, not the .tex file!
+
+%\VignetteIndexEntry{Biased Urn Theory}
+%\VignettePackage{BiasedUrn}
+
+\usepackage{amsmath}
+\usepackage{amssymb}
+%
+% \usepackage{c:/R/share/texmf/Sweave}
+\usepackage{Sweave}
+\begin{document}
+
+\title{Biased Urn Theory}
+\author{Agner Fog}
+
+\maketitle
+
+\section{Introduction}
+%
+Two different probability distributions are both known in the
+literature as ``the'' noncentral hypergeometric distribution. These
+two distributions will be called Fisher's and Wallenius' noncentral
+hypergeometric distribution, respectively.
+
+Both distributions can be associated with the classical experiment
+of taking colored balls at random from an urn without replacement.
+If the experiment is unbiased then the result will follow the well-known
+hypergeometric distribution. If the balls have different size or weight
+or whatever so that balls of one color have a higher probability of being
+taken than balls of another color then the result will be a
+noncentral hypergeometric distribution.
+
+The distribution depends on how the balls are taken from the urn.
+Wallenius' noncentral hypergeometric distribution is obtained if $n$
+balls are taken one by one. Fisher's noncentral hypergeometric
+distribution is obtained if balls are taken independently of each other.
+
+Wallenius' distribution is used in models of natural selection and biased
+sampling. Fisher's distribution is used mainly for statistical tests in
+contingency tables. Both distributions are supported in the {\tt BiasedUrn}
+package.
+
+The difference between the two noncentral hypergeometric distributions
+is difficult to understand. I am therefore providing a detailed
+explanation in the following sections.
+
+
+\section{Definition of Wallenius' noncentral hypergeometric distribution}
+%
+Assume that an urn contains $N$ balls of $c$ different colors and let
+$m_i$ be the number of balls of color $i$. Balls of color $i$ have the
+weight $\omega_i$. $n$ balls are drawn from the urn, one by one, in
+such a way that the probability of taking a particular ball at a
+particular draw is equal to this ball's fraction of the total weight of
+all balls that lie in the urn at this moment.
+
+The colors of the $n$ balls that are taken in this way will follow Wallenius'
+noncentral hypergeometric distribution. This distribution has the
+probability mass function:
+%
+$$
+\operatorname{dMWNCHypergeo}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})
+\:=\:
+\left( \prod_{i=1}^c \binom{m_i}{x_i} \right)
+\: \int_0^1 \prod_{i=1}^c
+(1-t^{{\omega_i}/{d}})^{x_i} \, \mathrm{d}t \;,
+$$
+%
+$$
+\text{where } \: d \:=\:
+\sum_{i=1}^c \omega_i(m_i-x_i) \,.
+$$
+%
+$\boldsymbol{x}=(x_1,x_2,\ldots,x_c)$
+is the number of balls drawn of each color.\\
+$\boldsymbol{m}=(m_1,m_2,\ldots,m_c)$
+is the initial number of balls of each color in the urn.\\
+$\boldsymbol{\omega}=(\omega_1,\omega_2,\ldots,\omega_c)$
+is the weight or odds of balls of each color.\\
+$n = \sum_{i=1}^c x_i$ is the total number of balls drawn.\\
+$c$ is the number of colors. The unexpected integral in this
+formula arises as the solution to a difference equation.
+(The above formula is invalid in the trivial case $n = N$.)
+
+
+\section{Definition of Fisher's noncentral hypergeometric distribution}
+%
+If the colored balls are taken from the urn in such a way that
+the probability of taking a particular ball of color $i$ is
+proportional to its weight $\omega_i$ and the probability for
+each particular ball is independent of what happens to the
+other balls, then the number of balls taken will follow a
+binomial distribution for each color.
+
+The total number of balls taken $n = \sum_{i=1}^c x_i$ is
+necessarily random and unknown prior to the experiment.
+After the experiment, we can determine $n$ and calculate the
+distribution of colors for the given value of $n$.
+This is Fisher's noncentral hypergeometric distribution, which
+is defined as the distribution of independent binomial variates
+conditional upon their sum $n$.
+
+The probability mass function of Fisher's noncentral hypergeometric
+distribution is given by
+%
+$$
+\operatorname{dMFNCHypergeo}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})
+\:=\:
+\frac{\textrm{g}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})}
+{\sum\limits_{\boldsymbol{y}\in \: \Xi}
+\textrm{g}(\boldsymbol{y};\boldsymbol{m},n,\boldsymbol{\omega})}\:,
+$$
+%
+$$
+\text{where } \: \textrm{g}(\boldsymbol{x};\boldsymbol{m},n,\boldsymbol{\omega})
+\:=\: \prod_{i=1}^c
+\binom{m_i}{x_i}\omega_i^{\,x_i}\:,
+$$
+%
+$$
+\text{and the domain }\: \Xi \:=\: \left\{\boldsymbol{x}\in\mathbb{Z}^c \,\middle|\,
+\sum_{i=1}^c x_i = n \: \wedge \:
+\forall\, i \in [1,c] \: : \: 0 \leq x_i \leq m_i \right\}\:.
+$$
+
+
+\section{Univariate distributions}
+%
+The univariate distributions are used when the number of colors
+$c$ is $2$. The multivariate distributions are used when the number
+of colors is more than $2$.
+
+The above formulas apply to any number of colors $c$. The univariate
+distributions can be expressed by setting
+$c=2$, $\:x_1=x$, $\:x_2=n-x$, $\:m_1=m$, $\:m_2=N-m$,
+$\:\omega_1=\omega$, $\:\omega_2=1$
+in the above formulas.
+
+
+\section{Name confusion}
+Wallenius' and Fisher's distribution are both known in the literature
+as ``the'' noncentral hypergeometric distribution. Fisher's distribution
+was first given the name extended hypergeometric distribution, but
+some scientists are strongly opposed to using this name.
+
+There is a widespread confusion in the literature because these two
+distributions have been given the same name and because it is not obvious that
+they are different. Several publications have used the wrong distribution
+or erroneously assumed that the two distributions were identical.
+
+I am therefore recommending to use the prefixes Wallenius' and Fisher's
+to distinguish the two noncentral hypergeometric distributions. While this
+makes the names rather long, it has the advantage of emphasizing that there
+is more than one noncentral hypergeometric distribution, whereby the
+risk of confusion is minimized.
+Wallenius and Fisher are the names of the scientists who first described each of
+these two distributions.
+
+The following section explains why the two distributions are different and
+how to decide which distribution to use in a specific situation.
+
+
+\section{The difference between the two distributions}
+%
+Both distributions degenerate into the well-known hypergeometric distribution
+when all balls have the same weight. In other words: It doesn't matter how
+the balls are sampled if the balls are unbiased. Only if the urn experiment
+is biased can we get different distributions depending on how the balls are
+sampled.
+
+It is important to understand how this dependence on the sampling procedure
+arises. In the Wallenius model, there is competition between the balls.
+The probability that a particular ball is taken is lower when the other
+balls in the urn are heavier. The probability of taking a particular ball
+at a particular draw is equal to its fraction of the total weight of the
+balls that remain in the urn at that moment. This total weight
+depends on the weight of
+the balls that have been removed in previous draws. Therefore, each draw
+except the first one has a probability distribution that depends on the
+results of the previous draws. The fact that each draw depends on the
+previous draws is what makes Wallenius' distribution unique and makes the
+calculation of it complicated. What happens to each ball depends on what
+has happened to other balls in the preceding draws.
+
+In the Fisher model, there is no such dependence between draws. We may
+as well take all $n$ balls at the same time. Each ball
+has no ``knowledge'' of what happens to the other balls. For the same
+reason, it is impossible to know the value of $n$ before the experiment.
+If we tried to fix the value of $n$ then we would have no way of
+preventing ball number $n+1$ from being taken without violating the principle
+of independence between balls. $n$ is therefore a random variable and
+the Fisher distribution is a conditional distribution which can only
+be determined after the experiment when $n$ is known. The unconditional
+distribution is $c$ independent binomials.
+
+The difference between Wallenius' and Fisher's distributions is low
+when odds ratios are
+near 1, and $n$ is low compared to $N$. The difference between the two
+distributions becomes higher when odds ratios are high and $n$ is near $N$.
+
+Consider the extreme example where an urn contains one red ball with the
+weight 1000, and a thousand white balls each with the weight 1.
+We want to calculate the probability that the red ball is not taken
+when balls are taken one by one.
+The probability that the red ball is not taken in the first draw is
+$\frac{1000}{2000} = \frac 12$. The probability that the red ball is
+not taken in the second draw, under the condition that it was not taken
+in the first draw, is $\frac{999}{1999} \approx \frac 12$.
+The probability that the red ball is
+not taken in the third draw, under the condition that it was not taken
+in the first two draws, is $\frac{998}{1998} \approx \frac 12$.
+Continuing in this way, we can calculate that the probability of not
+taking the red ball in $n$ draws is approximately $2^{-n}$ for moderate
+values of $n$.
+In other words, the probability of not taking a very heavy ball in $n$
+draws falls almost exponentially with $n$ in Wallenius' model.
+The exponential function arises because the probabilities for each draw
+are all multiplied together.
+
+This is not the case in Fisher's model where balls may be taken
+simultaneously. Here the draws are independent
+and the probabilities are therefore not multiplied together. The
+probability of not taking the heavy red ball in Fisher's model is approximately
+$\frac{1}{n+1}$. The two distributions are therefore very different
+in this extreme case.
+\vskip 5mm
+
+The following conditions must be fulfilled for Wallenius' distribution
+to be applicable:
+%
+\begin{itemize}
+%
+\item Items are taken randomly from a finite source containing different
+kinds of items without replacement.
+%
+\item Items are drawn one by one.
+%
+\item The probability of taking a particular item at a particular draw is equal
+to its fraction of the total weight of all items that have not yet been taken at that
+moment. The weight of an item depends only on its kind (color) $i$.
+(It is convenient to use the word ``weight'' for $\omega_i$ even if the
+physical property that determines the odds is something else than weight).
+%
+\item The total number $n$ of items to take is fixed and independent of
+which items happen to be taken.
+%
+\end{itemize}
+\vskip 5mm
+
+The following conditions must be fulfilled for Fisher's distribution
+to be applicable:
+%
+\begin{itemize}
+%
+\item Items are taken randomly from a finite source containing different
+kinds of items without replacement.
+%
+\item Items are taken independently of each other. Whether one item is taken
+is independent of whether another item is taken. Whether one item is taken
+before, after, or simultaneously with another item is irrelevant.
+%
+\item The probability of taking a particular item is proportional to its weight.
+The weight of an item depends only on its kind (color) $i$.
+%
+\item The total number $n$ of items that will be taken is not known
+before the experiment.
+%
+\item $n$ is determined after the experiment and the conditional distribution
+for $n$ known is desired.
+%
+\end{itemize}
+
+
+\section{Examples}
+%
+The following examples will further clarify which distribution to use in different
+situations.
+
+\subsection{Example 1}
+You are catching fish in a small lake that contains a limited number of fish.
+There are different kinds of fish with different weights. The probability of
+catching a particular fish is proportional to its weight when you only catch
+one fish.
+
+You are catching the fish one by one with a fishing rod. You have been ordered
+to catch $n$ fish. You are determined to catch exactly $n$ fish regardless of
+how long time it may take. You are stopping after you have caught $n$ fish
+even if you can see more fish that are tempting you.
+
+This scenario will give a distribution of the types of fish caught that is equal to
+Wallenius' noncentral hypergeometric distribution.
+
+\subsection{Example 2}
+You are catching fish as in example 1, but you are using a big net.
+You are setting up the net one day and coming back the next day to
+remove the net. You count how many fish you have caught and then you go
+home regardless of how many fish you have caught.
+
+Each fish has a probability of getting into the net that is proportional
+to its weight but independent of what happens to the other fish.
+
+This scenario gives Fisher's noncentral hypergeometric distribution after
+$n$ is known.
+
+\subsection{Example 3}
+You are catching fish with a small net. It is possible that more than one
+fish can go into the net at the same time. You are using the net multiple
+times until you have at least $n$ fish.
+
+This scenario gives a distribution that lies between Wallenius' and Fisher's
+distributions. The total number of fish caught can vary if you are getting too
+many fish in the last catch. You may put the excess fish back into the lake,
+but this still doesn't give Wallenius' distribution. This is because you
+are catching multiple fish at the same time. The condition that each catch
+depends on all previous catches does not hold for fish that are caught
+simultaneously or in the same operation.
+
+The resulting distribution will be close to Wallenius' distribution if
+there are only few fish in the net in each catch and you are catching
+many times.
+
+The resulting distribution will be close to Fisher's distribution if
+there are many fish in the net in each catch and you are catching
+few times.
+
+\subsection{Example 4}
+You are catching fish with a big net. Fish are swimming into the net
+randomly in a situation that resembles a Poisson process. You are
+watching the net all the time and take up the net as soon as you have
+caught exactly $n$ fish.
+
+The resulting distribution will be close to Fisher's distribution
+because the fish swim into the net independently of each other.
+But the fates of the fish are not totally independent because
+a particular fish can be saved from getting caught if $n$ other
+fish happen to get into the net before the time that this particular
+fish would have been caught. This is more likely to happen if the other
+fish are heavy than if they are light.
+
+\subsection{Example 5}
+You are catching fish one by one with a fishing rod as in example 1.
+You need a particular amount of fish in order to feed your family.
+You are stopping when the total weight of the fish you have caught
+exceeds a predetermined limit.
+
+The resulting distribution will be close to Wallenius' distribution,
+but not exactly because the decision to stop depends on the weight of
+the fish you have caught so far. $n$ is therefore not known exactly
+before the fishing trip.
+
+\subsection{Conclusion}
+These examples show that the distribution of the types of
+fish you catch depends on the way they are caught. Many situations
+will give a distribution that lies somewhere between Wallenius'
+and Fisher's noncentral hypergeometric distributions.
+
+An interesting consequence of the difference between these two
+distributions is that you will get more of the heavy fish, on average,
+if you catch $n$ fish one by one than if you catch all $n$
+at the same time.
+
+These conclusions can of course be applied to biased sampling of
+other items than fish.
+
+
+\section{Applications}
+%
+The biased urn models can be applied to many different situations
+where items are sampled with bias and without replacement.
+
+\subsection{\tt Calculating probabilities etc.}
+Probabilities, mean and variance can be calculated with the appropriate
+functions. More complicated systems, such as the natural selection
+of animals, can be treated with Monte Carlo simulation, using the
+random variate generating functions.
+
+\subsection{\tt Measuring odds ratios}
+The odds of a sampling process can be measured by an experiment or
+a series of experiments where the number of items sampled of
+each kind (color) is counted.
+
+It is recommended to use sampling with replacement if possible.
+Sampling with replacement makes it possible to use the binomial
+distribution, whereby the calculation of the odds becomes simpler
+and more accurate. If sampling with replacement is not possible,
+then the procedure of sampling without replacement must be
+carefully controlled in order to get a pure Wallenius' distribution
+or a pure Fisher's distribution rather than a mixture of the two,
+as explained in the examples above.
+Use the {\tt odds} functions to calculate the odds ratios from
+experimental values of the mean.
+
+\subsection{\tt Estimating the number of items of a particular kind
+from experimental sampling}
+It is possible to estimate the number of items of a particular kind,
+for example defective items in a production, from biased sampling.
+The traditional procedure is to use unbiased sampling.
+But a model of biased sampling may be used if bias is unavoidable
+or if bias is desired in order to increase the probability of
+detecting e.g. defective items.
+
+It is recommended to use sampling with replacement if possible.
+Sampling with replacement makes it possible to use the binomial
+distribution, whereby the calculation of the number of items
+becomes simpler and more accurate. If sampling with replacement
+is not possible, then the procedure of sampling without
+replacement must be carefully controlled in order to get a pure
+Wallenius' distribution or a pure Fisher's distribution rather
+than a mixture of the two, as explained in the examples above.
+The value of the bias (odds ratio) must be determined before
+the numbers can be calculated.
+
+Use the functions with names beginning with ``{\tt num}'' to
+calculate the number of items of each kind from the result
+of a sampling experiment with known odds ratios.
+
+
+\section{Demos}
+%
+The following demos are included in the {\tt BiasedUrn} package:
+
+\subsection{\tt CompareHypergeo}
+%
+This demo shows the difference between the hypergeometric distribution
+and the two noncentral hypergeometric distributions by plotting
+the probability mass functions.
+
+\subsection{\tt ApproxHypergeo}
+%
+This demo shows shows that the two noncentral hypergeometric
+distributions are approximately equal when the parameters are
+adjusted so that they have the same mean rather than the same odds.
+
+\subsection{\tt OddsPrecision}
+%
+Calculates the precision of the {\tt oddsWNCHypergeo} and {\tt oddsFNCHypergeo}
+functions that are used for estimating the odds from a measured mean.
+
+\subsection{\tt SampleWallenius}
+%
+Makes 100,000 random samples from Wallenius noncentral hypergeometric
+distribution and compares the measured mean with the theoretical mean.
+
+\subsection{\tt UrnTheory}
+%
+Displays this document.
+
+
+\section{Calculation methods}
+%
+The {\tt BiasedUrn} package can calculate the univariate
+and multivariate
+Wallenius' and Fisher's noncentral hypergeometric distributions.
+Several different calculation methods are used, depending on the
+parameters.
+
+The calculation methods and sampling methods are documented at \\
+{\tt http://www.agner.org/random/theory/}.
+
+\section{References}
+
+\noindent Fog, A. (2008a). Calculation Methods for Wallenius'
+Noncentral Hypergeometric Distribution.
+{\it Communications in Statistics, Simulation and Computation}.
+Vol. 37, no. 2, pp 258-273.
+
+\vskip 3mm
+%
+\noindent Fog, A. (2008b). Sampling Methods for Wallenius'
+and Fisher's Noncentral Hypergeometric Distributions.
+{\it Communications in Statistics, Simulation and Computation}.
+Vol. 37, no. 2, pp 241-257.
+
+\vskip 3mm
+%
+\noindent Johnson, N. L., Kemp, A. W. Kotz, S. (2005). {\it
+Univariate Discrete Distributions}. Hoboken, New Jersey: Wiley and
+Sons.
+
+\vskip 3mm
+%
+\noindent McCullagh, P., Nelder, J. A. (1983). {\it Generalized
+Linear Models}. London: Chapman \& Hall.
+
+\vskip 3mm
+%
+\noindent {\tt http://www.agner.org/random/theory/}.
+
+
+\end{document}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/r-cran-biasedurn.git
More information about the debian-med-commit
mailing list